├── .gitignore ├── .prettierrc ├── README.md ├── client ├── android │ ├── api-reference.mdx │ ├── introduction.mdx │ └── transports │ │ ├── daily.mdx │ │ ├── gemini-websocket.mdx │ │ └── openai-webrtc.mdx ├── c++ │ ├── api-reference.mdx │ ├── introduction.mdx │ └── transport.mdx ├── introduction.mdx ├── ios │ ├── api-reference.mdx │ ├── introduction.mdx │ └── transports │ │ ├── daily.mdx │ │ ├── gemini-websocket.mdx │ │ └── openai-webrtc.mdx ├── js │ ├── api-reference │ │ ├── actions.mdx │ │ ├── callbacks.mdx │ │ ├── client-constructor.mdx │ │ ├── client-methods.mdx │ │ ├── configuration.mdx │ │ ├── errors.mdx │ │ ├── messages.mdx │ │ └── services.mdx │ ├── helpers │ │ ├── introduction.mdx │ │ └── llm.mdx │ ├── introduction.mdx │ └── transports │ │ ├── daily.mdx │ │ ├── gemini.mdx │ │ ├── openai-webrtc.mdx │ │ ├── realtime.mdx │ │ ├── small-webrtc.mdx │ │ └── transport.mdx ├── react-native │ ├── api-reference.mdx │ └── introduction.mdx └── react │ ├── components.mdx │ ├── hooks.mdx │ └── introduction.mdx ├── examples.mdx ├── favicon.svg ├── getting-started ├── core-concepts.mdx ├── installation.mdx ├── next-steps.mdx ├── overview.mdx └── quickstart.mdx ├── guides ├── deployment │ ├── cerebrium.mdx │ ├── fly.mdx │ ├── images │ │ ├── deployment-1.png │ │ ├── deployment-fly.png │ │ └── modal.jpg │ ├── modal.mdx │ ├── overview.mdx │ ├── pattern.mdx │ └── pipecat-cloud.mdx ├── features │ ├── gemini-multimodal-live.mdx │ ├── krisp.mdx │ ├── metrics.mdx │ ├── openai-audio-models-and-apis.mdx │ └── pipecat-flows.mdx ├── fundamentals │ ├── detecting-user-idle.mdx │ ├── end-pipeline.mdx │ ├── function-calling.mdx │ ├── recording-audio.mdx │ ├── recording-transcripts.mdx │ └── user-input-muting.mdx ├── introduction.mdx └── telephony │ ├── daily-webrtc.mdx │ ├── dialout.mdx │ ├── images │ └── twilio-webhook-setup.png │ ├── overview.mdx │ ├── twilio-daily-webrtc.mdx │ └── twilio-websockets.mdx ├── images ├── allow-krisp.png ├── architecture-1.png ├── architecture-2.png ├── 
async-frame-processor.svg ├── async-parallel-pipeline.svg ├── checks-passed.png ├── food-ordering-flow.png ├── gemini-client-final.png ├── hero-dark.svg ├── hero-light.svg ├── krisp-portal.png ├── open-anyway-krisp.png ├── openai-cascade.jpg ├── openai-s2s.jpg ├── openai-twilio.png ├── simple-pipeline.svg ├── sync-frame-processor.svg └── sync-parallel-pipeline.svg ├── logo ├── dark.svg ├── light.svg ├── pipecat-logo.png ├── pipecat-logo.svg └── pipecat-social.png ├── mint.json ├── pipecat-docs.png ├── server ├── base-classes │ ├── introduction.mdx │ ├── llm.mdx │ ├── mcp │ │ └── mcp.mdx │ ├── media.mdx │ ├── speech.mdx │ └── transport.mdx ├── frameworks │ ├── flows │ │ └── pipecat-flows.mdx │ └── rtvi │ │ ├── introduction.mdx │ │ ├── rtvi-observer.mdx │ │ └── rtvi-processor.mdx ├── introduction.mdx ├── links │ └── server-reference.mdx ├── pipeline │ ├── heartbeats.mdx │ ├── parallel-pipeline.mdx │ ├── pipeline-idle-detection.mdx │ ├── pipeline-params.mdx │ └── pipeline-task.mdx ├── services │ ├── analytics │ │ └── sentry.mdx │ ├── image-generation │ │ ├── fal.mdx │ │ ├── google-imagen.mdx │ │ └── openai.mdx │ ├── llm │ │ ├── anthropic.mdx │ │ ├── aws.mdx │ │ ├── azure.mdx │ │ ├── cerebras.mdx │ │ ├── deepseek.mdx │ │ ├── fireworks.mdx │ │ ├── gemini.mdx │ │ ├── google-vertex.mdx │ │ ├── grok.mdx │ │ ├── groq.mdx │ │ ├── nim.mdx │ │ ├── ollama.mdx │ │ ├── openai.mdx │ │ ├── openpipe.mdx │ │ ├── openrouter.mdx │ │ ├── perplexity.mdx │ │ ├── qwen.mdx │ │ └── together.mdx │ ├── memory │ │ └── mem0.mdx │ ├── s2s │ │ ├── aws.mdx │ │ ├── gemini.mdx │ │ └── openai.mdx │ ├── stt │ │ ├── assemblyai.mdx │ │ ├── aws.mdx │ │ ├── azure.mdx │ │ ├── deepgram.mdx │ │ ├── fal.mdx │ │ ├── gladia.mdx │ │ ├── google.mdx │ │ ├── groq.mdx │ │ ├── openai.mdx │ │ ├── riva.mdx │ │ ├── ultravox.mdx │ │ └── whisper.mdx │ ├── supported-services.mdx │ ├── transport │ │ ├── daily.mdx │ │ ├── fastapi-websocket.mdx │ │ ├── small-webrtc.mdx │ │ └── websocket-server.mdx │ ├── tts │ │ ├── aws.mdx │ │ 
├── azure.mdx │ │ ├── cartesia.mdx │ │ ├── deepgram.mdx │ │ ├── elevenlabs.mdx │ │ ├── fish.mdx │ │ ├── google.mdx │ │ ├── groq.mdx │ │ ├── lmnt.mdx │ │ ├── minimax.mdx │ │ ├── neuphonic.mdx │ │ ├── openai.mdx │ │ ├── piper.mdx │ │ ├── playht.mdx │ │ ├── rime.mdx │ │ ├── riva.mdx │ │ ├── sarvam.mdx │ │ └── xtts.mdx │ ├── video │ │ ├── simli.mdx │ │ └── tavus.mdx │ └── vision │ │ └── moondream.mdx └── utilities │ ├── audio │ ├── audio-recording.mdx │ ├── koala-filter.mdx │ ├── krisp-filter.mdx │ ├── noisereduce-filter.mdx │ ├── silero-vad-analyzer.mdx │ └── soundfile-mixer.mdx │ ├── daily │ └── rest-helpers.mdx │ ├── dtmf-aggregator.mdx │ ├── filters │ ├── frame-filter.mdx │ ├── function-filter.mdx │ ├── identify-filter.mdx │ ├── null-filter.mdx │ ├── stt-mute.mdx │ ├── wake-check-filter.mdx │ └── wake-notifier-filter.mdx │ ├── frame │ └── producer-consumer.mdx │ ├── interruption-strategies.mdx │ ├── observers │ ├── debug-observer.mdx │ ├── llm-observer.mdx │ ├── observer-pattern.mdx │ ├── transcription-observer.mdx │ └── turn-tracking-observer.mdx │ ├── opentelemetry.mdx │ ├── serializers │ ├── introduction.mdx │ ├── plivo.mdx │ ├── telnyx.mdx │ └── twilio.mdx │ ├── smart-turn │ ├── fal-smart-turn.mdx │ ├── local-coreml-smart-turn.mdx │ └── smart-turn-overview.mdx │ ├── text │ ├── markdown-text-filter.mdx │ └── pattern-pair-aggregator.mdx │ ├── transcript-processor.mdx │ └── user-idle-processor.mdx └── snippets └── snippet-intro.mdx /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "tabWidth": 2, 4 | "useTabs": false, 5 | "singleQuote": false 6 | } 7 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 |

2 | pipecat 3 |

4 | 5 | Welcome to the Pipecat documentation repository! This project contains the official [documentation](https://docs.pipecat.ai) for the [Pipecat](https://github.com/pipecat-ai/pipecat) open-source project. 6 | 7 | This repository is deployed on [docs.pipecat.ai](https://docs.pipecat.ai). 8 | 9 | ## What is Pipecat 10 | 11 | `pipecat` is a framework for building voice (and multimodal) conversational agents. Things like personal coaches, meeting assistants, [storytelling toys for kids](https://storytelling-chatbot.fly.dev/), customer support bots, [intake flows](https://www.youtube.com/watch?v=lDevgsp9vn0), and snarky social companions. 12 | 13 | ## Documentation Structure 14 | 15 | This repository is dedicated to maintaining up-to-date, high-quality documentation to support users and contributors. Here you’ll find: 16 | 17 | - **User Guides**: Step-by-step instructions to get started with Pipecat. 18 | - **API Documentation**: Detailed API references. 19 | - **Tutorials**: Hands-on tutorials to help you automate your workflows. 20 | 21 | ## Contributing to the Documentation 22 | 23 | We welcome contributions of all kinds! Whether you're fixing a typo, adding a new section, or improving the readability of the existing content, your help is appreciated. Follow these steps to get involved: 24 | 25 | 1. **Fork this repository**: Start by forking the Pipecat Documentation repository to your GitHub account. 26 | 27 | 2. **Clone the repository**: Clone your forked repository to your local machine. 28 | ```bash 29 | git clone https://github.com/your-username/docs 30 | ``` 31 | 3. **Create a branch**: For your contribution, create a new branch. 32 | ```bash 33 | git checkout -b your-branch-name 34 | ``` 35 | 4. **Make your changes**: Edit or add files as necessary. 36 | 5. **Test your changes**: Ensure that your changes look correct and follow the style guide. Refer to the **Development** section to test the portal locally. 37 | 6. 
**Commit your changes**: Once you're satisfied with your changes, commit them with a meaningful message. 38 | 39 | ```bash 40 | git commit -m "Description of your changes" 41 | ``` 42 | 43 | 7. **Push your changes**: Push your branch to your forked repository. 44 | 45 | ```bash 46 | git push origin your-branch-name 47 | ``` 48 | 49 | 8. **Submit a Pull Request (PR)**: Open a PR from your forked repository to the main branch of this repo. Describe the changes you've made clearly. 50 | 51 | Our maintainers will review your PR, and once everything is good, your contributions will be merged! 52 | 53 | ### Development 54 | 55 | The documentation uses [Mintlify](https://mintlify.com/) to render beautifully. Mintlify comes with a set of [components](https://mintlify.com/docs/content/components/) to help you write better, more interactive documentation. 56 | 57 | Install the [Mintlify CLI](https://www.npmjs.com/package/mintlify) to preview the documentation changes locally. To install, use the following command: 58 | 59 | ``` 60 | npm i -g mintlify 61 | ``` 62 | 63 | Run the following command at the root of your documentation (where `mint.json` is): 64 | 65 | ``` 66 | mintlify dev 67 | ``` 68 | 69 | Open `http://localhost:3000` in your browser and check your changes. 70 | 71 | #### Troubleshooting 72 | 73 | - Mintlify dev isn't running - Run `mintlify install`; it'll re-install dependencies. 
74 | - Page loads as a 404 - Make sure you are running in a folder with `mint.json` 75 | -------------------------------------------------------------------------------- /client/android/api-reference.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "API Reference" 3 | url: "https://docs-android.rtvi.ai/" 4 | --- 5 | -------------------------------------------------------------------------------- /client/android/introduction.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "SDK Introduction" 3 | description: "Build Android applications with Pipecat's Kotlin client library" 4 | --- 5 | 6 | The Pipecat Android SDK provides a Kotlin implementation for building voice and multimodal AI applications on Android. It handles: 7 | 8 | - Real-time audio and video streaming 9 | - Bot communication and state management 10 | - Media device handling 11 | - Configuration management 12 | - Event handling 13 | 14 | ## Installation 15 | 16 | Add the dependency for your chosen transport to your `build.gradle` file. For example, to use the Daily transport: 17 | 18 | ```gradle 19 | implementation "ai.pipecat:daily-transport:0.3.3" 20 | ``` 21 | 22 | ## Example 23 | 24 | Here's a simple example using Daily as the transport layer. Note that the `clientConfig` is optional and depends 25 | on what is required by the bot backend. 
26 | 27 | ```kotlin 28 | val clientConfig = listOf( 29 | ServiceConfig( 30 | service = "llm", 31 | options = listOf( 32 | Option("model", "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"), 33 | Option("messages", Value.Array( 34 | Value.Object( 35 | "role" to Value.Str("system"), 36 | "content" to Value.Str("You are a helpful assistant.") 37 | ) 38 | )) 39 | ) 40 | ), 41 | ServiceConfig( 42 | service = "tts", 43 | options = listOf( 44 | Option("voice", "79a125e8-cd45-4c13-8a67-188112f4dd22") 45 | ) 46 | ) 47 | ) 48 | 49 | val callbacks = object : RTVIEventCallbacks() { 50 | override fun onBackendError(message: String) { 51 | Log.e(TAG, "Error from backend: $message") 52 | } 53 | } 54 | 55 | val options = RTVIClientOptions( 56 | services = listOf(ServiceRegistration("llm", "together"), ServiceRegistration("tts", "cartesia")), 57 | params = RTVIClientParams(baseUrl = "", config = clientConfig) 58 | ) 59 | 60 | val client = RTVIClient(DailyTransport.Factory(context), callbacks, options) 61 | client.connect().await() // Using Coroutines 62 | 63 | // Or using callbacks: 64 | // client.start().withCallback { /* handle completion */ } 65 | ``` 66 | 67 | ## Documentation 68 | 69 | 70 | 75 | Complete SDK API documentation 76 | 77 | 78 | 79 | WebRTC implementation using Daily 80 | 81 | 82 | -------------------------------------------------------------------------------- /client/android/transports/daily.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Daily WebRTC Transport" 3 | description: "WebRTC implementation for Android using Daily" 4 | --- 5 | 6 | The Daily transport implementation enables real-time audio and video communication in your Pipecat Android applications using [Daily's](https://www.daily.co/) WebRTC infrastructure. 
7 | 8 | ## Installation 9 | 10 | Add the Daily transport dependency to your `build.gradle`: 11 | 12 | ```gradle 13 | implementation "ai.pipecat:daily-transport:0.3.3" 14 | ``` 15 | 16 | ## Usage 17 | 18 | Create a client using the Daily transport: 19 | 20 | ```kotlin 21 | val callbacks = object : RTVIEventCallbacks() { 22 | override fun onBackendError(message: String) { 23 | Log.e(TAG, "Error from backend: $message") 24 | } 25 | } 26 | 27 | val options = RTVIClientOptions( 28 | services = listOf(ServiceRegistration("llm", "together"), ServiceRegistration("tts", "cartesia")), 29 | params = RTVIClientParams(baseUrl = "", config = clientConfig) 30 | ) 31 | 32 | val client = RTVIClient(DailyTransport.Factory(context), callbacks, options) 33 | client.connect().await() 34 | ``` 35 | 36 | ## Configuration 37 | 38 | Your server endpoint should return Daily-specific configuration: 39 | 40 | ```json 41 | { 42 | "url": "https://your-domain.daily.co/room-name", 43 | "token": "your-daily-token" 44 | } 45 | ``` 46 | 47 | ## Resources 48 | 49 | 50 | 56 | Simple Chatbot Demo 57 | 58 | 59 | 65 | Client Transports 66 | 67 | 68 | 69 | 74 | Complete API documentation for the Daily transport implementation 75 | -------------------------------------------------------------------------------- /client/android/transports/gemini-websocket.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Gemini Live Websocket Transport" 3 | description: "Websocket implementation for Android using Gemini" 4 | --- 5 | 6 | The Gemini Live Websocket transport implementation enables real-time audio communication with the Gemini Multimodal Live service, using a direct websocket connection. 7 | 8 | 9 | Transports of this type are designed primarily for development and testing 10 | purposes. 
For production applications, you will need to build a server 11 | component with a server-friendly transport, like the 12 | [DailyTransport](./daily), to securely handle API keys. 13 | 14 | 15 | ## Installation 16 | 17 | Add the transport dependency to your `build.gradle`: 18 | 19 | ```gradle 20 | implementation "ai.pipecat:gemini-live-websocket-transport:0.3.3" 21 | ``` 22 | 23 | ## Usage 24 | 25 | Create a client: 26 | 27 | ```kotlin 28 | val transport = GeminiLiveWebsocketTransport.Factory(context) 29 | 30 | val options = RTVIClientOptions( 31 | params = RTVIClientParams( 32 | baseUrl = null, 33 | config = GeminiLiveWebsocketTransport.buildConfig( 34 | apiKey = "", 35 | generationConfig = Value.Object( 36 | "speech_config" to Value.Object( 37 | "voice_config" to Value.Object( 38 | "prebuilt_voice_config" to Value.Object( 39 | "voice_name" to Value.Str("Puck") 40 | ) 41 | ) 42 | ) 43 | ), 44 | initialUserMessage = "How tall is the Eiffel Tower?" 45 | ) 46 | ) 47 | ) 48 | 49 | val client = RTVIClient(transport, callbacks, options) 50 | 51 | client.start().withCallback { 52 | // ... 53 | } 54 | ``` 55 | 56 | ## Resources 57 | 58 | 59 | 65 | Simple Chatbot Demo 66 | 67 | 68 | 74 | Client Transports 75 | 76 | 77 | 78 | 83 | Complete API documentation for the Pipecat Android client. 84 | 85 | -------------------------------------------------------------------------------- /client/android/transports/openai-webrtc.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "OpenAI Realtime WebRTC Transport" 3 | description: "WebRTC implementation for Android using OpenAI" 4 | --- 5 | 6 | The OpenAI Realtime WebRTC transport implementation enables real-time audio communication with the OpenAI Realtime service, using a direct WebRTC connection. 
7 | 8 | ## Installation 9 | 10 | Add the transport dependency to your `build.gradle`: 11 | 12 | ```gradle 13 | implementation "ai.pipecat:openai-realtime-webrtc-transport:0.3.3" 14 | ``` 15 | 16 | ## Usage 17 | 18 | Create a client: 19 | 20 | ```kotlin 21 | val transport = OpenAIRealtimeWebRTCTransport.Factory(context) 22 | 23 | val options = RTVIClientOptions( 24 | params = RTVIClientParams( 25 | baseUrl = null, 26 | config = OpenAIRealtimeWebRTCTransport.buildConfig( 27 | apiKey = apiKey, 28 | initialMessages = listOf( 29 | LLMContextMessage(role = "user", content = "How tall is the Eiffel Tower?") 30 | ), 31 | initialConfig = OpenAIRealtimeSessionConfig( 32 | voice = "ballad", 33 | turnDetection = Value.Object("type" to Value.Str("semantic_vad")), 34 | inputAudioNoiseReduction = Value.Object("type" to Value.Str("near_field")), 35 | inputAudioTranscription = Value.Object("model" to Value.Str("gpt-4o-transcribe")) 36 | ) 37 | ) 38 | ) 39 | ) 40 | 41 | val client = RTVIClient(transport, callbacks, options) 42 | 43 | client.start().withCallback { 44 | // ... 45 | } 46 | ``` 47 | 48 | ## Resources 49 | 50 | 51 | 57 | Simple Chatbot Demo 58 | 59 | 60 | 66 | Client Transports 67 | 68 | 69 | 70 | 75 | Complete API documentation for the Pipecat Android client. 
76 | 77 | -------------------------------------------------------------------------------- /client/c++/api-reference.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "API Reference" 3 | url: "https://github.com/pipecat-ai/pipecat-client-cxx" 4 | --- 5 | -------------------------------------------------------------------------------- /client/c++/introduction.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "SDK Introduction" 3 | description: "Build native applications with Pipecat’s C++ client library" 4 | --- 5 | 6 | The Pipecat C++ SDK provides a native implementation for building voice and multimodal AI applications. It supports: 7 | 8 | - Linux (`x86_64` and `aarch64`) 9 | - macOS (`aarch64`) 10 | - Windows (`x86_64`) 11 | 12 | ## Dependencies 13 | 14 | ### libcurl 15 | 16 | The SDK uses [libcurl](https://curl.se/libcurl/) for HTTP requests. 17 | 18 | 19 | 20 | 21 | ```bash 22 | sudo apt-get install libcurl4-openssl-dev 23 | ``` 24 | 25 | 26 | 27 | 28 | On macOS `libcurl` is already included so there is nothing to install. 29 | 30 | 31 | 32 | 33 | On Windows we use [vcpkg](https://vcpkg.io/en/) to install dependencies. You 34 | need to set it up following one of the 35 | [tutorials](https://learn.microsoft.com/en-us/vcpkg/get_started/get-started). 36 | 37 | The `libcurl` dependency will be automatically downloaded when building. 38 | 39 | 40 | 41 | 42 | ## Installation 43 | 44 | Build the SDK using CMake: 45 | 46 | 47 | 48 | ```bash 49 | cmake . -G Ninja -Bbuild -DCMAKE_BUILD_TYPE=Release 50 | ninja -C build 51 | ``` 52 | 53 | 54 | 55 | ```bash 56 | # Initialize Visual Studio environment 57 | "C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC\Auxiliary\Build\vcvarsall.bat" amd64 58 | # Configure and build 59 | cmake . 
-Bbuild --preset vcpkg 60 | cmake --build build --config Release 61 | ``` 62 | 63 | 64 | 65 | 66 | ### Cross-compilation 67 | 68 | For Linux aarch64: 69 | 70 | ```bash 71 | cmake . -G Ninja -Bbuild -DCMAKE_TOOLCHAIN_FILE=aarch64-linux-toolchain.cmake -DCMAKE_BUILD_TYPE=Release 72 | ninja -C build 73 | ``` 74 | 75 | ## Documentation 76 | 77 | 78 | 83 | Complete SDK API documentation 84 | 85 | 86 | 87 | WebRTC implementation using Daily 88 | 89 | 90 | -------------------------------------------------------------------------------- /client/c++/transport.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Daily WebRTC Transport" 3 | description: "WebRTC implementation for C++ using Daily" 4 | --- 5 | 6 | The Daily transport implementation enables real-time audio and video communication in your Pipecat C++ applications using [Daily's](https://www.daily.co/) WebRTC infrastructure. 7 | 8 | ## Dependencies 9 | 10 | ### Daily Core C++ SDK 11 | 12 | Download the [Daily Core C++ SDK](https://github.com/daily-co/daily-core-sdk) from the [available releases](https://github.com/daily-co/daily-core-sdk/releases) for your platform and set: 13 | 14 | ```bash 15 | export DAILY_CORE_PATH=/path/to/daily-core-sdk 16 | ``` 17 | 18 | ### Pipecat C++ SDK 19 | 20 | Build the base [Pipecat C++ SDK](https://github.com/pipecat-ai/pipecat-client-cxx-daily) first and set: 21 | 22 | ```bash 23 | export PIPECAT_SDK_PATH=/path/to/pipecat-client-cxx 24 | ``` 25 | 26 | ## Building 27 | 28 | First, set a few environment variables: 29 | 30 | ```bash 31 | PIPECAT_SDK_PATH=/path/to/pipecat-client-cxx 32 | DAILY_CORE_PATH=/path/to/daily-core-sdk 33 | ``` 34 | 35 | Then, build the project: 36 | 37 | 38 | 39 | 40 | ```bash 41 | cmake . 
-G Ninja -Bbuild -DCMAKE_BUILD_TYPE=Release 42 | ninja -C build 43 | ``` 44 | 45 | 46 | 47 | 48 | ```bash 49 | # Initialize Visual Studio environment 50 | "C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC\Auxiliary\Build\vcvarsall.bat" amd64 51 | 52 | # Configure and build 53 | cmake . -Bbuild --preset vcpkg 54 | cmake --build build --config Release 55 | ``` 56 | 57 | 58 | 59 | 60 | ## Examples 61 | 62 | 63 | 68 | Simple C++ implementation example 69 | 70 | 71 | {" "} 72 | 73 | 78 | C++ client with PortAudio support 79 | 80 | 81 | 86 | Example Node.js proxy implementation 87 | 88 | 89 | -------------------------------------------------------------------------------- /client/ios/api-reference.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "API Reference" 3 | url: "https://docs-ios.pipecat.ai/" 4 | --- 5 | -------------------------------------------------------------------------------- /client/ios/introduction.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "SDK Introduction" 3 | description: "Build iOS applications with Pipecat’s Swift client library" 4 | --- 5 | 6 | The Pipecat iOS SDK provides a Swift implementation for building voice and multimodal AI applications on iOS. 
It handles: 7 | 8 | - Real-time audio streaming 9 | - Bot communication and state management 10 | - Media device handling 11 | - Configuration management 12 | - Event handling 13 | 14 | ## Installation 15 | 16 | Add the SDK to your project using Swift Package Manager: 17 | 18 | ```swift 19 | // Core SDK 20 | .package(url: "https://github.com/pipecat-ai/pipecat-client-ios.git", from: "0.3.0"), 21 | 22 | // Daily transport implementation 23 | .package(url: "https://github.com/pipecat-ai/pipecat-client-ios-daily.git", from: "0.3.0"), 24 | ``` 25 | 26 | Then add the dependencies to your target: 27 | 28 | ```swift 29 | .target(name: "YourApp", dependencies: [ 30 | .product(name: "PipecatClientIOS", package: "pipecat-client-ios") 31 | .product(name: "PipecatClientIOSDaily", package: "pipecat-client-ios-daily") 32 | ]), 33 | ``` 34 | 35 | ## Example 36 | 37 | Here's a simple example using Daily as the transport layer: 38 | 39 | ```swift 40 | import PipecatClientIOS 41 | import PipecatClientIOSDaily 42 | 43 | let clientConfig = [ 44 | ServiceConfig( 45 | service: "llm", 46 | options: [ 47 | Option(name: "model", value: .string("meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo")), 48 | Option(name: "messages", value: .array([ 49 | .object([ 50 | "role" : .string("system"), 51 | "content": .string("You are a helpful assistant.") 52 | ]) 53 | ])) 54 | ] 55 | ), 56 | ServiceConfig( 57 | service: "tts", 58 | options: [ 59 | Option(name: "voice", value: .string("79a125e8-cd45-4c13-8a67-188112f4dd22")) 60 | ] 61 | ) 62 | ] 63 | 64 | let options = RTVIClientOptions.init( 65 | enableMic: true, 66 | params: RTVIClientParams( 67 | baseUrl: $PIPECAT_API_URL, 68 | config: clientConfig 69 | ) 70 | ) 71 | 72 | let client = RTVIClient.init( 73 | transport: DailyTransport.init(options: configOptions), 74 | options: configOptions 75 | ) 76 | try await client.start() 77 | ``` 78 | 79 | ## Documentation 80 | 81 | 82 | 87 | Complete SDK API documentation 88 | 89 | 95 | Pipecat Client iOS 96 | 97 | 
98 | 99 | 105 | Simple Chatbot Demo 106 | 107 | 112 | WebRTC implementation using Daily 113 | 114 | 115 | -------------------------------------------------------------------------------- /client/ios/transports/daily.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Daily WebRTC Transport" 3 | description: "WebRTC implementation for iOS using Daily" 4 | --- 5 | 6 | The Daily transport implementation enables real-time audio and video communication in your Pipecat iOS applications using [Daily's](https://www.daily.co/) WebRTC infrastructure. 7 | 8 | ## Installation 9 | 10 | Add the Daily transport package to your project: 11 | 12 | ```swift 13 | .package(url: "https://github.com/pipecat-ai/pipecat-client-ios-daily.git", from: "0.3.0") 14 | 15 | // Add to your target dependencies 16 | .target(name: "YourApp", dependencies: [ 17 | .product(name: "PipecatClientIOSDaily", package: "pipecat-client-ios-daily") 18 | ]) 19 | ``` 20 | 21 | ## Usage 22 | 23 | Create a client using the Daily transport: 24 | 25 | ```swift 26 | import PipecatClientIOS 27 | import PipecatClientIOSDaily 28 | 29 | let configOptions = RTVIClientOptions.init( 30 | enableMic: true, 31 | params: RTVIClientParams( 32 | baseUrl: $PIPECAT_API_URL 33 | ) 34 | ) 35 | 36 | let client = RTVIClient.init( 37 | transport: DailyTransport.init(options: configOptions), 38 | options: configOptions 39 | ) 40 | 41 | try await client.start() 42 | ``` 43 | 44 | ## Configuration 45 | 46 | Your server endpoint should return Daily-specific configuration: 47 | 48 | ```swift 49 | // Example server response 50 | { 51 | "url": "https://your-domain.daily.co/room-name", 52 | "token": "your-daily-token" 53 | } 54 | ``` 55 | 56 | ## API Reference 57 | 58 | 59 | 65 | Simple Chatbot Demo 66 | 67 | 68 | 74 | Daily Transport 75 | 76 | 77 | 82 | Complete API documentation for the Daily transport implementation 83 | 84 | 
-------------------------------------------------------------------------------- /client/ios/transports/gemini-websocket.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Gemini Live Websocket Transport" 3 | description: "Websocket implementation for iOS using Gemini" 4 | --- 5 | 6 | The Gemini Live Websocket transport implementation enables real-time audio communication with the Gemini Multimodal Live service, using a direct websocket connection. 7 | 8 | 9 | Transports of this type are designed primarily for development and testing 10 | purposes. For production applications, you will need to build a server 11 | component with a server-friendly transport, like the 12 | [DailyTransport](./daily), to securely handle API keys. 13 | 14 | 15 | ## Installation 16 | 17 | Add the Gemini transport package to your project: 18 | 19 | ```swift 20 | .package(url: "https://github.com/pipecat-ai/pipecat-client-ios-gemini-live-websocket.git", from: "0.3.1"), 21 | 22 | // Add to your target dependencies 23 | .target(name: "YourApp", dependencies: [ 24 | .product(name: "PipecatClientIOSGeminiLiveWebSocket", package: "pipecat-client-ios-gemini-live-websocket") 25 | ], 26 | ``` 27 | 28 | ## Usage 29 | 30 | Create a client: 31 | 32 | ```swift 33 | let options: RTVIClientOptions = .init( 34 | params: .init(config: [ 35 | .init( 36 | service: "llm", 37 | options: [ 38 | .init(name: "api_key", value: .string("")), 39 | .init(name: "initial_messages", value: .array([ 40 | .object([ 41 | "role": .string("user"), // "user" | "system" 42 | "content": .string("I need your help planning my next vacation.") 43 | ]) 44 | ])), 45 | .init(name: "generation_config", value: .object([ 46 | "speech_config": .object([ 47 | "voice_config": .object([ 48 | "prebuilt_voice_config": .object([ 49 | "voice_name": .string("Puck") // "Puck" | "Charon" | "Kore" | "Fenrir" | "Aoede" 50 | ]) 51 | ]) 52 | ]) 53 | ])) 54 | ] 55 | ) 56 | ]) 57 | ) 58 | 59 | let 
client = GeminiLiveWebSocketVoiceClient(options: options) 60 | 61 | try await client.start() 62 | ``` 63 | 64 | ## API Reference 65 | 66 | 67 | 73 | Simple Chatbot Gemini Demo 74 | 75 | 76 | 82 | iOS Gemini Live WebSocket 83 | 84 | 85 | 90 | Complete API documentation for the Gemini transport implementation 91 | 92 | -------------------------------------------------------------------------------- /client/ios/transports/openai-webrtc.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "OpenAIRealTimeWebRTCTransport" 3 | --- 4 | 5 | ## Overview 6 | 7 | The OpenAI Realtime WebRTC transport implementation enables real-time audio communication directly with the [OpenAI Realtime API using WebRTC](https://platform.openai.com/docs/guides/realtime-webrtc) voice-to-voice service. 8 | It handles media device management, audio/video streams, and state management for the connection. 9 | 10 | ## Installation 11 | 12 | Add the OpenAI transport package to your project: 13 | 14 | ```swift 15 | .package(url: "https://github.com/pipecat-ai/pipecat-client-ios-openai-realtime.git", from: "0.0.1"), 16 | 17 | // Add to your target dependencies 18 | .target(name: "YourApp", dependencies: [ 19 | .product(name: "PipecatClientIOSOpenAIRealtimeWebrtc", package: "pipecat-client-ios-openai-realtime") 20 | ], 21 | ``` 22 | 23 | ## Usage 24 | 25 | Create a client: 26 | 27 | ```swift 28 | let rtviClientOptions = RTVIClientOptions.init( 29 | enableMic: currentSettings.enableMic, 30 | enableCam: false, 31 | params: .init(config: [ 32 | .init( 33 | service: "llm", 34 | options: [ 35 | .init(name: "api_key", value: .string(openaiAPIKey)), 36 | .init(name: "initial_messages", value: .array([ 37 | .object([ 38 | "role": .string("user"), // "user" | "system" 39 | "content": .string("Start by introducing yourself.") 40 | ]) 41 | ])), 42 | .init(name: "session_config", value: .object([ 43 | "instructions": .string("You are Chatbot, a friendly and helpful 
assistant who provides useful information, including weather updates."), 44 | "voice": .string("echo"), 45 | "input_audio_noise_reduction": .object([ 46 | "type": .string("near_field") 47 | ]), 48 | "turn_detection": .object([ 49 | "type": .string("semantic_vad") 50 | ]) 51 | ])), 52 | ] 53 | ) 54 | ]) 55 | ) 56 | self.rtviClientIOS = RTVIClient.init( 57 | transport: OpenAIRealtimeTransport.init(options: rtviClientOptions), 58 | options: rtviClientOptions 59 | ) 60 | try await rtviClientIOS.start() 61 | ``` 62 | 63 | 64 | Currently, invalid session configurations will result in the OpenAI connection 65 | being failed. 66 | 67 | 68 | ## API Reference 69 | 70 | 71 | 77 | Simple Chatbot OpenAI Demo 78 | 79 | 80 | 86 | iOS OpenAI Realtime WebRTC 87 | 88 | 89 | 94 | Complete API documentation for the OpenAI transport implementation 95 | 96 | -------------------------------------------------------------------------------- /client/js/api-reference/configuration.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Configuration" 3 | description: "Passing service configuration values to a bot." 4 | --- 5 | 6 | Pipecat bots expose [services](./services) and service configuration options to clients. Your client config can be set when initializing your bot or at runtime. The configuration follows the RTVI standard for consistency and compatibility. 
7 | 8 | A typical bot config, in JSON, might look like this: 9 | 10 | ```json 11 | [ 12 | { 13 | "service": "vad", 14 | "options": [{ "name": "params", "value": { "stop_secs": 3.0 } }] 15 | }, 16 | { 17 | "service": "tts", 18 | "options": [ 19 | { "name": "voice", "value": "79a125e8-cd45-4c13-8a67-188112f4dd22" } 20 | ] 21 | }, 22 | { 23 | "service": "llm", 24 | "options": [ 25 | { 26 | "name": "model", 27 | "value": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo" 28 | }, 29 | { 30 | "name": "initial_messages", 31 | "value": [ 32 | { 33 | "role": "system", 34 | "content": `You are an assistant called ExampleBot. You can ask me anything. 35 | Keep responses brief and legible. 36 | Your responses will be converted to audio. Please do not include any special characters in your response other than '!' or '?'. 37 | Start by briefly introducing yourself.` 38 | } 39 | ] 40 | }, 41 | { "name": "run_on_config", "value": true } 42 | ] 43 | } 44 | ] 45 | ``` 46 | 47 | ## Client-side configuration 48 | 49 | You can pass a config into the `RTVIClient` params properties on the [constructor](./client-constructor). 50 | Passing a config from the client is optional. A bot will always start with a default config if no config is passed from the client. 51 | Some RTVI implementations may also choose to ignore configs passed from the client, for security or other reasons. 52 | 53 | ### Working with the RTVI config array 54 | 55 | RTVI config is defined as a list because order matters; configurations are applied sequentially on your bot pipeline. 56 | For example, to configure a TTS service with a new voice and an LLM service with new prompting, specify the TTS first to ensure the voice is applied before the prompting. 57 | This ordering principle also applies to service options, ensuring deterministic outcomes for all RTVI implementations. 
58 | 59 | RTVI client instances expose various methods for working with config arrays which you can read about [here](./client-methods#bot-configuration). 60 | 61 | ## Server-side configuration 62 | 63 | Platforms implementing RTVI on the server side will generally provide a method for passing a config into a `connect` endpoint. It's a good practice to use the same config format for both client-side and server-side configuration, though of course this choice is left up to the implementor of the server-side APIs. 64 | 65 | ### Setting service API keys 66 | 67 | It's important to note that API keys should never be included in configuration messages from or to clients. Clients shouldn't have access to API keys at all. 68 | 69 | Platforms implementing RTVI should use a separate mechanism for passing API keys to a bot. A typical approach is to connect to a bot with a larger, "meta config" that includes API keys, a list of services the bot should instantiate, the client-visible bot configuration, and perhaps other fields like the maximum session duration. 70 | 71 | For example: 72 | 73 | ```javascript 74 | const bot_connect_rest_api_payload = { 75 | api_keys: api_keys_map_for_env, 76 | max_duration: duration_in_seconds, 77 | services: [{ llm: "together", tts: "cartesia" }], 78 | config: config_passed_from_client 79 | }; 80 | ``` 81 | -------------------------------------------------------------------------------- /client/js/api-reference/errors.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Errors" 3 | --- 4 | 5 | 6 | Base `RTVIClient` error type. `status:number`: unique identifier (or HTTP code 7 | if applicable) `message:string`: explanation of error 8 | 9 | 10 | 11 | Bot did not enter a ready state within the specified `timeout` constructor 12 | param. 13 | 14 | 15 | 16 | Invalid response from the provided `params.baseUrl` route.
This may be due to the base URL being unavailable, or failure to parse the provided configuration. 17 | Any error text returned by the base URL endpoint will be referenced in the `message` property. 18 | 19 | - `status:number` returned HTTP status code 20 | - `message:string` Verbose error message (if provided via `info` response) 21 | - `error:string` Error type (defaults to `invalid-request-error`) 22 | 23 | 24 | 25 | 26 | Transport was not able to connect. Check the auth bundle returned by the 27 | baseUrl is valid. 28 | 29 | 30 | 31 | Client attempted an action or method that requires the bot to be in a ready 32 | state. You must call `connect` first and wait for the bot to be ready. 33 | 34 | 35 | 36 | Bot was unable to parse provided configuration properties. 37 | 38 | -------------------------------------------------------------------------------- /client/js/api-reference/messages.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Messages" 3 | --- 4 | 5 | The Pipecat JavaScript client can send and receive arbitrary data messages to a bot using the RTVI standard for message handling. 6 | 7 | Messages differ from [actions](./actions) in that they are not associated with a specific service and do not return a Promise. 8 | 9 | Messages are used by a client for passing an instruction to a bot that typically results in a [callback or event](./callbacks). 10 | 11 | Examples of messages include: 12 | 13 | - `updateConfig` 14 | - `describeConfig` 15 | - `describeActions` 16 | - `disconnectBot` 17 | 18 | Developers can build their own messages and create handlers to respond to messages received from a bot either by extending `RTVIClient` with their own class or by building a [helper](./helpers/introduction). 19 | 20 | ### sendMessage(message: RTVIMessage) 21 | 22 | This is a "fire and forget" function to send an arbitrary message to a bot. It does not wait for a response, so nothing is returned. 
23 | 24 | Most clients will not use this function directly. They will use [actions](./actions) and [helper methods](./helpers), instead. 25 | 26 | It is up to the transport layer to pack and unpack messages, send messages over the wire, and deliver messages to the SDK so that events can be emitted. 27 | 28 | ### Anatomy of a message 29 | 30 | ```typescript 31 | { 32 | label: "rtvi-ai", 33 | type: "EVENT_TYPE", 34 | data: { EVENT_TYPE_DATA } 35 | } 36 | 37 | // example message dispatch 38 | rtviClient.sendMessage({ 39 | label: "rtvi-ai", 40 | type: "myMessage", 41 | data: { 42 | "hello": "world" 43 | } 44 | }); 45 | ``` 46 | -------------------------------------------------------------------------------- /client/js/helpers/llm.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "LLM Helper" 3 | --- 4 | 5 | The Pipecat JavaScript client includes an LLM helper for common language model tasks and workflows. 6 | 7 | ## Using the LLM helper 8 | 9 | ```typescript 10 | import { RTVIClient, LLMHelper } from "@pipecat-ai/client-js"; 11 | 12 | const rtviClient = new RTVIClient({ 13 | // ... 14 | }); 15 | const llmHelper = new LLMHelper({ 16 | callbacks: { 17 | // ... 18 | }, 19 | }); 20 | rtviClient.registerHelper("llm", llmHelper); 21 | ``` 22 | 23 | ## Actions 24 | 25 | All of the below are abstracted [actions](/v02/api-reference/actions). 26 | 27 | As with all actions, they can be awaited or chained with `.then()`. 28 | 29 | If the bot is unable to process the action, it will trigger the `onMessageError` callback and `MessageError` event. 30 | 31 | ### getContext() 32 | 33 | `llm:get_context` 34 | 35 | Retrieve LLM context from bot. 
Returns `Promise` 36 | 37 | ```typescript 38 | const llmHelper = rtviClient.getHelper("llm") as LLMHelper; 39 | const ctx = await llmHelper.getContext(); 40 | 41 | // > { messages?: LLMContextMessage[]; tools?: []; } 42 | ``` 43 | 44 | ### setContext() 45 | 46 | `llm:set_context` 47 | 48 | Replaces the current LLM context with the provided one. Returns `Promise`. 49 | 50 | 51 | LLMContext option to set. 52 | 53 | 54 | 55 | Interrupt the current conversation and apply the new context immediately. 56 | 57 | 58 | ```typescript 59 | const llmHelper = rtviClient.getHelper("llm") as LLMHelper; 60 | await llmHelper.setContext( 61 | { 62 | messages: [ 63 | { 64 | role: "system", 65 | content: "You are a helpful assistant", 66 | }, 67 | { 68 | role: "user", 69 | content: "Tell me a joke", 70 | }, 71 | ], 72 | }, 73 | false | true 74 | ); 75 | 76 | // > true | false (if error) 77 | ``` 78 | 79 | ### appendToMessages() 80 | 81 | `llm:append_to_messages` 82 | 83 | Append a new message to the existing context. Returns `Promise`. 84 | 85 | 86 | New message to apply to the context. 87 | 88 | 89 | 90 | Apply the new message immediately, or wait until the current turn concludes.
91 | 92 | 93 | ```typescript 94 | const llmHelper = rtviClient.getHelper("llm") as LLMHelper; 95 | await llmHelper.appendToMessages( 96 | { 97 | role: "user", 98 | content: "Tell me a joke", 99 | }, 100 | false | true 101 | ); 102 | 103 | // > true | false (if error) 104 | ``` 105 | 106 | ## Callbacks and events 107 | 108 | ```typescript 109 | onLLMJsonCompletion: (jsonString: string) => void; 110 | onLLMFunctionCall: (func: LLMFunctionCallData) => void; 111 | onLLMFunctionCallStart: (functionName: string) => void; 112 | onLLMMessage: (message: LLMContextMessage) => void; 113 | ``` 114 | -------------------------------------------------------------------------------- /client/js/introduction.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "SDK Introduction" 3 | description: "Build web applications with Pipecat’s JavaScript client library" 4 | --- 5 | 6 | The Pipecat JavaScript SDK provides a lightweight client implementation that handles: 7 | 8 | - Device and media stream management 9 | - Managing bot configuration 10 | - Sending actions to the bot 11 | - Handling bot messages and responses 12 | - Managing session state and errors 13 | 14 | ## Installation 15 | 16 | Install the SDK and a transport implementation (e.g. 
Daily for WebRTC): 17 | 18 | ```bash 19 | npm install @pipecat-ai/client-js 20 | npm install @pipecat-ai/daily-transport 21 | ``` 22 | 23 | ## Example 24 | 25 | Here's a simple example using Daily as the transport layer: 26 | 27 | ```javascript 28 | import { RTVIClient } from "@pipecat-ai/client-js"; 29 | import { DailyTransport } from "@pipecat-ai/daily-transport"; 30 | 31 | // Handle incoming audio from the bot 32 | function handleBotAudio(track, participant) { 33 | if (participant.local || track.kind !== "audio") return; 34 | 35 | const audioElement = document.createElement("audio"); 36 | audioElement.srcObject = new MediaStream([track]); 37 | document.body.appendChild(audioElement); 38 | audioElement.play(); 39 | } 40 | 41 | // Create and configure the client 42 | const rtviClient = new RTVIClient({ 43 | params: { 44 | baseUrl: process.env.PIPECAT_API_URL || "/api", 45 | }, 46 | transport: new DailyTransport(), 47 | enableMic: true, 48 | callbacks: { 49 | onTrackStart: handleBotAudio, 50 | }, 51 | }); 52 | 53 | // Connect to your bot 54 | rtviClient.connect(); 55 | ``` 56 | 57 | ## Explore the SDK 58 | 59 | 60 | 65 | Configure your client instance with transport and callbacks 66 | 67 | 72 | Core methods for interacting with your bot 73 | 74 | 75 | Detailed documentation of all available APIs 76 | 77 | 78 | Utility functions for common operations 79 | 80 | 81 | 82 | The Pipecat JavaScript SDK implements the [RTVI standard](/client/introduction#about-rtvi) for real-time AI inference, ensuring compatibility with any RTVI-compatible server and transport layer. 
83 | -------------------------------------------------------------------------------- /client/js/transports/gemini.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "GeminiLiveWebSocketTransport" 3 | --- 4 | 5 | ## Overview 6 | 7 | The `GeminiLiveWebsocketTransport` class extends the [`RealTimeWebsocketTransport`](./realtime) to implement a fully functional [RTVI `Transport`](./transport). It provides a framework for implementing real-time communication directly with the [Gemini Multimodal Live](https://ai.google.dev/api/multimodal-live) service. `RealTimeWebsocketTransport` handles media device management, audio/video streams, and state management for the connection. 8 | 9 | 10 | Transports of this type are designed primarily for development and testing 11 | purposes. For production applications, you will need to build a server 12 | component with a server-friendly transport, like the 13 | [DailyTransport](./daily), to securely handle API keys. 14 | 15 | 16 | ## Usage 17 | 18 | ### Basic Setup 19 | 20 | ```javascript 21 | import { GeminiLiveWebsocketTransport, GeminiLLMServiceOptions } from '@pipecat-ai/gemini-live-websocket-transport'; 22 | import { RTVIClientOptions } from '@pipecat-ai/client-js'; 23 | 24 | const options: GeminiLLMServiceOptions = { 25 | api_key: 'YOUR_API_KEY', 26 | generation_config: { 27 | temperature: 0.7, 28 | max_output_tokens: 1000 29 | } 30 | }; 31 | 32 | const transport = new GeminiLiveWebsocketTransport(options); 33 | let RTVIConfig: RTVIClientOptions = { 34 | transport, 35 | ...
36 | }; 37 | 38 | ``` 39 | 40 | ### Configuration Options 41 | 42 | ```typescript 43 | interface GeminiLLMServiceOptions { 44 | api_key: string; // Required: Your Gemini API key 45 | initial_messages?: Array<{ 46 | // Optional: Initial conversation context 47 | content: string; 48 | role: string; 49 | }>; 50 | generation_config?: { 51 | // Optional: Generation parameters 52 | candidate_count?: number; 53 | max_output_tokens?: number; 54 | temperature?: number; 55 | top_p?: number; 56 | top_k?: number; 57 | presence_penalty?: number; 58 | frequency_penalty?: number; 59 | response_modalities?: string; 60 | speech_config?: { 61 | voice_config?: { 62 | prebuilt_voice_config?: { 63 | voice_name: "Puck" | "Charon" | "Kore" | "Fenrir" | "Aoede"; 64 | }; 65 | }; 66 | }; 67 | }; 68 | } 69 | ``` 70 | 71 | ### Sending Messages 72 | 73 | ```javascript 74 | // at setup time... 75 | llmHelper = new LLMHelper({}); 76 | rtviClient.registerHelper("llm", llmHelper); 77 | // the 'llm' name in this call above isn't used. 78 | // that value is specific to working with a pipecat pipeline 79 | 80 | // at time of sending message... 81 | // Send text prompt message 82 | llmHelper.appendToMessages({ role: "user", content: 'Hello Gemini!' }); 83 | ``` 84 | 85 | ### Handling Events 86 | 87 | The transport implements the various [RTVI event handlers](https://docs.pipecat.ai/client/js/api-reference/callbacks). Check out the docs or samples for more info.
88 | 89 | ## More Information 90 | 91 | 92 | 98 | Gemini MultiModal Live Basic Demo 99 | 100 | 101 | 107 | `GeminiLiveWebsocketTransport` 108 | 109 | 110 | 116 | `@pipecat-ai/realtime-websocket-transport` 117 | 118 | -------------------------------------------------------------------------------- /client/js/transports/realtime.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "RealTimeWebSocketTransport" 3 | --- 4 | 5 | ## Overview 6 | 7 | The Pipecat client SDKs offer multiple transport packages for real-time communication with a Voice-to-Voice LLM servers. The RealTimeWebSocketTransport provides an abstraction for establishing direct connections to LLM servers and handling media streaming. 8 | 9 | 10 | Transports of this type are designed primarily for development and testing 11 | purposes. For production applications, you will need to build a server 12 | component with a server-friendly transport, like the 13 | [DailyTransport](./daily), to securely handle API keys. 14 | 15 | 16 | ## Transports 17 | 18 | | Transport | Service | Package | 19 | | ---------------------------- | ------------------------------------------------------------------- | --------------------------------------------- | 20 | | GeminiLiveWebsocketTransport | [Gemini MultiModal Live](https://ai.google.dev/api/multimodal-live) | `@pipecat-ai/gemini-live-websocket-transport` | 21 | 22 | ## More Information 23 | 24 | 30 | `RealTimeWebSocketTransport` 31 | 32 | 33 | 39 | `@pipecat-ai/realtime-websocket-transport` 40 | 41 | -------------------------------------------------------------------------------- /client/js/transports/transport.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Transport Overview" 3 | sidebarTitle: "Overview" 4 | --- 5 | 6 | Transports are the means by which Pipecat clients communicate with their bot services. 
They handle both message exchange between client and server and real-time media transport. Pipecat implements the RTVI standard for these communications. 7 | 8 | ## Transport lifecycle 9 | 10 | Your transport instance is constructed when you call `connect()` on your client instance. 11 | 12 | ```typescript 13 | import { RTVIClient } from "@pipecat-ai/client-js"; 14 | import { DailyTransport } from "@pipecat-ai/daily-transport"; 15 | 16 | const rtviClient = new RTVIClient({ 17 | transport: new DailyTransport(), 18 | ... 19 | }); 20 | 21 | await rtviClient.connect(); // Transport instance is created here 22 | await rtviClient.disconnect(); // Transport instance is destroyed here 23 | ``` 24 | 25 | ## Transport states 26 | 27 | `TransportState` 28 | 29 | Your transport instance goes through a series of states during its lifecycle. These states are: 30 | 31 | 32 | 33 | Transport is idle and has not yet been initialized (default state). 34 | 35 | 36 | Transport is being initialized. Typically in response to a 37 | `rtviClient.initDevices()` call, where the transport is being set up in 38 | order to enumerate local media devices. 39 | 40 | 41 | Transport has been initialized and is ready to connect. This state is 42 | typically reached after a successful `rtviClient.initDevices()` call. 43 | 44 | 45 | Your client has called `rtviClient.connect()` and is waiting for a response 46 | from your server containing 'auth bundle' credentials (such as a session URL 47 | and token.) 48 | 49 | 50 | Transport has received the 'auth bundle' and is connecting to the server. 51 | 52 | 53 | Transport has successfully connected to the session and is awaiting a 54 | client-ready signal (indicated audio and video tracks are ready to be sent.) 55 | 56 | Transport is ready and the session can begin. 57 | 58 | Transport is disconnecting from the session. 59 | 60 | An error occurred during the transport lifecycle. 
61 | 62 | 63 | You can access the current transport state via `rtviClient.state`, or by defining a callback or event: 64 | 65 | ```typescript 66 | // Callback 67 | const rtviClient = new RTVIClient({ 68 | transport: new DailyTransport(), 69 | callbacks: { 70 | onTransportStateChange: (state) => { 71 | console.log(state); 72 | } 73 | //... 74 | }}); 75 | 76 | // Event 77 | rtviClient.on(RTVIEvent.TransportStateChanged, (e) => console.log(e)); 78 | 79 | // Client getter 80 | console.log(rtviClient.state); // Disconnected 81 | ``` 82 | -------------------------------------------------------------------------------- /client/react-native/api-reference.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "API Reference" 3 | description: "API reference for the Pipecat React Native SDK" 4 | --- 5 | 6 | 7 | The Pipecat React Native SDK leverages the Pipecat JavaScript SDK for seamless integration with React Native applications. 8 | For detailed information, please refer to the [JavaScript SDK docs](/client/js/api-reference/client-constructor). 9 | 10 | **Just ensure you use the appropriate transport layer for React Native.** 11 | 12 | -------------------------------------------------------------------------------- /client/react-native/introduction.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "SDK Introduction" 3 | description: "Build React Native applications with Pipecat's React Native client library" 4 | --- 5 | 6 | The Pipecat React Native SDK leverages the [Pipecat JavaScript SDK](/client/js/introduction) to provide seamless integration for React Native applications.
7 | Since the JavaScript SDK is designed to work across both web and React Native platforms, the core functionalities remain the same: 8 | - Device and media stream management 9 | - Managing bot configuration 10 | - Sending actions to the bot 11 | - Handling bot messages and responses 12 | - Managing session state and errors 13 | 14 | The primary difference lies in the transport layer, which is tailored to support the unique requirements of the React Native environment. 15 | 16 | For example, when using the SDK with React Native, you would install `RNDailyTransport` instead of `DailyTransport`. 17 | 18 | ## Installation 19 | 20 | Install the SDK and a transport implementation (e.g. Daily for WebRTC): 21 | 22 | ```bash 23 | npm i @pipecat-ai/react-native-daily-transport 24 | npm i @daily-co/react-native-daily-js@^0.70.0 25 | npm i @daily-co/react-native-webrtc@^118.0.3-daily.2 26 | npm i @react-native-async-storage/async-storage@^1.23.1 27 | npm i react-native-background-timer@^2.4.1 28 | npm i react-native-get-random-values@^1.11.0 29 | ``` 30 | 31 | Installing `@pipecat-ai/react-native-daily-transport` automatically includes the corresponding version of the JavaScript SDK. 
32 | 33 | If you are using Expo, you will also need to add the following dependencies: 34 | 35 | ```bash 36 | npm i @config-plugins/react-native-webrtc@^10.0.0 37 | npm i @daily-co/config-plugin-rn-daily-js@0.0.7 38 | ``` 39 | 40 | ## Requirements 41 | 42 | This package introduces some constraints on what OS/SDK versions your project can support: 43 | 44 | - iOS: Deployment target >= 13 45 | - Android: `minSdkVersion` >= 24 46 | 47 | ## Quick start 48 | 49 | Here's a simple example using Daily as the transport layer: 50 | 51 | ```tsx 52 | import { RNDailyTransport } from '@pipecat-ai/react-native-daily-transport'; 53 | import { RTVIClient } from '@pipecat-ai/client-js'; 54 | 55 | // Create and configure the client 56 | let voiceClient = new RTVIClient({ 57 | params: { 58 | baseUrl: process.env.PIPECAT_API_URL || "/api", 59 | }, 60 | transport: new RNDailyTransport(), 61 | enableMic: true 62 | }); 63 | 64 | // Connect to your bot 65 | await voiceClient.connect(); 66 | ``` 67 | 68 | > You can find a basic working example [here](https://github.com/pipecat-ai/pipecat-client-react-native-daily-transport/tree/main/example) 69 | > and a more comprehensive example [here](https://github.com/daily-demos/daily-bots-react-native-demo/). 70 | 71 | ## Explore the SDK 72 | 73 | The Pipecat React Native SDK leverages the Pipecat JavaScript SDK for seamless integration with React Native applications. For detailed information, refer to our JavaScript documentation. 74 | 75 | > Just ensure you use the appropriate transport layer for React Native. 
76 | 77 | 78 | 83 | Configure your client instance with transport and callbacks 84 | 85 | 90 | Core methods for interacting with your bot 91 | 92 | 93 | Detailed documentation of all available APIs 94 | 95 | 96 | Utility functions for common operations 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /client/react/components.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Components" 3 | description: "Ready-to-use React components for Pipecat applications" 4 | --- 5 | 6 | The Pipecat React SDK provides several components for handling audio, video, and visualization in your application. 7 | 8 | ## RTVIClientProvider 9 | 10 | The root component for providing Pipecat client context to your application. 11 | 12 | ### Props 13 | 14 | - `client` (RTVIClient, required): A singleton instance of RTVIClient 15 | 16 | ```jsx 17 | 18 | {/* Child components */} 19 | 20 | ``` 21 | 22 | ## RTVIClientAudio 23 | 24 | Creates a new `