├── .gitignore ├── .parcelrc ├── .prettierrc ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── examples └── directToLLMTransports │ ├── README.md │ ├── README.md.bak │ ├── env.example │ ├── index.html │ ├── package-lock.json │ ├── package.json │ ├── src │ ├── app.ts │ ├── styles.css │ └── vite-env.d.ts │ └── tsconfig.json ├── lib ├── media-mgmt │ ├── dailyMediaManager.ts │ └── mediaManager.ts ├── wavtools │ ├── index.js │ └── lib │ │ ├── analysis │ │ ├── audio_analysis.js │ │ └── constants.js │ │ ├── mediastream_recorder.js │ │ ├── wav_packer.js │ │ ├── wav_recorder.js │ │ ├── wav_stream_player.js │ │ └── worklets │ │ ├── audio_processor.js │ │ └── stream_processor.js └── websocket-utils │ └── reconnectingWebSocket.ts ├── package-lock.json ├── package.json └── transports ├── daily ├── CHANGELOG.md ├── LICENSE ├── README.md ├── package.json ├── src │ ├── index.ts │ └── transport.ts └── tsconfig.json ├── gemini-live-websocket-transport ├── LICENSE ├── README.md ├── package.json ├── src │ ├── directToLLMBaseWebSocketTransport.ts │ ├── geminiLiveWebSocketTransport.ts │ └── index.ts └── tsconfig.json ├── openai-realtime-webrtc-transport ├── LICENSE ├── README.md ├── package.json ├── src │ ├── OpenAIRealTimeWebRTCTransport.ts │ └── index.ts └── tsconfig.json ├── small-webrtc-transport ├── CHANGELOG.md ├── LICENSE ├── README.md ├── package.json ├── src │ ├── index.ts │ └── smallWebRTCTransport.ts └── tsconfig.json └── websocket-transport ├── LICENSE ├── README.md ├── package.json ├── proto ├── frames.proto └── generate_typescript.sh ├── src ├── generated │ └── proto │ │ └── frames.ts ├── index.ts └── webSocketTransport.ts └── tsconfig.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | .parcel-cache 15 | 16 | .env 17 | 18 | # Editor directories and files 19 | .vscode/* 20 | !.vscode/extensions.json 21 | .idea 22 | .DS_Store 23 | *.suo 24 | *.ntvs* 25 | *.njsproj 26 | *.sln 27 | *.sw? 28 | -------------------------------------------------------------------------------- /.parcelrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@parcel/config-default", 3 | "transformers": { 4 | "*.{ts,tsx}": [ 5 | "@parcel/transformer-typescript-tsc" 6 | ] 7 | } 8 | } -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "tabWidth": 2, 4 | "useTabs": false, 5 | "singleQuote": false 6 | } -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing to Pipecat Client Web Trnsports 2 | 3 | We welcome contributions of all kinds! Your help is appreciated. Follow these steps to get involved: 4 | 5 | 1. **Fork this repository**: Start by forking the Pipecat Client Web Transports repository to your GitHub account. 6 | 7 | 2. **Clone the repository**: Clone your forked repository to your local machine. 8 | ```bash 9 | git clone https://github.com/your-username/pipecat-client-web-transports 10 | ``` 11 | 3. **Create a branch**: For your contribution, create a new branch. 12 | ```bash 13 | git checkout -b your-branch-name 14 | ``` 15 | 4. 
**Make your changes**: Edit or add files as necessary. 16 | 5. **Test your changes**: Ensure that your changes look correct and follow the style set in the codebase. 17 | 6. **Commit your changes**: Once you're satisfied with your changes, commit them with a meaningful message. 18 | 19 | ```bash 20 | git commit -m "Description of your changes" 21 | ``` 22 | 23 | 7. **Push your changes**: Push your branch to your forked repository. 24 | 25 | ```bash 26 | git push origin your-branch-name 27 | ``` 28 | 29 | 9. **Submit a Pull Request (PR)**: Open a PR from your forked repository to the main branch of this repo. 30 | > Important: Describe the changes you've made clearly! 31 | 32 | Our maintainers will review your PR, and once everything is good, your contributions will be merged! 33 | 34 | # Contributor Covenant Code of Conduct 35 | 36 | ## Our Pledge 37 | 38 | We as members, contributors, and leaders pledge to make participation in our 39 | community a harassment-free experience for everyone, regardless of age, body 40 | size, visible or invisible disability, ethnicity, sex characteristics, gender 41 | identity and expression, level of experience, education, socio-economic status, 42 | nationality, personal appearance, race, caste, color, religion, or sexual 43 | identity and orientation. 44 | 45 | We pledge to act and interact in ways that contribute to an open, welcoming, 46 | diverse, inclusive, and healthy community. 47 | 48 | ## Our Standards 49 | 50 | Examples of behavior that contributes to a positive environment for our 51 | community include: 52 | 53 | - Demonstrating empathy and kindness toward other people 54 | - Being respectful of differing opinions, viewpoints, and experiences 55 | - Giving and gracefully accepting constructive feedback 56 | - Accepting responsibility and apologizing to those affected by our mistakes, 57 | and learning from the experience 58 | - Focusing on what is best not just for us as individuals, but for the overall 59 | community 60 | 61 | Examples of unacceptable behavior include: 62 | 63 | - The use of sexualized language or imagery, and sexual attention or advances of 64 | any kind 65 | - Trolling, insulting or derogatory comments, and personal or political attacks 66 | - Public or private harassment 67 | - Publishing others' private information, such as a physical or email address, 68 | without their explicit permission 69 | - Other conduct which could reasonably be considered inappropriate in a 70 | professional setting 71 | 72 | ## Enforcement Responsibilities 73 | 74 | Community leaders are responsible for clarifying and enforcing our standards of 75 | acceptable behavior and will take appropriate and fair corrective action in 76 | response to any behavior that they deem inappropriate, threatening, offensive, 77 | or harmful. 78 | 79 | Community leaders have the right and responsibility to remove, edit, or reject 80 | comments, commits, code, wiki edits, issues, and other contributions that are 81 | not aligned to this Code of Conduct, and will communicate reasons for moderation 82 | decisions when appropriate. 83 | 84 | ## Scope 85 | 86 | This Code of Conduct applies within all community spaces, and also applies when 87 | an individual is officially representing the community in public spaces. 88 | Examples of representing our community include using an official email address, 89 | posting via an official social media account, or acting as an appointed 90 | representative at an online or offline event. 
91 | 92 | ## Enforcement 93 | 94 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 95 | reported to the community leaders responsible for enforcement at pipecat-ai@daily.co. 96 | All complaints will be reviewed and investigated promptly and fairly. 97 | 98 | All community leaders are obligated to respect the privacy and security of the 99 | reporter of any incident. 100 | 101 | ## Enforcement Guidelines 102 | 103 | Community leaders will follow these Community Impact Guidelines in determining 104 | the consequences for any action they deem in violation of this Code of Conduct: 105 | 106 | ### 1. Correction 107 | 108 | **Community Impact**: Use of inappropriate language or other behavior deemed 109 | unprofessional or unwelcome in the community. 110 | 111 | **Consequence**: A private, written warning from community leaders, providing 112 | clarity around the nature of the violation and an explanation of why the 113 | behavior was inappropriate. A public apology may be requested. 114 | 115 | ### 2. Warning 116 | 117 | **Community Impact**: A violation through a single incident or series of 118 | actions. 119 | 120 | **Consequence**: A warning with consequences for continued behavior. No 121 | interaction with the people involved, including unsolicited interaction with 122 | those enforcing the Code of Conduct, for a specified period of time. This 123 | includes avoiding interactions in community spaces as well as external channels 124 | like social media. Violating these terms may lead to a temporary or permanent 125 | ban. 126 | 127 | ### 3. Temporary Ban 128 | 129 | **Community Impact**: A serious violation of community standards, including 130 | sustained inappropriate behavior. 131 | 132 | **Consequence**: A temporary ban from any sort of interaction or public 133 | communication with the community for a specified period of time. No public or 134 | private interaction with the people involved, including unsolicited interaction 135 | with those enforcing the Code of Conduct, is allowed during this period. 136 | Violating these terms may lead to a permanent ban. 137 | 138 | ### 4. Permanent Ban 139 | 140 | **Community Impact**: Demonstrating a pattern of violation of community 141 | standards, including sustained inappropriate behavior, harassment of an 142 | individual, or aggression toward or disparagement of classes of individuals. 143 | 144 | **Consequence**: A permanent ban from any sort of public interaction within the 145 | community. 146 | 147 | ## Attribution 148 | 149 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 150 | version 2.1, available at 151 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 152 | 153 | Community Impact Guidelines were inspired by 154 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 155 | 156 | For answers to common questions about this code of conduct, see the FAQ at 157 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 158 | [https://www.contributor-covenant.org/translations][translations]. 
159 | 160 | [homepage]: https://www.contributor-covenant.org 161 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 162 | [Mozilla CoC]: https://github.com/mozilla/diversity 163 | [FAQ]: https://www.contributor-covenant.org/faq 164 | [translations]: https://www.contributor-covenant.org/translations 165 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2024, Daily 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pipecat Client Web Transports 2 | 3 | [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai/client/js/transports/transport) 4 | [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat) 5 | 6 | A mono-repo to house the various supported Transport options to be used with the pipecat-client-web library. Currently, there are four transports: `small-webrtc-transport`, `daily-transport`, `gemini-live-websocket-transport`, and `openai-realtime-webrtc-transport`. 7 | 8 | ## Documentation 9 | 10 | Pipecat Transports are intended to be used in conjunction with a Pipecat web client. 
Please refer to the full Pipecat client documentation [here](https://docs.pipecat.ai/client/introduction) and an overview of the [Transport API here](https://docs.pipecat.ai/client/js/transports/transport) 11 | 12 | ## Current Transports 13 | 14 | ### [SmallWebRTCTransport](/transports/small-webrtc-transport/README.md) 15 | 16 | [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai/client/js/transports/small-webrtc) 17 | [![README](https://img.shields.io/badge/README-goldenrod)](/transports/small-webrtc-transport/README.md) 18 | [![Demo](https://img.shields.io/badge/Demo-forestgreen)](https://github.com/pipecat-ai/pipecat/tree/main/examples/p2p-webrtc) 19 | ![NPM Version](https://img.shields.io/npm/v/@pipecat-ai/small-webrtc-transport) 20 | 21 | This Transport creates a peer-to-peer WebRTC connection between the client and the bot process. This Transport is the client-side counterpart to the Pipecat [SmallWebRTCTransport component](https://docs.pipecat.ai/server/services/transport/small-webrtc). 22 | 23 | This is the simplest low-latency audio/video transport for Pipecat. This transport is recommended for local development and demos. Things to be aware of: 24 | - This transport is a direct connection between the client and the bot process. If you need multiple clients to connect to the same bot, you will need to use a different transport. 25 | - For production usage at scale, a distributed WebRTC network that can do edge/mesh routing, has session-level observability and metrics, and can offload recording and other auxiliary services is often useful. 26 | 27 | Typical media flow using a SmallWebRTCTransport: 28 | ``` 29 | ┌──────────────────────────────────────────────────┐ 30 | │ │ 31 | ┌─────────────────────────┐ │ Server ┌─────────┐ │ 32 | │ │ │ │Pipecat │ │ 33 | │ Client │ RTVI Messages │ │Pipeline │ │ 34 | │ │ & │ │ │ 35 | │ ┌────────────────────┐ │ WebRTC Media │ ┌────────────────────┐ media │ ┌─────┐ │ │ 36 | │ │SmallWebRTCTransport│◄─┼────────────────┼─►│SmallWebRTCTransport┼────────────┼─► STT │ │ │ 37 | │ └────────────────────┘ │ │ └───────▲────────────┘ in │ └──┬──┘ │ │ 38 | │ │ │ │ │ │ │ │ 39 | └─────────────────────────┘ │ │ │ ┌──▼──┐ │ │ 40 | │ │ │ │ LLM │ │ │ 41 | │ │ │ └──┬──┘ │ │ 42 | │ │ │ │ │ │ 43 | │ │ │ ┌──▼──┐ │ │ 44 | │ │ media │ │ TTS │ │ │ 45 | │ └─────────────────────────┼─┴─────┘ │ │ 46 | │ out └─────────┘ │ 47 | │ │ 48 | └──────────────────────────────────────────────────┘ 49 | ``` 50 | 51 | ### [DailyTransport](/transports/daily/README.md) 52 | 53 | [![Docs](https://img.shields.io/badge/Documention-blue)](https://docs.pipecat.ai/client/js/transports/daily) 54 | [![README](https://img.shields.io/badge/README-goldenrod)](/transports/daily/README.md) 55 | [![Demo](https://img.shields.io/badge/Demo-forestgreen)](https://github.com/pipecat-ai/pipecat/tree/main/examples/simple-chatbot) 56 | ![NPM Version](https://img.shields.io/npm/v/@pipecat-ai/daily-transport) 57 | 58 | This Transport uses the [Daily](https://daily.co) audio and video calling service to connect to a bot and stream media over a WebRTC connection. This Transport is the client-side counterpart to the Pipecat [DailyTransport component](https://docs.pipecat.ai/server/services/transport/daily). 
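
As a rough usage sketch only (the zero-argument `DailyTransport` constructor and the placeholder connect endpoint below are assumptions; see the Docs and README badges above for the authoritative API), a DailyTransport is handed to the Pipecat RTVI client like any other transport:

```typescript
import { RTVIClient } from "@pipecat-ai/client-js";
import { DailyTransport } from "@pipecat-ai/daily-transport";

// The client drives connection state, devices, and events; the transport
// moves RTVI messages and WebRTC media over Daily.
const transport = new DailyTransport();

const rtviClient = new RTVIClient({
  transport,
  params: {
    // Hypothetical endpoint: your server route that starts the Pipecat bot
    // and returns the Daily room/token for this session.
    baseUrl: "https://example.com/api",
  },
  enableMic: true,
  enableCam: false,
});

// Top-level await works in an ES module; otherwise wrap in an async function.
await rtviClient.connect();
```
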
59 | 60 | Typical media flow using a DailyTransport: 61 | ``` 62 | 63 | ┌────────────────────────────────────────────┐ 64 | │ │ 65 | ┌───────────────────┐ │ Server ┌─────────┐ │ 66 | │ │ │ │Pipecat │ │ 67 | │ Client │ RTVI Messages │ │Pipeline │ │ 68 | │ │ & │ │ │ │ 69 | │ ┌──────────────┐ │ WebRTC Media │ ┌──────────────┐ media │ ┌─────┐ │ │ 70 | │ │DailyTransport│◄─┼────────────────┼─►│DailyTransport┼────────────┼─► STT │ │ │ 71 | │ └──────────────┘ │ │ └───────▲──────┘ in │ └──┬──┘ │ │ 72 | │ │ │ │ │ │ │ │ 73 | └───────────────────┘ │ │ │ ┌──▼──┐ │ │ 74 | │ │ │ │ LLM │ │ │ 75 | │ │ │ └──┬──┘ │ │ 76 | │ │ │ │ │ │ 77 | │ │ │ ┌──▼──┐ │ │ 78 | │ │ media │ │ TTS │ │ │ 79 | │ └───────────────────┼─┴─────┘ │ │ 80 | │ out └─────────┘ │ 81 | │ │ 82 | └────────────────────────────────────────────┘ 83 | 84 | ``` 85 | 86 | ### [GeminiLiveWebSocketTransport](transports/gemini-live-websocket-transport/README.md) 87 | [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai/client/js/transports/gemini) 88 | [![README](https://img.shields.io/badge/README-goldenrod)](transports/gemini-live-websocket-transport/README.md) 89 | [![Demo](https://img.shields.io/badge/Demo-forestgreen)](examples/directToLLMTransports/README.md) 90 | ![NPM Version](https://img.shields.io/npm/v/@pipecat-ai/gemini-live-websocket-transport) 91 | 92 | This Transport extends the [directToLLMBaseWebSocketTransport](transports/gemini-live-websocket-transport/src/directToLLMBaseWebSocketTransport.ts) base class and connects directly to Gemini over a WebSocket connection using the Multimodal Live API. This type of transport is great for trying out a service without needing to build a server component. Just be aware that it is insecure: your Gemini API key must be available client-side, so it is probably not something you want to use in a production app. 93 | 94 | Media flow using a GeminiLiveWebSocketTransport: 95 | ``` 96 | Client Server 97 | ┌────────────────────────────────────┐ 98 | │ │ 99 | │ RTVIClient │ ┌──────────────┐ 100 | │ │ Media over │ │ 101 | │ ┌──────────────────────────────┐ │ WebSocket │ Gemini │ 102 | │ │ GeminiLiveWebSocketTransport │◄─┼────────────────┼─► Server │ 103 | │ └──────────────────────────────┘ │ │ │ 104 | │ │ └──────────────┘ 105 | └────────────────────────────────────┘ 106 | ``` 107 | 108 | ### [OpenAIRealTimeWebRTCTransport](transports/openai-realtime-webrtc-transport/README.md) 109 | [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai/client/js/transports/openai-webrtc) 110 | [![README](https://img.shields.io/badge/README-goldenrod)](transports/openai-realtime-webrtc-transport/README.md) 111 | [![Demo](https://img.shields.io/badge/Demo-forestgreen)](examples/directToLLMTransports/README.md) 112 | ![NPM Version](https://img.shields.io/npm/v/@pipecat-ai/openai-realtime-webrtc-transport) 113 | 114 | This Transport connects directly to OpenAI over a WebRTC connection using the RealTime API. This type of transport is great for trying out a service without needing to build a server component. Just be aware that it is insecure: your OpenAI API key must be available client-side, so it is probably not something you want to use in a production app. It does not implement the Ephemeral Token process.
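
For a concrete picture of how these direct-to-LLM transports are driven, here is a condensed sketch based on this repo's `examples/directToLLMTransports` app (options are trimmed, so treat it as illustrative rather than a complete setup). The Gemini transport follows the same pattern, swapping in `GeminiLiveWebsocketTransport` and `GeminiLLMServiceOptions`:

```typescript
import { RTVIClient, RTVIClientOptions } from "@pipecat-ai/client-js";
import {
  OpenAIRealTimeWebRTCTransport,
  OpenAIServiceOptions,
} from "@pipecat-ai/openai-realtime-webrtc-transport";

// The API key is read from a Vite env var and shipped to the browser,
// which is exactly why this setup is only suitable for prototyping.
const service_options: OpenAIServiceOptions = {
  api_key: import.meta.env.VITE_DANGEROUS_OPENAI_API_KEY,
  // settings (instructions, voice, tools, turn_detection, ...) trimmed here;
  // see examples/directToLLMTransports/src/app.ts for a complete object.
  initial_messages: [{ role: "user", content: "Hello" }],
};

const transport = new OpenAIRealTimeWebRTCTransport(service_options);

const opts: RTVIClientOptions = {
  transport,
  params: { baseUrl: "api", requestData: { service_options } },
  enableMic: true,
  enableCam: false,
};
// There is no server to call at connect time, so the example stubs the handler.
opts.customConnectHandler = () => Promise.resolve();

const rtviClient = new RTVIClient(opts);
await rtviClient.connect();
```
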
115 | 116 | Media flow using a OpenAIRealTimeWebRTCTransport: 117 | ``` 118 | Client Server 119 | ┌─────────────────────────────────────┐ 120 | │ │ 121 | │ RTVIClient │ ┌──────────────┐ 122 | │ │ Media over │ │ 123 | │ ┌───────────────────────────────┐ │ WebRTC │ OpenAI │ 124 | │ │ OpenAIRealTimeWebRTCTransport │◄─┼────────────────┼─► Server │ 125 | │ └───────────────────────────────┘ │ │ │ 126 | │ │ └──────────────┘ 127 | └─────────────────────────────────────┘ 128 | ``` 129 | 130 | ## Local Development 131 | 132 | ### Build the transport libraries 133 | 134 | ```bash 135 | $ npm i 136 | $ npm run build 137 | ``` 138 | 139 | ## License 140 | BSD-2 Clause 141 | 142 | ## Contributing 143 | We welcome contributions from the community! Whether you're fixing bugs, improving documentation, or adding new features, here's how you can help: 144 | 145 | - **Found a bug?** Open an [issue](https://github.com/pipecat-ai/pipecat-client-web-transports/issues) 146 | - **Have a feature idea?** Start a [discussion](https://discord.gg/pipecat) 147 | - **Want to contribute code?** Check our [CONTRIBUTING.md](CONTRIBUTING.md) guide 148 | - **Documentation improvements?** [Docs](https://github.com/pipecat-ai/docs) PRs are always welcome 149 | 150 | Before submitting a pull request, please check existing issues and PRs to avoid duplicates. 151 | 152 | We aim to review all contributions promptly and provide constructive feedback to help get your changes merged. -------------------------------------------------------------------------------- /examples/directToLLMTransports/README.md: -------------------------------------------------------------------------------- 1 | # Pipecat (RTVI) Client Demo for Direct Communication with LLMs 2 | 3 | ## Overview 4 | This application demonstrates a real-time voice interaction system using the RTVI Client library with both the Gemini Multimodal Live and OpenAI RealTime WebRTC integrations. It enables two-way communication between users and the LLM, featuring voice input/output, text messaging, and various audio controls. 5 | 6 | ## Features 7 | - Real-time voice interaction with a Gemini Multimodal Live bot 8 | - Real-time voice interaction with an OpenAI RealTime bot 9 | - Microphone input control and device selection 10 | - Text-based message prompting 11 | - Audio visualization through dynamic speech bubbles 12 | - Comprehensive event handling system 13 | - Connection state management 14 | 15 | ## Prerequisites 16 | - Gemini API key (set as environment variable `VITE_DANGEROUS_GEMINI_API_KEY`) 17 | - OpenAI API key (set as environment variable `VITE_DANGEROUS_OPENAI_API_KEY`) 18 | - Optional [OpenWeather API](https://openweathermap.org/api) key for fetching weather. If none is provided, the app will generate something random. 
19 | - Modern web browser with WebSocket support 20 | - Access to microphone 21 | 22 | ## Dependencies 23 | ``` 24 | # from base folder 25 | $ npm i 26 | $ npm run build 27 | ``` 28 | 29 | 30 | ## Setup and Installation 31 | ``` 32 | npm i 33 | cp env.example .env 34 | # update .env with API keys 35 | 36 | npm run dev 37 | ``` 38 | 39 | ### To run the example with Gemini Multimodal Live: 40 | 41 | Open [http://localhost:5173/](http://localhost:5173/) 42 | 43 | ### To run the example with OpenAI RealTime: 44 | 45 | Open [http://localhost:5173?service=openai](http://localhost:5173?service=openai) 46 | 47 | ## Documentation Reference 48 | - [RTVI Client Documentation](https://docs.pipecat.ai/client/introduction) 49 | - [Gemini Multimodal Live Documentation](https://ai.google.dev/api/multimodal-live) 50 | - [OpenAI RealTime WebRTC Documentation](https://platform.openai.com/docs/guides/realtime-webrtc) 51 | 52 | ## Usage 53 | 54 | ### Initialization 55 | The application automatically initializes when the DOM content is loaded. It sets up: 56 | - Audio device selection 57 | - Microphone controls 58 | - Bot connection management 59 | - Event handlers 60 | 61 | ### Controls 62 | - **Toggle Bot**: Connect/disconnect the AI assistant 63 | - **Mute/Unmute**: Control microphone input 64 | - **Microphone Selection**: Choose input device 65 | - **Text Input**: Send text messages to the bot 66 | 67 | ### Event Handling 68 | The application handles various events including: 69 | - Transport state changes 70 | - Bot connection status 71 | - Audio track management 72 | - Speech detection 73 | - Error handling 74 | - Audio level visualization 75 | 76 | ## Key Components 77 | 78 | ### RTVIClient Configuration 79 | ```typescript 80 | let RTVIConfig: RTVIClientOptions = { 81 | transport, 82 | params: { 83 | baseUrl: "api", 84 | requestData: { }, 85 | }, 86 | enableMic: true, 87 | enableCam: false, 88 | timeout: 30 * 1000, 89 | }; 90 | ``` 91 | 92 | ### Gemini Multimodal Live Service Configuration 93 | ```typescript 94 | const llm_service_options: GeminiLLMServiceOptions = { 95 | api_key: import.meta.env.VITE_DANGEROUS_GEMINI_API_KEY, 96 | model: "models/gemini-2.0-flash-exp", 97 | // ... additional configuration 98 | }; 99 | ``` 100 | 101 | For all service options and their defaults, see [GeminiLLMServiceOptions](../../transports/gemini-live-websocket-transport/src/geminiLiveWebSocketTransport.ts#21) 102 | 103 | ### OpenAI Realtime API Service Configuration 104 | ```typescript 105 | const llm_service_options: OpenAIServiceOptions = { 106 | api_key: import.meta.env.VITE_DANGEROUS_OPENAI_API_KEY, 107 | // ...
additional configuration 108 | }; 109 | ``` 110 | 111 | For all service options and their defaults, see [OpenAIServiceOptions](../../transports/openai-realtime-webrtc-transport/src/OpenAIRealTimeWebRTCTransport.ts#28) 112 | 113 | ## Notes 114 | - Gemini integration currently does not support transcripts 115 | 116 | ## License 117 | BSD-2 Clause 118 | -------------------------------------------------------------------------------- /examples/directToLLMTransports/README.md.bak: -------------------------------------------------------------------------------- 1 | 2 | # Pipecat JavaScript Client SDK Example using a Gemini MultiModal Live Transport 3 | 4 | ## Setup 5 | 6 | Build transport dependencies 7 | 8 | ``` 9 | # from base folder 10 | $ yarn 11 | $ yarn workspace @pipecat-ai/realtime-websocket-transport build 12 | $ yarn workspace @pipecat-ai/gemini-live-websocket-transport 13 | ``` 14 | 15 | ## Install and run locally 16 | 17 | ``` 18 | npm i 19 | npm run dev 20 | 21 | cp env.example .env 22 | # update .env with API keys 23 | ``` 24 | 25 | Open [http://localhost:5173/](http://localhost:5173/) 26 | 27 | ## Demo code 28 | 29 | This is bare-bones LLM voice chat app that sets up an [RTVI Client](https://github.com/pipecat-ai/pipecat-client-web)(Pipecat's client-side JS component) with a [GeminiLiveWebsocketTransport](../../transports/gemini-live-websocket-transport/src/geminiLiveWebSocketTransport.ts) to communicate directly with Google's Multimodal Live API over a websocket connection. 30 | 31 | The application code is all in two files: 32 | 33 | - [index.html](./index.html) 34 | - [src/app.ts](./src/app.ts) 35 | -------------------------------------------------------------------------------- /examples/directToLLMTransports/env.example: -------------------------------------------------------------------------------- 1 | VITE_DANGEROUS_GEMINI_API_KEY= 2 | VITE_DANGEROUS_OPENAI_API_KEY= 3 | VITE_DANGEROUS_OPENWEATHER_API_KEY= -------------------------------------------------------------------------------- /examples/directToLLMTransports/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | My Web App 5 | 6 | 7 | 8 | 9 | 10 |
11 |
Transport state: disconnected
12 |
13 | 14 | 15 |
16 | 17 | 18 |
19 |
20 |
21 | 22 | 28 | 29 |
30 |
31 |
32 |
33 |
34 |
35 | 36 |
37 | 38 | 39 | -------------------------------------------------------------------------------- /examples/directToLLMTransports/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "1116", 3 | "version": "1.0.0", 4 | "main": "server/server.ts", 5 | "scripts": { 6 | "dev": "vite", 7 | "build": "vite build", 8 | "preview": "vite preview" 9 | }, 10 | "keywords": [], 11 | "author": "", 12 | "license": "BSD-2-Clause", 13 | "description": "", 14 | "dependencies": { 15 | "@pipecat-ai/client-js": "^0.3.5", 16 | "@pipecat-ai/gemini-live-websocket-transport": "file:../../transports/gemini-live-websocket-transport", 17 | "@pipecat-ai/openai-realtime-webrtc-transport": "file:../../transports/openai-realtime-webrtc-transport", 18 | "dotenv": "^16.4.5", 19 | "express": "^4.21.1", 20 | "morgan": "^1.10.0" 21 | }, 22 | "devDependencies": { 23 | "@types/express": "^5.0.0", 24 | "@types/morgan": "^1.9.9", 25 | "@types/node": "^22.9.0", 26 | "concurrently": "^9.1.0", 27 | "eslint": "^9.15.0", 28 | "nodemon": "^3.1.7", 29 | "ts-node": "^10.9.2", 30 | "ts-node-dev": "^2.0.0", 31 | "typescript": "^5.6.3", 32 | "vite": "^5.4.11" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /examples/directToLLMTransports/src/app.ts: -------------------------------------------------------------------------------- 1 | // Import Types for Gemini Transport 2 | import { 3 | GeminiLiveWebsocketTransport, 4 | GeminiLLMServiceOptions, 5 | } from "@pipecat-ai/gemini-live-websocket-transport"; 6 | 7 | import { 8 | OpenAIRealTimeWebRTCTransport, 9 | OpenAIServiceOptions, 10 | } from "@pipecat-ai/openai-realtime-webrtc-transport"; 11 | 12 | // Import core Pipecat RTVI Client and types 13 | import { 14 | LLMHelper, 15 | FunctionCallParams, 16 | Transport, 17 | RTVIClient, 18 | RTVIEvent, 19 | RTVIMessage, 20 | Participant, 21 | TranscriptData, 22 | BotTTSTextData, 23 | RTVIClientOptions, 24 | } from "@pipecat-ai/client-js"; 25 | 26 | // Global variables for DOM elements and client state 27 | let statusDiv: HTMLElement; 28 | let audioDiv: HTMLDivElement; 29 | let toggleBotButton: HTMLButtonElement; 30 | let submitBtn: HTMLButtonElement; 31 | let rtviClient: RTVIClient; 32 | let llmHelper: LLMHelper; 33 | let botRunning = false; 34 | 35 | // Initialize the application when DOM is fully loaded 36 | document.addEventListener("DOMContentLoaded", () => { 37 | statusDiv = document.getElementById("status")!; 38 | toggleBotButton = document.getElementById("toggleBot")! as HTMLButtonElement; 39 | toggleBotButton.addEventListener("click", () => toggleBot()); 40 | 41 | // Handle microphone device selection 42 | document.getElementById("mic-picker")!.onchange = (e) => { 43 | const target = e.target as HTMLSelectElement; 44 | console.log("user changed device", target, target.value); 45 | rtviClient.updateMic(target.value); 46 | }; 47 | 48 | // Set up mute button functionality 49 | const muteBtn = document.getElementById("toggleMute")!; 50 | muteBtn.addEventListener("click", () => { 51 | muteBtn.textContent = rtviClient.isMicEnabled ? "Unmute Mic" : "Mute Mic"; 52 | rtviClient.enableMic(!rtviClient.isMicEnabled); 53 | }); 54 | 55 | // Set up text submission button 56 | submitBtn = document.getElementById("submit-text")! 
as HTMLButtonElement; 57 | submitBtn.addEventListener("click", () => { 58 | sendUserMessage(); 59 | }); 60 | submitBtn.disabled = true; 61 | 62 | // Initialize the bot 63 | initBot(); 64 | }); 65 | 66 | // Connect / Disconnect from bot 67 | async function toggleBot() { 68 | toggleBotButton.disabled = true; 69 | if (botRunning) { 70 | console.log("disconnecting bot"); 71 | await disconnectBot(); 72 | } else { 73 | console.log("connecting bot"); 74 | await connectBot(); 75 | } 76 | toggleBotButton.textContent = botRunning ? "Disconnect" : "Connect"; 77 | } 78 | 79 | // Initialize the bot with configuration 80 | async function initBot() { 81 | const urlParams = new URLSearchParams(window.location.search); 82 | const service = urlParams.get("service") || "gemini"; 83 | const { transport, service_options } = 84 | service === "gemini" ? initGeminiTransport() : initOpenAITransport(); 85 | 86 | // Configure RTVI client options 87 | let RTVIConfig: RTVIClientOptions = { 88 | transport, 89 | params: { 90 | baseUrl: "api", 91 | requestData: { service_options }, 92 | }, 93 | enableMic: true, 94 | enableCam: false, 95 | timeout: 30 * 1000, 96 | }; 97 | RTVIConfig.customConnectHandler = () => Promise.resolve(); 98 | 99 | // Create new RTVI client instance 100 | rtviClient = new RTVIClient(RTVIConfig); 101 | llmHelper = new LLMHelper({}); 102 | llmHelper.handleFunctionCall(async (fn: FunctionCallParams) => { 103 | return await handleFunctionCall(fn.functionName, fn.arguments); 104 | }); 105 | rtviClient.registerHelper(service, llmHelper); 106 | 107 | // Make RTVI client and transport available globally for debugging 108 | (window as any).client = rtviClient; 109 | 110 | // Set up RTVI event handlers and initialize devices 111 | setupEventHandlers(rtviClient); 112 | await setupDevices(); 113 | } 114 | 115 | // Initialize the Gemini LLM and its service options 116 | function initGeminiTransport() { 117 | // Configure Gemini LLM service options 118 | const llm_service_options: GeminiLLMServiceOptions = { 119 | api_key: import.meta.env.VITE_DANGEROUS_GEMINI_API_KEY, 120 | model: "models/gemini-2.0-flash-exp", 121 | initial_messages: [ 122 | // Set up initial system and user messages. 123 | // Without the user message, the bot will not respond immediately 124 | // and wait for the user to speak first. 125 | { 126 | role: "model", 127 | content: "You are a pencil salesman...", 128 | }, 129 | { role: "user", content: "Hello!" }, 130 | ], 131 | settings: { 132 | speech_config: { 133 | voice_config: { 134 | prebuilt_voice_config: { 135 | // Options are: "Puck" | "Charon" | "Kore" | "Fenrir" | "Aoede" 136 | voice_name: "Charon", 137 | }, 138 | }, 139 | }, 140 | }, 141 | }; 142 | 143 | // Initialize transport 144 | let transport: Transport = new GeminiLiveWebsocketTransport( 145 | llm_service_options 146 | ); 147 | 148 | return { transport, service_options: llm_service_options }; 149 | } 150 | 151 | function initOpenAITransport() { 152 | // Configure OpenAI LLM service options 153 | const llm_service_options: OpenAIServiceOptions = { 154 | api_key: import.meta.env.VITE_DANGEROUS_OPENAI_API_KEY, 155 | settings: { 156 | instructions: "You are a pirate. 
You are looking for buried treasure.", 157 | voice: "echo", 158 | input_audio_noise_reduction: { type: "near_field" }, 159 | turn_detection: { type: "semantic_vad" }, 160 | tools: [ 161 | { 162 | type: "function", 163 | name: "changeBackgroundColor", 164 | description: "Change the background color of the page", 165 | parameters: { 166 | type: "object", 167 | properties: { 168 | color: { 169 | type: "string", 170 | description: "A hex value of the color", 171 | }, 172 | }, 173 | }, 174 | }, 175 | { 176 | type: "function", 177 | name: "getWeather", 178 | description: "Gets the current weather for a given location", 179 | parameters: { 180 | type: "object", 181 | properties: { 182 | location: { 183 | type: "string", 184 | description: "A city or location", 185 | }, 186 | }, 187 | }, 188 | }, 189 | ], 190 | }, 191 | initial_messages: [{ role: "user", content: "Hello" }], 192 | }; 193 | 194 | // Initialize transport 195 | let transport: Transport = new OpenAIRealTimeWebRTCTransport( 196 | llm_service_options 197 | ); 198 | 199 | return { transport, service_options: llm_service_options }; 200 | } 201 | 202 | // Initialize and update available audio devices 203 | async function setupDevices() { 204 | await rtviClient.initDevices(); 205 | const mics = await rtviClient.getAllMics(); 206 | updateMicList(mics); 207 | } 208 | 209 | // Updates the microphone selection dropdown 210 | function updateMicList(mics: MediaDeviceInfo[]) { 211 | const micPicker = document.getElementById("mic-picker")!; 212 | micPicker.replaceChildren(); 213 | const curMic = rtviClient.selectedMic?.deviceId; 214 | mics.forEach((mic) => { 215 | let el = document.createElement("option"); 216 | el.textContent = mic.label; 217 | el.value = mic.deviceId; 218 | micPicker.appendChild(el); 219 | if (mic.deviceId === curMic) { 220 | el.selected = true; 221 | } 222 | }); 223 | } 224 | 225 | // Connect client to Gemini Multimodal Live bot 226 | async function connectBot() { 227 | statusDiv.textContent = "Joining..."; 228 | try { 229 | await rtviClient.connect(); 230 | console.log("READY! 
Let's GO!"); 231 | } catch (e) { 232 | console.error("Error connecting", e); 233 | toggleBotButton.disabled = false; 234 | return; 235 | } 236 | toggleBotButton.disabled = false; 237 | submitBtn.disabled = false; 238 | botRunning = true; 239 | } 240 | 241 | // Disconnect client from Gemini Multimodal Live bot 242 | async function disconnectBot() { 243 | try { 244 | await rtviClient.disconnect(); 245 | } catch (e) { 246 | console.error("Error disconnecting", e); 247 | } 248 | toggleBotButton.disabled = false; 249 | submitBtn.disabled = true; 250 | botRunning = false; 251 | } 252 | 253 | // Set up event handlers for RTVI client 254 | // https://docs.pipecat.ai/client/js/api-reference/callbacks#2-event-listeners 255 | export async function setupEventHandlers(rtviClient: RTVIClient) { 256 | audioDiv = document.getElementById("audio") as HTMLDivElement; 257 | 258 | rtviClient.on(RTVIEvent.TransportStateChanged, (state: string) => { 259 | console.log(`-- transport state change: ${state} --`); 260 | statusDiv.textContent = `Transport state: ${state}`; 261 | if (state === "disconnected") { 262 | botRunning = false; 263 | toggleBotButton.textContent = "Connect"; 264 | } 265 | }); 266 | 267 | rtviClient.on(RTVIEvent.Connected, () => { 268 | console.log("-- user connected --"); 269 | }); 270 | 271 | rtviClient.on(RTVIEvent.Disconnected, () => { 272 | console.log("-- user disconnected --"); 273 | }); 274 | 275 | rtviClient.on(RTVIEvent.BotConnected, () => { 276 | console.log("-- bot connected --"); 277 | }); 278 | 279 | rtviClient.on(RTVIEvent.BotDisconnected, () => { 280 | console.log("--bot disconnected --"); 281 | }); 282 | 283 | rtviClient.on(RTVIEvent.BotReady, () => { 284 | console.log("-- bot ready to chat! --"); 285 | }); 286 | 287 | // For realtime v2v transports, this event will only fire for the 288 | // local participant. 289 | rtviClient.on( 290 | RTVIEvent.TrackStarted, 291 | (track: MediaStreamTrack, participant?: Participant) => { 292 | console.log(" --> track started", participant, track); 293 | if (participant?.local) { 294 | return; 295 | } 296 | let audio = document.createElement("audio"); 297 | audio.srcObject = new MediaStream([track]); 298 | audio.autoplay = true; 299 | audioDiv.appendChild(audio); 300 | } 301 | ); 302 | 303 | // For realtime v2v transports, this event will only fire for the 304 | // local participant. 
305 | rtviClient.on( 306 | RTVIEvent.TrackStopped, 307 | (track: MediaStreamTrack, participant?: Participant) => { 308 | console.log(" --> track stopped", participant, track); 309 | } 310 | ); 311 | 312 | rtviClient.on(RTVIEvent.UserStartedSpeaking, () => { 313 | console.log("-- user started speaking -- "); 314 | }); 315 | 316 | rtviClient.on(RTVIEvent.UserStoppedSpeaking, () => { 317 | console.log("-- user stopped speaking -- "); 318 | }); 319 | 320 | rtviClient.on(RTVIEvent.BotStartedSpeaking, () => { 321 | console.log("-- bot started speaking -- "); 322 | }); 323 | 324 | rtviClient.on(RTVIEvent.BotStoppedSpeaking, () => { 325 | console.log("-- bot stopped speaking -- "); 326 | }); 327 | 328 | // multimodal live does not currently provide transcripts so this will not fire 329 | rtviClient.on(RTVIEvent.UserTranscript, (transcript: TranscriptData) => { 330 | console.log("[EVENT] UserTranscript", transcript); 331 | }); 332 | 333 | // multimodal live does not currently provide transcripts so this will not fire 334 | rtviClient.on(RTVIEvent.BotTtsText, (data: BotTTSTextData) => { 335 | console.log("[EVENT] BotTtsText", data); 336 | }); 337 | 338 | // multimodal live does not currently provide transcripts so this will not fire 339 | rtviClient.on(RTVIEvent.BotTranscript, (data: BotTTSTextData) => { 340 | console.log("[EVENT] BotTranscript", data); 341 | }); 342 | 343 | rtviClient.on(RTVIEvent.Error, (message: RTVIMessage) => { 344 | console.log("[EVENT] RTVI Error!", message); 345 | }); 346 | 347 | rtviClient.on(RTVIEvent.MessageError, (message: RTVIMessage) => { 348 | console.log("[EVENT] RTVI ErrorMessage error!", message); 349 | }); 350 | 351 | // multimodal live does not currently provide metrics so this will not fire 352 | rtviClient.on(RTVIEvent.Metrics, (data) => { 353 | // let's only print out ttfb for now 354 | if (!data.ttfb) { 355 | return; 356 | } 357 | data.ttfb.map((metric) => { 358 | console.log(`[METRICS] ${metric.processor} ttfb: ${metric.value}`); 359 | }); 360 | }); 361 | 362 | rtviClient.on(RTVIEvent.MicUpdated, (mic: MediaDeviceInfo) => { 363 | const micPicker = document.getElementById("mic-picker")!; 364 | for (let i = 0; i < micPicker.children.length; i++) { 365 | let el = micPicker.children[i] as HTMLOptionElement; 366 | el.selected = el.value === mic.deviceId; 367 | } 368 | }); 369 | 370 | rtviClient.on(RTVIEvent.AvailableMicsUpdated, (mics: MediaDeviceInfo[]) => { 371 | updateMicList(mics); 372 | }); 373 | 374 | rtviClient.on(RTVIEvent.LocalAudioLevel, (level: number) => { 375 | updateSpeakerBubble(level, "user"); 376 | }); 377 | rtviClient.on(RTVIEvent.RemoteAudioLevel, (level: number) => { 378 | updateSpeakerBubble(level, "bot"); 379 | }); 380 | } 381 | 382 | // Send user message to bot. 383 | function sendUserMessage() { 384 | const textInput = document.getElementById("text-input")! as HTMLInputElement; 385 | llmHelper.appendToMessages({ role: "user", content: textInput.value }, true); 386 | textInput.value = ""; 387 | } 388 | 389 | // Update the speaker bubble size based on the audio level 390 | function updateSpeakerBubble(level: number, whom: string) { 391 | const volume = level * 100; 392 | const userBubble = document.getElementById( 393 | whom === "user" ? 
"user-bubble" : "bot-bubble" 394 | )!; 395 | // Scale the bubble size based on the volume value 396 | const scale = 1 + volume / 50; // Adjust the divisor to control the scaling effect 397 | userBubble.style.transform = `scale(${scale})`; 398 | } 399 | 400 | function _generateRandomWeather() { 401 | const temperature = Math.random() * 200 - 80; 402 | const humidity = Math.random() * 100; 403 | const conditions = ["sunny", "cloudy", "rainy", "snowy"]; 404 | const condition = conditions[Math.floor(Math.random() * conditions.length)]; 405 | const windSpeed = Math.random() * 50; 406 | const windGusts = windSpeed + Math.random() * 20; 407 | return { 408 | temperature, 409 | humidity, 410 | condition, 411 | windSpeed, 412 | windGusts, 413 | }; 414 | } 415 | 416 | async function handleFunctionCall(functionName: string, args: unknown) { 417 | console.log("[EVENT] LLMFunctionCall", functionName); 418 | const toolFunctions: { [key: string]: any } = { 419 | changeBackgroundColor: ({ color }: { [key: string]: string }) => { 420 | console.log("changing background color to", color); 421 | document.body.style.backgroundColor = color; 422 | return { success: true, color }; 423 | }, 424 | getWeather: async ({ location }: { [key: string]: string }) => { 425 | console.log("getting weather for", location); 426 | const key = import.meta.env.VITE_DANGEROUS_OPENWEATHER_API_KEY; 427 | if (!key) { 428 | const ret = { success: true, weather: _generateRandomWeather() }; 429 | console.log("returning weather", ret); 430 | return ret; 431 | } 432 | const locationReq = await fetch( 433 | `http://api.openweathermap.org/geo/1.0/direct?q=${location}&limit=1&appid=${key}` 434 | ); 435 | const locJson = await locationReq.json(); 436 | const loc = { lat: locJson[0].lat, lon: locJson[0].lon }; 437 | const exclude = ["minutely", "hourly", "daily"].join(","); 438 | const weatherRec = await fetch( 439 | `https://api.openweathermap.org/data/3.0/onecall?lat=${loc.lat}&lon=${loc.lon}&exclude=${exclude}&appid=${key}` 440 | ); 441 | const weather = await weatherRec.json(); 442 | return { success: true, weather: weather.current }; 443 | }, 444 | }; 445 | const toolFunction = toolFunctions[functionName]; 446 | if (toolFunction) { 447 | let result = await toolFunction(args); 448 | console.debug("returning result", result); 449 | return result; 450 | } 451 | } 452 | -------------------------------------------------------------------------------- /examples/directToLLMTransports/src/styles.css: -------------------------------------------------------------------------------- 1 | body { 2 | max-width: 800px; 3 | margin: 20px 20px; 4 | font-family: system-ui, -apple-system, sans-serif; 5 | } 6 | 7 | #join-div a { 8 | color: #2563eb; 9 | cursor: pointer; 10 | } 11 | 12 | #connected-div { 13 | width: 90vw; 14 | margin: 20px 2vw; 15 | display: flex; 16 | flex-direction: column; 17 | justify-content: space-between; 18 | } 19 | 20 | #controls { 21 | display: flex; 22 | justify-content: left; 23 | margin: 10px 0; 24 | } 25 | 26 | #controls div { 27 | padding: 5px; 28 | margin: 0 5px; 29 | } 30 | 31 | button { 32 | padding: 5px; 33 | margin: 0 5px; 34 | width: 8em; 35 | border-radius: 10px; 36 | background-color: aliceblue; 37 | } 38 | 39 | button:active { 40 | transform: translateY(1px); /* Move the button down slightly */ 41 | box-shadow: 2px 2px #666; /* Add a shadow to create a pressed effect */ 42 | } 43 | 44 | #text-div { 45 | display: flex; 46 | flex-direction: column; 47 | margin: 10px 0; 48 | } 49 | 50 | #text-div label { 51 | margin: 5px 0; 
52 | } 53 | 54 | #text-div textarea { 55 | margin: 5px 0; 56 | padding: 5px; 57 | width: 50%; 58 | border-radius: 10px; 59 | } 60 | 61 | #bubbles { 62 | margin: 20px 0px; 63 | border-radius: 16px; 64 | display: flex; 65 | flex-direction: row; 66 | } 67 | 68 | .bubble { 69 | width: 50px; 70 | height: 50px; 71 | border-radius: 50%; 72 | transition: transform 0.1s ease; 73 | margin: 15px; 74 | } 75 | 76 | #user-bubble { 77 | background-color: #4caf50; 78 | } 79 | 80 | #bot-bubble { 81 | background-color: #2196f3; 82 | } 83 | 84 | #content h2 { 85 | padding-left: 20px; 86 | } 87 | 88 | #chat-text { 89 | display: flex; 90 | flex-direction: column; 91 | align-items: left; 92 | overflow-y: auto; 93 | padding: 20px; 94 | flex: 1; 95 | } 96 | 97 | .user-message { 98 | display: flex; 99 | flex-direction: column; 100 | background-color: #f0f0f0; 101 | padding: 16px; 102 | margin: 12px 6px; 103 | border-radius: 8px; 104 | line-height: 1.5; 105 | } 106 | 107 | .user-message .interim { 108 | color: #707070; 109 | } 110 | 111 | .user-message::before { 112 | content: "User\A"; 113 | font-size: 14px; 114 | color: #666; 115 | font-weight: 500; 116 | display: block; 117 | padding-bottom: 0.4em; 118 | } 119 | 120 | .assistant-message { 121 | display: flex; 122 | flex-direction: column; 123 | background-color: #ffffff; 124 | border: 1px solid #e0e0e0; 125 | margin: 12px 6px; 126 | padding: 16px; 127 | border-radius: 8px; 128 | line-height: 1.5; 129 | } 130 | 131 | .assistant-message::before { 132 | content: "Assistant"; 133 | font-size: 14px; 134 | color: #666; 135 | font-weight: 500; 136 | display: block; 137 | padding-bottom: 0.4em; 138 | } 139 | -------------------------------------------------------------------------------- /examples/directToLLMTransports/src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /examples/directToLLMTransports/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "skipLibCheck": true, 7 | "jsx": "preserve", 8 | 9 | /* Bundler mode */ 10 | "moduleResolution": "bundler", 11 | "allowImportingTsExtensions": true, 12 | "allowJs": true, 13 | "noEmit": true, 14 | "resolveJsonModule": true, 15 | "isolatedModules": true, 16 | "moduleDetection": "force", 17 | 18 | /* Linting */ 19 | "strict": true, 20 | "noUnusedLocals": true, 21 | "noUnusedParameters": false, 22 | "noFallthroughCasesInSwitch": true 23 | }, 24 | "include": ["src", "lib"] 25 | } 26 | -------------------------------------------------------------------------------- /lib/media-mgmt/dailyMediaManager.ts: -------------------------------------------------------------------------------- 1 | import { MediaManager } from "./mediaManager"; 2 | import { MediaStreamRecorder, WavStreamPlayer } from "../wavtools"; 3 | 4 | import Daily, { 5 | DailyCall, 6 | DailyEventObjectAvailableDevicesUpdated, 7 | DailyEventObjectLocalAudioLevel, 8 | DailyEventObjectSelectedDevicesUpdated, 9 | DailyEventObjectTrack, 10 | DailyParticipant, 11 | DailyParticipantsObject, 12 | } from "@daily-co/daily-js"; 13 | import { Participant, Tracks } from "@pipecat-ai/client-js"; 14 | 15 | export class DailyMediaManager extends MediaManager { 16 | private _daily: DailyCall; 17 | private _mediaStreamRecorder: MediaStreamRecorder | undefined; 18 | private 
_wavStreamPlayer: WavStreamPlayer | undefined; 19 | 20 | private _initialized: boolean; 21 | private _connected: boolean; 22 | private _connectResolve: ((value: void | PromiseLike) => void) | null; 23 | 24 | private _currentAudioTrack: MediaStreamTrack | null; 25 | private _selectedCam: MediaDeviceInfo | Record = {}; 26 | private _selectedMic: MediaDeviceInfo | Record = {}; 27 | private _selectedSpeaker: MediaDeviceInfo | Record = {}; 28 | 29 | private _remoteAudioLevelInterval: NodeJS.Timeout | null = null; 30 | 31 | private onTrackStartedCallback?: (event: DailyEventObjectTrack) => void; 32 | private onTrackStoppedCallback?: (event: DailyEventObjectTrack) => void; 33 | 34 | private _recorderChunkSize: number | undefined = undefined; 35 | 36 | constructor( 37 | enablePlayer: boolean = true, 38 | enableRecording: boolean = true, 39 | onTrackStartedCallback?: (event: DailyEventObjectTrack) => void, 40 | onTrackStoppedCallback?: (event: DailyEventObjectTrack) => void, 41 | recorderChunkSize: number | undefined = undefined, 42 | recorderSampleRate: number = 24000, 43 | ) { 44 | super(); 45 | this._initialized = false; 46 | this._connected = false; 47 | this._currentAudioTrack = null; 48 | this._connectResolve = null; 49 | this.onTrackStartedCallback = onTrackStartedCallback; 50 | this.onTrackStoppedCallback = onTrackStoppedCallback; 51 | this._recorderChunkSize = recorderChunkSize; 52 | 53 | this._daily = Daily.getCallInstance() ?? Daily.createCallObject(); 54 | 55 | if (enableRecording) { 56 | this._mediaStreamRecorder = new MediaStreamRecorder({ 57 | sampleRate: recorderSampleRate, 58 | }); 59 | } 60 | if (enablePlayer) { 61 | this._wavStreamPlayer = new WavStreamPlayer({ sampleRate: 24000 }); 62 | } 63 | 64 | this._daily.on("track-started", this.handleTrackStarted.bind(this)); 65 | this._daily.on("track-stopped", this.handleTrackStopped.bind(this)); 66 | this._daily.on( 67 | "available-devices-updated", 68 | this._handleAvailableDevicesUpdated.bind(this), 69 | ); 70 | this._daily.on( 71 | "selected-devices-updated", 72 | this._handleSelectedDevicesUpdated.bind(this), 73 | ); 74 | this._daily.on("local-audio-level", this._handleLocalAudioLevel.bind(this)); 75 | } 76 | 77 | async initialize(): Promise { 78 | if (this._initialized) { 79 | console.warn("DailyMediaManager already initialized"); 80 | return; 81 | } 82 | const infos = await this._daily.startCamera({ 83 | startVideoOff: !this._camEnabled, 84 | startAudioOff: !this._micEnabled, 85 | }); 86 | const { devices } = await this._daily.enumerateDevices(); 87 | const cams = devices.filter((d) => d.kind === "videoinput"); 88 | const mics = devices.filter((d) => d.kind === "audioinput"); 89 | const speakers = devices.filter((d) => d.kind === "audiooutput"); 90 | this._callbacks.onAvailableCamsUpdated?.(cams); 91 | this._callbacks.onAvailableMicsUpdated?.(mics); 92 | this._callbacks.onAvailableSpeakersUpdated?.(speakers); 93 | this._selectedCam = infos.camera; 94 | this._callbacks.onCamUpdated?.(infos.camera as MediaDeviceInfo); 95 | this._selectedMic = infos.mic; 96 | this._callbacks.onMicUpdated?.(infos.mic as MediaDeviceInfo); 97 | this._selectedSpeaker = infos.speaker; 98 | this._callbacks.onSpeakerUpdated?.(infos.speaker as MediaDeviceInfo); 99 | 100 | // Instantiate audio observers 101 | if (!this._daily.isLocalAudioLevelObserverRunning()) 102 | await this._daily.startLocalAudioLevelObserver(100); 103 | 104 | if (this._wavStreamPlayer) { 105 | await this._wavStreamPlayer.connect(); 106 | if (!this._remoteAudioLevelInterval) { 107 | 
this._remoteAudioLevelInterval = setInterval(() => { 108 | const frequencies = this._wavStreamPlayer!.getFrequencies(); 109 | let aveVal = 0; 110 | if (frequencies.values?.length) { 111 | aveVal = 112 | frequencies.values.reduce((a, c) => a + c, 0) / 113 | frequencies.values.length; 114 | } 115 | this._handleRemoteAudioLevel(aveVal); 116 | }, 100); 117 | } 118 | } 119 | this._initialized = true; 120 | } 121 | 122 | async connect(): Promise { 123 | if (this._connected) { 124 | console.warn("DailyMediaManager already connected"); 125 | return; 126 | } 127 | this._connected = true; 128 | if (!this._initialized) { 129 | return new Promise((resolve) => { 130 | (async () => { 131 | this._connectResolve = resolve; 132 | await this.initialize(); 133 | })(); 134 | }); 135 | } 136 | if (this._micEnabled) { 137 | this._startRecording(); 138 | } 139 | } 140 | 141 | async disconnect(): Promise { 142 | if (this._remoteAudioLevelInterval) { 143 | clearInterval(this._remoteAudioLevelInterval); 144 | } 145 | this._remoteAudioLevelInterval = null; 146 | this._daily.leave(); 147 | this._currentAudioTrack = null; 148 | await this._mediaStreamRecorder?.end(); 149 | this._wavStreamPlayer?.interrupt(); 150 | this._initialized = false; 151 | this._connected = false; 152 | } 153 | 154 | async userStartedSpeaking(): Promise { 155 | return this._wavStreamPlayer?.interrupt(); 156 | } 157 | 158 | bufferBotAudio( 159 | data: ArrayBuffer | Int16Array, 160 | id?: string, 161 | ): Int16Array | undefined { 162 | return this._wavStreamPlayer?.add16BitPCM(data, id); 163 | } 164 | 165 | async getAllMics(): Promise { 166 | let devices = (await this._daily.enumerateDevices()).devices; 167 | return devices.filter((device) => device.kind === "audioinput"); 168 | } 169 | async getAllCams(): Promise { 170 | let devices = (await this._daily.enumerateDevices()).devices; 171 | return devices.filter((device) => device.kind === "videoinput"); 172 | } 173 | async getAllSpeakers(): Promise { 174 | let devices = (await this._daily.enumerateDevices()).devices; 175 | return devices.filter((device) => device.kind === "audiooutput"); 176 | } 177 | 178 | updateMic(micId: string) { 179 | this._daily 180 | .setInputDevicesAsync({ audioDeviceId: micId }) 181 | .then((deviceInfo) => { 182 | this._selectedMic = deviceInfo.mic; 183 | }); 184 | } 185 | updateCam(camId: string) { 186 | this._daily 187 | .setInputDevicesAsync({ videoDeviceId: camId }) 188 | .then((deviceInfo) => { 189 | this._selectedCam = deviceInfo.camera; 190 | }); 191 | } 192 | async updateSpeaker(speakerId: string): Promise { 193 | if (speakerId !== "default" && this._selectedSpeaker.deviceId === speakerId) 194 | return; 195 | let sID = speakerId; 196 | if (sID === "default") { 197 | const speakers = await this.getAllSpeakers(); 198 | const defaultSpeaker = speakers.find((s) => s.deviceId === "default"); 199 | if (!defaultSpeaker) { 200 | console.warn("No default speaker found"); 201 | return; 202 | } 203 | speakers.splice(speakers.indexOf(defaultSpeaker), 1); 204 | const defaultSpeakerCp = speakers.find((s) => 205 | defaultSpeaker.label.includes(s.label), 206 | ); 207 | sID = defaultSpeakerCp?.deviceId ?? 
speakerId; 208 | } 209 | this._wavStreamPlayer?.updateSpeaker(sID).then(() => { 210 | this._selectedSpeaker = { deviceId: speakerId } as MediaDeviceInfo; 211 | this._callbacks.onSpeakerUpdated?.(this._selectedSpeaker); 212 | }); 213 | } 214 | 215 | get selectedMic(): MediaDeviceInfo | Record { 216 | return this._selectedMic; 217 | } 218 | get selectedCam(): MediaDeviceInfo | Record { 219 | return this._selectedCam; 220 | } 221 | get selectedSpeaker(): MediaDeviceInfo | Record { 222 | return this._selectedSpeaker; 223 | } 224 | 225 | async enableMic(enable: boolean): Promise { 226 | this._micEnabled = enable; 227 | if (!this._daily.participants()?.local) return; 228 | this._daily.setLocalAudio(enable); 229 | if (this._mediaStreamRecorder) { 230 | if (enable) { 231 | if (this._mediaStreamRecorder.getStatus() === "paused") { 232 | this._startRecording(); 233 | } // else, we'll record on the track-started event 234 | } else { 235 | if (this._mediaStreamRecorder.getStatus() === "recording") { 236 | this._mediaStreamRecorder.pause(); 237 | } 238 | } 239 | } 240 | } 241 | enableCam(enable: boolean): void { 242 | this._camEnabled = enable; 243 | this._daily.setLocalVideo(enable); 244 | } 245 | 246 | get isCamEnabled(): boolean { 247 | return this._daily.localVideo(); 248 | } 249 | get isMicEnabled(): boolean { 250 | return this._daily.localAudio(); 251 | } 252 | 253 | tracks(): Tracks { 254 | const participants: DailyParticipantsObject = this._daily.participants(); 255 | return { 256 | local: { 257 | audio: participants?.local?.tracks?.audio?.persistentTrack, 258 | video: participants?.local?.tracks?.video?.persistentTrack, 259 | }, 260 | }; 261 | } 262 | 263 | private _startRecording(): void { 264 | if (!this._connected || !this._mediaStreamRecorder) return; 265 | try { 266 | this._mediaStreamRecorder.record((data) => { 267 | this._userAudioCallback(data.mono); 268 | }, this._recorderChunkSize); 269 | } catch (e) { 270 | const err = e as Error; 271 | if (!err.message.includes("Already recording")) { 272 | console.error("Error starting recording", e); 273 | } 274 | } 275 | } 276 | 277 | private _handleAvailableDevicesUpdated( 278 | event: DailyEventObjectAvailableDevicesUpdated, 279 | ) { 280 | this._callbacks.onAvailableCamsUpdated?.( 281 | event.availableDevices.filter((d) => d.kind === "videoinput"), 282 | ); 283 | this._callbacks.onAvailableMicsUpdated?.( 284 | event.availableDevices.filter((d) => d.kind === "audioinput"), 285 | ); 286 | this._callbacks.onAvailableSpeakersUpdated?.( 287 | event.availableDevices.filter((d) => d.kind === "audiooutput"), 288 | ); 289 | if (this._selectedSpeaker.deviceId === "default") { 290 | this.updateSpeaker("default"); 291 | } 292 | } 293 | 294 | private _handleSelectedDevicesUpdated( 295 | event: DailyEventObjectSelectedDevicesUpdated, 296 | ) { 297 | if (this._selectedCam?.deviceId !== event.devices.camera) { 298 | this._selectedCam = event.devices.camera; 299 | this._callbacks.onCamUpdated?.(event.devices.camera as MediaDeviceInfo); 300 | } 301 | if (this._selectedMic?.deviceId !== event.devices.mic) { 302 | this._selectedMic = event.devices.mic; 303 | this._callbacks.onMicUpdated?.(event.devices.mic as MediaDeviceInfo); 304 | } 305 | } 306 | 307 | private _handleLocalAudioLevel(ev: DailyEventObjectLocalAudioLevel) { 308 | this._callbacks.onLocalAudioLevel?.(ev.audioLevel); 309 | } 310 | 311 | private _handleRemoteAudioLevel(audioLevel: number) { 312 | this._callbacks.onRemoteAudioLevel?.(audioLevel, botParticipant()); 313 | } 314 | 315 | protected async 
handleTrackStarted(event: DailyEventObjectTrack) { 316 | if (!event.participant?.local) return; 317 | if (event.track.kind === "audio") { 318 | if (this._mediaStreamRecorder) { 319 | const status = this._mediaStreamRecorder.getStatus(); 320 | switch (status) { 321 | case "ended": 322 | await this._mediaStreamRecorder.begin(event.track); 323 | if (this._connected) { 324 | this._startRecording(); 325 | if (this._connectResolve) { 326 | this._connectResolve(); 327 | this._connectResolve = null; 328 | } 329 | } 330 | break; 331 | case "paused": 332 | this._startRecording(); 333 | break; 334 | case "recording": 335 | default: 336 | if (this._currentAudioTrack !== event.track) { 337 | await this._mediaStreamRecorder.end(); 338 | await this._mediaStreamRecorder.begin(event.track); 339 | this._startRecording(); 340 | } else { 341 | console.warn( 342 | "track-started event received for current track and already recording", 343 | ); 344 | } 345 | break; 346 | } 347 | } 348 | this._currentAudioTrack = event.track; 349 | } 350 | this._callbacks.onTrackStarted?.( 351 | event.track, 352 | event.participant 353 | ? dailyParticipantToParticipant(event.participant) 354 | : undefined, 355 | ); 356 | this.onTrackStartedCallback?.(event); 357 | } 358 | 359 | protected handleTrackStopped(event: DailyEventObjectTrack) { 360 | if (!event.participant?.local) return; 361 | if (event.track.kind === "audio") { 362 | if ( 363 | this._mediaStreamRecorder && 364 | this._mediaStreamRecorder.getStatus() === "recording" 365 | ) { 366 | this._mediaStreamRecorder.pause(); 367 | } 368 | } 369 | this._callbacks.onTrackStopped?.( 370 | event.track, 371 | event.participant 372 | ? dailyParticipantToParticipant(event.participant) 373 | : undefined, 374 | ); 375 | this.onTrackStoppedCallback?.(event); 376 | } 377 | } 378 | 379 | const dailyParticipantToParticipant = (p: DailyParticipant): Participant => ({ 380 | id: p.user_id, 381 | local: p.local, 382 | name: p.user_name, 383 | }); 384 | 385 | const botParticipant = () => ({ 386 | id: "bot", 387 | local: false, 388 | name: "Bot", 389 | }); 390 | -------------------------------------------------------------------------------- /lib/media-mgmt/mediaManager.ts: -------------------------------------------------------------------------------- 1 | import { WavRecorder, WavStreamPlayer } from "../wavtools"; 2 | 3 | import { 4 | RTVIClientOptions, 5 | RTVIEventCallbacks, 6 | Tracks, 7 | } from "@pipecat-ai/client-js"; 8 | 9 | export abstract class MediaManager { 10 | declare protected _userAudioCallback: (data: ArrayBuffer) => void; 11 | declare protected _options: RTVIClientOptions; 12 | protected _callbacks: RTVIEventCallbacks = {}; 13 | 14 | protected _micEnabled: boolean; 15 | protected _camEnabled: boolean; 16 | 17 | constructor() { 18 | this._micEnabled = true; 19 | this._camEnabled = false; 20 | } 21 | 22 | setUserAudioCallback(userAudioCallback: (data: ArrayBuffer) => void) { 23 | this._userAudioCallback = userAudioCallback; 24 | } 25 | setRTVIOptions(options: RTVIClientOptions, override: boolean = false) { 26 | if (this._options && !override) return; 27 | this._options = options; 28 | this._callbacks = options.callbacks ?? {}; 29 | this._micEnabled = options.enableMic ?? true; 30 | this._camEnabled = options.enableCam ?? 
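// Note: enableMic defaults to true and enableCam to false when the RTVIClientOptions omit
// them. Illustrative options object (sketch only; other RTVIClientOptions fields omitted):
//   mediaManager.setRTVIOptions({
//     enableMic: true,
//     enableCam: false,
//     callbacks: { onMicUpdated: (mic) => console.log("mic:", mic.label) },
//   } as RTVIClientOptions);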
false; 31 | } 32 | 33 | abstract initialize(): Promise; 34 | abstract connect(): Promise; 35 | abstract disconnect(): Promise; 36 | 37 | abstract userStartedSpeaking(): Promise; 38 | abstract bufferBotAudio( 39 | data: ArrayBuffer | Int16Array, 40 | id?: string, 41 | ): Int16Array | undefined; 42 | 43 | abstract getAllMics(): Promise; 44 | abstract getAllCams(): Promise; 45 | abstract getAllSpeakers(): Promise; 46 | 47 | abstract updateMic(micId: string): void; 48 | abstract updateCam(camId: string): void; 49 | abstract updateSpeaker(speakerId: string): void; 50 | 51 | abstract get selectedMic(): MediaDeviceInfo | Record; 52 | abstract get selectedCam(): MediaDeviceInfo | Record; 53 | abstract get selectedSpeaker(): MediaDeviceInfo | Record; 54 | 55 | abstract enableMic(enable: boolean): void; 56 | abstract enableCam(enable: boolean): void; 57 | 58 | abstract get isCamEnabled(): boolean; 59 | abstract get isMicEnabled(): boolean; 60 | 61 | abstract tracks(): Tracks; 62 | } 63 | 64 | export class WavMediaManager extends MediaManager { 65 | private _wavRecorder; 66 | private _wavStreamPlayer; 67 | 68 | private _initialized = false; 69 | private _recorderChunkSize: number | undefined = undefined; 70 | 71 | constructor( 72 | recorderChunkSize: number | undefined = undefined, 73 | recorderSampleRate: number | undefined = 24000, 74 | ) { 75 | super(); 76 | this._recorderChunkSize = recorderChunkSize; 77 | this._wavRecorder = new WavRecorder({ sampleRate: recorderSampleRate }); 78 | this._wavStreamPlayer = new WavStreamPlayer({ sampleRate: 24000 }); 79 | } 80 | 81 | async initialize(): Promise { 82 | await this._wavRecorder.begin(); 83 | this._wavRecorder.listenForDeviceChange(null); 84 | this._wavRecorder.listenForDeviceChange( 85 | this._handleAvailableDevicesUpdated.bind(this), 86 | ); 87 | await this._wavStreamPlayer.connect(); 88 | this._initialized = true; 89 | } 90 | 91 | async connect(): Promise { 92 | if (!this._initialized) { 93 | await this.initialize(); 94 | } 95 | const isAlreadyRecording = this._wavRecorder.getStatus() == "recording"; 96 | if (this._micEnabled && !isAlreadyRecording) { 97 | await this._startRecording(); 98 | } 99 | } 100 | 101 | async disconnect(): Promise { 102 | if (!this._initialized) { 103 | return; 104 | } 105 | await this._wavRecorder.end(); 106 | await this._wavStreamPlayer.interrupt(); 107 | this._initialized = false; 108 | } 109 | 110 | async userStartedSpeaking(): Promise { 111 | return this._wavStreamPlayer.interrupt(); 112 | } 113 | 114 | bufferBotAudio(data: ArrayBuffer | Int16Array, id?: string): Int16Array { 115 | return this._wavStreamPlayer.add16BitPCM(data, id); 116 | } 117 | 118 | getAllMics(): Promise { 119 | return this._wavRecorder.listDevices(); 120 | } 121 | getAllCams(): Promise { 122 | // TODO: Video not supported yet 123 | return Promise.resolve([]); 124 | } 125 | getAllSpeakers(): Promise { 126 | // TODO: Implement speaker support 127 | return Promise.resolve([]); 128 | } 129 | 130 | async updateMic(micId: string): Promise { 131 | const prevMic = this._wavRecorder.deviceSelection; 132 | await this._wavRecorder.end(); 133 | await this._wavRecorder.begin(micId); 134 | if (this._micEnabled) { 135 | await this._startRecording(); 136 | } 137 | const curMic = this._wavRecorder.deviceSelection; 138 | if (curMic && prevMic && prevMic.label !== curMic.label) { 139 | this._callbacks.onMicUpdated?.(curMic); 140 | } 141 | } 142 | 143 | updateCam(camId: string): void { 144 | // TODO: Video not supported yet 145 | } 146 | updateSpeaker(speakerId: 
string): void { 147 | // TODO: Implement speaker support 148 | } 149 | 150 | get selectedMic(): MediaDeviceInfo | Record { 151 | return this._wavRecorder.deviceSelection ?? {}; 152 | } 153 | get selectedCam(): MediaDeviceInfo | Record { 154 | // TODO: Video not supported yet 155 | return {}; 156 | } 157 | get selectedSpeaker(): MediaDeviceInfo | Record { 158 | // TODO: Implement speaker support 159 | return {}; 160 | } 161 | 162 | async enableMic(enable: boolean): Promise { 163 | this._micEnabled = enable; 164 | if (!this._wavRecorder.stream) return; 165 | this._wavRecorder.stream 166 | .getAudioTracks() 167 | .forEach((track: MediaStreamTrack) => { 168 | track.enabled = enable; 169 | if (!enable) { 170 | this._callbacks.onTrackStopped?.(track, localParticipant()); 171 | } 172 | }); 173 | if (enable) { 174 | await this._startRecording(); 175 | } else { 176 | await this._wavRecorder.pause(); 177 | } 178 | } 179 | enableCam(enable: boolean): void { 180 | // TODO: Video not supported yet 181 | } 182 | 183 | get isCamEnabled(): boolean { 184 | // TODO: Video not supported yet 185 | return false; 186 | } 187 | get isMicEnabled(): boolean { 188 | return this._micEnabled; 189 | } 190 | 191 | tracks(): Tracks { 192 | const tracks = this._wavRecorder.stream?.getTracks()[0]; 193 | return { local: tracks ? { audio: tracks } : {} }; 194 | } 195 | 196 | private async _startRecording() { 197 | await this._wavRecorder.record((data) => { 198 | this._userAudioCallback(data.mono); 199 | }, this._recorderChunkSize); 200 | const track = this._wavRecorder.stream?.getAudioTracks()[0]; 201 | if (track) { 202 | this._callbacks.onTrackStarted?.(track, localParticipant()); 203 | } 204 | } 205 | 206 | private _handleAvailableDevicesUpdated(devices: MediaDeviceInfo[]) { 207 | this._callbacks.onAvailableCamsUpdated?.( 208 | devices.filter((d) => d.kind === "videoinput"), 209 | ); 210 | this._callbacks.onAvailableMicsUpdated?.( 211 | devices.filter((d) => d.kind === "audioinput"), 212 | ); 213 | // if the current device went away or we're using the default and 214 | // the default changed, reset the mic. 
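// Worked example of the reset condition below (hypothetical labels; Chrome prefixes the
// synthetic default entry with "Default - "): suppose the selected mic is
// { deviceId: "default", label: "Default - Built-in Microphone" } and a USB headset is
// plugged in, changing the OS default. The next devicechange callback reports
// { deviceId: "default", label: "Default - USB Headset" }; the labels no longer match, so
// updateMic("") re-opens the recorder on the new system default.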
215 | const defaultDevice = devices.find((d) => d.deviceId === "default"); 216 | const currentDevice = this._wavRecorder.deviceSelection; 217 | if ( 218 | currentDevice && 219 | (!devices.some((d) => d.deviceId === currentDevice.deviceId) || 220 | (currentDevice.deviceId === "default" && 221 | currentDevice.label !== defaultDevice?.label)) 222 | ) { 223 | this.updateMic(""); 224 | } 225 | } 226 | } 227 | 228 | const localParticipant = () => { 229 | return { 230 | id: "local", 231 | name: "", 232 | local: true, 233 | }; 234 | }; 235 | -------------------------------------------------------------------------------- /lib/wavtools/index.js: -------------------------------------------------------------------------------- 1 | import { WavPacker } from './lib/wav_packer.js'; 2 | import { AudioAnalysis } from './lib/analysis/audio_analysis.js'; 3 | import { WavStreamPlayer } from './lib/wav_stream_player.js'; 4 | import { WavRecorder } from './lib/wav_recorder.js'; 5 | import { MediaStreamRecorder } from './lib/mediastream_recorder.js'; 6 | 7 | export { 8 | AudioAnalysis, 9 | MediaStreamRecorder, 10 | WavPacker, 11 | WavStreamPlayer, 12 | WavRecorder, 13 | }; 14 | -------------------------------------------------------------------------------- /lib/wavtools/lib/analysis/audio_analysis.js: -------------------------------------------------------------------------------- 1 | import { 2 | noteFrequencies, 3 | noteFrequencyLabels, 4 | voiceFrequencies, 5 | voiceFrequencyLabels, 6 | } from './constants.js'; 7 | 8 | /** 9 | * Output of AudioAnalysis for the frequency domain of the audio 10 | * @typedef {Object} AudioAnalysisOutputType 11 | * @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive 12 | * @property {number[]} frequencies Raw frequency bucket values 13 | * @property {string[]} labels Labels for the frequency bucket values 14 | */ 15 | 16 | /** 17 | * Analyzes audio for visual output 18 | * @class 19 | */ 20 | export class AudioAnalysis { 21 | /** 22 | * Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range 23 | * returns human-readable formatting and labels 24 | * @param {AnalyserNode} analyser 25 | * @param {number} sampleRate 26 | * @param {Float32Array} [fftResult] 27 | * @param {"frequency"|"music"|"voice"} [analysisType] 28 | * @param {number} [minDecibels] default -100 29 | * @param {number} [maxDecibels] default -30 30 | * @returns {AudioAnalysisOutputType} 31 | */ 32 | static getFrequencies( 33 | analyser, 34 | sampleRate, 35 | fftResult, 36 | analysisType = 'frequency', 37 | minDecibels = -100, 38 | maxDecibels = -30, 39 | ) { 40 | if (!fftResult) { 41 | fftResult = new Float32Array(analyser.frequencyBinCount); 42 | analyser.getFloatFrequencyData(fftResult); 43 | } 44 | const nyquistFrequency = sampleRate / 2; 45 | const frequencyStep = (1 / fftResult.length) * nyquistFrequency; 46 | let outputValues; 47 | let frequencies; 48 | let labels; 49 | if (analysisType === 'music' || analysisType === 'voice') { 50 | const useFrequencies = 51 | analysisType === 'voice' ? 
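// Note on this ternary: 'voice' aggregates FFT bins into the C1 through B6 note buckets
// from constants.js, 'music' uses the full 8-octave note table, and plain 'frequency'
// (the else branch below) keeps raw evenly spaced bins. Illustrative call:
//   const { values, labels } = AudioAnalysis.getFrequencies(analyser, 24000, null, 'voice');
//   // values[i] is a 0..1 level for the note named in labels[i], e.g. "A4"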
voiceFrequencies : noteFrequencies; 52 | const aggregateOutput = Array(useFrequencies.length).fill(minDecibels); 53 | for (let i = 0; i < fftResult.length; i++) { 54 | const frequency = i * frequencyStep; 55 | const amplitude = fftResult[i]; 56 | for (let n = useFrequencies.length - 1; n >= 0; n--) { 57 | if (frequency > useFrequencies[n]) { 58 | aggregateOutput[n] = Math.max(aggregateOutput[n], amplitude); 59 | break; 60 | } 61 | } 62 | } 63 | outputValues = aggregateOutput; 64 | frequencies = 65 | analysisType === 'voice' ? voiceFrequencies : noteFrequencies; 66 | labels = 67 | analysisType === 'voice' ? voiceFrequencyLabels : noteFrequencyLabels; 68 | } else { 69 | outputValues = Array.from(fftResult); 70 | frequencies = outputValues.map((_, i) => frequencyStep * i); 71 | labels = frequencies.map((f) => `${f.toFixed(2)} Hz`); 72 | } 73 | // We normalize to {0, 1} 74 | const normalizedOutput = outputValues.map((v) => { 75 | return Math.max( 76 | 0, 77 | Math.min((v - minDecibels) / (maxDecibels - minDecibels), 1), 78 | ); 79 | }); 80 | const values = new Float32Array(normalizedOutput); 81 | return { 82 | values, 83 | frequencies, 84 | labels, 85 | }; 86 | } 87 | 88 | /** 89 | * Creates a new AudioAnalysis instance for an HTMLAudioElement 90 | * @param {HTMLAudioElement} audioElement 91 | * @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer 92 | * @returns {AudioAnalysis} 93 | */ 94 | constructor(audioElement, audioBuffer = null) { 95 | this.fftResults = []; 96 | if (audioBuffer) { 97 | /** 98 | * Modified from 99 | * https://stackoverflow.com/questions/75063715/using-the-web-audio-api-to-analyze-a-song-without-playing 100 | * 101 | * We do this to populate FFT values for the audio if provided an `audioBuffer` 102 | * The reason to do this is that Safari fails when using `createMediaElementSource` 103 | * This has a non-zero RAM cost so we only opt-in to run it on Safari, Chrome is better 104 | */ 105 | const { length, sampleRate } = audioBuffer; 106 | const offlineAudioContext = new OfflineAudioContext({ 107 | length, 108 | sampleRate, 109 | }); 110 | const source = offlineAudioContext.createBufferSource(); 111 | source.buffer = audioBuffer; 112 | const analyser = offlineAudioContext.createAnalyser(); 113 | analyser.fftSize = 8192; 114 | analyser.smoothingTimeConstant = 0.1; 115 | source.connect(analyser); 116 | // limit is :: 128 / sampleRate; 117 | // but we just want 60fps - cuts ~1s from 6MB to 1MB of RAM 118 | const renderQuantumInSeconds = 1 / 60; 119 | const durationInSeconds = length / sampleRate; 120 | const analyze = (index) => { 121 | const suspendTime = renderQuantumInSeconds * index; 122 | if (suspendTime < durationInSeconds) { 123 | offlineAudioContext.suspend(suspendTime).then(() => { 124 | const fftResult = new Float32Array(analyser.frequencyBinCount); 125 | analyser.getFloatFrequencyData(fftResult); 126 | this.fftResults.push(fftResult); 127 | analyze(index + 1); 128 | }); 129 | } 130 | if (index === 1) { 131 | offlineAudioContext.startRendering(); 132 | } else { 133 | offlineAudioContext.resume(); 134 | } 135 | }; 136 | source.start(0); 137 | analyze(1); 138 | this.audio = audioElement; 139 | this.context = offlineAudioContext; 140 | this.analyser = analyser; 141 | this.sampleRate = sampleRate; 142 | this.audioBuffer = audioBuffer; 143 | } else { 144 | const audioContext = new AudioContext(); 145 | const track = audioContext.createMediaElementSource(audioElement); 146 | const analyser = 
audioContext.createAnalyser(); 147 | analyser.fftSize = 8192; 148 | analyser.smoothingTimeConstant = 0.1; 149 | track.connect(analyser); 150 | analyser.connect(audioContext.destination); 151 | this.audio = audioElement; 152 | this.context = audioContext; 153 | this.analyser = analyser; 154 | this.sampleRate = this.context.sampleRate; 155 | this.audioBuffer = null; 156 | } 157 | } 158 | 159 | /** 160 | * Gets the current frequency domain data from the playing audio track 161 | * @param {"frequency"|"music"|"voice"} [analysisType] 162 | * @param {number} [minDecibels] default -100 163 | * @param {number} [maxDecibels] default -30 164 | * @returns {AudioAnalysisOutputType} 165 | */ 166 | getFrequencies( 167 | analysisType = 'frequency', 168 | minDecibels = -100, 169 | maxDecibels = -30, 170 | ) { 171 | let fftResult = null; 172 | if (this.audioBuffer && this.fftResults.length) { 173 | const pct = this.audio.currentTime / this.audio.duration; 174 | const index = Math.min( 175 | (pct * this.fftResults.length) | 0, 176 | this.fftResults.length - 1, 177 | ); 178 | fftResult = this.fftResults[index]; 179 | } 180 | return AudioAnalysis.getFrequencies( 181 | this.analyser, 182 | this.sampleRate, 183 | fftResult, 184 | analysisType, 185 | minDecibels, 186 | maxDecibels, 187 | ); 188 | } 189 | 190 | /** 191 | * Resume the internal AudioContext if it was suspended due to the lack of 192 | * user interaction when the AudioAnalysis was instantiated. 193 | * @returns {Promise} 194 | */ 195 | async resumeIfSuspended() { 196 | if (this.context.state === 'suspended') { 197 | await this.context.resume(); 198 | } 199 | return true; 200 | } 201 | } 202 | 203 | globalThis.AudioAnalysis = AudioAnalysis; 204 | -------------------------------------------------------------------------------- /lib/wavtools/lib/analysis/constants.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Constants for help with visualization 3 | * Helps map frequency ranges from Fast Fourier Transform 4 | * to human-interpretable ranges, notably music ranges and 5 | * human vocal ranges. 
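 * Worked example: every lower octave is derived from the 8th-octave table by dividing by
 * a power of two, freq(n, octave) = octave8Frequencies[n] / 2^(8 - octave). For instance
 * A8 = 7040.0 Hz gives A4 = 7040 / 2^4 = 440 Hz, and C8 = 4186.01 Hz gives
 * C1 = 4186.01 / 2^7 ≈ 32.7 Hz, which is why the voice range below starts at 32 Hz.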
6 | */ 7 | 8 | // Eighth octave frequencies 9 | const octave8Frequencies = [ 10 | 4186.01, 4434.92, 4698.63, 4978.03, 5274.04, 5587.65, 5919.91, 6271.93, 11 | 6644.88, 7040.0, 7458.62, 7902.13, 12 | ]; 13 | 14 | // Labels for each of the above frequencies 15 | const octave8FrequencyLabels = [ 16 | 'C', 17 | 'C#', 18 | 'D', 19 | 'D#', 20 | 'E', 21 | 'F', 22 | 'F#', 23 | 'G', 24 | 'G#', 25 | 'A', 26 | 'A#', 27 | 'B', 28 | ]; 29 | 30 | /** 31 | * All note frequencies from 1st to 8th octave 32 | * in format "A#8" (A#, 8th octave) 33 | */ 34 | export const noteFrequencies = []; 35 | export const noteFrequencyLabels = []; 36 | for (let i = 1; i <= 8; i++) { 37 | for (let f = 0; f < octave8Frequencies.length; f++) { 38 | const freq = octave8Frequencies[f]; 39 | noteFrequencies.push(freq / Math.pow(2, 8 - i)); 40 | noteFrequencyLabels.push(octave8FrequencyLabels[f] + i); 41 | } 42 | } 43 | 44 | /** 45 | * Subset of the note frequencies between 32 and 2000 Hz 46 | * 6 octave range: C1 to B6 47 | */ 48 | const voiceFrequencyRange = [32.0, 2000.0]; 49 | export const voiceFrequencies = noteFrequencies.filter((_, i) => { 50 | return ( 51 | noteFrequencies[i] > voiceFrequencyRange[0] && 52 | noteFrequencies[i] < voiceFrequencyRange[1] 53 | ); 54 | }); 55 | export const voiceFrequencyLabels = noteFrequencyLabels.filter((_, i) => { 56 | return ( 57 | noteFrequencies[i] > voiceFrequencyRange[0] && 58 | noteFrequencies[i] < voiceFrequencyRange[1] 59 | ); 60 | }); 61 | -------------------------------------------------------------------------------- /lib/wavtools/lib/mediastream_recorder.js: -------------------------------------------------------------------------------- 1 | import { AudioProcessorSrc } from "./worklets/audio_processor.js"; 2 | import { AudioAnalysis } from "./analysis/audio_analysis.js"; 3 | import { WavPacker } from "./wav_packer.js"; 4 | 5 | /** 6 | * Decodes audio into a wav file 7 | * @typedef {Object} DecodedAudioType 8 | * @property {Blob} blob 9 | * @property {string} url 10 | * @property {Float32Array} values 11 | * @property {AudioBuffer} audioBuffer 12 | */ 13 | 14 | /** 15 | * Records live stream of user audio as PCM16 "audio/wav" data 16 | * @class 17 | */ 18 | export class MediaStreamRecorder { 19 | /** 20 | * Create a new MediaStreamRecorder instance 21 | * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options] 22 | * @returns {MediaStreamRecorder} 23 | */ 24 | constructor({ 25 | sampleRate = 44100, 26 | outputToSpeakers = false, 27 | debug = false, 28 | } = {}) { 29 | // Script source 30 | this.scriptSrc = AudioProcessorSrc; 31 | // Config 32 | this.sampleRate = sampleRate; 33 | this.outputToSpeakers = outputToSpeakers; 34 | this.debug = !!debug; 35 | // State variables 36 | this.stream = null; 37 | this.processor = null; 38 | this.source = null; 39 | this.node = null; 40 | this.recording = false; 41 | // Event handling with AudioWorklet 42 | this._lastEventId = 0; 43 | this.eventReceipts = {}; 44 | this.eventTimeout = 5000; 45 | // Process chunks of audio 46 | this._chunkProcessor = () => {}; 47 | this._chunkProcessorSize = void 0; 48 | this._chunkProcessorBuffer = { 49 | raw: new ArrayBuffer(0), 50 | mono: new ArrayBuffer(0), 51 | }; 52 | } 53 | 54 | /** 55 | * Logs data in debug mode 56 | * @param {...any} arguments 57 | * @returns {true} 58 | */ 59 | log() { 60 | if (this.debug) { 61 | this.log(...arguments); 62 | } 63 | return true; 64 | } 65 | 66 | /** 67 | * Retrieves the current sampleRate for the recorder 68 | * @returns {number} 69 | */ 
70 | getSampleRate() { 71 | return this.sampleRate; 72 | } 73 | 74 | /** 75 | * Retrieves the current status of the recording 76 | * @returns {"ended"|"paused"|"recording"} 77 | */ 78 | getStatus() { 79 | if (!this.processor) { 80 | return "ended"; 81 | } else if (!this.recording) { 82 | return "paused"; 83 | } else { 84 | return "recording"; 85 | } 86 | } 87 | 88 | /** 89 | * Sends an event to the AudioWorklet 90 | * @private 91 | * @param {string} name 92 | * @param {{[key: string]: any}} data 93 | * @param {AudioWorkletNode} [_processor] 94 | * @returns {Promise<{[key: string]: any}>} 95 | */ 96 | async _event(name, data = {}, _processor = null) { 97 | _processor = _processor || this.processor; 98 | if (!_processor) { 99 | throw new Error("Can not send events without recording first"); 100 | } 101 | const message = { 102 | event: name, 103 | id: this._lastEventId++, 104 | data, 105 | }; 106 | _processor.port.postMessage(message); 107 | const t0 = new Date().valueOf(); 108 | while (!this.eventReceipts[message.id]) { 109 | if (new Date().valueOf() - t0 > this.eventTimeout) { 110 | throw new Error(`Timeout waiting for "${name}" event`); 111 | } 112 | await new Promise((res) => setTimeout(() => res(true), 1)); 113 | } 114 | const payload = this.eventReceipts[message.id]; 115 | delete this.eventReceipts[message.id]; 116 | return payload; 117 | } 118 | 119 | /** 120 | * Begins a recording session for the given audioTrack 121 | * Microphone recording indicator will appear on browser tab but status will be "paused" 122 | * @param {MediaStreamTrack} [audioTrack] if no device provided, default device will be used 123 | * @returns {Promise} 124 | */ 125 | async begin(audioTrack) { 126 | if (this.processor) { 127 | throw new Error( 128 | `Already connected: please call .end() to start a new session` 129 | ); 130 | } 131 | 132 | if (!audioTrack || audioTrack.kind !== "audio") { 133 | throw new Error("No audio track provided"); 134 | } 135 | 136 | this.stream = new MediaStream([audioTrack]); 137 | 138 | const context = new AudioContext({ sampleRate: this.sampleRate }); 139 | const source = context.createMediaStreamSource(this.stream); 140 | // Load and execute the module script. 
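// Sketch of the same worklet bootstrap in isolation (assuming AudioProcessorSrc resolves
// to a loadable script URL for the "audio_processor" worklet):
//   const ctx = new AudioContext({ sampleRate: 44100 });
//   await ctx.audioWorklet.addModule(AudioProcessorSrc);
//   const node = new AudioWorkletNode(ctx, "audio_processor");
//   node.port.onmessage = (e) => console.log(e.data);   // receipts and audio chunks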
141 | try { 142 | await context.audioWorklet.addModule(this.scriptSrc); 143 | } catch (e) { 144 | console.error(e); 145 | throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`); 146 | } 147 | const processor = new AudioWorkletNode(context, "audio_processor"); 148 | processor.port.onmessage = (e) => { 149 | const { event, id, data } = e.data; 150 | if (event === "receipt") { 151 | this.eventReceipts[id] = data; 152 | } else if (event === "chunk") { 153 | if (this._chunkProcessorSize) { 154 | const buffer = this._chunkProcessorBuffer; 155 | this._chunkProcessorBuffer = { 156 | raw: WavPacker.mergeBuffers(buffer.raw, data.raw), 157 | mono: WavPacker.mergeBuffers(buffer.mono, data.mono), 158 | }; 159 | if ( 160 | this._chunkProcessorBuffer.mono.byteLength >= 161 | this._chunkProcessorSize 162 | ) { 163 | this._chunkProcessor(this._chunkProcessorBuffer); 164 | this._chunkProcessorBuffer = { 165 | raw: new ArrayBuffer(0), 166 | mono: new ArrayBuffer(0), 167 | }; 168 | } 169 | } else { 170 | this._chunkProcessor(data); 171 | } 172 | } 173 | }; 174 | 175 | const node = source.connect(processor); 176 | const analyser = context.createAnalyser(); 177 | analyser.fftSize = 8192; 178 | analyser.smoothingTimeConstant = 0.1; 179 | node.connect(analyser); 180 | if (this.outputToSpeakers) { 181 | // eslint-disable-next-line no-console 182 | console.warn( 183 | "Warning: Output to speakers may affect sound quality,\n" + 184 | "especially due to system audio feedback preventative measures.\n" + 185 | "use only for debugging" 186 | ); 187 | analyser.connect(context.destination); 188 | } 189 | 190 | this.source = source; 191 | this.node = node; 192 | this.analyser = analyser; 193 | this.processor = processor; 194 | return true; 195 | } 196 | 197 | /** 198 | * Gets the current frequency domain data from the recording track 199 | * @param {"frequency"|"music"|"voice"} [analysisType] 200 | * @param {number} [minDecibels] default -100 201 | * @param {number} [maxDecibels] default -30 202 | * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType} 203 | */ 204 | getFrequencies( 205 | analysisType = "frequency", 206 | minDecibels = -100, 207 | maxDecibels = -30 208 | ) { 209 | if (!this.processor) { 210 | throw new Error("Session ended: please call .begin() first"); 211 | } 212 | return AudioAnalysis.getFrequencies( 213 | this.analyser, 214 | this.sampleRate, 215 | null, 216 | analysisType, 217 | minDecibels, 218 | maxDecibels 219 | ); 220 | } 221 | 222 | /** 223 | * Pauses the recording 224 | * Keeps microphone stream open but halts storage of audio 225 | * @returns {Promise} 226 | */ 227 | async pause() { 228 | if (!this.processor) { 229 | throw new Error("Session ended: please call .begin() first"); 230 | } else if (!this.recording) { 231 | throw new Error("Already paused: please call .record() first"); 232 | } 233 | if (this._chunkProcessorBuffer.raw.byteLength) { 234 | this._chunkProcessor(this._chunkProcessorBuffer); 235 | } 236 | this.log("Pausing ..."); 237 | await this._event("stop"); 238 | this.recording = false; 239 | return true; 240 | } 241 | 242 | /** 243 | * Start recording stream and storing to memory from the connected audio source 244 | * @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor] 245 | * @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold met in mono audio 246 | * @returns {Promise} 247 | */ 248 | async record(chunkProcessor = () => {}, chunkSize = 8192) { 249 | if (!this.processor) { 250 | 
throw new Error("Session ended: please call .begin() first"); 251 | } else if (this.recording) { 252 | throw new Error("Already recording: please call .pause() first"); 253 | } else if (typeof chunkProcessor !== "function") { 254 | throw new Error(`chunkProcessor must be a function`); 255 | } 256 | this._chunkProcessor = chunkProcessor; 257 | this._chunkProcessorSize = chunkSize; 258 | this._chunkProcessorBuffer = { 259 | raw: new ArrayBuffer(0), 260 | mono: new ArrayBuffer(0), 261 | }; 262 | this.log("Recording ..."); 263 | await this._event("start"); 264 | this.recording = true; 265 | return true; 266 | } 267 | 268 | /** 269 | * Clears the audio buffer, empties stored recording 270 | * @returns {Promise} 271 | */ 272 | async clear() { 273 | if (!this.processor) { 274 | throw new Error("Session ended: please call .begin() first"); 275 | } 276 | await this._event("clear"); 277 | return true; 278 | } 279 | 280 | /** 281 | * Reads the current audio stream data 282 | * @returns {Promise<{meanValues: Float32Array, channels: Array}>} 283 | */ 284 | async read() { 285 | if (!this.processor) { 286 | throw new Error("Session ended: please call .begin() first"); 287 | } 288 | this.log("Reading ..."); 289 | const result = await this._event("read"); 290 | return result; 291 | } 292 | 293 | /** 294 | * Saves the current audio stream to a file 295 | * @param {boolean} [force] Force saving while still recording 296 | * @returns {Promise} 297 | */ 298 | async save(force = false) { 299 | if (!this.processor) { 300 | throw new Error("Session ended: please call .begin() first"); 301 | } 302 | if (!force && this.recording) { 303 | throw new Error( 304 | "Currently recording: please call .pause() first, or call .save(true) to force" 305 | ); 306 | } 307 | this.log("Exporting ..."); 308 | const exportData = await this._event("export"); 309 | const packer = new WavPacker(); 310 | const result = packer.pack(this.sampleRate, exportData.audio); 311 | return result; 312 | } 313 | 314 | /** 315 | * Ends the current recording session and saves the result 316 | * @returns {Promise} 317 | */ 318 | async end() { 319 | if (!this.processor) { 320 | throw new Error("Session ended: please call .begin() first"); 321 | } 322 | 323 | const _processor = this.processor; 324 | 325 | this.log("Stopping ..."); 326 | await this._event("stop"); 327 | this.recording = false; 328 | 329 | this.log("Exporting ..."); 330 | const exportData = await this._event("export", {}, _processor); 331 | 332 | this.processor.disconnect(); 333 | this.source.disconnect(); 334 | this.node.disconnect(); 335 | this.analyser.disconnect(); 336 | this.stream = null; 337 | this.processor = null; 338 | this.source = null; 339 | this.node = null; 340 | 341 | const packer = new WavPacker(); 342 | const result = packer.pack(this.sampleRate, exportData.audio); 343 | return result; 344 | } 345 | 346 | /** 347 | * Performs a full cleanup of the MediaStreamRecorder instance 348 | * Ends any active recording session and disconnects the audio graph 349 | * @returns {Promise} 350 | */ 351 | async quit() { 352 | // Unlike WavRecorder, there is no device-change listener to remove here. 353 | if (this.processor) { 354 | await this.end(); 355 | } 356 | return true; 357 | } 358 | } 359 | 360 | globalThis.MediaStreamRecorder = MediaStreamRecorder; 361 | -------------------------------------------------------------------------------- /lib/wavtools/lib/wav_packer.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Raw wav audio file contents 3 | * @typedef {Object} WavPackerAudioType 4 |
* @property {Blob} blob 5 | * @property {string} url 6 | * @property {number} channelCount 7 | * @property {number} sampleRate 8 | * @property {number} duration 9 | */ 10 | 11 | /** 12 | * Utility class for assembling PCM16 "audio/wav" data 13 | * @class 14 | */ 15 | export class WavPacker { 16 | /** 17 | * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format 18 | * @param {Float32Array} float32Array 19 | * @returns {ArrayBuffer} 20 | */ 21 | static floatTo16BitPCM(float32Array) { 22 | const buffer = new ArrayBuffer(float32Array.length * 2); 23 | const view = new DataView(buffer); 24 | let offset = 0; 25 | for (let i = 0; i < float32Array.length; i++, offset += 2) { 26 | let s = Math.max(-1, Math.min(1, float32Array[i])); 27 | view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true); 28 | } 29 | return buffer; 30 | } 31 | 32 | /** 33 | * Concatenates two ArrayBuffers 34 | * @param {ArrayBuffer} leftBuffer 35 | * @param {ArrayBuffer} rightBuffer 36 | * @returns {ArrayBuffer} 37 | */ 38 | static mergeBuffers(leftBuffer, rightBuffer) { 39 | const tmpArray = new Uint8Array( 40 | leftBuffer.byteLength + rightBuffer.byteLength 41 | ); 42 | tmpArray.set(new Uint8Array(leftBuffer), 0); 43 | tmpArray.set(new Uint8Array(rightBuffer), leftBuffer.byteLength); 44 | return tmpArray.buffer; 45 | } 46 | 47 | /** 48 | * Packs data into an Int16 format 49 | * @private 50 | * @param {number} size 0 = 1x Int16, 1 = 2x Int16 51 | * @param {number} arg value to pack 52 | * @returns 53 | */ 54 | _packData(size, arg) { 55 | return [ 56 | new Uint8Array([arg, arg >> 8]), 57 | new Uint8Array([arg, arg >> 8, arg >> 16, arg >> 24]), 58 | ][size]; 59 | } 60 | 61 | /** 62 | * Packs audio into "audio/wav" Blob 63 | * @param {number} sampleRate 64 | * @param {{bitsPerSample: number, channels: Array, data: Int16Array}} audio 65 | * @returns {WavPackerAudioType} 66 | */ 67 | pack(sampleRate, audio) { 68 | if (!audio?.bitsPerSample) { 69 | throw new Error(`Missing "bitsPerSample"`); 70 | } else if (!audio?.channels) { 71 | throw new Error(`Missing "channels"`); 72 | } else if (!audio?.data) { 73 | throw new Error(`Missing "data"`); 74 | } 75 | const { bitsPerSample, channels, data } = audio; 76 | const output = [ 77 | // Header 78 | 'RIFF', 79 | this._packData( 80 | 1, 81 | 4 + (8 + 24) /* chunk 1 length */ + (8 + 8) /* chunk 2 length */ 82 | ), // Length 83 | 'WAVE', 84 | // chunk 1 85 | 'fmt ', // Sub-chunk identifier 86 | this._packData(1, 16), // Chunk length 87 | this._packData(0, 1), // Audio format (1 is linear quantization) 88 | this._packData(0, channels.length), 89 | this._packData(1, sampleRate), 90 | this._packData(1, (sampleRate * channels.length * bitsPerSample) / 8), // Byte rate 91 | this._packData(0, (channels.length * bitsPerSample) / 8), 92 | this._packData(0, bitsPerSample), 93 | // chunk 2 94 | 'data', // Sub-chunk identifier 95 | this._packData( 96 | 1, 97 | (channels[0].length * channels.length * bitsPerSample) / 8 98 | ), // Chunk length 99 | data, 100 | ]; 101 | const blob = new Blob(output, { type: 'audio/mpeg' }); 102 | const url = URL.createObjectURL(blob); 103 | return { 104 | blob, 105 | url, 106 | channelCount: channels.length, 107 | sampleRate, 108 | duration: data.byteLength / (channels.length * sampleRate * 2), 109 | }; 110 | } 111 | } 112 | 113 | globalThis.WavPacker = WavPacker; 114 | -------------------------------------------------------------------------------- /lib/wavtools/lib/wav_recorder.js: 
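// Illustrative usage of the WavRecorder class defined in the file below (a sketch that
// mirrors how WavMediaManager in lib/media-mgmt drives it; sendAudio is hypothetical):
//   const recorder = new WavRecorder({ sampleRate: 24000 });
//   await recorder.begin();                                          // opens the default microphone
//   await recorder.record((chunk) => sendAudio(chunk.mono), 8192);   // chunked PCM16 callbacks
//   await recorder.pause();                                          // keep the mic open, stop chunks
//   const wav = await recorder.end();                                // stop tracks and export the audio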
-------------------------------------------------------------------------------- 1 | import { AudioProcessorSrc } from './worklets/audio_processor.js'; 2 | import { AudioAnalysis } from './analysis/audio_analysis.js'; 3 | import { WavPacker } from './wav_packer.js'; 4 | 5 | /** 6 | * Decodes audio into a wav file 7 | * @typedef {Object} DecodedAudioType 8 | * @property {Blob} blob 9 | * @property {string} url 10 | * @property {Float32Array} values 11 | * @property {AudioBuffer} audioBuffer 12 | */ 13 | 14 | /** 15 | * Records live stream of user audio as PCM16 "audio/wav" data 16 | * @class 17 | */ 18 | export class WavRecorder { 19 | /** 20 | * Create a new WavRecorder instance 21 | * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options] 22 | * @returns {WavRecorder} 23 | */ 24 | constructor({ 25 | sampleRate = 44100, 26 | outputToSpeakers = false, 27 | debug = false, 28 | } = {}) { 29 | // Script source 30 | this.scriptSrc = AudioProcessorSrc; 31 | // Config 32 | this.sampleRate = sampleRate; 33 | this.outputToSpeakers = outputToSpeakers; 34 | this.debug = !!debug; 35 | this._deviceChangeCallback = null; 36 | this._devices = []; 37 | this.deviceSelection = null; 38 | // State variables 39 | this.stream = null; 40 | this.processor = null; 41 | this.source = null; 42 | this.node = null; 43 | this.recording = false; 44 | // Event handling with AudioWorklet 45 | this._lastEventId = 0; 46 | this.eventReceipts = {}; 47 | this.eventTimeout = 5000; 48 | // Process chunks of audio 49 | this._chunkProcessor = () => {}; 50 | this._chunkProcessorSize = void 0; 51 | this._chunkProcessorBuffer = { 52 | raw: new ArrayBuffer(0), 53 | mono: new ArrayBuffer(0), 54 | }; 55 | } 56 | 57 | /** 58 | * Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer 59 | * @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData 60 | * @param {number} sampleRate 61 | * @param {number} fromSampleRate 62 | * @returns {Promise} 63 | */ 64 | static async decode(audioData, sampleRate = 44100, fromSampleRate = -1) { 65 | const context = new AudioContext({ sampleRate }); 66 | let arrayBuffer; 67 | let blob; 68 | if (audioData instanceof Blob) { 69 | if (fromSampleRate !== -1) { 70 | throw new Error( 71 | `Can not specify "fromSampleRate" when reading from Blob` 72 | ); 73 | } 74 | blob = audioData; 75 | arrayBuffer = await blob.arrayBuffer(); 76 | } else if (audioData instanceof ArrayBuffer) { 77 | if (fromSampleRate !== -1) { 78 | throw new Error( 79 | `Can not specify "fromSampleRate" when reading from ArrayBuffer` 80 | ); 81 | } 82 | arrayBuffer = audioData; 83 | blob = new Blob([arrayBuffer], { type: 'audio/wav' }); 84 | } else { 85 | let float32Array; 86 | let data; 87 | if (audioData instanceof Int16Array) { 88 | data = audioData; 89 | float32Array = new Float32Array(audioData.length); 90 | for (let i = 0; i < audioData.length; i++) { 91 | float32Array[i] = audioData[i] / 0x8000; 92 | } 93 | } else if (audioData instanceof Float32Array) { 94 | float32Array = audioData; 95 | } else if (audioData instanceof Array) { 96 | float32Array = new Float32Array(audioData); 97 | } else { 98 | throw new Error( 99 | `"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array` 100 | ); 101 | } 102 | if (fromSampleRate === -1) { 103 | throw new Error( 104 | `Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array` 105 | ); 106 | } else if (fromSampleRate < 3000) { 107 | throw new Error(`Minimum "fromSampleRate" is 3000 
(3kHz)`); 108 | } 109 | if (!data) { 110 | data = WavPacker.floatTo16BitPCM(float32Array); 111 | } 112 | const audio = { 113 | bitsPerSample: 16, 114 | channels: [float32Array], 115 | data, 116 | }; 117 | const packer = new WavPacker(); 118 | const result = packer.pack(fromSampleRate, audio); 119 | blob = result.blob; 120 | arrayBuffer = await blob.arrayBuffer(); 121 | } 122 | const audioBuffer = await context.decodeAudioData(arrayBuffer); 123 | const values = audioBuffer.getChannelData(0); 124 | const url = URL.createObjectURL(blob); 125 | return { 126 | blob, 127 | url, 128 | values, 129 | audioBuffer, 130 | }; 131 | } 132 | 133 | /** 134 | * Logs data in debug mode 135 | * @param {...any} arguments 136 | * @returns {true} 137 | */ 138 | log() { 139 | if (this.debug) { 140 | this.log(...arguments); 141 | } 142 | return true; 143 | } 144 | 145 | /** 146 | * Retrieves the current sampleRate for the recorder 147 | * @returns {number} 148 | */ 149 | getSampleRate() { 150 | return this.sampleRate; 151 | } 152 | 153 | /** 154 | * Retrieves the current status of the recording 155 | * @returns {"ended"|"paused"|"recording"} 156 | */ 157 | getStatus() { 158 | if (!this.processor) { 159 | return 'ended'; 160 | } else if (!this.recording) { 161 | return 'paused'; 162 | } else { 163 | return 'recording'; 164 | } 165 | } 166 | 167 | /** 168 | * Sends an event to the AudioWorklet 169 | * @private 170 | * @param {string} name 171 | * @param {{[key: string]: any}} data 172 | * @param {AudioWorkletNode} [_processor] 173 | * @returns {Promise<{[key: string]: any}>} 174 | */ 175 | async _event(name, data = {}, _processor = null) { 176 | _processor = _processor || this.processor; 177 | if (!_processor) { 178 | throw new Error('Can not send events without recording first'); 179 | } 180 | const message = { 181 | event: name, 182 | id: this._lastEventId++, 183 | data, 184 | }; 185 | _processor.port.postMessage(message); 186 | const t0 = new Date().valueOf(); 187 | while (!this.eventReceipts[message.id]) { 188 | if (new Date().valueOf() - t0 > this.eventTimeout) { 189 | throw new Error(`Timeout waiting for "${name}" event`); 190 | } 191 | await new Promise((res) => setTimeout(() => res(true), 1)); 192 | } 193 | const payload = this.eventReceipts[message.id]; 194 | delete this.eventReceipts[message.id]; 195 | return payload; 196 | } 197 | 198 | /** 199 | * Sets device change callback, remove if callback provided is `null` 200 | * @param {(Array): void|null} callback 201 | * @returns {true} 202 | */ 203 | listenForDeviceChange(callback) { 204 | if (callback === null && this._deviceChangeCallback) { 205 | navigator.mediaDevices.removeEventListener( 206 | 'devicechange', 207 | this._deviceChangeCallback 208 | ); 209 | this._deviceChangeCallback = null; 210 | } else if (callback !== null) { 211 | // Basically a debounce; we only want this called once when devices change 212 | // And we only want the most recent callback() to be executed 213 | // if a few are operating at the same time 214 | let lastId = 0; 215 | let lastDevices = []; 216 | const serializeDevices = (devices) => 217 | devices 218 | .map((d) => d.deviceId) 219 | .sort() 220 | .join(','); 221 | const cb = async () => { 222 | let id = ++lastId; 223 | const devices = await this.listDevices(); 224 | if (id === lastId) { 225 | if (serializeDevices(lastDevices) !== serializeDevices(devices)) { 226 | lastDevices = devices; 227 | callback(devices.slice()); 228 | } 229 | } 230 | }; 231 | navigator.mediaDevices.addEventListener('devicechange', cb); 232 | cb(); 
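// Note: calling cb() immediately seeds lastDevices and fires the callback once with the
// current device list; later 'devicechange' events only re-fire it when the serialized
// device ids actually differ. Illustrative use:
//   recorder.listenForDeviceChange((mics) => console.log(mics.map((d) => d.label)));
//   recorder.listenForDeviceChange(null);   // detach the listener again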
233 | this._deviceChangeCallback = cb; 234 | } 235 | return true; 236 | } 237 | 238 | /** 239 | * Manually request permission to use the microphone 240 | * @returns {Promise} 241 | */ 242 | async requestPermission() { 243 | const permissionStatus = await navigator.permissions.query({ 244 | name: 'microphone', 245 | }); 246 | if (permissionStatus.state === 'denied') { 247 | window.alert('You must grant microphone access to use this feature.'); 248 | } else if (permissionStatus.state === 'prompt') { 249 | try { 250 | const stream = await navigator.mediaDevices.getUserMedia({ 251 | audio: true, 252 | }); 253 | const tracks = stream.getTracks(); 254 | tracks.forEach((track) => track.stop()); 255 | } catch (e) { 256 | window.alert('You must grant microphone access to use this feature.'); 257 | } 258 | } 259 | return true; 260 | } 261 | 262 | /** 263 | * List all eligible devices for recording, will request permission to use microphone 264 | * @returns {Promise>} 265 | */ 266 | async listDevices() { 267 | if ( 268 | !navigator.mediaDevices || 269 | !('enumerateDevices' in navigator.mediaDevices) 270 | ) { 271 | throw new Error('Could not request user devices'); 272 | } 273 | await this.requestPermission(); 274 | const devices = await navigator.mediaDevices.enumerateDevices(); 275 | const audioDevices = devices.filter( 276 | (device) => device.kind === 'audioinput' 277 | ); 278 | return audioDevices; 279 | // const defaultDeviceIndex = audioDevices.findIndex( 280 | // (device) => device.deviceId === 'default' 281 | // ); 282 | // const deviceList = []; 283 | // if (defaultDeviceIndex !== -1) { 284 | // let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0]; 285 | // let existingIndex = audioDevices.findIndex( 286 | // (device) => device.groupId === defaultDevice.groupId 287 | // ); 288 | // if (existingIndex !== -1) { 289 | // defaultDevice = audioDevices.splice(existingIndex, 1)[0]; 290 | // } 291 | // defaultDevice.default = true; 292 | // deviceList.push(defaultDevice); 293 | // } 294 | // return deviceList.concat(audioDevices); 295 | } 296 | 297 | /** 298 | * Begins a recording session and requests microphone permissions if not already granted 299 | * Microphone recording indicator will appear on browser tab but status will be "paused" 300 | * @param {string} [deviceId] if no device provided, default device will be used 301 | * @returns {Promise} 302 | */ 303 | async begin(deviceId) { 304 | if (this.processor) { 305 | throw new Error( 306 | `Already connected: please call .end() to start a new session` 307 | ); 308 | } 309 | 310 | if ( 311 | !navigator.mediaDevices || 312 | !('getUserMedia' in navigator.mediaDevices) 313 | ) { 314 | throw new Error('Could not request user media'); 315 | } 316 | deviceId = deviceId ?? 
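// Note: when begin() is called without an explicit id, it falls back to the last selected
// device, so end()/begin() cycles keep the same mic; deviceSelection is only cleared in
// quit(). Illustrative (deviceId is hypothetical):
//   await recorder.begin("usb-headset-id");
//   await recorder.end();
//   await recorder.begin();                 // re-opens "usb-headset-id"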
this.deviceSelection?.deviceId; 317 | try { 318 | const config = { audio: true }; 319 | if (deviceId) { 320 | config.audio = { deviceId: { exact: deviceId } }; 321 | } 322 | this.stream = await navigator.mediaDevices.getUserMedia(config); 323 | } catch (err) { 324 | throw new Error('Could not start media stream'); 325 | } 326 | 327 | this.listDevices().then((devices) => { 328 | deviceId = this.stream.getAudioTracks()[0].getSettings().deviceId; 329 | console.log( 330 | 'find current device', 331 | devices, 332 | deviceId, 333 | this.stream.getAudioTracks()[0].getSettings() 334 | ); 335 | this.deviceSelection = devices.find((d) => d.deviceId === deviceId); 336 | console.log('current device', this.deviceSelection); 337 | }); 338 | const context = new AudioContext({ sampleRate: this.sampleRate }); 339 | const source = context.createMediaStreamSource(this.stream); 340 | // Load and execute the module script. 341 | try { 342 | await context.audioWorklet.addModule(this.scriptSrc); 343 | } catch (e) { 344 | console.error(e); 345 | throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`); 346 | } 347 | const processor = new AudioWorkletNode(context, 'audio_processor'); 348 | processor.port.onmessage = (e) => { 349 | const { event, id, data } = e.data; 350 | if (event === 'receipt') { 351 | this.eventReceipts[id] = data; 352 | } else if (event === 'chunk') { 353 | if (this._chunkProcessorSize) { 354 | const buffer = this._chunkProcessorBuffer; 355 | this._chunkProcessorBuffer = { 356 | raw: WavPacker.mergeBuffers(buffer.raw, data.raw), 357 | mono: WavPacker.mergeBuffers(buffer.mono, data.mono), 358 | }; 359 | if ( 360 | this._chunkProcessorBuffer.mono.byteLength >= 361 | this._chunkProcessorSize 362 | ) { 363 | this._chunkProcessor(this._chunkProcessorBuffer); 364 | this._chunkProcessorBuffer = { 365 | raw: new ArrayBuffer(0), 366 | mono: new ArrayBuffer(0), 367 | }; 368 | } 369 | } else { 370 | this._chunkProcessor(data); 371 | } 372 | } 373 | }; 374 | 375 | const node = source.connect(processor); 376 | const analyser = context.createAnalyser(); 377 | analyser.fftSize = 8192; 378 | analyser.smoothingTimeConstant = 0.1; 379 | node.connect(analyser); 380 | if (this.outputToSpeakers) { 381 | // eslint-disable-next-line no-console 382 | console.warn( 383 | 'Warning: Output to speakers may affect sound quality,\n' + 384 | 'especially due to system audio feedback preventative measures.\n' + 385 | 'use only for debugging' 386 | ); 387 | analyser.connect(context.destination); 388 | } 389 | 390 | this.source = source; 391 | this.node = node; 392 | this.analyser = analyser; 393 | this.processor = processor; 394 | console.log('begin completed'); 395 | return true; 396 | } 397 | 398 | /** 399 | * Gets the current frequency domain data from the recording track 400 | * @param {"frequency"|"music"|"voice"} [analysisType] 401 | * @param {number} [minDecibels] default -100 402 | * @param {number} [maxDecibels] default -30 403 | * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType} 404 | */ 405 | getFrequencies( 406 | analysisType = 'frequency', 407 | minDecibels = -100, 408 | maxDecibels = -30 409 | ) { 410 | if (!this.processor) { 411 | throw new Error('Session ended: please call .begin() first'); 412 | } 413 | return AudioAnalysis.getFrequencies( 414 | this.analyser, 415 | this.sampleRate, 416 | null, 417 | analysisType, 418 | minDecibels, 419 | maxDecibels 420 | ); 421 | } 422 | 423 | /** 424 | * Pauses the recording 425 | * Keeps microphone stream open but halts storage of 
audio 426 | * @returns {Promise} 427 | */ 428 | async pause() { 429 | if (!this.processor) { 430 | throw new Error('Session ended: please call .begin() first'); 431 | } else if (!this.recording) { 432 | throw new Error('Already paused: please call .record() first'); 433 | } 434 | if (this._chunkProcessorBuffer.raw.byteLength) { 435 | this._chunkProcessor(this._chunkProcessorBuffer); 436 | } 437 | this.log('Pausing ...'); 438 | await this._event('stop'); 439 | this.recording = false; 440 | return true; 441 | } 442 | 443 | /** 444 | * Start recording stream and storing to memory from the connected audio source 445 | * @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor] 446 | * @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold met in mono audio 447 | * @returns {Promise} 448 | */ 449 | async record(chunkProcessor = () => {}, chunkSize = 8192) { 450 | if (!this.processor) { 451 | throw new Error('Session ended: please call .begin() first'); 452 | } else if (this.recording) { 453 | throw new Error('Already recording: please call .pause() first'); 454 | } else if (typeof chunkProcessor !== 'function') { 455 | throw new Error(`chunkProcessor must be a function`); 456 | } 457 | this._chunkProcessor = chunkProcessor; 458 | this._chunkProcessorSize = chunkSize; 459 | this._chunkProcessorBuffer = { 460 | raw: new ArrayBuffer(0), 461 | mono: new ArrayBuffer(0), 462 | }; 463 | this.log('Recording ...'); 464 | await this._event('start'); 465 | this.recording = true; 466 | return true; 467 | } 468 | 469 | /** 470 | * Clears the audio buffer, empties stored recording 471 | * @returns {Promise} 472 | */ 473 | async clear() { 474 | if (!this.processor) { 475 | throw new Error('Session ended: please call .begin() first'); 476 | } 477 | await this._event('clear'); 478 | return true; 479 | } 480 | 481 | /** 482 | * Reads the current audio stream data 483 | * @returns {Promise<{meanValues: Float32Array, channels: Array}>} 484 | */ 485 | async read() { 486 | if (!this.processor) { 487 | throw new Error('Session ended: please call .begin() first'); 488 | } 489 | this.log('Reading ...'); 490 | const result = await this._event('read'); 491 | return result; 492 | } 493 | 494 | /** 495 | * Saves the current audio stream to a file 496 | * @param {boolean} [force] Force saving while still recording 497 | * @returns {Promise} 498 | */ 499 | async save(force = false) { 500 | if (!this.processor) { 501 | throw new Error('Session ended: please call .begin() first'); 502 | } 503 | if (!force && this.recording) { 504 | throw new Error( 505 | 'Currently recording: please call .pause() first, or call .save(true) to force' 506 | ); 507 | } 508 | this.log('Exporting ...'); 509 | const exportData = await this._event('export'); 510 | const packer = new WavPacker(); 511 | const result = packer.pack(this.sampleRate, exportData.audio); 512 | return result; 513 | } 514 | 515 | /** 516 | * Ends the current recording session and saves the result 517 | * @returns {Promise} 518 | */ 519 | async end() { 520 | if (!this.processor) { 521 | throw new Error('Session ended: please call .begin() first'); 522 | } 523 | 524 | const _processor = this.processor; 525 | 526 | this.log('Stopping ...'); 527 | await this._event('stop'); 528 | this.recording = false; 529 | const tracks = this.stream.getTracks(); 530 | tracks.forEach((track) => track.stop()); 531 | 532 | this.log('Exporting ...'); 533 | const exportData = await this._event('export', {}, _processor); 534 | 535 | 
this.processor.disconnect(); 536 | this.source.disconnect(); 537 | this.node.disconnect(); 538 | this.analyser.disconnect(); 539 | this.stream = null; 540 | this.processor = null; 541 | this.source = null; 542 | this.node = null; 543 | 544 | const packer = new WavPacker(); 545 | const result = packer.pack(this.sampleRate, exportData.audio); 546 | return result; 547 | } 548 | 549 | /** 550 | * Performs a full cleanup of WavRecorder instance 551 | * Stops actively listening via microphone and removes existing listeners 552 | * @returns {Promise} 553 | */ 554 | async quit() { 555 | this.listenForDeviceChange(null); 556 | // we do not reset this on end so that selections persist across starts 557 | this.deviceSelection = null; 558 | if (this.processor) { 559 | await this.end(); 560 | } 561 | return true; 562 | } 563 | } 564 | 565 | globalThis.WavRecorder = WavRecorder; 566 | -------------------------------------------------------------------------------- /lib/wavtools/lib/wav_stream_player.js: -------------------------------------------------------------------------------- 1 | import { StreamProcessorSrc } from "./worklets/stream_processor.js"; 2 | import { AudioAnalysis } from "./analysis/audio_analysis.js"; 3 | 4 | /** 5 | * Plays audio streams received in raw PCM16 chunks from the browser 6 | * @class 7 | */ 8 | export class WavStreamPlayer { 9 | /** 10 | * Creates a new WavStreamPlayer instance 11 | * @param {{sampleRate?: number}} options 12 | * @returns {WavStreamPlayer} 13 | */ 14 | constructor({ sampleRate = 44100 } = {}) { 15 | this.scriptSrc = StreamProcessorSrc; 16 | this.sampleRate = sampleRate; 17 | this.context = null; 18 | this.stream = null; 19 | this.analyser = null; 20 | this.trackSampleOffsets = {}; 21 | this.interruptedTrackIds = {}; 22 | } 23 | 24 | /** 25 | * Connects the audio context and enables output to speakers 26 | * @returns {Promise} 27 | */ 28 | async connect() { 29 | this.context = new AudioContext({ sampleRate: this.sampleRate }); 30 | if (this._speakerID) { 31 | this.context.setSinkId(this._speakerID); 32 | } 33 | if (this.context.state === "suspended") { 34 | await this.context.resume(); 35 | } 36 | try { 37 | await this.context.audioWorklet.addModule(this.scriptSrc); 38 | } catch (e) { 39 | console.error(e); 40 | throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`); 41 | } 42 | const analyser = this.context.createAnalyser(); 43 | analyser.fftSize = 8192; 44 | analyser.smoothingTimeConstant = 0.1; 45 | this.analyser = analyser; 46 | return true; 47 | } 48 | 49 | /** 50 | * Gets the current frequency domain data from the playing track 51 | * @param {"frequency"|"music"|"voice"} [analysisType] 52 | * @param {number} [minDecibels] default -100 53 | * @param {number} [maxDecibels] default -30 54 | * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType} 55 | */ 56 | getFrequencies( 57 | analysisType = "frequency", 58 | minDecibels = -100, 59 | maxDecibels = -30 60 | ) { 61 | if (!this.analyser) { 62 | throw new Error("Not connected, please call .connect() first"); 63 | } 64 | return AudioAnalysis.getFrequencies( 65 | this.analyser, 66 | this.sampleRate, 67 | null, 68 | analysisType, 69 | minDecibels, 70 | maxDecibels 71 | ); 72 | } 73 | 74 | /** 75 | * @param {string} speaker deviceId 76 | */ 77 | async updateSpeaker(speaker) { 78 | const _prevSpeaker = this._speakerID; 79 | this._speakerID = speaker; 80 | if (this.context) { 81 | try { 82 | if (speaker === "default") { 83 | await this.context.setSinkId(); 84 | } else { 85 | 
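// Note: AudioContext.setSinkId() may be unavailable in some browsers; when the call below
// throws, the catch block restores the previously stored speaker id. Illustrative use
// (someSpeakerId would come from enumerateDevices()):
//   await player.updateSpeaker("default");        // route back to the system default
//   await player.updateSpeaker(someSpeakerId);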
await this.context.setSinkId(speaker); 86 | } 87 | } catch (e) { 88 | console.error(`Could not set sinkId to ${speaker}: ${e}`); 89 | this._speakerID = _prevSpeaker; 90 | } 91 | } 92 | } 93 | 94 | /** 95 | * Starts audio streaming 96 | * @private 97 | * @returns {Promise} 98 | */ 99 | _start() { 100 | const streamNode = new AudioWorkletNode(this.context, "stream_processor"); 101 | streamNode.connect(this.context.destination); 102 | streamNode.port.onmessage = (e) => { 103 | const { event } = e.data; 104 | if (event === "stop") { 105 | streamNode.disconnect(); 106 | this.stream = null; 107 | } else if (event === "offset") { 108 | const { requestId, trackId, offset } = e.data; 109 | const currentTime = offset / this.sampleRate; 110 | this.trackSampleOffsets[requestId] = { trackId, offset, currentTime }; 111 | } 112 | }; 113 | this.analyser.disconnect(); 114 | streamNode.connect(this.analyser); 115 | this.stream = streamNode; 116 | return true; 117 | } 118 | 119 | /** 120 | * Adds 16BitPCM data to the currently playing audio stream 121 | * You can add chunks beyond the current play point and they will be queued for play 122 | * @param {ArrayBuffer|Int16Array} arrayBuffer 123 | * @param {string} [trackId] 124 | * @returns {Int16Array} 125 | */ 126 | add16BitPCM(arrayBuffer, trackId = "default") { 127 | if (typeof trackId !== "string") { 128 | throw new Error(`trackId must be a string`); 129 | } else if (this.interruptedTrackIds[trackId]) { 130 | return; 131 | } 132 | if (!this.stream) { 133 | this._start(); 134 | } 135 | let buffer; 136 | if (arrayBuffer instanceof Int16Array) { 137 | buffer = arrayBuffer; 138 | } else if (arrayBuffer instanceof ArrayBuffer) { 139 | buffer = new Int16Array(arrayBuffer); 140 | } else { 141 | throw new Error(`argument must be Int16Array or ArrayBuffer`); 142 | } 143 | this.stream.port.postMessage({ event: "write", buffer, trackId }); 144 | return buffer; 145 | } 146 | 147 | /** 148 | * Gets the offset (sample count) of the currently playing stream 149 | * @param {boolean} [interrupt] 150 | * @returns {{trackId: string|null, offset: number, currentTime: number}} 151 | */ 152 | async getTrackSampleOffset(interrupt = false) { 153 | if (!this.stream) { 154 | return null; 155 | } 156 | const requestId = crypto.randomUUID(); 157 | this.stream.port.postMessage({ 158 | event: interrupt ? 
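// Note: this posts either an "interrupt" or an "offset" request to the stream_processor
// worklet; the worklet answers on the same port with an "offset" message carrying this
// requestId, which the polling loop below waits for. When interrupt is true the returned
// trackId is recorded in interruptedTrackIds, so later add16BitPCM() calls for that track
// are dropped. Illustrative barge-in handling:
//   const { trackId, currentTime } = await player.interrupt();   // stop playback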
"interrupt" : "offset", 159 | requestId, 160 | }); 161 | let trackSampleOffset; 162 | while (!trackSampleOffset) { 163 | trackSampleOffset = this.trackSampleOffsets[requestId]; 164 | await new Promise((r) => setTimeout(() => r(), 1)); 165 | } 166 | const { trackId } = trackSampleOffset; 167 | if (interrupt && trackId) { 168 | this.interruptedTrackIds[trackId] = true; 169 | } 170 | return trackSampleOffset; 171 | } 172 | 173 | /** 174 | * Strips the current stream and returns the sample offset of the audio 175 | * @param {boolean} [interrupt] 176 | * @returns {{trackId: string|null, offset: number, currentTime: number}} 177 | */ 178 | async interrupt() { 179 | return this.getTrackSampleOffset(true); 180 | } 181 | } 182 | 183 | globalThis.WavStreamPlayer = WavStreamPlayer; 184 | -------------------------------------------------------------------------------- /lib/wavtools/lib/worklets/audio_processor.js: -------------------------------------------------------------------------------- 1 | const AudioProcessorWorklet = ` 2 | class AudioProcessor extends AudioWorkletProcessor { 3 | 4 | constructor() { 5 | super(); 6 | this.port.onmessage = this.receive.bind(this); 7 | this.initialize(); 8 | } 9 | 10 | initialize() { 11 | this.foundAudio = false; 12 | this.recording = false; 13 | this.chunks = []; 14 | } 15 | 16 | /** 17 | * Concatenates sampled chunks into channels 18 | * Format is chunk[Left[], Right[]] 19 | */ 20 | readChannelData(chunks, channel = -1, maxChannels = 9) { 21 | let channelLimit; 22 | if (channel !== -1) { 23 | if (chunks[0] && chunks[0].length - 1 < channel) { 24 | throw new Error( 25 | \`Channel \${channel} out of range: max \${chunks[0].length}\` 26 | ); 27 | } 28 | channelLimit = channel + 1; 29 | } else { 30 | channel = 0; 31 | channelLimit = Math.min(chunks[0] ? chunks[0].length : 1, maxChannels); 32 | } 33 | const channels = []; 34 | for (let n = channel; n < channelLimit; n++) { 35 | const length = chunks.reduce((sum, chunk) => { 36 | return sum + chunk[n].length; 37 | }, 0); 38 | const buffers = chunks.map((chunk) => chunk[n]); 39 | const result = new Float32Array(length); 40 | let offset = 0; 41 | for (let i = 0; i < buffers.length; i++) { 42 | result.set(buffers[i], offset); 43 | offset += buffers[i].length; 44 | } 45 | channels[n] = result; 46 | } 47 | return channels; 48 | } 49 | 50 | /** 51 | * Combines parallel audio data into correct format, 52 | * channels[Left[], Right[]] to float32Array[LRLRLRLR...] 
53 | */ 54 | formatAudioData(channels) { 55 | if (channels.length === 1) { 56 | // Simple case is only one channel 57 | const float32Array = channels[0].slice(); 58 | const meanValues = channels[0].slice(); 59 | return { float32Array, meanValues }; 60 | } else { 61 | const float32Array = new Float32Array( 62 | channels[0].length * channels.length 63 | ); 64 | const meanValues = new Float32Array(channels[0].length); 65 | for (let i = 0; i < channels[0].length; i++) { 66 | const offset = i * channels.length; 67 | let meanValue = 0; 68 | for (let n = 0; n < channels.length; n++) { 69 | float32Array[offset + n] = channels[n][i]; 70 | meanValue += channels[n][i]; 71 | } 72 | meanValues[i] = meanValue / channels.length; 73 | } 74 | return { float32Array, meanValues }; 75 | } 76 | } 77 | 78 | /** 79 | * Converts 32-bit float data to 16-bit integers 80 | */ 81 | floatTo16BitPCM(float32Array) { 82 | const buffer = new ArrayBuffer(float32Array.length * 2); 83 | const view = new DataView(buffer); 84 | let offset = 0; 85 | for (let i = 0; i < float32Array.length; i++, offset += 2) { 86 | let s = Math.max(-1, Math.min(1, float32Array[i])); 87 | view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true); 88 | } 89 | return buffer; 90 | } 91 | 92 | /** 93 | * Retrieves the most recent amplitude values from the audio stream 94 | * @param {number} channel 95 | */ 96 | getValues(channel = -1) { 97 | const channels = this.readChannelData(this.chunks, channel); 98 | const { meanValues } = this.formatAudioData(channels); 99 | return { meanValues, channels }; 100 | } 101 | 102 | /** 103 | * Exports chunks as an audio/wav file 104 | */ 105 | export() { 106 | const channels = this.readChannelData(this.chunks); 107 | const { float32Array, meanValues } = this.formatAudioData(channels); 108 | const audioData = this.floatTo16BitPCM(float32Array); 109 | return { 110 | meanValues: meanValues, 111 | audio: { 112 | bitsPerSample: 16, 113 | channels: channels, 114 | data: audioData, 115 | }, 116 | }; 117 | } 118 | 119 | receive(e) { 120 | const { event, id } = e.data; 121 | let receiptData = {}; 122 | switch (event) { 123 | case 'start': 124 | this.recording = true; 125 | break; 126 | case 'stop': 127 | this.recording = false; 128 | break; 129 | case 'clear': 130 | this.initialize(); 131 | break; 132 | case 'export': 133 | receiptData = this.export(); 134 | break; 135 | case 'read': 136 | receiptData = this.getValues(); 137 | break; 138 | default: 139 | break; 140 | } 141 | // Always send back receipt 142 | this.port.postMessage({ event: 'receipt', id, data: receiptData }); 143 | } 144 | 145 | sendChunk(chunk) { 146 | const channels = this.readChannelData([chunk]); 147 | const { float32Array, meanValues } = this.formatAudioData(channels); 148 | const rawAudioData = this.floatTo16BitPCM(float32Array); 149 | const monoAudioData = this.floatTo16BitPCM(meanValues); 150 | this.port.postMessage({ 151 | event: 'chunk', 152 | data: { 153 | mono: monoAudioData, 154 | raw: rawAudioData, 155 | }, 156 | }); 157 | } 158 | 159 | process(inputList, outputList, parameters) { 160 | // Copy input to output (e.g. 
speakers) 161 | // Note that this creates choppy sounds with Mac products 162 | const sourceLimit = Math.min(inputList.length, outputList.length); 163 | for (let inputNum = 0; inputNum < sourceLimit; inputNum++) { 164 | const input = inputList[inputNum]; 165 | const output = outputList[inputNum]; 166 | const channelCount = Math.min(input.length, output.length); 167 | for (let channelNum = 0; channelNum < channelCount; channelNum++) { 168 | input[channelNum].forEach((sample, i) => { 169 | output[channelNum][i] = sample; 170 | }); 171 | } 172 | } 173 | const inputs = inputList[0]; 174 | // There's latency at the beginning of a stream before recording starts 175 | // Make sure we actually receive audio data before we start storing chunks 176 | let sliceIndex = 0; 177 | if (!this.foundAudio) { 178 | for (const channel of inputs) { 179 | sliceIndex = 0; // reset for each channel 180 | if (this.foundAudio) { 181 | break; 182 | } 183 | if (channel) { 184 | for (const value of channel) { 185 | if (value !== 0) { 186 | // find only one non-zero entry in any channel 187 | this.foundAudio = true; 188 | break; 189 | } else { 190 | sliceIndex++; 191 | } 192 | } 193 | } 194 | } 195 | } 196 | if (inputs && inputs[0] && this.foundAudio && this.recording) { 197 | // We need to copy the TypedArray, because the \`process\` 198 | // internals will reuse the same buffer to hold each input 199 | const chunk = inputs.map((input) => input.slice(sliceIndex)); 200 | this.chunks.push(chunk); 201 | this.sendChunk(chunk); 202 | } 203 | return true; 204 | } 205 | } 206 | 207 | registerProcessor('audio_processor', AudioProcessor); 208 | `; 209 | 210 | const script = new Blob([AudioProcessorWorklet], { 211 | type: 'application/javascript', 212 | }); 213 | const src = URL.createObjectURL(script); 214 | export const AudioProcessorSrc = src; 215 | -------------------------------------------------------------------------------- /lib/wavtools/lib/worklets/stream_processor.js: -------------------------------------------------------------------------------- 1 | export const StreamProcessorWorklet = ` 2 | class StreamProcessor extends AudioWorkletProcessor { 3 | constructor() { 4 | super(); 5 | this.hasStarted = false; 6 | this.hasInterrupted = false; 7 | this.outputBuffers = []; 8 | this.bufferLength = 128; 9 | this.write = { buffer: new Float32Array(this.bufferLength), trackId: null }; 10 | this.writeOffset = 0; 11 | this.trackSampleOffsets = {}; 12 | this.port.onmessage = (event) => { 13 | if (event.data) { 14 | const payload = event.data; 15 | if (payload.event === 'write') { 16 | const int16Array = payload.buffer; 17 | const float32Array = new Float32Array(int16Array.length); 18 | for (let i = 0; i < int16Array.length; i++) { 19 | float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32 20 | } 21 | this.writeData(float32Array, payload.trackId); 22 | } else if ( 23 | payload.event === 'offset' || 24 | payload.event === 'interrupt' 25 | ) { 26 | const requestId = payload.requestId; 27 | const trackId = this.write.trackId; 28 | const offset = this.trackSampleOffsets[trackId] || 0; 29 | this.port.postMessage({ 30 | event: 'offset', 31 | requestId, 32 | trackId, 33 | offset, 34 | }); 35 | if (payload.event === 'interrupt') { 36 | this.hasInterrupted = true; 37 | } 38 | } else { 39 | throw new Error(\`Unhandled event "\${payload.event}"\`); 40 | } 41 | } 42 | }; 43 | } 44 | 45 | writeData(float32Array, trackId = null) { 46 | let { buffer } = this.write; 47 | let offset = this.writeOffset; 48 | for (let i = 0; i < 
float32Array.length; i++) { 49 | buffer[offset++] = float32Array[i]; 50 | if (offset >= buffer.length) { 51 | this.outputBuffers.push(this.write); 52 | this.write = { buffer: new Float32Array(this.bufferLength), trackId }; 53 | buffer = this.write.buffer; 54 | offset = 0; 55 | } 56 | } 57 | this.writeOffset = offset; 58 | return true; 59 | } 60 | 61 | process(inputs, outputs, parameters) { 62 | const output = outputs[0]; 63 | const outputChannelData = output[0]; 64 | const outputBuffers = this.outputBuffers; 65 | if (this.hasInterrupted) { 66 | this.port.postMessage({ event: 'stop' }); 67 | return false; 68 | } else if (outputBuffers.length) { 69 | this.hasStarted = true; 70 | const { buffer, trackId } = outputBuffers.shift(); 71 | for (let i = 0; i < outputChannelData.length; i++) { 72 | outputChannelData[i] = buffer[i] || 0; 73 | } 74 | if (trackId) { 75 | this.trackSampleOffsets[trackId] = 76 | this.trackSampleOffsets[trackId] || 0; 77 | this.trackSampleOffsets[trackId] += buffer.length; 78 | } 79 | return true; 80 | } else if (this.hasStarted) { 81 | this.port.postMessage({ event: 'stop' }); 82 | return false; 83 | } else { 84 | return true; 85 | } 86 | } 87 | } 88 | 89 | registerProcessor('stream_processor', StreamProcessor); 90 | `; 91 | 92 | const script = new Blob([StreamProcessorWorklet], { 93 | type: 'application/javascript', 94 | }); 95 | const src = URL.createObjectURL(script); 96 | export const StreamProcessorSrc = src; 97 | -------------------------------------------------------------------------------- /lib/websocket-utils/reconnectingWebSocket.ts: -------------------------------------------------------------------------------- 1 | import { EventEmitter } from "events"; 2 | 3 | const readyStates = ["CONNECTING", "OPEN", "CLOSING", "CLOSED"]; 4 | const KEEP_ALIVE_INTERVAL = 5000; 5 | const KEEP_ALIVE_TIMEOUT = 15000; 6 | // client side code in soupSFU has a timeout of 15 seconds for command response 7 | // 5 seconds seems reasonable that it provides roughly 3 retry attempts 8 | const WEBSOCKET_CONNECTION_TIMEOUT = 150 * 1000; 9 | const DEFAULT_RECONNECT_ATTEMPTS = 2; 10 | const MAX_RECONNECT_ATTEMPTS = 10; 11 | const DEFAULT_RECONNECT_INTERVAL = 1000; 12 | const MAX_RECONNECT_INTERVAL = 30 * 1000; 13 | const DEFAULT_RECONNECT_DECAY = 1.5; 14 | 15 | const WEBSOCKET_TIMEOUT_CODE = 4100; 16 | 17 | const SIG_CONNECTION_CANCELED = "SIG_CONNECTION_CANCELED"; 18 | const WEBSOCKET_ERROR = "WEBSOCKET_ERROR"; 19 | 20 | enum LOG_LEVEL { 21 | DEBUG, 22 | ERROR, 23 | INFO, 24 | WARN, 25 | } 26 | 27 | class rWebSocket { 28 | private _ws: WebSocket; 29 | _closedManually: boolean = false; 30 | _errored: boolean = false; 31 | _rejected: boolean = false; 32 | _timed_out: boolean = false; 33 | _initialConnectionOk: string | boolean = false; 34 | 35 | constructor(url: string, protocols?: string | string[]) { 36 | this._ws = new WebSocket(url, protocols); 37 | } 38 | 39 | addEventListener( 40 | type: string, 41 | listener: (this: WebSocket, ev: Event) => any, 42 | ) { 43 | this._ws.addEventListener(type, listener); 44 | } 45 | 46 | // Add other WebSocket methods as needed 47 | close(code?: number, reason?: string) { 48 | this._ws.close(code, reason); 49 | } 50 | 51 | send(data: string | ArrayBuffer | Blob | ArrayBufferView) { 52 | this._ws.send(data); 53 | } 54 | 55 | // Add getters for WebSocket properties 56 | get url() { 57 | return this._ws.url; 58 | } 59 | 60 | get readyState() { 61 | return this._ws.readyState; 62 | } 63 | } 64 | 65 | interface WebSocketOptions { 66 | parseBlobToJson?: 
boolean; 67 | } 68 | 69 | /** 70 | * Builds on top of Javascript Websockets 71 | * 72 | * This behaves like the Websocket library in every way, except if it fails to 73 | * connect or if it gets disconnected, it will try to reconnect depending on 74 | * the maximum number of reconnect attempts set. retry is not enabled for initial 75 | * connection. When initial connection fails it is best to check yourself before 76 | * you keep wreckin' yourself. 77 | * 78 | * It is API compatible, so when you have: 79 | * ws = new WebSocket('ws://....'); 80 | * you can replace with: 81 | * ws = new ReconnectingWebSocket('ws://....'); 82 | * 83 | * While it is API compatible with the NodeJS ws library, we provide the 84 | * following additional properties and events on the ReconnectingWebSocket. 85 | * 86 | * Events: 87 | * 88 | * connection-timeout 89 | * - Emitted when the web socket connection times out. 90 | * 91 | * reconnecting 92 | * - Emitted after a manual close of the web socket is done and before retrying 93 | * the connection. 94 | * 95 | * reconnect-failed 96 | * - Emitted when the number of connection attempts exceeds the set number of 97 | * reconnection attempts. 98 | * 99 | * keep-alive 100 | * - Emitted when the set keep alive interval elapses. This event may be used 101 | * to have ping pong keep-alive mechanism for web socket health. 102 | * 103 | * Properties: 104 | * 105 | * keepAliveTimeout 106 | * - The timeout for keep-alive. Default: 15000 107 | * 108 | * keepAliveInterval 109 | * - The interval at which to emit keep-alive event. Default: 5000 110 | * 111 | * shouldRetryFn 112 | * - A callback function which should return boolean to determine if a web 113 | * socket reconnection attempt should be made. When not set, connection is 114 | * always retried. 115 | * 116 | * connectionTimeout 117 | * - The timeout interval for considering whether the connection timed out. 118 | * Default: 20000 ms 119 | * 120 | * maxReconnectAttempts 121 | * - The maximum number of attempts to be made for reconnection. Default: 2 122 | * 123 | * reconnectInterval 124 | * - The interval to wait before attempting a reconnection. Default: 1000 ms 125 | */ 126 | export class ReconnectingWebSocket extends EventEmitter { 127 | /** The connection is not yet open. */ 128 | static readonly CONNECTING: 0; 129 | /** The connection is open and ready to communicate. */ 130 | static readonly OPEN: 1; 131 | /** The connection is in the process of closing. */ 132 | static readonly CLOSING: 2; 133 | /** The connection is closed. 
*/ 134 | static readonly CLOSED: 3; 135 | 136 | private _ws: rWebSocket | null; 137 | 138 | _url: string; 139 | _protocols: string | string[] | undefined; 140 | 141 | declare private _keepAliveTimeout: number; 142 | declare private _keepAliveInterval: number; 143 | declare private _lastMsgRecvTime: number; 144 | declare private _lastMsgSendTime: number; 145 | declare private _disconnected: boolean; 146 | declare private _keepIntervalID: NodeJS.Timeout | null; 147 | declare private _connectionTimeout: number; 148 | declare private _connectionTimeoutID: NodeJS.Timeout | undefined; 149 | declare private _reconnectTimeoutID: NodeJS.Timeout | undefined; 150 | declare private _shouldRetryFn: (() => boolean) | null; 151 | declare private _reconnectAttempts: number; 152 | declare private _allowedReconnectAttempts: number; 153 | declare private _reconnectInterval: number; 154 | declare private _maxReconnectInterval: number; 155 | declare private _reconnectDecay: number; 156 | declare private _parseBlobToJson: boolean; 157 | 158 | constructor( 159 | address: string, 160 | protocols?: string | string[], 161 | options: WebSocketOptions = {}, 162 | ) { 163 | super(); 164 | 165 | if (!address) { 166 | throw new Error("Need a valid WebSocket URL"); 167 | } 168 | 169 | this._ws = null; 170 | 171 | this._url = address; 172 | this._protocols = protocols; 173 | this._parseBlobToJson = options?.parseBlobToJson ?? true; 174 | 175 | this.init(); 176 | } 177 | 178 | private init() { 179 | this._keepAliveTimeout = KEEP_ALIVE_TIMEOUT; 180 | this._keepAliveInterval = KEEP_ALIVE_INTERVAL; 181 | this._disconnected = false; 182 | this._keepIntervalID = null; 183 | this._shouldRetryFn = null; 184 | this._connectionTimeout = WEBSOCKET_CONNECTION_TIMEOUT; 185 | this._reconnectAttempts = 0; 186 | this._allowedReconnectAttempts = DEFAULT_RECONNECT_ATTEMPTS; 187 | this._reconnectInterval = DEFAULT_RECONNECT_INTERVAL; 188 | this._maxReconnectInterval = MAX_RECONNECT_INTERVAL; 189 | this._reconnectDecay = DEFAULT_RECONNECT_DECAY; 190 | } 191 | 192 | public async connect() { 193 | return new Promise((resolve, reject) => { 194 | this._disconnected = false; 195 | this.clearReconnectTimeout(); 196 | 197 | let ws: rWebSocket = new rWebSocket(this._url, this._protocols); 198 | this.setConnectionTimeout(); 199 | 200 | ws.addEventListener("close", (evt) => { 201 | const closeEvent = evt as CloseEvent; 202 | let code = ws._timed_out ? WEBSOCKET_TIMEOUT_CODE : closeEvent.code; 203 | let reason = ws._timed_out 204 | ? "websocket connection timed out" 205 | : closeEvent.reason; 206 | ws._timed_out = false; 207 | if (!ws._closedManually && ws._initialConnectionOk) { 208 | console.warn( 209 | `signaling socket closed unexpectedly: ${code}${ 210 | reason ? " " + reason : "" 211 | }`, 212 | ); 213 | this._closeSocket(); 214 | this.emit("close", code, reason); 215 | } else { 216 | this.log("signaling socket closed"); 217 | } 218 | if (!ws._closedManually && (ws._errored || ws._timed_out)) { 219 | console.warn( 220 | `signaling socket closed on error: ${code}${ 221 | reason ? 
" " + reason : "" 222 | }`, 223 | ); 224 | if (!ws._rejected) { 225 | ws._rejected = true; 226 | const err = new Error( 227 | `WebSocket connection error (${code}): ${reason}`, 228 | ); 229 | err.name = WEBSOCKET_ERROR; 230 | reject(err); 231 | } 232 | } 233 | }); 234 | ws.addEventListener("open", (evt) => { 235 | this.log("wss connection opened to", LOG_LEVEL.DEBUG, this._url); 236 | this.clearConnectionTimeout(); 237 | // now that the timeout closes the socket, in theory this onopen 238 | // callback should never happen in the first place, but seems 239 | // harmless to leave these safeguards in 240 | if (ws._rejected || ws._timed_out) { 241 | return; 242 | } 243 | if (ws._closedManually || (this._ws && this._ws !== ws)) { 244 | ws._rejected = true; 245 | ws.close(); 246 | let err = Error( 247 | "wss connection interrupted by disconnect or newer connection", 248 | ); 249 | err.name = SIG_CONNECTION_CANCELED; 250 | reject(err); 251 | return; 252 | } 253 | ws._initialConnectionOk = this._url; 254 | this._lastMsgRecvTime = Date.now(); 255 | if (this._keepAliveInterval) { 256 | this._keepIntervalID = setInterval( 257 | () => this.checkSocketHealthAndSendKeepAlive(), 258 | this._keepAliveInterval, 259 | ); 260 | } 261 | this._ws = ws; 262 | this.emit("open"); 263 | resolve(ws); 264 | }); 265 | ws.addEventListener("error", (evt) => { 266 | // fyi: evt is an Event here, with 0 amount of helpful info. If there 267 | // happens to be info about the error, it's included in the 268 | // accompanying close event (because that make sense. shakes head) 269 | // SO. We do not reject here. Instead, we just set the _errored 270 | // flag on the socket so when the close event occurs, it knows to 271 | // reject the promise 272 | if (!ws._closedManually) { 273 | const wsTarget = evt.currentTarget as WebSocket; 274 | this.log(`websocket error event: ${wsTarget?.url}`); 275 | } 276 | ws._errored = true; 277 | }); 278 | ws.addEventListener("message", (msg) => { 279 | void this._handleMessage(msg as MessageEvent); 280 | }); 281 | }); 282 | } 283 | 284 | private setConnectionTimeout() { 285 | this._connectionTimeoutID = setTimeout(async () => { 286 | this.log("Connection reconnect attempt timed out."); 287 | this.emit("connection-timeout"); 288 | this.clearConnectionTimeout(); 289 | await this._closeSocket(); 290 | }, this._connectionTimeout); 291 | } 292 | 293 | private clearConnectionTimeout() { 294 | clearTimeout(this._connectionTimeoutID); 295 | this._connectionTimeoutID = undefined; 296 | } 297 | 298 | private clearReconnectTimeout() { 299 | clearTimeout(this._reconnectTimeoutID); 300 | this._reconnectTimeoutID = undefined; 301 | } 302 | 303 | private clearKeepAliveInterval() { 304 | if (this._keepIntervalID) { 305 | clearInterval(this._keepIntervalID); 306 | this._keepIntervalID = null; 307 | } 308 | } 309 | 310 | private async checkSocketHealthAndSendKeepAlive() { 311 | if (!(this._ws && this._ws.readyState === WebSocket.OPEN)) { 312 | return; 313 | } 314 | 315 | if (!this._keepAliveTimeout || !this._keepAliveInterval) { 316 | return; 317 | } 318 | 319 | // See if we haven't gotten a message back recently, and if we 320 | // haven't, close the socket. the os timeouts to detect if a socket 321 | // has gone stale are longer than we want. 
322 | if (Date.now() - this._lastMsgRecvTime > this._keepAliveTimeout) { 323 | this.log("Connection is stale, need to reconnect", LOG_LEVEL.WARN); 324 | await this._closeSocket(); 325 | return; 326 | } 327 | 328 | // Only emit the keep-alive event if we haven't sent anything else recently 329 | if (Date.now() - this._lastMsgSendTime < this._keepAliveInterval) { 330 | return; 331 | } 332 | 333 | this.log("Emitting keep-alive", LOG_LEVEL.DEBUG); 334 | this.emit("keep-alive"); 335 | } 336 | 337 | // We use the word manually here to imply the application using this code 338 | // or this code itself will decide to close the socket. 339 | private async _closeSocket() { 340 | this.log("Closing"); 341 | try { 342 | this.clearKeepAliveInterval(); 343 | this._lastMsgRecvTime = 0; 344 | 345 | if (this._ws) { 346 | this._ws._closedManually = true; 347 | this._ws.close(); 348 | } 349 | 350 | // query retry function if we want to retry. 351 | const shouldRetry = 352 | this._ws?._initialConnectionOk && 353 | this._shouldRetryFn && 354 | this._shouldRetryFn(); 355 | 356 | this._ws = null; 357 | 358 | if (shouldRetry) { 359 | this.log("Emitting reconnect", LOG_LEVEL.DEBUG); 360 | this.emit("reconnecting"); 361 | await this.retryFailedConnection(); 362 | } 363 | } catch (error) { 364 | this.log(`Error while closing and retrying: ${error}`, LOG_LEVEL.ERROR); 365 | } 366 | } 367 | 368 | private async retryFailedConnection() { 369 | if (this._reconnectAttempts < this._allowedReconnectAttempts) { 370 | if (this._reconnectTimeoutID) { 371 | this.log("Retry already scheduled"); 372 | return; 373 | } 374 | this.log("Retrying failed connection"); 375 | let timeout = 376 | // The timeout logic is taken from 377 | // https://github.com/joewalnes/reconnecting-websocket 378 | this._reconnectInterval * 379 | Math.pow(this._reconnectDecay, this._reconnectAttempts); 380 | timeout = 381 | timeout > this._maxReconnectInterval 382 | ? this._maxReconnectInterval 383 | : timeout; 384 | this.log(`Reconnecting in ${timeout / 1000} seconds`); 385 | 386 | this._reconnectAttempts += 1; 387 | this._reconnectTimeoutID = setTimeout(() => this.connect(), timeout); 388 | } else { 389 | this.log("Maximum connection retry attempts exceeded", LOG_LEVEL.ERROR); 390 | this.emit("reconnect-failed"); 391 | } 392 | } 393 | 394 | private log( 395 | msg: string, 396 | log_level: LOG_LEVEL = LOG_LEVEL.DEBUG, 397 | ...args: any 398 | ) { 399 | switch (log_level) { 400 | case LOG_LEVEL.DEBUG: 401 | console.debug(`websocket: ${msg}`, ...args); 402 | break; 403 | case LOG_LEVEL.ERROR: 404 | console.error(`websocket: ${msg}`, ...args); 405 | break; 406 | case LOG_LEVEL.WARN: 407 | console.warn(`websocket: ${msg}`, ...args); 408 | break; 409 | case LOG_LEVEL.INFO: 410 | default: 411 | console.log(`websocket: ${msg}`, ...args); 412 | break; 413 | } 414 | } 415 | 416 | async send(data: any) { 417 | try { 418 | if (this._ws && this._ws.readyState === WebSocket.OPEN) { 419 | this._lastMsgSendTime = Date.now(); 420 | this._ws.send(data); 421 | } else { 422 | this.log(`Failed to send data, web socket not open.`, LOG_LEVEL.ERROR); 423 | } 424 | } catch (error) { 425 | this.log(`Failed to send data. ${error}`, LOG_LEVEL.ERROR); 426 | } 427 | } 428 | 429 | async close() { 430 | try { 431 | this.log("Closing websocket"); 432 | this._disconnected = true; 433 | this.clearReconnectTimeout(); 434 | this._closeSocket(); 435 | } catch (error) { 436 | this.log(`Failed to close websocket. 
${error}`); 437 | } 438 | } 439 | 440 | get readyState(): number { 441 | return this._ws?.readyState ?? WebSocket.CLOSED; 442 | } 443 | 444 | get url(): string { 445 | return this._url; 446 | } 447 | 448 | get keepAliveTimeout(): number { 449 | return this._keepAliveTimeout; 450 | } 451 | 452 | set keepAliveTimeout(keepAliveTimeout: number) { 453 | if (typeof keepAliveTimeout === "number") { 454 | this.log(`Setting ACK freshness timeout to ${keepAliveTimeout}`); 455 | this._keepAliveTimeout = keepAliveTimeout; 456 | } 457 | } 458 | 459 | get keepAliveInterval(): number { 460 | return this._keepAliveInterval; 461 | } 462 | 463 | set keepAliveInterval(keepAliveInterval: number) { 464 | if (typeof keepAliveInterval === "number") { 465 | this.log(`Setting keep-alive interval to ${keepAliveInterval}`); 466 | this._keepAliveInterval = keepAliveInterval; 467 | } 468 | } 469 | 470 | set shouldRetryFn(cb: () => boolean) { 471 | if (typeof cb === "function") { 472 | this._shouldRetryFn = cb; 473 | } 474 | } 475 | 476 | get connectionTimeout(): number { 477 | return this._connectionTimeout; 478 | } 479 | 480 | set connectionTimeout(timeout: number) { 481 | if (typeof timeout === "number") { 482 | this._connectionTimeout = timeout; 483 | } 484 | } 485 | 486 | get maxReconnectAttempts(): number { 487 | return this._allowedReconnectAttempts; 488 | } 489 | 490 | set maxReconnectAttempts(attempts: number) { 491 | if (attempts > 0 && attempts < MAX_RECONNECT_ATTEMPTS) { 492 | this.log(`Setting maximum connection retry attempts to ${attempts}`); 493 | this._allowedReconnectAttempts = attempts; 494 | } else { 495 | this._allowedReconnectAttempts = DEFAULT_RECONNECT_ATTEMPTS; 496 | } 497 | } 498 | 499 | get reconnectInterval(): number { 500 | return this._reconnectInterval; 501 | } 502 | 503 | set reconnectInterval(interval: number) { 504 | if (typeof interval === "number") { 505 | this._reconnectInterval = 506 | interval < this._maxReconnectInterval 507 | ? 
interval 508 | : this._maxReconnectInterval; 509 | } 510 | } 511 | 512 | async _handleMessage(event: MessageEvent) { 513 | this._lastMsgRecvTime = Date.now(); 514 | const data = event.data; 515 | 516 | const _parsePromise = new Promise((resolve, reject) => { 517 | if (typeof data === "string") { 518 | // Handle text message 519 | resolve(data); 520 | } else if (data instanceof ArrayBuffer) { 521 | // Handle binary message 522 | const arrayBuffer = data; 523 | // Parse the ArrayBuffer as needed 524 | // Example: Convert ArrayBuffer to Uint8Array 525 | resolve(new Uint8Array(arrayBuffer)); 526 | // Process the Uint8Array as needed 527 | } else if (data instanceof Blob) { 528 | if (!this._parseBlobToJson) { 529 | resolve(data); 530 | return; 531 | } 532 | // Handle Blob message 533 | const blob = data; 534 | // Convert Blob to ArrayBuffer 535 | const reader = new FileReader(); 536 | reader.onload = () => { 537 | const text = reader.result as string; 538 | try { 539 | const json = JSON.parse(text); 540 | resolve(json); 541 | } catch (e) { 542 | console.error("Failed to parse JSON from Blob:", e); 543 | } 544 | }; 545 | reader.readAsText(blob); 546 | } 547 | }); 548 | 549 | let msg = await _parsePromise; 550 | 551 | this.emit("message", msg); 552 | } 553 | } 554 | 555 | [ 556 | "binaryType", 557 | "bufferedAmount", 558 | "extensions", 559 | "protocol", 560 | "readyState", 561 | "url", 562 | "keepAliveTimeout", 563 | "keepAliveInterval", 564 | "shouldRetryFn", 565 | "connectionTimeout", 566 | "maxReconnectAttempts", 567 | "reconnectInterval", 568 | ].forEach((property) => { 569 | Object.defineProperty(ReconnectingWebSocket.prototype, property, { 570 | enumerable: true, 571 | }); 572 | }); 573 | 574 | ["CONNECTING", "OPEN", "CLOSING", "CLOSED"].forEach((property) => { 575 | Object.defineProperty(ReconnectingWebSocket.prototype, property, { 576 | enumerable: true, 577 | value: readyStates.indexOf(property), 578 | }); 579 | }); 580 | 581 | ["CONNECTING", "OPEN", "CLOSING", "CLOSED"].forEach((property) => { 582 | Object.defineProperty(ReconnectingWebSocket, property, { 583 | enumerable: true, 584 | value: readyStates.indexOf(property), 585 | }); 586 | }); 587 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "private": true, 3 | "name": "pipecat-client-web-transports", 4 | "version": "0.0.0", 5 | "workspaces": [ 6 | "transports/*" 7 | ], 8 | "scripts": { 9 | "build": "npm run build --workspaces" 10 | }, 11 | "devDependencies": { 12 | "@parcel/packager-ts": "^2.13.2", 13 | "@parcel/transformer-typescript-tsc": "^2.13.2", 14 | "@parcel/transformer-typescript-types": "^2.13.2", 15 | "@parcel/validator-typescript": "^2.12.0", 16 | "@swc/helpers": "^0.5.13", 17 | "parcel": "^2.13.2", 18 | "prettier": "^3.5.3", 19 | "typescript": "^5.5.4" 20 | }, 21 | "peerDependencies": { 22 | "@daily-co/daily-js": "^0.77.0" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /transports/daily/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to **Pipecat Daily WebRTC Transport** will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
7 | 8 | ## [0.4.0] 9 | 10 | - Bumped dependency to @pipecat-ai/client-js@~0.4.0 11 | 12 | ## [0.3.10] 13 | 14 | - Fix an issue where iOS devices have ~500ms of audio cut off after declaring 15 | that the track state is playable. 16 | 17 | ## [0.3.9] 18 | 19 | DO NOT USE 20 | 21 | ## [0.3.8] 22 | 23 | - Fix issue resulting in the camera starting despite enableCam setting. 24 | 25 | ## [0.3.7] 26 | 27 | - Added support for disconnecting the client if the Daily call errors out. 28 | 29 | ## [0.3.6] 30 | 31 | ### Fixed 32 | 33 | - Fixed an issue where the transport could call `clientReady()` multiple times, 34 | once for each `track-started` event. Now, `clientReady()` is called for the 35 | first track only. 36 | 37 | - Added support for buffering audio until the bot is ready using the 38 | `bufferLocalAudioUntilBotReady` property. Once the bot is ready, the buffered 39 | audio will be sent, allowing the user to begin speaking before the bot has 40 | joined the call. 41 | 42 | ## [0.3.4] - 2024-12-16 43 | 44 | ### Added 45 | 46 | - Screen sharing support 47 | - Added `startScreenShare` and `stopScreenShare` methods 48 | - Added `isSharingScreen` getter property 49 | 50 | ## [0.3.3] - 2024-12-11 51 | 52 | - Fixed READMEs 53 | 54 | ## [0.3.2] - 2024-12-11 55 | 56 | - Added new abstract `RealtimeWebsocketTransport` class for direct 57 | voice-to-voice transports 58 | 59 | - Added new `GeminiLiveWebsocketTransport` 60 | 61 | - Added [basic example](./examples/geminiMultiModalLive) for using 62 | `GeminiLiveWebsocketTransport` 63 | 64 | ## [0.2.3] - 2024-12-06 65 | 66 | ### Fixed 67 | 68 | - Added missing event support for managing audio speakers 69 | 70 | ## [0.2.2] - 2024-11-12 71 | 72 | ### Added 73 | 74 | - Implemented log levels as part of `realtime-ai` package. 75 | 76 | ## [0.2.1] - 2024-10-28 77 | 78 | - Version bump to align with core `realtime-ai` package. 79 | -------------------------------------------------------------------------------- /transports/daily/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2024, Daily 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /transports/daily/README.md: -------------------------------------------------------------------------------- 1 | # Pipecat's Real-Time Voice Inference - Daily Transport 2 | 3 | [![Docs](https://img.shields.io/badge/documentation-blue)](https://docs.pipecat.ai/client/js/transports/daily) 4 | ![NPM Version](https://img.shields.io/npm/v/@pipecat-ai/daily-transport) 5 | [![Demo](https://img.shields.io/badge/Demo-coral)](https://github.com/pipecat-ai/pipecat/tree/main/examples/simple-chatbot) 6 | 7 | Daily transport package for use with `@pipecat-ai/client-js`. 8 | 9 | ## Installation 10 | 11 | ```bash copy 12 | npm install \ 13 | @pipecat-ai/client-js \ 14 | @pipecat-ai/daily-transport 15 | ``` 16 | 17 | ## Overview 18 | 19 | The DailyTransport class provides a WebRTC transport layer using [Daily.co's](https://daily.co) infrastructure. It handles audio/video device management, WebRTC connections, and real-time communication between clients and bots. 20 | 21 | ## Features 22 | 23 | - 🎥 Complete camera device management 24 | - 🎤 Microphone input handling 25 | - 🔊 Speaker output control 26 | - 📡 WebRTC connection management 27 | - 🤖 Bot participant tracking 28 | - 📊 Audio level monitoring 29 | - 💬 Real-time messaging 30 | 31 | ## Usage 32 | 33 | ### Basic Setup 34 | 35 | ```javascript 36 | import { RTVIClient } from "@pipecat-ai/client-js"; 37 | import { DailyTransport } from "@pipecat-ai/daily-transport"; 38 | 39 | const transport = new DailyTransport({ 40 | dailyFactoryOptions: { 41 | // Daily.co specific configuration 42 | } 43 | }); 44 | 45 | const rtviClient = new RTVIClient({ 46 | transport, 47 | enableCam: false, // Default camera off 48 | enableMic: true, // Default microphone on 49 | callbacks: { 50 | // Event handlers 51 | }, 52 | params: { 53 | baseUrl, 54 | endpoints 55 | } 56 | // ... 57 | }); 58 | 59 | await rtviClient.connect(); 60 | ``` 61 | 62 | ## API Reference 63 | 64 | ### Constructor Options 65 | 66 | ```typescript 67 | interface DailyTransportConstructorOptions { 68 | dailyFactoryOptions?: DailyFactoryOptions; // Daily.co specific configuration 69 | } 70 | ``` 71 | 72 | ### States 73 | 74 | The transport can be in one of these states: 75 | - "initializing" 76 | - "initialized" 77 | - "connecting" 78 | - "connected" 79 | - "ready" 80 | - "disconnecting" 81 | - "error" 82 | 83 | ## Events 84 | 85 | The transport implements the various [RTVI event handlers](https://docs.pipecat.ai/client/js/api-reference/callbacks). Check out the docs or samples for more info. 
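For example, here is a minimal sketch of registering a few of these callbacks when constructing the client. It reuses the `baseUrl` and `endpoints` values from the setup above; callback names follow the RTVI callback reference linked above, so treat this as a starting point rather than a complete list:

```javascript
import { RTVIClient } from "@pipecat-ai/client-js";
import { DailyTransport } from "@pipecat-ai/daily-transport";

const rtviClient = new RTVIClient({
  transport: new DailyTransport(),
  enableMic: true,
  params: { baseUrl, endpoints },
  callbacks: {
    // Fires whenever the transport moves through the states listed above
    onTransportStateChanged: (state) => console.log("transport state:", state),
    onConnected: () => console.log("connected to the session"),
    onDisconnected: () => console.log("disconnected from the session"),
    onBotStartedSpeaking: () => console.log("bot started speaking"),
    onBotStoppedSpeaking: () => console.log("bot stopped speaking"),
    onUserTranscript: (transcript) => console.log("user said:", transcript.text),
  },
});

await rtviClient.connect();
```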
86 | 87 | ## Error Handling 88 | 89 | The transport includes error handling for: 90 | - Connection failures 91 | - Device errors 92 | - Authentication issues 93 | - Message transmission problems 94 | 95 | ## License 96 | BSD-2 Clause 97 | -------------------------------------------------------------------------------- /transports/daily/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipecat-ai/daily-transport", 3 | "version": "0.4.0", 4 | "license": "BSD-2-Clause", 5 | "main": "dist/index.js", 6 | "module": "dist/index.module.js", 7 | "types": "dist/index.d.ts", 8 | "source": "src/index.ts", 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/pipecat-ai/pipecat-client-web-transports.git" 12 | }, 13 | "files": [ 14 | "dist", 15 | "package.json", 16 | "README.md" 17 | ], 18 | "scripts": { 19 | "build": "parcel build --no-cache", 20 | "dev": "parcel watch", 21 | "lint": "eslint . --ext ts --report-unused-disable-directives --max-warnings 0" 22 | }, 23 | "devDependencies": { 24 | "@pipecat-ai/client-js": "^0.4.0", 25 | "eslint": "9.11.1", 26 | "eslint-config-prettier": "^9.1.0", 27 | "eslint-plugin-simple-import-sort": "^12.1.1" 28 | }, 29 | "peerDependencies": { 30 | "@pipecat-ai/client-js": "~0.4.0" 31 | }, 32 | "dependencies": { 33 | "@daily-co/daily-js": "^0.77.0" 34 | }, 35 | "description": "Pipecat Daily Transport Package", 36 | "author": "Daily.co", 37 | "bugs": { 38 | "url": "https://github.com/pipecat-ai/pipecat-client-web-transports/issues" 39 | }, 40 | "homepage": "https://github.com/pipecat-ai/pipecat-client-web-transports/blob/main/transports/daily-webrtc/README.md" 41 | } 42 | -------------------------------------------------------------------------------- /transports/daily/src/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./transport"; 2 | -------------------------------------------------------------------------------- /transports/daily/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "skipLibCheck": true, 7 | "jsx": "preserve", 8 | 9 | /* Bundler mode */ 10 | "moduleResolution": "bundler", 11 | "allowImportingTsExtensions": true, 12 | "allowJs": true, 13 | "noEmit": true, 14 | "resolveJsonModule": true, 15 | "isolatedModules": true, 16 | "moduleDetection": "force", 17 | 18 | /* Linting */ 19 | "strict": true, 20 | "noUnusedLocals": true, 21 | "noUnusedParameters": false, 22 | "noFallthroughCasesInSwitch": true 23 | }, 24 | "include": ["src"] 25 | } 26 | -------------------------------------------------------------------------------- /transports/gemini-live-websocket-transport/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2024, Daily 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 
14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /transports/gemini-live-websocket-transport/README.md: -------------------------------------------------------------------------------- 1 | # Gemini Live Websocket Transport 2 | 3 | [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai/client/js/transports/gemini) 4 | [![Demo](https://img.shields.io/badge/Demo-forestgreen)](examples/directToLLMTransports/README.md) 5 | ![NPM Version](https://img.shields.io/npm/v/@pipecat-ai/gemini-live-websocket-transport) 6 | 7 | A real-time websocket transport implementation for interacting with Google's Gemini Multimodal Live API, supporting bidirectional audio and unidirectional text communication. 8 | 9 | ## Installation 10 | 11 | ```bash copy 12 | npm install \ 13 | @pipecat-ai/client-js \ 14 | @pipecat-ai/real-time-websocket-transport \ 15 | @pipecat-ai/gemini-live-websocket-transport 16 | ``` 17 | 18 | ## Overview 19 | 20 | The `GeminiLiveWebsocketTransport` class extends the `DirectToLLMBaseWebSocketTransport` to implement a fully functional [RTVI `Transport`](https://docs.pipecat.ai/client/js/transports/transport). It provides a framework for implementing real-time communication directly with the [Gemini Multimodal Live](https://ai.google.dev/api/multimodal-live) voice-to-voice service. It handles media device management, audio/video streams, and state management for the connection. 21 | 22 | ## Features 23 | 24 | - Real-time bidirectional communication with Gemini Multimodal Live 25 | - Input device management 26 | - Audio streaming support 27 | - Text message support 28 | - Automatic reconnection handling 29 | - Configurable generation parameters 30 | - Support for initial conversation context 31 | 32 | ## Usage 33 | 34 | ### Basic Setup 35 | 36 | ```javascript 37 | import { GeminiLiveWebsocketTransport, GeminiLLMServiceOptions } from '@pipecat-ai/gemini-live-websocket-transport'; 38 | 39 | const options: GeminiLLMServiceOptions = { 40 | api_key: 'YOUR_API_KEY', 41 | generation_config: { 42 | temperature: 0.7, 43 | maxOutput_tokens: 1000 44 | } 45 | }; 46 | 47 | const transport = new GeminiLiveWebsocketTransport(options); 48 | let RTVIConfig: RTVIClientOptions = { 49 | transport, 50 | ... 
51 | }; 52 | 53 | ``` 54 | 55 | ### Configuration Options 56 | 57 | ```typescript 58 | interface GeminiLLMServiceOptions { 59 | api_key: string; // Required: Your Gemini API key 60 | initial_messages?: Array<{ // Optional: Initial conversation context 61 | content: string; 62 | role: string; 63 | }>; 64 | generation_config?: { // Optional: Generation parameters 65 | candidate_count?: number; 66 | maxOutput_tokens?: number; 67 | temperature?: number; 68 | top_p?: number; 69 | top_k?: number; 70 | presence_penalty?: number; 71 | frequency_penalty?: number; 72 | response_modalities?: string; 73 | speech_config?: { 74 | voice_config?: { 75 | prebuilt_voice_config?: { 76 | voice_name: "Puck" | "Charon" | "Kore" | "Fenrir" | "Aoede"; 77 | }; 78 | }; 79 | }; 80 | }; 81 | } 82 | ``` 83 | 84 | ### Sending Messages 85 | 86 | ```javascript 87 | // at setup time... 88 | llmHelper = new LLMHelper({}); 89 | rtviClient.registerHelper("llm", llmHelper); 90 | // the 'llm' name in this call above isn't used. 91 | // that value is specific to working with a pipecat pipeline 92 | 93 | // at time of sending message... 94 | // Send text prompt message 95 | llmHelper.appendToMessages({ role: "user", content: 'Hello Gemini!' }); 96 | ``` 97 | 98 | ### Handling Events 99 | 100 | The transport implements the various [RTVI event handlers](https://docs.pipecat.ai/client/js/api-reference/callbacks). Check out the docs or samples for more info. 101 | 102 | ## API Reference 103 | 104 | ### Methods 105 | 106 | - `initialize()`: Set up the transport and establish connection 107 | - `sendMessage(message)`: Send a text message 108 | - `handleUserAudioStream(data)`: Stream audio data to the model 109 | - `disconnectLLM()`: Close the connection 110 | - `sendReadyMessage()`: Signal ready state 111 | 112 | ### States 113 | 114 | The transport can be in one of the following states: 115 | - "disconnected" 116 | - "initializing" 117 | - "initialized" 118 | - "connecting" 119 | - "connected" 120 | - "ready" 121 | - "disconnecting" 122 | - "error" 123 | 124 | ## Error Handling 125 | 126 | The transport includes comprehensive error handling for: 127 | - Connection failures 128 | - Websocket errors 129 | - API key validation 130 | - Message transmission errors 131 | 132 | ## License 133 | BSD-2 Clause 134 | -------------------------------------------------------------------------------- /transports/gemini-live-websocket-transport/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipecat-ai/gemini-live-websocket-transport", 3 | "version": "0.4.0", 4 | "license": "BSD-2-Clause", 5 | "main": "dist/index.js", 6 | "module": "dist/index.module.js", 7 | "types": "dist/index.d.ts", 8 | "source": "src/index.ts", 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/pipecat-ai/pipecat-client-web-transports.git" 12 | }, 13 | "files": [ 14 | "dist", 15 | "package.json", 16 | "README.md" 17 | ], 18 | "scripts": { 19 | "build": "parcel build --no-cache", 20 | "dev": "parcel watch", 21 | "lint": "eslint .
--ext ts --report-unused-disable-directives --max-warnings 0" 22 | }, 23 | "dependencies": { 24 | "@daily-co/daily-js": "^0.79.0" 25 | }, 26 | "devDependencies": { 27 | "@pipecat-ai/client-js": "^0.4.0", 28 | "@types/node": "^22.9.0", 29 | "eslint": "9.11.1", 30 | "eslint-config-prettier": "^9.1.0", 31 | "eslint-plugin-simple-import-sort": "^12.1.1" 32 | }, 33 | "peerDependencies": { 34 | "@pipecat-ai/client-js": "~0.4.0" 35 | }, 36 | "description": "Pipecat Gemini Multimodal Live Transport Package", 37 | "author": "Daily.co", 38 | "bugs": { 39 | "url": "https://github.com/pipecat-ai/pipecat-client-web-transports/issues" 40 | }, 41 | "homepage": "https://github.com/pipecat-ai/pipecat-client-web-transports/blob/main/transports/gemini-live-websocket-transport/README.md" 42 | } 43 | -------------------------------------------------------------------------------- /transports/gemini-live-websocket-transport/src/directToLLMBaseWebSocketTransport.ts: -------------------------------------------------------------------------------- 1 | import { 2 | BotTTSTextData, 3 | RTVIClientOptions, 4 | RTVIMessage, 5 | Tracks, 6 | TranscriptData, 7 | Transport, 8 | TransportState, 9 | } from "@pipecat-ai/client-js"; 10 | 11 | import { MediaManager } from "../../../lib/media-mgmt/mediaManager"; 12 | 13 | export interface LLMServiceOptions { 14 | api_key?: string; 15 | initial_messages?: Array; 16 | model?: string; 17 | settings?: Record; 18 | } 19 | 20 | /** 21 | * DirectToLLMBaseWebSocketTransport is an abstract class that provides a client-side 22 | * interface for connecting to a real-time AI service. It is intended to 23 | * connect directly to the service. (No Pipecat server is involved.) 24 | */ 25 | export abstract class DirectToLLMBaseWebSocketTransport extends Transport { 26 | // Utilities for audio. 27 | private _mediaManager; 28 | protected _service_options: LLMServiceOptions; 29 | 30 | protected _botIsSpeaking = false; 31 | 32 | constructor(service_options: LLMServiceOptions, manager: MediaManager) { 33 | super(); 34 | this._service_options = service_options; 35 | this._mediaManager = manager; 36 | this._mediaManager.setUserAudioCallback( 37 | this.handleUserAudioStream.bind(this), 38 | ); 39 | } 40 | 41 | /** 42 | * This method will be called from initialize() 43 | * Subclasses should initialize the LLM client and media player/recorder 44 | * and call initializeAudio() from within this method. 
45 | */ 46 | abstract initializeLLM(): void; 47 | /** 48 | * This method will be called from initialize() 49 | * Subclasses should set up listeners for LLM events from within this method 50 | */ 51 | abstract attachLLMListeners(): void; 52 | /** 53 | * This method will be called from connect() 54 | * Subclasses should connect to the LLM and pass along the initial messages 55 | * (initial messages are read from the service options) 56 | */ 57 | abstract connectLLM(): Promise<void>; 58 | /** 59 | * This method will be called from disconnect() 60 | * Subclasses should disconnect from the LLM 61 | */ 62 | abstract disconnectLLM(): Promise<void>; 63 | /** 64 | * This method will be called regularly with audio data from the user 65 | * Subclasses should handle this data and pass it along to the LLM 66 | * @param data ArrayBuffer of audio data 67 | */ 68 | abstract handleUserAudioStream(data: ArrayBuffer): void; 69 | 70 | // subclasses should implement this method to initialize the LLM 71 | // client and call super() on this method 72 | initialize( 73 | options: RTVIClientOptions, 74 | messageHandler: (ev: RTVIMessage) => void, 75 | ): void { 76 | this._options = options; 77 | this._callbacks = options.callbacks ?? {}; 78 | this._onMessage = messageHandler; 79 | 80 | this._mediaManager.setRTVIOptions(options); 81 | 82 | this.initializeLLM(); 83 | 84 | this.attachDeviceListeners(); 85 | this.attachLLMListeners(); 86 | 87 | this.state = "disconnected"; 88 | } 89 | 90 | async initDevices(): Promise<void> { 91 | this.state = "initializing"; 92 | await this._mediaManager.initialize(); 93 | this.state = "initialized"; 94 | } 95 | 96 | async connect( 97 | authBundle: unknown, 98 | abortController: AbortController, 99 | ): Promise<void> { 100 | this.state = "connecting"; 101 | 102 | await this.connectLLM(); 103 | 104 | // connect user audio to llm 105 | this._mediaManager.connect(); 106 | this.state = "connected"; 107 | this._callbacks.onConnected?.(); 108 | } 109 | 110 | async disconnect(): Promise<void> { 111 | this.state = "disconnecting"; 112 | await this._mediaManager.disconnect(); 113 | await this.disconnectLLM(); 114 | this.state = "disconnected"; 115 | this._callbacks.onDisconnected?.(); 116 | } 117 | 118 | getAllMics(): Promise<MediaDeviceInfo[]> { 119 | return this._mediaManager.getAllMics(); 120 | } 121 | getAllCams(): Promise<MediaDeviceInfo[]> { 122 | return this._mediaManager.getAllCams(); 123 | } 124 | getAllSpeakers(): Promise<MediaDeviceInfo[]> { 125 | return this._mediaManager.getAllSpeakers(); 126 | } 127 | 128 | async updateMic(micId: string): Promise<void> { 129 | return this._mediaManager.updateMic(micId); 130 | } 131 | updateCam(camId: string): void { 132 | return this._mediaManager.updateCam(camId); 133 | } 134 | updateSpeaker(speakerId: string): void { 135 | return this._mediaManager.updateSpeaker(speakerId); 136 | } 137 | 138 | get selectedMic(): MediaDeviceInfo | Record<string, never> { 139 | return this._mediaManager.selectedMic; 140 | } 141 | get selectedCam(): MediaDeviceInfo | Record<string, never> { 142 | return this._mediaManager.selectedCam; 143 | } 144 | get selectedSpeaker(): MediaDeviceInfo | Record<string, never> { 145 | return this._mediaManager.selectedSpeaker; 146 | } 147 | 148 | enableMic(enable: boolean): void { 149 | this._mediaManager.enableMic(enable); 150 | } 151 | enableCam(enable: boolean): void { 152 | this._mediaManager.enableCam(enable); 153 | } 154 | 155 | get isCamEnabled(): boolean { 156 | return this._mediaManager.isCamEnabled; 157 | } 158 | get isMicEnabled(): boolean { 159 | return this._mediaManager.isMicEnabled; 160 | } 161 | 162 | get state(): TransportState { 163 | return this._state; 164 | } 165 | 166 | set
state(state: TransportState) { 167 | if (this._state === state) return; 168 | 169 | this._state = state; 170 | this._callbacks.onTransportStateChanged?.(state); 171 | } 172 | 173 | get expiry(): number | undefined { 174 | return this._expiry; 175 | } 176 | 177 | tracks(): Tracks { 178 | return this._mediaManager.tracks(); 179 | } 180 | 181 | // Realtime event handlers 182 | async userStartedSpeaking(): Promise { 183 | // Handle interruption 184 | const trackSampleOffset = await this._mediaManager.userStartedSpeaking(); 185 | this._callbacks.onUserStartedSpeaking?.(); 186 | return trackSampleOffset; 187 | } 188 | 189 | userStoppedSpeaking(): void { 190 | this._callbacks.onUserStoppedSpeaking?.(); 191 | } 192 | 193 | userTranscript(transcript: TranscriptData): void { 194 | this._callbacks.onUserTranscript?.(transcript); 195 | } 196 | 197 | botStartedSpeaking(): void { 198 | if (!this._botIsSpeaking) { 199 | this._botIsSpeaking = true; 200 | this._callbacks.onBotStartedSpeaking?.(); 201 | } 202 | } 203 | 204 | botStoppedSpeaking(): void { 205 | if (this._botIsSpeaking) { 206 | this._botIsSpeaking = false; 207 | this._callbacks.onBotStoppedSpeaking?.(); 208 | } 209 | } 210 | 211 | botTtsText(data: BotTTSTextData): void { 212 | this._callbacks.onBotTtsText?.(data); 213 | } 214 | 215 | bufferBotAudio(audio: ArrayBuffer, id?: string): void { 216 | this._mediaManager.bufferBotAudio(audio, id); 217 | } 218 | 219 | connectionError(errorMsg: string): void { 220 | console.error(errorMsg); 221 | this.state = "error"; 222 | this.disconnect(); 223 | } 224 | 225 | private attachDeviceListeners(): void {} 226 | } 227 | -------------------------------------------------------------------------------- /transports/gemini-live-websocket-transport/src/geminiLiveWebSocketTransport.ts: -------------------------------------------------------------------------------- 1 | import { MediaManager } from "../../../lib/media-mgmt/mediaManager"; 2 | import { DailyMediaManager } from "../../../lib/media-mgmt/dailyMediaManager"; 3 | 4 | import { 5 | logger, 6 | RTVIActionRequestData, 7 | RTVIMessage, 8 | RTVIMessageType, 9 | TransportStartError, 10 | } from "@pipecat-ai/client-js"; 11 | import { ReconnectingWebSocket } from "../../../lib/websocket-utils/reconnectingWebSocket"; 12 | import { 13 | DirectToLLMBaseWebSocketTransport, 14 | LLMServiceOptions, 15 | } from "./directToLLMBaseWebSocketTransport"; 16 | 17 | const HOST = `generativelanguage.googleapis.com`; 18 | const BIDI_PATH = `google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`; 19 | const MODEL = "models/gemini-2.0-flash-exp"; 20 | 21 | export interface GeminiLLMServiceOptions extends LLMServiceOptions { 22 | initial_messages?: Array<{ content: string; role: string }>; 23 | api_key: string; 24 | settings?: { 25 | candidate_count?: number; 26 | maxOutput_tokens?: number; 27 | temperature?: number; 28 | top_p?: number; 29 | top_k?: number; 30 | presence_penalty?: number; 31 | frequency_penalty?: number; 32 | response_modalities?: string; 33 | speech_config?: { 34 | voice_config?: { 35 | prebuilt_voice_config?: { 36 | voice_name: "Puck" | "Charon" | "Kore" | "Fenrir" | "Aoede"; 37 | // | "Voice O"; 38 | }; 39 | }; 40 | }; 41 | }; 42 | } 43 | 44 | export class GeminiLiveWebsocketTransport extends DirectToLLMBaseWebSocketTransport { 45 | declare private _ws: ReconnectingWebSocket | null; 46 | declare private _botResponseID: number; 47 | declare private _botIsReadyResolve: 48 | | ((value: void | PromiseLike) => void) 49 | | null; 50 | 51 | 
constructor( 52 | service_options: GeminiLLMServiceOptions, 53 | manager?: MediaManager, 54 | ) { 55 | if (!manager) { 56 | manager = new DailyMediaManager(); 57 | } 58 | super(service_options, manager); 59 | 60 | this._ws = null; 61 | 62 | this._botResponseID = 0; 63 | } 64 | 65 | initializeLLM(): void { 66 | const service_options = this._service_options as GeminiLLMServiceOptions; 67 | const apiKey = service_options.api_key; 68 | if (!apiKey) { 69 | console.error("!!! No API key provided in llm_service_options"); 70 | return; 71 | } 72 | const base_url = `wss://${HOST}/ws/${BIDI_PATH}`; 73 | this._ws = new ReconnectingWebSocket(`${base_url}?key=${apiKey}`); 74 | // don't run the keep alive interval until we determine if there's an api for it 75 | this._ws.keepAliveInterval = 0; 76 | } 77 | 78 | // This is called from super.initialize() 79 | attachLLMListeners(): void { 80 | if (!this._ws) { 81 | console.error( 82 | "attachLLMListeners called before the websocket is initialized. Be sure to call initializeLLM() first.", 83 | ); 84 | return; 85 | } 86 | this._ws.on("open", () => {}); 87 | this._ws.on("message", async (msg: any) => { 88 | const content = msg.serverContent; 89 | if (!content) { 90 | if ("setupComplete" in msg) { 91 | this.state = "ready"; 92 | if (this._botIsReadyResolve) { 93 | this._botIsReadyResolve(); 94 | this._botIsReadyResolve = null; 95 | } 96 | } else { 97 | console.log("received unknown message", msg); 98 | } 99 | return; 100 | } 101 | if (content.modelTurn) { 102 | let result: ArrayBuffer | null = null; 103 | content.modelTurn.parts?.forEach((part: { inlineData: any }) => { 104 | if (part.inlineData?.data) { 105 | if (result) { 106 | mergeBuffers(result, base64ToArrayBuffer(part.inlineData.data)); 107 | } else { 108 | result = base64ToArrayBuffer(part.inlineData.data); 109 | } 110 | } 111 | }); 112 | if (result) { 113 | if (!this._botIsSpeaking) { 114 | this._botResponseID++; 115 | this.botStartedSpeaking(); 116 | } 117 | this.bufferBotAudio(result, this._botResponseID.toString()); 118 | } 119 | } else if (content.interrupted) { 120 | await this.userStartedSpeaking(); 121 | } else if (content.turnComplete) { 122 | this.botStoppedSpeaking(); 123 | } else { 124 | // console.log('unhandled message', content); 125 | } 126 | }); 127 | this._ws.on("error", (error: Error) => { 128 | this.connectionError(`websocket error: ${error}`); 129 | }); 130 | this._ws.on("connection-timeout", () => { 131 | this.connectionError("websocket connection timed out"); 132 | }); 133 | this._ws.on("close", (code: number) => { 134 | this.connectionError(`websocket connection closed. Code: ${code}`); 135 | }); 136 | this._ws.on("reconnect-failed", () => { 137 | this.connectionError(`websocket reconnect failed`); 138 | }); 139 | } 140 | 141 | async connectLLM(): Promise { 142 | if (!this._ws) { 143 | console.error( 144 | "connectLLM called before the websocket is initialized. Be sure to call initializeLLM() first.", 145 | ); 146 | return; 147 | } 148 | try { 149 | await this._ws.connect(); 150 | } catch (error) { 151 | const msg = `Failed to connect to LLM: ${error}`; 152 | console.error(msg); 153 | this.state = "error"; 154 | throw new TransportStartError(msg); 155 | } 156 | 157 | const service_options = this._service_options as GeminiLLMServiceOptions; 158 | const model = service_options?.model ?? MODEL; 159 | const generation_config = service_options?.settings ?? 
{}; 160 | let config = { setup: { model, generation_config } }; 161 | await this._sendMsg(config); 162 | 163 | // For this bare-bones prototype, let's just see if we have any initial_messages in the params 164 | // we were constructed with. 165 | if (service_options?.initial_messages) { 166 | service_options.initial_messages.forEach( 167 | (msg: { content: string; role: string }) => { 168 | this._sendTextInput(msg.content, msg.role); 169 | }, 170 | ); 171 | } 172 | } 173 | 174 | async disconnectLLM(): Promise { 175 | await this._ws?.close(); 176 | } 177 | 178 | async sendReadyMessage(): Promise { 179 | const p = new Promise((resolve) => { 180 | if (this.state === "ready") { 181 | resolve(); 182 | } else { 183 | this._botIsReadyResolve = resolve; 184 | } 185 | }); 186 | await p; 187 | this._onMessage({ 188 | type: RTVIMessageType.BOT_READY, 189 | data: {}, 190 | } as RTVIMessage); 191 | } 192 | 193 | handleUserAudioStream(data: ArrayBuffer): void { 194 | if (this.state === "ready") { 195 | try { 196 | void this._sendAudioInput(data); 197 | } catch (error) { 198 | console.error("Error adding audio to stream player", error); 199 | this.state = "error"; 200 | // todo: should check this error more carefully, implement disconnect, implement 201 | // ping/ack connection monitoring and reconnection logic, etc. 202 | } 203 | } 204 | } 205 | 206 | sendMessage(message: RTVIMessage): void { 207 | switch (message.type) { 208 | case "action": 209 | { 210 | const data = message.data as RTVIActionRequestData; 211 | switch (data.action) { 212 | case "append_to_messages": 213 | if (data.arguments) { 214 | for (const a of data.arguments) { 215 | if (a.name === "messages") { 216 | const value = a.value as Array<{ 217 | content: string; 218 | role: string; 219 | }>; 220 | for (const m of value) { 221 | this._sendTextInput(m.content, m.role); 222 | } 223 | } 224 | } 225 | } 226 | break; 227 | case "get_context": 228 | case "set_context": 229 | console.warn("get_context and set_context are not implemented"); 230 | break; 231 | } 232 | } 233 | break; 234 | } 235 | } 236 | 237 | async _sendAudioInput(data: ArrayBuffer): Promise { 238 | // TODO: pull this number from the media manager 239 | const sampleRate = 24000; 240 | const msg = { 241 | realtimeInput: { 242 | mediaChunks: [ 243 | { 244 | mimeType: `audio/pcm;rate=${sampleRate}`, 245 | data: arrayBufferToBase64(data), 246 | }, 247 | ], 248 | }, 249 | }; 250 | await this._sendMsg(msg); 251 | } 252 | 253 | async _sendTextInput(text: string, role: string): Promise { 254 | const msg = { 255 | clientContent: { 256 | turns: [ 257 | { 258 | role, 259 | parts: [{ text }], 260 | }, 261 | ], 262 | turnComplete: role === "user" ? 
true : false, 263 | }, 264 | }; 265 | await this._sendMsg(msg); 266 | } 267 | 268 | async _sendMsg(msg: unknown): Promise { 269 | if (!this._ws) { 270 | console.error("sendMsg called but WS is null"); 271 | return; 272 | } 273 | if (this._ws.readyState !== WebSocket.OPEN) { 274 | console.error("attempt to send to closed socket"); 275 | return; 276 | } 277 | if (!msg) { 278 | console.error("need a msg to send a msg"); 279 | return; 280 | } 281 | try { 282 | await this._ws.send(JSON.stringify(msg)); 283 | } catch (e) { 284 | console.error("sendMsg error", e); 285 | } 286 | } 287 | 288 | // Not implemented 289 | enableScreenShare(enable: boolean): void { 290 | logger.error( 291 | "startScreenShare not implemented for GeminiLiveWebsocketTransport", 292 | ); 293 | throw new Error("Not implemented"); 294 | } 295 | 296 | public get isSharingScreen(): boolean { 297 | logger.error( 298 | "isSharingScreen not implemented for GeminiLiveWebsocketTransport", 299 | ); 300 | return false; 301 | } 302 | } 303 | 304 | function base64ToArrayBuffer(base64: string): ArrayBuffer { 305 | const binaryString = atob(base64); 306 | const len = binaryString.length; 307 | const bytes = new Uint8Array(len); 308 | for (let i = 0; i < len; i++) { 309 | bytes[i] = binaryString.charCodeAt(i); 310 | } 311 | return bytes.buffer; 312 | } 313 | 314 | function arrayBufferToBase64(buffer: ArrayBuffer): string { 315 | const bytes = new Uint8Array(buffer); 316 | let binary = ""; 317 | for (let i = 0; i < bytes.byteLength; i++) { 318 | binary += String.fromCharCode(bytes[i]); 319 | } 320 | return btoa(binary); 321 | } 322 | 323 | function mergeBuffers( 324 | leftBuffer: ArrayBuffer, 325 | rightBuffer: ArrayBuffer, 326 | ): ArrayBuffer { 327 | const tmpArray = new Uint8Array( 328 | leftBuffer.byteLength + rightBuffer.byteLength, 329 | ); 330 | tmpArray.set(new Uint8Array(leftBuffer), 0); 331 | tmpArray.set(new Uint8Array(rightBuffer), leftBuffer.byteLength); 332 | return tmpArray.buffer; 333 | } 334 | -------------------------------------------------------------------------------- /transports/gemini-live-websocket-transport/src/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./geminiLiveWebSocketTransport"; 2 | -------------------------------------------------------------------------------- /transports/gemini-live-websocket-transport/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "types": ["node"], 7 | "skipLibCheck": true, 8 | "jsx": "preserve", 9 | 10 | /* Bundler mode */ 11 | "moduleResolution": "bundler", 12 | "allowImportingTsExtensions": true, 13 | "allowJs": true, 14 | "noEmit": true, 15 | "resolveJsonModule": true, 16 | "isolatedModules": true, 17 | "moduleDetection": "force", 18 | 19 | /* Linting */ 20 | "strict": true, 21 | "noUnusedLocals": true, 22 | "noUnusedParameters": false, 23 | "noFallthroughCasesInSwitch": true 24 | }, 25 | "include": ["src"] 26 | } 27 | -------------------------------------------------------------------------------- /transports/openai-realtime-webrtc-transport/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2024, Daily 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 
| 8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/transports/openai-realtime-webrtc-transport/README.md:
--------------------------------------------------------------------------------
1 | # OpenAI RealTime WebRTC Transport
2 |
3 | [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai/client/js/transports/openai-webrtc)
4 | [![Demo](https://img.shields.io/badge/Demo-forestgreen)](examples/directToLLMTransports/README.md)
5 | ![NPM Version](https://img.shields.io/npm/v/@pipecat-ai/openai-realtime-webrtc-transport)
6 |
7 | A real-time WebRTC transport implementation for interacting with the OpenAI Realtime API, supporting bidirectional audio and text communication.
8 |
9 | ## Installation
10 |
11 | ```bash copy
12 | npm install \
13 | @pipecat-ai/client-js \
14 | @pipecat-ai/openai-realtime-webrtc-transport
15 | ```
16 |
17 | ## Overview
18 |
19 | The `OpenAIRealTimeWebRTCTransport` is a fully functional [RTVI `Transport`](https://docs.pipecat.ai/client/js/transports/transport). It provides a framework for implementing real-time communication directly with the [OpenAI Realtime API using WebRTC](https://platform.openai.com/docs/guides/realtime-webrtc) voice-to-voice service. It handles media device management, audio/video streams, and state management for the connection.
20 |
21 | ## Features
22 |
23 | - Real-time bidirectional communication with the OpenAI Realtime API
24 | - Input device management
25 | - Audio streaming support
26 | - Text message support
27 | - Automatic reconnection handling
28 | - Configurable generation parameters
29 | - Support for initial conversation context
30 |
31 | ## Usage
32 |
33 | ### Basic Setup
34 |
35 | ```javascript
36 | import { OpenAIRealTimeWebRTCTransport, OpenAIServiceOptions } from '@pipecat-ai/openai-realtime-webrtc-transport';
37 |
38 | const options: OpenAIServiceOptions = {
39 |   api_key: 'YOUR_API_KEY',
40 |   session_config: {
41 |     instructions: 'you are a confused jellyfish',
42 |   }
43 | };
44 |
45 | const transport = new OpenAIRealTimeWebRTCTransport(options);
46 | let RTVIConfig: RTVIClientOptions = {
47 |   transport,
48 |   ...
49 | }; 50 | 51 | ``` 52 | 53 | ### Configuration Options 54 | 55 | ```typescript 56 | /********************************** 57 | * OpenAI-specific types 58 | * types and comments below are based on: 59 | * gpt-4o-realtime-preview-2024-12-17 60 | **********************************/ 61 | type JSONSchema = { [key: string]: any }; 62 | export type OpenAIFunctionTool = { 63 | type: "function"; 64 | name: string; 65 | description: string; 66 | parameters: JSONSchema; 67 | }; 68 | 69 | export type OpenAIServerVad = { 70 | type: "server_vad"; 71 | create_response?: boolean; // defaults to true 72 | interrupt_response?: boolean; // defaults to true 73 | prefix_padding_ms?: number; // defaults to 300ms 74 | silence_duration_ms?: number; // defaults to 500ms 75 | threshold?: number; // range (0.0, 1.0); defaults to 0.5 76 | }; 77 | 78 | export type OpenAISemanticVAD = { 79 | type: "semantic_vad"; 80 | eagerness?: "low" | "medium" | "high" | "auto"; // defaults to "auto", equivalent to "medium" 81 | create_response?: boolean; // defaults to true 82 | interrupt_response?: boolean; // defaults to true 83 | }; 84 | 85 | export type OpenAISessionConfig = Partial<{ 86 | modalities?: string; 87 | instructions?: string; 88 | voice?: 89 | | "alloy" 90 | | "ash" 91 | | "ballad" 92 | | "coral" 93 | | "echo" 94 | | "sage" 95 | | "shimmer" 96 | | "verse"; 97 | input_audio_noise_reduction?: { 98 | type: "near_field" | "far_field"; 99 | } | null; // defaults to null/off 100 | input_audio_transcription?: { 101 | model: "whisper-1" | "gpt-4o-transcribe" | "gpt-4o-mini-transcribe"; 102 | language?: string; 103 | prompt?: string[] | string; // gpt-4o models take a string 104 | } | null; // we default this to gpt-4o-transcribe 105 | turn_detection?: OpenAIServerVad | OpenAISemanticVAD | null; // defaults to server_vad 106 | temperature?: number; 107 | max_tokens?: number | "inf"; 108 | tools?: Array; 109 | }>; 110 | 111 | export interface OpenAIServiceOptions { 112 | api_key: string; 113 | model?: string; 114 | initial_messages?: LLMContextMessage[]; 115 | settings?: OpenAISessionConfig; 116 | } 117 | ``` 118 | 119 | ### Sending Messages 120 | 121 | ```javascript 122 | // at setup time... 123 | llmHelper = new LLMHelper({}); 124 | rtviClient.registerHelper("llm", llmHelper); 125 | // the 'llm' name in this call above isn't used. 126 | //that value is specific to working with a pipecat pipeline 127 | 128 | // at time of sending message... 129 | // Send text prompt message 130 | llmHelper.appendToMessages({ role: "user", content: 'Hello OpenAI!' }); 131 | ``` 132 | 133 | ### Handling Events 134 | 135 | The transport implements the various [RTVI event handlers](https://docs.pipecat.ai/client/js/api-reference/callbacks). Check out the docs or samples for more info. 
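For example, here is a minimal sketch of registering a few of these callbacks when constructing the client (the callback names follow the RTVI callback API; other client options are omitted for brevity):

```javascript
// Illustrative only: log a few of the events this transport surfaces.
const rtviClient = new RTVIClient({
  transport,
  callbacks: {
    onTransportStateChanged: (state) => console.log("transport state:", state),
    onBotStartedSpeaking: () => console.log("bot started speaking"),
    onBotStoppedSpeaking: () => console.log("bot stopped speaking"),
    onUserTranscript: (transcript) => console.log("user said:", transcript.text),
  },
  // ...remaining RTVIClientOptions
});
```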
136 |
137 | ### Updating Session Configuration
138 |
139 | ```javascript
140 | transport.updateSessionConfig({
141 |   instructions: 'you are an over-sharing neighbor',
142 |   input_audio_noise_reduction: {
143 |     type: 'near_field'
144 |   }
145 | });
146 | ```
147 |
148 | ## API Reference
149 |
150 | ### Methods
151 |
152 | - `initialize()`: Set up the transport and establish connection
153 | - `sendMessage(message)`: Send a text message
154 | - `handleUserAudioStream(data)`: Stream audio data to the model
155 | - `disconnectLLM()`: Close the connection
156 | - `sendReadyMessage()`: Signal ready state
157 |
158 | ### States
159 |
160 | The transport can be in one of the following states:
161 | - "disconnected"
162 | - "initializing"
163 | - "initialized"
164 | - "connecting"
165 | - "connected"
166 | - "ready"
167 | - "disconnecting"
168 | - "error"
169 |
170 | ## Error Handling
171 |
172 | The transport includes comprehensive error handling for:
173 | - Connection failures
174 | - WebRTC connection errors
175 | - API key validation
176 | - Message transmission errors
177 |
178 | ## License
179 | BSD-2 Clause
180 |
--------------------------------------------------------------------------------
/transports/openai-realtime-webrtc-transport/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "@pipecat-ai/openai-realtime-webrtc-transport",
3 |   "version": "0.4.0",
4 |   "license": "BSD-2-Clause",
5 |   "main": "dist/index.js",
6 |   "module": "dist/index.module.js",
7 |   "types": "dist/index.d.ts",
8 |   "source": "src/index.ts",
9 |   "repository": {
10 |     "type": "git",
11 |     "url": "git+https://github.com/pipecat-ai/pipecat-client-web-transports.git"
12 |   },
13 |   "files": [
14 |     "dist",
15 |     "package.json",
16 |     "README.md"
17 |   ],
18 |   "scripts": {
19 |     "build": "parcel build --no-cache",
20 |     "dev": "parcel watch",
21 |     "lint": "eslint . 
--ext ts --report-unused-disable-directives --max-warnings 0" 22 | }, 23 | "devDependencies": { 24 | "@pipecat-ai/client-js": "^0.4.0", 25 | "@types/node": "^22.9.0", 26 | "eslint": "9.11.1", 27 | "eslint-config-prettier": "^9.1.0", 28 | "eslint-plugin-simple-import-sort": "^12.1.1" 29 | }, 30 | "peerDependencies": { 31 | "@pipecat-ai/client-js": "~0.4.0" 32 | }, 33 | "dependencies": { 34 | "@daily-co/daily-js": "^0.77.0", 35 | "dequal": "^2.0.3" 36 | }, 37 | "description": "Pipecat OpenAI RealTime Transport Package", 38 | "author": "Daily.co", 39 | "bugs": { 40 | "url": "https://github.com/pipecat-ai/pipecat-client-web-transports/issues" 41 | }, 42 | "homepage": "https://github.com/pipecat-ai/pipecat-client-web-transports/blob/main/transports/openai-llm-direct-transport/README.md" 43 | } 44 | -------------------------------------------------------------------------------- /transports/openai-realtime-webrtc-transport/src/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./OpenAIRealTimeWebRTCTransport"; 2 | -------------------------------------------------------------------------------- /transports/openai-realtime-webrtc-transport/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "types": ["node"], 7 | "skipLibCheck": true, 8 | "jsx": "preserve", 9 | 10 | /* Bundler mode */ 11 | "moduleResolution": "bundler", 12 | "allowImportingTsExtensions": true, 13 | "noEmit": true, 14 | "resolveJsonModule": true, 15 | "isolatedModules": true, 16 | "moduleDetection": "force", 17 | 18 | /* Linting */ 19 | "strict": true, 20 | "noUnusedLocals": true, 21 | "noUnusedParameters": false, 22 | "noFallthroughCasesInSwitch": true 23 | }, 24 | "include": ["src"] 25 | } 26 | -------------------------------------------------------------------------------- /transports/small-webrtc-transport/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to **Pipecat Small WebRTC Transport** will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [0.4.0] 9 | 10 | - Bumped dependency to @pipecat-ai/client-js@~0.4.0 11 | 12 | ## [0.0.5] - 2025-05-19 13 | 14 | ### Fixed 15 | 16 | - `SmallWebRTCTransport` updates transport state to 'ready' when client ready message is sent. 17 | 18 | ## [0.0.4] - 2025-04-29 19 | 20 | ### Added 21 | 22 | - Added `waitForICEGathering` property: this allows users to configure whether the transport should 23 | explicitly wait for the iceGatheringState to become complete during the negotiation phase. 24 | 25 | ### Fixed 26 | 27 | - `SmallWebRTCTransport` class now accepts `RTCIceServer`[] instead of just the `String`[] of urls. 28 | 29 | ## [0.0.3] - 2025-04-11 30 | 31 | ### Added 32 | 33 | - Handling a new incoming `peerLeft` signalling messages from Pipecat. 34 | 35 | ## [0.0.2] - 2025-04-10 36 | 37 | ### Added 38 | 39 | - Send a signalling message whenever a track is enabled or disabled. 40 | - Handle incoming `renegotiate` signalling messages from Pipecat in a new format. 41 | 42 | ## [0.0.1] - 2025-04-09 43 | 44 | ### Added 45 | 46 | - Web client transport for the Pipecat **SmallWebRTCTransport**. 
47 | -------------------------------------------------------------------------------- /transports/small-webrtc-transport/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2024, Daily 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /transports/small-webrtc-transport/README.md: -------------------------------------------------------------------------------- 1 | # Pipecat's Real-Time Voice Inference - Small WebRTC Transport 2 | 3 | [![Docs](https://img.shields.io/badge/documentation-blue)](https://docs.pipecat.ai/client/js/transports/small-webrtc) 4 | ![NPM Version](https://img.shields.io/npm/v/@pipecat-ai/small-webrtc-transport) 5 | [![Demo](https://img.shields.io/badge/Demo-forestgreen)](https://github.com/pipecat-ai/pipecat/tree/main/examples/p2p-webrtc) 6 | 7 | Small WebRTC transport package for use with `@pipecat-ai/client-js`. 8 | 9 | ## Installation 10 | 11 | ```bash copy 12 | npm install \ 13 | @pipecat-ai/client-js \ 14 | @pipecat-ai/small-webrtc-transport 15 | ``` 16 | 17 | ## Overview 18 | 19 | The SmallWebRTCTransport class provides a WebRTC transport layer establishing a PeerConnection with Pipecat SmallWebRTCTransport. It handles audio/video device management, WebRTC connections, and real-time communication between client and bot. 20 | 21 | ## Features 22 | 23 | - 🎥 Complete camera device management 24 | - 🎤 Microphone input handling 25 | - 📡 WebRTC connection management 26 | - 🤖 Bot participant tracking 27 | - 💬 Real-time messaging 28 | 29 | ## Usage 30 | 31 | ### Basic Setup 32 | 33 | ```javascript 34 | import { RTVIClient } from "@pipecat-ai/client-js"; 35 | import { SmallWebRTCTransport } from "@pipecat-ai/small-webrtc-transport"; 36 | 37 | const transport = new SmallWebRTCTransport(); 38 | 39 | const rtviClient = new RTVIClient({ 40 | transport, 41 | enableCam: false, // Default camera off 42 | enableMic: true, // Default microphone on 43 | callbacks: { 44 | // Event handlers 45 | }, 46 | params: { 47 | baseUrl, 48 | endpoints 49 | } 50 | // ... 
51 | }); 52 | 53 | await rtviClient.connect(); 54 | ``` 55 | 56 | ## API Reference 57 | 58 | ### States 59 | 60 | The transport can be in one of these states: 61 | - "initializing" 62 | - "initialized" 63 | - "connecting" 64 | - "connected" 65 | - "ready" 66 | - "disconnecting" 67 | - "error" 68 | 69 | ## Events 70 | 71 | The transport implements the various [RTVI event handlers](https://docs.pipecat.ai/client/js/api-reference/callbacks). Check out the docs or samples for more info. 72 | 73 | ## Error Handling 74 | 75 | The transport includes error handling for: 76 | - Connection failures 77 | - Device errors 78 | 79 | ## License 80 | BSD-2 Clause 81 | -------------------------------------------------------------------------------- /transports/small-webrtc-transport/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipecat-ai/small-webrtc-transport", 3 | "version": "0.4.0", 4 | "license": "BSD-2-Clause", 5 | "main": "dist/index.js", 6 | "module": "dist/index.module.js", 7 | "types": "dist/index.d.ts", 8 | "source": "src/index.ts", 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/pipecat-ai/pipecat-client-web-transports.git" 12 | }, 13 | "files": [ 14 | "dist", 15 | "package.json", 16 | "README.md" 17 | ], 18 | "scripts": { 19 | "build": "parcel build --no-cache", 20 | "dev": "parcel watch", 21 | "lint": "eslint . --ext ts --report-unused-disable-directives --max-warnings 0", 22 | "prepare": "npm run build" 23 | }, 24 | "devDependencies": { 25 | "@pipecat-ai/client-js": "^0.4.0", 26 | "@types/node": "^22.9.0", 27 | "eslint": "9.11.1", 28 | "eslint-config-prettier": "^9.1.0", 29 | "eslint-plugin-simple-import-sort": "^12.1.1" 30 | }, 31 | "peerDependencies": { 32 | "@pipecat-ai/client-js": "~0.4.0" 33 | }, 34 | "dependencies": { 35 | "@daily-co/daily-js": "^0.77.0", 36 | "dequal": "^2.0.3" 37 | }, 38 | "description": "Pipecat Small WebRTC Transport Package", 39 | "author": "Daily.co", 40 | "bugs": { 41 | "url": "https://github.com/pipecat-ai/pipecat-client-web-transports/issues" 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /transports/small-webrtc-transport/src/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./smallWebRTCTransport"; 2 | -------------------------------------------------------------------------------- /transports/small-webrtc-transport/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "types": ["node"], 7 | "skipLibCheck": true, 8 | "jsx": "preserve", 9 | 10 | /* Bundler mode */ 11 | "moduleResolution": "bundler", 12 | "allowImportingTsExtensions": true, 13 | "allowJs": true, 14 | "noEmit": true, 15 | "resolveJsonModule": true, 16 | "isolatedModules": true, 17 | "moduleDetection": "force", 18 | 19 | /* Linting */ 20 | "strict": true, 21 | "noUnusedLocals": true, 22 | "noUnusedParameters": false, 23 | "noFallthroughCasesInSwitch": true 24 | }, 25 | "include": ["src"] 26 | } 27 | -------------------------------------------------------------------------------- /transports/websocket-transport/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2024, Daily 4 | 5 | Redistribution and use in source and binary forms, with or 
without 6 | modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/transports/websocket-transport/README.md:
--------------------------------------------------------------------------------
1 | # Websocket Transport
2 |
3 | [![Demo](https://img.shields.io/badge/Demo-forestgreen)](https://github.com/pipecat-ai/pipecat/tree/main/examples/websocket/README.md)
4 | ![NPM Version](https://img.shields.io/npm/v/@pipecat-ai/websocket-transport)
5 |
6 | Websocket transport package for use with `@pipecat-ai/client-js`.
7 |
8 | ## Installation
9 |
10 | ```bash copy
11 | npm install \
12 | @pipecat-ai/client-js \
13 | @pipecat-ai/websocket-transport
14 | ```
15 |
16 | ## Overview
17 |
18 | The WebSocketTransport class provides a WebSocket transport layer that establishes a connection with a Pipecat websocket transport on the server. It handles audio device management and real-time communication between client and bot.
19 |
20 | ## Features
21 |
22 | - 🎤 Microphone input handling
23 | - 🤖 Bot participant tracking
24 | - 💬 Real-time messaging
25 |
26 | ## Usage
27 |
28 | ### Basic Setup
29 |
30 | ```javascript
31 | import { RTVIClient } from "@pipecat-ai/client-js";
32 | import { WebSocketTransport } from "@pipecat-ai/websocket-transport";
33 |
34 | const transport = new WebSocketTransport();
35 |
36 | const rtviClient = new RTVIClient({
37 |   transport,
38 |   enableMic: true, // Default microphone on
39 |   callbacks: {
40 |     // Event handlers
41 |   },
42 |   params: {
43 |     baseUrl,
44 |     endpoints
45 |   }
46 |   // ...
47 | });
48 |
49 | await rtviClient.connect();
50 | ```
51 |
52 | ## API Reference
53 |
54 | ### States
55 |
56 | The transport can be in one of these states:
57 | - "initializing"
58 | - "initialized"
59 | - "connecting"
60 | - "connected"
61 | - "ready"
62 | - "disconnecting"
63 | - "error"
64 |
65 | ## Events
66 |
67 | The transport implements the various [RTVI event handlers](https://docs.pipecat.ai/client/js/api-reference/callbacks). Check out the docs or samples for more info.
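Connection-related callbacks (`onConnected`, `onDisconnected`, `onTransportStateChanged`) fire as the underlying websocket opens and closes. The websocket URL itself comes from the connect endpoint configured in `params`: the endpoint is expected to return an auth bundle with a `ws_url` field, which the transport dials when `rtviClient.connect()` is called. A minimal sketch (the URL is only a placeholder):

```javascript
// Placeholder sketch: the auth bundle returned by the connect endpoint
// must include the websocket address the transport should dial.
const authBundle = { ws_url: "ws://localhost:8765/ws" };
```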
68 | 69 | ## Error Handling 70 | 71 | The transport includes error handling for: 72 | - Connection failures 73 | - Device errors 74 | 75 | ## License 76 | BSD-2 Clause 77 | 78 | -------------------------------------------------------------------------------- /transports/websocket-transport/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pipecat-ai/websocket-transport", 3 | "version": "0.4.1", 4 | "license": "BSD-2-Clause", 5 | "main": "dist/index.js", 6 | "module": "dist/index.module.js", 7 | "types": "dist/index.d.ts", 8 | "source": "src/index.ts", 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/pipecat-ai/pipecat-client-web-transports.git" 12 | }, 13 | "files": [ 14 | "dist", 15 | "package.json", 16 | "README.md" 17 | ], 18 | "scripts": { 19 | "build_proto": "bash ./proto/generate_typescript.sh", 20 | "build": "parcel build --no-cache", 21 | "dev": "parcel watch", 22 | "lint": "eslint . --ext ts --report-unused-disable-directives --max-warnings 0" 23 | }, 24 | "devDependencies": { 25 | "@pipecat-ai/client-js": "^0.4.0", 26 | "@types/node": "^22.9.0", 27 | "eslint": "9.11.1", 28 | "eslint-config-prettier": "^9.1.0", 29 | "eslint-plugin-simple-import-sort": "^12.1.1" 30 | }, 31 | "peerDependencies": { 32 | "@pipecat-ai/client-js": "~0.4.0" 33 | }, 34 | "dependencies": { 35 | "@daily-co/daily-js": "^0.79.0", 36 | "@protobuf-ts/plugin": "^2.11.0", 37 | "@protobuf-ts/runtime": "^2.11.0" 38 | }, 39 | "description": "Pipecat Base Transport for RealTime WebSocket APIs Package", 40 | "author": "Daily.co", 41 | "bugs": { 42 | "url": "https://github.com/pipecat-ai/pipecat-client-web-transports/issues" 43 | }, 44 | "homepage": "https://github.com/pipecat-ai/pipecat-client-web-transports/blob/main/transports/realtime-websocket-transport/README.md" 45 | } 46 | -------------------------------------------------------------------------------- /transports/websocket-transport/proto/frames.proto: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2024–2025, Daily 3 | // 4 | // SPDX-License-Identifier: BSD 2-Clause License 5 | // 6 | 7 | // Generate frames_pb2.py with: 8 | // 9 | // python -m grpc_tools.protoc --proto_path=./ --python_out=./protobufs frames.proto 10 | 11 | syntax = "proto3"; 12 | 13 | package pipecat; 14 | 15 | // Represents a basic unit of text data. 16 | message TextFrame { 17 | uint64 id = 1; 18 | string name = 2; 19 | string text = 3; 20 | } 21 | 22 | // Represents a raw chunk of audio data, 23 | // either generated by Pipecat for playback 24 | // or to be sent to Pipecat for processing. 25 | message AudioRawFrame { 26 | uint64 id = 1; 27 | string name = 2; 28 | bytes audio = 3; 29 | uint32 sample_rate = 4; 30 | uint32 num_channels = 5; 31 | optional uint64 pts = 6; 32 | } 33 | 34 | // Represents a transcribed text frame with speaker metadata. 35 | // Typically created when a participant speaks. 36 | message TranscriptionFrame { 37 | uint64 id = 1; 38 | string name = 2; 39 | string text = 3; 40 | string user_id = 4; 41 | string timestamp = 5; 42 | } 43 | 44 | // Wrapper for a generic message sent to or received from the transport layer. 45 | // Commonly used for RTVI protocol messages. 
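// The data field carries the RTVI message serialized as a JSON string.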
46 | message MessageFrame { 47 | string data = 1; 48 | } 49 | 50 | message Frame { 51 | oneof frame { 52 | TextFrame text = 1; 53 | AudioRawFrame audio = 2; 54 | TranscriptionFrame transcription = 3; 55 | MessageFrame message = 4; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /transports/websocket-transport/proto/generate_typescript.sh: -------------------------------------------------------------------------------- 1 | # Use this script to generate the typescript each time we change the frames.proto file 2 | rm -rf ./src/generated/* 3 | protoc \ 4 | --ts_out=generate_dependencies:./src/generated \ 5 | proto/frames.proto 6 | -------------------------------------------------------------------------------- /transports/websocket-transport/src/index.ts: -------------------------------------------------------------------------------- 1 | // export * from "./realTimeWebSocketTransport"; 2 | // export * from "../../../lib/wavtools/dist/index.d.ts"; 3 | 4 | import { WavMediaManager } from "../../../lib/media-mgmt/mediaManager"; 5 | import { DailyMediaManager } from "../../../lib/media-mgmt/dailyMediaManager"; 6 | import { WebSocketTransport } from "./webSocketTransport.ts"; 7 | 8 | export { WavMediaManager, DailyMediaManager, WebSocketTransport }; 9 | -------------------------------------------------------------------------------- /transports/websocket-transport/src/webSocketTransport.ts: -------------------------------------------------------------------------------- 1 | import { 2 | logger, 3 | RTVIClientOptions, 4 | RTVIMessage, 5 | Tracks, 6 | Transport, 7 | TransportStartError, 8 | TransportState, 9 | } from "@pipecat-ai/client-js"; 10 | 11 | import { ReconnectingWebSocket } from "../../../lib/websocket-utils/reconnectingWebSocket"; 12 | import { DailyMediaManager } from "../../../lib/media-mgmt/dailyMediaManager"; 13 | 14 | import { Frame } from "./generated/proto/frames"; 15 | import { MediaManager } from "../../../lib/media-mgmt/mediaManager"; 16 | 17 | export class WebSocketTransport extends Transport { 18 | declare private _ws: ReconnectingWebSocket | null; 19 | private static RECORDER_SAMPLE_RATE = 16_000; 20 | private audioQueue: ArrayBuffer[] = []; 21 | private _mediaManager: MediaManager; 22 | 23 | constructor() { 24 | super(); 25 | this._mediaManager = new DailyMediaManager( 26 | true, 27 | true, 28 | undefined, 29 | undefined, 30 | 512, 31 | WebSocketTransport.RECORDER_SAMPLE_RATE, 32 | ); 33 | this._mediaManager.setUserAudioCallback( 34 | this.handleUserAudioStream.bind(this), 35 | ); 36 | this._ws = null; 37 | } 38 | 39 | initialize( 40 | options: RTVIClientOptions, 41 | messageHandler: (ev: RTVIMessage) => void, 42 | ): void { 43 | this._options = options; 44 | this._callbacks = options.callbacks ?? 
{}; 45 | this._onMessage = messageHandler; 46 | this._mediaManager.setRTVIOptions(options); 47 | this.state = "disconnected"; 48 | } 49 | 50 | async initDevices(): Promise { 51 | this.state = "initializing"; 52 | await this._mediaManager.initialize(); 53 | this.state = "initialized"; 54 | } 55 | 56 | async connect( 57 | authBundle: unknown, 58 | abortController: AbortController, 59 | ): Promise { 60 | this.state = "connecting"; 61 | try { 62 | this._ws = this.initializeWebsocket(authBundle); 63 | await this._ws.connect(); 64 | await this._mediaManager.connect(); 65 | this.state = "connected"; 66 | this._callbacks.onConnected?.(); 67 | } catch (error) { 68 | const msg = `Failed to connect to websocket: ${error}`; 69 | logger.error(msg); 70 | this.state = "error"; 71 | throw new TransportStartError(msg); 72 | } 73 | } 74 | 75 | async disconnect(): Promise { 76 | this.state = "disconnecting"; 77 | await this._mediaManager.disconnect(); 78 | await this._ws?.close(); 79 | this.state = "disconnected"; 80 | this._callbacks.onDisconnected?.(); 81 | } 82 | 83 | getAllMics(): Promise { 84 | return this._mediaManager.getAllMics(); 85 | } 86 | getAllCams(): Promise { 87 | return this._mediaManager.getAllCams(); 88 | } 89 | getAllSpeakers(): Promise { 90 | return this._mediaManager.getAllSpeakers(); 91 | } 92 | 93 | async updateMic(micId: string): Promise { 94 | return this._mediaManager.updateMic(micId); 95 | } 96 | updateCam(camId: string): void { 97 | return this._mediaManager.updateCam(camId); 98 | } 99 | updateSpeaker(speakerId: string): void { 100 | return this._mediaManager.updateSpeaker(speakerId); 101 | } 102 | 103 | get selectedMic(): MediaDeviceInfo | Record { 104 | return this._mediaManager.selectedMic; 105 | } 106 | get selectedSpeaker(): MediaDeviceInfo | Record { 107 | return this._mediaManager.selectedSpeaker; 108 | } 109 | 110 | enableMic(enable: boolean): void { 111 | this._mediaManager.enableMic(enable); 112 | } 113 | get isMicEnabled(): boolean { 114 | return this._mediaManager.isMicEnabled; 115 | } 116 | 117 | get state(): TransportState { 118 | return this._state; 119 | } 120 | 121 | set state(state: TransportState) { 122 | if (this._state === state) return; 123 | 124 | this._state = state; 125 | this._callbacks.onTransportStateChanged?.(state); 126 | } 127 | 128 | get expiry(): number | undefined { 129 | return this._expiry; 130 | } 131 | 132 | tracks(): Tracks { 133 | return this._mediaManager.tracks(); 134 | } 135 | 136 | initializeWebsocket(authBundle: any): ReconnectingWebSocket { 137 | const ws = new ReconnectingWebSocket(`${authBundle.ws_url}`, undefined, { 138 | parseBlobToJson: false, 139 | }); 140 | // disabling the keep alive, there is no API for it inside Pipecat 141 | ws.keepAliveInterval = 0; 142 | ws.on("open", () => { 143 | logger.debug("Websocket connection opened"); 144 | }); 145 | ws.on("message", async (data: Blob) => { 146 | let arrayBuffer: ArrayBuffer = await data.arrayBuffer(); 147 | const parsedFrame = Frame.fromBinary(new Uint8Array(arrayBuffer)).frame; 148 | if (parsedFrame.oneofKind === "audio") { 149 | // We should be able to use parsedFrame.audio.audio.buffer but for 150 | // some reason that contains all the bytes from the protobuf message. 
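// Copying the bytes into a fresh Uint8Array gives us a buffer containing only
// the audio payload, which is then reinterpreted as 16-bit PCM samples below.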
151 | const audioVector = Array.from(parsedFrame.audio.audio); 152 | const uint8Array = new Uint8Array(audioVector); 153 | const int16Array = new Int16Array(uint8Array.buffer); 154 | this._mediaManager.bufferBotAudio(int16Array); 155 | } else if (parsedFrame.oneofKind === "message") { 156 | let jsonText = parsedFrame.message.data; 157 | try { 158 | let jsonMessage = JSON.parse(jsonText); 159 | if (jsonMessage.label === "rtvi-ai") { 160 | this._onMessage(jsonMessage as RTVIMessage); 161 | } 162 | } catch { 163 | logger.warn("Failed to parse message", jsonText); 164 | } 165 | } 166 | }); 167 | ws.on("error", (error: Error) => { 168 | this.connectionError(`websocket error: ${error}`); 169 | }); 170 | ws.on("connection-timeout", () => { 171 | this.connectionError("websocket connection timed out"); 172 | }); 173 | ws.on("close", (code: number) => { 174 | this.connectionError(`websocket connection closed. Code: ${code}`); 175 | }); 176 | ws.on("reconnect-failed", () => { 177 | this.connectionError(`websocket reconnect failed`); 178 | }); 179 | return ws; 180 | } 181 | 182 | sendReadyMessage(): void { 183 | this.state = "ready"; 184 | this.sendMessage(RTVIMessage.clientReady()); 185 | } 186 | 187 | handleUserAudioStream(data: ArrayBuffer): void { 188 | if (this.state === "ready") { 189 | try { 190 | void this.flushAudioQueue(); 191 | void this._sendAudioInput(data); 192 | } catch (error) { 193 | logger.error("Error sending audio stream to websocket:", error); 194 | this.state = "error"; 195 | } 196 | } else { 197 | this.audioQueue.push(data); 198 | } 199 | } 200 | 201 | private flushAudioQueue(): void { 202 | if (this.audioQueue.length <= 0) { 203 | return; 204 | } 205 | logger.info("Will flush audio queue", this.audioQueue.length); 206 | while (this.audioQueue.length > 0) { 207 | const queuedData = this.audioQueue.shift(); 208 | if (queuedData) void this._sendAudioInput(queuedData); 209 | } 210 | } 211 | 212 | sendMessage(message: RTVIMessage): void { 213 | logger.debug("Received message to send to Web Socket", message); 214 | const frame = Frame.create({ 215 | frame: { 216 | oneofKind: "message", 217 | message: { 218 | data: JSON.stringify(message), 219 | }, 220 | }, 221 | }); 222 | void this._sendMsg(frame); 223 | } 224 | 225 | async _sendAudioInput(data: ArrayBuffer): Promise { 226 | const pcmByteArray = new Uint8Array(data); 227 | const frame = Frame.create({ 228 | frame: { 229 | oneofKind: "audio", 230 | audio: { 231 | id: 0n, 232 | name: "audio", 233 | audio: pcmByteArray, 234 | sampleRate: WebSocketTransport.RECORDER_SAMPLE_RATE, 235 | numChannels: 1, 236 | }, 237 | }, 238 | }); 239 | await this._sendMsg(frame); 240 | } 241 | 242 | async _sendMsg(msg: Frame): Promise { 243 | if (!this._ws) { 244 | logger.error("sendMsg called but WS is null"); 245 | return; 246 | } 247 | if (this._ws.readyState !== WebSocket.OPEN) { 248 | logger.error("attempt to send to closed socket"); 249 | return; 250 | } 251 | if (!msg) { 252 | logger.error("need a msg to send a msg"); 253 | return; 254 | } 255 | try { 256 | const encodedFrame = new Uint8Array(Frame.toBinary(msg)); 257 | await this._ws.send(encodedFrame); 258 | } catch (e) { 259 | logger.error("sendMsg error", e); 260 | } 261 | } 262 | 263 | connectionError(errorMsg: string): void { 264 | console.error(errorMsg); 265 | this.state = "error"; 266 | void this.disconnect(); 267 | } 268 | 269 | // Not implemented 270 | enableScreenShare(enable: boolean): void { 271 | logger.error("startScreenShare not implemented for WebSocketTransport"); 272 | throw new 
Error("Not implemented"); 273 | } 274 | 275 | public get isSharingScreen(): boolean { 276 | logger.error("isSharingScreen not implemented for WebSocketTransport"); 277 | return false; 278 | } 279 | 280 | enableCam(enable: boolean) { 281 | logger.error("enableCam not implemented for WebSocketTransport"); 282 | throw new Error("Not implemented"); 283 | } 284 | 285 | get isCamEnabled(): boolean { 286 | logger.error("isCamEnabled not implemented for WebSocketTransport"); 287 | return false; 288 | } 289 | 290 | get selectedCam(): MediaDeviceInfo | Record { 291 | logger.error("selectedCam not implemented for WebSocketTransport"); 292 | throw new Error("Not implemented"); 293 | } 294 | } 295 | -------------------------------------------------------------------------------- /transports/websocket-transport/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "types": ["node"], 7 | "skipLibCheck": true, 8 | "jsx": "preserve", 9 | 10 | /* Bundler mode */ 11 | "moduleResolution": "bundler", 12 | "allowImportingTsExtensions": true, 13 | "allowJs": true, 14 | "noEmit": true, 15 | "resolveJsonModule": true, 16 | "isolatedModules": true, 17 | "moduleDetection": "force", 18 | 19 | /* Linting */ 20 | "strict": true, 21 | "noUnusedLocals": true, 22 | "noUnusedParameters": false, 23 | "noFallthroughCasesInSwitch": true 24 | }, 25 | "include": ["src"] 26 | } 27 | --------------------------------------------------------------------------------