├── .cargo └── config.toml ├── .gitignore ├── .ruff.toml ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── daily-python.gif ├── daily.pyi ├── demos ├── README.md ├── audio │ ├── async_wav_audio_send.py │ ├── raw_audio_receive.py │ ├── raw_audio_send.py │ ├── timed_wav_audio_receive.py │ ├── wav_audio_receive.py │ └── wav_audio_send.py ├── deepgram │ └── deepgram_text_to_speech.py ├── flask │ ├── README.md │ ├── app.py │ └── bot.py ├── google │ ├── google_speech_to_text.py │ └── google_text_to_speech.py ├── gstreamer │ └── media_player.py ├── gtk │ └── gtk_app.py ├── openai │ └── dall-e.py ├── pyaudio │ └── record_and_play.py ├── qt │ └── qt_app.py ├── recording │ ├── auto_recording.py │ ├── env.example │ └── sample.jpg ├── remote_participant_control │ └── remote_participant_control.py ├── requirements.txt ├── vad │ └── native_vad.py ├── video │ └── send_image.py └── yolo │ └── yolo.py ├── docs ├── Makefile └── src │ ├── api_reference.rst │ ├── conf.py │ ├── favicon.ico │ ├── index.rst │ └── types.rst ├── pyproject.toml ├── requirements-linux.txt ├── requirements.txt └── src ├── call_client.rs ├── call_client ├── delegate.rs ├── event.rs ├── event_handler.rs ├── live_stream.rs └── recording.rs ├── context.rs ├── lib.rs ├── media.rs ├── media ├── audio_data.rs ├── custom_audio_source.rs ├── custom_audio_track.rs ├── native_vad.rs ├── video_frame.rs ├── virtual_camera_device.rs ├── virtual_microphone_device.rs └── virtual_speaker_device.rs ├── util.rs └── util ├── dict.rs └── memory.rs /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.x86_64-unknown-linux-gnu] 2 | rustflags = ["-C", "link-args=-fuse-ld=lld"] 3 | 4 | [target.aarch64-unknown-linux-gnu] 5 | linker = "aarch64-linux-gnu-gcc" 6 | 7 | [target.aarch64-apple-darwin] 8 | rustflags = [ 9 | "-C", "link-arg=-undefined", 10 | "-C", "link-arg=dynamic_lookup", 11 | "-C", "link-arg=-ObjC", 12 | ] 13 | 14 | [target.x86_64-apple-darwin] 15 | rustflags = [ 16 | "-C", "link-arg=-undefined", 17 | "-C", "link-arg=dynamic_lookup", 18 | "-C", "link-arg=-ObjC", 19 | ] 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | .pytest_cache/ 6 | *.py[cod] 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | .venv/ 14 | env/ 15 | bin/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | include/ 26 | man/ 27 | venv/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | pip-selfcheck.json 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | 45 | # Translations 46 | *.mo 47 | 48 | # Mr Developer 49 | .mr.developer.cfg 50 | .project 51 | .pydevproject 52 | 53 | # Rope 54 | .ropeproject 55 | 56 | # Django stuff: 57 | *.log 58 | *.pot 59 | 60 | .DS_Store 61 | 62 | # Sphinx documentation 63 | docs/_build/ 64 | 65 | # PyCharm 66 | .idea/ 67 | 68 | # VSCode 69 | .vscode/ 70 | 71 | # Pyenv 72 | .python-version -------------------------------------------------------------------------------- /.ruff.toml: -------------------------------------------------------------------------------- 1 | line-length=100 
-------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "daily-python" 3 | version = "0.19.1" 4 | edition = "2021" 5 | license = "BSD 2-Clause License" 6 | resolver = "2" 7 | 8 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 9 | [lib] 10 | name = "daily" 11 | crate-type = ["cdylib"] 12 | 13 | [dependencies] 14 | pyo3 = { version = "0.22.6", features = ["abi3-py37", "py-clone"] } 15 | daily-core = { path = "../daily-core" } 16 | futures = { version = "0.3.31", default-features = false, features = [ 17 | "std", 18 | "executor", 19 | ] } 20 | lazy_static = "1.5.0" 21 | libc = "0.2.171" 22 | serde = { version = "1.0.219", default-features = false, features = ["derive"] } 23 | serde_json = "1.0.140" 24 | tracing = "0.1.41" 25 | uuid = { version = "1.16.0", default-features = false, features = [ 26 | "serde", 27 | "v4", 28 | ] } 29 | webrtc-daily = { path = "../webrtc-daily" } 30 | 31 | # You might be wondering why we need this, since `daily-python` does not 32 | # build for WASM. 33 | # Cargo's crate resolver will check the entire dependency tree regardless of the 34 | # actual build target, and thus will fail to find a `wasm-bindgen` with the 35 | # features we need, since they are not upstreamed yet. 36 | # Because daily-python is not a member of the larger `daily-x` workspace, it 37 | # does not know of the patches that we apply on wasm-bindgen there, so we have 38 | # to tell Cargo again that we're patching all the wasm-bindgen crates. 39 | [patch.crates-io] 40 | wasm-bindgen = { git = "https://github.com/daily-co/wasm-bindgen", branch = "daily-co" } 41 | wasm-bindgen-futures = { git = "https://github.com/daily-co/wasm-bindgen", branch = "daily-co" } 42 | js-sys = { git = "https://github.com/daily-co/wasm-bindgen", branch = "daily-co" } 43 | web-sys = { git = "https://github.com/daily-co/wasm-bindgen", branch = "daily-co" } 44 | 45 | [lints.clippy] 46 | # Fix clippy issue because of older pyo3 < 0.23.x. 47 | useless_conversion = "allow" 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2023, Daily 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PyPI](https://img.shields.io/pypi/v/daily-python)](https://pypi.org/project/daily-python) 2 | [![Docs](https://img.shields.io/badge/API-docs-00CC00)](https://reference-python.daily.co/) 3 | 4 | # 🐍 Daily Client SDK for Python 5 | 6 | > **Looking to develop voice and video agents?** 7 | > 8 | > Check out our voice and multimodal conversational AI framework [Pipecat](https://github.com/pipecat-ai/pipecat), which has excellent support for Daily and already uses this Python SDK. 9 | 10 | The Daily client SDK for Python allows you to build video and audio calling into your native desktop and server applications. 11 | 12 | The SDK's core features include: 13 | 14 | - Joining a Daily meeting as a participant 15 | - As a meeting participant, configuring inputs, publishing, and subscription settings 16 | - Updating remote participant permissions 17 | - Receiving video and/or audio from other participants 18 | - Sending video and/or audio into a meeting 19 | - Starting and stopping recordings and live streams 20 | 21 | ## 📚 Documentation 22 | 23 | See the [Daily Python API docs](https://reference-python.daily.co/index.html). 24 | 25 | For demos on how to use `daily-python`, refer to the [demos](https://github.com/daily-co/daily-python/tree/main/demos) directory. 26 | 27 | ## 📋 Requirements 28 | 29 | - Python 3.7 or newer 30 | - glibc 2.28 or newer 31 | 32 | ## 🛠️ Installation 33 | 34 | `daily-python` can be easily installed using `pip`: 35 | 36 | ```bash 37 | pip install daily-python 38 | ``` 39 | 40 | To upgrade: 41 | 42 | ```bash 43 | pip install -U daily-python 44 | ``` 45 | 46 | ## 🛟 Getting help 47 | 48 | ➡️ [Join our Discord](https://discord.gg/dailyco) 49 | 50 | ➡️ [Getting started guide](https://docs.daily.co/reference/daily-python) 51 | -------------------------------------------------------------------------------- /daily-python.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daily-co/daily-python/1f354bcb7fe7201e9c274da95f2f7e17c649cb36/daily-python.gif -------------------------------------------------------------------------------- /demos/README.md: -------------------------------------------------------------------------------- 1 | # daily-python demos 2 | 3 | Here you can find a few demos that use Daily's Python SDK: 4 | 5 | - **audio**: Examples on how to send and receive RAW audio or WAV files. 6 | - **deepgram**: An example showing how to use the Deepgram [Text-To-Speech](https://developers.deepgram.com/docs/text-to-speech) API. 7 | - **flask**: A demo that uses [Flask](https://flask.palletsprojects.com/) and [Celery](https://docs.celeryq.dev/) to launch multiple concurrent audio bots.
8 | - **google**: Audio examples using Google [Speech-To-Text](https://cloud.google.com/speech-to-text) and [Text-To-Speech](https://cloud.google.com/text-to-speech) APIs. 9 | - **gstreamer**: A media player based on [GStreamer](https://gstreamer.freedesktop.org/) that sends a video file into a meeting. 10 | - **gtk**: A native [Gtk](https://www.gtk.org/) application that shows how to receive and render video frames for a participant. 11 | - **openai**: A demo that takes spoken audio, converts it to a text prompt, and uses [DALL-E](https://openai.com/dall-e) to generate an image. 12 | - **pyaudio**: A demo that shows how to use [PyAudio](https://pypi.org/project/PyAudio/) to record and play audio with real microphones and speakers. 13 | - **qt**: A native [Qt](https://www.qt.io/qt-for-python) application that shows how to receive and render video frames for a participant. 14 | - **vad**: Voice Activity Detection (VAD) examples. 15 | - **video**: Examples on how to send and receive video or images. 16 | - **yolo**: A demo that detects objects in a participant's video feed using [YOLOv5](https://pypi.org/project/yolov5/). 17 | 18 | # Running 19 | 20 | The demos have a few Python dependencies. To keep things clean, it's always a 21 | good idea to use a virtual environment: 22 | 23 | ``` 24 | python3 -m venv .venv 25 | source .venv/bin/activate 26 | ``` 27 | 28 | Once the virtual environment is activated you can install the dependencies via 29 | `pip`: 30 | 31 | ``` 32 | pip3 install -r requirements.txt 33 | ``` 34 | 35 | ℹ️ `daily-python` is not included in the `requirements.txt` file so you need to 36 | install it manually: 37 | 38 | ``` 39 | pip3 install daily-python 40 | ``` 41 | 42 | ⚠️ It's possible that some requirements fail to install because of missing system 43 | dependencies (e.g. `PyAudio` depends on the `portaudio` library). In those cases, 44 | it is necessary to install those dependencies manually (error messages might 45 | give hints on what system libraries are missing). Another option is to 46 | remove the conflicting dependencies from `requirements.txt`. 47 | 48 | Finally, view the demo files for more details, including how to run them. 49 | -------------------------------------------------------------------------------- /demos/audio/async_wav_audio_send.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and send the audio from a WAV file into 3 | # the meeting. It uses the asyncio library.
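# Note: daily-python completion callbacks do not run on the asyncio event loop, so the helpers below hand their results back to it through asyncio futures and call_soon_threadsafe().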
4 | # 5 | # Usage: python3 async_wav_audio_send.py -m MEETING_URL -i FILE.wav 6 | # 7 | 8 | import asyncio 9 | import argparse 10 | import signal 11 | import wave 12 | 13 | from daily import * 14 | 15 | SAMPLE_RATE = 16000 16 | NUM_CHANNELS = 1 17 | 18 | 19 | class AsyncSendWavApp: 20 | def __init__(self, input_file_name, sample_rate, num_channels): 21 | self.__mic_device = Daily.create_microphone_device( 22 | "my-mic", 23 | sample_rate=sample_rate, 24 | channels=num_channels, 25 | non_blocking=True, 26 | ) 27 | 28 | self.__client = CallClient() 29 | 30 | self.__client.update_subscription_profiles( 31 | {"base": {"camera": "unsubscribed", "microphone": "unsubscribed"}} 32 | ) 33 | 34 | self.__app_error = None 35 | 36 | self.__start_event = asyncio.Event() 37 | self.__task = asyncio.get_running_loop().create_task(self.send_wav_file(input_file_name)) 38 | 39 | async def run(self, meeting_url): 40 | (data, error) = await self.join(meeting_url) 41 | 42 | if error: 43 | print(f"Unable to join meeting: {error}") 44 | self.__app_error = error 45 | 46 | self.__start_event.set() 47 | 48 | await self.__task 49 | 50 | async def join(self, meeting_url): 51 | future = asyncio.get_running_loop().create_future() 52 | 53 | def join_completion(data, error): 54 | future.get_loop().call_soon_threadsafe(future.set_result, (data, error)) 55 | 56 | self.__client.join( 57 | meeting_url, 58 | client_settings={ 59 | "inputs": { 60 | "camera": False, 61 | "microphone": {"isEnabled": True, "settings": {"deviceId": "my-mic"}}, 62 | } 63 | }, 64 | completion=join_completion, 65 | ) 66 | 67 | return await future 68 | 69 | async def leave(self): 70 | future = asyncio.get_running_loop().create_future() 71 | 72 | def leave_completion(error): 73 | future.get_loop().call_soon_threadsafe(future.set_result, error) 74 | 75 | self.__client.leave(completion=leave_completion) 76 | 77 | await future 78 | 79 | self.__client.release() 80 | 81 | self.__task.cancel() 82 | await self.__task 83 | 84 | async def write_frames(self, frames): 85 | future = asyncio.get_running_loop().create_future() 86 | 87 | def write_completion(count): 88 | future.get_loop().call_soon_threadsafe(future.set_result, count) 89 | 90 | self.__mic_device.write_frames(frames, completion=write_completion) 91 | 92 | await future 93 | 94 | async def send_wav_file(self, file_name): 95 | await self.__start_event.wait() 96 | 97 | if self.__app_error: 98 | print(f"Unable to send WAV file!") 99 | return 100 | 101 | try: 102 | wav = wave.open(file_name, "rb") 103 | 104 | sent_frames = 0 105 | total_frames = wav.getnframes() 106 | sample_rate = wav.getframerate() 107 | while sent_frames < total_frames: 108 | # Read 100ms worth of audio frames. 109 | frames = wav.readframes(int(sample_rate / 10)) 110 | if len(frames) > 0: 111 | await self.write_frames(frames) 112 | sent_frames += sample_rate / 10 113 | except asyncio.CancelledError: 114 | pass 115 | 116 | 117 | async def sig_handler(app): 118 | print("Ctrl-C detected. 
Exiting!") 119 | await app.leave() 120 | 121 | 122 | async def main(): 123 | parser = argparse.ArgumentParser() 124 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 125 | parser.add_argument("-i", "--input", required=True, help="WAV input file") 126 | parser.add_argument( 127 | "-c", "--channels", type=int, default=NUM_CHANNELS, help="Number of channels" 128 | ) 129 | parser.add_argument("-r", "--rate", type=int, default=SAMPLE_RATE, help="Sample rate") 130 | 131 | args = parser.parse_args() 132 | 133 | Daily.init() 134 | 135 | app = AsyncSendWavApp(args.input, args.rate, args.channels) 136 | 137 | loop = asyncio.get_running_loop() 138 | 139 | loop.add_signal_handler(signal.SIGINT, lambda *args: asyncio.create_task(sig_handler(app))) 140 | 141 | await app.run(args.meeting) 142 | 143 | 144 | if __name__ == "__main__": 145 | asyncio.run(main()) 146 | -------------------------------------------------------------------------------- /demos/audio/raw_audio_receive.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and record the meeting audio into standard 3 | # output. The recorded audio format has 16-bit per sample. 4 | # 5 | # Usage: python3 raw_audio_receive.py -m MEETING_URL > FILE.raw 6 | # 7 | # The following example shows how to send back the recorded audio using a 8 | # GStreamer pipeline and raw_audio_send.py: 9 | # 10 | # gst-launch-1.0 -q filesrc location=FILE.raw ! \ 11 | # rawaudioparse num-channels=1 pcm-format=s16le sample-rate=16000 ! \ 12 | # fdsink fd=1 sync=true | python3 raw_audio_send.py -m MEETING_URL 13 | # 14 | 15 | import argparse 16 | import sys 17 | import threading 18 | 19 | from daily import * 20 | 21 | SAMPLE_RATE = 16000 22 | NUM_CHANNELS = 1 23 | 24 | 25 | class ReceiveAudioApp: 26 | def __init__(self, sample_rate, num_channels): 27 | self.__sample_rate = sample_rate 28 | 29 | self.__speaker_device = Daily.create_speaker_device( 30 | "my-speaker", sample_rate=sample_rate, channels=num_channels 31 | ) 32 | Daily.select_speaker_device("my-speaker") 33 | 34 | self.__client = CallClient() 35 | self.__client.update_subscription_profiles( 36 | {"base": {"camera": "unsubscribed", "microphone": "subscribed"}} 37 | ) 38 | 39 | self.__app_quit = False 40 | self.__app_error = None 41 | 42 | self.__start_event = threading.Event() 43 | self.__thread = threading.Thread(target=self.receive_audio) 44 | self.__thread.start() 45 | 46 | def on_joined(self, data, error): 47 | if error: 48 | print(f"Unable to join meeting: {error}") 49 | self.__app_error = error 50 | self.__start_event.set() 51 | 52 | def run(self, meeting_url): 53 | self.__client.join(meeting_url, completion=self.on_joined) 54 | self.__thread.join() 55 | 56 | def leave(self): 57 | self.__app_quit = True 58 | self.__thread.join() 59 | self.__client.leave() 60 | self.__client.release() 61 | 62 | def receive_audio(self): 63 | self.__start_event.wait() 64 | 65 | if self.__app_error: 66 | print(f"Unable to receive audio!") 67 | return 68 | 69 | while not self.__app_quit: 70 | # Read 100ms worth of audio frames. 
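# (sample_rate / 10 frames; at the default 16000 Hz mono that is 1600 frames, returned as raw signed 16-bit little-endian PCM bytes, matching the s16le GStreamer example above.)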
71 | buffer = self.__speaker_device.read_frames(int(self.__sample_rate / 10)) 72 | if len(buffer) > 0: 73 | sys.stdout.buffer.write(buffer) 74 | 75 | 76 | def main(): 77 | parser = argparse.ArgumentParser() 78 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 79 | parser.add_argument( 80 | "-c", "--channels", type=int, default=NUM_CHANNELS, help="Number of channels" 81 | ) 82 | parser.add_argument("-r", "--rate", type=int, default=SAMPLE_RATE, help="Sample rate") 83 | 84 | args = parser.parse_args() 85 | 86 | Daily.init() 87 | 88 | app = ReceiveAudioApp(args.rate, args.channels) 89 | 90 | try: 91 | app.run(args.meeting) 92 | except KeyboardInterrupt: 93 | print("Ctrl-C detected. Exiting!", file=sys.stderr) 94 | finally: 95 | app.leave() 96 | 97 | 98 | if __name__ == "__main__": 99 | main() 100 | -------------------------------------------------------------------------------- /demos/audio/raw_audio_send.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and send raw audio received through the 3 | # standard input. The audio format is required to have 16-bit per sample. 4 | # 5 | # Usage: python3 raw_audio_send.py -m MEETING_URL 6 | # 7 | # The following example sends audio from a GStreamer pipeline: 8 | # 9 | # gst-launch-1.0 -q audiotestsrc is-live=true samplesperbuffer=160 ! \ 10 | # audio/x-raw,rate=16000,channels=1,format=S16LE ! \ 11 | # fdsink fd=1 sync=true | python3 raw_audio_send.py -m MEETING_URL 12 | # 13 | 14 | import argparse 15 | import sys 16 | import threading 17 | 18 | from daily import * 19 | 20 | 21 | SAMPLE_RATE = 16000 22 | NUM_CHANNELS = 1 23 | BYTES_PER_SAMPLE = 2 24 | 25 | 26 | class SendAudioApp: 27 | def __init__(self, sample_rate, num_channels): 28 | self.__sample_rate = sample_rate 29 | self.__num_channels = num_channels 30 | 31 | self.__mic_device = Daily.create_microphone_device( 32 | "my-mic", sample_rate=sample_rate, channels=num_channels 33 | ) 34 | 35 | self.__client = CallClient() 36 | 37 | self.__client.update_subscription_profiles( 38 | {"base": {"camera": "unsubscribed", "microphone": "unsubscribed"}} 39 | ) 40 | 41 | self.__app_quit = False 42 | self.__app_error = None 43 | 44 | self.__start_event = threading.Event() 45 | self.__thread = threading.Thread(target=self.send_raw_audio) 46 | self.__thread.start() 47 | 48 | def on_joined(self, data, error): 49 | if error: 50 | print(f"Unable to join meeting: {error}") 51 | self.__app_error = error 52 | self.__start_event.set() 53 | 54 | def run(self, meeting_url): 55 | self.__client.join( 56 | meeting_url, 57 | client_settings={ 58 | "inputs": { 59 | "camera": False, 60 | "microphone": {"isEnabled": True, "settings": {"deviceId": "my-mic"}}, 61 | } 62 | }, 63 | completion=self.on_joined, 64 | ) 65 | self.__thread.join() 66 | 67 | def leave(self): 68 | self.__app_quit = True 69 | self.__thread.join() 70 | self.__client.leave() 71 | self.__client.release() 72 | 73 | def send_raw_audio(self): 74 | self.__start_event.wait() 75 | 76 | if self.__app_error: 77 | print(f"Unable to send audio!") 78 | return 79 | 80 | while not self.__app_quit: 81 | num_bytes = int(self.__sample_rate / 10) * self.__num_channels * BYTES_PER_SAMPLE 82 | buffer = sys.stdin.buffer.read(num_bytes) 83 | if buffer: 84 | self.__mic_device.write_frames(buffer) 85 | 86 | 87 | def main(): 88 | parser = argparse.ArgumentParser() 89 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 90 | parser.add_argument( 91 | 
"-c", "--channels", type=int, default=NUM_CHANNELS, help="Number of channels" 92 | ) 93 | parser.add_argument("-r", "--rate", type=int, default=SAMPLE_RATE, help="Sample rate") 94 | 95 | args = parser.parse_args() 96 | 97 | Daily.init() 98 | 99 | app = SendAudioApp(args.rate, args.channels) 100 | 101 | try: 102 | app.run(args.meeting) 103 | except KeyboardInterrupt: 104 | print("Ctrl-C detected. Exiting!") 105 | finally: 106 | app.leave() 107 | 108 | 109 | if __name__ == "__main__": 110 | main() 111 | -------------------------------------------------------------------------------- /demos/audio/timed_wav_audio_receive.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and record the meeting audio into a WAV 3 | # for the given number of seconds (defaults to 10). The saved WAV file will have 4 | # a sample rate of 16000, 16-bit per sample and mono audio channel. 5 | # 6 | # Usage: python3 timed_wav_audio_receive.py -m MEETING_URL -o FILE.wav [-s SECONDS] 7 | # 8 | 9 | import argparse 10 | import threading 11 | import time 12 | import sys 13 | import wave 14 | 15 | from daily import * 16 | 17 | SAMPLE_RATE = 16000 18 | BYTES_PER_SAMPLE = 2 19 | NUM_CHANNELS = 1 20 | 21 | 22 | class TimedReceiveWavApp(EventHandler): 23 | def __init__(self, output_file_name, sample_rate, num_channels, seconds): 24 | self.__output_file_name = output_file_name 25 | self.__seconds = seconds 26 | self.__sample_rate = sample_rate 27 | self.__num_channels = num_channels 28 | self.__speaker_device = Daily.create_speaker_device( 29 | "my-speaker", sample_rate=sample_rate, channels=num_channels, non_blocking=True 30 | ) 31 | Daily.select_speaker_device("my-speaker") 32 | 33 | self.__client = CallClient(event_handler=self) 34 | self.__client.update_subscription_profiles( 35 | {"base": {"camera": "unsubscribed", "microphone": "subscribed"}} 36 | ) 37 | 38 | self.__app_quit = False 39 | self.__app_error = None 40 | 41 | self.__start_event = threading.Event() 42 | self.__thread = threading.Thread(target=self.receive_audio) 43 | self.__thread.start() 44 | 45 | def on_participant_updated(self, participant): 46 | if participant["info"]["isLocal"]: 47 | return 48 | if participant["media"]["microphone"]["state"] == "playable": 49 | self.__start_event.set() 50 | 51 | def run(self, meeting_url): 52 | self.__client.join(meeting_url) 53 | self.__thread.join() 54 | 55 | def leave(self): 56 | self.__thread.join() 57 | self.__client.leave() 58 | self.__client.release() 59 | 60 | def write_buffer_to_wav(self, buffer): 61 | with wave.open(self.__output_file_name, "wb") as wav_file: 62 | wav_file.setnchannels(self.__num_channels) 63 | wav_file.setsampwidth(BYTES_PER_SAMPLE) 64 | wav_file.setframerate(self.__sample_rate) 65 | wav_file.writeframes(buffer) 66 | self.__app_quit = True 67 | print("done") 68 | 69 | def receive_audio(self): 70 | print(f"waiting for a playable track") 71 | 72 | self.__start_event.wait() 73 | 74 | if self.__app_error: 75 | print("Unable to receive audio!") 76 | return 77 | 78 | print(f"buffering for {self.__seconds} seconds", end="") 79 | 80 | self.__speaker_device.read_frames( 81 | self.__sample_rate * self.__seconds, 82 | completion=lambda buffer: self.write_buffer_to_wav(buffer), 83 | ) 84 | 85 | while not self.__app_quit: 86 | print(".", end="") 87 | sys.stdout.flush() 88 | time.sleep(0.2) 89 | 90 | 91 | def main(): 92 | parser = argparse.ArgumentParser() 93 | parser.add_argument("-m", "--meeting", required=True, help="Meeting 
URL") 94 | parser.add_argument("-o", "--output", required=True, help="WAV output file") 95 | parser.add_argument( 96 | "-s", 97 | "--seconds", 98 | type=int, 99 | default=10, 100 | required=False, 101 | help="Number of seconds (default: 10)", 102 | ) 103 | parser.add_argument( 104 | "-c", "--channels", type=int, default=NUM_CHANNELS, help="Number of channels" 105 | ) 106 | parser.add_argument("-r", "--rate", type=int, default=SAMPLE_RATE, help="Sample rate") 107 | args = parser.parse_args() 108 | 109 | Daily.init() 110 | 111 | app = TimedReceiveWavApp(args.output, args.rate, args.channels, args.seconds) 112 | 113 | try: 114 | app.run(args.meeting) 115 | except KeyboardInterrupt: 116 | print("Ctrl-C detected. Exiting!") 117 | finally: 118 | app.leave() 119 | 120 | 121 | if __name__ == "__main__": 122 | main() 123 | -------------------------------------------------------------------------------- /demos/audio/wav_audio_receive.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and record the meeting audio into a 3 | # WAV. 4 | # 5 | # Usage: python3 wav_audio_receive.py -m MEETING_URL -o FILE.wav 6 | # 7 | 8 | import argparse 9 | import threading 10 | import wave 11 | 12 | from daily import * 13 | 14 | 15 | SAMPLE_RATE = 16000 16 | NUM_CHANNELS = 1 17 | 18 | 19 | class ReceiveWavApp: 20 | def __init__(self, input_file_name, sample_rate, num_channels): 21 | self.__sample_rate = sample_rate 22 | self.__speaker_device = Daily.create_speaker_device( 23 | "my-speaker", sample_rate=sample_rate, channels=num_channels 24 | ) 25 | Daily.select_speaker_device("my-speaker") 26 | 27 | self.__wave = wave.open(input_file_name, "wb") 28 | self.__wave.setnchannels(num_channels) 29 | self.__wave.setsampwidth(2) # 16-bit LINEAR PCM 30 | self.__wave.setframerate(sample_rate) 31 | 32 | self.__client = CallClient() 33 | self.__client.update_subscription_profiles( 34 | {"base": {"camera": "unsubscribed", "microphone": "subscribed"}} 35 | ) 36 | 37 | self.__app_quit = False 38 | self.__app_error = None 39 | 40 | self.__start_event = threading.Event() 41 | self.__thread = threading.Thread(target=self.receive_audio) 42 | self.__thread.start() 43 | 44 | def on_joined(self, data, error): 45 | if error: 46 | print(f"Unable to join meeting: {error}") 47 | self.__app_error = error 48 | self.__start_event.set() 49 | 50 | def run(self, meeting_url): 51 | self.__client.join(meeting_url, completion=self.on_joined) 52 | self.__thread.join() 53 | 54 | def leave(self): 55 | self.__app_quit = True 56 | self.__thread.join() 57 | self.__client.leave() 58 | self.__client.release() 59 | 60 | def receive_audio(self): 61 | self.__start_event.wait() 62 | 63 | if self.__app_error: 64 | print(f"Unable to receive audio!") 65 | return 66 | 67 | while not self.__app_quit: 68 | # Read 100ms worth of audio frames. 
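# Reading in small 100ms chunks keeps this loop responsive to self.__app_quit once leave() is called.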
69 | buffer = self.__speaker_device.read_frames(int(self.__sample_rate / 10)) 70 | if len(buffer) > 0: 71 | self.__wave.writeframes(buffer) 72 | 73 | self.__wave.close() 74 | 75 | 76 | def main(): 77 | parser = argparse.ArgumentParser() 78 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 79 | parser.add_argument( 80 | "-c", "--channels", type=int, default=NUM_CHANNELS, help="Number of channels" 81 | ) 82 | parser.add_argument("-r", "--rate", type=int, default=SAMPLE_RATE, help="Sample rate") 83 | parser.add_argument("-o", "--output", required=True, help="WAV output file") 84 | args = parser.parse_args() 85 | 86 | Daily.init() 87 | 88 | app = ReceiveWavApp(args.output, args.rate, args.channels) 89 | 90 | try: 91 | app.run(args.meeting) 92 | except KeyboardInterrupt: 93 | print("Ctrl-C detected. Exiting!") 94 | finally: 95 | app.leave() 96 | 97 | 98 | if __name__ == "__main__": 99 | main() 100 | -------------------------------------------------------------------------------- /demos/audio/wav_audio_send.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and send the audio from a WAV file into 3 | # the meeting. 4 | # 5 | # Usage: python3 wav_audio_send.py -m MEETING_URL -i FILE.wav 6 | # 7 | 8 | import argparse 9 | import threading 10 | import wave 11 | 12 | from daily import * 13 | 14 | SAMPLE_RATE = 16000 15 | NUM_CHANNELS = 1 16 | 17 | 18 | class SendWavApp: 19 | def __init__(self, input_file_name, sample_rate, num_channels): 20 | self.__mic_device = Daily.create_microphone_device( 21 | "my-mic", sample_rate=sample_rate, channels=num_channels 22 | ) 23 | 24 | self.__client = CallClient() 25 | 26 | self.__client.update_subscription_profiles( 27 | {"base": {"camera": "unsubscribed", "microphone": "unsubscribed"}} 28 | ) 29 | 30 | self.__app_quit = False 31 | self.__app_error = None 32 | 33 | self.__start_event = threading.Event() 34 | self.__thread = threading.Thread(target=self.send_wav_file, args=[input_file_name]) 35 | self.__thread.start() 36 | 37 | def on_joined(self, data, error): 38 | if error: 39 | print(f"Unable to join meeting: {error}") 40 | self.__app_error = error 41 | self.__start_event.set() 42 | 43 | def run(self, meeting_url): 44 | self.__client.join( 45 | meeting_url, 46 | client_settings={ 47 | "inputs": { 48 | "camera": False, 49 | "microphone": {"isEnabled": True, "settings": {"deviceId": "my-mic"}}, 50 | } 51 | }, 52 | completion=self.on_joined, 53 | ) 54 | self.__thread.join() 55 | 56 | def leave(self): 57 | self.__app_quit = True 58 | self.__thread.join() 59 | self.__client.leave() 60 | self.__client.release() 61 | 62 | def send_wav_file(self, file_name): 63 | self.__start_event.wait() 64 | 65 | if self.__app_error: 66 | print(f"Unable to send WAV file!") 67 | return 68 | 69 | wav = wave.open(file_name, "rb") 70 | 71 | sent_frames = 0 72 | total_frames = wav.getnframes() 73 | sample_rate = wav.getframerate() 74 | while not self.__app_quit and sent_frames < total_frames: 75 | # Read 100ms worth of audio frames. 
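# Note: the virtual microphone was created in the default blocking mode, so write_frames() below should pace this loop at roughly real time without an explicit sleep.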
76 | frames = wav.readframes(int(sample_rate / 10)) 77 | if len(frames) > 0: 78 | self.__mic_device.write_frames(frames) 79 | sent_frames += sample_rate / 10 80 | 81 | 82 | def main(): 83 | parser = argparse.ArgumentParser() 84 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 85 | parser.add_argument("-i", "--input", required=True, help="WAV input file") 86 | parser.add_argument( 87 | "-c", "--channels", type=int, default=NUM_CHANNELS, help="Number of channels" 88 | ) 89 | parser.add_argument("-r", "--rate", type=int, default=SAMPLE_RATE, help="Sample rate") 90 | 91 | args = parser.parse_args() 92 | 93 | Daily.init() 94 | 95 | app = SendWavApp(args.input, args.rate, args.channels) 96 | 97 | try: 98 | app.run(args.meeting) 99 | except KeyboardInterrupt: 100 | print("Ctrl-C detected. Exiting!") 101 | finally: 102 | app.leave() 103 | 104 | 105 | if __name__ == "__main__": 106 | main() 107 | -------------------------------------------------------------------------------- /demos/deepgram/deepgram_text_to_speech.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and, given a text file with senteces (one 3 | # per line), will translate text into audio using Deepgram's Text-To-Speech API 4 | # and will send it into the meeting. 5 | # 6 | # The demo requires a Deepgram API key set in the DG_API_KEY environment variable. 7 | # 8 | # See https://developers.deepgram.com/docs/text-to-speech 9 | # 10 | # Usage: python3 deepgram_speech_to_text.py -m MEETING_URL -i FILE 11 | # 12 | 13 | import argparse 14 | import os 15 | import time 16 | 17 | from daily import * 18 | from deepgram import ( 19 | DeepgramClient, 20 | SpeakOptions, 21 | ) 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 25 | parser.add_argument("-i", "--input", required=True, help="File with sentences (one per line)") 26 | args = parser.parse_args() 27 | 28 | Daily.init() 29 | 30 | # We create a virtual microphone device so we can read audio samples from the 31 | # meeting. 32 | microphone = Daily.create_microphone_device("my-mic", sample_rate=16000, channels=1) 33 | 34 | client = CallClient() 35 | 36 | print() 37 | print(f"Joining {args.meeting} ...") 38 | 39 | # Join and tell our call client that we will be using our new virtual 40 | # microphone. 41 | client.join( 42 | args.meeting, 43 | client_settings={ 44 | "inputs": {"microphone": {"isEnabled": True, "settings": {"deviceId": "my-mic"}}} 45 | }, 46 | ) 47 | 48 | # Make sure we are joined. It would be better to use join() completion 49 | # callback. 50 | time.sleep(3) 51 | 52 | sentences_file = open(args.input, "r") 53 | 54 | deepgram = DeepgramClient(api_key=os.getenv("DG_API_KEY")) 55 | 56 | speak_options = SpeakOptions( 57 | model="aura-asteria-en", encoding="linear16", sample_rate="16000", container="none" 58 | ) 59 | 60 | print() 61 | 62 | for sentence in sentences_file.readlines(): 63 | print(f"Processing: {sentence.strip()}") 64 | print() 65 | 66 | speak_source = {"text": sentence.strip()} 67 | 68 | response = deepgram.speak.rest.v("1").stream_raw(speak_source, speak_options) 69 | 70 | # Send all the audio frames to the microphone. 
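# response.read() returns raw 16-bit PCM at 16000 Hz (SpeakOptions above requested linear16 with no container), which matches the virtual microphone created earlier.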
71 | microphone.write_frames(response.read()) 72 | 73 | # Let everything finish 74 | time.sleep(2) 75 | 76 | client.leave() 77 | client.release() 78 | -------------------------------------------------------------------------------- /demos/flask/README.md: -------------------------------------------------------------------------------- 1 | # daily-python Flask/Celery demo 2 | 3 | This is a Flask application that allows you to connect bots to a Daily meeting 4 | by making POST requests to a URL. The bots will synthesize sentences to audio 5 | into the meeting. 6 | 7 | # Dependencies 8 | 9 | The application needs Flask, Celery and Redis Python packages installed. It also 10 | assumes you have a local running Redis server. 11 | 12 | To install the Python packages you can type: 13 | 14 | ```bash 15 | pip3 install flask celery redis 16 | ``` 17 | 18 | Installing a Redis server might be specific to your operating system. 19 | 20 | # Usage 21 | 22 | Once all the dependencies are installed we will first run the Celery worker in 23 | one terminal: 24 | 25 | ```bash 26 | celery -A app.celery worker --loglevel INFO 27 | ``` 28 | 29 | Then we will run the Flask application: 30 | 31 | ```bash 32 | flask run 33 | ``` 34 | 35 | # Making requests 36 | 37 | The body of the request is a JSON object with the following fields: 38 | 39 | ```json 40 | { 41 | "bot_name": "BOT_NAME", 42 | "meeting_url": "DAILY_MEETING_URL" 43 | } 44 | ``` 45 | 46 | We can easily make a request with `curl`: 47 | 48 | ```bash 49 | curl -d '{"bot_name": "BOT_NAME", "meeting_url":"DAILY_MEETING_URL"}' -H "Content-Type: application/json" -X POST http://localhost:5000 50 | ``` 51 | 52 | This will be received by the Flask application and a new process will be 53 | created. 54 | -------------------------------------------------------------------------------- /demos/flask/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request 2 | 3 | from celery import Celery, Task 4 | 5 | from billiard.context import Process 6 | 7 | from bot import start_bot 8 | 9 | 10 | def celery_init_app(app: Flask) -> Celery: 11 | class FlaskTask(Task): 12 | def __call__(self, *args: object, **kwargs: object) -> object: 13 | with app.app_context(): 14 | return self.run(*args, **kwargs) 15 | 16 | celery_app = Celery(app.name, task_cls=FlaskTask) 17 | celery_app.config_from_object(app.config["CELERY"]) 18 | celery_app.set_default() 19 | app.extensions["celery"] = celery_app 20 | return celery_app 21 | 22 | 23 | app = Flask(__name__) 24 | app.config.from_mapping( 25 | CELERY=dict( 26 | broker_url="redis://localhost", 27 | result_backend="redis://localhost", 28 | task_ignore_result=True, 29 | ), 30 | ) 31 | app.config.from_prefixed_env() 32 | 33 | celery = celery_init_app(app) 34 | 35 | 36 | @celery.task 37 | def create_bot(bot_name, meeting_url): 38 | process = Process(target=start_bot, args=(bot_name, meeting_url)) 39 | process.start() 40 | process.join() 41 | 42 | 43 | @app.route("/", methods=["POST"]) 44 | def new_bot(): 45 | content = request.get_json(silent=True) 46 | bot_name = content["bot_name"] 47 | meeting_url = content["meeting_url"] 48 | create_bot.delay(bot_name, meeting_url) 49 | return "" 50 | -------------------------------------------------------------------------------- /demos/flask/bot.py: -------------------------------------------------------------------------------- 1 | import io 2 | import threading 3 | 4 | from daily import * 5 | 6 | from google.cloud import texttospeech 7 | 8 
| voice = texttospeech.VoiceSelectionParams(language_code="en-US", name="en-US-Studio-M") 9 | 10 | audio_config = texttospeech.AudioConfig( 11 | audio_encoding=texttospeech.AudioEncoding.LINEAR16, speaking_rate=1.0, sample_rate_hertz=16000 12 | ) 13 | 14 | 15 | class Bot: 16 | def __init__(self, name, microphone): 17 | self.__name = name 18 | 19 | self.__speech_client = texttospeech.TextToSpeechClient() 20 | 21 | self.__call_client = CallClient() 22 | 23 | self.__bot_error = None 24 | 25 | self.__start_event = threading.Event() 26 | self.__thread = threading.Thread(target=self.send_audio, args=[microphone]) 27 | self.__thread.start() 28 | 29 | def on_joined(self, data, error): 30 | if error: 31 | print(f"Unable to join meeting: {error}") 32 | self.__bot_error = error 33 | self.__start_event.set() 34 | 35 | def run(self, meeting_url): 36 | self.__call_client.join( 37 | meeting_url, 38 | client_settings={ 39 | "inputs": { 40 | "camera": False, 41 | "microphone": {"isEnabled": True, "settings": {"deviceId": "my-mic"}}, 42 | } 43 | }, 44 | completion=self.on_joined, 45 | ) 46 | self.__thread.join() 47 | 48 | def leave(self): 49 | self.__call_client.leave() 50 | self.__call_client.release() 51 | 52 | def send_audio(self, microphone): 53 | self.__start_event.wait() 54 | 55 | if self.__bot_error: 56 | print(f"Unable to send audio!") 57 | return 58 | 59 | # NOTE: This is just an example. These sentences should probably come 60 | # from somewhere else. 61 | sentences = [ 62 | "Hello. I hope you're doing well." 63 | "This is a bot written with Flask, Celery and Daily Python SDK." 64 | "Have a nice day!" 65 | ] 66 | 67 | for sentence in sentences: 68 | self.synthesize_sentence(microphone, sentence) 69 | 70 | def synthesize_sentence(self, microphone, sentence): 71 | synthesis_input = texttospeech.SynthesisInput(text=sentence.strip()) 72 | 73 | response = self.__speech_client.synthesize_speech( 74 | input=synthesis_input, voice=voice, audio_config=audio_config 75 | ) 76 | 77 | stream = io.BytesIO(response.audio_content) 78 | 79 | # Skip RIFF header 80 | stream.read(44) 81 | 82 | microphone.write_frames(stream.read()) 83 | 84 | 85 | # 86 | # This is now a new process (because we created a Process in create_bot() in 87 | # app.py), so it's safe to initialize Daily.init() as it will be executed just 88 | # once in the new process. 89 | # 90 | # However, to pass information back to the main application we can't just return 91 | # values from functions or update application global state, because processes 92 | # are independent. But there are multiple alternatives: 93 | # 94 | # - Pipes and queues: https://billiard.readthedocs.io/en/latest/library/multiprocessing.html#pipes-and-queues 95 | # - Redis Pub/Sub 96 | # - More sophisticated messages queues: SQS, RabbitMQ, Kakfa 97 | # 98 | 99 | 100 | def start_bot(bot_name, meeting_url): 101 | Daily.init() 102 | 103 | microphone = Daily.create_microphone_device("my-mic", sample_rate=16000, channels=1) 104 | 105 | bot = Bot(bot_name, microphone) 106 | bot.run(meeting_url) 107 | bot.leave() 108 | -------------------------------------------------------------------------------- /demos/google/google_speech_to_text.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting, will listen for audio for 10 seconds and 3 | # will use Google Speech-To-Text API to translate that audio to text. 4 | # 5 | # The demo requires Google Speech-To-Text credentials. 
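# (Typically this means pointing the GOOGLE_APPLICATION_CREDENTIALS environment variable at a service account JSON key file; see the link below.)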
6 | # 7 | # See https://cloud.google.com/speech-to-text/docs/before-you-begin 8 | # 9 | # Usage: python3 google_speech_to_text.py -m MEETING_URL 10 | # 11 | 12 | from daily import * 13 | from google.cloud import speech 14 | 15 | import argparse 16 | import io 17 | import time 18 | import wave 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 22 | args = parser.parse_args() 23 | 24 | Daily.init() 25 | 26 | speaker = Daily.create_speaker_device("my-speaker", sample_rate=16000, channels=1) 27 | 28 | Daily.select_speaker_device("my-speaker") 29 | 30 | client = CallClient() 31 | 32 | print() 33 | print(f"Joining {args.meeting} ...") 34 | 35 | client.join(args.meeting) 36 | 37 | # Make sure we are joined. It would be better to use join() completion 38 | # callback. 39 | time.sleep(3) 40 | 41 | SAMPLE_RATE = 16000 42 | SECONDS_TO_READ = 10 43 | FRAMES_TO_READ = SAMPLE_RATE * SECONDS_TO_READ 44 | 45 | print() 46 | print(f"Now, say something in the meeting for {int(SECONDS_TO_READ)} seconds ...") 47 | 48 | # We are creating a WAV file in memory so we can later grab the whole buffer and 49 | # send it to Google Speech-To-Text API. 50 | content = io.BufferedRandom(io.BytesIO()) 51 | 52 | out_wave = wave.open(content, "wb") 53 | out_wave.setnchannels(1) 54 | out_wave.setsampwidth(2) # 16-bit LINEAR PCM 55 | out_wave.setframerate(16000) 56 | 57 | # Here we are reading from the virtual speaker and writing the audio frames into 58 | # the in-memory WAV file. 59 | buffer = speaker.read_frames(FRAMES_TO_READ) 60 | out_wave.writeframesraw(buffer) 61 | 62 | out_wave.close() 63 | 64 | # We go to the beginning of the WAV buffer stream. 65 | content.seek(0) 66 | 67 | # We create and audio object with the contents of the in-memory WAV file. 68 | audio = speech.RecognitionAudio(content=content.read()) 69 | 70 | # Configure Google Speech-To-Text so it receives 16-bit LINEAR PCM. 71 | config = speech.RecognitionConfig( 72 | encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, 73 | sample_rate_hertz=16000, 74 | language_code="en-US", 75 | ) 76 | 77 | speech_client = speech.SpeechClient() 78 | 79 | print() 80 | print(f"Transcribing with Google Speech-To-Text API ...") 81 | 82 | response = speech_client.recognize(config=config, audio=audio) 83 | 84 | print() 85 | for result in response.results: 86 | print(f"Transcript: {result.alternatives[0].transcript}") 87 | 88 | client.leave() 89 | client.release() 90 | -------------------------------------------------------------------------------- /demos/google/google_text_to_speech.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and, given a text file with senteces (one 3 | # per line), will translate text into audio using Google Text-To-Speech API and 4 | # will send it into the meeting. 5 | # 6 | # The demo requires Google Speech-To-Text credentials. 
7 | # 8 | # See https://cloud.google.com/text-to-speech/docs/before-you-begin 9 | # 10 | # Usage: python3 google_speech_to_text.py -m MEETING_URL -i FILE 11 | # 12 | 13 | from daily import * 14 | from google.cloud import texttospeech 15 | 16 | import argparse 17 | import io 18 | import time 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 22 | parser.add_argument("-i", "--input", required=True, help="File with sentences (one per line)") 23 | args = parser.parse_args() 24 | 25 | Daily.init() 26 | 27 | # We create a virtual microphone device so we can read audio samples from the 28 | # meeting. 29 | microphone = Daily.create_microphone_device("my-mic", sample_rate=16000, channels=1) 30 | 31 | client = CallClient() 32 | 33 | print() 34 | print(f"Joining {args.meeting} ...") 35 | 36 | # Join and tell our call client that we will be using our new virtual 37 | # microphone. 38 | client.join( 39 | args.meeting, 40 | client_settings={ 41 | "inputs": {"microphone": {"isEnabled": True, "settings": {"deviceId": "my-mic"}}} 42 | }, 43 | ) 44 | 45 | # Make sure we are joined. It would be better to use join() completion 46 | # callback. 47 | time.sleep(3) 48 | 49 | sentences_file = open(args.input, "r") 50 | 51 | voice = texttospeech.VoiceSelectionParams(language_code="en-US", name="en-US-Studio-M") 52 | 53 | audio_config = texttospeech.AudioConfig( 54 | audio_encoding=texttospeech.AudioEncoding.LINEAR16, speaking_rate=1.0, sample_rate_hertz=16000 55 | ) 56 | 57 | speech_client = texttospeech.TextToSpeechClient() 58 | 59 | print() 60 | 61 | for sentence in sentences_file.readlines(): 62 | print(f"Processing: {sentence.strip()}") 63 | print() 64 | 65 | synthesis_input = texttospeech.SynthesisInput(text=sentence.strip()) 66 | 67 | response = speech_client.synthesize_speech( 68 | input=synthesis_input, voice=voice, audio_config=audio_config 69 | ) 70 | 71 | # Create an in-memory buffer with API's response. 72 | stream = io.BytesIO(response.audio_content) 73 | 74 | # The API response includes a WAV RIFF header, so we want to skip that since 75 | # that's not part of the audio samples. 76 | stream.read(44) 77 | 78 | # Send all the audio frames to the microphone. 79 | microphone.write_frames(stream.read()) 80 | 81 | # Let everything finish 82 | time.sleep(2) 83 | 84 | client.leave() 85 | client.release() 86 | -------------------------------------------------------------------------------- /demos/gstreamer/media_player.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and send a given media file. The media 3 | # file can be of any format supported by your local GStreamer. 
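# (In practice that means any container/codec the locally installed GStreamer plugin packages, e.g. gst-plugins-base/-good, can decode.)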
4 | # 5 | # Usage: python3 media_player.py -m MEETING_URL -i FILE 6 | # 7 | 8 | import argparse 9 | 10 | from daily import * 11 | 12 | import gi 13 | 14 | # autopep8: off 15 | gi.require_version("Gst", "1.0") 16 | gi.require_version("GstApp", "1.0") 17 | from gi.repository import Gst, GstApp, GLib 18 | # autopep8: on 19 | 20 | VIDEO_WIDTH = 1280 21 | VIDEO_HEIGHT = 720 22 | AUDIO_SAMPLE_RATE = 48000 23 | AUDIO_CHANNELS = 2 24 | 25 | 26 | class GstApp: 27 | def __init__(self, filename): 28 | self.__camera = Daily.create_camera_device( 29 | "my-camera", width=VIDEO_WIDTH, height=VIDEO_HEIGHT, color_format="I420" 30 | ) 31 | self.__microphone = Daily.create_microphone_device( 32 | "my-mic", sample_rate=AUDIO_SAMPLE_RATE, channels=AUDIO_CHANNELS, non_blocking=True 33 | ) 34 | 35 | self.__client = CallClient() 36 | 37 | self.__client.update_subscription_profiles( 38 | {"base": {"camera": "unsubscribed", "microphone": "unsubscribed"}} 39 | ) 40 | 41 | self.__player = Gst.Pipeline.new("player") 42 | 43 | source = Gst.ElementFactory.make("filesrc", None) 44 | source.set_property("location", filename) 45 | 46 | decodebin = Gst.ElementFactory.make("decodebin", None) 47 | decodebin.connect("pad-added", self.decodebin_callback) 48 | 49 | self.__player.add(source) 50 | self.__player.add(decodebin) 51 | source.link(decodebin) 52 | 53 | bus = self.__player.get_bus() 54 | bus.add_signal_watch() 55 | bus.connect("message", self.on_message) 56 | 57 | self.__loop = GLib.MainLoop() 58 | 59 | def on_joined(self, data, error): 60 | if error: 61 | print(f"Unable to join meeting: {error}") 62 | else: 63 | self.__player.set_state(Gst.State.PLAYING) 64 | 65 | def run(self, meeting_url): 66 | self.__client.join( 67 | meeting_url, 68 | client_settings={ 69 | "inputs": { 70 | "camera": {"isEnabled": True, "settings": {"deviceId": "my-camera"}}, 71 | "microphone": {"isEnabled": True, "settings": {"deviceId": "my-mic"}}, 72 | }, 73 | "publishing": { 74 | "camera": { 75 | "isPublishing": True, 76 | "sendSettings": { 77 | "encodings": { 78 | "low": { 79 | "maxBitrate": 1000000, 80 | "maxFramerate": 30.0, 81 | "scaleResolutionDownBy": 1.0, 82 | } 83 | } 84 | }, 85 | } 86 | }, 87 | }, 88 | completion=self.on_joined, 89 | ) 90 | self.__loop.run() 91 | 92 | def leave(self): 93 | if self.__client: 94 | self.__client.leave() 95 | self.__client.release() 96 | self.__player.set_state(Gst.State.NULL) 97 | self.__loop.quit() 98 | self.__client = None 99 | 100 | def on_message(self, bus, message): 101 | t = message.type 102 | if t == Gst.MessageType.EOS: 103 | self.leave() 104 | elif t == Gst.MessageType.ERROR: 105 | err, debug = message.parse_error() 106 | print(f"Error: {err} : {debug}") 107 | self.leave() 108 | return True 109 | 110 | def decodebin_callback(self, decodebin, pad): 111 | caps_string = pad.get_current_caps().to_string() 112 | if caps_string.startswith("audio"): 113 | self.decodebin_audio(pad) 114 | elif caps_string.startswith("video"): 115 | self.decodebin_video(pad) 116 | 117 | def decodebin_audio(self, pad): 118 | queue_audio = Gst.ElementFactory.make("queue", None) 119 | audioconvert = Gst.ElementFactory.make("audioconvert", None) 120 | audioresample = Gst.ElementFactory.make("audioresample", None) 121 | audiocapsfilter = Gst.ElementFactory.make("capsfilter", None) 122 | audiocaps = Gst.Caps.from_string( 123 | f"audio/x-raw,format=S16LE,rate={AUDIO_SAMPLE_RATE},channels={AUDIO_CHANNELS},layout=interleaved" 124 | ) 125 | audiocapsfilter.set_property("caps", audiocaps) 126 | appsink_audio = 
Gst.ElementFactory.make("appsink", None) 127 | appsink_audio.set_property("emit-signals", True) 128 | appsink_audio.connect("new-sample", self.appsink_audio_new_sample) 129 | 130 | self.__player.add(queue_audio) 131 | self.__player.add(audioconvert) 132 | self.__player.add(audioresample) 133 | self.__player.add(audiocapsfilter) 134 | self.__player.add(appsink_audio) 135 | queue_audio.sync_state_with_parent() 136 | audioconvert.sync_state_with_parent() 137 | audioresample.sync_state_with_parent() 138 | audiocapsfilter.sync_state_with_parent() 139 | appsink_audio.sync_state_with_parent() 140 | 141 | queue_audio.link(audioconvert) 142 | audioconvert.link(audioresample) 143 | audioresample.link(audiocapsfilter) 144 | audiocapsfilter.link(appsink_audio) 145 | 146 | queue_pad = queue_audio.get_static_pad("sink") 147 | pad.link(queue_pad) 148 | 149 | def decodebin_video(self, pad): 150 | queue_video = Gst.ElementFactory.make("queue", None) 151 | videoconvert = Gst.ElementFactory.make("videoconvert", None) 152 | videoscale = Gst.ElementFactory.make("videoscale", None) 153 | videocapsfilter = Gst.ElementFactory.make("capsfilter", None) 154 | videocaps = Gst.Caps.from_string( 155 | f"video/x-raw,format=I420,width={VIDEO_WIDTH},height={VIDEO_HEIGHT}" 156 | ) 157 | videocapsfilter.set_property("caps", videocaps) 158 | 159 | appsink_video = Gst.ElementFactory.make("appsink", None) 160 | appsink_video.set_property("emit-signals", True) 161 | appsink_video.connect("new-sample", self.appsink_video_new_sample) 162 | 163 | self.__player.add(queue_video) 164 | self.__player.add(videoconvert) 165 | self.__player.add(videoscale) 166 | self.__player.add(videocapsfilter) 167 | self.__player.add(appsink_video) 168 | queue_video.sync_state_with_parent() 169 | videoconvert.sync_state_with_parent() 170 | videoscale.sync_state_with_parent() 171 | videocapsfilter.sync_state_with_parent() 172 | appsink_video.sync_state_with_parent() 173 | 174 | queue_video.link(videoconvert) 175 | videoconvert.link(videoscale) 176 | videoscale.link(videocapsfilter) 177 | videocapsfilter.link(appsink_video) 178 | 179 | queue_pad = queue_video.get_static_pad("sink") 180 | pad.link(queue_pad) 181 | 182 | def appsink_audio_new_sample(self, appsink): 183 | buffer = appsink.pull_sample().get_buffer() 184 | (_, info) = buffer.map(Gst.MapFlags.READ) 185 | self.__microphone.write_frames(info.data) 186 | buffer.unmap(info) 187 | return Gst.FlowReturn.OK 188 | 189 | def appsink_video_new_sample(self, appsink): 190 | buffer = appsink.pull_sample().get_buffer() 191 | (_, info) = buffer.map(Gst.MapFlags.READ) 192 | self.__camera.write_frame(info.data) 193 | buffer.unmap(info) 194 | return Gst.FlowReturn.OK 195 | 196 | 197 | def main(): 198 | parser = argparse.ArgumentParser() 199 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 200 | parser.add_argument("-i", "--input", required=True, help="Video file") 201 | 202 | args = parser.parse_args() 203 | 204 | Gst.init(None) 205 | Daily.init() 206 | 207 | app = GstApp(args.input) 208 | 209 | try: 210 | app.run(args.meeting) 211 | except KeyboardInterrupt: 212 | print("Ctrl-C detected. 
Exiting!") 213 | finally: 214 | app.leave() 215 | 216 | 217 | if __name__ == "__main__": 218 | main() 219 | -------------------------------------------------------------------------------- /demos/gtk/gtk_app.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and will receive and render video frames 3 | # for a given participant ID. 4 | # 5 | # If `-a` is specified, it will save a WAV file with the audio for only that 6 | # participant and it will also reproduce it. 7 | # 8 | # If `-s` is specified, it will render the screen share (if available) otherwise 9 | # it defaults to the participant camera. 10 | # 11 | # Usage: python gtk_app.py -m MEETING_URL -p PARTICIPANT_ID [-a] [-s] 12 | # 13 | 14 | from daily import * 15 | import argparse 16 | import sys 17 | import wave 18 | 19 | import cairo 20 | import gi 21 | 22 | # autopep8: off 23 | gi.require_version("Gtk", "4.0") 24 | from gi.repository import GLib, Gtk 25 | # autopep8: on 26 | 27 | 28 | class DailyGtkApp(Gtk.Application): 29 | def __init__(self, meeting_url, participant_id, save_audio, screen_share): 30 | super().__init__(application_id="co.daily.DailyGtkApp") 31 | 32 | self.__client = CallClient() 33 | self.__client.update_subscription_profiles( 34 | { 35 | "base": { 36 | "microphone": "subscribed", 37 | "camera": "unsubscribed" if screen_share else "subscribed", 38 | "screenVideo": "subscribed" if screen_share else "unsubscribed", 39 | } 40 | } 41 | ) 42 | 43 | self.__width = 1280 44 | self.__height = 720 45 | 46 | self.__frame = None 47 | self.__frame_width = self.__width 48 | self.__frame_height = self.__height 49 | 50 | self.__black_frame = bytearray(self.__width * self.__height * 4) 51 | 52 | self.__joined = False 53 | self.__meeting_url = meeting_url 54 | self.__participant_id = participant_id 55 | 56 | self.__save_audio = save_audio 57 | 58 | self.__video_source = "camera" 59 | if screen_share: 60 | self.__video_source = "screenVideo" 61 | 62 | def do_activate(self): 63 | window = Gtk.ApplicationWindow(application=self, title="daily-python Gtk demo") 64 | window.set_default_size(self.__width, self.__height) 65 | 66 | main_box = Gtk.Box(spacing=6, orientation=Gtk.Orientation.VERTICAL) 67 | inputs_box = Gtk.Box(spacing=6, orientation=Gtk.Orientation.HORIZONTAL) 68 | 69 | drawing_area = Gtk.DrawingArea() 70 | drawing_area.set_hexpand(True) 71 | drawing_area.set_vexpand(True) 72 | drawing_area.set_draw_func(self.drawing_area_draw, None) 73 | 74 | meeting_label = Gtk.Label(label="Meeting URL:") 75 | meeting_entry = Gtk.Entry() 76 | meeting_entry.set_hexpand(True) 77 | meeting_entry.set_text(self.__meeting_url) 78 | 79 | participant_label = Gtk.Label(label="Participant ID:") 80 | participant_entry = Gtk.Entry() 81 | participant_entry.set_hexpand(True) 82 | participant_entry.set_text(self.__participant_id) 83 | 84 | button = Gtk.Button(label="Join") 85 | button.connect("clicked", self.on_join_or_leave) 86 | 87 | inputs_box.append(meeting_label) 88 | inputs_box.append(meeting_entry) 89 | inputs_box.append(participant_label) 90 | inputs_box.append(participant_entry) 91 | inputs_box.append(button) 92 | 93 | main_box.append(drawing_area) 94 | main_box.append(inputs_box) 95 | 96 | window.set_child(main_box) 97 | 98 | self.__button = button 99 | self.__drawing_area = drawing_area 100 | self.__meeting_entry = meeting_entry 101 | self.__participant_entry = participant_entry 102 | 103 | window.present() 104 | 105 | def on_join_or_leave(self, button): 106 | if 
self.__joined: 107 | self.leave() 108 | self.__button.set_label("Join") 109 | else: 110 | meeting_url = self.__meeting_entry.get_text() 111 | participant_id = self.__participant_entry.get_text() 112 | 113 | if self.__save_audio: 114 | self.__wave = wave.open(f"participant-{participant_id}.wav", "wb") 115 | self.__wave.setnchannels(1) 116 | self.__wave.setsampwidth(2) # 16-bit LINEAR PCM 117 | self.__wave.setframerate(48000) 118 | 119 | self.join(meeting_url, participant_id) 120 | self.__button.set_label("Leave") 121 | 122 | def on_joined(self, data, error): 123 | if not error: 124 | self.__joined = True 125 | 126 | def on_left(self, error): 127 | self.__frame = None 128 | self.__drawing_area.queue_draw() 129 | self.__joined = False 130 | if self.__save_audio: 131 | self.__wave.close() 132 | 133 | def join(self, meeting_url, participant_id): 134 | if not meeting_url or not participant_id: 135 | return 136 | 137 | if self.__save_audio: 138 | self.__client.set_audio_renderer(participant_id, self.on_audio_data) 139 | 140 | self.__client.set_video_renderer( 141 | participant_id, 142 | self.on_video_frame, 143 | video_source=self.__video_source, 144 | color_format="BGRA", 145 | ) 146 | 147 | self.__client.join(meeting_url, completion=self.on_joined) 148 | 149 | def leave(self): 150 | self.__client.leave(completion=self.on_left) 151 | 152 | def drawing_area_draw(self, area, context, w, h, data): 153 | if self.__joined and self.__frame is not None: 154 | image = bytearray(self.__frame.buffer) 155 | else: 156 | image = self.__black_frame 157 | 158 | width = self.__frame_width 159 | height = self.__frame_height 160 | 161 | stride = cairo.ImageSurface.format_stride_for_width(cairo.FORMAT_ARGB32, width) 162 | cairo_surface = cairo.ImageSurface.create_for_data( 163 | image, cairo.FORMAT_ARGB32, width, height, stride 164 | ) 165 | 166 | width_ratio = float(self.__width) / float(width) 167 | height_ratio = float(self.__height) / float(height) 168 | scale_xy = min(height_ratio, width_ratio) 169 | 170 | context.scale(scale_xy, scale_xy) 171 | 172 | context.set_source_surface(cairo_surface) 173 | context.paint() 174 | 175 | def on_audio_data(self, participant_id, audio_data, audio_source): 176 | self.__wave.writeframes(audio_data.audio_frames) 177 | 178 | def on_video_frame(self, participant_id, video_frame, video_source): 179 | self.__frame_width = video_frame.width 180 | self.__frame_height = video_frame.height 181 | self.__frame = video_frame 182 | self.__drawing_area.queue_draw() 183 | 184 | 185 | def main(): 186 | parser = argparse.ArgumentParser() 187 | parser.add_argument("-m", "--meeting", default="", help="Meeting URL") 188 | parser.add_argument("-p", "--participant", default="", help="Participant ID") 189 | parser.add_argument( 190 | "-a", 191 | "--audio", 192 | default=False, 193 | action="store_true", 194 | help="Store participant audio in a file (participant-ID.wav)", 195 | ) 196 | parser.add_argument( 197 | "-s", 198 | "--screen", 199 | default=False, 200 | action="store_true", 201 | help="Render screen share (if available) instead of camera", 202 | ) 203 | args = parser.parse_args() 204 | 205 | Daily.init() 206 | 207 | app = DailyGtkApp(args.meeting, args.participant, args.audio, args.screen) 208 | sys.exit(app.run()) 209 | 210 | 211 | if __name__ == "__main__": 212 | main() 213 | -------------------------------------------------------------------------------- /demos/openai/dall-e.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will 
join a Daily meeting, will listen for an audio prompt, will use 3 | # Google Speech-To-Text API to translate that audio to text and then will use 4 | # that text as a prompt for DALL-E to generate an image. The image will then be 5 | # sent to the meeting using a virtual camera device. 6 | # 7 | # The demo requires Google Speech-To-Text credentials and an OpenAI API key. 8 | # 9 | # See: 10 | # https://cloud.google.com/speech-to-text/docs/before-you-begin 11 | # https://platform.openai.com/docs/api-reference/authentication 12 | # 13 | # Usage: python3 dall-e.py -m MEETING_URL 14 | # 15 | 16 | from daily import * 17 | from google.cloud import speech 18 | from PIL import Image 19 | from openai import OpenAI 20 | 21 | import argparse 22 | import io 23 | import os 24 | import time 25 | import wave 26 | from base64 import b64decode 27 | 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 30 | args = parser.parse_args() 31 | 32 | 33 | Daily.init() 34 | 35 | CAMERA_WIDTH = 1024 36 | CAMERA_HEIGHT = 1024 37 | 38 | speaker = Daily.create_speaker_device("my-speaker", sample_rate=16000, channels=1) 39 | camera = Daily.create_camera_device( 40 | "my-camera", width=CAMERA_WIDTH, height=CAMERA_HEIGHT, color_format="RGB" 41 | ) 42 | 43 | Daily.select_speaker_device("my-speaker") 44 | 45 | client = CallClient() 46 | 47 | print() 48 | print(f"Joining {args.meeting} ...") 49 | 50 | client.join( 51 | args.meeting, 52 | client_settings={ 53 | "inputs": { 54 | "camera": {"isEnabled": True, "settings": {"deviceId": "my-camera"}}, 55 | "microphone": False, 56 | } 57 | }, 58 | ) 59 | 60 | # Make sure we are joined. It would be better to use join() completion 61 | # callback. 62 | time.sleep(3) 63 | 64 | SAMPLE_RATE = 16000 65 | SECONDS_TO_READ = 10 66 | FRAMES_TO_READ = SAMPLE_RATE * SECONDS_TO_READ 67 | 68 | print() 69 | print(f"Now, say something in the meeting for {int(SECONDS_TO_READ)} seconds ...") 70 | 71 | # We are creating a WAV file in memory so we can later grab the whole buffer and 72 | # send it to Google Speech-To-Text API. 73 | content = io.BufferedRandom(io.BytesIO()) 74 | 75 | out_wave = wave.open(content, "wb") 76 | out_wave.setnchannels(1) 77 | out_wave.setsampwidth(2) # 16-bit LINEAR PCM 78 | out_wave.setframerate(16000) 79 | 80 | # Here we are reading from the virtual speaker and writing the audio frames into 81 | # the in-memory WAV file. 82 | buffer = speaker.read_frames(FRAMES_TO_READ) 83 | out_wave.writeframesraw(buffer) 84 | 85 | out_wave.close() 86 | 87 | # We go to the beginning of the WAV buffer stream. 88 | content.seek(0) 89 | 90 | openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 91 | 92 | # We create and audio object with the contents of the in-memory WAV file. 93 | audio = speech.RecognitionAudio(content=content.read()) 94 | 95 | # Configure Google Speech-To-Text so it receives 16-bit LINEAR PCM. 
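# (The encoding and sample rate below must match the in-memory WAV written above: 16 kHz, mono, 16-bit samples.)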
96 | config = speech.RecognitionConfig( 97 | encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, 98 | sample_rate_hertz=16000, 99 | language_code="en-US", 100 | ) 101 | 102 | speech_client = speech.SpeechClient() 103 | 104 | print() 105 | print(f"Transcribing with Google Speech-To-Text API ...") 106 | 107 | response = speech_client.recognize(config=config, audio=audio) 108 | 109 | if len(response.results) > 0 and len(response.results[0].alternatives) > 0: 110 | prompt = response.results[0].alternatives[0].transcript 111 | 112 | print() 113 | print(f"Generating image with OpenAI for '{prompt}' ...") 114 | 115 | response = openai_client.images.generate( 116 | prompt=prompt, n=1, size=f"{CAMERA_WIDTH}x{CAMERA_HEIGHT}", response_format="b64_json" 117 | ) 118 | 119 | dalle_png = b64decode(response.data[0].b64_json) 120 | 121 | dalle_stream = io.BytesIO(dalle_png) 122 | 123 | dalle_im = Image.open(dalle_stream) 124 | 125 | try: 126 | # This is a live video stream so we need to keep drawing the image. 127 | while True: 128 | camera.write_frame(dalle_im.tobytes()) 129 | time.sleep(0.033) 130 | except KeyboardInterrupt: 131 | pass 132 | 133 | client.leave() 134 | client.release() 135 | -------------------------------------------------------------------------------- /demos/pyaudio/record_and_play.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and it will capture audio from the default 3 | # system microphone and send it to the meeting. It will also play the audio 4 | # received from the meeting via the default system speaker. 5 | # 6 | # Usage: python3 record_and_play.py -m MEETING_URL 7 | # 8 | 9 | import argparse 10 | import threading 11 | import time 12 | 13 | from daily import * 14 | 15 | import pyaudio 16 | 17 | SAMPLE_RATE = 16000 18 | NUM_CHANNELS = 1 19 | 20 | 21 | class PyAudioApp: 22 | def __init__(self, sample_rate, num_channels): 23 | self.__app_quit = False 24 | self.__sample_rate = sample_rate 25 | self.__num_channels = num_channels 26 | 27 | # We configure the microphone as non-blocking so we don't block PyAudio 28 | # when we write the frames. 29 | self.__virtual_mic = Daily.create_microphone_device( 30 | "my-mic", sample_rate=sample_rate, channels=num_channels, non_blocking=True 31 | ) 32 | 33 | # In contrast, we configure the speaker as blocking. In this case, we 34 | # read audio from the speaker and synchronously write to PyAudio's 35 | # output stream. 
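# Blocking reads are safe here because they happen on our own send_audio_stream thread (below), never inside PyAudio's input callback.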
36 | self.__virtual_speaker = Daily.create_speaker_device( 37 | "my-speaker", 38 | sample_rate=sample_rate, 39 | channels=num_channels, 40 | ) 41 | Daily.select_speaker_device("my-speaker") 42 | 43 | self.__pyaudio = pyaudio.PyAudio() 44 | self.__input_stream = self.__pyaudio.open( 45 | format=pyaudio.paInt16, 46 | channels=num_channels, 47 | rate=sample_rate, 48 | input=True, 49 | stream_callback=self.on_input_stream, 50 | ) 51 | self.__output_stream = self.__pyaudio.open( 52 | format=pyaudio.paInt16, channels=num_channels, rate=sample_rate, output=True 53 | ) 54 | 55 | self.__client = CallClient() 56 | 57 | self.__client.update_subscription_profiles( 58 | {"base": {"camera": "unsubscribed", "microphone": "subscribed"}} 59 | ) 60 | 61 | self.__thread = threading.Thread(target=self.send_audio_stream) 62 | self.__thread.start() 63 | 64 | def on_joined(self, data, error): 65 | if error: 66 | print(f"Unable to join meeting: {error}") 67 | self.__app_quit = True 68 | 69 | def run(self, meeting_url): 70 | self.__client.join( 71 | meeting_url, 72 | client_settings={ 73 | "inputs": { 74 | "camera": False, 75 | "microphone": { 76 | "isEnabled": True, 77 | "settings": { 78 | "deviceId": "my-mic", 79 | "customConstraints": { 80 | "autoGainControl": {"exact": True}, 81 | "noiseSuppression": {"exact": True}, 82 | "echoCancellation": {"exact": True}, 83 | }, 84 | }, 85 | }, 86 | }, 87 | "publishing": { 88 | "microphone": { 89 | "isPublishing": True, 90 | "sendSettings": { 91 | "channelConfig": "stereo" if self.__num_channels == 2 else "mono", 92 | }, 93 | } 94 | }, 95 | }, 96 | completion=self.on_joined, 97 | ) 98 | self.__thread.join() 99 | 100 | def leave(self): 101 | self.__app_quit = True 102 | self.__client.leave() 103 | self.__client.release() 104 | # This is not very pretty (taken from PyAudio docs). 105 | while self.__input_stream.is_active(): 106 | time.sleep(0.1) 107 | self.__input_stream.close() 108 | self.__pyaudio.terminate() 109 | 110 | def on_input_stream(self, in_data, frame_count, time_info, status): 111 | if self.__app_quit: 112 | return None, pyaudio.paAbort 113 | 114 | # If the microphone hasn't started yet `write_frames` this will return 115 | # 0. In that case, we just tell PyAudio to continue. 116 | self.__virtual_mic.write_frames(in_data) 117 | 118 | return None, pyaudio.paContinue 119 | 120 | def send_audio_stream(self): 121 | num_frames = int(self.__sample_rate / 100) 122 | while not self.__app_quit: 123 | audio = self.__virtual_speaker.read_frames(num_frames) 124 | if audio: 125 | self.__output_stream.write(audio) 126 | else: 127 | time.sleep(0.01) 128 | self.__output_stream.close() 129 | 130 | 131 | def main(): 132 | parser = argparse.ArgumentParser() 133 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 134 | parser.add_argument( 135 | "-c", "--channels", type=int, default=NUM_CHANNELS, help="Number of channels" 136 | ) 137 | parser.add_argument("-r", "--rate", type=int, default=SAMPLE_RATE, help="Sample rate") 138 | args = parser.parse_args() 139 | 140 | Daily.init() 141 | 142 | app = PyAudioApp(args.rate, args.channels) 143 | 144 | try: 145 | app.run(args.meeting) 146 | except KeyboardInterrupt: 147 | print("Ctrl-C detected. 
Exiting!") 148 | finally: 149 | app.leave() 150 | 151 | 152 | if __name__ == "__main__": 153 | main() 154 | -------------------------------------------------------------------------------- /demos/qt/qt_app.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and will receive and render video frames 3 | # for a given participant ID. 4 | # 5 | # If `-a` is specified, it will save a WAV file with the audio for only that 6 | # participant and it will also reproduce it. 7 | # 8 | # If `-s` is specified, it will render the screen share (if available) otherwise 9 | # it defaults to the participant camera. 10 | # 11 | # Usage: python qt_app.py -m MEETING_URL -p PARTICIPANT_ID [-a] [-s] 12 | # 13 | 14 | import argparse 15 | import sys 16 | import wave 17 | 18 | from PySide6 import QtCore, QtGui, QtWidgets 19 | 20 | from daily import * 21 | 22 | 23 | class DailyQtWidget(QtWidgets.QWidget): 24 | frame_signal = QtCore.Signal(VideoFrame) 25 | 26 | def __init__(self, meeting_url, participant_id, save_audio, screen_share): 27 | super().__init__() 28 | 29 | self.__client = CallClient() 30 | self.__client.update_subscription_profiles( 31 | { 32 | "base": { 33 | "microphone": "subscribed", 34 | "camera": "unsubscribed" if screen_share else "subscribed", 35 | "screenVideo": "subscribed" if screen_share else "unsubscribed", 36 | } 37 | } 38 | ) 39 | 40 | self.__frame_width = 1280 41 | self.__frame_height = 720 42 | 43 | self.frame_signal.connect(self.draw_image) 44 | 45 | self.__black_frame = QtGui.QPixmap(self.__frame_width, self.__frame_height) 46 | self.__black_frame.fill(QtGui.QColor("Black")) 47 | 48 | self.__joined = False 49 | self.__meeting_url = meeting_url 50 | self.__participant_id = participant_id 51 | 52 | self.__save_audio = save_audio 53 | if save_audio: 54 | self.__wave = wave.open(f"participant-{participant_id}.wav", "wb") 55 | self.__wave.setnchannels(1) 56 | self.__wave.setsampwidth(2) # 16-bit LINEAR PCM 57 | self.__wave.setframerate(48000) 58 | 59 | self.__video_source = "camera" 60 | if screen_share: 61 | self.__video_source = "screenVideo" 62 | 63 | self.setup_ui() 64 | 65 | def setup_ui(self): 66 | main_box = QtWidgets.QVBoxLayout(self) 67 | 68 | image_label = QtWidgets.QLabel() 69 | image_label.setPixmap(self.__black_frame) 70 | 71 | meeting_label = QtWidgets.QLabel("Meeting URL:") 72 | meeting_textedit = QtWidgets.QLineEdit() 73 | meeting_textedit.setText(self.__meeting_url) 74 | 75 | participant_label = QtWidgets.QLabel("Participant ID:") 76 | participant_textedit = QtWidgets.QLineEdit() 77 | participant_textedit.setText(self.__participant_id) 78 | 79 | button = QtWidgets.QPushButton("Join") 80 | button.clicked.connect(self.on_join_or_leave) 81 | 82 | inputs_box = QtWidgets.QHBoxLayout() 83 | inputs_box.addWidget(meeting_label) 84 | inputs_box.addWidget(meeting_textedit) 85 | inputs_box.addWidget(participant_label) 86 | inputs_box.addWidget(participant_textedit) 87 | inputs_box.addWidget(button) 88 | 89 | main_box.addWidget(image_label) 90 | main_box.addLayout(inputs_box) 91 | 92 | self.__button = button 93 | self.__image_label = image_label 94 | self.__meeting_textedit = meeting_textedit 95 | self.__participant_textedit = participant_textedit 96 | 97 | def on_join_or_leave(self): 98 | if self.__joined: 99 | self.leave() 100 | self.__button.setText("Join") 101 | else: 102 | meeting_url = self.__meeting_textedit.text() 103 | participant_id = self.__participant_textedit.text() 104 | 105 | if 
self.__save_audio: 106 | self.__wave = wave.open(f"participant-{participant_id}.wav", "wb") 107 | self.__wave.setnchannels(1) 108 | self.__wave.setsampwidth(2) # 16-bit LINEAR PCM 109 | self.__wave.setframerate(48000) 110 | 111 | self.join(meeting_url, participant_id) 112 | self.__button.setText("Leave") 113 | 114 | def on_joined(self, data, error): 115 | if not error: 116 | self.__joined = True 117 | 118 | def on_left(self, error): 119 | self.__image_label.setPixmap(self.__black_frame) 120 | self.__joined = False 121 | if self.__save_audio: 122 | self.__wave.close() 123 | 124 | def join(self, meeting_url, participant_id): 125 | if not meeting_url or not participant_id: 126 | return 127 | 128 | if self.__save_audio: 129 | self.__client.set_audio_renderer(participant_id, self.on_audio_data) 130 | 131 | self.__client.set_video_renderer( 132 | participant_id, 133 | self.on_video_frame, 134 | video_source=self.__video_source, 135 | color_format="BGRA", 136 | ) 137 | 138 | self.__client.join(meeting_url, completion=self.on_joined) 139 | 140 | def leave(self): 141 | self.__client.leave(completion=self.on_left) 142 | 143 | def draw_image(self, video_frame): 144 | image = QtGui.QImage( 145 | video_frame.buffer, 146 | video_frame.width, 147 | video_frame.height, 148 | video_frame.width * 4, 149 | QtGui.QImage.Format.Format_ARGB32, 150 | ) 151 | scaled = image.scaled( 152 | self.__frame_width, self.__frame_height, QtCore.Qt.AspectRatioMode.KeepAspectRatio 153 | ) 154 | pixmap = QtGui.QPixmap.fromImage(scaled) 155 | self.__image_label.setPixmap(pixmap) 156 | 157 | def on_audio_data(self, participant_id, audio_data, audio_source): 158 | self.__wave.writeframes(audio_data.audio_frames) 159 | 160 | def on_video_frame(self, participant_id, video_frame, video_source): 161 | self.frame_signal.emit(video_frame) 162 | 163 | 164 | def main(): 165 | parser = argparse.ArgumentParser() 166 | parser.add_argument("-m", "--meeting", default="", help="Meeting URL") 167 | parser.add_argument("-p", "--participant", default="", help="Participant ID") 168 | parser.add_argument( 169 | "-a", 170 | "--audio", 171 | default=False, 172 | action="store_true", 173 | help="Store participant audio in a file (participant-ID.wav)", 174 | ) 175 | parser.add_argument( 176 | "-s", 177 | "--screen", 178 | default=False, 179 | action="store_true", 180 | help="Render screen share (if available) instead of camera", 181 | ) 182 | args = parser.parse_args() 183 | 184 | Daily.init() 185 | 186 | app = QtWidgets.QApplication([]) 187 | 188 | widget = DailyQtWidget(args.meeting, args.participant, args.audio, args.screen) 189 | widget.resize(1280, 720) 190 | widget.show() 191 | 192 | sys.exit(app.exec()) 193 | 194 | 195 | if __name__ == "__main__": 196 | main() 197 | -------------------------------------------------------------------------------- /demos/recording/auto_recording.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and send a given image at the specified 3 | # framerate using a virtual camera device. 
4 | # 5 | # Usage: python3 auto_recording.py -m MEETING_URL -i IMAGE -f FRAME_RATE 6 | # 7 | 8 | import asyncio 9 | import argparse 10 | import time 11 | import threading 12 | from typing import Optional 13 | 14 | from daily import Daily, CallClient 15 | from PIL import Image 16 | import os 17 | import aiohttp 18 | from dotenv import load_dotenv 19 | from pydantic import Field, BaseModel 20 | 21 | # Load environment variables from .env file 22 | load_dotenv(override=True) 23 | 24 | 25 | class DailyStreamingOptions(BaseModel): 26 | """ 27 | DailyStreamingOptions equivalent in Python. 28 | """ 29 | 30 | width: Optional[int] = Field(default=None, description="Width of the video stream.") 31 | height: Optional[int] = Field(default=None, description="Height of the video stream.") 32 | fps: Optional[int] = Field(default=None, description="Frames per second of the video stream.") 33 | videobitrate: Optional[int] = Field(default=None, description="Video bitrate in kbps.") 34 | audiobitrate: Optional[int] = Field(default=None, description="Audio bitrate in kbps.") 35 | min_idle_timeout: Optional[int] = Field( 36 | default=None, description="Minimum idle timeout in seconds." 37 | ) 38 | max_duration: Optional[int] = Field( 39 | default=None, description="Maximum duration of the streaming in seconds." 40 | ) 41 | background_color: Optional[str] = Field( 42 | default=None, description="Background color for the stream." 43 | ) 44 | 45 | 46 | class DailyMeetingTokenProperties(BaseModel): 47 | """Properties for configuring a Daily meeting token. 48 | 49 | We are only using here the properties needed to configure a Daily meeting starting cloud recording automatically. 50 | 51 | Refer to the Daily API documentation for more information: 52 | https://docs.daily.co/reference/rest-api/meeting-tokens/create-meeting-token#properties 53 | """ 54 | 55 | exp: Optional[int] = Field( 56 | default=None, 57 | description="Expiration time (unix timestamp in seconds). We strongly recommend setting this value for security. If not set, the token will not expire. Refer docs for more info.", 58 | ) 59 | is_owner: Optional[bool] = Field( 60 | default=None, 61 | description="If `true`, the token will grant owner privileges in the room. Defaults to `false`.", 62 | ) 63 | start_cloud_recording: Optional[bool] = Field( 64 | default=None, 65 | description="Start cloud recording when the user joins the room. This can be used to always record and archive meetings, for example in a customer support context.", 66 | ) 67 | start_cloud_recording_opts: Optional[DailyStreamingOptions] = Field( 68 | default=None, 69 | description="Start cloud recording options for configuring automatic cloud recording when the user joins the room.", 70 | ) 71 | 72 | 73 | class DailyMeetingTokenParams(BaseModel): 74 | """Parameters for creating a Daily meeting token. 75 | 76 | Refer to the Daily API documentation for more information: 77 | https://docs.daily.co/reference/rest-api/meeting-tokens/create-meeting-token#body-params 78 | """ 79 | 80 | properties: DailyMeetingTokenProperties = Field(default_factory=DailyMeetingTokenProperties) 81 | 82 | 83 | class DailyRESTHelper: 84 | """Helper class for interacting with Daily's REST API. 85 | 86 | Args: 87 | daily_api_key: Your Daily API key 88 | daily_api_url: Daily API base URL (e.g. 
"https://api.daily.co/v1") 89 | aiohttp_session: Async HTTP session for making requests 90 | """ 91 | 92 | def __init__( 93 | self, 94 | *, 95 | daily_api_key: str, 96 | daily_api_url: str = "https://api.daily.co/v1", 97 | aiohttp_session: aiohttp.ClientSession, 98 | ): 99 | """Initialize the Daily REST helper.""" 100 | self.daily_api_key = daily_api_key 101 | self.daily_api_url = daily_api_url 102 | self.aiohttp_session = aiohttp_session 103 | 104 | async def get_token( 105 | self, 106 | room_url: str, 107 | expiry_time: float = 60 * 60, 108 | owner: bool = True, 109 | params: Optional[DailyMeetingTokenParams] = None, 110 | ) -> str: 111 | """Generate a meeting token for user to join a Daily room. 112 | 113 | Args: 114 | room_url: Daily room URL 115 | expiry_time: Token validity duration in seconds (default: 1 hour) 116 | owner: Whether token has owner privileges 117 | params: Parameters for creating a Daily meeting token 118 | 119 | Returns: 120 | str: Meeting token 121 | 122 | Raises: 123 | Exception: If token generation fails or room URL is missing 124 | """ 125 | if not room_url: 126 | raise Exception( 127 | "No Daily room specified. You must specify a Daily room in order a token to be generated." 128 | ) 129 | 130 | expiration: float = time.time() + expiry_time 131 | 132 | headers = {"Authorization": f"Bearer {self.daily_api_key}"} 133 | 134 | if params is None: 135 | params = DailyMeetingTokenParams( 136 | **{ 137 | "properties": { 138 | "is_owner": owner, 139 | "exp": int(expiration), 140 | } 141 | } 142 | ) 143 | else: 144 | params.properties.exp = int(expiration) 145 | params.properties.is_owner = owner 146 | 147 | json = params.model_dump(exclude_none=True) 148 | 149 | async with self.aiohttp_session.post( 150 | f"{self.daily_api_url}/meeting-tokens", headers=headers, json=json 151 | ) as r: 152 | if r.status != 200: 153 | text = await r.text() 154 | raise Exception(f"Failed to create meeting token (status: {r.status}): {text}") 155 | 156 | data = await r.json() 157 | 158 | return data["token"] 159 | 160 | 161 | class AutoRecordingApp: 162 | def __init__(self, image_file, framerate): 163 | self.__image = Image.open(image_file) 164 | self.__framerate = framerate 165 | 166 | self.__camera = Daily.create_camera_device( 167 | "my-camera", 168 | width=self.__image.width, 169 | height=self.__image.height, 170 | color_format="RGB", 171 | ) 172 | self.__client = CallClient() 173 | 174 | self.__client.update_subscription_profiles( 175 | {"base": {"camera": "unsubscribed", "microphone": "unsubscribed"}} 176 | ) 177 | 178 | self.__app_quit = False 179 | self.__app_error = None 180 | 181 | self.__start_event = threading.Event() 182 | self.__thread = threading.Thread(target=self.send_image) 183 | self.__thread.start() 184 | 185 | def on_joined(self, data, error): 186 | if error: 187 | print(f"Unable to join meeting: {error}") 188 | self.__app_error = error 189 | self.__start_event.set() 190 | 191 | def run(self, meeting_url, meeting_token): 192 | self.__client.join( 193 | meeting_url, 194 | meeting_token, 195 | client_settings={ 196 | "inputs": { 197 | "camera": { 198 | "isEnabled": True, 199 | "settings": {"deviceId": "my-camera"}, 200 | }, 201 | "microphone": False, 202 | } 203 | }, 204 | completion=self.on_joined, 205 | ) 206 | self.__thread.join() 207 | 208 | def leave(self): 209 | self.__app_quit = True 210 | self.__thread.join() 211 | self.__client.leave() 212 | self.__client.release() 213 | 214 | def send_image(self): 215 | self.__start_event.wait() 216 | 217 | if self.__app_error: 
218 | print("Unable to send audio!") 219 | return 220 | 221 | sleep_time = 1.0 / self.__framerate 222 | image_bytes = self.__image.tobytes() 223 | 224 | while not self.__app_quit: 225 | self.__camera.write_frame(image_bytes) 226 | time.sleep(sleep_time) 227 | 228 | 229 | async def create_access_token(room_url: str) -> str: 230 | """Helper function to generate an access token. 231 | 232 | Returns: 233 | str: Access token 234 | 235 | Raises: 236 | Exception: If token generation fails 237 | """ 238 | 239 | async with aiohttp.ClientSession() as aiohttp_session: 240 | daily_rest_helper = DailyRESTHelper( 241 | daily_api_key=os.getenv("DAILY_API_KEY", ""), 242 | daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"), 243 | aiohttp_session=aiohttp_session, 244 | ) 245 | 246 | token = await daily_rest_helper.get_token( 247 | room_url=room_url, 248 | params=DailyMeetingTokenParams( 249 | properties=DailyMeetingTokenProperties( 250 | start_cloud_recording=True, 251 | start_cloud_recording_opts=DailyStreamingOptions( 252 | width=1920, 253 | height=1080, 254 | fps=30, 255 | videobitrate=4000, 256 | audiobitrate=128, 257 | max_duration=3600, 258 | ), 259 | ), 260 | ), 261 | ) 262 | if not token: 263 | raise Exception(f"Failed to get token for room: {room_url}") 264 | 265 | return token 266 | 267 | 268 | async def main(): 269 | parser = argparse.ArgumentParser() 270 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 271 | args = parser.parse_args() 272 | 273 | meeting_token = await create_access_token(args.meeting) 274 | print(f"Meeting token: {meeting_token}") 275 | 276 | Daily.init() 277 | 278 | app = AutoRecordingApp("sample.jpg", 30) 279 | 280 | try: 281 | app.run(args.meeting, meeting_token) 282 | except KeyboardInterrupt: 283 | print("Ctrl-C detected. 
Exiting!") 284 | finally: 285 | app.leave() 286 | 287 | 288 | if __name__ == "__main__": 289 | asyncio.run(main()) 290 | -------------------------------------------------------------------------------- /demos/recording/env.example: -------------------------------------------------------------------------------- 1 | DAILY_API_KEY= 2 | DAILY_API_URL=https://api.daily.co/v1 3 | 4 | 5 | -------------------------------------------------------------------------------- /demos/recording/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daily-co/daily-python/1f354bcb7fe7201e9c274da95f2f7e17c649cb36/demos/recording/sample.jpg -------------------------------------------------------------------------------- /demos/remote_participant_control/remote_participant_control.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import asyncio 3 | import signal 4 | 5 | from daily import * 6 | 7 | 8 | class RemoteParticipantControlApp(EventHandler): 9 | """An interactive CLI where the user can first: 10 | - join a Daily call as an owner, or 11 | - join a Daily call as a regular participant 12 | 13 | Once they're joined, if they're an owner they can then: 14 | - revoke the first remote participant's canSend permission 15 | - restore the first remote participant's canSend permission 16 | - revoke the first remote participant's canReceive permission 17 | - restore the first remote participant's canReceive permission 18 | """ 19 | 20 | def __init__(self, meeting_url, owner_token): 21 | self.__meeting_url = meeting_url 22 | self.__owner_token = owner_token 23 | self.__client = CallClient(event_handler=self) 24 | self.__start_event = asyncio.Event() 25 | self.__task = asyncio.get_running_loop().create_task(self.__cli()) 26 | 27 | async def run(self): 28 | self.__start_event.set() 29 | try: 30 | await self.__task 31 | except asyncio.CancelledError: 32 | pass 33 | 34 | async def stop(self): 35 | future = asyncio.get_running_loop().create_future() 36 | 37 | def leave_completion(error): 38 | future.get_loop().call_soon_threadsafe(future.set_result, error) 39 | 40 | self.__client.leave(completion=leave_completion) 41 | 42 | await future 43 | 44 | self.__client.release() 45 | 46 | self.__task.cancel() 47 | await self.__task 48 | 49 | def on_participant_updated(self, participant): 50 | info = participant.get("info", {}) 51 | if not info.get("isLocal", True): 52 | print( 53 | f"\n\nremote participant updated! permissions: \n{participant.get('info', {}).get('permissions', None)}\n" 54 | ) 55 | 56 | async def __cli(self): 57 | await self.__start_event.wait() 58 | 59 | is_owner = await self.__run_cli_join_step() 60 | if is_owner: 61 | await self.__run_cli_owner_actions_step() 62 | else: 63 | await self.__run_cli_regular_participant_actions_step() 64 | 65 | async def __run_cli_join_step(self) -> bool: 66 | is_owner = False 67 | while True: 68 | print("Choose a join option:") 69 | print("1. Join as owner") 70 | print("2. Join as regular participant") 71 | print("3. 
Quit") 72 | join_option = await asyncio.get_event_loop().run_in_executor( 73 | None, input, "Enter choice: " 74 | ) 75 | 76 | match join_option: 77 | case "1": 78 | is_owner = True 79 | await self.__join(meeting_token=self.__owner_token) 80 | break 81 | case "2": 82 | is_owner = False 83 | await self.__join() 84 | break 85 | case "3": 86 | await self.stop() 87 | break 88 | case _: 89 | print("Invalid choice") 90 | return is_owner 91 | 92 | async def __run_cli_owner_actions_step(self): 93 | while True: 94 | print("\nChoose an action:") 95 | print("1. canSend permission: revoke") 96 | print("2. canSend permission: restore") 97 | print("3. canReceive permission: revoke") 98 | print("4. canReceive permission: restore") 99 | print("5. Quit") 100 | action = await asyncio.get_event_loop().run_in_executor(None, input, "Enter choice: ") 101 | 102 | match action: 103 | case "1": 104 | await self.__revoke_can_send_permission() 105 | case "2": 106 | await self.__restore_can_send_permission() 107 | case "3": 108 | await self.__revoke_can_receive_permission() 109 | case "4": 110 | await self.__restore_can_receive_permission() 111 | case "5": 112 | await self.stop() 113 | break 114 | case _: 115 | print("Invalid choice") 116 | 117 | async def __run_cli_regular_participant_actions_step(self): 118 | while True: 119 | print("\nChoose an action:") 120 | print("1. Quit") 121 | action = await asyncio.get_event_loop().run_in_executor(None, input, "Enter choice: ") 122 | 123 | match action: 124 | case "1": 125 | await self.stop() 126 | break 127 | case _: 128 | print("Invalid choice") 129 | 130 | async def __join(self, meeting_token=None): 131 | future = asyncio.get_running_loop().create_future() 132 | 133 | def join_completion(data, error): 134 | future.get_loop().call_soon_threadsafe(future.set_result, (data, error)) 135 | 136 | self.__client.join( 137 | meeting_url=self.__meeting_url, 138 | meeting_token=meeting_token, 139 | completion=join_completion, 140 | ) 141 | 142 | return await future 143 | 144 | async def __revoke_can_send_permission(self): 145 | await self.__update_first_remote_participant({"permissions": {"canSend": []}}) 146 | 147 | async def __restore_can_send_permission(self): 148 | await self.__update_first_remote_participant( 149 | { 150 | "permissions": { 151 | "canSend": [ 152 | "camera", 153 | "microphone", 154 | "screenVideo", 155 | "screenAudio", 156 | "customVideo", 157 | "customAudio", 158 | ] 159 | } 160 | } 161 | ) 162 | 163 | async def __revoke_can_receive_permission(self): 164 | await self.__update_first_remote_participant( 165 | {"permissions": {"canReceive": {"base": False}}} 166 | ) 167 | 168 | async def __restore_can_receive_permission(self): 169 | await self.__update_first_remote_participant( 170 | {"permissions": {"canReceive": {"base": True}}} 171 | ) 172 | 173 | async def __update_first_remote_participant(self, updates): 174 | future = asyncio.get_running_loop().create_future() 175 | 176 | def update_completion(error): 177 | future.get_loop().call_soon_threadsafe(future.set_result, (error)) 178 | 179 | first_participant_id = self.__get_first_remote_participant_id() 180 | if first_participant_id is None: 181 | print("No remote participant found; skipping") 182 | else: 183 | self.__client.update_remote_participants( 184 | remote_participants={first_participant_id: updates}, 185 | completion=update_completion, 186 | ) 187 | 188 | return await future 189 | 190 | def __get_first_remote_participant_id(self) -> str | None: 191 | participants = self.__client.participants() 192 | 
return next((key for key in participants.keys() if key != "local"), None) 193 | 194 | 195 | async def sig_handler(app: RemoteParticipantControlApp): 196 | print("Ctrl-C detected. Exiting!") 197 | await app.stop() 198 | 199 | 200 | async def main(): 201 | parser = argparse.ArgumentParser() 202 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 203 | parser.add_argument("-o", "--owner-token", required=True, help="Owner token") 204 | args = parser.parse_args() 205 | 206 | Daily.init() 207 | 208 | app = RemoteParticipantControlApp(args.meeting, args.owner_token) 209 | 210 | loop = asyncio.get_running_loop() 211 | loop.add_signal_handler(signal.SIGINT, lambda *args: asyncio.create_task(sig_handler(app))) 212 | 213 | await app.run() 214 | 215 | 216 | if __name__ == "__main__": 217 | asyncio.run(main()) 218 | -------------------------------------------------------------------------------- /demos/requirements.txt: -------------------------------------------------------------------------------- 1 | celery~=5.4.0 2 | deepgram-sdk~=3.5.1 3 | flask~=3.0.3 4 | google-cloud-speech~=2.27.0 5 | google-cloud-texttospeech~=2.17.2 6 | numpy~=1.26.4 7 | openai~=1.42.0 8 | pillow~=10.4.0 9 | pyaudio~=0.2.14 10 | pycairo~=1.26.1 11 | pygobject~=3.48.2 12 | pyside6~=6.7.2 13 | redis~=5.0.8 14 | yolov5~=7.0.13 15 | aiohttp~=3.10.11 16 | pydantic~=2.8.2 17 | python-dotenv 18 | -------------------------------------------------------------------------------- /demos/vad/native_vad.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and it will try to detect speech by 3 | # analyzing incoming audio frames. There are a few arguments useful to improve 4 | # speech detection during long sentences. 
5 | # 6 | # Usage: python3 native_vad.py -m MEETING_URL 7 | # 8 | 9 | import argparse 10 | import sys 11 | import threading 12 | import time 13 | 14 | from enum import Enum 15 | 16 | from daily import * 17 | 18 | 19 | SAMPLE_RATE = 16000 20 | NUM_CHANNELS = 1 21 | 22 | SPEECH_THRESHOLD = 0.90 23 | SPEECH_THRESHOLD_MS = 300 24 | SILENCE_THRESHOLD_MS = 700 25 | VAD_RESET_PERIOD_MS = 2000 26 | 27 | 28 | class SpeechStatus(Enum): 29 | SPEAKING = 1 30 | NOT_SPEAKING = 2 31 | 32 | 33 | class SpeechDetection: 34 | def __init__(self, speech_threshold_ms, silence_threshold_ms, sample_rate, num_channels): 35 | self.__speech_threshold = SPEECH_THRESHOLD 36 | self.__speech_threshold_ms = speech_threshold_ms 37 | self.__silence_threshold_ms = silence_threshold_ms 38 | 39 | self.__status = SpeechStatus.NOT_SPEAKING 40 | self.__started_speaking_time = 0 41 | self.__last_speaking_time = 0 42 | 43 | self.__vad = Daily.create_native_vad( 44 | reset_period_ms=VAD_RESET_PERIOD_MS, sample_rate=sample_rate, channels=num_channels 45 | ) 46 | 47 | def analyze(self, buffer): 48 | confidence = self.__vad.analyze_frames(buffer) 49 | current_time_ms = time.time() * 1000 50 | 51 | if confidence > self.__speech_threshold: 52 | diff_ms = current_time_ms - self.__started_speaking_time 53 | 54 | if self.__status == SpeechStatus.NOT_SPEAKING: 55 | self.__started_speaking_time = current_time_ms 56 | 57 | if diff_ms > self.__speech_threshold_ms: 58 | self.__status = SpeechStatus.SPEAKING 59 | self.__last_speaking_time = current_time_ms 60 | else: 61 | diff_ms = current_time_ms - self.__last_speaking_time 62 | if diff_ms > self.__silence_threshold_ms: 63 | self.__status = SpeechStatus.NOT_SPEAKING 64 | 65 | if self.__status == SpeechStatus.SPEAKING: 66 | print("SPEAKING: " + str(confidence)) 67 | else: 68 | print("NOT SPEAKING: " + str(confidence)) 69 | 70 | 71 | class NativeVadApp: 72 | def __init__(self, speech_threshold_ms, silence_threshold_ms, sample_rate, num_channels): 73 | self.__sample_rate = sample_rate 74 | 75 | self.__vad = SpeechDetection( 76 | speech_threshold_ms=speech_threshold_ms, 77 | silence_threshold_ms=silence_threshold_ms, 78 | sample_rate=sample_rate, 79 | num_channels=num_channels, 80 | ) 81 | 82 | self.__speaker_device = Daily.create_speaker_device( 83 | "my-speaker", sample_rate=sample_rate, channels=num_channels 84 | ) 85 | Daily.select_speaker_device("my-speaker") 86 | 87 | self.__client = CallClient() 88 | self.__client.update_subscription_profiles( 89 | {"base": {"camera": "unsubscribed", "microphone": "subscribed"}} 90 | ) 91 | 92 | self.__app_quit = False 93 | self.__app_error = None 94 | 95 | self.__start_event = threading.Event() 96 | self.__thread = threading.Thread(target=self.receive_audio) 97 | self.__thread.start() 98 | 99 | def on_joined(self, data, error): 100 | if error: 101 | print(f"Unable to join meeting: {error}") 102 | self.__app_error = error 103 | self.__start_event.set() 104 | 105 | def run(self, meeting_url): 106 | self.__client.join(meeting_url, completion=self.on_joined) 107 | self.__thread.join() 108 | 109 | def leave(self): 110 | self.__app_quit = True 111 | self.__thread.join() 112 | self.__client.leave() 113 | self.__client.release() 114 | 115 | def receive_audio(self): 116 | self.__start_event.wait() 117 | 118 | if self.__app_error: 119 | print(f"Unable to receive audio!") 120 | return 121 | 122 | while not self.__app_quit: 123 | # Read 10ms worth of audio frames. 
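# (sample_rate / 100 frames, e.g. 160 frames per read at the default 16 kHz.)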
124 | buffer = self.__speaker_device.read_frames(int(self.__sample_rate / 100)) 125 | if len(buffer) > 0: 126 | self.__vad.analyze(buffer) 127 | 128 | 129 | def main(): 130 | parser = argparse.ArgumentParser() 131 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 132 | parser.add_argument( 133 | "-c", "--channels", type=int, default=NUM_CHANNELS, help="Number of channels" 134 | ) 135 | parser.add_argument("-r", "--rate", type=int, default=SAMPLE_RATE, help="Sample rate") 136 | parser.add_argument( 137 | "-p", "--speech", type=int, default=SPEECH_THRESHOLD_MS, help="Speech threshold in ms" 138 | ) 139 | parser.add_argument( 140 | "-s", "--silence", type=int, default=SILENCE_THRESHOLD_MS, help="Silence threshold in ms" 141 | ) 142 | 143 | args = parser.parse_args() 144 | 145 | Daily.init() 146 | 147 | app = NativeVadApp(args.speech, args.silence, args.rate, args.channels) 148 | 149 | try: 150 | app.run(args.meeting) 151 | except KeyboardInterrupt: 152 | print("Ctrl-C detected. Exiting!", file=sys.stderr) 153 | finally: 154 | app.leave() 155 | 156 | 157 | if __name__ == "__main__": 158 | main() 159 | -------------------------------------------------------------------------------- /demos/video/send_image.py: -------------------------------------------------------------------------------- 1 | # 2 | # This demo will join a Daily meeting and send a given image at the specified 3 | # framerate using a virtual camera device. 4 | # 5 | # Usage: python3 send_image.py -m MEETING_URL -i IMAGE -f FRAME_RATE 6 | # 7 | 8 | import argparse 9 | import time 10 | import threading 11 | 12 | from daily import * 13 | from PIL import Image 14 | 15 | 16 | class SendImageApp: 17 | def __init__(self, image_file, framerate): 18 | self.__image = Image.open(image_file) 19 | self.__framerate = framerate 20 | 21 | self.__camera = Daily.create_camera_device( 22 | "my-camera", width=self.__image.width, height=self.__image.height, color_format="RGB" 23 | ) 24 | 25 | self.__client = CallClient() 26 | 27 | self.__client.update_subscription_profiles( 28 | {"base": {"camera": "unsubscribed", "microphone": "unsubscribed"}} 29 | ) 30 | 31 | self.__app_quit = False 32 | self.__app_error = None 33 | 34 | self.__start_event = threading.Event() 35 | self.__thread = threading.Thread(target=self.send_image) 36 | self.__thread.start() 37 | 38 | def on_joined(self, data, error): 39 | if error: 40 | print(f"Unable to join meeting: {error}") 41 | self.__app_error = error 42 | self.__start_event.set() 43 | 44 | def run(self, meeting_url): 45 | self.__client.join( 46 | meeting_url, 47 | client_settings={ 48 | "inputs": { 49 | "camera": {"isEnabled": True, "settings": {"deviceId": "my-camera"}}, 50 | "microphone": False, 51 | } 52 | }, 53 | completion=self.on_joined, 54 | ) 55 | self.__thread.join() 56 | 57 | def leave(self): 58 | self.__app_quit = True 59 | self.__thread.join() 60 | self.__client.leave() 61 | self.__client.release() 62 | 63 | def send_image(self): 64 | self.__start_event.wait() 65 | 66 | if self.__app_error: 67 | print(f"Unable to send audio!") 68 | return 69 | 70 | sleep_time = 1.0 / self.__framerate 71 | image_bytes = self.__image.tobytes() 72 | 73 | while not self.__app_quit: 74 | self.__camera.write_frame(image_bytes) 75 | time.sleep(sleep_time) 76 | 77 | 78 | def main(): 79 | parser = argparse.ArgumentParser() 80 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 81 | parser.add_argument("-i", "--image", required=True, help="Image to send") 82 | 
parser.add_argument("-f", "--framerate", type=int, required=True, help="Framerate") 83 | args = parser.parse_args() 84 | 85 | Daily.init() 86 | 87 | app = SendImageApp(args.image, args.framerate) 88 | 89 | try: 90 | app.run(args.meeting) 91 | except KeyboardInterrupt: 92 | print("Ctrl-C detected. Exiting!") 93 | finally: 94 | app.leave() 95 | 96 | 97 | if __name__ == "__main__": 98 | main() 99 | -------------------------------------------------------------------------------- /demos/yolo/yolo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import queue 3 | import time 4 | import threading 5 | 6 | from PIL import Image 7 | from ultralytics import YOLO 8 | 9 | from daily import * 10 | 11 | 12 | class DailyYOLO(EventHandler): 13 | def __init__(self): 14 | self.__client = CallClient(event_handler=self) 15 | 16 | self.__model = YOLO("yolov8n.pt") 17 | self.__camera = None 18 | 19 | self.__time = time.time() 20 | 21 | self.__queue = queue.Queue() 22 | 23 | self.__app_quit = False 24 | 25 | self.__thread = threading.Thread(target=self.process_frames) 26 | self.__thread.start() 27 | 28 | def run(self, meeting_url): 29 | print(f"Connecting to {meeting_url}...") 30 | self.__client.join(meeting_url) 31 | print("Waiting for participants to join...") 32 | self.__thread.join() 33 | 34 | def leave(self): 35 | self.__app_quit = True 36 | self.__thread.join() 37 | self.__client.leave() 38 | self.__client.release() 39 | 40 | def on_participant_joined(self, participant): 41 | print(f"Participant {participant['id']} joined, analyzing frames...") 42 | self.__client.set_video_renderer(participant["id"], self.on_video_frame) 43 | 44 | def setup_camera(self, video_frame): 45 | if not self.__camera: 46 | self.__camera = Daily.create_camera_device( 47 | "camera", width=video_frame.width, height=video_frame.height, color_format="RGB" 48 | ) 49 | self.__client.update_inputs( 50 | {"camera": {"isEnabled": True, "settings": {"deviceId": "camera"}}} 51 | ) 52 | 53 | def process_frames(self): 54 | while not self.__app_quit: 55 | video_frame = self.__queue.get() 56 | image = Image.frombytes( 57 | "RGBA", (video_frame.width, video_frame.height), video_frame.buffer 58 | ) 59 | results = self.__model.track(image) 60 | 61 | pil = Image.fromarray(results[0].plot(), mode="RGB").tobytes() 62 | 63 | self.__camera.write_frame(pil) 64 | 65 | def on_video_frame(self, participant_id, video_frame, video_source): 66 | # Process ~15 frames per second (considering incoming frames at 30fps). 67 | if time.time() - self.__time > 0.05: 68 | self.__time = time.time() 69 | self.setup_camera(video_frame) 70 | self.__queue.put(video_frame) 71 | 72 | 73 | def main(): 74 | parser = argparse.ArgumentParser() 75 | parser.add_argument("-m", "--meeting", required=True, help="Meeting URL") 76 | args = parser.parse_args() 77 | 78 | Daily.init() 79 | 80 | app = DailyYOLO() 81 | 82 | try: 83 | app.run(args.meeting) 84 | except KeyboardInterrupt: 85 | print("Ctrl-C detected. Exiting!") 86 | finally: 87 | app.leave() 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | 3 | # You can set these variables from the command line, and also 4 | # from the environment for the first two. 
5 | SPHINXOPTS ?= 6 | SPHINXBUILD ?= sphinx-build 7 | SOURCEDIR = src 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 20 | -------------------------------------------------------------------------------- /docs/src/api_reference.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ==================================== 3 | 4 | .. autoclass:: daily.AudioData 5 | :members: 6 | 7 | .. autoclass:: daily.CustomAudioSource 8 | :members: 9 | 10 | .. autoclass:: daily.CustomAudioTrack 11 | :members: 12 | 13 | .. autoclass:: daily.CallClient 14 | :members: 15 | 16 | .. autoclass:: daily.Daily 17 | :members: 18 | 19 | .. autoclass:: daily.EventHandler 20 | :members: 21 | 22 | .. autoclass:: daily.VideoFrame 23 | :members: 24 | 25 | .. autoclass:: daily.VirtualCameraDevice 26 | :members: 27 | 28 | .. autoclass:: daily.VirtualMicrophoneDevice 29 | :members: 30 | 31 | .. autoclass:: daily.VirtualSpeakerDevice 32 | :members: 33 | -------------------------------------------------------------------------------- /docs/src/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration for the Sphinx documentation builder. 2 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 3 | 4 | import os 5 | import sys 6 | 7 | import toml 8 | 9 | 10 | def get_release_version() -> str: 11 | """Get the release version from the Cargo.toml file. 12 | 13 | :return: 14 | """ 15 | cargo_content = toml.load("../../Cargo.toml") 16 | return cargo_content["package"]["version"] 17 | 18 | 19 | # Project 20 | project = "daily-python" 21 | copyright = "2023-2025 Daily" 22 | version = get_release_version() 23 | 24 | 25 | # General 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 29 | # ones. 30 | extensions = [ 31 | "sphinx.ext.autodoc", 32 | ] 33 | 34 | autodoc_typehints = "description" 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ["_templates"] 38 | 39 | # List of patterns, relative to source directory, that match files and 40 | # directories to ignore when looking for source files. 41 | # This pattern also affects html_static_path and html_extra_path. 42 | exclude_patterns = [] 43 | 44 | # If true, the current module name will be prepended to all description 45 | # unit titles (such as .. function::). 46 | add_module_names = False 47 | 48 | # HTML output 49 | 50 | # The theme to use for HTML and HTML Help pages. See the documentation for 51 | # a list of builtin themes. 52 | html_theme = "sphinx_rtd_theme" 53 | 54 | # Add any paths that contain custom static files (such as style sheets) here, 55 | # relative to this directory. They are copied after the builtin static files, 56 | # so a file named "default.css" will overwrite the builtin "default.css". 
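# (Left empty below: this project ships no custom static assets.)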
57 | html_static_path = [] 58 | 59 | html_favicon = "favicon.ico" 60 | 61 | # Don't show "Video page source" link 62 | html_show_sourcelink = False 63 | 64 | # Don't show "Built with Sphinx" 65 | html_show_sphinx = False 66 | -------------------------------------------------------------------------------- /docs/src/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daily-co/daily-python/1f354bcb7fe7201e9c274da95f2f7e17c649cb36/docs/src/favicon.ico -------------------------------------------------------------------------------- /docs/src/index.rst: -------------------------------------------------------------------------------- 1 | Daily Client SDK for Python 2 | ======================================= 3 | 4 | Welcome to `Daily`_'s Python client SDK API reference! 5 | 6 | The Daily Client SDK for Python allows you to build video and audio calling into 7 | your native desktop and server applications. 8 | 9 | You can find installation, reference guides and a bunch of cool demos in our 10 | `reference docs `_. 11 | 12 | .. _Daily: https://daily.co 13 | 14 | .. toctree:: 15 | :maxdepth: 1 16 | 17 | api_reference 18 | types 19 | 20 | 21 | Indices and tables 22 | ======================================= 23 | 24 | * :ref:`genindex` 25 | * :ref:`modindex` 26 | * :ref:`search` 27 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["maturin>=1.2,<2.0"] 3 | build-backend = "maturin" 4 | 5 | [project] 6 | name = "daily-python" 7 | description = "Daily Client SDK for Python" 8 | license = { text = "BSD 2-Clause License" } 9 | dynamic = ["version"] 10 | readme = "README.md" 11 | requires-python = ">=3.7" 12 | keywords = ["webrtc", "audio", "video", "ai"] 13 | classifiers = [ 14 | "Development Status :: 5 - Production/Stable", 15 | "Intended Audience :: Developers", 16 | "License :: OSI Approved :: BSD License", 17 | "Topic :: Communications :: Conferencing", 18 | "Topic :: Multimedia :: Sound/Audio", 19 | "Topic :: Multimedia :: Video", 20 | "Topic :: Scientific/Engineering :: Artificial Intelligence" 21 | ] 22 | 23 | [project.urls] 24 | Home = "https://docs.daily.co/guides/products/ai-toolkit" 25 | Documentation = "https://reference-python.daily.co" 26 | Source = "https://github.com/daily-co/daily-python" 27 | Website = "https://daily.co" 28 | 29 | [tool.maturin] 30 | features = ["pyo3/extension-module"] 31 | -------------------------------------------------------------------------------- /requirements-linux.txt: -------------------------------------------------------------------------------- 1 | patchelf==0.17.2.2 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | maturin==1.8.3 2 | setuptools==78.0.2 3 | sphinx==7.4.7 4 | sphinx-rtd-theme==2.0.0 5 | toml==0.10.2 6 | -------------------------------------------------------------------------------- /src/call_client/delegate.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::HashMap, 3 | ffi::CStr, 4 | sync::{Arc, Mutex}, 5 | }; 6 | 7 | use pyo3::{ 8 | prelude::*, 9 | types::{PyBytes, PyTuple}, 10 | }; 11 | 12 | use daily_core::prelude::*; 13 | 14 | use super::event::{ 15 | args_from_event, completion_args_from_event, 
method_name_from_event_action, 16 | request_id_from_event, update_inner_values, Event, 17 | }; 18 | 19 | use crate::{PyAudioData, PyVideoFrame}; 20 | 21 | pub(crate) enum PyCallClientCompletion { 22 | UnaryFn(PyObject), 23 | BinaryFn(PyObject), 24 | } 25 | 26 | impl From for PyObject { 27 | fn from(value: PyCallClientCompletion) -> Self { 28 | match value { 29 | PyCallClientCompletion::UnaryFn(c) => c, 30 | PyCallClientCompletion::BinaryFn(c) => c, 31 | } 32 | } 33 | } 34 | 35 | type PyCallClientDelegateOnEventFn = 36 | unsafe fn(py: Python<'_>, delegate_ctx: &DelegateContext, event: &Event); 37 | 38 | type PyCallClientDelegateOnVideoFrameFn = unsafe fn( 39 | py: Python<'_>, 40 | delegate_ctx: &DelegateContext, 41 | renderer_id: u64, 42 | peer_id: *const libc::c_char, 43 | frame: *const NativeVideoFrame, 44 | ); 45 | 46 | type PyCallClientDelegateOnAudioDataFn = unsafe fn( 47 | py: Python<'_>, 48 | delegate_ctx: &DelegateContext, 49 | renderer_id: u64, 50 | peer_id: *const libc::c_char, 51 | audio_data: *const NativeAudioData, 52 | ); 53 | 54 | #[derive(Clone)] 55 | pub(crate) struct AudioRendererData { 56 | pub(crate) audio_source: String, 57 | pub(crate) callback: PyObject, 58 | pub(crate) audio_buffer: Vec, 59 | pub(crate) callback_interval_ms: u32, 60 | pub(crate) callback_count: u32, 61 | } 62 | 63 | #[derive(Clone)] 64 | pub(crate) struct VideoRendererData { 65 | pub(crate) video_source: String, 66 | pub(crate) callback: PyObject, 67 | } 68 | 69 | #[derive(Clone)] 70 | pub(crate) struct PyCallClientDelegateFns { 71 | pub(crate) on_event: Option, 72 | pub(crate) on_video_frame: Option, 73 | pub(crate) on_audio_data: Option, 74 | } 75 | 76 | pub(crate) struct PyCallClientInner { 77 | pub(crate) event_handler_callback: Mutex>, 78 | pub(crate) delegates: Mutex, 79 | pub(crate) completions: Mutex>, 80 | pub(crate) video_renderers: Mutex>, 81 | pub(crate) audio_renderers: Mutex>, 82 | // Non-blocking updates 83 | pub(crate) active_speaker: Mutex, 84 | pub(crate) inputs: Mutex, 85 | pub(crate) participant_counts: Mutex, 86 | pub(crate) publishing: Mutex, 87 | pub(crate) subscriptions: Mutex, 88 | pub(crate) subscription_profiles: Mutex, 89 | pub(crate) network_stats: Mutex, 90 | } 91 | 92 | #[derive(Clone)] 93 | pub(crate) struct DelegateContext { 94 | pub(crate) inner: Arc, 95 | } 96 | 97 | #[derive(Clone)] 98 | pub(crate) struct DelegateContextPtr { 99 | pub(crate) ptr: *const DelegateContext, 100 | } 101 | 102 | unsafe impl Send for DelegateContextPtr {} 103 | 104 | pub(crate) unsafe extern "C" fn on_event_native( 105 | delegate: *mut libc::c_void, 106 | event_json: *const libc::c_char, 107 | _json_len: isize, 108 | ) { 109 | // Acquire the GIL before checking if there's a delegate available. If 110 | // PyCallClient is dropping it will cleanup the delegates and will 111 | // temporarily release the GIL so we can proceed. 112 | Python::with_gil(|py| { 113 | let delegate_ctx_ptr = delegate as *const DelegateContext; 114 | 115 | // We increment the reference count because otherwise it will get dropped 116 | // when Arc::from_raw() takes ownership, and we still want to keep the 117 | // delegate pointer around. 118 | Arc::increment_strong_count(delegate_ctx_ptr); 119 | 120 | let delegate_ctx = Arc::from_raw(delegate_ctx_ptr); 121 | 122 | // Don't lock in the if statement otherwise the lock is held throughout 123 | // the delegate call. 
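// `on_event` is a plain function pointer inside an `Option`, so this copies it out and the mutex guard is dropped at the end of the statement.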
124 | let delegate = delegate_ctx.inner.delegates.lock().unwrap().on_event; 125 | 126 | if let Some(delegate) = delegate { 127 | let event_string = CStr::from_ptr(event_json).to_string_lossy().into_owned(); 128 | let event = serde_json::from_str::(event_string.as_str()).unwrap(); 129 | 130 | delegate(py, &delegate_ctx, &event); 131 | } 132 | }); 133 | } 134 | 135 | pub(crate) unsafe extern "C" fn on_audio_data_native( 136 | delegate: *mut libc::c_void, 137 | renderer_id: u64, 138 | peer_id: *const libc::c_char, 139 | audio_data: *const NativeAudioData, 140 | ) { 141 | // Acquire the GIL before checking if there's a delegate available. If 142 | // PyCallClient is dropping it will cleanup the delegates and will 143 | // temporarily release the GIL so we can proceed. 144 | Python::with_gil(|py| { 145 | let delegate_ctx_ptr = delegate as *const DelegateContext; 146 | 147 | // We increment the reference count because otherwise it will get dropped 148 | // when Arc::from_raw() takes ownership, and we still want to keep the 149 | // delegate pointer around. 150 | Arc::increment_strong_count(delegate_ctx_ptr); 151 | 152 | let delegate_ctx = Arc::from_raw(delegate_ctx_ptr); 153 | 154 | // Don't lock in the if statement otherwise the lock is held throughout 155 | // the delegate call. 156 | let delegate = delegate_ctx.inner.delegates.lock().unwrap().on_audio_data; 157 | 158 | if let Some(delegate) = delegate { 159 | delegate(py, &delegate_ctx, renderer_id, peer_id, audio_data); 160 | } 161 | }); 162 | } 163 | 164 | pub(crate) unsafe extern "C" fn on_video_frame_native( 165 | delegate: *mut libc::c_void, 166 | renderer_id: u64, 167 | peer_id: *const libc::c_char, 168 | frame: *const NativeVideoFrame, 169 | ) { 170 | // Acquire the GIL before checking if there's a delegate available. If 171 | // PyCallClient is dropping it will cleanup the delegates and will 172 | // temporarily release the GIL so we can proceed. 173 | Python::with_gil(|py| { 174 | let delegate_ctx_ptr = delegate as *const DelegateContext; 175 | 176 | // We increment the reference count because otherwise it will get dropped 177 | // when Arc::from_raw() takes ownership, and we still want to keep the 178 | // delegate pointer around. 179 | Arc::increment_strong_count(delegate_ctx_ptr); 180 | 181 | let delegate_ctx = Arc::from_raw(delegate_ctx_ptr); 182 | 183 | // Don't lock in the if statement otherwise the lock is held throughout 184 | // the delegate call. 185 | let delegate = delegate_ctx.inner.delegates.lock().unwrap().on_video_frame; 186 | 187 | if let Some(delegate) = delegate { 188 | delegate(py, &delegate_ctx, renderer_id, peer_id, frame); 189 | } 190 | }); 191 | } 192 | 193 | pub(crate) unsafe fn on_event(py: Python<'_>, delegate_ctx: &DelegateContext, event: &Event) { 194 | match event.action.as_str() { 195 | "request-completed" => { 196 | if let Some(request_id) = request_id_from_event(event) { 197 | // Don't lock in the if statement otherwise the lock is held 198 | // throughout the callback call. 
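// Using `remove` (rather than `get`) also guarantees a completion fires at most once per request id.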
199 | let completion = delegate_ctx 200 | .inner 201 | .completions 202 | .lock() 203 | .unwrap() 204 | .remove(&request_id); 205 | if let Some(completion) = completion { 206 | if let Some(args) = completion_args_from_event(&completion, event) { 207 | let py_args = PyTuple::new_bound(py, args); 208 | 209 | let callback: PyObject = completion.into(); 210 | 211 | if let Err(error) = callback.call1(py, py_args) { 212 | error.write_unraisable_bound(py, None); 213 | } 214 | } 215 | } 216 | } 217 | } 218 | action => { 219 | if let Some(method_name) = method_name_from_event_action(action) { 220 | if let Some(args) = args_from_event(event) { 221 | // Update inner values asynchronously. We do it before 222 | // invoking the callback so new values are available if we 223 | // use the getters inside the callback. 224 | update_inner_values(py, delegate_ctx, action, args.clone()); 225 | 226 | let callback = delegate_ctx.inner.event_handler_callback.lock().unwrap(); 227 | 228 | if let Some(callback) = callback.as_ref() { 229 | let py_args = PyTuple::new_bound(py, args); 230 | 231 | if let Err(error) = callback.call_method1(py, method_name, py_args) { 232 | error.write_unraisable_bound(py, None); 233 | } 234 | } 235 | } 236 | } 237 | } 238 | } 239 | } 240 | 241 | pub(crate) unsafe fn on_audio_data( 242 | py: Python<'_>, 243 | delegate_ctx: &DelegateContext, 244 | renderer_id: u64, 245 | peer_id: *const libc::c_char, 246 | data: *const NativeAudioData, 247 | ) { 248 | // In this block we get a mutable reference to the renderer. We use that to 249 | // check if we should call the callback depending on the number of 10ms 250 | // intervals requested by the user, and also to extend our buffer if we 251 | // shouldn't call the callback. 252 | let mut call_callback = false; 253 | if let Some(renderer_data) = delegate_ctx 254 | .inner 255 | .audio_renderers 256 | .lock() 257 | .unwrap() 258 | .get_mut(&renderer_id) 259 | { 260 | // Clear our internal buffer. 261 | if renderer_data.callback_count == 0 { 262 | renderer_data.audio_buffer.clear(); 263 | } 264 | 265 | // Increment to indicate this is a new call. 266 | renderer_data.callback_count += 1; 267 | 268 | // This callback is called every 10ms. 269 | let current_interval_ms = renderer_data.callback_count * 10; 270 | 271 | // Extend our internal buffer 272 | let num_bytes = 273 | ((*data).bits_per_sample as usize * (*data).num_channels * (*data).num_audio_frames) 274 | / 8; 275 | let slice = std::slice::from_raw_parts((*data).audio_frames, num_bytes); 276 | renderer_data.audio_buffer.extend_from_slice(slice); 277 | 278 | // Check if we should call the python callback or not. 279 | call_callback = current_interval_ms == renderer_data.callback_interval_ms; 280 | if call_callback { 281 | renderer_data.callback_count = 0; 282 | } 283 | }; 284 | 285 | // Don't lock in the if statement otherwise the lock is held throughout the 286 | // callback call. 
287 | let renderer_data = delegate_ctx 288 | .inner 289 | .audio_renderers 290 | .lock() 291 | .unwrap() 292 | .get(&renderer_id) 293 | .cloned(); 294 | 295 | if let Some(renderer_data) = renderer_data { 296 | let peer_id = CStr::from_ptr(peer_id).to_string_lossy().into_owned(); 297 | 298 | if call_callback { 299 | let num_bytes = renderer_data.audio_buffer.len(); 300 | let bytes_per_sample = (*data).bits_per_sample as usize / 8; 301 | let frame_size = bytes_per_sample * (*data).num_channels; 302 | let num_audio_frames = renderer_data.audio_buffer.len() / frame_size; 303 | 304 | let audio_data = PyAudioData { 305 | bits_per_sample: (*data).bits_per_sample, 306 | sample_rate: (*data).sample_rate, 307 | num_channels: (*data).num_channels, 308 | num_audio_frames, 309 | audio_frames: PyBytes::bound_from_ptr( 310 | py, 311 | renderer_data.audio_buffer.as_ptr(), 312 | num_bytes, 313 | ) 314 | .into_py(py), 315 | }; 316 | 317 | let args = PyTuple::new_bound( 318 | py, 319 | &[ 320 | peer_id.into_py(py), 321 | audio_data.into_py(py), 322 | renderer_data.audio_source.into_py(py), 323 | ], 324 | ); 325 | 326 | if let Err(error) = renderer_data.callback.call1(py, args) { 327 | error.write_unraisable_bound(py, None); 328 | } 329 | } 330 | } 331 | } 332 | 333 | pub(crate) unsafe fn on_video_frame( 334 | py: Python<'_>, 335 | delegate_ctx: &DelegateContext, 336 | renderer_id: u64, 337 | peer_id: *const libc::c_char, 338 | frame: *const NativeVideoFrame, 339 | ) { 340 | // Don't lock in the if statement otherwise the lock is held throughout the 341 | // callback call. 342 | let renderer_data = delegate_ctx 343 | .inner 344 | .video_renderers 345 | .lock() 346 | .unwrap() 347 | .get(&renderer_id) 348 | .cloned(); 349 | 350 | if let Some(renderer_data) = renderer_data { 351 | let peer_id = CStr::from_ptr(peer_id).to_string_lossy().into_owned(); 352 | 353 | let color_format = CStr::from_ptr((*frame).color_format) 354 | .to_string_lossy() 355 | .into_owned(); 356 | 357 | let video_frame = PyVideoFrame { 358 | buffer: PyBytes::bound_from_ptr(py, (*frame).buffer, (*frame).buffer_size).into_py(py), 359 | width: (*frame).width, 360 | height: (*frame).height, 361 | timestamp_us: (*frame).timestamp_us, 362 | color_format: color_format.into_py(py), 363 | }; 364 | 365 | let args = PyTuple::new_bound( 366 | py, 367 | &[ 368 | peer_id.into_py(py), 369 | video_frame.into_py(py), 370 | renderer_data.video_source.into_py(py), 371 | ], 372 | ); 373 | 374 | if let Err(error) = renderer_data.callback.call1(py, args) { 375 | error.write_unraisable_bound(py, None); 376 | } 377 | } 378 | } 379 | -------------------------------------------------------------------------------- /src/call_client/event.rs: -------------------------------------------------------------------------------- 1 | use crate::util::dict::DictValue; 2 | 3 | use super::delegate::{DelegateContext, PyCallClientCompletion}; 4 | 5 | use serde::Deserialize; 6 | use serde_json::Value; 7 | 8 | use pyo3::prelude::*; 9 | 10 | #[derive(Debug, Deserialize)] 11 | pub(crate) struct Event { 12 | pub action: String, 13 | #[serde(flatten)] 14 | pub data: DictValue, 15 | } 16 | 17 | pub(crate) fn method_name_from_event_action(action: &str) -> Option<&str> { 18 | let method_name = match action { 19 | "active-speaker-changed" => "on_active_speaker_changed", 20 | "app-message" => "on_app_message", 21 | "available-devices-updated" => "on_available_devices_updated", 22 | "call-state-updated" => "on_call_state_updated", 23 | "dialin-connected" => "on_dialin_connected", 24 | 
"dialin-ready" => "on_dialin_ready", 25 | "dialin-error" => "on_dialin_error", 26 | "dialin-stopped" => "on_dialin_stopped", 27 | "dialin-warning" => "on_dialin_warning", 28 | "dialout-connected" => "on_dialout_connected", 29 | "dialout-answered" => "on_dialout_answered", 30 | "dialout-error" => "on_dialout_error", 31 | "dialout-stopped" => "on_dialout_stopped", 32 | "dialout-warning" => "on_dialout_warning", 33 | "error" => "on_error", 34 | "inputs-updated" => "on_inputs_updated", 35 | "live-stream-error" => "on_live_stream_error", 36 | "live-stream-started" => "on_live_stream_started", 37 | "live-stream-stopped" => "on_live_stream_stopped", 38 | "live-stream-updated" => "on_live_stream_updated", 39 | "live-stream-warning" => "on_live_stream_warning", 40 | "network-stats-updated" => "on_network_stats_updated", 41 | "participant-counts-updated" => "on_participant_counts_updated", 42 | "participant-joined" => "on_participant_joined", 43 | "participant-left" => "on_participant_left", 44 | "participant-updated" => "on_participant_updated", 45 | "publishing-updated" => "on_publishing_updated", 46 | "recording-error" => "on_recording_error", 47 | "recording-started" => "on_recording_started", 48 | "recording-stopped" => "on_recording_stopped", 49 | "subscription-profiles-updated" => "on_subscription_profiles_updated", 50 | "subscriptions-updated" => "on_subscriptions_updated", 51 | "transcription-error" => "on_transcription_error", 52 | "transcription-message" => "on_transcription_message", 53 | "transcription-started" => "on_transcription_started", 54 | "transcription-stopped" => "on_transcription_stopped", 55 | "transcription-updated" => "on_transcription_updated", 56 | a => { 57 | tracing::debug!("unimplemented event handler {a}"); 58 | return None; 59 | } 60 | }; 61 | 62 | Some(method_name) 63 | } 64 | 65 | pub(crate) fn request_id_from_event(event: &Event) -> Option { 66 | if let Some(object) = event.data.0.as_object() { 67 | if let Some(request_id) = object.get("requestId") { 68 | if let Some(id) = request_id.get("id") { 69 | id.as_u64() 70 | } else { 71 | None 72 | } 73 | } else { 74 | None 75 | } 76 | } else { 77 | None 78 | } 79 | } 80 | 81 | pub(crate) fn args_from_event(event: &Event) -> Option> { 82 | let object = event.data.0.as_object().expect("event should be an object"); 83 | match event.action.as_str() { 84 | "active-speaker-changed" => object 85 | .get("participant") 86 | .map(|participant| vec![DictValue(participant.clone())]), 87 | "app-message" => { 88 | if let Some(message) = object.get("msgData") { 89 | object 90 | .get("from") 91 | .map(|from| vec![DictValue(message.clone()), DictValue(from.clone())]) 92 | } else { 93 | None 94 | } 95 | } 96 | "available-devices-updated" => object 97 | .get("availableDevices") 98 | .map(|devices| vec![DictValue(devices.clone())]), 99 | "call-state-updated" => object 100 | .get("state") 101 | .map(|state| vec![DictValue(state.clone())]), 102 | "dialin-connected" => Some(vec![DictValue(Value::Object(object.clone()))]), 103 | "dialin-ready" => object 104 | .get("sipEndpoint") 105 | .map(|sip_endpoint| vec![DictValue(sip_endpoint.clone())]), 106 | "dialin-error" => Some(vec![DictValue(Value::Object(object.clone()))]), 107 | "dialin-stopped" => Some(vec![DictValue(Value::Object(object.clone()))]), 108 | "dialin-warning" => Some(vec![DictValue(Value::Object(object.clone()))]), 109 | "dialout-connected" => Some(vec![DictValue(Value::Object(object.clone()))]), 110 | "dialout-answered" => Some(vec![DictValue(Value::Object(object.clone()))]), 111 
| "dialout-error" => Some(vec![DictValue(Value::Object(object.clone()))]), 112 | "dialout-stopped" => Some(vec![DictValue(Value::Object(object.clone()))]), 113 | "dialout-warning" => Some(vec![DictValue(Value::Object(object.clone()))]), 114 | "error" => object 115 | .get("message") 116 | .map(|message| vec![DictValue(message.clone())]), 117 | "inputs-updated" => object 118 | .get("inputs") 119 | .map(|inputs| vec![DictValue(inputs.clone())]), 120 | "live-stream-error" => { 121 | if let Some(stream_id) = object.get("streamId") { 122 | object 123 | .get("message") 124 | .map(|message| vec![DictValue(stream_id.clone()), DictValue(message.clone())]) 125 | } else { 126 | None 127 | } 128 | } 129 | "live-stream-started" => object 130 | .get("status") 131 | .map(|status| vec![DictValue(status.clone())]), 132 | "live-stream-stopped" => object 133 | .get("streamId") 134 | .map(|stream_id| vec![DictValue(stream_id.clone())]), 135 | "live-stream-updated" => object 136 | .get("update") 137 | .map(|update| vec![DictValue(update.clone())]), 138 | "live-stream-warning" => { 139 | if let Some(stream_id) = object.get("streamId") { 140 | object 141 | .get("message") 142 | .map(|message| vec![DictValue(stream_id.clone()), DictValue(message.clone())]) 143 | } else { 144 | None 145 | } 146 | } 147 | "network-stats-updated" => Some(vec![DictValue(Value::Object(object.clone()))]), 148 | "participant-counts-updated" => Some(vec![DictValue(Value::Object(object.clone()))]), 149 | "participant-joined" => object 150 | .get("participant") 151 | .map(|participant| vec![DictValue(participant.clone())]), 152 | "participant-left" => { 153 | if let Some(participant) = object.get("participant") { 154 | object 155 | .get("leftReason") 156 | .map(|reason| vec![DictValue(participant.clone()), DictValue(reason.clone())]) 157 | } else { 158 | None 159 | } 160 | } 161 | "participant-updated" => object 162 | .get("participant") 163 | .map(|participant| vec![DictValue(participant.clone())]), 164 | "publishing-updated" => object 165 | .get("publishing") 166 | .map(|publishing| vec![DictValue(publishing.clone())]), 167 | "recording-error" => { 168 | if let Some(stream_id) = object.get("streamId") { 169 | object 170 | .get("message") 171 | .map(|message| vec![DictValue(stream_id.clone()), DictValue(message.clone())]) 172 | } else { 173 | None 174 | } 175 | } 176 | "recording-started" => object 177 | .get("status") 178 | .map(|status| vec![DictValue(status.clone())]), 179 | "recording-stopped" => object 180 | .get("streamId") 181 | .map(|stream_id| vec![DictValue(stream_id.clone())]), 182 | "subscription-profiles-updated" => object 183 | .get("profiles") 184 | .map(|profiles| vec![DictValue(profiles.clone())]), 185 | "subscriptions-updated" => object 186 | .get("subscriptions") 187 | .map(|subscriptions| vec![DictValue(subscriptions.clone())]), 188 | "transcription-error" => object 189 | .get("message") 190 | .map(|message| vec![DictValue(message.clone())]), 191 | "transcription-message" => Some(vec![DictValue(Value::Object(object.clone()))]), 192 | "transcription-started" => object 193 | .get("status") 194 | .map(|status| vec![DictValue(status.clone())]), 195 | "transcription-stopped" => { 196 | if let Some(updated_by) = object.get("updatedBy") { 197 | Some(vec![ 198 | DictValue(updated_by.clone()), 199 | DictValue(Value::Bool(false)), 200 | ]) 201 | } else { 202 | object.get("stoppedByError").map(|stopped_by_error| { 203 | vec![DictValue(Value::Null), DictValue(stopped_by_error.clone())] 204 | }) 205 | } 206 | } 207 | 
"transcription-updated" => object 208 | .get("update") 209 | .map(|update| vec![DictValue(update.clone())]), 210 | a => panic!("args for event {a} not supported"), 211 | } 212 | } 213 | 214 | pub(crate) fn completion_args_from_event( 215 | completion: &PyCallClientCompletion, 216 | event: &Event, 217 | ) -> Option> { 218 | let object = event.data.0.as_object().expect("event should be an object"); 219 | match event.action.as_str() { 220 | "request-completed" => { 221 | if let Some(request_success) = object.get("requestSuccess") { 222 | let args = match completion { 223 | PyCallClientCompletion::UnaryFn(_) => { 224 | vec![DictValue(Value::Null)] 225 | } 226 | PyCallClientCompletion::BinaryFn(_) => { 227 | vec![DictValue(request_success.clone()), DictValue(Value::Null)] 228 | } 229 | }; 230 | Some(args) 231 | } else if let Some(request_error) = object.get("requestError") { 232 | let args = request_error.get("msg").map(|msg| match completion { 233 | PyCallClientCompletion::UnaryFn(_) => vec![DictValue(msg.clone())], 234 | PyCallClientCompletion::BinaryFn(_) => { 235 | vec![DictValue(Value::Null), DictValue(msg.clone())] 236 | } 237 | }); 238 | Some(args.unwrap()) 239 | } else { 240 | let args = match completion { 241 | PyCallClientCompletion::UnaryFn(_) => { 242 | vec![DictValue(Value::Null)] 243 | } 244 | _ => panic!("completion binary functions should have an error or success"), 245 | }; 246 | Some(args) 247 | } 248 | } 249 | a => panic!("completion args for event {a} not supported"), 250 | } 251 | } 252 | 253 | pub(crate) fn update_inner_values( 254 | py: Python<'_>, 255 | delegate_ctx: &DelegateContext, 256 | event_action: &str, 257 | args: Vec, 258 | ) { 259 | match event_action { 260 | "active-speaker-changed" => { 261 | let mut active_speaker = delegate_ctx.inner.active_speaker.lock().unwrap(); 262 | *active_speaker = args.first().unwrap().to_object(py); 263 | } 264 | "inputs-updated" => { 265 | let mut inputs = delegate_ctx.inner.inputs.lock().unwrap(); 266 | *inputs = args.first().unwrap().to_object(py); 267 | } 268 | "network-stats-updated" => { 269 | let mut network_stats = delegate_ctx.inner.network_stats.lock().unwrap(); 270 | *network_stats = args.first().unwrap().to_object(py); 271 | } 272 | "participant-counts-updated" => { 273 | let mut participant_counts = delegate_ctx.inner.participant_counts.lock().unwrap(); 274 | *participant_counts = args.first().unwrap().to_object(py); 275 | } 276 | "publishing-updated" => { 277 | let mut publishing = delegate_ctx.inner.publishing.lock().unwrap(); 278 | *publishing = args.first().unwrap().to_object(py); 279 | } 280 | "subscription-profiles-updated" => { 281 | let mut profiles = delegate_ctx.inner.subscription_profiles.lock().unwrap(); 282 | *profiles = args.first().unwrap().to_object(py); 283 | } 284 | "subscriptions-updated" => { 285 | let mut subscriptions = delegate_ctx.inner.subscriptions.lock().unwrap(); 286 | *subscriptions = args.first().unwrap().to_object(py); 287 | } 288 | _ => (), 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /src/call_client/event_handler.rs: -------------------------------------------------------------------------------- 1 | #![allow(unused_variables)] 2 | 3 | use pyo3::prelude::*; 4 | use pyo3::types::PyTuple; 5 | 6 | /// This a base class for event handlers. Event handlers are used to handle 7 | /// events from the meeting, for example when a participant joins or leaves the 8 | /// meeting or when the active speaker changes. 
9 | /// 10 | /// Event handlers are registered when creating a :class:`CallClient` and 11 | /// should be created as a subclass of this class. Since event handlers are 12 | /// created as a subclass, there is no need implement all the handler methods. 13 | #[derive(Clone, Debug)] 14 | #[pyclass(name = "EventHandler", module = "daily", subclass)] 15 | pub struct PyEventHandler; 16 | 17 | #[pymethods] 18 | impl PyEventHandler { 19 | // Since this is a base class it might be that subclasses have constructor 20 | // parameters. Constructor arguments would be passed to new() even if we 21 | // don't really need them. So, in order to accept any subclass arguments we 22 | // just use a *args extra positional arguments trick. 23 | #[new] 24 | #[pyo3(signature = (*args))] 25 | fn new(args: &Bound<'_, PyTuple>) -> PyResult { 26 | Ok(Self {}) 27 | } 28 | 29 | /// Event emitted when the active speaker of the call has changed. 30 | /// 31 | /// :param dict participant: See :ref:`Participant` 32 | fn on_active_speaker_changed(&self, participant: PyObject) -> PyResult<()> { 33 | Ok(()) 34 | } 35 | 36 | /// Event emitted when a custom app message is received from another 37 | /// participant or via the REST API. 38 | /// 39 | /// :param string message: Message received from a remote participant 40 | /// :param string sender: Sender of the message 41 | fn on_app_message(&self, message: PyObject, sender: PyObject) -> PyResult<()> { 42 | Ok(()) 43 | } 44 | 45 | /// Event emitted when an audio device is plugged or removed. 46 | /// 47 | /// :param dict available_devices: See :ref:`AvailableDevices` 48 | fn on_available_devices_updated(&self, available_devices: PyObject) -> PyResult<()> { 49 | Ok(()) 50 | } 51 | 52 | /// Event emitted when the call state changes, normally as a consequence of 53 | /// invocations to :func:`daily.CallClient.join` or 54 | /// :func:`daily.CallClient.leave` 55 | /// 56 | /// :param string state: See :ref:`CallState` 57 | fn on_call_state_updated(&self, state: PyObject) -> PyResult<()> { 58 | Ok(()) 59 | } 60 | 61 | /// Event emitted when the session with the dial-in remote end is 62 | /// established (i.e. SIP endpoint or PSTN are connectd to the Daily room). 63 | /// 64 | /// Note: connected does not mean media (audio or video) has started flowing 65 | /// between the room and PSTN, it means the room received the connection 66 | /// request and both endpoints are negotiating the media flow. 67 | /// 68 | /// :param Mapping[str, Any] data: See :ref:`DialinConnectedEvent` 69 | fn on_dialin_connected(&self, data: PyObject) -> PyResult<()> { 70 | Ok(()) 71 | } 72 | 73 | /// Event emitted in the case of dial-in errors which are fatal and the 74 | /// service cannot proceed. For example, an error in SDP negotiation is 75 | /// fatal to the media/SIP pipeline and will result in dialin-error being 76 | /// triggered. 77 | /// 78 | /// :param Mapping[str, Any] data: See :ref:`DialinEvent` 79 | fn on_dialin_error(&self, data: PyObject) -> PyResult<()> { 80 | Ok(()) 81 | } 82 | 83 | /// Event emitted when dial-in is ready. This happens after the room has 84 | /// connected to the SIP endpoint and the system is ready to receive dial-in 85 | /// calls. 86 | /// 87 | /// :param string sip_endpoint: The SIP endpoint the room has connected to 88 | fn on_dialin_ready(&self, sip_endpoint: PyObject) -> PyResult<()> { 89 | Ok(()) 90 | } 91 | 92 | /// Event emitted when the dial-in remote end disconnects the call. 
93 | /// 94 | /// :param Mapping[str, Any] data: See :ref:`DialinStoppedEvent` 95 | fn on_dialin_stopped(&self, data: PyObject) -> PyResult<()> { 96 | Ok(()) 97 | } 98 | 99 | /// Event emitted there is a dial-in non-fatal error, such as the selected 100 | /// codec not being used and a fallback codec being utilized. 101 | /// 102 | /// :param Mapping[str, Any] data: See :ref:`DialinEvent` 103 | fn on_dialin_warning(&self, data: PyObject) -> PyResult<()> { 104 | Ok(()) 105 | } 106 | 107 | /// Event emitted when the session with the dial-out remote end is 108 | /// answered. 109 | /// 110 | /// :param Mapping[str, Any] data: See :ref:`DialoutEvent` 111 | fn on_dialout_answered(&self, data: PyObject) -> PyResult<()> { 112 | Ok(()) 113 | } 114 | 115 | /// Event emitted when the session with the dial-out remote end is 116 | /// established. 117 | /// 118 | /// :param Mapping[str, Any] data: See :ref:`DialoutEvent` 119 | fn on_dialout_connected(&self, data: PyObject) -> PyResult<()> { 120 | Ok(()) 121 | } 122 | 123 | /// Event emitted in the case of dial-out errors which are fatal and the 124 | /// service cannot proceed. For example, an error in SDP negotiation is 125 | /// fatal to the media/SIP pipeline and will result in dialout-error being 126 | /// triggered. 127 | /// 128 | /// :param Mapping[str, Any] data: See :ref:`DialoutEvent` 129 | fn on_dialout_error(&self, data: PyObject) -> PyResult<()> { 130 | Ok(()) 131 | } 132 | 133 | /// Event emitted when the dial-out remote end disconnects the call or the 134 | /// call is stopped by calling :func:`daily.CallClient.stop_dialout`. 135 | /// 136 | /// :param Mapping[str, Any] data: See :ref:`DialoutEvent` 137 | fn on_dialout_stopped(&self, data: PyObject) -> PyResult<()> { 138 | Ok(()) 139 | } 140 | 141 | /// Event emitted there is a dial-out non-fatal error, such as the selected 142 | /// codec not being used and a fallback codec being utilized. 143 | /// 144 | /// :param Mapping[str, Any] data: See :ref:`DialoutEvent` 145 | fn on_dialout_warning(&self, data: PyObject) -> PyResult<()> { 146 | Ok(()) 147 | } 148 | 149 | /// Event emitted when an error occurs. 150 | /// 151 | /// :param string message: The error message 152 | fn on_error(&self, message: PyObject) -> PyResult<()> { 153 | Ok(()) 154 | } 155 | 156 | /// Event emitted when the input settings are updated, normally as a 157 | /// consequence of invocations to :func:`daily.CallClient.join`, 158 | /// :func:`daily.CallClient.leave` or 159 | /// :func:`daily.CallClient.update_inputs`. 160 | /// 161 | /// :param dict inputs: See :ref:`InputSettings` 162 | fn on_inputs_updated(&self, input_settings: PyObject) -> PyResult<()> { 163 | Ok(()) 164 | } 165 | 166 | /// Event emitted for all participants when a live stream encounters an 167 | /// error. 168 | /// 169 | /// :param string stream_id: The ID of the live stream that generated the error 170 | /// :param string message: The error message 171 | fn on_live_stream_error(&self, stream_id: PyObject, message: PyObject) -> PyResult<()> { 172 | Ok(()) 173 | } 174 | 175 | /// Event emitted for all participants when a live stream starts. 176 | /// 177 | /// :param dict status: See :ref:`LiveStreamStatus` 178 | fn on_live_stream_started(&self, status: PyObject) -> PyResult<()> { 179 | Ok(()) 180 | } 181 | 182 | /// Event emitted for all participants when a live stream stops. 
183 | /// 184 | /// :param string stream_id: The ID of the live stream that was stopped 185 | fn on_live_stream_stopped(&self, stream_id: PyObject) -> PyResult<()> { 186 | Ok(()) 187 | } 188 | 189 | /// Event emitted for all participants when a live stream is updated. 190 | /// 191 | /// :param Mapping[str, Any] update: See :ref:`LiveStreamUpdate` 192 | fn on_live_stream_updated(&self, update: PyObject) -> PyResult<()> { 193 | Ok(()) 194 | } 195 | 196 | /// Event emitted for all participants when a live stream encounters a 197 | /// warning. 198 | /// 199 | /// :param string stream_id: The ID of the live stream that generated the warning 200 | /// :param string message: The warning message 201 | fn on_live_stream_warning(&self, stream_id: PyObject, message: PyObject) -> PyResult<()> { 202 | Ok(()) 203 | } 204 | 205 | /// Event emitted when the logging & telemetry backend updates the network 206 | /// statistics. 207 | /// 208 | /// :param dict stats: See :ref:`NetworkStats` 209 | fn on_network_stats_updated(&self, stats: PyObject) -> PyResult<()> { 210 | Ok(()) 211 | } 212 | 213 | /// Event emitted when the participant count changes. 214 | /// 215 | /// :param dict stats: See :ref:`ParticipantCounts` 216 | fn on_participant_counts_updated(&self, counts: PyObject) -> PyResult<()> { 217 | Ok(()) 218 | } 219 | 220 | /// Event emitted when a participant joins the call. 221 | /// 222 | /// :param dict participant: See :ref:`Participant` 223 | fn on_participant_joined(&self, participant: PyObject) -> PyResult<()> { 224 | Ok(()) 225 | } 226 | 227 | /// Event emitted when a participant has left the call. 228 | /// 229 | /// :param dict participant: See :ref:`Participant` 230 | /// :param string reason: See :ref:`ParticipantLeftReason` 231 | fn on_participant_left(&self, participant: PyObject, reason: PyObject) -> PyResult<()> { 232 | Ok(()) 233 | } 234 | 235 | /// Event emitted when a participant is updated. This can mean either the 236 | /// participant's metadata was updated, or the tracks belonging to the 237 | /// participant changed. 238 | /// 239 | /// :param dict participant: See :ref:`Participant` 240 | fn on_participant_updated(&self, participant: PyObject) -> PyResult<()> { 241 | Ok(()) 242 | } 243 | 244 | /// Event emitted when the publishing settings are updated, normally as a 245 | /// consequence of invocations to :func:`daily.CallClient.join`, 246 | /// :func:`daily.CallClient.update_publishing`. 247 | /// 248 | /// :param dict publishing_settings: See :ref:`PublishingSettings` 249 | fn on_publishing_updated(&self, publishing_settings: PyObject) -> PyResult<()> { 250 | Ok(()) 251 | } 252 | 253 | /// Event emitted when a recording error occurs. 254 | /// 255 | /// :param string stream_id: The ID of the recording that generated the error 256 | /// :param string message: The error message 257 | fn on_recording_error(&self, stream_id: PyObject, message: PyObject) -> PyResult<()> { 258 | Ok(()) 259 | } 260 | 261 | /// Event emitted for all participants when a recording starts. 262 | /// 263 | /// :param dict status: See :ref:`RecordingStatus` 264 | fn on_recording_started(&self, status: PyObject) -> PyResult<()> { 265 | Ok(()) 266 | } 267 | 268 | /// Event emitted for all participants when a recording stops. 
269 | /// 270 | /// :param string stream_id: The ID of the live stream that was stopped 271 | fn on_recording_stopped(&self, stream_id: PyObject) -> PyResult<()> { 272 | Ok(()) 273 | } 274 | 275 | /// Event emitted when the subscription profile settings are updated as a 276 | /// consequence of calls to 277 | /// :func:`daily.CallClient.update_subscription_profiles`. 278 | /// 279 | /// :param dict subscription_profiles: See :ref:`SubscriptionProfileSettings` 280 | fn on_subscription_profiles_updated(&self, subscription_profiles: PyObject) -> PyResult<()> { 281 | Ok(()) 282 | } 283 | 284 | /// Event emitted when the subscription settings are updated as a 285 | /// consequence of calls to :func:`daily.CallClient.update_subscriptions`. 286 | /// 287 | /// :param dict subscriptions: See :ref:`ParticipantSubscriptions` 288 | fn on_subscriptions_updated(&self, subscriptions: PyObject) -> PyResult<()> { 289 | Ok(()) 290 | } 291 | 292 | /// Event emitted when a transcription error occurs. 293 | /// 294 | /// :param string message: The error message 295 | fn on_transcription_error(&self, message: PyObject) -> PyResult<()> { 296 | Ok(()) 297 | } 298 | 299 | /// Event emitted when a transcription message is received. 300 | /// 301 | /// :param dict message: See :ref:`TranscriptionMessage` 302 | fn on_transcription_message(&self, message: PyObject) -> PyResult<()> { 303 | Ok(()) 304 | } 305 | 306 | /// Event emitted when transcription starts. 307 | /// 308 | /// :param dict status: See :ref:`TranscriptionStatus` 309 | fn on_transcription_started(&self, status: PyObject) -> PyResult<()> { 310 | Ok(()) 311 | } 312 | 313 | /// Event emitted when transcription stops. 314 | /// 315 | /// :param str stopped_by: The ID of the participant that stopped the transcription or None 316 | /// :param str stopped_by_error: Whether the transcription was stopped by an error 317 | fn on_transcription_stopped( 318 | &self, 319 | stopped_by: PyObject, 320 | stopped_by_error: PyObject, 321 | ) -> PyResult<()> { 322 | Ok(()) 323 | } 324 | 325 | /// Event emitted when transcription is updated. 
326 | /// 327 | /// :param Mapping[str, Any] update: See :ref:`TranscriptionUpdated` 328 | fn on_transcription_updated(&self, update: PyObject) -> PyResult<()> { 329 | Ok(()) 330 | } 331 | } 332 | -------------------------------------------------------------------------------- /src/call_client/live_stream.rs: -------------------------------------------------------------------------------- 1 | use serde::Serialize; 2 | use serde_json::Value; 3 | 4 | #[derive(Debug, Serialize)] 5 | #[serde(tag = "preset")] 6 | pub enum LiveStreamEndpoints { 7 | #[serde(rename = "preconfigured")] 8 | PreConfigured { 9 | #[serde(rename = "preConfiguredEndpoints")] 10 | pre_configured_endpoints: Vec, 11 | }, 12 | #[serde(rename = "rtmpUrls")] 13 | RtmpUrls { 14 | #[serde(rename = "rtmpUrls")] 15 | rtmp_urls: Vec, 16 | }, 17 | } 18 | 19 | #[derive(Debug, Serialize)] 20 | #[serde(rename_all = "camelCase")] 21 | pub struct StartLiveStreamProperties { 22 | pub endpoints: LiveStreamEndpoints, 23 | #[serde(skip_serializing_if = "Option::is_none")] 24 | pub streaming_settings: Option, 25 | #[serde(skip_serializing_if = "Option::is_none")] 26 | pub stream_id: Option, 27 | #[serde(skip_serializing_if = "Option::is_none")] 28 | pub force_new: Option, 29 | } 30 | -------------------------------------------------------------------------------- /src/call_client/recording.rs: -------------------------------------------------------------------------------- 1 | use serde::Serialize; 2 | use serde_json::Value; 3 | 4 | #[derive(Debug, Serialize)] 5 | #[serde(rename_all = "camelCase")] 6 | pub struct StartRecordingProperties { 7 | pub streaming_settings: Option, 8 | pub instance_id: Option, 9 | pub force_new: Option, 10 | } 11 | -------------------------------------------------------------------------------- /src/context.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::CString; 2 | use std::str::FromStr; 3 | use std::sync::atomic::{AtomicU64, Ordering}; 4 | 5 | use crate::PyNativeVad; 6 | use crate::PyVirtualCameraDevice; 7 | use crate::PyVirtualMicrophoneDevice; 8 | use crate::PyVirtualSpeakerDevice; 9 | 10 | use webrtc_daily::sys::{ 11 | color_format::ColorFormat, device_manager::NativeDeviceManager, vad::NativeWebrtcVad, 12 | virtual_camera_device::NativeVirtualCameraDevice, 13 | virtual_microphone_device::NativeVirtualMicrophoneDevice, 14 | virtual_speaker_device::NativeVirtualSpeakerDevice, 15 | }; 16 | 17 | use daily_core::prelude::{ 18 | daily_core_context_create_audio_device_module, daily_core_context_create_device_manager, 19 | daily_core_context_create_vad, daily_core_context_create_virtual_camera_device, 20 | daily_core_context_create_virtual_microphone_device, 21 | daily_core_context_create_virtual_speaker_device, 22 | daily_core_context_device_manager_enumerated_devices, 23 | daily_core_context_device_manager_get_user_media, 24 | daily_core_context_get_selected_microphone_device, daily_core_context_select_speaker_device, 25 | WebrtcAudioDeviceModule, WebrtcPeerConnectionFactory, WebrtcTaskQueueFactory, WebrtcThread, 26 | }; 27 | 28 | use pyo3::exceptions; 29 | use pyo3::prelude::*; 30 | 31 | lazy_static! 
{ 32 | pub(crate) static ref GLOBAL_CONTEXT: DailyContext = DailyContext::new(); 33 | } 34 | 35 | pub(crate) struct DailyContext { 36 | request_id: AtomicU64, 37 | device_manager: NativeDeviceManager, 38 | } 39 | 40 | impl DailyContext { 41 | #[allow(clippy::new_without_default)] 42 | pub fn new() -> Self { 43 | let device_manager_ptr = unsafe { daily_core_context_create_device_manager() }; 44 | 45 | let device_manager = NativeDeviceManager::from(device_manager_ptr as *mut _); 46 | 47 | Self { 48 | device_manager, 49 | request_id: AtomicU64::new(0), 50 | } 51 | } 52 | 53 | pub fn next_request_id(&self) -> u64 { 54 | self.request_id.fetch_add(1, Ordering::SeqCst) 55 | } 56 | 57 | pub fn get_enumerated_devices(&self) -> *mut libc::c_char { 58 | const EMPTY: &[u8] = b"[]\0"; 59 | 60 | let devices = unsafe { 61 | daily_core_context_device_manager_enumerated_devices( 62 | self.device_manager.as_ptr() as *const _ 63 | ) 64 | }; 65 | 66 | if devices.is_null() { 67 | EMPTY.as_ptr().cast_mut() as *mut _ 68 | } else { 69 | // NOTE(aleix): Leaking because get_enumerated_devices() uses CStr. 70 | devices as *mut _ 71 | } 72 | } 73 | 74 | #[allow(clippy::missing_safety_doc)] 75 | pub unsafe fn get_user_media( 76 | &self, 77 | peer_connection_factory: *mut WebrtcPeerConnectionFactory, 78 | signaling_thread: *mut WebrtcThread, 79 | worker_thread: *mut WebrtcThread, 80 | network_thread: *mut WebrtcThread, 81 | constraints: *const libc::c_char, 82 | ) -> *mut libc::c_void { 83 | unsafe { 84 | daily_core_context_device_manager_get_user_media( 85 | self.device_manager.as_ptr() as *mut _, 86 | peer_connection_factory, 87 | signaling_thread, 88 | worker_thread, 89 | network_thread, 90 | constraints, 91 | ) 92 | } 93 | } 94 | 95 | #[allow(clippy::missing_safety_doc)] 96 | pub unsafe fn create_audio_device_module( 97 | &self, 98 | task_queue_factory: *mut WebrtcTaskQueueFactory, 99 | ) -> *mut WebrtcAudioDeviceModule { 100 | unsafe { 101 | daily_core_context_create_audio_device_module( 102 | self.device_manager.as_ptr() as *mut _, 103 | task_queue_factory, 104 | ) 105 | } 106 | } 107 | 108 | pub fn create_camera_device( 109 | &self, 110 | device_name: &str, 111 | width: u32, 112 | height: u32, 113 | color_format: &str, 114 | ) -> PyResult { 115 | let device_name_cstr = 116 | CString::new(device_name).expect("invalid virtual camera device name string"); 117 | let color_format_cstr = CString::new(color_format).expect("invalid color format string"); 118 | 119 | if let Ok(color_format) = ColorFormat::from_str(color_format) { 120 | let mut py_device = 121 | PyVirtualCameraDevice::new(device_name, width, height, color_format); 122 | 123 | unsafe { 124 | let camera_device = daily_core_context_create_virtual_camera_device( 125 | self.device_manager.as_ptr() as *mut _, 126 | device_name_cstr.as_ptr(), 127 | width, 128 | height, 129 | color_format_cstr.as_ptr(), 130 | ); 131 | 132 | py_device 133 | .attach_camera_device(NativeVirtualCameraDevice::from(camera_device as *mut _)); 134 | } 135 | 136 | Ok(py_device) 137 | } else { 138 | Err(exceptions::PyValueError::new_err(format!( 139 | "invalid color format '{color_format}'" 140 | ))) 141 | } 142 | } 143 | 144 | pub fn create_speaker_device( 145 | &self, 146 | device_name: &str, 147 | sample_rate: u32, 148 | channels: u8, 149 | non_blocking: bool, 150 | ) -> PyResult { 151 | let device_name_cstr = 152 | CString::new(device_name).expect("invalid virtual speaker device name string"); 153 | 154 | let mut py_device = 155 | PyVirtualSpeakerDevice::new(device_name, sample_rate, 
channels, non_blocking); 156 | 157 | unsafe { 158 | let speaker_device = daily_core_context_create_virtual_speaker_device( 159 | self.device_manager.as_ptr() as *mut _, 160 | device_name_cstr.as_ptr(), 161 | sample_rate, 162 | channels, 163 | non_blocking, 164 | ); 165 | 166 | py_device.attach_audio_device(NativeVirtualSpeakerDevice::from(speaker_device)); 167 | } 168 | 169 | Ok(py_device) 170 | } 171 | 172 | pub fn create_microphone_device( 173 | &self, 174 | device_name: &str, 175 | sample_rate: u32, 176 | channels: u8, 177 | non_blocking: bool, 178 | ) -> PyResult { 179 | let device_name_cstr = 180 | CString::new(device_name).expect("invalid virtual microphone device name string"); 181 | 182 | let mut py_device = PyVirtualMicrophoneDevice::new(device_name, sample_rate, channels); 183 | 184 | unsafe { 185 | let microphone_device = daily_core_context_create_virtual_microphone_device( 186 | self.device_manager.as_ptr() as *mut _, 187 | device_name_cstr.as_ptr(), 188 | sample_rate, 189 | channels, 190 | non_blocking, 191 | ); 192 | 193 | py_device.attach_audio_device(NativeVirtualMicrophoneDevice::from(microphone_device)); 194 | } 195 | 196 | Ok(py_device) 197 | } 198 | 199 | pub fn create_native_vad( 200 | &self, 201 | reset_period_ms: u32, 202 | sample_rate: u32, 203 | channels: u8, 204 | ) -> PyResult { 205 | let mut py_vad = PyNativeVad::new(reset_period_ms, sample_rate, channels); 206 | 207 | unsafe { 208 | let webrtc_vad = daily_core_context_create_vad(reset_period_ms, sample_rate, channels); 209 | 210 | py_vad.attach_webrtc_vad(NativeWebrtcVad::from(webrtc_vad)); 211 | } 212 | 213 | Ok(py_vad) 214 | } 215 | 216 | pub fn select_speaker_device(&self, device_name: &str) -> PyResult<()> { 217 | let device_name_cstr = 218 | CString::new(device_name).expect("invalid virtual speaker device name string"); 219 | 220 | let selected = unsafe { 221 | daily_core_context_select_speaker_device( 222 | self.device_manager.as_ptr() as *mut _, 223 | device_name_cstr.as_ptr(), 224 | ) 225 | }; 226 | 227 | if selected { 228 | Ok(()) 229 | } else { 230 | Err(exceptions::PyRuntimeError::new_err( 231 | "unable to select virtual speaker device", 232 | )) 233 | } 234 | } 235 | 236 | pub fn get_selected_microphone_device(&self) -> *const libc::c_char { 237 | const EMPTY: &[u8] = b"\0"; 238 | 239 | let device = unsafe { 240 | daily_core_context_get_selected_microphone_device( 241 | self.device_manager.as_ptr() as *const _ 242 | ) 243 | }; 244 | 245 | if device.is_null() { 246 | EMPTY.as_ptr().cast() 247 | } else { 248 | let microphone = NativeVirtualMicrophoneDevice::from(device); 249 | microphone.name() 250 | } 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate lazy_static; 3 | 4 | pub(crate) mod call_client; 5 | pub(crate) mod context; 6 | pub(crate) mod media; 7 | pub(crate) mod util; 8 | 9 | use call_client::{PyCallClient, PyEventHandler}; 10 | use context::GLOBAL_CONTEXT; 11 | use media::{ 12 | PyAudioData, PyCustomAudioSource, PyCustomAudioTrack, PyNativeVad, PyVideoFrame, 13 | PyVirtualCameraDevice, PyVirtualMicrophoneDevice, PyVirtualSpeakerDevice, 14 | }; 15 | 16 | use std::env; 17 | use std::ffi::CString; 18 | use std::ptr; 19 | 20 | use daily_core::prelude::{ 21 | daily_core_context_create_with_threads, daily_core_context_destroy, daily_core_set_log_level, 22 | LogLevel, NativeAboutClient, NativeContextDelegate, 
NativeContextDelegatePtr, 23 | NativeRawWebRtcContextDelegate, NativeWebRtcContextDelegate, NativeWebRtcContextDelegateFns, 24 | NativeWebRtcContextDelegatePtr, WebrtcAudioDeviceModule, WebrtcPeerConnectionFactory, 25 | WebrtcTaskQueueFactory, WebrtcThread, 26 | }; 27 | 28 | use pyo3::prelude::*; 29 | 30 | const DAILY_PYTHON_NAME: &str = "daily-python"; 31 | const DAILY_PYTHON_VERSION: &str = env!("CARGO_PKG_VERSION"); 32 | 33 | unsafe extern "C" fn set_audio_device( 34 | _delegate: *mut libc::c_void, 35 | _device_id: *const libc::c_char, 36 | ) { 37 | // Probably nothing to do here since our microphone device is already 38 | // properly selected during getUserMedia. 39 | } 40 | 41 | unsafe extern "C" fn get_audio_device(_delegate: *mut libc::c_void) -> *const libc::c_char { 42 | GLOBAL_CONTEXT.get_selected_microphone_device() 43 | } 44 | 45 | unsafe extern "C" fn get_enumerated_devices(_delegate: *mut libc::c_void) -> *mut libc::c_char { 46 | GLOBAL_CONTEXT.get_enumerated_devices() 47 | } 48 | 49 | unsafe extern "C" fn get_user_media( 50 | _delegate: *mut libc::c_void, 51 | peer_connection_factory: *mut WebrtcPeerConnectionFactory, 52 | signaling_thread: *mut WebrtcThread, 53 | worker_thread: *mut WebrtcThread, 54 | network_thread: *mut WebrtcThread, 55 | constraints: *const libc::c_char, 56 | ) -> *mut libc::c_void { 57 | GLOBAL_CONTEXT.get_user_media( 58 | peer_connection_factory, 59 | signaling_thread, 60 | worker_thread, 61 | network_thread, 62 | constraints, 63 | ) 64 | } 65 | 66 | unsafe extern "C" fn create_audio_device_module( 67 | _delegate: *mut NativeRawWebRtcContextDelegate, 68 | task_queue_factory: *mut WebrtcTaskQueueFactory, 69 | ) -> *mut WebrtcAudioDeviceModule { 70 | GLOBAL_CONTEXT.create_audio_device_module(task_queue_factory) 71 | } 72 | 73 | /// This class is used to initialize the SDK and create virtual devices. 74 | #[pyclass(name = "Daily", module = "daily")] 75 | struct PyDaily; 76 | 77 | #[pymethods] 78 | impl PyDaily { 79 | /// Initializes the SDK. This function needs to be called before anything 80 | /// else, usually done at the application startup. 81 | /// 82 | /// :param int worker_threads: Number of internal worker threads. 
Increasing this number might be necessary if the application needs to create a large number of concurrent call clients 83 | #[staticmethod] 84 | #[pyo3(signature = (worker_threads = 2))] 85 | pub fn init(worker_threads: usize) { 86 | unsafe { 87 | daily_core_set_log_level(LogLevel::Off); 88 | } 89 | 90 | let library_cstr = CString::new(DAILY_PYTHON_NAME).expect("invalid library string"); 91 | let version_cstr = CString::new(DAILY_PYTHON_VERSION).expect("invalid version string"); 92 | let os_cstr = CString::new(env::consts::OS).expect("invalid OS string"); 93 | 94 | let about_client = NativeAboutClient::new( 95 | library_cstr.as_ptr(), 96 | version_cstr.as_ptr(), 97 | os_cstr.as_ptr(), 98 | ptr::null(), 99 | library_cstr.as_ptr(), // TODO replace with app name when implementing Banuba 100 | ); 101 | 102 | let context_delegate = 103 | NativeContextDelegate::new(NativeContextDelegatePtr::new(ptr::null_mut())); 104 | 105 | let webrtc_delegate = NativeWebRtcContextDelegate::new( 106 | NativeWebRtcContextDelegatePtr::new(ptr::null_mut()), 107 | NativeWebRtcContextDelegateFns::new( 108 | get_user_media, 109 | None, 110 | get_enumerated_devices, 111 | Some(create_audio_device_module), 112 | None, 113 | None, 114 | None, 115 | None, 116 | get_audio_device, 117 | set_audio_device, 118 | None, 119 | ), 120 | ); 121 | 122 | daily_core_context_create_with_threads( 123 | context_delegate, 124 | webrtc_delegate, 125 | about_client, 126 | worker_threads, 127 | ); 128 | } 129 | 130 | /// Deallocates SDK resources. This is usually called when shutting down the 131 | /// application. 132 | #[staticmethod] 133 | pub fn deinit() { 134 | // TODO(aleix): We need to make sure all clients leave before doing this 135 | // otherwise we might crash. 136 | unsafe { daily_core_context_destroy() }; 137 | } 138 | 139 | /// Creates a new virtual camera device. Camera devices are used to 140 | /// send video (i.e. video frames) into the meeting. 141 | /// 142 | /// :param str device_name: The virtual camera device name 143 | /// :param int width: Resolution width 144 | /// :param int height: Resolution height 145 | /// :param str color_format: The color format of the frames that will be written to the camera device. See :ref:`ColorFormat` 146 | /// 147 | /// :return: A new virtual camera device 148 | /// :rtype: :class:`VirtualCameraDevice` 149 | #[staticmethod] 150 | #[pyo3(signature = (device_name, width, height, color_format = "RGBA"))] 151 | pub fn create_camera_device( 152 | device_name: &str, 153 | width: u32, 154 | height: u32, 155 | color_format: &str, 156 | ) -> PyResult { 157 | GLOBAL_CONTEXT.create_camera_device(device_name, width, height, color_format) 158 | } 159 | 160 | /// Creates a new virtual speaker device. Speaker devices are used to 161 | /// receive audio (i.e. read audio frames) from the meeting. 162 | /// 163 | /// Virtual speaker devices emulate a hardware device and have the 164 | /// constraint that only one speaker can be active per process. You can 165 | /// select the active speaker with :func:`select_speaker_device`. 
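A short sketch of creating and selecting a non-blocking virtual speaker, using the defaults declared in the signature just below; reading the received audio back out of the device is not shown in this excerpt.

from daily import Daily

Daily.init()

# Only one virtual speaker can be active per process, so create it and
# then select it as the active one.
speaker = Daily.create_speaker_device(
    "my-speaker",
    sample_rate=16000,
    channels=1,
    non_blocking=True,
)
Daily.select_speaker_device("my-speaker")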
166 | /// 167 | /// :param str device_name: The virtual speaker device name 168 | /// :param int sample_rate: Sample rate 169 | /// :param int channels: Number of channels (2 for stereo, 1 for mono) 170 | /// :param bool non_blocking: Whether the speaker will be blocking or non-blocking 171 | /// 172 | /// :return: A new virtual speaker device 173 | /// :rtype: :class:`VirtualSpeakerDevice` 174 | #[staticmethod] 175 | #[pyo3(signature = (device_name, sample_rate = 16000, channels = 1, non_blocking = false))] 176 | pub fn create_speaker_device( 177 | device_name: &str, 178 | sample_rate: u32, 179 | channels: u8, 180 | non_blocking: bool, 181 | ) -> PyResult { 182 | GLOBAL_CONTEXT.create_speaker_device(device_name, sample_rate, channels, non_blocking) 183 | } 184 | 185 | /// Creates a new virtual microphone device. Microphone devices are used to 186 | /// send audio (i.e. write audio frames) to the meeting. 187 | /// 188 | /// Microphone devices are selected with :func:`CallClient.update_inputs`. 189 | /// 190 | /// Virtual microphone devices emulate a hardware device and have the 191 | /// constraint that only one microphone can be active per process However, 192 | /// it is possible to use a custom microphone audio track when specifying 193 | /// the call client input settings. 194 | /// 195 | /// :param str device_name: The virtual microphone device name. This can be used as a `deviceId` when configuring the call client inputs 196 | /// :param int sample_rate: Sample rate 197 | /// :param int channels: Number of channels (2 for stereo, 1 for mono) 198 | /// :param bool non_blocking: Whether the microphone will be blocking or non-blocking 199 | /// 200 | /// :return: A new virtual microphone device 201 | /// :rtype: :class:`VirtualMicrophoneDevice` 202 | #[staticmethod] 203 | #[pyo3(signature = (device_name, sample_rate = 16000, channels = 1, non_blocking = false))] 204 | pub fn create_microphone_device( 205 | device_name: &str, 206 | sample_rate: u32, 207 | channels: u8, 208 | non_blocking: bool, 209 | ) -> PyResult { 210 | GLOBAL_CONTEXT.create_microphone_device(device_name, sample_rate, channels, non_blocking) 211 | } 212 | 213 | /// Selects one of the previously created virtual speaker devices to be the 214 | /// main system speaker. Note that there can only be one speaker selected at 215 | /// a time. Also, if there are multiple participants in the meeting, the 216 | /// audio from all the participants will be mixed and that's the audio that 217 | /// is received in the speaker. 218 | /// 219 | /// :param str device_name: The name of the virtual speaker device to select 220 | #[staticmethod] 221 | pub fn select_speaker_device(device_name: &str) -> PyResult<()> { 222 | GLOBAL_CONTEXT.select_speaker_device(device_name) 223 | } 224 | 225 | /// Creates a new VAD analyzer. VADs are used to detect speech from an audio 226 | /// stream. 
227 | /// 228 | /// :param int reset_period_ms: The period in milliseconds after the VAD is internally reset 229 | /// :param int sample_rate: Sample rate of the incoming audio frames 230 | /// :param int channels: Number of channels (2 for stereo, 1 for mono) of the incoming audio frames 231 | /// 232 | /// :return: A new VAD 233 | /// :rtype: :class:`NativeVad` 234 | #[staticmethod] 235 | #[pyo3(signature = (reset_period_ms = 1000, sample_rate = 16000, channels = 1))] 236 | pub fn create_native_vad( 237 | reset_period_ms: u32, 238 | sample_rate: u32, 239 | channels: u8, 240 | ) -> PyResult { 241 | GLOBAL_CONTEXT.create_native_vad(reset_period_ms, sample_rate, channels) 242 | } 243 | } 244 | 245 | /// A Python module implemented in Rust. 246 | #[pymodule] 247 | fn daily(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { 248 | m.add_class::()?; 249 | m.add_class::()?; 250 | m.add_class::()?; 251 | m.add_class::()?; 252 | m.add_class::()?; 253 | m.add_class::()?; 254 | m.add_class::()?; 255 | m.add_class::()?; 256 | m.add_class::()?; 257 | m.add_class::()?; 258 | m.add_class::()?; 259 | Ok(()) 260 | } 261 | -------------------------------------------------------------------------------- /src/media.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod audio_data; 2 | pub(crate) mod custom_audio_source; 3 | pub(crate) mod custom_audio_track; 4 | pub(crate) mod native_vad; 5 | pub(crate) mod video_frame; 6 | pub(crate) mod virtual_camera_device; 7 | pub(crate) mod virtual_microphone_device; 8 | pub(crate) mod virtual_speaker_device; 9 | 10 | pub(crate) use audio_data::PyAudioData; 11 | pub(crate) use custom_audio_source::PyCustomAudioSource; 12 | pub(crate) use custom_audio_track::PyCustomAudioTrack; 13 | pub(crate) use native_vad::PyNativeVad; 14 | pub(crate) use video_frame::PyVideoFrame; 15 | pub(crate) use virtual_camera_device::PyVirtualCameraDevice; 16 | pub(crate) use virtual_microphone_device::PyVirtualMicrophoneDevice; 17 | pub(crate) use virtual_speaker_device::PyVirtualSpeakerDevice; 18 | -------------------------------------------------------------------------------- /src/media/audio_data.rs: -------------------------------------------------------------------------------- 1 | use pyo3::prelude::*; 2 | 3 | /// This class represents received audio data. It contains a bytestring with the 4 | /// audio frames and other attributes such as bits per sample and sample rate. 5 | #[pyclass(name = "AudioData", module = "daily", get_all)] 6 | pub struct PyAudioData { 7 | /// The bits per sample of the audio data 8 | pub bits_per_sample: u32, 9 | /// The sample rate 10 | pub sample_rate: u32, 11 | /// The number of audio channels 12 | pub num_channels: usize, 13 | /// The number of audio frames 14 | pub num_audio_frames: usize, 15 | /// A bytestring with the audio frames 16 | pub audio_frames: PyObject, 17 | } 18 | -------------------------------------------------------------------------------- /src/media/custom_audio_source.rs: -------------------------------------------------------------------------------- 1 | use std::sync::atomic::{AtomicU64, Ordering}; 2 | use std::{collections::HashMap, sync::Mutex}; 3 | 4 | use crate::util::memory::AlignedI16Data; 5 | 6 | use daily_core::prelude::*; 7 | 8 | use webrtc_daily::sys::custom_audio_source::NativeDailyAudioSource; 9 | 10 | use pyo3::exceptions; 11 | use pyo3::prelude::*; 12 | use pyo3::types::{PyBytes, PyTuple}; 13 | 14 | /// This class represents a custom audio source. 
Custom audio sources are used 15 | /// to send audio to an audio track. See 16 | /// :func:`daily.CallClient.add_custom_audio_track`. 17 | /// 18 | /// The audio format used by custom audio sources is 16-bit linear PCM. 19 | #[pyclass(name = "CustomAudioSource", module = "daily")] 20 | pub struct PyCustomAudioSource { 21 | pub sample_rate: u32, 22 | pub channels: u8, 23 | pub audio_source: NativeDailyAudioSource, 24 | request_id: AtomicU64, 25 | completions: Mutex>, 26 | } 27 | 28 | impl PyCustomAudioSource { 29 | fn maybe_register_completion(&mut self, completion: Option) -> u64 { 30 | let request_id = self.request_id.fetch_add(1, Ordering::SeqCst); 31 | 32 | if let Some(completion) = completion { 33 | self.completions 34 | .lock() 35 | .unwrap() 36 | .insert(request_id, completion); 37 | } 38 | 39 | request_id 40 | } 41 | } 42 | 43 | #[pymethods] 44 | impl PyCustomAudioSource { 45 | #[new] 46 | pub fn new(sample_rate: u32, channels: u8) -> Self { 47 | let audio_source_ptr = unsafe { daily_core_context_create_custom_audio_source() }; 48 | 49 | let audio_source = NativeDailyAudioSource::from(audio_source_ptr); 50 | 51 | Self { 52 | sample_rate, 53 | channels, 54 | audio_source, 55 | request_id: AtomicU64::new(0), 56 | completions: Mutex::new(HashMap::new()), 57 | } 58 | } 59 | 60 | /// Returns the sample rate of this audio source (e.g. 16000). 61 | /// 62 | /// :return: The sample rate 63 | /// :rtype: int 64 | #[getter] 65 | fn sample_rate(&self) -> u32 { 66 | self.sample_rate 67 | } 68 | 69 | /// Returns the number of channels (2 for stereo and 1 for mono) of this 70 | /// audio source. 71 | /// 72 | /// :return: The number of channels 73 | /// :rtype: int 74 | #[getter] 75 | fn channels(&self) -> u8 { 76 | self.channels 77 | } 78 | 79 | /// Writes audio frames to the audio source. The frames will be sent to the 80 | /// audio track if this source is attached to a track. 81 | /// 82 | /// This function blocks, if a `completion` callback is not provided, until 83 | /// the audio frames have been written. If a `completion` callback is 84 | /// provided this function is non-blocking and the `completion` callback 85 | /// will be called when the audio frames are written. 
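For example, a sketch contrasting the blocking and non-blocking forms (the silence below is just placeholder audio):

from daily import CustomAudioSource

source = CustomAudioSource(sample_rate=16000, channels=1)

# 20 ms of 16-bit mono silence: 16000 Hz * 0.02 s * 2 bytes = 640 bytes.
frames = b"\x00" * 640

# Blocking form: returns once the frames have been written.
written = source.write_frames(frames)

# Non-blocking form: returns immediately and the completion callback
# receives the number of frames written.
source.write_frames(frames, completion=lambda count: print("wrote", count))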
86 | /// 87 | /// :param bytestring frames: A bytestring with the audio frames to write 88 | /// :param func completion: An optional completion callback with one parameter: (int) 89 | /// 90 | /// :return: The number of audio frames written 91 | /// :rtype: int 92 | #[pyo3(signature = (frames, completion=None))] 93 | pub fn write_frames( 94 | &mut self, 95 | frames: &Bound<'_, PyBytes>, 96 | completion: Option, 97 | ) -> PyResult { 98 | let num_bytes = frames.len()?; 99 | let bytes_per_sample: usize = 2; 100 | 101 | if num_bytes % (bytes_per_sample * self.channels as usize) != 0 { 102 | return Err(exceptions::PyValueError::new_err( 103 | "frames bytestring should contain 16-bit samples", 104 | )); 105 | } 106 | 107 | let num_frames = (num_bytes / bytes_per_sample) / self.channels as usize; 108 | 109 | let bytes = frames.as_bytes(); 110 | let aligned = AlignedI16Data::new(bytes); 111 | 112 | let request_id = self.maybe_register_completion(completion.clone()); 113 | 114 | Python::with_gil(|py| { 115 | let frames_written = py.allow_threads(move || unsafe { 116 | if completion.is_none() { 117 | daily_core_context_custom_audio_source_write_frames_sync( 118 | self.audio_source.as_ptr() as *mut _, 119 | aligned.as_ptr() as *const _, 120 | (bytes_per_sample * 8) as i32, 121 | self.sample_rate as i32, 122 | self.channels as usize, 123 | num_frames, 124 | ) 125 | } else { 126 | daily_core_context_custom_audio_source_write_frames_async( 127 | self.audio_source.as_ptr() as *mut _, 128 | aligned.as_ptr() as *const _, 129 | (bytes_per_sample * 8) as i32, 130 | self.sample_rate as i32, 131 | self.channels as usize, 132 | num_frames, 133 | request_id, 134 | on_write_frames, 135 | self as *const PyCustomAudioSource as *mut libc::c_void, 136 | ) 137 | } 138 | }); 139 | 140 | if frames_written >= 0 { 141 | Ok(frames_written.into_py(py)) 142 | } else { 143 | Err(exceptions::PyIOError::new_err( 144 | "error writing audio frames to audio source", 145 | )) 146 | } 147 | }) 148 | } 149 | } 150 | 151 | pub(crate) unsafe extern "C" fn on_write_frames( 152 | source: *mut libc::c_void, 153 | request_id: u64, 154 | num_frames: usize, 155 | ) { 156 | let audio_source: &mut PyCustomAudioSource = 157 | unsafe { &mut *(source as *mut PyCustomAudioSource) }; 158 | 159 | Python::with_gil(|py| { 160 | let completion = audio_source.completions.lock().unwrap().remove(&request_id); 161 | 162 | let args = PyTuple::new_bound(py, &[num_frames.into_py(py)]); 163 | 164 | if let Some(completion) = completion { 165 | if let Err(error) = completion.call1(py, args) { 166 | error.write_unraisable_bound(py, None); 167 | } 168 | } 169 | }) 170 | } 171 | -------------------------------------------------------------------------------- /src/media/custom_audio_track.rs: -------------------------------------------------------------------------------- 1 | use daily_core::prelude::*; 2 | 3 | use webrtc_daily::media_stream::MediaStreamTrack; 4 | 5 | use pyo3::prelude::*; 6 | use webrtc_daily::sys::webrtc::MediaStreamTrackInterface; 7 | use webrtc_daily::sys::ScopedRefPtr; 8 | 9 | use super::PyCustomAudioSource; 10 | 11 | /// This class represents a custom audio track. Custom audio tracks need a 12 | /// :class:`CustomAudioSource` to write audio frames. 13 | /// 14 | /// Custom audio tracks can be used to send additional custom tracks or as the 15 | /// main microphone track. 
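A sketch of pairing a track with its source; the input-settings dictionary passed to `update_inputs` is an illustrative assumption, not verbatim API.

from daily import CallClient, CustomAudioSource, CustomAudioTrack

source = CustomAudioSource(sample_rate=16000, channels=1)
track = CustomAudioTrack(source)

# Hypothetical input settings that use the custom track as the microphone;
# the exact dictionary layout is assumed here for illustration.
client = CallClient()
client.update_inputs({
    "microphone": {
        "isEnabled": True,
        "settings": {"customTrack": {"id": track.id}},
    }
})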
16 | #[pyclass(name = "CustomAudioTrack", module = "daily")] 17 | pub struct PyCustomAudioTrack { 18 | pub audio_track: MediaStreamTrack, 19 | } 20 | 21 | #[pymethods] 22 | impl PyCustomAudioTrack { 23 | #[new] 24 | pub fn new(audio_source: &PyCustomAudioSource) -> Self { 25 | let audio_track = unsafe { 26 | daily_core_context_create_custom_audio_track( 27 | audio_source.audio_source.as_ptr() as *mut _ 28 | ) 29 | }; 30 | 31 | let audio_track = audio_track as *mut MediaStreamTrackInterface; 32 | 33 | let audio_track = 34 | unsafe { MediaStreamTrack::from(ScopedRefPtr::from_retained(audio_track)) }; 35 | 36 | Self { audio_track } 37 | } 38 | 39 | /// Returns the track id. 40 | /// 41 | /// :return: The track id 42 | /// :rtype: str 43 | #[getter] 44 | fn id(&self) -> String { 45 | self.audio_track.id() 46 | } 47 | } 48 | 49 | impl Drop for PyCustomAudioTrack { 50 | fn drop(&mut self) { 51 | unsafe { 52 | daily_core_context_destroy_custom_audio_track(self.audio_track.as_mut_ptr() as *mut _) 53 | }; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/media/native_vad.rs: -------------------------------------------------------------------------------- 1 | use crate::util::memory::AlignedI16Data; 2 | 3 | use webrtc_daily::sys::vad::NativeWebrtcVad; 4 | 5 | use daily_core::prelude::daily_core_context_vad_analyze; 6 | 7 | use pyo3::exceptions; 8 | use pyo3::prelude::*; 9 | use pyo3::types::PyBytes; 10 | 11 | /// This class represents a Voice Activity Detection (VAD) analyzer. VADs are 12 | /// used to detect speech on an audio stream. 13 | /// 14 | /// This VAD implementation works by analyzing 10ms audio frames at a time 15 | /// returning a confidence probability. It is possible to build a more 16 | /// sophisticated VAD (e.g. one that detects long sentences) on top of this one. 17 | /// 18 | /// The audio format used by this VAD is 16-bit linear PCM. 19 | #[pyclass(name = "NativeVad", module = "daily")] 20 | pub struct PyNativeVad { 21 | reset_period_ms: u32, 22 | sample_rate: u32, 23 | channels: u8, 24 | webrtc_vad: Option, 25 | } 26 | 27 | impl PyNativeVad { 28 | pub fn new(reset_period_ms: u32, sample_rate: u32, channels: u8) -> Self { 29 | Self { 30 | reset_period_ms, 31 | sample_rate, 32 | channels, 33 | webrtc_vad: None, 34 | } 35 | } 36 | 37 | pub fn attach_webrtc_vad(&mut self, webrtc_vad: NativeWebrtcVad) { 38 | self.webrtc_vad = Some(webrtc_vad); 39 | } 40 | } 41 | 42 | #[pymethods] 43 | impl PyNativeVad { 44 | /// Returns the number of milliseconds after which the internal VAD is 45 | /// reset. It should be at least 20ms. 46 | /// 47 | /// :return: The sample rate 48 | /// :rtype: int 49 | #[getter] 50 | fn reset_period_ms(&self) -> u32 { 51 | self.reset_period_ms 52 | } 53 | 54 | /// Returns the sample rate of incoming audio frames for this VAD 55 | /// (e.g. 16000). 56 | /// 57 | /// :return: The sample rate 58 | /// :rtype: int 59 | #[getter] 60 | fn sample_rate(&self) -> u32 { 61 | self.sample_rate 62 | } 63 | 64 | /// Returns the number of channels (2 for stereo and 1 for mono) of incoming 65 | /// audio frames for this VAD. 66 | /// 67 | /// :return: The number of channels 68 | /// :rtype: int 69 | #[getter] 70 | fn channels(&self) -> u8 { 71 | self.channels 72 | } 73 | 74 | /// Analyzes 10ms of audio frames and returns the confidence probability 75 | /// that speech was detected. If more than 10ms of audio frames are given, 76 | /// only the first 10ms will be used. 
77 |     ///
78 |     /// :return: The probability (from 0 to 1.0) that speech was detected
79 |     /// :rtype: float
80 |     fn analyze_frames(&self, frames: &Bound<'_, PyBytes>) -> PyResult<f32> {
81 |         let num_bytes = frames.len()?;
82 |         let bytes_per_sample = 2;
83 | 
84 |         // libwebrtc needs 16-bit linear PCM samples
85 |         if num_bytes % bytes_per_sample != 0 {
86 |             return Err(exceptions::PyValueError::new_err(
87 |                 "frames bytestring should contain 16-bit samples",
88 |             ));
89 |         }
90 | 
91 |         let num_frames = (num_bytes / bytes_per_sample) / self.channels as usize;
92 | 
93 |         let bytes = frames.as_bytes();
94 |         let aligned = AlignedI16Data::new(bytes);
95 | 
96 |         let confidence = Python::with_gil(|py| {
97 |             py.allow_threads(move || unsafe {
98 |                 daily_core_context_vad_analyze(
99 |                     self.webrtc_vad.as_ref().unwrap().as_ptr() as *mut _,
100 |                     aligned.as_ptr(),
101 |                     num_frames,
102 |                 )
103 |             })
104 |         });
105 | 
106 |         Ok(confidence)
107 |     }
108 | }
109 | 
--------------------------------------------------------------------------------
/src/media/video_frame.rs:
--------------------------------------------------------------------------------
1 | use pyo3::prelude::*;
2 | 
3 | /// This class represents a received video frame. It contains a bytestring with
4 | /// frame contents and other frame attributes such as width and height.
5 | #[pyclass(name = "VideoFrame", module = "daily", get_all)]
6 | pub struct PyVideoFrame {
7 |     /// A bytestring with the frame data in the corresponding color format
8 |     pub buffer: PyObject,
9 |     /// The width of this frame
10 |     pub width: i32,
11 |     /// The height of this frame
12 |     pub height: i32,
13 |     /// The time in microseconds that the frame was received
14 |     pub timestamp_us: i64,
15 |     /// The frame's color format
16 |     pub color_format: PyObject,
17 | }
18 | 
--------------------------------------------------------------------------------
/src/media/virtual_camera_device.rs:
--------------------------------------------------------------------------------
1 | use webrtc_daily::sys::{
2 |     color_format::ColorFormat, virtual_camera_device::NativeVirtualCameraDevice,
3 | };
4 | 
5 | use daily_core::prelude::daily_core_context_virtual_camera_device_write_frame;
6 | 
7 | use pyo3::exceptions;
8 | use pyo3::prelude::*;
9 | use pyo3::types::PyBytes;
10 | 
11 | /// This class represents a virtual camera device. Virtual camera
12 | /// devices are used to send video to the meeting.
13 | #[pyclass(name = "VirtualCameraDevice", module = "daily")]
14 | pub struct PyVirtualCameraDevice {
15 |     device_name: String,
16 |     width: u32,
17 |     height: u32,
18 |     color_format: ColorFormat,
19 |     camera_device: Option<NativeVirtualCameraDevice>,
20 | }
21 | 
22 | impl PyVirtualCameraDevice {
23 |     pub fn new(device_name: &str, width: u32, height: u32, color_format: ColorFormat) -> Self {
24 |         Self {
25 |             device_name: device_name.to_string(),
26 |             width,
27 |             height,
28 |             color_format,
29 |             camera_device: None,
30 |         }
31 |     }
32 | 
33 |     pub fn attach_camera_device(&mut self, camera_device: NativeVirtualCameraDevice) {
34 |         self.camera_device = Some(camera_device);
35 |     }
36 | }
37 | 
38 | #[pymethods]
39 | impl PyVirtualCameraDevice {
40 |     /// Returns the device name.
41 |     ///
42 |     /// :return: The virtual camera device name
43 |     /// :rtype: str
44 |     #[getter]
45 |     fn name(&self) -> String {
46 |         self.device_name.clone()
47 |     }
48 | 
49 |     /// Returns the resolution width of this camera.
50 |     ///
51 |     /// :return: The resolution width
52 |     /// :rtype: int
53 |     #[getter]
54 |     fn width(&self) -> u32 {
55 |         self.width
56 |     }
57 | 
58 |     /// Returns the resolution height of this camera.
59 |     ///
60 |     /// :return: The resolution height
61 |     /// :rtype: int
62 |     #[getter]
63 |     fn height(&self) -> u32 {
64 |         self.height
65 |     }
66 | 
67 |     /// Returns the color format of this camera.
68 |     ///
69 |     /// :return: See :ref:`ColorFormat`
70 |     /// :rtype: str
71 |     #[getter]
72 |     fn color_format(&self) -> String {
73 |         self.color_format.to_string()
74 |     }
75 | 
76 |     /// Writes a video frame to a virtual camera device created with
77 |     /// :func:`Daily.create_camera_device`.
78 |     ///
79 |     /// The video frame needs to be of the same color format (see
80 |     /// :ref:`ColorFormat`) specified when creating the camera.
81 |     ///
82 |     /// :param bytestring frame: A bytestring with the video frame contents
83 |     pub fn write_frame(&self, py: Python<'_>, frame: &Bound<'_, PyBytes>) -> PyResult<()> {
84 |         if let Some(camera_device) = self.camera_device.as_ref() {
85 |             let bytes_length = frame.len()?;
86 | 
87 |             let bytes = frame.as_bytes();
88 | 
89 |             py.allow_threads(move || unsafe {
90 |                 daily_core_context_virtual_camera_device_write_frame(
91 |                     camera_device.as_ptr() as *mut _,
92 |                     bytes.as_ptr() as *const _,
93 |                     bytes_length,
94 |                 )
95 |             });
96 | 
97 |             Ok(())
98 |         } else {
99 |             Err(exceptions::PyRuntimeError::new_err(
100 |                 "no camera device has been attached",
101 |             ))
102 |         }
103 |     }
104 | }
105 | 
--------------------------------------------------------------------------------
/src/media/virtual_microphone_device.rs:
--------------------------------------------------------------------------------
1 | use std::sync::atomic::{AtomicU64, Ordering};
2 | use std::{collections::HashMap, sync::Mutex};
3 | 
4 | use crate::util::memory::AlignedI16Data;
5 | 
6 | use webrtc_daily::sys::virtual_microphone_device::NativeVirtualMicrophoneDevice;
7 | 
8 | use daily_core::prelude::daily_core_context_virtual_microphone_device_write_frames;
9 | 
10 | use pyo3::exceptions;
11 | use pyo3::prelude::*;
12 | use pyo3::types::{PyBytes, PyTuple};
13 | 
14 | /// This class represents a virtual microphone device. Virtual microphone
15 | /// devices are used to send audio to the meeting. They can be created as
16 | /// blocking or non-blocking (see :func:`Daily.create_microphone_device`). A
17 | /// blocking device will wait until :func:`VirtualMicrophoneDevice.write_frames`
18 | /// finishes writing the given audio frames. In contrast, a non-blocking
19 | /// microphone will not wait.
20 | ///
21 | /// NOTE: Virtual microphone devices emulate a hardware device and have the
22 | /// constraint that only one microphone can be active per process. You can
23 | /// select the active microphone through the input settings in
24 | /// :func:`CallClient.join` or :func:`CallClient.update_inputs`. However, it is
25 | /// possible to use a custom microphone audio track when also specifying the
26 | /// input settings.
27 | ///
28 | /// The audio format used by virtual microphone devices is 16-bit linear PCM.
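///
/// A minimal usage sketch of the non-blocking flow (the factory arguments
/// shown here are assumptions for illustration; see
/// :func:`Daily.create_microphone_device` for the actual signature)::
///
///     mic = Daily.create_microphone_device(
///         "my-mic", sample_rate=16000, channels=1, non_blocking=True
///     )
///
///     # Select "my-mic" through the input settings when joining, then write
///     # 16-bit linear PCM frames; the completion callback fires once the
///     # frames have been written.
///     mic.write_frames(pcm_bytes, completion=lambda count: print(count))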
29 | #[pyclass(name = "VirtualMicrophoneDevice", module = "daily")]
30 | pub struct PyVirtualMicrophoneDevice {
31 |     device_name: String,
32 |     sample_rate: u32,
33 |     channels: u8,
34 |     audio_device: Option<NativeVirtualMicrophoneDevice>,
35 |     request_id: AtomicU64,
36 |     completions: Mutex<HashMap<u64, PyObject>>,
37 | }
38 | 
39 | impl PyVirtualMicrophoneDevice {
40 |     pub fn new(device_name: &str, sample_rate: u32, channels: u8) -> Self {
41 |         Self {
42 |             device_name: device_name.to_string(),
43 |             sample_rate,
44 |             channels,
45 |             audio_device: None,
46 |             request_id: AtomicU64::new(0),
47 |             completions: Mutex::new(HashMap::new()),
48 |         }
49 |     }
50 | 
51 |     pub fn attach_audio_device(&mut self, audio_device: NativeVirtualMicrophoneDevice) {
52 |         self.audio_device = Some(audio_device);
53 |     }
54 | 
55 |     fn maybe_register_completion(&mut self, completion: Option<PyObject>) -> u64 {
56 |         let request_id = self.request_id.fetch_add(1, Ordering::SeqCst);
57 | 
58 |         if let Some(completion) = completion {
59 |             self.completions
60 |                 .lock()
61 |                 .unwrap()
62 |                 .insert(request_id, completion);
63 |         }
64 | 
65 |         request_id
66 |     }
67 | }
68 | 
69 | #[pymethods]
70 | impl PyVirtualMicrophoneDevice {
71 |     /// Returns the device name.
72 |     ///
73 |     /// :return: The virtual microphone device name
74 |     /// :rtype: str
75 |     #[getter]
76 |     fn name(&self) -> String {
77 |         self.device_name.clone()
78 |     }
79 | 
80 |     /// Returns the sample rate of this device (e.g. 16000).
81 |     ///
82 |     /// :return: The sample rate
83 |     /// :rtype: int
84 |     #[getter]
85 |     fn sample_rate(&self) -> u32 {
86 |         self.sample_rate
87 |     }
88 | 
89 |     /// Returns the number of channels (2 for stereo and 1 for mono) of this device.
90 |     ///
91 |     /// :return: The number of channels
92 |     /// :rtype: int
93 |     #[getter]
94 |     fn channels(&self) -> u8 {
95 |         self.channels
96 |     }
97 | 
98 |     /// Writes audio frames to a virtual microphone device created with
99 |     /// :func:`Daily.create_microphone_device`. For non-blocking devices, the
100 |     /// completion callback will be called when the audio frames have been
101 |     /// written.
102 |     ///
103 |     /// If less than a multiple of 10ms worth of audio frames is provided
104 |     /// on a blocking microphone, padding will be added up to the next multiple.
105 |     ///
106 |     /// :param bytestring frames: A bytestring with the audio frames to write
107 |     /// :param func completion: An optional completion callback with one parameter: (int)
108 |     ///
109 |     /// :return: The number of audio frames written
110 |     /// :rtype: int
111 |     #[pyo3(signature = (frames, completion = None))]
112 |     pub fn write_frames(
113 |         &mut self,
114 |         frames: &Bound<'_, PyBytes>,
115 |         completion: Option<PyObject>,
116 |     ) -> PyResult<PyObject> {
117 |         if self.audio_device.is_none() {
118 |             return Err(exceptions::PyRuntimeError::new_err(
119 |                 "no microphone device has been attached",
120 |             ));
121 |         }
122 | 
123 |         let num_bytes = frames.len()?;
124 |         let bytes_per_sample: usize = 2;
125 | 
126 |         // libwebrtc needs 16-bit linear PCM samples
127 |         if num_bytes % (bytes_per_sample * self.channels as usize) != 0 {
128 |             return Err(exceptions::PyValueError::new_err(
129 |                 "frames bytestring should contain 16-bit samples",
130 |             ));
131 |         }
132 | 
133 |         let num_frames = (num_bytes / bytes_per_sample) / self.channels as usize;
134 | 
135 |         let bytes = frames.as_bytes();
136 |         let aligned = AlignedI16Data::new(bytes);
137 | 
138 |         let request_id = self.maybe_register_completion(completion);
139 | 
140 |         Python::with_gil(|py| {
141 |             let frames_written = py.allow_threads(move || unsafe {
142 |                 daily_core_context_virtual_microphone_device_write_frames(
143 |                     self.audio_device.as_ref().unwrap().as_ptr() as *mut _,
144 |                     aligned.as_ptr(),
145 |                     num_frames,
146 |                     request_id,
147 |                     on_write_frames,
148 |                     self as *const PyVirtualMicrophoneDevice as *mut libc::c_void,
149 |                 )
150 |             });
151 | 
152 |             if frames_written >= 0 {
153 |                 Ok(frames_written.into_py(py))
154 |             } else {
155 |                 Err(exceptions::PyIOError::new_err(
156 |                     "error writing audio frames to device",
157 |                 ))
158 |             }
159 |         })
160 |     }
161 | }
162 | 
163 | pub(crate) unsafe extern "C" fn on_write_frames(
164 |     device: *mut libc::c_void,
165 |     request_id: u64,
166 |     num_frames: usize,
167 | ) {
168 |     let microphone: &mut PyVirtualMicrophoneDevice =
169 |         unsafe { &mut *(device as *mut PyVirtualMicrophoneDevice) };
170 | 
171 |     Python::with_gil(|py| {
172 |         let completion = microphone.completions.lock().unwrap().remove(&request_id);
173 | 
174 |         if let Some(completion) = completion {
175 |             let args = PyTuple::new_bound(py, &[num_frames.into_py(py)]);
176 | 
177 |             if let Err(error) = completion.call1(py, args) {
178 |                 error.write_unraisable_bound(py, None);
179 |             }
180 |         }
181 |     })
182 | }
183 | 
--------------------------------------------------------------------------------
/src/media/virtual_speaker_device.rs:
--------------------------------------------------------------------------------
1 | use std::sync::atomic::{AtomicU64, Ordering};
2 | use std::{collections::HashMap, sync::Mutex};
3 | 
4 | use webrtc_daily::sys::virtual_speaker_device::NativeVirtualSpeakerDevice;
5 | 
6 | use daily_core::prelude::daily_core_context_virtual_speaker_device_read_frames;
7 | 
8 | use pyo3::exceptions;
9 | use pyo3::prelude::*;
10 | use pyo3::types::{PyBytes, PyTuple};
11 | 
12 | /// This class represents a virtual speaker device. Virtual speaker devices are
13 | /// used to receive audio from the meeting. They can be created as blocking or
14 | /// non-blocking (see :func:`Daily.create_speaker_device`). Blocking means that
15 | /// calling :func:`VirtualSpeakerDevice.read_frames` behaves synchronously until
16 | /// all the given audio frames have been read. In contrast, non-blocking will
17 | /// behave asynchronously (i.e. it won't wait).
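///
/// A minimal usage sketch of the non-blocking flow (the factory arguments and
/// the ``on_audio`` callback are illustrative assumptions)::
///
///     speaker = Daily.create_speaker_device(
///         "my-speaker", sample_rate=16000, channels=1, non_blocking=True
///     )
///     Daily.select_speaker_device("my-speaker")
///
///     def on_audio(frames: bytes):
///         ...  # handle 160 frames (10ms at 16000 Hz) of 16-bit linear PCM
///
///     speaker.read_frames(160, completion=on_audio)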
18 | ///
19 | /// NOTE: Virtual speaker devices emulate a hardware device and have the
20 | /// constraint that only one speaker can be active per process. You can select
21 | /// the active speaker with :func:`Daily.select_speaker_device`.
22 | ///
23 | /// The audio format used by virtual speaker devices is 16-bit linear PCM.
24 | #[pyclass(name = "VirtualSpeakerDevice", module = "daily")]
25 | pub struct PyVirtualSpeakerDevice {
26 |     device_name: String,
27 |     sample_rate: u32,
28 |     channels: u8,
29 |     non_blocking: bool,
30 |     audio_device: Option<NativeVirtualSpeakerDevice>,
31 |     request_id: AtomicU64,
32 |     completions: Mutex<HashMap<u64, PyObject>>,
33 | }
34 | 
35 | impl PyVirtualSpeakerDevice {
36 |     pub fn new(device_name: &str, sample_rate: u32, channels: u8, non_blocking: bool) -> Self {
37 |         Self {
38 |             device_name: device_name.to_string(),
39 |             sample_rate,
40 |             channels,
41 |             non_blocking,
42 |             audio_device: None,
43 |             request_id: AtomicU64::new(0),
44 |             completions: Mutex::new(HashMap::new()),
45 |         }
46 |     }
47 | 
48 |     pub fn attach_audio_device(&mut self, audio_device: NativeVirtualSpeakerDevice) {
49 |         self.audio_device = Some(audio_device);
50 |     }
51 | 
52 |     fn maybe_register_completion(&mut self, completion: Option<PyObject>) -> u64 {
53 |         let request_id = self.request_id.fetch_add(1, Ordering::SeqCst);
54 | 
55 |         if let Some(completion) = completion {
56 |             self.completions
57 |                 .lock()
58 |                 .unwrap()
59 |                 .insert(request_id, completion);
60 |         }
61 | 
62 |         request_id
63 |     }
64 | }
65 | 
66 | #[pymethods]
67 | impl PyVirtualSpeakerDevice {
68 |     /// Returns the device name.
69 |     ///
70 |     /// :return: The virtual speaker device name
71 |     /// :rtype: str
72 |     #[getter]
73 |     fn name(&self) -> String {
74 |         self.device_name.clone()
75 |     }
76 | 
77 |     /// Returns the sample rate of this device (e.g. 16000).
78 |     ///
79 |     /// :return: The sample rate
80 |     /// :rtype: int
81 |     #[getter]
82 |     fn sample_rate(&self) -> u32 {
83 |         self.sample_rate
84 |     }
85 | 
86 |     /// Returns the number of channels (2 for stereo and 1 for mono) of this device.
87 |     ///
88 |     /// :return: The number of channels
89 |     /// :rtype: int
90 |     #[getter]
91 |     fn channels(&self) -> u8 {
92 |         self.channels
93 |     }
94 | 
95 |     /// Reads audio frames from a virtual speaker device created with
96 |     /// :func:`Daily.create_speaker_device`. For non-blocking devices, the
97 |     /// completion callback will be called when the audio frames have been read.
98 |     ///
99 |     /// :param int num_frames: The number of audio frames to read
100 |     /// :param func completion: An optional completion callback with one parameter: (bytestring)
101 |     ///
102 |     /// :return: The read audio frames as a bytestring, or an empty bytestring if no frames were read
103 |     /// :rtype: bytestring
104 |     #[pyo3(signature = (num_frames, completion = None))]
105 |     pub fn read_frames(
106 |         &mut self,
107 |         num_frames: usize,
108 |         completion: Option<PyObject>,
109 |     ) -> PyResult<PyObject> {
110 |         if self.audio_device.is_none() {
111 |             return Err(exceptions::PyRuntimeError::new_err(
112 |                 "no speaker device has been attached",
113 |             ));
114 |         }
115 | 
116 |         // In the non-blocking case, we don't want to allocate memory here
117 |         // since we will exit the function right away and the memory won't
118 |         // be valid. The needed memory will be allocated internally.
119 |         let num_bytes = if self.non_blocking {
120 |             0
121 |         } else {
122 |             // libwebrtc provides 16-bit linear PCM
123 |             let bytes_per_sample = 2;
124 |             num_frames * self.channels() as usize * bytes_per_sample
125 |         };
126 |         let num_words = num_bytes / 2;
127 | 
128 |         let mut buffer: Vec<i16> = Vec::with_capacity(num_words);
129 | 
130 |         let request_id = self.maybe_register_completion(completion);
131 | 
132 |         Python::with_gil(move |py| {
133 |             let buffer_bytes = buffer.as_mut_slice();
134 | 
135 |             let frames_read = py.allow_threads(move || unsafe {
136 |                 daily_core_context_virtual_speaker_device_read_frames(
137 |                     self.audio_device.as_ref().unwrap().as_ptr() as *mut _,
138 |                     buffer_bytes.as_mut_ptr(),
139 |                     num_frames,
140 |                     request_id,
141 |                     on_read_frames,
142 |                     self as *const PyVirtualSpeakerDevice as *mut libc::c_void,
143 |                 )
144 |             });
145 | 
146 |             if frames_read == num_frames as i32 {
147 |                 let py_bytes =
148 |                     unsafe { PyBytes::bound_from_ptr(py, buffer.as_ptr() as *const u8, num_bytes) };
149 |                 Ok(py_bytes.into_py(py))
150 |             } else if frames_read == 0 {
151 |                 let empty_bytes: [u8; 0] = [];
152 |                 let py_bytes = PyBytes::new_bound(py, &empty_bytes);
153 |                 Ok(py_bytes.into_py(py))
154 |             } else {
155 |                 Err(exceptions::PyIOError::new_err(
156 |                     "error reading audio frames from the device",
157 |                 ))
158 |             }
159 |         })
160 |     }
161 | }
162 | 
163 | pub(crate) unsafe extern "C" fn on_read_frames(
164 |     device: *mut libc::c_void,
165 |     request_id: u64,
166 |     frames: *mut i16,
167 |     num_frames: usize,
168 | ) {
169 |     let speaker: &mut PyVirtualSpeakerDevice =
170 |         unsafe { &mut *(device as *mut PyVirtualSpeakerDevice) };
171 | 
172 |     Python::with_gil(|py| {
173 |         let completion = speaker.completions.lock().unwrap().remove(&request_id);
174 | 
175 |         if let Some(completion) = completion {
176 |             let bytes_per_sample = 2;
177 |             let num_bytes = num_frames * speaker.channels() as usize * bytes_per_sample;
178 |             let empty_bytes: [u8; 0] = [];
179 | 
180 |             let py_bytes = if num_bytes > 0 {
181 |                 unsafe { PyBytes::bound_from_ptr(py, frames as *const u8, num_bytes) }
182 |             } else {
183 |                 PyBytes::new_bound(py, &empty_bytes)
184 |             };
185 | 
186 |             let args = PyTuple::new_bound(py, [py_bytes]);
187 | 
188 |             if let Err(error) = completion.call1(py, args) {
189 |                 error.write_unraisable_bound(py, None);
190 |             }
191 |         };
192 |     })
193 | }
194 | 
--------------------------------------------------------------------------------
/src/util.rs:
--------------------------------------------------------------------------------
1 | pub(crate) mod dict;
2 | pub(crate) mod memory;
3 | 
--------------------------------------------------------------------------------
/src/util/dict.rs:
--------------------------------------------------------------------------------
1 | use std::collections::HashMap;
2 | 
3 | use serde::{Deserialize, Serialize};
4 | use serde_json::Value;
5 | 
6 | use pyo3::exceptions::PyTypeError;
7 | use pyo3::prelude::*;
8 | use pyo3::types::{PyBool, PyDict, PyFloat, PyList, PyLong, PyNone, PyString};
9 | 
10 | #[repr(transparent)]
11 | #[derive(Clone, Debug, Deserialize, Serialize)]
12 | pub(crate) struct DictValue(pub Value);
13 | 
14 | impl DictValue {
15 |     pub fn remove_null_fields(&mut self) {
16 |         Self::clean_nulls(&mut self.0);
17 |     }
18 | 
19 |     fn clean_nulls(value: &mut Value) {
20 |         match value {
21 |             Value::Object(obj) => {
22 |                 obj.retain(|_, v| {
23 |                     Self::clean_nulls(v);
24 |                     !v.is_null()
25 |                 });
26 |             }
27 |             Value::Array(arr) => {
28 |                 arr.iter_mut().for_each(Self::clean_nulls);
29 |             }
30 |             _ => {}
31 |         }
32 |     }
33 | 
34 |     fn value_to_object(val: &Value, py: Python<'_>) -> PyObject {
35 |         match val {
36 |             Value::Null => py.None(),
37 |             Value::Bool(b) => b.to_object(py),
38 |             Value::Number(n) => n
39 |                 .as_i64()
40 |                 .map(|i| i.to_object(py))
41 |                 .or_else(|| n.as_u64().map(|i| i.to_object(py)))
42 |                 .or_else(|| n.as_f64().map(|i| i.to_object(py)))
43 |                 .expect("Invalid number"),
44 |             Value::String(s) => s.to_object(py),
45 |             Value::Array(v) => {
46 |                 let inner: Vec<_> = v.iter().map(|x| Self::value_to_object(x, py)).collect();
47 |                 inner.to_object(py)
48 |             }
49 |             Value::Object(m) => {
50 |                 let inner: HashMap<_, _> = m
51 |                     .iter()
52 |                     .map(|(k, v)| (k, Self::value_to_object(v, py)))
53 |                     .collect();
54 |                 inner.to_object(py)
55 |             }
56 |         }
57 |     }
58 | }
59 | 
60 | impl ToPyObject for DictValue {
61 |     fn to_object(&self, py: Python<'_>) -> PyObject {
62 |         Self::value_to_object(&self.0, py)
63 |     }
64 | }
65 | 
66 | impl<'py> FromPyObject<'py> for DictValue {
67 |     fn extract_bound(ob: &Bound<'py, PyAny>) -> Result<Self, PyErr> {
68 |         if ob.downcast::<PyNone>().is_ok() {
69 |             Ok(DictValue(Value::Null))
70 |         } else if let Ok(value) = ob.downcast::<PyBool>() {
71 |             Ok(DictValue(value.is_true().into()))
72 |         } else if let Ok(value) = ob.downcast::<PyLong>() {
73 |             let number: i64 = value.extract()?;
74 |             Ok(DictValue(number.into()))
75 |         } else if let Ok(value) = ob.downcast::<PyFloat>() {
76 |             let number: f64 = value.extract()?;
77 |             Ok(DictValue(number.into()))
78 |         } else if let Ok(value) = ob.downcast::<PyString>() {
79 |             Ok(DictValue(value.to_string().into()))
80 |         } else if let Ok(value) = ob.downcast::<PyList>() {
81 |             let list: Vec<DictValue> = value.extract()?;
82 |             let vec = list.iter().map(|v| v.0.clone()).collect();
83 |             Ok(DictValue(Value::Array(vec)))
84 |         } else if let Ok(value) = ob.downcast::<PyDict>() {
85 |             let dict: HashMap<String, DictValue> = value.extract()?;
86 |             let map = dict.iter().map(|(k, v)| (k.clone(), v.0.clone())).collect();
87 |             Ok(DictValue(Value::Object(map)))
88 |         } else {
89 |             Err(PyErr::new::<PyTypeError, _>(
90 |                 "Invalid data (not serializable)",
91 |             ))
92 |         }
93 |     }
94 | }
95 | 
--------------------------------------------------------------------------------
/src/util/memory.rs:
--------------------------------------------------------------------------------
1 | pub(crate) enum AlignedI16Data<'a> {
2 |     AlreadyAligned(&'a [u8]),
3 |     Copied(Vec<i16>),
4 | }
5 | 
6 | impl<'a> AlignedI16Data<'a> {
7 |     pub fn new(src: &'a [u8]) -> Self {
8 |         let bytes_ptr = src.as_ptr();
9 | 
10 |         // If `src`'s memory is not 16-bit aligned, create a new 16-bit aligned
11 |         // memory area and copy the contents of `src` to it. Otherwise, simply
12 |         // keep the original slice.
13 |         if bytes_ptr as usize % 2 == 0 {
14 |             AlignedI16Data::AlreadyAligned(src)
15 |         } else {
16 |             let num_bytes = src.len();
17 |             let num_words = num_bytes / 2;
18 | 
19 |             let mut words = Vec::<i16>::with_capacity(num_words);
20 |             let words_ptr = words.as_mut_ptr() as *mut u8;
21 | 
22 |             unsafe {
23 |                 std::ptr::copy_nonoverlapping(bytes_ptr, words_ptr, num_bytes);
24 |                 words.set_len(num_words);
25 |             }
26 | 
27 |             AlignedI16Data::Copied(words)
28 |         }
29 |     }
30 | 
31 |     pub fn as_ptr(&self) -> *const i16 {
32 |         match self {
33 |             AlignedI16Data::AlreadyAligned(d) => d.as_ptr() as *const i16,
34 |             AlignedI16Data::Copied(d) => d.as_ptr(),
35 |         }
36 |     }
37 | }
38 | 
--------------------------------------------------------------------------------
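Note: the ``DictValue`` conversion in ``src/util/dict.rs`` above mirrors the
JSON data model, which is why dictionary arguments passed to the Python API can
only contain ``None``, ``bool``, ``int``, ``float``, ``str``, ``list`` and
``dict`` values. A sketch of the kind of value that converts cleanly (the
``update_inputs`` settings shape is an illustrative assumption, not the
documented schema)::

    client.update_inputs({
        "microphone": {
            "isEnabled": True,
            "settings": {"deviceId": "my-mic"},
        },
    })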