├── .bentoignore
├── flow_diagram.png
├── service_arch.png
├── twilio_setup.png
├── requirements.txt
├── bentofile.yaml
├── service.py
├── whisper_bento.py
├── README.md
├── .gitignore
├── simple_xtts.py
└── bot.py

/.bentoignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.py[cod]
3 | *$py.class
4 | .ipynb_checkpoints
5 | venv/
6 | 
--------------------------------------------------------------------------------
/flow_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bentoml/BentoVoiceAgent/HEAD/flow_diagram.png
--------------------------------------------------------------------------------
/service_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bentoml/BentoVoiceAgent/HEAD/service_arch.png
--------------------------------------------------------------------------------
/twilio_setup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bentoml/BentoVoiceAgent/HEAD/twilio_setup.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | bentoml>=1.3.11
2 | ctranslate2==4.5.0
3 | fastapi==0.115.0
4 | faster-whisper==1.0.0
5 | numpy~=1.0
6 | pipecat-ai[cartesia,openai,silero,xtts]==0.0.43
7 | safetensors==0.4.5
8 | torch==2.4.1
9 | 
--------------------------------------------------------------------------------
/bentofile.yaml:
--------------------------------------------------------------------------------
1 | service: "service.py:TwilioBot"
2 | labels:
3 |   owner: bentoml-team
4 |   project: gallery
5 | include:
6 |   - "*.py"
7 | python:
8 |   requirements_txt: requirements.txt
9 | docker:
10 |   python_version: "3.11"
11 |   system_packages:
12 |     - ffmpeg
13 | envs:
14 |   - name: OPENAI_SERVICE_URL
15 |   - name: XTTS_SERVICE_URL
16 |   - name: LD_LIBRARY_PATH
17 |     value: "/app/.venv/lib/python3.11/site-packages/nvidia/cublas/lib:/app/.venv/lib/python3.11/site-packages/nvidia/cudnn/lib:/usr/local/lib/python3.11/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib"
18 | 
--------------------------------------------------------------------------------
/service.py:
--------------------------------------------------------------------------------
1 | import bentoml
2 | import json
3 | import os
4 | import typing as t
5 | 
6 | from pathlib import Path
7 | from fastapi import FastAPI, WebSocket
8 | from fastapi.middleware.cors import CORSMiddleware
9 | from starlette.responses import HTMLResponse
10 | 
11 | LANGUAGE_CODE = "en"
12 | 
13 | app = FastAPI()
14 | 
15 | @bentoml.service(
16 |     traffic={"timeout": 30},
17 |     resources={
18 |         "gpu": 1,
19 |         "gpu_type": "nvidia-tesla-t4",
20 |     },
21 | )
22 | @bentoml.mount_asgi_app(app, path="/voice")
23 | class TwilioBot:
24 | 
25 |     def __init__(self):
26 |         import torch
27 |         from faster_whisper import WhisperModel
28 |         self.batch_size = 16  # reduce if low on GPU mem
29 |         self.device = "cuda" if torch.cuda.is_available() else "cpu"
30 |         compute_type = "float16" if torch.cuda.is_available() else "int8"
31 |         self.whisper_model = WhisperModel("large-v3", self.device, compute_type=compute_type)
32 | 
33 |     @app.post("/start_call")
34 |     async def start_call(self):
35 |         service_url = os.environ.get("BENTOCLOUD_DEPLOYMENT_URL") or ""
36 |         assert service_url
37 |         if service_url.startswith("http"):
38 |             from urllib.parse import urlparse
39 |             service_url = urlparse(service_url).netloc
40 |         # TwiML response: tell Twilio to open a bidirectional media stream to the /voice/ws websocket.
41 |         tmpl = """<?xml version="1.0" encoding="UTF-8"?>
42 | <Response>
43 |     <Connect>
44 |         <Stream url="wss://{service_url}/voice/ws"></Stream>
45 |     </Connect>
46 | </Response>
47 | """
48 |         return HTMLResponse(content=tmpl.format(service_url=service_url), media_type="application/xml")
49 | 
50 |     @app.websocket("/ws")
51 |     async def websocket_endpoint(self, websocket: WebSocket):
52 | 
53 |         from bot import run_bot
54 |         await websocket.accept()
55 |         start_data = websocket.iter_text()
56 |         await start_data.__anext__()
57 |         call_data = json.loads(await start_data.__anext__())
58 |         stream_sid = call_data["start"]["streamSid"]
59 |         print("WebSocket connection accepted")
60 |         await run_bot(websocket, stream_sid, whisper_model=self.whisper_model)
61 | 
--------------------------------------------------------------------------------
/whisper_bento.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2024, Daily
3 | #
4 | # SPDX-License-Identifier: BSD 2-Clause License
5 | #
6 | 
7 | """This module implements Whisper transcription with a locally-downloaded model."""
8 | 
9 | import asyncio
10 | 
11 | from enum import Enum
12 | from typing import AsyncGenerator
13 | 
14 | import numpy as np
15 | 
16 | from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame
17 | from pipecat.services.ai_services import SegmentedSTTService
18 | from pipecat.utils.time import time_now_iso8601
19 | 
20 | from loguru import logger
21 | 
22 | try:
23 |     from faster_whisper import WhisperModel
24 | except ModuleNotFoundError as e:
25 |     logger.error(f"Exception: {e}")
26 |     logger.error("In order to use Whisper, you need to `pip install pipecat-ai[whisper]`.")
27 |     raise Exception(f"Missing module: {e}")
28 | 
29 | 
30 | class Model(Enum):
31 |     """Class of basic Whisper model selection options"""
32 | 
33 |     TINY = "tiny"
34 |     BASE = "base"
35 |     MEDIUM = "medium"
36 |     LARGE = "large-v3"
37 |     DISTIL_LARGE_V2 = "Systran/faster-distil-whisper-large-v2"
38 |     DISTIL_MEDIUM_EN = "Systran/faster-distil-whisper-medium.en"
39 | 
40 | 
41 | class BentoWhisperSTTService(SegmentedSTTService):
42 |     """Class to transcribe audio with a pre-loaded Whisper model"""
43 | 
44 |     def __init__(
45 |         self,
46 |         *,
47 |         model: WhisperModel,
48 |         no_speech_prob: float = 0.4,
49 |         **kwargs,
50 |     ):
51 |         super().__init__(**kwargs)
52 |         self._no_speech_prob = no_speech_prob
53 |         self._model: WhisperModel = model
54 |         self._load()
55 | 
56 |     def can_generate_metrics(self) -> bool:
57 |         return True
58 | 
59 |     def _load(self):
60 |         """The Whisper model is created by the caller and injected through the
61 |         constructor, so there is nothing to download or load here."""
62 |         logger.debug("Loading Whisper model...")
63 |         # The injected model is already initialized; no further setup is needed.
64 |         logger.debug("Loaded Whisper model")
65 | 
66 |     async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
67 |         """Transcribes given audio using Whisper"""
68 |         if not self._model:
69 |             logger.error(f"{self} error: Whisper model not available")
70 |             yield ErrorFrame("Whisper model not available")
71 |             return
72 | 
73 |         await self.start_processing_metrics()
74 |         await self.start_ttfb_metrics()
75 | 
76 |         # Divide by 32768 because we have signed 16-bit data.
77 |         audio_float = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / 32768.0
78 | 
79 |         segments, _ = await asyncio.to_thread(self._model.transcribe, audio_float)
80 | 
81 |         text: str = ""
82 |         for segment in segments:
83 |             if segment.no_speech_prob < self._no_speech_prob:
84 |                 text += f"{segment.text} "
85 | 
86 |         await self.stop_ttfb_metrics()
87 |         await self.stop_processing_metrics()
88 | 
89 |         if text:
90 |             logger.debug(f"Transcription: [{text}]")
91 |             yield TranscriptionFrame(text, "", time_now_iso8601())
92 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Voice Agent with Open Source Models
2 | 
3 | This repository demonstrates how to build a voice agent using open-source Large Language Models (LLMs), text-to-speech (TTS), and speech-to-text (STT) models. It uses the [Pipecat](https://github.com/pipecat-ai/pipecat) voice pipeline and is deployed with [BentoML](https://github.com/bentoml/BentoML). The voice agent is accessible via a phone number, with Twilio as the communication transport. This example can be easily extended to incorporate additional voice agent features and functionality.
4 | 
5 | ![service architecture](service_arch.png)
6 | 
7 | This voice agent uses the following models:
8 | 
9 | - Llama 3.1
10 | - XTTS text-to-speech model
11 | - Whisper speech-to-text model
12 | 
13 | The LLM and XTTS models are deployed as separate API endpoints, as outlined in the instructions below. These API endpoints are provided to the voice agent through environment variables.
14 | 
15 | See [here](https://docs.bentoml.com/en/latest/examples/overview.html) for a full list of BentoML example projects.
16 | 
17 | ## Prerequisites
18 | 
19 | This repository has been verified with Python 3.11 and BentoML 1.3.9.
20 | 
21 | ```
22 | pip install -U bentoml
23 | ```
24 | 
25 | ## Dependent models
26 | 
27 | Deploy the LLM and XTTS models by following the instructions provided in their respective repositories.
28 | 
29 | - Deploy the LLM with [BentoVLLM](https://github.com/bentoml/BentoVLLM/tree/main/llama3.1-70b-instruct-awq)
30 | - Deploy XTTS with [BentoXTTSStreaming](https://github.com/bentoml/BentoXTTSStreaming)
31 | 
32 | ![flow_diagram](flow_diagram.png)
33 | 
34 | Once the models are deployed, you can obtain their API endpoints from BentoCloud. These endpoints should then be set as environment variables for the voice agent deployment.
35 | 
36 | - XTTS_SERVICE_URL
37 | - OPENAI_SERVICE_URL
38 | 
39 | ## Run the voice agent
40 | 
41 | Install the following system packages to run the voice agent locally.
42 | 
43 | ```
44 | ffmpeg
45 | ```
46 | 
47 | Install the required Python packages.
48 | 
49 | ```
50 | pip install -U -r requirements.txt
51 | ```
52 | 
53 | Start the server with the endpoint URL environment variables. Update the values to the endpoint URLs of your own deployments.
54 | 
55 | ```
56 | XTTS_SERVICE_URL=https://xtts-streaming-rvpg-d3767914.mt-guc1.bentoml.ai OPENAI_SERVICE_URL=https://llama-3-1-zwu6-d3767914.mt-guc1.bentoml.ai/v1 bentoml serve
57 | ```
58 | 
59 | The server exposes two key endpoints:
60 | 
61 | - `/voice/start_call`: An HTTP endpoint that serves as a Twilio webhook to initiate calls.
62 | - `/voice/ws`: A WebSocket endpoint that processes voice data in real time.
63 | 
64 | On Twilio's voice configuration page, set the voice agent endpoint (including the `/voice/start_call` path) as the webhook URL.
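You can sanity-check the webhook before wiring it into Twilio: a `POST` to `/voice/start_call` should return a small TwiML (XML) document that instructs Twilio to open a media stream to `/voice/ws`. The URL below is only a placeholder for your own deployment; note that `start_call` reads the `BENTOCLOUD_DEPLOYMENT_URL` environment variable (see `service.py`), so this check is intended for a deployed instance rather than a local server.

```
curl -X POST https://my-voice-agent-xxxx.mt-guc1.bentoml.ai/voice/start_call
```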
65 | 
66 | ![twilio example setup](twilio_setup.png)
67 | 
68 | ## Deploy to BentoCloud
69 | 
70 | After the Service is ready, you can deploy the application to BentoCloud for better management and scalability. [Sign up](https://www.bentoml.com/) if you haven't got a BentoCloud account.
71 | 
72 | Make sure you have [logged in to BentoCloud](https://docs.bentoml.com/en/latest/bentocloud/how-tos/manage-access-token.html), then run the following command to deploy it.
73 | 
74 | ```bash
75 | bentoml deploy . --env XTTS_SERVICE_URL=https://xtts-streaming-rvpg-d3767914.mt-guc1.bentoml.ai --env OPENAI_SERVICE_URL=https://llama-3-1-zwu6-d3767914.mt-guc1.bentoml.ai/v1
76 | ```
77 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 | 
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 | 
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 | 
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 | 
54 | # Translations
55 | *.mo
56 | *.pot
57 | 
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 | 
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 | 
68 | # Scrapy stuff:
69 | .scrapy
70 | 
71 | # Sphinx documentation
72 | docs/_build/
73 | 
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 | 
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 | 
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 | 
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 | 
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 | 
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 
--------------------------------------------------------------------------------
/simple_xtts.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2024, Daily
3 | #
4 | # SPDX-License-Identifier: BSD 2-Clause License
5 | #
6 | 
7 | import aiohttp
8 | 
9 | from typing import Any, AsyncGenerator, Dict
10 | 
11 | from pipecat.frames.frames import (
12 |     ErrorFrame,
13 |     Frame,
14 |     StartFrame,
15 |     TTSAudioRawFrame,
16 |     TTSStartedFrame,
17 |     TTSStoppedFrame,
18 | )
19 | from pipecat.services.ai_services import TTSService
20 | 
21 | from loguru import logger
22 | 
23 | import numpy as np
24 | 
25 | 
26 | try:
27 |     import resampy
28 | except ModuleNotFoundError as e:
29 |     logger.error(f"Exception: {e}")
30 |     logger.error("In order to use XTTS, you need to `pip install pipecat-ai[xtts]`.")
31 |     raise Exception(f"Missing module: {e}")
32 | 
33 | 
34 | class SimpleXTTSService(TTSService):
35 | 
36 |     def __init__(
37 |         self,
38 |         *,
39 |         language: str,
40 |         base_url: str,
41 |         aiohttp_session: aiohttp.ClientSession,
42 |         **kwargs):
43 |         super().__init__(**kwargs)
44 | 
45 |         self._language = language
46 |         self._base_url = base_url
47 |         self._aiohttp_session = aiohttp_session
48 | 
49 |     def can_generate_metrics(self) -> bool:
50 |         return True
51 | 
52 |     async def start(self, frame: StartFrame):
53 |         await super().start(frame)
54 | 
55 |     async def set_voice(self, voice: str):
56 |         pass
57 | 
58 |     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
59 |         logger.debug(f"Generating TTS: [{text}]")
60 | 
61 |         url = self._base_url + "/tts/stream"
62 | 
63 |         payload = {
64 |             "text": text.replace('.', '').replace('*', ''),
65 |             "language": self._language,
66 |             "add_wav_header": False,
67 |             "stream_chunk_size": 20,
68 |         }
69 | 
70 |         await self.start_ttfb_metrics()
71 | 
72 |         async with self._aiohttp_session.post(url, json=payload) as r:
73 |             if r.status != 200:
74 |                 error_text = await r.text()
75 |                 logger.error(f"{self} error getting audio (status: {r.status}, error: {error_text})")
76 |                 yield ErrorFrame(f"Error getting audio (status: {r.status}, error: {error_text})")
77 |                 return
78 | 
79 |             await self.start_tts_usage_metrics(text)
80 |             yield TTSStartedFrame()
81 | 
82 |             buffer = bytearray()
83 |             async for chunk in r.content.iter_chunked(1024):
84 |                 if len(chunk) > 0:
85 |                     await self.stop_ttfb_metrics()
86 |                     # Append new chunk to the buffer
87 |                     buffer.extend(chunk)
88 | 
89 |                     # Check if buffer has enough data for processing
90 |                     while len(buffer) >= 48000:  # 48000 bytes is 1 second of 16-bit mono audio at 24000 Hz
91 |                         # Process the buffer up to a safe size for resampling
92 |                         process_data = buffer[:48000]
93 |                         # Remove processed data from buffer
94 |                         buffer = buffer[48000:]
95 | 
96 |                         # Convert the byte data to numpy array for resampling
97 |                         audio_np = np.frombuffer(process_data, dtype=np.int16)
98 |                         # Resample the audio from 24000 Hz to 16000 Hz
99 |                         resampled_audio = resampy.resample(audio_np, 24000, 16000)
100 |                         # Convert the numpy array back to bytes
101 |                         resampled_audio_bytes = resampled_audio.astype(np.int16).tobytes()
102 |                         # Create the frame with the resampled audio
103 |                         frame = TTSAudioRawFrame(resampled_audio_bytes, 16000, 1)
104 |                         yield frame
105 | 
106 |             # Process any remaining data in the buffer
107 |             if len(buffer) > 0:
108 |                 audio_np = np.frombuffer(buffer, dtype=np.int16)
109 |                 resampled_audio = resampy.resample(audio_np, 24000, 16000)
110 |                 resampled_audio_bytes = resampled_audio.astype(np.int16).tobytes()
111 |                 frame = TTSAudioRawFrame(resampled_audio_bytes, 16000, 1)
112 |                 yield frame
113 | 
114 |             yield TTSStoppedFrame()
115 | 
--------------------------------------------------------------------------------
/bot.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | 
4 | import aiohttp
5 | from pipecat.frames.frames import EndFrame, LLMMessagesFrame
6 | from pipecat.pipeline.pipeline import Pipeline
7 | from pipecat.pipeline.runner import PipelineRunner
8 | from pipecat.pipeline.task import PipelineParams, PipelineTask
9 | from pipecat.services.openai import OpenAILLMService, OpenAILLMContext
10 | from pipecat.transports.network.fastapi_websocket import (
11 |     FastAPIWebsocketTransport,
12 |     FastAPIWebsocketParams,
13 | )
14 | from pipecat.vad.silero import SileroVADAnalyzer
15 | from pipecat.serializers.twilio import TwilioFrameSerializer
16 | 
17 | from openai.types.chat import ChatCompletionToolParam
18 | 
19 | from loguru import logger
20 | 
21 | from whisper_bento import BentoWhisperSTTService
22 | from simple_xtts import SimpleXTTSService
23 | 
24 | logger.remove(0)
25 | logger.add(sys.stderr, level="DEBUG")
26 | 
27 | 
28 | async def run_bot(websocket_client, stream_sid, whisper_model):
29 |     transport = FastAPIWebsocketTransport(
30 |         websocket=websocket_client,
31 |         params=FastAPIWebsocketParams(
32 |             audio_out_enabled=True,
33 |             add_wav_header=False,
34 |             vad_enabled=True,
35 |             vad_analyzer=SileroVADAnalyzer(),
36 |             vad_audio_passthrough=True,
37 |             serializer=TwilioFrameSerializer(stream_sid),
38 |         ),
39 |     )
40 | 
41 |     openai_base_url = os.getenv("OPENAI_SERVICE_URL")
42 |     assert openai_base_url
43 |     llm = OpenAILLMService(
44 |         base_url=openai_base_url,
45 |         api_key="n/a",
46 |         model="meta-llama/Meta-Llama-3.1-8B-Instruct",
47 |     )
48 | 
49 |     stt = BentoWhisperSTTService(model=whisper_model)
50 | 
51 |     xtts_base_url = os.getenv("XTTS_SERVICE_URL")
52 |     assert xtts_base_url
53 |     client = aiohttp.ClientSession()
54 |     tts = SimpleXTTSService(
55 |         base_url=xtts_base_url,
56 |         language="en",
57 |         aiohttp_session=client,
58 |     )
59 | 
60 |     messages = [
61 |         {
62 |             "role": "system",
63 |             "content": "You are a helpful LLM assistant in an audio call. Your name is Jane. You work for Bento ML. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a helpful way while keeping your message as brief as possible. First greet with 'Hello, I am Jane from Bento ML, how may I help you?'",
64 |         },
65 |     ]
66 | 
67 |     tools = [
68 |         ChatCompletionToolParam(
69 |             type="function",
70 |             function={
71 |                 "name": "get_deployment_count",
72 |                 "description": "Get the deployment count in a region of a specific status.",
73 |                 "parameters": {
74 |                     "type": "object",
75 |                     "properties": {
76 |                         "region": {
77 |                             "type": "string",
78 |                             "enum": ["north america", "europe", "asia"],
79 |                             "description": "The region where the deployments are located.",
80 |                         },
81 |                         "status": {
82 |                             "type": "string",
83 |                             "enum": ["running", "scaled to zero", "terminated"],
84 |                             "description": "The status of the deployment.",
85 |                         },
86 |                     },
87 |                     "required": ["region", "status"],
88 |                 },
89 |             },
90 |         ),
91 |     ]
92 | 
93 |     async def start_function(function_name, llm, context):
94 |         logger.debug(f"Starting function: {function_name}")
95 | 
96 |     async def exec_function(function_name, tool_call_id, args, llm, context, result_callback):
97 |         logger.debug(f"Executing function: {function_name}")
98 | 
99 |         await result_callback({"deployment_count": 10})
100 | 
101 |     # llm.register_function(
102 |     #     "get_deployment_count",
103 |     #     exec_function,
104 |     #     start_callback=start_function,
105 |     # )
106 | 
107 |     context = OpenAILLMContext(messages)
108 |     # context = OpenAILLMContext(messages, tools)
109 |     context_aggregator = llm.create_context_aggregator(context)
110 |     pipeline = Pipeline(
111 |         [
112 |             transport.input(),  # Websocket input from client
113 |             stt,  # Speech-To-Text
114 |             context_aggregator.user(),  # User responses
115 |             llm,  # LLM
116 |             tts,  # Text-To-Speech
117 |             transport.output(),  # Websocket output to client
118 |             context_aggregator.assistant(),  # LLM responses
119 |         ]
120 |     )
121 | 
122 |     task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
123 | 
124 |     @transport.event_handler("on_client_connected")
125 |     async def on_client_connected(transport, client):
126 |         # Kick off the conversation.
127 |         messages.append({"role": "system", "content": "Please introduce yourself to the user."})
128 |         await task.queue_frames([LLMMessagesFrame(messages)])
129 | 
130 |     @transport.event_handler("on_client_disconnected")
131 |     async def on_client_disconnected(transport, client):
132 |         await task.queue_frames([EndFrame()])
133 | 
134 |     runner = PipelineRunner(handle_sigint=False)
135 | 
136 |     await runner.run(task)
137 | 
--------------------------------------------------------------------------------
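A note on function calling: `bot.py` already defines the `get_deployment_count` tool schema along with `start_function` and `exec_function`, but leaves the registration commented out (lines 101-108 of `bot.py`). A minimal sketch of how it could be enabled is shown below, using only the names defined in `bot.py`; the hard-coded `{"deployment_count": 10}` result remains a placeholder that you would replace with a real lookup.

```python
# Inside run_bot(), after exec_function is defined:
# register the tool with the LLM service and pass the tool schema into the
# context so the model knows the function exists and can choose to call it.
llm.register_function(
    "get_deployment_count",
    exec_function,
    start_callback=start_function,
)

context = OpenAILLMContext(messages, tools)
context_aggregator = llm.create_context_aggregator(context)
```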