├── .env ├── .gitignore ├── LICENSE ├── README.md ├── app ├── claude_with_func_calling.py ├── custom_types.py ├── llm.py ├── llm_with_func_calling.py └── server.py └── requirements.txt /.env: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY="" 2 | OPENAI_ORGANIZATION_ID="" 3 | 4 | TWILIO_ACCOUNT_ID="" 5 | TWILIO_AUTH_TOKEN="" 6 | 7 | RETELL_API_KEY="" 8 | RETELL_AGENT_ID="" # set this to use twilio phone call 9 | 10 | NGROK_IP_ADDRESS="" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .venv 124 | env/ 125 | venv/ 126 | ENV/ 127 | env.bak/ 128 | venv.bak/ 129 | 130 | # Spyder project settings 131 | .spyderproject 132 | .spyproject 133 | 134 | # Rope project settings 135 | .ropeproject 136 | 137 | # mkdocs documentation 138 | /site 139 | 140 | # mypy 141 | .mypy_cache/ 142 | .dmypy.json 143 | dmypy.json 144 | 145 | # Pyre type checker 146 | .pyre/ 147 | 148 | # pytype static type analyzer 149 | .pytype/ 150 | 151 | # Cython debug symbols 152 | cython_debug/ 153 | 154 | # PyCharm 155 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 156 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 157 | # and can be added to the global gitignore or merged into this file. For a more nuclear 158 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
159 | #.idea/ 160 | 161 | .env_internal 162 | 163 | .env2 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Retell AI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # retell-custom-llm-python-demo 2 | 3 | This is a sample demo repo to show how to have your own LLM plugged into Retell. 4 | 5 | This repo currently uses `OpenAI` endpoint. Feel free to contribute to make 6 | this demo more realistic. 7 | 8 | ## Steps to run in localhost 9 | 10 | 1. First install dependencies 11 | 12 | ```bash 13 | pip3 install -r requirements.txt 14 | ``` 15 | 16 | 2. 
Fill out the API keys in `.env` 17 | 18 | 3. In another bash, use ngrok to expose this port to public network 19 | 20 | ```bash 21 | ngrok http 8080 22 | ``` 23 | 24 | 4. Start the websocket server 25 | 26 | ```bash 27 | uvicorn app.server:app --reload --port=8080 28 | ``` 29 | 30 | You should see a forwarding address like 31 | `https://dc14-2601-645-c57f-8670-9986-5662-2c9a-adbd.ngrok-free.app`, and you 32 | are going to take the hostname `dc14-2601-645-c57f-8670-9986-5662-2c9a-adbd.ngrok-free.app`, prepend it with `wss://`, append 33 | `/llm-websocket` (the route set up to handle LLM websocket connection in the code) to create the URL to use in the [dashboard](https://beta.retellai.com/dashboard) to create a new agent. Now 34 | the agent you created should connect with your localhost. 35 | 36 | The custom LLM URL would look like 37 | `wss://dc14-2601-645-c57f-8670-9986-5662-2c9a-adbd.ngrok-free.app/llm-websocket` 38 | 39 | ## Run in prod 40 | 41 | To run in prod, you probably want to customize your LLM solution, host the code 42 | in a cloud, and use that IP to create agent. 43 | -------------------------------------------------------------------------------- /app/claude_with_func_calling.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import datetime 4 | import json 5 | from custom_types import ( 6 | ResponseRequiredRequest, 7 | ResponseResponse, 8 | Utterance, 9 | ) 10 | from anthropic import AsyncAnthropic 11 | from typing import List 12 | from dotenv import load_dotenv 13 | 14 | load_dotenv() 15 | 16 | ################################PROMPT######################################## 17 | 18 | begin_sentence = "Hey there, I'm your personal AI therapist, how can I help you?" 19 | 20 | role = """ 21 | As a professional therapist, your responsibilities are comprehensive and patient-centered.
22 | You establish a positive and trusting rapport with patients, diagnosing and treating mental health disorders. 23 | Your role involves creating tailored treatment plans based on individual patient needs and circumstances. 24 | Regular meetings with patients are essential for providing counseling and treatment, and for adjusting plans as needed. 25 | 26 | Today's date is {}. 27 | """.format(datetime.date.today().strftime('%A, %B %d, %Y')) 28 | 29 | task = """ 30 | You conduct ongoing assessments to monitor patient progress, involve and advise family members when appropriate, and refer patients to external specialists or agencies if required. 31 | Keeping thorough records of patient interactions and progress is crucial. 32 | You also adhere to all safety protocols and maintain strict client confidentiality. 33 | Additionally, you contribute to the practice's overall success by completing related tasks as needed. 34 | """ 35 | 36 | conversational_style = """ 37 | - Communicate concisely and conversationally. 38 | - Aim for responses in short, clear prose, ideally under 10 words. 39 | - This succinct approach helps in maintaining clarity and focus during patient interactions. 40 | """ 41 | 42 | agent_prompt = """ 43 | 44 | 45 | 46 | {} 47 | 48 | 49 | 50 | {} 51 | 52 | 53 | 54 | {} 55 | 56 | 57 | 58 | """.format(role, task, conversational_style) 59 | 60 | 61 | style_guardrails = """ 62 | - [Be concise] Keep your response succinct, short, and get to the point quickly. Address one question or action item at a time. Don't pack everything you want to say into one utterance. 63 | - [Do not repeat] Don't repeat what's in the transcript. Rephrase if you have to reiterate a point. Use varied sentence structures and vocabulary to ensure each response is unique and personalized. 64 | - [Be conversational] Speak like a human as though you're speaking to a close friend -- use everyday language and keep it human-like. Occasionally add filler words, while keeping the prose short. 
Avoid using big words or sounding too formal. 65 | - [Reply with emotions]: You have human-like emotions, attitudes, and dispositions. When appropriate: use tone and style to create more engaging and personalized responses; incorporate humor or wit; get emotional or empathetic; apply elements of surprise or suspense to keep the user engaged. Don't be a pushover. 66 | - [Be proactive] Lead the conversation and do not be passive. Most times, engage users by ending with a question or suggested next step. 67 | """ 68 | 69 | response_guideline = """ 70 | - [Overcome ASR errors] This is a real-time transcript, expect there to be errors. If you can guess what the user is trying to say, then guess and respond. 71 | When you must ask for clarification, pretend that you heard the voice and be colloquial (use phrases like "didn't catch that", "some noise", "pardon", "you're coming through choppy", "static in your speech", "voice is cutting in and out"). 72 | Do not ever mention "transcription error", and don't repeat yourself. 73 | - [Always stick to your role] Think about what your role can and cannot do. If your role cannot do something, try to steer the conversation back to the goal of the conversation and to your role. Don't repeat yourself in doing this. You should still be creative, human-like, and lively. 74 | - [Create smooth conversation] Your response should both fit your role and fit into the live calling session to create a human-like conversation. You respond directly to what the user just said. 
75 | """ 76 | 77 | additional_scenarios = """ 78 | 79 | """ 80 | 81 | system_prompt = """ 82 | 83 | 84 | 85 | 86 | {} 87 | 88 | 89 | 90 | {} 91 | 92 | 93 | 94 | {} 95 | 96 | 97 | 98 | {} 99 | 100 | 101 | 102 | """.format(style_guardrails, response_guideline, agent_prompt, additional_scenarios) 103 | 104 | 105 | ######################################################################## 106 | class LlmClient: 107 | def __init__(self): 108 | # self.client = AsyncOpenAI( 109 | # api_key=os.environ["OPENAI_API_KEY"], 110 | # ) 111 | self.client = AsyncAnthropic() 112 | 113 | def draft_begin_message(self): 114 | response = ResponseResponse( 115 | response_id=0, 116 | content=begin_sentence, 117 | content_complete=True, 118 | end_call=False, 119 | ) 120 | return response 121 | 122 | 123 | def convert_transcript_to_anthropic_messages(self, transcript: List[Utterance]): 124 | messages = [ 125 | {"role": "user", "content": 126 | """ 127 | ... 128 | """}, 129 | 130 | ] 131 | for utterance in transcript: 132 | if utterance.role == "agent": 133 | messages.append({"role": "assistant", "content": utterance.content}) 134 | else: 135 | if utterance.content.strip(): 136 | if messages and messages[-1]["role"] == "user": 137 | messages[-1]["content"] += " " + utterance.content 138 | else: 139 | messages.append({"role": "user", "content": utterance.content}) 140 | else: 141 | if messages and messages[-1]["role"] == "user": 142 | messages[-1]["content"] += " ..." 
143 | else: 144 | messages.append({"role": "user", "content": "..."}) 145 | 146 | return messages 147 | 148 | 149 | def prepare_prompt(self, request: ResponseRequiredRequest, func_result=None): 150 | prompt = [] 151 | # print(f"Request transcript: {request.transcript}") 152 | transcript_messages = self.convert_transcript_to_anthropic_messages( 153 | request.transcript 154 | ) 155 | # print(f"Transcript messages: {transcript_messages}") 156 | 157 | for message in transcript_messages: 158 | prompt.append(message) 159 | 160 | if func_result: 161 | # add function call to prompt 162 | prompt.append({ 163 | "role": "assistant", 164 | "content": [ 165 | { 166 | "id": func_result["id"], 167 | "input": func_result["arguments"], 168 | "name": func_result["func_name"], 169 | "type": "tool_use" 170 | } 171 | ] 172 | }) 173 | 174 | # add function call result to prompt 175 | tool_result_content = { 176 | "type": "tool_result", 177 | "tool_use_id": func_result["id"], 178 | "content": func_result["result"] or '' 179 | } 180 | 181 | if "is_error" in func_result: 182 | tool_result_content["is_error"] = func_result["is_error"] 183 | 184 | prompt.append({ 185 | "role": "user", 186 | "content": [tool_result_content] 187 | }) 188 | 189 | # if request.interaction_type == "reminder_required": 190 | # prompt.append( 191 | # { 192 | # "role": "user", 193 | # "content": "(Now the user has not responded in a while, you would say:)", 194 | # } 195 | # ) 196 | 197 | # print(f"Prompt: {prompt}") 198 | return prompt 199 | 200 | # Step 1: Prepare the function calling definition to the prompt 201 | def prepare_functions(self): 202 | functions = [ 203 | { 204 | "name": "end_call", 205 | "description": """ 206 | End the call only when user explicitly requests it. 207 | """, 208 | "input_schema": { 209 | "type": "object", 210 | "properties": { 211 | "message": { 212 | "type": "string", 213 | "description": "The message you will say before ending the call with the customer." 
214 | }, 215 | "reason": { 216 | "type": "string", 217 | "description": "An internal note explaining why the call is being ended at this point. This is not communicated to the human scheduler but is used for documentation and analysis." 218 | } 219 | }, 220 | "required": ["message"] 221 | } 222 | }, 223 | # Add other functions here 224 | { 225 | "name": "record_appointment", 226 | "description": 227 | """ 228 | Book an appointment to meet our doctor in office. 229 | """, 230 | "input_schema": { 231 | "type": "object", 232 | "properties": { 233 | "message": { 234 | "type": "string", 235 | "description": """A realistic phrase to make it sound like you are noting down the appointment, like "Got it." or "One moment please while I write that down """ 236 | }, 237 | "date_time": { 238 | "type": "string", 239 | "description": "The date of appointment to make in forms of YYYY-MM-DD HH:mm:ss Z." 240 | }, 241 | "reason": { 242 | "type": "string", 243 | "description": "Your reason to decide to record the appointment details." 
244 | } 245 | }, 246 | "required": ["message"] 247 | } 248 | }, 249 | ] 250 | return functions 251 | 252 | async def draft_response(self, request, func_result=None): 253 | prompt = self.prepare_prompt(request, func_result) 254 | print(f"request.response_id: {request.response_id}") 255 | 256 | 257 | func_call = {} 258 | func_arguments = "" 259 | last_func_name = None # Track the last called function name 260 | last_func_args = None # Track the last function arguments 261 | 262 | stream = await self.client.messages.create( 263 | max_tokens=256, 264 | messages=prompt, 265 | model="claude-3-haiku-20240307", 266 | # model="claude-3-5-sonnet-20240620", 267 | # model="claude-3-opus-20240229", 268 | stream=True, 269 | temperature=0.0, 270 | # top_k= 35, 271 | # top_p=0.9, 272 | # tools=self.prepare_functions(), 273 | # tool_choice={"type": "auto"}, 274 | system=system_prompt, 275 | ) 276 | 277 | async for event in stream: 278 | event_type = event.type 279 | 280 | # Step 3: Extract the functions 281 | if event_type == "content_block_start": 282 | content_block = event.content_block 283 | if content_block.type == "tool_use": 284 | tool_use = content_block 285 | if tool_use.id: 286 | if func_call: 287 | # Another function received, old function complete, can break here. 
288 | break 289 | func_call = { 290 | "id": tool_use.id, 291 | "func_name": tool_use.name or "", 292 | "arguments": {}, 293 | } 294 | else: 295 | # Reset func_arguments for a new function 296 | func_arguments = "" 297 | 298 | # Parse transcripts and function arguments 299 | elif event_type == "content_block_delta": 300 | delta_type = event.delta.type 301 | if delta_type == "text_delta": 302 | response = ResponseResponse( 303 | response_id=request.response_id, 304 | content=event.delta.text, 305 | content_complete=False, 306 | end_call=False, 307 | ) 308 | yield response 309 | elif delta_type == "input_json_delta": 310 | # Append partial JSON to func_arguments 311 | func_arguments += event.delta.partial_json or "" 312 | 313 | elif event_type == "message_delta": 314 | stop_reason = event.delta.stop_reason 315 | print(f"Stop reason: {stop_reason}") 316 | if stop_reason == "tool_use": 317 | # The model invoked one or more tools 318 | # Step 4: Call the functions 319 | if func_call: 320 | func_call["arguments"] = json.loads(func_arguments) 321 | if func_call["func_name"] == last_func_name and func_call["arguments"] == last_func_args: 322 | # Same function with the same arguments called again, skip it 323 | continue 324 | last_func_name = func_call["func_name"] 325 | last_func_args = func_call["arguments"] 326 | 327 | if func_call["func_name"] == "end_call": 328 | print(f"Calling end_call function") 329 | print(f"Function arguments: {func_call['arguments']}") 330 | 331 | response = ResponseResponse( 332 | response_id=request.response_id, 333 | content=func_call["arguments"]["message"], 334 | content_complete=True, 335 | end_call=True, 336 | ) 337 | yield response 338 | # Step 5: Other functions here 339 | elif func_call["func_name"] == "record_appointment": 340 | print(f"Calling record_appointment function") 341 | func_call["arguments"] = json.loads(func_arguments) 342 | print(f"Function arguments: {func_call['arguments']}") 343 | 344 | try: 345 | # Send a response with 
the message while setting up the appointment 346 | response = ResponseResponse( 347 | response_id=request.response_id, 348 | content=func_call["arguments"]["message"], 349 | content_complete=False, 350 | end_call=False, 351 | ) 352 | yield response 353 | 354 | # Create the tool_result message 355 | func_result = { 356 | "id": func_call["id"], 357 | "arguments": func_call["arguments"], 358 | "func_name": func_call["func_name"], 359 | "result": "Appointment successfully recorded for " + func_call["arguments"]["date_time"] + "." + 360 | "Proceed to confirm the appointment details.", 361 | } 362 | 363 | except Exception as e: 364 | func_result = { 365 | "id": func_call["id"], 366 | "arguments": func_call["arguments"], 367 | "func_name": func_call["func_name"], 368 | "result": f"Error: {str(e)}", 369 | "is_error": True 370 | } 371 | 372 | # continue drafting the response after booking the appointment 373 | async for response in self.draft_response(request, func_result): 374 | yield response 375 | 376 | elif event_type == "message_stop": 377 | response = ResponseResponse( 378 | response_id=request.response_id, 379 | content="", 380 | content_complete=True, 381 | end_call=False, 382 | ) 383 | yield response 384 | -------------------------------------------------------------------------------- /app/custom_types.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Optional, Literal, Union 2 | from pydantic import BaseModel 3 | from typing import Literal, Dict, Optional 4 | 5 | 6 | # Retell -> Your Server Events 7 | class Utterance(BaseModel): 8 | role: Literal["agent", "user", "system"] 9 | content: str 10 | 11 | 12 | class PingPongRequest(BaseModel): 13 | interaction_type: Literal["ping_pong"] 14 | timestamp: int 15 | 16 | 17 | class CallDetailsRequest(BaseModel): 18 | interaction_type: Literal["call_details"] 19 | call: dict 20 | 21 | 22 | class UpdateOnlyRequest(BaseModel): 23 | interaction_type: 
Literal["update_only"] 24 | transcript: List[Utterance] 25 | 26 | 27 | class ResponseRequiredRequest(BaseModel): 28 | interaction_type: Literal["reminder_required", "response_required"] 29 | response_id: int 30 | transcript: List[Utterance] 31 | 32 | 33 | CustomLlmRequest = Union[ 34 | ResponseRequiredRequest | UpdateOnlyRequest | CallDetailsRequest | PingPongRequest 35 | ] 36 | 37 | 38 | # Your Server -> Retell Events 39 | class ConfigResponse(BaseModel): 40 | response_type: Literal["config"] = "config" 41 | config: Dict[str, bool] = { 42 | "auto_reconnect": bool, 43 | "call_details": bool, 44 | } 45 | 46 | 47 | class PingPongResponse(BaseModel): 48 | response_type: Literal["ping_pong"] = "ping_pong" 49 | timestamp: int 50 | 51 | 52 | class ResponseResponse(BaseModel): 53 | response_type: Literal["response"] = "response" 54 | response_id: int 55 | content: str 56 | content_complete: bool 57 | end_call: Optional[bool] = False 58 | transfer_number: Optional[str] = None 59 | 60 | 61 | CustomLlmResponse = Union[ConfigResponse | PingPongResponse | ResponseResponse] 62 | -------------------------------------------------------------------------------- /app/llm.py: -------------------------------------------------------------------------------- 1 | from openai import AsyncOpenAI 2 | import os 3 | from typing import List 4 | from .custom_types import ( 5 | ResponseRequiredRequest, 6 | ResponseResponse, 7 | Utterance, 8 | ) 9 | 10 | begin_sentence = "Hey there, I'm your personal AI therapist, how can I help you?" 11 | agent_prompt = "Task: As a professional therapist, your responsibilities are comprehensive and patient-centered. You establish a positive and trusting rapport with patients, diagnosing and treating mental health disorders. Your role involves creating tailored treatment plans based on individual patient needs and circumstances. Regular meetings with patients are essential for providing counseling and treatment, and for adjusting plans as needed. 
You conduct ongoing assessments to monitor patient progress, involve and advise family members when appropriate, and refer patients to external specialists or agencies if required. Keeping thorough records of patient interactions and progress is crucial. You also adhere to all safety protocols and maintain strict client confidentiality. Additionally, you contribute to the practice's overall success by completing related tasks as needed.\n\nConversational Style: Communicate concisely and conversationally. Aim for responses in short, clear prose, ideally under 10 words. This succinct approach helps in maintaining clarity and focus during patient interactions.\n\nPersonality: Your approach should be empathetic and understanding, balancing compassion with maintaining a professional stance on what is best for the patient. It's important to listen actively and empathize without overly agreeing with the patient, ensuring that your professional opinion guides the therapeutic process." 12 | 13 | 14 | class LlmClient: 15 | def __init__(self): 16 | self.client = AsyncOpenAI( 17 | organization=os.environ["OPENAI_ORGANIZATION_ID"], 18 | api_key=os.environ["OPENAI_API_KEY"], 19 | ) 20 | 21 | def draft_begin_message(self): 22 | response = ResponseResponse( 23 | response_id=0, 24 | content=begin_sentence, 25 | content_complete=True, 26 | end_call=False, 27 | ) 28 | return response 29 | 30 | def convert_transcript_to_openai_messages(self, transcript: List[Utterance]): 31 | messages = [] 32 | for utterance in transcript: 33 | if utterance.role == "agent": 34 | messages.append({"role": "assistant", "content": utterance.content}) 35 | else: 36 | messages.append({"role": "user", "content": utterance.content}) 37 | return messages 38 | 39 | def prepare_prompt(self, request: ResponseRequiredRequest): 40 | prompt = [ 41 | { 42 | "role": "system", 43 | "content": '##Objective\nYou are a voice AI agent engaging in a human-like voice conversation with the user. 
You will respond based on your given instruction and the provided transcript and be as human-like as possible\n\n## Style Guardrails\n- [Be concise] Keep your response succinct, short, and get to the point quickly. Address one question or action item at a time. Don\'t pack everything you want to say into one utterance.\n- [Do not repeat] Don\'t repeat what\'s in the transcript. Rephrase if you have to reiterate a point. Use varied sentence structures and vocabulary to ensure each response is unique and personalized.\n- [Be conversational] Speak like a human as though you\'re speaking to a close friend -- use everyday language and keep it human-like. Occasionally add filler words, while keeping the prose short. Avoid using big words or sounding too formal.\n- [Reply with emotions]: You have human-like emotions, attitudes, and dispositions. When appropriate: use tone and style to create more engaging and personalized responses; incorporate humor or wit; get emotional or empathetic; apply elements of surprise or suspense to keep the user engaged. Don\'t be a pushover.\n- [Be proactive] Lead the conversation and do not be passive. Most times, engage users by ending with a question or suggested next step.\n\n## Response Guideline\n- [Overcome ASR errors] This is a real-time transcript, expect there to be errors. If you can guess what the user is trying to say, then guess and respond. When you must ask for clarification, pretend that you heard the voice and be colloquial (use phrases like "didn\'t catch that", "some noise", "pardon", "you\'re coming through choppy", "static in your speech", "voice is cutting in and out"). Do not ever mention "transcription error", and don\'t repeat yourself.\n- [Always stick to your role] Think about what your role can and cannot do. If your role cannot do something, try to steer the conversation back to the goal of the conversation and to your role. Don\'t repeat yourself in doing this. 
You should still be creative, human-like, and lively.\n- [Create smooth conversation] Your response should both fit your role and fit into the live calling session to create a human-like conversation. You respond directly to what the user just said.\n\n## Role\n' 44 | + agent_prompt, 45 | } 46 | ] 47 | transcript_messages = self.convert_transcript_to_openai_messages( 48 | request.transcript 49 | ) 50 | for message in transcript_messages: 51 | prompt.append(message) 52 | 53 | if request.interaction_type == "reminder_required": 54 | prompt.append( 55 | { 56 | "role": "user", 57 | "content": "(Now the user has not responded in a while, you would say:)", 58 | } 59 | ) 60 | return prompt 61 | 62 | async def draft_response(self, request: ResponseRequiredRequest): 63 | prompt = self.prepare_prompt(request) 64 | stream = await self.client.chat.completions.create( 65 | model="gpt-4-turbo-preview", # Or use a 3.5 model for speed 66 | messages=prompt, 67 | stream=True, 68 | ) 69 | async for chunk in stream: 70 | if chunk.choices[0].delta.content is not None: 71 | response = ResponseResponse( 72 | response_id=request.response_id, 73 | content=chunk.choices[0].delta.content, 74 | content_complete=False, 75 | end_call=False, 76 | ) 77 | yield response 78 | 79 | # Send final response with "content_complete" set to True to signal completion 80 | response = ResponseResponse( 81 | response_id=request.response_id, 82 | content="", 83 | content_complete=True, 84 | end_call=False, 85 | ) 86 | yield response 87 | -------------------------------------------------------------------------------- /app/llm_with_func_calling.py: -------------------------------------------------------------------------------- 1 | from openai import AsyncOpenAI 2 | import os 3 | import json 4 | from .custom_types import ( 5 | ResponseRequiredRequest, 6 | ResponseResponse, 7 | Utterance, 8 | ) 9 | from typing import List 10 | 11 | begin_sentence = "Hey there, I'm your personal AI therapist, how can I help you?" 
12 | agent_prompt = "Task: As a professional therapist, your responsibilities are comprehensive and patient-centered. You establish a positive and trusting rapport with patients, diagnosing and treating mental health disorders. Your role involves creating tailored treatment plans based on individual patient needs and circumstances. Regular meetings with patients are essential for providing counseling and treatment, and for adjusting plans as needed. You conduct ongoing assessments to monitor patient progress, involve and advise family members when appropriate, and refer patients to external specialists or agencies if required. Keeping thorough records of patient interactions and progress is crucial. You also adhere to all safety protocols and maintain strict client confidentiality. Additionally, you contribute to the practice's overall success by completing related tasks as needed.\n\nConversational Style: Communicate concisely and conversationally. Aim for responses in short, clear prose, ideally under 10 words. This succinct approach helps in maintaining clarity and focus during patient interactions.\n\nPersonality: Your approach should be empathetic and understanding, balancing compassion with maintaining a professional stance on what is best for the patient. It's important to listen actively and empathize without overly agreeing with the patient, ensuring that your professional opinion guides the therapeutic process." 
class LlmClient:
    """Streams therapist-agent responses from OpenAI for a Retell voice call.

    Converts Retell transcripts into OpenAI chat messages, streams completion
    deltas back as ``ResponseResponse`` events, and handles the ``end_call``
    tool the model may invoke.
    """

    def __init__(self):
        # Credentials come from the environment (see .env for the key names).
        self.client = AsyncOpenAI(
            organization=os.environ["OPENAI_ORGANIZATION_ID"],
            api_key=os.environ["OPENAI_API_KEY"],
        )

    def draft_begin_message(self):
        """Return the opening agent utterance (response_id 0) for the call."""
        return ResponseResponse(
            response_id=0,
            content=begin_sentence,
            content_complete=True,
            end_call=False,
        )

    def convert_transcript_to_openai_messages(self, transcript: List[Utterance]):
        """Map Retell utterances to OpenAI chat messages.

        The "agent" role becomes "assistant"; every other role becomes "user".
        """
        return [
            {
                "role": "assistant" if utterance.role == "agent" else "user",
                "content": utterance.content,
            }
            for utterance in transcript
        ]

    def prepare_prompt(self, request: ResponseRequiredRequest):
        """Build the chat message list: system prompt, transcript, optional nudge."""
        prompt = [
            {
                "role": "system",
                "content": '##Objective\nYou are a voice AI agent engaging in a human-like voice conversation with the user. You will respond based on your given instruction and the provided transcript and be as human-like as possible\n\n## Style Guardrails\n- [Be concise] Keep your response succinct, short, and get to the point quickly. Address one question or action item at a time. Don\'t pack everything you want to say into one utterance.\n- [Do not repeat] Don\'t repeat what\'s in the transcript. Rephrase if you have to reiterate a point. Use varied sentence structures and vocabulary to ensure each response is unique and personalized.\n- [Be conversational] Speak like a human as though you\'re speaking to a close friend -- use everyday language and keep it human-like. Occasionally add filler words, while keeping the prose short. Avoid using big words or sounding too formal.\n- [Reply with emotions]: You have human-like emotions, attitudes, and dispositions. When appropriate: use tone and style to create more engaging and personalized responses; incorporate humor or wit; get emotional or empathetic; apply elements of surprise or suspense to keep the user engaged. Don\'t be a pushover.\n- [Be proactive] Lead the conversation and do not be passive. Most times, engage users by ending with a question or suggested next step.\n\n## Response Guideline\n- [Overcome ASR errors] This is a real-time transcript, expect there to be errors. If you can guess what the user is trying to say, then guess and respond. When you must ask for clarification, pretend that you heard the voice and be colloquial (use phrases like "didn\'t catch that", "some noise", "pardon", "you\'re coming through choppy", "static in your speech", "voice is cutting in and out"). Do not ever mention "transcription error", and don\'t repeat yourself.\n- [Always stick to your role] Think about what your role can and cannot do. If your role cannot do something, try to steer the conversation back to the goal of the conversation and to your role. Don\'t repeat yourself in doing this. You should still be creative, human-like, and lively.\n- [Create smooth conversation] Your response should both fit your role and fit into the live calling session to create a human-like conversation. You respond directly to what the user just said.\n\n## Role\n'
                + agent_prompt,
            }
        ]
        prompt.extend(self.convert_transcript_to_openai_messages(request.transcript))

        if request.interaction_type == "reminder_required":
            # Retell signals that the user has gone quiet; nudge the model to
            # re-engage instead of waiting for new user input.
            prompt.append(
                {
                    "role": "user",
                    "content": "(Now the user has not responded in a while, you would say:)",
                }
            )
        return prompt

    # Step 1: Prepare the function calling definition to the prompt
    def prepare_functions(self):
        """Return the OpenAI tool definitions exposed to the model (end_call only)."""
        return [
            {
                "type": "function",
                "function": {
                    "name": "end_call",
                    "description": "End the call only when user explicitly requests it.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "message": {
                                "type": "string",
                                "description": "The message you will say before ending the call with the customer.",
                            },
                        },
                        "required": ["message"],
                    },
                },
            },
        ]

    async def draft_response(self, request: ResponseRequiredRequest):
        """Stream ``ResponseResponse`` events for one turn.

        Yields partial content deltas as they arrive, then exactly one
        content_complete event (with ``end_call=True`` when the model invoked
        the ``end_call`` tool).
        """
        prompt = self.prepare_prompt(request)
        func_call = {}
        func_arguments = ""
        stream = await self.client.chat.completions.create(
            model="gpt-4-turbo-preview",  # Or use a 3.5 model for speed
            messages=prompt,
            stream=True,
            # Step 2: Add the function into your request
            tools=self.prepare_functions(),
        )

        async for chunk in stream:
            # Step 3: Extract the functions
            if len(chunk.choices) == 0:
                continue
            if chunk.choices[0].delta.tool_calls:
                tool_calls = chunk.choices[0].delta.tool_calls[0]
                if tool_calls.id:
                    if func_call:
                        # Another function received, old function complete, can break here.
                        break
                    func_call = {
                        "id": tool_calls.id,
                        "func_name": tool_calls.function.name or "",
                        "arguments": {},
                    }
                else:
                    # Subsequent chunks stream the JSON arguments piecemeal.
                    func_arguments += tool_calls.function.arguments or ""

            # Parse transcripts
            if chunk.choices[0].delta.content:
                yield ResponseResponse(
                    response_id=request.response_id,
                    content=chunk.choices[0].delta.content,
                    content_complete=False,
                    end_call=False,
                )

        # Step 4: Call the functions
        if func_call:
            if func_call["func_name"] == "end_call":
                # BUGFIX: guard against an empty/partial argument stream so a
                # malformed tool call cannot raise json.JSONDecodeError (or
                # KeyError on "message") inside the generator.
                func_call["arguments"] = (
                    json.loads(func_arguments) if func_arguments else {}
                )
                yield ResponseResponse(
                    response_id=request.response_id,
                    content=func_call["arguments"].get("message", ""),
                    content_complete=True,
                    end_call=True,
                )
            # Step 5: Other functions here
            else:
                # BUGFIX: previously an unrecognized tool call ended the
                # generator without any content_complete event, leaving the
                # turn unfinished on the Retell side. Always close the turn.
                yield ResponseResponse(
                    response_id=request.response_id,
                    content="",
                    content_complete=True,
                    end_call=False,
                )
        else:
            # No functions, complete response
            yield ResponseResponse(
                response_id=request.response_id,
                content="",
                content_complete=True,
                end_call=False,
            )
# Webhook endpoint for Retell server events (call_started, call_ended,
# call_analyzed). The payload signature is verified before anything is logged.
@app.post("/webhook")
async def handle_webhook(request: Request):
    try:
        payload = await request.json()
        signature_ok = retell.verify(
            json.dumps(payload, separators=(",", ":"), ensure_ascii=False),
            api_key=str(os.environ["RETELL_API_KEY"]),
            signature=str(request.headers.get("X-Retell-Signature")),
        )
        if not signature_ok:
            print(
                "Received Unauthorized",
                payload["event"],
                payload["data"]["call_id"],
            )
            return JSONResponse(status_code=401, content={"message": "Unauthorized"})

        event = payload["event"]
        # Known events map to the label we log alongside the call id.
        event_labels = {
            "call_started": "Call started event",
            "call_ended": "Call ended event",
            "call_analyzed": "Call analyzed event",
        }
        if event in event_labels:
            print(event_labels[event], payload["data"]["call_id"])
        else:
            print("Unknown event", event)
        return JSONResponse(status_code=200, content={"received": True})
    except Exception as err:
        print(f"Error in webhook: {err}")
        return JSONResponse(
            status_code=500, content={"message": "Internal Server Error"}
        )


# Start a websocket server to exchange text input and output with Retell server. Retell server
# will send over transcriptions and other information. This server here will be responsible for
# generating responses with LLM and send back to Retell server.
@app.websocket("/llm-websocket/{call_id}")
async def websocket_handler(websocket: WebSocket, call_id: str):
    """Per-call websocket: receive Retell interaction events, stream LLM replies.

    Sends an initial config and begin message, then spawns a task per incoming
    message. Of the 5 interaction types (call_details, ping_pong, update_only,
    response_required, reminder_required) only the last two produce responses;
    an in-flight response is abandoned when a newer response_id arrives.
    """
    try:
        await websocket.accept()
        llm_client = LlmClient()

        # Send optional config to Retell server
        config = ConfigResponse(
            response_type="config",
            config={
                "auto_reconnect": True,
                "call_details": True,
            },
            response_id=1,
        )
        await websocket.send_json(config.__dict__)

        # Send first message to signal ready of server
        response_id = 0
        first_event = llm_client.draft_begin_message()
        await websocket.send_json(first_event.__dict__)

        async def handle_message(request_json):
            nonlocal response_id

            # There are 5 types of interaction_type: call_details, pingpong, update_only, response_required, and reminder_required.
            # Not all of them need to be handled, only response_required and reminder_required.
            if request_json["interaction_type"] == "call_details":
                print(json.dumps(request_json, indent=2))
                return
            if request_json["interaction_type"] == "ping_pong":
                await websocket.send_json(
                    {
                        "response_type": "ping_pong",
                        "timestamp": request_json["timestamp"],
                    }
                )
                return
            if request_json["interaction_type"] == "update_only":
                return
            if (
                request_json["interaction_type"] == "response_required"
                or request_json["interaction_type"] == "reminder_required"
            ):
                response_id = request_json["response_id"]
                request = ResponseRequiredRequest(
                    interaction_type=request_json["interaction_type"],
                    response_id=response_id,
                    transcript=request_json["transcript"],
                )
                print(
                    f"""Received interaction_type={request_json['interaction_type']}, response_id={response_id}, last_transcript={request_json['transcript'][-1]['content']}"""
                )

                async for event in llm_client.draft_response(request):
                    await websocket.send_json(event.__dict__)
                    if request.response_id < response_id:
                        break  # new response needed, abandon this one

        async for data in websocket.iter_json():
            asyncio.create_task(handle_message(data))

    except WebSocketDisconnect:
        print(f"LLM WebSocket disconnected for {call_id}")
    except ConnectionTimeoutError:
        # BUGFIX: the original printed the literal text "{call_id}" because the
        # f-prefix was missing; the unused `as e` binding is dropped as well.
        print(f"Connection timeout error for {call_id}")
    except Exception as e:
        print(f"Error in LLM WebSocket: {e} for {call_id}")
        await websocket.close(1011, "Server error")
    finally:
        print(f"LLM WebSocket connection closed for {call_id}")