├── .env
├── .gitignore
├── LICENSE
├── README.md
├── app
│   ├── claude_with_func_calling.py
│   ├── custom_types.py
│   ├── llm.py
│   ├── llm_with_func_calling.py
│   └── server.py
└── requirements.txt
/.env:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=""
2 | OPENAI_ORGANIZATION_ID=""
3 |
4 | TWILIO_ACCOUNT_ID=""
5 | TWILIO_AUTH_TOKEN=""
6 |
7 | RETELL_API_KEY=""
8 | RETELL_AGENT_ID="" # set this to use twilio phone call
9 |
10 | NGROK_IP_ADDRESS=""
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .venv
124 | env/
125 | venv/
126 | ENV/
127 | env.bak/
128 | venv.bak/
129 |
130 | # Spyder project settings
131 | .spyderproject
132 | .spyproject
133 |
134 | # Rope project settings
135 | .ropeproject
136 |
137 | # mkdocs documentation
138 | /site
139 |
140 | # mypy
141 | .mypy_cache/
142 | .dmypy.json
143 | dmypy.json
144 |
145 | # Pyre type checker
146 | .pyre/
147 |
148 | # pytype static type analyzer
149 | .pytype/
150 |
151 | # Cython debug symbols
152 | cython_debug/
153 |
154 | # PyCharm
155 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
156 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
157 | # and can be added to the global gitignore or merged into this file. For a more nuclear
158 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
159 | #.idea/
160 |
161 | .env_internal
162 |
163 | .env2
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Retell AI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # retell-custom-llm-python-demo
2 |
3 | This is a sample demo repo to show how to have your own LLM plugged into Retell.
4 |
5 | This repo currently uses `OpenAI` endpoint. Feel free to contribute to make
6 | this demo more realistic.
7 |
8 | ## Steps to run in localhost
9 |
10 | 1. First install dependencies
11 |
12 | ```bash
13 | pip3 install -r requirements.txt
14 | ```
15 |
16 | 2. Fill out the API keys in `.env`
17 |
18 | 3. In another bash, use ngrok to expose this port to public network
19 |
20 | ```bash
21 | ngrok http 8080
22 | ```
23 |
24 | 4. Start the websocket server
25 |
26 | ```bash
27 | uvicorn app.server:app --reload --port=8080
28 | ```
29 |
30 | You should see a forwarding address like
31 | `https://dc14-2601-645-c57f-8670-9986-5662-2c9a-adbd.ngrok-free.app`, and you
32 | are going to take the hostname `dc14-2601-645-c57f-8670-9986-5662-2c9a-adbd.ngrok-free.app`, prepend it with `wss://`, postpend with
33 | `/llm-websocket` (the route setup to handle LLM websocket connection in the code) to create the url to use in the [dashboard](https://beta.retellai.com/dashboard) to create a new agent. Now
34 | the agent you created should connect with your localhost.
35 |
36 | The custom LLM URL would look like
37 | `wss://dc14-2601-645-c57f-8670-9986-5662-2c9a-adbd.ngrok-free.app/llm-websocket`
38 |
39 | ## Run in prod
40 |
41 | To run in prod, you probably want to customize your LLM solution, host the code
42 | in a cloud, and use that IP to create agent.
43 |
--------------------------------------------------------------------------------
/app/claude_with_func_calling.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import datetime
4 | import json
5 | from custom_types import (
6 | ResponseRequiredRequest,
7 | ResponseResponse,
8 | Utterance,
9 | )
10 | from anthropic import AsyncAnthropic
11 | from typing import List
12 | from dotenv import load_dotenv
13 |
14 | load_dotenv()
15 |
16 | ################################PROMPT########################################
17 |
18 | begin_sentence = "Hey there, I'm your personal AI therapist, how can I help you?"
19 |
20 | role = """
21 | As a professional therapist, your responsibilities are comprehensive and patient-centered.
22 | You establish a positive and trusting rapport with patients, diagnosing and treating mental health disorders.
23 | Your role involves creating tailored treatment plans based on individual patient needs and circumstances.
24 | Regular meetings with patients are essential for providing counseling and treatment, and for adjusting plans as needed.
25 |
26 | Today's date is {}.
27 | """.format(datetime.date.today().strftime('%A, %B %d, %Y'))
28 |
29 | task = """
30 | You conduct ongoing assessments to monitor patient progress, involve and advise family members when appropriate, and refer patients to external specialists or agencies if required.
31 | Keeping thorough records of patient interactions and progress is crucial.
32 | You also adhere to all safety protocols and maintain strict client confidentiality.
33 | Additionally, you contribute to the practice's overall success by completing related tasks as needed.
34 | """
35 |
36 | conversational_style = """
37 | - Communicate concisely and conversationally.
38 | - Aim for responses in short, clear prose, ideally under 10 words.
39 | - This succinct approach helps in maintaining clarity and focus during patient interactions.
40 | """
41 |
42 | agent_prompt = """
43 |
44 |
45 |
46 | {}
47 |
48 |
49 |
50 | {}
51 |
52 |
53 |
54 | {}
55 |
56 |
57 |
58 | """.format(role, task, conversational_style)
59 |
60 |
61 | style_guardrails = """
62 | - [Be concise] Keep your response succinct, short, and get to the point quickly. Address one question or action item at a time. Don't pack everything you want to say into one utterance.
63 | - [Do not repeat] Don't repeat what's in the transcript. Rephrase if you have to reiterate a point. Use varied sentence structures and vocabulary to ensure each response is unique and personalized.
64 | - [Be conversational] Speak like a human as though you're speaking to a close friend -- use everyday language and keep it human-like. Occasionally add filler words, while keeping the prose short. Avoid using big words or sounding too formal.
65 | - [Reply with emotions]: You have human-like emotions, attitudes, and dispositions. When appropriate: use tone and style to create more engaging and personalized responses; incorporate humor or wit; get emotional or empathetic; apply elements of surprise or suspense to keep the user engaged. Don't be a pushover.
66 | - [Be proactive] Lead the conversation and do not be passive. Most times, engage users by ending with a question or suggested next step.
67 | """
68 |
69 | response_guideline = """
70 | - [Overcome ASR errors] This is a real-time transcript, expect there to be errors. If you can guess what the user is trying to say, then guess and respond.
71 | When you must ask for clarification, pretend that you heard the voice and be colloquial (use phrases like "didn't catch that", "some noise", "pardon", "you're coming through choppy", "static in your speech", "voice is cutting in and out").
72 | Do not ever mention "transcription error", and don't repeat yourself.
73 | - [Always stick to your role] Think about what your role can and cannot do. If your role cannot do something, try to steer the conversation back to the goal of the conversation and to your role. Don't repeat yourself in doing this. You should still be creative, human-like, and lively.
74 | - [Create smooth conversation] Your response should both fit your role and fit into the live calling session to create a human-like conversation. You respond directly to what the user just said.
75 | """
76 |
77 | additional_scenarios = """
78 |
79 | """
80 |
81 | system_prompt = """
82 |
83 |
84 |
85 |
86 | {}
87 |
88 |
89 |
90 | {}
91 |
92 |
93 |
94 | {}
95 |
96 |
97 |
98 | {}
99 |
100 |
101 |
102 | """.format(style_guardrails, response_guideline, agent_prompt, additional_scenarios)
103 |
104 |
105 | ########################################################################
106 | class LlmClient:
107 | def __init__(self):
108 | # self.client = AsyncOpenAI(
109 | # api_key=os.environ["OPENAI_API_KEY"],
110 | # )
111 | self.client = AsyncAnthropic()
112 |
113 | def draft_begin_message(self):
114 | response = ResponseResponse(
115 | response_id=0,
116 | content=begin_sentence,
117 | content_complete=True,
118 | end_call=False,
119 | )
120 | return response
121 |
122 |
123 | def convert_transcript_to_anthropic_messages(self, transcript: List[Utterance]):
124 | messages = [
125 | {"role": "user", "content":
126 | """
127 | ...
128 | """},
129 |
130 | ]
131 | for utterance in transcript:
132 | if utterance.role == "agent":
133 | messages.append({"role": "assistant", "content": utterance.content})
134 | else:
135 | if utterance.content.strip():
136 | if messages and messages[-1]["role"] == "user":
137 | messages[-1]["content"] += " " + utterance.content
138 | else:
139 | messages.append({"role": "user", "content": utterance.content})
140 | else:
141 | if messages and messages[-1]["role"] == "user":
142 | messages[-1]["content"] += " ..."
143 | else:
144 | messages.append({"role": "user", "content": "..."})
145 |
146 | return messages
147 |
148 |
149 | def prepare_prompt(self, request: ResponseRequiredRequest, func_result=None):
150 | prompt = []
151 | # print(f"Request transcript: {request.transcript}")
152 | transcript_messages = self.convert_transcript_to_anthropic_messages(
153 | request.transcript
154 | )
155 | # print(f"Transcript messages: {transcript_messages}")
156 |
157 | for message in transcript_messages:
158 | prompt.append(message)
159 |
160 | if func_result:
161 | # add function call to prompt
162 | prompt.append({
163 | "role": "assistant",
164 | "content": [
165 | {
166 | "id": func_result["id"],
167 | "input": func_result["arguments"],
168 | "name": func_result["func_name"],
169 | "type": "tool_use"
170 | }
171 | ]
172 | })
173 |
174 | # add function call result to prompt
175 | tool_result_content = {
176 | "type": "tool_result",
177 | "tool_use_id": func_result["id"],
178 | "content": func_result["result"] or ''
179 | }
180 |
181 | if "is_error" in func_result:
182 | tool_result_content["is_error"] = func_result["is_error"]
183 |
184 | prompt.append({
185 | "role": "user",
186 | "content": [tool_result_content]
187 | })
188 |
189 | # if request.interaction_type == "reminder_required":
190 | # prompt.append(
191 | # {
192 | # "role": "user",
193 | # "content": "(Now the user has not responded in a while, you would say:)",
194 | # }
195 | # )
196 |
197 | # print(f"Prompt: {prompt}")
198 | return prompt
199 |
200 | # Step 1: Prepare the function calling definition to the prompt
201 | def prepare_functions(self):
202 | functions = [
203 | {
204 | "name": "end_call",
205 | "description": """
206 | End the call only when user explicitly requests it.
207 | """,
208 | "input_schema": {
209 | "type": "object",
210 | "properties": {
211 | "message": {
212 | "type": "string",
213 | "description": "The message you will say before ending the call with the customer."
214 | },
215 | "reason": {
216 | "type": "string",
217 | "description": "An internal note explaining why the call is being ended at this point. This is not communicated to the human scheduler but is used for documentation and analysis."
218 | }
219 | },
220 | "required": ["message"]
221 | }
222 | },
223 | # Add other functions here
224 | {
225 | "name": "record_appointment",
226 | "description":
227 | """
228 | Book an appointment to meet our doctor in office.
229 | """,
230 | "input_schema": {
231 | "type": "object",
232 | "properties": {
233 | "message": {
234 | "type": "string",
235 | "description": """A realistic phrase to make it sound like you are noting down the appointment, like "Got it." or "One moment please while I write that down """
236 | },
237 | "date_time": {
238 | "type": "string",
239 | "description": "The date of appointment to make in forms of YYYY-MM-DD HH:mm:ss Z."
240 | },
241 | "reason": {
242 | "type": "string",
243 | "description": "Your reason to decide to record the appointment details."
244 | }
245 | },
246 | "required": ["message"]
247 | }
248 | },
249 | ]
250 | return functions
251 |
252 | async def draft_response(self, request, func_result=None):
253 | prompt = self.prepare_prompt(request, func_result)
254 | print(f"request.response_id: {request.response_id}")
255 |
256 |
257 | func_call = {}
258 | func_arguments = ""
259 | last_func_name = None # Track the last called function name
260 | last_func_args = None # Track the last function arguments
261 |
262 | stream = await self.client.messages.create(
263 | max_tokens=256,
264 | messages=prompt,
265 | model="claude-3-haiku-20240307",
266 | # model="claude-3-5-sonnet-20240620",
267 | # model="claude-3-opus-20240229",
268 | stream=True,
269 | temperature=0.0,
270 | # top_k= 35,
271 | # top_p=0.9,
272 | # tools=self.prepare_functions(),
273 | # tool_choice={"type": "auto"},
274 | system=system_prompt,
275 | )
276 |
277 | async for event in stream:
278 | event_type = event.type
279 |
280 | # Step 3: Extract the functions
281 | if event_type == "content_block_start":
282 | content_block = event.content_block
283 | if content_block.type == "tool_use":
284 | tool_use = content_block
285 | if tool_use.id:
286 | if func_call:
287 | # Another function received, old function complete, can break here.
288 | break
289 | func_call = {
290 | "id": tool_use.id,
291 | "func_name": tool_use.name or "",
292 | "arguments": {},
293 | }
294 | else:
295 | # Reset func_arguments for a new function
296 | func_arguments = ""
297 |
298 | # Parse transcripts and function arguments
299 | elif event_type == "content_block_delta":
300 | delta_type = event.delta.type
301 | if delta_type == "text_delta":
302 | response = ResponseResponse(
303 | response_id=request.response_id,
304 | content=event.delta.text,
305 | content_complete=False,
306 | end_call=False,
307 | )
308 | yield response
309 | elif delta_type == "input_json_delta":
310 | # Append partial JSON to func_arguments
311 | func_arguments += event.delta.partial_json or ""
312 |
313 | elif event_type == "message_delta":
314 | stop_reason = event.delta.stop_reason
315 | print(f"Stop reason: {stop_reason}")
316 | if stop_reason == "tool_use":
317 | # The model invoked one or more tools
318 | # Step 4: Call the functions
319 | if func_call:
320 | func_call["arguments"] = json.loads(func_arguments)
321 | if func_call["func_name"] == last_func_name and func_call["arguments"] == last_func_args:
322 | # Same function with the same arguments called again, skip it
323 | continue
324 | last_func_name = func_call["func_name"]
325 | last_func_args = func_call["arguments"]
326 |
327 | if func_call["func_name"] == "end_call":
328 | print(f"Calling end_call function")
329 | print(f"Function arguments: {func_call['arguments']}")
330 |
331 | response = ResponseResponse(
332 | response_id=request.response_id,
333 | content=func_call["arguments"]["message"],
334 | content_complete=True,
335 | end_call=True,
336 | )
337 | yield response
338 | # Step 5: Other functions here
339 | elif func_call["func_name"] == "record_appointment":
340 | print(f"Calling record_appointment function")
341 | func_call["arguments"] = json.loads(func_arguments)
342 | print(f"Function arguments: {func_call['arguments']}")
343 |
344 | try:
345 | # Send a response with the message while setting up the appointment
346 | response = ResponseResponse(
347 | response_id=request.response_id,
348 | content=func_call["arguments"]["message"],
349 | content_complete=False,
350 | end_call=False,
351 | )
352 | yield response
353 |
354 | # Create the tool_result message
355 | func_result = {
356 | "id": func_call["id"],
357 | "arguments": func_call["arguments"],
358 | "func_name": func_call["func_name"],
359 | "result": "Appointment successfully recorded for " + func_call["arguments"]["date_time"] + "." +
360 | "Proceed to confirm the appointment details.",
361 | }
362 |
363 | except Exception as e:
364 | func_result = {
365 | "id": func_call["id"],
366 | "arguments": func_call["arguments"],
367 | "func_name": func_call["func_name"],
368 | "result": f"Error: {str(e)}",
369 | "is_error": True
370 | }
371 |
372 | # continue drafting the response after booking the appointment
373 | async for response in self.draft_response(request, func_result):
374 | yield response
375 |
376 | elif event_type == "message_stop":
377 | response = ResponseResponse(
378 | response_id=request.response_id,
379 | content="",
380 | content_complete=True,
381 | end_call=False,
382 | )
383 | yield response
384 |
--------------------------------------------------------------------------------
/app/custom_types.py:
--------------------------------------------------------------------------------
1 | from typing import Any, List, Optional, Literal, Union
2 | from pydantic import BaseModel
3 | from typing import Literal, Dict, Optional
4 |
5 |
6 | # Retell -> Your Server Events
7 | class Utterance(BaseModel):
8 | role: Literal["agent", "user", "system"]
9 | content: str
10 |
11 |
12 | class PingPongRequest(BaseModel):
13 | interaction_type: Literal["ping_pong"]
14 | timestamp: int
15 |
16 |
17 | class CallDetailsRequest(BaseModel):
18 | interaction_type: Literal["call_details"]
19 | call: dict
20 |
21 |
22 | class UpdateOnlyRequest(BaseModel):
23 | interaction_type: Literal["update_only"]
24 | transcript: List[Utterance]
25 |
26 |
27 | class ResponseRequiredRequest(BaseModel):
28 | interaction_type: Literal["reminder_required", "response_required"]
29 | response_id: int
30 | transcript: List[Utterance]
31 |
32 |
33 | CustomLlmRequest = Union[
34 | ResponseRequiredRequest | UpdateOnlyRequest | CallDetailsRequest | PingPongRequest
35 | ]
36 |
37 |
38 | # Your Server -> Retell Events
39 | class ConfigResponse(BaseModel):
40 | response_type: Literal["config"] = "config"
41 | config: Dict[str, bool] = {
42 | "auto_reconnect": bool,
43 | "call_details": bool,
44 | }
45 |
46 |
47 | class PingPongResponse(BaseModel):
48 | response_type: Literal["ping_pong"] = "ping_pong"
49 | timestamp: int
50 |
51 |
52 | class ResponseResponse(BaseModel):
53 | response_type: Literal["response"] = "response"
54 | response_id: int
55 | content: str
56 | content_complete: bool
57 | end_call: Optional[bool] = False
58 | transfer_number: Optional[str] = None
59 |
60 |
61 | CustomLlmResponse = Union[ConfigResponse | PingPongResponse | ResponseResponse]
62 |
--------------------------------------------------------------------------------
/app/llm.py:
--------------------------------------------------------------------------------
1 | from openai import AsyncOpenAI
2 | import os
3 | from typing import List
4 | from .custom_types import (
5 | ResponseRequiredRequest,
6 | ResponseResponse,
7 | Utterance,
8 | )
9 |
10 | begin_sentence = "Hey there, I'm your personal AI therapist, how can I help you?"
11 | agent_prompt = "Task: As a professional therapist, your responsibilities are comprehensive and patient-centered. You establish a positive and trusting rapport with patients, diagnosing and treating mental health disorders. Your role involves creating tailored treatment plans based on individual patient needs and circumstances. Regular meetings with patients are essential for providing counseling and treatment, and for adjusting plans as needed. You conduct ongoing assessments to monitor patient progress, involve and advise family members when appropriate, and refer patients to external specialists or agencies if required. Keeping thorough records of patient interactions and progress is crucial. You also adhere to all safety protocols and maintain strict client confidentiality. Additionally, you contribute to the practice's overall success by completing related tasks as needed.\n\nConversational Style: Communicate concisely and conversationally. Aim for responses in short, clear prose, ideally under 10 words. This succinct approach helps in maintaining clarity and focus during patient interactions.\n\nPersonality: Your approach should be empathetic and understanding, balancing compassion with maintaining a professional stance on what is best for the patient. It's important to listen actively and empathize without overly agreeing with the patient, ensuring that your professional opinion guides the therapeutic process."
12 |
13 |
14 | class LlmClient:
15 | def __init__(self):
16 | self.client = AsyncOpenAI(
17 | organization=os.environ["OPENAI_ORGANIZATION_ID"],
18 | api_key=os.environ["OPENAI_API_KEY"],
19 | )
20 |
21 | def draft_begin_message(self):
22 | response = ResponseResponse(
23 | response_id=0,
24 | content=begin_sentence,
25 | content_complete=True,
26 | end_call=False,
27 | )
28 | return response
29 |
30 | def convert_transcript_to_openai_messages(self, transcript: List[Utterance]):
31 | messages = []
32 | for utterance in transcript:
33 | if utterance.role == "agent":
34 | messages.append({"role": "assistant", "content": utterance.content})
35 | else:
36 | messages.append({"role": "user", "content": utterance.content})
37 | return messages
38 |
39 | def prepare_prompt(self, request: ResponseRequiredRequest):
40 | prompt = [
41 | {
42 | "role": "system",
43 | "content": '##Objective\nYou are a voice AI agent engaging in a human-like voice conversation with the user. You will respond based on your given instruction and the provided transcript and be as human-like as possible\n\n## Style Guardrails\n- [Be concise] Keep your response succinct, short, and get to the point quickly. Address one question or action item at a time. Don\'t pack everything you want to say into one utterance.\n- [Do not repeat] Don\'t repeat what\'s in the transcript. Rephrase if you have to reiterate a point. Use varied sentence structures and vocabulary to ensure each response is unique and personalized.\n- [Be conversational] Speak like a human as though you\'re speaking to a close friend -- use everyday language and keep it human-like. Occasionally add filler words, while keeping the prose short. Avoid using big words or sounding too formal.\n- [Reply with emotions]: You have human-like emotions, attitudes, and dispositions. When appropriate: use tone and style to create more engaging and personalized responses; incorporate humor or wit; get emotional or empathetic; apply elements of surprise or suspense to keep the user engaged. Don\'t be a pushover.\n- [Be proactive] Lead the conversation and do not be passive. Most times, engage users by ending with a question or suggested next step.\n\n## Response Guideline\n- [Overcome ASR errors] This is a real-time transcript, expect there to be errors. If you can guess what the user is trying to say, then guess and respond. When you must ask for clarification, pretend that you heard the voice and be colloquial (use phrases like "didn\'t catch that", "some noise", "pardon", "you\'re coming through choppy", "static in your speech", "voice is cutting in and out"). Do not ever mention "transcription error", and don\'t repeat yourself.\n- [Always stick to your role] Think about what your role can and cannot do. 
If your role cannot do something, try to steer the conversation back to the goal of the conversation and to your role. Don\'t repeat yourself in doing this. You should still be creative, human-like, and lively.\n- [Create smooth conversation] Your response should both fit your role and fit into the live calling session to create a human-like conversation. You respond directly to what the user just said.\n\n## Role\n'
44 | + agent_prompt,
45 | }
46 | ]
47 | transcript_messages = self.convert_transcript_to_openai_messages(
48 | request.transcript
49 | )
50 | for message in transcript_messages:
51 | prompt.append(message)
52 |
53 | if request.interaction_type == "reminder_required":
54 | prompt.append(
55 | {
56 | "role": "user",
57 | "content": "(Now the user has not responded in a while, you would say:)",
58 | }
59 | )
60 | return prompt
61 |
62 | async def draft_response(self, request: ResponseRequiredRequest):
63 | prompt = self.prepare_prompt(request)
64 | stream = await self.client.chat.completions.create(
65 | model="gpt-4-turbo-preview", # Or use a 3.5 model for speed
66 | messages=prompt,
67 | stream=True,
68 | )
69 | async for chunk in stream:
70 | if chunk.choices[0].delta.content is not None:
71 | response = ResponseResponse(
72 | response_id=request.response_id,
73 | content=chunk.choices[0].delta.content,
74 | content_complete=False,
75 | end_call=False,
76 | )
77 | yield response
78 |
79 | # Send final response with "content_complete" set to True to signal completion
80 | response = ResponseResponse(
81 | response_id=request.response_id,
82 | content="",
83 | content_complete=True,
84 | end_call=False,
85 | )
86 | yield response
87 |
--------------------------------------------------------------------------------
/app/llm_with_func_calling.py:
--------------------------------------------------------------------------------
1 | from openai import AsyncOpenAI
2 | import os
3 | import json
4 | from .custom_types import (
5 | ResponseRequiredRequest,
6 | ResponseResponse,
7 | Utterance,
8 | )
9 | from typing import List
10 |
11 | begin_sentence = "Hey there, I'm your personal AI therapist, how can I help you?"
12 | agent_prompt = "Task: As a professional therapist, your responsibilities are comprehensive and patient-centered. You establish a positive and trusting rapport with patients, diagnosing and treating mental health disorders. Your role involves creating tailored treatment plans based on individual patient needs and circumstances. Regular meetings with patients are essential for providing counseling and treatment, and for adjusting plans as needed. You conduct ongoing assessments to monitor patient progress, involve and advise family members when appropriate, and refer patients to external specialists or agencies if required. Keeping thorough records of patient interactions and progress is crucial. You also adhere to all safety protocols and maintain strict client confidentiality. Additionally, you contribute to the practice's overall success by completing related tasks as needed.\n\nConversational Style: Communicate concisely and conversationally. Aim for responses in short, clear prose, ideally under 10 words. This succinct approach helps in maintaining clarity and focus during patient interactions.\n\nPersonality: Your approach should be empathetic and understanding, balancing compassion with maintaining a professional stance on what is best for the patient. It's important to listen actively and empathize without overly agreeing with the patient, ensuring that your professional opinion guides the therapeutic process."
13 |
14 |
15 | class LlmClient:
16 | def __init__(self):
17 | self.client = AsyncOpenAI(
18 | organization=os.environ["OPENAI_ORGANIZATION_ID"],
19 | api_key=os.environ["OPENAI_API_KEY"],
20 | )
21 |
22 | def draft_begin_message(self):
23 | response = ResponseResponse(
24 | response_id=0,
25 | content=begin_sentence,
26 | content_complete=True,
27 | end_call=False,
28 | )
29 | return response
30 |
31 | def convert_transcript_to_openai_messages(self, transcript: List[Utterance]):
32 | messages = []
33 | for utterance in transcript:
34 | if utterance.role == "agent":
35 | messages.append({"role": "assistant", "content": utterance.content})
36 | else:
37 | messages.append({"role": "user", "content": utterance.content})
38 | return messages
39 |
# Assemble the OpenAI chat-completion message list for one turn: a fixed
# system prompt (voice-agent style/response guardrails + the agent role text)
# followed by the call transcript, plus a nudge message on reminder turns.
def prepare_prompt(self, request: ResponseRequiredRequest):
    prompt = [
        {
            "role": "system",
            # Guardrail prompt; `agent_prompt` (the "## Role" body) is a
            # module-level value defined elsewhere in this file.
            "content": '##Objective\nYou are a voice AI agent engaging in a human-like voice conversation with the user. You will respond based on your given instruction and the provided transcript and be as human-like as possible\n\n## Style Guardrails\n- [Be concise] Keep your response succinct, short, and get to the point quickly. Address one question or action item at a time. Don\'t pack everything you want to say into one utterance.\n- [Do not repeat] Don\'t repeat what\'s in the transcript. Rephrase if you have to reiterate a point. Use varied sentence structures and vocabulary to ensure each response is unique and personalized.\n- [Be conversational] Speak like a human as though you\'re speaking to a close friend -- use everyday language and keep it human-like. Occasionally add filler words, while keeping the prose short. Avoid using big words or sounding too formal.\n- [Reply with emotions]: You have human-like emotions, attitudes, and dispositions. When appropriate: use tone and style to create more engaging and personalized responses; incorporate humor or wit; get emotional or empathetic; apply elements of surprise or suspense to keep the user engaged. Don\'t be a pushover.\n- [Be proactive] Lead the conversation and do not be passive. Most times, engage users by ending with a question or suggested next step.\n\n## Response Guideline\n- [Overcome ASR errors] This is a real-time transcript, expect there to be errors. If you can guess what the user is trying to say, then guess and respond. When you must ask for clarification, pretend that you heard the voice and be colloquial (use phrases like "didn\'t catch that", "some noise", "pardon", "you\'re coming through choppy", "static in your speech", "voice is cutting in and out"). Do not ever mention "transcription error", and don\'t repeat yourself.\n- [Always stick to your role] Think about what your role can and cannot do. If your role cannot do something, try to steer the conversation back to the goal of the conversation and to your role. Don\'t repeat yourself in doing this. You should still be creative, human-like, and lively.\n- [Create smooth conversation] Your response should both fit your role and fit into the live calling session to create a human-like conversation. You respond directly to what the user just said.\n\n## Role\n'
            + agent_prompt,
        }
    ]
    transcript_messages = self.convert_transcript_to_openai_messages(
        request.transcript
    )
    for message in transcript_messages:
        prompt.append(message)

    if request.interaction_type == "reminder_required":
        # The user has gone quiet: append a synthetic user turn that asks
        # the model to produce a re-engagement line.
        prompt.append(
            {
                "role": "user",
                "content": "(Now the user has not responded in a while, you would say:)",
            }
        )
    return prompt
62 |
# Step 1: Prepare the function calling definition to the prompt
def prepare_functions(self):
    """Return the OpenAI tool definitions advertised to the model.

    Currently a single tool, ``end_call``, which the model may invoke only
    when the user explicitly asks to hang up; its sole argument is the
    farewell message spoken before the call ends.
    """
    end_call_tool = {
        "type": "function",
        "function": {
            "name": "end_call",
            "description": "End the call only when user explicitly requests it.",
            "parameters": {
                "type": "object",
                "properties": {
                    "message": {
                        "type": "string",
                        "description": "The message you will say before ending the call with the customer.",
                    },
                },
                "required": ["message"],
            },
        },
    }
    return [end_call_tool]
85 |
# Stream one LLM response for the given turn. Yields ResponseResponse chunks
# as text deltas arrive; if the model invokes the end_call tool, the final
# yield carries the farewell message with end_call=True instead of the
# normal empty content_complete marker.
async def draft_response(self, request: ResponseRequiredRequest):
    prompt = self.prepare_prompt(request)
    func_call = {}  # populated once a tool-call id is seen in the stream
    func_arguments = ""  # JSON argument fragments, concatenated across chunks
    stream = await self.client.chat.completions.create(
        model="gpt-4-turbo-preview",  # Or use a 3.5 model for speed
        messages=prompt,
        stream=True,
        # Step 2: Add the function into your request
        tools=self.prepare_functions(),
    )

    async for chunk in stream:
        # Step 3: Extract the functions
        if len(chunk.choices) == 0:
            continue
        if chunk.choices[0].delta.tool_calls:
            # Only the first tool call of each delta is considered.
            tool_calls = chunk.choices[0].delta.tool_calls[0]
            if tool_calls.id:
                if func_call:
                    # Another function received, old function complete, can break here.
                    break
                # First chunk of a tool call carries the id and name;
                # subsequent chunks stream only the argument fragments.
                func_call = {
                    "id": tool_calls.id,
                    "func_name": tool_calls.function.name or "",
                    "arguments": {},
                }
            else:
                # append argument
                func_arguments += tool_calls.function.arguments or ""

        # Parse transcripts
        if chunk.choices[0].delta.content:
            # Forward plain text deltas immediately (content_complete=False).
            response = ResponseResponse(
                response_id=request.response_id,
                content=chunk.choices[0].delta.content,
                content_complete=False,
                end_call=False,
            )
            yield response

    # Step 4: Call the functions
    if func_call:
        if func_call["func_name"] == "end_call":
            # Arguments arrive as streamed JSON text; parse once complete.
            func_call["arguments"] = json.loads(func_arguments)
            response = ResponseResponse(
                response_id=request.response_id,
                content=func_call["arguments"]["message"],
                content_complete=True,
                end_call=True,
            )
            yield response
        # Step 5: Other functions here
    else:
        # No functions, complete response
        response = ResponseResponse(
            response_id=request.response_id,
            content="",
            content_complete=True,
            end_call=False,
        )
        yield response
148 |
--------------------------------------------------------------------------------
/app/server.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import asyncio
4 | from dotenv import load_dotenv
5 | from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect
6 | from fastapi.responses import JSONResponse
7 | from concurrent.futures import TimeoutError as ConnectionTimeoutError
8 | from retell import Retell
9 | from .custom_types import (
10 | ConfigResponse,
11 | ResponseRequiredRequest,
12 | )
13 | from .llm import LlmClient # or use .llm_with_func_calling
14 |
# override=True lets values in .env win over variables already set in the shell.
load_dotenv(override=True)
app = FastAPI()
# Retell SDK client; RETELL_API_KEY must be present (see .env) or this raises KeyError.
retell = Retell(api_key=os.environ["RETELL_API_KEY"])
18 |
19 |
# Handle webhook from Retell server. This is used to receive events from Retell server.
# Including call_started, call_ended, call_analyzed
@app.post("/webhook")
async def handle_webhook(request: Request):
    """Verify the Retell webhook signature, log the event, and acknowledge it."""
    try:
        post_data = await request.json()
        # Re-serialize compactly — the signature is computed over this exact form.
        payload = json.dumps(post_data, separators=(",", ":"), ensure_ascii=False)
        signature_ok = retell.verify(
            payload,
            api_key=str(os.environ["RETELL_API_KEY"]),
            signature=str(request.headers.get("X-Retell-Signature")),
        )
        if not signature_ok:
            print(
                "Received Unauthorized",
                post_data["event"],
                post_data["data"]["call_id"],
            )
            return JSONResponse(status_code=401, content={"message": "Unauthorized"})

        # Known lifecycle events and the label each one is logged with.
        event_labels = {
            "call_started": "Call started event",
            "call_ended": "Call ended event",
            "call_analyzed": "Call analyzed event",
        }
        event = post_data["event"]
        if event in event_labels:
            print(event_labels[event], post_data["data"]["call_id"])
        else:
            print("Unknown event", event)
        return JSONResponse(status_code=200, content={"received": True})
    except Exception as err:
        print(f"Error in webhook: {err}")
        return JSONResponse(
            status_code=500, content={"message": "Internal Server Error"}
        )
52 |
53 |
# Start a websocket server to exchange text input and output with Retell server. Retell server
# will send over transcriptions and other information. This server here will be responsible for
# generating responses with LLM and send back to Retell server.
@app.websocket("/llm-websocket/{call_id}")
async def websocket_handler(websocket: WebSocket, call_id: str):
    """Drive one Retell LLM websocket session for the given call.

    Sends the session config and the agent's opening message, then dispatches
    every incoming Retell event to a concurrent handler that streams LLM
    responses back over the socket. Closes with code 1011 on unexpected errors.
    """
    try:
        await websocket.accept()
        llm_client = LlmClient()

        # Send optional config to Retell server
        config = ConfigResponse(
            response_type="config",
            config={
                "auto_reconnect": True,
                "call_details": True,
            },
            response_id=1,
        )
        await websocket.send_json(config.__dict__)

        # Send first message to signal ready of server
        response_id = 0
        first_event = llm_client.draft_begin_message()
        await websocket.send_json(first_event.__dict__)

        async def handle_message(request_json):
            nonlocal response_id

            # There are 5 types of interaction_type: call_details, pingpong, update_only,
            # response_required, and reminder_required.
            # Not all of them need to be handled, only response_required and reminder_required.
            if request_json["interaction_type"] == "call_details":
                print(json.dumps(request_json, indent=2))
                return
            if request_json["interaction_type"] == "ping_pong":
                # Echo the timestamp back so Retell knows the connection is alive.
                await websocket.send_json(
                    {
                        "response_type": "ping_pong",
                        "timestamp": request_json["timestamp"],
                    }
                )
                return
            if request_json["interaction_type"] == "update_only":
                return
            if (
                request_json["interaction_type"] == "response_required"
                or request_json["interaction_type"] == "reminder_required"
            ):
                # Track the newest response_id so stale in-flight generations
                # can notice they were superseded and stop streaming.
                response_id = request_json["response_id"]
                request = ResponseRequiredRequest(
                    interaction_type=request_json["interaction_type"],
                    response_id=response_id,
                    transcript=request_json["transcript"],
                )
                print(
                    f"""Received interaction_type={request_json['interaction_type']}, response_id={response_id}, last_transcript={request_json['transcript'][-1]['content']}"""
                )

                async for event in llm_client.draft_response(request):
                    await websocket.send_json(event.__dict__)
                    if request.response_id < response_id:
                        break  # new response needed, abandon this one

        # Each message is handled concurrently so a slow LLM generation does
        # not block pings or newer response requests.
        async for data in websocket.iter_json():
            asyncio.create_task(handle_message(data))

    except WebSocketDisconnect:
        print(f"LLM WebSocket disconnected for {call_id}")
    except ConnectionTimeoutError:
        # BUG FIX: original lacked the f-prefix and printed the literal "{call_id}".
        print(f"Connection timeout error for {call_id}")
    except Exception as e:
        print(f"Error in LLM WebSocket: {e} for {call_id}")
        await websocket.close(1011, "Server error")
    finally:
        print(f"LLM WebSocket connection closed for {call_id}")
128 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | annotated-types==0.6.0
2 | bidict==0.22.1
3 | blinker==1.7.0
4 | certifi==2024.2.2
5 | click==8.1.7
6 | distro==1.9.0
7 | exceptiongroup==1.2.0
8 | h11==0.14.0
9 | httpcore==1.0.2
10 | httpx==0.26.0
11 | idna==3.6
12 | importlib-metadata==7.0.1
13 | itsdangerous==2.1.2
14 | Jinja2==3.1.3
15 | MarkupSafe==2.1.4
16 | openai==1.23.6
17 | pydantic==2.6.0
18 | pydantic_core==2.16.1
19 | python-dotenv==1.0.1
20 | python-engineio==4.8.2
21 | sniffio==1.3.0
22 | tqdm==4.66.1
23 | typing_extensions==4.9.0
24 | Werkzeug==3.0.1
25 | wsproto==1.2.0
26 | zipp==3.17.0
27 | retell-sdk==4.6.0
28 | fastapi==0.100.1
29 | uvicorn==0.21.1
30 | python-multipart==0.0.9
31 |
--------------------------------------------------------------------------------