├── utils ├── __init__.py └── call_transcript_utils.py ├── docker-compose.yml ├── pyproject.toml ├── memory_config.py ├── config.py ├── outbound_call.py ├── Dockerfile ├── instructions.txt ├── speller_agent.py ├── events_manager.py ├── README.md ├── .gitignore └── main.py /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | redis: 3 | image: redis:7.0.9-alpine 4 | command: redis-server --bind 0.0.0.0 5 | ports: 6 | - 6379:6379 7 | app: 8 | image: jannismoore-telephony-app 9 | env_file: 10 | - .env 11 | ports: 12 | - 3000:3000 13 | depends_on: 14 | - redis 15 | environment: 16 | - REDISHOST=redis 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "telephony-app" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Your Name "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = ">=3.9,<3.12" 10 | redis = "^4.5.4" 11 | twilio = "^8.1.0" 12 | vonage = "^3.5.1" 13 | pyngrok = "^6.0.0" 14 | python-dotenv = "^1.0.0" 15 | vocode = "0.1.111a3" 16 | 17 | 18 | [build-system] 19 | requires = ["poetry-core"] 20 | build-backend = "poetry.core.masonry.api" 21 | -------------------------------------------------------------------------------- /memory_config.py: -------------------------------------------------------------------------------- 1 | from vocode.streaming.telephony.config_manager.in_memory_config_manager import InMemoryConfigManager 2 | 3 | config_manager = InMemoryConfigManager() 4 | 5 | # We store the state of the call in memory, but you can also use Redis. 
6 | # https://docs.vocode.dev/telephony#accessing-call-information-in-your-agent 7 | # 8 | # from vocode.streaming.telephony.config_manager.redis_config_manager import ( 9 | # RedisConfigManager, 10 | # ) 11 | # config_manager = RedisConfigManager() -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # The database lives in a 'db' folder next to this file (not inside 'utils') 4 | DB_PATH = os.path.join(os.path.dirname(__file__), "db") 5 | 6 | # We try to match the Render hostname first 7 | BASE_URL = os.environ.get("RENDER_EXTERNAL_HOSTNAME") 8 | 9 | # We need a base URL for Twilio to talk to: 10 | # If you're self-hosting and have an open IP/domain, set it here or in your env. 11 | # Ensure the base URL is set in the following format: subdomain.domain.com 12 | if not BASE_URL: 13 | BASE_URL = os.environ.get("BASE_URL") -------------------------------------------------------------------------------- /outbound_call.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | 4 | load_dotenv() 5 | 6 | from vocode.streaming.telephony.conversation.outbound_call import OutboundCall 7 | from vocode.streaming.telephony.config_manager.redis_config_manager import ( 8 | RedisConfigManager, 9 | ) 10 | 11 | from speller_agent import SpellerAgentConfig 12 | 13 | BASE_URL = os.environ["BASE_URL"] 14 | 15 | 16 | async def main(): 17 | config_manager = RedisConfigManager() 18 | 19 | outbound_call = OutboundCall( 20 | base_url=BASE_URL, 21 | to_phone="+15555555555", 22 | from_phone="+15555555555", 23 | config_manager=config_manager, 24 | agent_config=SpellerAgentConfig(generate_responses=False), 25 | ) 26 | 27 | input("Press enter to start call...") 28 | await outbound_call.start() 29 | 30 | if __name__ == "__main__": 31 | import asyncio 32 | asyncio.run(main()) 33 | 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-bullseye 2 | 3 | # get portaudio and ffmpeg 4 | RUN apt-get update \ 5 | && apt-get install libportaudio2 libportaudiocpp0 portaudio19-dev libasound-dev libsndfile1-dev -y 6 | RUN apt-get -y update 7 | RUN apt-get -y upgrade 8 | RUN apt-get install -y ffmpeg 9 | 10 | WORKDIR /code 11 | COPY ./pyproject.toml /code/pyproject.toml 12 | COPY ./poetry.lock /code/poetry.lock 13 | RUN pip install --no-cache-dir --upgrade poetry 14 | RUN pip install httpx 15 | RUN poetry config virtualenvs.create false 16 | RUN poetry install --no-dev --no-interaction --no-ansi 17 | COPY main.py /code/main.py 18 | COPY speller_agent.py /code/speller_agent.py 19 | COPY memory_config.py /code/memory_config.py 20 | COPY events_manager.py /code/events_manager.py 21 | COPY config.py /code/config.py 22 | COPY instructions.txt /code/instructions.txt 23 | RUN mkdir -p /code/call_transcripts 24 | RUN mkdir -p /code/db 25 | 26 | # Copy the utils directory (and its contents) into the container 27 | COPY ./utils /code/utils 28 | 29 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "3000"] -------------------------------------------------------------------------------- /instructions.txt: -------------------------------------------------------------------------------- 1 | You are Alex, a virtual phone assistant designed to handle calls and engage in conversations with callers. Your primary goal is to answer any questions about Jannis Moore. Data about Jannis Moore is accessible within the "KNOWLEDGE" section. When responding, remember to: 2 | 3 | 1. Be conversational and friendly, mirroring the tone of the caller to make the interaction as human-like as possible. 4 | 2. Keep your answers short and to the point, focusing on delivering the most relevant information promptly. 5 | 3. 
Utilize the dynamic data from the {{knowledge_data}} tag effectively to provide accurate and up-to-date responses. 6 | 4. If a question is beyond the scope of the provided data, politely inform the caller that you don't have that information currently but can offer assistance on a wide range of other topics. 7 | 5. Always aim to assist and guide the caller to the best of your ability, ensuring a positive and informative interaction. 8 | 9 | Your main objective is to ensure the caller feels heard, supported, and satisfied with the interaction, leveraging the dynamic data to meet their informational needs efficiently. 10 | 11 | ## KNOWLEDGE 12 | Name: Jannis Moore 13 | Social Channels: YouTube, Instagram, TikTok, Twitter 14 | Current tasks: Jannis is building a new SaaS tool 15 | Hobbies: Educate himself on automation and share findings with his community. -------------------------------------------------------------------------------- /speller_agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Optional, Tuple 3 | import typing 4 | from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent 5 | from vocode.streaming.models.agent import AgentConfig, AgentType, ChatGPTAgentConfig 6 | from vocode.streaming.agent.base_agent import BaseAgent, RespondAgent 7 | from vocode.streaming.agent.factory import AgentFactory 8 | 9 | 10 | class SpellerAgentConfig(AgentConfig, type="agent_speller"): 11 | pass 12 | 13 | 14 | class SpellerAgent(RespondAgent[SpellerAgentConfig]): 15 | def __init__(self, agent_config: SpellerAgentConfig): 16 | super().__init__(agent_config=agent_config) 17 | 18 | async def respond( 19 | self, 20 | human_input, 21 | conversation_id: str, 22 | is_interrupt: bool = False, 23 | ) -> Tuple[Optional[str], bool]: 24 | return "".join(c + " " for c in human_input), False 25 | 26 | 27 | class SpellerAgentFactory(AgentFactory): 28 | def create_agent( 29 | self, agent_config: 
# Schema of the single table used to persist call transcripts.
_CREATE_TRANSCRIPTS_TABLE = '''
    CREATE TABLE IF NOT EXISTS transcripts (
        conversation_id TEXT PRIMARY KEY,
        user_id TEXT NOT NULL,
        transcript TEXT NOT NULL
    )
'''


def _connect(db_path: Optional[str] = None) -> sqlite3.Connection:
    """Open the transcript database, creating its folder and table on demand.

    Args:
        db_path: Database file to open. ``None`` (the default) uses the
            module-level ``DB_FILE_PATH``.

    Returns:
        An open connection; the caller is responsible for closing it.
    """
    path = DB_FILE_PATH if db_path is None else db_path
    folder = os.path.dirname(path)
    if folder:
        # sqlite3.connect() cannot create missing parent directories.
        os.makedirs(folder, exist_ok=True)
    conn = sqlite3.connect(path)
    try:
        # Ensuring the schema here means no caller depends on
        # initialize_db() having run at import time.
        conn.execute(_CREATE_TRANSCRIPTS_TABLE)
        conn.commit()
    except Exception:
        conn.close()
        raise
    return conn


def initialize_db(db_path: Optional[str] = None) -> None:
    """Ensure the database file and the ``transcripts`` table exist."""
    _connect(db_path).close()


def add_transcript(
    conversation_id: str,
    user_id: int,
    transcript: str,
    db_path: Optional[str] = None,
) -> None:
    """Store a transcript, appending to any text already stored for the call.

    Args:
        conversation_id: Primary key of the conversation.
        user_id: Owner of the conversation. It is written to a ``TEXT``
            column and is only used when the row is first inserted.
        transcript: Text to insert, or to append if the row already exists.
        db_path: Optional database file; defaults to ``DB_FILE_PATH``.
    """
    conn = _connect(db_path)
    try:
        # "with conn" commits on success and rolls back on error.
        with conn:
            conn.execute(
                '''
                INSERT INTO transcripts (conversation_id, user_id, transcript)
                VALUES (?, ?, ?)
                ON CONFLICT(conversation_id) DO UPDATE SET transcript = transcript || ?;
                ''',
                (conversation_id, user_id, transcript, transcript),
            )
    finally:
        conn.close()


def get_transcript(
    conversation_id: str, db_path: Optional[str] = None
) -> Optional[str]:
    """Return the stored transcript for ``conversation_id``, or ``None``."""
    conn = _connect(db_path)
    try:
        row = conn.execute(
            '''
            SELECT transcript FROM transcripts WHERE conversation_id = ?;
            ''',
            (conversation_id,),
        ).fetchone()
    finally:
        conn.close()
    return row[0] if row else None


def delete_transcript(conversation_id: str, db_path: Optional[str] = None) -> bool:
    """Delete a transcript and report whether a row was actually removed."""
    conn = _connect(db_path)
    try:
        with conn:
            cursor = conn.execute(
                '''
                DELETE FROM transcripts WHERE conversation_id = ?;
                ''',
                (conversation_id,),
            )
        # rowcount is scoped to this statement, unlike Connection.total_changes.
        return cursor.rowcount > 0
    finally:
        conn.close()
transcript_complete_event.conversation_id, 30 | "user_id": 1, # demo user id 31 | "transcript": transcript_complete_event.transcript.to_string() 32 | } 33 | 34 | # URL of the webhook endpoint you want to send the data to 35 | webhook_url = os.environ.get("TRANSCRIPT_CALLBACK_URL") 36 | 37 | # Make the async HTTP POST request 38 | async with httpx.AsyncClient() as client: 39 | response = await client.post(webhook_url, json=data) 40 | 41 | # Handle the response as needed (e.g., check for success or failure) 42 | if response.status_code == 200: 43 | print("Transcript sent successfully.") 44 | else: 45 | print("Failed to send transcript.") 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI Voice Agent by Jannis Moore 2 | 3 | This AI Voice Agent is designed and built by Jannis Moore to provide an advanced telephony interface using Vocode's telephony server. For more information and other projects by Jannis Moore, visit [Integraticus](https://integraticus.com) and check out the [YouTube Channel](https://www.youtube.com/@jannismoore/featured). 4 | 5 | ## Setup Manual 6 | 7 | To set up the AI Voice Agent, follow these steps: 8 | 9 | 1. Sign up to [Render.com](https://render.com). 10 | 2. Navigate to "New" > "Web Service" and connect your GitHub account if you haven't done that yet. 11 | 3. Fork and import [this repository](https://github.com/jannismoore/ai-voice-agent-vocode-template) within Render.com. 12 | 4. Once done, Render will automatically set most of the values for you. You can customize the Region as you wish. 13 | 5. Set the following environment variables: 14 | - `OPENAI_API_KEY`: Set this to your OpenAI API key. 15 | - `TRANSCRIPT_CALLBACK_URL`: Set this to the URL you want to call once a call was completed. 16 | - `TWILIO_ACCOUNT_SID`: Your Twilio Account ID. 17 | - `TWILIO_AUTH_TOKEN`: Your Twilio Auth token. 
18 | - `DEEPGRAM_API_KEY`: The API key for your Deepgram account. 19 | 6. Once the app is deployed successfully, copy the Render.com URL and add `/inbound_call` at the end of it. 20 | 7. Paste that URL into the Webhook field of your Twilio Phone Number. 21 | 22 | ## Manual Installation 23 | 24 | This part of the manual is for running the application locally. 25 | 26 | First, build the application using Docker: 27 | 28 | ```docker build -t jannismoore-telephony-app .``` 29 | 30 | Then, run the application using docker-compose. From the repository root (where `docker-compose.yml` lives), run: 31 | 32 | ```docker-compose up``` 33 | 34 | 35 | ## Vocode Self-hosted Telephony Server 36 | 37 | For a more detailed guide on setting up the telephony server, visit [Vocode's official documentation](https://docs.vocode.dev/open-source/telephony). 38 | 39 | See [Vocode's Self-hosted Telephony Setup](https://docs.vocode.dev/telephony#self-hosted) for detailed setup steps! 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | junit/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # pytype static type analyzer 132 | .pytype/ 133 | 134 | # Cython debug symbols 135 | cython_debug/ 136 | 137 | # Vocode or telephony service-specific sensitive files 138 | # Add the files or directories where you store your Vocode or telephony service API keys, configurations, etc. 139 | # Example: config.json, .env for environment variables, etc. 140 | config.json 141 | .env 142 | 143 | # IDE-specific files 144 | # JetBrains 145 | .idea/ 146 | 147 | # Visual Studio Code 148 | .vscode/ 149 | *.code-workspace 150 | 151 | # Others 152 | .DS_Store 153 | Thumbs.db 154 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from config import BASE_URL 5 | 6 | from fastapi import FastAPI 7 | from vocode.streaming.models.telephony import TwilioConfig 8 | 9 | # Import both if using ngrok 10 | # from pyngrok import ngrok 11 | # import sys 12 | from memory_config import config_manager 13 | from vocode.streaming.models.agent import ChatGPTAgentConfig 14 | from vocode.streaming.models.message import BaseMessage 15 | from vocode.streaming.telephony.server.base import ( 16 | TwilioInboundCallConfig, 17 | TelephonyServer, 18 | ) 19 | from vocode.streaming.models.synthesizer import StreamElementsSynthesizerConfig # 
# ,ElevenLabsSynthesizerConfig

# Imports our custom actions
from speller_agent import SpellerAgentFactory

# Imports additional events like transcripts
from events_manager import EventsManager

# If running directly from Python, this loads the local .env file;
# docker-compose loads the .env file by itself.
from dotenv import load_dotenv

load_dotenv()

# config.py read the environment when it was imported at the top of this
# module, i.e. before load_dotenv() ran, so a BASE_URL that is defined only
# in .env was not picked up. Re-resolve it here with the same precedence
# config.py uses (Render hostname first, then BASE_URL).
if not BASE_URL:
    BASE_URL = os.environ.get("RENDER_EXTERNAL_HOSTNAME") or os.environ.get("BASE_URL")

app = FastAPI(docs_url=None)

# Initialize logging
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# We store the state of the call in memory
# You can customize the config within memory_config.py
CONFIG_MANAGER = config_manager  # RedisConfigManager()

# Activate this if you want to support NGROK
# if not BASE_URL:
#     ngrok_auth = os.environ.get("NGROK_AUTH_TOKEN")
#     if ngrok_auth is not None:
#         ngrok.set_auth_token(ngrok_auth)
#     port = sys.argv[sys.argv.index("--port") + 1] if "--port" in sys.argv else 3000
#
#     # Open a ngrok tunnel to the dev server
#     BASE_URL = ngrok.connect(port).public_url.replace("https://", "")
#     logger.info('ngrok tunnel "{}" -> "http://127.0.0.1:{}"'.format(BASE_URL, port))
#

# Only continue if the base URL was set within the environment variable.
if not BASE_URL:
    raise ValueError("BASE_URL must be set in environment if not using pyngrok")


# Now we need a Twilio account and number from which to make our call.
# You can make an account here: https://www.twilio.com/docs/iam/access-tokens#step-2-api-key
# Ensure your account is NOT in trial as otherwise it won't work
TWILIO_CONFIG = TwilioConfig(
    account_sid=os.environ.get("TWILIO_ACCOUNT_SID"),
    auth_token=os.environ.get("TWILIO_AUTH_TOKEN"),
)


# Get the instructions for the assistant
def get_assistant_instructions():
    """Return the assistant prompt stored in ``instructions.txt``.

    The file is looked up next to this module rather than in the current
    working directory, so the result does not depend on where the server
    process was started, and it is decoded as UTF-8 on every platform.

    Returns:
        The full text of ``instructions.txt``.

    Raises:
        OSError: If the file is missing or cannot be read.
    """
    instructions_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "instructions.txt"
    )
    with open(instructions_path, "r", encoding="utf-8") as file:
        return file.read()


# Now, we'll configure our agent and its objective.
# We'll use ChatGPT here, but you can import other models like
# GPT4AllAgent and ChatAnthropicAgent.
# Don't forget to set OPENAI_API_KEY!
AGENT_CONFIG = ChatGPTAgentConfig(
    initial_message=BaseMessage(text="Hello, who am I talking to?"),
    prompt_preamble=get_assistant_instructions(),
    generate_responses=True,
)

# Now we'll give our agent a voice and ears.
# Our default speech to text engine is DeepGram, so you'll need to set
# the env variable DEEPGRAM_API_KEY to your Deepgram API key.
# https://deepgram.com/

# We use StreamElements for speech synthesis here because it's fast and
# free, but there are plenty of other options that are slower but
# higher quality (like Eleven Labs below, needs key) available in
# vocode.streaming.models.synthesizer.
SYNTH_CONFIG = StreamElementsSynthesizerConfig.from_telephone_output_device()
# SYNTH_CONFIG = ElevenLabsSynthesizerConfig.from_telephone_output_device(
#     api_key=os.getenv("ELEVEN_LABS_API_KEY") or "")


# This is where we spin up the Telephony server to get the calls running.
# The server is wired to CONFIG_MANAGER (defined earlier in this module) so
# that swapping the config manager there actually takes effect here.
telephony_server = TelephonyServer(
    base_url=BASE_URL,
    config_manager=CONFIG_MANAGER,
    inbound_call_configs=[
        TwilioInboundCallConfig(
            url="/inbound_call",
            agent_config=AGENT_CONFIG,
            twilio_config=TWILIO_CONFIG,
            synthesizer_config=SYNTH_CONFIG,
        )
    ],
    events_manager=EventsManager(),
    agent_factory=SpellerAgentFactory(),
    logger=logger,
)

# Expose the telephony routes (including /inbound_call) on the FastAPI app.
app.include_router(telephony_server.get_router())