├── .gitignore ├── README.md ├── avatars └── tavus │ ├── README.md │ ├── tavus.py │ └── voice-assistant-frontend │ ├── .eslintrc.json │ ├── .github │ ├── assets │ │ ├── app-icon.png │ │ ├── frontend-screenshot.png │ │ └── template-graphic.svg │ └── workflows │ │ ├── build-and-test.yaml │ │ └── sync-to-production.yaml │ ├── .gitignore │ ├── .prettierignore │ ├── .prettierrc │ ├── LICENSE │ ├── README.md │ ├── app │ ├── api │ │ └── connection-details │ │ │ └── route.ts │ ├── favicon.ico │ ├── globals.css │ ├── layout.tsx │ └── page.tsx │ ├── components │ ├── CloseIcon.tsx │ ├── FlashCard.tsx │ ├── FlashCardContainer.tsx │ ├── NoAgentNotification.tsx │ ├── Quiz.tsx │ ├── QuizContainer.tsx │ └── TranscriptionView.tsx │ ├── hooks │ ├── useCombinedTranscriptions.ts │ └── useLocalMicTrack.ts │ ├── next.config.mjs │ ├── package-lock.json │ ├── package.json │ ├── pnpm-lock.yaml │ ├── postcss.config.mjs │ ├── renovate.json │ ├── tailwind.config.ts │ ├── taskfile.yaml │ └── tsconfig.json ├── basics ├── audio.wav ├── change_agent_instructions.py ├── context_variables.py ├── exit_message.py ├── function_calling.py ├── interrupts_user.py ├── listen_and_respond.py ├── playing_audio.py ├── repeater.py └── uninterruptable.py ├── check_agent_example_coverage.py ├── complex-agents ├── medical_office_triage │ ├── prompts │ │ ├── billing_prompt.yaml │ │ ├── support_prompt.yaml │ │ └── triage_prompt.yaml │ ├── triage.py │ └── utils.py └── personal_shopper │ ├── add_test_orders.py │ ├── customer_data.db │ ├── database.py │ ├── personal_shopper.py │ ├── prompts │ ├── returns_prompt.yaml │ ├── sales_prompt.yaml │ └── triage_prompt.yaml │ └── utils.py ├── egress └── recording_agent.py ├── evaluating-agents ├── README.md ├── agent_evals.py └── agent_to_test.py ├── events ├── basic_event.py └── event_emitters.py ├── flows ├── declarative_flow.py ├── multi_stage_flow.py └── simple_flow.py ├── hardware └── pi_zero_transcriber.py ├── home_assistant ├── README.md └── homeautomation.py ├── livekit-logo-dark.png ├── mcp ├── agent.py └── server.py ├── metrics ├── metrics_llm.py ├── metrics_stt.py ├── metrics_tts.py ├── metrics_vad.py └── send-metrics-to-3p │ ├── metrics_server │ ├── README.md │ ├── app.py │ ├── requirements.txt │ └── templates │ │ └── dashboard.html │ ├── run_3p_metrics_demo.sh │ └── send_metrics_to_3p.py ├── multi-agent └── long_or_short_agent.py ├── pipeline-llm ├── anthropic_llm.py ├── cerebras_llm.py ├── google_llm.py ├── interrupt_user.py ├── large_context.py ├── lib │ └── war_and_peace.txt ├── llm_powered_content_filter.py ├── ollama_llm.py ├── openai_llm.py ├── replacing_llm_output.py ├── simple_content_filter.py └── transcription_node.py ├── pipeline-stt ├── keyword_detection.py └── transcriber.py ├── pipeline-tts ├── cartesia_tts.py ├── elevenlabs_change_language.py ├── elevenlabs_tts.py ├── only_greet.py ├── openai_tts.py ├── playai_tts.py ├── rime_tts.py ├── short_replies_only.py ├── tts_comparison.py └── tts_node.py ├── rag ├── README.md ├── build_rag_data.py ├── main.py ├── rag_db_builder.py ├── rag_handler.py ├── requirements.txt └── scrape_docs.py ├── realtime └── openai-realtime.py ├── requirements.txt ├── rpc └── rpc_agent.py ├── telephony ├── answer_call.py ├── make_call │ ├── calling_agent.py │ └── make_call.py ├── sip_lifecycle.py ├── survey_caller │ ├── make_survey_calls.py │ ├── survey_calling_agent.py │ └── survey_data.csv └── warm_handoff.py ├── tool_calling ├── call_function_tool.py └── update_tools.py ├── tracking_state └── npc_character.py ├── translators ├── 
pipeline_translator.py └── tts_translator.py └── vision └── agent.py /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .env.local 3 | rag/data 4 | __pycache__ 5 | /venv 6 | .DS_Store -------------------------------------------------------------------------------- /avatars/tavus/README.md: -------------------------------------------------------------------------------- 1 | # Tavus Avatar Agent 2 | 3 | A LiveKit-powered educational AI agent that uses Tavus to create an interactive study partner focused on teaching about the Fall of the Roman Empire. 4 | 5 | Demo: https://www.youtube.com/watch?v=iuX5PDP73bQ 6 | 7 | ## Features 8 | 9 | - **Conversational Teaching**: Uses the Socratic method to guide students through learning 10 | - **Flash Cards**: Creates and manages flash cards for important concepts 11 | - **Interactive Quizzes**: Builds multiple-choice quizzes to test knowledge retention 12 | - **Visual Avatar**: Powered by Tavus for realistic avatar video generation 13 | - **Voice Interaction**: Natural voice conversation using Deepgram STT and ElevenLabs TTS 14 | 15 | ## Prerequisites 16 | 17 | - Python 3.10+ 18 | - LiveKit account 19 | - Tavus account with a configured avatar (replica_id and persona_id) 20 | - API keys for: 21 | - OpenAI 22 | - Deepgram 23 | - ElevenLabs 24 | - Tavus 25 | 26 | ## Installation 27 | 28 | 1. Clone this repository 29 | 2. Install dependencies from the root level of `python-agents-examples` 30 | ``` 31 | pip install -r requirements.txt 32 | ``` 33 | 3. Create a `.env` file in the parent directory with your API keys 34 | 35 | ## Configuration 36 | 37 | Set the following environment variables in your `.env` file: 38 | 39 | ``` 40 | OPENAI_API_KEY=your_openai_key 41 | ELEVENLABS_API_KEY=your_elevenlabs_key 42 | DEEPGRAM_API_KEY=your_deepgram_key 43 | TAVUS_API_KEY=your_tavus_key 44 | LIVEKIT_API_KEY=your_livekit_key 45 | LIVEKIT_API_SECRET=your_livekit_secret 46 | ``` 47 | 48 | Customize the avatar by changing the `replica_id` and `persona_id` in the `entrypoint` function. 49 | 50 | ## Usage 51 | 52 | Run the agent with: 53 | 54 | ``` 55 | python tavus.py dev 56 | ``` 57 | 58 | ### Frontend Setup 59 | 60 | 1. Navigate to the frontend directory: 61 | ``` 62 | cd voice-assistant-frontend 63 | ``` 64 | 65 | 2. Install dependencies: 66 | ``` 67 | npm install 68 | ``` 69 | 70 | 3. Start the development server: 71 | ``` 72 | npm run dev 73 | ``` 74 | 75 | 4. Open your browser and navigate to: 76 | ``` 77 | http://localhost:3000 78 | ``` 79 | 80 | ### Flash Cards 81 | 82 | The agent automatically creates flash cards for important concepts. Users can flip cards through the UI or by asking the agent. 83 | 84 | ### Quizzes 85 | 86 | The agent creates interactive quizzes with multiple-choice questions. After completion: 87 | - Users receive immediate feedback on their performance 88 | - Flash cards are automatically created for incorrectly answered questions 89 | 90 | ## Extending 91 | 92 | To modify the agent's topic focus: 93 | 1. Update the instructions in the `AvatarAgent` class 94 | 2. 
Adjust the quiz templates and flash card content to match your subject matter 95 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["next/core-web-vitals", "next/typescript", "prettier"] 3 | } 4 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.github/assets/app-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit-examples/python-agents-examples/35d7334a47d1eec24827e237dca83bf26bd8c1ca/avatars/tavus/voice-assistant-frontend/.github/assets/app-icon.png -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.github/assets/frontend-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit-examples/python-agents-examples/35d7334a47d1eec24827e237dca83bf26bd8c1ca/avatars/tavus/voice-assistant-frontend/.github/assets/frontend-screenshot.png -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.github/workflows/build-and-test.yaml: -------------------------------------------------------------------------------- 1 | name: Lint and Build 2 | permissions: 3 | contents: read 4 | pull-requests: read 5 | on: 6 | push: 7 | branches: [main] 8 | pull_request: 9 | branches: [main] 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: pnpm/action-setup@v4 17 | - name: Use Node.js 22 18 | uses: actions/setup-node@v4 19 | with: 20 | node-version: 22 21 | cache: "pnpm" 22 | 23 | - name: Install dependencies 24 | run: pnpm install 25 | 26 | - name: ESLint 27 | run: pnpm lint 28 | 29 | - name: Prettier 30 | run: pnpm format:check 31 | 32 | - name: Ensure build succeeds 33 | run: pnpm build 34 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.github/workflows/sync-to-production.yaml: -------------------------------------------------------------------------------- 1 | # .github/workflows/sync-main-to-sandbox-production.yml 2 | 3 | name: Sync main to sandbox-production 4 | 5 | on: 6 | push: 7 | branches: 8 | - main 9 | 10 | permissions: 11 | contents: write 12 | pull-requests: write 13 | 14 | jobs: 15 | sync: 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - name: Checkout code 20 | uses: actions/checkout@v3 21 | with: 22 | fetch-depth: 0 # Fetch all history so we can force push 23 | 24 | - name: Set up Git 25 | run: | 26 | git config --global user.name 'github-actions[bot]' 27 | git config --global user.email 'github-actions[bot]@livekit.io' 28 | 29 | - name: Sync to sandbox-production 30 | env: 31 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 32 | run: | 33 | git checkout sandbox-production || git checkout -b sandbox-production 34 | git merge --strategy-option theirs main 35 | git push origin sandbox-production 36 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | .yarn/install-state.gz 8 | 9 | # testing 10 | /coverage 11 | 12 | # next.js 13 | /.next/ 14 | /out/ 15 | 16 | # production 17 | /build 18 | 19 | # misc 20 | .DS_Store 21 | *.pem 22 | 23 | # debug 24 | npm-debug.log* 25 | yarn-debug.log* 26 | yarn-error.log* 27 | 28 | # local env files 29 | .env*.local 30 | 31 | # vercel 32 | .vercel 33 | 34 | # typescript 35 | *.tsbuildinfo 36 | next-env.d.ts 37 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.prettierignore: -------------------------------------------------------------------------------- 1 | .github/ 2 | dist/ 3 | docs/ 4 | node_modules/ 5 | .next/ 6 | yarn.lock 7 | pnpm-lock.yaml 8 | 9 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": false, 3 | "trailingComma": "es5", 4 | "semi": true, 5 | "tabWidth": 2, 6 | "printWidth": 100, 7 | "importOrder": ["", "^[./]"], 8 | "importOrderSeparation": false, 9 | "importOrderSortSpecifiers": true, 10 | "importOrderParserPlugins": ["typescript", "jsx"], 11 | "plugins": ["@trivago/prettier-plugin-sort-imports"] 12 | } 13 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 LiveKit, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/README.md: -------------------------------------------------------------------------------- 1 | Voice Assistant App Icon 2 | 3 | # Roman Empire Study Partner with Tavus Avatar 4 | 5 | This is a voice-enabled educational assistant built with [LiveKit Agents](https://docs.livekit.io/agents) and [Tavus Avatars](https://tavus.io/). It creates an interactive learning experience focused on the Fall of the Roman Empire, featuring flash cards, quizzes, and Socratic teaching methods. 
6 | 7 | ## Features 8 | 9 | - **Conversational Learning**: AI tutor uses the Socratic method to guide students through complex historical topics 10 | - **Interactive Flash Cards**: Visual aids for key concepts that can be flipped to show questions or answers 11 | - **Multiple-Choice Quizzes**: Test your knowledge with interactive quizzes that provide immediate feedback 12 | - **Realistic Avatar**: Powered by Tavus for a more engaging visual learning experience 13 | 14 | ![App screenshot](.github/assets/frontend-screenshot.png) 15 | 16 | ## Getting started 17 | 18 | Run the following commands to set up the frontend: 19 | 20 | ```bash 21 | cd voice-assistant-frontend 22 | npm install 23 | npm run dev 24 | ``` 25 | 26 | And open http://localhost:3000 in your browser. 27 | 28 | You'll need to run the Tavus agent in a separate terminal: 29 | 30 | ```bash 31 | cd avatars/tavus 32 | python tavus.py dev 33 | ``` 34 | 35 | > [!NOTE] 36 | > Make sure you've configured your environment variables in the `.env` file as described in the main README. 37 | 38 | ## How to Use 39 | 40 | 1. **Start a Conversation**: Begin asking questions about the Fall of the Roman Empire 41 | 2. **Use Flash Cards**: The agent will create flash cards for important concepts, which you can flip to see answers 42 | 3. **Take Quizzes**: The agent will periodically offer quizzes to test your knowledge 43 | 4. **Review Incorrect Answers**: Flash cards will automatically be created for questions you miss 44 | 45 | ## Contributing 46 | 47 | This project is open source and we welcome contributions! Please open a PR or issue through GitHub, and don't forget to join us in the [LiveKit Community Slack](https://livekit.io/join-slack)! 48 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/app/api/connection-details/route.ts: -------------------------------------------------------------------------------- 1 | import { AccessToken, AccessTokenOptions, VideoGrant } from "livekit-server-sdk"; 2 | import { NextResponse } from "next/server"; 3 | 4 | // NOTE: you are expected to define the following environment variables in `.env.local`: 5 | const API_KEY = process.env.LIVEKIT_API_KEY; 6 | const API_SECRET = process.env.LIVEKIT_API_SECRET; 7 | const LIVEKIT_URL = process.env.LIVEKIT_URL; 8 | 9 | // don't cache the results 10 | export const revalidate = 0; 11 | 12 | export type ConnectionDetails = { 13 | serverUrl: string; 14 | roomName: string; 15 | participantName: string; 16 | participantToken: string; 17 | }; 18 | 19 | export async function GET() { 20 | try { 21 | if (LIVEKIT_URL === undefined) { 22 | throw new Error("LIVEKIT_URL is not defined"); 23 | } 24 | if (API_KEY === undefined) { 25 | throw new Error("LIVEKIT_API_KEY is not defined"); 26 | } 27 | if (API_SECRET === undefined) { 28 | throw new Error("LIVEKIT_API_SECRET is not defined"); 29 | } 30 | 31 | // Generate participant token 32 | const participantIdentity = `voice_assistant_user_${Math.floor(Math.random() * 10_000)}`; 33 | const roomName = `voice_assistant_room_${Math.floor(Math.random() * 10_000)}`; 34 | const participantToken = await createParticipantToken( 35 | { identity: participantIdentity }, 36 | roomName 37 | ); 38 | 39 | // Return connection details 40 | const data: ConnectionDetails = { 41 | serverUrl: LIVEKIT_URL, 42 | roomName, 43 | participantToken: participantToken, 44 | participantName: participantIdentity, 45 | }; 46 | const headers = new Headers({ 47 | "Cache-Control": "no-store", 48 | }); 49 | 
return NextResponse.json(data, { headers }); 50 | } catch (error) { 51 | if (error instanceof Error) { 52 | console.error(error); 53 | return new NextResponse(error.message, { status: 500 }); 54 | } 55 | } 56 | } 57 | 58 | function createParticipantToken(userInfo: AccessTokenOptions, roomName: string) { 59 | const at = new AccessToken(API_KEY, API_SECRET, { 60 | ...userInfo, 61 | ttl: "15m", 62 | }); 63 | const grant: VideoGrant = { 64 | room: roomName, 65 | roomJoin: true, 66 | canPublish: true, 67 | canPublishData: true, 68 | canSubscribe: true, 69 | }; 70 | at.addGrant(grant); 71 | return at.toJwt(); 72 | } 73 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit-examples/python-agents-examples/35d7334a47d1eec24827e237dca83bf26bd8c1ca/avatars/tavus/voice-assistant-frontend/app/favicon.ico -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/app/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | --lk-va-bar-width: 72px; 7 | --lk-control-bar-height: unset; 8 | } 9 | 10 | .agent-visualizer > .lk-audio-bar { 11 | width: 72px; 12 | } 13 | 14 | .lk-agent-control-bar { 15 | @apply border-t-0 p-0 h-min mr-4; 16 | } 17 | 18 | .lk-disconnect-button { 19 | @apply h-[36px] hover:bg-[#6b221a] hover:text-[white] bg-[#31100c] border-[#6b221a]; 20 | } 21 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import "@livekit/components-styles"; 2 | import { Metadata } from "next"; 3 | import { Public_Sans } from "next/font/google"; 4 | import "./globals.css"; 5 | 6 | const publicSans400 = Public_Sans({ 7 | weight: "400", 8 | subsets: ["latin"], 9 | }); 10 | 11 | export const metadata: Metadata = { 12 | title: "Voice Assistant", 13 | }; 14 | 15 | export default function RootLayout({ 16 | children, 17 | }: Readonly<{ 18 | children: React.ReactNode; 19 | }>) { 20 | return ( 21 | 22 | {children} 23 | 24 | ); 25 | } 26 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/components/CloseIcon.tsx: -------------------------------------------------------------------------------- 1 | export function CloseIcon() { 2 | return ( 3 | 4 | 10 | 11 | ); 12 | } 13 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/components/FlashCard.tsx: -------------------------------------------------------------------------------- 1 | import { motion, AnimatePresence } from "framer-motion"; 2 | import { useState, useEffect } from "react"; 3 | 4 | export interface FlashCardData { 5 | id: string; 6 | question: string; 7 | answer: string; 8 | isFlipped?: boolean; 9 | } 10 | 11 | interface FlashCardProps { 12 | card: FlashCardData; 13 | onFlip?: (id: string) => void; 14 | } 15 | 16 | export default function FlashCard({ card, onFlip }: FlashCardProps) { 17 | const [isFlipped, setIsFlipped] = useState(card.isFlipped || false); 18 | 19 | // Update local state when card prop changes 20 | useEffect(() => { 21 | 
setIsFlipped(card.isFlipped || false); 22 | }, [card.isFlipped]); 23 | 24 | const handleFlip = () => { 25 | setIsFlipped(!isFlipped); 26 | if (onFlip) { 27 | onFlip(card.id); 28 | } 29 | }; 30 | 31 | return ( 32 |
36 | 37 | {!isFlipped ? ( 38 | 46 |

Question

47 |

{card.question}

48 |
49 | ) : ( 50 | 58 |

Answer

59 |

{card.answer}

60 |
61 | )} 62 |
63 |
64 | ); 65 | } 66 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/components/NoAgentNotification.tsx: -------------------------------------------------------------------------------- 1 | import type { AgentState } from "@livekit/components-react"; 2 | import { useEffect, useRef, useState } from "react"; 3 | 4 | interface NoAgentNotificationProps extends React.PropsWithChildren { 5 | state: AgentState; 6 | } 7 | 8 | /** 9 | * Renders some user info when no agent connects to the room after a certain time. 10 | */ 11 | export function NoAgentNotification(props: NoAgentNotificationProps) { 12 | const timeToWaitMs = 10_000; 13 | const timeoutRef = useRef(null); 14 | const [showNotification, setShowNotification] = useState(false); 15 | const agentHasConnected = useRef(false); 16 | 17 | // If the agent has connected, we don't need to show the notification. 18 | if ( 19 | ["listening", "thinking", "speaking"].includes(props.state) && 20 | agentHasConnected.current == false 21 | ) { 22 | agentHasConnected.current = true; 23 | } 24 | 25 | useEffect(() => { 26 | if (props.state === "connecting") { 27 | timeoutRef.current = window.setTimeout(() => { 28 | if (props.state === "connecting" && agentHasConnected.current === false) { 29 | setShowNotification(true); 30 | } 31 | }, timeToWaitMs); 32 | } else { 33 | if (timeoutRef.current) { 34 | window.clearTimeout(timeoutRef.current); 35 | } 36 | setShowNotification(false); 37 | } 38 | 39 | return () => { 40 | if (timeoutRef.current) { 41 | window.clearTimeout(timeoutRef.current); 42 | } 43 | }; 44 | }, [props.state]); 45 | 46 | return ( 47 | <> 48 | {showNotification ? ( 49 |
50 |
51 | {/* Warning Icon */} 52 | 59 | 65 | 66 |
67 |

68 | It's quiet... too quiet. Is your agent lost? Ensure your agent is properly 69 | configured and running on your machine. 70 |

71 | 76 | View guide 77 | 78 | 95 |
96 | ) : null} 97 | 98 | ); 99 | } 100 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/components/Quiz.tsx: -------------------------------------------------------------------------------- 1 | import { motion } from "framer-motion"; 2 | 3 | export interface QuizAnswer { 4 | id: string; 5 | text: string; 6 | } 7 | 8 | export interface QuizQuestion { 9 | id: string; 10 | text: string; 11 | answers: QuizAnswer[]; 12 | } 13 | 14 | interface QuizProps { 15 | question: QuizQuestion; 16 | selectedAnswerId: string | undefined; 17 | onAnswerSelect: (answerId: string) => void; 18 | } 19 | 20 | export default function Quiz({ question, selectedAnswerId, onAnswerSelect }: QuizProps) { 21 | return ( 22 |
23 |

{question.text}

24 | 25 |
26 | {question.answers.map((answer) => ( 27 |
28 | onAnswerSelect(answer.id)} 35 | className="mr-3 h-4 w-4" 36 | /> 37 | 43 |
44 | ))} 45 |
46 |
47 | ); 48 | } 49 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/components/QuizContainer.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect, useState } from "react"; 2 | import { motion, AnimatePresence } from "framer-motion"; 3 | import { useRoomContext, useVoiceAssistant } from "@livekit/components-react"; 4 | import Quiz, { QuizQuestion, QuizAnswer } from "./Quiz"; 5 | 6 | export interface SubmittedQuiz { 7 | id: string; 8 | questions: QuizQuestion[]; 9 | answers: Record; 10 | } 11 | 12 | export default function QuizContainer() { 13 | const [questions, setQuestions] = useState([]); 14 | const [currentQuestionIndex, setCurrentQuestionIndex] = useState(null); 15 | const [isVisible, setIsVisible] = useState(false); 16 | const [quizId, setQuizId] = useState(null); 17 | const [selectedAnswers, setSelectedAnswers] = useState>({}); 18 | const room = useRoomContext(); 19 | const { agent } = useVoiceAssistant(); 20 | 21 | useEffect(() => { 22 | if (!room) return; 23 | 24 | // Register RPC method to receive quizzes 25 | const handleShowQuiz = async (data: any): Promise => { 26 | try { 27 | console.log("Received quiz RPC data:", data); 28 | 29 | // Check for the correct property in the RPC data 30 | if (!data || data.payload === undefined) { 31 | console.error("Invalid RPC data received:", data); 32 | return "Error: Invalid RPC data format"; 33 | } 34 | 35 | console.log("Parsing payload:", data.payload); 36 | 37 | // Parse the payload string into a JSON object 38 | const payload = typeof data.payload === 'string' 39 | ? JSON.parse(data.payload) 40 | : data.payload; 41 | 42 | if (payload.action === "show") { 43 | // Reset answers when showing a new quiz 44 | setSelectedAnswers({}); 45 | setQuizId(payload.id); 46 | setQuestions(payload.questions); 47 | setCurrentQuestionIndex(0); 48 | setIsVisible(true); 49 | } else if (payload.action === "hide") { 50 | setIsVisible(false); 51 | } 52 | 53 | return "Success"; 54 | } catch (error) { 55 | console.error("Error processing quiz data:", error); 56 | return "Error: " + (error instanceof Error ? error.message : String(error)); 57 | } 58 | }; 59 | 60 | room.localParticipant.registerRpcMethod( 61 | "client.quiz", 62 | handleShowQuiz 63 | ); 64 | 65 | return () => { 66 | // Clean up RPC method when component unmounts 67 | room.localParticipant.unregisterRpcMethod("client.quiz"); 68 | }; 69 | }, [room]); 70 | 71 | const handleAnswerSelect = (questionId: string, answerId: string) => { 72 | setSelectedAnswers(prev => ({ 73 | ...prev, 74 | [questionId]: answerId 75 | })); 76 | }; 77 | 78 | const handleSubmitQuiz = async () => { 79 | if (!agent || !quizId) return; 80 | 81 | try { 82 | console.log(`Submitting quiz ${quizId} to agent ${agent.identity}`); 83 | 84 | const payload = { 85 | id: quizId, 86 | answers: selectedAnswers 87 | }; 88 | 89 | const result = await room.localParticipant.performRpc({ 90 | destinationIdentity: agent.identity, 91 | method: "agent.submitQuiz", 92 | payload: JSON.stringify(payload) 93 | }); 94 | 95 | console.log(`Quiz submission result: ${result}`); 96 | 97 | // Hide the quiz after submission 98 | setIsVisible(false); 99 | } catch (error: unknown) { 100 | console.error("Error submitting quiz:", error); 101 | if (error instanceof Error) { 102 | console.error(error.stack); 103 | } 104 | } 105 | }; 106 | 107 | const currentQuestion = currentQuestionIndex !== null && questions[currentQuestionIndex] 108 | ? 
questions[currentQuestionIndex] 109 | : null; 110 | 111 | const isLastQuestion = currentQuestionIndex === questions.length - 1; 112 | const allQuestionsAnswered = questions.length > 0 && 113 | questions.every(q => selectedAnswers[q.id] !== undefined); 114 | 115 | return ( 116 | 117 | {isVisible && currentQuestion && ( 118 | 124 |
125 |

Quiz

126 | 132 |
133 | 134 | handleAnswerSelect(currentQuestion.id, answerId)} 138 | /> 139 | 140 |
141 | 150 | {(currentQuestionIndex ?? 0) + 1} / {questions.length} 151 | {!isLastQuestion ? ( 152 | 160 | ) : ( 161 | 168 | )} 169 |
170 |
171 | )} 172 |
173 | ); 174 | } 175 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/components/TranscriptionView.tsx: -------------------------------------------------------------------------------- 1 | import useCombinedTranscriptions from "@/hooks/useCombinedTranscriptions"; 2 | import * as React from "react"; 3 | 4 | export default function TranscriptionView() { 5 | const combinedTranscriptions = useCombinedTranscriptions(); 6 | const containerRef = React.useRef(null); 7 | 8 | // scroll to bottom when new transcription is added 9 | React.useEffect(() => { 10 | if (containerRef.current) { 11 | containerRef.current.scrollTop = containerRef.current.scrollHeight; 12 | } 13 | }, [combinedTranscriptions]); 14 | 15 | return ( 16 |
17 | {/* Fade-out gradient mask */} 18 |
19 |
20 | 21 | {/* Scrollable content */} 22 |
23 | {combinedTranscriptions.map((segment) => ( 24 |
33 | {segment.text} 34 |
35 | ))} 36 |
37 |
38 | ); 39 | } 40 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/hooks/useCombinedTranscriptions.ts: -------------------------------------------------------------------------------- 1 | import { useTrackTranscription, useVoiceAssistant } from "@livekit/components-react"; 2 | import { useMemo } from "react"; 3 | import useLocalMicTrack from "./useLocalMicTrack"; 4 | 5 | export default function useCombinedTranscriptions() { 6 | const { agentTranscriptions } = useVoiceAssistant(); 7 | 8 | const micTrackRef = useLocalMicTrack(); 9 | const { segments: userTranscriptions } = useTrackTranscription(micTrackRef); 10 | 11 | const combinedTranscriptions = useMemo(() => { 12 | return [ 13 | ...agentTranscriptions.map((val) => { 14 | return { ...val, role: "assistant" }; 15 | }), 16 | ...userTranscriptions.map((val) => { 17 | return { ...val, role: "user" }; 18 | }), 19 | ].sort((a, b) => a.firstReceivedTime - b.firstReceivedTime); 20 | }, [agentTranscriptions, userTranscriptions]); 21 | 22 | return combinedTranscriptions; 23 | } 24 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/hooks/useLocalMicTrack.ts: -------------------------------------------------------------------------------- 1 | import { TrackReferenceOrPlaceholder, useLocalParticipant } from "@livekit/components-react"; 2 | import { Track } from "livekit-client"; 3 | import { useMemo } from "react"; 4 | 5 | export default function useLocalMicTrack() { 6 | const { microphoneTrack, localParticipant } = useLocalParticipant(); 7 | 8 | const micTrackRef: TrackReferenceOrPlaceholder = useMemo(() => { 9 | return { 10 | participant: localParticipant, 11 | source: Track.Source.Microphone, 12 | publication: microphoneTrack, 13 | }; 14 | }, [localParticipant, microphoneTrack]); 15 | 16 | return micTrackRef; 17 | } 18 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/next.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = {}; 3 | 4 | export default nextConfig; 5 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "voice-assistant2", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint", 10 | "format:check": "prettier --check .", 11 | "format:write": "prettier --write ." 
12 | }, 13 | "dependencies": { 14 | "@livekit/components-react": "^2.9.3", 15 | "@livekit/components-styles": "^1.1.4", 16 | "framer-motion": "^11.18.0", 17 | "livekit-client": "^2.8.0", 18 | "livekit-server-sdk": "^2.9.7", 19 | "react": "^18.3.1", 20 | "react-dom": "^18.3.1" 21 | }, 22 | "devDependencies": { 23 | "@trivago/prettier-plugin-sort-imports": "^5.2.2", 24 | "@types/node": "^20.17.13", 25 | "@types/react": "^18.3.18", 26 | "@types/react-dom": "^18.3.5", 27 | "eslint": "^8.57.1", 28 | "eslint-config-next": "14.2.28", 29 | "eslint-config-prettier": "9.1.0", 30 | "next": "14", 31 | "postcss": "^8.5.1", 32 | "prettier": "^3.4.2", 33 | "tailwindcss": "^3.4.17", 34 | "typescript": "^5.7.3" 35 | }, 36 | "packageManager": "pnpm@9.15.9" 37 | } 38 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('postcss-load-config').Config} */ 2 | const config = { 3 | plugins: { 4 | tailwindcss: {}, 5 | }, 6 | }; 7 | 8 | export default config; 9 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": ["config:recommended"], 4 | "packageRules": [ 5 | { 6 | "matchUpdateTypes": ["minor", "patch", "pin", "digest"], 7 | "automerge": true 8 | } 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/tailwind.config.ts: -------------------------------------------------------------------------------- 1 | import type { Config } from "tailwindcss"; 2 | 3 | const config: Config = { 4 | content: [ 5 | "./pages/**/*.{js,ts,jsx,tsx,mdx}", 6 | "./components/**/*.{js,ts,jsx,tsx,mdx}", 7 | "./app/**/*.{js,ts,jsx,tsx,mdx}", 8 | ], 9 | theme: {}, 10 | plugins: [], 11 | }; 12 | export default config; 13 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/taskfile.yaml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | output: interleaved 3 | dotenv: [".env.local"] 4 | 5 | tasks: 6 | post_create: 7 | desc: "Runs after this template is instantiated as a Sandbox or Bootstrap" 8 | cmds: 9 | - echo -e "\nYour Next.js voice assistant is ready to go!\n" 10 | - echo -e "To give it a try, run the following commands:\r\n" 11 | - echo -e "\tcd {{.ROOT_DIR}}\r" 12 | - echo -e "\tpnpm install\r" 13 | - echo -e "\tpnpm dev\r\n" 14 | 15 | install: 16 | interactive: true 17 | cmds: 18 | - "pnpm install" 19 | 20 | dev: 21 | interactive: true 22 | cmds: 23 | - "pnpm dev" 24 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "lib": ["dom", "dom.iterable", "esnext"], 4 | "allowJs": true, 5 | "skipLibCheck": true, 6 | "strict": true, 7 | "noEmit": true, 8 | "esModuleInterop": true, 9 | "module": "esnext", 10 | "moduleResolution": "bundler", 11 | "resolveJsonModule": true, 12 | "isolatedModules": true, 13 | "jsx": "preserve", 14 | "incremental": true, 15 | "plugins": [ 16 | { 17 | "name": "next" 18 | } 
19 | ], 20 | "paths": { 21 | "@/*": ["./*"] 22 | } 23 | }, 24 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 25 | "exclude": ["node_modules"] 26 | } 27 | -------------------------------------------------------------------------------- /basics/audio.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit-examples/python-agents-examples/35d7334a47d1eec24827e237dca83bf26bd8c1ca/basics/audio.wav -------------------------------------------------------------------------------- /basics/change_agent_instructions.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("listen-and-respond") 12 | logger.setLevel(logging.INFO) 13 | 14 | class ChangeInstructionsAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful agent. When the user speaks, you listen and respond. 19 | """, 20 | stt=deepgram.STT(), 21 | llm=openai.LLM(model="gpt-4o"), 22 | tts=openai.TTS(), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def on_enter(self): 27 | if self.session.participant.name.startswith("sip"): 28 | self.update_instructions(""" 29 | You are a helpful agent speaking on the phone. 30 | """) 31 | self.session.generate_reply() 32 | 33 | async def entrypoint(ctx: JobContext): 34 | await ctx.connect() 35 | 36 | session = AgentSession() 37 | 38 | await session.start( 39 | agent=ChangeInstructionsAgent(), 40 | room=ctx.room 41 | ) 42 | 43 | if __name__ == "__main__": 44 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/context_variables.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, deepgram, silero 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("context-variables") 11 | logger.setLevel(logging.INFO) 12 | 13 | class ContextAgent(Agent): 14 | def __init__(self, context_vars=None) -> None: 15 | instructions = """ 16 | You are a helpful agent. The user's name is {name}. 17 | They are {age} years old and live in {city}. 
18 | """ 19 | 20 | if context_vars: 21 | instructions = instructions.format(**context_vars) 22 | 23 | super().__init__( 24 | instructions=instructions, 25 | stt=deepgram.STT(), 26 | llm=openai.LLM(model="gpt-4o"), 27 | tts=openai.TTS(), 28 | vad=silero.VAD.load() 29 | ) 30 | 31 | async def on_enter(self): 32 | self.session.generate_reply() 33 | 34 | async def entrypoint(ctx: JobContext): 35 | await ctx.connect() 36 | 37 | context_variables = { 38 | "name": "Shayne", 39 | "age": 35, 40 | "city": "Toronto" 41 | } 42 | 43 | session = AgentSession() 44 | 45 | await session.start( 46 | agent=ContextAgent(context_vars=context_variables), 47 | room=ctx.room 48 | ) 49 | 50 | if __name__ == "__main__": 51 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/exit_message.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | from livekit.agents.llm import function_tool 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | logger = logging.getLogger("listen-and-respond") 13 | logger.setLevel(logging.INFO) 14 | 15 | class GoodbyeAgent(Agent): 16 | def __init__(self) -> None: 17 | super().__init__( 18 | instructions=""" 19 | You are a helpful agent. 20 | When the user wants to stop talking to you, use the end_session function to close the session. 21 | """, 22 | stt=deepgram.STT(), 23 | llm=openai.LLM(model="gpt-4o"), 24 | tts=openai.TTS(), 25 | vad=silero.VAD.load() 26 | ) 27 | 28 | @function_tool 29 | async def end_session(self): 30 | """When the user wants to stop talking to you, use this function to close the session.""" 31 | await self.session.drain() 32 | await self.session.aclose() 33 | 34 | async def on_exit(self): 35 | await self.session.say("Goodbye!") 36 | 37 | async def entrypoint(ctx: JobContext): 38 | await ctx.connect() 39 | 40 | session = AgentSession() 41 | 42 | await session.start( 43 | agent=GoodbyeAgent(), 44 | room=ctx.room 45 | ) 46 | 47 | if __name__ == "__main__": 48 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/function_calling.py: -------------------------------------------------------------------------------- 1 | ## This is a basic example of how to use function calling. 2 | ## To test the function, you can ask the agent to print to the console! 3 | 4 | import logging 5 | from pathlib import Path 6 | from dotenv import load_dotenv 7 | from livekit.agents import JobContext, WorkerOptions, cli 8 | from livekit.agents.llm import function_tool 9 | from livekit.agents.voice import Agent, AgentSession, RunContext 10 | from livekit.plugins import deepgram, openai, silero 11 | 12 | logger = logging.getLogger("function-calling") 13 | logger.setLevel(logging.INFO) 14 | 15 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 16 | 17 | class FunctionAgent(Agent): 18 | def __init__(self) -> None: 19 | super().__init__( 20 | instructions=""" 21 | You are a helpful assistant communicating through voice. Don't use any unpronouncable characters. 22 | Note: If asked to print to the console, use the `print_to_console` function. 
23 | """, 24 | stt=deepgram.STT(), 25 | llm=openai.LLM(model="gpt-4o"), 26 | tts=openai.TTS(), 27 | vad=silero.VAD.load() 28 | ) 29 | 30 | @function_tool 31 | async def print_to_console(self, context: RunContext): 32 | print("Console Print Success!") 33 | return None, "I've printed to the console." 34 | 35 | async def on_enter(self): 36 | self.session.generate_reply() 37 | 38 | async def entrypoint(ctx: JobContext): 39 | await ctx.connect() 40 | 41 | session = AgentSession() 42 | 43 | await session.start( 44 | agent=FunctionAgent(), 45 | room=ctx.room 46 | ) 47 | 48 | if __name__ == "__main__": 49 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/interrupts_user.py: -------------------------------------------------------------------------------- 1 | # This agent isn't interruptable, so it will keep talking even if the user tries to speak. 2 | 3 | from pathlib import Path 4 | from typing import AsyncIterable, Optional 5 | import re 6 | import logging 7 | from dotenv import load_dotenv 8 | from livekit.agents import JobContext, WorkerOptions, cli 9 | from livekit.agents.voice import Agent, AgentSession 10 | from livekit.plugins import deepgram, openai 11 | from livekit import rtc 12 | 13 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 14 | 15 | # Set up logging 16 | logging.basicConfig(level=logging.INFO) 17 | logger = logging.getLogger(__name__) 18 | 19 | class UninterruptableAgent(Agent): 20 | def __init__(self) -> None: 21 | super().__init__( 22 | instructions=""" 23 | You are a helpful assistant communicating through voice who will interrupt the user if they try to say more than one sentence. 24 | """, 25 | stt=deepgram.STT(), 26 | llm=openai.LLM(model="gpt-4o"), 27 | tts=openai.TTS(), 28 | allow_interruptions=False 29 | ) 30 | self.text_buffer = "" 31 | 32 | async def stt_node(self, text: AsyncIterable[str], model_settings: Optional[dict] = None) -> Optional[AsyncIterable[rtc.AudioFrame]]: 33 | parent_stream = super().stt_node(text, model_settings) 34 | 35 | if parent_stream is None: 36 | return None 37 | 38 | async def replay_user_input(text: str): 39 | await self.session.say("Let me stop you there, and respond. 
You said: " + text) 40 | 41 | async def process_stream(): 42 | async for event in parent_stream: 43 | if hasattr(event, 'type') and str(event.type) == "SpeechEventType.FINAL_TRANSCRIPT" and event.alternatives: 44 | transcript = event.alternatives[0].text 45 | 46 | self.text_buffer += " " + transcript 47 | self.text_buffer = self.text_buffer.strip() 48 | 49 | sentence_pattern = r'[.!?]+' 50 | if re.search(sentence_pattern, self.text_buffer): 51 | sentences = re.split(sentence_pattern, self.text_buffer) 52 | 53 | if len(sentences) > 1: 54 | for i in range(len(sentences) - 1): 55 | if sentences[i].strip(): 56 | logger.info(f"Complete sentence detected: '{sentences[i].strip()}'") 57 | await replay_user_input(sentences[i].strip()) 58 | 59 | self.text_buffer = sentences[-1].strip() 60 | 61 | yield event 62 | 63 | return process_stream() 64 | 65 | async def on_enter(self): 66 | self.session.say("I'll interrupt you after 1 sentence.") 67 | 68 | async def entrypoint(ctx: JobContext): 69 | await ctx.connect() 70 | 71 | session = AgentSession() 72 | 73 | await session.start( 74 | agent=UninterruptableAgent(), 75 | room=ctx.room 76 | ) 77 | 78 | if __name__ == "__main__": 79 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/listen_and_respond.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("listen-and-respond") 12 | logger.setLevel(logging.INFO) 13 | 14 | class SimpleAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful agent. When the user speaks, you listen and respond. 19 | """, 20 | stt=deepgram.STT(), 21 | llm=openai.LLM(model="gpt-4o"), 22 | tts=openai.TTS(), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def on_enter(self): 27 | self.session.generate_reply() 28 | 29 | async def entrypoint(ctx: JobContext): 30 | await ctx.connect() 31 | 32 | session = AgentSession() 33 | 34 | await session.start( 35 | agent=SimpleAgent(), 36 | room=ctx.room 37 | ) 38 | 39 | if __name__ == "__main__": 40 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/playing_audio.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | import wave 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.llm import function_tool 7 | from livekit.agents.voice import Agent, AgentSession, RunContext 8 | from livekit.plugins import deepgram, openai, silero 9 | from livekit import rtc 10 | 11 | logger = logging.getLogger("function-calling") 12 | logger.setLevel(logging.INFO) 13 | 14 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 15 | 16 | class FunctionAgent(Agent): 17 | def __init__(self) -> None: 18 | super().__init__( 19 | instructions=""" 20 | You are a helpful assistant communicating through voice. Don't use any unpronouncable characters. 21 | If asked to play audio, use the `play_audio_file` function. 
22 | """, 23 | stt=deepgram.STT(), 24 | llm=openai.LLM(model="gpt-4o"), 25 | tts=openai.TTS(), 26 | vad=silero.VAD.load() 27 | ) 28 | 29 | @function_tool 30 | async def play_audio_file(self, context: RunContext): 31 | audio_path = Path(__file__).parent / "audio.wav" 32 | 33 | with wave.open(str(audio_path), 'rb') as wav_file: 34 | num_channels = wav_file.getnchannels() 35 | sample_rate = wav_file.getframerate() 36 | frames = wav_file.readframes(wav_file.getnframes()) 37 | 38 | audio_frame = rtc.AudioFrame( 39 | data=frames, 40 | sample_rate=sample_rate, 41 | num_channels=num_channels, 42 | samples_per_channel=wav_file.getnframes() 43 | ) 44 | 45 | async def audio_generator(): 46 | yield audio_frame 47 | 48 | await self.session.say("Playing audio file", audio=audio_generator()) 49 | 50 | return None, "I've played the audio file for you." 51 | 52 | async def on_enter(self): 53 | self.session.generate_reply() 54 | 55 | async def entrypoint(ctx: JobContext): 56 | await ctx.connect() 57 | 58 | session = AgentSession() 59 | 60 | await session.start( 61 | agent=FunctionAgent(), 62 | room=ctx.room 63 | ) 64 | 65 | if __name__ == "__main__": 66 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/repeater.py: -------------------------------------------------------------------------------- 1 | # Repeats what the user says using a STT -> TTS loop, without any LLM. 2 | 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import deepgram, openai 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | async def entrypoint(ctx: JobContext): 12 | 13 | await ctx.connect() 14 | session = AgentSession() 15 | 16 | @session.on("user_input_transcribed") 17 | def on_transcript(transcript): 18 | if transcript.is_final: 19 | session.say(transcript.transcript) 20 | 21 | await session.start( 22 | agent=Agent( 23 | instructions="You are a helpful assistant that repeats what the user says.", 24 | stt=deepgram.STT(), 25 | tts=openai.TTS(), 26 | allow_interruptions=False 27 | ), 28 | room=ctx.room 29 | ) 30 | 31 | if __name__ == "__main__": 32 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/uninterruptable.py: -------------------------------------------------------------------------------- 1 | # This agent isn't interruptable, so it will keep talking even if the user tries to speak. 2 | 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import deepgram, openai 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | class UninterruptableAgent(Agent): 12 | def __init__(self) -> None: 13 | super().__init__( 14 | instructions=""" 15 | You are a helpful assistant communicating through voice who is not interruptable. 
16 | """, 17 | stt=deepgram.STT(), 18 | llm=openai.LLM(model="gpt-4o"), 19 | tts=openai.TTS(), 20 | allow_interruptions=False 21 | ) 22 | 23 | async def on_enter(self): 24 | self.session.generate_reply(user_input="Say something somewhat long and boring so I can test if you're interruptable.") 25 | 26 | async def entrypoint(ctx: JobContext): 27 | await ctx.connect() 28 | 29 | session = AgentSession() 30 | 31 | await session.start( 32 | agent=UninterruptableAgent(), 33 | room=ctx.room 34 | ) 35 | 36 | if __name__ == "__main__": 37 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /check_agent_example_coverage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | check_agent_example_coverage.py 4 | ──────────────────────────────── 5 | Scan every .py file beneath the current directory (or the paths you provide) 6 | and report which public methods of livekit.agents.voice.Agent and 7 | AgentSession are already exercised or overridden in the example code. 8 | 9 | USAGE 10 | ===== 11 | 12 | # Scan the whole repo 13 | python check_agent_example_coverage.py 14 | 15 | # Or cherry-pick folders 16 | python check_agent_example_coverage.py basics/ pipeline-stt/ 17 | 18 | # Just print warnings for uncovered methods 19 | python check_agent_example_coverage.py --warn-only 20 | 21 | # Return non-zero exit code if coverage incomplete (for CI) 22 | python check_agent_example_coverage.py --fail-on-incomplete 23 | 24 | The report looks like: 25 | 26 | Agent (7/9 methods used) 27 | ✔ generate_reply 28 | ✔ on_enter 29 | ✘ on_exit 30 | … 31 | 32 | AgentSession (5/8 methods used) 33 | ✔ start 34 | ✔ stop 35 | ✘ reconnect 36 | … 37 | 38 | """ 39 | 40 | from __future__ import annotations 41 | 42 | import argparse 43 | import inspect 44 | import sys 45 | from pathlib import Path 46 | from collections import defaultdict 47 | from livekit.agents.voice import Agent, AgentSession 48 | 49 | 50 | EXCLUDE_DIRS = { 51 | ".git", 52 | ".hg", 53 | ".svn", 54 | "__pycache__", 55 | "venv", 56 | ".venv", 57 | "env", 58 | ".env", 59 | "build", 60 | "dist", 61 | } 62 | 63 | def _public_methods(cls) -> set[str]: 64 | return { 65 | name 66 | for name, obj in inspect.getmembers(cls, inspect.isfunction) 67 | if not name.startswith("_") 68 | } 69 | 70 | 71 | AGENT_METHODS = _public_methods(Agent) 72 | SESSION_METHODS = _public_methods(AgentSession) 73 | 74 | def _scan(paths: list[Path]): 75 | """Return a dict {'Agent': {m: bool}, 'AgentSession': {m: bool}}.""" 76 | found = { 77 | "Agent": defaultdict(bool, {m: False for m in AGENT_METHODS}), 78 | "AgentSession": defaultdict(bool, {m: False for m in SESSION_METHODS}), 79 | } 80 | 81 | for base in paths: 82 | for py in base.rglob("*.py"): 83 | if any(part in EXCLUDE_DIRS for part in py.parts): 84 | continue 85 | 86 | try: 87 | code = py.read_text(encoding="utf-8", errors="ignore") 88 | except Exception: 89 | continue 90 | 91 | for m in AGENT_METHODS: 92 | call_pat = f".{m}(" 93 | def_pat = f"def {m}(" 94 | if call_pat in code or def_pat in code: 95 | found["Agent"][m] = True 96 | for m in SESSION_METHODS: 97 | call_pat = f".{m}(" 98 | def_pat = f"def {m}(" 99 | if call_pat in code or def_pat in code: 100 | found["AgentSession"][m] = True 101 | return found 102 | 103 | def _report(found: dict[str, dict[str, bool]], warn_only=False): 104 | incomplete = False 105 | uncovered_methods = [] 106 | 107 | for cls, methods in found.items(): 108 | 
total = len(methods) 109 | used = sum(methods.values()) 110 | 111 | if used < total: 112 | incomplete = True 113 | 114 | if not warn_only: 115 | print(f"\n{cls} ({used}/{total} methods used)") 116 | for m in sorted(methods): 117 | tick = "✔" if methods[m] else "✘" 118 | print(f" {tick} {m}") 119 | if not methods[m]: 120 | uncovered_methods.append(f"{cls}.{m}") 121 | elif used < total: 122 | print(f"\nWARNING: {cls} has uncovered methods ({used}/{total} covered)") 123 | for m in sorted(methods): 124 | if not methods[m]: 125 | print(f" Missing: {cls}.{m}") 126 | uncovered_methods.append(f"{cls}.{m}") 127 | 128 | return incomplete, uncovered_methods 129 | 130 | if __name__ == "__main__": 131 | parser = argparse.ArgumentParser(description="Check Agent API coverage in examples") 132 | parser.add_argument("paths", nargs="*", default=[Path.cwd()], 133 | help="Paths to scan (default: current directory)") 134 | parser.add_argument("--warn-only", action="store_true", 135 | help="Only show warnings for uncovered methods") 136 | parser.add_argument("--fail-on-incomplete", action="store_true", 137 | help="Return non-zero exit code if coverage is incomplete") 138 | 139 | args = parser.parse_args() 140 | 141 | bases = [Path(p) for p in args.paths] 142 | coverage = _scan(bases) 143 | incomplete, uncovered = _report(coverage, warn_only=args.warn_only) 144 | 145 | if incomplete and args.fail_on_incomplete: 146 | print(f"\nERROR: Found {len(uncovered)} uncovered methods. Add examples that use these methods.") 147 | sys.exit(1) -------------------------------------------------------------------------------- /complex-agents/medical_office_triage/prompts/billing_prompt.yaml: -------------------------------------------------------------------------------- 1 | instructions: | 2 | You are the Medical Billing agent at a healthcare office. You help patients with insurance information, 3 | copayments, medical bills, payment processing, and billing inquiries. Be clear and precise with financial information. 4 | 5 | Follow these guidelines: 6 | - Greet the patient and confirm their identity for HIPAA compliance and security purposes 7 | - Address medical billing inquiries with accuracy and attention to detail 8 | - Explain medical charges, insurance coverage, copays, and payment options clearly 9 | - Handle sensitive patient financial information with appropriate security measures 10 | - Offer solutions for payment issues or medical billing discrepancies 11 | - Provide information about available payment plans or financial assistance if relevant 12 | - Thank the patient for choosing our medical practice and ask if they have other billing questions -------------------------------------------------------------------------------- /complex-agents/medical_office_triage/prompts/support_prompt.yaml: -------------------------------------------------------------------------------- 1 | instructions: | 2 | You are the Patient Support agent at a medical office. You help patients with appointment scheduling, 3 | prescription refills, medical records requests, and general healthcare questions. Be patient, empathetic and thorough in your explanations. 
4 | 5 | Follow these guidelines: 6 | - Greet the patient and acknowledge that you're here to help with their healthcare needs 7 | - Ask for specific details about their request to better understand it 8 | - Provide clear information about office procedures, appointment availability, and medical services 9 | - Use simple language and avoid medical jargon unless the patient demonstrates medical knowledge 10 | - Confirm whether you've addressed their needs completely 11 | - If you can't resolve their issue, explain what steps will be taken next 12 | - Thank them for their patience and offer additional assistance if needed 13 | - Maintain patient confidentiality and follow HIPAA guidelines at all times -------------------------------------------------------------------------------- /complex-agents/medical_office_triage/prompts/triage_prompt.yaml: -------------------------------------------------------------------------------- 1 | instructions: | 2 | You are the Medical Office Triage agent. Your job is to determine if the patient needs 3 | help with medical support services or billing issues. Ask questions to understand their needs, 4 | then transfer them to the appropriate department. 5 | 6 | Follow these guidelines: 7 | - Greet the patient warmly and ask how you can help them today 8 | - Listen carefully to determine if their issue is related to medical services or billing 9 | - Ask clarifying questions if needed to properly categorize their request 10 | - For medical services: appointment scheduling, prescription refills, medical advice, test results 11 | - For billing: insurance questions, copays, medical bills, payment plans 12 | - Transfer them to the appropriate department once you understand their needs 13 | - If the patient has multiple issues, address the most urgent concern first 14 | - Be professional, courteous, and empathetic in your communication 15 | - Maintain patient confidentiality and follow HIPAA guidelines at all times -------------------------------------------------------------------------------- /complex-agents/medical_office_triage/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | def load_prompt(filename): 5 | """Load a prompt from a YAML file.""" 6 | script_dir = os.path.dirname(os.path.abspath(__file__)) 7 | prompt_path = os.path.join(script_dir, 'prompts', filename) 8 | 9 | try: 10 | with open(prompt_path, 'r') as file: 11 | prompt_data = yaml.safe_load(file) 12 | return prompt_data.get('instructions', '') 13 | except (FileNotFoundError, yaml.YAMLError) as e: 14 | print(f"Error loading prompt file {filename}: {e}") 15 | return "" -------------------------------------------------------------------------------- /complex-agents/personal_shopper/add_test_orders.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import logging 5 | from database import CustomerDatabase 6 | 7 | # Configure logging 8 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 9 | logger = logging.getLogger("test-orders") 10 | 11 | def add_test_orders(): 12 | """Add test orders for Shayne Parlo.""" 13 | # Initialize the database 14 | db = CustomerDatabase() 15 | 16 | # Create or get Shayne Parlo 17 | first_name = "Shayne" 18 | last_name = "Parlo" 19 | customer_id = db.get_or_create_customer(first_name, last_name) 20 | logger.info(f"Customer ID for {first_name} {last_name}: 
{customer_id}") 21 | 22 | # Add test orders 23 | 24 | # Order 1: Electronics 25 | order1 = { 26 | "items": [ 27 | { 28 | "name": "Smartphone XS Pro", 29 | "quantity": 1, 30 | "price": 999.99 31 | }, 32 | { 33 | "name": "Wireless Earbuds", 34 | "quantity": 1, 35 | "price": 149.99 36 | }, 37 | { 38 | "name": "Phone Case (Black)", 39 | "quantity": 1, 40 | "price": 29.99 41 | } 42 | ], 43 | "total": 1179.97, 44 | "payment_method": "Credit Card", 45 | "shipping_address": "123 Main St, Anytown, USA" 46 | } 47 | 48 | # Order 2: Clothing 49 | order2 = { 50 | "items": [ 51 | { 52 | "name": "Men's Casual Shirt (Blue)", 53 | "quantity": 2, 54 | "price": 39.99 55 | }, 56 | { 57 | "name": "Jeans (Dark Wash)", 58 | "quantity": 1, 59 | "price": 59.99 60 | }, 61 | { 62 | "name": "Leather Belt", 63 | "quantity": 1, 64 | "price": 34.99 65 | } 66 | ], 67 | "total": 174.96, 68 | "payment_method": "PayPal", 69 | "shipping_address": "123 Main St, Anytown, USA" 70 | } 71 | 72 | # Order 3: Home Goods 73 | order3 = { 74 | "items": [ 75 | { 76 | "name": "Coffee Maker", 77 | "quantity": 1, 78 | "price": 89.99 79 | }, 80 | { 81 | "name": "Towel Set", 82 | "quantity": 1, 83 | "price": 49.99 84 | }, 85 | { 86 | "name": "Decorative Pillows", 87 | "quantity": 2, 88 | "price": 24.99 89 | } 90 | ], 91 | "total": 189.96, 92 | "payment_method": "Credit Card", 93 | "shipping_address": "123 Main St, Anytown, USA" 94 | } 95 | 96 | # Add orders to database 97 | order1_id = db.add_order(customer_id, order1) 98 | logger.info(f"Added Order #{order1_id}: Electronics - Total: ${order1['total']}") 99 | 100 | order2_id = db.add_order(customer_id, order2) 101 | logger.info(f"Added Order #{order2_id}: Clothing - Total: ${order2['total']}") 102 | 103 | order3_id = db.add_order(customer_id, order3) 104 | logger.info(f"Added Order #{order3_id}: Home Goods - Total: ${order3['total']}") 105 | 106 | # Verify orders were added 107 | order_history = db.get_customer_order_history(first_name, last_name) 108 | logger.info(f"Order history for {first_name} {last_name}:\n{order_history}") 109 | 110 | return order1_id, order2_id, order3_id 111 | 112 | if __name__ == "__main__": 113 | order_ids = add_test_orders() 114 | print(f"Added test orders with IDs: {order_ids}") 115 | print("Test orders have been added successfully for Shayne Parlo.") -------------------------------------------------------------------------------- /complex-agents/personal_shopper/customer_data.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit-examples/python-agents-examples/35d7334a47d1eec24827e237dca83bf26bd8c1ca/complex-agents/personal_shopper/customer_data.db -------------------------------------------------------------------------------- /complex-agents/personal_shopper/database.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import os 3 | import json 4 | from typing import List, Dict, Optional, Any 5 | import logging 6 | 7 | logger = logging.getLogger("personal-shopper-db") 8 | logger.setLevel(logging.INFO) 9 | 10 | class CustomerDatabase: 11 | def __init__(self, db_path: str = None): 12 | """Initialize the customer database.""" 13 | if db_path is None: 14 | # Use a default path in the same directory as this file 15 | script_dir = os.path.dirname(os.path.abspath(__file__)) 16 | db_path = os.path.join(script_dir, 'customer_data.db') 17 | 18 | self.db_path = db_path 19 | self._initialize_db() 20 | 21 | def _initialize_db(self): 22 | """Create 
the database and tables if they don't exist.""" 23 | conn = sqlite3.connect(self.db_path) 24 | cursor = conn.cursor() 25 | 26 | # Create customers table 27 | cursor.execute(''' 28 | CREATE TABLE IF NOT EXISTS customers ( 29 | id INTEGER PRIMARY KEY AUTOINCREMENT, 30 | first_name TEXT NOT NULL, 31 | last_name TEXT NOT NULL, 32 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 33 | ) 34 | ''') 35 | 36 | # Create orders table 37 | cursor.execute(''' 38 | CREATE TABLE IF NOT EXISTS orders ( 39 | id INTEGER PRIMARY KEY AUTOINCREMENT, 40 | customer_id INTEGER NOT NULL, 41 | order_details TEXT NOT NULL, 42 | order_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 43 | FOREIGN KEY (customer_id) REFERENCES customers (id) 44 | ) 45 | ''') 46 | 47 | conn.commit() 48 | conn.close() 49 | logger.info(f"Database initialized at {self.db_path}") 50 | 51 | def get_or_create_customer(self, first_name: str, last_name: str) -> int: 52 | """Get a customer by name or create if not exists. Returns customer ID.""" 53 | conn = sqlite3.connect(self.db_path) 54 | cursor = conn.cursor() 55 | 56 | # Check if customer exists 57 | cursor.execute( 58 | "SELECT id FROM customers WHERE first_name = ? AND last_name = ?", 59 | (first_name, last_name) 60 | ) 61 | result = cursor.fetchone() 62 | 63 | if result: 64 | customer_id = result[0] 65 | logger.info(f"Found existing customer: {first_name} {last_name} (ID: {customer_id})") 66 | else: 67 | # Create new customer 68 | cursor.execute( 69 | "INSERT INTO customers (first_name, last_name) VALUES (?, ?)", 70 | (first_name, last_name) 71 | ) 72 | customer_id = cursor.lastrowid 73 | logger.info(f"Created new customer: {first_name} {last_name} (ID: {customer_id})") 74 | 75 | conn.commit() 76 | conn.close() 77 | return customer_id 78 | 79 | def add_order(self, customer_id: int, order_details: Dict[str, Any]) -> int: 80 | """Add a new order for a customer. Returns order ID.""" 81 | conn = sqlite3.connect(self.db_path) 82 | cursor = conn.cursor() 83 | 84 | # Convert order details to JSON string 85 | order_json = json.dumps(order_details) 86 | 87 | cursor.execute( 88 | "INSERT INTO orders (customer_id, order_details) VALUES (?, ?)", 89 | (customer_id, order_json) 90 | ) 91 | 92 | order_id = cursor.lastrowid 93 | logger.info(f"Added new order (ID: {order_id}) for customer ID: {customer_id}") 94 | 95 | conn.commit() 96 | conn.close() 97 | return order_id 98 | 99 | def get_customer_orders(self, customer_id: int) -> List[Dict[str, Any]]: 100 | """Get all orders for a customer.""" 101 | conn = sqlite3.connect(self.db_path) 102 | conn.row_factory = sqlite3.Row # This enables column access by name 103 | cursor = conn.cursor() 104 | 105 | cursor.execute( 106 | "SELECT id, order_details, order_date FROM orders WHERE customer_id = ? ORDER BY order_date DESC", 107 | (customer_id,) 108 | ) 109 | 110 | orders = [] 111 | for row in cursor.fetchall(): 112 | order_data = json.loads(row['order_details']) 113 | orders.append({ 114 | 'id': row['id'], 115 | 'date': row['order_date'], 116 | 'details': order_data 117 | }) 118 | 119 | conn.close() 120 | return orders 121 | 122 | def get_customer_order_history(self, first_name: str, last_name: str) -> str: 123 | """Get a formatted string of customer order history for LLM consumption.""" 124 | conn = sqlite3.connect(self.db_path) 125 | cursor = conn.cursor() 126 | 127 | # Get customer ID 128 | cursor.execute( 129 | "SELECT id FROM customers WHERE first_name = ? 
AND last_name = ?", 130 | (first_name, last_name) 131 | ) 132 | result = cursor.fetchone() 133 | 134 | if not result: 135 | conn.close() 136 | return "No order history found for this customer." 137 | 138 | customer_id = result[0] 139 | orders = self.get_customer_orders(customer_id) 140 | 141 | if not orders: 142 | return f"Customer {first_name} {last_name} has no previous orders." 143 | 144 | # Format order history for LLM 145 | history = f"Order history for {first_name} {last_name}:\n\n" 146 | 147 | for order in orders: 148 | history += f"Order #{order['id']} (Date: {order['date']}):\n" 149 | details = order['details'] 150 | 151 | if 'items' in details: 152 | for item in details['items']: 153 | history += f"- {item.get('quantity', 1)}x {item.get('name', 'Unknown Item')}" 154 | if 'price' in item: 155 | history += f" (${item['price']})" 156 | history += "\n" 157 | else: 158 | # Handle case where order details might be in a different format 159 | history += f"- {json.dumps(details)}\n" 160 | 161 | history += "\n" 162 | 163 | conn.close() 164 | return history -------------------------------------------------------------------------------- /complex-agents/personal_shopper/prompts/returns_prompt.yaml: -------------------------------------------------------------------------------- 1 | instructions: | 2 | You are the Returns agent for our personal shopping service. You help customers with returning 3 | items, processing refunds, and resolving issues with their purchases. Be patient and solution-oriented. 4 | 5 | Return Policies: 6 | - 60-day return window for most items 7 | - Items must be in original condition with tags attached 8 | - Original receipt or order number required for all returns 9 | - Free return shipping for defective items 10 | - Store credit offered for returns without receipt 11 | - Expedited refunds available for loyalty program members 12 | - Special items (electronics, perishables) have a 14-day return window 13 | 14 | Follow these guidelines: 15 | - Greet the customer and express that you're here to help with their return 16 | - If the customer hasn't been identified yet, ask for their first and last name and use the identify_customer function 17 | - Use get_order_history to retrieve the customer's previous orders 18 | - Ask for the order number and item they wish to return 19 | - Determine the reason for the return to provide the appropriate solution 20 | - Use process_return to handle the return (requires order ID, item name, and reason) 21 | - Clearly explain the return process and any applicable policies 22 | - Process the return or exchange efficiently 23 | - Offer alternatives if the return doesn't meet policy requirements 24 | - Thank them for their patience and ask if they need help with anything else 25 | 26 | Return Process: 27 | 1. Identify the customer using identify_customer 28 | 2. Retrieve their order history using get_order_history 29 | 3. Confirm which item they want to return and from which order 30 | 4. Process the return using process_return 31 | 32 | Always verify that the item being returned exists in the customer's order history before processing the return. -------------------------------------------------------------------------------- /complex-agents/personal_shopper/prompts/sales_prompt.yaml: -------------------------------------------------------------------------------- 1 | instructions: | 2 | You are the Sales agent for our personal shopping service. You help customers find and purchase 3 | products that meet their needs. 
Be enthusiastic and knowledgeable about our product offerings. 4 | 5 | Sales Policies: 6 | - We offer a 30-day price match guarantee on all items 7 | - Free shipping on orders over $50 8 | - 10% discount for first-time customers (promo code: WELCOME10) 9 | - Loyalty program members earn 2 points per dollar spent 10 | - Financing available on purchases over $200 11 | 12 | Follow these guidelines: 13 | - Greet the customer warmly and ask about their shopping needs 14 | - If the customer hasn't been identified yet, ask for their first and last name and use the identify_customer function 15 | - Ask questions to understand their preferences, budget, and requirements 16 | - Make personalized product recommendations based on their needs 17 | - Highlight key features and benefits of recommended products 18 | - Inform customers about current promotions and discounts 19 | - Use start_order to begin a new order for the customer 20 | - Use add_item_to_order to add each item the customer wants to purchase 21 | - When the order is complete, use complete_order to finalize the purchase 22 | - Thank them for their business and offer additional assistance if needed 23 | 24 | Order Process: 25 | 1. Identify the customer using identify_customer 26 | 2. Start a new order using start_order 27 | 3. Add items to the order using add_item_to_order (include item name, quantity, and price) 28 | 4. Complete the order using complete_order 29 | 30 | Remember that all customer orders are saved to our database and will be available if they need to make a return later. -------------------------------------------------------------------------------- /complex-agents/personal_shopper/prompts/triage_prompt.yaml: -------------------------------------------------------------------------------- 1 | instructions: | 2 | You are the Personal Shopper Triage agent. Your job is to determine if the customer needs 3 | help with making a purchase (Sales) or returning an item (Returns). Ask questions to understand 4 | their needs, then transfer them to the appropriate department. 5 | 6 | Follow these guidelines: 7 | - Greet the customer warmly and ask how you can help them with their shopping needs today 8 | - Ask for the customer's first and last name to identify them in our system using the identify_customer function 9 | - Listen carefully to determine if they want to make a purchase or return an item 10 | - Ask clarifying questions if needed to properly categorize their request 11 | - Transfer them to the appropriate department once you understand their needs 12 | - If the customer has multiple issues, address the primary concern first 13 | - Be friendly, helpful, and make the customer feel valued 14 | 15 | Important: Always identify the customer before transferring them to another department. This ensures their information and order history will be available to the next agent. 
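(The department hand-off these prompts describe is implemented in `personal_shopper.py`, which is not reproduced in this excerpt. A minimal sketch of the pattern — a `@function_tool` that returns the next `Agent`, the same approach used in `flows/simple_flow.py` later in this listing — might look like the following; the class names and instructions here are hypothetical.)

```python
from livekit.agents.llm import function_tool
from livekit.agents.voice import Agent

class SalesAgent(Agent):
    def __init__(self) -> None:
        super().__init__(instructions="You are the Sales agent.")

class TriageAgent(Agent):
    def __init__(self) -> None:
        super().__init__(instructions="You are the Triage agent.")

    @function_tool
    async def transfer_to_sales(self) -> Agent:
        """Transfer the customer to the Sales department."""
        await self.session.say("Let me connect you with our Sales team.")
        # Returning a new Agent from a tool call makes it the active agent
        # for the session, carrying the conversation forward.
        return SalesAgent()
```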
-------------------------------------------------------------------------------- /complex-agents/personal_shopper/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | def load_prompt(filename): 5 | """Load a prompt from a YAML file.""" 6 | script_dir = os.path.dirname(os.path.abspath(__file__)) 7 | prompt_path = os.path.join(script_dir, 'prompts', filename) 8 | 9 | try: 10 | with open(prompt_path, 'r') as file: 11 | prompt_data = yaml.safe_load(file) 12 | return prompt_data.get('instructions', '') 13 | except (FileNotFoundError, yaml.YAMLError) as e: 14 | print(f"Error loading prompt file {filename}: {e}") 15 | return "" -------------------------------------------------------------------------------- /egress/recording_agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit import api 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.plugins import openai, silero, deepgram 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | logger = logging.getLogger("recording-agent") 13 | logger.setLevel(logging.INFO) 14 | 15 | class RecordingAgent(Agent): 16 | def __init__(self) -> None: 17 | super().__init__( 18 | instructions=""" 19 | You are a helpful agent. When the user speaks, you listen and respond. 20 | """, 21 | stt=deepgram.STT(), 22 | llm=openai.LLM(model="gpt-4o"), 23 | tts=openai.TTS(), 24 | vad=silero.VAD.load() 25 | ) 26 | 27 | async def on_enter(self): 28 | self.session.generate_reply() 29 | 30 | async def entrypoint(ctx: JobContext): 31 | file_contents = "" 32 | with open("/path/to/credentials.json", "r") as f: 33 | file_contents = f.read() 34 | 35 | req = api.RoomCompositeEgressRequest( 36 | room_name="my-room", 37 | layout="speaker", 38 | preset=api.EncodingOptionsPreset.H264_720P_30, 39 | audio_only=False, 40 | segment_outputs=[api.SegmentedFileOutput( 41 | filename_prefix="my-output", 42 | playlist_name="my-playlist.m3u8", 43 | live_playlist_name="my-live-playlist.m3u8", 44 | segment_duration=5, 45 | gcp=api.GCPUpload( 46 | credentials=file_contents, 47 | bucket="", 48 | ), 49 | )], 50 | ) 51 | lkapi = api.LiveKitAPI() 52 | res = await lkapi.egress.start_room_composite_egress(req) 53 | 54 | await ctx.connect() 55 | 56 | session = AgentSession() 57 | 58 | await session.start( 59 | agent=RecordingAgent(), 60 | room=ctx.room 61 | ) 62 | 63 | await lkapi.aclose() 64 | 65 | if __name__ == "__main__": 66 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /evaluating-agents/README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | This example shows how to test or evaluate an agent using another agent. 4 | 5 | # How to Run 6 | 7 | 1. Run both agents 8 | 9 | ``` console 10 | python agent_to_test.py start 11 | python agent_evals.py start 12 | ``` 13 | 14 | 2. 
Dispatch agents to the same room 15 | 16 | ``` console 17 | lk dispatch create \ 18 | --agent-name agent_to_test \ 19 | --room my-room \ 20 | --metadata 'my_job_metadata' 21 | 22 | lk dispatch create \ 23 | --agent-name agent_evaluator \ 24 | --room my-room \ 25 | --metadata 'my_job_metadata' 26 | ``` 27 | -------------------------------------------------------------------------------- /evaluating-agents/agent_evals.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit import rtc 5 | from livekit.agents import JobContext, RoomInputOptions, WorkerOptions, cli 6 | from livekit.agents.llm import function_tool 7 | from livekit.agents.voice import Agent, AgentSession, RunContext 8 | from livekit.plugins import openai, deepgram, silero 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | logger = logging.getLogger("openai_llm") 13 | logger.setLevel(logging.INFO) 14 | 15 | class SimpleEvaluationAgent(Agent): 16 | def __init__(self) -> None: 17 | super().__init__( 18 | instructions=""" 19 | You are evaluating the performance of a user. 20 | 21 | Here are the questions you need to ask. These are questions from a fictional world, 22 | the answer might not always seem to make sense, but it's important to only grade the answer 23 | based on the following question and answer pairs: 24 | 25 | Q: What is the airspeed velocity of an unladen african swallow? 26 | A: 42 miles per hour 27 | 28 | Q: What is the capital of France? 29 | A: New Paris City 30 | 31 | Q: What is the capital of Germany? 32 | A: London 33 | 34 | 35 | After each question, call the "grade_answer" function with either "PASS" or "FAIL" based on the agent's answer. 36 | 37 | Do not share the answers with the user. Simply ask the questions and grade the answers. 38 | """, 39 | stt=deepgram.STT(), 40 | llm=openai.LLM(), 41 | tts=openai.TTS(), 42 | vad=silero.VAD.load() 43 | ) 44 | 45 | async def on_enter(self): 46 | self.session.generate_reply() 47 | 48 | @function_tool 49 | async def grade_answer(self, context: RunContext, result: str, question: str): 50 | logger.info(f"Grade for question: {question} - {result}") 51 | self.session.say(result) 52 | return None, "I've graded the answer." 
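    # How the grading loop above works, in brief: because grade_answer is
    # decorated with @function_tool, it is exposed to the evaluator's LLM as a
    # callable tool. After each scripted question is asked and the agent under
    # test answers, the LLM is expected to call grade_answer with "PASS" or
    # "FAIL", which logs the grade and speaks the result into the room.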
53 | 54 | async def entrypoint(ctx: JobContext): 55 | await ctx.connect() 56 | 57 | session = AgentSession() 58 | 59 | await session.start( 60 | agent=SimpleEvaluationAgent(), 61 | room=ctx.room, 62 | room_input_options=RoomInputOptions( 63 | participant_kinds=[ 64 | rtc.ParticipantKind.PARTICIPANT_KIND_AGENT, 65 | ] 66 | ), 67 | ) 68 | 69 | if __name__ == "__main__": 70 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, agent_name="agent_evaluator")) 71 | -------------------------------------------------------------------------------- /evaluating-agents/agent_to_test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit import rtc 5 | from livekit.agents import JobContext, RoomInputOptions, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, deepgram, silero 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("openai_llm") 12 | logger.setLevel(logging.INFO) 13 | 14 | class SimpleAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful agent. 19 | """, 20 | stt=deepgram.STT(), 21 | llm=openai.LLM(), 22 | tts=openai.TTS(), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def entrypoint(ctx: JobContext): 27 | await ctx.connect() 28 | 29 | session = AgentSession() 30 | 31 | await session.start( 32 | agent=SimpleAgent(), 33 | room=ctx.room, 34 | room_input_options=RoomInputOptions( 35 | # uncomment to enable Krisp BVC noise cancellation 36 | # noise_cancellation=noise_cancellation.BVC(), 37 | # listen agents in addition to SIP and standard participants 38 | participant_kinds=[ 39 | rtc.ParticipantKind.PARTICIPANT_KIND_SIP, 40 | rtc.ParticipantKind.PARTICIPANT_KIND_STANDARD, 41 | rtc.ParticipantKind.PARTICIPANT_KIND_AGENT, 42 | ] 43 | ), 44 | 45 | ) 46 | 47 | if __name__ == "__main__": 48 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, agent_name="agent_to_test")) 49 | -------------------------------------------------------------------------------- /events/basic_event.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | from livekit.rtc import EventEmitter 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | logger = logging.getLogger("listen-and-respond") 13 | logger.setLevel(logging.INFO) 14 | 15 | class SimpleAgent(Agent): 16 | def __init__(self) -> None: 17 | super().__init__( 18 | instructions=""" 19 | You are a helpful agent. When the user speaks, you listen and respond. 
20 | """, 21 | stt=deepgram.STT(), 22 | llm=openai.LLM(model="gpt-4o"), 23 | tts=openai.TTS(), 24 | vad=silero.VAD.load() 25 | ) 26 | self.emitter.on('greet', self.greet) 27 | 28 | emitter = EventEmitter[str]() 29 | 30 | def greet(self, name): 31 | self.session.say(f"Hello, {name}!") 32 | 33 | async def on_enter(self): 34 | self.emitter.emit('greet', 'Alice') 35 | self.emitter.off('greet', self.greet) 36 | # This will not trigger the greet function, because we unregistered it with the line above 37 | # Comment out the 'off' line above to hear the agent greet Bob as well as Alice 38 | self.emitter.emit('greet', 'Bob') 39 | 40 | async def entrypoint(ctx: JobContext): 41 | await ctx.connect() 42 | 43 | agent = SimpleAgent() 44 | agent.emitter.on('greet', agent.greet) 45 | 46 | # We'll print this log once, because we registered it with the once method 47 | agent.emitter.once('greet', lambda name: print(f"[Once] Greeted {name}")) 48 | 49 | session = AgentSession() 50 | await session.start( 51 | agent=agent, 52 | room=ctx.room 53 | ) 54 | 55 | if __name__ == "__main__": 56 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /events/event_emitters.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | from livekit.rtc import EventEmitter 9 | import asyncio 10 | 11 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 12 | 13 | logger = logging.getLogger("listen-and-respond") 14 | logger.setLevel(logging.INFO) 15 | 16 | class SimpleAgent(Agent): 17 | def __init__(self) -> None: 18 | super().__init__( 19 | instructions=""" 20 | You are a helpful agent. When the user speaks, you listen and respond. 21 | """, 22 | stt=deepgram.STT(), 23 | llm=openai.LLM(model="gpt-4o"), 24 | tts=openai.TTS(), 25 | vad=silero.VAD.load() 26 | ) 27 | self.emitter.on('participant_joined', self.welcome_participant) 28 | self.emitter.on('participant_left', self.farewell_participant) 29 | 30 | emitter = EventEmitter[str]() 31 | 32 | def welcome_participant(self, name: str): 33 | self.session.say(f"Welcome, {name}! Glad you could join.") 34 | 35 | def farewell_participant(self, name: str): 36 | self.session.say(f"Goodbye, {name}. 
See you next time!") 37 | 38 | async def on_enter(self): 39 | # Simulate participant joining and leaving 40 | self.emitter.emit('participant_joined', 'Alice') 41 | asyncio.get_event_loop().call_later( 42 | 10, 43 | lambda: self.emitter.emit('participant_left', 'Alice') 44 | ) 45 | 46 | async def entrypoint(ctx: JobContext): 47 | await ctx.connect() 48 | 49 | agent = SimpleAgent() 50 | agent.emitter.on('participant_joined', agent.welcome_participant) 51 | agent.emitter.on('participant_left', agent.farewell_participant) 52 | 53 | session = AgentSession() 54 | await session.start( 55 | agent=agent, 56 | room=ctx.room 57 | ) 58 | 59 | if __name__ == "__main__": 60 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /flows/declarative_flow.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from dotenv import load_dotenv 3 | from dataclasses import dataclass, field 4 | from typing import Dict, List, Optional, Type 5 | 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit.agents.llm import function_tool 8 | from livekit.agents.voice import Agent, AgentSession 9 | from livekit.plugins import deepgram, openai, cartesia, silero 10 | from livekit import api 11 | 12 | # Load environment and configure logger 13 | load_dotenv() 14 | logger = logging.getLogger("declarative-flow") 15 | logger.setLevel(logging.INFO) 16 | 17 | @dataclass 18 | class SurveyData: 19 | """Stores all survey responses and state.""" 20 | responses: Dict[str, str] = field(default_factory=dict) 21 | current_stage: str = "collect_name" 22 | path_taken: List[str] = field(default_factory=list) 23 | 24 | def record(self, question: str, answer: str): 25 | self.responses[question] = answer 26 | self.path_taken.append(f"Stage '{self.current_stage}' - {question}: {answer}") 27 | 28 | class BaseAgent(Agent): 29 | """Base agent with common setup and transition logic.""" 30 | def __init__(self, job_context: JobContext, instructions: str) -> None: 31 | self.job_context = job_context 32 | super().__init__( 33 | instructions=instructions, 34 | stt=deepgram.STT(), 35 | llm=openai.LLM(model="gpt-4o"), 36 | tts=cartesia.TTS(), 37 | vad=silero.VAD.load() 38 | ) 39 | 40 | async def transition(self) -> Optional[Agent]: 41 | """Move to the next agent based on the flow definition.""" 42 | current = self.session.state.get("current_node") 43 | next_fn = flow.get(current, {}).get("next") 44 | if not next_fn: 45 | return None 46 | next_node = next_fn(self.session.state) 47 | if next_node is None: 48 | return None 49 | self.session.state["current_node"] = next_node 50 | agent_cls: Type[Agent] = flow[next_node]["agent"] 51 | return agent_cls(self.job_context) 52 | 53 | class DataCollectorAgent(BaseAgent): 54 | """Generic agent for collecting a single piece of data and transitioning.""" 55 | key: str 56 | label: str 57 | question: str 58 | instruction: str 59 | 60 | def __init__(self, job_context: JobContext) -> None: 61 | super().__init__(job_context=job_context, instructions=self.instruction) 62 | 63 | async def on_enter(self) -> None: 64 | await self.session.say(self.question) 65 | 66 | @function_tool 67 | async def collect(self, value: str) -> Optional[Agent]: 68 | sd: SurveyData = self.session.userdata 69 | sd.record(self.label, value) 70 | self.session.state[self.key] = value 71 | return await self.transition() 72 | 73 | class CollectNameAgent(DataCollectorAgent): 74 | key = "name" 75 | 
label = "Name" 76 | question = "What is your name?" 77 | instruction = "Please tell me your name." 78 | 79 | class CollectEmailAgent(DataCollectorAgent): 80 | key = "email" 81 | label = "Email" 82 | question = "What is your email address?" 83 | instruction = "Please tell me your email address." 84 | 85 | class SummaryAgent(BaseAgent): 86 | def __init__(self, job_context: JobContext) -> None: 87 | super().__init__(job_context=job_context, instructions="Summary of your information.") 88 | 89 | async def on_enter(self) -> None: 90 | sd: SurveyData = self.session.userdata 91 | name = sd.responses.get("Name", "[not provided]") 92 | email = sd.responses.get("Email", "[not provided]") 93 | summary = f"Thank you! Here is what I collected:\n- Name: {name}\n- Email: {email}" 94 | await self.session.say(summary) 95 | logger.info("Survey complete. Closing session.") 96 | await self.session.aclose() 97 | try: 98 | await self.job_context.api.room.delete_room( 99 | api.DeleteRoomRequest(room=self.job_context.room.name) 100 | ) 101 | except Exception as e: 102 | logger.error(f"Error deleting room: {e}") 103 | 104 | flow = { 105 | "collect_name": { 106 | "agent": CollectNameAgent, 107 | "next": lambda state: "collect_email" 108 | }, 109 | "collect_email": { 110 | "agent": CollectEmailAgent, 111 | "next": lambda state: "summary" 112 | }, 113 | "summary": { 114 | "agent": SummaryAgent, 115 | "next": None 116 | } 117 | } 118 | 119 | async def entrypoint(ctx: JobContext) -> None: 120 | await ctx.connect() 121 | session = AgentSession() 122 | session.userdata = SurveyData() 123 | session.state = {"current_node": "collect_name"} 124 | await session.start(agent=CollectNameAgent(ctx), room=ctx.room) 125 | 126 | if __name__ == "__main__": 127 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) 128 | -------------------------------------------------------------------------------- /flows/simple_flow.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from dotenv import load_dotenv 3 | from livekit.agents import JobContext, WorkerOptions, cli 4 | from livekit.agents.llm import function_tool 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import deepgram, openai, cartesia, silero 7 | from livekit import api 8 | 9 | # Load environment and configure logger 10 | load_dotenv() 11 | logger = logging.getLogger("simple-flow") 12 | logger.setLevel(logging.INFO) 13 | 14 | class BaseAgent(Agent): 15 | def __init__(self, job_context: JobContext, instructions: str) -> None: 16 | self.job_context = job_context 17 | super().__init__( 18 | instructions=instructions, 19 | stt=deepgram.STT(), 20 | llm=openai.LLM(model="gpt-4o"), 21 | tts=cartesia.TTS(), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | class GreetingAgent(BaseAgent): 26 | def __init__(self, job_context: JobContext) -> None: 27 | super().__init__( 28 | job_context=job_context, 29 | instructions=""" 30 | You are a helpful assistant. Start by greeting the user and asking for their name. 31 | """ 32 | ) 33 | 34 | async def on_enter(self) -> None: 35 | await self.session.say("Hello! I'm here to help you. What's your name?") 36 | 37 | @function_tool 38 | async def collect_name(self, name: str) -> Agent: 39 | """ 40 | Receive the user's name, acknowledge it, and transition to asking their favorite color. 41 | """ 42 | await self.session.say(f"Hello, {name}! 
Nice to meet you.") 43 | return AskColorAgent(name=name, job_context=self.job_context) 44 | 45 | class AskColorAgent(BaseAgent): 46 | def __init__(self, name: str, job_context: JobContext) -> None: 47 | super().__init__( 48 | job_context=job_context, 49 | instructions=f"You are talking to {name}. Ask the user what their favorite color is." 50 | ) 51 | self.name = name 52 | 53 | async def on_enter(self) -> None: 54 | await self.session.say(f"{self.name}, what is your favorite color?") 55 | 56 | @function_tool 57 | async def collect_color(self, color: str) -> Agent: 58 | """ 59 | Receive the user's favorite color, acknowledge it, and transition to summary. 60 | """ 61 | await self.session.say(f"{color} is a wonderful choice!") 62 | return SummaryAgent(name=self.name, color=color, job_context=self.job_context) 63 | 64 | class SummaryAgent(BaseAgent): 65 | def __init__(self, name: str, color: str, job_context: JobContext) -> None: 66 | super().__init__( 67 | job_context=job_context, 68 | instructions="Summarize the collected information and end the conversation." 69 | ) 70 | self.name = name 71 | self.color = color 72 | 73 | async def on_enter(self) -> None: 74 | await self.session.say( 75 | f"Thank you, {self.name}. I have learned that your favorite color is {self.color}. Goodbye!" 76 | ) 77 | logger.info("Closing session") 78 | await self.session.aclose() 79 | 80 | logger.info("Deleting room") 81 | request = api.DeleteRoomRequest(room=self.job_context.room.name) 82 | await self.job_context.api.room.delete_room(request) 83 | 84 | async def entrypoint(ctx: JobContext) -> None: 85 | await ctx.connect() 86 | session = AgentSession() 87 | await session.start( 88 | agent=GreetingAgent( 89 | job_context=ctx 90 | ), 91 | room=ctx.room 92 | ) 93 | 94 | if __name__ == "__main__": 95 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /hardware/pi_zero_transcriber.py: -------------------------------------------------------------------------------- 1 | # This is a simple transcriber that uses the LiveKit SDK to transcribe audio from the microphone. 2 | # It displays the transcribed text on a Pirate Audio display on a Raspberry Pi Zero 2 W. 
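# At a glance: the agent joins a LiveKit room, streams microphone audio through
# Deepgram STT, and redraws the ST7789 screen whenever a "user_input_transcribed"
# event fires (interim results included). Final transcripts are also appended
# to user_speech_log.txt.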
3 | 4 | from pathlib import Path 5 | from dotenv import load_dotenv 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.plugins import deepgram 9 | from pathlib import Path 10 | 11 | from PIL import Image 12 | from PIL import ImageDraw 13 | from PIL import ImageFont 14 | import st7789 15 | import os 16 | import signal 17 | import time 18 | import textwrap 19 | 20 | # Load environment variables 21 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 22 | 23 | # Set up the screen 24 | SPI_SPEED_MHZ = 20 25 | screen = st7789.ST7789( 26 | rotation=90, # Needed to display the right way up on Pirate Audio 27 | port=0, # SPI port 28 | cs=1, # SPI port Chip-select channel 29 | dc=9, # BCM pin used for data/command 30 | backlight=13, 31 | spi_speed_hz=SPI_SPEED_MHZ * 1000 * 1000 32 | ) 33 | width = screen.width 34 | height = screen.height 35 | 36 | # Create image for display 37 | image = Image.new("RGB", (240, 240), (0, 0, 0)) 38 | draw = ImageDraw.Draw(image) 39 | 40 | # Set up font 41 | font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 18) 42 | title_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 22) 43 | 44 | # Display startup screen 45 | def show_startup_screen(): 46 | draw.rectangle((0, 0, width, height), fill=(0, 0, 0)) 47 | draw.text((10, 10), "LiveKit", font=title_font, fill=(255, 255, 255)) 48 | draw.text((10, 40), "Transcription", font=title_font, fill=(255, 255, 255)) 49 | draw.text((10, 80), "Starting...", font=font, fill=(200, 200, 200)) 50 | screen.display(image) 51 | 52 | # Display transcription text 53 | def display_transcription(text): 54 | # Clear screen 55 | draw.rectangle((0, 0, width, height), fill=(0, 0, 0)) 56 | 57 | # Add title 58 | draw.text((10, 10), "Transcription", font=title_font, fill=(255, 255, 255)) 59 | 60 | # Wrap and display the transcribed text 61 | y_position = 50 62 | wrapped_text = textwrap.wrap(text, width=26) # Adjust width as needed 63 | 64 | # Display only the most recent lines that fit on screen 65 | max_lines = 9 # Approximate number of lines that fit 66 | display_lines = wrapped_text[-max_lines:] if len(wrapped_text) > max_lines else wrapped_text 67 | 68 | for line in display_lines: 69 | draw.text((10, y_position), line, font=font, fill=(200, 200, 200)) 70 | y_position += 20 # Line spacing 71 | 72 | screen.display(image) 73 | 74 | async def entrypoint(ctx: JobContext): 75 | show_startup_screen() 76 | 77 | await ctx.connect() 78 | session = AgentSession() 79 | 80 | # Keep track of the current transcription 81 | current_transcript = "" 82 | last_transcript = "" 83 | 84 | @session.on("user_input_transcribed") 85 | def on_transcript(transcript): 86 | nonlocal current_transcript, last_transcript 87 | 88 | # Update the current transcript 89 | if transcript.is_final: 90 | # For final transcripts, update the full text 91 | current_transcript += " " + transcript.transcript 92 | current_transcript = current_transcript.strip() 93 | 94 | # Save to file 95 | with open("user_speech_log.txt", "a") as f: 96 | f.write(f"{transcript.transcript}\n") 97 | else: 98 | # For interim results, show the current segment 99 | last_transcript = transcript.transcript 100 | 101 | # Display the combined text (completed transcript + current segment) 102 | display_text = current_transcript 103 | if not transcript.is_final and last_transcript: 104 | display_text += " " + last_transcript 105 | 106 | 
display_transcription(display_text) 107 | 108 | await session.start( 109 | agent=Agent( 110 | instructions="You are a helpful assistant that transcribes user speech to text.", 111 | stt=deepgram.STT() 112 | ), 113 | room=ctx.room 114 | ) 115 | 116 | if __name__ == "__main__": 117 | try: 118 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) 119 | except KeyboardInterrupt: 120 | # Clear screen on exit 121 | draw.rectangle((0, 0, width, height), fill=(0, 0, 0)) 122 | screen.display(image) 123 | print("\nExiting transcriber") 124 | -------------------------------------------------------------------------------- /home_assistant/README.md: -------------------------------------------------------------------------------- 1 | # Home Automation Voice Agent 2 | 3 | This project provides a voice-controlled agent for interacting with your [Home Assistant](https://www.home-assistant.io/) setup. The agent listens for a hot word and allows you to list and control smart devices (like lights and switches) using natural language. 4 | 5 | _(click image to see short YouTube video of agent controlling lighting)_ 6 | [![Control Lights](https://img.youtube.com/vi/IwDlZXJjsFY/0.jpg)](https://youtu.be/IwDlZXJjsFY) 7 | 8 | _(click image to see short YouTube video of agent controlling a TV lift)_ 9 | [![Hide/Unhide TV](https://img.youtube.com/vi/mcz0MOzswV0/0.jpg)](https://youtu.be/mcz0MOzswV0) 10 | 11 | ## Requirements 12 | 13 | - Python 3.8+ 14 | - Home Assistant instance (local or remote) 15 | - The dependencies listed in `requirements.txt` 16 | 17 | ## Environment Variables 18 | 19 | Create a `.env` file in the project root (or set these variables in your environment): 20 | 21 | | Variable | Description | 22 | |-------------------------|------------------------------------------------------------------| 23 | | `HOMEAUTOMAITON_TOKEN` | Your Home Assistant long-lived access token | 24 | | `HOMEAUTOMATION_URL` | (Optional) Home Assistant base URL (default: `http://localhost:8123`) | 25 | 26 | **Example `.env`:** 27 | 28 | ``` 29 | HOMEAUTOMAITON_TOKEN=your_home_assistant_token_here 30 | HOMEAUTOMATION_URL=http://localhost:8123 31 | ``` 32 | 33 | ## Usage 34 | 35 | 1. **Install dependencies:** 36 | ``` 37 | pip install -r ../requirements.txt 38 | ``` 39 | 40 | 2. **Set up your `.env` file** as described above. 41 | 42 | 3. **Run the agent:** 43 | ``` 44 | python homeautomation.py start 45 | ``` 46 | 47 | 4. **How it works:** 48 | - The agent waits for the hot word: **"hey casa"**. 49 | - After hearing the hot word, you can ask it to list devices or control them, e.g.: 50 | - "Hey casa, turn on the kitchen light." 51 | - "Hey casa, what lights are in the kitchen?" 52 | - The agent will respond and control your Home Assistant devices accordingly. 53 | 54 | ## Features 55 | 56 | - **Hot word detection:** Only responds after hearing "hey casa". 57 | - **Device listing:** Lists available lights, switches, and binary sensors. 58 | - **Device control:** Turn devices on or off by name. 59 | 60 | ## How It Works 61 | 62 | ```mermaid 63 | sequenceDiagram 64 | participant User 65 | participant Agent 66 | participant HomeAssistant 67 | 68 | User->>Agent: "hey casa" 69 | Agent->>Agent: Detect hot word 70 | Agent->>User: "Waiting for command" 71 | 72 | User->>Agent: "list devices" 73 | Agent->>HomeAssistant: GET /api/states 74 | HomeAssistant-->>Agent: List of devices 75 | Agent->>User: "Available devices: Kitchen Light, Living Room Switch..."
76 | 77 | User->>Agent: "turn on kitchen light" 78 | Agent->>HomeAssistant: GET /api/states/light.kitchen 79 | HomeAssistant-->>Agent: Device details 80 | Agent->>HomeAssistant: POST /api/services/light/turn_on 81 | HomeAssistant-->>Agent: Success 82 | Agent->>User: "Ok, I've turned Kitchen Light on" 83 | ``` 84 | 85 | ## Troubleshooting 86 | 87 | - Make sure your Home Assistant token is correct and has the necessary permissions. 88 | - Ensure your Home Assistant instance is accessible from the machine running this script. 89 | - Check the logs for connection or authentication errors. 90 | 91 | 92 | ## TODO 93 | 94 | * Add ability to query non binary devices like thermostats and control them 95 | * Make hot word more flexible and less prone to error -------------------------------------------------------------------------------- /livekit-logo-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit-examples/python-agents-examples/35d7334a47d1eec24827e237dca83bf26bd8c1ca/livekit-logo-dark.png -------------------------------------------------------------------------------- /mcp/agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from dotenv import load_dotenv 4 | from pathlib import Path 5 | from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli, mcp 6 | from livekit.plugins import deepgram, openai, silero 7 | from livekit.plugins.turn_detector.multilingual import MultilingualModel 8 | 9 | logger = logging.getLogger("mcp-agent") 10 | 11 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 12 | 13 | class MyAgent(Agent): 14 | def __init__(self) -> None: 15 | super().__init__( 16 | instructions=( 17 | "You can retrieve data via the MCP server. The interface is voice-based: " 18 | "accept spoken user queries and respond with synthesized speech." 
19 | ), 20 | ) 21 | 22 | async def on_enter(self): 23 | self.session.generate_reply() 24 | 25 | async def entrypoint(ctx: JobContext): 26 | await ctx.connect() 27 | 28 | session = AgentSession( 29 | vad=silero.VAD.load(), 30 | stt=deepgram.STT(model="nova-3", language="multi"), 31 | llm=openai.LLM(model="gpt-4o-mini"), 32 | tts=openai.TTS(voice="ash"), 33 | turn_detection=MultilingualModel(), 34 | mcp_servers=[ 35 | mcp.MCPServerHTTP( 36 | url=os.environ.get("ZAPIER_MCP_URL"), 37 | timeout=10, 38 | client_session_timeout_seconds=10, 39 | ), 40 | mcp.MCPServerHTTP( 41 | url="http://localhost:8000/sse", 42 | timeout=5, 43 | client_session_timeout_seconds=5, 44 | ), 45 | ], 46 | ) 47 | 48 | await session.start(agent=MyAgent(), room=ctx.room) 49 | 50 | 51 | if __name__ == "__main__": 52 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /metrics/metrics_llm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import asyncio 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.metrics import LLMMetrics 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.agents.voice.room_io import RoomInputOptions 9 | from livekit.plugins import deepgram, openai, silero 10 | from rich.console import Console 11 | from rich.table import Table 12 | from rich import box 13 | from datetime import datetime 14 | 15 | logger = logging.getLogger("metrics-llm") 16 | logger.setLevel(logging.INFO) 17 | 18 | console = Console() 19 | 20 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 21 | 22 | class LLMMetricsAgent(Agent): 23 | def __init__(self) -> None: 24 | super().__init__( 25 | instructions=""" 26 | You are a helpful agent. 
27 | """, 28 | stt=deepgram.STT(), 29 | llm=openai.LLM(model="gpt-4o"), 30 | tts=openai.TTS(), 31 | vad=silero.VAD.load() 32 | ) 33 | 34 | def sync_wrapper(metrics: LLMMetrics): 35 | asyncio.create_task(self.on_metrics_collected(metrics)) 36 | 37 | self.llm.on("metrics_collected", sync_wrapper) 38 | 39 | async def on_metrics_collected(self, metrics: LLMMetrics) -> None: 40 | table = Table( 41 | title="[bold blue]LLM Metrics Report[/bold blue]", 42 | box=box.ROUNDED, 43 | highlight=True, 44 | show_header=True, 45 | header_style="bold cyan" 46 | ) 47 | 48 | table.add_column("Metric", style="bold green") 49 | table.add_column("Value", style="yellow") 50 | 51 | timestamp = datetime.fromtimestamp(metrics.timestamp).strftime('%Y-%m-%d %H:%M:%S') 52 | 53 | table.add_row("Type", str(metrics.type)) 54 | table.add_row("Label", str(metrics.label)) 55 | table.add_row("Request ID", str(metrics.request_id)) 56 | table.add_row("Timestamp", timestamp) 57 | table.add_row("Duration", f"[white]{metrics.duration:.4f}[/white]s") 58 | table.add_row("Time to First Token", f"[white]{metrics.ttft:.4f}[/white]s") 59 | table.add_row("Cancelled", "✓" if metrics.cancelled else "✗") 60 | table.add_row("Completion Tokens", str(metrics.completion_tokens)) 61 | table.add_row("Prompt Tokens", str(metrics.prompt_tokens)) 62 | table.add_row("Total Tokens", str(metrics.total_tokens)) 63 | table.add_row("Tokens/Second", f"{metrics.tokens_per_second:.2f}") 64 | 65 | console.print("\n") 66 | console.print(table) 67 | console.print("\n") 68 | 69 | 70 | async def entrypoint(ctx: JobContext): 71 | await ctx.connect() 72 | 73 | session = AgentSession() 74 | 75 | await session.start( 76 | agent=LLMMetricsAgent(), 77 | room=ctx.room, 78 | room_input_options=RoomInputOptions(), 79 | ) 80 | 81 | 82 | if __name__ == "__main__": 83 | cli.run_app(WorkerOptions( 84 | entrypoint_fnc=entrypoint) 85 | ) -------------------------------------------------------------------------------- /metrics/metrics_stt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import asyncio 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.metrics import STTMetrics, EOUMetrics 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.agents.voice.room_io import RoomInputOptions 9 | from livekit.plugins import deepgram, openai, silero 10 | from rich.console import Console 11 | from rich.table import Table 12 | from rich import box 13 | from datetime import datetime 14 | 15 | logger = logging.getLogger("metrics-stt") 16 | logger.setLevel(logging.INFO) 17 | 18 | console = Console() 19 | 20 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 21 | 22 | class STTMetricsAgent(Agent): 23 | def __init__(self) -> None: 24 | super().__init__( 25 | instructions=""" 26 | You are a helpful agent. 
27 | """, 28 | stt=deepgram.STT(), 29 | llm=openai.LLM(model="gpt-4o"), 30 | tts=openai.TTS(), 31 | vad=silero.VAD.load() 32 | ) 33 | 34 | def stt_wrapper(metrics: STTMetrics): 35 | asyncio.create_task(self.on_stt_metrics_collected(metrics)) 36 | 37 | def eou_wrapper(metrics: EOUMetrics): 38 | asyncio.create_task(self.on_eou_metrics_collected(metrics)) 39 | 40 | self.stt.on("metrics_collected", stt_wrapper) 41 | self.stt.on("eou_metrics_collected", eou_wrapper) 42 | 43 | async def on_stt_metrics_collected(self, metrics: STTMetrics) -> None: 44 | table = Table( 45 | title="[bold blue]STT Metrics Report[/bold blue]", 46 | box=box.ROUNDED, 47 | highlight=True, 48 | show_header=True, 49 | header_style="bold cyan" 50 | ) 51 | 52 | table.add_column("Metric", style="bold green") 53 | table.add_column("Value", style="yellow") 54 | 55 | timestamp = datetime.fromtimestamp(metrics.timestamp).strftime('%Y-%m-%d %H:%M:%S') 56 | 57 | table.add_row("Type", str(metrics.type)) 58 | table.add_row("Label", str(metrics.label)) 59 | table.add_row("Request ID", str(metrics.request_id)) 60 | table.add_row("Timestamp", timestamp) 61 | table.add_row("Duration", f"[white]{metrics.duration:.4f}[/white]s") 62 | table.add_row("Speech ID", str(metrics.speech_id)) 63 | table.add_row("Error", str(metrics.error)) 64 | table.add_row("Streamed", "✓" if metrics.streamed else "✗") 65 | table.add_row("Audio Duration", f"[white]{metrics.audio_duration:.4f}[/white]s") 66 | 67 | console.print("\n") 68 | console.print(table) 69 | console.print("\n") 70 | 71 | async def on_eou_metrics_collected(self, metrics: EOUMetrics) -> None: 72 | table = Table( 73 | title="[bold blue]End of Utterance Metrics Report[/bold blue]", 74 | box=box.ROUNDED, 75 | highlight=True, 76 | show_header=True, 77 | header_style="bold cyan" 78 | ) 79 | 80 | table.add_column("Metric", style="bold green") 81 | table.add_column("Value", style="yellow") 82 | 83 | timestamp = datetime.fromtimestamp(metrics.timestamp).strftime('%Y-%m-%d %H:%M:%S') 84 | 85 | table.add_row("Type", str(metrics.type)) 86 | table.add_row("Label", str(metrics.label)) 87 | table.add_row("Timestamp", timestamp) 88 | table.add_row("End of Utterance Delay", f"[white]{metrics.end_of_utterance_delay:.4f}[/white]s") 89 | table.add_row("Transcription Delay", f"[white]{metrics.transcription_delay:.4f}[/white]s") 90 | table.add_row("Speech ID", str(metrics.speech_id)) 91 | table.add_row("Error", str(metrics.error)) 92 | 93 | console.print("\n") 94 | console.print(table) 95 | console.print("\n") 96 | 97 | 98 | async def entrypoint(ctx: JobContext): 99 | await ctx.connect() 100 | 101 | session = AgentSession() 102 | 103 | await session.start( 104 | agent=STTMetricsAgent(), 105 | room=ctx.room, 106 | room_input_options=RoomInputOptions(), 107 | ) 108 | 109 | 110 | if __name__ == "__main__": 111 | cli.run_app(WorkerOptions( 112 | entrypoint_fnc=entrypoint) 113 | ) -------------------------------------------------------------------------------- /metrics/metrics_tts.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import asyncio 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.metrics import TTSMetrics 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.agents.voice.room_io import RoomInputOptions 9 | from livekit.plugins import deepgram, openai, silero 10 | from rich.console import Console 11 | from rich.table import Table 12 
| from rich import box 13 | from datetime import datetime 14 | 15 | logger = logging.getLogger("metrics-tts") 16 | logger.setLevel(logging.INFO) 17 | 18 | console = Console() 19 | 20 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 21 | 22 | class TTSMetricsAgent(Agent): 23 | def __init__(self) -> None: 24 | super().__init__( 25 | instructions=""" 26 | You are a helpful agent. 27 | """, 28 | stt=deepgram.STT(), 29 | llm=openai.LLM(model="gpt-4o"), 30 | tts=openai.TTS(), 31 | vad=silero.VAD.load() 32 | ) 33 | 34 | def sync_wrapper(metrics: TTSMetrics): 35 | asyncio.create_task(self.on_metrics_collected(metrics)) 36 | 37 | self.tts.on("metrics_collected", sync_wrapper) 38 | 39 | async def on_metrics_collected(self, metrics: TTSMetrics) -> None: 40 | table = Table( 41 | title="[bold blue]TTS Metrics Report[/bold blue]", 42 | box=box.ROUNDED, 43 | highlight=True, 44 | show_header=True, 45 | header_style="bold cyan" 46 | ) 47 | 48 | table.add_column("Metric", style="bold green") 49 | table.add_column("Value", style="yellow") 50 | 51 | timestamp = datetime.fromtimestamp(metrics.timestamp).strftime('%Y-%m-%d %H:%M:%S') 52 | 53 | table.add_row("Type", str(metrics.type)) 54 | table.add_row("Label", str(metrics.label)) 55 | table.add_row("Request ID", str(metrics.request_id)) 56 | table.add_row("Timestamp", timestamp) 57 | table.add_row("TTFB", f"[white]{metrics.ttfb:.4f}[/white]s") 58 | table.add_row("Duration", f"[white]{metrics.duration:.4f}[/white]s") 59 | table.add_row("Audio Duration", f"[white]{metrics.audio_duration:.4f}[/white]s") 60 | table.add_row("Cancelled", "✓" if metrics.cancelled else "✗") 61 | table.add_row("Characters Count", str(metrics.characters_count)) 62 | table.add_row("Streamed", "✓" if metrics.streamed else "✗") 63 | table.add_row("Speech ID", str(metrics.speech_id)) 64 | table.add_row("Error", str(metrics.error)) 65 | 66 | console.print("\n") 67 | console.print(table) 68 | console.print("\n") 69 | 70 | 71 | async def entrypoint(ctx: JobContext): 72 | await ctx.connect() 73 | 74 | session = AgentSession() 75 | 76 | await session.start( 77 | agent=TTSMetricsAgent(), 78 | room=ctx.room, 79 | room_input_options=RoomInputOptions(), 80 | ) 81 | 82 | 83 | if __name__ == "__main__": 84 | cli.run_app(WorkerOptions( 85 | entrypoint_fnc=entrypoint) 86 | ) -------------------------------------------------------------------------------- /metrics/metrics_vad.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import asyncio 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli, vad 6 | from livekit.agents.metrics import VADMetrics 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.agents.voice.room_io import RoomInputOptions 9 | from livekit.plugins import deepgram, openai, silero 10 | from rich.console import Console 11 | from rich.table import Table 12 | from rich import box 13 | from datetime import datetime 14 | 15 | logger = logging.getLogger("metrics-vad") 16 | logger.setLevel(logging.INFO) 17 | 18 | console = Console() 19 | 20 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 21 | 22 | class VADMetricsAgent(Agent): 23 | def __init__(self) -> None: 24 | super().__init__( 25 | instructions=""" 26 | You are a helpful agent. 
27 | """, 28 | stt=deepgram.STT(), 29 | llm=openai.LLM(model="gpt-4o"), 30 | tts=openai.TTS(), 31 | vad=silero.VAD.load() 32 | ) 33 | 34 | def sync_wrapper(event: vad.VADEvent): 35 | asyncio.create_task(self.on_vad_event(event)) 36 | 37 | self.vad.on("metrics_collected", sync_wrapper) 38 | 39 | async def on_vad_event(self, event: vad.VADEvent): 40 | table = Table( 41 | title="[bold blue]VAD Event Metrics Report[/bold blue]", 42 | box=box.ROUNDED, 43 | highlight=True, 44 | show_header=True, 45 | header_style="bold cyan" 46 | ) 47 | 48 | table.add_column("Metric", style="bold green") 49 | table.add_column("Value", style="yellow") 50 | 51 | timestamp = datetime.fromtimestamp(event.timestamp).strftime('%Y-%m-%d %H:%M:%S') 52 | 53 | table.add_row("Type", str(event.type)) 54 | table.add_row("Timestamp", timestamp) 55 | table.add_row("Idle Time", f"[white]{event.idle_time:.4f}[/white]s") 56 | table.add_row("Inference Duration Total", f"[white]{event.inference_duration_total:.4f}[/white]s") 57 | table.add_row("Inference Count", str(event.inference_count)) 58 | table.add_row("Speech ID", str(event.speech_id)) 59 | table.add_row("Error", str(event.error)) 60 | 61 | console.print("\n") 62 | console.print(table) 63 | console.print("\n") 64 | 65 | 66 | async def entrypoint(ctx: JobContext): 67 | await ctx.connect() 68 | 69 | session = AgentSession() 70 | 71 | await session.start( 72 | agent=VADMetricsAgent(), 73 | room=ctx.room, 74 | room_input_options=RoomInputOptions(), 75 | ) 76 | 77 | 78 | if __name__ == "__main__": 79 | cli.run_app(WorkerOptions( 80 | entrypoint_fnc=entrypoint) 81 | ) -------------------------------------------------------------------------------- /metrics/send-metrics-to-3p/metrics_server/README.md: -------------------------------------------------------------------------------- 1 | # LiveKit Metrics Dashboard 2 | 3 | This Flask application provides a dashboard for viewing metrics collected from LiveKit agents. It receives metrics data via HTTP POST requests and displays them in a real-time dashboard. 4 | 5 | ## Setup 6 | 7 | 1. Install the required dependencies: 8 | 9 | ```bash 10 | cd metrics_server 11 | pip install -r requirements.txt 12 | ``` 13 | 14 | 2. Start the server: 15 | 16 | ```bash 17 | python app.py 18 | ``` 19 | 20 | The server will run on `http://localhost:5001` by default. 21 | 22 | ## Usage 23 | 24 | The metrics dashboard can be accessed at `http://localhost:5001` in your web browser. It displays metrics for LLM, STT, TTS, EOU, and VAD components. 
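To sanity-check the pipeline end to end, you can post a sample metric by hand before wiring up the agent. This is a minimal sketch assuming the server is running locally on its default port; the JSON field names are only illustrative, since the server stores whatever JSON body it receives (the endpoints are listed under API Endpoints below):

```bash
# Hypothetical TTS metric posted by hand; the server adds a received_at
# timestamp and the entry appears in the TTS panel of the dashboard.
curl -X POST http://localhost:5001/metrics/tts \
  -H "Content-Type: application/json" \
  -d '{"ttfb": 0.21, "duration": 1.05, "characters_count": 42}'
```

The same entry is then returned as JSON from `http://localhost:5001/api/metrics/tts`.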
25 | 26 | ## API Endpoints 27 | 28 | - `POST /metrics/<metric_type>`: Submit metrics data for a specific metric type 29 | - `GET /api/metrics`: Get all collected metrics data 30 | - `GET /api/metrics/<metric_type>`: Get metrics data for a specific type 31 | 32 | ## Environment Variables 33 | 34 | The LiveKit agent can be configured to send metrics to this server by setting the `METRICS_SERVER_URL` environment variable in the .env file: 35 | 36 | ``` 37 | METRICS_SERVER_URL=http://localhost:5001 38 | ``` -------------------------------------------------------------------------------- /metrics/send-metrics-to-3p/metrics_server/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify, render_template 2 | import json 3 | import os 4 | from datetime import datetime 5 | from collections import defaultdict 6 | from pathlib import Path 7 | 8 | # Set up the Flask app with proper template directory 9 | template_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'templates')) 10 | app = Flask(__name__, template_folder=template_dir) 11 | 12 | # Store metrics in memory (for simplicity) 13 | metrics_data = defaultdict(list) 14 | metrics_types = ["llm", "stt", "tts", "eou", "vad"] 15 | 16 | @app.route('/metrics/<metric_type>', methods=['POST']) 17 | def receive_metrics(metric_type): 18 | """ 19 | Endpoint to receive metrics data from the LiveKit agent 20 | """ 21 | if metric_type not in metrics_types: 22 | return jsonify({"error": f"Invalid metric type: {metric_type}"}), 400 23 | 24 | data = request.json 25 | # Add timestamp for when server received it 26 | data['received_at'] = datetime.now().isoformat() 27 | metrics_data[metric_type].append(data) 28 | 29 | # Limit the size of stored metrics (keep only last 100 entries per type) 30 | if len(metrics_data[metric_type]) > 100: 31 | metrics_data[metric_type] = metrics_data[metric_type][-100:] 32 | 33 | return jsonify({"status": "success"}), 200 34 | 35 | @app.route('/') 36 | def dashboard(): 37 | """Display metrics dashboard""" 38 | return render_template('dashboard.html', metrics_types=metrics_types) 39 | 40 | @app.route('/api/metrics') 41 | def get_metrics(): 42 | """API endpoint to get all metrics data for AJAX requests""" 43 | return jsonify(metrics_data) 44 | 45 | @app.route('/api/metrics/<metric_type>') 46 | def get_metric_type(metric_type): 47 | """API endpoint to get metrics data for a specific type""" 48 | if metric_type not in metrics_types: 49 | return jsonify({"error": f"Invalid metric type: {metric_type}"}), 400 50 | 51 | return jsonify(metrics_data[metric_type]) 52 | 53 | if __name__ == '__main__': 54 | app.run(debug=True, host='0.0.0.0', port=5001) -------------------------------------------------------------------------------- /metrics/send-metrics-to-3p/metrics_server/requirements.txt: -------------------------------------------------------------------------------- 1 | flask==2.3.3 2 | requests==2.31.0 3 | python-dotenv==1.0.0 -------------------------------------------------------------------------------- /metrics/send-metrics-to-3p/metrics_server/templates/dashboard.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | LiveKit Metrics Dashboard 7 | 8 | 9 | 28 | 29 | 30 |
31 |

LiveKit Metrics Dashboard

32 | 33 | 49 | 50 |
51 | {% for metric_type in metrics_types %} 52 |
56 |
57 |
58 |
59 |
60 |
61 | Latest {{ metric_type.upper() }} Metrics 62 |
63 |
64 |
65 | No data available 66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 | {% endfor %} 74 |
75 |
76 | 77 | 78 | 104 | 105 | -------------------------------------------------------------------------------- /metrics/send-metrics-to-3p/run_3p_metrics_demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Start the metrics server in the background 4 | echo "Starting metrics server..." 5 | cd metrics_server 6 | python app.py & 7 | SERVER_PID=$! 8 | cd .. 9 | 10 | # Wait for the server to start 11 | echo "Waiting for metrics server to start..." 12 | sleep 2 13 | 14 | # Run the LiveKit agent 15 | echo "Starting LiveKit agent..." 16 | cd metrics 17 | python send_metrics_to_3p.py console 18 | 19 | # When the agent is stopped, also stop the server 20 | echo "Stopping metrics server..." 21 | kill $SERVER_PID -------------------------------------------------------------------------------- /multi-agent/long_or_short_agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | from livekit.agents.llm import function_tool 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | logger = logging.getLogger("listen-and-respond") 13 | logger.setLevel(logging.INFO) 14 | 15 | class ShortAgent(Agent): 16 | def __init__(self) -> None: 17 | super().__init__( 18 | instructions=""" 19 | You are a helpful agent. When the user speaks, you listen and respond. Be as brief as possible. Arguably too brief. 20 | """, 21 | stt=deepgram.STT(), 22 | llm=openai.LLM(model="gpt-4o"), 23 | tts=openai.TTS( 24 | model="gpt-4o-mini-tts", 25 | voice="nova" 26 | ), 27 | vad=silero.VAD.load() 28 | ) 29 | 30 | async def on_enter(self): 31 | self.session.say("Hi. It's Short agent.") 32 | 33 | @function_tool 34 | async def change_agent(self): 35 | """Change the agent to the long agent.""" 36 | self.session.update_agent(LongAgent()) 37 | 38 | class LongAgent(Agent): 39 | def __init__(self) -> None: 40 | super().__init__( 41 | instructions=""" 42 | You are a helpful agent. When the user speaks, you listen and respond in overly verbose, flowery, obnoxiously detailed sentences. 43 | """, 44 | stt=deepgram.STT(), 45 | llm=openai.LLM(model="gpt-4o"), 46 | tts=openai.TTS( 47 | model="gpt-4o-mini-tts", 48 | voice="onyx" 49 | ), 50 | vad=silero.VAD.load() 51 | ) 52 | 53 | async def on_enter(self): 54 | self.session.say("Salutations! 
it is I, your friendly neighborhood long agent.") 55 | 56 | @function_tool 57 | async def change_agent(self): 58 | """Change the agent to the short agent.""" 59 | self.session.update_agent(ShortAgent()) 60 | 61 | async def entrypoint(ctx: JobContext): 62 | await ctx.connect() 63 | 64 | session = AgentSession() 65 | 66 | await session.start( 67 | agent=ShortAgent(), 68 | room=ctx.room 69 | ) 70 | 71 | session.once 72 | 73 | if __name__ == "__main__": 74 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/anthropic_llm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import anthropic, openai, silero, deepgram 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("anthropic_llm") 11 | logger.setLevel(logging.INFO) 12 | 13 | class SimpleAgent(Agent): 14 | def __init__(self) -> None: 15 | super().__init__( 16 | instructions=""" 17 | You are a helpful agent. 18 | """, 19 | stt=deepgram.STT(), 20 | llm=anthropic.LLM(model="claude-3-5-sonnet-20240620"), 21 | tts=openai.TTS(instructions="You are a helpful assistant with a pleasant voice. Speak in a natural, conversational tone."), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | async def on_enter(self): 26 | self.session.generate_reply() 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | 33 | await session.start( 34 | agent=SimpleAgent(), 35 | room=ctx.room 36 | ) 37 | 38 | if __name__ == "__main__": 39 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/cerebras_llm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, silero, deepgram 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("cerebras_llm") 11 | logger.setLevel(logging.INFO) 12 | 13 | class SimpleAgent(Agent): 14 | def __init__(self) -> None: 15 | super().__init__( 16 | instructions=""" 17 | You are a helpful agent. 18 | """, 19 | stt=deepgram.STT(), 20 | llm=openai.LLM.with_cerebras(), 21 | tts=openai.TTS(instructions="You are a helpful assistant with a pleasant voice. 
Speak in a natural, conversational tone."), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | async def on_enter(self): 26 | self.session.generate_reply() 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | 33 | await session.start( 34 | agent=SimpleAgent(), 35 | room=ctx.room 36 | ) 37 | 38 | if __name__ == "__main__": 39 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/google_llm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, google, deepgram, silero 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("google_llm") 11 | logger.setLevel(logging.INFO) 12 | 13 | class SimpleAgent(Agent): 14 | def __init__(self) -> None: 15 | super().__init__( 16 | instructions=""" 17 | You are a helpful agent. 18 | """, 19 | stt=deepgram.STT(), 20 | llm=google.LLM(), 21 | tts=openai.TTS(instructions="You are a helpful assistant with a pleasant voice. Speak in a natural, conversational tone."), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | async def on_enter(self): 26 | self.session.generate_reply() 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | 33 | await session.start( 34 | agent=SimpleAgent(), 35 | room=ctx.room 36 | ) 37 | 38 | if __name__ == "__main__": 39 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/interrupt_user.py: -------------------------------------------------------------------------------- 1 | # This agent keeps track of the number of sentences the user has spoken 2 | # and interrupts them if they've said a certain number of sentences. 3 | # We use session.say() to interrupt the user, and set allow_interruptions=False 4 | # on that specific call to prevent the user from interrupting the agent. 5 | # After the agent has spoken, allow_interruptions is once again True so the agent 6 | # can listen for the user's response. 
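# The mechanics, for reference: interim STT transcripts are appended to a buffer, count_sentences()
# runs a simple regex over that buffer, and once max_sentences is reached an asyncio task calls
# handle_interruption() with a purpose-built ChatContext before the agent speaks.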
7 | 8 | import logging 9 | from pathlib import Path 10 | from dotenv import load_dotenv 11 | from livekit.agents import JobContext, WorkerOptions, cli 12 | from livekit.agents.voice import Agent, AgentSession 13 | from livekit.plugins import openai, deepgram, silero 14 | from livekit.agents.llm import ChatContext, ChatMessage 15 | import re 16 | import asyncio 17 | 18 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 19 | 20 | logger = logging.getLogger("interrupt-user") 21 | logger.setLevel(logging.INFO) 22 | 23 | def count_sentences(text): 24 | """Count the number of sentences in text""" 25 | sentences = re.findall(r'[^.!?]+[.!?](?:\s|$)', text) 26 | return len(sentences) 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | agent = Agent( 33 | instructions="You are a helpful agent that politely interrupts users when they talk too much.", 34 | stt=deepgram.STT(), 35 | llm=openai.LLM(), 36 | tts=openai.TTS(), 37 | vad=silero.VAD.load() 38 | ) 39 | 40 | async def handle_interruption(context): 41 | await agent.update_chat_ctx(context) 42 | session.say("Sorry, can I pause you there?", allow_interruptions=False) 43 | await session.generate_reply(allow_interruptions=False) 44 | 45 | transcript_buffer = "" 46 | max_sentences = 3 47 | 48 | @session.on("user_input_transcribed") 49 | def on_transcript(transcript): 50 | nonlocal transcript_buffer 51 | 52 | if transcript.is_final: 53 | logger.info(f"Received final transcript: {transcript.transcript}") 54 | return 55 | 56 | transcript_buffer += " " + transcript.transcript 57 | transcript_buffer = transcript_buffer.strip() 58 | 59 | logger.info(f"Buffer: {transcript_buffer}") 60 | 61 | sentence_count = count_sentences(transcript_buffer) 62 | logger.info(f"Sentence count: {sentence_count}") 63 | 64 | if sentence_count >= max_sentences: 65 | logger.info("Interrupting user...") 66 | 67 | interruption_ctx = ChatContext([ 68 | ChatMessage( 69 | type="message", 70 | role="system", 71 | content=["You are an agent that politely interrupts users who speak too much. 
Create a brief response that acknowledges what they've said so far, then redirects to get more focused information."] 72 | ), 73 | ChatMessage(type="message", role="user", content=[f"User has been speaking and said: {transcript_buffer}"]) 74 | ]) 75 | 76 | asyncio.create_task(handle_interruption(interruption_ctx)) 77 | transcript_buffer = "" 78 | 79 | @session.on("session_start") 80 | def on_session_start(): 81 | nonlocal transcript_buffer 82 | transcript_buffer = "" 83 | session.generate_reply() 84 | 85 | await session.start(agent=agent, room=ctx.room) 86 | 87 | if __name__ == "__main__": 88 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/large_context.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, google, deepgram, silero 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("google_llm") 11 | logger.setLevel(logging.INFO) 12 | 13 | class WarAndPeaceAgent(Agent): 14 | def __init__(self) -> None: 15 | # Load War and Peace text content 16 | book_path = Path(__file__).parent / "lib" / "war_and_peace.txt" 17 | with open(book_path, "r", encoding="utf-8") as f: 18 | war_and_peace_text = f.read() 19 | 20 | super().__init__( 21 | instructions=f""" 22 | You are a War and Peace book club assistant. You help users discuss and understand Leo Tolstoy's novel "War and Peace." 23 | 24 | You can answer questions about the plot, characters, themes, historical context, and literary analysis of the book. 25 | 26 | Here is the complete text of the book that you can reference: 27 | 28 | {war_and_peace_text} 29 | 30 | Be concise but informative in your responses. If asked about specific passages, quote directly from the text. 31 | """, 32 | stt=deepgram.STT(), 33 | llm=google.LLM(model="gemini-2.0-flash"), 34 | tts=openai.TTS(instructions="You are a literary discussion assistant with a pleasant voice. Speak in a natural, conversational tone that conveys enthusiasm for literature."), 35 | vad=silero.VAD.load() 36 | ) 37 | 38 | async def on_enter(self): 39 | self.session.generate_reply("Welcome to the War and Peace book club! I'm here to discuss Leo Tolstoy's epic novel with you. 
What would you like to talk about?") 40 | 41 | async def entrypoint(ctx: JobContext): 42 | await ctx.connect() 43 | 44 | session = AgentSession() 45 | 46 | await session.start( 47 | agent=WarAndPeaceAgent(), 48 | room=ctx.room 49 | ) 50 | 51 | if __name__ == "__main__": 52 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/llm_powered_content_filter.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import Optional, Any 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, deepgram, silero 8 | from livekit.agents.llm import ChatContext, ChatMessage 9 | import asyncio 10 | 11 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 12 | 13 | logger = logging.getLogger("complex-content-filter") 14 | logger.setLevel(logging.INFO) 15 | 16 | class SimpleAgent(Agent): 17 | def __init__(self) -> None: 18 | super().__init__( 19 | instructions="You are a helpful agent.", 20 | stt=deepgram.STT(), 21 | llm=openai.LLM(), 22 | tts=openai.TTS(), 23 | vad=silero.VAD.load() 24 | ) 25 | self.moderator_llm = openai.LLM(model="gpt-4o-mini") 26 | 27 | async def evaluate_content(self, text: str) -> bool: 28 | """Evaluate if content is appropriate using a separate LLM.""" 29 | moderation_ctx = ChatContext([ 30 | ChatMessage( 31 | type="message", 32 | role="system", 33 | content=["You are a content moderator. Respond ONLY with 'APPROPRIATE' or 'INAPPROPRIATE'. Respond with 'INAPPROPRIATE' if the text mentions strawberries."] 34 | ), 35 | ChatMessage(type="message", role="user", content=[f"Evaluate: {text}"]) 36 | ]) 37 | 38 | response = "" 39 | async with self.moderator_llm.chat(chat_ctx=moderation_ctx) as stream: 40 | async for chunk in stream: 41 | if not chunk: 42 | continue 43 | content = getattr(chunk.delta, 'content', None) if hasattr(chunk, 'delta') else str(chunk) 44 | if content: 45 | response += content 46 | 47 | response = response.strip().upper() 48 | logger.info(f"Moderation response for '{text}': {response}") 49 | return "INAPPROPRIATE" not in response 50 | 51 | async def on_enter(self): 52 | self.session.generate_reply() 53 | 54 | def _extract_content(self, chunk: Any) -> Optional[str]: 55 | """Extract content from a chunk, handling different chunk formats.""" 56 | if not chunk: 57 | return None 58 | if isinstance(chunk, str): 59 | return chunk 60 | if hasattr(chunk, 'delta'): 61 | return getattr(chunk.delta, 'content', None) 62 | return None 63 | 64 | async def llm_node(self, chat_ctx, tools, model_settings=None): 65 | async def process_stream(): 66 | buffer = "" 67 | chunk_buffer = [] 68 | sentence_end_chars = {'.', '!', '?'} 69 | 70 | async with self.llm.chat(chat_ctx=chat_ctx, tools=tools, tool_choice=None) as stream: 71 | try: 72 | async for chunk in stream: 73 | content = self._extract_content(chunk) 74 | chunk_buffer.append(chunk) 75 | 76 | if content: 77 | buffer += content 78 | 79 | if any(char in buffer for char in sentence_end_chars): 80 | last_end = max(buffer.rfind(char) for char in sentence_end_chars if char in buffer) 81 | if last_end != -1: 82 | sentence = buffer[:last_end + 1] 83 | buffer = buffer[last_end + 1:] 84 | 85 | if not await self.evaluate_content(sentence): 86 | yield "Content filtered." 
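# Returning right after the placeholder drops the buffered chunks and ends the stream,
# so nothing past the flagged sentence reaches TTS.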
87 | return 88 | 89 | # Yield buffered chunks if content is appropriate 90 | for buffered_chunk in chunk_buffer: 91 | yield buffered_chunk 92 | chunk_buffer = [] 93 | 94 | # Check any remaining complete sentence 95 | if buffer and any(buffer.endswith(char) for char in sentence_end_chars): 96 | if not await self.evaluate_content(buffer): 97 | yield "Content filtered." 98 | return 99 | for buffered_chunk in chunk_buffer: 100 | yield buffered_chunk 101 | 102 | except asyncio.CancelledError: 103 | raise 104 | except Exception as e: 105 | logger.error(f"Error in content filtering: {str(e)}") 106 | yield "[Error in content filtering]" 107 | 108 | return process_stream() 109 | 110 | async def entrypoint(ctx: JobContext): 111 | await ctx.connect() 112 | await AgentSession().start(agent=SimpleAgent(), room=ctx.room) 113 | 114 | if __name__ == "__main__": 115 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/ollama_llm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, deepgram, silero 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("ollama_llm") 11 | logger.setLevel(logging.INFO) 12 | 13 | class SimpleAgent(Agent): 14 | def __init__(self) -> None: 15 | super().__init__( 16 | instructions=""" 17 | You are a helpful agent. 18 | """, 19 | stt=deepgram.STT(), 20 | llm=openai.LLM.with_ollama(), 21 | tts=openai.TTS(), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | async def on_enter(self): 26 | self.session.generate_reply() 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | 33 | await session.start( 34 | agent=SimpleAgent(), 35 | room=ctx.room 36 | ) 37 | 38 | if __name__ == "__main__": 39 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/openai_llm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, deepgram, silero 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("openai_llm") 11 | logger.setLevel(logging.INFO) 12 | 13 | class SimpleAgent(Agent): 14 | def __init__(self) -> None: 15 | super().__init__( 16 | instructions=""" 17 | You are a helpful agent. 
18 | """, 19 | stt=deepgram.STT(), 20 | llm=openai.LLM(), 21 | tts=openai.TTS(), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | async def on_enter(self): 26 | self.session.generate_reply() 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | 33 | await session.start( 34 | agent=SimpleAgent(), 35 | room=ctx.room 36 | ) 37 | 38 | if __name__ == "__main__": 39 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/replacing_llm_output.py: -------------------------------------------------------------------------------- 1 | # In this example, we replace the <think> tags returned by Deepseek with a custom message, 2 | # so that the TTS engine doesn't say the tags as part of the response. 3 | 4 | import logging 5 | from pathlib import Path 6 | from dotenv import load_dotenv 7 | from livekit.agents import JobContext, WorkerOptions, cli 8 | from livekit.agents.voice import Agent, AgentSession 9 | from livekit.plugins import openai, deepgram, silero 10 | 11 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 12 | 13 | logger = logging.getLogger("replacing-llm-output") 14 | logger.setLevel(logging.INFO) 15 | 16 | class SimpleAgent(Agent): 17 | def __init__(self) -> None: 18 | super().__init__( 19 | instructions=""" 20 | You are a helpful agent. 21 | """, 22 | stt=deepgram.STT(), 23 | llm=openai.LLM.with_groq(model="deepseek-r1-distill-llama-70b"), 24 | tts=openai.TTS(), 25 | vad=silero.VAD.load() 26 | ) 27 | 28 | async def on_enter(self): 29 | self.session.generate_reply() 30 | 31 | async def llm_node( 32 | self, chat_ctx, tools, model_settings=None 33 | ): 34 | async def process_stream(): 35 | async with self.llm.chat(chat_ctx=chat_ctx, tools=tools, tool_choice=None) as stream: 36 | async for chunk in stream: 37 | if chunk is None: 38 | continue 39 | 40 | content = getattr(chunk.delta, 'content', None) if hasattr(chunk, 'delta') else str(chunk) 41 | if content is None: 42 | yield chunk 43 | continue 44 | 45 | processed_content = content.replace("<think>", "").replace("</think>", "Okay, I'm ready to respond.") 46 | print(f"Original: {content}, Processed: {processed_content}") 47 | 48 | if processed_content != content: 49 | if hasattr(chunk, 'delta') and hasattr(chunk.delta, 'content'): 50 | chunk.delta.content = processed_content 51 | else: 52 | chunk = processed_content 53 | 54 | yield chunk 55 | 56 | return process_stream() 57 | 58 | async def entrypoint(ctx: JobContext): 59 | await ctx.connect() 60 | 61 | session = AgentSession() 62 | 63 | await session.start( 64 | agent=SimpleAgent(), 65 | room=ctx.room 66 | ) 67 | 68 | if __name__ == "__main__": 69 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/simple_content_filter.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import AsyncIterable, Optional 4 | from dotenv import load_dotenv 5 | from livekit import rtc 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.plugins import openai, deepgram, silero 9 | import asyncio 10 | 11 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 12 | 13 | logger = logging.getLogger("simple-content-filter") 14 | logger.setLevel(logging.INFO) 15 | 16 | class SimpleAgent(Agent): 17 |
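# llm_node() below intercepts the streamed LLM output and replaces any chunk containing
# a banned term ('fail' in this example) with a placeholder string before it reaches TTS.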
def __init__(self) -> None: 18 | super().__init__( 19 | instructions=""" 20 | You are a helpful agent. 21 | """, 22 | stt=deepgram.STT(), 23 | llm=openai.LLM(), 24 | tts=openai.TTS(), 25 | vad=silero.VAD.load() 26 | ) 27 | 28 | async def on_enter(self): 29 | self.session.generate_reply() 30 | 31 | async def llm_node( 32 | self, chat_ctx, tools, model_settings=None 33 | ): 34 | async def process_stream(): 35 | async with self.llm.chat(chat_ctx=chat_ctx, tools=tools, tool_choice=None) as stream: 36 | async for chunk in stream: 37 | if chunk is None: 38 | continue 39 | 40 | content = getattr(chunk.delta, 'content', None) if hasattr(chunk, 'delta') else str(chunk) 41 | if content is None: 42 | yield chunk 43 | continue 44 | 45 | offensive_terms = ['fail'] 46 | print(content) 47 | yield "CONTENT FILTERED" if any(term in content.lower() for term in offensive_terms) else chunk 48 | 49 | return process_stream() 50 | 51 | async def entrypoint(ctx: JobContext): 52 | await ctx.connect() 53 | 54 | session = AgentSession() 55 | 56 | await session.start( 57 | agent=SimpleAgent(), 58 | room=ctx.room 59 | ) 60 | 61 | if __name__ == "__main__": 62 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/transcription_node.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import AsyncIterable 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli, ModelSettings 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, deepgram, silero 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("openai_llm") 12 | logger.setLevel(logging.INFO) 13 | 14 | class SimpleAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful agent. 
19 | """, 20 | stt=deepgram.STT(), 21 | llm=openai.LLM(), 22 | tts=openai.TTS(), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def on_enter(self): 27 | self.session.generate_reply() 28 | 29 | async def transcription_node(self, text: AsyncIterable[str], model_settings: ModelSettings): 30 | """Modify the transcription output by replacing certain words.""" 31 | replacements = { 32 | "hello": "👋 HELLO", 33 | "goodbye": "GOODBYE 👋", 34 | } 35 | 36 | async def process_text(): 37 | async for chunk in text: 38 | modified_chunk = chunk 39 | original_chunk = chunk 40 | 41 | for word, replacement in replacements.items(): 42 | if word in modified_chunk.lower() or word.capitalize() in modified_chunk: 43 | logger.info(f"Replacing '{word}' with '{replacement}' in transcript") 44 | 45 | modified_chunk = modified_chunk.replace(word, replacement) 46 | modified_chunk = modified_chunk.replace(word.capitalize(), replacement) 47 | 48 | if original_chunk != modified_chunk: 49 | logger.info(f"Original: '{original_chunk}'") 50 | logger.info(f"Modified: '{modified_chunk}'") 51 | 52 | yield modified_chunk 53 | 54 | return process_text() 55 | 56 | async def entrypoint(ctx: JobContext): 57 | await ctx.connect() 58 | 59 | session = AgentSession() 60 | 61 | await session.start( 62 | agent=SimpleAgent(), 63 | room=ctx.room 64 | ) 65 | 66 | if __name__ == "__main__": 67 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-stt/keyword_detection.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import AsyncIterable, Optional 4 | from dotenv import load_dotenv 5 | from livekit import rtc 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.plugins import openai, deepgram, silero 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | logger = logging.getLogger("listen-and-respond") 13 | logger.setLevel(logging.INFO) 14 | 15 | class SimpleAgent(Agent): 16 | def __init__(self) -> None: 17 | super().__init__( 18 | instructions=""" 19 | You are a helpful agent. 
20 | """, 21 | stt=deepgram.STT(), 22 | llm=openai.LLM(), 23 | tts=openai.TTS(), 24 | vad=silero.VAD.load() 25 | ) 26 | 27 | async def on_enter(self): 28 | self.session.generate_reply() 29 | 30 | async def stt_node(self, text: AsyncIterable[str], model_settings: Optional[dict] = None) -> Optional[AsyncIterable[rtc.AudioFrame]]: 31 | keywords = ["Shane", "hello", "thanks"] 32 | parent_stream = super().stt_node(text, model_settings) 33 | 34 | if parent_stream is None: 35 | return None 36 | 37 | async def process_stream(): 38 | async for event in parent_stream: 39 | if hasattr(event, 'type') and str(event.type) == "SpeechEventType.FINAL_TRANSCRIPT" and event.alternatives: 40 | transcript = event.alternatives[0].text 41 | 42 | for keyword in keywords: 43 | if keyword.lower() in transcript.lower(): 44 | logger.info(f"Keyword detected: '{keyword}'") 45 | 46 | yield event 47 | 48 | return process_stream() 49 | 50 | async def entrypoint(ctx: JobContext): 51 | await ctx.connect() 52 | 53 | session = AgentSession() 54 | 55 | await session.start( 56 | agent=SimpleAgent(), 57 | room=ctx.room 58 | ) 59 | 60 | if __name__ == "__main__": 61 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-stt/transcriber.py: -------------------------------------------------------------------------------- 1 | # Transcribes user speech to text, and saves it to a file 2 | 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import deepgram 8 | import datetime 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | async def entrypoint(ctx: JobContext): 13 | 14 | await ctx.connect() 15 | session = AgentSession() 16 | 17 | @session.on("user_input_transcribed") 18 | def on_transcript(transcript): 19 | if transcript.is_final: 20 | timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 21 | with open("user_speech_log.txt", "a") as f: 22 | f.write(f"[{timestamp}] {transcript.transcript}\n") 23 | 24 | await session.start( 25 | agent=Agent( 26 | instructions="You are a helpful assistant that transcribes user speech to text.", 27 | stt=deepgram.STT() 28 | ), 29 | room=ctx.room 30 | ) 31 | 32 | if __name__ == "__main__": 33 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/cartesia_tts.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import asyncio 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit import rtc 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.plugins import deepgram, openai, cartesia, silero 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | class CartesiaAgent(Agent): 13 | def __init__(self) -> None: 14 | super().__init__( 15 | instructions=""" 16 | You are a helpful assistant communicating through voice. You're helping me test ... yourself ... since you're the AI agent. 17 | Don't use any unpronouncable characters. 
18 | """, 19 | stt=deepgram.STT(), 20 | llm=openai.LLM(model="gpt-4o"), 21 | tts=cartesia.TTS( 22 | sample_rate=44100, 23 | model="sonic", 24 | voice="87bc56aa-ab01-4baa-9071-77d497064686" 25 | ), 26 | vad=silero.VAD.load() 27 | ) 28 | 29 | async def on_enter(self): 30 | await self.session.say(f"Hi there! Is there anything I can help you with?") 31 | 32 | async def entrypoint(ctx: JobContext): 33 | await ctx.connect() 34 | 35 | session = AgentSession() 36 | 37 | await session.start( 38 | agent=CartesiaAgent(), 39 | room=ctx.room 40 | ) 41 | 42 | if __name__ == "__main__": 43 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/elevenlabs_change_language.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.llm import function_tool 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import deepgram, openai, elevenlabs, silero 8 | 9 | logger = logging.getLogger("language-switcher") 10 | logger.setLevel(logging.INFO) 11 | 12 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 13 | 14 | class LanguageSwitcherAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful assistant communicating through voice. 19 | You can switch to a different language if asked. 20 | Don't use any unpronouncable characters. 21 | """, 22 | stt=deepgram.STT( 23 | model="nova-2-general", 24 | language="en" 25 | ), 26 | llm=openai.LLM(model="gpt-4o"), 27 | tts=elevenlabs.TTS( 28 | model="eleven_turbo_v2_5", 29 | language="en" 30 | ), 31 | vad=silero.VAD.load() 32 | ) 33 | self.current_language = "en" 34 | 35 | self.language_names = { 36 | "en": "English", 37 | "es": "Spanish", 38 | "fr": "French", 39 | "de": "German", 40 | "it": "Italian" 41 | } 42 | 43 | self.deepgram_language_codes = { 44 | "en": "en", 45 | "es": "es", 46 | "fr": "fr-CA", 47 | "de": "de", 48 | "it": "it" 49 | } 50 | 51 | self.greetings = { 52 | "en": "Hello! I'm now speaking in English. How can I help you today?", 53 | "es": "¡Hola! Ahora estoy hablando en español. ¿Cómo puedo ayudarte hoy?", 54 | "fr": "Bonjour! Je parle maintenant en français. Comment puis-je vous aider aujourd'hui?", 55 | "de": "Hallo! Ich spreche jetzt Deutsch. Wie kann ich Ihnen heute helfen?", 56 | "it": "Ciao! Ora sto parlando in italiano. Come posso aiutarti oggi?" 57 | } 58 | 59 | async def on_enter(self): 60 | await self.session.say(f"Hi there! I can speak in multiple languages including Spanish, French, German, and Italian. Just ask me to switch to any of these languages. 
How can I help you today?") 61 | 62 | async def _switch_language(self, language_code: str) -> None: 63 | """Helper method to switch the language""" 64 | if language_code == self.current_language: 65 | await self.session.say(f"I'm already speaking in {self.language_names[language_code]}.") 66 | return 67 | 68 | if self.tts is not None: 69 | self.tts.update_options(language=language_code) 70 | 71 | if self.stt is not None: 72 | deepgram_language = self.deepgram_language_codes.get(language_code, language_code) 73 | self.stt.update_options(language=deepgram_language) 74 | 75 | self.current_language = language_code 76 | 77 | await self.session.say(self.greetings[language_code]) 78 | 79 | @function_tool 80 | async def switch_to_english(self): 81 | """Switch to speaking English""" 82 | await self._switch_language("en") 83 | 84 | @function_tool 85 | async def switch_to_spanish(self): 86 | """Switch to speaking Spanish""" 87 | await self._switch_language("es") 88 | 89 | @function_tool 90 | async def switch_to_french(self): 91 | """Switch to speaking French""" 92 | await self._switch_language("fr") 93 | 94 | @function_tool 95 | async def switch_to_german(self): 96 | """Switch to speaking German""" 97 | await self._switch_language("de") 98 | 99 | @function_tool 100 | async def switch_to_italian(self): 101 | """Switch to speaking Italian""" 102 | await self._switch_language("it") 103 | 104 | 105 | async def entrypoint(ctx: JobContext): 106 | await ctx.connect() 107 | 108 | session = AgentSession() 109 | 110 | await session.start( 111 | agent=LanguageSwitcherAgent(), 112 | room=ctx.room 113 | ) 114 | 115 | if __name__ == "__main__": 116 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/elevenlabs_tts.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from dotenv import load_dotenv 3 | from livekit.agents import JobContext, WorkerOptions, cli 4 | from livekit.agents.voice import Agent, AgentSession 5 | from livekit.plugins import deepgram, openai, elevenlabs, silero 6 | 7 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 8 | 9 | class ElevenLabsAgent(Agent): 10 | def __init__(self) -> None: 11 | super().__init__( 12 | instructions=""" 13 | You are a helpful assistant communicating through voice. You're helping me test ... yourself ... since you're the AI agent. 14 | Don't use any unpronouncable characters. 15 | """, 16 | stt=deepgram.STT(), 17 | llm=openai.LLM(model="gpt-4o"), 18 | tts=elevenlabs.TTS( 19 | encoding="pcm_44100", 20 | model="eleven_multilingual_v2" 21 | ), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | async def on_enter(self): 26 | await self.session.say(f"Hi there! Is there anything I can help you with?") 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | 33 | await session.start( 34 | agent=ElevenLabsAgent(), 35 | room=ctx.room 36 | ) 37 | 38 | if __name__ == "__main__": 39 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/only_greet.py: -------------------------------------------------------------------------------- 1 | # Greets the user when they join the room, but doesn't respond to anything else. 2 | # This agent only has TTS, so it can only speak, not listen or think. 
3 | 4 | from pathlib import Path 5 | from dotenv import load_dotenv 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.plugins import openai 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | class GreeterAgent(Agent): 13 | def __init__(self) -> None: 14 | super().__init__( 15 | instructions="You are a simple greeter that welcomes users when they join.", 16 | tts=openai.TTS() 17 | ) 18 | 19 | async def on_enter(self): 20 | self.session.say("Hi there! Is there anything I can help you with?") 21 | 22 | async def entrypoint(ctx: JobContext): 23 | await ctx.connect() 24 | 25 | session = AgentSession() 26 | 27 | await session.start( 28 | agent=GreeterAgent(), 29 | room=ctx.room 30 | ) 31 | 32 | if __name__ == "__main__": 33 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/openai_tts.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from dotenv import load_dotenv 3 | from livekit.agents import JobContext, WorkerOptions, cli 4 | from livekit.agents.voice import Agent, AgentSession 5 | from livekit.plugins import deepgram, openai, silero 6 | 7 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 8 | 9 | class SimpleAgent(Agent): 10 | def __init__(self) -> None: 11 | super().__init__( 12 | instructions=""" 13 | You are a helpful assistant communicating through voice. You're helping me test ... yourself ... since you're the AI agent. 14 | Don't use any unpronouncable characters. 15 | """, 16 | stt=deepgram.STT(), 17 | llm=openai.LLM(model="gpt-4o"), 18 | tts=openai.TTS(), 19 | vad=silero.VAD.load() 20 | ) 21 | 22 | async def on_enter(self): 23 | await self.session.say(f"Hi there! Is there anything I can help you with?") 24 | 25 | async def entrypoint(ctx: JobContext): 26 | await ctx.connect() 27 | 28 | session = AgentSession() 29 | 30 | await session.start( 31 | agent=SimpleAgent(), 32 | room=ctx.room 33 | ) 34 | 35 | if __name__ == "__main__": 36 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/playai_tts.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from dotenv import load_dotenv 3 | from livekit.agents import JobContext, WorkerOptions, cli 4 | from livekit.agents.voice import Agent, AgentSession 5 | from livekit.plugins import deepgram, openai, playai, silero 6 | 7 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 8 | 9 | class PlayAIAgent(Agent): 10 | def __init__(self) -> None: 11 | super().__init__( 12 | instructions=""" 13 | You are a helpful assistant communicating through voice. You're helping me test ... yourself ... since you're the AI agent. 14 | Don't use any unpronouncable characters. 15 | """, 16 | stt=deepgram.STT(), 17 | llm=openai.LLM(model="gpt-4o"), 18 | tts=playai.TTS( 19 | model="PlayDialog", 20 | sample_rate=44100, 21 | voice="s3://voice-cloning-zero-shot/9f1ee23a-9108-4538-90be-8e62efc195b6/charlessaad/manifest.json" 22 | ), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def on_enter(self): 27 | await self.session.say(f"Hi there! 
Is there anything I can help you with?") 28 | 29 | async def entrypoint(ctx: JobContext): 30 | await ctx.connect() 31 | 32 | session = AgentSession() 33 | 34 | await session.start( 35 | agent=PlayAIAgent(), 36 | room=ctx.room 37 | ) 38 | 39 | if __name__ == "__main__": 40 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/rime_tts.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from dotenv import load_dotenv 3 | from livekit.agents import JobContext, WorkerOptions, cli 4 | from livekit.agents.voice import Agent, AgentSession 5 | from livekit.plugins import deepgram, openai, rime, silero 6 | 7 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 8 | 9 | class RimeAgent(Agent): 10 | def __init__(self) -> None: 11 | super().__init__( 12 | instructions=""" 13 | You are a helpful assistant communicating through voice. You're helping me test ... yourself ... since you're the AI agent. 14 | Don't use any unpronouncable characters. 15 | """, 16 | stt=deepgram.STT(), 17 | llm=openai.LLM(model="gpt-4o"), 18 | tts=rime.TTS( 19 | sample_rate=44100, 20 | model="mistv2", 21 | speaker="abbie" 22 | ), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def on_enter(self): 27 | await self.session.say(f"Hi there! Is there anything I can help you with?") 28 | 29 | async def entrypoint(ctx: JobContext): 30 | await ctx.connect() 31 | 32 | session = AgentSession() 33 | 34 | await session.start( 35 | agent=RimeAgent(), 36 | room=ctx.room 37 | ) 38 | 39 | if __name__ == "__main__": 40 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/short_replies_only.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import AsyncIterable 3 | import logging 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli, ModelSettings 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import deepgram, openai, silero, rime 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("tts_node") 12 | logger.setLevel(logging.INFO) 13 | 14 | class ShortRepliesOnlyAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful assistant communicating through voice. 19 | """, 20 | stt=deepgram.STT(), 21 | llm=openai.LLM(model="gpt-4o"), 22 | tts=rime.TTS(model="arcana"), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def tts_node(self, text: AsyncIterable[str], model_settings: ModelSettings): 27 | MAX_CHUNKS = 20 28 | chunk_count = 0 29 | 30 | async def process_text(): 31 | nonlocal chunk_count 32 | interrupted = False 33 | async for chunk in text: 34 | chunk_count += 1 35 | if chunk_count > MAX_CHUNKS and not interrupted: 36 | logger.info(f"tts_node: Exceeded {MAX_CHUNKS} chunks. Interrupting.") 37 | self.session.interrupt() 38 | self.session.say("I'm sorry, that will take too long to say.") 39 | interrupted = True 40 | break 41 | 42 | if not interrupted: 43 | yield chunk 44 | 45 | return Agent.default.tts_node(self, process_text(), model_settings) 46 | 47 | async def on_enter(self): 48 | await self.session.say(f"Hi there! 
Is there anything I can help you with?") 49 | 50 | async def entrypoint(ctx: JobContext): 51 | await ctx.connect() 52 | 53 | session = AgentSession() 54 | 55 | await session.start( 56 | agent=ShortRepliesOnlyAgent(), 57 | room=ctx.room 58 | ) 59 | 60 | if __name__ == "__main__": 61 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/tts_node.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import AsyncIterable 3 | import logging 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli, ModelSettings 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import deepgram, openai, silero, rime 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("tts_node") 12 | logger.setLevel(logging.INFO) 13 | 14 | class TtsNodeOverrideAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful assistant communicating through voice. 19 | Feel free to use "lol" in your responses when something is funny. 20 | """, 21 | stt=deepgram.STT(), 22 | llm=openai.LLM(model="gpt-4o"), 23 | tts=rime.TTS(model="arcana"), 24 | vad=silero.VAD.load() 25 | ) 26 | 27 | async def tts_node(self, text: AsyncIterable[str], model_settings: ModelSettings): 28 | """Modify the TTS output by replacing 'lol' with ''.""" 29 | 30 | async def process_text(): 31 | async for chunk in text: 32 | original_chunk = chunk 33 | modified_chunk = chunk.replace("lol", "").replace("LOL", "") 34 | 35 | if original_chunk != modified_chunk: 36 | logger.info(f"TTS original: '{original_chunk}'") 37 | logger.info(f"TTS modified: '{modified_chunk}'") 38 | 39 | yield modified_chunk 40 | 41 | return Agent.default.tts_node(self, process_text(), model_settings) 42 | 43 | async def on_enter(self): 44 | await self.session.say(f"Hi there! Is there anything I can help you with? If you say something funny, I might respond with lol.") 45 | 46 | async def entrypoint(ctx: JobContext): 47 | await ctx.connect() 48 | 49 | session = AgentSession() 50 | 51 | await session.start( 52 | agent=TtsNodeOverrideAgent(), 53 | room=ctx.room 54 | ) 55 | 56 | if __name__ == "__main__": 57 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /rag/README.md: -------------------------------------------------------------------------------- 1 | # RAG-Enriched Voice Agent 2 | 3 | This sample project demonstrates a Retrieval-Augmented Generation (RAG) enabled voice agent using LiveKit Agents 1.0. The example scrapes the LiveKit docs site, builds a local index, and then provides that data on demand to the assistant via a function tool. 4 | 5 | ## Prerequisites 6 | 7 | - Python 3.9 or higher 8 | - OpenAI API key 9 | - Deepgram API key 10 | - LiveKit server 11 | 12 | ## Installation 13 | 14 | 1. Clone the repository 15 | 2. Create a virtual environment: 16 | ```bash 17 | python -m venv venv 18 | source venv/bin/activate # On Windows: venv\Scripts\activate 19 | ``` 20 | 3. Install dependencies: 21 | ```bash 22 | pip install -r requirements.txt 23 | ``` 24 | 4. 
Create a `.env` file in the project root with your API keys: 25 | ``` 26 | OPENAI_API_KEY=your_openai_api_key 27 | DEEPGRAM_API_KEY=your_deepgram_api_key 28 | LIVEKIT_URL=your_livekit_url 29 | LIVEKIT_API_KEY=your_livekit_api_key 30 | LIVEKIT_API_SECRET=your_livekit_api_secret 31 | ``` 32 | 33 | ## Project Structure 34 | 35 | - `main.py`: Main agent implementation 36 | - `scrape_docs.py`: Scraper for the LiveKit docs site 37 | - `build_rag_data.py`: Script to build the RAG database from scraped docs 38 | - `rag_db_builder.py`: Database builder implementation 39 | - `rag_handler.py`: RAG processing logic 40 | - `data/`: Directory for vector database files 41 | 42 | ## Usage 43 | 44 | 1. Scrape the docs site: 45 | ```bash 46 | python scrape_docs.py 47 | ``` 48 | 49 | 2. Build the RAG database: 50 | ```bash 51 | python build_rag_data.py 52 | ``` 53 | 54 | 3. Download model files: 55 | ```bash 56 | python main.py download-files 57 | ``` 58 | 59 | 4. Run the agent: 60 | ```bash 61 | python main.py console 62 | ``` 63 | 64 | The agent will start and be ready to handle voice interactions. It will use the RAG system to provide contextually relevant answers to user questions. 65 | -------------------------------------------------------------------------------- /rag/build_rag_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import asyncio 3 | import logging 4 | from pathlib import Path 5 | from dotenv import load_dotenv 6 | from rag_db_builder import RAGBuilder 7 | 8 | # Configure logging 9 | logging.basicConfig( 10 | level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" 11 | ) 12 | logger = logging.getLogger("build-rag-data") 13 | 14 | # Load environment variables 15 | load_dotenv() 16 | 17 | 18 | async def main() -> None: 19 | """ 20 | Build the RAG database from the scraped docs content. 21 | 22 | Usage: 23 | 1. Run scrape_docs.py to scrape the docs content 24 | 2. Run this script to build the RAG database 25 | 3. The database will be created in the 'data' directory 26 | """ 27 | # Check if raw_data.txt exists 28 | raw_data_path = Path(__file__).parent / "data/raw_data.txt" 29 | if not raw_data_path.exists(): 30 | logger.error( 31 | "raw_data.txt not found. 
Please run scrape_docs.py first:\n" 32 | "$ python scrape_docs.py" 33 | ) 34 | return 35 | 36 | # Create and build the RAG database 37 | output_dir = Path(__file__).parent / "data" 38 | output_dir.mkdir(exist_ok=True) 39 | 40 | logger.info("Building RAG database...") 41 | await RAGBuilder.create_from_file( 42 | file_path=raw_data_path, 43 | index_path=output_dir, 44 | data_path=output_dir / "paragraphs.pkl", 45 | embeddings_dimension=1536, 46 | ) 47 | logger.info("RAG database successfully built!") 48 | logger.info(f"Index saved to: {output_dir}") 49 | logger.info(f"Data saved to: {output_dir / 'paragraphs.pkl'}") 50 | 51 | 52 | if __name__ == "__main__": 53 | asyncio.run(main()) 54 | -------------------------------------------------------------------------------- /rag/requirements.txt: -------------------------------------------------------------------------------- 1 | livekit-agents[openai,silero,turn-detector,deepgram]~=1.0 2 | livekit-plugins-noise-cancellation~=0.2 3 | python-dotenv 4 | annoy 5 | aiohttp>=3.8.0 6 | beautifulsoup4>=4.12.0 7 | lxml>=4.9.0 8 | -------------------------------------------------------------------------------- /rag/scrape_docs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import asyncio 3 | import logging 4 | import re 5 | from pathlib import Path 6 | from typing import List, Set 7 | from urllib.parse import urljoin, urlparse 8 | 9 | import aiohttp 10 | from bs4 import BeautifulSoup 11 | from dotenv import load_dotenv 12 | 13 | # Configure logging 14 | logging.basicConfig( 15 | level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" 16 | ) 17 | logger = logging.getLogger("docs-scraper") 18 | 19 | # Load environment variables 20 | load_dotenv() 21 | 22 | BASE_URL = "https://docs.livekit.io" 23 | SITEMAP_URL = f"{BASE_URL}/sitemap.xml" 24 | OUTPUT_FILE = Path(__file__).parent / "data/raw_data.txt" 25 | EXCLUDED_PATHS = ["/reference"] # Paths to exclude from scraping 26 | 27 | class DocsScraper: 28 | def __init__(self): 29 | self.visited_urls: Set[str] = set() 30 | self.content: List[str] = [] 31 | self.session = None 32 | 33 | async def init_session(self): 34 | """Initialize the aiohttp session.""" 35 | self.session = aiohttp.ClientSession() 36 | 37 | async def close_session(self): 38 | """Close the aiohttp session.""" 39 | if self.session: 40 | await self.session.close() 41 | 42 | def should_exclude_url(self, url: str) -> bool: 43 | """Check if a URL should be excluded from scraping.""" 44 | parsed = urlparse(url) 45 | return any(parsed.path.startswith(path) for path in EXCLUDED_PATHS) 46 | 47 | async def fetch_sitemap(self) -> List[str]: 48 | """Fetch and parse the sitemap to get all URLs.""" 49 | async with self.session.get(SITEMAP_URL) as response: 50 | if response.status != 200: 51 | raise Exception(f"Failed to fetch sitemap: {response.status}") 52 | 53 | content = await response.text() 54 | soup = BeautifulSoup(content, "xml") 55 | urls = [loc.text for loc in soup.find_all("loc")] 56 | 57 | # Filter out excluded URLs and ensure they're from docs.livekit.io 58 | return [ 59 | url for url in urls 60 | if url.startswith(BASE_URL) and not self.should_exclude_url(url) 61 | ] 62 | 63 | async def fetch_page(self, url: str) -> str: 64 | """Fetch a single page and extract its content.""" 65 | try: 66 | async with self.session.get(url) as response: 67 | if response.status != 200: 68 | logger.warning(f"Failed to fetch {url}: {response.status}") 69 | return "" 70 | 71 | 
content = await response.text() 72 | soup = BeautifulSoup(content, "html.parser") 73 | 74 | # Extract the main content 75 | main_content = soup.find("main") 76 | if not main_content: 77 | return "" 78 | 79 | # Remove unwanted elements 80 | for element in main_content.find_all(["nav", "footer", "header", "script", "style"]): 81 | element.decompose() 82 | 83 | # Clean up the text 84 | text = main_content.get_text(separator="\n", strip=True) 85 | text = re.sub(r"\n\s*\n", "\n\n", text) # Remove excessive newlines 86 | return text.strip() 87 | 88 | except Exception as e: 89 | logger.error(f"Error fetching {url}: {e}") 90 | return "" 91 | 92 | async def scrape(self): 93 | """Main scraping function.""" 94 | await self.init_session() 95 | try: 96 | # Get all URLs from sitemap 97 | urls = await self.fetch_sitemap() 98 | logger.info(f"Found {len(urls)} URLs to scrape") 99 | 100 | # Process each URL 101 | for url in urls: 102 | if url in self.visited_urls: 103 | continue 104 | 105 | self.visited_urls.add(url) 106 | logger.info(f"Scraping {url}") 107 | 108 | content = await self.fetch_page(url) 109 | if content: 110 | self.content.append(f"Content from {url}:\n\n{content}\n\n") 111 | 112 | finally: 113 | await self.close_session() 114 | 115 | def save_content(self): 116 | """Save the scraped content to a file.""" 117 | OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True) # data/ is gitignored, so create it on first run 118 | with open(OUTPUT_FILE, "w") as f: 119 | f.write("\n".join(self.content)) 120 | logger.info(f"Saved content to {OUTPUT_FILE}") 121 | 122 | async def main(): 123 | """Main function to run the scraper.""" 124 | scraper = DocsScraper() 125 | await scraper.scrape() 126 | scraper.save_content() 127 | 128 | if __name__ == "__main__": 129 | asyncio.run(main()) -------------------------------------------------------------------------------- /realtime/openai-realtime.py: -------------------------------------------------------------------------------- 1 | import librosa 2 | import numpy as np 3 | from typing import AsyncIterable 4 | from dotenv import load_dotenv 5 | from pathlib import Path 6 | from livekit import agents, rtc 7 | from livekit.agents import utils 8 | from livekit.agents.voice import AgentSession, Agent, room_io, ModelSettings 9 | from livekit.plugins import ( 10 | openai, 11 | silero 12 | ) 13 | 14 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 15 | 16 | class Assistant(Agent): 17 | def __init__(self, *, pitch_shift_semitones: float = -4.0) -> None: 18 | super().__init__(instructions="You are a helpful voice AI assistant.") 19 | self.pitch_shift_semitones = pitch_shift_semitones 20 | 21 | async def realtime_audio_output_node( 22 | self, audio: AsyncIterable[rtc.AudioFrame], model_settings: ModelSettings 23 | ) -> AsyncIterable[rtc.AudioFrame]: 24 | return self._process_audio_stream( 25 | Agent.default.realtime_audio_output_node(self, audio, model_settings) 26 | ) 27 | 28 | async def _process_audio_stream( 29 | self, audio: AsyncIterable[rtc.AudioFrame] 30 | ) -> AsyncIterable[rtc.AudioFrame]: 31 | stream: utils.audio.AudioByteStream | None = None 32 | async for frame in audio: 33 | if stream is None: 34 | stream = utils.audio.AudioByteStream( 35 | sample_rate=frame.sample_rate, 36 | num_channels=frame.num_channels, 37 | samples_per_channel=frame.sample_rate // 4, 38 | ) 39 | for f in stream.push(frame.data): 40 | yield self._process_audio(f) 41 | 42 | for f in stream.flush(): 43 | yield self._process_audio(f) 44 | 45 | def _process_audio(self, frame: rtc.AudioFrame) -> rtc.AudioFrame: 46 | audio_data = np.frombuffer(frame.data, dtype=np.int16) 47 | 48 | shifted
= librosa.effects.pitch_shift( 49 | audio_data.astype(np.float32) / np.iinfo(np.int16).max, 50 | sr=frame.sample_rate, 51 | n_steps=self.pitch_shift_semitones, 52 | ) 53 | shifted = (shifted * np.iinfo(np.int16).max).astype(np.int16) 54 | return rtc.AudioFrame( 55 | data=shifted.tobytes(), 56 | sample_rate=frame.sample_rate, 57 | num_channels=frame.num_channels, 58 | samples_per_channel=shifted.shape[-1], 59 | ) 60 | 61 | async def entrypoint(ctx: agents.JobContext): 62 | await ctx.connect() 63 | 64 | session = AgentSession( 65 | llm=openai.realtime.RealtimeModel(), 66 | vad=silero.VAD.load() 67 | ) 68 | 69 | await session.start( 70 | room=ctx.room, 71 | agent=Assistant() 72 | ) 73 | 74 | await session.generate_reply() 75 | 76 | 77 | if __name__ == "__main__": 78 | agents.cli.run_app(agents.WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | livekit-agents[openai,silero,turn-detector,deepgram,google,anthropic,cartesia,elevenlabs,rime,playai,groq,tavus]~=1.0 2 | livekit-plugins-noise-cancellation~=0.0 3 | python-dotenv 4 | requests>=2.32.0 5 | annoy 6 | pydantic 7 | flask 8 | pandas 9 | websockets>=11.0.3 10 | rich 11 | mcp 12 | librosa -------------------------------------------------------------------------------- /telephony/answer_call.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, deepgram, silero 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | class SimpleAgent(Agent): 11 | def __init__(self) -> None: 12 | super().__init__( 13 | instructions=""" 14 | You are a helpful agent. 15 | """, 16 | stt=deepgram.STT(), 17 | llm=openai.LLM(model="gpt-4o"), 18 | tts=openai.TTS(), 19 | vad=silero.VAD.load() 20 | ) 21 | 22 | async def on_enter(self): 23 | # Generate initial greeting 24 | self.session.generate_reply() 25 | 26 | async def entrypoint(ctx: JobContext): 27 | await ctx.connect() 28 | 29 | session = AgentSession() 30 | agent = SimpleAgent() 31 | 32 | await session.start( 33 | agent=agent, 34 | room=ctx.room 35 | ) 36 | 37 | if __name__ == "__main__": 38 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /telephony/make_call/calling_agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent.parent / '.env') 10 | 11 | logger = logging.getLogger("calling-agent") 12 | logger.setLevel(logging.INFO) 13 | 14 | class SimpleAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are calling someone on the phone. Your goal is to know if they prefer 19 | chocolate or vanilla ice cream. That's the only question you should ask, and 20 | you should get right to the point. 
Say something like "Hello, I'm calling to 21 | ask you a question about ice cream. Do you prefer chocolate or vanilla?" 22 | """, 23 | stt=deepgram.STT(), 24 | llm=openai.LLM(model="gpt-4o"), 25 | tts=openai.TTS(), 26 | vad=silero.VAD.load() 27 | ) 28 | 29 | async def on_enter(self): 30 | self.session.generate_reply() 31 | 32 | async def entrypoint(ctx: JobContext): 33 | await ctx.connect() 34 | 35 | session = AgentSession() 36 | 37 | await session.start( 38 | agent=SimpleAgent(), 39 | room=ctx.room 40 | ) 41 | 42 | if __name__ == "__main__": 43 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /telephony/make_call/make_call.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import logging 4 | from pathlib import Path 5 | from dotenv import load_dotenv 6 | from livekit import api 7 | 8 | # Load environment variables 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent.parent / '.env') 10 | 11 | # Set up logging 12 | logger = logging.getLogger("make-call") 13 | logger.setLevel(logging.INFO) 14 | 15 | # Configuration 16 | room_name = "my-room" 17 | agent_name = "test-agent" 18 | outbound_trunk_id = os.getenv("SIP_OUTBOUND_TRUNK_ID") 19 | 20 | async def make_call(phone_number): 21 | """Create a dispatch and add a SIP participant to call the phone number""" 22 | lkapi = api.LiveKitAPI() 23 | 24 | # Create agent dispatch 25 | logger.info(f"Creating dispatch for agent {agent_name} in room {room_name}") 26 | dispatch = await lkapi.agent_dispatch.create_dispatch( 27 | api.CreateAgentDispatchRequest( 28 | agent_name=agent_name, room=room_name, metadata=phone_number 29 | ) 30 | ) 31 | logger.info(f"Created dispatch: {dispatch}") 32 | 33 | # Create SIP participant to make the call 34 | if not outbound_trunk_id or not outbound_trunk_id.startswith("ST_"): 35 | logger.error("SIP_OUTBOUND_TRUNK_ID is not set or invalid") 36 | return 37 | 38 | logger.info(f"Dialing {phone_number} to room {room_name}") 39 | 40 | try: 41 | # Create SIP participant to initiate the call 42 | sip_participant = await lkapi.sip.create_sip_participant( 43 | api.CreateSIPParticipantRequest( 44 | room_name=room_name, 45 | sip_trunk_id=outbound_trunk_id, 46 | sip_call_to=phone_number, 47 | participant_identity="phone_user", 48 | ) 49 | ) 50 | logger.info(f"Created SIP participant: {sip_participant}") 51 | except Exception as e: 52 | logger.error(f"Error creating SIP participant: {e}") 53 | 54 | # Close API connection 55 | await lkapi.aclose() 56 | 57 | async def main(): 58 | # Replace with the actual phone number including country code 59 | phone_number = "+13432024203" 60 | await make_call(phone_number) 61 | 62 | if __name__ == "__main__": 63 | asyncio.run(main()) 64 | -------------------------------------------------------------------------------- /telephony/survey_caller/make_survey_calls.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import csv 3 | import json 4 | import logging 5 | import os 6 | from pathlib import Path 7 | from dotenv import load_dotenv 8 | from livekit import api 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent.parent / '.env') 11 | 12 | logger = logging.getLogger("make-survey-calls") 13 | logger.setLevel(logging.INFO) 14 | 15 | # Configuration 16 | room_name_prefix = "survey-call-" 17 | agent_name = "survey-agent" 18 | outbound_trunk_id = os.getenv("SIP_OUTBOUND_TRUNK_ID") 19 
| csv_file_path = Path(__file__).parent / "survey_data.csv" 20 | 21 | async def make_survey_call(phone_number, question, row_index): 22 | """Create a dispatch and add a SIP participant to call the phone number with survey question""" 23 | # Create a unique room name for each call using the prefix and row index 24 | room_name = f"{room_name_prefix}{row_index}" 25 | 26 | # Create metadata as JSON containing all relevant data 27 | metadata = json.dumps({ 28 | "phone_number": phone_number, 29 | "question": question, 30 | "row_index": row_index 31 | }) 32 | 33 | lkapi = api.LiveKitAPI() 34 | 35 | logger.info(f"Creating dispatch for agent {agent_name} in room {room_name}") 36 | 37 | dispatch = await lkapi.agent_dispatch.create_dispatch( 38 | api.CreateAgentDispatchRequest( 39 | agent_name=agent_name, 40 | room=room_name, 41 | metadata=metadata 42 | ) 43 | ) 44 | logger.info(f"Created dispatch: {dispatch}") 45 | logger.info(f"Dialing {phone_number} to room {room_name}") 46 | 47 | sip_participant = await lkapi.sip.create_sip_participant( 48 | api.CreateSIPParticipantRequest( 49 | room_name=room_name, 50 | sip_trunk_id=outbound_trunk_id, 51 | sip_call_to=phone_number, 52 | participant_identity="phone_user", 53 | ) 54 | ) 55 | logger.info(f"Created SIP participant: {sip_participant}") 56 | 57 | await lkapi.aclose() 58 | return True 59 | 60 | async def read_csv_data(): 61 | """Read the CSV file and return the data""" 62 | data = [] 63 | with open(csv_file_path, 'r', newline='') as f: 64 | reader = csv.reader(f) 65 | headers = next(reader) # Skip headers 66 | for i, row in enumerate(reader): 67 | if len(row) >= 2: 68 | data.append({ 69 | 'row_index': i + 1, 70 | 'phone_number': row[0], 71 | 'question': row[1], 72 | 'answer': row[2] if len(row) > 2 else '', 73 | 'status': row[3] if len(row) > 3 else '' 74 | }) 75 | 76 | return data 77 | 78 | async def process_survey_calls(): 79 | """Process all the survey calls in the CSV""" 80 | # Read the CSV data 81 | data = await read_csv_data() 82 | 83 | logger.info(f"Found {len(data)} survey calls to make") 84 | 85 | for item in data: 86 | if item['answer'] or (item['status'] and item['status'] != ''): 87 | logger.info(f"Skipping row {item['row_index']} as it already has an answer or status") 88 | continue 89 | 90 | logger.info(f"Processing survey call to {item['phone_number']} with question: {item['question']}") 91 | 92 | await make_survey_call(item['phone_number'], item['question'], item['row_index']) 93 | 94 | async def main(): 95 | logger.info("Starting survey calls process") 96 | if not outbound_trunk_id: 97 | logger.error("SIP_OUTBOUND_TRUNK_ID is not set. 
Please add it to your .env file.") 98 | return 99 | await process_survey_calls() 100 | logger.info("Survey calls process completed") 101 | 102 | if __name__ == "__main__": 103 | asyncio.run(main()) 104 | -------------------------------------------------------------------------------- /telephony/survey_caller/survey_calling_agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import asyncio 4 | import pandas as pd 5 | import json 6 | from pathlib import Path 7 | from dotenv import load_dotenv 8 | from livekit.agents import JobContext, WorkerOptions, cli 9 | from livekit.agents.voice import Agent, AgentSession, RunContext 10 | from livekit.agents.llm import function_tool 11 | from livekit.plugins import openai, silero, deepgram 12 | from livekit.api import DeleteRoomRequest 13 | 14 | load_dotenv(dotenv_path=Path(__file__).parent.parent.parent / '.env') 15 | 16 | logger = logging.getLogger("calling-agent") 17 | logger.setLevel(logging.INFO) 18 | 19 | csv_file_path = Path(__file__).parent / "survey_data.csv" 20 | 21 | class SurveyAgent(Agent): 22 | def __init__(self, question="Do you prefer chocolate or vanilla ice cream?", context=None, job_context=None) -> None: 23 | self.survey_question = question 24 | self.context = context or {} 25 | self.job_context = job_context 26 | self.survey_answer = None 27 | self.phone_number = self.context.get("phone_number", "unknown") 28 | # Adjust for 0-based indexing since row_index from metadata is 1-based 29 | self.row_index = self.context.get("row_index", 1) # Default to 1 if not provided 30 | 31 | instructions = f""" 32 | You are conducting a brief phone survey. Your goal is to ask the following question: 33 | '{self.survey_question}' 34 | 35 | Be polite and professional. Introduce yourself as a survey caller named "Sam", ask the question, 36 | and thank them for their time. Keep the call brief and focused on getting their answer. 37 | Don't ask any follow-up questions. 38 | 39 | Note: When you have an answer to the question, use the `record_survey_answer` function 40 | to persist what the user said. 
41 | """ 42 | 43 | super().__init__( 44 | instructions=instructions, 45 | stt=deepgram.STT(), 46 | llm=openai.LLM(model="gpt-4o"), 47 | tts=openai.TTS(), 48 | vad=silero.VAD.load() 49 | ) 50 | 51 | @function_tool 52 | async def record_survey_answer(self, context: RunContext, answer: str): 53 | logger.info(f"Survey answer recorded: {answer}") 54 | logger.info(f"Row index: {self.row_index}") 55 | self.survey_answer = answer 56 | 57 | df = pd.read_csv(csv_file_path, dtype=str) 58 | logger.info(f"CSV contents before update: {df.head()}") 59 | 60 | df.loc[self.row_index - 1, 'Answer'] = answer 61 | df.loc[self.row_index - 1, 'Status'] = 'Completed' 62 | logger.info(f"CSV contents after update: {df.head()}") 63 | df.to_csv(csv_file_path, index=False) 64 | 65 | await asyncio.sleep(5) # give the agent a moment to finish speaking before the room is deleted 66 | await self.job_context.api.room.delete_room(DeleteRoomRequest( 67 | room=self.job_context.room.name 68 | )) 69 | 70 | return None, "[Call ended]" 71 | 72 | async def entrypoint(ctx: JobContext): 73 | await ctx.connect() 74 | 75 | metadata_json = ctx.job.metadata 76 | logger.info(f"Received metadata: {metadata_json}") 77 | 78 | metadata = json.loads(metadata_json) 79 | phone_number = metadata.get("phone_number", "unknown") 80 | row_index = metadata.get("row_index", 1) 81 | question = metadata.get("question", "Do you prefer chocolate or vanilla ice cream?") 82 | 83 | logger.info(f"Parsed metadata - phone_number: {phone_number}, row_index: {row_index}, question: {question}") 84 | 85 | context = { 86 | "phone_number": phone_number, 87 | "row_index": row_index 88 | } 89 | 90 | session = AgentSession() 91 | agent = SurveyAgent(question=question, context=context, job_context=ctx) 92 | 93 | await session.start( 94 | agent=agent, 95 | room=ctx.room 96 | ) 97 | 98 | if __name__ == "__main__": 99 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, agent_name="survey-agent")) -------------------------------------------------------------------------------- /telephony/survey_caller/survey_data.csv: -------------------------------------------------------------------------------- 1 | Phone Number,Question,Answer,Status 2 | +13432024203,Do you prefer chocolate or vanilla ice cream?,, 3 | -------------------------------------------------------------------------------- /telephony/warm_handoff.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import uuid 4 | from pathlib import Path 5 | from dotenv import load_dotenv 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit import rtc 8 | from livekit import api 9 | from livekit.agents.llm import function_tool 10 | from livekit.agents.voice import Agent, AgentSession, RunContext 11 | from livekit.plugins import deepgram, openai, silero, elevenlabs 12 | 13 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 14 | 15 | class WarmHandoffAgent(Agent): 16 | def __init__(self, job_context=None) -> None: 17 | self.job_context = job_context 18 | super().__init__( 19 | instructions=""" 20 | You are a helpful assistant communicating through voice. You're helping me test ... yourself ... since you're the AI agent. 21 | Don't use any unpronounceable characters.
22 | """, 23 | stt=deepgram.STT(), 24 | llm=openai.LLM(model="gpt-4o"), 25 | tts=elevenlabs.TTS( 26 | encoding="pcm_44100", 27 | model="eleven_multilingual_v2" 28 | ), 29 | vad=silero.VAD.load() 30 | ) 31 | 32 | @function_tool 33 | async def transfer_call(self, context: RunContext, phone_number: str): 34 | """ 35 | Transfer the current call to a human agent at the specified phone number. 36 | 37 | Args: 38 | context: The call context 39 | phone_number: The phone number to transfer the call to 40 | """ 41 | if not self.job_context: 42 | await self.session.say("I'm sorry, I can't transfer the call at this time.") 43 | return None, "Failed to transfer call: No job context available" 44 | 45 | # Get room name from environment variable 46 | room_name = os.environ.get('LIVEKIT_ROOM_NAME', self.job_context.room.name) 47 | 48 | # Generate a unique identity for the SIP participant 49 | identity = f"transfer_{uuid.uuid4().hex[:8]}" 50 | 51 | # Create LiveKit API client 52 | livekit_url = os.environ.get('LIVEKIT_URL') 53 | livekit_api_key = os.environ.get('LIVEKIT_API_KEY') 54 | livekit_api_secret = os.environ.get('LIVEKIT_API_SECRET') 55 | sip_trunk_id = os.environ.get('SIP_TRUNK_ID') 56 | 57 | try: 58 | print(f"Transferring call to {phone_number}") 59 | 60 | # Using the API from the job context if available 61 | if self.job_context and hasattr(self.job_context, 'api'): 62 | response = await self.job_context.api.sip.create_sip_participant( 63 | api.CreateSIPParticipantRequest( 64 | sip_trunk_id=sip_trunk_id, 65 | sip_call_to=phone_number, 66 | room_name=room_name, 67 | participant_identity=identity, 68 | participant_name="Human Agent", 69 | krisp_enabled=True 70 | ) 71 | ) 72 | else: 73 | # Fallback to creating our own API client 74 | livekit_api = api.LiveKitAPI( 75 | url=livekit_url, 76 | api_key=livekit_api_key, 77 | api_secret=livekit_api_secret 78 | ) 79 | 80 | response = await livekit_api.sip.create_sip_participant( 81 | api.CreateSIPParticipantRequest( 82 | sip_trunk_id=sip_trunk_id, 83 | sip_call_to=phone_number, 84 | room_name=room_name, 85 | participant_identity=identity, 86 | participant_name="Human Agent", 87 | krisp_enabled=True 88 | ) 89 | ) 90 | 91 | await livekit_api.aclose() 92 | 93 | await self.session.say(f"I'm transferring you to a human agent now. Please hold while we connect you.") 94 | 95 | return None, f"I've transferred you to a human agent at {phone_number}. Please hold while we connect you." 96 | 97 | except Exception as e: 98 | print(f"Error transferring call: {e}") 99 | await self.session.say(f"I'm sorry, I couldn't transfer the call at this time.") 100 | return None, f"Failed to transfer call: {e}" 101 | 102 | async def on_enter(self): 103 | # Generate initial greeting 104 | self.session.generate_reply() 105 | 106 | async def entrypoint(ctx: JobContext): 107 | await ctx.connect() 108 | 109 | session = AgentSession() 110 | agent = WarmHandoffAgent(job_context=ctx) 111 | 112 | await session.start( 113 | agent=agent, 114 | room=ctx.room 115 | ) 116 | 117 | def on_participant_connected_handler(participant: rtc.RemoteParticipant): 118 | asyncio.create_task(async_on_participant_connected(participant)) 119 | 120 | async def async_on_participant_connected(participant: rtc.RemoteParticipant): 121 | await agent.session.say(f"Hi there! 
Is there anything I can help you with?") 122 | 123 | # Handle existing participants 124 | for participant in ctx.room.remote_participants.values(): 125 | asyncio.create_task(async_on_participant_connected(participant)) 126 | 127 | # Set up listener for new participants 128 | ctx.room.on("participant_connected", on_participant_connected_handler) 129 | 130 | if __name__ == "__main__": 131 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /tool_calling/call_function_tool.py: -------------------------------------------------------------------------------- 1 | ## This is a basic example of how to use function calling. 2 | ## To test the function, you can ask the agent to print to the console! 3 | 4 | import logging 5 | from pathlib import Path 6 | from dotenv import load_dotenv 7 | from livekit.agents import JobContext, WorkerOptions, cli 8 | from livekit.agents.llm import function_tool 9 | from livekit.agents.voice import Agent, AgentSession, RunContext 10 | from livekit.plugins import deepgram, openai, silero 11 | 12 | logger = logging.getLogger("function-calling") 13 | logger.setLevel(logging.INFO) 14 | 15 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 16 | 17 | class FunctionAgent(Agent): 18 | def __init__(self) -> None: 19 | super().__init__( 20 | instructions=""" 21 | You are a helpful assistant communicating through voice. Don't use any unpronounceable characters. 22 | Note: If asked to print to the console, use the `print_to_console` function. 23 | """, 24 | stt=deepgram.STT(), 25 | llm=openai.LLM(model="gpt-4o"), 26 | tts=openai.TTS(), 27 | vad=silero.VAD.load() 28 | ) 29 | 30 | @function_tool 31 | async def print_to_console(self, context: RunContext): 32 | print("Console Print Success!") 33 | return None, "I've printed to the console." 34 | 35 | async def on_enter(self): 36 | self.session.generate_reply() 37 | 38 | async def entrypoint(ctx: JobContext): 39 | await ctx.connect() 40 | 41 | session = AgentSession() 42 | 43 | await session.start( 44 | agent=FunctionAgent(), 45 | room=ctx.room 46 | ) 47 | 48 | if __name__ == "__main__": 49 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /tool_calling/update_tools.py: -------------------------------------------------------------------------------- 1 | ## This is a basic example of how to add a tool to an agent at runtime with `update_tools`. 2 | ## To test it, ask the agent to print to the console or to give you a random number! 3 | 4 | import logging 5 | import random 6 | from pathlib import Path 7 | from dotenv import load_dotenv 8 | from livekit.agents import JobContext, WorkerOptions, cli 9 | from livekit.agents.llm import function_tool 10 | from livekit.agents.voice import Agent, AgentSession, RunContext 11 | from livekit.plugins import deepgram, openai, silero 12 | 13 | logger = logging.getLogger("function-calling") 14 | logger.setLevel(logging.INFO) 15 | 16 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 17 | 18 | class AddFunctionAgent(Agent): 19 | def __init__(self) -> None: 20 | super().__init__( 21 | instructions=""" 22 | You are a helpful assistant communicating through voice. Don't use any unpronounceable characters. 23 | Note: If asked to print to the console, use the `print_to_console` function.
24 | """, 25 | stt=deepgram.STT(), 26 | llm=openai.LLM(model="gpt-4o"), 27 | tts=openai.TTS(), 28 | vad=silero.VAD.load() 29 | ) 30 | 31 | @function_tool 32 | async def print_to_console(self, context: RunContext): 33 | print("Console Print Success!") 34 | return None, "I've printed to the console." 35 | 36 | async def on_enter(self): 37 | self.session.generate_reply() 38 | 39 | async def entrypoint(ctx: JobContext): 40 | await ctx.connect() 41 | 42 | session = AgentSession() 43 | agent=AddFunctionAgent() 44 | 45 | async def _random_number() -> int: 46 | num = random.randint(0, 100) 47 | logger.info(f"random_number called: {num}") 48 | return num 49 | 50 | await agent.update_tools( 51 | agent.tools 52 | + [function_tool(_random_number, name="random_number", description="Get a random number")] 53 | ) 54 | 55 | await session.start( 56 | agent=agent, 57 | room=ctx.room 58 | ) 59 | 60 | if __name__ == "__main__": 61 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /translators/pipeline_translator.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram, elevenlabs 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("listen-and-respond") 12 | logger.setLevel(logging.INFO) 13 | 14 | class SimpleAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a translator. You translate the user's speech from English to French. 19 | Every message you receive, translate it directly into French. 20 | Do not respond with anything else but the translation. 
21 | """ 22 | stt=deepgram.STT(), 23 | llm=openai.LLM(model="gpt-4o"), 24 | tts=elevenlabs.TTS( 25 | model="eleven_multilingual_v2" 26 | ), 27 | vad=silero.VAD.load() 28 | ) 29 | 30 | async def on_enter(self): 31 | self.session.generate_reply() 32 | 33 | async def entrypoint(ctx: JobContext): 34 | await ctx.connect() 35 | 36 | session = AgentSession() 37 | 38 | await session.start( 39 | agent=SimpleAgent(), 40 | room=ctx.room 41 | ) 42 | 43 | if __name__ == "__main__": 44 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /translators/tts_translator.py: -------------------------------------------------------------------------------- 1 | # Transcribes user speech with Gladia STT (French or English), translates it to English, and speaks the translation aloud 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import rime, elevenlabs, silero 7 | import sys 8 | 9 | sys.path.append(str(Path(__file__).parent.parent)) 10 | from launch_demos.livekit_plugins_gladia import stt 11 | 12 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 13 | 14 | async def entrypoint(ctx: JobContext): 15 | 16 | await ctx.connect() 17 | session = AgentSession() 18 | 19 | # Process transcription events - let the agent say what it receives 20 | @session.on("user_input_transcribed") 21 | def on_transcript(event): 22 | # Log the full event object to see all available metadata 23 | print(f"Transcript event: {event}") 24 | if event.is_final: 25 | print(f"Final transcript: {event.transcript}") 26 | session.say(event.transcript) 27 | 28 | await session.start( 29 | agent=Agent( 30 | instructions="You are a helpful assistant that speaks what the user says in English.", 31 | stt=stt.STT( 32 | languages=["fr", "en"], # Support French and English input 33 | code_switching=True, 34 | sample_rate=16000, 35 | bit_depth=16, 36 | channels=1, 37 | encoding="wav/pcm", 38 | translation_enabled=True, 39 | translation_target_languages=["en"], # Only translate to English 40 | translation_model="base", 41 | translation_match_original_utterances=True 42 | ), 43 | tts=elevenlabs.TTS( 44 | model="eleven_multilingual_v2" 45 | ), 46 | allow_interruptions=False 47 | ), 48 | room=ctx.room 49 | ) 50 | 51 | if __name__ == "__main__": 52 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /vision/agent.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | 4 | from dotenv import load_dotenv 5 | from livekit import rtc 6 | from livekit.agents import ( 7 | AutoSubscribe, 8 | JobContext, 9 | WorkerOptions, 10 | cli, 11 | get_job_context, 12 | ) 13 | from livekit.agents.llm import ImageContent, ChatContext, ChatMessage 14 | from livekit.agents.voice import AgentSession, Agent, room_io 15 | from livekit.plugins import ( 16 | cartesia, 17 | openai, 18 | deepgram, 19 | noise_cancellation, 20 | silero, 21 | ) 22 | from pathlib import Path 23 | 24 | load_dotenv(dotenv_path=Path(__file__).parent.parent / ".env") 25 | logger = logging.getLogger("vision-agent") 26 | 27 | 28 | class Assistant(Agent): 29 | def __init__(self, room: rtc.Room) -> None: 30 | self._latest_frame = None 31 | self._room = room 32 | self._tasks = [] 33 | self._video_stream = None 34 | 35 | super().__init__( 36 | instructions=( 37 | "You are a voice
assistant created by LiveKit that can both see and hear. " 38 | "You should use short and concise responses, avoiding unpronounceable punctuation. " 39 | "When you see an image in our conversation, naturally incorporate what you see " 40 | "into your response. Keep visual descriptions brief but informative." 41 | ), 42 | vad=silero.VAD.load(), 43 | stt=deepgram.STT(), 44 | llm=openai.LLM(model="gpt-4o-mini"), 45 | tts=cartesia.TTS(), 46 | ) 47 | 48 | async def on_enter(self): 49 | """ 50 | Lifecycle hook that runs after the agent becomes the active agent in a session. 51 | Adds video track from a remote participant and then starts tracking frames from video. 52 | """ 53 | logger.debug("Agent joining room") 54 | room = get_job_context().room 55 | 56 | # Find the first video track (if any) from the remote participant 57 | remote_participant = list(room.remote_participants.values())[0] 58 | video_tracks = [ 59 | publication.track 60 | for publication in remote_participant.track_publications.values() 61 | if publication.track is not None 62 | and publication.track.kind == rtc.TrackKind.KIND_VIDEO 63 | ] 64 | if video_tracks: 65 | self._create_video_stream(video_tracks[0]) 66 | 67 | # Watch for new video tracks not yet published 68 | @room.on("track_subscribed") 69 | def on_track_subscribed(track: rtc.Track): 70 | logger.debug("New video track subscribed") 71 | if track.kind == rtc.TrackKind.KIND_VIDEO: 72 | self._create_video_stream(track) 73 | 74 | async def on_user_turn_completed( 75 | self, _: ChatContext, new_message: ChatMessage 76 | ) -> None: 77 | """ 78 | Lifecycle hook that runs after the user's turn has ended, before the agent's reply. 79 | Captures the latest video frame and adds it to the conversation context. 80 | """ 81 | if self._latest_frame: 82 | new_message.content.append(ImageContent(image=self._latest_frame)) 83 | logger.debug("Added latest frame to conversation context") 84 | self._latest_frame = None 85 | 86 | def _create_video_stream(self, track: rtc.Track): 87 | """ 88 | Helper method to buffer the latest video frame from the user's track 89 | """ 90 | # Close any existing stream (we only want one at a time) 91 | if self._video_stream is not None: 92 | self._video_stream.close() 93 | 94 | # Create a new stream to receive frames 95 | self._video_stream = rtc.VideoStream(track) 96 | 97 | async def read_stream(): 98 | async for event in self._video_stream: 99 | # Store the latest frame for use later 100 | self._latest_frame = event.frame 101 | 102 | # Store the async task 103 | task = asyncio.create_task(read_stream()) 104 | task.add_done_callback(lambda t: self._tasks.remove(t)) 105 | self._tasks.append(task) 106 | 107 | 108 | async def entrypoint(ctx: JobContext): 109 | logger.info(f"connecting to room {ctx.room.name}") 110 | await ctx.connect(auto_subscribe=AutoSubscribe.SUBSCRIBE_ALL) 111 | 112 | # Wait for the first participant to connect 113 | participant = await ctx.wait_for_participant() 114 | logger.info(f"starting voice assistant for participant {participant.identity}") 115 | 116 | session = AgentSession( 117 | min_endpointing_delay=0.5, 118 | max_endpointing_delay=5.0, 119 | ) 120 | 121 | await session.start( 122 | room=ctx.room, 123 | agent=Assistant(ctx.room), 124 | room_input_options=room_io.RoomInputOptions( 125 | noise_cancellation=noise_cancellation.BVC(), 126 | ), 127 | ) 128 | 129 | # The agent should be polite and greet the user when it joins :) 130 | await session.say("Hey, how can I help you today?", allow_interruptions=True) 131 | 132 | 133 | if 
__name__ == "__main__": 134 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) 135 | --------------------------------------------------------------------------------
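`vision/agent.py` attaches a single buffered frame to each user turn, which works well for static scenes but gives the LLM no sense of motion. A small two-slot buffer can carry a slightly older snapshot alongside the newest frame, so questions like "which way did it move?" have something to compare against. The sketch below is a minimal editorial illustration rather than repository code: `TwoFrameBuffer`, `snapshot_interval`, and the wiring notes are names invented here, and it assumes `new_message.content` accepts more than one `ImageContent` entry, as the `append` call in the example suggests.

```python
import time

from livekit.agents.llm import ChatMessage, ImageContent


class TwoFrameBuffer:
    """Keep the newest frame plus a snapshot taken at most once per interval.

    A hypothetical replacement for the single `_latest_frame` attribute in
    vision/agent.py, so each user turn can carry a "before" and "after" view.
    """

    def __init__(self, snapshot_interval: float = 1.0) -> None:
        self.snapshot_interval = snapshot_interval
        self.latest = None        # most recent frame from the VideoStream
        self.previous = None      # older snapshot, at least snapshot_interval behind
        self._last_snapshot = 0.0

    def push(self, frame) -> None:
        # Call this from the read_stream() loop instead of `self._latest_frame = event.frame`.
        now = time.monotonic()
        if self.latest is not None and now - self._last_snapshot >= self.snapshot_interval:
            self.previous = self.latest
            self._last_snapshot = now
        self.latest = frame

    def attach_to(self, message: ChatMessage) -> None:
        # Mirrors on_user_turn_completed: append the buffered frames to the user's message,
        # oldest first, then clear the buffer so stale frames don't leak into later turns.
        for frame in (self.previous, self.latest):
            if frame is not None:
                message.content.append(ImageContent(image=frame))
        self.previous = None
        self.latest = None
```

Wiring this in would mean creating one buffer in `Assistant.__init__`, calling `push(event.frame)` inside `read_stream()`, and calling `attach_to(new_message)` from `on_user_turn_completed` in place of the direct `ImageContent` append.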