├── .gitignore ├── README.md ├── avatars └── tavus │ ├── README.md │ ├── tavus.py │ └── voice-assistant-frontend │ ├── .eslintrc.json │ ├── .github │ ├── assets │ │ ├── app-icon.png │ │ ├── frontend-screenshot.png │ │ └── template-graphic.svg │ └── workflows │ │ ├── build-and-test.yaml │ │ └── sync-to-production.yaml │ ├── .gitignore │ ├── .prettierignore │ ├── .prettierrc │ ├── LICENSE │ ├── README.md │ ├── app │ ├── api │ │ └── connection-details │ │ │ └── route.ts │ ├── favicon.ico │ ├── globals.css │ ├── layout.tsx │ └── page.tsx │ ├── components │ ├── CloseIcon.tsx │ ├── FlashCard.tsx │ ├── FlashCardContainer.tsx │ ├── NoAgentNotification.tsx │ ├── Quiz.tsx │ ├── QuizContainer.tsx │ └── TranscriptionView.tsx │ ├── hooks │ ├── useCombinedTranscriptions.ts │ └── useLocalMicTrack.ts │ ├── next.config.mjs │ ├── package-lock.json │ ├── package.json │ ├── pnpm-lock.yaml │ ├── postcss.config.mjs │ ├── renovate.json │ ├── tailwind.config.ts │ ├── taskfile.yaml │ └── tsconfig.json ├── basics ├── audio.wav ├── change_agent_instructions.py ├── context_variables.py ├── exit_message.py ├── function_calling.py ├── interrupts_user.py ├── listen_and_respond.py ├── playing_audio.py ├── repeater.py └── uninterruptable.py ├── check_agent_example_coverage.py ├── complex-agents ├── medical_office_triage │ ├── prompts │ │ ├── billing_prompt.yaml │ │ ├── support_prompt.yaml │ │ └── triage_prompt.yaml │ ├── triage.py │ └── utils.py └── personal_shopper │ ├── add_test_orders.py │ ├── customer_data.db │ ├── database.py │ ├── personal_shopper.py │ ├── prompts │ ├── returns_prompt.yaml │ ├── sales_prompt.yaml │ └── triage_prompt.yaml │ └── utils.py ├── egress └── recording_agent.py ├── evaluating-agents ├── README.md ├── agent_evals.py └── agent_to_test.py ├── events ├── basic_event.py └── event_emitters.py ├── flows ├── declarative_flow.py ├── multi_stage_flow.py └── simple_flow.py ├── hardware └── pi_zero_transcriber.py ├── home_assistant ├── README.md └── homeautomation.py ├── livekit-logo-dark.png ├── mcp ├── agent.py └── server.py ├── metrics ├── metrics_llm.py ├── metrics_stt.py ├── metrics_tts.py ├── metrics_vad.py └── send-metrics-to-3p │ ├── metrics_server │ ├── README.md │ ├── app.py │ ├── requirements.txt │ └── templates │ │ └── dashboard.html │ ├── run_3p_metrics_demo.sh │ └── send_metrics_to_3p.py ├── multi-agent └── long_or_short_agent.py ├── pipeline-llm ├── anthropic_llm.py ├── cerebras_llm.py ├── google_llm.py ├── interrupt_user.py ├── large_context.py ├── lib │ └── war_and_peace.txt ├── llm_powered_content_filter.py ├── ollama_llm.py ├── openai_llm.py ├── replacing_llm_output.py ├── simple_content_filter.py └── transcription_node.py ├── pipeline-stt ├── keyword_detection.py └── transcriber.py ├── pipeline-tts ├── cartesia_tts.py ├── elevenlabs_change_language.py ├── elevenlabs_tts.py ├── only_greet.py ├── openai_tts.py ├── playai_tts.py ├── rime_tts.py ├── short_replies_only.py ├── tts_comparison.py └── tts_node.py ├── rag ├── README.md ├── build_rag_data.py ├── main.py ├── rag_db_builder.py ├── rag_handler.py ├── requirements.txt └── scrape_docs.py ├── realtime └── openai-realtime.py ├── requirements.txt ├── rpc └── rpc_agent.py ├── telephony ├── answer_call.py ├── make_call │ ├── calling_agent.py │ └── make_call.py ├── sip_lifecycle.py ├── survey_caller │ ├── make_survey_calls.py │ ├── survey_calling_agent.py │ └── survey_data.csv └── warm_handoff.py ├── tool_calling ├── call_function_tool.py └── update_tools.py ├── tracking_state └── npc_character.py ├── translators ├── 
pipeline_translator.py └── tts_translator.py └── vision └── agent.py /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .env.local 3 | rag/data 4 | __pycache__ 5 | /venv 6 | .DS_Store -------------------------------------------------------------------------------- /avatars/tavus/README.md: -------------------------------------------------------------------------------- 1 | # Tavus Avatar Agent 2 | 3 | A LiveKit-powered educational AI agent that uses Tavus to create an interactive study partner focused on teaching about the Fall of the Roman Empire. 4 | 5 | Demo: https://www.youtube.com/watch?v=iuX5PDP73bQ 6 | 7 | ## Features 8 | 9 | - **Conversational Teaching**: Uses the Socratic method to guide students through learning 10 | - **Flash Cards**: Creates and manages flash cards for important concepts 11 | - **Interactive Quizzes**: Builds multiple-choice quizzes to test knowledge retention 12 | - **Visual Avatar**: Powered by Tavus for realistic avatar video generation 13 | - **Voice Interaction**: Natural voice conversation using Deepgram STT and ElevenLabs TTS 14 | 15 | ## Prerequisites 16 | 17 | - Python 3.10+ 18 | - LiveKit account 19 | - Tavus account with a configured avatar (replica_id and persona_id) 20 | - API keys for: 21 | - OpenAI 22 | - Deepgram 23 | - ElevenLabs 24 | - Tavus 25 | 26 | ## Installation 27 | 28 | 1. Clone this repository 29 | 2. Install dependencies from the root level of `python-agents-examples` 30 | ``` 31 | pip install -r requirements.txt 32 | ``` 33 | 3. Create a `.env` file in the parent directory with your API keys 34 | 35 | ## Configuration 36 | 37 | Set the following environment variables in your `.env` file: 38 | 39 | ``` 40 | OPENAI_API_KEY=your_openai_key 41 | ELEVENLABS_API_KEY=your_elevenlabs_key 42 | DEEPGRAM_API_KEY=your_deepgram_key 43 | TAVUS_API_KEY=your_tavus_key 44 | LIVEKIT_API_KEY=your_livekit_key 45 | LIVEKIT_API_SECRET=your_livekit_secret 46 | ``` 47 | 48 | Customize the avatar by changing the `replica_id` and `persona_id` in the `entrypoint` function. 49 | 50 | ## Usage 51 | 52 | Run the agent with: 53 | 54 | ``` 55 | python tavus.py dev 56 | ``` 57 | 58 | ### Frontend Setup 59 | 60 | 1. Navigate to the frontend directory: 61 | ``` 62 | cd voice-assistant-frontend 63 | ``` 64 | 65 | 2. Install dependencies: 66 | ``` 67 | npm install 68 | ``` 69 | 70 | 3. Start the development server: 71 | ``` 72 | npm run dev 73 | ``` 74 | 75 | 4. Open your browser and navigate to: 76 | ``` 77 | http://localhost:3000 78 | ``` 79 | 80 | ### Flash Cards 81 | 82 | The agent automatically creates flash cards for important concepts. Users can flip cards through the UI or by asking the agent. 83 | 84 | ### Quizzes 85 | 86 | The agent creates interactive quizzes with multiple-choice questions. After completion: 87 | - Users receive immediate feedback on their performance 88 | - Flash cards are automatically created for incorrectly answered questions 89 | 90 | ## Extending 91 | 92 | To modify the agent's topic focus: 93 | 1. Update the instructions in the `AvatarAgent` class 94 | 2. 
Adjust the quiz templates and flash card content to match your subject matter 95 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["next/core-web-vitals", "next/typescript", "prettier"] 3 | } 4 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.github/assets/app-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit-examples/python-agents-examples/35d7334a47d1eec24827e237dca83bf26bd8c1ca/avatars/tavus/voice-assistant-frontend/.github/assets/app-icon.png -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.github/assets/frontend-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit-examples/python-agents-examples/35d7334a47d1eec24827e237dca83bf26bd8c1ca/avatars/tavus/voice-assistant-frontend/.github/assets/frontend-screenshot.png -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.github/workflows/build-and-test.yaml: -------------------------------------------------------------------------------- 1 | name: Lint and Build 2 | permissions: 3 | contents: read 4 | pull-requests: read 5 | on: 6 | push: 7 | branches: [main] 8 | pull_request: 9 | branches: [main] 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: pnpm/action-setup@v4 17 | - name: Use Node.js 22 18 | uses: actions/setup-node@v4 19 | with: 20 | node-version: 22 21 | cache: "pnpm" 22 | 23 | - name: Install dependencies 24 | run: pnpm install 25 | 26 | - name: ESLint 27 | run: pnpm lint 28 | 29 | - name: Prettier 30 | run: pnpm format:check 31 | 32 | - name: Ensure build succeeds 33 | run: pnpm build 34 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.github/workflows/sync-to-production.yaml: -------------------------------------------------------------------------------- 1 | # .github/workflows/sync-main-to-sandbox-production.yml 2 | 3 | name: Sync main to sandbox-production 4 | 5 | on: 6 | push: 7 | branches: 8 | - main 9 | 10 | permissions: 11 | contents: write 12 | pull-requests: write 13 | 14 | jobs: 15 | sync: 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - name: Checkout code 20 | uses: actions/checkout@v3 21 | with: 22 | fetch-depth: 0 # Fetch all history so we can force push 23 | 24 | - name: Set up Git 25 | run: | 26 | git config --global user.name 'github-actions[bot]' 27 | git config --global user.email 'github-actions[bot]@livekit.io' 28 | 29 | - name: Sync to sandbox-production 30 | env: 31 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 32 | run: | 33 | git checkout sandbox-production || git checkout -b sandbox-production 34 | git merge --strategy-option theirs main 35 | git push origin sandbox-production 36 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | .yarn/install-state.gz 8 | 9 | # testing 10 | /coverage 11 | 12 | # next.js 13 | /.next/ 14 | /out/ 15 | 16 | # production 17 | /build 18 | 19 | # misc 20 | .DS_Store 21 | *.pem 22 | 23 | # debug 24 | npm-debug.log* 25 | yarn-debug.log* 26 | yarn-error.log* 27 | 28 | # local env files 29 | .env*.local 30 | 31 | # vercel 32 | .vercel 33 | 34 | # typescript 35 | *.tsbuildinfo 36 | next-env.d.ts 37 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.prettierignore: -------------------------------------------------------------------------------- 1 | .github/ 2 | dist/ 3 | docs/ 4 | node_modules/ 5 | .next/ 6 | yarn.lock 7 | pnpm-lock.yaml 8 | 9 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": false, 3 | "trailingComma": "es5", 4 | "semi": true, 5 | "tabWidth": 2, 6 | "printWidth": 100, 7 | "importOrder": ["", "^[./]"], 8 | "importOrderSeparation": false, 9 | "importOrderSortSpecifiers": true, 10 | "importOrderParserPlugins": ["typescript", "jsx"], 11 | "plugins": ["@trivago/prettier-plugin-sort-imports"] 12 | } 13 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 LiveKit, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/README.md: -------------------------------------------------------------------------------- 1 | Voice Assistant App Icon 2 | 3 | # Roman Empire Study Partner with Tavus Avatar 4 | 5 | This is a voice-enabled educational assistant built with [LiveKit Agents](https://docs.livekit.io/agents) and [Tavus Avatars](https://tavus.io/). It creates an interactive learning experience focused on the Fall of the Roman Empire, featuring flash cards, quizzes, and Socratic teaching methods. 
6 | 7 | ## Features 8 | 9 | - **Conversational Learning**: AI tutor uses the Socratic method to guide students through complex historical topics 10 | - **Interactive Flash Cards**: Visual aids for key concepts that can be flipped to show questions or answers 11 | - **Multiple-Choice Quizzes**: Test your knowledge with interactive quizzes that provide immediate feedback 12 | - **Realistic Avatar**: Powered by Tavus for a more engaging visual learning experience 13 | 14 | ![App screenshot](.github/assets/frontend-screenshot.png) 15 | 16 | ## Getting started 17 | 18 | Run the following commands to set up the frontend: 19 | 20 | ```bash 21 | cd voice-assistant-frontend 22 | npm install 23 | npm run dev 24 | ``` 25 | 26 | And open http://localhost:3000 in your browser. 27 | 28 | You'll need to run the Tavus agent in a separate terminal: 29 | 30 | ```bash 31 | cd avatars/tavus 32 | python tavus.py dev 33 | ``` 34 | 35 | > [!NOTE] 36 | > Make sure you've configured your environment variables in the `.env` file as described in the main README. 37 | 38 | ## How to Use 39 | 40 | 1. **Start a Conversation**: Begin asking questions about the Fall of the Roman Empire 41 | 2. **Use Flash Cards**: The agent will create flash cards for important concepts, which you can flip to see answers 42 | 3. **Take Quizzes**: The agent will periodically offer quizzes to test your knowledge 43 | 4. **Review Incorrect Answers**: Flash cards will automatically be created for questions you miss 44 | 45 | ## Contributing 46 | 47 | This project is open source and we welcome contributions! Please open a PR or issue through GitHub, and don't forget to join us in the [LiveKit Community Slack](https://livekit.io/join-slack)! 48 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/app/api/connection-details/route.ts: -------------------------------------------------------------------------------- 1 | import { AccessToken, AccessTokenOptions, VideoGrant } from "livekit-server-sdk"; 2 | import { NextResponse } from "next/server"; 3 | 4 | // NOTE: you are expected to define the following environment variables in `.env.local`: 5 | const API_KEY = process.env.LIVEKIT_API_KEY; 6 | const API_SECRET = process.env.LIVEKIT_API_SECRET; 7 | const LIVEKIT_URL = process.env.LIVEKIT_URL; 8 | 9 | // don't cache the results 10 | export const revalidate = 0; 11 | 12 | export type ConnectionDetails = { 13 | serverUrl: string; 14 | roomName: string; 15 | participantName: string; 16 | participantToken: string; 17 | }; 18 | 19 | export async function GET() { 20 | try { 21 | if (LIVEKIT_URL === undefined) { 22 | throw new Error("LIVEKIT_URL is not defined"); 23 | } 24 | if (API_KEY === undefined) { 25 | throw new Error("LIVEKIT_API_KEY is not defined"); 26 | } 27 | if (API_SECRET === undefined) { 28 | throw new Error("LIVEKIT_API_SECRET is not defined"); 29 | } 30 | 31 | // Generate participant token 32 | const participantIdentity = `voice_assistant_user_${Math.floor(Math.random() * 10_000)}`; 33 | const roomName = `voice_assistant_room_${Math.floor(Math.random() * 10_000)}`; 34 | const participantToken = await createParticipantToken( 35 | { identity: participantIdentity }, 36 | roomName 37 | ); 38 | 39 | // Return connection details 40 | const data: ConnectionDetails = { 41 | serverUrl: LIVEKIT_URL, 42 | roomName, 43 | participantToken: participantToken, 44 | participantName: participantIdentity, 45 | }; 46 | const headers = new Headers({ 47 | "Cache-Control": "no-store", 48 | }); 49 | 
return NextResponse.json(data, { headers }); 50 | } catch (error) { 51 | if (error instanceof Error) { 52 | console.error(error); 53 | return new NextResponse(error.message, { status: 500 }); 54 | } 55 | } 56 | } 57 | 58 | function createParticipantToken(userInfo: AccessTokenOptions, roomName: string) { 59 | const at = new AccessToken(API_KEY, API_SECRET, { 60 | ...userInfo, 61 | ttl: "15m", 62 | }); 63 | const grant: VideoGrant = { 64 | room: roomName, 65 | roomJoin: true, 66 | canPublish: true, 67 | canPublishData: true, 68 | canSubscribe: true, 69 | }; 70 | at.addGrant(grant); 71 | return at.toJwt(); 72 | } 73 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit-examples/python-agents-examples/35d7334a47d1eec24827e237dca83bf26bd8c1ca/avatars/tavus/voice-assistant-frontend/app/favicon.ico -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/app/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | --lk-va-bar-width: 72px; 7 | --lk-control-bar-height: unset; 8 | } 9 | 10 | .agent-visualizer > .lk-audio-bar { 11 | width: 72px; 12 | } 13 | 14 | .lk-agent-control-bar { 15 | @apply border-t-0 p-0 h-min mr-4; 16 | } 17 | 18 | .lk-disconnect-button { 19 | @apply h-[36px] hover:bg-[#6b221a] hover:text-[white] bg-[#31100c] border-[#6b221a]; 20 | } 21 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import "@livekit/components-styles"; 2 | import { Metadata } from "next"; 3 | import { Public_Sans } from "next/font/google"; 4 | import "./globals.css"; 5 | 6 | const publicSans400 = Public_Sans({ 7 | weight: "400", 8 | subsets: ["latin"], 9 | }); 10 | 11 | export const metadata: Metadata = { 12 | title: "Voice Assistant", 13 | }; 14 | 15 | export default function RootLayout({ 16 | children, 17 | }: Readonly<{ 18 | children: React.ReactNode; 19 | }>) { 20 | return ( 21 | 22 | {children} 23 | 24 | ); 25 | } 26 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/components/CloseIcon.tsx: -------------------------------------------------------------------------------- 1 | export function CloseIcon() { 2 | return ( 3 | 4 | 10 | 11 | ); 12 | } 13 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/components/FlashCard.tsx: -------------------------------------------------------------------------------- 1 | import { motion, AnimatePresence } from "framer-motion"; 2 | import { useState, useEffect } from "react"; 3 | 4 | export interface FlashCardData { 5 | id: string; 6 | question: string; 7 | answer: string; 8 | isFlipped?: boolean; 9 | } 10 | 11 | interface FlashCardProps { 12 | card: FlashCardData; 13 | onFlip?: (id: string) => void; 14 | } 15 | 16 | export default function FlashCard({ card, onFlip }: FlashCardProps) { 17 | const [isFlipped, setIsFlipped] = useState(card.isFlipped || false); 18 | 19 | // Update local state when card prop changes 20 | useEffect(() => { 21 | 
setIsFlipped(card.isFlipped || false); 22 | }, [card.isFlipped]); 23 | 24 | const handleFlip = () => { 25 | setIsFlipped(!isFlipped); 26 | if (onFlip) { 27 | onFlip(card.id); 28 | } 29 | }; 30 | 31 | return ( 32 |
36 | 37 | {!isFlipped ? ( 38 | 46 |

Question

47 |

{card.question}

48 |
49 | ) : ( 50 | 58 |

Answer

59 |

{card.answer}

60 |
61 | )} 62 |
63 |
64 | ); 65 | } 66 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/components/NoAgentNotification.tsx: -------------------------------------------------------------------------------- 1 | import type { AgentState } from "@livekit/components-react"; 2 | import { useEffect, useRef, useState } from "react"; 3 | 4 | interface NoAgentNotificationProps extends React.PropsWithChildren { 5 | state: AgentState; 6 | } 7 | 8 | /** 9 | * Renders some user info when no agent connects to the room after a certain time. 10 | */ 11 | export function NoAgentNotification(props: NoAgentNotificationProps) { 12 | const timeToWaitMs = 10_000; 13 | const timeoutRef = useRef(null); 14 | const [showNotification, setShowNotification] = useState(false); 15 | const agentHasConnected = useRef(false); 16 | 17 | // If the agent has connected, we don't need to show the notification. 18 | if ( 19 | ["listening", "thinking", "speaking"].includes(props.state) && 20 | agentHasConnected.current == false 21 | ) { 22 | agentHasConnected.current = true; 23 | } 24 | 25 | useEffect(() => { 26 | if (props.state === "connecting") { 27 | timeoutRef.current = window.setTimeout(() => { 28 | if (props.state === "connecting" && agentHasConnected.current === false) { 29 | setShowNotification(true); 30 | } 31 | }, timeToWaitMs); 32 | } else { 33 | if (timeoutRef.current) { 34 | window.clearTimeout(timeoutRef.current); 35 | } 36 | setShowNotification(false); 37 | } 38 | 39 | return () => { 40 | if (timeoutRef.current) { 41 | window.clearTimeout(timeoutRef.current); 42 | } 43 | }; 44 | }, [props.state]); 45 | 46 | return ( 47 | <> 48 | {showNotification ? ( 49 |
50 |
51 | {/* Warning Icon */} 52 | 59 | 65 | 66 |
67 |

68 | It's quiet... too quiet. Is your agent lost? Ensure your agent is properly 69 | configured and running on your machine. 70 |

71 | 76 | View guide 77 | 78 | 95 |
96 | ) : null} 97 | 98 | ); 99 | } 100 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/components/Quiz.tsx: -------------------------------------------------------------------------------- 1 | import { motion } from "framer-motion"; 2 | 3 | export interface QuizAnswer { 4 | id: string; 5 | text: string; 6 | } 7 | 8 | export interface QuizQuestion { 9 | id: string; 10 | text: string; 11 | answers: QuizAnswer[]; 12 | } 13 | 14 | interface QuizProps { 15 | question: QuizQuestion; 16 | selectedAnswerId: string | undefined; 17 | onAnswerSelect: (answerId: string) => void; 18 | } 19 | 20 | export default function Quiz({ question, selectedAnswerId, onAnswerSelect }: QuizProps) { 21 | return ( 22 |
23 |

{question.text}

24 | 25 |
26 | {question.answers.map((answer) => ( 27 |
28 | onAnswerSelect(answer.id)} 35 | className="mr-3 h-4 w-4" 36 | /> 37 | 43 |
44 | ))} 45 |
46 |
47 | ); 48 | } 49 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/components/QuizContainer.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect, useState } from "react"; 2 | import { motion, AnimatePresence } from "framer-motion"; 3 | import { useRoomContext, useVoiceAssistant } from "@livekit/components-react"; 4 | import Quiz, { QuizQuestion, QuizAnswer } from "./Quiz"; 5 | 6 | export interface SubmittedQuiz { 7 | id: string; 8 | questions: QuizQuestion[]; 9 | answers: Record; 10 | } 11 | 12 | export default function QuizContainer() { 13 | const [questions, setQuestions] = useState([]); 14 | const [currentQuestionIndex, setCurrentQuestionIndex] = useState(null); 15 | const [isVisible, setIsVisible] = useState(false); 16 | const [quizId, setQuizId] = useState(null); 17 | const [selectedAnswers, setSelectedAnswers] = useState>({}); 18 | const room = useRoomContext(); 19 | const { agent } = useVoiceAssistant(); 20 | 21 | useEffect(() => { 22 | if (!room) return; 23 | 24 | // Register RPC method to receive quizzes 25 | const handleShowQuiz = async (data: any): Promise => { 26 | try { 27 | console.log("Received quiz RPC data:", data); 28 | 29 | // Check for the correct property in the RPC data 30 | if (!data || data.payload === undefined) { 31 | console.error("Invalid RPC data received:", data); 32 | return "Error: Invalid RPC data format"; 33 | } 34 | 35 | console.log("Parsing payload:", data.payload); 36 | 37 | // Parse the payload string into a JSON object 38 | const payload = typeof data.payload === 'string' 39 | ? JSON.parse(data.payload) 40 | : data.payload; 41 | 42 | if (payload.action === "show") { 43 | // Reset answers when showing a new quiz 44 | setSelectedAnswers({}); 45 | setQuizId(payload.id); 46 | setQuestions(payload.questions); 47 | setCurrentQuestionIndex(0); 48 | setIsVisible(true); 49 | } else if (payload.action === "hide") { 50 | setIsVisible(false); 51 | } 52 | 53 | return "Success"; 54 | } catch (error) { 55 | console.error("Error processing quiz data:", error); 56 | return "Error: " + (error instanceof Error ? error.message : String(error)); 57 | } 58 | }; 59 | 60 | room.localParticipant.registerRpcMethod( 61 | "client.quiz", 62 | handleShowQuiz 63 | ); 64 | 65 | return () => { 66 | // Clean up RPC method when component unmounts 67 | room.localParticipant.unregisterRpcMethod("client.quiz"); 68 | }; 69 | }, [room]); 70 | 71 | const handleAnswerSelect = (questionId: string, answerId: string) => { 72 | setSelectedAnswers(prev => ({ 73 | ...prev, 74 | [questionId]: answerId 75 | })); 76 | }; 77 | 78 | const handleSubmitQuiz = async () => { 79 | if (!agent || !quizId) return; 80 | 81 | try { 82 | console.log(`Submitting quiz ${quizId} to agent ${agent.identity}`); 83 | 84 | const payload = { 85 | id: quizId, 86 | answers: selectedAnswers 87 | }; 88 | 89 | const result = await room.localParticipant.performRpc({ 90 | destinationIdentity: agent.identity, 91 | method: "agent.submitQuiz", 92 | payload: JSON.stringify(payload) 93 | }); 94 | 95 | console.log(`Quiz submission result: ${result}`); 96 | 97 | // Hide the quiz after submission 98 | setIsVisible(false); 99 | } catch (error: unknown) { 100 | console.error("Error submitting quiz:", error); 101 | if (error instanceof Error) { 102 | console.error(error.stack); 103 | } 104 | } 105 | }; 106 | 107 | const currentQuestion = currentQuestionIndex !== null && questions[currentQuestionIndex] 108 | ? 
questions[currentQuestionIndex] 109 | : null; 110 | 111 | const isLastQuestion = currentQuestionIndex === questions.length - 1; 112 | const allQuestionsAnswered = questions.length > 0 && 113 | questions.every(q => selectedAnswers[q.id] !== undefined); 114 | 115 | return ( 116 | 117 | {isVisible && currentQuestion && ( 118 | 124 |
125 |

Quiz

126 | 132 |
133 | 134 | handleAnswerSelect(currentQuestion.id, answerId)} 138 | /> 139 | 140 |
141 | 150 | {(currentQuestionIndex ?? 0) + 1} / {questions.length} 151 | {!isLastQuestion ? ( 152 | 160 | ) : ( 161 | 168 | )} 169 |
170 |
171 | )} 172 |
173 | ); 174 | } 175 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/components/TranscriptionView.tsx: -------------------------------------------------------------------------------- 1 | import useCombinedTranscriptions from "@/hooks/useCombinedTranscriptions"; 2 | import * as React from "react"; 3 | 4 | export default function TranscriptionView() { 5 | const combinedTranscriptions = useCombinedTranscriptions(); 6 | const containerRef = React.useRef(null); 7 | 8 | // scroll to bottom when new transcription is added 9 | React.useEffect(() => { 10 | if (containerRef.current) { 11 | containerRef.current.scrollTop = containerRef.current.scrollHeight; 12 | } 13 | }, [combinedTranscriptions]); 14 | 15 | return ( 16 |
17 | {/* Fade-out gradient mask */} 18 |
19 |
20 | 21 | {/* Scrollable content */} 22 |
23 | {combinedTranscriptions.map((segment) => ( 24 |
33 | {segment.text} 34 |
35 | ))} 36 |
37 |
38 | ); 39 | } 40 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/hooks/useCombinedTranscriptions.ts: -------------------------------------------------------------------------------- 1 | import { useTrackTranscription, useVoiceAssistant } from "@livekit/components-react"; 2 | import { useMemo } from "react"; 3 | import useLocalMicTrack from "./useLocalMicTrack"; 4 | 5 | export default function useCombinedTranscriptions() { 6 | const { agentTranscriptions } = useVoiceAssistant(); 7 | 8 | const micTrackRef = useLocalMicTrack(); 9 | const { segments: userTranscriptions } = useTrackTranscription(micTrackRef); 10 | 11 | const combinedTranscriptions = useMemo(() => { 12 | return [ 13 | ...agentTranscriptions.map((val) => { 14 | return { ...val, role: "assistant" }; 15 | }), 16 | ...userTranscriptions.map((val) => { 17 | return { ...val, role: "user" }; 18 | }), 19 | ].sort((a, b) => a.firstReceivedTime - b.firstReceivedTime); 20 | }, [agentTranscriptions, userTranscriptions]); 21 | 22 | return combinedTranscriptions; 23 | } 24 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/hooks/useLocalMicTrack.ts: -------------------------------------------------------------------------------- 1 | import { TrackReferenceOrPlaceholder, useLocalParticipant } from "@livekit/components-react"; 2 | import { Track } from "livekit-client"; 3 | import { useMemo } from "react"; 4 | 5 | export default function useLocalMicTrack() { 6 | const { microphoneTrack, localParticipant } = useLocalParticipant(); 7 | 8 | const micTrackRef: TrackReferenceOrPlaceholder = useMemo(() => { 9 | return { 10 | participant: localParticipant, 11 | source: Track.Source.Microphone, 12 | publication: microphoneTrack, 13 | }; 14 | }, [localParticipant, microphoneTrack]); 15 | 16 | return micTrackRef; 17 | } 18 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/next.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = {}; 3 | 4 | export default nextConfig; 5 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "voice-assistant2", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint", 10 | "format:check": "prettier --check .", 11 | "format:write": "prettier --write ." 
12 | }, 13 | "dependencies": { 14 | "@livekit/components-react": "^2.9.3", 15 | "@livekit/components-styles": "^1.1.4", 16 | "framer-motion": "^11.18.0", 17 | "livekit-client": "^2.8.0", 18 | "livekit-server-sdk": "^2.9.7", 19 | "react": "^18.3.1", 20 | "react-dom": "^18.3.1" 21 | }, 22 | "devDependencies": { 23 | "@trivago/prettier-plugin-sort-imports": "^5.2.2", 24 | "@types/node": "^20.17.13", 25 | "@types/react": "^18.3.18", 26 | "@types/react-dom": "^18.3.5", 27 | "eslint": "^8.57.1", 28 | "eslint-config-next": "14.2.28", 29 | "eslint-config-prettier": "9.1.0", 30 | "next": "14", 31 | "postcss": "^8.5.1", 32 | "prettier": "^3.4.2", 33 | "tailwindcss": "^3.4.17", 34 | "typescript": "^5.7.3" 35 | }, 36 | "packageManager": "pnpm@9.15.9" 37 | } 38 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('postcss-load-config').Config} */ 2 | const config = { 3 | plugins: { 4 | tailwindcss: {}, 5 | }, 6 | }; 7 | 8 | export default config; 9 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": ["config:recommended"], 4 | "packageRules": [ 5 | { 6 | "matchUpdateTypes": ["minor", "patch", "pin", "digest"], 7 | "automerge": true 8 | } 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/tailwind.config.ts: -------------------------------------------------------------------------------- 1 | import type { Config } from "tailwindcss"; 2 | 3 | const config: Config = { 4 | content: [ 5 | "./pages/**/*.{js,ts,jsx,tsx,mdx}", 6 | "./components/**/*.{js,ts,jsx,tsx,mdx}", 7 | "./app/**/*.{js,ts,jsx,tsx,mdx}", 8 | ], 9 | theme: {}, 10 | plugins: [], 11 | }; 12 | export default config; 13 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/taskfile.yaml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | output: interleaved 3 | dotenv: [".env.local"] 4 | 5 | tasks: 6 | post_create: 7 | desc: "Runs after this template is instantiated as a Sandbox or Bootstrap" 8 | cmds: 9 | - echo -e "\nYour Next.js voice assistant is ready to go!\n" 10 | - echo -e "To give it a try, run the following commands:\r\n" 11 | - echo -e "\tcd {{.ROOT_DIR}}\r" 12 | - echo -e "\tpnpm install\r" 13 | - echo -e "\tpnpm dev\r\n" 14 | 15 | install: 16 | interactive: true 17 | cmds: 18 | - "pnpm install" 19 | 20 | dev: 21 | interactive: true 22 | cmds: 23 | - "pnpm dev" 24 | -------------------------------------------------------------------------------- /avatars/tavus/voice-assistant-frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "lib": ["dom", "dom.iterable", "esnext"], 4 | "allowJs": true, 5 | "skipLibCheck": true, 6 | "strict": true, 7 | "noEmit": true, 8 | "esModuleInterop": true, 9 | "module": "esnext", 10 | "moduleResolution": "bundler", 11 | "resolveJsonModule": true, 12 | "isolatedModules": true, 13 | "jsx": "preserve", 14 | "incremental": true, 15 | "plugins": [ 16 | { 17 | "name": "next" 18 | } 
19 | ], 20 | "paths": { 21 | "@/*": ["./*"] 22 | } 23 | }, 24 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 25 | "exclude": ["node_modules"] 26 | } 27 | -------------------------------------------------------------------------------- /basics/audio.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit-examples/python-agents-examples/35d7334a47d1eec24827e237dca83bf26bd8c1ca/basics/audio.wav -------------------------------------------------------------------------------- /basics/change_agent_instructions.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("listen-and-respond") 12 | logger.setLevel(logging.INFO) 13 | 14 | class ChangeInstructionsAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful agent. When the user speaks, you listen and respond. 19 | """, 20 | stt=deepgram.STT(), 21 | llm=openai.LLM(model="gpt-4o"), 22 | tts=openai.TTS(), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def on_enter(self): 27 | if self.session.participant.name.startswith("sip"): 28 | self.update_instructions(""" 29 | You are a helpful agent speaking on the phone. 30 | """) 31 | self.session.generate_reply() 32 | 33 | async def entrypoint(ctx: JobContext): 34 | await ctx.connect() 35 | 36 | session = AgentSession() 37 | 38 | await session.start( 39 | agent=ChangeInstructionsAgent(), 40 | room=ctx.room 41 | ) 42 | 43 | if __name__ == "__main__": 44 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/context_variables.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, deepgram, silero 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("context-variables") 11 | logger.setLevel(logging.INFO) 12 | 13 | class ContextAgent(Agent): 14 | def __init__(self, context_vars=None) -> None: 15 | instructions = """ 16 | You are a helpful agent. The user's name is {name}. 17 | They are {age} years old and live in {city}. 
18 | """ 19 | 20 | if context_vars: 21 | instructions = instructions.format(**context_vars) 22 | 23 | super().__init__( 24 | instructions=instructions, 25 | stt=deepgram.STT(), 26 | llm=openai.LLM(model="gpt-4o"), 27 | tts=openai.TTS(), 28 | vad=silero.VAD.load() 29 | ) 30 | 31 | async def on_enter(self): 32 | self.session.generate_reply() 33 | 34 | async def entrypoint(ctx: JobContext): 35 | await ctx.connect() 36 | 37 | context_variables = { 38 | "name": "Shayne", 39 | "age": 35, 40 | "city": "Toronto" 41 | } 42 | 43 | session = AgentSession() 44 | 45 | await session.start( 46 | agent=ContextAgent(context_vars=context_variables), 47 | room=ctx.room 48 | ) 49 | 50 | if __name__ == "__main__": 51 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/exit_message.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | from livekit.agents.llm import function_tool 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | logger = logging.getLogger("listen-and-respond") 13 | logger.setLevel(logging.INFO) 14 | 15 | class GoodbyeAgent(Agent): 16 | def __init__(self) -> None: 17 | super().__init__( 18 | instructions=""" 19 | You are a helpful agent. 20 | When the user wants to stop talking to you, use the end_session function to close the session. 21 | """, 22 | stt=deepgram.STT(), 23 | llm=openai.LLM(model="gpt-4o"), 24 | tts=openai.TTS(), 25 | vad=silero.VAD.load() 26 | ) 27 | 28 | @function_tool 29 | async def end_session(self): 30 | """When the user wants to stop talking to you, use this function to close the session.""" 31 | await self.session.drain() 32 | await self.session.aclose() 33 | 34 | async def on_exit(self): 35 | await self.session.say("Goodbye!") 36 | 37 | async def entrypoint(ctx: JobContext): 38 | await ctx.connect() 39 | 40 | session = AgentSession() 41 | 42 | await session.start( 43 | agent=GoodbyeAgent(), 44 | room=ctx.room 45 | ) 46 | 47 | if __name__ == "__main__": 48 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/function_calling.py: -------------------------------------------------------------------------------- 1 | ## This is a basic example of how to use function calling. 2 | ## To test the function, you can ask the agent to print to the console! 3 | 4 | import logging 5 | from pathlib import Path 6 | from dotenv import load_dotenv 7 | from livekit.agents import JobContext, WorkerOptions, cli 8 | from livekit.agents.llm import function_tool 9 | from livekit.agents.voice import Agent, AgentSession, RunContext 10 | from livekit.plugins import deepgram, openai, silero 11 | 12 | logger = logging.getLogger("function-calling") 13 | logger.setLevel(logging.INFO) 14 | 15 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 16 | 17 | class FunctionAgent(Agent): 18 | def __init__(self) -> None: 19 | super().__init__( 20 | instructions=""" 21 | You are a helpful assistant communicating through voice. Don't use any unpronouncable characters. 22 | Note: If asked to print to the console, use the `print_to_console` function. 
23 | """, 24 | stt=deepgram.STT(), 25 | llm=openai.LLM(model="gpt-4o"), 26 | tts=openai.TTS(), 27 | vad=silero.VAD.load() 28 | ) 29 | 30 | @function_tool 31 | async def print_to_console(self, context: RunContext): 32 | print("Console Print Success!") 33 | return None, "I've printed to the console." 34 | 35 | async def on_enter(self): 36 | self.session.generate_reply() 37 | 38 | async def entrypoint(ctx: JobContext): 39 | await ctx.connect() 40 | 41 | session = AgentSession() 42 | 43 | await session.start( 44 | agent=FunctionAgent(), 45 | room=ctx.room 46 | ) 47 | 48 | if __name__ == "__main__": 49 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/interrupts_user.py: -------------------------------------------------------------------------------- 1 | # This agent isn't interruptable, so it will keep talking even if the user tries to speak. 2 | 3 | from pathlib import Path 4 | from typing import AsyncIterable, Optional 5 | import re 6 | import logging 7 | from dotenv import load_dotenv 8 | from livekit.agents import JobContext, WorkerOptions, cli 9 | from livekit.agents.voice import Agent, AgentSession 10 | from livekit.plugins import deepgram, openai 11 | from livekit import rtc 12 | 13 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 14 | 15 | # Set up logging 16 | logging.basicConfig(level=logging.INFO) 17 | logger = logging.getLogger(__name__) 18 | 19 | class UninterruptableAgent(Agent): 20 | def __init__(self) -> None: 21 | super().__init__( 22 | instructions=""" 23 | You are a helpful assistant communicating through voice who will interrupt the user if they try to say more than one sentence. 24 | """, 25 | stt=deepgram.STT(), 26 | llm=openai.LLM(model="gpt-4o"), 27 | tts=openai.TTS(), 28 | allow_interruptions=False 29 | ) 30 | self.text_buffer = "" 31 | 32 | async def stt_node(self, text: AsyncIterable[str], model_settings: Optional[dict] = None) -> Optional[AsyncIterable[rtc.AudioFrame]]: 33 | parent_stream = super().stt_node(text, model_settings) 34 | 35 | if parent_stream is None: 36 | return None 37 | 38 | async def replay_user_input(text: str): 39 | await self.session.say("Let me stop you there, and respond. 
You said: " + text) 40 | 41 | async def process_stream(): 42 | async for event in parent_stream: 43 | if hasattr(event, 'type') and str(event.type) == "SpeechEventType.FINAL_TRANSCRIPT" and event.alternatives: 44 | transcript = event.alternatives[0].text 45 | 46 | self.text_buffer += " " + transcript 47 | self.text_buffer = self.text_buffer.strip() 48 | 49 | sentence_pattern = r'[.!?]+' 50 | if re.search(sentence_pattern, self.text_buffer): 51 | sentences = re.split(sentence_pattern, self.text_buffer) 52 | 53 | if len(sentences) > 1: 54 | for i in range(len(sentences) - 1): 55 | if sentences[i].strip(): 56 | logger.info(f"Complete sentence detected: '{sentences[i].strip()}'") 57 | await replay_user_input(sentences[i].strip()) 58 | 59 | self.text_buffer = sentences[-1].strip() 60 | 61 | yield event 62 | 63 | return process_stream() 64 | 65 | async def on_enter(self): 66 | self.session.say("I'll interrupt you after 1 sentence.") 67 | 68 | async def entrypoint(ctx: JobContext): 69 | await ctx.connect() 70 | 71 | session = AgentSession() 72 | 73 | await session.start( 74 | agent=UninterruptableAgent(), 75 | room=ctx.room 76 | ) 77 | 78 | if __name__ == "__main__": 79 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/listen_and_respond.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("listen-and-respond") 12 | logger.setLevel(logging.INFO) 13 | 14 | class SimpleAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful agent. When the user speaks, you listen and respond. 19 | """, 20 | stt=deepgram.STT(), 21 | llm=openai.LLM(model="gpt-4o"), 22 | tts=openai.TTS(), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def on_enter(self): 27 | self.session.generate_reply() 28 | 29 | async def entrypoint(ctx: JobContext): 30 | await ctx.connect() 31 | 32 | session = AgentSession() 33 | 34 | await session.start( 35 | agent=SimpleAgent(), 36 | room=ctx.room 37 | ) 38 | 39 | if __name__ == "__main__": 40 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/playing_audio.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | import wave 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.llm import function_tool 7 | from livekit.agents.voice import Agent, AgentSession, RunContext 8 | from livekit.plugins import deepgram, openai, silero 9 | from livekit import rtc 10 | 11 | logger = logging.getLogger("function-calling") 12 | logger.setLevel(logging.INFO) 13 | 14 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 15 | 16 | class FunctionAgent(Agent): 17 | def __init__(self) -> None: 18 | super().__init__( 19 | instructions=""" 20 | You are a helpful assistant communicating through voice. Don't use any unpronouncable characters. 21 | If asked to play audio, use the `play_audio_file` function. 
22 | """, 23 | stt=deepgram.STT(), 24 | llm=openai.LLM(model="gpt-4o"), 25 | tts=openai.TTS(), 26 | vad=silero.VAD.load() 27 | ) 28 | 29 | @function_tool 30 | async def play_audio_file(self, context: RunContext): 31 | audio_path = Path(__file__).parent / "audio.wav" 32 | 33 | with wave.open(str(audio_path), 'rb') as wav_file: 34 | num_channels = wav_file.getnchannels() 35 | sample_rate = wav_file.getframerate() 36 | frames = wav_file.readframes(wav_file.getnframes()) 37 | 38 | audio_frame = rtc.AudioFrame( 39 | data=frames, 40 | sample_rate=sample_rate, 41 | num_channels=num_channels, 42 | samples_per_channel=wav_file.getnframes() 43 | ) 44 | 45 | async def audio_generator(): 46 | yield audio_frame 47 | 48 | await self.session.say("Playing audio file", audio=audio_generator()) 49 | 50 | return None, "I've played the audio file for you." 51 | 52 | async def on_enter(self): 53 | self.session.generate_reply() 54 | 55 | async def entrypoint(ctx: JobContext): 56 | await ctx.connect() 57 | 58 | session = AgentSession() 59 | 60 | await session.start( 61 | agent=FunctionAgent(), 62 | room=ctx.room 63 | ) 64 | 65 | if __name__ == "__main__": 66 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/repeater.py: -------------------------------------------------------------------------------- 1 | # Repeats what the user says using a STT -> TTS loop, without any LLM. 2 | 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import deepgram, openai 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | async def entrypoint(ctx: JobContext): 12 | 13 | await ctx.connect() 14 | session = AgentSession() 15 | 16 | @session.on("user_input_transcribed") 17 | def on_transcript(transcript): 18 | if transcript.is_final: 19 | session.say(transcript.transcript) 20 | 21 | await session.start( 22 | agent=Agent( 23 | instructions="You are a helpful assistant that repeats what the user says.", 24 | stt=deepgram.STT(), 25 | tts=openai.TTS(), 26 | allow_interruptions=False 27 | ), 28 | room=ctx.room 29 | ) 30 | 31 | if __name__ == "__main__": 32 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /basics/uninterruptable.py: -------------------------------------------------------------------------------- 1 | # This agent isn't interruptable, so it will keep talking even if the user tries to speak. 2 | 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import deepgram, openai 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | class UninterruptableAgent(Agent): 12 | def __init__(self) -> None: 13 | super().__init__( 14 | instructions=""" 15 | You are a helpful assistant communicating through voice who is not interruptable. 
16 | """, 17 | stt=deepgram.STT(), 18 | llm=openai.LLM(model="gpt-4o"), 19 | tts=openai.TTS(), 20 | allow_interruptions=False 21 | ) 22 | 23 | async def on_enter(self): 24 | self.session.generate_reply(user_input="Say something somewhat long and boring so I can test if you're interruptable.") 25 | 26 | async def entrypoint(ctx: JobContext): 27 | await ctx.connect() 28 | 29 | session = AgentSession() 30 | 31 | await session.start( 32 | agent=UninterruptableAgent(), 33 | room=ctx.room 34 | ) 35 | 36 | if __name__ == "__main__": 37 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /check_agent_example_coverage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | check_agent_example_coverage.py 4 | ──────────────────────────────── 5 | Scan every .py file beneath the current directory (or the paths you provide) 6 | and report which public methods of livekit.agents.voice.Agent and 7 | AgentSession are already exercised or overridden in the example code. 8 | 9 | USAGE 10 | ===== 11 | 12 | # Scan the whole repo 13 | python check_agent_example_coverage.py 14 | 15 | # Or cherry-pick folders 16 | python check_agent_example_coverage.py basics/ pipeline-stt/ 17 | 18 | # Just print warnings for uncovered methods 19 | python check_agent_example_coverage.py --warn-only 20 | 21 | # Return non-zero exit code if coverage incomplete (for CI) 22 | python check_agent_example_coverage.py --fail-on-incomplete 23 | 24 | The report looks like: 25 | 26 | Agent (7/9 methods used) 27 | ✔ generate_reply 28 | ✔ on_enter 29 | ✘ on_exit 30 | … 31 | 32 | AgentSession (5/8 methods used) 33 | ✔ start 34 | ✔ stop 35 | ✘ reconnect 36 | … 37 | 38 | """ 39 | 40 | from __future__ import annotations 41 | 42 | import argparse 43 | import inspect 44 | import sys 45 | from pathlib import Path 46 | from collections import defaultdict 47 | from livekit.agents.voice import Agent, AgentSession 48 | 49 | 50 | EXCLUDE_DIRS = { 51 | ".git", 52 | ".hg", 53 | ".svn", 54 | "__pycache__", 55 | "venv", 56 | ".venv", 57 | "env", 58 | ".env", 59 | "build", 60 | "dist", 61 | } 62 | 63 | def _public_methods(cls) -> set[str]: 64 | return { 65 | name 66 | for name, obj in inspect.getmembers(cls, inspect.isfunction) 67 | if not name.startswith("_") 68 | } 69 | 70 | 71 | AGENT_METHODS = _public_methods(Agent) 72 | SESSION_METHODS = _public_methods(AgentSession) 73 | 74 | def _scan(paths: list[Path]): 75 | """Return a dict {'Agent': {m: bool}, 'AgentSession': {m: bool}}.""" 76 | found = { 77 | "Agent": defaultdict(bool, {m: False for m in AGENT_METHODS}), 78 | "AgentSession": defaultdict(bool, {m: False for m in SESSION_METHODS}), 79 | } 80 | 81 | for base in paths: 82 | for py in base.rglob("*.py"): 83 | if any(part in EXCLUDE_DIRS for part in py.parts): 84 | continue 85 | 86 | try: 87 | code = py.read_text(encoding="utf-8", errors="ignore") 88 | except Exception: 89 | continue 90 | 91 | for m in AGENT_METHODS: 92 | call_pat = f".{m}(" 93 | def_pat = f"def {m}(" 94 | if call_pat in code or def_pat in code: 95 | found["Agent"][m] = True 96 | for m in SESSION_METHODS: 97 | call_pat = f".{m}(" 98 | def_pat = f"def {m}(" 99 | if call_pat in code or def_pat in code: 100 | found["AgentSession"][m] = True 101 | return found 102 | 103 | def _report(found: dict[str, dict[str, bool]], warn_only=False): 104 | incomplete = False 105 | uncovered_methods = [] 106 | 107 | for cls, methods in found.items(): 108 | 
total = len(methods) 109 | used = sum(methods.values()) 110 | 111 | if used < total: 112 | incomplete = True 113 | 114 | if not warn_only: 115 | print(f"\n{cls} ({used}/{total} methods used)") 116 | for m in sorted(methods): 117 | tick = "✔" if methods[m] else "✘" 118 | print(f" {tick} {m}") 119 | if not methods[m]: 120 | uncovered_methods.append(f"{cls}.{m}") 121 | elif used < total: 122 | print(f"\nWARNING: {cls} has uncovered methods ({used}/{total} covered)") 123 | for m in sorted(methods): 124 | if not methods[m]: 125 | print(f" Missing: {cls}.{m}") 126 | uncovered_methods.append(f"{cls}.{m}") 127 | 128 | return incomplete, uncovered_methods 129 | 130 | if __name__ == "__main__": 131 | parser = argparse.ArgumentParser(description="Check Agent API coverage in examples") 132 | parser.add_argument("paths", nargs="*", default=[Path.cwd()], 133 | help="Paths to scan (default: current directory)") 134 | parser.add_argument("--warn-only", action="store_true", 135 | help="Only show warnings for uncovered methods") 136 | parser.add_argument("--fail-on-incomplete", action="store_true", 137 | help="Return non-zero exit code if coverage is incomplete") 138 | 139 | args = parser.parse_args() 140 | 141 | bases = [Path(p) for p in args.paths] 142 | coverage = _scan(bases) 143 | incomplete, uncovered = _report(coverage, warn_only=args.warn_only) 144 | 145 | if incomplete and args.fail_on_incomplete: 146 | print(f"\nERROR: Found {len(uncovered)} uncovered methods. Add examples that use these methods.") 147 | sys.exit(1) -------------------------------------------------------------------------------- /complex-agents/medical_office_triage/prompts/billing_prompt.yaml: -------------------------------------------------------------------------------- 1 | instructions: | 2 | You are the Medical Billing agent at a healthcare office. You help patients with insurance information, 3 | copayments, medical bills, payment processing, and billing inquiries. Be clear and precise with financial information. 4 | 5 | Follow these guidelines: 6 | - Greet the patient and confirm their identity for HIPAA compliance and security purposes 7 | - Address medical billing inquiries with accuracy and attention to detail 8 | - Explain medical charges, insurance coverage, copays, and payment options clearly 9 | - Handle sensitive patient financial information with appropriate security measures 10 | - Offer solutions for payment issues or medical billing discrepancies 11 | - Provide information about available payment plans or financial assistance if relevant 12 | - Thank the patient for choosing our medical practice and ask if they have other billing questions -------------------------------------------------------------------------------- /complex-agents/medical_office_triage/prompts/support_prompt.yaml: -------------------------------------------------------------------------------- 1 | instructions: | 2 | You are the Patient Support agent at a medical office. You help patients with appointment scheduling, 3 | prescription refills, medical records requests, and general healthcare questions. Be patient, empathetic and thorough in your explanations. 
4 | 5 | Follow these guidelines: 6 | - Greet the patient and acknowledge that you're here to help with their healthcare needs 7 | - Ask for specific details about their request to better understand it 8 | - Provide clear information about office procedures, appointment availability, and medical services 9 | - Use simple language and avoid medical jargon unless the patient demonstrates medical knowledge 10 | - Confirm whether you've addressed their needs completely 11 | - If you can't resolve their issue, explain what steps will be taken next 12 | - Thank them for their patience and offer additional assistance if needed 13 | - Maintain patient confidentiality and follow HIPAA guidelines at all times -------------------------------------------------------------------------------- /complex-agents/medical_office_triage/prompts/triage_prompt.yaml: -------------------------------------------------------------------------------- 1 | instructions: | 2 | You are the Medical Office Triage agent. Your job is to determine if the patient needs 3 | help with medical support services or billing issues. Ask questions to understand their needs, 4 | then transfer them to the appropriate department. 5 | 6 | Follow these guidelines: 7 | - Greet the patient warmly and ask how you can help them today 8 | - Listen carefully to determine if their issue is related to medical services or billing 9 | - Ask clarifying questions if needed to properly categorize their request 10 | - For medical services: appointment scheduling, prescription refills, medical advice, test results 11 | - For billing: insurance questions, copays, medical bills, payment plans 12 | - Transfer them to the appropriate department once you understand their needs 13 | - If the patient has multiple issues, address the most urgent concern first 14 | - Be professional, courteous, and empathetic in your communication 15 | - Maintain patient confidentiality and follow HIPAA guidelines at all times -------------------------------------------------------------------------------- /complex-agents/medical_office_triage/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | def load_prompt(filename): 5 | """Load a prompt from a YAML file.""" 6 | script_dir = os.path.dirname(os.path.abspath(__file__)) 7 | prompt_path = os.path.join(script_dir, 'prompts', filename) 8 | 9 | try: 10 | with open(prompt_path, 'r') as file: 11 | prompt_data = yaml.safe_load(file) 12 | return prompt_data.get('instructions', '') 13 | except (FileNotFoundError, yaml.YAMLError) as e: 14 | print(f"Error loading prompt file {filename}: {e}") 15 | return "" -------------------------------------------------------------------------------- /complex-agents/personal_shopper/add_test_orders.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import logging 5 | from database import CustomerDatabase 6 | 7 | # Configure logging 8 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 9 | logger = logging.getLogger("test-orders") 10 | 11 | def add_test_orders(): 12 | """Add test orders for Shayne Parlo.""" 13 | # Initialize the database 14 | db = CustomerDatabase() 15 | 16 | # Create or get Shayne Parlo 17 | first_name = "Shayne" 18 | last_name = "Parlo" 19 | customer_id = db.get_or_create_customer(first_name, last_name) 20 | logger.info(f"Customer ID for {first_name} {last_name}: 
{customer_id}") 21 | 22 | # Add test orders 23 | 24 | # Order 1: Electronics 25 | order1 = { 26 | "items": [ 27 | { 28 | "name": "Smartphone XS Pro", 29 | "quantity": 1, 30 | "price": 999.99 31 | }, 32 | { 33 | "name": "Wireless Earbuds", 34 | "quantity": 1, 35 | "price": 149.99 36 | }, 37 | { 38 | "name": "Phone Case (Black)", 39 | "quantity": 1, 40 | "price": 29.99 41 | } 42 | ], 43 | "total": 1179.97, 44 | "payment_method": "Credit Card", 45 | "shipping_address": "123 Main St, Anytown, USA" 46 | } 47 | 48 | # Order 2: Clothing 49 | order2 = { 50 | "items": [ 51 | { 52 | "name": "Men's Casual Shirt (Blue)", 53 | "quantity": 2, 54 | "price": 39.99 55 | }, 56 | { 57 | "name": "Jeans (Dark Wash)", 58 | "quantity": 1, 59 | "price": 59.99 60 | }, 61 | { 62 | "name": "Leather Belt", 63 | "quantity": 1, 64 | "price": 34.99 65 | } 66 | ], 67 | "total": 174.96, 68 | "payment_method": "PayPal", 69 | "shipping_address": "123 Main St, Anytown, USA" 70 | } 71 | 72 | # Order 3: Home Goods 73 | order3 = { 74 | "items": [ 75 | { 76 | "name": "Coffee Maker", 77 | "quantity": 1, 78 | "price": 89.99 79 | }, 80 | { 81 | "name": "Towel Set", 82 | "quantity": 1, 83 | "price": 49.99 84 | }, 85 | { 86 | "name": "Decorative Pillows", 87 | "quantity": 2, 88 | "price": 24.99 89 | } 90 | ], 91 | "total": 189.96, 92 | "payment_method": "Credit Card", 93 | "shipping_address": "123 Main St, Anytown, USA" 94 | } 95 | 96 | # Add orders to database 97 | order1_id = db.add_order(customer_id, order1) 98 | logger.info(f"Added Order #{order1_id}: Electronics - Total: ${order1['total']}") 99 | 100 | order2_id = db.add_order(customer_id, order2) 101 | logger.info(f"Added Order #{order2_id}: Clothing - Total: ${order2['total']}") 102 | 103 | order3_id = db.add_order(customer_id, order3) 104 | logger.info(f"Added Order #{order3_id}: Home Goods - Total: ${order3['total']}") 105 | 106 | # Verify orders were added 107 | order_history = db.get_customer_order_history(first_name, last_name) 108 | logger.info(f"Order history for {first_name} {last_name}:\n{order_history}") 109 | 110 | return order1_id, order2_id, order3_id 111 | 112 | if __name__ == "__main__": 113 | order_ids = add_test_orders() 114 | print(f"Added test orders with IDs: {order_ids}") 115 | print("Test orders have been added successfully for Shayne Parlo.") -------------------------------------------------------------------------------- /complex-agents/personal_shopper/customer_data.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit-examples/python-agents-examples/35d7334a47d1eec24827e237dca83bf26bd8c1ca/complex-agents/personal_shopper/customer_data.db -------------------------------------------------------------------------------- /complex-agents/personal_shopper/database.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import os 3 | import json 4 | from typing import List, Dict, Optional, Any 5 | import logging 6 | 7 | logger = logging.getLogger("personal-shopper-db") 8 | logger.setLevel(logging.INFO) 9 | 10 | class CustomerDatabase: 11 | def __init__(self, db_path: str = None): 12 | """Initialize the customer database.""" 13 | if db_path is None: 14 | # Use a default path in the same directory as this file 15 | script_dir = os.path.dirname(os.path.abspath(__file__)) 16 | db_path = os.path.join(script_dir, 'customer_data.db') 17 | 18 | self.db_path = db_path 19 | self._initialize_db() 20 | 21 | def _initialize_db(self): 22 | """Create 
the database and tables if they don't exist.""" 23 | conn = sqlite3.connect(self.db_path) 24 | cursor = conn.cursor() 25 | 26 | # Create customers table 27 | cursor.execute(''' 28 | CREATE TABLE IF NOT EXISTS customers ( 29 | id INTEGER PRIMARY KEY AUTOINCREMENT, 30 | first_name TEXT NOT NULL, 31 | last_name TEXT NOT NULL, 32 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 33 | ) 34 | ''') 35 | 36 | # Create orders table 37 | cursor.execute(''' 38 | CREATE TABLE IF NOT EXISTS orders ( 39 | id INTEGER PRIMARY KEY AUTOINCREMENT, 40 | customer_id INTEGER NOT NULL, 41 | order_details TEXT NOT NULL, 42 | order_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 43 | FOREIGN KEY (customer_id) REFERENCES customers (id) 44 | ) 45 | ''') 46 | 47 | conn.commit() 48 | conn.close() 49 | logger.info(f"Database initialized at {self.db_path}") 50 | 51 | def get_or_create_customer(self, first_name: str, last_name: str) -> int: 52 | """Get a customer by name or create if not exists. Returns customer ID.""" 53 | conn = sqlite3.connect(self.db_path) 54 | cursor = conn.cursor() 55 | 56 | # Check if customer exists 57 | cursor.execute( 58 | "SELECT id FROM customers WHERE first_name = ? AND last_name = ?", 59 | (first_name, last_name) 60 | ) 61 | result = cursor.fetchone() 62 | 63 | if result: 64 | customer_id = result[0] 65 | logger.info(f"Found existing customer: {first_name} {last_name} (ID: {customer_id})") 66 | else: 67 | # Create new customer 68 | cursor.execute( 69 | "INSERT INTO customers (first_name, last_name) VALUES (?, ?)", 70 | (first_name, last_name) 71 | ) 72 | customer_id = cursor.lastrowid 73 | logger.info(f"Created new customer: {first_name} {last_name} (ID: {customer_id})") 74 | 75 | conn.commit() 76 | conn.close() 77 | return customer_id 78 | 79 | def add_order(self, customer_id: int, order_details: Dict[str, Any]) -> int: 80 | """Add a new order for a customer. Returns order ID.""" 81 | conn = sqlite3.connect(self.db_path) 82 | cursor = conn.cursor() 83 | 84 | # Convert order details to JSON string 85 | order_json = json.dumps(order_details) 86 | 87 | cursor.execute( 88 | "INSERT INTO orders (customer_id, order_details) VALUES (?, ?)", 89 | (customer_id, order_json) 90 | ) 91 | 92 | order_id = cursor.lastrowid 93 | logger.info(f"Added new order (ID: {order_id}) for customer ID: {customer_id}") 94 | 95 | conn.commit() 96 | conn.close() 97 | return order_id 98 | 99 | def get_customer_orders(self, customer_id: int) -> List[Dict[str, Any]]: 100 | """Get all orders for a customer.""" 101 | conn = sqlite3.connect(self.db_path) 102 | conn.row_factory = sqlite3.Row # This enables column access by name 103 | cursor = conn.cursor() 104 | 105 | cursor.execute( 106 | "SELECT id, order_details, order_date FROM orders WHERE customer_id = ? ORDER BY order_date DESC", 107 | (customer_id,) 108 | ) 109 | 110 | orders = [] 111 | for row in cursor.fetchall(): 112 | order_data = json.loads(row['order_details']) 113 | orders.append({ 114 | 'id': row['id'], 115 | 'date': row['order_date'], 116 | 'details': order_data 117 | }) 118 | 119 | conn.close() 120 | return orders 121 | 122 | def get_customer_order_history(self, first_name: str, last_name: str) -> str: 123 | """Get a formatted string of customer order history for LLM consumption.""" 124 | conn = sqlite3.connect(self.db_path) 125 | cursor = conn.cursor() 126 | 127 | # Get customer ID 128 | cursor.execute( 129 | "SELECT id FROM customers WHERE first_name = ? 
AND last_name = ?", 130 | (first_name, last_name) 131 | ) 132 | result = cursor.fetchone() 133 | 134 | if not result: 135 | conn.close() 136 | return "No order history found for this customer." 137 | 138 | customer_id = result[0] 139 | orders = self.get_customer_orders(customer_id) 140 | 141 | if not orders: 142 | return f"Customer {first_name} {last_name} has no previous orders." 143 | 144 | # Format order history for LLM 145 | history = f"Order history for {first_name} {last_name}:\n\n" 146 | 147 | for order in orders: 148 | history += f"Order #{order['id']} (Date: {order['date']}):\n" 149 | details = order['details'] 150 | 151 | if 'items' in details: 152 | for item in details['items']: 153 | history += f"- {item.get('quantity', 1)}x {item.get('name', 'Unknown Item')}" 154 | if 'price' in item: 155 | history += f" (${item['price']})" 156 | history += "\n" 157 | else: 158 | # Handle case where order details might be in a different format 159 | history += f"- {json.dumps(details)}\n" 160 | 161 | history += "\n" 162 | 163 | conn.close() 164 | return history -------------------------------------------------------------------------------- /complex-agents/personal_shopper/prompts/returns_prompt.yaml: -------------------------------------------------------------------------------- 1 | instructions: | 2 | You are the Returns agent for our personal shopping service. You help customers with returning 3 | items, processing refunds, and resolving issues with their purchases. Be patient and solution-oriented. 4 | 5 | Return Policies: 6 | - 60-day return window for most items 7 | - Items must be in original condition with tags attached 8 | - Original receipt or order number required for all returns 9 | - Free return shipping for defective items 10 | - Store credit offered for returns without receipt 11 | - Expedited refunds available for loyalty program members 12 | - Special items (electronics, perishables) have a 14-day return window 13 | 14 | Follow these guidelines: 15 | - Greet the customer and express that you're here to help with their return 16 | - If the customer hasn't been identified yet, ask for their first and last name and use the identify_customer function 17 | - Use get_order_history to retrieve the customer's previous orders 18 | - Ask for the order number and item they wish to return 19 | - Determine the reason for the return to provide the appropriate solution 20 | - Use process_return to handle the return (requires order ID, item name, and reason) 21 | - Clearly explain the return process and any applicable policies 22 | - Process the return or exchange efficiently 23 | - Offer alternatives if the return doesn't meet policy requirements 24 | - Thank them for their patience and ask if they need help with anything else 25 | 26 | Return Process: 27 | 1. Identify the customer using identify_customer 28 | 2. Retrieve their order history using get_order_history 29 | 3. Confirm which item they want to return and from which order 30 | 4. Process the return using process_return 31 | 32 | Always verify that the item being returned exists in the customer's order history before processing the return. -------------------------------------------------------------------------------- /complex-agents/personal_shopper/prompts/sales_prompt.yaml: -------------------------------------------------------------------------------- 1 | instructions: | 2 | You are the Sales agent for our personal shopping service. You help customers find and purchase 3 | products that meet their needs. 
Be enthusiastic and knowledgeable about our product offerings. 4 | 5 | Sales Policies: 6 | - We offer a 30-day price match guarantee on all items 7 | - Free shipping on orders over $50 8 | - 10% discount for first-time customers (promo code: WELCOME10) 9 | - Loyalty program members earn 2 points per dollar spent 10 | - Financing available on purchases over $200 11 | 12 | Follow these guidelines: 13 | - Greet the customer warmly and ask about their shopping needs 14 | - If the customer hasn't been identified yet, ask for their first and last name and use the identify_customer function 15 | - Ask questions to understand their preferences, budget, and requirements 16 | - Make personalized product recommendations based on their needs 17 | - Highlight key features and benefits of recommended products 18 | - Inform customers about current promotions and discounts 19 | - Use start_order to begin a new order for the customer 20 | - Use add_item_to_order to add each item the customer wants to purchase 21 | - When the order is complete, use complete_order to finalize the purchase 22 | - Thank them for their business and offer additional assistance if needed 23 | 24 | Order Process: 25 | 1. Identify the customer using identify_customer 26 | 2. Start a new order using start_order 27 | 3. Add items to the order using add_item_to_order (include item name, quantity, and price) 28 | 4. Complete the order using complete_order 29 | 30 | Remember that all customer orders are saved to our database and will be available if they need to make a return later. -------------------------------------------------------------------------------- /complex-agents/personal_shopper/prompts/triage_prompt.yaml: -------------------------------------------------------------------------------- 1 | instructions: | 2 | You are the Personal Shopper Triage agent. Your job is to determine if the customer needs 3 | help with making a purchase (Sales) or returning an item (Returns). Ask questions to understand 4 | their needs, then transfer them to the appropriate department. 5 | 6 | Follow these guidelines: 7 | - Greet the customer warmly and ask how you can help them with their shopping needs today 8 | - Ask for the customer's first and last name to identify them in our system using the identify_customer function 9 | - Listen carefully to determine if they want to make a purchase or return an item 10 | - Ask clarifying questions if needed to properly categorize their request 11 | - Transfer them to the appropriate department once you understand their needs 12 | - If the customer has multiple issues, address the primary concern first 13 | - Be friendly, helpful, and make the customer feel valued 14 | 15 | Important: Always identify the customer before transferring them to another department. This ensures their information and order history will be available to the next agent. 
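(The department hand-off these prompts describe is implemented in `personal_shopper.py`, which is not reproduced in this excerpt. A minimal sketch of the pattern — a `@function_tool` that returns the next `Agent`, the same approach used in `flows/simple_flow.py` later in this listing — might look like the following; the class names and instructions here are hypothetical.)

```python
from livekit.agents.llm import function_tool
from livekit.agents.voice import Agent

class SalesAgent(Agent):
    def __init__(self) -> None:
        super().__init__(instructions="You are the Sales agent.")

class TriageAgent(Agent):
    def __init__(self) -> None:
        super().__init__(instructions="You are the Triage agent.")

    @function_tool
    async def transfer_to_sales(self) -> Agent:
        """Transfer the customer to the Sales department."""
        await self.session.say("Let me connect you with our Sales team.")
        # Returning a new Agent from a tool call makes it the active agent
        # for the session, carrying the conversation forward.
        return SalesAgent()
```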
-------------------------------------------------------------------------------- /complex-agents/personal_shopper/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | def load_prompt(filename): 5 | """Load a prompt from a YAML file.""" 6 | script_dir = os.path.dirname(os.path.abspath(__file__)) 7 | prompt_path = os.path.join(script_dir, 'prompts', filename) 8 | 9 | try: 10 | with open(prompt_path, 'r') as file: 11 | prompt_data = yaml.safe_load(file) 12 | return prompt_data.get('instructions', '') 13 | except (FileNotFoundError, yaml.YAMLError) as e: 14 | print(f"Error loading prompt file {filename}: {e}") 15 | return "" -------------------------------------------------------------------------------- /egress/recording_agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit import api 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.plugins import openai, silero, deepgram 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | logger = logging.getLogger("recording-agent") 13 | logger.setLevel(logging.INFO) 14 | 15 | class RecordingAgent(Agent): 16 | def __init__(self) -> None: 17 | super().__init__( 18 | instructions=""" 19 | You are a helpful agent. When the user speaks, you listen and respond. 20 | """, 21 | stt=deepgram.STT(), 22 | llm=openai.LLM(model="gpt-4o"), 23 | tts=openai.TTS(), 24 | vad=silero.VAD.load() 25 | ) 26 | 27 | async def on_enter(self): 28 | self.session.generate_reply() 29 | 30 | async def entrypoint(ctx: JobContext): 31 | file_contents = "" 32 | with open("/path/to/credentials.json", "r") as f: 33 | file_contents = f.read() 34 | 35 | req = api.RoomCompositeEgressRequest( 36 | room_name="my-room", 37 | layout="speaker", 38 | preset=api.EncodingOptionsPreset.H264_720P_30, 39 | audio_only=False, 40 | segment_outputs=[api.SegmentedFileOutput( 41 | filename_prefix="my-output", 42 | playlist_name="my-playlist.m3u8", 43 | live_playlist_name="my-live-playlist.m3u8", 44 | segment_duration=5, 45 | gcp=api.GCPUpload( 46 | credentials=file_contents, 47 | bucket="", 48 | ), 49 | )], 50 | ) 51 | lkapi = api.LiveKitAPI() 52 | res = await lkapi.egress.start_room_composite_egress(req) 53 | 54 | await ctx.connect() 55 | 56 | session = AgentSession() 57 | 58 | await session.start( 59 | agent=RecordingAgent(), 60 | room=ctx.room 61 | ) 62 | 63 | await lkapi.aclose() 64 | 65 | if __name__ == "__main__": 66 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /evaluating-agents/README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | This example shows how to test or evaluate an agent using another agent. 4 | 5 | # How to Run 6 | 7 | 1. Run both agents 8 | 9 | ``` console 10 | python agent_to_test.py start 11 | python agent_evals.py start 12 | ``` 13 | 14 | 2. 
Dispatch agents to the same room 15 | 16 | ``` console 17 | lk dispatch create \ 18 | --agent-name agent_to_test \ 19 | --room my-room \ 20 | --metadata 'my_job_metadata' 21 | 22 | lk dispatch create \ 23 | --agent-name agent_evaluator \ 24 | --room my-room \ 25 | --metadata 'my_job_metadata' 26 | ``` 27 | -------------------------------------------------------------------------------- /evaluating-agents/agent_evals.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit import rtc 5 | from livekit.agents import JobContext, RoomInputOptions, WorkerOptions, cli 6 | from livekit.agents.llm import function_tool 7 | from livekit.agents.voice import Agent, AgentSession, RunContext 8 | from livekit.plugins import openai, deepgram, silero 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | logger = logging.getLogger("openai_llm") 13 | logger.setLevel(logging.INFO) 14 | 15 | class SimpleEvaluationAgent(Agent): 16 | def __init__(self) -> None: 17 | super().__init__( 18 | instructions=""" 19 | You are evaluating the performance of a user. 20 | 21 | Here are the questions you need to ask. These are questions from a fictional world, 22 | the answer might not always seem to make sense, but it's important to only grade the answer 23 | based on the following question and answer pairs: 24 | 25 | Q: What is the airspeed velocity of an unladen african swallow? 26 | A: 42 miles per hour 27 | 28 | Q: What is the capital of France? 29 | A: New Paris City 30 | 31 | Q: What is the capital of Germany? 32 | A: London 33 | 34 | 35 | After each question, call the "grade_answer" function with either "PASS" or "FAIL" based on the agent's answer. 36 | 37 | Do not share the answers with the user. Simply ask the questions and grade the answers. 38 | """, 39 | stt=deepgram.STT(), 40 | llm=openai.LLM(), 41 | tts=openai.TTS(), 42 | vad=silero.VAD.load() 43 | ) 44 | 45 | async def on_enter(self): 46 | self.session.generate_reply() 47 | 48 | @function_tool 49 | async def grade_answer(self, context: RunContext, result: str, question: str): 50 | logger.info(f"Grade for question: {question} - {result}") 51 | self.session.say(result) 52 | return None, "I've graded the answer." 
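    # How the grading loop above works, in brief: because grade_answer is
    # decorated with @function_tool, it is exposed to the evaluator's LLM as a
    # callable tool. After each scripted question is asked and the agent under
    # test answers, the LLM is expected to call grade_answer with "PASS" or
    # "FAIL", which logs the grade and speaks the result into the room.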
53 | 54 | async def entrypoint(ctx: JobContext): 55 | await ctx.connect() 56 | 57 | session = AgentSession() 58 | 59 | await session.start( 60 | agent=SimpleEvaluationAgent(), 61 | room=ctx.room, 62 | room_input_options=RoomInputOptions( 63 | participant_kinds=[ 64 | rtc.ParticipantKind.PARTICIPANT_KIND_AGENT, 65 | ] 66 | ), 67 | ) 68 | 69 | if __name__ == "__main__": 70 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, agent_name="agent_evaluator")) 71 | -------------------------------------------------------------------------------- /evaluating-agents/agent_to_test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit import rtc 5 | from livekit.agents import JobContext, RoomInputOptions, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, deepgram, silero 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("openai_llm") 12 | logger.setLevel(logging.INFO) 13 | 14 | class SimpleAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful agent. 19 | """, 20 | stt=deepgram.STT(), 21 | llm=openai.LLM(), 22 | tts=openai.TTS(), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def entrypoint(ctx: JobContext): 27 | await ctx.connect() 28 | 29 | session = AgentSession() 30 | 31 | await session.start( 32 | agent=SimpleAgent(), 33 | room=ctx.room, 34 | room_input_options=RoomInputOptions( 35 | # uncomment to enable Krisp BVC noise cancellation 36 | # noise_cancellation=noise_cancellation.BVC(), 37 | # listen agents in addition to SIP and standard participants 38 | participant_kinds=[ 39 | rtc.ParticipantKind.PARTICIPANT_KIND_SIP, 40 | rtc.ParticipantKind.PARTICIPANT_KIND_STANDARD, 41 | rtc.ParticipantKind.PARTICIPANT_KIND_AGENT, 42 | ] 43 | ), 44 | 45 | ) 46 | 47 | if __name__ == "__main__": 48 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, agent_name="agent_to_test")) 49 | -------------------------------------------------------------------------------- /events/basic_event.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | from livekit.rtc import EventEmitter 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | logger = logging.getLogger("listen-and-respond") 13 | logger.setLevel(logging.INFO) 14 | 15 | class SimpleAgent(Agent): 16 | def __init__(self) -> None: 17 | super().__init__( 18 | instructions=""" 19 | You are a helpful agent. When the user speaks, you listen and respond. 
20 | """, 21 | stt=deepgram.STT(), 22 | llm=openai.LLM(model="gpt-4o"), 23 | tts=openai.TTS(), 24 | vad=silero.VAD.load() 25 | ) 26 | self.emitter.on('greet', self.greet) 27 | 28 | emitter = EventEmitter[str]() 29 | 30 | def greet(self, name): 31 | self.session.say(f"Hello, {name}!") 32 | 33 | async def on_enter(self): 34 | self.emitter.emit('greet', 'Alice') 35 | self.emitter.off('greet', self.greet) 36 | # This will not trigger the greet function, because we unregistered it with the line above 37 | # Comment out the 'off' line above to hear the agent greet Bob as well as Alice 38 | self.emitter.emit('greet', 'Bob') 39 | 40 | async def entrypoint(ctx: JobContext): 41 | await ctx.connect() 42 | 43 | agent = SimpleAgent() 44 | agent.emitter.on('greet', agent.greet) 45 | 46 | # We'll print this log once, because we registered it with the once method 47 | agent.emitter.once('greet', lambda name: print(f"[Once] Greeted {name}")) 48 | 49 | session = AgentSession() 50 | await session.start( 51 | agent=agent, 52 | room=ctx.room 53 | ) 54 | 55 | if __name__ == "__main__": 56 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /events/event_emitters.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | from livekit.rtc import EventEmitter 9 | import asyncio 10 | 11 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 12 | 13 | logger = logging.getLogger("listen-and-respond") 14 | logger.setLevel(logging.INFO) 15 | 16 | class SimpleAgent(Agent): 17 | def __init__(self) -> None: 18 | super().__init__( 19 | instructions=""" 20 | You are a helpful agent. When the user speaks, you listen and respond. 21 | """, 22 | stt=deepgram.STT(), 23 | llm=openai.LLM(model="gpt-4o"), 24 | tts=openai.TTS(), 25 | vad=silero.VAD.load() 26 | ) 27 | self.emitter.on('participant_joined', self.welcome_participant) 28 | self.emitter.on('participant_left', self.farewell_participant) 29 | 30 | emitter = EventEmitter[str]() 31 | 32 | def welcome_participant(self, name: str): 33 | self.session.say(f"Welcome, {name}! Glad you could join.") 34 | 35 | def farewell_participant(self, name: str): 36 | self.session.say(f"Goodbye, {name}. 
See you next time!") 37 | 38 | async def on_enter(self): 39 | # Simulate participant joining and leaving 40 | self.emitter.emit('participant_joined', 'Alice') 41 | asyncio.get_event_loop().call_later( 42 | 10, 43 | lambda: self.emitter.emit('participant_left', 'Alice') 44 | ) 45 | 46 | async def entrypoint(ctx: JobContext): 47 | await ctx.connect() 48 | 49 | agent = SimpleAgent() 50 | agent.emitter.on('participant_joined', agent.welcome_participant) 51 | agent.emitter.on('participant_left', agent.farewell_participant) 52 | 53 | session = AgentSession() 54 | await session.start( 55 | agent=agent, 56 | room=ctx.room 57 | ) 58 | 59 | if __name__ == "__main__": 60 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /flows/declarative_flow.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from dotenv import load_dotenv 3 | from dataclasses import dataclass, field 4 | from typing import Dict, List, Optional, Type 5 | 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit.agents.llm import function_tool 8 | from livekit.agents.voice import Agent, AgentSession 9 | from livekit.plugins import deepgram, openai, cartesia, silero 10 | from livekit import api 11 | 12 | # Load environment and configure logger 13 | load_dotenv() 14 | logger = logging.getLogger("declarative-flow") 15 | logger.setLevel(logging.INFO) 16 | 17 | @dataclass 18 | class SurveyData: 19 | """Stores all survey responses and state.""" 20 | responses: Dict[str, str] = field(default_factory=dict) 21 | current_stage: str = "collect_name" 22 | path_taken: List[str] = field(default_factory=list) 23 | 24 | def record(self, question: str, answer: str): 25 | self.responses[question] = answer 26 | self.path_taken.append(f"Stage '{self.current_stage}' - {question}: {answer}") 27 | 28 | class BaseAgent(Agent): 29 | """Base agent with common setup and transition logic.""" 30 | def __init__(self, job_context: JobContext, instructions: str) -> None: 31 | self.job_context = job_context 32 | super().__init__( 33 | instructions=instructions, 34 | stt=deepgram.STT(), 35 | llm=openai.LLM(model="gpt-4o"), 36 | tts=cartesia.TTS(), 37 | vad=silero.VAD.load() 38 | ) 39 | 40 | async def transition(self) -> Optional[Agent]: 41 | """Move to the next agent based on the flow definition.""" 42 | current = self.session.state.get("current_node") 43 | next_fn = flow.get(current, {}).get("next") 44 | if not next_fn: 45 | return None 46 | next_node = next_fn(self.session.state) 47 | if next_node is None: 48 | return None 49 | self.session.state["current_node"] = next_node 50 | agent_cls: Type[Agent] = flow[next_node]["agent"] 51 | return agent_cls(self.job_context) 52 | 53 | class DataCollectorAgent(BaseAgent): 54 | """Generic agent for collecting a single piece of data and transitioning.""" 55 | key: str 56 | label: str 57 | question: str 58 | instruction: str 59 | 60 | def __init__(self, job_context: JobContext) -> None: 61 | super().__init__(job_context=job_context, instructions=self.instruction) 62 | 63 | async def on_enter(self) -> None: 64 | await self.session.say(self.question) 65 | 66 | @function_tool 67 | async def collect(self, value: str) -> Optional[Agent]: 68 | sd: SurveyData = self.session.userdata 69 | sd.record(self.label, value) 70 | self.session.state[self.key] = value 71 | return await self.transition() 72 | 73 | class CollectNameAgent(DataCollectorAgent): 74 | key = "name" 75 | 
label = "Name" 76 | question = "What is your name?" 77 | instruction = "Please tell me your name." 78 | 79 | class CollectEmailAgent(DataCollectorAgent): 80 | key = "email" 81 | label = "Email" 82 | question = "What is your email address?" 83 | instruction = "Please tell me your email address." 84 | 85 | class SummaryAgent(BaseAgent): 86 | def __init__(self, job_context: JobContext) -> None: 87 | super().__init__(job_context=job_context, instructions="Summary of your information.") 88 | 89 | async def on_enter(self) -> None: 90 | sd: SurveyData = self.session.userdata 91 | name = sd.responses.get("Name", "[not provided]") 92 | email = sd.responses.get("Email", "[not provided]") 93 | summary = f"Thank you! Here is what I collected:\n- Name: {name}\n- Email: {email}" 94 | await self.session.say(summary) 95 | logger.info("Survey complete. Closing session.") 96 | await self.session.aclose() 97 | try: 98 | await self.job_context.api.room.delete_room( 99 | api.DeleteRoomRequest(room=self.job_context.room.name) 100 | ) 101 | except Exception as e: 102 | logger.error(f"Error deleting room: {e}") 103 | 104 | flow = { 105 | "collect_name": { 106 | "agent": CollectNameAgent, 107 | "next": lambda state: "collect_email" 108 | }, 109 | "collect_email": { 110 | "agent": CollectEmailAgent, 111 | "next": lambda state: "summary" 112 | }, 113 | "summary": { 114 | "agent": SummaryAgent, 115 | "next": None 116 | } 117 | } 118 | 119 | async def entrypoint(ctx: JobContext) -> None: 120 | await ctx.connect() 121 | session = AgentSession() 122 | session.userdata = SurveyData() 123 | session.state = {"current_node": "collect_name"} 124 | await session.start(agent=CollectNameAgent(ctx), room=ctx.room) 125 | 126 | if __name__ == "__main__": 127 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) 128 | -------------------------------------------------------------------------------- /flows/simple_flow.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from dotenv import load_dotenv 3 | from livekit.agents import JobContext, WorkerOptions, cli 4 | from livekit.agents.llm import function_tool 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import deepgram, openai, cartesia, silero 7 | from livekit import api 8 | 9 | # Load environment and configure logger 10 | load_dotenv() 11 | logger = logging.getLogger("simple-flow") 12 | logger.setLevel(logging.INFO) 13 | 14 | class BaseAgent(Agent): 15 | def __init__(self, job_context: JobContext, instructions: str) -> None: 16 | self.job_context = job_context 17 | super().__init__( 18 | instructions=instructions, 19 | stt=deepgram.STT(), 20 | llm=openai.LLM(model="gpt-4o"), 21 | tts=cartesia.TTS(), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | class GreetingAgent(BaseAgent): 26 | def __init__(self, job_context: JobContext) -> None: 27 | super().__init__( 28 | job_context=job_context, 29 | instructions=""" 30 | You are a helpful assistant. Start by greeting the user and asking for their name. 31 | """ 32 | ) 33 | 34 | async def on_enter(self) -> None: 35 | await self.session.say("Hello! I'm here to help you. What's your name?") 36 | 37 | @function_tool 38 | async def collect_name(self, name: str) -> Agent: 39 | """ 40 | Receive the user's name, acknowledge it, and transition to asking their favorite color. 41 | """ 42 | await self.session.say(f"Hello, {name}! 
Nice to meet you.") 43 | return AskColorAgent(name=name, job_context=self.job_context) 44 | 45 | class AskColorAgent(BaseAgent): 46 | def __init__(self, name: str, job_context: JobContext) -> None: 47 | super().__init__( 48 | job_context=job_context, 49 | instructions=f"You are talking to {name}. Ask the user what their favorite color is." 50 | ) 51 | self.name = name 52 | 53 | async def on_enter(self) -> None: 54 | await self.session.say(f"{self.name}, what is your favorite color?") 55 | 56 | @function_tool 57 | async def collect_color(self, color: str) -> Agent: 58 | """ 59 | Receive the user's favorite color, acknowledge it, and transition to summary. 60 | """ 61 | await self.session.say(f"{color} is a wonderful choice!") 62 | return SummaryAgent(name=self.name, color=color, job_context=self.job_context) 63 | 64 | class SummaryAgent(BaseAgent): 65 | def __init__(self, name: str, color: str, job_context: JobContext) -> None: 66 | super().__init__( 67 | job_context=job_context, 68 | instructions="Summarize the collected information and end the conversation." 69 | ) 70 | self.name = name 71 | self.color = color 72 | 73 | async def on_enter(self) -> None: 74 | await self.session.say( 75 | f"Thank you, {self.name}. I have learned that your favorite color is {self.color}. Goodbye!" 76 | ) 77 | logger.info("Closing session") 78 | await self.session.aclose() 79 | 80 | logger.info("Deleting room") 81 | request = api.DeleteRoomRequest(room=self.job_context.room.name) 82 | await self.job_context.api.room.delete_room(request) 83 | 84 | async def entrypoint(ctx: JobContext) -> None: 85 | await ctx.connect() 86 | session = AgentSession() 87 | await session.start( 88 | agent=GreetingAgent( 89 | job_context=ctx 90 | ), 91 | room=ctx.room 92 | ) 93 | 94 | if __name__ == "__main__": 95 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /hardware/pi_zero_transcriber.py: -------------------------------------------------------------------------------- 1 | # This is a simple transcriber that uses the LiveKit SDK to transcribe audio from the microphone. 2 | # It displays the transcribed text on a Pirate Audio display on a Raspberry Pi Zero 2 W. 
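# At a glance: the agent joins a LiveKit room, streams microphone audio through
# Deepgram STT, and redraws the ST7789 screen whenever a "user_input_transcribed"
# event fires (interim results included). Final transcripts are also appended
# to user_speech_log.txt.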
3 | 4 | from pathlib import Path 5 | from dotenv import load_dotenv 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.plugins import deepgram 9 | from pathlib import Path 10 | 11 | from PIL import Image 12 | from PIL import ImageDraw 13 | from PIL import ImageFont 14 | import st7789 15 | import os 16 | import signal 17 | import time 18 | import textwrap 19 | 20 | # Load environment variables 21 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 22 | 23 | # Set up the screen 24 | SPI_SPEED_MHZ = 20 25 | screen = st7789.ST7789( 26 | rotation=90, # Needed to display the right way up on Pirate Audio 27 | port=0, # SPI port 28 | cs=1, # SPI port Chip-select channel 29 | dc=9, # BCM pin used for data/command 30 | backlight=13, 31 | spi_speed_hz=SPI_SPEED_MHZ * 1000 * 1000 32 | ) 33 | width = screen.width 34 | height = screen.height 35 | 36 | # Create image for display 37 | image = Image.new("RGB", (240, 240), (0, 0, 0)) 38 | draw = ImageDraw.Draw(image) 39 | 40 | # Set up font 41 | font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 18) 42 | title_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 22) 43 | 44 | # Display startup screen 45 | def show_startup_screen(): 46 | draw.rectangle((0, 0, width, height), fill=(0, 0, 0)) 47 | draw.text((10, 10), "LiveKit", font=title_font, fill=(255, 255, 255)) 48 | draw.text((10, 40), "Transcription", font=title_font, fill=(255, 255, 255)) 49 | draw.text((10, 80), "Starting...", font=font, fill=(200, 200, 200)) 50 | screen.display(image) 51 | 52 | # Display transcription text 53 | def display_transcription(text): 54 | # Clear screen 55 | draw.rectangle((0, 0, width, height), fill=(0, 0, 0)) 56 | 57 | # Add title 58 | draw.text((10, 10), "Transcription", font=title_font, fill=(255, 255, 255)) 59 | 60 | # Wrap and display the transcribed text 61 | y_position = 50 62 | wrapped_text = textwrap.wrap(text, width=26) # Adjust width as needed 63 | 64 | # Display only the most recent lines that fit on screen 65 | max_lines = 9 # Approximate number of lines that fit 66 | display_lines = wrapped_text[-max_lines:] if len(wrapped_text) > max_lines else wrapped_text 67 | 68 | for line in display_lines: 69 | draw.text((10, y_position), line, font=font, fill=(200, 200, 200)) 70 | y_position += 20 # Line spacing 71 | 72 | screen.display(image) 73 | 74 | async def entrypoint(ctx: JobContext): 75 | show_startup_screen() 76 | 77 | await ctx.connect() 78 | session = AgentSession() 79 | 80 | # Keep track of the current transcription 81 | current_transcript = "" 82 | last_transcript = "" 83 | 84 | @session.on("user_input_transcribed") 85 | def on_transcript(transcript): 86 | nonlocal current_transcript, last_transcript 87 | 88 | # Update the current transcript 89 | if transcript.is_final: 90 | # For final transcripts, update the full text 91 | current_transcript += " " + transcript.transcript 92 | current_transcript = current_transcript.strip() 93 | 94 | # Save to file 95 | with open("user_speech_log.txt", "a") as f: 96 | f.write(f"{transcript.transcript}\n") 97 | else: 98 | # For interim results, show the current segment 99 | last_transcript = transcript.transcript 100 | 101 | # Display the combined text (completed transcript + current segment) 102 | display_text = current_transcript 103 | if not transcript.is_final and last_transcript: 104 | display_text += " " + last_transcript 105 | 106 | 
display_transcription(display_text) 107 | 108 | await session.start( 109 | agent=Agent( 110 | instructions="You are a helpful assistant that transcribes user speech to text.", 111 | stt=deepgram.STT() 112 | ), 113 | room=ctx.room 114 | ) 115 | 116 | if __name__ == "__main__": 117 | try: 118 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) 119 | except KeyboardInterrupt: 120 | # Clear screen on exit 121 | draw.rectangle((0, 0, width, height), fill=(0, 0, 0)) 122 | screen.display(image) 123 | print("\nExiting transcriber") 124 | -------------------------------------------------------------------------------- /home_assistant/README.md: -------------------------------------------------------------------------------- 1 | # Home Automation Voice Agent 2 | 3 | This project provides a voice-controlled agent for interacting with your [Home Assistant](https://www.home-assistant.io/) setup. The agent listens for a hot word and allows you to list and control smart devices (like lights and switches) using natural language. 4 | 5 | _(click image to see short YouTube video of agent controlling lighting)_ 6 | [![Control Lights](https://img.youtube.com/vi/IwDlZXJjsFY/0.jpg)](https://youtu.be/IwDlZXJjsFY) 7 | 8 | _(click image to see short YouTube video of agent controlling a TV lift)_ 9 | [![Hide/Unhide TV](https://img.youtube.com/vi/mcz0MOzswV0/0.jpg)](https://youtu.be/mcz0MOzswV0) 10 | 11 | ## Requirements 12 | 13 | - Python 3.8+ 14 | - Home Assistant instance (local or remote) 15 | - The dependencies listed in `requirements.txt` 16 | 17 | ## Environment Variables 18 | 19 | Create a `.env` file in the project root (or set these variables in your environment): 20 | 21 | | Variable | Description | 22 | |-------------------------|------------------------------------------------------------------| 23 | | `HOMEAUTOMAITON_TOKEN` | Your Home Assistant long-lived access token | 24 | | `HOMEAUTOMATION_URL` | (Optional) Home Assistant base URL (default: `http://localhost:8123`) | 25 | 26 | **Example `.env`:** 27 | 28 | ``` 29 | HOMEAUTOMAITON_TOKEN=your_home_assistant_token_here 30 | HOMEAUTOMATION_URL=http://localhost:8123 31 | ``` 32 | 33 | ## Usage 34 | 35 | 1. **Install dependencies:** 36 | ``` 37 | pip install -r ../requirements.txt 38 | ``` 39 | 40 | 2. **Set up your `.env` file** as described above. 41 | 42 | 3. **Run the agent:** 43 | ``` 44 | python homeautomation.py start 45 | ``` 46 | 47 | 4. **How it works:** 48 | - The agent waits for the hot word: **"hey casa"**. 49 | - After hearing the hot word, you can ask it to list devices or control them, e.g.: 50 | - "Hey casa, turn on the kitchen light." 51 | - "Hey casa, what lights are in the kitchen?" 52 | - The agent will respond and control your Home Assistant devices accordingly. 53 | 54 | ## Features 55 | 56 | - **Hot word detection:** Only responds after hearing "hey casa". 57 | - **Device listing:** Lists available lights, switches, and binary sensors. 58 | - **Device control:** Turn devices on or off by name. 59 | 60 | ## How It Works 61 | 62 | ```mermaid 63 | sequenceDiagram 64 | participant User 65 | participant Agent 66 | participant HomeAssistant 67 | 68 | User->>Agent: "hey casa" 69 | Agent->>Agent: Detect hot word 70 | Agent->>User: "Waiting for command" 71 | 72 | User->>Agent: "list devices" 73 | Agent->>HomeAssistant: GET /api/states 74 | HomeAssistant-->>Agent: List of devices 75 | Agent->>User: "Available devices: Kitchen Light, Living Room Switch..."
76 | 77 | User->>Agent: "turn on kitchen light" 78 | Agent->>HomeAssistant: GET /api/states/light.kitchen 79 | HomeAssistant-->>Agent: Device details 80 | Agent->>HomeAssistant: POST /api/services/light/turn_on 81 | HomeAssistant-->>Agent: Success 82 | Agent->>User: "Ok, I've turned Kitchen Light on" 83 | ``` 84 | 85 | ## Troubleshooting 86 | 87 | - Make sure your Home Assistant token is correct and has the necessary permissions. 88 | - Ensure your Home Assistant instance is accessible from the machine running this script. 89 | - Check the logs for connection or authentication errors. 90 | 91 | 92 | ## TODO 93 | 94 | * Add ability to query non binary devices like thermostats and control them 95 | * Make hot word more flexible and less prone to error -------------------------------------------------------------------------------- /livekit-logo-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit-examples/python-agents-examples/35d7334a47d1eec24827e237dca83bf26bd8c1ca/livekit-logo-dark.png -------------------------------------------------------------------------------- /mcp/agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from dotenv import load_dotenv 4 | from pathlib import Path 5 | from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli, mcp 6 | from livekit.plugins import deepgram, openai, silero 7 | from livekit.plugins.turn_detector.multilingual import MultilingualModel 8 | 9 | logger = logging.getLogger("mcp-agent") 10 | 11 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 12 | 13 | class MyAgent(Agent): 14 | def __init__(self) -> None: 15 | super().__init__( 16 | instructions=( 17 | "You can retrieve data via the MCP server. The interface is voice-based: " 18 | "accept spoken user queries and respond with synthesized speech." 
19 | ), 20 | ) 21 | 22 | async def on_enter(self): 23 | self.session.generate_reply() 24 | 25 | async def entrypoint(ctx: JobContext): 26 | await ctx.connect() 27 | 28 | session = AgentSession( 29 | vad=silero.VAD.load(), 30 | stt=deepgram.STT(model="nova-3", language="multi"), 31 | llm=openai.LLM(model="gpt-4o-mini"), 32 | tts=openai.TTS(voice="ash"), 33 | turn_detection=MultilingualModel(), 34 | mcp_servers=[ 35 | mcp.MCPServerHTTP( 36 | url=os.environ.get("ZAPIER_MCP_URL"), 37 | timeout=10, 38 | client_session_timeout_seconds=10, 39 | ), 40 | mcp.MCPServerHTTP( 41 | url="http://localhost:8000/sse", 42 | timeout=5, 43 | client_session_timeout_seconds=5, 44 | ), 45 | ], 46 | ) 47 | 48 | await session.start(agent=MyAgent(), room=ctx.room) 49 | 50 | 51 | if __name__ == "__main__": 52 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /metrics/metrics_llm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import asyncio 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.metrics import LLMMetrics 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.agents.voice.room_io import RoomInputOptions 9 | from livekit.plugins import deepgram, openai, silero 10 | from rich.console import Console 11 | from rich.table import Table 12 | from rich import box 13 | from datetime import datetime 14 | 15 | logger = logging.getLogger("metrics-llm") 16 | logger.setLevel(logging.INFO) 17 | 18 | console = Console() 19 | 20 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 21 | 22 | class LLMMetricsAgent(Agent): 23 | def __init__(self) -> None: 24 | super().__init__( 25 | instructions=""" 26 | You are a helpful agent. 
27 | """, 28 | stt=deepgram.STT(), 29 | llm=openai.LLM(model="gpt-4o"), 30 | tts=openai.TTS(), 31 | vad=silero.VAD.load() 32 | ) 33 | 34 | def sync_wrapper(metrics: LLMMetrics): 35 | asyncio.create_task(self.on_metrics_collected(metrics)) 36 | 37 | self.llm.on("metrics_collected", sync_wrapper) 38 | 39 | async def on_metrics_collected(self, metrics: LLMMetrics) -> None: 40 | table = Table( 41 | title="[bold blue]LLM Metrics Report[/bold blue]", 42 | box=box.ROUNDED, 43 | highlight=True, 44 | show_header=True, 45 | header_style="bold cyan" 46 | ) 47 | 48 | table.add_column("Metric", style="bold green") 49 | table.add_column("Value", style="yellow") 50 | 51 | timestamp = datetime.fromtimestamp(metrics.timestamp).strftime('%Y-%m-%d %H:%M:%S') 52 | 53 | table.add_row("Type", str(metrics.type)) 54 | table.add_row("Label", str(metrics.label)) 55 | table.add_row("Request ID", str(metrics.request_id)) 56 | table.add_row("Timestamp", timestamp) 57 | table.add_row("Duration", f"[white]{metrics.duration:.4f}[/white]s") 58 | table.add_row("Time to First Token", f"[white]{metrics.ttft:.4f}[/white]s") 59 | table.add_row("Cancelled", "✓" if metrics.cancelled else "✗") 60 | table.add_row("Completion Tokens", str(metrics.completion_tokens)) 61 | table.add_row("Prompt Tokens", str(metrics.prompt_tokens)) 62 | table.add_row("Total Tokens", str(metrics.total_tokens)) 63 | table.add_row("Tokens/Second", f"{metrics.tokens_per_second:.2f}") 64 | 65 | console.print("\n") 66 | console.print(table) 67 | console.print("\n") 68 | 69 | 70 | async def entrypoint(ctx: JobContext): 71 | await ctx.connect() 72 | 73 | session = AgentSession() 74 | 75 | await session.start( 76 | agent=LLMMetricsAgent(), 77 | room=ctx.room, 78 | room_input_options=RoomInputOptions(), 79 | ) 80 | 81 | 82 | if __name__ == "__main__": 83 | cli.run_app(WorkerOptions( 84 | entrypoint_fnc=entrypoint) 85 | ) -------------------------------------------------------------------------------- /metrics/metrics_stt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import asyncio 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.metrics import STTMetrics, EOUMetrics 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.agents.voice.room_io import RoomInputOptions 9 | from livekit.plugins import deepgram, openai, silero 10 | from rich.console import Console 11 | from rich.table import Table 12 | from rich import box 13 | from datetime import datetime 14 | 15 | logger = logging.getLogger("metrics-stt") 16 | logger.setLevel(logging.INFO) 17 | 18 | console = Console() 19 | 20 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 21 | 22 | class STTMetricsAgent(Agent): 23 | def __init__(self) -> None: 24 | super().__init__( 25 | instructions=""" 26 | You are a helpful agent. 
27 | """, 28 | stt=deepgram.STT(), 29 | llm=openai.LLM(model="gpt-4o"), 30 | tts=openai.TTS(), 31 | vad=silero.VAD.load() 32 | ) 33 | 34 | def stt_wrapper(metrics: STTMetrics): 35 | asyncio.create_task(self.on_stt_metrics_collected(metrics)) 36 | 37 | def eou_wrapper(metrics: EOUMetrics): 38 | asyncio.create_task(self.on_eou_metrics_collected(metrics)) 39 | 40 | self.stt.on("metrics_collected", stt_wrapper) 41 | self.stt.on("eou_metrics_collected", eou_wrapper) 42 | 43 | async def on_stt_metrics_collected(self, metrics: STTMetrics) -> None: 44 | table = Table( 45 | title="[bold blue]STT Metrics Report[/bold blue]", 46 | box=box.ROUNDED, 47 | highlight=True, 48 | show_header=True, 49 | header_style="bold cyan" 50 | ) 51 | 52 | table.add_column("Metric", style="bold green") 53 | table.add_column("Value", style="yellow") 54 | 55 | timestamp = datetime.fromtimestamp(metrics.timestamp).strftime('%Y-%m-%d %H:%M:%S') 56 | 57 | table.add_row("Type", str(metrics.type)) 58 | table.add_row("Label", str(metrics.label)) 59 | table.add_row("Request ID", str(metrics.request_id)) 60 | table.add_row("Timestamp", timestamp) 61 | table.add_row("Duration", f"[white]{metrics.duration:.4f}[/white]s") 62 | table.add_row("Speech ID", str(metrics.speech_id)) 63 | table.add_row("Error", str(metrics.error)) 64 | table.add_row("Streamed", "✓" if metrics.streamed else "✗") 65 | table.add_row("Audio Duration", f"[white]{metrics.audio_duration:.4f}[/white]s") 66 | 67 | console.print("\n") 68 | console.print(table) 69 | console.print("\n") 70 | 71 | async def on_eou_metrics_collected(self, metrics: EOUMetrics) -> None: 72 | table = Table( 73 | title="[bold blue]End of Utterance Metrics Report[/bold blue]", 74 | box=box.ROUNDED, 75 | highlight=True, 76 | show_header=True, 77 | header_style="bold cyan" 78 | ) 79 | 80 | table.add_column("Metric", style="bold green") 81 | table.add_column("Value", style="yellow") 82 | 83 | timestamp = datetime.fromtimestamp(metrics.timestamp).strftime('%Y-%m-%d %H:%M:%S') 84 | 85 | table.add_row("Type", str(metrics.type)) 86 | table.add_row("Label", str(metrics.label)) 87 | table.add_row("Timestamp", timestamp) 88 | table.add_row("End of Utterance Delay", f"[white]{metrics.end_of_utterance_delay:.4f}[/white]s") 89 | table.add_row("Transcription Delay", f"[white]{metrics.transcription_delay:.4f}[/white]s") 90 | table.add_row("Speech ID", str(metrics.speech_id)) 91 | table.add_row("Error", str(metrics.error)) 92 | 93 | console.print("\n") 94 | console.print(table) 95 | console.print("\n") 96 | 97 | 98 | async def entrypoint(ctx: JobContext): 99 | await ctx.connect() 100 | 101 | session = AgentSession() 102 | 103 | await session.start( 104 | agent=STTMetricsAgent(), 105 | room=ctx.room, 106 | room_input_options=RoomInputOptions(), 107 | ) 108 | 109 | 110 | if __name__ == "__main__": 111 | cli.run_app(WorkerOptions( 112 | entrypoint_fnc=entrypoint) 113 | ) -------------------------------------------------------------------------------- /metrics/metrics_tts.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import asyncio 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.metrics import TTSMetrics 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.agents.voice.room_io import RoomInputOptions 9 | from livekit.plugins import deepgram, openai, silero 10 | from rich.console import Console 11 | from rich.table import Table 12 
| from rich import box 13 | from datetime import datetime 14 | 15 | logger = logging.getLogger("metrics-tts") 16 | logger.setLevel(logging.INFO) 17 | 18 | console = Console() 19 | 20 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 21 | 22 | class TTSMetricsAgent(Agent): 23 | def __init__(self) -> None: 24 | super().__init__( 25 | instructions=""" 26 | You are a helpful agent. 27 | """, 28 | stt=deepgram.STT(), 29 | llm=openai.LLM(model="gpt-4o"), 30 | tts=openai.TTS(), 31 | vad=silero.VAD.load() 32 | ) 33 | 34 | def sync_wrapper(metrics: TTSMetrics): 35 | asyncio.create_task(self.on_metrics_collected(metrics)) 36 | 37 | self.tts.on("metrics_collected", sync_wrapper) 38 | 39 | async def on_metrics_collected(self, metrics: TTSMetrics) -> None: 40 | table = Table( 41 | title="[bold blue]TTS Metrics Report[/bold blue]", 42 | box=box.ROUNDED, 43 | highlight=True, 44 | show_header=True, 45 | header_style="bold cyan" 46 | ) 47 | 48 | table.add_column("Metric", style="bold green") 49 | table.add_column("Value", style="yellow") 50 | 51 | timestamp = datetime.fromtimestamp(metrics.timestamp).strftime('%Y-%m-%d %H:%M:%S') 52 | 53 | table.add_row("Type", str(metrics.type)) 54 | table.add_row("Label", str(metrics.label)) 55 | table.add_row("Request ID", str(metrics.request_id)) 56 | table.add_row("Timestamp", timestamp) 57 | table.add_row("TTFB", f"[white]{metrics.ttfb:.4f}[/white]s") 58 | table.add_row("Duration", f"[white]{metrics.duration:.4f}[/white]s") 59 | table.add_row("Audio Duration", f"[white]{metrics.audio_duration:.4f}[/white]s") 60 | table.add_row("Cancelled", "✓" if metrics.cancelled else "✗") 61 | table.add_row("Characters Count", str(metrics.characters_count)) 62 | table.add_row("Streamed", "✓" if metrics.streamed else "✗") 63 | table.add_row("Speech ID", str(metrics.speech_id)) 64 | table.add_row("Error", str(metrics.error)) 65 | 66 | console.print("\n") 67 | console.print(table) 68 | console.print("\n") 69 | 70 | 71 | async def entrypoint(ctx: JobContext): 72 | await ctx.connect() 73 | 74 | session = AgentSession() 75 | 76 | await session.start( 77 | agent=TTSMetricsAgent(), 78 | room=ctx.room, 79 | room_input_options=RoomInputOptions(), 80 | ) 81 | 82 | 83 | if __name__ == "__main__": 84 | cli.run_app(WorkerOptions( 85 | entrypoint_fnc=entrypoint) 86 | ) -------------------------------------------------------------------------------- /metrics/metrics_vad.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import asyncio 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli, vad 6 | from livekit.agents.metrics import VADMetrics 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.agents.voice.room_io import RoomInputOptions 9 | from livekit.plugins import deepgram, openai, silero 10 | from rich.console import Console 11 | from rich.table import Table 12 | from rich import box 13 | from datetime import datetime 14 | 15 | logger = logging.getLogger("metrics-vad") 16 | logger.setLevel(logging.INFO) 17 | 18 | console = Console() 19 | 20 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 21 | 22 | class VADMetricsAgent(Agent): 23 | def __init__(self) -> None: 24 | super().__init__( 25 | instructions=""" 26 | You are a helpful agent. 
27 | """, 28 | stt=deepgram.STT(), 29 | llm=openai.LLM(model="gpt-4o"), 30 | tts=openai.TTS(), 31 | vad=silero.VAD.load() 32 | ) 33 | 34 | def sync_wrapper(event: vad.VADEvent): 35 | asyncio.create_task(self.on_vad_event(event)) 36 | 37 | self.vad.on("metrics_collected", sync_wrapper) 38 | 39 | async def on_vad_event(self, event: vad.VADEvent): 40 | table = Table( 41 | title="[bold blue]VAD Event Metrics Report[/bold blue]", 42 | box=box.ROUNDED, 43 | highlight=True, 44 | show_header=True, 45 | header_style="bold cyan" 46 | ) 47 | 48 | table.add_column("Metric", style="bold green") 49 | table.add_column("Value", style="yellow") 50 | 51 | timestamp = datetime.fromtimestamp(event.timestamp).strftime('%Y-%m-%d %H:%M:%S') 52 | 53 | table.add_row("Type", str(event.type)) 54 | table.add_row("Timestamp", timestamp) 55 | table.add_row("Idle Time", f"[white]{event.idle_time:.4f}[/white]s") 56 | table.add_row("Inference Duration Total", f"[white]{event.inference_duration_total:.4f}[/white]s") 57 | table.add_row("Inference Count", str(event.inference_count)) 58 | table.add_row("Speech ID", str(event.speech_id)) 59 | table.add_row("Error", str(event.error)) 60 | 61 | console.print("\n") 62 | console.print(table) 63 | console.print("\n") 64 | 65 | 66 | async def entrypoint(ctx: JobContext): 67 | await ctx.connect() 68 | 69 | session = AgentSession() 70 | 71 | await session.start( 72 | agent=VADMetricsAgent(), 73 | room=ctx.room, 74 | room_input_options=RoomInputOptions(), 75 | ) 76 | 77 | 78 | if __name__ == "__main__": 79 | cli.run_app(WorkerOptions( 80 | entrypoint_fnc=entrypoint) 81 | ) -------------------------------------------------------------------------------- /metrics/send-metrics-to-3p/metrics_server/README.md: -------------------------------------------------------------------------------- 1 | # LiveKit Metrics Dashboard 2 | 3 | This Flask application provides a dashboard for viewing metrics collected from LiveKit agents. It receives metrics data via HTTP POST requests and displays them in a real-time dashboard. 4 | 5 | ## Setup 6 | 7 | 1. Install the required dependencies: 8 | 9 | ```bash 10 | cd metrics_server 11 | pip install -r requirements.txt 12 | ``` 13 | 14 | 2. Start the server: 15 | 16 | ```bash 17 | python app.py 18 | ``` 19 | 20 | The server will run on `http://localhost:5001` by default. 21 | 22 | ## Usage 23 | 24 | The metrics dashboard can be accessed at `http://localhost:5001` in your web browser. It displays metrics for LLM, STT, TTS, EOU, and VAD components. 
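To sanity-check the pipeline end to end, you can post a sample metric by hand before wiring up the agent. This is a minimal sketch assuming the server is running locally on its default port; the JSON field names are only illustrative, since the server stores whatever JSON body it receives (the endpoints are listed under API Endpoints below):

```bash
# Hypothetical TTS metric posted by hand; the server adds a received_at
# timestamp and the entry appears in the TTS panel of the dashboard.
curl -X POST http://localhost:5001/metrics/tts \
  -H "Content-Type: application/json" \
  -d '{"ttfb": 0.21, "duration": 1.05, "characters_count": 42}'
```

The same entry is then returned as JSON from `http://localhost:5001/api/metrics/tts`.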
25 | 26 | ## API Endpoints 27 | 28 | - `POST /metrics/<metric_type>`: Submit metrics data for a specific metric type 29 | - `GET /api/metrics`: Get all collected metrics data 30 | - `GET /api/metrics/<metric_type>`: Get metrics data for a specific type 31 | 32 | ## Environment Variables 33 | 34 | The LiveKit agent can be configured to send metrics to this server by setting the `METRICS_SERVER_URL` environment variable in the .env file: 35 | 36 | ``` 37 | METRICS_SERVER_URL=http://localhost:5001 38 | ``` -------------------------------------------------------------------------------- /metrics/send-metrics-to-3p/metrics_server/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify, render_template 2 | import json 3 | import os 4 | from datetime import datetime 5 | from collections import defaultdict 6 | from pathlib import Path 7 | 8 | # Set up the Flask app with proper template directory 9 | template_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'templates')) 10 | app = Flask(__name__, template_folder=template_dir) 11 | 12 | # Store metrics in memory (for simplicity) 13 | metrics_data = defaultdict(list) 14 | metrics_types = ["llm", "stt", "tts", "eou", "vad"] 15 | 16 | @app.route('/metrics/<metric_type>', methods=['POST']) 17 | def receive_metrics(metric_type): 18 | """ 19 | Endpoint to receive metrics data from the LiveKit agent 20 | """ 21 | if metric_type not in metrics_types: 22 | return jsonify({"error": f"Invalid metric type: {metric_type}"}), 400 23 | 24 | data = request.json 25 | # Add timestamp for when server received it 26 | data['received_at'] = datetime.now().isoformat() 27 | metrics_data[metric_type].append(data) 28 | 29 | # Limit the size of stored metrics (keep only last 100 entries per type) 30 | if len(metrics_data[metric_type]) > 100: 31 | metrics_data[metric_type] = metrics_data[metric_type][-100:] 32 | 33 | return jsonify({"status": "success"}), 200 34 | 35 | @app.route('/') 36 | def dashboard(): 37 | """Display metrics dashboard""" 38 | return render_template('dashboard.html', metrics_types=metrics_types) 39 | 40 | @app.route('/api/metrics') 41 | def get_metrics(): 42 | """API endpoint to get all metrics data for AJAX requests""" 43 | return jsonify(metrics_data) 44 | 45 | @app.route('/api/metrics/<metric_type>') 46 | def get_metric_type(metric_type): 47 | """API endpoint to get metrics data for a specific type""" 48 | if metric_type not in metrics_types: 49 | return jsonify({"error": f"Invalid metric type: {metric_type}"}), 400 50 | 51 | return jsonify(metrics_data[metric_type]) 52 | 53 | if __name__ == '__main__': 54 | app.run(debug=True, host='0.0.0.0', port=5001) -------------------------------------------------------------------------------- /metrics/send-metrics-to-3p/metrics_server/requirements.txt: -------------------------------------------------------------------------------- 1 | flask==2.3.3 2 | requests==2.31.0 3 | python-dotenv==1.0.0 -------------------------------------------------------------------------------- /metrics/send-metrics-to-3p/metrics_server/templates/dashboard.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | LiveKit Metrics Dashboard 7 | 8 | 9 | 28 | 29 | 30 |
31 |

LiveKit Metrics Dashboard

32 | 33 | 49 | 50 |
51 | {% for metric_type in metrics_types %} 52 |
56 |
57 |
58 |
59 |
60 |
61 | Latest {{ metric_type.upper() }} Metrics 62 |
63 |
64 |
65 | No data available 66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 | {% endfor %} 74 |
75 |
76 | 77 | 78 | 104 | 105 | -------------------------------------------------------------------------------- /metrics/send-metrics-to-3p/run_3p_metrics_demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Start the metrics server in the background 4 | echo "Starting metrics server..." 5 | cd metrics_server 6 | python app.py & 7 | SERVER_PID=$! 8 | cd .. 9 | 10 | # Wait for the server to start 11 | echo "Waiting for metrics server to start..." 12 | sleep 2 13 | 14 | # Run the LiveKit agent 15 | echo "Starting LiveKit agent..." 16 | cd metrics 17 | python send_metrics_to_3p.py console 18 | 19 | # When the agent is stopped, also stop the server 20 | echo "Stopping metrics server..." 21 | kill $SERVER_PID -------------------------------------------------------------------------------- /multi-agent/long_or_short_agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | from livekit.agents.llm import function_tool 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | logger = logging.getLogger("listen-and-respond") 13 | logger.setLevel(logging.INFO) 14 | 15 | class ShortAgent(Agent): 16 | def __init__(self) -> None: 17 | super().__init__( 18 | instructions=""" 19 | You are a helpful agent. When the user speaks, you listen and respond. Be as brief as possible. Arguably too brief. 20 | """, 21 | stt=deepgram.STT(), 22 | llm=openai.LLM(model="gpt-4o"), 23 | tts=openai.TTS( 24 | model="gpt-4o-mini-tts", 25 | voice="nova" 26 | ), 27 | vad=silero.VAD.load() 28 | ) 29 | 30 | async def on_enter(self): 31 | self.session.say("Hi. It's Short agent.") 32 | 33 | @function_tool 34 | async def change_agent(self): 35 | """Change the agent to the long agent.""" 36 | self.session.update_agent(LongAgent()) 37 | 38 | class LongAgent(Agent): 39 | def __init__(self) -> None: 40 | super().__init__( 41 | instructions=""" 42 | You are a helpful agent. When the user speaks, you listen and respond in overly verbose, flowery, obnoxiously detailed sentences. 43 | """, 44 | stt=deepgram.STT(), 45 | llm=openai.LLM(model="gpt-4o"), 46 | tts=openai.TTS( 47 | model="gpt-4o-mini-tts", 48 | voice="onyx" 49 | ), 50 | vad=silero.VAD.load() 51 | ) 52 | 53 | async def on_enter(self): 54 | self.session.say("Salutations! 
it is I, your friendly neighborhood long agent.") 55 | 56 | @function_tool 57 | async def change_agent(self): 58 | """Change the agent to the short agent.""" 59 | self.session.update_agent(ShortAgent()) 60 | 61 | async def entrypoint(ctx: JobContext): 62 | await ctx.connect() 63 | 64 | session = AgentSession() 65 | 66 | await session.start( 67 | agent=ShortAgent(), 68 | room=ctx.room 69 | ) 70 | 71 | session.once 72 | 73 | if __name__ == "__main__": 74 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/anthropic_llm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import anthropic, openai, silero, deepgram 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("anthropic_llm") 11 | logger.setLevel(logging.INFO) 12 | 13 | class SimpleAgent(Agent): 14 | def __init__(self) -> None: 15 | super().__init__( 16 | instructions=""" 17 | You are a helpful agent. 18 | """, 19 | stt=deepgram.STT(), 20 | llm=anthropic.LLM(model="claude-3-5-sonnet-20240620"), 21 | tts=openai.TTS(instructions="You are a helpful assistant with a pleasant voice. Speak in a natural, conversational tone."), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | async def on_enter(self): 26 | self.session.generate_reply() 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | 33 | await session.start( 34 | agent=SimpleAgent(), 35 | room=ctx.room 36 | ) 37 | 38 | if __name__ == "__main__": 39 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/cerebras_llm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, silero, deepgram 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("cerebras_llm") 11 | logger.setLevel(logging.INFO) 12 | 13 | class SimpleAgent(Agent): 14 | def __init__(self) -> None: 15 | super().__init__( 16 | instructions=""" 17 | You are a helpful agent. 18 | """, 19 | stt=deepgram.STT(), 20 | llm=openai.LLM.with_cerebras(), 21 | tts=openai.TTS(instructions="You are a helpful assistant with a pleasant voice. 
Speak in a natural, conversational tone."), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | async def on_enter(self): 26 | self.session.generate_reply() 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | 33 | await session.start( 34 | agent=SimpleAgent(), 35 | room=ctx.room 36 | ) 37 | 38 | if __name__ == "__main__": 39 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/google_llm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, google, deepgram, silero 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("google_llm") 11 | logger.setLevel(logging.INFO) 12 | 13 | class SimpleAgent(Agent): 14 | def __init__(self) -> None: 15 | super().__init__( 16 | instructions=""" 17 | You are a helpful agent. 18 | """, 19 | stt=deepgram.STT(), 20 | llm=google.LLM(), 21 | tts=openai.TTS(instructions="You are a helpful assistant with a pleasant voice. Speak in a natural, conversational tone."), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | async def on_enter(self): 26 | self.session.generate_reply() 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | 33 | await session.start( 34 | agent=SimpleAgent(), 35 | room=ctx.room 36 | ) 37 | 38 | if __name__ == "__main__": 39 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/interrupt_user.py: -------------------------------------------------------------------------------- 1 | # This agent keeps track of the number of sentences the user has spoken 2 | # and interrupts them if they've said a certain number of sentences. 3 | # We use session.say() to interrupt the user, and set allow_interruptions=False 4 | # on that specific call to prevent the user from interrupting the agent. 5 | # After the agent has spoken, allow_interruptions is once again True so the agent 6 | # can listen for the user's response. 
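# The mechanics, for reference: interim STT transcripts are appended to a buffer, count_sentences()
# runs a simple regex over that buffer, and once max_sentences is reached an asyncio task calls
# handle_interruption() with a purpose-built ChatContext before the agent speaks.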
7 | 8 | import logging 9 | from pathlib import Path 10 | from dotenv import load_dotenv 11 | from livekit.agents import JobContext, WorkerOptions, cli 12 | from livekit.agents.voice import Agent, AgentSession 13 | from livekit.plugins import openai, deepgram, silero 14 | from livekit.agents.llm import ChatContext, ChatMessage 15 | import re 16 | import asyncio 17 | 18 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 19 | 20 | logger = logging.getLogger("interrupt-user") 21 | logger.setLevel(logging.INFO) 22 | 23 | def count_sentences(text): 24 | """Count the number of sentences in text""" 25 | sentences = re.findall(r'[^.!?]+[.!?](?:\s|$)', text) 26 | return len(sentences) 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | agent = Agent( 33 | instructions="You are a helpful agent that politely interrupts users when they talk too much.", 34 | stt=deepgram.STT(), 35 | llm=openai.LLM(), 36 | tts=openai.TTS(), 37 | vad=silero.VAD.load() 38 | ) 39 | 40 | async def handle_interruption(context): 41 | await agent.update_chat_ctx(context) 42 | session.say("Sorry, can I pause you there?", allow_interruptions=False) 43 | await session.generate_reply(allow_interruptions=False) 44 | 45 | transcript_buffer = "" 46 | max_sentences = 3 47 | 48 | @session.on("user_input_transcribed") 49 | def on_transcript(transcript): 50 | nonlocal transcript_buffer 51 | 52 | if transcript.is_final: 53 | logger.info(f"Received final transcript: {transcript.transcript}") 54 | return 55 | 56 | transcript_buffer += " " + transcript.transcript 57 | transcript_buffer = transcript_buffer.strip() 58 | 59 | logger.info(f"Buffer: {transcript_buffer}") 60 | 61 | sentence_count = count_sentences(transcript_buffer) 62 | logger.info(f"Sentence count: {sentence_count}") 63 | 64 | if sentence_count >= max_sentences: 65 | logger.info("Interrupting user...") 66 | 67 | interruption_ctx = ChatContext([ 68 | ChatMessage( 69 | type="message", 70 | role="system", 71 | content=["You are an agent that politely interrupts users who speak too much. 
Create a brief response that acknowledges what they've said so far, then redirects to get more focused information."] 72 | ), 73 | ChatMessage(type="message", role="user", content=[f"User has been speaking and said: {transcript_buffer}"]) 74 | ]) 75 | 76 | asyncio.create_task(handle_interruption(interruption_ctx)) 77 | transcript_buffer = "" 78 | 79 | @session.on("session_start") 80 | def on_session_start(): 81 | nonlocal transcript_buffer 82 | transcript_buffer = "" 83 | session.generate_reply() 84 | 85 | await session.start(agent=agent, room=ctx.room) 86 | 87 | if __name__ == "__main__": 88 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/large_context.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, google, deepgram, silero 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("google_llm") 11 | logger.setLevel(logging.INFO) 12 | 13 | class WarAndPeaceAgent(Agent): 14 | def __init__(self) -> None: 15 | # Load War and Peace text content 16 | book_path = Path(__file__).parent / "lib" / "war_and_peace.txt" 17 | with open(book_path, "r", encoding="utf-8") as f: 18 | war_and_peace_text = f.read() 19 | 20 | super().__init__( 21 | instructions=f""" 22 | You are a War and Peace book club assistant. You help users discuss and understand Leo Tolstoy's novel "War and Peace." 23 | 24 | You can answer questions about the plot, characters, themes, historical context, and literary analysis of the book. 25 | 26 | Here is the complete text of the book that you can reference: 27 | 28 | {war_and_peace_text} 29 | 30 | Be concise but informative in your responses. If asked about specific passages, quote directly from the text. 31 | """, 32 | stt=deepgram.STT(), 33 | llm=google.LLM(model="gemini-2.0-flash"), 34 | tts=openai.TTS(instructions="You are a literary discussion assistant with a pleasant voice. Speak in a natural, conversational tone that conveys enthusiasm for literature."), 35 | vad=silero.VAD.load() 36 | ) 37 | 38 | async def on_enter(self): 39 | self.session.generate_reply("Welcome to the War and Peace book club! I'm here to discuss Leo Tolstoy's epic novel with you. 
What would you like to talk about?") 40 | 41 | async def entrypoint(ctx: JobContext): 42 | await ctx.connect() 43 | 44 | session = AgentSession() 45 | 46 | await session.start( 47 | agent=WarAndPeaceAgent(), 48 | room=ctx.room 49 | ) 50 | 51 | if __name__ == "__main__": 52 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/llm_powered_content_filter.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import Optional, Any 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, deepgram, silero 8 | from livekit.agents.llm import ChatContext, ChatMessage 9 | import asyncio 10 | 11 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 12 | 13 | logger = logging.getLogger("complex-content-filter") 14 | logger.setLevel(logging.INFO) 15 | 16 | class SimpleAgent(Agent): 17 | def __init__(self) -> None: 18 | super().__init__( 19 | instructions="You are a helpful agent.", 20 | stt=deepgram.STT(), 21 | llm=openai.LLM(), 22 | tts=openai.TTS(), 23 | vad=silero.VAD.load() 24 | ) 25 | self.moderator_llm = openai.LLM(model="gpt-4o-mini") 26 | 27 | async def evaluate_content(self, text: str) -> bool: 28 | """Evaluate if content is appropriate using a separate LLM.""" 29 | moderation_ctx = ChatContext([ 30 | ChatMessage( 31 | type="message", 32 | role="system", 33 | content=["You are a content moderator. Respond ONLY with 'APPROPRIATE' or 'INAPPROPRIATE'. Respond with 'INAPPROPRIATE' if the text mentions strawberries."] 34 | ), 35 | ChatMessage(type="message", role="user", content=[f"Evaluate: {text}"]) 36 | ]) 37 | 38 | response = "" 39 | async with self.moderator_llm.chat(chat_ctx=moderation_ctx) as stream: 40 | async for chunk in stream: 41 | if not chunk: 42 | continue 43 | content = getattr(chunk.delta, 'content', None) if hasattr(chunk, 'delta') else str(chunk) 44 | if content: 45 | response += content 46 | 47 | response = response.strip().upper() 48 | logger.info(f"Moderation response for '{text}': {response}") 49 | return "INAPPROPRIATE" not in response 50 | 51 | async def on_enter(self): 52 | self.session.generate_reply() 53 | 54 | def _extract_content(self, chunk: Any) -> Optional[str]: 55 | """Extract content from a chunk, handling different chunk formats.""" 56 | if not chunk: 57 | return None 58 | if isinstance(chunk, str): 59 | return chunk 60 | if hasattr(chunk, 'delta'): 61 | return getattr(chunk.delta, 'content', None) 62 | return None 63 | 64 | async def llm_node(self, chat_ctx, tools, model_settings=None): 65 | async def process_stream(): 66 | buffer = "" 67 | chunk_buffer = [] 68 | sentence_end_chars = {'.', '!', '?'} 69 | 70 | async with self.llm.chat(chat_ctx=chat_ctx, tools=tools, tool_choice=None) as stream: 71 | try: 72 | async for chunk in stream: 73 | content = self._extract_content(chunk) 74 | chunk_buffer.append(chunk) 75 | 76 | if content: 77 | buffer += content 78 | 79 | if any(char in buffer for char in sentence_end_chars): 80 | last_end = max(buffer.rfind(char) for char in sentence_end_chars if char in buffer) 81 | if last_end != -1: 82 | sentence = buffer[:last_end + 1] 83 | buffer = buffer[last_end + 1:] 84 | 85 | if not await self.evaluate_content(sentence): 86 | yield "Content filtered." 
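# Returning right after the placeholder drops the buffered chunks and ends the stream,
# so nothing past the flagged sentence reaches TTS.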
87 | return 88 | 89 | # Yield buffered chunks if content is appropriate 90 | for buffered_chunk in chunk_buffer: 91 | yield buffered_chunk 92 | chunk_buffer = [] 93 | 94 | # Check any remaining complete sentence 95 | if buffer and any(buffer.endswith(char) for char in sentence_end_chars): 96 | if not await self.evaluate_content(buffer): 97 | yield "Content filtered." 98 | return 99 | for buffered_chunk in chunk_buffer: 100 | yield buffered_chunk 101 | 102 | except asyncio.CancelledError: 103 | raise 104 | except Exception as e: 105 | logger.error(f"Error in content filtering: {str(e)}") 106 | yield "[Error in content filtering]" 107 | 108 | return process_stream() 109 | 110 | async def entrypoint(ctx: JobContext): 111 | await ctx.connect() 112 | await AgentSession().start(agent=SimpleAgent(), room=ctx.room) 113 | 114 | if __name__ == "__main__": 115 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/ollama_llm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, deepgram, silero 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("ollama_llm") 11 | logger.setLevel(logging.INFO) 12 | 13 | class SimpleAgent(Agent): 14 | def __init__(self) -> None: 15 | super().__init__( 16 | instructions=""" 17 | You are a helpful agent. 18 | """, 19 | stt=deepgram.STT(), 20 | llm=openai.LLM.with_ollama(), 21 | tts=openai.TTS(), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | async def on_enter(self): 26 | self.session.generate_reply() 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | 33 | await session.start( 34 | agent=SimpleAgent(), 35 | room=ctx.room 36 | ) 37 | 38 | if __name__ == "__main__": 39 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/openai_llm.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, deepgram, silero 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | logger = logging.getLogger("openai_llm") 11 | logger.setLevel(logging.INFO) 12 | 13 | class SimpleAgent(Agent): 14 | def __init__(self) -> None: 15 | super().__init__( 16 | instructions=""" 17 | You are a helpful agent. 
18 | """, 19 | stt=deepgram.STT(), 20 | llm=openai.LLM(), 21 | tts=openai.TTS(), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | async def on_enter(self): 26 | self.session.generate_reply() 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | 33 | await session.start( 34 | agent=SimpleAgent(), 35 | room=ctx.room 36 | ) 37 | 38 | if __name__ == "__main__": 39 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/replacing_llm_output.py: -------------------------------------------------------------------------------- 1 | # In this example, we replace the <think> tags returned by Deepseek with a custom message, 2 | # so that the TTS engine doesn't say the tags as part of the response. 3 | 4 | import logging 5 | from pathlib import Path 6 | from dotenv import load_dotenv 7 | from livekit.agents import JobContext, WorkerOptions, cli 8 | from livekit.agents.voice import Agent, AgentSession 9 | from livekit.plugins import openai, deepgram, silero 10 | 11 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 12 | 13 | logger = logging.getLogger("replacing-llm-output") 14 | logger.setLevel(logging.INFO) 15 | 16 | class SimpleAgent(Agent): 17 | def __init__(self) -> None: 18 | super().__init__( 19 | instructions=""" 20 | You are a helpful agent. 21 | """, 22 | stt=deepgram.STT(), 23 | llm=openai.LLM.with_groq(model="deepseek-r1-distill-llama-70b"), 24 | tts=openai.TTS(), 25 | vad=silero.VAD.load() 26 | ) 27 | 28 | async def on_enter(self): 29 | self.session.generate_reply() 30 | 31 | async def llm_node( 32 | self, chat_ctx, tools, model_settings=None 33 | ): 34 | async def process_stream(): 35 | async with self.llm.chat(chat_ctx=chat_ctx, tools=tools, tool_choice=None) as stream: 36 | async for chunk in stream: 37 | if chunk is None: 38 | continue 39 | 40 | content = getattr(chunk.delta, 'content', None) if hasattr(chunk, 'delta') else str(chunk) 41 | if content is None: 42 | yield chunk 43 | continue 44 | 45 | processed_content = content.replace("<think>", "").replace("</think>", "Okay, I'm ready to respond.") 46 | print(f"Original: {content}, Processed: {processed_content}") 47 | 48 | if processed_content != content: 49 | if hasattr(chunk, 'delta') and hasattr(chunk.delta, 'content'): 50 | chunk.delta.content = processed_content 51 | else: 52 | chunk = processed_content 53 | 54 | yield chunk 55 | 56 | return process_stream() 57 | 58 | async def entrypoint(ctx: JobContext): 59 | await ctx.connect() 60 | 61 | session = AgentSession() 62 | 63 | await session.start( 64 | agent=SimpleAgent(), 65 | room=ctx.room 66 | ) 67 | 68 | if __name__ == "__main__": 69 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/simple_content_filter.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import AsyncIterable, Optional 4 | from dotenv import load_dotenv 5 | from livekit import rtc 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.plugins import openai, deepgram, silero 9 | import asyncio 10 | 11 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 12 | 13 | logger = logging.getLogger("simple-content-filter") 14 | logger.setLevel(logging.INFO) 15 | 16 | class SimpleAgent(Agent): 17 |
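# llm_node() below intercepts the streamed LLM output and replaces any chunk containing
# a banned term ('fail' in this example) with a placeholder string before it reaches TTS.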
def __init__(self) -> None: 18 | super().__init__( 19 | instructions=""" 20 | You are a helpful agent. 21 | """, 22 | stt=deepgram.STT(), 23 | llm=openai.LLM(), 24 | tts=openai.TTS(), 25 | vad=silero.VAD.load() 26 | ) 27 | 28 | async def on_enter(self): 29 | self.session.generate_reply() 30 | 31 | async def llm_node( 32 | self, chat_ctx, tools, model_settings=None 33 | ): 34 | async def process_stream(): 35 | async with self.llm.chat(chat_ctx=chat_ctx, tools=tools, tool_choice=None) as stream: 36 | async for chunk in stream: 37 | if chunk is None: 38 | continue 39 | 40 | content = getattr(chunk.delta, 'content', None) if hasattr(chunk, 'delta') else str(chunk) 41 | if content is None: 42 | yield chunk 43 | continue 44 | 45 | offensive_terms = ['fail'] 46 | print(content) 47 | yield "CONTENT FILTERED" if any(term in content.lower() for term in offensive_terms) else chunk 48 | 49 | return process_stream() 50 | 51 | async def entrypoint(ctx: JobContext): 52 | await ctx.connect() 53 | 54 | session = AgentSession() 55 | 56 | await session.start( 57 | agent=SimpleAgent(), 58 | room=ctx.room 59 | ) 60 | 61 | if __name__ == "__main__": 62 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-llm/transcription_node.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import AsyncIterable 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli, ModelSettings 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, deepgram, silero 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("openai_llm") 12 | logger.setLevel(logging.INFO) 13 | 14 | class SimpleAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful agent. 
19 | """, 20 | stt=deepgram.STT(), 21 | llm=openai.LLM(), 22 | tts=openai.TTS(), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def on_enter(self): 27 | self.session.generate_reply() 28 | 29 | async def transcription_node(self, text: AsyncIterable[str], model_settings: ModelSettings): 30 | """Modify the transcription output by replacing certain words.""" 31 | replacements = { 32 | "hello": "👋 HELLO", 33 | "goodbye": "GOODBYE 👋", 34 | } 35 | 36 | async def process_text(): 37 | async for chunk in text: 38 | modified_chunk = chunk 39 | original_chunk = chunk 40 | 41 | for word, replacement in replacements.items(): 42 | if word in modified_chunk.lower() or word.capitalize() in modified_chunk: 43 | logger.info(f"Replacing '{word}' with '{replacement}' in transcript") 44 | 45 | modified_chunk = modified_chunk.replace(word, replacement) 46 | modified_chunk = modified_chunk.replace(word.capitalize(), replacement) 47 | 48 | if original_chunk != modified_chunk: 49 | logger.info(f"Original: '{original_chunk}'") 50 | logger.info(f"Modified: '{modified_chunk}'") 51 | 52 | yield modified_chunk 53 | 54 | return process_text() 55 | 56 | async def entrypoint(ctx: JobContext): 57 | await ctx.connect() 58 | 59 | session = AgentSession() 60 | 61 | await session.start( 62 | agent=SimpleAgent(), 63 | room=ctx.room 64 | ) 65 | 66 | if __name__ == "__main__": 67 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-stt/keyword_detection.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import AsyncIterable, Optional 4 | from dotenv import load_dotenv 5 | from livekit import rtc 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.plugins import openai, deepgram, silero 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | logger = logging.getLogger("listen-and-respond") 13 | logger.setLevel(logging.INFO) 14 | 15 | class SimpleAgent(Agent): 16 | def __init__(self) -> None: 17 | super().__init__( 18 | instructions=""" 19 | You are a helpful agent. 
20 | """, 21 | stt=deepgram.STT(), 22 | llm=openai.LLM(), 23 | tts=openai.TTS(), 24 | vad=silero.VAD.load() 25 | ) 26 | 27 | async def on_enter(self): 28 | self.session.generate_reply() 29 | 30 | async def stt_node(self, text: AsyncIterable[str], model_settings: Optional[dict] = None) -> Optional[AsyncIterable[rtc.AudioFrame]]: 31 | keywords = ["Shane", "hello", "thanks"] 32 | parent_stream = super().stt_node(text, model_settings) 33 | 34 | if parent_stream is None: 35 | return None 36 | 37 | async def process_stream(): 38 | async for event in parent_stream: 39 | if hasattr(event, 'type') and str(event.type) == "SpeechEventType.FINAL_TRANSCRIPT" and event.alternatives: 40 | transcript = event.alternatives[0].text 41 | 42 | for keyword in keywords: 43 | if keyword.lower() in transcript.lower(): 44 | logger.info(f"Keyword detected: '{keyword}'") 45 | 46 | yield event 47 | 48 | return process_stream() 49 | 50 | async def entrypoint(ctx: JobContext): 51 | await ctx.connect() 52 | 53 | session = AgentSession() 54 | 55 | await session.start( 56 | agent=SimpleAgent(), 57 | room=ctx.room 58 | ) 59 | 60 | if __name__ == "__main__": 61 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-stt/transcriber.py: -------------------------------------------------------------------------------- 1 | # Transcribes user speech to text, and saves it to a file 2 | 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import deepgram 8 | import datetime 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | async def entrypoint(ctx: JobContext): 13 | 14 | await ctx.connect() 15 | session = AgentSession() 16 | 17 | @session.on("user_input_transcribed") 18 | def on_transcript(transcript): 19 | if transcript.is_final: 20 | timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 21 | with open("user_speech_log.txt", "a") as f: 22 | f.write(f"[{timestamp}] {transcript.transcript}\n") 23 | 24 | await session.start( 25 | agent=Agent( 26 | instructions="You are a helpful assistant that transcribes user speech to text.", 27 | stt=deepgram.STT() 28 | ), 29 | room=ctx.room 30 | ) 31 | 32 | if __name__ == "__main__": 33 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/cartesia_tts.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import asyncio 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit import rtc 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.plugins import deepgram, openai, cartesia, silero 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | class CartesiaAgent(Agent): 13 | def __init__(self) -> None: 14 | super().__init__( 15 | instructions=""" 16 | You are a helpful assistant communicating through voice. You're helping me test ... yourself ... since you're the AI agent. 17 | Don't use any unpronouncable characters. 
18 | """, 19 | stt=deepgram.STT(), 20 | llm=openai.LLM(model="gpt-4o"), 21 | tts=cartesia.TTS( 22 | sample_rate=44100, 23 | model="sonic", 24 | voice="87bc56aa-ab01-4baa-9071-77d497064686" 25 | ), 26 | vad=silero.VAD.load() 27 | ) 28 | 29 | async def on_enter(self): 30 | await self.session.say(f"Hi there! Is there anything I can help you with?") 31 | 32 | async def entrypoint(ctx: JobContext): 33 | await ctx.connect() 34 | 35 | session = AgentSession() 36 | 37 | await session.start( 38 | agent=CartesiaAgent(), 39 | room=ctx.room 40 | ) 41 | 42 | if __name__ == "__main__": 43 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/elevenlabs_change_language.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.llm import function_tool 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import deepgram, openai, elevenlabs, silero 8 | 9 | logger = logging.getLogger("language-switcher") 10 | logger.setLevel(logging.INFO) 11 | 12 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 13 | 14 | class LanguageSwitcherAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful assistant communicating through voice. 19 | You can switch to a different language if asked. 20 | Don't use any unpronouncable characters. 21 | """, 22 | stt=deepgram.STT( 23 | model="nova-2-general", 24 | language="en" 25 | ), 26 | llm=openai.LLM(model="gpt-4o"), 27 | tts=elevenlabs.TTS( 28 | model="eleven_turbo_v2_5", 29 | language="en" 30 | ), 31 | vad=silero.VAD.load() 32 | ) 33 | self.current_language = "en" 34 | 35 | self.language_names = { 36 | "en": "English", 37 | "es": "Spanish", 38 | "fr": "French", 39 | "de": "German", 40 | "it": "Italian" 41 | } 42 | 43 | self.deepgram_language_codes = { 44 | "en": "en", 45 | "es": "es", 46 | "fr": "fr-CA", 47 | "de": "de", 48 | "it": "it" 49 | } 50 | 51 | self.greetings = { 52 | "en": "Hello! I'm now speaking in English. How can I help you today?", 53 | "es": "¡Hola! Ahora estoy hablando en español. ¿Cómo puedo ayudarte hoy?", 54 | "fr": "Bonjour! Je parle maintenant en français. Comment puis-je vous aider aujourd'hui?", 55 | "de": "Hallo! Ich spreche jetzt Deutsch. Wie kann ich Ihnen heute helfen?", 56 | "it": "Ciao! Ora sto parlando in italiano. Come posso aiutarti oggi?" 57 | } 58 | 59 | async def on_enter(self): 60 | await self.session.say(f"Hi there! I can speak in multiple languages including Spanish, French, German, and Italian. Just ask me to switch to any of these languages. 
How can I help you today?") 61 | 62 | async def _switch_language(self, language_code: str) -> None: 63 | """Helper method to switch the language""" 64 | if language_code == self.current_language: 65 | await self.session.say(f"I'm already speaking in {self.language_names[language_code]}.") 66 | return 67 | 68 | if self.tts is not None: 69 | self.tts.update_options(language=language_code) 70 | 71 | if self.stt is not None: 72 | deepgram_language = self.deepgram_language_codes.get(language_code, language_code) 73 | self.stt.update_options(language=deepgram_language) 74 | 75 | self.current_language = language_code 76 | 77 | await self.session.say(self.greetings[language_code]) 78 | 79 | @function_tool 80 | async def switch_to_english(self): 81 | """Switch to speaking English""" 82 | await self._switch_language("en") 83 | 84 | @function_tool 85 | async def switch_to_spanish(self): 86 | """Switch to speaking Spanish""" 87 | await self._switch_language("es") 88 | 89 | @function_tool 90 | async def switch_to_french(self): 91 | """Switch to speaking French""" 92 | await self._switch_language("fr") 93 | 94 | @function_tool 95 | async def switch_to_german(self): 96 | """Switch to speaking German""" 97 | await self._switch_language("de") 98 | 99 | @function_tool 100 | async def switch_to_italian(self): 101 | """Switch to speaking Italian""" 102 | await self._switch_language("it") 103 | 104 | 105 | async def entrypoint(ctx: JobContext): 106 | await ctx.connect() 107 | 108 | session = AgentSession() 109 | 110 | await session.start( 111 | agent=LanguageSwitcherAgent(), 112 | room=ctx.room 113 | ) 114 | 115 | if __name__ == "__main__": 116 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/elevenlabs_tts.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from dotenv import load_dotenv 3 | from livekit.agents import JobContext, WorkerOptions, cli 4 | from livekit.agents.voice import Agent, AgentSession 5 | from livekit.plugins import deepgram, openai, elevenlabs, silero 6 | 7 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 8 | 9 | class ElevenLabsAgent(Agent): 10 | def __init__(self) -> None: 11 | super().__init__( 12 | instructions=""" 13 | You are a helpful assistant communicating through voice. You're helping me test ... yourself ... since you're the AI agent. 14 | Don't use any unpronouncable characters. 15 | """, 16 | stt=deepgram.STT(), 17 | llm=openai.LLM(model="gpt-4o"), 18 | tts=elevenlabs.TTS( 19 | encoding="pcm_44100", 20 | model="eleven_multilingual_v2" 21 | ), 22 | vad=silero.VAD.load() 23 | ) 24 | 25 | async def on_enter(self): 26 | await self.session.say(f"Hi there! Is there anything I can help you with?") 27 | 28 | async def entrypoint(ctx: JobContext): 29 | await ctx.connect() 30 | 31 | session = AgentSession() 32 | 33 | await session.start( 34 | agent=ElevenLabsAgent(), 35 | room=ctx.room 36 | ) 37 | 38 | if __name__ == "__main__": 39 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/only_greet.py: -------------------------------------------------------------------------------- 1 | # Greets the user when they join the room, but doesn't respond to anything else. 2 | # This agent only has TTS, so it can only speak, not listen or think. 
3 | 4 | from pathlib import Path 5 | from dotenv import load_dotenv 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit.agents.voice import Agent, AgentSession 8 | from livekit.plugins import openai 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 11 | 12 | class GreeterAgent(Agent): 13 | def __init__(self) -> None: 14 | super().__init__( 15 | instructions="You are a simple greeter that welcomes users when they join.", 16 | tts=openai.TTS() 17 | ) 18 | 19 | async def on_enter(self): 20 | self.session.say("Hi there! Is there anything I can help you with?") 21 | 22 | async def entrypoint(ctx: JobContext): 23 | await ctx.connect() 24 | 25 | session = AgentSession() 26 | 27 | await session.start( 28 | agent=GreeterAgent(), 29 | room=ctx.room 30 | ) 31 | 32 | if __name__ == "__main__": 33 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/openai_tts.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from dotenv import load_dotenv 3 | from livekit.agents import JobContext, WorkerOptions, cli 4 | from livekit.agents.voice import Agent, AgentSession 5 | from livekit.plugins import deepgram, openai, silero 6 | 7 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 8 | 9 | class SimpleAgent(Agent): 10 | def __init__(self) -> None: 11 | super().__init__( 12 | instructions=""" 13 | You are a helpful assistant communicating through voice. You're helping me test ... yourself ... since you're the AI agent. 14 | Don't use any unpronouncable characters. 15 | """, 16 | stt=deepgram.STT(), 17 | llm=openai.LLM(model="gpt-4o"), 18 | tts=openai.TTS(), 19 | vad=silero.VAD.load() 20 | ) 21 | 22 | async def on_enter(self): 23 | await self.session.say(f"Hi there! Is there anything I can help you with?") 24 | 25 | async def entrypoint(ctx: JobContext): 26 | await ctx.connect() 27 | 28 | session = AgentSession() 29 | 30 | await session.start( 31 | agent=SimpleAgent(), 32 | room=ctx.room 33 | ) 34 | 35 | if __name__ == "__main__": 36 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/playai_tts.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from dotenv import load_dotenv 3 | from livekit.agents import JobContext, WorkerOptions, cli 4 | from livekit.agents.voice import Agent, AgentSession 5 | from livekit.plugins import deepgram, openai, playai, silero 6 | 7 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 8 | 9 | class PlayAIAgent(Agent): 10 | def __init__(self) -> None: 11 | super().__init__( 12 | instructions=""" 13 | You are a helpful assistant communicating through voice. You're helping me test ... yourself ... since you're the AI agent. 14 | Don't use any unpronouncable characters. 15 | """, 16 | stt=deepgram.STT(), 17 | llm=openai.LLM(model="gpt-4o"), 18 | tts=playai.TTS( 19 | model="PlayDialog", 20 | sample_rate=44100, 21 | voice="s3://voice-cloning-zero-shot/9f1ee23a-9108-4538-90be-8e62efc195b6/charlessaad/manifest.json" 22 | ), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def on_enter(self): 27 | await self.session.say(f"Hi there! 
Is there anything I can help you with?") 28 | 29 | async def entrypoint(ctx: JobContext): 30 | await ctx.connect() 31 | 32 | session = AgentSession() 33 | 34 | await session.start( 35 | agent=PlayAIAgent(), 36 | room=ctx.room 37 | ) 38 | 39 | if __name__ == "__main__": 40 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/rime_tts.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from dotenv import load_dotenv 3 | from livekit.agents import JobContext, WorkerOptions, cli 4 | from livekit.agents.voice import Agent, AgentSession 5 | from livekit.plugins import deepgram, openai, rime, silero 6 | 7 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 8 | 9 | class RimeAgent(Agent): 10 | def __init__(self) -> None: 11 | super().__init__( 12 | instructions=""" 13 | You are a helpful assistant communicating through voice. You're helping me test ... yourself ... since you're the AI agent. 14 | Don't use any unpronouncable characters. 15 | """, 16 | stt=deepgram.STT(), 17 | llm=openai.LLM(model="gpt-4o"), 18 | tts=rime.TTS( 19 | sample_rate=44100, 20 | model="mistv2", 21 | speaker="abbie" 22 | ), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def on_enter(self): 27 | await self.session.say(f"Hi there! Is there anything I can help you with?") 28 | 29 | async def entrypoint(ctx: JobContext): 30 | await ctx.connect() 31 | 32 | session = AgentSession() 33 | 34 | await session.start( 35 | agent=RimeAgent(), 36 | room=ctx.room 37 | ) 38 | 39 | if __name__ == "__main__": 40 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/short_replies_only.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import AsyncIterable 3 | import logging 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli, ModelSettings 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import deepgram, openai, silero, rime 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("tts_node") 12 | logger.setLevel(logging.INFO) 13 | 14 | class ShortRepliesOnlyAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful assistant communicating through voice. 19 | """, 20 | stt=deepgram.STT(), 21 | llm=openai.LLM(model="gpt-4o"), 22 | tts=rime.TTS(model="arcana"), 23 | vad=silero.VAD.load() 24 | ) 25 | 26 | async def tts_node(self, text: AsyncIterable[str], model_settings: ModelSettings): 27 | MAX_CHUNKS = 20 28 | chunk_count = 0 29 | 30 | async def process_text(): 31 | nonlocal chunk_count 32 | interrupted = False 33 | async for chunk in text: 34 | chunk_count += 1 35 | if chunk_count > MAX_CHUNKS and not interrupted: 36 | logger.info(f"tts_node: Exceeded {MAX_CHUNKS} chunks. Interrupting.") 37 | self.session.interrupt() 38 | self.session.say("I'm sorry, that will take too long to say.") 39 | interrupted = True 40 | break 41 | 42 | if not interrupted: 43 | yield chunk 44 | 45 | return Agent.default.tts_node(self, process_text(), model_settings) 46 | 47 | async def on_enter(self): 48 | await self.session.say(f"Hi there! 
Is there anything I can help you with?") 49 | 50 | async def entrypoint(ctx: JobContext): 51 | await ctx.connect() 52 | 53 | session = AgentSession() 54 | 55 | await session.start( 56 | agent=ShortRepliesOnlyAgent(), 57 | room=ctx.room 58 | ) 59 | 60 | if __name__ == "__main__": 61 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /pipeline-tts/tts_node.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import AsyncIterable 3 | import logging 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli, ModelSettings 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import deepgram, openai, silero, rime 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("tts_node") 12 | logger.setLevel(logging.INFO) 13 | 14 | class TtsNodeOverrideAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a helpful assistant communicating through voice. 19 | Feel free to use "lol" in your responses when something is funny. 20 | """, 21 | stt=deepgram.STT(), 22 | llm=openai.LLM(model="gpt-4o"), 23 | tts=rime.TTS(model="arcana"), 24 | vad=silero.VAD.load() 25 | ) 26 | 27 | async def tts_node(self, text: AsyncIterable[str], model_settings: ModelSettings): 28 | """Modify the TTS output by replacing 'lol' with ''.""" 29 | 30 | async def process_text(): 31 | async for chunk in text: 32 | original_chunk = chunk 33 | modified_chunk = chunk.replace("lol", "").replace("LOL", "") 34 | 35 | if original_chunk != modified_chunk: 36 | logger.info(f"TTS original: '{original_chunk}'") 37 | logger.info(f"TTS modified: '{modified_chunk}'") 38 | 39 | yield modified_chunk 40 | 41 | return Agent.default.tts_node(self, process_text(), model_settings) 42 | 43 | async def on_enter(self): 44 | await self.session.say(f"Hi there! Is there anything I can help you with? If you say something funny, I might respond with lol.") 45 | 46 | async def entrypoint(ctx: JobContext): 47 | await ctx.connect() 48 | 49 | session = AgentSession() 50 | 51 | await session.start( 52 | agent=TtsNodeOverrideAgent(), 53 | room=ctx.room 54 | ) 55 | 56 | if __name__ == "__main__": 57 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /rag/README.md: -------------------------------------------------------------------------------- 1 | # RAG-Enriched Voice Agent 2 | 3 | This sample project demonstrates a Retrieval-Augmented Generation (RAG) enabled voice agent using LiveKit Agents 1.0. The example scrapes the LiveKit docs site, builds a local index, and then provides that data on demand to the assistant via a function tool. 4 | 5 | ## Prerequisites 6 | 7 | - Python 3.9 or higher 8 | - OpenAI API key 9 | - Deepgram API key 10 | - LiveKit server 11 | 12 | ## Installation 13 | 14 | 1. Clone the repository 15 | 2. Create a virtual environment: 16 | ```bash 17 | python -m venv venv 18 | source venv/bin/activate # On Windows: venv\Scripts\activate 19 | ``` 20 | 3. Install dependencies: 21 | ```bash 22 | pip install -r requirements.txt 23 | ``` 24 | 4. 
Create a `.env` file in the project root with your API keys: 25 | ``` 26 | OPENAI_API_KEY=your_openai_api_key 27 | DEEPGRAM_API_KEY=your_deepgram_api_key 28 | LIVEKIT_URL=your_livekit_url 29 | LIVEKIT_API_KEY=your_livekit_api_key 30 | LIVEKIT_API_SECRET=your_livekit_api_secret 31 | ``` 32 | 33 | ## Project Structure 34 | 35 | - `main.py`: Main agent implementation 36 | - `scrape_docs.py`: Scraper for the LiveKit docs site 37 | - `build_rag_data.py`: Script to build the RAG database from scraped docs 38 | - `rag_db_builder.py`: Database builder implementation 39 | - `rag_handler.py`: RAG processing logic 40 | - `data/`: Directory for vector database files 41 | 42 | ## Usage 43 | 44 | 1. Scrape the docs site: 45 | ```bash 46 | python scrape_docs.py 47 | ``` 48 | 49 | 2. Build the RAG database: 50 | ```bash 51 | python build_rag_data.py 52 | ``` 53 | 54 | 3. Download model files: 55 | ```bash 56 | python main.py download-files 57 | ``` 58 | 59 | 4. Run the agent: 60 | ```bash 61 | python main.py console 62 | ``` 63 | 64 | The agent will start and be ready to handle voice interactions. It will use the RAG system to provide contextually relevant answers to user questions. 65 | -------------------------------------------------------------------------------- /rag/build_rag_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import asyncio 3 | import logging 4 | from pathlib import Path 5 | from dotenv import load_dotenv 6 | from rag_db_builder import RAGBuilder 7 | 8 | # Configure logging 9 | logging.basicConfig( 10 | level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" 11 | ) 12 | logger = logging.getLogger("build-rag-data") 13 | 14 | # Load environment variables 15 | load_dotenv() 16 | 17 | 18 | async def main() -> None: 19 | """ 20 | Build the RAG database from the scraped docs content. 21 | 22 | Usage: 23 | 1. Run scrape_docs.py to scrape the docs content 24 | 2. Run this script to build the RAG database 25 | 3. The database will be created in the 'data' directory 26 | """ 27 | # Check if raw_data.txt exists 28 | raw_data_path = Path(__file__).parent / "data/raw_data.txt" 29 | if not raw_data_path.exists(): 30 | logger.error( 31 | "raw_data.txt not found. 
Please run scrape_docs.py first:\n" 32 | "$ python scrape_docs.py" 33 | ) 34 | return 35 | 36 | # Create and build the RAG database 37 | output_dir = Path(__file__).parent / "data" 38 | output_dir.mkdir(exist_ok=True) 39 | 40 | logger.info("Building RAG database...") 41 | await RAGBuilder.create_from_file( 42 | file_path=raw_data_path, 43 | index_path=output_dir, 44 | data_path=output_dir / "paragraphs.pkl", 45 | embeddings_dimension=1536, 46 | ) 47 | logger.info("RAG database successfully built!") 48 | logger.info(f"Index saved to: {output_dir}") 49 | logger.info(f"Data saved to: {output_dir / 'paragraphs.pkl'}") 50 | 51 | 52 | if __name__ == "__main__": 53 | asyncio.run(main()) 54 | -------------------------------------------------------------------------------- /rag/requirements.txt: -------------------------------------------------------------------------------- 1 | livekit-agents[openai,silero,turn-detector,deepgram]~=1.0 2 | livekit-plugins-noise-cancellation~=0.2 3 | python-dotenv 4 | annoy 5 | aiohttp>=3.8.0 6 | beautifulsoup4>=4.12.0 7 | lxml>=4.9.0 8 | -------------------------------------------------------------------------------- /rag/scrape_docs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import asyncio 3 | import logging 4 | import re 5 | from pathlib import Path 6 | from typing import List, Set 7 | from urllib.parse import urljoin, urlparse 8 | 9 | import aiohttp 10 | from bs4 import BeautifulSoup 11 | from dotenv import load_dotenv 12 | 13 | # Configure logging 14 | logging.basicConfig( 15 | level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" 16 | ) 17 | logger = logging.getLogger("docs-scraper") 18 | 19 | # Load environment variables 20 | load_dotenv() 21 | 22 | BASE_URL = "https://docs.livekit.io" 23 | SITEMAP_URL = f"{BASE_URL}/sitemap.xml" 24 | OUTPUT_FILE = Path(__file__).parent / "data/raw_data.txt" 25 | EXCLUDED_PATHS = ["/reference"] # Paths to exclude from scraping 26 | 27 | class DocsScraper: 28 | def __init__(self): 29 | self.visited_urls: Set[str] = set() 30 | self.content: List[str] = [] 31 | self.session = None 32 | 33 | async def init_session(self): 34 | """Initialize the aiohttp session.""" 35 | self.session = aiohttp.ClientSession() 36 | 37 | async def close_session(self): 38 | """Close the aiohttp session.""" 39 | if self.session: 40 | await self.session.close() 41 | 42 | def should_exclude_url(self, url: str) -> bool: 43 | """Check if a URL should be excluded from scraping.""" 44 | parsed = urlparse(url) 45 | return any(parsed.path.startswith(path) for path in EXCLUDED_PATHS) 46 | 47 | async def fetch_sitemap(self) -> List[str]: 48 | """Fetch and parse the sitemap to get all URLs.""" 49 | async with self.session.get(SITEMAP_URL) as response: 50 | if response.status != 200: 51 | raise Exception(f"Failed to fetch sitemap: {response.status}") 52 | 53 | content = await response.text() 54 | soup = BeautifulSoup(content, "xml") 55 | urls = [loc.text for loc in soup.find_all("loc")] 56 | 57 | # Filter out excluded URLs and ensure they're from docs.livekit.io 58 | return [ 59 | url for url in urls 60 | if url.startswith(BASE_URL) and not self.should_exclude_url(url) 61 | ] 62 | 63 | async def fetch_page(self, url: str) -> str: 64 | """Fetch a single page and extract its content.""" 65 | try: 66 | async with self.session.get(url) as response: 67 | if response.status != 200: 68 | logger.warning(f"Failed to fetch {url}: {response.status}") 69 | return "" 70 | 71 | 
content = await response.text() 72 | soup = BeautifulSoup(content, "html.parser") 73 | 74 | # Extract the main content 75 | main_content = soup.find("main") 76 | if not main_content: 77 | return "" 78 | 79 | # Remove unwanted elements 80 | for element in main_content.find_all(["nav", "footer", "header", "script", "style"]): 81 | element.decompose() 82 | 83 | # Clean up the text 84 | text = main_content.get_text(separator="\n", strip=True) 85 | text = re.sub(r"\n\s*\n", "\n\n", text) # Remove excessive newlines 86 | return text.strip() 87 | 88 | except Exception as e: 89 | logger.error(f"Error fetching {url}: {e}") 90 | return "" 91 | 92 | async def scrape(self): 93 | """Main scraping function.""" 94 | await self.init_session() 95 | try: 96 | # Get all URLs from sitemap 97 | urls = await self.fetch_sitemap() 98 | logger.info(f"Found {len(urls)} URLs to scrape") 99 | 100 | # Process each URL 101 | for url in urls: 102 | if url in self.visited_urls: 103 | continue 104 | 105 | self.visited_urls.add(url) 106 | logger.info(f"Scraping {url}") 107 | 108 | content = await self.fetch_page(url) 109 | if content: 110 | self.content.append(f"Content from {url}:\n\n{content}\n\n") 111 | 112 | finally: 113 | await self.close_session() 114 | 115 | def save_content(self): 116 | """Save the scraped content to a file.""" 117 | OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True) # data/ is gitignored, so create it on first run 118 | with open(OUTPUT_FILE, "w") as f: 119 | f.write("\n".join(self.content)) 120 | logger.info(f"Saved content to {OUTPUT_FILE}") 121 | 122 | async def main(): 123 | """Main function to run the scraper.""" 124 | scraper = DocsScraper() 125 | await scraper.scrape() 126 | scraper.save_content() 127 | 128 | if __name__ == "__main__": 129 | asyncio.run(main()) -------------------------------------------------------------------------------- /realtime/openai-realtime.py: -------------------------------------------------------------------------------- 1 | import librosa 2 | import numpy as np 3 | from typing import AsyncIterable 4 | from dotenv import load_dotenv 5 | from pathlib import Path 6 | from livekit import agents, rtc 7 | from livekit.agents import utils 8 | from livekit.agents.voice import AgentSession, Agent, room_io, ModelSettings 9 | from livekit.plugins import ( 10 | openai, 11 | silero 12 | ) 13 | 14 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 15 | 16 | class Assistant(Agent): 17 | def __init__(self, *, pitch_shift_semitones: float = -4.0) -> None: 18 | super().__init__(instructions="You are a helpful voice AI assistant.") 19 | self.pitch_shift_semitones = pitch_shift_semitones 20 | 21 | async def realtime_audio_output_node( 22 | self, audio: AsyncIterable[rtc.AudioFrame], model_settings: ModelSettings 23 | ) -> AsyncIterable[rtc.AudioFrame]: 24 | return self._process_audio_stream( 25 | Agent.default.realtime_audio_output_node(self, audio, model_settings) 26 | ) 27 | 28 | async def _process_audio_stream( 29 | self, audio: AsyncIterable[rtc.AudioFrame] 30 | ) -> AsyncIterable[rtc.AudioFrame]: 31 | stream: utils.audio.AudioByteStream | None = None 32 | async for frame in audio: 33 | if stream is None: 34 | stream = utils.audio.AudioByteStream( 35 | sample_rate=frame.sample_rate, 36 | num_channels=frame.num_channels, 37 | samples_per_channel=frame.sample_rate // 4, 38 | ) 39 | for f in stream.push(frame.data): 40 | yield self._process_audio(f) 41 | 42 | for f in stream.flush(): 43 | yield self._process_audio(f) 44 | 45 | def _process_audio(self, frame: rtc.AudioFrame) -> rtc.AudioFrame: 46 | audio_data = np.frombuffer(frame.data, dtype=np.int16) 47 | 48 | shifted
= librosa.effects.pitch_shift( 49 | audio_data.astype(np.float32) / np.iinfo(np.int16).max, 50 | sr=frame.sample_rate, 51 | n_steps=self.pitch_shift_semitones, 52 | ) 53 | shifted = (shifted * np.iinfo(np.int16).max).astype(np.int16) 54 | return rtc.AudioFrame( 55 | data=shifted.tobytes(), 56 | sample_rate=frame.sample_rate, 57 | num_channels=frame.num_channels, 58 | samples_per_channel=shifted.shape[-1], 59 | ) 60 | 61 | async def entrypoint(ctx: agents.JobContext): 62 | await ctx.connect() 63 | 64 | session = AgentSession( 65 | llm=openai.realtime.RealtimeModel(), 66 | vad=silero.VAD.load() 67 | ) 68 | 69 | await session.start( 70 | room=ctx.room, 71 | agent=Assistant() 72 | ) 73 | 74 | await session.generate_reply() 75 | 76 | 77 | if __name__ == "__main__": 78 | agents.cli.run_app(agents.WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | livekit-agents[openai,silero,turn-detector,deepgram,google,anthropic,cartesia,elevenlabs,rime,playai,groq,tavus]~=1.0 2 | livekit-plugins-noise-cancellation~=0.0 3 | python-dotenv 4 | requests>=2.32.0 5 | annoy 6 | pydantic 7 | flask 8 | pandas 9 | websockets>=11.0.3 10 | rich 11 | mcp 12 | librosa -------------------------------------------------------------------------------- /telephony/answer_call.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import openai, deepgram, silero 7 | 8 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 9 | 10 | class SimpleAgent(Agent): 11 | def __init__(self) -> None: 12 | super().__init__( 13 | instructions=""" 14 | You are a helpful agent. 15 | """, 16 | stt=deepgram.STT(), 17 | llm=openai.LLM(model="gpt-4o"), 18 | tts=openai.TTS(), 19 | vad=silero.VAD.load() 20 | ) 21 | 22 | async def on_enter(self): 23 | # Generate initial greeting 24 | self.session.generate_reply() 25 | 26 | async def entrypoint(ctx: JobContext): 27 | await ctx.connect() 28 | 29 | session = AgentSession() 30 | agent = SimpleAgent() 31 | 32 | await session.start( 33 | agent=agent, 34 | room=ctx.room 35 | ) 36 | 37 | if __name__ == "__main__": 38 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /telephony/make_call/calling_agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent.parent / '.env') 10 | 11 | logger = logging.getLogger("calling-agent") 12 | logger.setLevel(logging.INFO) 13 | 14 | class SimpleAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are calling someone on the phone. Your goal is to know if they prefer 19 | chocolate or vanilla ice cream. That's the only question you should ask, and 20 | you should get right to the point. 
Say something like "Hello, I'm calling to 21 | ask you a question about ice cream. Do you prefer chocolate or vanilla?" 22 | """, 23 | stt=deepgram.STT(), 24 | llm=openai.LLM(model="gpt-4o"), 25 | tts=openai.TTS(), 26 | vad=silero.VAD.load() 27 | ) 28 | 29 | async def on_enter(self): 30 | self.session.generate_reply() 31 | 32 | async def entrypoint(ctx: JobContext): 33 | await ctx.connect() 34 | 35 | session = AgentSession() 36 | 37 | await session.start( 38 | agent=SimpleAgent(), 39 | room=ctx.room 40 | ) 41 | 42 | if __name__ == "__main__": 43 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /telephony/make_call/make_call.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import logging 4 | from pathlib import Path 5 | from dotenv import load_dotenv 6 | from livekit import api 7 | 8 | # Load environment variables 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent.parent / '.env') 10 | 11 | # Set up logging 12 | logger = logging.getLogger("make-call") 13 | logger.setLevel(logging.INFO) 14 | 15 | # Configuration 16 | room_name = "my-room" 17 | agent_name = "test-agent" 18 | outbound_trunk_id = os.getenv("SIP_OUTBOUND_TRUNK_ID") 19 | 20 | async def make_call(phone_number): 21 | """Create a dispatch and add a SIP participant to call the phone number""" 22 | lkapi = api.LiveKitAPI() 23 | 24 | # Create agent dispatch 25 | logger.info(f"Creating dispatch for agent {agent_name} in room {room_name}") 26 | dispatch = await lkapi.agent_dispatch.create_dispatch( 27 | api.CreateAgentDispatchRequest( 28 | agent_name=agent_name, room=room_name, metadata=phone_number 29 | ) 30 | ) 31 | logger.info(f"Created dispatch: {dispatch}") 32 | 33 | # Create SIP participant to make the call 34 | if not outbound_trunk_id or not outbound_trunk_id.startswith("ST_"): 35 | logger.error("SIP_OUTBOUND_TRUNK_ID is not set or invalid") 36 | return 37 | 38 | logger.info(f"Dialing {phone_number} to room {room_name}") 39 | 40 | try: 41 | # Create SIP participant to initiate the call 42 | sip_participant = await lkapi.sip.create_sip_participant( 43 | api.CreateSIPParticipantRequest( 44 | room_name=room_name, 45 | sip_trunk_id=outbound_trunk_id, 46 | sip_call_to=phone_number, 47 | participant_identity="phone_user", 48 | ) 49 | ) 50 | logger.info(f"Created SIP participant: {sip_participant}") 51 | except Exception as e: 52 | logger.error(f"Error creating SIP participant: {e}") 53 | 54 | # Close API connection 55 | await lkapi.aclose() 56 | 57 | async def main(): 58 | # Replace with the actual phone number including country code 59 | phone_number = "+13432024203" 60 | await make_call(phone_number) 61 | 62 | if __name__ == "__main__": 63 | asyncio.run(main()) 64 | -------------------------------------------------------------------------------- /telephony/survey_caller/make_survey_calls.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import csv 3 | import json 4 | import logging 5 | import os 6 | from pathlib import Path 7 | from dotenv import load_dotenv 8 | from livekit import api 9 | 10 | load_dotenv(dotenv_path=Path(__file__).parent.parent.parent / '.env') 11 | 12 | logger = logging.getLogger("make-survey-calls") 13 | logger.setLevel(logging.INFO) 14 | 15 | # Configuration 16 | room_name_prefix = "survey-call-" 17 | agent_name = "survey-agent" 18 | outbound_trunk_id = os.getenv("SIP_OUTBOUND_TRUNK_ID") 19 
| csv_file_path = Path(__file__).parent / "survey_data.csv" 20 | 21 | async def make_survey_call(phone_number, question, row_index): 22 | """Create a dispatch and add a SIP participant to call the phone number with survey question""" 23 | # Create a unique room name for each call using the prefix and row index 24 | room_name = f"{room_name_prefix}{row_index}" 25 | 26 | # Create metadata as JSON containing all relevant data 27 | metadata = json.dumps({ 28 | "phone_number": phone_number, 29 | "question": question, 30 | "row_index": row_index 31 | }) 32 | 33 | lkapi = api.LiveKitAPI() 34 | 35 | logger.info(f"Creating dispatch for agent {agent_name} in room {room_name}") 36 | 37 | dispatch = await lkapi.agent_dispatch.create_dispatch( 38 | api.CreateAgentDispatchRequest( 39 | agent_name=agent_name, 40 | room=room_name, 41 | metadata=metadata 42 | ) 43 | ) 44 | logger.info(f"Created dispatch: {dispatch}") 45 | logger.info(f"Dialing {phone_number} to room {room_name}") 46 | 47 | sip_participant = await lkapi.sip.create_sip_participant( 48 | api.CreateSIPParticipantRequest( 49 | room_name=room_name, 50 | sip_trunk_id=outbound_trunk_id, 51 | sip_call_to=phone_number, 52 | participant_identity="phone_user", 53 | ) 54 | ) 55 | logger.info(f"Created SIP participant: {sip_participant}") 56 | 57 | await lkapi.aclose() 58 | return True 59 | 60 | async def read_csv_data(): 61 | """Read the CSV file and return the data""" 62 | data = [] 63 | with open(csv_file_path, 'r', newline='') as f: 64 | reader = csv.reader(f) 65 | headers = next(reader) # Skip headers 66 | for i, row in enumerate(reader): 67 | if len(row) >= 2: 68 | data.append({ 69 | 'row_index': i + 1, 70 | 'phone_number': row[0], 71 | 'question': row[1], 72 | 'answer': row[2] if len(row) > 2 else '', 73 | 'status': row[3] if len(row) > 3 else '' 74 | }) 75 | 76 | return data 77 | 78 | async def process_survey_calls(): 79 | """Process all the survey calls in the CSV""" 80 | # Read the CSV data 81 | data = await read_csv_data() 82 | 83 | logger.info(f"Found {len(data)} survey calls to make") 84 | 85 | for item in data: 86 | if item['answer'] or (item['status'] and item['status'] != ''): 87 | logger.info(f"Skipping row {item['row_index']} as it already has an answer or status") 88 | continue 89 | 90 | logger.info(f"Processing survey call to {item['phone_number']} with question: {item['question']}") 91 | 92 | await make_survey_call(item['phone_number'], item['question'], item['row_index']) 93 | 94 | async def main(): 95 | logger.info("Starting survey calls process") 96 | if not outbound_trunk_id: 97 | logger.error("SIP_OUTBOUND_TRUNK_ID is not set. 
Please add it to your .env file.") 98 | return 99 | await process_survey_calls() 100 | logger.info("Survey calls process completed") 101 | 102 | if __name__ == "__main__": 103 | asyncio.run(main()) 104 | -------------------------------------------------------------------------------- /telephony/survey_caller/survey_calling_agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import asyncio 4 | import pandas as pd 5 | import json 6 | from pathlib import Path 7 | from dotenv import load_dotenv 8 | from livekit.agents import JobContext, WorkerOptions, cli 9 | from livekit.agents.voice import Agent, AgentSession, RunContext 10 | from livekit.agents.llm import function_tool 11 | from livekit.plugins import openai, silero, deepgram 12 | from livekit.api import DeleteRoomRequest 13 | 14 | load_dotenv(dotenv_path=Path(__file__).parent.parent.parent / '.env') 15 | 16 | logger = logging.getLogger("calling-agent") 17 | logger.setLevel(logging.INFO) 18 | 19 | csv_file_path = Path(__file__).parent / "survey_data.csv" 20 | 21 | class SurveyAgent(Agent): 22 | def __init__(self, question="Do you prefer chocolate or vanilla ice cream?", context=None, job_context=None) -> None: 23 | self.survey_question = question 24 | self.context = context or {} 25 | self.job_context = job_context 26 | self.survey_answer = None 27 | self.phone_number = self.context.get("phone_number", "unknown") 28 | # Adjust for 0-based indexing since row_index from metadata is 1-based 29 | self.row_index = self.context.get("row_index", 1) # Default to 1 if not provided 30 | 31 | instructions = f""" 32 | You are conducting a brief phone survey. Your goal is to ask the following question: 33 | '{self.survey_question}' 34 | 35 | Be polite and professional. Introduce yourself as a survey caller named "Sam", ask the question, 36 | and thank them for their time. Keep the call brief and focused on getting their answer. 37 | Don't ask any follow-up questions. 38 | 39 | Note: When you have an answer to the question, use the `record_survey_answer` function 40 | to persist what the user said. 
41 | """ 42 | 43 | super().__init__( 44 | instructions=instructions, 45 | stt=deepgram.STT(), 46 | llm=openai.LLM(model="gpt-4o"), 47 | tts=openai.TTS(), 48 | vad=silero.VAD.load() 49 | ) 50 | 51 | @function_tool 52 | async def record_survey_answer(self, context: RunContext, answer: str): 53 | logger.info(f"Survey answer recorded: {answer}") 54 | logger.info(f"Row index: {self.row_index}") 55 | self.survey_answer = answer 56 | 57 | df = pd.read_csv(csv_file_path, dtype=str) 58 | logger.info(f"CSV contents before update: {df.head()}") 59 | 60 | df.loc[self.row_index - 1, 'Answer'] = answer 61 | df.loc[self.row_index - 1, 'Status'] = 'Completed' 62 | logger.info(f"CSV contents after update: {df.head()}") 63 | df.to_csv(csv_file_path, index=False) 64 | 65 | await asyncio.sleep(5) # give the agent a moment to finish speaking before the room is deleted 66 | await self.job_context.api.room.delete_room(DeleteRoomRequest( 67 | room=self.job_context.room.name 68 | )) 69 | 70 | return None, "[Call ended]" 71 | 72 | async def entrypoint(ctx: JobContext): 73 | await ctx.connect() 74 | 75 | metadata_json = ctx.job.metadata 76 | logger.info(f"Received metadata: {metadata_json}") 77 | 78 | metadata = json.loads(metadata_json) 79 | phone_number = metadata.get("phone_number", "unknown") 80 | row_index = metadata.get("row_index", 1) 81 | question = metadata.get("question", "Do you prefer chocolate or vanilla ice cream?") 82 | 83 | logger.info(f"Parsed metadata - phone_number: {phone_number}, row_index: {row_index}, question: {question}") 84 | 85 | context = { 86 | "phone_number": phone_number, 87 | "row_index": row_index 88 | } 89 | 90 | session = AgentSession() 91 | agent = SurveyAgent(question=question, context=context, job_context=ctx) 92 | 93 | await session.start( 94 | agent=agent, 95 | room=ctx.room 96 | ) 97 | 98 | if __name__ == "__main__": 99 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, agent_name="survey-agent")) -------------------------------------------------------------------------------- /telephony/survey_caller/survey_data.csv: -------------------------------------------------------------------------------- 1 | Phone Number,Question,Answer,Status 2 | +13432024203,Do you prefer chocolate or vanilla ice cream?,, 3 | -------------------------------------------------------------------------------- /telephony/warm_handoff.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import uuid 4 | from pathlib import Path 5 | from dotenv import load_dotenv 6 | from livekit.agents import JobContext, WorkerOptions, cli 7 | from livekit import rtc 8 | from livekit import api 9 | from livekit.agents.llm import function_tool 10 | from livekit.agents.voice import Agent, AgentSession, RunContext 11 | from livekit.plugins import deepgram, openai, silero, elevenlabs 12 | 13 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 14 | 15 | class WarmHandoffAgent(Agent): 16 | def __init__(self, job_context=None) -> None: 17 | self.job_context = job_context 18 | super().__init__( 19 | instructions=""" 20 | You are a helpful assistant communicating through voice. You're helping me test ... yourself ... since you're the AI agent. 21 | Don't use any unpronounceable characters.
22 | """, 23 | stt=deepgram.STT(), 24 | llm=openai.LLM(model="gpt-4o"), 25 | tts=elevenlabs.TTS( 26 | encoding="pcm_44100", 27 | model="eleven_multilingual_v2" 28 | ), 29 | vad=silero.VAD.load() 30 | ) 31 | 32 | @function_tool 33 | async def transfer_call(self, context: RunContext, phone_number: str): 34 | """ 35 | Transfer the current call to a human agent at the specified phone number. 36 | 37 | Args: 38 | context: The call context 39 | phone_number: The phone number to transfer the call to 40 | """ 41 | if not self.job_context: 42 | await self.session.say("I'm sorry, I can't transfer the call at this time.") 43 | return None, "Failed to transfer call: No job context available" 44 | 45 | # Get room name from environment variable 46 | room_name = os.environ.get('LIVEKIT_ROOM_NAME', self.job_context.room.name) 47 | 48 | # Generate a unique identity for the SIP participant 49 | identity = f"transfer_{uuid.uuid4().hex[:8]}" 50 | 51 | # Create LiveKit API client 52 | livekit_url = os.environ.get('LIVEKIT_URL') 53 | livekit_api_key = os.environ.get('LIVEKIT_API_KEY') 54 | livekit_api_secret = os.environ.get('LIVEKIT_API_SECRET') 55 | sip_trunk_id = os.environ.get('SIP_TRUNK_ID') 56 | 57 | try: 58 | print(f"Transferring call to {phone_number}") 59 | 60 | # Using the API from the job context if available 61 | if self.job_context and hasattr(self.job_context, 'api'): 62 | response = await self.job_context.api.sip.create_sip_participant( 63 | api.CreateSIPParticipantRequest( 64 | sip_trunk_id=sip_trunk_id, 65 | sip_call_to=phone_number, 66 | room_name=room_name, 67 | participant_identity=identity, 68 | participant_name="Human Agent", 69 | krisp_enabled=True 70 | ) 71 | ) 72 | else: 73 | # Fallback to creating our own API client 74 | livekit_api = api.LiveKitAPI( 75 | url=livekit_url, 76 | api_key=livekit_api_key, 77 | api_secret=livekit_api_secret 78 | ) 79 | 80 | response = await livekit_api.sip.create_sip_participant( 81 | api.CreateSIPParticipantRequest( 82 | sip_trunk_id=sip_trunk_id, 83 | sip_call_to=phone_number, 84 | room_name=room_name, 85 | participant_identity=identity, 86 | participant_name="Human Agent", 87 | krisp_enabled=True 88 | ) 89 | ) 90 | 91 | await livekit_api.aclose() 92 | 93 | await self.session.say(f"I'm transferring you to a human agent now. Please hold while we connect you.") 94 | 95 | return None, f"I've transferred you to a human agent at {phone_number}. Please hold while we connect you." 96 | 97 | except Exception as e: 98 | print(f"Error transferring call: {e}") 99 | await self.session.say(f"I'm sorry, I couldn't transfer the call at this time.") 100 | return None, f"Failed to transfer call: {e}" 101 | 102 | async def on_enter(self): 103 | # Generate initial greeting 104 | self.session.generate_reply() 105 | 106 | async def entrypoint(ctx: JobContext): 107 | await ctx.connect() 108 | 109 | session = AgentSession() 110 | agent = WarmHandoffAgent(job_context=ctx) 111 | 112 | await session.start( 113 | agent=agent, 114 | room=ctx.room 115 | ) 116 | 117 | def on_participant_connected_handler(participant: rtc.RemoteParticipant): 118 | asyncio.create_task(async_on_participant_connected(participant)) 119 | 120 | async def async_on_participant_connected(participant: rtc.RemoteParticipant): 121 | await agent.session.say(f"Hi there! 
Is there anything I can help you with?") 122 | 123 | # Handle existing participants 124 | for participant in ctx.room.remote_participants.values(): 125 | asyncio.create_task(async_on_participant_connected(participant)) 126 | 127 | # Set up listener for new participants 128 | ctx.room.on("participant_connected", on_participant_connected_handler) 129 | 130 | if __name__ == "__main__": 131 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /tool_calling/call_function_tool.py: -------------------------------------------------------------------------------- 1 | ## This is a basic example of how to use function calling. 2 | ## To test the function, you can ask the agent to print to the console! 3 | 4 | import logging 5 | from pathlib import Path 6 | from dotenv import load_dotenv 7 | from livekit.agents import JobContext, WorkerOptions, cli 8 | from livekit.agents.llm import function_tool 9 | from livekit.agents.voice import Agent, AgentSession, RunContext 10 | from livekit.plugins import deepgram, openai, silero 11 | 12 | logger = logging.getLogger("function-calling") 13 | logger.setLevel(logging.INFO) 14 | 15 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 16 | 17 | class FunctionAgent(Agent): 18 | def __init__(self) -> None: 19 | super().__init__( 20 | instructions=""" 21 | You are a helpful assistant communicating through voice. Don't use any unpronounceable characters. 22 | Note: If asked to print to the console, use the `print_to_console` function. 23 | """, 24 | stt=deepgram.STT(), 25 | llm=openai.LLM(model="gpt-4o"), 26 | tts=openai.TTS(), 27 | vad=silero.VAD.load() 28 | ) 29 | 30 | @function_tool 31 | async def print_to_console(self, context: RunContext): 32 | print("Console Print Success!") 33 | return None, "I've printed to the console." 34 | 35 | async def on_enter(self): 36 | self.session.generate_reply() 37 | 38 | async def entrypoint(ctx: JobContext): 39 | await ctx.connect() 40 | 41 | session = AgentSession() 42 | 43 | await session.start( 44 | agent=FunctionAgent(), 45 | room=ctx.room 46 | ) 47 | 48 | if __name__ == "__main__": 49 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /tool_calling/update_tools.py: -------------------------------------------------------------------------------- 1 | ## This is a basic example of how to add a tool to an agent at runtime with `update_tools`. 2 | ## To test it, ask the agent to print to the console or to give you a random number! 3 | 4 | import logging 5 | import random 6 | from pathlib import Path 7 | from dotenv import load_dotenv 8 | from livekit.agents import JobContext, WorkerOptions, cli 9 | from livekit.agents.llm import function_tool 10 | from livekit.agents.voice import Agent, AgentSession, RunContext 11 | from livekit.plugins import deepgram, openai, silero 12 | 13 | logger = logging.getLogger("function-calling") 14 | logger.setLevel(logging.INFO) 15 | 16 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 17 | 18 | class AddFunctionAgent(Agent): 19 | def __init__(self) -> None: 20 | super().__init__( 21 | instructions=""" 22 | You are a helpful assistant communicating through voice. Don't use any unpronounceable characters. 23 | Note: If asked to print to the console, use the `print_to_console` function.
24 | """, 25 | stt=deepgram.STT(), 26 | llm=openai.LLM(model="gpt-4o"), 27 | tts=openai.TTS(), 28 | vad=silero.VAD.load() 29 | ) 30 | 31 | @function_tool 32 | async def print_to_console(self, context: RunContext): 33 | print("Console Print Success!") 34 | return None, "I've printed to the console." 35 | 36 | async def on_enter(self): 37 | self.session.generate_reply() 38 | 39 | async def entrypoint(ctx: JobContext): 40 | await ctx.connect() 41 | 42 | session = AgentSession() 43 | agent=AddFunctionAgent() 44 | 45 | async def _random_number() -> int: 46 | num = random.randint(0, 100) 47 | logger.info(f"random_number called: {num}") 48 | return num 49 | 50 | await agent.update_tools( 51 | agent.tools 52 | + [function_tool(_random_number, name="random_number", description="Get a random number")] 53 | ) 54 | 55 | await session.start( 56 | agent=agent, 57 | room=ctx.room 58 | ) 59 | 60 | if __name__ == "__main__": 61 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /translators/pipeline_translator.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | from livekit.agents import JobContext, WorkerOptions, cli 6 | from livekit.agents.voice import Agent, AgentSession 7 | from livekit.plugins import openai, silero, deepgram, elevenlabs 8 | 9 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 10 | 11 | logger = logging.getLogger("listen-and-respond") 12 | logger.setLevel(logging.INFO) 13 | 14 | class SimpleAgent(Agent): 15 | def __init__(self) -> None: 16 | super().__init__( 17 | instructions=""" 18 | You are a translator. You translate the user's speech from English to French. 19 | Every message you receive, translate it directly into French. 20 | Do not respond with anything else but the translation. 
21 | """ 22 | stt=deepgram.STT(), 23 | llm=openai.LLM(model="gpt-4o"), 24 | tts=elevenlabs.TTS( 25 | model="eleven_multilingual_v2" 26 | ), 27 | vad=silero.VAD.load() 28 | ) 29 | 30 | async def on_enter(self): 31 | self.session.generate_reply() 32 | 33 | async def entrypoint(ctx: JobContext): 34 | await ctx.connect() 35 | 36 | session = AgentSession() 37 | 38 | await session.start( 39 | agent=SimpleAgent(), 40 | room=ctx.room 41 | ) 42 | 43 | if __name__ == "__main__": 44 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /translators/tts_translator.py: -------------------------------------------------------------------------------- 1 | # Transcribes user speech with Gladia STT (French or English), translates it to English, and speaks the translation aloud 2 | from pathlib import Path 3 | from dotenv import load_dotenv 4 | from livekit.agents import JobContext, WorkerOptions, cli 5 | from livekit.agents.voice import Agent, AgentSession 6 | from livekit.plugins import rime, elevenlabs, silero 7 | import sys 8 | 9 | sys.path.append(str(Path(__file__).parent.parent)) 10 | from launch_demos.livekit_plugins_gladia import stt 11 | 12 | load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env') 13 | 14 | async def entrypoint(ctx: JobContext): 15 | 16 | await ctx.connect() 17 | session = AgentSession() 18 | 19 | # Process transcription events - let the agent say what it receives 20 | @session.on("user_input_transcribed") 21 | def on_transcript(event): 22 | # Log the full event object to see all available metadata 23 | print(f"Transcript event: {event}") 24 | if event.is_final: 25 | print(f"Final transcript: {event.transcript}") 26 | session.say(event.transcript) 27 | 28 | await session.start( 29 | agent=Agent( 30 | instructions="You are a helpful assistant that speaks what the user says in English.", 31 | stt=stt.STT( 32 | languages=["fr", "en"], # Support French and English input 33 | code_switching=True, 34 | sample_rate=16000, 35 | bit_depth=16, 36 | channels=1, 37 | encoding="wav/pcm", 38 | translation_enabled=True, 39 | translation_target_languages=["en"], # Only translate to English 40 | translation_model="base", 41 | translation_match_original_utterances=True 42 | ), 43 | tts=elevenlabs.TTS( 44 | model="eleven_multilingual_v2" 45 | ), 46 | allow_interruptions=False 47 | ), 48 | room=ctx.room 49 | ) 50 | 51 | if __name__ == "__main__": 52 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) -------------------------------------------------------------------------------- /vision/agent.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | 4 | from dotenv import load_dotenv 5 | from livekit import rtc 6 | from livekit.agents import ( 7 | AutoSubscribe, 8 | JobContext, 9 | WorkerOptions, 10 | cli, 11 | get_job_context, 12 | ) 13 | from livekit.agents.llm import ImageContent, ChatContext, ChatMessage 14 | from livekit.agents.voice import AgentSession, Agent, room_io 15 | from livekit.plugins import ( 16 | cartesia, 17 | openai, 18 | deepgram, 19 | noise_cancellation, 20 | silero, 21 | ) 22 | from pathlib import Path 23 | 24 | load_dotenv(dotenv_path=Path(__file__).parent.parent / ".env") 25 | logger = logging.getLogger("vision-agent") 26 | 27 | 28 | class Assistant(Agent): 29 | def __init__(self, room: rtc.Room) -> None: 30 | self._latest_frame = None 31 | self._room = room 32 | self._tasks = [] 33 | self._video_stream = None 34 | 35 | super().__init__( 36 | instructions=( 37 | "You are a voice
assistant created by LiveKit that can both see and hear. " 38 | "You should use short and concise responses, avoiding unpronounceable punctuation. " 39 | "When you see an image in our conversation, naturally incorporate what you see " 40 | "into your response. Keep visual descriptions brief but informative." 41 | ), 42 | vad=silero.VAD.load(), 43 | stt=deepgram.STT(), 44 | llm=openai.LLM(model="gpt-4o-mini"), 45 | tts=cartesia.TTS(), 46 | ) 47 | 48 | async def on_enter(self): 49 | """ 50 | Lifecycle hook that runs after the agent becomes the active agent in a session. 51 | Adds video track from a remote participant and then starts tracking frames from video. 52 | """ 53 | logger.debug("Agent joining room") 54 | room = get_job_context().room 55 | 56 | # Find the first video track (if any) from the remote participant 57 | remote_participant = list(room.remote_participants.values())[0] 58 | video_tracks = [ 59 | publication.track 60 | for publication in remote_participant.track_publications.values() 61 | if publication.track is not None 62 | and publication.track.kind == rtc.TrackKind.KIND_VIDEO 63 | ] 64 | if video_tracks: 65 | self._create_video_stream(video_tracks[0]) 66 | 67 | # Watch for new video tracks not yet published 68 | @room.on("track_subscribed") 69 | def on_track_subscribed(track: rtc.Track): 70 | logger.debug("New video track subscribed") 71 | if track.kind == rtc.TrackKind.KIND_VIDEO: 72 | self._create_video_stream(track) 73 | 74 | async def on_user_turn_completed( 75 | self, _: ChatContext, new_message: ChatMessage 76 | ) -> None: 77 | """ 78 | Lifecycle hook that runs after the user's turn has ended, before the agent's reply. 79 | Captures the latest video frame and adds it to the conversation context. 80 | """ 81 | if self._latest_frame: 82 | new_message.content.append(ImageContent(image=self._latest_frame)) 83 | logger.debug("Added latest frame to conversation context") 84 | self._latest_frame = None 85 | 86 | def _create_video_stream(self, track: rtc.Track): 87 | """ 88 | Helper method to buffer the latest video frame from the user's track 89 | """ 90 | # Close any existing stream (we only want one at a time) 91 | if self._video_stream is not None: 92 | self._video_stream.close() 93 | 94 | # Create a new stream to receive frames 95 | self._video_stream = rtc.VideoStream(track) 96 | 97 | async def read_stream(): 98 | async for event in self._video_stream: 99 | # Store the latest frame for use later 100 | self._latest_frame = event.frame 101 | 102 | # Store the async task 103 | task = asyncio.create_task(read_stream()) 104 | task.add_done_callback(lambda t: self._tasks.remove(t)) 105 | self._tasks.append(task) 106 | 107 | 108 | async def entrypoint(ctx: JobContext): 109 | logger.info(f"connecting to room {ctx.room.name}") 110 | await ctx.connect(auto_subscribe=AutoSubscribe.SUBSCRIBE_ALL) 111 | 112 | # Wait for the first participant to connect 113 | participant = await ctx.wait_for_participant() 114 | logger.info(f"starting voice assistant for participant {participant.identity}") 115 | 116 | session = AgentSession( 117 | min_endpointing_delay=0.5, 118 | max_endpointing_delay=5.0, 119 | ) 120 | 121 | await session.start( 122 | room=ctx.room, 123 | agent=Assistant(ctx.room), 124 | room_input_options=room_io.RoomInputOptions( 125 | noise_cancellation=noise_cancellation.BVC(), 126 | ), 127 | ) 128 | 129 | # The agent should be polite and greet the user when it joins :) 130 | await session.say("Hey, how can I help you today?", allow_interruptions=True) 131 | 132 | 133 | if 
__name__ == "__main__": 134 | cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) 135 | --------------------------------------------------------------------------------
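`vision/agent.py` attaches a single buffered frame to each user turn, which works well for static scenes but gives the LLM no sense of motion. A small two-slot buffer can carry a slightly older snapshot alongside the newest frame, so questions like "which way did it move?" have something to compare against. The sketch below is a minimal editorial illustration rather than repository code: `TwoFrameBuffer`, `snapshot_interval`, and the wiring notes are names invented here, and it assumes `new_message.content` accepts more than one `ImageContent` entry, as the `append` call in the example suggests.

```python
import time

from livekit.agents.llm import ChatMessage, ImageContent


class TwoFrameBuffer:
    """Keep the newest frame plus a snapshot taken at most once per interval.

    A hypothetical replacement for the single `_latest_frame` attribute in
    vision/agent.py, so each user turn can carry a "before" and "after" view.
    """

    def __init__(self, snapshot_interval: float = 1.0) -> None:
        self.snapshot_interval = snapshot_interval
        self.latest = None        # most recent frame from the VideoStream
        self.previous = None      # older snapshot, at least snapshot_interval behind
        self._last_snapshot = 0.0

    def push(self, frame) -> None:
        # Call this from the read_stream() loop instead of `self._latest_frame = event.frame`.
        now = time.monotonic()
        if self.latest is not None and now - self._last_snapshot >= self.snapshot_interval:
            self.previous = self.latest
            self._last_snapshot = now
        self.latest = frame

    def attach_to(self, message: ChatMessage) -> None:
        # Mirrors on_user_turn_completed: append the buffered frames to the user's message,
        # oldest first, then clear the buffer so stale frames don't leak into later turns.
        for frame in (self.previous, self.latest):
            if frame is not None:
                message.content.append(ImageContent(image=frame))
        self.previous = None
        self.latest = None
```

Wiring this in would mean creating one buffer in `Assistant.__init__`, calling `push(event.frame)` inside `read_stream()`, and calling `attach_to(new_message)` from `on_user_turn_completed` in place of the direct `ImageContent` append.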