├── .prettierrc ├── server ├── requirements.txt ├── env.example ├── Dockerfile ├── .gitignore ├── runner.py ├── server.py ├── bot.py ├── word_list.py ├── bot_phone_twilio.py └── bot_phone_local.py ├── client ├── env.example ├── public │ ├── og-image.png │ └── favicon.svg ├── src │ ├── assets │ │ ├── logo.png │ │ └── star.png │ ├── utils │ │ ├── formatTime.ts │ │ ├── timerUtils.ts │ │ └── wordDetection.ts │ ├── components │ │ ├── Game │ │ │ ├── ScoreRow │ │ │ │ ├── ScoreRow.module.css │ │ │ │ └── index.tsx │ │ │ ├── Timer.tsx │ │ │ ├── GameWord.tsx │ │ │ ├── GameContent.tsx │ │ │ ├── WordWrangler.tsx │ │ │ └── WordWrangler.module.css │ │ ├── Card.tsx │ │ └── StartButton │ │ │ └── index.tsx │ ├── types │ │ └── personality.ts │ ├── pages │ │ ├── _app.tsx │ │ ├── api │ │ │ └── connect.ts │ │ ├── _document.tsx │ │ └── index.tsx │ ├── contexts │ │ └── Configuration.tsx │ ├── styles │ │ ├── HomeStyles.ts │ │ └── globals.css │ ├── hooks │ │ ├── useConnectionState.ts │ │ ├── useWordDetection.ts │ │ ├── useGameTimer.ts │ │ ├── useVisualFeedback.ts │ │ └── useGameState.ts │ ├── constants │ │ └── gameConstants.ts │ ├── providers │ │ └── RTVIProvider.tsx │ └── data │ │ └── wordWranglerWords.ts ├── postcss.config.mjs ├── next.config.ts ├── eslint.config.mjs ├── .gitignore ├── package.json └── tsconfig.json ├── images ├── word-wrangler-web-screenshot.png ├── word-wrangler-web-architecture.png └── word-wrangler-twilio-architecture.png ├── LICENSE └── README.md /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "tabWidth": 2, 4 | "useTabs": false, 5 | "singleQuote": false 6 | } 7 | -------------------------------------------------------------------------------- /server/requirements.txt: -------------------------------------------------------------------------------- 1 | pipecatcloud 2 | pipecat-ai[daily,google,silero] 3 | fastapi 4 | uvicorn 5 | python-dotenv 6 | 
-------------------------------------------------------------------------------- /client/env.example: -------------------------------------------------------------------------------- 1 | NEXT_PUBLIC_API_BASE_URL=http://localhost:7860 2 | PIPECAT_CLOUD_API_KEY="" 3 | AGENT_NAME=word-wrangler -------------------------------------------------------------------------------- /client/public/og-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daily-co/word-wrangler-gemini-live/HEAD/client/public/og-image.png -------------------------------------------------------------------------------- /client/src/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daily-co/word-wrangler-gemini-live/HEAD/client/src/assets/logo.png -------------------------------------------------------------------------------- /client/src/assets/star.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daily-co/word-wrangler-gemini-live/HEAD/client/src/assets/star.png -------------------------------------------------------------------------------- /client/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | const config = { 2 | plugins: ["@tailwindcss/postcss"], 3 | }; 4 | 5 | export default config; 6 | -------------------------------------------------------------------------------- /images/word-wrangler-web-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daily-co/word-wrangler-gemini-live/HEAD/images/word-wrangler-web-screenshot.png -------------------------------------------------------------------------------- /images/word-wrangler-web-architecture.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daily-co/word-wrangler-gemini-live/HEAD/images/word-wrangler-web-architecture.png -------------------------------------------------------------------------------- /images/word-wrangler-twilio-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daily-co/word-wrangler-gemini-live/HEAD/images/word-wrangler-twilio-architecture.png -------------------------------------------------------------------------------- /server/env.example: -------------------------------------------------------------------------------- 1 | DAILY_API_KEY= 2 | DAILY_API_URL=https://api.daily.co/v1/ 3 | DAILY_SAMPLE_ROOM_URL= 4 | GOOGLE_API_KEY= 5 | GOOGLE_TEST_CREDENTIALS_FILE= -------------------------------------------------------------------------------- /server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM dailyco/pipecat-base:latest 2 | 3 | COPY ./requirements.txt requirements.txt 4 | 5 | RUN pip install --no-cache-dir --upgrade -r requirements.txt 6 | 7 | COPY ./bot.py bot.py 8 | -------------------------------------------------------------------------------- /client/next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | 3 | const nextConfig: NextConfig = { 4 | /* config options here */ 5 | reactStrictMode: true, 6 | }; 7 | 8 | export default nextConfig; 9 | -------------------------------------------------------------------------------- /client/src/utils/formatTime.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Formats seconds into MM:SS format 3 | */ 4 | export function formatTime(seconds: number): string { 5 | const mins = Math.floor(seconds / 60); 6 | const secs = seconds % 60; 7 | return `${mins}:${secs < 10 ? 
'0' : ''}${secs}`; 8 | } 9 | -------------------------------------------------------------------------------- /client/src/components/Game/ScoreRow/ScoreRow.module.css: -------------------------------------------------------------------------------- 1 | .divider { 2 | width: 100%; 3 | height: 2px; 4 | background: linear-gradient( 5 | 90deg, 6 | transparent 0%, 7 | rgba(255, 255, 255, 0.15) 30%, 8 | rgba(255, 255, 255, 0.15) 70%, 9 | transparent 100% 10 | ); 11 | } 12 | -------------------------------------------------------------------------------- /client/eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import { dirname } from "path"; 2 | import { fileURLToPath } from "url"; 3 | import { FlatCompat } from "@eslint/eslintrc"; 4 | 5 | const __filename = fileURLToPath(import.meta.url); 6 | const __dirname = dirname(__filename); 7 | 8 | const compat = new FlatCompat({ 9 | baseDirectory: __dirname, 10 | }); 11 | 12 | const eslintConfig = [ 13 | ...compat.extends("next/core-web-vitals", "next/typescript"), 14 | ]; 15 | 16 | export default eslintConfig; 17 | -------------------------------------------------------------------------------- /client/src/components/Card.tsx: -------------------------------------------------------------------------------- 1 | export function Card({ 2 | children, 3 | className, 4 | }: { 5 | children: React.ReactNode; 6 | className?: string; 7 | }) { 8 | return ( 9 |
12 | {children} 13 |
14 | ); 15 | } 16 | 17 | export function CardInner({ children }: { children: React.ReactNode }) { 18 | return
{children}
; 19 | } 20 | -------------------------------------------------------------------------------- /client/src/types/personality.ts: -------------------------------------------------------------------------------- 1 | export type PersonalityType = 2 | | 'friendly' 3 | | 'professional' 4 | | 'enthusiastic' 5 | | 'thoughtful' 6 | | 'witty'; 7 | 8 | // This object can be useful for displaying user-friendly labels or descriptions 9 | export const PERSONALITY_PRESETS: Record = { 10 | friendly: 'Friendly', 11 | professional: 'Professional', 12 | enthusiastic: 'Enthusiastic', 13 | thoughtful: 'Thoughtful', 14 | witty: 'Witty', 15 | }; 16 | 17 | // Default personality to use 18 | export const DEFAULT_PERSONALITY: PersonalityType = 'witty'; 19 | -------------------------------------------------------------------------------- /client/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.* 7 | .yarn/* 8 | !.yarn/patches 9 | !.yarn/plugins 10 | !.yarn/releases 11 | !.yarn/versions 12 | 13 | # testing 14 | /coverage 15 | 16 | # next.js 17 | /.next/ 18 | /out/ 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # env files (can opt-in for committing if needed) 34 | .env* 35 | 36 | # vercel 37 | .vercel 38 | 39 | # typescript 40 | *.tsbuildinfo 41 | next-env.d.ts 42 | -------------------------------------------------------------------------------- /server/.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | dist/ 9 | *.egg-info/ 10 | .installed.cfg 11 | *.egg 12 | .pytest_cache/ 13 | .coverage 14 | .coverage.* 15 | .env 16 | .venv 17 | env/ 18 | venv/ 19 | ENV/ 20 | .mypy_cache/ 21 | .dmypy.json 22 | dmypy.json 23 | 24 | # JavaScript/Node.js 25 | node_modules/ 26 | dist/ 27 | dist-ssr/ 28 | *.local 29 | .env.local 30 | .env.development.local 31 | .env.test.local 32 | .env.production.local 33 | 34 | # Logs 35 | logs/ 36 | *.log 37 | npm-debug.log* 38 | yarn-debug.log* 39 | yarn-error.log* 40 | pnpm-debug.log* 41 | 42 | # Editor/IDE 43 | .vscode/* 44 | !.vscode/extensions.json 45 | .idea/ 46 | *.swp 47 | *.swo 48 | .DS_Store 49 | 50 | # Project specific 51 | runpod.toml 52 | pcc-deploy.toml 53 | build.sh -------------------------------------------------------------------------------- /client/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "client", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | 
"@pipecat-ai/client-js": "^0.3.5", 13 | "@pipecat-ai/client-react": "^0.3.5", 14 | "@pipecat-ai/daily-transport": "^0.3.10", 15 | "@tabler/icons-react": "^3.31.0", 16 | "@tailwindcss/postcss": "^4.1.3", 17 | "js-confetti": "^0.12.0", 18 | "next": "15.2.4", 19 | "react": "^19.0.0", 20 | "react-dom": "^19.0.0" 21 | }, 22 | "devDependencies": { 23 | "@eslint/eslintrc": "^3", 24 | "@types/node": "^20", 25 | "@types/react": "^19", 26 | "@types/react-dom": "^19", 27 | "eslint": "^9", 28 | "eslint-config-next": "15.2.4", 29 | "postcss": "^8.5.3", 30 | "tailwindcss": "^4.1.3", 31 | "typescript": "^5" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /client/src/utils/timerUtils.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Safely clears any type of timer 3 | * @param timer The timer to clear 4 | * @returns null to reassign to the timer reference 5 | */ 6 | export function clearTimer(timer: NodeJS.Timeout | null): null { 7 | if (timer) { 8 | clearTimeout(timer); 9 | } 10 | return null; 11 | } 12 | 13 | /** 14 | * Creates a countdown timer that calls the callback every second 15 | * @returns A function to stop the timer 16 | */ 17 | export function createCountdownTimer( 18 | durationSeconds: number, 19 | onTick: (secondsLeft: number) => void, 20 | onComplete: () => void 21 | ): () => void { 22 | let secondsLeft = durationSeconds; 23 | 24 | const timer = setInterval(() => { 25 | secondsLeft--; 26 | onTick(secondsLeft); 27 | 28 | if (secondsLeft <= 0) { 29 | clearInterval(timer); 30 | onComplete(); 31 | } 32 | }, 1000); 33 | 34 | return () => { 35 | clearInterval(timer); 36 | }; 37 | } 38 | -------------------------------------------------------------------------------- /client/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2017", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | 
"allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "noEmit": true, 9 | "esModuleInterop": true, 10 | "module": "esnext", 11 | "moduleResolution": "bundler", 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "jsx": "preserve", 15 | "incremental": true, 16 | "paths": { 17 | "@/components/*": ["./src/components/*"], 18 | "@/contexts/*": ["./src/contexts/*"], 19 | "@/providers/*": ["./src/providers/*"], 20 | "@/styles/*": ["./src/styles/*"], 21 | "@/data/*": ["./src/data/*"], 22 | "@/types/*": ["./src/types/*"], 23 | "@/constants/*": ["./src/constants/*"], 24 | "@/utils/*": ["./src/utils/*"], 25 | "@/hooks/*": ["./src/hooks/*"] 26 | } 27 | }, 28 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"], 29 | "exclude": ["node_modules"] 30 | } 31 | -------------------------------------------------------------------------------- /client/src/components/Game/Timer.tsx: -------------------------------------------------------------------------------- 1 | import { GAME_CONFIG, GAME_STATES } from "@/constants/gameConstants"; 2 | import { formatTime } from "@/utils/formatTime"; 3 | import { IconStopwatch } from "@tabler/icons-react"; 4 | import styles from "./WordWrangler.module.css"; 5 | 6 | interface TimerProps { 7 | timeLeft: number; 8 | gameState: string; 9 | } 10 | 11 | export function Timer({ timeLeft, gameState }: TimerProps) { 12 | const lowTimer = 13 | gameState === GAME_STATES.ACTIVE && 14 | timeLeft <= GAME_CONFIG.LOW_TIME_WARNING; 15 | 16 | return ( 17 |
18 |
19 | 20 | {formatTime(timeLeft)} 21 |
22 |
23 |
27 |
28 |
29 | ); 30 | } 31 | -------------------------------------------------------------------------------- /client/src/pages/_app.tsx: -------------------------------------------------------------------------------- 1 | import { ConfigurationProvider } from "@/contexts/Configuration"; 2 | import { RTVIProvider } from "@/providers/RTVIProvider"; 3 | import { RTVIClientAudio } from "@pipecat-ai/client-react"; 4 | import type { AppProps } from "next/app"; 5 | import { Nunito } from "next/font/google"; 6 | import Head from "next/head"; 7 | import "../styles/globals.css"; 8 | 9 | const nunito = Nunito({ 10 | subsets: ["latin"], 11 | display: "swap", 12 | variable: "--font-sans", 13 | }); 14 | 15 | export default function App({ Component, pageProps }: AppProps) { 16 | return ( 17 | <> 18 | 19 | Daily | Word Wrangler 20 | 21 | 22 |
23 | 24 | 25 | 26 | 27 | 28 | 29 |
30 | 31 | ); 32 | } 33 | -------------------------------------------------------------------------------- /client/public/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /client/src/contexts/Configuration.tsx: -------------------------------------------------------------------------------- 1 | import React, { createContext, useContext, useState, ReactNode } from 'react'; 2 | import { PersonalityType, DEFAULT_PERSONALITY } from '@/types/personality'; 3 | 4 | interface ConfigurationContextProps { 5 | personality: PersonalityType; 6 | setPersonality: (personality: PersonalityType) => void; 7 | } 8 | 9 | const ConfigurationContext = createContext< 10 | ConfigurationContextProps | undefined 11 | >(undefined); 12 | 13 | interface ConfigurationProviderProps { 14 | children: ReactNode; 15 | } 16 | 17 | export function ConfigurationProvider({ 18 | children, 19 | }: ConfigurationProviderProps) { 20 | const [personality, setPersonality] = 21 | useState(DEFAULT_PERSONALITY); 22 | 23 | const value = { 24 | personality, 25 | setPersonality, 26 | }; 27 | 28 | return ( 29 | 30 | {children} 31 | 32 | ); 33 | } 34 | 35 | export function useConfigurationSettings() { 36 | const context = useContext(ConfigurationContext); 37 | if (context === undefined) { 38 | throw new Error( 39 | 'useConfigurationSettings must be used within a ConfigurationProvider' 40 | ); 41 | } 42 | return context; 43 | } 44 | -------------------------------------------------------------------------------- /client/src/utils/wordDetection.ts: -------------------------------------------------------------------------------- 1 | import { TRANSCRIPT_PATTERNS } from '@/constants/gameConstants'; 2 | 3 | /** 4 | * Checks if a transcript contains a correct guess for the target word 5 | */ 6 | export function detectWordGuess(transcript: string, targetWord: string) { 7 
| const currentWordLower = targetWord.toLowerCase().trim(); 8 | 9 | // Primary detection: Look for explicit guesses 10 | const guessPattern = TRANSCRIPT_PATTERNS.GUESS_PATTERN; 11 | const guessMatch = transcript.match(guessPattern); 12 | 13 | if (guessMatch) { 14 | // Extract the guessed word from whichever group matched (group 1 or 2) 15 | let guessedWord = (guessMatch[1] || guessMatch[2] || '') 16 | .toLowerCase() 17 | .trim(); 18 | 19 | // Remove articles ("a", "an", "the") from the beginning of the guessed word 20 | guessedWord = guessedWord.replace(/^(a|an|the)\s+/i, ''); 21 | 22 | return { 23 | isCorrect: guessedWord === currentWordLower, 24 | isExplicitGuess: true, 25 | guessedWord, 26 | }; 27 | } 28 | 29 | // Secondary detection: Check if word appears in transcript 30 | const containsWord = transcript.toLowerCase().includes(currentWordLower); 31 | 32 | return { 33 | isCorrect: containsWord, 34 | isExplicitGuess: false, 35 | guessedWord: containsWord ? targetWord : null, 36 | }; 37 | } 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2024–2025, Daily 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 
14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /client/src/components/StartButton/index.tsx: -------------------------------------------------------------------------------- 1 | import { BUTTON_TEXT } from "@/constants/gameConstants"; 2 | import { useConnectionState } from "@/hooks/useConnectionState"; 3 | import { IconArrowRight } from "@tabler/icons-react"; 4 | 5 | interface StartGameButtonProps { 6 | onGameStarted?: () => void; 7 | onGameEnded?: () => void; 8 | isGameEnded?: boolean; 9 | } 10 | 11 | export function StartGameButton({ 12 | onGameStarted, 13 | onGameEnded, 14 | isGameEnded, 15 | }: StartGameButtonProps) { 16 | const { isConnecting, isDisconnecting, toggleConnection } = 17 | useConnectionState(onGameStarted, onGameEnded); 18 | 19 | // Show spinner during connection process 20 | const showSpinner = isConnecting; 21 | const btnText = isGameEnded ? BUTTON_TEXT.RESTART : BUTTON_TEXT.START; 22 | 23 | return ( 24 |
25 | 43 |
44 | ); 45 | } 46 | -------------------------------------------------------------------------------- /client/src/pages/api/connect.ts: -------------------------------------------------------------------------------- 1 | import type { NextApiRequest, NextApiResponse } from "next"; 2 | 3 | export default async function handler( 4 | req: NextApiRequest, 5 | res: NextApiResponse 6 | ) { 7 | if (req.method !== "POST") { 8 | return res.status(405).json({ error: "Method not allowed" }); 9 | } 10 | 11 | try { 12 | const { personality } = req.body; 13 | 14 | // Validate required parameters 15 | if (!personality) { 16 | return res 17 | .status(400) 18 | .json({ error: "Missing required configuration parameters" }); 19 | } 20 | 21 | const response = await fetch( 22 | `https://api.pipecat.daily.co/v1/public/${process.env.AGENT_NAME}/start`, 23 | { 24 | method: "POST", 25 | headers: { 26 | Authorization: `Bearer ${process.env.PIPECAT_CLOUD_API_KEY}`, 27 | "Content-Type": "application/json", 28 | }, 29 | body: JSON.stringify({ 30 | createDailyRoom: true, 31 | body: { 32 | personality, 33 | }, 34 | }), 35 | } 36 | ); 37 | 38 | const data = await response.json(); 39 | 40 | console.log("Response from API:", JSON.stringify(data, null, 2)); 41 | 42 | // Transform the response to match what RTVI client expects 43 | return res.status(200).json({ 44 | room_url: data.dailyRoom, 45 | token: data.dailyToken, 46 | }); 47 | } catch (error) { 48 | console.error("Error starting agent:", error); 49 | return res.status(500).json({ error: "Failed to start agent" }); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /client/src/styles/HomeStyles.ts: -------------------------------------------------------------------------------- 1 | export const styles = { 2 | main: { 3 | display: "flex", 4 | flexDirection: "column" as const, 5 | justifyContent: "flex-start", 6 | alignItems: "center", 7 | minHeight: "100vh", 8 | padding: "2rem 0", 9 | }, 10 | 
container: { 11 | width: "100%", 12 | maxWidth: "800px", 13 | padding: "0 1rem", 14 | }, 15 | title: { 16 | fontSize: "2rem", 17 | textAlign: "center" as const, 18 | marginBottom: "2rem", 19 | color: "#333", 20 | }, 21 | gameContainer: { 22 | marginBottom: "2rem", 23 | }, 24 | controlsContainer: { 25 | display: "flex", 26 | flexDirection: "column" as const, 27 | gap: "1rem", 28 | marginBottom: "2rem", 29 | }, 30 | settings: { 31 | backgroundColor: "white", 32 | padding: "1rem", 33 | borderRadius: "8px", 34 | boxShadow: "0 1px 3px rgba(0,0,0,0.1)", 35 | }, 36 | label: { 37 | display: "flex", 38 | flexDirection: "column" as const, 39 | gap: "0.5rem", 40 | fontSize: "0.9rem", 41 | color: "#555", 42 | }, 43 | select: { 44 | padding: "0.5rem", 45 | border: "1px solid #ddd", 46 | borderRadius: "4px", 47 | fontSize: "1rem", 48 | }, 49 | instructions: { 50 | backgroundColor: "white", 51 | padding: "1.5rem", 52 | borderRadius: "8px", 53 | boxShadow: "0 1px 3px rgba(0,0,0,0.1)", 54 | }, 55 | instructionsTitle: { 56 | fontSize: "1.4rem", 57 | marginBottom: "1rem", 58 | color: "#333", 59 | }, 60 | instructionsList: { 61 | paddingLeft: "1.5rem", 62 | lineHeight: 1.6, 63 | }, 64 | }; 65 | -------------------------------------------------------------------------------- /client/src/hooks/useConnectionState.ts: -------------------------------------------------------------------------------- 1 | import { useEffect, useCallback } from 'react'; 2 | import { 3 | useRTVIClient, 4 | useRTVIClientTransportState, 5 | } from '@pipecat-ai/client-react'; 6 | import { CONNECTION_STATES } from '@/constants/gameConstants'; 7 | 8 | export function useConnectionState( 9 | onConnected?: () => void, 10 | onDisconnected?: () => void 11 | ) { 12 | const client = useRTVIClient(); 13 | const transportState = useRTVIClientTransportState(); 14 | 15 | const isConnected = CONNECTION_STATES.ACTIVE.includes(transportState); 16 | const isConnecting = CONNECTION_STATES.CONNECTING.includes(transportState); 17 | 
const isDisconnecting = 18 | CONNECTION_STATES.DISCONNECTING.includes(transportState); 19 | 20 | // Handle connection changes 21 | useEffect(() => { 22 | if (isConnected && onConnected) { 23 | onConnected(); 24 | } 25 | if (!isConnected && !isConnecting && onDisconnected) { 26 | onDisconnected(); 27 | } 28 | }, [isConnected, isConnecting, onConnected, onDisconnected]); 29 | 30 | // Toggle connection state 31 | const toggleConnection = useCallback(async () => { 32 | if (!client) return; 33 | 34 | try { 35 | if (isConnected) { 36 | await client.disconnect(); 37 | } else { 38 | await client.connect(); 39 | } 40 | } catch (error) { 41 | console.error('Connection error:', error); 42 | } 43 | }, [client, isConnected]); 44 | 45 | return { 46 | isConnected, 47 | isConnecting, 48 | isDisconnecting, 49 | toggleConnection, 50 | transportState, 51 | client, // Expose the client for direct access when needed 52 | }; 53 | } 54 | -------------------------------------------------------------------------------- /client/src/pages/_document.tsx: -------------------------------------------------------------------------------- 1 | import { Head, Html, Main, NextScript } from "next/document"; 2 | 3 | export default function Document() { 4 | return ( 5 | 6 | 7 | 11 | 12 | 13 | 14 | 15 | {/* Open Graph / Social Media Meta Tags */} 16 | 17 | 18 | 22 | 26 | 27 | 28 | {/* Twitter Card Meta Tags */} 29 | 30 | 31 | 35 | 39 | 40 | 41 | 42 |
43 | 44 | 45 | 46 | ); 47 | } 48 | -------------------------------------------------------------------------------- /client/src/components/Game/ScoreRow/index.tsx: -------------------------------------------------------------------------------- 1 | import { IconLaurelWreathFilled, IconStarFilled } from "@tabler/icons-react"; 2 | import styles from "./ScoreRow.module.css"; 3 | interface ScoreRowProps { 4 | score: number; 5 | bestScore: number; 6 | } 7 | 8 | export function ScoreRow({ score, bestScore = 0 }: ScoreRowProps) { 9 | return ( 10 |
11 |
12 | 16 |
17 | 18 | Current score 19 | 20 | 21 | {score} 22 | 23 |
24 |
25 |
26 |
27 | 31 |
32 | 33 | Best score 34 | 35 | 36 | {bestScore} 37 | 38 |
39 |
40 |
41 | ); 42 | } 43 | 44 | export default ScoreRow; 45 | -------------------------------------------------------------------------------- /client/src/hooks/useWordDetection.ts: -------------------------------------------------------------------------------- 1 | import { useRef } from 'react'; 2 | import { useRTVIClientEvent } from '@pipecat-ai/client-react'; 3 | import { RTVIEvent } from '@pipecat-ai/client-js'; 4 | import { detectWordGuess } from '@/utils/wordDetection'; 5 | import { GAME_STATES, GameState } from '@/constants/gameConstants'; 6 | 7 | interface UseWordDetectionProps { 8 | gameState: GameState; 9 | currentWord: string; 10 | onCorrectGuess: () => void; 11 | onIncorrectGuess: () => void; 12 | } 13 | 14 | export function useWordDetection({ 15 | gameState, 16 | currentWord, 17 | onCorrectGuess, 18 | onIncorrectGuess, 19 | }: UseWordDetectionProps) { 20 | const lastProcessedMessageRef = useRef(''); 21 | 22 | // Reset the last processed message 23 | const resetLastProcessedMessage = () => { 24 | lastProcessedMessageRef.current = ''; 25 | }; 26 | 27 | // Listen for bot transcripts to detect correct answers 28 | useRTVIClientEvent(RTVIEvent.BotTranscript, (data) => { 29 | if (gameState !== GAME_STATES.ACTIVE) { 30 | return; 31 | } 32 | 33 | if (!currentWord) { 34 | return; 35 | } 36 | 37 | if (!data.text) { 38 | return; 39 | } 40 | 41 | // Skip if this is a repeat of the same transcript 42 | if (data.text === lastProcessedMessageRef.current) { 43 | return; 44 | } 45 | 46 | lastProcessedMessageRef.current = data.text; 47 | 48 | // Use the utility function to detect word guesses 49 | const result = detectWordGuess(data.text, currentWord); 50 | 51 | if (result.isCorrect) { 52 | onCorrectGuess(); 53 | } else if (result.isExplicitGuess) { 54 | onIncorrectGuess(); 55 | } else { 56 | } 57 | }); 58 | 59 | return { 60 | resetLastProcessedMessage, 61 | }; 62 | } 63 | -------------------------------------------------------------------------------- /server/runner.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2024–2025, Daily 3 | # 4 | # SPDX-License-Identifier: BSD-2-Clause 5 | # 6 | 7 | import argparse 8 | import os 9 | 10 | import aiohttp 11 | 12 | from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper 13 | 14 | 15 | async def configure(aiohttp_session: aiohttp.ClientSession): 16 | """Configure the Daily room and Daily REST helper.""" 17 | parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample") 18 | parser.add_argument( 19 | "-u", "--url", type=str, required=False, help="URL of the Daily room to join" 20 | ) 21 | parser.add_argument( 22 | "-k", 23 | "--apikey", 24 | type=str, 25 | required=False, 26 | help="Daily API Key (needed to create an owner token for the room)", 27 | ) 28 | 29 | args, unknown = parser.parse_known_args() 30 | 31 | url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL") 32 | key = args.apikey or os.getenv("DAILY_API_KEY") 33 | 34 | if not url: 35 | raise Exception( 36 | "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL." 37 | ) 38 | 39 | if not key: 40 | raise Exception( 41 | "No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers." 42 | ) 43 | 44 | daily_rest_helper = DailyRESTHelper( 45 | daily_api_key=key, 46 | daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"), 47 | aiohttp_session=aiohttp_session, 48 | ) 49 | 50 | # Create a meeting token for the given room with an expiration 1 hour in 51 | # the future. 
52 | expiry_time: float = 60 * 60 53 | 54 | token = await daily_rest_helper.get_token(url, expiry_time) 55 | 56 | return (url, token) 57 | -------------------------------------------------------------------------------- /client/src/hooks/useGameTimer.ts: -------------------------------------------------------------------------------- 1 | import { GAME_CONFIG } from "@/constants/gameConstants"; 2 | import { clearTimer } from "@/utils/timerUtils"; 3 | import { useCallback, useEffect, useRef, useState } from "react"; 4 | 5 | export function useGameTimer(onTimeUp: () => void) { 6 | const [timeLeft, setTimeLeft] = useState(GAME_CONFIG.GAME_DURATION); 7 | const timerRef = useRef(null); 8 | const hasCalledTimeUpRef = useRef(false); 9 | 10 | // Start the game timer with initial duration 11 | const startTimer = useCallback(() => { 12 | // Reset time left and timeUp flag 13 | setTimeLeft(GAME_CONFIG.GAME_DURATION); 14 | hasCalledTimeUpRef.current = false; 15 | 16 | // Clear any existing timer 17 | timerRef.current = clearTimer(timerRef.current); 18 | 19 | // Start a new timer 20 | timerRef.current = setInterval(() => { 21 | setTimeLeft((prev) => { 22 | if (prev <= 1 && !hasCalledTimeUpRef.current) { 23 | // Time's up - clear the interval and call the callback 24 | timerRef.current = clearTimer(timerRef.current); 25 | hasCalledTimeUpRef.current = true; 26 | onTimeUp(); 27 | return 0; 28 | } 29 | return prev - 1; 30 | }); 31 | }, GAME_CONFIG.TIMER_INTERVAL); 32 | }, [onTimeUp]); 33 | 34 | // Stop the timer 35 | const stopTimer = useCallback(() => { 36 | timerRef.current = clearTimer(timerRef.current); 37 | hasCalledTimeUpRef.current = false; 38 | }, []); 39 | 40 | // Reset the timer to initial value without starting it 41 | const resetTimer = useCallback(() => { 42 | setTimeLeft(GAME_CONFIG.GAME_DURATION); 43 | hasCalledTimeUpRef.current = false; 44 | }, []); 45 | 46 | // Cleanup on unmount 47 | useEffect(() => { 48 | return () => { 49 | timerRef.current = 
clearTimer(timerRef.current); 50 | }; 51 | }, []); 52 | 53 | return { 54 | timeLeft, 55 | startTimer, 56 | stopTimer, 57 | resetTimer, 58 | }; 59 | } 60 | -------------------------------------------------------------------------------- /client/src/hooks/useVisualFeedback.ts: -------------------------------------------------------------------------------- 1 | import { useState, useRef, useCallback } from 'react'; 2 | import { clearTimer } from '@/utils/timerUtils'; 3 | import { GAME_CONFIG } from '@/constants/gameConstants'; 4 | 5 | export function useVisualFeedback() { 6 | // Visual feedback state 7 | const [showAutoDetected, setShowAutoDetected] = useState(false); 8 | const [showIncorrect, setShowIncorrect] = useState(false); 9 | const autoDetectTimerRef = useRef(null); 10 | 11 | // Reset all visual states 12 | const resetVisuals = useCallback(() => { 13 | setShowAutoDetected(false); 14 | setShowIncorrect(false); 15 | autoDetectTimerRef.current = clearTimer(autoDetectTimerRef.current); 16 | }, []); 17 | 18 | // Show correct animation 19 | const showCorrect = useCallback((onComplete?: () => void) => { 20 | // Clear any existing animation 21 | autoDetectTimerRef.current = clearTimer(autoDetectTimerRef.current); 22 | 23 | // Show correct animation 24 | setShowAutoDetected(true); 25 | setShowIncorrect(false); 26 | 27 | // Set timeout to hide animation 28 | autoDetectTimerRef.current = setTimeout(() => { 29 | setShowAutoDetected(false); 30 | if (onComplete) onComplete(); 31 | }, GAME_CONFIG.ANIMATION_DURATION); 32 | }, []); 33 | 34 | // Show incorrect animation 35 | const showIncorrectAnimation = useCallback(() => { 36 | // Clear any existing animation 37 | autoDetectTimerRef.current = clearTimer(autoDetectTimerRef.current); 38 | 39 | // Show incorrect animation 40 | setShowIncorrect(true); 41 | setShowAutoDetected(false); 42 | 43 | // Set timeout to hide animation 44 | autoDetectTimerRef.current = setTimeout(() => { 45 | setShowIncorrect(false); 46 | }, 
// Game configuration: tunable numbers shared by the game hooks and components.
export const GAME_CONFIG = {
  MAX_SKIPS: 3,
  GAME_DURATION: 60, // seconds
  WORD_POOL_SIZE: 30,
  ANIMATION_DURATION: 1000, // ms
  TIMER_INTERVAL: 1000, // ms
  LOW_TIME_WARNING: 10, // seconds
};

// Game states (string-literal values; `as const` keeps them narrow for GameState)
export const GAME_STATES = {
  IDLE: "idle",
  CONNECTING: "connecting",
  WAITING_FOR_INTRO: "waitingForIntro",
  ACTIVE: "active",
  FINISHED: "finished",
} as const;

// Union of every value in GAME_STATES.
export type GameState = (typeof GAME_STATES)[keyof typeof GAME_STATES];

// Text used in the game
export const GAME_TEXT = {
  time: "Time",
  score: "Score",
  gameOver: "Game Over!",
  finalScore: "Final Score",
  correct: "Mark Correct",
  skip: "Skip →",
  noSkips: "No Skips Left",
  skipsRemaining: (num: number) => `Skip (${num} left)`,
  startingGame: `How many words can you describe in ${GAME_CONFIG.GAME_DURATION} seconds?`,
  waitingForIntro: "Getting ready...",
  clickToStart: "Press Start Game to begin",
  describeWord: "Describe the following word:",
  // Derived from GAME_DURATION (like startingGame) so the intro copy cannot
  // drift from the configured round length.
  introTitle: `How many words can you describe within ${GAME_CONFIG.GAME_DURATION} seconds?`,
  introGuide1: "Earn points each time the AI correctly guesses the word",
  introGuide2: "Do not say the word, or you will lose points",
  introGuide3: "You can skip the word if you don't know it",
  aiPersonality: "AI Personality",
  finalScoreMessage: "Your best score:",
};

// Pattern for detecting guesses in transcripts
export const TRANSCRIPT_PATTERNS = {
  // Intended to match both "Is it "word"?" and "Is it a/an word?" patterns.
  // NOTE(review): the first alternative already matches "is it a dog?"
  // (capturing "a dog" in group 1), so the second alternative and its capture
  // group 2 are never reached. Left unchanged because the consumer of the
  // capture groups is not visible here; confirm before reordering.
  // NOTE(review): the doubled `"` in the character classes may originally have
  // been typographic quotes; verify against the repository.
  GUESS_PATTERN:
    /is it [""]?([^""?]+)[""]?(?:\?)?|is it (?:a|an) ([^?]+)(?:\?)?/i,
};

// Connection states, grouped by the phase they represent
export const CONNECTION_STATES = {
  ACTIVE: ["connected", "ready"],
  CONNECTING: ["connecting", "initializing", "initialized", "authenticating"],
  DISCONNECTING: ["disconnecting"],
};

// Button text
export const BUTTON_TEXT = {
  START: "Start Game",
  END: "End Game",
  CONNECTING: "Connecting...",
  STARTING: "Starting...",
  RESTART: "Play Again",
};
22 | {GAME_TEXT.describeWord} 23 | {word} 24 | 25 | {showAutoDetected && } 26 | {showIncorrect && } 27 |
28 | ); 29 | }; 30 | 31 | const CorrectOverlay: React.FC = () => ( 32 |
33 |
34 | 39 | 46 | 51 | 52 |
53 |
54 | ); 55 | 56 | const IncorrectOverlay: React.FC = () => ( 57 |
58 |
59 | 64 | 71 | 76 | 77 |
78 |
"use client";

import { RTVIClient } from "@pipecat-ai/client-js";
import { DailyTransport } from "@pipecat-ai/daily-transport";
import { RTVIClientProvider } from "@pipecat-ai/client-react";
import { PropsWithChildren, useEffect, useState, useRef } from "react";
import { useConfigurationSettings } from "@/contexts/Configuration";

// Get the API base URL from environment variables
// Default to "/api" if not specified
// "/api" is the default for Next.js API routes and used
// for the Pipecat Cloud deployed agent
const API_BASE_URL = process.env.NEXT_PUBLIC_API_BASE_URL || "/api";

console.log("Using API base URL:", API_BASE_URL);

/**
 * Creates a single RTVIClient (Daily transport, mic on, camera off) and makes
 * it available to `children` through RTVIClientProvider.
 *
 * Renders nothing until the client has been created. The configured
 * personality is sent as `requestData` on the connect request and is kept in
 * sync without recreating the client.
 */
export function RTVIProvider({ children }: PropsWithChildren) {
  // NOTE(review): the type argument and the JSX wrapper below were missing in
  // the extracted text and are restored from this file's imports; confirm
  // against the repository.
  const [client, setClient] = useState<RTVIClient | null>(null);
  const config = useConfigurationSettings();
  const clientCreated = useRef(false);

  useEffect(() => {
    // Only create the client once
    if (clientCreated.current) return;

    const transport = new DailyTransport();

    const rtviClient = new RTVIClient({
      transport,
      params: {
        baseUrl: API_BASE_URL,
        endpoints: {
          connect: "/connect",
        },
        requestData: {
          personality: config.personality,
        },
      },
      enableMic: true,
      enableCam: false,
    });

    setClient(rtviClient);
    clientCreated.current = true;

    // Cleanup when component unmounts
    return () => {
      // rtviClient is always defined here, so no guard is needed.
      rtviClient.disconnect().catch((err) => {
        console.error("Error disconnecting client:", err);
      });
      clientCreated.current = false;
    };
    // Intentionally run once: later personality changes are applied by the
    // effect below instead of recreating the client.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  // Update the connect params when config changes
  useEffect(() => {
    if (!client) return;

    // Update the connect params without recreating the client
    client.params.requestData = {
      personality: config.personality,
    };
  }, [client, config.personality]);

  if (!client) {
    return null;
  }

  return <RTVIClientProvider client={client}>{children}</RTVIClientProvider>;
}
35 | {GAME_TEXT.startingGame} 36 |
37 | ); 38 | } 39 | 40 | // Waiting for Intro State 41 | if (gameState === GAME_STATES.WAITING_FOR_INTRO) { 42 | return ( 43 |
44 | 45 | 46 | 47 | 48 | {GAME_TEXT.waitingForIntro} 49 | 50 |
51 | ); 52 | } 53 | 54 | // Finished State 55 | if (gameState === GAME_STATES.FINISHED) { 56 | return ( 57 |
58 |
59 |

{GAME_TEXT.gameOver}

60 |

61 | {GAME_TEXT.finalScore}: {score} 62 |

63 |
64 |
{GAME_TEXT.clickToStart}
65 |
66 | ); 67 | } 68 | 69 | // Active Game State 70 | return ( 71 |
72 | 77 |
78 | 79 | 89 |
90 |
91 | ); 92 | }; 93 | -------------------------------------------------------------------------------- /client/src/hooks/useGameState.ts: -------------------------------------------------------------------------------- 1 | import { GAME_CONFIG, GAME_STATES, GameState } from "@/constants/gameConstants"; 2 | import { getRandomCatchPhraseWords } from "@/data/wordWranglerWords"; 3 | import { useCallback, useState } from "react"; 4 | 5 | export function useGameState() { 6 | // Game state 7 | const [gameState, setGameState] = useState(GAME_STATES.IDLE); 8 | const [timeLeft, setTimeLeft] = useState(GAME_CONFIG.GAME_DURATION); 9 | const [score, setScore] = useState(0); 10 | const [words, setWords] = useState([]); 11 | const [currentWordIndex, setCurrentWordIndex] = useState(0); 12 | const [skipsRemaining, setSkipsRemaining] = useState(GAME_CONFIG.MAX_SKIPS); 13 | const [bestScore, _setBestScore] = useState(0); 14 | 15 | // Initialize or reset game state 16 | const initializeGame = useCallback(() => { 17 | const freshWords = getRandomCatchPhraseWords(GAME_CONFIG.WORD_POOL_SIZE); 18 | setWords(freshWords); 19 | setGameState(GAME_STATES.ACTIVE); 20 | setTimeLeft(GAME_CONFIG.GAME_DURATION); 21 | setScore(0); 22 | setCurrentWordIndex(0); 23 | setSkipsRemaining(GAME_CONFIG.MAX_SKIPS); 24 | 25 | // Get best score from local storage 26 | const storedScore = localStorage.getItem("bestScore"); 27 | if (storedScore) { 28 | _setBestScore(Number(storedScore) || 0); 29 | } 30 | return freshWords; 31 | }, []); 32 | 33 | // End game 34 | const finishGame = useCallback(() => { 35 | setGameState(GAME_STATES.FINISHED); 36 | }, []); 37 | 38 | // Handle scoring 39 | const incrementScore = useCallback(() => { 40 | setScore((prev) => prev + 1); 41 | }, []); 42 | 43 | // Handle best score 44 | const setBestScore = useCallback((newBestScore: number) => { 45 | _setBestScore(newBestScore); 46 | localStorage.setItem("bestScore", newBestScore.toString()); 47 | }, []); 48 | 49 | // Handle word 
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""RTVI Bot Server Implementation.

This FastAPI server manages RTVI bot instances and provides endpoints for both
direct browser access and RTVI client connections. It handles:
- Creating Daily rooms
- Managing bot processes
- Providing connection credentials
- Monitoring bot status

Requirements:
- Daily API key (set in .env file)
- Python 3.10+
- FastAPI
- Running bot implementation
"""

import argparse
import os
import subprocess
import sys
from contextlib import asynccontextmanager
from typing import Any, Dict

import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, RedirectResponse

from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams

# Load environment variables from .env file
load_dotenv(override=True)

# Maximum number of bot instances allowed per room
MAX_BOTS_PER_ROOM = 1

# Seconds to wait for a bot to exit after terminate() before killing it
BOT_SHUTDOWN_TIMEOUT_SECS = 5

# Directory containing this file; bots are launched with it as their cwd
_SERVER_DIR = os.path.dirname(os.path.abspath(__file__))

# Dictionary to track bot processes: {pid: (process, room_url)}
bot_procs = {}

# Store Daily API helpers
daily_helpers = {}


def cleanup():
    """Terminate every bot process that is still running.

    Called during server shutdown. Processes that have already exited are
    skipped. A process that does not exit within BOT_SHUTDOWN_TIMEOUT_SECS
    after terminate() is killed, so shutdown cannot hang on a stuck bot.
    """
    for proc, _room_url in bot_procs.values():
        if proc.poll() is not None:
            continue
        proc.terminate()
        try:
            proc.wait(timeout=BOT_SHUTDOWN_TIMEOUT_SECS)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan manager that handles startup and shutdown tasks.

    - Creates aiohttp session
    - Initializes Daily API helper
    - Cleans up resources on shutdown
    """
    aiohttp_session = aiohttp.ClientSession()
    daily_helpers["rest"] = DailyRESTHelper(
        daily_api_key=os.getenv("DAILY_API_KEY", ""),
        daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
        aiohttp_session=aiohttp_session,
    )
    try:
        yield
    finally:
        # Run both teardown steps even if the application raised, and stop
        # the bots even if closing the HTTP session fails.
        try:
            await aiohttp_session.close()
        finally:
            cleanup()


# Initialize FastAPI app with lifespan manager
app = FastAPI(lifespan=lifespan)

# Configure CORS to allow requests from any origin
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive; confirm this is intended outside of local development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


async def create_room_and_token() -> tuple[str, str]:
    """Helper function to create a Daily room and generate an access token.

    Returns:
        tuple[str, str]: A tuple containing (room_url, token)

    Raises:
        HTTPException: If room creation or token generation fails
    """
    room = await daily_helpers["rest"].create_room(DailyRoomParams())
    if not room.url:
        raise HTTPException(status_code=500, detail="Failed to create room")

    token = await daily_helpers["rest"].get_token(room.url)
    if not token:
        raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")

    return room.url, token


def _spawn_bot(bot_args: list[str], room_url: str, token: str) -> subprocess.Popen:
    """Start a bot process for ``room_url`` and record it in ``bot_procs``.

    The command is passed as an argument list without a shell, so the room URL
    and token are never interpreted by a shell. The bot runs under the same
    interpreter as this server (``sys.executable``).

    Args:
        bot_args: Interpreter arguments selecting the bot, e.g. ``["bot.py"]``
            or ``["-m", "bot"]``.
        room_url: Daily room URL, passed to the bot as ``-u``.
        token: Daily token, passed to the bot as ``-t``.

    Returns:
        subprocess.Popen: The started process.

    Raises:
        HTTPException: If the process cannot be started.
    """
    try:
        # No pipes are created, so a bufsize argument would have no effect.
        proc = subprocess.Popen(
            [sys.executable, *bot_args, "-u", room_url, "-t", token],
            cwd=_SERVER_DIR,
        )
    except (OSError, subprocess.SubprocessError) as e:
        raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}") from e

    bot_procs[proc.pid] = (proc, room_url)
    return proc


@app.get("/")
async def start_agent(request: Request):
    """Endpoint for direct browser access to the bot.

    Creates a room, starts a bot instance, and redirects to the Daily room URL.

    Returns:
        RedirectResponse: Redirects to the Daily room URL

    Raises:
        HTTPException: If room creation, token generation, or bot startup fails
    """
    print("Creating room")
    room_url, token = await create_room_and_token()
    print(f"Room URL: {room_url}")

    # Check if there is already an existing process running in this room
    num_bots_in_room = sum(
        1
        for proc, proc_room_url in bot_procs.values()
        if proc_room_url == room_url and proc.poll() is None
    )
    if num_bots_in_room >= MAX_BOTS_PER_ROOM:
        raise HTTPException(status_code=500, detail=f"Max bot limit reached for room: {room_url}")

    # Spawn a new bot process
    _spawn_bot(["bot.py"], room_url, token)

    return RedirectResponse(room_url)


@app.post("/connect")
async def rtvi_connect(request: Request) -> Dict[Any, Any]:
    """RTVI connect endpoint that creates a room and returns connection credentials.

    This endpoint is called by RTVI clients to establish a connection.

    Returns:
        Dict[Any, Any]: Authentication bundle containing room_url and token

    Raises:
        HTTPException: If room creation, token generation, or bot startup fails
    """
    print("Creating room for RTVI connection")
    room_url, token = await create_room_and_token()
    print(f"Room URL: {room_url}")

    # Start the bot process
    _spawn_bot(["-m", "bot"], room_url, token)

    # Return the authentication bundle in format expected by DailyTransport
    return {"room_url": room_url, "token": token}


@app.get("/status/{pid}")
def get_status(pid: int):
    """Get the status of a specific bot process.

    Args:
        pid (int): Process ID of the bot

    Returns:
        JSONResponse: Status information for the bot

    Raises:
        HTTPException: If the specified bot process is not found
    """
    # Look up the subprocess
    entry = bot_procs.get(pid)

    # If the subprocess doesn't exist, return an error
    if entry is None:
        raise HTTPException(status_code=404, detail=f"Bot with process id: {pid} not found")

    # Check the status of the subprocess
    proc, _room_url = entry
    status = "running" if proc.poll() is None else "finished"
    return JSONResponse({"bot_id": pid, "status": status})


if __name__ == "__main__":
    import uvicorn

    # Parse command line arguments for server configuration
    default_host = os.getenv("HOST", "0.0.0.0")
    default_port = int(os.getenv("FAST_API_PORT", "7860"))

    parser = argparse.ArgumentParser(description="Daily Storyteller FastAPI server")
    parser.add_argument("--host", type=str, default=default_host, help="Host address")
    parser.add_argument("--port", type=int, default=default_port, help="Port number")
    parser.add_argument("--reload", action="store_true", help="Reload code on change")

    config = parser.parse_args()

    # Start the FastAPI server
    uvicorn.run(
        "server:app",
        host=config.host,
        port=config.port,
        reload=config.reload,
    )
39 |
40 | 41 |
42 | Star 43 |
44 | 45 |

{GAME_TEXT.finalScore}

46 |

47 | {score} 48 |

49 |

50 | {GAME_TEXT.finalScoreMessage}{" "} 51 | 52 | {bestScore} 53 | 54 |

55 |
56 |
57 | 63 | 64 | View project source code 65 | 66 |
67 | 68 | 69 |
70 |
71 | { 74 | setGameEnded(false); 75 | setScore(0); 76 | setHasStarted(true); 77 | }} 78 | /> 79 |
80 |
81 | ); 82 | } 83 | 84 | if (!hasStarted) { 85 | return ( 86 |
87 |
88 | 89 | Word Wrangler 95 | 96 | 97 |
98 |

99 | {GAME_TEXT.introTitle} 100 |

101 |
102 |
103 |
104 | 105 |
106 |
107 | {GAME_TEXT.introGuide1} 108 |
109 |
110 |
111 |
112 | 113 |
114 |
115 | {GAME_TEXT.introGuide2} 116 |
117 |
118 |
119 |
120 | 121 |
122 |
123 | {GAME_TEXT.introGuide3} 124 |
125 |
126 |
127 |
128 |
129 |
130 | 148 |
149 | 150 | 151 |
152 |
153 | setHasStarted(true)} /> 154 |
155 |
156 | ); 157 | } 158 | 159 | return ( 160 | { 162 | setScore(score); 163 | setBestScore(bestScore); 164 | setGameEnded(true); 165 | }} 166 | /> 167 | ); 168 | } 169 | -------------------------------------------------------------------------------- /client/src/styles/globals.css: -------------------------------------------------------------------------------- 1 | @import "tailwindcss"; 2 | 3 | @theme { 4 | --border-radius-card: 24px; 5 | --border-width-card: 4px; 6 | --theme-gradient-start: #fdd256; 7 | --theme-gradient-end: #a62249; 8 | --button-height-sm: 52px; 9 | --button-height: 58px; 10 | --animate-bounce-in: zoom-bounce 0.75s ease-out forwards; 11 | } 12 | 13 | html, 14 | body { 15 | max-width: 100vw; 16 | overflow-x: hidden; 17 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, 18 | Ubuntu, Cantarell, "Open Sans", "Helvetica Neue", sans-serif; 19 | } 20 | 21 | body { 22 | height: 100dvh; 23 | font-synthesis: none; 24 | text-rendering: optimizeLegibility; 25 | -webkit-font-smoothing: antialiased; 26 | -moz-osx-font-smoothing: grayscale; 27 | background: linear-gradient(180deg, #0059b7 0%, #7dceff 100%); 28 | } 29 | 30 | main { 31 | font-family: var(--font-sans); 32 | padding: 0 12px; 33 | } 34 | 35 | a { 36 | color: inherit; 37 | text-decoration: none; 38 | } 39 | 40 | button:disabled { 41 | opacity: 0.6; 42 | cursor: not-allowed; 43 | } 44 | 45 | button:not(:disabled):hover { 46 | opacity: 0.9; 47 | } 48 | 49 | button:not(:disabled):active { 50 | transform: translateY(1px); 51 | } 52 | 53 | .card-border { 54 | position: relative; 55 | z-index: 1; 56 | } 57 | 58 | .card-border:before { 59 | content: ""; 60 | position: absolute; 61 | inset: -4px -4px -8px -4px; 62 | border-radius: 28px; 63 | background: linear-gradient( 64 | to bottom, 65 | rgba(0, 0, 0, 1) 0%, 66 | rgba(0, 0, 0, 0.15) 100% 67 | ); 68 | z-index: -1; 69 | } 70 | 71 | .card-border:after { 72 | content: ""; 73 | box-sizing: border-box; 74 | position: absolute; 75 | 
inset: 0; 76 | background: #ffffff; 77 | border-radius: var(--border-radius-card); 78 | border: var(--border-width-card) solid transparent; 79 | background-image: linear-gradient(#ffffff, #ffffff), 80 | linear-gradient( 81 | 180deg, 82 | var(--theme-gradient-start) 0%, 83 | var(--theme-gradient-end) 100% 84 | ); 85 | background-origin: border-box; 86 | background-clip: padding-box, border-box; 87 | } 88 | 89 | select { 90 | appearance: none; 91 | -webkit-appearance: none; 92 | -moz-appearance: none; 93 | padding: 0 36px 0 12px; 94 | background-image: url("data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48ZyBjbGlwLXBhdGg9InVybCgjY2xpcDBfNV80MSkiPjxwYXRoIGQ9Ik04IDlMMTIgNUwxNiA5IiBzdHJva2U9IiNCQkQ1RTEiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIi8+PHBhdGggZD0iTTE2IDE1TDEyIDE5TDggMTUiIHN0cm9rZT0iI0JCRDU4MSIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiLz48L2c+PGRlZnM+PGNsaXBQYXRoIGlkPSJjbGlwMF81XzQxIj48cmVjdCB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIGZpbGw9IndoaXRlIi8+PC9jbGlwUGF0aD48L2RlZnM+PC9zdmc+"); 95 | background-repeat: no-repeat; 96 | background-position: right 12px center; 97 | background-size: 24px; 98 | border: 1px solid var(--color-slate-200); 99 | color: var(--color-slate-600); 100 | } 101 | 102 | select::-ms-expand { 103 | display: none; 104 | } 105 | 106 | .styled-button { 107 | display: flex; 108 | flex-direction: row; 109 | align-items: center; 110 | justify-content: center; 111 | cursor: pointer; 112 | gap: 12px; 113 | border-radius: 16px; 114 | position: relative; 115 | color: #ffffff; 116 | text-align: center; 117 | height: 56px; 118 | padding: 0 32px; 119 | background: linear-gradient( 120 | to bottom, 121 | transparent 0%, 122 | transparent 88%, 123 | rgba(255, 255, 255, 0.2) 88%, 124 | rgba(255, 255, 255, 0.2) 100% 125 | ), 126 | linear-gradient(180deg, #10abe3 0%, #0046b5 
98.08%); 127 | border-width: 2px 2px 5px 2px; 128 | border-style: solid; 129 | border-color: #000000; 130 | overflow: hidden; 131 | box-shadow: 0px 4px 0px 4px rgba(0, 0, 0, 0.12); 132 | } 133 | 134 | .styled-button:before { 135 | content: ""; 136 | position: absolute; 137 | inset: 0; 138 | pointer-events: none; 139 | border-radius: 12px; 140 | border: 4px solid transparent; 141 | background-image: linear-gradient( 142 | 180deg, 143 | var(--theme-gradient-start) 0%, 144 | var(--theme-gradient-end) 100% 145 | ); 146 | background-origin: border-box; 147 | background-clip: border-box; 148 | -webkit-mask: linear-gradient(#fff 0 0) padding-box, linear-gradient(#fff 0 0); 149 | -webkit-mask-composite: xor; 150 | mask-composite: exclude; 151 | z-index: 1; 152 | } 153 | 154 | .styled-button:after { 155 | content: ""; 156 | position: absolute; 157 | inset: 2px; 158 | border-radius: 12px; 159 | border: 2px solid #0d1e4c; 160 | z-index: 2; 161 | } 162 | 163 | .styled-button-text { 164 | color: white; 165 | font-size: 14px; 166 | font-weight: 800; 167 | text-shadow: 2px 2px 0 rgba(0, 0, 0, 0.35); 168 | text-transform: uppercase; 169 | letter-spacing: 1px; 170 | z-index: 2; 171 | position: relative; 172 | } 173 | 174 | .styled-button:active { 175 | transform: translateY(6px); 176 | box-shadow: 0 0 0 #000000, inset 0 -2px 12px rgba(0, 0, 0, 0.2), 177 | inset 0 2px 12px rgba(255, 255, 255, 0.4); 178 | } 179 | 180 | .styled-button-icon { 181 | position: relative; 182 | box-sizing: border-box; 183 | display: flex; 184 | flex-direction: row; 185 | justify-content: center; 186 | align-items: center; 187 | flex-grow: 0; 188 | padding: 0px; 189 | width: 30px; 190 | height: 30px; 191 | background: rgba(42, 88, 173, 0.25); 192 | border-radius: 999px; 193 | z-index: 1; 194 | } 195 | .styled-button-icon:after { 196 | pointer-events: none; 197 | content: ""; 198 | position: absolute; 199 | inset: 0px; 200 | background: rgba(3, 85, 188, 0.5); 201 | border-radius: 999px; 202 | border: 2px 
solid #0a82d1; 203 | z-index: -1; 204 | } 205 | 206 | @media (min-width: 1024px) { 207 | .styled-button { 208 | height: 60px; 209 | } 210 | .styled-button-text { 211 | font-size: 16px; 212 | } 213 | } 214 | 215 | .spinner { 216 | width: 16px; 217 | height: 16px; 218 | border: 2px solid rgba(255, 255, 255, 0.3); 219 | border-top: 2px solid #ffffff; 220 | border-radius: 50%; 221 | animation: spin 1s linear infinite; 222 | } 223 | 224 | @keyframes spin { 225 | 0% { 226 | transform: rotate(0deg); 227 | } 228 | 100% { 229 | transform: rotate(360deg); 230 | } 231 | } 232 | 233 | .button { 234 | appearance: none; 235 | display: flex; 236 | flex-direction: row; 237 | align-items: center; 238 | justify-content: center; 239 | gap: 6px; 240 | cursor: pointer; 241 | background-color: rgba(0, 0, 0, 0.15); 242 | color: #ffffff; 243 | font-weight: 800; 244 | padding: 0 12px; 245 | height: var(--button-height-sm); 246 | border-radius: 999px; 247 | font-size: 16px; 248 | 249 | &.outline { 250 | border: 2px solid rgba(255, 255, 255, 0.35); 251 | background-color: transparent; 252 | outline: none; 253 | transition: background-color 0.2s ease-in-out, border-color 0.2s ease-in-out; 254 | } 255 | &.outline:hover { 256 | background-color: rgba(255, 255, 255, 0.1); 257 | border-color: rgba(255, 255, 255, 0.5); 258 | } 259 | 260 | &.ghost { 261 | background-color: var(--color-slate-100); 262 | color: var(--color-slate-600); 263 | height: var(--button-height-sm); 264 | } 265 | 266 | &.ghost:hover { 267 | background-color: var(--color-slate-200); 268 | } 269 | } 270 | 271 | @media (min-width: 1024px) { 272 | .button { 273 | padding: 0 24px; 274 | gap: 12px; 275 | height: var(--button-height); 276 | } 277 | } 278 | 279 | /* Animations */ 280 | 281 | @keyframes zoom-bounce { 282 | 0% { 283 | transform: scale(0.25); 284 | animation-timing-function: cubic-bezier(0.8, 0, 1, 1); 285 | } 286 | 50% { 287 | transform: scale(1.1); 288 | animation-timing-function: cubic-bezier(0, 0, 0.2, 1); 289 | } 
290 | 75% { 291 | transform: scale(0.95); 292 | animation-timing-function: cubic-bezier(0.8, 0, 1, 1); 293 | } 294 | 100% { 295 | transform: scale(1); 296 | } 297 | } 298 | -------------------------------------------------------------------------------- /client/src/components/Game/WordWrangler.tsx: -------------------------------------------------------------------------------- 1 | import { GAME_STATES, GAME_TEXT } from "@/constants/gameConstants"; 2 | import { useConnectionState } from "@/hooks/useConnectionState"; 3 | import { useGameState } from "@/hooks/useGameState"; 4 | import { useGameTimer } from "@/hooks/useGameTimer"; 5 | import { useVisualFeedback } from "@/hooks/useVisualFeedback"; 6 | import { useWordDetection } from "@/hooks/useWordDetection"; 7 | import { RTVIEvent } from "@pipecat-ai/client-js"; 8 | import { useRTVIClientEvent } from "@pipecat-ai/client-react"; 9 | import { IconCircleDashedCheck, IconDoorExit } from "@tabler/icons-react"; 10 | import { useCallback, useEffect, useRef } from "react"; 11 | import Logo from "../../assets/logo.png"; 12 | import { GameContent } from "./GameContent"; 13 | import { ScoreRow } from "./ScoreRow"; 14 | 15 | import JSConfetti from "js-confetti"; 16 | 17 | import Image from "next/image"; 18 | import styles from "./WordWrangler.module.css"; 19 | 20 | export const WordWrangler: React.FC<{ 21 | onGameEnded: (score: number, bestScore: number) => void; 22 | }> = ({ onGameEnded }) => { 23 | const botIntroCompletedRef = useRef(false); 24 | const currentScoreRef = useRef(0); 25 | const gameState = useGameState(); 26 | const visualFeedback = useVisualFeedback(); 27 | const { isConnected, client } = useConnectionState(); 28 | 29 | // Update the ref whenever score changes 30 | useEffect(() => { 31 | currentScoreRef.current = gameState.score; 32 | }, [gameState.score]); 33 | 34 | // End the game 35 | const endGame = useCallback(async () => { 36 | const scoreAtCallTime = currentScoreRef.current; 37 | 38 | // Prevent multiple 
calls to endGame 39 | if (gameState.gameState === GAME_STATES.FINISHED) { 40 | console.log("endGame prevented - game already finished"); 41 | return; 42 | } 43 | 44 | // Capture the current score before any state changes 45 | const finalScore = scoreAtCallTime; 46 | const currentBestScore = gameState.bestScore; 47 | 48 | // Update game state 49 | gameState.finishGame(); 50 | visualFeedback.resetVisuals(); 51 | 52 | // Update best score if needed 53 | if (currentBestScore < finalScore) { 54 | gameState.setBestScore(finalScore); 55 | } 56 | 57 | // Disconnect the bot 58 | if (client && isConnected) { 59 | try { 60 | await client.disconnectBot(); 61 | await client.disconnect(); 62 | } catch (error) { 63 | console.error("Error disconnecting bot:", error); 64 | } 65 | } 66 | 67 | // Call the callback with the captured scores 68 | onGameEnded(finalScore, Math.max(finalScore, currentBestScore)); 69 | }, [gameState, visualFeedback, client, isConnected, onGameEnded]); 70 | 71 | const gameTimer = useGameTimer(endGame); 72 | 73 | const wordDetection = useWordDetection({ 74 | gameState: gameState.gameState, 75 | currentWord: gameState.currentWord, 76 | onCorrectGuess: handleCorrectGuess, 77 | onIncorrectGuess: handleIncorrectGuess, 78 | }); 79 | 80 | // Initialize on component mount 81 | useEffect(() => { 82 | gameState.initializeGame(); 83 | }, []); 84 | 85 | // Handle connection state changes 86 | useEffect(() => { 87 | if (isConnected) { 88 | if (!botIntroCompletedRef.current) { 89 | // Connection is active, but bot hasn't completed intro 90 | gameState.setGameState(GAME_STATES.WAITING_FOR_INTRO); 91 | } 92 | } else { 93 | // Connection lost or never established 94 | if (gameState.gameState === GAME_STATES.ACTIVE) { 95 | // If game was active, it's now finished 96 | endGame(); 97 | } else if (gameState.gameState !== GAME_STATES.FINISHED) { 98 | // Reset to idle state if not already finished 99 | gameState.setGameState(GAME_STATES.IDLE); 100 | } 101 | 102 | // Reset intro 
state when connection is lost 103 | botIntroCompletedRef.current = false; 104 | } 105 | }, [isConnected, gameState.gameState, endGame]); 106 | 107 | // Listen for the bot to stop speaking to detect intro completion 108 | useRTVIClientEvent(RTVIEvent.BotStoppedSpeaking, () => { 109 | if ( 110 | gameState.gameState === GAME_STATES.WAITING_FOR_INTRO && 111 | !botIntroCompletedRef.current 112 | ) { 113 | // First time the bot stops speaking, consider intro done and start the game 114 | botIntroCompletedRef.current = true; 115 | startGame(); 116 | } 117 | }); 118 | 119 | // Handle correct guess with animation 120 | function handleCorrectGuess() { 121 | visualFeedback.showCorrect(() => { 122 | gameState.incrementScore(); 123 | gameState.moveToNextWord(); 124 | wordDetection.resetLastProcessedMessage(); 125 | }); 126 | const jsConfetti = new JSConfetti(); 127 | jsConfetti.addConfetti(); 128 | } 129 | 130 | // Handle incorrect guess with animation 131 | function handleIncorrectGuess() { 132 | visualFeedback.showIncorrectAnimation(); 133 | } 134 | 135 | // Start the game 136 | function startGame() { 137 | // Initialize game state 138 | gameState.initializeGame(); 139 | wordDetection.resetLastProcessedMessage(); 140 | 141 | // Start the timer - now it internally manages countdown and calls endGame when done 142 | gameTimer.startTimer(); 143 | } 144 | 145 | // Handle manual marking as correct 146 | function handleManualCorrect() { 147 | if (gameState.gameState !== GAME_STATES.ACTIVE) return; 148 | 149 | gameState.incrementScore(); 150 | 151 | const jsConfetti = new JSConfetti(); 152 | jsConfetti.addConfetti(); 153 | 154 | gameState.moveToNextWord(); 155 | wordDetection.resetLastProcessedMessage(); 156 | } 157 | 158 | // Handle skipping a word 159 | function handleSkip() { 160 | if (gameState.gameState !== GAME_STATES.ACTIVE) return; 161 | 162 | // Try to use a skip and proceed if successful 163 | if (gameState.useSkip()) { 164 | gameState.moveToNextWord(); 165 | 
wordDetection.resetLastProcessedMessage(); 166 | } 167 | } 168 | 169 | // Clean up on unmount 170 | useEffect(() => { 171 | return () => { 172 | gameTimer.stopTimer(); 173 | visualFeedback.cleanup(); 174 | }; 175 | }, []); 176 | 177 | return ( 178 |
179 |
180 |
181 |
182 | Word Wrangler 188 |
189 | 199 |
200 |
201 | 202 |
203 |
204 | 212 | 223 |
224 |
225 |
#
# Copyright (c) 2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import asyncio
import os
import sys
from typing import Any, Dict, Optional

import aiohttp
from dotenv import load_dotenv
from loguru import logger
from pipecatcloud.agent import DailySessionArguments

from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.filters.stt_mute_filter import STTMuteConfig, STTMuteFilter, STTMuteStrategy
from pipecat.processors.frameworks.rtvi import (
    RTVIConfig,
    RTVIObserver,
    RTVIProcessor,
)
from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport

load_dotenv(override=True)

# Check if we're in local development mode
LOCAL_RUN = os.getenv("LOCAL_RUN")

logger.add(sys.stderr, level="DEBUG")

# Base prompt describing the game rules and the bot's role as the guesser
game_prompt = """You are the AI host and player for a game of Word Wrangler.

GAME RULES:
1. The user will be given a word or phrase that they must describe to you
2. The user CANNOT say any part of the word/phrase directly
3. You must try to guess the word/phrase based on the user's description
4. Once you guess correctly, the user will move on to their next word
5. The user is trying to get through as many words as possible in 60 seconds
6. The external application will handle timing and keeping score

YOUR ROLE:
1. Start with this exact brief introduction: "Welcome to Word Wrangler! I'll try to guess the words you describe. Remember, don't say any part of the word itself. Ready? Let's go!"
2. Listen carefully to the user's descriptions
3. Make intelligent guesses based on what they say
4. When you think you know the answer, state it clearly: "Is it [your guess]?"
5. If you're struggling, ask for more specific clues
6. Keep the game moving quickly - make guesses promptly
7. Be enthusiastic and encouraging

IMPORTANT:
- Keep all responses brief - the game is timed!
- Make multiple guesses if needed
- Use your common knowledge to make educated guesses
- If the user indicates you got it right, just say "Got it!" and prepare for the next word
- If you've made several wrong guesses, simply ask for "Another clue please?"

Start with the exact introduction specified above, then wait for the user to begin describing their first word."""

# Define personality presets
PERSONALITY_PRESETS = {
    "friendly": "You have a warm, approachable personality. You use conversational language, occasional humor, and express enthusiasm for the topic. Make the user feel comfortable and engaged.",
    "professional": "You have a formal, precise personality. You communicate clearly and directly with a focus on accuracy and relevance. Your tone is respectful and business-like.",
    "enthusiastic": "You have an energetic, passionate personality. You express excitement about the topic and use dynamic language. You're encouraging and positive throughout the conversation.",
    "thoughtful": "You have a reflective, philosophical personality. You speak carefully, considering multiple angles of each point. You ask thought-provoking questions and acknowledge nuance.",
    "witty": "You have a clever, humorous personality. While remaining informative, you inject appropriate wit and playful language. Your goal is to be engaging and entertaining while still being helpful.",
}


def _resolve_personality_prompt(config: Optional[Dict[str, Any]]) -> str:
    """Return the personality prompt selected by ``config["personality"]``.

    A missing key selects the "witty" preset. A value that is not one of the
    ``PERSONALITY_PRESETS`` keys falls back to the "friendly" preset and is
    logged, so a misconfigured client is visible in the logs. A ``config``
    that is ``None`` or not a dict is treated as empty instead of raising.
    """
    settings = config if isinstance(config, dict) else {}
    personality = settings.get("personality", "witty")

    # The isinstance check also protects against unhashable values (e.g. a
    # list), which would make the dict lookup itself raise TypeError.
    if isinstance(personality, str) and personality in PERSONALITY_PRESETS:
        return PERSONALITY_PRESETS[personality]

    logger.warning("Unknown personality {!r}; falling back to 'friendly'", personality)
    return PERSONALITY_PRESETS["friendly"]


async def main(transport: DailyTransport, config: Optional[Dict[str, Any]]) -> None:
    """Build the Word Wrangler pipeline on ``transport`` and run it to completion.

    Args:
        transport: Daily transport supplying the pipeline's audio input and output.
        config: Session configuration. Only the optional ``"personality"`` key
            is read; ``None`` or a non-dict value is treated as empty.
    """
    logger.debug("Configuration: {}", config)

    personality_prompt = _resolve_personality_prompt(config)

    system_instruction = f"""{game_prompt}

{personality_prompt}

Important guidelines:
1. Your responses will be converted to speech, so keep them concise and conversational.
2. Don't use special characters or formatting that wouldn't be natural in speech.
3. Encourage the user to elaborate when appropriate."""

    intro_message = """Start with this exact brief introduction: "Welcome to Word Wrangler! I'll try to guess the words you describe. Remember, don't say any part of the word itself. Ready? Let's go!"""

    # Mute user speech-to-text until the bot's first utterance completes, so
    # the introduction cannot be interrupted.
    stt_mute_filter = STTMuteFilter(
        config=STTMuteConfig(strategies={STTMuteStrategy.MUTE_UNTIL_FIRST_BOT_COMPLETE})
    )

    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        transcribe_user_audio=True,
        system_instruction=system_instruction,
    )

    # Seed the conversation with a single user message that asks for the intro
    messages = [
        {
            "role": "user",
            "content": intro_message,
        },
    ]

    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    # RTVI events for Pipecat client UI
    rtvi = RTVIProcessor(config=RTVIConfig(config=[]))

    pipeline = Pipeline(
        [
            transport.input(),
            rtvi,
            stt_mute_filter,
            context_aggregator.user(),
            llm,
            transport.output(),
            context_aggregator.assistant(),
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        observers=[RTVIObserver(rtvi)],
    )

    @rtvi.event_handler("on_client_ready")
    async def on_client_ready(rtvi):
        logger.debug("Client ready event received")
        await rtvi.set_bot_ready()
        # Kick off the conversation
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant):
        logger.info("First participant joined: {}", participant["id"])
        # Capture the participant's transcription
        await transport.capture_participant_transcription(participant["id"])

    @transport.event_handler("on_participant_left")
    async def on_participant_left(transport, participant, reason):
        logger.info("Participant left: {}", participant)
        # Cancel the pipeline task as soon as a participant leaves
        await task.cancel()

    runner = PipelineRunner(handle_sigint=False, force_gc=True)

    await runner.run(task)


async def bot(args: DailySessionArguments):
    """Main bot entry point compatible with the FastAPI route handler.

    Args:
        args: Session arguments. This function reads ``args.room_url`` and
            ``args.token`` to join the Daily room, and passes ``args.body``
            to ``main`` as the session configuration.

    Raises:
        Exception: Any error raised while running the pipeline is logged and
            re-raised.
    """
    # Log only the room URL. The room token is a credential and must not be
    # written to the logs.
    logger.info("Bot process initialized for room {}", args.room_url)

    # Import Krisp only when it is actually used, so a local run does not
    # require the Krisp package to be installed.
    audio_in_filter = None
    if not LOCAL_RUN:
        from pipecat.audio.filters.krisp_filter import KrispFilter

        audio_in_filter = KrispFilter()

    transport = DailyTransport(
        args.room_url,
        args.token,
        "Word Wrangler Bot",
        DailyParams(
            audio_in_filter=audio_in_filter,
            audio_out_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        ),
    )

    try:
        await main(transport, args.body)
        logger.info("Bot process completed")
    except Exception as e:
        logger.exception("Error in bot process: {}", e)
        raise


# Local development
async def local_daily():
    """Run the bot against a Daily room obtained from ``runner.configure``.

    Errors are logged rather than propagated.
    """
    from runner import configure

    try:
        async with aiohttp.ClientSession() as session:
            (room_url, token) = await configure(session)
            transport = DailyTransport(
                room_url,
                token,
                bot_name="Bot",
                params=DailyParams(
                    audio_out_enabled=True,
                    vad_enabled=True,
                    vad_analyzer=SileroVADAnalyzer(),
                    vad_audio_passthrough=True,
                ),
            )

            test_config = {
                "personality": "witty",
            }

            await main(transport, test_config)
    except Exception as e:
        logger.exception("Error in local development mode: {}", e)
| 231 | # Local development entry point 232 | if LOCAL_RUN and __name__ == "__main__": 233 | try: 234 | asyncio.run(local_daily()) 235 | except Exception as e: 236 | logger.exception(f"Failed to run in local mode: {e}") 237 | -------------------------------------------------------------------------------- /client/src/data/wordWranglerWords.ts: -------------------------------------------------------------------------------- 1 | // 100 Easy Words - Common, everyday objects and concepts 2 | export const EASY_CATCH_PHRASE_WORDS = [ 3 | // Common Objects 4 | 'Chair', 5 | 'Table', 6 | 'Door', 7 | 'Window', 8 | 'Book', 9 | 'Pencil', 10 | 'Phone', 11 | 'Computer', 12 | 'Ball', 13 | 'Car', 14 | 'Shoe', 15 | 'Hat', 16 | 'Cup', 17 | 'Plate', 18 | 'Fork', 19 | 'Spoon', 20 | 'Knife', 21 | 'Key', 22 | 'Clock', 23 | 'Watch', 24 | 25 | // Food & Drink 26 | 'Pizza', 27 | 'Hamburger', 28 | 'Ice cream', 29 | 'Chocolate', 30 | 'Apple', 31 | 'Banana', 32 | 'Orange', 33 | 'Milk', 34 | 'Water', 35 | 'Cake', 36 | 'Cookie', 37 | 'Bread', 38 | 'Egg', 39 | 'Cheese', 40 | 'Chicken', 41 | 42 | // Animals 43 | 'Dog', 44 | 'Cat', 45 | 'Fish', 46 | 'Bird', 47 | 'Horse', 48 | 'Cow', 49 | 'Pig', 50 | 'Duck', 51 | 'Lion', 52 | 'Tiger', 53 | 'Bear', 54 | 'Elephant', 55 | 'Monkey', 56 | 'Rabbit', 57 | 'Frog', 58 | 59 | // Colors & Simple Concepts 60 | 'Red', 61 | 'Blue', 62 | 'Green', 63 | 'Yellow', 64 | 'Black', 65 | 'White', 66 | 'Big', 67 | 'Small', 68 | 'Hot', 69 | 'Cold', 70 | 'Happy', 71 | 'Sad', 72 | 'Fast', 73 | 'Slow', 74 | 'Old', 75 | 'Young', 76 | 'Up', 77 | 'Down', 78 | 'Left', 79 | 'Right', 80 | 81 | // Simple Activities 82 | 'Run', 83 | 'Walk', 84 | 'Jump', 85 | 'Swim', 86 | 'Sleep', 87 | 'Eat', 88 | 'Drink', 89 | 'Laugh', 90 | 'Cry', 91 | 'Smile', 92 | 'Play', 93 | 'Work', 94 | 'Read', 95 | 'Write', 96 | 'Draw', 97 | 'Sing', 98 | 'Dance', 99 | 'Talk', 100 | 'Listen', 101 | 'Cook', 102 | ]; 103 | 104 | // 300 Medium Words - More specific items, common concepts, popular culture 105 | 
export const MEDIUM_CATCH_PHRASE_WORDS = [ 106 | // Household Items & Technology 107 | 'Refrigerator', 108 | 'Television', 109 | 'Microwave', 110 | 'Bookshelf', 111 | 'Couch', 112 | 'Dishwasher', 113 | 'Ceiling fan', 114 | 'Toaster', 115 | 'Vacuum cleaner', 116 | 'Blender', 117 | 'Printer', 118 | 'Headphones', 119 | 'Smartphone', 120 | 'Laptop', 121 | 'Tablet', 122 | 'Camera', 123 | 'Remote control', 124 | 'Charger', 125 | 'Keyboard', 126 | 'Mouse', 127 | 'Lightbulb', 128 | 'Shower curtain', 129 | 'Doorknob', 130 | 'Power outlet', 131 | 'Coffee maker', 132 | 133 | // Food & Cuisine 134 | 'Spaghetti', 135 | 'Burrito', 136 | 'Sushi', 137 | 'Pancake', 138 | 'Waffle', 139 | 'Cereal', 140 | 'Sandwich', 141 | 'Salad', 142 | 'French fries', 143 | 'Hot dog', 144 | 'Cupcake', 145 | 'Donut', 146 | 'Milkshake', 147 | 'Smoothie', 148 | 'Oatmeal', 149 | 'Peanut butter', 150 | 'Jelly', 151 | 'Bacon', 152 | 'Scrambled eggs', 153 | 'Toast', 154 | 'Steak', 155 | 'Mashed potatoes', 156 | 'Broccoli', 157 | 'Carrot', 158 | 'Onion', 159 | 160 | // Animals & Nature 161 | 'Giraffe', 162 | 'Penguin', 163 | 'Kangaroo', 164 | 'Dolphin', 165 | 'Octopus', 166 | 'Butterfly', 167 | 'Spider', 168 | 'Eagle', 169 | 'Turtle', 170 | 'Squirrel', 171 | 'Rainbow', 172 | 'Waterfall', 173 | 'Mountain', 174 | 'Beach', 175 | 'Forest', 176 | 'Hurricane', 177 | 'Snowflake', 178 | 'Thunderstorm', 179 | 'Volcano', 180 | 'Desert', 181 | 'Sunrise', 182 | 'Sunset', 183 | 'Moon', 184 | 'Stars', 185 | 'Planet', 186 | 187 | // Sports & Activities 188 | 'Basketball', 189 | 'Football', 190 | 'Soccer', 191 | 'Baseball', 192 | 'Tennis', 193 | 'Golf', 194 | 'Swimming', 195 | 'Skiing', 196 | 'Snowboarding', 197 | 'Hiking', 198 | 'Camping', 199 | 'Fishing', 200 | 'Gardening', 201 | 'Painting', 202 | 'Photography', 203 | 'Cycling', 204 | 'Jogging', 205 | 'Yoga', 206 | 'Dancing', 207 | 'Cooking', 208 | 'Driving', 209 | 'Flying', 210 | 'Sailing', 211 | 'Surfing', 212 | 'Rock climbing', 213 | 214 | // Clothing & Accessories 
215 | 'Sunglasses', 216 | 'Umbrella', 217 | 'Necklace', 218 | 'Bracelet', 219 | 'Ring', 220 | 'Earrings', 221 | 'Backpack', 222 | 'Purse', 223 | 'Wallet', 224 | 'Watch', 225 | 'Sneakers', 226 | 'Sandals', 227 | 'Boots', 228 | 'High heels', 229 | 'Flip flops', 230 | 'Scarf', 231 | 'Gloves', 232 | 'Belt', 233 | 'Tie', 234 | 'Jacket', 235 | 'Sweater', 236 | 'Sweatshirt', 237 | 'Jeans', 238 | 'Shorts', 239 | 'Dress', 240 | 241 | // Places 242 | 'Restaurant', 243 | 'Grocery store', 244 | 'Shopping mall', 245 | 'Movie theater', 246 | 'Park', 247 | 'School', 248 | 'Library', 249 | 'Museum', 250 | 'Zoo', 251 | 'Airport', 252 | 'Hospital', 253 | 'Hotel', 254 | 'Bank', 255 | 'Post office', 256 | 'Gym', 257 | 'Beach', 258 | 'Swimming pool', 259 | 'Church', 260 | 'Stadium', 261 | 'Concert hall', 262 | 'Farm', 263 | 'City', 264 | 'Village', 265 | 'Country', 266 | 'Island', 267 | 268 | // Jobs & Professions 269 | 'Teacher', 270 | 'Doctor', 271 | 'Nurse', 272 | 'Police officer', 273 | 'Firefighter', 274 | 'Chef', 275 | 'Waiter', 276 | 'Pilot', 277 | 'Engineer', 278 | 'Scientist', 279 | 'Actor', 280 | 'Singer', 281 | 'Artist', 282 | 'Writer', 283 | 'Photographer', 284 | 'Farmer', 285 | 'Mechanic', 286 | 'Electrician', 287 | 'Plumber', 288 | 'Carpenter', 289 | 'Lawyer', 290 | 'Accountant', 291 | 'Businessman', 292 | 'Salesperson', 293 | 'Architect', 294 | 295 | // Transportation 296 | 'Bicycle', 297 | 'Motorcycle', 298 | 'Bus', 299 | 'Train', 300 | 'Airplane', 301 | 'Helicopter', 302 | 'Boat', 303 | 'Ship', 304 | 'Submarine', 305 | 'Rocket', 306 | 'Taxi', 307 | 'Ambulance', 308 | 'Fire truck', 309 | 'Police car', 310 | 'School bus', 311 | 'Skateboard', 312 | 'Scooter', 313 | 'Rollerblades', 314 | 'Wagon', 315 | 'Sled', 316 | 'Escalator', 317 | 'Elevator', 318 | 'Tractor', 319 | 'Bulldozer', 320 | 'Crane', 321 | 322 | // Entertainment & Hobbies 323 | 'Movie', 324 | 'Music', 325 | 'Book', 326 | 'Game', 327 | 'Puzzle', 328 | 'Toy', 329 | 'Doll', 330 | 'Action figure', 331 | 'Video 
game', 332 | 'Board game', 333 | 'Knitting', 334 | 'Sewing', 335 | 'Woodworking', 336 | 'Baking', 337 | 'Grilling', 338 | 'Hunting', 339 | 'Archery', 340 | 'Bowling', 341 | 'Karaoke', 342 | 'Dancing', 343 | 'Collecting', 344 | 'Reading', 345 | 'Writing', 346 | 'Drawing', 347 | 'Painting', 348 | 349 | // Body Parts & Health 350 | 'Heart', 351 | 'Brain', 352 | 'Stomach', 353 | 'Lungs', 354 | 'Liver', 355 | 'Kidneys', 356 | 'Skin', 357 | 'Hair', 358 | 'Nails', 359 | 'Teeth', 360 | 'Eyes', 361 | 'Ears', 362 | 'Nose', 363 | 'Mouth', 364 | 'Throat', 365 | 'Shoulder', 366 | 'Elbow', 367 | 'Wrist', 368 | 'Hip', 369 | 'Knee', 370 | 'Ankle', 371 | 'Exercise', 372 | 'Medicine', 373 | 'Doctor', 374 | 'Hospital', 375 | 376 | // Common Concepts 377 | 'Birthday', 378 | 'Wedding', 379 | 'Funeral', 380 | 'Holiday', 381 | 'Vacation', 382 | 'Friendship', 383 | 'Love', 384 | 'Family', 385 | 'Education', 386 | 'Career', 387 | 'Money', 388 | 'Time', 389 | 'Weather', 390 | 'Season', 391 | 'History', 392 | 'Future', 393 | 'Success', 394 | 'Failure', 395 | 'Challenge', 396 | 'Opportunity', 397 | 'Competition', 398 | 'Cooperation', 399 | 'Leadership', 400 | 'Creativity', 401 | 'Innovation', 402 | ]; 403 | 404 | // 100 Hard Words - Abstract concepts, specific terminology, complex ideas 405 | export const HARD_CATCH_PHRASE_WORDS = [ 406 | // Academic & Scientific Terms 407 | 'Photosynthesis', 408 | 'Mitochondria', 409 | 'Quantum physics', 410 | 'Relativity', 411 | 'Biodiversity', 412 | 'Logarithm', 413 | 'Algorithm', 414 | 'Neural network', 415 | 'Nanotechnology', 416 | 'Thermodynamics', 417 | 'Paleontology', 418 | 'Archaeology', 419 | 'Linguistics', 420 | 'Metaphysics', 421 | 'Epistemology', 422 | 'Cryptocurrency', 423 | 'Blockchain', 424 | 'Artificial intelligence', 425 | 'Machine learning', 426 | 'Virtual reality', 427 | 428 | // Abstract Concepts 429 | 'Nostalgia', 430 | 'Ambivalence', 431 | 'Empathy', 432 | 'Serendipity', 433 | 'Irony', 434 | 'Existentialism', 435 | 'Utilitarianism', 436 
| 'Nihilism', 437 | 'Altruism', 438 | 'Pragmatism', 439 | 'Transcendence', 440 | 'Symbolism', 441 | 'Paradox', 442 | 'Dichotomy', 443 | 'Synchronicity', 444 | 'Meritocracy', 445 | 'Bureaucracy', 446 | 'Democracy', 447 | 'Capitalism', 448 | 'Socialism', 449 | 450 | // Specialized Terminology 451 | 'Amortization', 452 | 'Cryptocurrency', 453 | 'Jurisprudence', 454 | 'Cartography', 455 | 'Meteorology', 456 | 'Gentrification', 457 | 'Immunology', 458 | 'Horticulture', 459 | 'Gerontology', 460 | 'Acoustics', 461 | 'Encryption', 462 | 'Astrophysics', 463 | 'Ornithology', 464 | 'Entomology', 465 | 'Viticulture', 466 | 'Numismatics', 467 | 'Philately', 468 | 'Calligraphy', 469 | 'Lexicography', 470 | 'Etymology', 471 | 472 | // Cultural & Historical References 473 | 'Renaissance', 474 | 'Industrial Revolution', 475 | 'Cold War', 476 | 'Enlightenment', 477 | 'Reformation', 478 | 'Colonialism', 479 | 'Globalization', 480 | 'Diaspora', 481 | 'Apartheid', 482 | 'Imperialism', 483 | 'Monarchy', 484 | 'Republic', 485 | 'Federation', 486 | 'Aristocracy', 487 | 'Oligarchy', 488 | 'Surrealism', 489 | 'Impressionism', 490 | 'Baroque', 491 | 'Neoclassicism', 492 | 'Romanticism', 493 | 494 | // Complex Activities & Processes 495 | 'Meditation', 496 | 'Negotiation', 497 | 'Diplomacy', 498 | 'Arbitration', 499 | 'Litigation', 500 | 'Legislation', 501 | 'Deliberation', 502 | 'Investigation', 503 | 'Implementation', 504 | 'Extrapolation', 505 | 'Procurement', 506 | 'Outsourcing', 507 | 'Diversification', 508 | 'Consolidation', 509 | 'Optimization', 510 | 'Orchestration', 511 | 'Choreography', 512 | 'Composition', 513 | 'Improvisation', 514 | 'Interpretation', 515 | ]; 516 | 517 | // Combined lists for random selection 518 | export const ALL_CATCH_PHRASE_WORDS = [ 519 | ...EASY_CATCH_PHRASE_WORDS, 520 | ...MEDIUM_CATCH_PHRASE_WORDS, 521 | ...HARD_CATCH_PHRASE_WORDS, 522 | ]; 523 | 524 | // Get a batch of random words (useful for starting a game with multiple words) 525 | export const 
/**
 * Get a batch of random words (useful for starting a game with multiple words).
 *
 * @param count Maximum number of words to return (default 30). Fractions are
 *   rounded down; negative or NaN values yield an empty batch; values larger
 *   than the pool return the whole shuffled pool.
 * @returns A shuffled array of distinct words.
 */
export const getRandomCatchPhraseWords = (count: number = 30): string[] => {
  // Some words appear in more than one list or category (for example 'Watch',
  // 'Book', 'Beach' and 'Cryptocurrency'). De-duplicate so a single batch
  // never shows the same word twice.
  const wordList = Array.from(new Set(ALL_CATCH_PHRASE_WORDS));

  // Clamp the request to [0, pool size]. A negative count would otherwise
  // reach slice(0, count) and return almost the whole list.
  const requested = Number.isNaN(count) ? 0 : Math.floor(count);
  const batchSize = Math.min(wordList.length, Math.max(0, requested));

  // Shuffle the array using Fisher-Yates algorithm
  for (let i = wordList.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [wordList[i], wordList[j]] = [wordList[j], wordList[i]];
  }

  return wordList.slice(0, batchSize);
};
The AI host provides a word for you to describe 35 | 2. You describe the word WITHOUT saying any part of it 36 | 3. The AI player tries to guess based on your description 37 | 4. Score points for each correct guess 38 | 5. Use commands like "skip" to get a new word or "repeat" to hear the current word again 39 | 6. You have 120 seconds to score as many points as possible 40 | 41 | ## Architecture 42 | 43 | ### Web Game Architecture 44 | 45 | The web game uses a simple linear flow: 46 | 47 | 1. **Transport Input** - Receives audio from the web browser via a Daily WebRTC transport. 48 | 2. **RTVIProcessor** - RTVI is a standard for client/server communication in a voice AI context. This processor collects server-side information and makes it available to the client. Additionally, the client can send events to the server, which are handled through this processor. 49 | 3. **STTMuteFilter** - Filters out speech during specific conditions. In this game, the user's initial speech is "muted", ensuring that the bot can deliver the entire initial message without being interrupted. 50 | 4. **User Context Aggregator** - Aggregates user messages as part of the conversation context. 51 | 5. **LLM** - The LLM powers the AI player's interactions. 52 | 6. **Transport Output** - Sends audio back to the browser using the Daily WebRTC transport. 53 | 7. **Assistant Context Aggregator** - Aggregates assistant messages as part of the conversation context. 54 | 55 | ### Phone Game Architecture 56 | 57 | The phone game implements a three-way conversation using Pipecat's parallel pipeline architecture. This design addresses the fundamental challenge of LLMs - they're built for turn-based interactions, while this game requires real-time, multi-participant conversation management. 
58 | 59 | 60 | 61 | #### Conversation Participants 62 | 63 | **Audio Flow Requirements:** 64 | 65 | - **User:** Must hear both the Host and Player outputs; must be heard by both Host and Player 66 | - **Host:** Must hear the User and Player inputs; its output must be heard by User but NOT by Player 67 | - **Player:** Must hear only the User inputs; its output must be heard by both User and Host 68 | 69 | #### Technical Implementation 70 | 71 | The parallel pipeline pattern allows us to create two isolated processing branches, with controlled audio flow between them: 72 | 73 | 1. **Transport Input** - Receives audio from the phone call (Twilio) 74 | 2. **Audio Branch Separation:** 75 | - **Left Branch (Host Pipeline):** `ConsumerProcessor → Host LLM → Game State Tracker → TTS → Bot Stop Detector` 76 | - **Right Branch (Player Pipeline):** `StartFrame Gate → Player LLM → ProducerProcessor` 77 | 78 | **Host LLM Configuration:** 79 | 80 | The Host uses Gemini Live API, configured with specific response patterns to handle different input types: 81 | 82 | ``` 83 | - Correct guess: "Correct! That's [N] points. Your next word is [new word]" 84 | - Incorrect guess: "NO" (filtered out by TTS filter) 85 | - User descriptions: "IGNORE" (filtered out by TTS filter) 86 | - Skip requests: "The new word is [new word]" 87 | - Repeat requests: "Your word is [current word]" 88 | ``` 89 | 90 | **Audio Flow Management:** 91 | 92 | By default, all input audio flows to both branches, so both LLMs hear the user. To implement the complex routing: 93 | 94 | 1. **Producer/Consumer Pattern:** Captures the Player's output audio and feeds it to the Host 95 | 96 | - `ProducerProcessor` filters TTSAudioRawFrames from the Player 97 | - Transforms them from 24kHz to 16kHz (required by Gemini Live) 98 | - Passes them to the `ConsumerProcessor` at the top of the Host branch 99 | 100 | 2. 
**Text Filtering:** The `HostResponseTextFilter` intercepts the "NO" and "IGNORE" responses 101 | 102 | - Prevents TTS vocalization of these responses 103 | - Ensures that only meaningful Host responses are spoken 104 | 105 | 3. **Host-Player Synchronization:** 106 | 107 | - `BotStoppedSpeakingNotifier` detects when the Host finishes speaking 108 | - `GameStateTracker` parses the streamed text to detect new words and track score 109 | - `NewWordNotifier` triggers the `ResettablePlayerLLM` to disconnect and reconnect when a new word is presented 110 | - This reset ensures the Player has no context of previous words or guesses 111 | 112 | 4. **StartFrameGate:** The gate holds the Player's StartFrame until the Host has completed its introduction 113 | - Ensures the Player doesn't start interacting until the game has been properly set up 114 | 115 | All processed audio is collected at the end of the Parallel Pipeline and sent via the transport output back to Twilio. 116 | 117 | #### Game State Management 118 | 119 | The implementation tracks: 120 | 121 | - Current words being guessed 122 | - Running score (points for correct guesses) 123 | - Game duration with automatic timeout 124 | 125 | This architecture enables complex interaction patterns that would be difficult to achieve with traditional turn-based conversation models, allowing each AI participant to function effectively in their specific game role. 126 | 127 | ## Run Locally 128 | 129 | ### Web Game 130 | 131 | #### Run the Server 132 | 133 | 1. Switch to the server directory: 134 | 135 | ```bash 136 | cd server 137 | ``` 138 | 139 | 2. Set up and activate your virtual environment: 140 | 141 | ```bash 142 | python3 -m venv venv 143 | source venv/bin/activate # On Windows: venv\Scripts\activate 144 | ``` 145 | 146 | 3. Install dependencies: 147 | 148 | ```bash 149 | pip install -r requirements.txt 150 | ``` 151 | 152 | 4. 
Create an .env file and add your API keys: 153 | 154 | ```bash 155 | cp env.example .env 156 | ``` 157 | 158 | 5. Add environment variables for: 159 | 160 | ``` 161 | DAILY_API_KEY= 162 | DAILY_SAMPLE_ROOM_URL= 163 | GOOGLE_API_KEY= 164 | ``` 165 | 166 | 6. Run the server: 167 | 168 | ```bash 169 | LOCAL_RUN=1 python server.py 170 | ``` 171 | 172 | #### Run the Client 173 | 174 | 1. In a new terminal window, navigate to client: 175 | 176 | ```bash 177 | cd client 178 | ``` 179 | 180 | 2. Install dependencies: 181 | 182 | ```bash 183 | npm install 184 | ``` 185 | 186 | 3. Create an .env.local file: 187 | 188 | ```bash 189 | cp env.example .env.local 190 | ``` 191 | 192 | 4. In .env.local: 193 | 194 | - `NEXT_PUBLIC_API_BASE_URL=http://localhost:7860` is used for local development. For deployments, either remove this env var or replace with `/api`. 195 | - `AGENT_NAME` should be set to the name of your deployed Pipecat agent (e.g., "word-wrangler"). 196 | - `PIPECAT_CLOUD_API_KEY` is used only for deployments to Pipecat Cloud. 197 | 198 | 5. Run the app: 199 | 200 | ```bash 201 | npm run dev 202 | ``` 203 | 204 | 6. Open http://localhost:3000 in your browser 205 | 206 | ### Phone Game 207 | 208 | There are two versions of the phone game: 209 | 210 | 1. **Local Development** (`bot_phone_local.py`): 211 | 212 | - For testing locally before deployment 213 | 214 | 2. **Deployment** (`bot_phone_twilio.py`): 215 | - Ready for deployment to Pipecat Cloud 216 | 217 | #### Running Locally 218 | 219 | 1. Set up and activate your virtual environment: 220 | 221 | ```bash 222 | python3 -m venv venv 223 | source venv/bin/activate # On Windows: venv\Scripts\activate 224 | ``` 225 | 226 | 2. Install dependencies (run from the repository root; the requirements file lives in `server/`): 227 | 228 | ```bash 229 | pip install -r server/requirements.txt 230 | ``` 231 | 232 | 3. Create an .env file in the server directory with your API keys: 233 | 234 | ```bash 235 | cd server 236 | cp env.example .env 237 | ``` 238 | 239 | 4.
Configure Daily information in your .env: 240 | 241 | ``` 242 | DAILY_API_KEY=your_daily_api_key 243 | DAILY_SAMPLE_ROOM_URL=your_daily_room_url 244 | GOOGLE_API_KEY=your_google_api_key 245 | GOOGLE_TEST_CREDENTIALS_FILE=path_to_credentials_file 246 | ``` 247 | 248 | 5. Run the local bot: 249 | 250 | ```bash 251 | LOCAL_RUN=1 python bot_phone_local.py 252 | ``` 253 | 254 | ## Deployment 255 | 256 | ### Web Game 257 | 258 | #### Deploy your Server 259 | 260 | You can deploy your server code using Pipecat Cloud. For a full walkthrough, start with the [Pipecat Cloud Quickstart](https://docs.pipecat.daily.co/quickstart). 261 | 262 | Here are the steps you'll need to complete: 263 | 264 | - Build, tag, and push your Docker image to a registry. 265 | - Create Pipecat Cloud secrets using the CLI or dashboard. For this agent, you only need a `GOOGLE_API_KEY`. Your `DAILY_API_KEY` is automatically applied. 266 | - Deploy your agent image. You can use a pcc-deploy.toml file to make deploying easier. For example: 267 | 268 | ```toml 269 | agent_name = "word-wrangler" 270 | image = "your-dockerhub-name/word-wrangler:0.1" 271 | secret_set = "word-wrangler-secrets" 272 | enable_krisp = true 273 | 274 | [scaling] 275 | min_instances = 1 276 | max_instances = 5 277 | ``` 278 | 279 | Then, you can deploy with the CLI using `pcc deploy`. 280 | 281 | - Finally, confirm that your agent is deployed. You'll get feedback in the terminal. 282 | 283 | #### Deploy your Client 284 | 285 | This project uses TypeScript, React, and Next.js, making it a perfect fit for [Vercel](https://vercel.com/). 286 | 287 | - In your client directory, install Vercel's CLI tool: `npm install -g vercel` 288 | - Verify it's installed using `vercel --version` 289 | - Log in to your Vercel account using `vercel login` 290 | - Deploy your client to Vercel using `vercel` 291 | 292 | ### Phone Game 293 | 294 | #### Deploy your Server 295 | 296 | Again, we'll use Pipecat Cloud. Follow the steps from above. 
The only difference will be the secrets required; in addition to a `GOOGLE_API_KEY`, you'll need `GOOGLE_APPLICATION_CREDENTIALS` in the format of a .json file with your [Google Cloud service account](https://console.cloud.google.com/iam-admin/serviceaccounts) information. 297 | 298 | You'll need to modify the Dockerfile so that the credentials.json and word_list.py are accessible. This Dockerfile will work: 299 | 300 | ```Dockerfile 301 | FROM dailyco/pipecat-base:latest 302 | 303 | COPY ./requirements.txt requirements.txt 304 | 305 | RUN pip install --no-cache-dir --upgrade -r requirements.txt 306 | 307 | COPY ./word_list.py word_list.py 308 | COPY ./credentials.json credentials.json 309 | COPY ./bot_phone_twilio.py bot.py 310 | ``` 311 | 312 | Note: Your `credentials.json` file should have your Google service account credentials. 313 | 314 | #### Buy and Configure a Twilio Number 315 | 316 | Check out the [Twilio Websocket Telephony guide](https://docs.pipecat.daily.co/pipecat-in-production/telephony/twilio-mediastreams) for a step-by-step walkthrough on how to purchase a phone number, configure your TwiML, and make or receive calls. 
317 | 318 | ## Tech stack 319 | 320 | Both games are built using: 321 | 322 | - [Pipecat](https://www.pipecat.ai/) framework for real-time voice conversation 323 | - Google's Gemini Live API 324 | - Real-time communication (Web via Daily, Phone via Twilio) 325 | 326 | The phone game features: 327 | 328 | - Parallel processing of host and player interactions 329 | - State tracking for game progress and scoring 330 | - Dynamic word selection from multiple categories 331 | - Automated game timing and scoring 332 | -------------------------------------------------------------------------------- /client/src/components/Game/WordWrangler.module.css: -------------------------------------------------------------------------------- 1 | .gameContainer { 2 | position: relative; 3 | z-index: 1; 4 | padding: 4px; 5 | width: 100%; 6 | border-radius: 28px; 7 | margin-top: 50px; 8 | min-height: 300px; 9 | box-shadow: 0px 66px 26px rgba(0, 0, 0, 0.01), 10 | 0px 37px 22px rgba(0, 0, 0, 0.05), 0px 16px 16px rgba(0, 0, 0, 0.09), 11 | 0px 4px 9px rgba(0, 0, 0, 0.1); 12 | } 13 | 14 | @media (min-width: 1024px) { 15 | .gameContainer { 16 | width: auto; 17 | flex: none; 18 | min-width: 626px; 19 | height: 260px; 20 | margin-top: 0; 21 | } 22 | } 23 | 24 | .gameContainer:before { 25 | content: ""; 26 | position: absolute; 27 | inset: -4px -4px -8px -4px; 28 | border-radius: 28px; 29 | background: linear-gradient( 30 | to bottom, 31 | rgba(0, 0, 0, 1) 0%, 32 | rgba(0, 0, 0, 0.15) 100% 33 | ); 34 | z-index: -1; 35 | } 36 | 37 | .gameContainer:after { 38 | content: ""; 39 | box-sizing: border-box; 40 | position: absolute; 41 | inset: 0; 42 | border-radius: var(--border-radius-card); 43 | border: var(--border-width-card) solid transparent; 44 | background-image: linear-gradient(#001146, #0655cc), 45 | linear-gradient( 46 | 180deg, 47 | var(--theme-gradient-start) 0%, 48 | var(--theme-gradient-end) 100% 49 | ); 50 | background-origin: border-box; 51 | background-clip: padding-box, border-box; 52 
| } 53 | 54 | .gameContent { 55 | position: relative; 56 | z-index: 1; 57 | background: transparent; 58 | border-radius: 20px; 59 | width: 100%; 60 | height: 100%; 61 | min-height: 292px; 62 | display: flex; 63 | overflow: hidden; 64 | border: 6px solid rgba(0, 0, 0, 0.25); 65 | } 66 | 67 | .gameContent:after { 68 | content: ""; 69 | position: absolute; 70 | inset: 0; 71 | background: radial-gradient( 72 | 70% 40% at 50% 40%, 73 | #2da6ee 0%, 74 | rgba(45, 166, 238, 0) 100% 75 | ); 76 | opacity: 0.76; 77 | z-index: -1; 78 | } 79 | 80 | .gameArea { 81 | display: flex; 82 | flex-direction: column; 83 | align-items: center; 84 | flex: 1; 85 | padding: 12px; 86 | position: relative; 87 | z-index: 2; 88 | } 89 | 90 | .timer { 91 | height: var(--button-height); 92 | border-radius: 9999px; 93 | width: 100%; 94 | flex-direction: row; 95 | gap: 12px; 96 | display: flex; 97 | align-items: center; 98 | justify-content: center; 99 | background-color: rgba(0, 0, 0, 0.2); 100 | padding: 12px; 101 | 102 | @media (min-width: 1024px) { 103 | flex: 1; 104 | } 105 | 106 | .timerBadge { 107 | display: flex; 108 | flex-direction: row; 109 | align-items: center; 110 | gap: 6px; 111 | background-color: black; 112 | border-radius: 9999px; 113 | color: white; 114 | height: 100%; 115 | padding: 0 12px; 116 | font-weight: 800; 117 | } 118 | 119 | .timerBar { 120 | height: 100%; 121 | width: 100%; 122 | border-radius: 9999px; 123 | overflow: hidden; 124 | background-color: var(--color-emerald-100); 125 | } 126 | 127 | .timerBarFill { 128 | height: 100%; 129 | width: 100%; 130 | background-color: var(--color-emerald-400); 131 | transition: width 0.3s ease; 132 | } 133 | 134 | &.lowTime { 135 | color: #e74c3c; 136 | animation: pulse 1s infinite; 137 | 138 | .timerBar { 139 | background-color: var(--color-orange-100); 140 | } 141 | 142 | .timerBarFill { 143 | background-color: var(--color-orange-400); 144 | } 145 | } 146 | } 147 | 148 | .scoreDisplay { 149 | font-size: 1.25rem; 150 | 
font-weight: 500; 151 | color: #0071e3; 152 | } 153 | 154 | .currentWord { 155 | display: flex; 156 | flex: 1; 157 | flex-direction: column; 158 | align-items: center; 159 | justify-content: center; 160 | text-align: center; 161 | width: 100%; 162 | margin-top: 50px; 163 | .helpText { 164 | font-size: 1rem; 165 | font-weight: 700; 166 | color: rgba(255, 255, 255, 0.5); 167 | } 168 | 169 | .word { 170 | font-size: 2rem; 171 | font-weight: 800; 172 | letter-spacing: 0.05em; 173 | line-height: 2; 174 | color: #ffffff; 175 | text-shadow: 0px 4px 0px rgba(0, 0, 0, 0.45); 176 | } 177 | 178 | @media (min-width: 1024px) { 179 | margin-top: 0; 180 | .word { 181 | font-size: 3rem; 182 | text-shadow: 0px 6px 0px rgba(0, 0, 0, 0.45); 183 | } 184 | } 185 | } 186 | 187 | .gameButton { 188 | padding: 0.85rem 0; 189 | font-size: 1.1rem; 190 | font-weight: 500; 191 | border: none; 192 | border-radius: 8px; 193 | cursor: pointer; 194 | transition: all 0.2s ease; 195 | } 196 | 197 | /* Primary button (Skip) */ 198 | .skipButton { 199 | flex: 2; /* Takes more space */ 200 | background-color: #e74c3c; 201 | color: white; 202 | } 203 | 204 | .skipButton:hover { 205 | background-color: #c0392b; 206 | transform: translateY(-2px); 207 | } 208 | 209 | /* Secondary button (Correct) - more subdued */ 210 | .correctButton { 211 | flex: 1; /* Takes less space */ 212 | background-color: #f5f5f7; /* Light gray background */ 213 | color: #333; /* Dark text */ 214 | border: 1px solid #ddd; /* Subtle border */ 215 | } 216 | 217 | .correctButton:hover { 218 | background-color: #e8e8ed; 219 | transform: translateY(-1px); 220 | } 221 | 222 | .gameReadyArea { 223 | display: flex; 224 | flex-direction: column; 225 | align-items: center; 226 | } 227 | 228 | .gameResults { 229 | margin-bottom: 1rem; 230 | padding: 0.75rem; 231 | background-color: #f8f9fa; 232 | border-radius: 8px; 233 | width: 100%; 234 | text-align: center; 235 | } 236 | 237 | .gameResults h2 { 238 | margin: 0 0 0.5rem 0; 239 | color: 
#333; 240 | font-size: 1.3rem; 241 | } 242 | 243 | .statusNote { 244 | margin: 0.5rem 0; 245 | padding: 0.6rem 1rem; 246 | background-color: #f8f9fa; 247 | border-left: 3px solid #0071e3; 248 | font-size: 0.95rem; 249 | color: #333; 250 | width: 100%; 251 | text-align: center; 252 | border-radius: 4px; 253 | } 254 | 255 | .compactInstructions { 256 | margin: 0.75rem 0; 257 | width: 100%; 258 | max-width: 400px; 259 | background-color: #f8f9fa; 260 | border-radius: 8px; 261 | padding: 0.75rem 1rem; 262 | } 263 | 264 | .compactInstructions h3 { 265 | margin: 0 0 0.5rem 0; 266 | color: #333; 267 | font-size: 1.1rem; 268 | text-align: center; 269 | } 270 | 271 | .compactInstructions ul { 272 | margin: 0; 273 | padding-left: 1.5rem; 274 | line-height: 1.4; 275 | } 276 | 277 | .compactInstructions li { 278 | margin-bottom: 0.4rem; 279 | font-size: 0.9rem; 280 | } 281 | 282 | .loadingDots { 283 | display: inline-block; 284 | animation: dotPulse 1.5s infinite linear; 285 | } 286 | 287 | @keyframes dotPulse { 288 | 0% { 289 | opacity: 0.2; 290 | } 291 | 20% { 292 | opacity: 1; 293 | } 294 | 100% { 295 | opacity: 0.2; 296 | } 297 | } 298 | 299 | @keyframes pulse { 300 | 0% { 301 | opacity: 0.8; 302 | } 303 | 50% { 304 | opacity: 1; 305 | } 306 | 100% { 307 | opacity: 0.8; 308 | } 309 | } 310 | 311 | /* Animation styles */ 312 | .correctWordDetected { 313 | animation: correctPulse 1.5s ease-in-out; 314 | position: relative; 315 | } 316 | 317 | .autoDetectedOverlay { 318 | position: absolute; 319 | top: 0; 320 | left: 0; 321 | right: 0; 322 | bottom: 0; 323 | display: flex; 324 | justify-content: center; 325 | align-items: center; 326 | background-color: rgba(46, 204, 113, 0.6); 327 | border-radius: 8px; 328 | animation: fadeIn 0.3s ease-in-out; 329 | z-index: 10; 330 | } 331 | 332 | .checkmarkContainer { 333 | width: 80px; 334 | height: 80px; 335 | animation: scaleUp 0.4s ease-out; 336 | } 337 | 338 | .checkmarkSvg { 339 | width: 100%; 340 | height: 100%; 341 | border-radius: 
50%; 342 | display: block; 343 | stroke-width: 4; 344 | stroke: #fff; 345 | stroke-miterlimit: 10; 346 | box-shadow: 0 0 0 rgba(46, 204, 113, 0.7); 347 | animation: fillCheck 0.3s ease-in-out 0.3s forwards, 348 | scale 0.2s ease-in-out 0.7s both; 349 | } 350 | 351 | .checkmarkCircle { 352 | stroke-dasharray: 166; 353 | stroke-dashoffset: 166; 354 | stroke-width: 4; 355 | stroke-miterlimit: 10; 356 | stroke: #fff; 357 | fill: transparent; 358 | animation: strokeCheck 0.5s cubic-bezier(0.65, 0, 0.45, 1) forwards; 359 | } 360 | 361 | .checkmarkCheck { 362 | transform-origin: 50% 50%; 363 | stroke-dasharray: 48; 364 | stroke-dashoffset: 48; 365 | animation: strokeCheck 0.25s cubic-bezier(0.65, 0, 0.45, 1) 0.6s forwards; 366 | } 367 | 368 | @keyframes strokeCheck { 369 | 100% { 370 | stroke-dashoffset: 0; 371 | } 372 | } 373 | 374 | @keyframes fillCheck { 375 | 100% { 376 | box-shadow: inset 0 0 0 50px transparent; 377 | } 378 | } 379 | 380 | @keyframes correctPulse { 381 | 0% { 382 | box-shadow: 0 0 0 0 rgba(46, 204, 113, 0.7); 383 | } 384 | 50% { 385 | box-shadow: 0 0 0 15px rgba(46, 204, 113, 0); 386 | } 387 | 100% { 388 | box-shadow: 0 0 0 0 rgba(46, 204, 113, 0); 389 | } 390 | } 391 | 392 | @keyframes fadeIn { 393 | from { 394 | opacity: 0; 395 | } 396 | to { 397 | opacity: 1; 398 | } 399 | } 400 | 401 | @keyframes scaleUp { 402 | from { 403 | transform: scale(0.5); 404 | opacity: 0; 405 | } 406 | to { 407 | transform: scale(1); 408 | opacity: 1; 409 | } 410 | } 411 | 412 | .incorrectWordDetected { 413 | animation: incorrectPulse 1.5s ease-in-out, 414 | shake 0.5s cubic-bezier(0.36, 0.07, 0.19, 0.97) both; 415 | position: relative; 416 | } 417 | 418 | .incorrectOverlay { 419 | position: absolute; 420 | top: 0; 421 | left: 0; 422 | right: 0; 423 | bottom: 0; 424 | display: flex; 425 | justify-content: center; 426 | align-items: center; 427 | background-color: rgba(255, 59, 48, 0.6); /* Red with transparency */ 428 | border-radius: 8px; 429 | animation: fadeIn 0.3s 
ease-in-out; 430 | z-index: 10; 431 | } 432 | 433 | .xmarkContainer { 434 | width: 80px; 435 | height: 80px; 436 | animation: scaleUp 0.4s ease-out; 437 | } 438 | 439 | .xmarkSvg { 440 | width: 100%; 441 | height: 100%; 442 | border-radius: 50%; 443 | display: block; 444 | stroke-width: 4; 445 | stroke: #fff; 446 | stroke-miterlimit: 10; 447 | box-shadow: 0 0 0 rgba(255, 59, 48, 0.7); 448 | animation: fillX 0.3s ease-in-out 0.3s forwards, 449 | scale 0.2s ease-in-out 0.7s both; 450 | } 451 | 452 | .xmarkCircle { 453 | stroke-dasharray: 166; 454 | stroke-dashoffset: 166; 455 | stroke-width: 4; 456 | stroke-miterlimit: 10; 457 | stroke: #fff; 458 | fill: transparent; 459 | animation: strokeX 0.5s cubic-bezier(0.65, 0, 0.45, 1) forwards; 460 | } 461 | 462 | .xmarkX { 463 | transform-origin: 50% 50%; 464 | stroke-dasharray: 48; 465 | stroke-dashoffset: 48; 466 | animation: strokeX 0.25s cubic-bezier(0.65, 0, 0.45, 1) 0.6s forwards; 467 | } 468 | 469 | @keyframes strokeX { 470 | 100% { 471 | stroke-dashoffset: 0; 472 | } 473 | } 474 | 475 | @keyframes fillX { 476 | 100% { 477 | box-shadow: inset 0 0 0 50px transparent; 478 | } 479 | } 480 | 481 | @keyframes incorrectPulse { 482 | 0% { 483 | box-shadow: 0 0 0 0 rgba(255, 59, 48, 0.7); 484 | } 485 | 50% { 486 | box-shadow: 0 0 0 15px rgba(255, 59, 48, 0); 487 | } 488 | 100% { 489 | box-shadow: 0 0 0 0 rgba(255, 59, 48, 0); 490 | } 491 | } 492 | 493 | @keyframes scale { 494 | 0%, 495 | 100% { 496 | transform: none; 497 | } 498 | 50% { 499 | transform: scale3d(1.1, 1.1, 1); 500 | } 501 | } 502 | 503 | @keyframes shake { 504 | 10%, 505 | 90% { 506 | transform: translate3d(-1px, 0, 0); 507 | } 508 | 20%, 509 | 80% { 510 | transform: translate3d(2px, 0, 0); 511 | } 512 | 30%, 513 | 50%, 514 | 70% { 515 | transform: translate3d(-3px, 0, 0); 516 | } 517 | 40%, 518 | 60% { 519 | transform: translate3d(3px, 0, 0); 520 | } 521 | } 522 | 523 | /* Game loading UI styles */ 524 | .gameLoadingContainer { 525 | display: flex; 526 | 
justify-content: center; 527 | align-items: center; 528 | height: 250px; /* Fixed height to prevent layout shifts */ 529 | width: 100%; 530 | } 531 | 532 | .gameLoadingContent { 533 | display: flex; 534 | flex-direction: column; 535 | align-items: center; 536 | justify-content: center; 537 | gap: 1.5rem; 538 | text-align: center; 539 | } 540 | 541 | .gameLoadingIcon { 542 | position: relative; 543 | width: 60px; 544 | height: 60px; 545 | display: flex; 546 | justify-content: center; 547 | align-items: center; 548 | } 549 | 550 | .pulseDot { 551 | width: 16px; 552 | height: 16px; 553 | background-color: #0071e3; 554 | border-radius: 50%; 555 | position: relative; 556 | } 557 | 558 | .pulseDot:before { 559 | content: ""; 560 | position: absolute; 561 | width: 100%; 562 | height: 100%; 563 | border-radius: 50%; 564 | background-color: #0071e3; 565 | opacity: 0.7; 566 | animation: pulse-wave 1.5s linear infinite; 567 | } 568 | 569 | .gameLoadingTitle { 570 | font-size: 1.5rem; 571 | font-weight: 500; 572 | color: #0071e3; 573 | margin: 0; 574 | } 575 | 576 | @keyframes pulse-wave { 577 | 0% { 578 | transform: scale(1); 579 | opacity: 0.7; 580 | } 581 | 50% { 582 | transform: scale(2.5); 583 | opacity: 0; 584 | } 585 | 100% { 586 | transform: scale(1); 587 | opacity: 0; 588 | } 589 | } 590 | -------------------------------------------------------------------------------- /server/word_list.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | # Define categories and words for the Word Wrangler game 4 | WORD_CATEGORIES = { 5 | "animals": [ 6 | "elephant", 7 | "penguin", 8 | "giraffe", 9 | "dolphin", 10 | "kangaroo", 11 | "octopus", 12 | "panda", 13 | "tiger", 14 | "koala", 15 | "flamingo", 16 | "hedgehog", 17 | "turtle", 18 | "zebra", 19 | "eagle", 20 | "sloth", 21 | "raccoon", 22 | "chameleon", 23 | "squirrel", 24 | "hamster", 25 | "cheetah", 26 | "platypus", 27 | "jellyfish", 28 | "parrot", 29 | "wolf", 30 | 
"hippo", 31 | "porcupine", 32 | "ostrich", 33 | "peacock", 34 | "alligator", 35 | "gorilla", 36 | "armadillo", 37 | "chipmunk", 38 | "walrus", 39 | "weasel", 40 | "skunk", 41 | "llama", 42 | "badger", 43 | "mongoose", 44 | "lemur", 45 | "otter", 46 | "bison", 47 | "falcon", 48 | "meerkat", 49 | "pelican", 50 | "cobra", 51 | "salamander", 52 | "lobster", 53 | "seal", 54 | "narwhal", 55 | "iguana", 56 | "piranha", 57 | "toucan", 58 | "moose", 59 | "lynx", 60 | "stingray", 61 | "starfish", 62 | "beaver", 63 | "vulture", 64 | "antelope", 65 | "jaguar", 66 | "seahorse", 67 | ], 68 | "food": [ 69 | "pizza", 70 | "sushi", 71 | "burrito", 72 | "pancake", 73 | "donut", 74 | "lasagna", 75 | "popcorn", 76 | "chocolate", 77 | "mango", 78 | "pretzel", 79 | "taco", 80 | "waffle", 81 | "cupcake", 82 | "avocado", 83 | "cookie", 84 | "croissant", 85 | "omelette", 86 | "cheesecake", 87 | "dumpling", 88 | "hummus", 89 | "gelato", 90 | "risotto", 91 | "ramen", 92 | "salsa", 93 | "kebab", 94 | "brownie", 95 | "guacamole", 96 | "bagel", 97 | "falafel", 98 | "biscuit", 99 | "churro", 100 | "meatball", 101 | "tiramisu", 102 | "enchilada", 103 | "couscous", 104 | "gumbo", 105 | "jambalaya", 106 | "baklava", 107 | "popsicle", 108 | "cannoli", 109 | "tofu", 110 | "macaron", 111 | "empanada", 112 | "pho", 113 | "casserole", 114 | "porridge", 115 | "granola", 116 | "fritter", 117 | "hazelnut", 118 | "kiwi", 119 | "pomegranate", 120 | "artichoke", 121 | "edamame", 122 | "zucchini", 123 | "cashew", 124 | "brisket", 125 | "custard", 126 | "nutmeg", 127 | "ginger", 128 | ], 129 | "household": [ 130 | "chair", 131 | "pillow", 132 | "mirror", 133 | "blanket", 134 | "lamp", 135 | "curtain", 136 | "sofa", 137 | "refrigerator", 138 | "blender", 139 | "bookshelf", 140 | "dishwasher", 141 | "carpet", 142 | "microwave", 143 | "table", 144 | "clock", 145 | "vase", 146 | "ottoman", 147 | "candle", 148 | "drawer", 149 | "cabinet", 150 | "doorknob", 151 | "silverware", 152 | "bathtub", 153 | "plunger", 154 | 
"toaster", 155 | "kettle", 156 | "spatula", 157 | "doormat", 158 | "hanger", 159 | "blinds", 160 | "ladle", 161 | "platter", 162 | "coaster", 163 | "napkin", 164 | "sponge", 165 | "thermostat", 166 | "showerhead", 167 | "coatrack", 168 | "nightstand", 169 | "cushion", 170 | "windowsill", 171 | "bedsheet", 172 | "countertop", 173 | "dustpan", 174 | "footstool", 175 | "flowerpot", 176 | "trashcan", 177 | "colander", 178 | "detergent", 179 | "chandelier", 180 | "laundry", 181 | "vacuum", 182 | "teapot", 183 | "duster", 184 | "lightbulb", 185 | "corkscrew", 186 | "paperweight", 187 | "doorstop", 188 | "radiator", 189 | ], 190 | "activities": [ 191 | "swimming", 192 | "painting", 193 | "dancing", 194 | "gardening", 195 | "skiing", 196 | "cooking", 197 | "hiking", 198 | "reading", 199 | "yoga", 200 | "fishing", 201 | "jogging", 202 | "biking", 203 | "baking", 204 | "singing", 205 | "camping", 206 | "knitting", 207 | "surfing", 208 | "photography", 209 | "bowling", 210 | "archery", 211 | "horseback", 212 | "meditation", 213 | "gymnastics", 214 | "volleyball", 215 | "tennis", 216 | "skating", 217 | "kayaking", 218 | "climbing", 219 | "juggling", 220 | "rowing", 221 | "snorkeling", 222 | "embroidery", 223 | "canoeing", 224 | "paddleboarding", 225 | "pottery", 226 | "birdwatching", 227 | "karaoke", 228 | "sailing", 229 | "pilates", 230 | "calligraphy", 231 | "skateboarding", 232 | "crossword", 233 | "origami", 234 | "beekeeping", 235 | "stargazing", 236 | "snowboarding", 237 | "woodworking", 238 | "fencing", 239 | "quilting", 240 | "foraging", 241 | "geocaching", 242 | "scrapbooking", 243 | "welding", 244 | "glassblowing", 245 | "whittling", 246 | "ziplining", 247 | ], 248 | "places": [ 249 | "beach", 250 | "library", 251 | "mountain", 252 | "airport", 253 | "stadium", 254 | "museum", 255 | "hospital", 256 | "castle", 257 | "garden", 258 | "hotel", 259 | "island", 260 | "desert", 261 | "university", 262 | "restaurant", 263 | "forest", 264 | "aquarium", 265 | "theater", 266 | 
"canyon", 267 | "lighthouse", 268 | "waterfall", 269 | "vineyard", 270 | "cathedral", 271 | "rainforest", 272 | "farmhouse", 273 | "greenhouse", 274 | "observatory", 275 | "marketplace", 276 | "boardwalk", 277 | "temple", 278 | "courtyard", 279 | "plantation", 280 | "lagoon", 281 | "volcano", 282 | "meadow", 283 | "oasis", 284 | "grotto", 285 | "peninsula", 286 | "aviary", 287 | "chapel", 288 | "coliseum", 289 | "bazaar", 290 | "marina", 291 | "orchard", 292 | "brewery", 293 | "sanctuary", 294 | "fortress", 295 | "prairie", 296 | "reservation", 297 | "tavern", 298 | "monument", 299 | "manor", 300 | "pavilion", 301 | "boulevard", 302 | "campground", 303 | ], 304 | "objects": [ 305 | "umbrella", 306 | "scissors", 307 | "camera", 308 | "wallet", 309 | "bicycle", 310 | "backpack", 311 | "telescope", 312 | "balloon", 313 | "compass", 314 | "notebook", 315 | "keyboard", 316 | "magnet", 317 | "headphones", 318 | "hammer", 319 | "envelope", 320 | "binoculars", 321 | "tambourine", 322 | "boomerang", 323 | "megaphone", 324 | "suitcase", 325 | "pinwheel", 326 | "kaleidoscope", 327 | "microscope", 328 | "hourglass", 329 | "harmonica", 330 | "trampoline", 331 | "bubblegum", 332 | "xylophone", 333 | "typewriter", 334 | "screwdriver", 335 | "whistle", 336 | "chessboard", 337 | "handcuffs", 338 | "stethoscope", 339 | "stopwatch", 340 | "parachute", 341 | "blowtorch", 342 | "calculator", 343 | "thermometer", 344 | "mousetrap", 345 | "crowbar", 346 | "paintbrush", 347 | "metronome", 348 | "surfboard", 349 | "flipchart", 350 | "dartboard", 351 | "wrench", 352 | "flippers", 353 | "thimble", 354 | "protractor", 355 | "snorkel", 356 | "doorbell", 357 | "flashlight", 358 | "pendulum", 359 | "abacus", 360 | ], 361 | "jobs": [ 362 | "teacher", 363 | "doctor", 364 | "chef", 365 | "firefighter", 366 | "pilot", 367 | "astronaut", 368 | "carpenter", 369 | "musician", 370 | "detective", 371 | "scientist", 372 | "farmer", 373 | "architect", 374 | "journalist", 375 | "electrician", 376 | 
"dentist", 377 | "veterinarian", 378 | "librarian", 379 | "photographer", 380 | "mechanic", 381 | "attorney", 382 | "barista", 383 | "plumber", 384 | "bartender", 385 | "surgeon", 386 | "therapist", 387 | "animator", 388 | "programmer", 389 | "pharmacist", 390 | "translator", 391 | "accountant", 392 | "florist", 393 | "butcher", 394 | "lifeguard", 395 | "beekeeper", 396 | "locksmith", 397 | "choreographer", 398 | "mortician", 399 | "paramedic", 400 | "blacksmith", 401 | "surveyor", 402 | "botanist", 403 | "chiropractor", 404 | "undertaker", 405 | "acrobat", 406 | "welder", 407 | "hypnotist", 408 | "zoologist", 409 | "mime", 410 | "sommelier", 411 | "meteorologist", 412 | "stuntman", 413 | "diplomat", 414 | "entomologist", 415 | "puppeteer", 416 | "archivist", 417 | "cartographer", 418 | "paleontologist", 419 | ], 420 | "transportation": [ 421 | "helicopter", 422 | "submarine", 423 | "scooter", 424 | "sailboat", 425 | "train", 426 | "motorcycle", 427 | "airplane", 428 | "canoe", 429 | "tractor", 430 | "limousine", 431 | "escalator", 432 | "skateboard", 433 | "ambulance", 434 | "ferry", 435 | "rocket", 436 | "hovercraft", 437 | "gondola", 438 | "segway", 439 | "zeppelin", 440 | "bulldozer", 441 | "speedboat", 442 | "unicycle", 443 | "monorail", 444 | "snowmobile", 445 | "paddleboat", 446 | "trolley", 447 | "rickshaw", 448 | "caboose", 449 | "glider", 450 | "bobsled", 451 | "jetpack", 452 | "forklift", 453 | "dirigible", 454 | "chariot", 455 | "sidecar", 456 | "tandem", 457 | "battleship", 458 | "catamaran", 459 | "toboggan", 460 | "dinghy", 461 | "hydrofoil", 462 | "sleigh", 463 | "hatchback", 464 | "kayak", 465 | "stagecoach", 466 | "tugboat", 467 | "airship", 468 | "skiff", 469 | "carriage", 470 | "rowboat", 471 | "chairlift", 472 | "steamroller", 473 | ], 474 | "clothing": [ 475 | "sweater", 476 | "sandals", 477 | "tuxedo", 478 | "poncho", 479 | "sneakers", 480 | "bikini", 481 | "cardigan", 482 | "overalls", 483 | "kimono", 484 | "mittens", 485 | "suspenders", 486 
| "kilt", 487 | "leggings", 488 | "apron", 489 | "bowtie", 490 | "earmuffs", 491 | "fedora", 492 | "wetsuit", 493 | "pajamas", 494 | "sombrero", 495 | "raincoat", 496 | "beret", 497 | "turtleneck", 498 | "parka", 499 | "tiara", 500 | "toga", 501 | "bandana", 502 | "corset", 503 | "sarong", 504 | "tunic", 505 | "visor", 506 | "ascot", 507 | "fez", 508 | "moccasins", 509 | "blazer", 510 | "chaps", 511 | "romper", 512 | "waders", 513 | "clogs", 514 | "garter", 515 | "camisole", 516 | "galoshes", 517 | "bolero", 518 | "spats", 519 | "pantyhose", 520 | "onesie", 521 | "stiletto", 522 | "vest", 523 | "windbreaker", 524 | "scarf", 525 | "bonnet", 526 | ], 527 | "nature": [ 528 | "glacier", 529 | "sequoia", 530 | "geyser", 531 | "avalanche", 532 | "tornado", 533 | "quicksand", 534 | "stalactite", 535 | "hurricane", 536 | "asteroid", 537 | "tundra", 538 | "galaxy", 539 | "nebula", 540 | "earthquake", 541 | "stalagmite", 542 | "constellation", 543 | "crystal", 544 | "tributary", 545 | "abyss", 546 | "monsoon", 547 | "magma", 548 | "erosion", 549 | "iceberg", 550 | "mudslide", 551 | "delta", 552 | "aurora", 553 | "gravity", 554 | "humidity", 555 | "sinkhole", 556 | "wildfire", 557 | "tropics", 558 | "tsunami", 559 | "eclipse", 560 | "metabolism", 561 | "mirage", 562 | "hemisphere", 563 | "spectrum", 564 | "fossil", 565 | "plateau", 566 | "groundwater", 567 | "undergrowth", 568 | "oxygen", 569 | "molecule", 570 | "pollination", 571 | "algae", 572 | "carbon", 573 | "nitrogen", 574 | "organism", 575 | "nucleus", 576 | "equator", 577 | "solstice", 578 | "cocoon", 579 | "germination", 580 | "metamorphosis", 581 | "nocturnal", 582 | "symbiosis", 583 | "ecosystem", 584 | "biodiversity", 585 | ], 586 | "emotions": [ 587 | "happiness", 588 | "sadness", 589 | "anxiety", 590 | "surprise", 591 | "anger", 592 | "curiosity", 593 | "embarrassment", 594 | "nostalgia", 595 | "envy", 596 | "gratitude", 597 | "remorse", 598 | "boredom", 599 | "excitement", 600 | "loneliness", 601 | "pride", 602 
def generate_game_words(num_words=20):
    """Generate a random selection of words for the Word Wrangler game.

    Every word from every category in ``WORD_CATEGORIES`` is pooled,
    duplicates are dropped, and a random sample is drawn without
    replacement.

    Args:
        num_words: Number of words to select for the game. Requests larger
            than the pool are capped at the pool size; requests below zero
            are treated as zero.

    Returns:
        List of randomly selected, unique words.
    """
    # dict.fromkeys() de-duplicates while keeping first-seen order. A set
    # iterates strings in an order that depends on per-process hash
    # randomization, which would make the sample irreproducible even after
    # random.seed().
    all_words = list(
        dict.fromkeys(
            word
            for category_words in WORD_CATEGORIES.values()
            for word in category_words
        )
    )

    # random.sample() raises ValueError when k is negative or larger than
    # the population, so clamp the request into [0, len(all_words)].
    sample_size = max(0, min(num_words, len(all_words)))

    return random.sample(all_words, sample_size)
8 | 9 | This demo version is intended to be deployed to 10 | Pipecat Cloud. For more information, visit: 11 | - Deployment Quickstart: https://docs.pipecat.daily.co/quickstart 12 | - Build for Twilio: https://docs.pipecat.daily.co/pipecat-in-production/telephony/twilio-mediastreams 13 | """ 14 | 15 | import asyncio 16 | import json 17 | import os 18 | import re 19 | import sys 20 | from typing import Any, Mapping, Optional 21 | 22 | from dotenv import load_dotenv 23 | from fastapi import WebSocket 24 | from loguru import logger 25 | from pipecatcloud import WebSocketSessionArguments 26 | from word_list import generate_game_words 27 | 28 | from pipecat.audio.filters.krisp_filter import KrispFilter 29 | from pipecat.audio.resamplers.soxr_resampler import SOXRAudioResampler 30 | from pipecat.audio.vad.silero import SileroVADAnalyzer 31 | from pipecat.frames.frames import ( 32 | BotStoppedSpeakingFrame, 33 | CancelFrame, 34 | EndFrame, 35 | Frame, 36 | InputAudioRawFrame, 37 | LLMFullResponseEndFrame, 38 | LLMTextFrame, 39 | StartFrame, 40 | TTSAudioRawFrame, 41 | TTSSpeakFrame, 42 | ) 43 | from pipecat.pipeline.parallel_pipeline import ParallelPipeline 44 | from pipecat.pipeline.pipeline import Pipeline 45 | from pipecat.pipeline.runner import PipelineRunner 46 | from pipecat.pipeline.task import PipelineParams, PipelineTask 47 | from pipecat.processors.aggregators.openai_llm_context import ( 48 | OpenAILLMContext, 49 | ) 50 | from pipecat.processors.consumer_processor import ConsumerProcessor 51 | from pipecat.processors.filters.stt_mute_filter import STTMuteConfig, STTMuteFilter, STTMuteStrategy 52 | from pipecat.processors.frame_processor import FrameDirection, FrameProcessor 53 | from pipecat.processors.producer_processor import ProducerProcessor 54 | from pipecat.serializers.twilio import TwilioFrameSerializer 55 | from pipecat.services.gemini_multimodal_live.gemini import ( 56 | GeminiMultimodalLiveLLMService, 57 | GeminiMultimodalModalities, 58 | InputParams, 
59 | ) 60 | from pipecat.services.google.tts import GoogleTTSService 61 | from pipecat.sync.base_notifier import BaseNotifier 62 | from pipecat.sync.event_notifier import EventNotifier 63 | from pipecat.transports.network.fastapi_websocket import ( 64 | FastAPIWebsocketParams, 65 | FastAPIWebsocketTransport, 66 | ) 67 | from pipecat.utils.text.base_text_filter import BaseTextFilter 68 | 69 | load_dotenv(override=True) 70 | 71 | 72 | logger.add(sys.stderr, level="DEBUG") 73 | 74 | GAME_DURATION_SECONDS = 120 75 | NUM_WORDS_PER_GAME = 20 76 | HOST_VOICE_ID = "en-US-Chirp3-HD-Charon" 77 | PLAYER_VOICE_ID = "Kore" 78 | 79 | # Define conversation modes with their respective prompt templates 80 | game_player_prompt = """You are a player for a game of Word Wrangler. 81 | 82 | GAME RULES: 83 | 1. The user will be given a word or phrase that they must describe to you 84 | 2. The user CANNOT say any part of the word/phrase directly 85 | 3. You must try to guess the word/phrase based on the user's description 86 | 4. Once you guess correctly, the user will move on to their next word 87 | 5. The user is trying to get through as many words as possible in 60 seconds 88 | 6. The external application will handle timing and keeping score 89 | 90 | YOUR ROLE: 91 | 1. Listen carefully to the user's descriptions 92 | 2. Make intelligent guesses based on what they say 93 | 3. When you think you know the answer, state it clearly: "Is it [your guess]?" 94 | 4. If you're struggling, ask for more specific clues 95 | 5. Keep the game moving quickly - make guesses promptly 96 | 6. Be enthusiastic and encouraging 97 | 98 | IMPORTANT: 99 | - Keep all responses brief - the game is timed! 100 | - Make multiple guesses if needed 101 | - Use your common knowledge to make educated guesses 102 | - If the user indicates you got it right, just say "Got it!" and prepare for the next word 103 | - If you've made several wrong guesses, simply ask for "Another clue please?" 
104 | 105 | Start by guessing once you hear the user describe the word or phrase.""" 106 | 107 | game_host_prompt = """You are the AI host for a game of Word Wrangler. There are two players in the game: the human describer and the AI guesser. 108 | 109 | GAME RULES: 110 | 1. You, the host, will give the human describer a word or phrase that they must describe 111 | 2. The describer CANNOT say any part of the word/phrase directly 112 | 3. The AI guesser will try to guess the word/phrase based on the describer's description 113 | 4. Once the guesser guesses correctly, move on to the next word 114 | 5. The describer is trying to get through as many words as possible in 60 seconds 115 | 6. The describer can say "skip" or "pass" to get a new word if they find a word too difficult 116 | 7. The describer can ask you to repeat the current word if they didn't hear it clearly 117 | 8. You'll keep track of the score (1 point for each correct guess) 118 | 9. The external application will handle timing 119 | 120 | YOUR ROLE: 121 | 1. Start with this exact brief introduction: "Welcome to Word Wrangler! I'll give you words to describe, and the A.I. player will try to guess them. Remember, don't say any part of the word itself. Here's your first word: [word]." 122 | 2. Provide words to the describer. Choose 1 or 2 word phrases that cover a variety of topics, including animals, objects, places, and actions. 123 | 3. IMPORTANT: You will hear DIFFERENT types of input: 124 | a. DESCRIPTIONS from the human (which you should IGNORE) 125 | b. AFFIRMATIONS from the human (like "correct", "that's right", "you got it") which you should IGNORE 126 | c. GUESSES from the AI player (which will be in the form of "Is it [word]?" or similar question format) 127 | d. SKIP REQUESTS from the human (if they say "skip", "pass", or "next word please") 128 | e. REPEAT REQUESTS from the human (if they say "repeat", "what was that?", "say again", etc.) 129 | 130 | 4. 
HOW TO RESPOND: 131 | - If you hear a DESCRIPTION or AFFIRMATION from the human, respond with exactly "IGNORE" (no other text) 132 | - If you hear a GUESS (in question form) and it's INCORRECT, respond with exactly "NO" (no other text) 133 | - If you hear a GUESS (in question form) and it's CORRECT, respond with "Correct! That's [N] points. Your next word is [new word]" where N is the current score 134 | - If you hear a SKIP REQUEST, respond with "The new word is [new word]" (don't change the score) 135 | - If you hear a REPEAT REQUEST, respond with "Your word is [current word]" (don't change the score) 136 | 137 | 5. SCORING: 138 | - Start with a score of 0 139 | - Add 1 point for each correct guess by the AI player 140 | - Do NOT add points for skipped words 141 | - Announce the current score after every correct guess 142 | 143 | RESPONSE EXAMPLES: 144 | - Human says: "This is something you use to write" → You respond: "IGNORE" 145 | - Human says: "That's right!" or "You got it!" → You respond: "IGNORE" 146 | - Human says: "Wait, what was my word again?" → You respond: "Your word is [current word]" 147 | - Human says: "Can you repeat that?" → You respond: "Your word is [current word]" 148 | - AI says: "Is it a pen?" → If correct and it's the first point, you respond: "Correct! That's 1 point. Your next word is [new word]" 149 | - AI says: "Is it a pencil?" → If correct and it's the third point, you respond: "Correct! That's 3 points. Your next word is [new word]" 150 | - AI says: "Is it a marker?" → If incorrect, you respond: "NO" 151 | - Human says: "Skip this one" or "Pass" → You respond: "The new word is [new word]" 152 | 153 | IMPORTANT GUIDELINES: 154 | - Choose words that range from easy to moderately difficult 155 | - Keep all responses brief - the game is timed! 
class HostResponseTextFilter(BaseTextFilter):
    """Text filter that silences the host's control-word replies.

    The host LLM is prompted to answer with the bare words "NO" or "IGNORE" in
    situations where nothing should be spoken. This filter maps those replies to
    an empty string and hands every other text back unchanged.
    """

    def __init__(self):
        # Interruption state is recorded by the two handlers below; filter() does not read it.
        self._interrupted = False

    def update_settings(self, settings: Mapping[str, Any]):
        """Accept a settings mapping and discard it; nothing here is configurable."""
        return None

    async def filter(self, text: str) -> str:
        """Return "" when ``text`` is only "NO"/"IGNORE", otherwise ``text`` as given.

        The comparison ignores surrounding whitespace and letter case; the text
        that is returned is never modified.
        """
        normalized = text.strip().upper()
        return "" if normalized in ("NO", "IGNORE") else text

    async def handle_interruption(self):
        """Record that an interruption is in progress."""
        self._interrupted = True

    async def reset_interruption(self):
        """Record that the interruption is over."""
        self._interrupted = False
class GameStateTracker(FrameProcessor):
    """Watch the host LLM's text output for new-word announcements and score changes.

    Streamed ``LLMTextFrame`` chunks are accumulated until an
    ``LLMFullResponseEndFrame`` arrives. The completed response is then checked for:

    1. A phrase announcing a word, in which case ``new_word_notifier`` is notified.
    2. A "that's N point" score announcement, which raises the tracked score.

    Every frame is forwarded downstream exactly once, whatever its type.
    """

    def __init__(self, new_word_notifier: BaseNotifier):
        super().__init__()
        self._new_word_notifier = new_word_notifier
        self._text_buffer = ""
        self._current_score = 0

        # Lower-case markers the host uses when it hands out a word
        self._key_phrases = ["your word is", "new word is", "next word is"]

        # Captures N from "That's N point(s)"
        self._score_pattern = re.compile(r"that's (\d+) point", re.IGNORECASE)

    @property
    def current_score(self) -> int:
        """Get the highest score announced by the host so far."""
        return self._current_score

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        if isinstance(frame, LLMTextFrame):
            chunk = frame.text
            if chunk.strip() in ("NO", "IGNORE"):
                # Control-word chunks never enter the buffer
                logger.debug("Skipping NO/IGNORE response")
            else:
                self._text_buffer += chunk
        elif isinstance(frame, LLMFullResponseEndFrame) and self._text_buffer:
            # The response is complete: inspect it, then start a fresh buffer
            lowered = self._text_buffer.lower()
            await self._detect_new_word(lowered)
            self._update_score(lowered)
            self._text_buffer = ""

        # Always push the frame through
        await self.push_frame(frame, direction)

    async def _detect_new_word(self, buffer_lower: str):
        """Notify once if the lower-cased response contains any key phrase."""
        if any(phrase in buffer_lower for phrase in self._key_phrases):
            await self._new_word_notifier.notify()
        else:
            logger.debug("No new word phrases detected")

    def _update_score(self, buffer_lower: str):
        """Raise the tracked score if the response announces a higher one."""
        score_match = self._score_pattern.search(buffer_lower)
        if score_match is None:
            logger.debug(f"No score pattern match in: '{buffer_lower}'")
            return

        try:
            score = int(score_match.group(1))
        except ValueError as e:
            logger.warning(f"Error parsing score: {e}")
            return

        # The score only ever moves upwards; lower or equal announcements are ignored
        if score > self._current_score:
            logger.debug(f"Score updated from {self._current_score} to {score}")
            self._current_score = score
        else:
            logger.debug(f"Ignoring score {score} <= current score {self._current_score}")
class ResettablePlayerLLM(GeminiMultimodalLiveLLMService):
    """Player LLM that drops its session on every new word and reconnects afterwards.

    Two notifiers drive the reset cycle:

    * ``new_word_notifier`` fires: disconnect immediately, restore the original
      system instruction and mark a reset as pending.
    * ``host_stopped_speaking_notifier`` fires: if a reset is pending, reconnect.

    Args:
        api_key: API key forwarded to the base service.
        system_instruction: Instruction restored before every reconnect.
        new_word_notifier: Signals that the host announced a word.
        host_stopped_speaking_notifier: Signals that the host finished speaking.
        voice_id: Voice forwarded to the base service.
        **kwargs: Passed through unchanged to the base service.
    """

    def __init__(
        self,
        api_key: str,
        system_instruction: str,
        new_word_notifier: BaseNotifier,
        host_stopped_speaking_notifier: BaseNotifier,
        voice_id: str = PLAYER_VOICE_ID,
        **kwargs,
    ):
        super().__init__(
            api_key=api_key, voice_id=voice_id, system_instruction=system_instruction, **kwargs
        )
        self._new_word_notifier = new_word_notifier
        self._host_stopped_speaking_notifier = host_stopped_speaking_notifier
        self._base_system_instruction = system_instruction
        self._reset_task: Optional[asyncio.Task] = None
        self._pending_reset: bool = False

    async def start(self, frame: StartFrame):
        await super().start(frame)

        # Start the notifier listener task unless one is already running
        if not self._reset_task or self._reset_task.done():
            self._reset_task = self.create_task(self._listen_for_notifications())

    async def stop(self, frame: EndFrame):
        await self._cancel_reset_task()
        await super().stop(frame)

    async def cancel(self, frame: CancelFrame):
        await self._cancel_reset_task()
        await super().cancel(frame)

    async def _cancel_reset_task(self):
        """Cancel the notification listener if it is still running."""
        if self._reset_task and not self._reset_task.done():
            await self.cancel_task(self._reset_task)
            self._reset_task = None

    async def _listen_for_notifications(self):
        """Listen for new word and host stopped speaking notifications."""
        try:
            # One child task per notifier
            new_word_task = self.create_task(self._listen_for_new_word())
            host_stopped_task = self.create_task(self._listen_for_host_stopped())

            # Both listeners loop forever, so this only returns via cancellation or error
            await asyncio.gather(new_word_task, host_stopped_task)

        except asyncio.CancelledError:
            logger.debug(f"{self}: Notification listener tasks cancelled")
            raise
        except Exception as e:
            logger.exception(f"{self}: Error in notification listeners: {e}")
            raise

    async def _listen_for_new_word(self):
        """Listen for new word notifications and flag that a reset is needed."""
        while True:
            await self._new_word_notifier.wait()
            logger.info(
                f"{self}: Received new word notification, disconnecting and waiting for host to finish"
            )

            # Disconnect immediately to stop processing
            await self._disconnect()

            # Reset the system instruction
            self._system_instruction = self._base_system_instruction

            # Flag that we need to reconnect when the host stops speaking
            self._pending_reset = True

    async def _listen_for_host_stopped(self):
        """Listen for host stopped speaking and reconnect if a reset is pending."""
        while True:
            await self._host_stopped_speaking_notifier.wait()

            if not self._pending_reset:
                continue

            logger.info(f"{self}: Host finished speaking, completing the LLM reset")

            # Clear the flag BEFORE awaiting the reconnect. If a new-word
            # notification arrives while _connect() is in flight, the flag it
            # sets must survive so the next host-stopped event reconnects
            # again; clearing it afterwards would silently drop that reset.
            self._pending_reset = False

            await self._connect()

            logger.info(f"{self}: LLM reset complete")
529 | 530 | Args: 531 | frame (Frame): The frame to transform (expected to be TTSAudioRawFrame) 532 | 533 | Returns: 534 | InputAudioRawFrame: The transformed and resampled input audio frame 535 | """ 536 | if isinstance(frame, TTSAudioRawFrame): 537 | # Resample the audio from 24kHz to 16kHz 538 | resampled_audio = await resampler.resample( 539 | frame.audio, 540 | frame.sample_rate, # Source rate (24kHz) 541 | 16000, # Target rate (16kHz) 542 | ) 543 | 544 | # Create a new InputAudioRawFrame with the resampled audio 545 | input_frame = InputAudioRawFrame( 546 | audio=resampled_audio, 547 | sample_rate=16000, # New sample rate 548 | num_channels=frame.num_channels, 549 | ) 550 | return input_frame 551 | 552 | 553 | async def main(ws: WebSocket): 554 | logger.debug("Starting WebSocket bot") 555 | 556 | game_words = generate_game_words(NUM_WORDS_PER_GAME) 557 | words_string = ", ".join(f'"{word}"' for word in game_words) 558 | logger.debug(f"Game words: {words_string}") 559 | 560 | # Read initial WebSocket messages 561 | start_data = ws.iter_text() 562 | await start_data.__anext__() 563 | 564 | # Second message contains the call details 565 | call_data = json.loads(await start_data.__anext__()) 566 | 567 | # Extract both StreamSid and CallSid 568 | stream_sid = call_data["start"]["streamSid"] 569 | call_sid = call_data["start"]["callSid"] 570 | 571 | logger.info(f"Connected to Twilio call: CallSid={call_sid}, StreamSid={stream_sid}") 572 | 573 | # Create serializer with both IDs and auto_hang_up enabled 574 | serializer = TwilioFrameSerializer( 575 | stream_sid=stream_sid, 576 | call_sid=call_sid, 577 | account_sid=os.getenv("TWILIO_ACCOUNT_SID"), 578 | auth_token=os.getenv("TWILIO_AUTH_TOKEN"), 579 | ) 580 | 581 | transport = FastAPIWebsocketTransport( 582 | websocket=ws, 583 | params=FastAPIWebsocketParams( 584 | audio_in_enabled=True, 585 | audio_in_filter=KrispFilter(), 586 | audio_out_enabled=True, 587 | add_wav_header=False, 588 | vad_enabled=True, 589 | 
vad_analyzer=SileroVADAnalyzer(), 590 | vad_audio_passthrough=True, 591 | serializer=serializer, 592 | ), 593 | ) 594 | 595 | player_instruction = f"""{game_player_prompt} 596 | 597 | Important guidelines: 598 | 1. Your responses will be converted to speech, so keep them concise and conversational. 599 | 2. Don't use special characters or formatting that wouldn't be natural in speech. 600 | 3. Encourage the user to elaborate when appropriate.""" 601 | 602 | host_instruction = f"""{game_host_prompt} 603 | 604 | GAME WORDS: 605 | Use ONLY these words for the game (in any order): {words_string} 606 | 607 | Important guidelines: 608 | 1. Your responses will be converted to speech, so keep them concise and conversational. 609 | 2. Don't use special characters or formatting that wouldn't be natural in speech. 610 | 3. ONLY use words from the provided list above when giving words to the player.""" 611 | 612 | intro_message = """Start with this exact brief introduction: "Welcome to Word Wrangler! I'll give you words to describe, and the A.I. player will try to guess them. Remember, don't say any part of the word itself. Here's your first word: [word]." 
""" 613 | 614 | # Create the STT mute filter if we have strategies to apply 615 | stt_mute_filter = STTMuteFilter( 616 | config=STTMuteConfig(strategies={STTMuteStrategy.MUTE_UNTIL_FIRST_BOT_COMPLETE}) 617 | ) 618 | 619 | host_llm = GeminiMultimodalLiveLLMService( 620 | api_key=os.getenv("GOOGLE_API_KEY"), 621 | system_instruction=host_instruction, 622 | params=InputParams(modalities=GeminiMultimodalModalities.TEXT), 623 | ) 624 | 625 | host_tts = GoogleTTSService( 626 | voice_id=HOST_VOICE_ID, 627 | credentials_path=os.getenv("GOOGLE_TEST_CREDENTIALS_FILE"), 628 | text_filters=[HostResponseTextFilter()], 629 | ) 630 | 631 | producer = ProducerProcessor( 632 | filter=tts_audio_raw_frame_filter, 633 | transformer=tts_to_input_audio_transformer, 634 | passthrough=True, 635 | ) 636 | consumer = ConsumerProcessor(producer=producer) 637 | 638 | # Create the notifiers 639 | bot_speaking_notifier = EventNotifier() 640 | new_word_notifier = EventNotifier() 641 | 642 | # Create BotStoppedSpeakingNotifier to detect when host bot stops speaking 643 | bot_stopped_speaking_detector = BotStoppedSpeakingNotifier(bot_speaking_notifier) 644 | 645 | # Create StartFrameGate to block Player LLM until host has stopped speaking 646 | start_frame_gate = StartFrameGate(bot_speaking_notifier) 647 | 648 | # Create GameStateTracker to handle new words and score tracking 649 | game_state_tracker = GameStateTracker(new_word_notifier) 650 | 651 | # Create a resettable player LLM that coordinates between notifiers 652 | player_llm = ResettablePlayerLLM( 653 | api_key=os.getenv("GOOGLE_API_KEY"), 654 | system_instruction=player_instruction, 655 | new_word_notifier=new_word_notifier, 656 | host_stopped_speaking_notifier=bot_speaking_notifier, 657 | voice_id=PLAYER_VOICE_ID, 658 | ) 659 | 660 | # Set up the initial context for the conversation 661 | messages = [ 662 | { 663 | "role": "user", 664 | "content": intro_message, 665 | }, 666 | ] 667 | 668 | # This sets up the LLM context by providing 
messages and tools 669 | context = OpenAILLMContext(messages) 670 | context_aggregator = host_llm.create_context_aggregator(context) 671 | 672 | pipeline = Pipeline( 673 | [ 674 | transport.input(), # Receive audio/video from Daily call 675 | stt_mute_filter, # Filter out speech during the bot's initial turn 676 | ParallelPipeline( 677 | # Host branch: manages the game and provides words 678 | [ 679 | consumer, # Receives audio from the player branch 680 | host_llm, # AI host that provides words and tracks score 681 | game_state_tracker, # Tracks words and score from host responses 682 | host_tts, # Converts host text to speech 683 | bot_stopped_speaking_detector, # Notifies when host stops speaking 684 | ], 685 | # Player branch: guesses words based on human descriptions 686 | [ 687 | start_frame_gate, # Gates the player until host finishes intro 688 | player_llm, # AI player that makes guesses 689 | producer, # Collects audio frames to be passed to the consumer 690 | ], 691 | ), 692 | transport.output(), # Send audio/video back to Daily call 693 | ] 694 | ) 695 | 696 | task = PipelineTask( 697 | pipeline, 698 | params=PipelineParams( 699 | audio_out_sample_rate=8000, 700 | allow_interruptions=False, 701 | enable_metrics=True, 702 | enable_usage_metrics=True, 703 | ), 704 | ) 705 | 706 | # Create the game timer 707 | game_timer = GameTimer(task, game_state_tracker, game_duration_seconds=GAME_DURATION_SECONDS) 708 | 709 | @transport.event_handler("on_client_connected") 710 | async def on_client_connected(transport, client): 711 | logger.info(f"Client connected: {client}") 712 | # Kick off the conversation 713 | await task.queue_frames([context_aggregator.user().get_context_frame()]) 714 | # Start the game timer 715 | game_timer.start() 716 | 717 | @transport.event_handler("on_client_disconnected") 718 | async def on_client_disconnected(transport, client): 719 | logger.info(f"Client disconnected: {client}") 720 | # Stop the timer 721 | game_timer.stop() 722 | # 
async def bot(args: WebSocketSessionArguments):
    """Main bot entry point for WebSocket connections.

    Args:
        args: Session arguments for this connection. Only ``args.websocket`` is
            read here; it is handed to ``main`` as the WebSocket to serve.

    Raises:
        Exception: Any error raised by ``main`` is logged with its traceback
            and then re-raised to the caller.
    """
    logger.info("WebSocket bot process initialized")

    try:
        await main(args.websocket)
        logger.info("WebSocket bot process completed")
    except Exception as e:
        logger.exception(f"Error in WebSocket bot process: {e}")
        raise
Run in local development mode: 18 | LOCAL_RUN=1 python word_wrangler.py 19 | """ 20 | 21 | import asyncio 22 | import os 23 | import re 24 | import sys 25 | from typing import Any, Mapping, Optional 26 | 27 | import aiohttp 28 | from dotenv import load_dotenv 29 | from loguru import logger 30 | from pipecatcloud.agent import DailySessionArguments 31 | from word_list import generate_game_words 32 | 33 | from pipecat.audio.resamplers.soxr_resampler import SOXRAudioResampler 34 | from pipecat.audio.vad.silero import SileroVADAnalyzer 35 | from pipecat.frames.frames import ( 36 | BotStoppedSpeakingFrame, 37 | CancelFrame, 38 | EndFrame, 39 | Frame, 40 | InputAudioRawFrame, 41 | LLMFullResponseEndFrame, 42 | LLMTextFrame, 43 | StartFrame, 44 | TTSAudioRawFrame, 45 | TTSSpeakFrame, 46 | ) 47 | from pipecat.pipeline.parallel_pipeline import ParallelPipeline 48 | from pipecat.pipeline.pipeline import Pipeline 49 | from pipecat.pipeline.runner import PipelineRunner 50 | from pipecat.pipeline.task import PipelineParams, PipelineTask 51 | from pipecat.processors.aggregators.openai_llm_context import ( 52 | OpenAILLMContext, 53 | ) 54 | from pipecat.processors.consumer_processor import ConsumerProcessor 55 | from pipecat.processors.filters.stt_mute_filter import STTMuteConfig, STTMuteFilter, STTMuteStrategy 56 | from pipecat.processors.frame_processor import FrameDirection, FrameProcessor 57 | from pipecat.processors.producer_processor import ProducerProcessor 58 | from pipecat.services.gemini_multimodal_live.gemini import ( 59 | GeminiMultimodalLiveLLMService, 60 | GeminiMultimodalModalities, 61 | InputParams, 62 | ) 63 | from pipecat.services.google.tts import GoogleTTSService 64 | from pipecat.sync.base_notifier import BaseNotifier 65 | from pipecat.sync.event_notifier import EventNotifier 66 | from pipecat.transports.services.daily import DailyParams, DailyTransport 67 | from pipecat.utils.text.base_text_filter import BaseTextFilter 68 | 69 | load_dotenv(override=True) 70 
| 71 | # Check if we're in local development mode 72 | LOCAL_RUN = os.getenv("LOCAL_RUN") 73 | if LOCAL_RUN: 74 | import webbrowser 75 | 76 | try: 77 | from runner import configure 78 | except ImportError: 79 | logger.error("Could not import local_runner module. Local development mode may not work.") 80 | 81 | 82 | logger.add(sys.stderr, level="DEBUG") 83 | 84 | GAME_DURATION_SECONDS = 120 85 | NUM_WORDS_PER_GAME = 20 86 | HOST_VOICE_ID = "en-US-Chirp3-HD-Charon" 87 | PLAYER_VOICE_ID = "Kore" 88 | 89 | # Define conversation modes with their respective prompt templates 90 | game_player_prompt = """You are a player for a game of Word Wrangler. 91 | 92 | GAME RULES: 93 | 1. The user will be given a word or phrase that they must describe to you 94 | 2. The user CANNOT say any part of the word/phrase directly 95 | 3. You must try to guess the word/phrase based on the user's description 96 | 4. Once you guess correctly, the user will move on to their next word 97 | 5. The user is trying to get through as many words as possible in 60 seconds 98 | 6. The external application will handle timing and keeping score 99 | 100 | YOUR ROLE: 101 | 1. Listen carefully to the user's descriptions 102 | 2. Make intelligent guesses based on what they say 103 | 3. When you think you know the answer, state it clearly: "Is it [your guess]?" 104 | 4. If you're struggling, ask for more specific clues 105 | 5. Keep the game moving quickly - make guesses promptly 106 | 6. Be enthusiastic and encouraging 107 | 108 | IMPORTANT: 109 | - Keep all responses brief - the game is timed! 110 | - Make multiple guesses if needed 111 | - Use your common knowledge to make educated guesses 112 | - If the user indicates you got it right, just say "Got it!" and prepare for the next word 113 | - If you've made several wrong guesses, simply ask for "Another clue please?" 
114 | 115 | Start by guessing once you hear the user describe the word or phrase.""" 116 | 117 | game_host_prompt = """You are the AI host for a game of Word Wrangler. There are two players in the game: the human describer and the AI guesser. 118 | 119 | GAME RULES: 120 | 1. You, the host, will give the human describer a word or phrase that they must describe 121 | 2. The describer CANNOT say any part of the word/phrase directly 122 | 3. The AI guesser will try to guess the word/phrase based on the describer's description 123 | 4. Once the guesser guesses correctly, move on to the next word 124 | 5. The describer is trying to get through as many words as possible in 60 seconds 125 | 6. The describer can say "skip" or "pass" to get a new word if they find a word too difficult 126 | 7. The describer can ask you to repeat the current word if they didn't hear it clearly 127 | 8. You'll keep track of the score (1 point for each correct guess) 128 | 9. The external application will handle timing 129 | 130 | YOUR ROLE: 131 | 1. Start with this exact brief introduction: "Welcome to Word Wrangler! I'll give you words to describe, and the A.I. player will try to guess them. Remember, don't say any part of the word itself. Here's your first word: [word]." 132 | 2. Provide words to the describer. Choose 1 or 2 word phrases that cover a variety of topics, including animals, objects, places, and actions. 133 | 3. IMPORTANT: You will hear DIFFERENT types of input: 134 | a. DESCRIPTIONS from the human (which you should IGNORE) 135 | b. AFFIRMATIONS from the human (like "correct", "that's right", "you got it") which you should IGNORE 136 | c. GUESSES from the AI player (which will be in the form of "Is it [word]?" or similar question format) 137 | d. SKIP REQUESTS from the human (if they say "skip", "pass", or "next word please") 138 | e. REPEAT REQUESTS from the human (if they say "repeat", "what was that?", "say again", etc.) 139 | 140 | 4. 
HOW TO RESPOND: 141 | - If you hear a DESCRIPTION or AFFIRMATION from the human, respond with exactly "IGNORE" (no other text) 142 | - If you hear a GUESS (in question form) and it's INCORRECT, respond with exactly "NO" (no other text) 143 | - If you hear a GUESS (in question form) and it's CORRECT, respond with "Correct! That's [N] points. Your next word is [new word]" where N is the current score 144 | - If you hear a SKIP REQUEST, respond with "The new word is [new word]" (don't change the score) 145 | - If you hear a REPEAT REQUEST, respond with "Your word is [current word]" (don't change the score) 146 | 147 | 5. SCORING: 148 | - Start with a score of 0 149 | - Add 1 point for each correct guess by the AI player 150 | - Do NOT add points for skipped words 151 | - Announce the current score after every correct guess 152 | 153 | RESPONSE EXAMPLES: 154 | - Human says: "This is something you use to write" → You respond: "IGNORE" 155 | - Human says: "That's right!" or "You got it!" → You respond: "IGNORE" 156 | - Human says: "Wait, what was my word again?" → You respond: "Your word is [current word]" 157 | - Human says: "Can you repeat that?" → You respond: "Your word is [current word]" 158 | - AI says: "Is it a pen?" → If correct and it's the first point, you respond: "Correct! That's 1 point. Your next word is [new word]" 159 | - AI says: "Is it a pencil?" → If correct and it's the third point, you respond: "Correct! That's 3 points. Your next word is [new word]" 160 | - AI says: "Is it a marker?" → If incorrect, you respond: "NO" 161 | - Human says: "Skip this one" or "Pass" → You respond: "The new word is [new word]" 162 | 163 | IMPORTANT GUIDELINES: 164 | - Choose words that range from easy to moderately difficult 165 | - Keep all responses brief - the game is timed! 
class HostResponseTextFilter(BaseTextFilter):
    """Text filter that silences the host's control responses.

    The host prompt asks the model to answer with exactly "NO" (incorrect
    guess) or "IGNORE" (description or affirmation from the human). This
    filter replaces those two responses with an empty string so they are not
    verbalized; every other text is returned unchanged.
    """

    # Responses that must never be spoken, compared after strip() + upper().
    _SILENT_RESPONSES = frozenset({"NO", "IGNORE"})

    def __init__(self):
        # Initialize the base filter like every other subclass in this module does.
        super().__init__()
        self._interrupted = False

    def update_settings(self, settings: Mapping[str, Any]):
        """Accept runtime settings; this filter has none, so nothing is changed."""

    async def filter(self, text: str) -> str:
        """Return "" when ``text`` is a silent control response, else ``text``.

        Args:
            text: Text about to be handed to the TTS service.

        Returns:
            An empty string if ``text`` is "NO" or "IGNORE" (ignoring case and
            surrounding whitespace); otherwise the original, unmodified text.
        """
        if text.strip().upper() in self._SILENT_RESPONSES:
            return ""

        return text

    async def handle_interruption(self):
        """Record that an interruption is in progress."""
        self._interrupted = True

    async def reset_interruption(self):
        """Clear the interruption flag."""
        self._interrupted = False
class BotStoppedSpeakingNotifier(FrameProcessor):
    """Pass-through processor that signals a notifier when the bot stops speaking.

    Every frame is forwarded unchanged in the direction it arrived. When the
    frame is a BotStoppedSpeakingFrame, ``notifier.notify()`` is awaited before
    the frame is forwarded.
    """

    def __init__(self, notifier: BaseNotifier):
        super().__init__()
        self._notifier = notifier

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        host_finished = isinstance(frame, BotStoppedSpeakingFrame)
        if host_finished:
            logger.debug(f"{self}: Host bot stopped speaking, notifying listeners")
            await self._notifier.notify()

        # The frame continues down the pipeline whatever its type
        await self.push_frame(frame, direction)
class StartFrameGate(FrameProcessor):
    """Holds back the StartFrame until a notifier fires, then opens for good.

    While the gate is closed, an incoming StartFrame is stored and a background
    task is started that waits on the notifier; any other frame that arrives
    while the gate is closed is not forwarded. Once the notifier fires, the
    stored StartFrame is pushed and every later frame passes straight through.
    """

    def __init__(self, notifier: BaseNotifier):
        super().__init__()
        self._notifier = notifier
        self._blocked_start_frame: Optional[Frame] = None
        self._gate_opened = False
        self._gate_task: Optional[asyncio.Task] = None

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        if self._gate_opened:
            # Open gate: plain pass-through
            await self.push_frame(frame, direction)
            return

        if not isinstance(frame, StartFrame):
            # Closed gate: only a StartFrame is acted upon
            return

        logger.debug(f"{self}: Blocking StartFrame until host bot stops speaking")
        self._blocked_start_frame = frame

        # A single waiter task is enough, so create it only the first time
        if not self._gate_task:
            self._gate_task = self.create_task(self._wait_for_notification())

    async def _wait_for_notification(self):
        """Wait for the notifier, open the gate and release the held StartFrame."""
        try:
            await self._notifier.wait()

            # Opening happens at most once
            if self._gate_opened:
                return

            self._gate_opened = True
            logger.debug(f"{self}: Gate opened, passing through blocked StartFrame")

            held_frame = self._blocked_start_frame
            if held_frame:
                await self.push_frame(held_frame)
                self._blocked_start_frame = None
        except asyncio.CancelledError:
            logger.debug(f"{self}: Gate task was cancelled")
            raise
        except Exception as e:
            logger.exception(f"{self}: Error in gate task: {e}")
            raise
class GameStateTracker(FrameProcessor):
    """Tracks new-word announcements and the score by reading host LLM text.

    LLMTextFrame text is accumulated into a buffer. When an
    LLMFullResponseEndFrame arrives, the buffered response is scanned for a
    phrase announcing a word (which triggers ``new_word_notifier``) and for a
    "that's N point(s)" score statement. Every frame is forwarded unchanged.
    """

    # Host replies that carry no game information, compared after strip() + upper()
    # so the check agrees with the case-insensitive TTS text filter.
    _SILENT_RESPONSES = frozenset({"NO", "IGNORE"})

    def __init__(self, new_word_notifier: BaseNotifier):
        super().__init__()
        self._new_word_notifier = new_word_notifier
        self._text_buffer = ""
        self._current_score = 0

        # Words/phrases that indicate a new word being provided
        self._key_phrases = ["your word is", "new word is", "next word is"]

        # Pattern to extract score from responses. Both the ASCII apostrophe and
        # the typographic one (U+2019) are accepted, since generated text may use either.
        self._score_pattern = re.compile(r"that['\u2019]s (\d+) point", re.IGNORECASE)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        if isinstance(frame, LLMTextFrame):
            if frame.text.strip().upper() in self._SILENT_RESPONSES:
                logger.debug("Skipping NO/IGNORE response")
            else:
                self._text_buffer += frame.text
        elif isinstance(frame, LLMFullResponseEndFrame):
            # The end frame marks a complete response: analyse it, then start afresh
            if self._text_buffer:
                await self._handle_complete_response(self._text_buffer)
            self._text_buffer = ""

        # Always push the frame through
        await self.push_frame(frame, direction)

    async def _handle_complete_response(self, response: str) -> None:
        """Notify on a new-word announcement and record any score in ``response``."""
        response_lower = response.lower()

        # 1. Check for new word announcements
        if any(phrase in response_lower for phrase in self._key_phrases):
            await self._new_word_notifier.notify()
        else:
            logger.debug("No new word phrases detected")

        # 2. Check for score updates
        score_match = self._score_pattern.search(response_lower)
        if score_match is None:
            logger.debug(f"No score pattern match in: '{response_lower}'")
            return

        # The capture group is \d+, so int() cannot fail here
        score = int(score_match.group(1))

        # Only update if the new score is higher
        if score > self._current_score:
            logger.debug(f"Score updated from {self._current_score} to {score}")
            self._current_score = score
        else:
            logger.debug(f"Ignoring score {score} <= current score {self._current_score}")

    @property
    def current_score(self) -> int:
        """Get the current score."""
        return self._current_score
class GameTimer:
    """Counts down one game and ends the pipeline when the time is up.

    When the countdown expires, a spoken end-of-game message containing the
    final score is queued on the pipeline task, followed by an EndFrame.
    """

    def __init__(
        self,
        task: "PipelineTask",
        game_state_tracker: "GameStateTracker",
        game_duration_seconds: int = 120,
    ):
        """Store the collaborators; the countdown does not begin until start().

        Args:
            task: Pipeline task that receives the end-of-game frames.
            game_state_tracker: Source of the final score (``current_score``).
            game_duration_seconds: Length of the game in seconds.
        """
        self._task = task
        self._game_state_tracker = game_state_tracker
        self._game_duration = game_duration_seconds
        self._timer_task = None
        self._start_time = None
        # Loop captured in start(); its clock is the reference for elapsed time
        self._loop = None

    def start(self):
        """Start the game timer; does nothing if it was already started.

        Raises:
            RuntimeError: If called when no event loop is running.
        """
        if self._timer_task is None:
            self._loop = asyncio.get_running_loop()
            self._start_time = self._loop.time()
            self._timer_task = self._loop.create_task(self._run_timer())
            logger.info(f"Game timer started: {self._game_duration} seconds")

    def stop(self):
        """Stop the game timer by cancelling the countdown task, if any."""
        if self._timer_task:
            self._timer_task.cancel()
            self._timer_task = None
            logger.info("Game timer stopped")

    def get_remaining_time(self) -> int:
        """Get the remaining time in whole seconds, never below zero.

        Returns the full game duration while the timer has not been started.
        """
        if self._start_time is None or self._loop is None:
            return self._game_duration

        elapsed = self._loop.time() - self._start_time
        return max(0, self._game_duration - int(elapsed))

    @staticmethod
    def _format_end_message(final_score: int) -> str:
        """Build the spoken end-of-game message, pluralizing "point" as needed."""
        plural = "" if final_score == 1 else "s"
        return (
            "Time's up! Thank you for playing Word Wrangler. "
            f"Your final score is {final_score} point{plural}. Great job!"
        )

    async def _run_timer(self):
        """Sleep for the game duration, then announce the score and end the game."""
        try:
            await asyncio.sleep(self._game_duration)

            final_score = self._game_state_tracker.current_score
            logger.info(f"Game over! Final score: {final_score}")

            # Speak the end message first, then end the pipeline
            await self._task.queue_frames(
                [TTSSpeakFrame(text=self._format_end_message(final_score)), EndFrame()]
            )
        except asyncio.CancelledError:
            logger.debug("Game timer task cancelled")
            # Propagate so the task is recorded as cancelled rather than finished
            raise
        except Exception as e:
            logger.exception(f"Error in game timer: {e}")
class ResettablePlayerLLM(GeminiMultimodalLiveLLMService):
    """Player LLM service that resets itself whenever a new word is announced.

    A new-word notification makes the service disconnect and restore its base
    system instruction; the reconnect is deferred until the next
    host-stopped-speaking notification.
    """

    def __init__(
        self,
        api_key: str,
        system_instruction: str,
        new_word_notifier: BaseNotifier,
        host_stopped_speaking_notifier: BaseNotifier,
        voice_id: str = PLAYER_VOICE_ID,
        **kwargs,
    ):
        super().__init__(
            api_key=api_key, voice_id=voice_id, system_instruction=system_instruction, **kwargs
        )
        self._new_word_notifier = new_word_notifier
        self._host_stopped_speaking_notifier = host_stopped_speaking_notifier
        self._base_system_instruction = system_instruction
        self._reset_task: Optional[asyncio.Task] = None
        self._pending_reset: bool = False

    async def start(self, frame: StartFrame):
        await super().start(frame)

        # Launch the listener unless one is still alive
        listener_alive = self._reset_task and not self._reset_task.done()
        if not listener_alive:
            self._reset_task = self.create_task(self._listen_for_notifications())

    async def stop(self, frame: EndFrame):
        await self._cancel_reset_task()
        await super().stop(frame)

    async def cancel(self, frame: CancelFrame):
        await self._cancel_reset_task()
        await super().cancel(frame)

    async def _cancel_reset_task(self):
        """Cancel the notification listener task if it is still running."""
        if self._reset_task and not self._reset_task.done():
            await self.cancel_task(self._reset_task)
            self._reset_task = None

    async def _listen_for_notifications(self):
        """Listen for new word and host stopped speaking notifications."""
        try:
            # One listener task per notifier; neither is expected to finish
            listeners = [
                self.create_task(self._listen_for_new_word()),
                self.create_task(self._listen_for_host_stopped()),
            ]
            await asyncio.gather(*listeners)
        except asyncio.CancelledError:
            logger.debug(f"{self}: Notification listener tasks cancelled")
            raise
        except Exception as e:
            logger.exception(f"{self}: Error in notification listeners: {e}")
            raise

    async def _listen_for_new_word(self):
        """On each new-word notification, disconnect and mark a reset as pending."""
        while True:
            await self._new_word_notifier.wait()
            logger.info(
                f"{self}: Received new word notification, disconnecting and waiting for host to finish"
            )

            # Drop the connection right away so processing stops
            await self._disconnect()

            # Go back to the instruction the service was created with
            self._system_instruction = self._base_system_instruction

            # The reconnect is carried out once the host stops speaking
            self._pending_reset = True

    async def _listen_for_host_stopped(self):
        """Reconnect after the host stops speaking, but only if a reset is pending."""
        while True:
            await self._host_stopped_speaking_notifier.wait()

            if not self._pending_reset:
                continue

            logger.info(f"{self}: Host finished speaking, completing the LLM reset")
            await self._connect()
            self._pending_reset = False
            logger.info(f"{self}: LLM reset complete")
async def tts_audio_raw_frame_filter(frame: Frame) -> bool:
    """Return True when ``frame`` is a TTSAudioRawFrame, False otherwise.

    ``main`` passes this function as the ``filter`` argument of the
    ProducerProcessor.
    """
    return isinstance(frame, TTSAudioRawFrame)


# Create a resampler instance once, at import time; tts_to_input_audio_transformer
# uses this module-level instance for every frame it converts
resampler = SOXRAudioResampler()
async def tts_to_input_audio_transformer(frame: Frame, target_sample_rate: int = 16000):
    """Transform a TTS audio frame into an InputAudioRawFrame with resampling.

    The audio is resampled from the frame's own sample rate to
    ``target_sample_rate`` using the module-level ``resampler``.

    Args:
        frame (Frame): The frame to transform (expected to be TTSAudioRawFrame).
        target_sample_rate (int): Sample rate, in Hz, of the produced input
            audio. Defaults to 16000.

    Returns:
        InputAudioRawFrame: The resampled input audio frame, keeping the
        source frame's channel count. A frame that is not a TTSAudioRawFrame
        is returned unchanged instead of being turned into ``None``.
    """
    if not isinstance(frame, TTSAudioRawFrame):
        return frame

    resampled_audio = await resampler.resample(
        frame.audio,
        frame.sample_rate,  # Source rate, as reported by the frame
        target_sample_rate,
    )

    return InputAudioRawFrame(
        audio=resampled_audio,
        sample_rate=target_sample_rate,
        num_channels=frame.num_channels,
    )
async def main(room_url: str, token: str) -> None:
    """Build the Word Wrangler pipeline for one Daily room and run it.

    Picks the game words, creates the transport, the host LLM and TTS, the
    player LLM and the helper processors, wires them into a two-branch
    pipeline, registers the participant event handlers and then awaits the
    pipeline runner.

    Args:
        room_url: URL of the Daily room to join.
        token: Daily token passed to the transport.
    """
    logger.debug("Starting bot in room: {}", room_url)

    # Pick this game's words and render them as a quoted, comma-separated list
    game_words = generate_game_words(NUM_WORDS_PER_GAME)
    words_string = ", ".join(f'"{word}"' for word in game_words)
    logger.debug(f"Game words: {words_string}")

    transport = DailyTransport(
        room_url,
        token,
        "Word Wrangler Bot",
        DailyParams(
            audio_out_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        ),
    )

    player_instruction = f"""{game_player_prompt}

Important guidelines:
1. Your responses will be converted to speech, so keep them concise and conversational.
2. Don't use special characters or formatting that wouldn't be natural in speech.
3. Encourage the user to elaborate when appropriate."""

    host_instruction = f"""{game_host_prompt}

GAME WORDS:
Use ONLY these words for the game (in any order): {words_string}

Important guidelines:
1. Your responses will be converted to speech, so keep them concise and conversational.
2. Don't use special characters or formatting that wouldn't be natural in speech.
3. ONLY use words from the provided list above when giving words to the player."""

    intro_message = """Start with this exact brief introduction: "Welcome to Word Wrangler! I'll give you words to describe, and the A.I. player will try to guess them. Remember, don't say any part of the word itself. Here's your first word: [word]." """

    # Create the STT mute filter, configured with the MUTE_UNTIL_FIRST_BOT_COMPLETE strategy
    stt_mute_filter = STTMuteFilter(
        config=STTMuteConfig(strategies={STTMuteStrategy.MUTE_UNTIL_FIRST_BOT_COMPLETE})
    )

    # The host LLM is configured for text output; host_tts below turns that text into speech
    host_llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=host_instruction,
        params=InputParams(modalities=GeminiMultimodalModalities.TEXT),
    )

    # HostResponseTextFilter keeps the host's "NO" / "IGNORE" replies from being spoken
    host_tts = GoogleTTSService(
        voice_id=HOST_VOICE_ID,
        credentials_path=os.getenv("GOOGLE_TEST_CREDENTIALS_FILE"),
        text_filters=[HostResponseTextFilter()],
    )

    # The producer sits in the player branch and the consumer in the host branch:
    # TTSAudioRawFrames seen by the producer are converted to input audio and
    # handed to the consumer
    producer = ProducerProcessor(
        filter=tts_audio_raw_frame_filter,
        transformer=tts_to_input_audio_transformer,
        passthrough=True,
    )
    consumer = ConsumerProcessor(producer=producer)

    # Create the notifiers
    bot_speaking_notifier = EventNotifier()
    new_word_notifier = EventNotifier()

    # Create BotStoppedSpeakingNotifier to detect when host bot stops speaking
    bot_stopped_speaking_detector = BotStoppedSpeakingNotifier(bot_speaking_notifier)

    # Create StartFrameGate to block Player LLM until host has stopped speaking
    start_frame_gate = StartFrameGate(bot_speaking_notifier)

    # Create GameStateTracker to handle new words and score tracking
    game_state_tracker = GameStateTracker(new_word_notifier)

    # Create a resettable player LLM that coordinates between notifiers
    player_llm = ResettablePlayerLLM(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=player_instruction,
        new_word_notifier=new_word_notifier,
        host_stopped_speaking_notifier=bot_speaking_notifier,
        voice_id=PLAYER_VOICE_ID,
    )

    # Set up the initial context for the conversation
    messages = [
        {
            "role": "user",
            "content": intro_message,
        },
    ]

    # Wrap the initial messages in an LLM context and build the host's context aggregator
    context = OpenAILLMContext(messages)
    context_aggregator = host_llm.create_context_aggregator(context)

    pipeline = Pipeline(
        [
            transport.input(),  # Receive audio/video from Daily call
            stt_mute_filter,  # Filter out speech during the bot's initial turn
            ParallelPipeline(
                # Host branch: manages the game and provides words
                [
                    consumer,  # Receives audio from the player branch
                    host_llm,  # AI host that provides words and tracks score
                    game_state_tracker,  # Tracks words and score from host responses
                    host_tts,  # Converts host text to speech
                    bot_stopped_speaking_detector,  # Notifies when host stops speaking
                ],
                # Player branch: guesses words based on human descriptions
                [
                    start_frame_gate,  # Gates the player until host finishes intro
                    player_llm,  # AI player that makes guesses
                    producer,  # Collects audio frames to be passed to the consumer
                ],
            ),
            transport.output(),  # Send audio/video back to Daily call
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            allow_interruptions=False,
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
    )

    # Create the game timer
    game_timer = GameTimer(task, game_state_tracker, game_duration_seconds=GAME_DURATION_SECONDS)

    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant):
        logger.info("First participant joined: {}", participant["id"])
        # Capture the participant's transcription
        await transport.capture_participant_transcription(participant["id"])
        # Kick off the conversation
        await task.queue_frames([context_aggregator.user().get_context_frame()])
        # Start the game timer
        game_timer.start()

    @transport.event_handler("on_participant_left")
    async def on_participant_left(transport, participant, reason):
        logger.info("Participant left: {}", participant)
        # Stop the timer
        game_timer.stop()
        # Cancel the pipeline task
        await task.cancel()

    runner = PipelineRunner(handle_sigint=False, force_gc=True)

    await runner.run(task)
async def bot(args: DailySessionArguments):
    """Bot entry point: run the game in the room described by ``args``.

    Args:
        args: Session arguments; only ``room_url`` and ``token`` are read here.

    Raises:
        Exception: Any error raised while running ``main`` is logged and
            re-raised to the caller.
    """
    # The token is a credential, so only the room URL is written to the log
    logger.info(f"Bot process initialized {args.room_url}")

    try:
        await main(args.room_url, args.token)
        logger.info("Bot process completed")
    except Exception as e:
        logger.exception(f"Error in bot process: {str(e)}")
        raise


# Local development functions
async def local_main():
    """Run the bot for local development.

    Obtains a room URL and token from ``configure``, logs the URL, opens it in
    the default web browser and runs ``main``. Errors are logged rather than
    propagated.
    """
    try:
        async with aiohttp.ClientSession() as session:
            (room_url, token) = await configure(session)
            logger.warning("_")
            logger.warning("_")
            logger.warning(f"Talk to your voice agent here: {room_url}")
            logger.warning("_")
            logger.warning("_")
            webbrowser.open(room_url)
            await main(room_url, token)
    except Exception as e:
        logger.exception(f"Error in local development mode: {e}")


# Local development entry point
if LOCAL_RUN and __name__ == "__main__":
    try:
        asyncio.run(local_main())
    except Exception as e:
        logger.exception(f"Failed to run in local mode: {e}")