├── .prettierrc ├── server ├── requirements.txt ├── env.example ├── Dockerfile ├── .gitignore ├── runner.py ├── server.py ├── bot.py ├── word_list.py ├── bot_phone_twilio.py └── bot_phone_local.py ├── client ├── env.example ├── public │ ├── og-image.png │ └── favicon.svg ├── src │ ├── assets │ │ ├── logo.png │ │ └── star.png │ ├── utils │ │ ├── formatTime.ts │ │ ├── timerUtils.ts │ │ └── wordDetection.ts │ ├── components │ │ ├── Game │ │ │ ├── ScoreRow │ │ │ │ ├── ScoreRow.module.css │ │ │ │ └── index.tsx │ │ │ ├── Timer.tsx │ │ │ ├── GameWord.tsx │ │ │ ├── GameContent.tsx │ │ │ ├── WordWrangler.tsx │ │ │ └── WordWrangler.module.css │ │ ├── Card.tsx │ │ └── StartButton │ │ │ └── index.tsx │ ├── types │ │ └── personality.ts │ ├── pages │ │ ├── _app.tsx │ │ ├── api │ │ │ └── connect.ts │ │ ├── _document.tsx │ │ └── index.tsx │ ├── contexts │ │ └── Configuration.tsx │ ├── styles │ │ ├── HomeStyles.ts │ │ └── globals.css │ ├── hooks │ │ ├── useConnectionState.ts │ │ ├── useWordDetection.ts │ │ ├── useGameTimer.ts │ │ ├── useVisualFeedback.ts │ │ └── useGameState.ts │ ├── constants │ │ └── gameConstants.ts │ ├── providers │ │ └── RTVIProvider.tsx │ └── data │ │ └── wordWranglerWords.ts ├── postcss.config.mjs ├── next.config.ts ├── eslint.config.mjs ├── .gitignore ├── package.json └── tsconfig.json ├── images ├── word-wrangler-web-screenshot.png ├── word-wrangler-web-architecture.png └── word-wrangler-twilio-architecture.png ├── LICENSE └── README.md /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "tabWidth": 2, 4 | "useTabs": false, 5 | "singleQuote": false 6 | } 7 | -------------------------------------------------------------------------------- /server/requirements.txt: -------------------------------------------------------------------------------- 1 | pipecatcloud 2 | pipecat-ai[daily,google,silero] 3 | fastapi 4 | uvicorn 5 | python-dotenv 6 | 
-------------------------------------------------------------------------------- /client/env.example: -------------------------------------------------------------------------------- 1 | NEXT_PUBLIC_API_BASE_URL=http://localhost:7860 2 | PIPECAT_CLOUD_API_KEY="" 3 | AGENT_NAME=word-wrangler -------------------------------------------------------------------------------- /client/public/og-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daily-co/word-wrangler-gemini-live/HEAD/client/public/og-image.png -------------------------------------------------------------------------------- /client/src/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daily-co/word-wrangler-gemini-live/HEAD/client/src/assets/logo.png -------------------------------------------------------------------------------- /client/src/assets/star.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daily-co/word-wrangler-gemini-live/HEAD/client/src/assets/star.png -------------------------------------------------------------------------------- /client/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | const config = { 2 | plugins: ["@tailwindcss/postcss"], 3 | }; 4 | 5 | export default config; 6 | -------------------------------------------------------------------------------- /images/word-wrangler-web-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daily-co/word-wrangler-gemini-live/HEAD/images/word-wrangler-web-screenshot.png -------------------------------------------------------------------------------- /images/word-wrangler-web-architecture.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daily-co/word-wrangler-gemini-live/HEAD/images/word-wrangler-web-architecture.png -------------------------------------------------------------------------------- /images/word-wrangler-twilio-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daily-co/word-wrangler-gemini-live/HEAD/images/word-wrangler-twilio-architecture.png -------------------------------------------------------------------------------- /server/env.example: -------------------------------------------------------------------------------- 1 | DAILY_API_KEY= 2 | DAILY_API_URL=https://api.daily.co/v1/ 3 | DAILY_SAMPLE_ROOM_URL= 4 | GOOGLE_API_KEY= 5 | GOOGLE_TEST_CREDENTIALS_FILE= -------------------------------------------------------------------------------- /server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM dailyco/pipecat-base:latest 2 | 3 | COPY ./requirements.txt requirements.txt 4 | 5 | RUN pip install --no-cache-dir --upgrade -r requirements.txt 6 | 7 | COPY ./bot.py bot.py 8 | -------------------------------------------------------------------------------- /client/next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | 3 | const nextConfig: NextConfig = { 4 | /* config options here */ 5 | reactStrictMode: true, 6 | }; 7 | 8 | export default nextConfig; 9 | -------------------------------------------------------------------------------- /client/src/utils/formatTime.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Formats seconds into MM:SS format 3 | */ 4 | export function formatTime(seconds: number): string { 5 | const mins = Math.floor(seconds / 60); 6 | const secs = seconds % 60; 7 | return `${mins}:${secs < 10 ? 
'0' : ''}${secs}`; 8 | } 9 | -------------------------------------------------------------------------------- /client/src/components/Game/ScoreRow/ScoreRow.module.css: -------------------------------------------------------------------------------- 1 | .divider { 2 | width: 100%; 3 | height: 2px; 4 | background: linear-gradient( 5 | 90deg, 6 | transparent 0%, 7 | rgba(255, 255, 255, 0.15) 30%, 8 | rgba(255, 255, 255, 0.15) 70%, 9 | transparent 100% 10 | ); 11 | } 12 | -------------------------------------------------------------------------------- /client/eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import { dirname } from "path"; 2 | import { fileURLToPath } from "url"; 3 | import { FlatCompat } from "@eslint/eslintrc"; 4 | 5 | const __filename = fileURLToPath(import.meta.url); 6 | const __dirname = dirname(__filename); 7 | 8 | const compat = new FlatCompat({ 9 | baseDirectory: __dirname, 10 | }); 11 | 12 | const eslintConfig = [ 13 | ...compat.extends("next/core-web-vitals", "next/typescript"), 14 | ]; 15 | 16 | export default eslintConfig; 17 | -------------------------------------------------------------------------------- /client/src/components/Card.tsx: -------------------------------------------------------------------------------- 1 | export function Card({ 2 | children, 3 | className, 4 | }: { 5 | children: React.ReactNode; 6 | className?: string; 7 | }) { 8 | return ( 9 |
61 | {GAME_TEXT.finalScore}: {score} 62 |
63 |47 | {score} 48 |
49 |50 | {GAME_TEXT.finalScoreMessage}{" "} 51 | 52 | {bestScore} 53 | 54 |
55 | 56 |
14 |
15 | ### Phone-Based Game
16 |
17 | In this three-way conversation, an AI host provides words, you describe them without saying the actual word, and an AI player tries to guess. The host tracks your score and manages game flow.
18 |
19 | **Try it now:** Call +1-929-**LLM-GAME** (+1-929-556-4263)
20 |
21 | ## Game Rules
22 |
23 | ### Web-based Game
24 |
25 | 1. The web app provides words for you to describe
26 | 2. You describe the word WITHOUT saying any part of it
27 | 3. The AI player tries to guess based on your description
28 | 4. The app will automatically check the guesses and keep score
29 | 5. Click "Skip" to advance to the next word
30 | 6. You have 60 seconds to score as many points as possible
31 |
32 | ### Phone Game
33 |
34 | 1. The AI host provides a word for you to describe
35 | 2. You describe the word WITHOUT saying any part of it
36 | 3. The AI player tries to guess based on your description
37 | 4. Score points for each correct guess
38 | 5. Use commands like "skip" to get a new word or "repeat" to hear the current word again
39 | 6. You have 120 seconds to score as many points as possible
40 |
41 | ## Architecture
42 |
43 | ### Web Game Architecture
44 |
45 | The web game uses a simple linear flow:
46 |
47 | 1. **Transport Input** - Receives audio from the web browser via a Daily WebRTC transport.
48 | 2. **RTVIProcessor** - RTVI is a standard for client/server communication in a voice AI context. This processor collects server-side information and makes it available to the client. Additionally, the client can send events to the server, which are handled through this processor.
49 | 3. **STTMuteFilter** - Filters out speech under specific conditions. In this game, the user's initial speech is "muted", ensuring that the bot can deliver the entire initial message without being interrupted.
50 | 4. **User Context Aggregator** - Aggregates user messages as part of the conversation context.
51 | 5. **LLM** - The LLM powers the AI player's interactions.
52 | 6. **Transport Output** - Sends audio back to the browser using the Daily WebRTC transport.
53 | 7. **Assistant Context Aggregator** - Aggregates assistant messages as part of the conversation context.
54 |
55 | ### Phone Game Architecture
56 |
57 | The phone game implements a three-way conversation using Pipecat's parallel pipeline architecture. This design addresses a fundamental limitation of LLMs: they're built for turn-based interactions, while this game requires real-time, multi-participant conversation management.
58 |
59 |
60 |
61 | #### Conversation Participants
62 |
63 | **Audio Flow Requirements:**
64 |
65 | - **User:** Must hear both the Host and Player outputs; must be heard by both Host and Player
66 | - **Host:** Must hear both the User and the Player; its output must be heard by the User but NOT by the Player
67 | - **Player:** Must hear only the User; its output must be heard by both the User and the Host
68 |
69 | #### Technical Implementation
70 |
71 | The parallel pipeline pattern allows us to create two isolated processing branches, with controlled audio flow between them:
72 |
73 | 1. **Transport Input** - Receives audio from the phone call (Twilio)
74 | 2. **Audio Branch Separation:**
75 | - **Left Branch (Host Pipeline):** `ConsumerProcessor → Host LLM → Game State Tracker → TTS → Bot Stop Detector`
76 | - **Right Branch (Player Pipeline):** `StartFrame Gate → Player LLM → ProducerProcessor`
77 |
78 | **Host LLM Configuration:**
79 |
80 | The Host uses Gemini Live API, configured with specific response patterns to handle different input types:
81 |
82 | ```
83 | - Correct guess: "Correct! That's [N] points. Your next word is [new word]"
84 | - Incorrect guess: "NO" (filtered out by TTS filter)
85 | - User descriptions: "IGNORE" (filtered out by TTS filter)
86 | - Skip requests: "The new word is [new word]"
87 | - Repeat requests: "Your word is [current word]"
88 | ```
89 |
90 | **Audio Flow Management:**
91 |
92 | By default, all input audio flows to both branches, so both LLMs hear the user. To implement the complex routing:
93 |
94 | 1. **Producer/Consumer Pattern:** Captures the Player's output audio and feeds it to the Host
95 |
96 | - `ProducerProcessor` filters TTSAudioRawFrames from the Player
97 | - Transforms them from 24kHz to 16kHz (required by Gemini Live)
98 | - Passes them to the `ConsumerProcessor` at the top of the Host branch
99 |
100 | 2. **Text Filtering:** The `HostResponseTextFilter` intercepts the "NO" and "IGNORE" responses
101 |
102 | - Prevents TTS vocalization of these responses
103 | - Ensures that only meaningful Host responses are spoken
104 |
105 | 3. **Host-Player Synchronization:**
106 |
107 | - `BotStoppedSpeakingNotifier` detects when the Host finishes speaking
108 | - `GameStateTracker` parses the streamed text to detect new words and track score
109 | - `NewWordNotifier` triggers the `ResettablePlayerLLM` to disconnect and reconnect when a new word is presented
110 | - This reset ensures the Player has no context of previous words or guesses
111 |
112 | 4. **StartFrameGate:** The gate holds the Player's StartFrame until the Host has completed its introduction
113 | - Ensures the Player doesn't start interacting until the game has been properly set up
114 |
115 | All processed audio is collected at the end of the Parallel Pipeline and sent via the transport output back to Twilio.
116 |
117 | #### Game State Management
118 |
119 | The implementation tracks:
120 |
121 | - Current words being guessed
122 | - Running score (points for correct guesses)
123 | - Game duration with automatic timeout
124 |
125 | This architecture enables complex interaction patterns that would be difficult to achieve with traditional turn-based conversation models, allowing each AI participant to function effectively in its specific game role.
126 |
127 | ## Run Locally
128 |
129 | ### Web Game
130 |
131 | #### Run the Server
132 |
133 | 1. Switch to the server directory:
134 |
135 | ```bash
136 | cd server
137 | ```
138 |
139 | 2. Set up and activate your virtual environment:
140 |
141 | ```bash
142 | python3 -m venv venv
143 | source venv/bin/activate # On Windows: venv\Scripts\activate
144 | ```
145 |
146 | 3. Install dependencies:
147 |
148 | ```bash
149 | pip install -r requirements.txt
150 | ```
151 |
152 | 4. Create an .env file and add your API keys:
153 |
154 | ```bash
155 | cp env.example .env
156 | ```
157 |
158 | 5. Add environment variables for:
159 |
160 | ```
161 | DAILY_API_KEY=
162 | DAILY_SAMPLE_ROOM_URL=
163 | GOOGLE_API_KEY=
164 | ```
165 |
166 | 6. Run the server:
167 |
168 | ```bash
169 | LOCAL_RUN=1 python server.py
170 | ```
171 |
172 | #### Run the Client
173 |
174 | 1. In a new terminal window, navigate to the client directory:
175 |
176 | ```bash
177 | cd client
178 | ```
179 |
180 | 2. Install dependencies:
181 |
182 | ```bash
183 | npm install
184 | ```
185 |
186 | 3. Create an .env.local file:
187 |
188 | ```bash
189 | cp env.example .env.local
190 | ```
191 |
192 | 4. In .env.local:
193 |
194 | - `NEXT_PUBLIC_API_BASE_URL=http://localhost:7860` is used for local development. For deployments, either remove this env var or replace with `/api`.
195 | - `AGENT_NAME` should be set to the name of your deployed Pipecat agent (e.g., "word-wrangler").
196 | - `PIPECAT_CLOUD_API_KEY` is used only for deployments to Pipecat Cloud.
197 |
198 | 5. Run the app:
199 |
200 | ```bash
201 | npm run dev
202 | ```
203 |
204 | 6. Open http://localhost:3000 in your browser
205 |
206 | ### Phone Game
207 |
208 | There are two versions of the phone game:
209 |
210 | 1. **Local Development** (`bot_phone_local.py`):
211 |
212 | - For testing locally before deployment
213 |
214 | 2. **Deployment** (`bot_phone_twilio.py`):
215 | - Ready for deployment to Pipecat Cloud
216 |
217 | #### Running Locally
218 |
219 | 1. Set up and activate your virtual environment:
220 |
221 | ```bash
222 | python3 -m venv venv
223 | source venv/bin/activate # On Windows: venv\Scripts\activate
224 | ```
225 |
226 | 2. Install dependencies (`requirements.txt` is in the `server` directory, so run this step from there):
227 |
228 | ```bash
229 | pip install -r requirements.txt
230 | ```
231 |
232 | 3. Create an .env file in the server directory with your API keys (skip the `cd server` line if you are already in that directory):
233 |
234 | ```bash
235 | cd server
236 | cp env.example .env
237 | ```
238 |
239 | 4. Configure your Daily and Google credentials in your .env:
240 |
241 | ```
242 | DAILY_API_KEY=your_daily_api_key
243 | DAILY_SAMPLE_ROOM_URL=your_daily_room_url
244 | GOOGLE_API_KEY=your_google_api_key
245 | GOOGLE_TEST_CREDENTIALS_FILE=path_to_credentials_file
246 | ```
247 |
248 | 5. Run the local bot:
249 |
250 | ```bash
251 | LOCAL_RUN=1 python bot_phone_local.py
252 | ```
253 |
254 | ## Deployment
255 |
256 | ### Web Game
257 |
258 | #### Deploy your Server
259 |
260 | You can deploy your server code using Pipecat Cloud. For a full walkthrough, start with the [Pipecat Cloud Quickstart](https://docs.pipecat.daily.co/quickstart).
261 |
262 | Here are the steps you'll need to complete:
263 |
264 | - Build, tag, and push your Docker image to a registry.
265 | - Create Pipecat Cloud secrets using the CLI or dashboard. For this agent, you only need a `GOOGLE_API_KEY`. Your `DAILY_API_KEY` is automatically applied.
266 | - Deploy your agent image. You can use a pcc-deploy.toml file to make deploying easier. For example:
267 |
268 | ```toml
269 | agent_name = "word-wrangler"
270 | image = "your-dockerhub-name/word-wrangler:0.1"
271 | secret_set = "word-wrangler-secrets"
272 | enable_krisp = true
273 |
274 | [scaling]
275 | min_instances = 1
276 | max_instances = 5
277 | ```
278 |
279 | Then, you can deploy with the CLI using `pcc deploy`.
280 |
281 | - Finally, confirm that your agent is deployed. You'll get feedback in the terminal.
282 |
283 | #### Deploy your Client
284 |
285 | This project uses TypeScript, React, and Next.js, making it a perfect fit for [Vercel](https://vercel.com/).
286 |
287 | - In your client directory, install Vercel's CLI tool: `npm install -g vercel`
288 | - Verify it's installed using `vercel --version`
289 | - Log in to your Vercel account using `vercel login`
290 | - Deploy your client to Vercel using `vercel`
291 |
292 | ### Phone Game
293 |
294 | #### Deploy your Server
295 |
296 | Again, we'll use Pipecat Cloud. Follow the steps from above. The only difference is the set of required secrets: in addition to a `GOOGLE_API_KEY`, you'll need `GOOGLE_APPLICATION_CREDENTIALS`, provided as a .json file containing your [Google Cloud service account](https://console.cloud.google.com/iam-admin/serviceaccounts) information.
297 |
298 | You'll need to modify the Dockerfile so that the credentials.json and word_list.py are accessible. This Dockerfile will work:
299 |
300 | ```Dockerfile
301 | FROM dailyco/pipecat-base:latest
302 |
303 | COPY ./requirements.txt requirements.txt
304 |
305 | RUN pip install --no-cache-dir --upgrade -r requirements.txt
306 |
307 | COPY ./word_list.py word_list.py
308 | COPY ./credentials.json credentials.json
309 | COPY ./bot_phone_twilio.py bot.py
310 | ```
311 |
312 | Note: Your `credentials.json` file should have your Google service account credentials.
313 |
314 | #### Buy and Configure a Twilio Number
315 |
316 | Check out the [Twilio WebSocket Telephony guide](https://docs.pipecat.daily.co/pipecat-in-production/telephony/twilio-mediastreams) for a step-by-step walkthrough on how to purchase a phone number, configure your TwiML, and make or receive calls.
317 |
318 | ## Tech stack
319 |
320 | Both games are built using:
321 |
322 | - [Pipecat](https://www.pipecat.ai/) framework for real-time voice conversation
323 | - Google's Gemini Live API
324 | - Real-time communication (Web via Daily, Phone via Twilio)
325 |
326 | The phone game features:
327 |
328 | - Parallel processing of host and player interactions
329 | - State tracking for game progress and scoring
330 | - Dynamic word selection from multiple categories
331 | - Automated game timing and scoring
332 |
--------------------------------------------------------------------------------
/client/src/components/Game/WordWrangler.module.css:
--------------------------------------------------------------------------------
1 | .gameContainer {
2 | position: relative;
3 | z-index: 1;
4 | padding: 4px;
5 | width: 100%;
6 | border-radius: 28px;
7 | margin-top: 50px;
8 | min-height: 300px;
9 | box-shadow: 0px 66px 26px rgba(0, 0, 0, 0.01),
10 | 0px 37px 22px rgba(0, 0, 0, 0.05), 0px 16px 16px rgba(0, 0, 0, 0.09),
11 | 0px 4px 9px rgba(0, 0, 0, 0.1);
12 | }
13 |
14 | @media (min-width: 1024px) {
15 | .gameContainer {
16 | width: auto;
17 | flex: none;
18 | min-width: 626px;
19 | height: 260px;
20 | margin-top: 0;
21 | }
22 | }
23 |
24 | .gameContainer:before {
25 | content: "";
26 | position: absolute;
27 | inset: -4px -4px -8px -4px;
28 | border-radius: 28px;
29 | background: linear-gradient(
30 | to bottom,
31 | rgba(0, 0, 0, 1) 0%,
32 | rgba(0, 0, 0, 0.15) 100%
33 | );
34 | z-index: -1;
35 | }
36 |
37 | .gameContainer:after {
38 | content: "";
39 | box-sizing: border-box;
40 | position: absolute;
41 | inset: 0;
42 | border-radius: var(--border-radius-card);
43 | border: var(--border-width-card) solid transparent;
44 | background-image: linear-gradient(#001146, #0655cc),
45 | linear-gradient(
46 | 180deg,
47 | var(--theme-gradient-start) 0%,
48 | var(--theme-gradient-end) 100%
49 | );
50 | background-origin: border-box;
51 | background-clip: padding-box, border-box;
52 | }
53 |
54 | .gameContent {
55 | position: relative;
56 | z-index: 1;
57 | background: transparent;
58 | border-radius: 20px;
59 | width: 100%;
60 | height: 100%;
61 | min-height: 292px;
62 | display: flex;
63 | overflow: hidden;
64 | border: 6px solid rgba(0, 0, 0, 0.25);
65 | }
66 |
67 | .gameContent:after {
68 | content: "";
69 | position: absolute;
70 | inset: 0;
71 | background: radial-gradient(
72 | 70% 40% at 50% 40%,
73 | #2da6ee 0%,
74 | rgba(45, 166, 238, 0) 100%
75 | );
76 | opacity: 0.76;
77 | z-index: -1;
78 | }
79 |
80 | .gameArea {
81 | display: flex;
82 | flex-direction: column;
83 | align-items: center;
84 | flex: 1;
85 | padding: 12px;
86 | position: relative;
87 | z-index: 2;
88 | }
89 |
90 | .timer {
91 | height: var(--button-height);
92 | border-radius: 9999px;
93 | width: 100%;
94 | flex-direction: row;
95 | gap: 12px;
96 | display: flex;
97 | align-items: center;
98 | justify-content: center;
99 | background-color: rgba(0, 0, 0, 0.2);
100 | padding: 12px;
101 |
102 | @media (min-width: 1024px) {
103 | flex: 1;
104 | }
105 |
106 | .timerBadge {
107 | display: flex;
108 | flex-direction: row;
109 | align-items: center;
110 | gap: 6px;
111 | background-color: black;
112 | border-radius: 9999px;
113 | color: white;
114 | height: 100%;
115 | padding: 0 12px;
116 | font-weight: 800;
117 | }
118 |
119 | .timerBar {
120 | height: 100%;
121 | width: 100%;
122 | border-radius: 9999px;
123 | overflow: hidden;
124 | background-color: var(--color-emerald-100);
125 | }
126 |
127 | .timerBarFill {
128 | height: 100%;
129 | width: 100%;
130 | background-color: var(--color-emerald-400);
131 | transition: width 0.3s ease;
132 | }
133 |
134 | &.lowTime {
135 | color: #e74c3c;
136 | animation: pulse 1s infinite;
137 |
138 | .timerBar {
139 | background-color: var(--color-orange-100);
140 | }
141 |
142 | .timerBarFill {
143 | background-color: var(--color-orange-400);
144 | }
145 | }
146 | }
147 |
148 | .scoreDisplay {
149 | font-size: 1.25rem;
150 | font-weight: 500;
151 | color: #0071e3;
152 | }
153 |
154 | .currentWord {
155 | display: flex;
156 | flex: 1;
157 | flex-direction: column;
158 | align-items: center;
159 | justify-content: center;
160 | text-align: center;
161 | width: 100%;
162 | margin-top: 50px;
163 | .helpText {
164 | font-size: 1rem;
165 | font-weight: 700;
166 | color: rgba(255, 255, 255, 0.5);
167 | }
168 |
169 | .word {
170 | font-size: 2rem;
171 | font-weight: 800;
172 | letter-spacing: 0.05em;
173 | line-height: 2;
174 | color: #ffffff;
175 | text-shadow: 0px 4px 0px rgba(0, 0, 0, 0.45);
176 | }
177 |
178 | @media (min-width: 1024px) {
179 | margin-top: 0;
180 | .word {
181 | font-size: 3rem;
182 | text-shadow: 0px 6px 0px rgba(0, 0, 0, 0.45);
183 | }
184 | }
185 | }
186 |
187 | .gameButton {
188 | padding: 0.85rem 0;
189 | font-size: 1.1rem;
190 | font-weight: 500;
191 | border: none;
192 | border-radius: 8px;
193 | cursor: pointer;
194 | transition: all 0.2s ease;
195 | }
196 |
197 | /* Primary button (Skip) */
198 | .skipButton {
199 | flex: 2; /* Takes more space */
200 | background-color: #e74c3c;
201 | color: white;
202 | }
203 |
204 | .skipButton:hover {
205 | background-color: #c0392b;
206 | transform: translateY(-2px);
207 | }
208 |
209 | /* Secondary button (Correct) - more subdued */
210 | .correctButton {
211 | flex: 1; /* Takes less space */
212 | background-color: #f5f5f7; /* Light gray background */
213 | color: #333; /* Dark text */
214 | border: 1px solid #ddd; /* Subtle border */
215 | }
216 |
217 | .correctButton:hover {
218 | background-color: #e8e8ed;
219 | transform: translateY(-1px);
220 | }
221 |
222 | .gameReadyArea {
223 | display: flex;
224 | flex-direction: column;
225 | align-items: center;
226 | }
227 |
228 | .gameResults {
229 | margin-bottom: 1rem;
230 | padding: 0.75rem;
231 | background-color: #f8f9fa;
232 | border-radius: 8px;
233 | width: 100%;
234 | text-align: center;
235 | }
236 |
237 | .gameResults h2 {
238 | margin: 0 0 0.5rem 0;
239 | color: #333;
240 | font-size: 1.3rem;
241 | }
242 |
243 | .statusNote {
244 | margin: 0.5rem 0;
245 | padding: 0.6rem 1rem;
246 | background-color: #f8f9fa;
247 | border-left: 3px solid #0071e3;
248 | font-size: 0.95rem;
249 | color: #333;
250 | width: 100%;
251 | text-align: center;
252 | border-radius: 4px;
253 | }
254 |
255 | .compactInstructions {
256 | margin: 0.75rem 0;
257 | width: 100%;
258 | max-width: 400px;
259 | background-color: #f8f9fa;
260 | border-radius: 8px;
261 | padding: 0.75rem 1rem;
262 | }
263 |
264 | .compactInstructions h3 {
265 | margin: 0 0 0.5rem 0;
266 | color: #333;
267 | font-size: 1.1rem;
268 | text-align: center;
269 | }
270 |
271 | .compactInstructions ul {
272 | margin: 0;
273 | padding-left: 1.5rem;
274 | line-height: 1.4;
275 | }
276 |
277 | .compactInstructions li {
278 | margin-bottom: 0.4rem;
279 | font-size: 0.9rem;
280 | }
281 |
282 | .loadingDots {
283 | display: inline-block;
284 | animation: dotPulse 1.5s infinite linear;
285 | }
286 |
287 | @keyframes dotPulse {
288 | 0% {
289 | opacity: 0.2;
290 | }
291 | 20% {
292 | opacity: 1;
293 | }
294 | 100% {
295 | opacity: 0.2;
296 | }
297 | }
298 |
299 | @keyframes pulse {
300 | 0% {
301 | opacity: 0.8;
302 | }
303 | 50% {
304 | opacity: 1;
305 | }
306 | 100% {
307 | opacity: 0.8;
308 | }
309 | }
310 |
311 | /* Animation styles */
312 | .correctWordDetected {
313 | animation: correctPulse 1.5s ease-in-out;
314 | position: relative;
315 | }
316 |
317 | .autoDetectedOverlay {
318 | position: absolute;
319 | top: 0;
320 | left: 0;
321 | right: 0;
322 | bottom: 0;
323 | display: flex;
324 | justify-content: center;
325 | align-items: center;
326 | background-color: rgba(46, 204, 113, 0.6);
327 | border-radius: 8px;
328 | animation: fadeIn 0.3s ease-in-out;
329 | z-index: 10;
330 | }
331 |
332 | .checkmarkContainer {
333 | width: 80px;
334 | height: 80px;
335 | animation: scaleUp 0.4s ease-out;
336 | }
337 |
338 | .checkmarkSvg {
339 | width: 100%;
340 | height: 100%;
341 | border-radius: 50%;
342 | display: block;
343 | stroke-width: 4;
344 | stroke: #fff;
345 | stroke-miterlimit: 10;
346 | box-shadow: 0 0 0 rgba(46, 204, 113, 0.7);
347 | animation: fillCheck 0.3s ease-in-out 0.3s forwards,
348 | scale 0.2s ease-in-out 0.7s both;
349 | }
350 |
351 | .checkmarkCircle {
352 | stroke-dasharray: 166;
353 | stroke-dashoffset: 166;
354 | stroke-width: 4;
355 | stroke-miterlimit: 10;
356 | stroke: #fff;
357 | fill: transparent;
358 | animation: strokeCheck 0.5s cubic-bezier(0.65, 0, 0.45, 1) forwards;
359 | }
360 |
361 | .checkmarkCheck {
362 | transform-origin: 50% 50%;
363 | stroke-dasharray: 48;
364 | stroke-dashoffset: 48;
365 | animation: strokeCheck 0.25s cubic-bezier(0.65, 0, 0.45, 1) 0.6s forwards;
366 | }
367 |
368 | @keyframes strokeCheck {
369 | 100% {
370 | stroke-dashoffset: 0;
371 | }
372 | }
373 |
374 | @keyframes fillCheck {
375 | 100% {
376 | box-shadow: inset 0 0 0 50px transparent;
377 | }
378 | }
379 |
380 | @keyframes correctPulse {
381 | 0% {
382 | box-shadow: 0 0 0 0 rgba(46, 204, 113, 0.7);
383 | }
384 | 50% {
385 | box-shadow: 0 0 0 15px rgba(46, 204, 113, 0);
386 | }
387 | 100% {
388 | box-shadow: 0 0 0 0 rgba(46, 204, 113, 0);
389 | }
390 | }
391 |
392 | @keyframes fadeIn {
393 | from {
394 | opacity: 0;
395 | }
396 | to {
397 | opacity: 1;
398 | }
399 | }
400 |
401 | @keyframes scaleUp {
402 | from {
403 | transform: scale(0.5);
404 | opacity: 0;
405 | }
406 | to {
407 | transform: scale(1);
408 | opacity: 1;
409 | }
410 | }
411 |
412 | .incorrectWordDetected {
413 | animation: incorrectPulse 1.5s ease-in-out,
414 | shake 0.5s cubic-bezier(0.36, 0.07, 0.19, 0.97) both;
415 | position: relative;
416 | }
417 |
418 | .incorrectOverlay {
419 | position: absolute;
420 | top: 0;
421 | left: 0;
422 | right: 0;
423 | bottom: 0;
424 | display: flex;
425 | justify-content: center;
426 | align-items: center;
427 | background-color: rgba(255, 59, 48, 0.6); /* Red with transparency */
428 | border-radius: 8px;
429 | animation: fadeIn 0.3s ease-in-out;
430 | z-index: 10;
431 | }
432 |
433 | .xmarkContainer {
434 | width: 80px;
435 | height: 80px;
436 | animation: scaleUp 0.4s ease-out;
437 | }
438 |
439 | .xmarkSvg {
440 | width: 100%;
441 | height: 100%;
442 | border-radius: 50%;
443 | display: block;
444 | stroke-width: 4;
445 | stroke: #fff;
446 | stroke-miterlimit: 10;
447 | box-shadow: 0 0 0 rgba(255, 59, 48, 0.7);
448 | animation: fillX 0.3s ease-in-out 0.3s forwards,
449 | scale 0.2s ease-in-out 0.7s both;
450 | }
451 |
452 | .xmarkCircle {
453 | stroke-dasharray: 166;
454 | stroke-dashoffset: 166;
455 | stroke-width: 4;
456 | stroke-miterlimit: 10;
457 | stroke: #fff;
458 | fill: transparent;
459 | animation: strokeX 0.5s cubic-bezier(0.65, 0, 0.45, 1) forwards;
460 | }
461 |
462 | .xmarkX {
463 | transform-origin: 50% 50%;
464 | stroke-dasharray: 48;
465 | stroke-dashoffset: 48;
466 | animation: strokeX 0.25s cubic-bezier(0.65, 0, 0.45, 1) 0.6s forwards;
467 | }
468 |
469 | @keyframes strokeX {
470 | 100% {
471 | stroke-dashoffset: 0;
472 | }
473 | }
474 |
475 | @keyframes fillX {
476 | 100% {
477 | box-shadow: inset 0 0 0 50px transparent;
478 | }
479 | }
480 |
481 | @keyframes incorrectPulse {
482 | 0% {
483 | box-shadow: 0 0 0 0 rgba(255, 59, 48, 0.7);
484 | }
485 | 50% {
486 | box-shadow: 0 0 0 15px rgba(255, 59, 48, 0);
487 | }
488 | 100% {
489 | box-shadow: 0 0 0 0 rgba(255, 59, 48, 0);
490 | }
491 | }
492 |
493 | @keyframes scale {
494 | 0%,
495 | 100% {
496 | transform: none;
497 | }
498 | 50% {
499 | transform: scale3d(1.1, 1.1, 1);
500 | }
501 | }
502 |
503 | @keyframes shake {
504 | 10%,
505 | 90% {
506 | transform: translate3d(-1px, 0, 0);
507 | }
508 | 20%,
509 | 80% {
510 | transform: translate3d(2px, 0, 0);
511 | }
512 | 30%,
513 | 50%,
514 | 70% {
515 | transform: translate3d(-3px, 0, 0);
516 | }
517 | 40%,
518 | 60% {
519 | transform: translate3d(3px, 0, 0);
520 | }
521 | }
522 |
523 | /* Game loading UI styles */
524 | .gameLoadingContainer {
525 | display: flex;
526 | justify-content: center;
527 | align-items: center;
528 | height: 250px; /* Fixed height to prevent layout shifts */
529 | width: 100%;
530 | }
531 |
532 | .gameLoadingContent {
533 | display: flex;
534 | flex-direction: column;
535 | align-items: center;
536 | justify-content: center;
537 | gap: 1.5rem;
538 | text-align: center;
539 | }
540 |
541 | .gameLoadingIcon {
542 | position: relative;
543 | width: 60px;
544 | height: 60px;
545 | display: flex;
546 | justify-content: center;
547 | align-items: center;
548 | }
549 |
550 | .pulseDot {
551 | width: 16px;
552 | height: 16px;
553 | background-color: #0071e3;
554 | border-radius: 50%;
555 | position: relative;
556 | }
557 |
558 | .pulseDot:before {
559 | content: "";
560 | position: absolute;
561 | width: 100%;
562 | height: 100%;
563 | border-radius: 50%;
564 | background-color: #0071e3;
565 | opacity: 0.7;
566 | animation: pulse-wave 1.5s linear infinite;
567 | }
568 |
569 | .gameLoadingTitle {
570 | font-size: 1.5rem;
571 | font-weight: 500;
572 | color: #0071e3;
573 | margin: 0;
574 | }
575 |
/* Expanding ripple: grows from 1x to 2.5x while fading from 0.7 opacity to 0
   during the first half, then shrinks back to 1x while staying invisible. */
@keyframes pulse-wave {
  0% {
    transform: scale(1);
    opacity: 0.7;
  }
  50% {
    transform: scale(2.5);
    opacity: 0;
  }
  100% {
    transform: scale(1);
    opacity: 0;
  }
}
590 |
--------------------------------------------------------------------------------
/server/word_list.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | # Define categories and words for the Word Wrangler game
4 | WORD_CATEGORIES = {
5 | "animals": [
6 | "elephant",
7 | "penguin",
8 | "giraffe",
9 | "dolphin",
10 | "kangaroo",
11 | "octopus",
12 | "panda",
13 | "tiger",
14 | "koala",
15 | "flamingo",
16 | "hedgehog",
17 | "turtle",
18 | "zebra",
19 | "eagle",
20 | "sloth",
21 | "raccoon",
22 | "chameleon",
23 | "squirrel",
24 | "hamster",
25 | "cheetah",
26 | "platypus",
27 | "jellyfish",
28 | "parrot",
29 | "wolf",
30 | "hippo",
31 | "porcupine",
32 | "ostrich",
33 | "peacock",
34 | "alligator",
35 | "gorilla",
36 | "armadillo",
37 | "chipmunk",
38 | "walrus",
39 | "weasel",
40 | "skunk",
41 | "llama",
42 | "badger",
43 | "mongoose",
44 | "lemur",
45 | "otter",
46 | "bison",
47 | "falcon",
48 | "meerkat",
49 | "pelican",
50 | "cobra",
51 | "salamander",
52 | "lobster",
53 | "seal",
54 | "narwhal",
55 | "iguana",
56 | "piranha",
57 | "toucan",
58 | "moose",
59 | "lynx",
60 | "stingray",
61 | "starfish",
62 | "beaver",
63 | "vulture",
64 | "antelope",
65 | "jaguar",
66 | "seahorse",
67 | ],
68 | "food": [
69 | "pizza",
70 | "sushi",
71 | "burrito",
72 | "pancake",
73 | "donut",
74 | "lasagna",
75 | "popcorn",
76 | "chocolate",
77 | "mango",
78 | "pretzel",
79 | "taco",
80 | "waffle",
81 | "cupcake",
82 | "avocado",
83 | "cookie",
84 | "croissant",
85 | "omelette",
86 | "cheesecake",
87 | "dumpling",
88 | "hummus",
89 | "gelato",
90 | "risotto",
91 | "ramen",
92 | "salsa",
93 | "kebab",
94 | "brownie",
95 | "guacamole",
96 | "bagel",
97 | "falafel",
98 | "biscuit",
99 | "churro",
100 | "meatball",
101 | "tiramisu",
102 | "enchilada",
103 | "couscous",
104 | "gumbo",
105 | "jambalaya",
106 | "baklava",
107 | "popsicle",
108 | "cannoli",
109 | "tofu",
110 | "macaron",
111 | "empanada",
112 | "pho",
113 | "casserole",
114 | "porridge",
115 | "granola",
116 | "fritter",
117 | "hazelnut",
118 | "kiwi",
119 | "pomegranate",
120 | "artichoke",
121 | "edamame",
122 | "zucchini",
123 | "cashew",
124 | "brisket",
125 | "custard",
126 | "nutmeg",
127 | "ginger",
128 | ],
129 | "household": [
130 | "chair",
131 | "pillow",
132 | "mirror",
133 | "blanket",
134 | "lamp",
135 | "curtain",
136 | "sofa",
137 | "refrigerator",
138 | "blender",
139 | "bookshelf",
140 | "dishwasher",
141 | "carpet",
142 | "microwave",
143 | "table",
144 | "clock",
145 | "vase",
146 | "ottoman",
147 | "candle",
148 | "drawer",
149 | "cabinet",
150 | "doorknob",
151 | "silverware",
152 | "bathtub",
153 | "plunger",
154 | "toaster",
155 | "kettle",
156 | "spatula",
157 | "doormat",
158 | "hanger",
159 | "blinds",
160 | "ladle",
161 | "platter",
162 | "coaster",
163 | "napkin",
164 | "sponge",
165 | "thermostat",
166 | "showerhead",
167 | "coatrack",
168 | "nightstand",
169 | "cushion",
170 | "windowsill",
171 | "bedsheet",
172 | "countertop",
173 | "dustpan",
174 | "footstool",
175 | "flowerpot",
176 | "trashcan",
177 | "colander",
178 | "detergent",
179 | "chandelier",
180 | "laundry",
181 | "vacuum",
182 | "teapot",
183 | "duster",
184 | "lightbulb",
185 | "corkscrew",
186 | "paperweight",
187 | "doorstop",
188 | "radiator",
189 | ],
190 | "activities": [
191 | "swimming",
192 | "painting",
193 | "dancing",
194 | "gardening",
195 | "skiing",
196 | "cooking",
197 | "hiking",
198 | "reading",
199 | "yoga",
200 | "fishing",
201 | "jogging",
202 | "biking",
203 | "baking",
204 | "singing",
205 | "camping",
206 | "knitting",
207 | "surfing",
208 | "photography",
209 | "bowling",
210 | "archery",
211 | "horseback",
212 | "meditation",
213 | "gymnastics",
214 | "volleyball",
215 | "tennis",
216 | "skating",
217 | "kayaking",
218 | "climbing",
219 | "juggling",
220 | "rowing",
221 | "snorkeling",
222 | "embroidery",
223 | "canoeing",
224 | "paddleboarding",
225 | "pottery",
226 | "birdwatching",
227 | "karaoke",
228 | "sailing",
229 | "pilates",
230 | "calligraphy",
231 | "skateboarding",
232 | "crossword",
233 | "origami",
234 | "beekeeping",
235 | "stargazing",
236 | "snowboarding",
237 | "woodworking",
238 | "fencing",
239 | "quilting",
240 | "foraging",
241 | "geocaching",
242 | "scrapbooking",
243 | "welding",
244 | "glassblowing",
245 | "whittling",
246 | "ziplining",
247 | ],
248 | "places": [
249 | "beach",
250 | "library",
251 | "mountain",
252 | "airport",
253 | "stadium",
254 | "museum",
255 | "hospital",
256 | "castle",
257 | "garden",
258 | "hotel",
259 | "island",
260 | "desert",
261 | "university",
262 | "restaurant",
263 | "forest",
264 | "aquarium",
265 | "theater",
266 | "canyon",
267 | "lighthouse",
268 | "waterfall",
269 | "vineyard",
270 | "cathedral",
271 | "rainforest",
272 | "farmhouse",
273 | "greenhouse",
274 | "observatory",
275 | "marketplace",
276 | "boardwalk",
277 | "temple",
278 | "courtyard",
279 | "plantation",
280 | "lagoon",
281 | "volcano",
282 | "meadow",
283 | "oasis",
284 | "grotto",
285 | "peninsula",
286 | "aviary",
287 | "chapel",
288 | "coliseum",
289 | "bazaar",
290 | "marina",
291 | "orchard",
292 | "brewery",
293 | "sanctuary",
294 | "fortress",
295 | "prairie",
296 | "reservation",
297 | "tavern",
298 | "monument",
299 | "manor",
300 | "pavilion",
301 | "boulevard",
302 | "campground",
303 | ],
304 | "objects": [
305 | "umbrella",
306 | "scissors",
307 | "camera",
308 | "wallet",
309 | "bicycle",
310 | "backpack",
311 | "telescope",
312 | "balloon",
313 | "compass",
314 | "notebook",
315 | "keyboard",
316 | "magnet",
317 | "headphones",
318 | "hammer",
319 | "envelope",
320 | "binoculars",
321 | "tambourine",
322 | "boomerang",
323 | "megaphone",
324 | "suitcase",
325 | "pinwheel",
326 | "kaleidoscope",
327 | "microscope",
328 | "hourglass",
329 | "harmonica",
330 | "trampoline",
331 | "bubblegum",
332 | "xylophone",
333 | "typewriter",
334 | "screwdriver",
335 | "whistle",
336 | "chessboard",
337 | "handcuffs",
338 | "stethoscope",
339 | "stopwatch",
340 | "parachute",
341 | "blowtorch",
342 | "calculator",
343 | "thermometer",
344 | "mousetrap",
345 | "crowbar",
346 | "paintbrush",
347 | "metronome",
348 | "surfboard",
349 | "flipchart",
350 | "dartboard",
351 | "wrench",
352 | "flippers",
353 | "thimble",
354 | "protractor",
355 | "snorkel",
356 | "doorbell",
357 | "flashlight",
358 | "pendulum",
359 | "abacus",
360 | ],
361 | "jobs": [
362 | "teacher",
363 | "doctor",
364 | "chef",
365 | "firefighter",
366 | "pilot",
367 | "astronaut",
368 | "carpenter",
369 | "musician",
370 | "detective",
371 | "scientist",
372 | "farmer",
373 | "architect",
374 | "journalist",
375 | "electrician",
376 | "dentist",
377 | "veterinarian",
378 | "librarian",
379 | "photographer",
380 | "mechanic",
381 | "attorney",
382 | "barista",
383 | "plumber",
384 | "bartender",
385 | "surgeon",
386 | "therapist",
387 | "animator",
388 | "programmer",
389 | "pharmacist",
390 | "translator",
391 | "accountant",
392 | "florist",
393 | "butcher",
394 | "lifeguard",
395 | "beekeeper",
396 | "locksmith",
397 | "choreographer",
398 | "mortician",
399 | "paramedic",
400 | "blacksmith",
401 | "surveyor",
402 | "botanist",
403 | "chiropractor",
404 | "undertaker",
405 | "acrobat",
406 | "welder",
407 | "hypnotist",
408 | "zoologist",
409 | "mime",
410 | "sommelier",
411 | "meteorologist",
412 | "stuntman",
413 | "diplomat",
414 | "entomologist",
415 | "puppeteer",
416 | "archivist",
417 | "cartographer",
418 | "paleontologist",
419 | ],
420 | "transportation": [
421 | "helicopter",
422 | "submarine",
423 | "scooter",
424 | "sailboat",
425 | "train",
426 | "motorcycle",
427 | "airplane",
428 | "canoe",
429 | "tractor",
430 | "limousine",
431 | "escalator",
432 | "skateboard",
433 | "ambulance",
434 | "ferry",
435 | "rocket",
436 | "hovercraft",
437 | "gondola",
438 | "segway",
439 | "zeppelin",
440 | "bulldozer",
441 | "speedboat",
442 | "unicycle",
443 | "monorail",
444 | "snowmobile",
445 | "paddleboat",
446 | "trolley",
447 | "rickshaw",
448 | "caboose",
449 | "glider",
450 | "bobsled",
451 | "jetpack",
452 | "forklift",
453 | "dirigible",
454 | "chariot",
455 | "sidecar",
456 | "tandem",
457 | "battleship",
458 | "catamaran",
459 | "toboggan",
460 | "dinghy",
461 | "hydrofoil",
462 | "sleigh",
463 | "hatchback",
464 | "kayak",
465 | "stagecoach",
466 | "tugboat",
467 | "airship",
468 | "skiff",
469 | "carriage",
470 | "rowboat",
471 | "chairlift",
472 | "steamroller",
473 | ],
474 | "clothing": [
475 | "sweater",
476 | "sandals",
477 | "tuxedo",
478 | "poncho",
479 | "sneakers",
480 | "bikini",
481 | "cardigan",
482 | "overalls",
483 | "kimono",
484 | "mittens",
485 | "suspenders",
486 | "kilt",
487 | "leggings",
488 | "apron",
489 | "bowtie",
490 | "earmuffs",
491 | "fedora",
492 | "wetsuit",
493 | "pajamas",
494 | "sombrero",
495 | "raincoat",
496 | "beret",
497 | "turtleneck",
498 | "parka",
499 | "tiara",
500 | "toga",
501 | "bandana",
502 | "corset",
503 | "sarong",
504 | "tunic",
505 | "visor",
506 | "ascot",
507 | "fez",
508 | "moccasins",
509 | "blazer",
510 | "chaps",
511 | "romper",
512 | "waders",
513 | "clogs",
514 | "garter",
515 | "camisole",
516 | "galoshes",
517 | "bolero",
518 | "spats",
519 | "pantyhose",
520 | "onesie",
521 | "stiletto",
522 | "vest",
523 | "windbreaker",
524 | "scarf",
525 | "bonnet",
526 | ],
527 | "nature": [
528 | "glacier",
529 | "sequoia",
530 | "geyser",
531 | "avalanche",
532 | "tornado",
533 | "quicksand",
534 | "stalactite",
535 | "hurricane",
536 | "asteroid",
537 | "tundra",
538 | "galaxy",
539 | "nebula",
540 | "earthquake",
541 | "stalagmite",
542 | "constellation",
543 | "crystal",
544 | "tributary",
545 | "abyss",
546 | "monsoon",
547 | "magma",
548 | "erosion",
549 | "iceberg",
550 | "mudslide",
551 | "delta",
552 | "aurora",
553 | "gravity",
554 | "humidity",
555 | "sinkhole",
556 | "wildfire",
557 | "tropics",
558 | "tsunami",
559 | "eclipse",
560 | "metabolism",
561 | "mirage",
562 | "hemisphere",
563 | "spectrum",
564 | "fossil",
565 | "plateau",
566 | "groundwater",
567 | "undergrowth",
568 | "oxygen",
569 | "molecule",
570 | "pollination",
571 | "algae",
572 | "carbon",
573 | "nitrogen",
574 | "organism",
575 | "nucleus",
576 | "equator",
577 | "solstice",
578 | "cocoon",
579 | "germination",
580 | "metamorphosis",
581 | "nocturnal",
582 | "symbiosis",
583 | "ecosystem",
584 | "biodiversity",
585 | ],
586 | "emotions": [
587 | "happiness",
588 | "sadness",
589 | "anxiety",
590 | "surprise",
591 | "anger",
592 | "curiosity",
593 | "embarrassment",
594 | "nostalgia",
595 | "envy",
596 | "gratitude",
597 | "remorse",
598 | "boredom",
599 | "excitement",
600 | "loneliness",
601 | "pride",
602 | "jealousy",
603 | "contentment",
604 | "disgust",
605 | "empathy",
606 | "euphoria",
607 | "melancholy",
608 | "frustration",
609 | "anticipation",
610 | "amusement",
611 | "serenity",
612 | "disappointment",
613 | "confidence",
614 | "resentment",
615 | "apathy",
616 | "optimism",
617 | "pessimism",
618 | "bewilderment",
619 | "exhilaration",
620 | "indifference",
621 | "enthusiasm",
622 | "desperation",
623 | "satisfaction",
624 | "regret",
625 | "determination",
626 | "compassion",
627 | "hopelessness",
628 | "relief",
629 | "infatuation",
630 | "tranquility",
631 | "impatience",
632 | "exasperation",
633 | "agitation",
634 | "yearning",
635 | "sympathy",
636 | "admiration",
637 | "astonishment",
638 | "inspiration",
639 | "dread",
640 | "hope",
641 | ],
642 | }
643 |
644 |
def generate_game_words(num_words=20):
    """Generate a random selection of unique words for the Word Wrangler game.

    Words from every category in ``WORD_CATEGORIES`` are pooled, duplicates
    are dropped, and up to ``num_words`` of them are sampled without
    replacement.

    Args:
        num_words: Number of words to select for the game. Requests larger
            than the pool are capped at the pool size; requests below zero
            are treated as zero.

    Returns:
        List of randomly selected, distinct words.
    """
    # dict.fromkeys() de-duplicates while keeping first-seen order. A set
    # would also de-duplicate, but string hashing is randomized per process,
    # so the pool order (and therefore the sample drawn after a given
    # random.seed()) would change from one run to the next.
    all_words = list(
        dict.fromkeys(
            word for category_words in WORD_CATEGORIES.values() for word in category_words
        )
    )

    # Clamp the request into the range random.sample() accepts.
    sample_size = max(0, min(num_words, len(all_words)))

    return random.sample(all_words, sample_size)
670 |
--------------------------------------------------------------------------------
/server/bot_phone_twilio.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2025, Daily
3 | #
4 | # SPDX-License-Identifier: BSD 2-Clause License
5 | #
6 |
7 | """Word Wrangler: A voice-based word guessing game.
8 |
9 | This demo version is intended to be deployed to
10 | Pipecat Cloud. For more information, visit:
11 | - Deployment Quickstart: https://docs.pipecat.daily.co/quickstart
12 | - Build for Twilio: https://docs.pipecat.daily.co/pipecat-in-production/telephony/twilio-mediastreams
13 | """
14 |
15 | import asyncio
16 | import json
17 | import os
18 | import re
19 | import sys
20 | from typing import Any, Mapping, Optional
21 |
22 | from dotenv import load_dotenv
23 | from fastapi import WebSocket
24 | from loguru import logger
25 | from pipecatcloud import WebSocketSessionArguments
26 | from word_list import generate_game_words
27 |
28 | from pipecat.audio.filters.krisp_filter import KrispFilter
29 | from pipecat.audio.resamplers.soxr_resampler import SOXRAudioResampler
30 | from pipecat.audio.vad.silero import SileroVADAnalyzer
31 | from pipecat.frames.frames import (
32 | BotStoppedSpeakingFrame,
33 | CancelFrame,
34 | EndFrame,
35 | Frame,
36 | InputAudioRawFrame,
37 | LLMFullResponseEndFrame,
38 | LLMTextFrame,
39 | StartFrame,
40 | TTSAudioRawFrame,
41 | TTSSpeakFrame,
42 | )
43 | from pipecat.pipeline.parallel_pipeline import ParallelPipeline
44 | from pipecat.pipeline.pipeline import Pipeline
45 | from pipecat.pipeline.runner import PipelineRunner
46 | from pipecat.pipeline.task import PipelineParams, PipelineTask
47 | from pipecat.processors.aggregators.openai_llm_context import (
48 | OpenAILLMContext,
49 | )
50 | from pipecat.processors.consumer_processor import ConsumerProcessor
51 | from pipecat.processors.filters.stt_mute_filter import STTMuteConfig, STTMuteFilter, STTMuteStrategy
52 | from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
53 | from pipecat.processors.producer_processor import ProducerProcessor
54 | from pipecat.serializers.twilio import TwilioFrameSerializer
55 | from pipecat.services.gemini_multimodal_live.gemini import (
56 | GeminiMultimodalLiveLLMService,
57 | GeminiMultimodalModalities,
58 | InputParams,
59 | )
60 | from pipecat.services.google.tts import GoogleTTSService
61 | from pipecat.sync.base_notifier import BaseNotifier
62 | from pipecat.sync.event_notifier import EventNotifier
63 | from pipecat.transports.network.fastapi_websocket import (
64 | FastAPIWebsocketParams,
65 | FastAPIWebsocketTransport,
66 | )
67 | from pipecat.utils.text.base_text_filter import BaseTextFilter
68 |
# Load variables from a .env file; override=True lets values from the file
# replace variables that are already set in the process environment.
load_dotenv(override=True)


# Send DEBUG-level (and above) log records to stderr.
logger.add(sys.stderr, level="DEBUG")

# Game configuration.
# NOTE(review): both prompt templates below tell the models the round lasts
# "60 seconds", while this constant (and GameTimer's default) is 120 — confirm
# which value is intended.
GAME_DURATION_SECONDS = 120
NUM_WORDS_PER_GAME = 20
# Presumably a Google TTS voice name used for the host — TODO confirm at the
# point where the TTS service is constructed.
HOST_VOICE_ID = "en-US-Chirp3-HD-Charon"
# Default voice_id for ResettablePlayerLLM.
PLAYER_VOICE_ID = "Kore"
78 |
79 | # Define conversation modes with their respective prompt templates
80 | game_player_prompt = """You are a player for a game of Word Wrangler.
81 |
82 | GAME RULES:
83 | 1. The user will be given a word or phrase that they must describe to you
84 | 2. The user CANNOT say any part of the word/phrase directly
85 | 3. You must try to guess the word/phrase based on the user's description
86 | 4. Once you guess correctly, the user will move on to their next word
87 | 5. The user is trying to get through as many words as possible in 60 seconds
88 | 6. The external application will handle timing and keeping score
89 |
90 | YOUR ROLE:
91 | 1. Listen carefully to the user's descriptions
92 | 2. Make intelligent guesses based on what they say
93 | 3. When you think you know the answer, state it clearly: "Is it [your guess]?"
94 | 4. If you're struggling, ask for more specific clues
95 | 5. Keep the game moving quickly - make guesses promptly
96 | 6. Be enthusiastic and encouraging
97 |
98 | IMPORTANT:
99 | - Keep all responses brief - the game is timed!
100 | - Make multiple guesses if needed
101 | - Use your common knowledge to make educated guesses
102 | - If the user indicates you got it right, just say "Got it!" and prepare for the next word
103 | - If you've made several wrong guesses, simply ask for "Another clue please?"
104 |
105 | Start by guessing once you hear the user describe the word or phrase."""
106 |
107 | game_host_prompt = """You are the AI host for a game of Word Wrangler. There are two players in the game: the human describer and the AI guesser.
108 |
109 | GAME RULES:
110 | 1. You, the host, will give the human describer a word or phrase that they must describe
111 | 2. The describer CANNOT say any part of the word/phrase directly
112 | 3. The AI guesser will try to guess the word/phrase based on the describer's description
113 | 4. Once the guesser guesses correctly, move on to the next word
114 | 5. The describer is trying to get through as many words as possible in 60 seconds
115 | 6. The describer can say "skip" or "pass" to get a new word if they find a word too difficult
116 | 7. The describer can ask you to repeat the current word if they didn't hear it clearly
117 | 8. You'll keep track of the score (1 point for each correct guess)
118 | 9. The external application will handle timing
119 |
120 | YOUR ROLE:
121 | 1. Start with this exact brief introduction: "Welcome to Word Wrangler! I'll give you words to describe, and the A.I. player will try to guess them. Remember, don't say any part of the word itself. Here's your first word: [word]."
122 | 2. Provide words to the describer. Choose 1 or 2 word phrases that cover a variety of topics, including animals, objects, places, and actions.
123 | 3. IMPORTANT: You will hear DIFFERENT types of input:
124 | a. DESCRIPTIONS from the human (which you should IGNORE)
125 | b. AFFIRMATIONS from the human (like "correct", "that's right", "you got it") which you should IGNORE
126 | c. GUESSES from the AI player (which will be in the form of "Is it [word]?" or similar question format)
127 | d. SKIP REQUESTS from the human (if they say "skip", "pass", or "next word please")
128 | e. REPEAT REQUESTS from the human (if they say "repeat", "what was that?", "say again", etc.)
129 |
130 | 4. HOW TO RESPOND:
131 | - If you hear a DESCRIPTION or AFFIRMATION from the human, respond with exactly "IGNORE" (no other text)
132 | - If you hear a GUESS (in question form) and it's INCORRECT, respond with exactly "NO" (no other text)
133 | - If you hear a GUESS (in question form) and it's CORRECT, respond with "Correct! That's [N] points. Your next word is [new word]" where N is the current score
134 | - If you hear a SKIP REQUEST, respond with "The new word is [new word]" (don't change the score)
135 | - If you hear a REPEAT REQUEST, respond with "Your word is [current word]" (don't change the score)
136 |
137 | 5. SCORING:
138 | - Start with a score of 0
139 | - Add 1 point for each correct guess by the AI player
140 | - Do NOT add points for skipped words
141 | - Announce the current score after every correct guess
142 |
143 | RESPONSE EXAMPLES:
144 | - Human says: "This is something you use to write" → You respond: "IGNORE"
145 | - Human says: "That's right!" or "You got it!" → You respond: "IGNORE"
146 | - Human says: "Wait, what was my word again?" → You respond: "Your word is [current word]"
147 | - Human says: "Can you repeat that?" → You respond: "Your word is [current word]"
148 | - AI says: "Is it a pen?" → If correct and it's the first point, you respond: "Correct! That's 1 point. Your next word is [new word]"
149 | - AI says: "Is it a pencil?" → If correct and it's the third point, you respond: "Correct! That's 3 points. Your next word is [new word]"
150 | - AI says: "Is it a marker?" → If incorrect, you respond: "NO"
151 | - Human says: "Skip this one" or "Pass" → You respond: "The new word is [new word]"
152 |
153 | IMPORTANT GUIDELINES:
154 | - Choose words that range from easy to moderately difficult
155 | - Keep all responses brief - the game is timed!
156 | - Your "NO" and "IGNORE" responses won't be verbalized, but will be visible in the chat
157 | - Always keep track of the CURRENT word so you can repeat it when asked
158 | - Always keep track of the CURRENT SCORE and announce it after every correct guess
159 | - Make sure your word choices are appropriate for all audiences
160 | - If the human asks to skip, always provide a new word immediately without changing the score
161 | - If the human asks you to repeat the word, say ONLY "Your word is [current word]" - don't add additional text
162 | - CRUCIAL: Never interpret the human saying "correct", "that's right", "good job", or similar affirmations as a correct guess. These are just the human giving feedback to the AI player.
163 |
164 | Start with the exact introduction specified above and give the first word."""
165 |
166 |
class HostResponseTextFilter(BaseTextFilter):
    """Text filter that silences the host's control-only replies.

    The host answers with a bare "NO" or "IGNORE" when there is nothing to say
    aloud. This filter turns those replies into an empty string so they are
    not verbalized; every other text is returned untouched.
    """

    def __init__(self):
        # Records whether an interruption is currently in effect.
        self._interrupted = False

    def update_settings(self, settings: Mapping[str, Any]):
        # This filter has nothing configurable.
        pass

    async def filter(self, text: str) -> str:
        """Return "" for a bare NO/IGNORE reply, otherwise ``text`` unchanged.

        The comparison ignores surrounding whitespace and letter case.
        """
        normalized = text.strip().upper()
        return "" if normalized in ("NO", "IGNORE") else text

    async def handle_interruption(self):
        self._interrupted = True

    async def reset_interruption(self):
        self._interrupted = False
196 |
197 |
class BotStoppedSpeakingNotifier(FrameProcessor):
    """Pass-through processor that fires a notifier on BotStoppedSpeakingFrame.

    Every frame is forwarded unchanged in the direction it arrived; a
    ``BotStoppedSpeakingFrame`` additionally triggers ``notifier.notify()``
    before it is forwarded.
    """

    def __init__(self, notifier: BaseNotifier):
        super().__init__()
        self._notifier = notifier

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        host_finished = isinstance(frame, BotStoppedSpeakingFrame)
        if host_finished:
            logger.debug(f"{self}: Host bot stopped speaking, notifying listeners")
            await self._notifier.notify()

        # The frame itself is never consumed here.
        await self.push_frame(frame, direction)
215 |
216 |
class StartFrameGate(FrameProcessor):
    """A gate that holds back the StartFrame until a notifier fires.

    While the gate is closed, a StartFrame is stored and a background task
    waits on the notifier; any other frame that arrives in that period is not
    forwarded. When the notifier fires, the stored StartFrame is pushed and
    from then on every frame passes straight through.
    """

    def __init__(self, notifier: BaseNotifier):
        super().__init__()
        self._notifier = notifier
        self._blocked_start_frame: Optional[Frame] = None
        self._gate_opened = False
        self._gate_task: Optional[asyncio.Task] = None

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        if self._gate_opened:
            # Open gate: plain pass-through.
            await self.push_frame(frame, direction)
            return

        if not isinstance(frame, StartFrame):
            # Closed gate and not a StartFrame: nothing is forwarded.
            return

        # Hold on to the StartFrame until the notifier releases it.
        logger.debug(f"{self}: Blocking StartFrame until host bot stops speaking")
        self._blocked_start_frame = frame

        # A single waiter task is enough, however many StartFrames show up.
        if self._gate_task is None:
            self._gate_task = self.create_task(self._wait_for_notification())

    async def _wait_for_notification(self):
        """Wait for the notifier, then open the gate and release the StartFrame."""
        try:
            await self._notifier.wait()

            # Opening the gate must only happen once.
            if self._gate_opened:
                return

            self._gate_opened = True
            logger.debug(f"{self}: Gate opened, passing through blocked StartFrame")

            held_frame = self._blocked_start_frame
            if held_frame:
                await self.push_frame(held_frame)
                self._blocked_start_frame = None
        except asyncio.CancelledError:
            logger.debug(f"{self}: Gate task was cancelled")
            raise
        except Exception as e:
            logger.exception(f"{self}: Error in gate task: {e}")
            raise
265 |
266 |
class GameStateTracker(FrameProcessor):
    """Tracks game state including new words and score by monitoring host responses.

    This processor aggregates streamed text from the host LLM and, once a full
    response has been received, inspects it for:
    1. New word announcements (which fire ``new_word_notifier``)
    2. Score updates (to track the current score)

    All frames are forwarded unchanged.
    """

    def __init__(self, new_word_notifier: BaseNotifier):
        super().__init__()
        self._new_word_notifier = new_word_notifier
        self._text_buffer = ""
        self._current_score = 0

        # Words/phrases that indicate a new word being provided
        self._key_phrases = ["your word is", "new word is", "next word is"]

        # Pattern to extract score from responses
        self._score_pattern = re.compile(r"that's (\d+) point", re.IGNORECASE)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Buffer host text, analyze it at end of response, and forward the frame."""
        await super().process_frame(frame, direction)

        if isinstance(frame, LLMTextFrame):
            text = frame.text
            if text.strip() in ["NO", "IGNORE"]:
                # Control-only replies carry no game state; keep them out of
                # the buffer.
                logger.debug("Skipping NO/IGNORE response")
            else:
                self._text_buffer += text
        elif isinstance(frame, LLMFullResponseEndFrame):
            # The response is complete: analyze what was collected, then start
            # over for the next response.
            if self._text_buffer:
                await self._handle_host_response(self._text_buffer.lower())
            self._text_buffer = ""

        # Always push the frame through
        await self.push_frame(frame, direction)

    async def _handle_host_response(self, buffer_lower: str):
        """Inspect one complete, lower-cased host response."""
        # 1. Check for new word announcements
        if any(phrase in buffer_lower for phrase in self._key_phrases):
            await self._new_word_notifier.notify()
        else:
            logger.debug("No new word phrases detected")

        # 2. Check for score updates
        self._track_score(buffer_lower)

    def _track_score(self, buffer_lower: str):
        """Raise the tracked score if the response announces a higher one."""
        score_match = self._score_pattern.search(buffer_lower)
        if score_match is None:
            logger.debug(f"No score pattern match in: '{buffer_lower}'")
            return

        # The capture group is ``\d+``, so int() cannot fail here.
        score = int(score_match.group(1))

        # Only update if the new score is higher
        if score > self._current_score:
            logger.debug(f"Score updated from {self._current_score} to {score}")
            self._current_score = score
        else:
            logger.debug(f"Ignoring score {score} <= current score {self._current_score}")

    @property
    def current_score(self) -> int:
        """Get the current score."""
        return self._current_score
347 |
348 |
class GameTimer:
    """Manages the game timer and triggers end-game events.

    When the timer expires, a spoken end-of-game message containing the final
    score is queued on the pipeline task, followed by an ``EndFrame``.
    """

    def __init__(
        self,
        task: PipelineTask,
        game_state_tracker: GameStateTracker,
        game_duration_seconds: int = 120,
    ):
        """Create a timer; it does not run until ``start()`` is called.

        Args:
            task: Pipeline task that receives the end-of-game frames.
            game_state_tracker: Source of the final score.
            game_duration_seconds: Length of the game in seconds.
        """
        self._task = task
        self._game_state_tracker = game_state_tracker
        self._game_duration = game_duration_seconds
        self._timer_task = None
        self._start_time = None
        # Event loop whose clock produced ``_start_time``; set by start().
        self._loop = None

    def start(self):
        """Start the game timer.

        Must be called while an event loop is running. Does nothing if a timer
        task already exists.

        Raises:
            RuntimeError: If there is no running event loop.
        """
        if self._timer_task is None:
            # get_running_loop() fails up front when no loop is running, so no
            # state is left half-initialized, and it avoids the deprecated
            # implicit-loop behavior of asyncio.get_event_loop().
            self._loop = asyncio.get_running_loop()
            self._start_time = self._loop.time()
            self._timer_task = asyncio.create_task(self._run_timer())
            logger.info(f"Game timer started: {self._game_duration} seconds")

    def stop(self):
        """Stop the game timer."""
        if self._timer_task:
            self._timer_task.cancel()
            self._timer_task = None
            logger.info("Game timer stopped")

    def get_remaining_time(self) -> int:
        """Get the remaining time in whole seconds (never negative).

        Returns the full game duration if the timer has not been started.
        """
        if self._start_time is None:
            return self._game_duration

        # Read the same loop clock that recorded the start time.
        elapsed = self._loop.time() - self._start_time
        return max(0, self._game_duration - int(elapsed))

    async def _run_timer(self):
        """Run the timer and end the game when time is up."""
        try:
            # Wait for the game duration
            await asyncio.sleep(self._game_duration)

            # Game time is up, get the final score
            final_score = self._game_state_tracker.current_score

            # Create end game message ("point" is pluralized unless the score is 1)
            end_message = f"Time's up! Thank you for playing Word Wrangler. Your final score is {final_score} point"
            if final_score != 1:
                end_message += "s"
            end_message += ". Great job!"

            logger.info(f"Game over! Final score: {final_score}")

            # Speak the end game message, then end the game.
            await self._task.queue_frames([TTSSpeakFrame(text=end_message), EndFrame()])

        except asyncio.CancelledError:
            logger.debug("Game timer task cancelled")
        except Exception as e:
            logger.exception(f"Error in game timer: {e}")
413 |
414 |
class ResettablePlayerLLM(GeminiMultimodalLiveLLMService):
    """Player LLM service that resets itself whenever a new word is announced.

    A new-word notification disconnects the service right away and restores
    the original system instruction. The reconnect is deferred until the
    host-stopped-speaking notifier fires.
    """

    def __init__(
        self,
        api_key: str,
        system_instruction: str,
        new_word_notifier: BaseNotifier,
        host_stopped_speaking_notifier: BaseNotifier,
        voice_id: str = PLAYER_VOICE_ID,
        **kwargs,
    ):
        super().__init__(
            api_key=api_key, voice_id=voice_id, system_instruction=system_instruction, **kwargs
        )
        self._new_word_notifier = new_word_notifier
        self._host_stopped_speaking_notifier = host_stopped_speaking_notifier
        # Pristine copy of the instruction, restored on every reset.
        self._base_system_instruction = system_instruction
        self._reset_task: Optional[asyncio.Task] = None
        # True between a disconnect and the matching reconnect.
        self._pending_reset: bool = False

    async def start(self, frame: StartFrame):
        await super().start(frame)

        # Launch the listener unless one is already alive.
        if self._reset_task is None or self._reset_task.done():
            self._reset_task = self.create_task(self._listen_for_notifications())

    async def stop(self, frame: EndFrame):
        await self._cancel_reset_task()
        await super().stop(frame)

    async def cancel(self, frame: CancelFrame):
        await self._cancel_reset_task()
        await super().cancel(frame)

    async def _cancel_reset_task(self):
        """Cancel the listener task if it is still running."""
        if self._reset_task and not self._reset_task.done():
            await self.cancel_task(self._reset_task)
            self._reset_task = None

    async def _listen_for_notifications(self):
        """Run the new-word and host-stopped listeners side by side."""
        try:
            listeners = [
                self.create_task(self._listen_for_new_word()),
                self.create_task(self._listen_for_host_stopped()),
            ]

            # Both listeners loop forever, so this is not expected to return.
            await asyncio.gather(*listeners)

        except asyncio.CancelledError:
            logger.debug(f"{self}: Notification listener tasks cancelled")
            raise
        except Exception as e:
            logger.exception(f"{self}: Error in notification listeners: {e}")
            raise

    async def _listen_for_new_word(self):
        """On each new-word notification, disconnect and mark a reset as pending."""
        while True:
            await self._new_word_notifier.wait()
            logger.info(
                f"{self}: Received new word notification, disconnecting and waiting for host to finish"
            )

            # Drop the connection first so processing stops immediately.
            await self._disconnect()

            # Go back to the original system instruction.
            self._system_instruction = self._base_system_instruction

            # The reconnect happens once the host has stopped speaking.
            self._pending_reset = True

    async def _listen_for_host_stopped(self):
        """On each host-stopped notification, reconnect if a reset is pending."""
        while True:
            await self._host_stopped_speaking_notifier.wait()

            if not self._pending_reset:
                continue

            logger.info(f"{self}: Host finished speaking, completing the LLM reset")

            await self._connect()
            self._pending_reset = False

            logger.info(f"{self}: LLM reset complete")
514 |
515 |
async def tts_audio_raw_frame_filter(frame: Frame):
    """Tell whether ``frame`` is a TTSAudioRawFrame.

    ``main`` passes this as the ``filter`` callback of its ProducerProcessor.

    Args:
        frame (Frame): The frame to inspect.

    Returns:
        bool: True for TTSAudioRawFrame instances, False for anything else.
    """
    if isinstance(frame, TTSAudioRawFrame):
        return True
    return False
519 |
520 |
# Module-level resampler, built once at import time and shared by every call
# to tts_to_input_audio_transformer.
resampler = SOXRAudioResampler()
523 |
524 |
async def tts_to_input_audio_transformer(frame: Frame):
    """Turn a TTSAudioRawFrame into a 16 kHz InputAudioRawFrame.

    ``main`` uses this as the transformer of the ProducerProcessor that sits
    after the player LLM; the matching ConsumerProcessor is the first element
    of the host branch, so the converted audio re-enters the pipeline there.

    Args:
        frame (Frame): The frame to transform (expected to be TTSAudioRawFrame)

    Returns:
        InputAudioRawFrame: A new frame holding the audio resampled from
        ``frame.sample_rate`` to 16000 Hz with the same channel count, or
        None when ``frame`` is not a TTSAudioRawFrame.
    """
    if not isinstance(frame, TTSAudioRawFrame):
        # Same outcome as falling off the end of the function.
        return None

    target_rate = 16000
    converted = await resampler.resample(frame.audio, frame.sample_rate, target_rate)

    return InputAudioRawFrame(
        audio=converted,
        sample_rate=target_rate,
        num_channels=frame.num_channels,
    )
551 |
552 |
async def main(ws: WebSocket):
    """Run one Word Wrangler game over a Twilio WebSocket connection.

    Reads the first two WebSocket messages to obtain the Twilio stream and call
    identifiers, builds the transport, the host and player branches and the
    game timer, then runs the pipeline until the runner returns.

    Args:
        ws: The WebSocket connection the call is streamed over.
    """
    logger.debug("Starting WebSocket bot")

    # Pick this game's words and render them as a quoted, comma-separated list
    # for the host's system instruction.
    game_words = generate_game_words(NUM_WORDS_PER_GAME)
    words_string = ", ".join(f'"{word}"' for word in game_words)
    logger.debug(f"Game words: {words_string}")

    # Read initial WebSocket messages; the first one is consumed and discarded.
    start_data = ws.iter_text()
    await start_data.__anext__()

    # Second message contains the call details
    call_data = json.loads(await start_data.__anext__())

    # Extract both StreamSid and CallSid
    stream_sid = call_data["start"]["streamSid"]
    call_sid = call_data["start"]["callSid"]

    logger.info(f"Connected to Twilio call: CallSid={call_sid}, StreamSid={stream_sid}")

    # Create the serializer with both IDs and the Twilio account credentials.
    # NOTE(review): the credentials are presumably what lets the serializer hang
    # up the call when the pipeline ends -- confirm against TwilioFrameSerializer.
    serializer = TwilioFrameSerializer(
        stream_sid=stream_sid,
        call_sid=call_sid,
        account_sid=os.getenv("TWILIO_ACCOUNT_SID"),
        auth_token=os.getenv("TWILIO_AUTH_TOKEN"),
    )

    # WebSocket transport carrying the call audio in both directions.
    transport = FastAPIWebsocketTransport(
        websocket=ws,
        params=FastAPIWebsocketParams(
            audio_in_enabled=True,
            audio_in_filter=KrispFilter(),
            audio_out_enabled=True,
            add_wav_header=False,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
            serializer=serializer,
        ),
    )

    # System instruction for the player LLM: the shared player prompt plus
    # speech-oriented guidelines.
    player_instruction = f"""{game_player_prompt}

Important guidelines:
1. Your responses will be converted to speech, so keep them concise and conversational.
2. Don't use special characters or formatting that wouldn't be natural in speech.
3. Encourage the user to elaborate when appropriate."""

    # System instruction for the host LLM: the shared host prompt, this game's
    # word list and speech-oriented guidelines.
    host_instruction = f"""{game_host_prompt}

GAME WORDS:
Use ONLY these words for the game (in any order): {words_string}

Important guidelines:
1. Your responses will be converted to speech, so keep them concise and conversational.
2. Don't use special characters or formatting that wouldn't be natural in speech.
3. ONLY use words from the provided list above when giving words to the player."""

    # First user message in the host's context; it asks for the scripted intro.
    intro_message = """Start with this exact brief introduction: "Welcome to Word Wrangler! I'll give you words to describe, and the A.I. player will try to guess them. Remember, don't say any part of the word itself. Here's your first word: [word]." """

    # Create the STT mute filter with the MUTE_UNTIL_FIRST_BOT_COMPLETE strategy
    stt_mute_filter = STTMuteFilter(
        config=STTMuteConfig(strategies={STTMuteStrategy.MUTE_UNTIL_FIRST_BOT_COMPLETE})
    )

    # Host LLM is configured for text output; host_tts below turns it into speech.
    host_llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=host_instruction,
        params=InputParams(modalities=GeminiMultimodalModalities.TEXT),
    )

    # HostResponseTextFilter is applied to the host's text before synthesis.
    host_tts = GoogleTTSService(
        voice_id=HOST_VOICE_ID,
        credentials_path=os.getenv("GOOGLE_TEST_CREDENTIALS_FILE"),
        text_filters=[HostResponseTextFilter()],
    )

    # Producer/consumer pair: TTS audio frames seen by the producer are
    # converted by tts_to_input_audio_transformer and handed to the consumer.
    producer = ProducerProcessor(
        filter=tts_audio_raw_frame_filter,
        transformer=tts_to_input_audio_transformer,
        passthrough=True,
    )
    consumer = ConsumerProcessor(producer=producer)

    # Create the notifiers
    bot_speaking_notifier = EventNotifier()
    new_word_notifier = EventNotifier()

    # Create BotStoppedSpeakingNotifier to detect when host bot stops speaking
    bot_stopped_speaking_detector = BotStoppedSpeakingNotifier(bot_speaking_notifier)

    # Create StartFrameGate to block Player LLM until host has stopped speaking
    start_frame_gate = StartFrameGate(bot_speaking_notifier)

    # Create GameStateTracker to handle new words and score tracking
    game_state_tracker = GameStateTracker(new_word_notifier)

    # Create a resettable player LLM that coordinates between notifiers
    player_llm = ResettablePlayerLLM(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=player_instruction,
        new_word_notifier=new_word_notifier,
        host_stopped_speaking_notifier=bot_speaking_notifier,
        voice_id=PLAYER_VOICE_ID,
    )

    # Set up the initial context for the conversation
    messages = [
        {
            "role": "user",
            "content": intro_message,
        },
    ]

    # Build the host LLM context from the initial messages (no tools are passed)
    context = OpenAILLMContext(messages)
    context_aggregator = host_llm.create_context_aggregator(context)

    pipeline = Pipeline(
        [
            transport.input(),  # Receive audio from the Twilio WebSocket
            stt_mute_filter,  # Filter out speech during the bot's initial turn
            ParallelPipeline(
                # Host branch: manages the game and provides words
                [
                    consumer,  # Receives audio from the player branch
                    host_llm,  # AI host that provides words and tracks score
                    game_state_tracker,  # Tracks words and score from host responses
                    host_tts,  # Converts host text to speech
                    bot_stopped_speaking_detector,  # Notifies when host stops speaking
                ],
                # Player branch: guesses words based on human descriptions
                [
                    start_frame_gate,  # Gates the player until host finishes intro
                    player_llm,  # AI player that makes guesses
                    producer,  # Collects audio frames to be passed to the consumer
                ],
            ),
            transport.output(),  # Send audio back over the Twilio WebSocket
        ]
    )

    # NOTE(review): 8000 Hz output presumably matches Twilio's telephony audio -- confirm.
    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            audio_out_sample_rate=8000,
            allow_interruptions=False,
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
    )

    # Create the game timer
    game_timer = GameTimer(task, game_state_tracker, game_duration_seconds=GAME_DURATION_SECONDS)

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info(f"Client connected: {client}")
        # Kick off the conversation
        await task.queue_frames([context_aggregator.user().get_context_frame()])
        # Start the game timer
        game_timer.start()

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info(f"Client disconnected: {client}")
        # Stop the timer
        game_timer.stop()
        # Cancel the pipeline task
        await task.cancel()

    runner = PipelineRunner(handle_sigint=False, force_gc=True)

    # Returns once the pipeline task has ended.
    await runner.run(task)
728 |
729 |
async def bot(args: WebSocketSessionArguments):
    """Main bot entry point for WebSocket connections.

    Args:
        args: The session arguments. Only ``args.websocket`` is read here; it
            is handed to ``main``.

    Raises:
        Exception: Whatever ``main`` raises, after it has been logged.
    """
    logger.info("WebSocket bot process initialized")

    try:
        await main(args.websocket)
    except Exception as e:
        # Log with traceback, then let the caller see the failure.
        logger.exception(f"Error in WebSocket bot process: {e}")
        raise
    else:
        logger.info("WebSocket bot process completed")
745 |
--------------------------------------------------------------------------------
/server/bot_phone_local.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2025, Daily
3 | #
4 | # SPDX-License-Identifier: BSD 2-Clause License
5 | #
6 |
7 | """Word Wrangler: A voice-based word guessing game.
8 |
9 | To run this demo:
10 | 1. Set up environment variables:
11 | - GOOGLE_API_KEY: API key for Google services
12 | - GOOGLE_TEST_CREDENTIALS_FILE: Path to Google credentials JSON file
13 |
14 | 2. Install requirements:
15 | pip install -r requirements.txt
16 |
3. Run in local development mode:
   LOCAL_RUN=1 python bot_phone_local.py
19 | """
20 |
21 | import asyncio
22 | import os
23 | import re
24 | import sys
25 | from typing import Any, Mapping, Optional
26 |
27 | import aiohttp
28 | from dotenv import load_dotenv
29 | from loguru import logger
30 | from pipecatcloud.agent import DailySessionArguments
31 | from word_list import generate_game_words
32 |
33 | from pipecat.audio.resamplers.soxr_resampler import SOXRAudioResampler
34 | from pipecat.audio.vad.silero import SileroVADAnalyzer
35 | from pipecat.frames.frames import (
36 | BotStoppedSpeakingFrame,
37 | CancelFrame,
38 | EndFrame,
39 | Frame,
40 | InputAudioRawFrame,
41 | LLMFullResponseEndFrame,
42 | LLMTextFrame,
43 | StartFrame,
44 | TTSAudioRawFrame,
45 | TTSSpeakFrame,
46 | )
47 | from pipecat.pipeline.parallel_pipeline import ParallelPipeline
48 | from pipecat.pipeline.pipeline import Pipeline
49 | from pipecat.pipeline.runner import PipelineRunner
50 | from pipecat.pipeline.task import PipelineParams, PipelineTask
51 | from pipecat.processors.aggregators.openai_llm_context import (
52 | OpenAILLMContext,
53 | )
54 | from pipecat.processors.consumer_processor import ConsumerProcessor
55 | from pipecat.processors.filters.stt_mute_filter import STTMuteConfig, STTMuteFilter, STTMuteStrategy
56 | from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
57 | from pipecat.processors.producer_processor import ProducerProcessor
58 | from pipecat.services.gemini_multimodal_live.gemini import (
59 | GeminiMultimodalLiveLLMService,
60 | GeminiMultimodalModalities,
61 | InputParams,
62 | )
63 | from pipecat.services.google.tts import GoogleTTSService
64 | from pipecat.sync.base_notifier import BaseNotifier
65 | from pipecat.sync.event_notifier import EventNotifier
66 | from pipecat.transports.services.daily import DailyParams, DailyTransport
67 | from pipecat.utils.text.base_text_filter import BaseTextFilter
68 |
# Load settings from a .env file; override=True lets them replace variables
# that are already present in the environment.
load_dotenv(override=True)

# Check if we're in local development mode
LOCAL_RUN = os.getenv("LOCAL_RUN")
if LOCAL_RUN:
    import webbrowser

    try:
        from runner import configure
    except ImportError:
        # The message names the module that is actually imported above.
        logger.error("Could not import runner module. Local development mode may not work.")


# Send log output to stderr at DEBUG level.
logger.add(sys.stderr, level="DEBUG")

# Default length of one game, in seconds.
GAME_DURATION_SECONDS = 120
# How many words generate_game_words() is asked for in main().
NUM_WORDS_PER_GAME = 20
# Voice identifiers for the host and for the player (the latter is the default
# voice_id of ResettablePlayerLLM).
HOST_VOICE_ID = "en-US-Chirp3-HD-Charon"
PLAYER_VOICE_ID = "Kore"
88 |
# Prompt templates for the two LLM roles. main() places each one at the top of
# the system instruction it builds: game_player_prompt for the player LLM and
# game_host_prompt for the host LLM.
# NOTE(review): both prompts say the describer has 60 seconds, while
# GAME_DURATION_SECONDS in this file is 120 -- confirm which value is intended.
game_player_prompt = """You are a player for a game of Word Wrangler.

GAME RULES:
1. The user will be given a word or phrase that they must describe to you
2. The user CANNOT say any part of the word/phrase directly
3. You must try to guess the word/phrase based on the user's description
4. Once you guess correctly, the user will move on to their next word
5. The user is trying to get through as many words as possible in 60 seconds
6. The external application will handle timing and keeping score

YOUR ROLE:
1. Listen carefully to the user's descriptions
2. Make intelligent guesses based on what they say
3. When you think you know the answer, state it clearly: "Is it [your guess]?"
4. If you're struggling, ask for more specific clues
5. Keep the game moving quickly - make guesses promptly
6. Be enthusiastic and encouraging

IMPORTANT:
- Keep all responses brief - the game is timed!
- Make multiple guesses if needed
- Use your common knowledge to make educated guesses
- If the user indicates you got it right, just say "Got it!" and prepare for the next word
- If you've made several wrong guesses, simply ask for "Another clue please?"

Start by guessing once you hear the user describe the word or phrase."""

# The response formats requested below ("Your word is", "The new word is",
# "Your next word is", "That's [N] points") are the phrases GameStateTracker
# searches for, and "NO" / "IGNORE" are the replies HostResponseTextFilter
# blanks out before speech synthesis.
game_host_prompt = """You are the AI host for a game of Word Wrangler. There are two players in the game: the human describer and the AI guesser.

GAME RULES:
1. You, the host, will give the human describer a word or phrase that they must describe
2. The describer CANNOT say any part of the word/phrase directly
3. The AI guesser will try to guess the word/phrase based on the describer's description
4. Once the guesser guesses correctly, move on to the next word
5. The describer is trying to get through as many words as possible in 60 seconds
6. The describer can say "skip" or "pass" to get a new word if they find a word too difficult
7. The describer can ask you to repeat the current word if they didn't hear it clearly
8. You'll keep track of the score (1 point for each correct guess)
9. The external application will handle timing

YOUR ROLE:
1. Start with this exact brief introduction: "Welcome to Word Wrangler! I'll give you words to describe, and the A.I. player will try to guess them. Remember, don't say any part of the word itself. Here's your first word: [word]."
2. Provide words to the describer. Choose 1 or 2 word phrases that cover a variety of topics, including animals, objects, places, and actions.
3. IMPORTANT: You will hear DIFFERENT types of input:
a. DESCRIPTIONS from the human (which you should IGNORE)
b. AFFIRMATIONS from the human (like "correct", "that's right", "you got it") which you should IGNORE
c. GUESSES from the AI player (which will be in the form of "Is it [word]?" or similar question format)
d. SKIP REQUESTS from the human (if they say "skip", "pass", or "next word please")
e. REPEAT REQUESTS from the human (if they say "repeat", "what was that?", "say again", etc.)

4. HOW TO RESPOND:
- If you hear a DESCRIPTION or AFFIRMATION from the human, respond with exactly "IGNORE" (no other text)
- If you hear a GUESS (in question form) and it's INCORRECT, respond with exactly "NO" (no other text)
- If you hear a GUESS (in question form) and it's CORRECT, respond with "Correct! That's [N] points. Your next word is [new word]" where N is the current score
- If you hear a SKIP REQUEST, respond with "The new word is [new word]" (don't change the score)
- If you hear a REPEAT REQUEST, respond with "Your word is [current word]" (don't change the score)

5. SCORING:
- Start with a score of 0
- Add 1 point for each correct guess by the AI player
- Do NOT add points for skipped words
- Announce the current score after every correct guess

RESPONSE EXAMPLES:
- Human says: "This is something you use to write" → You respond: "IGNORE"
- Human says: "That's right!" or "You got it!" → You respond: "IGNORE"
- Human says: "Wait, what was my word again?" → You respond: "Your word is [current word]"
- Human says: "Can you repeat that?" → You respond: "Your word is [current word]"
- AI says: "Is it a pen?" → If correct and it's the first point, you respond: "Correct! That's 1 point. Your next word is [new word]"
- AI says: "Is it a pencil?" → If correct and it's the third point, you respond: "Correct! That's 3 points. Your next word is [new word]"
- AI says: "Is it a marker?" → If incorrect, you respond: "NO"
- Human says: "Skip this one" or "Pass" → You respond: "The new word is [new word]"

IMPORTANT GUIDELINES:
- Choose words that range from easy to moderately difficult
- Keep all responses brief - the game is timed!
- Your "NO" and "IGNORE" responses won't be verbalized, but will be visible in the chat
- Always keep track of the CURRENT word so you can repeat it when asked
- Always keep track of the CURRENT SCORE and announce it after every correct guess
- Make sure your word choices are appropriate for all audiences
- If the human asks to skip, always provide a new word immediately without changing the score
- If the human asks you to repeat the word, say ONLY "Your word is [current word]" - don't add additional text
- CRUCIAL: Never interpret the human saying "correct", "that's right", "good job", or similar affirmations as a correct guess. These are just the human giving feedback to the AI player.

Start with the exact introduction specified above and give the first word."""
175 |
176 |
class HostResponseTextFilter(BaseTextFilter):
    """Custom text filter for Word Wrangler game.

    The host answers "NO" to a wrong guess and "IGNORE" to anything that is
    not a guess. This filter replaces those two replies with an empty string
    so they are never verbalized; every other text passes through unchanged.
    """

    # Replies that are control signals rather than speech. Compared after
    # stripping whitespace and upper-casing, so "no" and " Ignore " match too.
    _SILENT_RESPONSES = frozenset({"NO", "IGNORE"})

    def __init__(self):
        """Create the filter with the interruption flag cleared."""
        super().__init__()
        self._interrupted = False

    def update_settings(self, settings: Mapping[str, Any]):
        """Accept a settings update; this filter has nothing to configure."""
        pass

    async def filter(self, text: str) -> str:
        """Return ``text`` unless it is one of the silent host replies.

        Args:
            text: The text about to be spoken.

        Returns:
            An empty string when ``text`` is exactly "NO" or "IGNORE" (ignoring
            case and surrounding whitespace), otherwise ``text`` unchanged.
        """
        if text.strip().upper() in self._SILENT_RESPONSES:
            return ""
        return text

    async def handle_interruption(self):
        """Record that an interruption started."""
        self._interrupted = True

    async def reset_interruption(self):
        """Record that the interruption is over."""
        self._interrupted = False
206 |
207 |
class BotStoppedSpeakingNotifier(FrameProcessor):
    """Pass-through processor that signals a notifier on BotStoppedSpeakingFrame.

    Every frame, including the BotStoppedSpeakingFrame itself, is forwarded
    unchanged in the direction it arrived.
    """

    def __init__(self, notifier: BaseNotifier):
        """Store the notifier to signal.

        Args:
            notifier: Notified each time a BotStoppedSpeakingFrame is seen.
        """
        super().__init__()
        self._notifier = notifier

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Notify on BotStoppedSpeakingFrame, then forward the frame."""
        await super().process_frame(frame, direction)

        host_finished = isinstance(frame, BotStoppedSpeakingFrame)
        if host_finished:
            logger.debug(f"{self}: Host bot stopped speaking, notifying listeners")
            await self._notifier.notify()

        # Forwarding happens regardless of the frame type.
        await self.push_frame(frame, direction)
225 |
226 |
class StartFrameGate(FrameProcessor):
    """Holds back the StartFrame until a notifier fires.

    While the gate is closed the StartFrame is stored and every other frame is
    not forwarded. When the notifier fires the stored StartFrame is pushed and
    from then on all frames pass through normally.
    """

    def __init__(self, notifier: BaseNotifier):
        """Create a closed gate.

        Args:
            notifier: The notifier whose signal opens the gate.
        """
        super().__init__()
        self._notifier = notifier
        self._gate_opened = False
        self._gate_task: Optional[asyncio.Task] = None
        self._blocked_start_frame: Optional[Frame] = None

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Forward frames once open; before that, only capture the StartFrame."""
        await super().process_frame(frame, direction)

        if self._gate_opened:
            # Open gate: plain pass-through.
            await self.push_frame(frame, direction)
            return

        if not isinstance(frame, StartFrame):
            # Closed gate and not a StartFrame: the frame goes no further.
            return

        logger.debug(f"{self}: Blocking StartFrame until host bot stops speaking")
        self._blocked_start_frame = frame

        # Only one waiter task is ever created.
        if not self._gate_task:
            self._gate_task = self.create_task(self._wait_for_notification())

    async def _wait_for_notification(self):
        """Wait for the notifier, open the gate and release the StartFrame."""
        try:
            await self._notifier.wait()

            # Opening is a one-time action.
            if self._gate_opened:
                return

            self._gate_opened = True
            logger.debug(f"{self}: Gate opened, passing through blocked StartFrame")

            held = self._blocked_start_frame
            if held:
                await self.push_frame(held)
                self._blocked_start_frame = None
        except asyncio.CancelledError:
            logger.debug(f"{self}: Gate task was cancelled")
            raise
        except Exception as e:
            logger.exception(f"{self}: Error in gate task: {e}")
            raise
275 |
276 |
class GameStateTracker(FrameProcessor):
    """Tracks game state including new words and score by monitoring host responses.

    Text from LLMTextFrames is buffered until an LLMFullResponseEndFrame
    arrives. The complete response is then checked for a new-word announcement
    (which triggers the notifier) and for a score. All frames are forwarded
    unchanged.
    """

    def __init__(self, new_word_notifier: BaseNotifier):
        """Create a tracker with an empty buffer and a score of 0.

        Args:
            new_word_notifier: Notified when a host response contains one of
                the key phrases.
        """
        super().__init__()
        self._new_word_notifier = new_word_notifier
        self._text_buffer = ""
        self._current_score = 0

        # Phrases that trigger the notifier. "your word is" is also the
        # phrase the host prompt prescribes for repeat requests, so repeating
        # a word triggers the notifier as well.
        self._key_phrases = ["your word is", "new word is", "next word is"]

        # Pattern to extract score from responses. Both the straight and the
        # typographic apostrophe are accepted, since the text comes from an LLM.
        self._score_pattern = re.compile(r"that['\u2019]s (\d+) point", re.IGNORECASE)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Buffer host text, evaluate it at end of response, forward the frame."""
        await super().process_frame(frame, direction)

        if isinstance(frame, LLMTextFrame):
            text = frame.text
            if text.strip() in ("NO", "IGNORE"):
                # Control replies carry no game state; keep them out of the buffer.
                logger.debug("Skipping NO/IGNORE response")
            else:
                self._text_buffer += text
        elif isinstance(frame, LLMFullResponseEndFrame):
            if self._text_buffer:
                await self._handle_complete_response(self._text_buffer.lower())
                # Start the next response with a clean buffer.
                self._text_buffer = ""

        # Always push the frame through
        await self.push_frame(frame, direction)

    async def _handle_complete_response(self, buffer_lower: str):
        """Check one lower-cased host response for a new word and a score."""
        # 1. New word announcement
        if any(phrase in buffer_lower for phrase in self._key_phrases):
            await self._new_word_notifier.notify()
        else:
            logger.debug("No new word phrases detected")

        # 2. Score update
        score_match = self._score_pattern.search(buffer_lower)
        if score_match is None:
            logger.debug(f"No score pattern match in: '{buffer_lower}'")
            return

        # The group only ever contains digits, so int() cannot fail here.
        score = int(score_match.group(1))
        if score > self._current_score:
            logger.debug(f"Score updated from {self._current_score} to {score}")
            self._current_score = score
        else:
            # The score never goes down.
            logger.debug(f"Ignoring score {score} <= current score {self._current_score}")

    @property
    def current_score(self) -> int:
        """Get the current score."""
        return self._current_score
352 |
353 |
class GameTimer:
    """Manages the game timer and triggers end-game events.

    When the timer runs out it queues a spoken closing message with the final
    score, followed by an EndFrame, on the pipeline task.
    """

    def __init__(
        self,
        task: PipelineTask,
        game_state_tracker: GameStateTracker,
        game_duration_seconds: int = 120,
    ):
        """Create a timer that has not been started.

        Args:
            task: The pipeline task the end-of-game frames are queued on.
            game_state_tracker: Source of the final score.
            game_duration_seconds: Length of the game in seconds.
        """
        self._task = task
        self._game_state_tracker = game_state_tracker
        self._game_duration = game_duration_seconds
        self._timer_task: Optional[asyncio.Task] = None
        self._start_time: Optional[float] = None
        # Loop whose clock produced _start_time; kept so that the remaining
        # time is always measured against the same clock.
        self._loop: Optional[asyncio.AbstractEventLoop] = None

    def start(self):
        """Start the game timer.

        Does nothing if a timer task already exists. Must be called while an
        event loop is running.
        """
        if self._timer_task is not None:
            return

        self._loop = asyncio.get_running_loop()
        self._start_time = self._loop.time()
        self._timer_task = asyncio.create_task(self._run_timer())
        logger.info(f"Game timer started: {self._game_duration} seconds")

    def stop(self):
        """Stop the game timer."""
        if self._timer_task:
            self._timer_task.cancel()
            self._timer_task = None
            logger.info("Game timer stopped")

    def get_remaining_time(self) -> int:
        """Get the remaining time in seconds.

        Returns:
            The full game duration if the timer was never started, otherwise
            the duration minus the whole seconds elapsed, never below 0.
        """
        if self._start_time is None or self._loop is None:
            return self._game_duration

        elapsed = self._loop.time() - self._start_time
        return max(0, self._game_duration - int(elapsed))

    async def _run_timer(self):
        """Run the timer and end the game when time is up."""
        try:
            await asyncio.sleep(self._game_duration)

            # Time is up: announce the final score, then end the pipeline.
            final_score = self._game_state_tracker.current_score
            unit = "point" if final_score == 1 else "points"
            end_message = (
                "Time's up! Thank you for playing Word Wrangler. "
                f"Your final score is {final_score} {unit}. Great job!"
            )

            logger.info(f"Game over! Final score: {final_score}")
            await self._task.queue_frames([TTSSpeakFrame(text=end_message)])
            await self._task.queue_frames([EndFrame()])

        except asyncio.CancelledError:
            logger.debug("Game timer task cancelled")
            # Propagate so the task is recorded as cancelled.
            raise
        except Exception as e:
            # Nothing awaits this task, so failures are logged here.
            logger.exception(f"Error in game timer: {e}")
418 |
419 |
class ResettablePlayerLLM(GeminiMultimodalLiveLLMService):
    """Player LLM whose session is dropped and re-established around new words.

    Two notifiers drive the reset cycle:

    * ``new_word_notifier`` -- when it fires, the service disconnects right
      away, restores the original system instruction and marks a reset as
      pending.
    * ``host_stopped_speaking_notifier`` -- when it fires while a reset is
      pending, the service reconnects and clears the pending flag.
    """

    def __init__(
        self,
        api_key: str,
        system_instruction: str,
        new_word_notifier: BaseNotifier,
        host_stopped_speaking_notifier: BaseNotifier,
        voice_id: str = PLAYER_VOICE_ID,
        **kwargs,
    ):
        """Initialise the parent service and remember both notifiers.

        Args:
            api_key: Forwarded to the parent service.
            system_instruction: Forwarded to the parent service and kept so it
                can be restored on every reset.
            new_word_notifier: Awaited for new-word notifications.
            host_stopped_speaking_notifier: Awaited for host-stopped-speaking
                notifications.
            voice_id: Forwarded to the parent service.
            **kwargs: Extra keyword arguments forwarded to the parent service.
        """
        super().__init__(
            api_key=api_key, voice_id=voice_id, system_instruction=system_instruction, **kwargs
        )
        self._base_system_instruction = system_instruction
        self._new_word_notifier = new_word_notifier
        self._host_stopped_speaking_notifier = host_stopped_speaking_notifier
        self._pending_reset: bool = False
        self._reset_task: Optional[asyncio.Task] = None

    async def start(self, frame: StartFrame):
        """Start the parent service, then launch the listener task if needed."""
        await super().start(frame)

        # A listener that is still running is left alone.
        if self._reset_task and not self._reset_task.done():
            return
        self._reset_task = self.create_task(self._listen_for_notifications())

    async def stop(self, frame: EndFrame):
        """Cancel the listener task, then stop the parent service."""
        await self._cancel_reset_task()
        await super().stop(frame)

    async def cancel(self, frame: CancelFrame):
        """Cancel the listener task, then cancel the parent service."""
        await self._cancel_reset_task()
        await super().cancel(frame)

    async def _cancel_reset_task(self):
        """Cancel the listener task if it exists and has not finished yet."""
        running = self._reset_task
        if not running or running.done():
            return
        await self.cancel_task(running)
        self._reset_task = None

    async def _listen_for_notifications(self):
        """Run both notifier loops until this task is cancelled or one fails."""
        try:
            # One task per notifier; neither loop is expected to return.
            listeners = [
                self.create_task(self._listen_for_new_word()),
                self.create_task(self._listen_for_host_stopped()),
            ]
            await asyncio.gather(*listeners)
        except asyncio.CancelledError:
            logger.debug(f"{self}: Notification listener tasks cancelled")
            raise
        except Exception as e:
            logger.exception(f"{self}: Error in notification listeners: {e}")
            raise

    async def _listen_for_new_word(self):
        """On each new-word notification, disconnect and mark a reset pending."""
        while True:
            await self._new_word_notifier.wait()
            logger.info(
                f"{self}: Received new word notification, disconnecting and waiting for host to finish"
            )

            # Drop the connection first so nothing more is processed.
            await self._disconnect()

            # Go back to the instruction the service was created with.
            self._system_instruction = self._base_system_instruction

            # The reconnect itself happens in _listen_for_host_stopped.
            self._pending_reset = True

    async def _listen_for_host_stopped(self):
        """Reconnect after the host stops speaking, but only if a reset is pending."""
        while True:
            await self._host_stopped_speaking_notifier.wait()

            if not self._pending_reset:
                continue

            logger.info(f"{self}: Host finished speaking, completing the LLM reset")
            await self._connect()
            self._pending_reset = False
            logger.info(f"{self}: LLM reset complete")
519 |
520 |
async def tts_audio_raw_frame_filter(frame: Frame):
    """Tell whether ``frame`` is a TTSAudioRawFrame.

    Args:
        frame (Frame): The frame to inspect.

    Returns:
        bool: True for TTSAudioRawFrame instances, False for anything else.
    """
    if isinstance(frame, TTSAudioRawFrame):
        return True
    return False
524 |
525 |
# Module-level resampler, built once at import time and shared by every call
# to tts_to_input_audio_transformer.
resampler = SOXRAudioResampler()
528 |
529 |
async def tts_to_input_audio_transformer(frame: Frame):
    """Turn a TTSAudioRawFrame into a 16 kHz InputAudioRawFrame.

    The audio is resampled from the frame's own sample rate to 16000 Hz and
    wrapped in a new input frame, so speech produced by one part of the
    pipeline can be consumed as input audio by another.

    Args:
        frame (Frame): The frame to transform (expected to be TTSAudioRawFrame)

    Returns:
        InputAudioRawFrame: A new frame holding the resampled audio with the
        same channel count, or None when ``frame`` is not a TTSAudioRawFrame.
    """
    if not isinstance(frame, TTSAudioRawFrame):
        # Same outcome as falling off the end of the function.
        return None

    target_rate = 16000
    converted = await resampler.resample(frame.audio, frame.sample_rate, target_rate)

    return InputAudioRawFrame(
        audio=converted,
        sample_rate=target_rate,
        num_channels=frame.num_channels,
    )
556 |
557 |
async def main(room_url: str, token: str):
    """Build and run the Word Wrangler pipeline in a Daily room.

    Picks the words for this game, builds the host and player prompts, wires
    the host and player branches into a parallel pipeline, registers the
    participant event handlers and then awaits the pipeline runner.

    Args:
        room_url: URL of the Daily room, passed to DailyTransport.
        token: Token for the room, passed to DailyTransport.
    """
    # Log which room this bot instance was started for
    logger.debug("Starting bot in room: {}", room_url)

    # Pick the words for this game and render them as a quoted,
    # comma-separated list for the host prompt
    game_words = generate_game_words(NUM_WORDS_PER_GAME)
    words_string = ", ".join(f'"{word}"' for word in game_words)
    logger.debug(f"Game words: {words_string}")

    transport = DailyTransport(
        room_url,
        token,
        "Word Wrangler Bot",
        DailyParams(
            audio_out_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        ),
    )

    # System prompt for the player LLM: the base player prompt plus speech guidelines
    player_instruction = f"""{game_player_prompt}

Important guidelines:
1. Your responses will be converted to speech, so keep them concise and conversational.
2. Don't use special characters or formatting that wouldn't be natural in speech.
3. Encourage the user to elaborate when appropriate."""

    # System prompt for the host LLM: the base host prompt, this game's word
    # list and speech guidelines
    host_instruction = f"""{game_host_prompt}

GAME WORDS:
Use ONLY these words for the game (in any order): {words_string}

Important guidelines:
1. Your responses will be converted to speech, so keep them concise and conversational.
2. Don't use special characters or formatting that wouldn't be natural in speech.
3. ONLY use words from the provided list above when giving words to the player."""

    # First user message placed in the host context (see `messages` below)
    intro_message = """Start with this exact brief introduction: "Welcome to Word Wrangler! I'll give you words to describe, and the A.I. player will try to guess them. Remember, don't say any part of the word itself. Here's your first word: [word]." """

    # Create the STT mute filter with the MUTE_UNTIL_FIRST_BOT_COMPLETE strategy
    stt_mute_filter = STTMuteFilter(
        config=STTMuteConfig(strategies={STTMuteStrategy.MUTE_UNTIL_FIRST_BOT_COMPLETE})
    )

    # Host LLM is configured with the TEXT modality; host_tts below converts
    # its text to speech
    host_llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=host_instruction,
        params=InputParams(modalities=GeminiMultimodalModalities.TEXT),
    )

    host_tts = GoogleTTSService(
        voice_id=HOST_VOICE_ID,
        credentials_path=os.getenv("GOOGLE_TEST_CREDENTIALS_FILE"),
        text_filters=[HostResponseTextFilter()],
    )

    # The producer selects TTS audio frames and converts them to input audio;
    # the consumer (placed in the host branch) receives what the producer collects.
    # NOTE(review): passthrough=True presumably also lets the original frames
    # continue down the player branch — confirm against ProducerProcessor.
    producer = ProducerProcessor(
        filter=tts_audio_raw_frame_filter,
        transformer=tts_to_input_audio_transformer,
        passthrough=True,
    )
    consumer = ConsumerProcessor(producer=producer)

    # Create the notifiers. bot_speaking_notifier is shared by the
    # stopped-speaking detector, the start frame gate and the player LLM
    # (as its host_stopped_speaking_notifier).
    bot_speaking_notifier = EventNotifier()
    new_word_notifier = EventNotifier()

    # Create BotStoppedSpeakingNotifier to detect when host bot stops speaking
    bot_stopped_speaking_detector = BotStoppedSpeakingNotifier(bot_speaking_notifier)

    # Create StartFrameGate to block Player LLM until host has stopped speaking
    start_frame_gate = StartFrameGate(bot_speaking_notifier)

    # Create GameStateTracker to handle new words and score tracking
    game_state_tracker = GameStateTracker(new_word_notifier)

    # Create a resettable player LLM that coordinates between notifiers
    player_llm = ResettablePlayerLLM(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=player_instruction,
        new_word_notifier=new_word_notifier,
        host_stopped_speaking_notifier=bot_speaking_notifier,
        voice_id=PLAYER_VOICE_ID,
    )

    # Set up the initial context for the conversation
    messages = [
        {
            "role": "user",
            "content": intro_message,
        },
    ]

    # Build the host LLM context from the initial messages
    context = OpenAILLMContext(messages)
    context_aggregator = host_llm.create_context_aggregator(context)

    pipeline = Pipeline(
        [
            transport.input(),  # Receive audio/video from Daily call
            stt_mute_filter,  # Filter out speech during the bot's initial turn
            ParallelPipeline(
                # Host branch: manages the game and provides words
                [
                    consumer,  # Receives audio from the player branch
                    host_llm,  # AI host that provides words and tracks score
                    game_state_tracker,  # Tracks words and score from host responses
                    host_tts,  # Converts host text to speech
                    bot_stopped_speaking_detector,  # Notifies when host stops speaking
                ],
                # Player branch: guesses words based on human descriptions
                [
                    start_frame_gate,  # Gates the player until host finishes intro
                    player_llm,  # AI player that makes guesses
                    producer,  # Collects audio frames to be passed to the consumer
                ],
            ),
            transport.output(),  # Send audio/video back to Daily call
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            allow_interruptions=False,
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
    )

    # Create the game timer
    game_timer = GameTimer(task, game_state_tracker, game_duration_seconds=GAME_DURATION_SECONDS)

    @transport.event_handler("on_first_participant_joined")
    async def on_first_participant_joined(transport, participant):
        logger.info("First participant joined: {}", participant["id"])
        # Capture the participant's transcription
        await transport.capture_participant_transcription(participant["id"])
        # Kick off the conversation
        await task.queue_frames([context_aggregator.user().get_context_frame()])
        # Start the game timer
        game_timer.start()

    @transport.event_handler("on_participant_left")
    async def on_participant_left(transport, participant, reason):
        logger.info("Participant left: {}", participant)
        # Stop the timer
        game_timer.stop()
        # Cancel the pipeline task
        await task.cancel()

    # NOTE(review): handle_sigint=False presumably leaves SIGINT handling to
    # the hosting process — confirm against PipelineRunner.
    runner = PipelineRunner(handle_sigint=False, force_gc=True)

    # Run the pipeline task; main() returns once the runner does
    await runner.run(task)
712 |
713 |
async def bot(args: DailySessionArguments):
    """Main bot entry point compatible with the FastAPI route handler.

    Args:
        args: Session arguments; ``args.room_url`` and ``args.token`` are
            forwarded to ``main``.

    Raises:
        Exception: Any error raised by ``main`` is logged and re-raised.
    """
    # The room token is a credential, so it is deliberately kept out of the
    # logs; the room URL alone identifies the session.
    logger.info(f"Bot process initialized {args.room_url}")

    try:
        await main(args.room_url, args.token)
        logger.info("Bot process completed")
    except Exception as e:
        # Record the traceback, then let the caller see the failure
        logger.exception(f"Error in bot process: {str(e)}")
        raise
731 |
732 |
733 | # Local development functions
async def local_main():
    """Run the bot locally for development testing.

    Obtains a room URL and token from ``configure``, logs the URL, opens it
    in the default web browser and then runs ``main``. Any exception is
    logged and not re-raised.
    """
    try:
        async with aiohttp.ClientSession() as session:
            room_url, token = await configure(session)

            # Pad the join link with filler lines so it stands out in the log
            banner = (
                "_",
                "_",
                f"Talk to your voice agent here: {room_url}",
                "_",
                "_",
            )
            for line in banner:
                logger.warning(line)

            webbrowser.open(room_url)
            await main(room_url, token)
    except Exception as e:
        logger.exception(f"Error in local development mode: {e}")
748 |
749 |
# Local development entry point: runs local_main() only when LOCAL_RUN is
# truthy and this file is executed as a script.
if LOCAL_RUN and __name__ == "__main__":
    try:
        asyncio.run(local_main())
    except Exception as e:
        # Log with traceback instead of letting the exception propagate
        logger.exception(f"Failed to run in local mode: {e}")
756 |
--------------------------------------------------------------------------------