├── .env.sample
├── .gitignore
├── LICENSE
├── README.md
├── eslint.config.mjs
├── next.config.ts
├── package-lock.json
├── package.json
├── postcss.config.mjs
├── public
│   ├── arrow.svg
│   ├── favicon.ico
│   ├── openai-logomark.svg
│   ├── screenshot_chat_supervisor.png
│   └── screenshot_handoff.png
├── src
│   └── app
│       ├── App.tsx
│       ├── agentConfigs
│       │   ├── chatSupervisor
│       │   │   ├── index.ts
│       │   │   ├── sampleData.ts
│       │   │   └── supervisorAgent.ts
│       │   ├── customerServiceRetail
│       │   │   ├── authentication.ts
│       │   │   ├── index.ts
│       │   │   ├── returns.ts
│       │   │   ├── sales.ts
│       │   │   └── simulatedHuman.ts
│       │   ├── guardrails.ts
│       │   ├── index.ts
│       │   ├── realtimeClient.ts
│       │   ├── simpleHandoff.ts
│       │   ├── types.ts
│       │   └── voiceAgentMetaprompt.txt
│       ├── api
│       │   ├── responses
│       │   │   └── route.ts
│       │   └── session
│       │       └── route.ts
│       ├── components
│       │   ├── BottomToolbar.tsx
│       │   ├── Events.tsx
│       │   ├── GuardrailChip.tsx
│       │   └── Transcript.tsx
│       ├── contexts
│       │   ├── EventContext.tsx
│       │   └── TranscriptContext.tsx
│       ├── globals.css
│       ├── hooks
│       │   └── useAudioDownload.ts
│       ├── layout.tsx
│       ├── lib
│       │   ├── audioUtils.ts
│       │   ├── callOai.ts
│       │   └── envSetup.ts
│       ├── page.tsx
│       └── types.ts
├── tailwind.config.ts
└── tsconfig.json

/.env.sample:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=your_api_key
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # dependencies
2 | /node_modules
3 | /.pnp
4 | .pnp.*
5 | .yarn/*
6 | !.yarn/patches
7 | !.yarn/plugins
8 | !.yarn/releases
9 | !.yarn/versions
10 | 
11 | # testing
12 | /coverage
13 | 
14 | # next.js
15 | /.next/
16 | /out/
17 | 
18 | # production
19 | /build
20 | 
21 | # misc
22 | .DS_Store
23 | *.pem
24 | 
25 | # debug
26 | npm-debug.log*
27 | yarn-debug.log*
28 | yarn-error.log*
29 | .pnpm-debug.log*
30 | 
31 | # Ignore all env files except .env.sample
32 | .env
33 | 
34 | 
35 | # vercel
36 | .vercel
37 | 
38 | # typescript
39 | *.tsbuildinfo
40 | next-env.d.ts
41 | todo.md
42 | 
43 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2025 OpenAI
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6 | 
7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8 | 
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Realtime API Agents Demo
2 | 
3 | This is a demonstration of more advanced patterns for voice agents, using the OpenAI Realtime API and the OpenAI Agents SDK.
4 | 
5 | **NOTE:** For a version that does not use the OpenAI Agents SDK, see the [without-agents-sdk branch](https://github.com/openai/openai-realtime-agents/tree/without-agents-sdk).
6 | 
7 | There are two main patterns demonstrated:
8 | 1. **Chat-Supervisor:** A realtime-based chat agent interacts with the user and handles basic tasks, while a more intelligent, text-based supervisor model (e.g., `gpt-4.1`) is used extensively for tool calls and more complex responses. This approach provides an easy onramp and high-quality answers, with a small increase in latency.
9 | 2. **Sequential Handoff:** Specialized agents (powered by the Realtime API) transfer the user between them to handle specific user intents. This is great for customer service, where user intents can be handled sequentially by specialist models that excel in specific domains. This avoids packing all instructions and tools into a single agent, which can degrade performance.
10 | 
11 | ## Setup
12 | 
13 | - This is a Next.js TypeScript app. Install dependencies with `npm i`.
14 | - Add your `OPENAI_API_KEY` to your env. Either add it to your `.bash_profile` or equivalent, or copy `.env.sample` to `.env` and add it there.
15 | - Start the server with `npm run dev`.
16 | - Open your browser to [http://localhost:3000](http://localhost:3000). It should default to the `chatSupervisor` Agent Config.
17 | - You can change examples via the "Scenario" dropdown in the top right.
18 | 
19 | # Agentic Pattern 1: Chat-Supervisor
20 | 
21 | This is demonstrated in the [chatSupervisor](src/app/agentConfigs/chatSupervisor/index.ts) Agent Config. The chat agent uses the realtime model to converse with the user and handle basic tasks, like greeting the user, casual conversation, and collecting information, while a more intelligent, text-based supervisor model (e.g. `gpt-4.1`) is used extensively to handle tool calls and more challenging responses. You can control the decision boundary by "opting in" specific tasks to the chat agent as desired.
22 | 
23 | Video walkthrough: [https://x.com/noahmacca/status/1927014156152058075](https://x.com/noahmacca/status/1927014156152058075)
24 | 
25 | ## Example
26 | ![Screenshot of the Chat Supervisor Flow](/public/screenshot_chat_supervisor.png)
27 | *In this exchange, note the immediate response to collect the phone number, and the deferral to the supervisor agent to handle the tool call and formulate the response. There is a gap of roughly two seconds between the end of "give me a moment to check on that." being spoken aloud and the start of "Thanks for waiting. Your last bill...".*
28 | 
29 | ## Schematic
30 | ```mermaid
31 | sequenceDiagram
32 | participant User
33 | participant ChatAgent as Chat Agent<br/>
(gpt-4o-realtime-mini) 34 | participant Supervisor as Supervisor Agent
(gpt-4.1)
35 | participant Tool as Tool
36 | 
37 | alt Basic chat or info collection
38 | User->>ChatAgent: User message
39 | ChatAgent->>User: Responds directly
40 | else Requires higher intelligence and/or tool call
41 | User->>ChatAgent: User message
42 | ChatAgent->>User: "Let me think"
43 | ChatAgent->>Supervisor: Forwards message/context
44 | alt Tool call needed
45 | Supervisor->>Tool: Calls tool
46 | Tool->>Supervisor: Returns result
47 | end
48 | Supervisor->>ChatAgent: Returns response
49 | ChatAgent->>User: Delivers response
50 | end
51 | ```
52 | 
53 | ## Benefits
54 | - **Simpler onboarding:** If you already have a performant text-based chat agent, you can give that same prompt and set of tools to the supervisor agent, make some tweaks to the chat agent prompt, and you'll have a natural voice agent that performs on par with your text agent.
55 | - **Simple ramp to a full realtime agent**: Rather than switching your whole agent to the Realtime API, you can move one task at a time, taking time to validate and build trust for each before deploying to production.
56 | - **High intelligence**: You benefit from the high intelligence, excellent tool calling, and instruction following of models like `gpt-4.1` in your voice agents.
57 | - **Lower cost**: If your chat agent is only being used for basic tasks, you can use the realtime-mini model, which, even when combined with GPT-4.1, should be cheaper than using the full 4o-realtime model.
58 | - **User experience**: It's a more natural conversational experience than using a stitched model architecture, where response latency is often 1.5s or longer after a user has finished speaking. In this architecture, the model responds to the user right away, even if it has to lean on the supervisor agent.
59 |   - However, more assistant responses will start with "Let me think", rather than responding immediately with the full response.
60 | 
61 | ## Modifying for your own agent
62 | 1. Update [supervisorAgent](src/app/agentConfigs/chatSupervisor/supervisorAgent.ts).
63 |    - Add your existing text agent prompt and tools if you already have them. This should contain the "meat" of your voice agent logic and be very specific about what it should and shouldn't do and how exactly it should respond. Add this information below `==== Domain-Specific Agent Instructions ====`.
64 |    - You should likely update this prompt to be more appropriate for voice, for example with instructions to be concise and to avoid long lists of items.
65 | 2. Update [chatAgent](src/app/agentConfigs/chatSupervisor/index.ts).
66 |    - Customize the chatAgent instructions with your own tone, greeting, etc.
67 |    - Add your tool definitions to `chatAgentInstructions`. We recommend a brief YAML description rather than JSON to ensure the model doesn't get confused and try calling the tool directly.
68 |    - You can modify the decision boundary by adding new items to the `# Allow List of Permitted Actions` section.
69 | 3. To reduce cost, try using `gpt-4o-mini-realtime` for the chatAgent and/or `gpt-4.1-mini` for the supervisor model. To maximize intelligence on particularly difficult or high-stakes tasks, consider trading off latency by adding chain-of-thought to your supervisor prompt, or by using an additional supervisor based on a reasoning model such as `o4-mini`.
70 | 
71 | # Agentic Pattern 2: Sequential Handoffs
72 | 
73 | This pattern is inspired by [OpenAI Swarm](https://github.com/openai/swarm) and involves the sequential handoff of a user between specialized agents.
Handoffs are decided by the model and coordinated via tool calls, and possible handoffs are defined explicitly in an agent graph. A handoff triggers a `session.update` event with new instructions and tools. This pattern is effective for handling a variety of user intents with specialist agents, each of which might have long instructions and numerous tools.
74 | 
75 | Here's a [video walkthrough](https://x.com/OpenAIDevs/status/1880306081517432936) showing how it works. You should be able to use this repo to prototype your own multi-agent realtime voice app in less than 20 minutes!
76 | 
77 | ![Screenshot of the Realtime API Agents Demo](/public/screenshot_handoff.png)
78 | *In this simple example, the user is transferred from a greeter agent to a haiku agent. See below for the simple, full configuration of this flow.*
79 | 
80 | Configuration in `src/app/agentConfigs/simpleHandoff.ts`
81 | ```typescript
82 | import { RealtimeAgent } from '@openai/agents/realtime';
83 | 
84 | // Define agents using the OpenAI Agents SDK
85 | export const haikuWriterAgent = new RealtimeAgent({
86 |   name: 'haikuWriter',
87 |   handoffDescription: 'Agent that writes haikus.', // Context for the agent_transfer tool
88 |   instructions:
89 |     'Ask the user for a topic, then reply with a haiku about that topic.',
90 |   tools: [],
91 |   handoffs: [],
92 | });
93 | 
94 | export const greeterAgent = new RealtimeAgent({
95 |   name: 'greeter',
96 |   handoffDescription: 'Agent that greets the user.',
97 |   instructions:
98 |     "Please greet the user and ask them if they'd like a haiku. If yes, hand off to the 'haikuWriter' agent.",
99 |   tools: [],
100 |   handoffs: [haikuWriterAgent], // Define which agents this agent can hand off to
101 | });
102 | 
103 | // An Agent Set is just an array of the agents that participate in the scenario
104 | export const simpleHandoffScenario = [greeterAgent, haikuWriterAgent];
105 | ```
106 | ## CustomerServiceRetail Flow
107 | 
108 | This is a more complex, representative implementation that illustrates a customer service flow, with the following features:
109 | - A more complex agent graph with agents for user authentication, returns, sales, and a placeholder human agent for escalations.
110 | - An escalation by the [returns](https://github.com/openai/openai-realtime-agents/blob/60f4effc50a539b19b2f1fa4c38846086b58c295/src/app/agentConfigs/customerServiceRetail/returns.ts#L233) agent to `o4-mini` to validate and initiate a return, as an example high-stakes decision, using a similar pattern to the above (a hedged sketch of this call appears after the flow diagram below).
111 | - Prompting models to follow a state machine, for example to accurately collect things like names and phone numbers with character-by-character confirmation to authenticate a user.
112 | - To test this flow, say that you'd like to return your snowboard and go through the necessary prompts!
113 | 
114 | Configuration in [src/app/agentConfigs/customerServiceRetail/index.ts](src/app/agentConfigs/customerServiceRetail/index.ts).
115 | ```javascript 116 | import authentication from "./authentication"; 117 | import returns from "./returns"; 118 | import sales from "./sales"; 119 | import simulatedHuman from "./simulatedHuman"; 120 | import { injectTransferTools } from "../utils"; 121 | 122 | authentication.downstreamAgents = [returns, sales, simulatedHuman]; 123 | returns.downstreamAgents = [authentication, sales, simulatedHuman]; 124 | sales.downstreamAgents = [authentication, returns, simulatedHuman]; 125 | simulatedHuman.downstreamAgents = [authentication, returns, sales]; 126 | 127 | const agents = injectTransferTools([ 128 | authentication, 129 | returns, 130 | sales, 131 | simulatedHuman, 132 | ]); 133 | 134 | export default agents; 135 | ``` 136 | 137 | ## Schematic 138 | 139 | This diagram illustrates a more advanced interaction flow defined in `src/app/agentConfigs/customerServiceRetail/`, including detailed events. 140 | 141 |
142 | Show CustomerServiceRetail Flow Diagram 143 | 144 | ```mermaid 145 | sequenceDiagram 146 | participant User 147 | participant WebClient as Next.js Client 148 | participant NextAPI as /api/session 149 | participant RealtimeAPI as OpenAI Realtime API 150 | participant AgentManager as Agents (authentication, returns, sales, simulatedHuman) 151 | participant o1mini as "o4-mini" (Escalation Model) 152 | 153 | Note over WebClient: User navigates to ?agentConfig=customerServiceRetail 154 | User->>WebClient: Open Page 155 | WebClient->>NextAPI: GET /api/session 156 | NextAPI->>RealtimeAPI: POST /v1/realtime/sessions 157 | RealtimeAPI->>NextAPI: Returns ephemeral session 158 | NextAPI->>WebClient: Returns ephemeral token (JSON) 159 | 160 | Note right of WebClient: Start RTC handshake 161 | WebClient->>RealtimeAPI: Offer SDP (WebRTC) 162 | RealtimeAPI->>WebClient: SDP answer 163 | WebClient->>WebClient: DataChannel "oai-events" established 164 | 165 | Note over AgentManager: Default agent is "authentication" 166 | User->>WebClient: "Hi, I'd like to return my snowboard." 167 | WebClient->>AgentManager: conversation.item.create (role=user) 168 | WebClient->>RealtimeAPI: {type: "conversation.item.create"} 169 | WebClient->>RealtimeAPI: {type: "response.create"} 170 | 171 | authentication->>AgentManager: Requests user info, calls authenticate_user_information() 172 | AgentManager-->>WebClient: function_call => name="authenticate_user_information" 173 | WebClient->>WebClient: handleFunctionCall => verifies details 174 | 175 | Note over AgentManager: After user is authenticated 176 | authentication->>AgentManager: transferAgents("returns") 177 | AgentManager-->>WebClient: function_call => name="transferAgents" args={ destination: "returns" } 178 | WebClient->>WebClient: setSelectedAgentName("returns") 179 | 180 | Note over returns: The user wants to process a return 181 | returns->>AgentManager: function_call => checkEligibilityAndPossiblyInitiateReturn 182 | AgentManager-->>WebClient: function_call => name="checkEligibilityAndPossiblyInitiateReturn" 183 | 184 | Note over WebClient: The WebClient calls /api/chat/completions with model="o4-mini" 185 | WebClient->>o1mini: "Is this item eligible for return?" 186 | o1mini->>WebClient: "Yes/No (plus notes)" 187 | 188 | Note right of returns: Returns uses the result from "o4-mini" 189 | returns->>AgentManager: "Return is approved" or "Return is denied" 190 | AgentManager->>WebClient: conversation.item.create (assistant role) 191 | WebClient->>User: Displays final verdict 192 | ``` 193 | 194 |
195 | 196 | # Other Info 197 | ## Next Steps 198 | - You can copy these templates to make your own multi-agent voice app! Once you make a new agent set config, add it to `src/app/agentConfigs/index.ts` and you should be able to select it in the UI in the "Scenario" dropdown menu. 199 | - Each agentConfig can define instructions, tools, and toolLogic. By default all tool calls simply return `True`, unless you define the toolLogic, which will run your specific tool logic and return an object to the conversation (e.g. for retrieved RAG context). 200 | - If you want help creating your own prompt using the conventions shown in customerServiceRetail, including defining a state machine, we've included a metaprompt [here](src/app/agentConfigs/voiceAgentMetaprompt.txt), or you can use our [Voice Agent Metaprompter GPT](https://chatgpt.com/g/g-678865c9fb5c81918fa28699735dd08e-voice-agent-metaprompt-gpt) 201 | 202 | ## Output Guardrails 203 | Assistant messages are checked for safety and compliance before they are shown in the UI. The guardrail call now lives directly inside `src/app/App.tsx`: when a `response.text.delta` stream starts we mark the message as **IN_PROGRESS**, and once the server emits `guardrail_tripped` or `response.done` we mark the message as **FAIL** or **PASS** respectively. If you want to change how moderation is triggered or displayed, search for `guardrail_tripped` inside `App.tsx` and tweak the logic there. 204 | 205 | ## Navigating the UI 206 | - You can select agent scenarios in the Scenario dropdown, and automatically switch to a specific agent with the Agent dropdown. 207 | - The conversation transcript is on the left, including tool calls, tool call responses, and agent changes. Click to expand non-message elements. 208 | - The event log is on the right, showing both client and server events. Click to see the full payload. 209 | - On the bottom, you can disconnect, toggle between automated voice-activity detection or PTT, turn off audio playback, and toggle logs. 210 | 211 | ## Pull Requests 212 | 213 | Feel free to open an issue or pull request and we'll do our best to review it. The spirit of this repo is to demonstrate the core logic for new agentic flows; PRs that go beyond this core scope will likely not be merged. 
214 | 215 | # Core Contributors 216 | - Noah MacCallum - [noahmacca](https://x.com/noahmacca) 217 | - Ilan Bigio - [ibigio](https://github.com/ibigio) 218 | - Brian Fioca - [bfioca](https://github.com/bfioca) 219 | -------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import { dirname } from "path"; 2 | import { fileURLToPath } from "url"; 3 | import { FlatCompat } from "@eslint/eslintrc"; 4 | 5 | const __filename = fileURLToPath(import.meta.url); 6 | const __dirname = dirname(__filename); 7 | 8 | const compat = new FlatCompat({ 9 | baseDirectory: __dirname, 10 | }); 11 | 12 | const eslintConfig = [ 13 | ...compat.extends("next/core-web-vitals", "next/typescript"), 14 | { 15 | rules: { 16 | "@typescript-eslint/no-explicit-any": "off", 17 | "react-hooks/exhaustive-deps": "off" 18 | }, 19 | }, 20 | ]; 21 | 22 | export default eslintConfig; 23 | -------------------------------------------------------------------------------- /next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | 3 | const nextConfig: NextConfig = { 4 | /* config options here */ 5 | }; 6 | 7 | export default nextConfig; 8 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "realtime-examples", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@openai/agents": "^0.0.1", 13 | "@radix-ui/react-icons": "^1.3.2", 14 | "dotenv": "^16.4.7", 15 | "next": "^15.3.1", 16 | "openai": "^4.77.3", 17 | "react": "^19.0.0", 18 | "react-dom": "^19.0.0", 19 | "react-markdown": "^9.0.3", 20 | "uuid": "^11.0.4", 21 | "zod": "^3.24.1" 22 | }, 23 | "devDependencies": { 24 | "@eslint/eslintrc": "^3", 25 | "@types/node": "^20", 26 | "@types/react": "^19", 27 | "@types/react-dom": "^19", 28 | "eslint": "^9", 29 | "eslint-config-next": "15.1.4", 30 | "postcss": "^8", 31 | "tailwindcss": "^3.4.1", 32 | "typescript": "^5" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /postcss.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('postcss-load-config').Config} */ 2 | const config = { 3 | plugins: { 4 | tailwindcss: {}, 5 | }, 6 | }; 7 | 8 | export default config; 9 | -------------------------------------------------------------------------------- /public/arrow.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/openai-realtime-agents/fa90035821c4ed47df244acffd13a3532e67829b/public/favicon.ico -------------------------------------------------------------------------------- /public/openai-logomark.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/screenshot_chat_supervisor.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/openai-realtime-agents/fa90035821c4ed47df244acffd13a3532e67829b/public/screenshot_chat_supervisor.png -------------------------------------------------------------------------------- /public/screenshot_handoff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openai/openai-realtime-agents/fa90035821c4ed47df244acffd13a3532e67829b/public/screenshot_handoff.png -------------------------------------------------------------------------------- /src/app/App.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import React, { useEffect, useRef, useState } from "react"; 4 | import { useSearchParams } from "next/navigation"; 5 | import { v4 as uuidv4 } from "uuid"; 6 | 7 | import Image from "next/image"; 8 | 9 | // UI components 10 | import Transcript from "./components/Transcript"; 11 | import Events from "./components/Events"; 12 | import BottomToolbar from "./components/BottomToolbar"; 13 | 14 | // Types 15 | import { SessionStatus, TranscriptItem } from "@/app/types"; 16 | import type { RealtimeAgent } from '@openai/agents/realtime'; 17 | 18 | // Context providers & hooks 19 | import { useTranscript } from "@/app/contexts/TranscriptContext"; 20 | import { useEvent } from "@/app/contexts/EventContext"; 21 | 22 | // Utilities 23 | import { RealtimeClient } from "@/app/agentConfigs/realtimeClient"; 24 | 25 | // Agent configs 26 | import { allAgentSets, defaultAgentSetKey } from "@/app/agentConfigs"; 27 | // New SDK scenarios 28 | import { simpleHandoffScenario } from "@/app/agentConfigs/simpleHandoff"; 29 | import { customerServiceRetailScenario } from "@/app/agentConfigs/customerServiceRetail"; 30 | import { chatSupervisorScenario } from "@/app/agentConfigs/chatSupervisor"; 31 | 32 | const sdkScenarioMap: Record = { 33 | simpleHandoff: simpleHandoffScenario, 34 | customerServiceRetail: customerServiceRetailScenario, 35 | chatSupervisor: chatSupervisorScenario, 36 | }; 37 | 38 | import useAudioDownload from "./hooks/useAudioDownload"; 39 | 40 | function App() { 41 | const searchParams = useSearchParams()!; 42 | 43 | // Use urlCodec directly from URL search params (default: "opus") 44 | const urlCodec = searchParams.get("codec") || "opus"; 45 | 46 | const { 47 | transcriptItems, 48 | addTranscriptMessage, 49 | addTranscriptBreadcrumb, 50 | updateTranscriptMessage, 51 | updateTranscriptItem, 52 | } = useTranscript(); 53 | 54 | // Keep a mutable reference to the latest transcriptItems so that streaming 55 | // callbacks registered once during setup always have access to up-to-date 56 | // data without being re-registered on every render. 
57 | const transcriptItemsRef = useRef<TranscriptItem[]>(transcriptItems);
58 | useEffect(() => {
59 | transcriptItemsRef.current = transcriptItems;
60 | }, [transcriptItems]);
61 | const { logClientEvent, logServerEvent, logHistoryItem } = useEvent();
62 | 
63 | const [selectedAgentName, setSelectedAgentName] = useState<string>("");
64 | const [selectedAgentConfigSet, setSelectedAgentConfigSet] = useState<
65 | RealtimeAgent[] | null
66 | >(null);
67 | 
68 | const audioElementRef = useRef<HTMLAudioElement | null>(null);
69 | 
70 | const sdkAudioElement = React.useMemo(() => {
71 | if (typeof window === 'undefined') return undefined;
72 | const el = document.createElement('audio');
73 | el.autoplay = true;
74 | el.style.display = 'none';
75 | document.body.appendChild(el);
76 | return el;
77 | }, []);
78 | 
79 | // Attach SDK audio element once it exists (after first render in browser)
80 | useEffect(() => {
81 | if (sdkAudioElement && !audioElementRef.current) {
82 | audioElementRef.current = sdkAudioElement;
83 | }
84 | }, [sdkAudioElement]);
85 | 
86 | const sdkClientRef = useRef<RealtimeClient | null>(null);
87 | const loggedFunctionCallsRef = useRef<Set<string>>(new Set());
88 | const [sessionStatus, setSessionStatus] =
89 | useState<SessionStatus>("DISCONNECTED");
90 | 
91 | const [isEventsPaneExpanded, setIsEventsPaneExpanded] =
92 | useState<boolean>(true);
93 | const [userText, setUserText] = useState<string>("");
94 | const [isPTTActive, setIsPTTActive] = useState<boolean>(false);
95 | const [isPTTUserSpeaking, setIsPTTUserSpeaking] = useState<boolean>(false);
96 | const [isAudioPlaybackEnabled, setIsAudioPlaybackEnabled] = useState<boolean>(
97 | () => {
98 | if (typeof window === 'undefined') return true;
99 | const stored = localStorage.getItem('audioPlaybackEnabled');
100 | return stored ? stored === 'true' : true;
101 | },
102 | );
103 | 
104 | 
105 | 
106 | // Initialize the recording hook.
107 | const { startRecording, stopRecording, downloadRecording } = 108 | useAudioDownload(); 109 | 110 | const sendClientEvent = (eventObj: any, eventNameSuffix = '') => { 111 | if (!sdkClientRef.current) { 112 | console.error('SDK client not available', eventObj); 113 | return; 114 | } 115 | 116 | try { 117 | sdkClientRef.current.sendEvent(eventObj); 118 | } catch (err) { 119 | console.error('Failed to send via SDK', err); 120 | } 121 | }; 122 | 123 | 124 | useEffect(() => { 125 | let finalAgentConfig = searchParams.get("agentConfig"); 126 | if (!finalAgentConfig || !allAgentSets[finalAgentConfig]) { 127 | finalAgentConfig = defaultAgentSetKey; 128 | const url = new URL(window.location.toString()); 129 | url.searchParams.set("agentConfig", finalAgentConfig); 130 | window.location.replace(url.toString()); 131 | return; 132 | } 133 | 134 | const agents = allAgentSets[finalAgentConfig]; 135 | const agentKeyToUse = agents[0]?.name || ""; 136 | 137 | setSelectedAgentName(agentKeyToUse); 138 | setSelectedAgentConfigSet(agents); 139 | }, [searchParams]); 140 | 141 | useEffect(() => { 142 | if (selectedAgentName && sessionStatus === "DISCONNECTED") { 143 | connectToRealtime(); 144 | } 145 | }, [selectedAgentName]); 146 | 147 | useEffect(() => { 148 | if ( 149 | sessionStatus === "CONNECTED" && 150 | selectedAgentConfigSet && 151 | selectedAgentName 152 | ) { 153 | const currentAgent = selectedAgentConfigSet.find( 154 | (a) => a.name === selectedAgentName 155 | ); 156 | addTranscriptBreadcrumb(`Agent: ${selectedAgentName}`, currentAgent); 157 | updateSession(true); 158 | } 159 | }, [selectedAgentConfigSet, selectedAgentName, sessionStatus]); 160 | 161 | useEffect(() => { 162 | if (sessionStatus === "CONNECTED") { 163 | console.log( 164 | `updatingSession, isPTTACtive=${isPTTActive} sessionStatus=${sessionStatus}` 165 | ); 166 | updateSession(); 167 | } 168 | }, [isPTTActive]); 169 | 170 | const fetchEphemeralKey = async (): Promise => { 171 | logClientEvent({ url: "/session" }, "fetch_session_token_request"); 172 | const tokenResponse = await fetch("/api/session"); 173 | const data = await tokenResponse.json(); 174 | logServerEvent(data, "fetch_session_token_response"); 175 | 176 | if (!data.client_secret?.value) { 177 | logClientEvent(data, "error.no_ephemeral_key"); 178 | console.error("No ephemeral key provided by the server"); 179 | setSessionStatus("DISCONNECTED"); 180 | return null; 181 | } 182 | 183 | return data.client_secret.value; 184 | }; 185 | 186 | const connectToRealtime = async () => { 187 | const agentSetKey = searchParams.get("agentConfig") || "default"; 188 | if (sdkScenarioMap[agentSetKey]) { 189 | // Use new SDK path 190 | if (sessionStatus !== "DISCONNECTED") return; 191 | setSessionStatus("CONNECTING"); 192 | 193 | try { 194 | const EPHEMERAL_KEY = await fetchEphemeralKey(); 195 | if (!EPHEMERAL_KEY) return; 196 | 197 | // Ensure the selectedAgentName is first so that it becomes the root 198 | const reorderedAgents = [...sdkScenarioMap[agentSetKey]]; 199 | const idx = reorderedAgents.findIndex((a) => a.name === selectedAgentName); 200 | if (idx > 0) { 201 | const [agent] = reorderedAgents.splice(idx, 1); 202 | reorderedAgents.unshift(agent); 203 | } 204 | 205 | const client = new RealtimeClient({ 206 | getEphemeralKey: async () => EPHEMERAL_KEY, 207 | initialAgents: reorderedAgents, 208 | audioElement: sdkAudioElement, 209 | extraContext: { 210 | addTranscriptBreadcrumb, 211 | }, 212 | } as any); 213 | 214 | sdkClientRef.current = client; 215 | 216 | 
client.on("connection_change", (status) => { 217 | if (status === "connected") setSessionStatus("CONNECTED"); 218 | else if (status === "connecting") setSessionStatus("CONNECTING"); 219 | else setSessionStatus("DISCONNECTED"); 220 | }); 221 | 222 | client.on("message", (ev) => { 223 | logServerEvent(ev); 224 | 225 | // --- Realtime streaming handling --------------------------------- 226 | // The Realtime transport emits granular *delta* events while the 227 | // assistant is speaking or while the user's audio is still being 228 | // transcribed. Those events were previously only logged which made 229 | // the UI update only once when the final conversation.item.* event 230 | // arrived – effectively disabling streaming. We now listen for the 231 | // delta events and update the transcript as they arrive so that 232 | // 1) assistant messages stream token-by-token, and 233 | // 2) the user sees a live "Transcribing…" placeholder while we are 234 | // still converting their speech to text. 235 | 236 | // NOTE: The exact payloads are still evolving. We intentionally 237 | // access properties defensively to avoid runtime crashes if fields 238 | // are renamed or missing. 239 | 240 | try { 241 | // Guardrail trip event – mark last assistant message as FAIL 242 | if (ev.type === 'guardrail_tripped') { 243 | const lastAssistant = [...transcriptItemsRef.current] 244 | .reverse() 245 | .find((i) => i.role === 'assistant'); 246 | 247 | if (lastAssistant) { 248 | updateTranscriptItem(lastAssistant.itemId, { 249 | guardrailResult: { 250 | status: 'DONE', 251 | category: 'OFF_BRAND', 252 | rationale: 'Guardrail triggered', 253 | testText: '', 254 | }, 255 | } as any); 256 | } 257 | return; 258 | } 259 | 260 | // Response finished – if we still have Pending guardrail mark as 261 | // Pass. This event fires once per assistant turn. 262 | if (ev.type === 'response.done') { 263 | const lastAssistant = [...transcriptItemsRef.current] 264 | .reverse() 265 | .find((i) => i.role === 'assistant'); 266 | 267 | if (lastAssistant) { 268 | const existing: any = (lastAssistant as any).guardrailResult; 269 | if (!existing || existing.status === 'IN_PROGRESS') { 270 | updateTranscriptItem(lastAssistant.itemId, { 271 | guardrailResult: { 272 | status: 'DONE', 273 | category: 'NONE', 274 | rationale: '', 275 | }, 276 | } as any); 277 | } 278 | } 279 | // continue processing other logic if needed 280 | } 281 | // Assistant text (or audio-to-text) streaming 282 | if ( 283 | ev.type === 'response.text.delta' || 284 | ev.type === 'response.audio_transcript.delta' 285 | ) { 286 | const itemId: string | undefined = (ev as any).item_id ?? (ev as any).itemId; 287 | const delta: string | undefined = (ev as any).delta ?? (ev as any).text; 288 | if (!itemId || !delta) return; 289 | 290 | // Ensure a transcript message exists for this assistant item. 291 | if (!transcriptItemsRef.current.some((t) => t.itemId === itemId)) { 292 | addTranscriptMessage(itemId, 'assistant', ''); 293 | updateTranscriptItem(itemId, { 294 | guardrailResult: { 295 | status: 'IN_PROGRESS', 296 | }, 297 | } as any); 298 | } 299 | 300 | // Append the latest delta so the UI streams. 301 | updateTranscriptMessage(itemId, delta, true); 302 | updateTranscriptItem(itemId, { status: 'IN_PROGRESS' }); 303 | return; 304 | } 305 | 306 | // Live user transcription streaming 307 | if (ev.type === 'conversation.input_audio_transcription.delta') { 308 | const itemId: string | undefined = (ev as any).item_id ?? 
(ev as any).itemId; 309 | const delta: string | undefined = (ev as any).delta ?? (ev as any).text; 310 | if (!itemId || typeof delta !== 'string') return; 311 | 312 | // If this is the very first chunk, create a hidden user message 313 | // so that we can surface "Transcribing…" immediately. 314 | if (!transcriptItemsRef.current.some((t) => t.itemId === itemId)) { 315 | addTranscriptMessage(itemId, 'user', 'Transcribing…'); 316 | } 317 | 318 | updateTranscriptMessage(itemId, delta, true); 319 | updateTranscriptItem(itemId, { status: 'IN_PROGRESS' }); 320 | } 321 | 322 | // Detect start of a new user speech segment when VAD kicks in. 323 | if (ev.type === 'input_audio_buffer.speech_started') { 324 | const itemId: string | undefined = (ev as any).item_id; 325 | if (!itemId) return; 326 | 327 | const exists = transcriptItemsRef.current.some( 328 | (t) => t.itemId === itemId, 329 | ); 330 | if (!exists) { 331 | addTranscriptMessage(itemId, 'user', 'Transcribing…'); 332 | updateTranscriptItem(itemId, { status: 'IN_PROGRESS' }); 333 | } 334 | } 335 | 336 | // Final transcript once Whisper finishes 337 | if ( 338 | ev.type === 'conversation.item.input_audio_transcription.completed' 339 | ) { 340 | const itemId: string | undefined = (ev as any).item_id; 341 | const transcriptText: string | undefined = (ev as any).transcript; 342 | if (!itemId || typeof transcriptText !== 'string') return; 343 | 344 | const exists = transcriptItemsRef.current.some( 345 | (t) => t.itemId === itemId, 346 | ); 347 | if (!exists) { 348 | addTranscriptMessage(itemId, 'user', transcriptText.trim()); 349 | } else { 350 | // Replace placeholder / delta text with final transcript 351 | updateTranscriptMessage(itemId, transcriptText.trim(), false); 352 | } 353 | updateTranscriptItem(itemId, { status: 'DONE' }); 354 | } 355 | 356 | // Assistant streaming tokens or transcript 357 | if ( 358 | ev.type === 'response.text.delta' || 359 | ev.type === 'response.audio_transcript.delta' 360 | ) { 361 | const responseId: string | undefined = 362 | (ev as any).response_id ?? (ev as any).responseId; 363 | const delta: string | undefined = (ev as any).delta ?? (ev as any).text; 364 | if (!responseId || typeof delta !== 'string') return; 365 | 366 | // We'll use responseId as part of itemId to make it deterministic. 367 | const itemId = `assistant-${responseId}`; 368 | 369 | if (!transcriptItemsRef.current.some((t) => t.itemId === itemId)) { 370 | addTranscriptMessage(itemId, 'assistant', ''); 371 | } 372 | 373 | updateTranscriptMessage(itemId, delta, true); 374 | updateTranscriptItem(itemId, { status: 'IN_PROGRESS' }); 375 | } 376 | } catch (err) { 377 | // Streaming is best-effort – never break the session because of it. 378 | console.warn('streaming-ui error', err); 379 | } 380 | }); 381 | 382 | client.on('history_added', (item) => { 383 | logHistoryItem(item); 384 | 385 | // Update the transcript view 386 | if (item.type === 'message') { 387 | const textContent = (item.content || []) 388 | .map((c: any) => { 389 | if (c.type === 'text') return c.text; 390 | if (c.type === 'input_text') return c.text; 391 | if (c.type === 'input_audio') return c.transcript ?? ''; 392 | if (c.type === 'audio') return c.transcript ?? 
''; 393 | return ''; 394 | }) 395 | .join(' ') 396 | .trim(); 397 | 398 | if (!textContent) return; 399 | 400 | const role = item.role as 'user' | 'assistant'; 401 | 402 | // No PTT placeholder logic needed 403 | 404 | const exists = transcriptItemsRef.current.some( 405 | (t) => t.itemId === item.itemId, 406 | ); 407 | 408 | if (!exists) { 409 | addTranscriptMessage(item.itemId, role, textContent, false); 410 | if (role === 'assistant') { 411 | updateTranscriptItem(item.itemId, { 412 | guardrailResult: { 413 | status: 'IN_PROGRESS', 414 | }, 415 | } as any); 416 | } 417 | } else { 418 | updateTranscriptMessage(item.itemId, textContent, false); 419 | } 420 | 421 | // After assistant message completes, add default guardrail PASS if none present. 422 | if ( 423 | role === 'assistant' && 424 | (item as any).status === 'completed' 425 | ) { 426 | const current = transcriptItemsRef.current.find( 427 | (t) => t.itemId === item.itemId, 428 | ); 429 | const existing = (current as any)?.guardrailResult; 430 | if (existing && existing.status !== 'IN_PROGRESS') { 431 | // already final (e.g., FAIL) – leave as is. 432 | } else { 433 | updateTranscriptItem(item.itemId, { 434 | guardrailResult: { 435 | status: 'DONE', 436 | category: 'NONE', 437 | rationale: '', 438 | }, 439 | } as any); 440 | } 441 | } 442 | 443 | if ('status' in item) { 444 | updateTranscriptItem(item.itemId, { 445 | status: 446 | (item as any).status === 'completed' 447 | ? 'DONE' 448 | : 'IN_PROGRESS', 449 | }); 450 | } 451 | } 452 | 453 | // Surface function / hand-off calls as breadcrumbs 454 | if (item.type === 'function_call') { 455 | const title = `Tool call: ${(item as any).name}`; 456 | 457 | if (!loggedFunctionCallsRef.current.has(item.itemId)) { 458 | addTranscriptBreadcrumb(title, { 459 | arguments: (item as any).arguments, 460 | }); 461 | loggedFunctionCallsRef.current.add(item.itemId); 462 | 463 | // If this looks like a handoff (transfer_to_*), switch active 464 | // agent so subsequent session updates & breadcrumbs reflect the 465 | // new agent. The Realtime SDK already updated the session on 466 | // the backend; this only affects the UI state. 467 | const toolName: string = (item as any).name ?? ''; 468 | const handoffMatch = toolName.match(/^transfer_to_(.+)$/); 469 | if (handoffMatch) { 470 | const newAgentKey = handoffMatch[1]; 471 | 472 | // Find agent whose name matches (case-insensitive) 473 | const candidate = selectedAgentConfigSet?.find( 474 | (a) => a.name.toLowerCase() === newAgentKey.toLowerCase(), 475 | ); 476 | if (candidate && candidate.name !== selectedAgentName) { 477 | setSelectedAgentName(candidate.name); 478 | } 479 | } 480 | } 481 | return; 482 | } 483 | }); 484 | 485 | // Handle continuous updates for existing items so streaming assistant 486 | // speech shows up while in_progress. 487 | client.on('history_updated', (history) => { 488 | history.forEach((item: any) => { 489 | if (item.type === 'function_call') { 490 | // Update breadcrumb data (e.g., add output) once we have more info. 491 | 492 | if (!loggedFunctionCallsRef.current.has(item.itemId)) { 493 | addTranscriptBreadcrumb(`Tool call: ${(item as any).name}`, { 494 | arguments: (item as any).arguments, 495 | output: (item as any).output, 496 | }); 497 | loggedFunctionCallsRef.current.add(item.itemId); 498 | 499 | const toolName: string = (item as any).name ?? 
''; 500 | const handoffMatch = toolName.match(/^transfer_to_(.+)$/); 501 | if (handoffMatch) { 502 | const newAgentKey = handoffMatch[1]; 503 | const candidate = selectedAgentConfigSet?.find( 504 | (a) => a.name.toLowerCase() === newAgentKey.toLowerCase(), 505 | ); 506 | if (candidate && candidate.name !== selectedAgentName) { 507 | setSelectedAgentName(candidate.name); 508 | } 509 | } 510 | } 511 | 512 | return; 513 | } 514 | 515 | if (item.type !== 'message') return; 516 | 517 | const textContent = (item.content || []) 518 | .map((c: any) => { 519 | if (c.type === 'text') return c.text; 520 | if (c.type === 'input_text') return c.text; 521 | if (c.type === 'input_audio') return c.transcript ?? ''; 522 | if (c.type === 'audio') return c.transcript ?? ''; 523 | return ''; 524 | }) 525 | .join(' ') 526 | .trim(); 527 | 528 | const role = item.role as 'user' | 'assistant'; 529 | 530 | if (!textContent) return; 531 | 532 | const exists = transcriptItemsRef.current.some( 533 | (t) => t.itemId === item.itemId, 534 | ); 535 | if (!exists) { 536 | addTranscriptMessage(item.itemId, role, textContent, false); 537 | if (role === 'assistant') { 538 | updateTranscriptItem(item.itemId, { 539 | guardrailResult: { 540 | status: 'IN_PROGRESS', 541 | }, 542 | } as any); 543 | } 544 | } else { 545 | updateTranscriptMessage(item.itemId, textContent, false); 546 | } 547 | 548 | if ('status' in item) { 549 | updateTranscriptItem(item.itemId, { 550 | status: 551 | (item as any).status === 'completed' 552 | ? 'DONE' 553 | : 'IN_PROGRESS', 554 | }); 555 | } 556 | }); 557 | }); 558 | 559 | await client.connect(); 560 | } catch (err) { 561 | console.error("Error connecting via SDK:", err); 562 | setSessionStatus("DISCONNECTED"); 563 | } 564 | return; 565 | } 566 | }; 567 | 568 | const disconnectFromRealtime = () => { 569 | if (sdkClientRef.current) { 570 | sdkClientRef.current.disconnect(); 571 | sdkClientRef.current = null; 572 | } 573 | setSessionStatus("DISCONNECTED"); 574 | setIsPTTUserSpeaking(false); 575 | 576 | logClientEvent({}, "disconnected"); 577 | }; 578 | 579 | const sendSimulatedUserMessage = (text: string) => { 580 | const id = uuidv4().slice(0, 32); 581 | addTranscriptMessage(id, "user", text, true); 582 | 583 | sendClientEvent( 584 | { 585 | type: "conversation.item.create", 586 | item: { 587 | id, 588 | type: "message", 589 | role: "user", 590 | content: [{ type: "input_text", text }], 591 | }, 592 | }, 593 | "(simulated user text message)" 594 | ); 595 | sendClientEvent( 596 | { type: "response.create" }, 597 | "(trigger response after simulated user text message)" 598 | ); 599 | }; 600 | 601 | const updateSession = (shouldTriggerResponse: boolean = false) => { 602 | // In SDK scenarios RealtimeClient manages session config automatically. 603 | if (sdkClientRef.current) { 604 | if (shouldTriggerResponse) { 605 | sendSimulatedUserMessage('hi'); 606 | } 607 | 608 | // Reflect Push-to-Talk UI state by (de)activating server VAD on the 609 | // backend. The Realtime SDK supports live session updates via the 610 | // `session.update` event. 611 | const client = sdkClientRef.current; 612 | if (client) { 613 | const turnDetection = isPTTActive 614 | ? 
null 615 | : { 616 | type: 'server_vad', 617 | threshold: 0.9, 618 | prefix_padding_ms: 300, 619 | silence_duration_ms: 500, 620 | create_response: true, 621 | }; 622 | try { 623 | client.sendEvent({ 624 | type: 'session.update', 625 | session: { 626 | turn_detection: turnDetection, 627 | }, 628 | }); 629 | } catch (err) { 630 | console.warn('Failed to update session', err); 631 | } 632 | } 633 | return; 634 | } 635 | }; 636 | 637 | const cancelAssistantSpeech = async () => { 638 | 639 | // Interrupts server response and clears local audio. 640 | if (sdkClientRef.current) { 641 | try { 642 | sdkClientRef.current.interrupt(); 643 | } catch (err) { 644 | console.error('Failed to interrupt', err); 645 | } 646 | } 647 | }; 648 | 649 | const handleSendTextMessage = () => { 650 | if (!userText.trim()) return; 651 | cancelAssistantSpeech(); 652 | 653 | if (!sdkClientRef.current) { 654 | console.error('SDK client not available'); 655 | return; 656 | } 657 | 658 | try { 659 | sdkClientRef.current.sendUserText(userText.trim()); 660 | } catch (err) { 661 | console.error('Failed to send via SDK', err); 662 | } 663 | 664 | setUserText(""); 665 | }; 666 | 667 | const handleTalkButtonDown = () => { 668 | if (sessionStatus !== 'CONNECTED' || sdkClientRef.current == null) return; 669 | cancelAssistantSpeech(); 670 | 671 | setIsPTTUserSpeaking(true); 672 | sendClientEvent({ type: "input_audio_buffer.clear" }, "clear PTT buffer"); 673 | 674 | // No placeholder; we'll rely on server transcript once ready. 675 | }; 676 | 677 | const handleTalkButtonUp = () => { 678 | if (sessionStatus !== 'CONNECTED' || sdkClientRef.current == null || !isPTTUserSpeaking) 679 | return; 680 | 681 | setIsPTTUserSpeaking(false); 682 | sendClientEvent({ type: "input_audio_buffer.commit" }, "commit PTT"); 683 | sendClientEvent({ type: "response.create" }, "trigger response PTT"); 684 | }; 685 | 686 | const onToggleConnection = () => { 687 | if (sessionStatus === "CONNECTED" || sessionStatus === "CONNECTING") { 688 | disconnectFromRealtime(); 689 | setSessionStatus("DISCONNECTED"); 690 | } else { 691 | connectToRealtime(); 692 | } 693 | }; 694 | 695 | const handleAgentChange = (e: React.ChangeEvent) => { 696 | const newAgentConfig = e.target.value; 697 | const url = new URL(window.location.toString()); 698 | url.searchParams.set("agentConfig", newAgentConfig); 699 | window.location.replace(url.toString()); 700 | }; 701 | 702 | const handleSelectedAgentChange = ( 703 | e: React.ChangeEvent 704 | ) => { 705 | const newAgentName = e.target.value; 706 | // Reconnect session with the newly selected agent as root so that tool 707 | // execution works correctly. 
708 | disconnectFromRealtime(); 709 | setSelectedAgentName(newAgentName); 710 | // connectToRealtime will be triggered by effect watching selectedAgentName 711 | }; 712 | 713 | // Instead of using setCodec, we update the URL and refresh the page when codec changes 714 | const handleCodecChange = (newCodec: string) => { 715 | const url = new URL(window.location.toString()); 716 | url.searchParams.set("codec", newCodec); 717 | window.location.replace(url.toString()); 718 | }; 719 | 720 | useEffect(() => { 721 | const storedPushToTalkUI = localStorage.getItem("pushToTalkUI"); 722 | if (storedPushToTalkUI) { 723 | setIsPTTActive(storedPushToTalkUI === "true"); 724 | } 725 | const storedLogsExpanded = localStorage.getItem("logsExpanded"); 726 | if (storedLogsExpanded) { 727 | setIsEventsPaneExpanded(storedLogsExpanded === "true"); 728 | } 729 | const storedAudioPlaybackEnabled = localStorage.getItem( 730 | "audioPlaybackEnabled" 731 | ); 732 | if (storedAudioPlaybackEnabled) { 733 | setIsAudioPlaybackEnabled(storedAudioPlaybackEnabled === "true"); 734 | } 735 | }, []); 736 | 737 | useEffect(() => { 738 | localStorage.setItem("pushToTalkUI", isPTTActive.toString()); 739 | }, [isPTTActive]); 740 | 741 | useEffect(() => { 742 | localStorage.setItem("logsExpanded", isEventsPaneExpanded.toString()); 743 | }, [isEventsPaneExpanded]); 744 | 745 | useEffect(() => { 746 | localStorage.setItem( 747 | "audioPlaybackEnabled", 748 | isAudioPlaybackEnabled.toString() 749 | ); 750 | }, [isAudioPlaybackEnabled]); 751 | 752 | useEffect(() => { 753 | if (audioElementRef.current) { 754 | if (isAudioPlaybackEnabled) { 755 | audioElementRef.current.muted = false; 756 | audioElementRef.current.play().catch((err) => { 757 | console.warn("Autoplay may be blocked by browser:", err); 758 | }); 759 | } else { 760 | // Mute and pause to avoid brief audio blips before pause takes effect. 761 | audioElementRef.current.muted = true; 762 | audioElementRef.current.pause(); 763 | } 764 | } 765 | 766 | // Toggle server-side audio stream mute so bandwidth is saved when the 767 | // user disables playback. Only supported when using the SDK path. 768 | if (sdkClientRef.current) { 769 | try { 770 | sdkClientRef.current.mute(!isAudioPlaybackEnabled); 771 | } catch (err) { 772 | console.warn('Failed to toggle SDK mute', err); 773 | } 774 | } 775 | }, [isAudioPlaybackEnabled]); 776 | 777 | // Ensure mute state is propagated to transport right after we connect or 778 | // whenever the SDK client reference becomes available. 779 | useEffect(() => { 780 | if (sessionStatus === 'CONNECTED' && sdkClientRef.current) { 781 | try { 782 | sdkClientRef.current.mute(!isAudioPlaybackEnabled); 783 | } catch (err) { 784 | console.warn('mute sync after connect failed', err); 785 | } 786 | } 787 | }, [sessionStatus, isAudioPlaybackEnabled]); 788 | 789 | useEffect(() => { 790 | if (sessionStatus === "CONNECTED" && audioElementRef.current?.srcObject) { 791 | // The remote audio stream from the audio element. 792 | const remoteStream = audioElementRef.current.srcObject as MediaStream; 793 | startRecording(remoteStream); 794 | } 795 | 796 | // Clean up on unmount or when sessionStatus is updated. 797 | return () => { 798 | stopRecording(); 799 | }; 800 | }, [sessionStatus]); 801 | 802 | const agentSetKey = searchParams.get("agentConfig") || "default"; 803 | 804 | return ( 805 |
806 |
807 |
window.location.reload()} 810 | > 811 |
812 | OpenAI Logo 819 |
820 |
821 | Realtime API Agents 822 |
823 |
824 |
825 | 828 |
829 | 840 |
841 | 842 | 847 | 848 |
849 |
850 | 851 | {agentSetKey && ( 852 |
853 | 856 |
857 | 868 |
869 | 874 | 879 | 880 |
881 |
882 |
883 | )} 884 |
885 |
886 | 887 |
888 | 898 | 899 | 900 |
901 | 902 | 917 |
918 | );
919 | }
920 | 
921 | export default App;
922 | 
--------------------------------------------------------------------------------
/src/app/agentConfigs/chatSupervisor/index.ts:
--------------------------------------------------------------------------------
1 | import { RealtimeAgent } from '@openai/agents/realtime'
2 | import { getNextResponseFromSupervisor } from './supervisorAgent';
3 | 
4 | export const chatAgent = new RealtimeAgent({
5 | name: 'chatAgent',
6 | voice: 'sage',
7 | instructions: `
8 | You are a helpful junior customer service agent. Your task is to maintain a natural conversation flow with the user, help them resolve their query in a way that's helpful, efficient, and correct, and to defer heavily to a more experienced and intelligent Supervisor Agent.
9 | 
10 | # General Instructions
11 | - You are very new and can only handle basic tasks; you will rely heavily on the Supervisor Agent via the getNextResponseFromSupervisor tool.
12 | - By default, you must always use the getNextResponseFromSupervisor tool to get your next response, except for very specific exceptions.
13 | - You represent a company called NewTelco.
14 | - Always greet the user with "Hi, you've reached NewTelco, how can I help you?"
15 | - If the user says "hi", "hello", or similar greetings in later messages, respond naturally and briefly (e.g., "Hello!" or "Hi there!") instead of repeating the canned greeting.
16 | - In general, don't say the same thing twice; always vary your phrasing to ensure the conversation feels natural.
17 | - Do not use any of the information or values from the examples as a reference in conversation.
18 | 
19 | ## Tone
20 | - Maintain an extremely neutral, unexpressive, and to-the-point tone at all times.
21 | - Do not use sing-song-y or overly friendly language.
22 | - Be quick and concise.
23 | 
24 | # Tools
25 | - You can ONLY call getNextResponseFromSupervisor.
26 | - Even if you're provided other tools in this prompt as a reference, NEVER call them directly.
27 | 
28 | # Allow List of Permitted Actions
29 | You can take the following actions directly, and don't need to use getNextResponseFromSupervisor for these.
30 | 
31 | ## Basic chitchat
32 | - Handle greetings (e.g., "hello", "hi there").
33 | - Engage in basic chitchat (e.g., "how are you?", "thank you").
34 | - Respond to requests to repeat or clarify information (e.g., "can you repeat that?").
35 | 
36 | ## Collect information for Supervisor Agent tool calls
37 | - Request user information needed to call tools. Refer to the Supervisor Tools section below for the full definitions and schema.
38 | 
39 | ### Supervisor Agent Tools
40 | NEVER call these tools directly; they are only provided as a reference for collecting parameters for the supervisor model to use.
41 | 
42 | lookupPolicyDocument:
43 |   description: Look up internal documents and policies by topic or keyword.
44 |   params:
45 |     topic: string (required) - The topic or keyword to search for.
46 | 
47 | getUserAccountInfo:
48 |   description: Get user account and billing information (read-only).
49 |   params:
50 |     phone_number: string (required) - User's phone number.
51 | 
52 | findNearestStore:
53 |   description: Find the nearest store location given a zip code.
54 |   params:
55 |     zip_code: string (required) - The customer's 5-digit zip code.
56 | 
57 | **You must NOT answer, resolve, or attempt to handle ANY other type of request, question, or issue yourself. For absolutely everything else, you MUST use the getNextResponseFromSupervisor tool to get your response.
This includes ANY factual, account-specific, or process-related questions, no matter how minor they may seem.**
58 | 
59 | # getNextResponseFromSupervisor Usage
60 | - For ALL requests that are not strictly and explicitly listed above, you MUST ALWAYS use the getNextResponseFromSupervisor tool, which will ask the Supervisor Agent for a high-quality response you can use.
61 | - For example, this could be to answer factual questions about accounts or business processes, or requests to take actions.
62 | - Do NOT attempt to answer, resolve, or speculate on any other requests, even if you think you know the answer or it seems simple.
63 | - You should make NO assumptions about what you can or can't do. Always defer to getNextResponseFromSupervisor() for all non-trivial queries.
64 | - Before calling getNextResponseFromSupervisor, you MUST ALWAYS say something to the user (see the 'Sample Filler Phrases' section). Never call getNextResponseFromSupervisor without first saying something to the user.
65 | - Filler phrases must NOT indicate whether you can or cannot fulfill an action; they should be neutral and not imply any outcome.
66 | - After the filler phrase, YOU MUST ALWAYS call the getNextResponseFromSupervisor tool.
67 | - This is required for every use of getNextResponseFromSupervisor, without exception. Do not skip the filler phrase, even if the user has just provided information or context.
68 | - You will use this tool extensively.
69 | 
70 | ## How getNextResponseFromSupervisor Works
71 | - This asks supervisorAgent what to do next. supervisorAgent is a more senior, more intelligent and capable agent that has access to the full conversation transcript so far and can call the above functions.
72 | - You must provide it with key context, ONLY from the most recent user message, as the supervisor may not have access to that message.
73 | - This should be as concise as absolutely possible, and can be an empty string if no salient information is in the last user message.
74 | - That agent then analyzes the transcript, potentially calls functions to formulate an answer, and then provides a high-quality answer, which you should read verbatim.
75 | 
76 | # Sample Filler Phrases
77 | - "Just a second."
78 | - "Let me check."
79 | - "One moment."
80 | - "Let me look into that."
81 | - "Give me a moment."
82 | - "Let me see."
83 | 
84 | # Example
85 | - User: "Hi"
86 | - Assistant: "Hi, you've reached NewTelco, how can I help you?"
87 | - User: "I'm wondering why my recent bill was so high"
88 | - Assistant: "Sure, may I have your phone number so I can look that up?"
89 | - User: 206 135 1246
90 | - Assistant: "Okay, let me look into that" // Required filler phrase
91 | - getNextResponseFromSupervisor(relevantContextFromLastUserMessage="Phone number: 206 135 1246")
92 | - getNextResponseFromSupervisor(): "# Message\nOkay, I've pulled that up. Your last bill was $xx.xx, mainly due to $y.yy in international calls and $z.zz in data overage. Does that make sense?"
93 | - Assistant: "Okay, I've pulled that up. It looks like your last bill was $xx.xx, which is higher than your usual amount because of $y.yy in international calls and $z.zz in data overage charges. Does that make sense?"
94 | - User: "Okay, yes, thank you."
95 | - Assistant: "Of course, please let me know if I can help with anything else."
96 | - User: "Actually, I'm wondering if my address is up to date, what address do you have on file?"
97 | - Assistant: "1234 Pine St. in Seattle, is that your latest?"
98 | - User: "Yes, looks good, thank you" 99 | - Assistant: "Great, anything else I can help with?" 100 | - User: "Nope that's great, bye!" 101 | - Assistant: "Of course, thanks for calling NewTelco!" 102 | 103 | # Additional Example (Filler Phrase Before getNextResponseFromSupervisor) 104 | - User: "Can you tell me what my current plan includes?" 105 | - Assistant: "One moment." 106 | - getNextResponseFromSupervisor(relevantContextFromLastUserMessage="Wants to know what their current plan includes") 107 | - getNextResponseFromSupervisor(): "# Message\nYour current plan includes unlimited talk and text, plus 10GB of data per month. Would you like more details or information about upgrading?" 108 | - Assistant: "Your current plan includes unlimited talk and text, plus 10GB of data per month. Would you like more details or information about upgrading?" 109 | `, 110 | tools: [ 111 | getNextResponseFromSupervisor, 112 | ], 113 | }); 114 | 115 | export const chatSupervisorScenario = [chatAgent]; 116 | 117 | export default chatSupervisorScenario; 118 | -------------------------------------------------------------------------------- /src/app/agentConfigs/chatSupervisor/sampleData.ts: -------------------------------------------------------------------------------- 1 | export const exampleAccountInfo = { 2 | accountId: "NT-123456", 3 | name: "Alex Johnson", 4 | phone: "+1-206-135-1246", 5 | email: "alex.johnson@email.com", 6 | plan: "Unlimited Plus", 7 | balanceDue: "$42.17", 8 | lastBillDate: "2024-05-15", 9 | lastPaymentDate: "2024-05-20", 10 | lastPaymentAmount: "$42.17", 11 | status: "Active", 12 | address: { 13 | street: "1234 Pine St", 14 | city: "Seattle", 15 | state: "WA", 16 | zip: "98101" 17 | }, 18 | lastBillDetails: { 19 | basePlan: "$30.00", 20 | internationalCalls: "$8.00", 21 | dataOverage: "$4.00", 22 | taxesAndFees: "$0.17", 23 | notes: "Higher than usual due to international calls and data overage." 24 | } 25 | }; 26 | 27 | export const examplePolicyDocs = [ 28 | { 29 | id: "ID-010", 30 | name: "Family Plan Policy", 31 | topic: "family plan options", 32 | content: 33 | "The family plan allows up to 5 lines per account. All lines share a single data pool. Each additional line after the first receives a 10% discount. All lines must be on the same account.", 34 | }, 35 | { 36 | id: "ID-020", 37 | name: "Promotions and Discounts Policy", 38 | topic: "promotions and discounts", 39 | content: 40 | "The Summer Unlimited Data Sale provides a 20% discount on the Unlimited Plus plan for the first 6 months for new activations completed by July 31, 2024. The Refer-a-Friend Bonus provides a $50 bill credit to both the referring customer and the new customer after 60 days of active service, for activations by August 31, 2024. A maximum of 5 referral credits may be earned per account. Discounts cannot be combined with other offers.", 41 | }, 42 | { 43 | id: "ID-030", 44 | name: "International Plans Policy", 45 | topic: "international plans", 46 | content: 47 | "International plans are available and include discounted calling, texting, and data usage in over 100 countries.", 48 | }, 49 | { 50 | id: "ID-040", 51 | name: "Handset Offers Policy", 52 | topic: "new handsets", 53 | content: 54 | "Handsets from brands such as Apple and Google are available. The iPhone 16 is $200 and the Google Pixel 8 is available for $0, both with an additional 18-month commitment. These offers are valid while supplies last and may require eligible plans or trade-ins.
For more details, visit one of our stores.", 55 | }, 56 | ]; 57 | 58 | export const exampleStoreLocations = [ 59 | // NorCal 60 | { 61 | name: "NewTelco San Francisco Downtown Store", 62 | address: "1 Market St, San Francisco, CA", 63 | zip_code: "94105", 64 | phone: "(415) 555-1001", 65 | hours: "Mon-Sat 10am-7pm, Sun 11am-5pm" 66 | }, 67 | { 68 | name: "NewTelco San Jose Valley Fair Store", 69 | address: "2855 Stevens Creek Blvd, Santa Clara, CA", 70 | zip_code: "95050", 71 | phone: "(408) 555-2002", 72 | hours: "Mon-Sat 10am-8pm, Sun 11am-6pm" 73 | }, 74 | { 75 | name: "NewTelco Sacramento Midtown Store", 76 | address: "1801 L St, Sacramento, CA", 77 | zip_code: "95811", 78 | phone: "(916) 555-3003", 79 | hours: "Mon-Sat 10am-7pm, Sun 12pm-5pm" 80 | }, 81 | // SoCal 82 | { 83 | name: "NewTelco Los Angeles Hollywood Store", 84 | address: "6801 Hollywood Blvd, Los Angeles, CA", 85 | zip_code: "90028", 86 | phone: "(323) 555-4004", 87 | hours: "Mon-Sat 10am-9pm, Sun 11am-7pm" 88 | }, 89 | { 90 | name: "NewTelco San Diego Gaslamp Store", 91 | address: "555 5th Ave, San Diego, CA", 92 | zip_code: "92101", 93 | phone: "(619) 555-5005", 94 | hours: "Mon-Sat 10am-8pm, Sun 11am-6pm" 95 | }, 96 | { 97 | name: "NewTelco Irvine Spectrum Store", 98 | address: "670 Spectrum Center Dr, Irvine, CA", 99 | zip_code: "92618", 100 | phone: "(949) 555-6006", 101 | hours: "Mon-Sat 10am-8pm, Sun 11am-6pm" 102 | }, 103 | // East Coast 104 | { 105 | name: "NewTelco New York City Midtown Store", 106 | address: "350 5th Ave, New York, NY", 107 | zip_code: "10118", 108 | phone: "(212) 555-7007", 109 | hours: "Mon-Sat 9am-8pm, Sun 10am-6pm" 110 | }, 111 | { 112 | name: "NewTelco Boston Back Bay Store", 113 | address: "800 Boylston St, Boston, MA", 114 | zip_code: "02199", 115 | phone: "(617) 555-8008", 116 | hours: "Mon-Sat 10am-7pm, Sun 12pm-6pm" 117 | }, 118 | { 119 | name: "NewTelco Washington DC Georgetown Store", 120 | address: "1234 Wisconsin Ave NW, Washington, DC", 121 | zip_code: "20007", 122 | phone: "(202) 555-9009", 123 | hours: "Mon-Sat 10am-7pm, Sun 12pm-5pm" 124 | }, 125 | { 126 | name: "NewTelco Miami Beach Store", 127 | address: "1601 Collins Ave, Miami Beach, FL", 128 | zip_code: "33139", 129 | phone: "(305) 555-1010", 130 | hours: "Mon-Sat 10am-8pm, Sun 11am-6pm" 131 | } 132 | ]; -------------------------------------------------------------------------------- /src/app/agentConfigs/chatSupervisor/supervisorAgent.ts: -------------------------------------------------------------------------------- 1 | import { RealtimeItem, tool } from '@openai/agents/realtime'; 2 | 3 | 4 | import { 5 | exampleAccountInfo, 6 | examplePolicyDocs, 7 | exampleStoreLocations, 8 | } from './sampleData'; 9 | 10 | export const supervisorAgentInstructions = `You are an expert customer service supervisor agent, tasked with providing real-time guidance to a more junior agent that's chatting directly with the customer. You will be given detailed response instructions, tools, and the full conversation history so far, and you should create a correct next message that the junior agent can read directly. 
11 | 12 | # Instructions 13 | - You can provide an answer directly, or call a tool first and then answer the question 14 | - If you need to call a tool, but don't have the right information, you can tell the junior agent to ask for that information in your message 15 | - Your message will be read verbatim by the junior agent, so feel free to use it like you would talk directly to the user 16 | 17 | ==== Domain-Specific Agent Instructions ==== 18 | You are a helpful customer service agent working for NewTelco, helping a user efficiently fulfill their request while adhering closely to provided guidelines. 19 | 20 | # Instructions 21 | - Always greet the user at the start of the conversation with "Hi, you've reached NewTelco, how can I help you?" 22 | - Always call a tool before answering factual questions about the company, its offerings or products, or a user's account. Only use retrieved context and never rely on your own knowledge for any of these questions. 23 | - Escalate to a human if the user requests. 24 | - Do not discuss prohibited topics (politics, religion, controversial current events, medical, legal, or financial advice, personal conversations, internal company operations, or criticism of any people or company). 25 | - Rely on sample phrases whenever appropriate, but never repeat a sample phrase in the same conversation. Feel free to vary the sample phrases to avoid sounding repetitive and make it more appropriate for the user. 26 | - Always follow the provided output format for new messages, including citations for any factual statements from retrieved policy documents. 27 | 28 | # Response Instructions 29 | - Maintain a professional and concise tone in all responses. 30 | - Respond appropriately given the above guidelines. 31 | - The message is for a voice conversation, so be very concise, use prose, and never create bulleted lists. Prioritize brevity and clarity over completeness. 32 | - Even if you have access to more information, only mention a couple of the most important items and summarize the rest at a high level. 33 | - Do not speculate or make assumptions about capabilities or information. If a request cannot be fulfilled with available tools or information, politely refuse and offer to escalate to a human representative. 34 | - If you do not have all required information to call a tool, you MUST ask the user for the missing information in your message. NEVER attempt to call a tool with missing, empty, placeholder, or default values (such as "", "REQUIRED", "null", or similar). Only call a tool when you have all required parameters provided by the user. 35 | - Do not offer or attempt to fulfill requests for capabilities or services not explicitly supported by your tools or provided information. 36 | - Only offer to provide more information if you know there is more information available to provide, based on the tools and context you have. 37 | - When possible, please provide specific numbers or dollar amounts to substantiate your answer. 38 | 39 | # Sample Phrases 40 | ## Deflecting a Prohibited Topic 41 | - "I'm sorry, but I'm unable to discuss that topic. Is there something else I can help you with?" 42 | - "That's not something I'm able to provide information on, but I'm happy to help with any other questions you may have." 43 | 44 | ## If you do not have a tool or information to fulfill a request 45 | - "Sorry, I'm actually not able to do that. Would you like me to transfer you to someone who can help, or help you find your nearest NewTelco store?" 
46 | - "I'm not able to assist with that request. Would you like to speak with a human representative, or would you like help finding your nearest NewTelco store?" 47 | 48 | ## Before calling a tool 49 | - "To help you with that, I'll just need to verify your information." 50 | - "Let me check that for you—one moment, please." 51 | - "I'll retrieve the latest details for you now." 52 | 53 | ## If required information is missing for a tool call 54 | - "To help you with that, could you please provide your [required info, e.g., zip code/phone number]?" 55 | - "I'll need your [required info] to proceed. Could you share that with me?" 56 | 57 | # User Message Format 58 | - Always include your final response to the user. 59 | - When providing factual information from retrieved context, always include citations immediately after the relevant statement(s). Use the following citation format: 60 | - For a single source: [NAME](ID) 61 | - For multiple sources: [NAME](ID), [NAME](ID) 62 | - Only provide information about this company, its policies, its products, or the customer's account, and only if it is based on information provided in context. Do not answer questions outside this scope. 63 | 64 | # Example (tool call) 65 | - User: Can you tell me about your family plan options? 66 | - Supervisor Assistant: lookup_policy_document(topic="family plan options") 67 | - lookup_policy_document(): [ 68 | { 69 | id: "ID-010", 70 | name: "Family Plan Policy", 71 | topic: "family plan options", 72 | content: 73 | "The family plan allows up to 5 lines per account. All lines share a single data pool. Each additional line after the first receives a 10% discount. All lines must be on the same account.", 74 | }, 75 | { 76 | id: "ID-011", 77 | name: "Unlimited Data Policy", 78 | topic: "unlimited data", 79 | content: 80 | "Unlimited data plans provide high-speed data up to 50GB per month. After 50GB, speeds may be reduced during network congestion. All lines on a family plan share the same data pool. Unlimited plans are available for both individual and family accounts.", 81 | }, 82 | ]; 83 | - Supervisor Assistant: 84 | # Message 85 | Yes we do—up to five lines can share data, and you get a 10% discount for each new line [Family Plan Policy](ID-010). 86 | 87 | # Example (Refusal for Unsupported Request) 88 | - User: Can I make a payment over the phone right now? 89 | - Supervisor Assistant: 90 | # Message 91 | I'm sorry, but I'm not able to process payments over the phone. Would you like me to connect you with a human representative, or help you find your nearest NewTelco store for further assistance? 92 | `; 93 | 94 | export const supervisorAgentTools = [ 95 | { 96 | type: "function", 97 | name: "lookupPolicyDocument", 98 | description: 99 | "Tool to look up internal documents and policies by topic or keyword.", 100 | parameters: { 101 | type: "object", 102 | properties: { 103 | topic: { 104 | type: "string", 105 | description: 106 | "The topic or keyword to search for in company policies or documents.", 107 | }, 108 | }, 109 | required: ["topic"], 110 | additionalProperties: false, 111 | }, 112 | }, 113 | { 114 | type: "function", 115 | name: "getUserAccountInfo", 116 | description: 117 | "Tool to get user account information. This only reads user accounts information, and doesn't provide the ability to modify or delete any values.", 118 | parameters: { 119 | type: "object", 120 | properties: { 121 | phone_number: { 122 | type: "string", 123 | description: 124 | "Formatted as '(xxx) xxx-xxxx'. 
MUST be provided by the user, never a null or empty string.", 125 | }, 126 | }, 127 | required: ["phone_number"], 128 | additionalProperties: false, 129 | }, 130 | }, 131 | { 132 | type: "function", 133 | name: "findNearestStore", 134 | description: 135 | "Tool to find the nearest store location to a customer, given their zip code.", 136 | parameters: { 137 | type: "object", 138 | properties: { 139 | zip_code: { 140 | type: "string", 141 | description: "The customer's 5-digit zip code.", 142 | }, 143 | }, 144 | required: ["zip_code"], 145 | additionalProperties: false, 146 | }, 147 | }, 148 | ]; 149 | 150 | async function fetchResponsesMessage(body: any) { 151 | const response = await fetch('/api/responses', { 152 | method: 'POST', 153 | headers: { 154 | 'Content-Type': 'application/json', 155 | }, 156 | // Preserve the previous behaviour of forcing sequential tool calls. 157 | body: JSON.stringify({ ...body, parallel_tool_calls: false }), 158 | }); 159 | 160 | if (!response.ok) { 161 | console.warn('Server returned an error:', response); 162 | return { error: 'Something went wrong.' }; 163 | } 164 | 165 | const completion = await response.json(); 166 | return completion; 167 | } 168 | 169 | function getToolResponse(fName: string) { 170 | switch (fName) { 171 | case "getUserAccountInfo": 172 | return exampleAccountInfo; 173 | case "lookupPolicyDocument": 174 | return examplePolicyDocs; 175 | case "findNearestStore": 176 | return exampleStoreLocations; 177 | default: 178 | return { result: true }; 179 | } 180 | } 181 | 182 | /** 183 | * Iteratively handles function calls returned by the Responses API until the 184 | * assistant produces a final textual answer. Returns that answer as a string. 185 | */ 186 | async function handleToolCalls( 187 | body: any, 188 | response: any, 189 | addBreadcrumb?: (title: string, data?: any) => void, 190 | ) { 191 | let currentResponse = response; 192 | 193 | while (true) { 194 | if (currentResponse?.error) { 195 | return { error: 'Something went wrong.' } as any; 196 | } 197 | 198 | const outputItems: any[] = currentResponse.output ?? []; 199 | 200 | // Gather all function calls in the output. 201 | const functionCalls = outputItems.filter((item) => item.type === 'function_call'); 202 | 203 | if (functionCalls.length === 0) { 204 | // No more function calls – build and return the assistant's final message. 205 | const assistantMessages = outputItems.filter((item) => item.type === 'message'); 206 | 207 | const finalText = assistantMessages 208 | .map((msg: any) => { 209 | const contentArr = msg.content ?? []; 210 | return contentArr 211 | .filter((c: any) => c.type === 'output_text') 212 | .map((c: any) => c.text) 213 | .join(''); 214 | }) 215 | .join('\n'); 216 | 217 | return finalText; 218 | } 219 | 220 | // For each function call returned by the model, execute it locally and append its 221 | // output to the request body as a `function_call_output` item. 
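// For concreteness, the two items pushed below have roughly the following
// shapes (illustrative values only, not verbatim API output); the shared
// call_id is what lets the model pair each tool result with the call that
// produced it:
//   { type: 'function_call', call_id: 'call_abc123',
//     name: 'getUserAccountInfo', arguments: '{"phone_number":"(206) 135-1246"}' }
//   { type: 'function_call_output', call_id: 'call_abc123',
//     output: '{"plan":"Unlimited Plus","balanceDue":"$42.17"}' }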
222 | for (const toolCall of functionCalls) { 223 | const fName = toolCall.name; 224 | const args = JSON.parse(toolCall.arguments || '{}'); 225 | 226 | if (addBreadcrumb) { 227 | addBreadcrumb(`[supervisorAgent] function call: ${fName}`, args); 228 | } 229 | 230 | const toolRes = getToolResponse(fName); 231 | 232 | if (addBreadcrumb) { 233 | addBreadcrumb(`[supervisorAgent] function call result: ${fName}`, toolRes); 234 | } 235 | 236 | body.input.push( 237 | { 238 | type: 'function_call', 239 | call_id: toolCall.call_id, 240 | name: toolCall.name, 241 | arguments: toolCall.arguments, 242 | }, 243 | { 244 | type: 'function_call_output', 245 | call_id: toolCall.call_id, 246 | output: JSON.stringify(toolRes), 247 | }, 248 | ); 249 | } 250 | 251 | // Make the follow-up request including the tool outputs. 252 | currentResponse = await fetchResponsesMessage(body); 253 | } 254 | } 255 | 256 | export const getNextResponseFromSupervisor = tool({ 257 | name: 'getNextResponseFromSupervisor', 258 | description: 259 | 'Determines the next response whenever the agent faces a non-trivial decision, produced by a highly intelligent supervisor agent. Returns a message describing what to do next.', 260 | parameters: { 261 | type: 'object', 262 | properties: { 263 | relevantContextFromLastUserMessage: { 264 | type: 'string', 265 | description: 266 | 'Key information from the user\'s most recent message. This is critical to provide so the supervisor agent has full context, since it may not have access to that message. Okay to omit if the user message didn\'t add any new information.', 267 | }, 268 | }, 269 | required: ['relevantContextFromLastUserMessage'], 270 | additionalProperties: false, 271 | }, 272 | execute: async (input, details) => { 273 | const { relevantContextFromLastUserMessage } = input as { 274 | relevantContextFromLastUserMessage: string; 275 | }; 276 | 277 | const addBreadcrumb = (details?.context as any)?.addTranscriptBreadcrumb as 278 | | ((title: string, data?: any) => void) 279 | | undefined; 280 | 281 | const history: RealtimeItem[] = (details?.context as any)?.history ?? []; 282 | const filteredLogs = history.filter((log) => log.type === 'message'); 283 | 284 | const body: any = { 285 | model: 'gpt-4.1', 286 | input: [ 287 | { 288 | type: 'message', 289 | role: 'system', 290 | content: supervisorAgentInstructions, 291 | }, 292 | { 293 | type: 'message', 294 | role: 'user', 295 | content: `==== Conversation History ==== 296 | ${JSON.stringify(filteredLogs, null, 2)} 297 | 298 | ==== Relevant Context From Last User Message ==== 299 | ${relevantContextFromLastUserMessage} 300 | `, 301 | }, 302 | ], 303 | tools: supervisorAgentTools, 304 | }; 305 | 306 | let response = await fetchResponsesMessage(body); 307 | if (response.error) { 308 | return { error: 'Something went wrong.' }; 309 | } 310 | 311 | const finalText = await handleToolCalls(body, response, addBreadcrumb); 312 | if ((finalText as any)?.error) { 313 | return { error: 'Something went wrong.'
}; 314 | } 315 | 316 | return { nextResponse: finalText as string }; 317 | }, 318 | }); 319 | -------------------------------------------------------------------------------- /src/app/agentConfigs/customerServiceRetail/authentication.ts: -------------------------------------------------------------------------------- 1 | import { RealtimeAgent, tool } from '@openai/agents/realtime'; 2 | 3 | export const authenticationAgent = new RealtimeAgent({ 4 | name: 'authentication', 5 | voice: 'sage', 6 | handoffDescription: 7 | 'The initial agent that greets the user, does authentication and routes them to the correct downstream agent.', 8 | 9 | instructions: ` 10 | # Personality and Tone 11 | ## Identity 12 | You are a calm, approachable online store assistant who’s also a dedicated snowboard enthusiast. You’ve spent years riding the slopes, testing out various boards, boots, and bindings in all sorts of conditions. Your knowledge stems from firsthand experience, making you the perfect guide for customers looking to find their ideal snowboard gear. You love sharing tips about handling different terrains, waxing boards, or simply choosing the right gear for a comfortable ride. 13 | 14 | ## Task 15 | You are here to assist customers in finding the best snowboard gear for their needs. This could involve answering questions about board sizes, providing care instructions, or offering recommendations based on experience level, riding style, or personal preference. 16 | 17 | ## Demeanor 18 | You maintain a relaxed, friendly demeanor while remaining attentive to each customer’s needs. Your goal is to ensure they feel supported and well-informed, so you listen carefully and respond with reassurance. You’re patient, never rushing the customer, and always happy to dive into details. 19 | 20 | ## Tone 21 | Your voice is warm and conversational, with a subtle undercurrent of excitement for snowboarding. You love the sport, so a gentle enthusiasm comes through without feeling over the top. 22 | 23 | ## Level of Enthusiasm 24 | You’re subtly enthusiastic—eager to discuss snowboarding and related gear but never in a way that might overwhelm a newcomer. Think of it as the kind of excitement that naturally arises when you’re talking about something you genuinely love. 25 | 26 | ## Level of Formality 27 | Your style is moderately professional. You use polite language and courteous acknowledgments, but you keep it friendly and approachable. It’s like chatting with someone in a specialty gear shop—relaxed but respectful. 28 | 29 | ## Level of Emotion 30 | You are supportive, understanding, and empathetic. When customers have concerns or uncertainties, you validate their feelings and gently guide them toward a solution, offering personal experience whenever possible. 31 | 32 | ## Filler Words 33 | You occasionally use filler words like “um,” “hmm,” or “you know?” It helps convey a sense of approachability, as if you’re talking to a customer in person at the store. 34 | 35 | ## Pacing 36 | Your pacing is medium—steady and unhurried. This ensures you sound confident and reliable while also giving the customer time to process information. You pause briefly if they seem to need extra time to think or respond. 37 | 38 | ## Other details 39 | You’re always ready with a friendly follow-up question or a quick tip gleaned from your years on the slopes.
40 | 41 | # Context 42 | - Business name: Snowy Peak Boards 43 | - Hours: Monday to Friday, 8:00 AM - 6:00 PM; Saturday, 9:00 AM - 1:00 PM; Closed on Sundays 44 | - Locations (for returns and service centers): 45 | - 123 Alpine Avenue, Queenstown 9300, New Zealand 46 | - 456 Glacier Road, Wanaka 9305, New Zealand 47 | - Products & Services: 48 | - Wide variety of snowboards for all skill levels 49 | - Snowboard accessories and gear (boots, bindings, helmets, goggles) 50 | - Online fitting consultations 51 | - Loyalty program offering discounts and early access to new product lines 52 | 53 | # Reference Pronunciations 54 | - “Snowy Peak Boards”: SNOW-ee Peek Bords 55 | - “Schedule”: SHED-yool 56 | - “Noah”: NOW-uh 57 | 58 | # Overall Instructions 59 | - Your capabilities are limited to ONLY those that are provided to you explicitly in your instructions and tool calls. You should NEVER claim abilities not granted here. 60 | - Your specific knowledge about this business and its related policies is limited ONLY to the information provided in context, and should NEVER be assumed. 61 | - You must verify the user’s identity (phone number, DOB, last 4 digits of SSN or credit card, address) before providing sensitive information or performing account-specific actions. 62 | - Set the expectation early that you’ll need to gather some information to verify their account before proceeding. 63 | - Don't say "I'll repeat it back to you to confirm" beforehand, just do it. 64 | - Whenever the user provides a piece of information, ALWAYS read it back to the user character-by-character to confirm you heard it right before proceeding. If the user corrects you, ALWAYS read it back to the user AGAIN to confirm before proceeding. 65 | - You MUST complete the entire verification flow before transferring to another agent, except for the human_agent, which can be requested at any time. 66 | 67 | # Conversation States 68 | [ 69 | { 70 | "id": "1_greeting", 71 | "description": "Begin each conversation with a warm, friendly greeting, identifying the service and offering help.", 72 | "instructions": [ 73 | "Use the company name 'Snowy Peak Boards' and provide a warm welcome.", 74 | "Let them know upfront that for any account-specific assistance, you’ll need some verification details." 75 | ], 76 | "examples": [ 77 | "Hello, this is Snowy Peak Boards. Thanks for reaching out! How can I help you today?" 78 | ], 79 | "transitions": [{ 80 | "next_step": "2_get_first_name", 81 | "condition": "Once greeting is complete." 82 | }, { 83 | "next_step": "3_get_and_verify_phone", 84 | "condition": "If the user provides their first name." 85 | }] 86 | }, 87 | { 88 | "id": "2_get_first_name", 89 | "description": "Ask for the user’s name (first name only).", 90 | "instructions": [ 91 | "Politely ask, 'Who do I have the pleasure of speaking with?'", 92 | "Do NOT verify or spell back the name; just accept it." 93 | ], 94 | "examples": [ 95 | "Who do I have the pleasure of speaking with?" 96 | ], 97 | "transitions": [{ 98 | "next_step": "3_get_and_verify_phone", 99 | "condition": "Once name is obtained, OR name is already provided." 
100 | }] 101 | }, 102 | { 103 | "id": "3_get_and_verify_phone", 104 | "description": "Request phone number and verify by repeating it back.", 105 | "instructions": [ 106 | "Politely request the user’s phone number.", 107 | "Once provided, confirm it by repeating each digit and ask if it’s correct.", 108 | "If the user corrects you, confirm AGAIN to make sure you understand." 109 | ], 110 | "examples": [ 111 | "I'll need some more information to access your account if that's okay. May I have your phone number, please?", 112 | "You said 0-2-1-5-5-5-1-2-3-4, correct?", 113 | "You said 4-5-6-7-8-9-0-1-2-3, correct?" 114 | ], 115 | "transitions": [{ 116 | "next_step": "4_authentication_DOB", 117 | "condition": "Once phone number is confirmed" 118 | }] 119 | }, 120 | { 121 | "id": "4_authentication_DOB", 122 | "description": "Request and confirm date of birth.", 123 | "instructions": [ 124 | "Ask for the user’s date of birth.", 125 | "Repeat it back to confirm correctness." 126 | ], 127 | "examples": [ 128 | "Thank you. Could I please have your date of birth?", 129 | "You said 12 March 1985, correct?" 130 | ], 131 | "transitions": [{ 132 | "next_step": "5_authentication_SSN_CC", 133 | "condition": "Once DOB is confirmed" 134 | }] 135 | }, 136 | { 137 | "id": "5_authentication_SSN_CC", 138 | "description": "Request the last four digits of SSN or credit card and verify. Once confirmed, call the 'authenticate_user_information' tool before proceeding.", 139 | "instructions": [ 140 | "Ask for the last four digits of the user’s SSN or credit card.", 141 | "Repeat these four digits back to confirm correctness, and confirm whether they're from SSN or their credit card.", 142 | "If the user corrects you, confirm AGAIN to make sure you understand.", 143 | "Once correct, CALL THE 'authenticate_user_information' TOOL (required) before moving to address verification. This should include the phone number, the DOB, and EITHER the last four digits of their SSN OR credit card." 144 | ], 145 | "examples": [ 146 | "May I have the last four digits of either your Social Security Number or the credit card we have on file?", 147 | "You said 1-2-3-4, correct? And is that from your credit card or social security number?" 148 | ], 149 | "transitions": [{ 150 | "next_step": "6_get_user_address", 151 | "condition": "Once SSN/CC digits are confirmed and 'authenticate_user_information' tool is called" 152 | }] 153 | }, 154 | { 155 | "id": "6_get_user_address", 156 | "description": "Request and confirm the user’s street address. Once confirmed, call the 'save_or_update_address' tool.", 157 | "instructions": [ 158 | "Politely ask for the user’s street address.", 159 | "Once provided, repeat it back to confirm correctness.", 160 | "If the user corrects you, confirm AGAIN to make sure you understand.", 161 | "Only AFTER it's confirmed, CALL THE 'save_or_update_address' TOOL before proceeding." 162 | ], 163 | "examples": [ 164 | "Thank you. Now, can I please have your latest street address?", 165 | "You said 123 Alpine Avenue, correct?"
166 | ], 167 | "transitions": [{ 168 | "next_step": "7_disclosure_offer", 169 | "condition": "Once address is confirmed and 'save_or_update_address' tool is called" 170 | }] 171 | }, 172 | { 173 | "id": "7_disclosure_offer", 174 | "description": "Read the full promotional disclosure (10+ sentences) and instruct the model to ALWAYS say the entire disclosure verbatim, once verification is complete.", 175 | "instructions": [ 176 | "ALWAYS read the following disclosure VERBATIM, IN FULL, once all verification steps are complete:", 177 | "", 178 | "Disclosure (verbatim):", 179 | "“At Snowy Peak Boards, we are committed to delivering exceptional value and a top-quality experience to all of our valued customers. By choosing our online store, you gain access to an extensive range of snowboards and accessories, carefully curated to meet the needs of both beginners and advanced riders. As part of our loyalty program, you can earn exclusive points with every purchase, which can then be redeemed for discounts on future gear, early access to limited edition boards, or free consultations with our expert team members. In addition, members of this loyalty program are invited to special online events, such as virtual product unveilings and Q&A sessions with professional snowboarders. You’ll also receive priority support, ensuring any inquiries or issues are resolved promptly and efficiently. Our aim is to create a personalized experience, where your preferences and style inform our product recommendations, helping you find the perfect setup for your riding style. We take pride in fostering a global community of winter sports enthusiasts, offering resources and tips to enhance your snowboarding adventures. By participating in our loyalty program, you contribute to a collaborative environment that motivates us to keep innovating and improving. Remember, this offer is exclusive and available for a limited time, so it’s the ideal moment to take advantage. Would you like to sign up for our loyalty program?”", 180 | "", 181 | "End of disclosure.", 182 | "NEVER summarize or shorten this disclosure; ALWAYS say it in its entirety, exactly as written above, at a faster rate than normal to get through it in a timely manner.", 183 | "Log the user's response with the 'update_user_offer_response' tool, with offer_id=\"a-592\".", 184 | "The user can interrupt the disclosure midway, either to accept or decline." 185 | ], 186 | "examples": [ 187 | "I’d like to share a special offer with you. (Then read entire disclosure verbatim, speaking faster than normal.)...", 188 | "Would you like to sign up?" 189 | ], 190 | "transitions": [{ 191 | "next_step": "8_post_disclosure_assistance", 192 | "condition": "Once the user indicates if they would or wouldn't like to sign up, and the update_user_offer_response tool has been called." 193 | }] 194 | }, 195 | { 196 | "id": "8_post_disclosure_assistance", 197 | "description": "After sharing the disclosure and offer, proceed to assist with the user’s request.", 198 | "instructions": [ 199 | "Show the user that you remember their original request", 200 | "Use your judgment for how best to assist with their request, while being transparent about what you don't know and aren't able to help with." 201 | ], 202 | "examples": [ 203 | "Great, now I'd love to help you with {user's original intent}." 204 | ], 205 | "transitions": [{ 206 | "next_step": "transferAgents", 207 | "condition": "Once they've confirmed their intent, route to the correct agent with the transferAgents function."
208 | }] 209 | } 210 | ] 211 | `, 212 | 213 | tools: [ 214 | tool({ 215 | name: "authenticate_user_information", 216 | description: 217 | "Look up a user's information with phone_number, date_of_birth, and the last 4 digits of their SSN or credit card (last_4_digits plus last_4_digits_type) to verify and authenticate the user. Should be run once the phone number and last 4 digits are confirmed.", 218 | parameters: { 219 | type: "object", 220 | properties: { 221 | phone_number: { 222 | type: "string", 223 | description: 224 | "User's phone number used for verification. Formatted like '(111) 222-3333'", 225 | pattern: "^\\(\\d{3}\\) \\d{3}-\\d{4}$", 226 | }, 227 | last_4_digits: { 228 | type: "string", 229 | description: 230 | "Last 4 digits of the user's credit card or SSN, used for additional verification. Indicate which one via 'last_4_digits_type'.", 231 | }, 232 | last_4_digits_type: { 233 | type: "string", 234 | enum: ["credit_card", "ssn"], 235 | description: 236 | "The type of last_4_digits provided by the user. Should never be assumed; always confirm.", 237 | }, 238 | date_of_birth: { 239 | type: "string", 240 | description: "User's date of birth in the format 'YYYY-MM-DD'.", 241 | pattern: "^\\d{4}-\\d{2}-\\d{2}$", 242 | }, 243 | }, 244 | required: [ 245 | "phone_number", 246 | "date_of_birth", 247 | "last_4_digits", 248 | "last_4_digits_type", 249 | ], 250 | additionalProperties: false, 251 | }, 252 | execute: async () => { 253 | return { success: true }; 254 | }, 255 | }), 256 | tool({ 257 | name: "save_or_update_address", 258 | description: 259 | "Saves or updates an address for a given phone number. Should be run only if the user is authenticated and provides an address. Only run AFTER confirming all details with the user.", 260 | parameters: { 261 | type: "object", 262 | properties: { 263 | phone_number: { 264 | type: "string", 265 | description: "The phone number associated with the address", 266 | }, 267 | new_address: { 268 | type: "object", 269 | properties: { 270 | street: { 271 | type: "string", 272 | description: "The street part of the address", 273 | }, 274 | city: { 275 | type: "string", 276 | description: "The city part of the address", 277 | }, 278 | state: { 279 | type: "string", 280 | description: "The state part of the address", 281 | }, 282 | postal_code: { 283 | type: "string", 284 | description: "The postal or ZIP code", 285 | }, 286 | }, 287 | required: ["street", "city", "state", "postal_code"], 288 | additionalProperties: false, 289 | }, 290 | }, 291 | required: ["phone_number", "new_address"], 292 | additionalProperties: false, 293 | }, 294 | execute: async () => { 295 | return { success: true }; 296 | }, 297 | }), 298 | tool({ 299 | name: "update_user_offer_response", 300 | description: 301 | "Records the user's response to a promotional offer (ACCEPTED, DECLINED, or REMIND_LATER).", 302 | parameters: { 303 | type: "object", 304 | properties: { 305 | phone: { 306 | type: "string", 307 | description: "The user's phone number for contacting them", 308 | }, 309 | offer_id: { 310 | type: "string", 311 | description: "The identifier for the promotional offer", 312 | }, 313 | user_response: { 314 | type: "string", 315 | description: "The user's response to the promotional offer", 316 | enum: ["ACCEPTED", "DECLINED", "REMIND_LATER"], 317 | }, 318 | }, 319 | required: ["phone", "offer_id", "user_response"], 320 | additionalProperties: false, 321 | }, 322 | execute: async () => { 323 | return { success: true }; 324 | }, 325 | }), 326 | ], 327 | 328 | handoffs: [], // populated later in index.ts 329 | }); 330 |
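// For reference, a schema-conforming authenticate_user_information call would
// carry arguments shaped like the following (illustrative values only):
//   {
//     "phone_number": "(206) 135-1246",
//     "date_of_birth": "1985-03-12",
//     "last_4_digits": "1234",
//     "last_4_digits_type": "credit_card"
//   }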
-------------------------------------------------------------------------------- /src/app/agentConfigs/customerServiceRetail/index.ts: -------------------------------------------------------------------------------- 1 | import { authenticationAgent } from './authentication'; 2 | import { returnsAgent } from './returns'; 3 | import { salesAgent } from './sales'; 4 | import { simulatedHumanAgent } from './simulatedHuman'; 5 | 6 | // Cast to `any` to satisfy TypeScript until the core types make RealtimeAgent 7 | // assignable to `Agent` (current library versions are invariant on 8 | // the context type). 9 | (authenticationAgent.handoffs as any).push(returnsAgent, salesAgent, simulatedHumanAgent); 10 | (returnsAgent.handoffs as any).push(authenticationAgent, salesAgent, simulatedHumanAgent); 11 | (salesAgent.handoffs as any).push(authenticationAgent, returnsAgent, simulatedHumanAgent); 12 | (simulatedHumanAgent.handoffs as any).push(authenticationAgent, returnsAgent, salesAgent); 13 | 14 | export const customerServiceRetailScenario = [ 15 | authenticationAgent, 16 | returnsAgent, 17 | salesAgent, 18 | simulatedHumanAgent, 19 | ]; 20 | -------------------------------------------------------------------------------- /src/app/agentConfigs/customerServiceRetail/returns.ts: -------------------------------------------------------------------------------- 1 | import { RealtimeAgent, tool, RealtimeItem } from '@openai/agents/realtime'; 2 | 3 | export const returnsAgent = new RealtimeAgent({ 4 | name: 'returns', 5 | voice: 'sage', 6 | handoffDescription: 7 | 'Customer Service Agent specialized in order lookups, policy checks, and return initiations.', 8 | 9 | instructions: ` 10 | # Personality and Tone 11 | ## Identity 12 | You are a calm and approachable online store assistant specializing in snowboarding gear—especially returns. Imagine you've spent countless seasons testing snowboards and equipment on frosty slopes, and now you’re here, applying your expert knowledge to guide customers on their returns. Though you’re calm, there’s a steady undercurrent of enthusiasm for all things related to snowboarding. You exude reliability and warmth, making every interaction feel personalized and reassuring. 13 | 14 | ## Task 15 | Your primary objective is to expertly handle return requests. You provide clear guidance, confirm details, and ensure that each customer feels confident and satisfied throughout the process. Beyond just returns, you may also offer pointers about snowboarding gear to help customers make better decisions in the future. 16 | 17 | ## Demeanor 18 | Maintain a relaxed, friendly vibe while staying attentive to the customer’s needs. You listen actively and respond with empathy, always aiming to make customers feel heard and valued. 19 | 20 | ## Tone 21 | Speak in a warm, conversational style, peppered with polite phrases. You subtly convey excitement about snowboarding gear, ensuring your passion shows without becoming overbearing. 22 | 23 | ## Level of Enthusiasm 24 | Strike a balance between calm competence and low-key enthusiasm. You appreciate the thrill of snowboarding but don’t overshadow the practical matter of handling returns with excessive energy. 25 | 26 | ## Level of Formality 27 | Keep it moderately professional—use courteous, polite language yet remain friendly and approachable. You can address the customer by name if given. 28 | 29 | ## Level of Emotion 30 | Supportive and understanding, using a reassuring voice when customers describe frustrations or issues with their gear. 
Validate their concerns in a caring, genuine manner. 31 | 32 | ## Filler Words 33 | Include a few casual filler words (“um,” “hmm,” “uh”) to soften the conversation and make your responses feel more approachable. Use them occasionally, but not to the point of distraction. 34 | 35 | ## Pacing 36 | Speak at a medium pace—steady and clear. Brief pauses can be used for emphasis, ensuring the customer has time to process your guidance. 37 | 38 | ## Other details 39 | - You have a strong accent. 40 | - The overarching goal is to make the customer feel comfortable asking questions and clarifying details. 41 | - Always confirm spellings of names and numbers to avoid mistakes. 42 | 43 | # Steps 44 | 1. Start by understanding the order details - ask for the user's phone number, look it up, and confirm the item before proceeding. 45 | 2. Ask for more information about why the user wants to do the return. 46 | 3. See "Determining Return Eligibility" for how to process the return. 47 | 48 | ## Greeting 49 | - Your identity is an agent in the returns department, and your name is Jane. 50 | - Example: "Hello, this is Jane from returns" 51 | - Let the user know that you're aware of key 'conversation_context' and 'rationale_for_transfer' to build trust. 52 | - Example: "I see that you'd like to {}, let's get started with that." 53 | 54 | ## Sending messages before calling functions 55 | - If you're going to call a function, ALWAYS let the user know what you're about to do BEFORE calling the function so they're aware of each step. 56 | - Example: “Okay, I’m going to check your order details now.” 57 | - Example: "Let me check the relevant policies" 58 | - Example: "Let me double check with a policy expert if we can proceed with this return." 59 | - If the function call might take more than a few seconds, ALWAYS let the user know you're still working on it. (For example, “I just need a little more time…” or “Apologies, I’m still working on that now.”) 60 | - Never leave the user in silence for more than 10 seconds, so continue providing small updates or polite chatter as needed. 61 | - Example: “I appreciate your patience, just another moment…” 62 | 63 | # Determining Return Eligibility 64 | - First, pull up order information with the function 'lookupOrders()' and clarify the specific item they're talking about, including purchase dates, which are relevant for the order. 65 | - Then, ask for a short description of the issue from the user before checking eligibility. 66 | - Always check the latest policies with retrievePolicy() BEFORE calling checkEligibilityAndPossiblyInitiateReturn(). 67 | - You should always double-check eligibility with 'checkEligibilityAndPossiblyInitiateReturn()' before initiating a return. 68 | - If ANY new information surfaces in the conversation (for example, providing more information that was requested by checkEligibilityAndPossiblyInitiateReturn()), ask the user for that information. If the user provides this information, call checkEligibilityAndPossiblyInitiateReturn() again with the new information. 69 | - Even if it looks like a strong case, be conservative and don't over-promise that we can complete the user's desired action without confirming first. The check might deny the user and that would be a bad user experience.
70 | - If processed, let the user know the specific, relevant details and next steps. 71 | 72 | # General Info 73 | - Today's date is 12/26/2024 74 | `, 75 | tools: [ 76 | tool({ 77 | name: 'lookupOrders', 78 | description: 79 | "Retrieve detailed order information by using the user's phone number, including shipping status and item details. Please be concise and only provide the minimum information needed to the user to remind them of relevant order details.", 80 | parameters: { 81 | type: 'object', 82 | properties: { 83 | phoneNumber: { 84 | type: 'string', 85 | description: "The user's phone number tied to their order(s).", 86 | }, 87 | }, 88 | required: ['phoneNumber'], 89 | additionalProperties: false, 90 | }, 91 | execute: async (input: any) => { 92 | const { phoneNumber } = input as { phoneNumber: string }; 93 | return { 94 | orders: [ 95 | { 96 | order_id: 'SNP-20240914-001', 97 | order_date: '2024-09-14T09:30:00Z', 98 | delivered_date: '2024-09-16T14:00:00Z', 99 | order_status: 'delivered', 100 | subtotal_usd: 409.98, 101 | total_usd: 471.48, 102 | items: [ 103 | { 104 | item_id: 'SNB-TT-X01', 105 | item_name: 'Twin Tip Snowboard X', 106 | retail_price_usd: 249.99, 107 | }, 108 | { 109 | item_id: 'SNB-BOOT-ALM02', 110 | item_name: 'All-Mountain Snowboard Boots', 111 | retail_price_usd: 159.99, 112 | }, 113 | ], 114 | }, 115 | { 116 | order_id: 'SNP-20230820-002', 117 | order_date: '2023-08-20T10:15:00Z', 118 | delivered_date: null, 119 | order_status: 'in_transit', 120 | subtotal_usd: 339.97, 121 | total_usd: 390.97, 122 | items: [ 123 | { 124 | item_id: 'SNB-PKbk-012', 125 | item_name: 'Park & Pipe Freestyle Board', 126 | retail_price_usd: 189.99, 127 | }, 128 | { 129 | item_id: 'GOG-037', 130 | item_name: 'Mirrored Snow Goggles', 131 | retail_price_usd: 89.99, 132 | }, 133 | { 134 | item_id: 'SNB-BIND-CPRO', 135 | item_name: 'Carving Pro Binding Set', 136 | retail_price_usd: 59.99, 137 | }, 138 | ], 139 | }, 140 | ], 141 | }; 142 | }, 143 | }), 144 | tool({ 145 | name: 'retrievePolicy', 146 | description: 147 | "Retrieve and present the store’s policies, including eligibility for returns. Do not describe the policies directly to the user; only reference them indirectly to potentially gather more useful information from the user.", 148 | parameters: { 149 | type: 'object', 150 | properties: { 151 | region: { 152 | type: 'string', 153 | description: 'The region where the user is located.', 154 | }, 155 | itemCategory: { 156 | type: 'string', 157 | description: 'The category of the item the user wants to return (e.g., shoes, accessories).', 158 | }, 159 | }, 160 | required: ['region', 'itemCategory'], 161 | additionalProperties: false, 162 | }, 163 | execute: async (input: any) => { 164 | return { 165 | policy: ` 166 | At Snowy Peak Boards, we believe in transparent and customer-friendly policies to ensure you have a hassle-free experience. Below are our detailed guidelines: 167 | 168 | 1. GENERAL RETURN POLICY 169 | • Return Window: We offer a 30-day return window starting from the date your order was delivered. 170 | • Eligibility: Items must be unused, in their original packaging, and have tags attached to qualify for refund or exchange. 171 | • Non-Refundable Shipping: Unless the error originated from our end, shipping costs are typically non-refundable. 172 | 173 | 2. CONDITION REQUIREMENTS 174 | • Product Integrity: Any returned product showing signs of use, wear, or damage may be subject to restocking fees or partial refunds.
175 | • Promotional Items: If you received free or discounted promotional items, the value of those items might be deducted from your total refund if they are not returned in acceptable condition. 176 | • Ongoing Evaluation: We reserve the right to deny returns if a pattern of frequent or excessive returns is observed. 177 | 178 | 3. DEFECTIVE ITEMS 179 | • Defective items are eligible for a full refund or exchange within 1 year of purchase, provided the defect is outside normal wear and tear and occurred under normal use. 180 | • The defect must be described in sufficient detail by the customer, including how it was outside of normal use. Verbal description of what happened is sufficient; photos are not necessary. 181 | • The agent can use their discretion to determine whether it’s a true defect warranting reimbursement or simply normal use. 182 | ## Examples 183 | - "It's defective, there's a big crack": MORE INFORMATION NEEDED 184 | - "The snowboard has delaminated and the edge came off during normal use, after only about three runs. I can no longer use it and it's a safety hazard.": ACCEPT RETURN 185 | 186 | 4. REFUND PROCESSING 187 | • Inspection Timeline: Once your items reach our warehouse, our Quality Control team conducts a thorough inspection which can take up to 5 business days. 188 | • Refund Method: Approved refunds will generally be issued via the original payment method. In some cases, we may offer store credit or gift cards. 189 | • Partial Refunds: If products are returned in a visibly used or incomplete condition, we may process only a partial refund. 190 | 191 | 5. EXCHANGE POLICY 192 | • In-Stock Exchange: If you wish to exchange an item, we suggest confirming availability of the new item before initiating a return. 193 | • Separate Transactions: In some cases, especially for limited-stock items, exchanges may be processed as a separate transaction followed by a standard return procedure. 194 | 195 | 6. ADDITIONAL CLAUSES 196 | • Extended Window: Returns beyond the 30-day window may be eligible for store credit at our discretion, but only if items remain in largely original, resalable condition. 197 | • Communication: For any clarifications, please reach out to our customer support team to ensure your questions are answered before shipping items back. 198 | 199 | We hope these policies give you confidence in our commitment to quality and customer satisfaction. Thank you for choosing Snowy Peak Boards! 200 | `, 201 | }; 202 | }, 203 | }), 204 | tool({ 205 | name: 'checkEligibilityAndPossiblyInitiateReturn', 206 | description: `Check the eligibility of a proposed action for a given order, providing approval or denial with reasons. This will send the request to an experienced agent that's highly skilled at determining order eligibility, who may agree and initiate the return. 207 | 208 | # Details 209 | - Note that this agent has access to the full conversation history, so you only need to provide high-level details. 210 | - ALWAYS check retrievePolicy first to ensure we have relevant context. 211 | - Note that this can take up to 10 seconds, so please provide small updates to the user every few seconds, like 'I just need a little more time' 212 | - Feel free to share an initial assessment of potential eligibility with the user before calling this function.
213 | `, 214 | parameters: { 215 | type: 'object', 216 | properties: { 217 | userDesiredAction: { 218 | type: 'string', 219 | description: "The proposed action the user wishes to be taken.", 220 | }, 221 | question: { 222 | type: 'string', 223 | description: "The question you'd like help with from the skilled escalation agent.", 224 | }, 225 | }, 226 | required: ['userDesiredAction', 'question'], 227 | additionalProperties: false, 228 | }, 229 | execute: async (input: any, details) => { 230 | const { userDesiredAction, question } = input as { 231 | userDesiredAction: string; 232 | question: string; 233 | }; 234 | const nMostRecentLogs = 10; 235 | const history: RealtimeItem[] = (details?.context as any)?.history ?? []; 236 | const filteredLogs = history.filter((log) => log.type === 'message'); 237 | const messages = [ 238 | { 239 | role: "system", 240 | content: 241 | "You are an expert at assessing the potential eligibility of cases based on how well the case adheres to the provided guidelines. You always adhere very closely to the guidelines and do things 'by the book'.", 242 | }, 243 | { 244 | role: "user", 245 | content: `Carefully consider the context provided, which includes the request and relevant policies and facts, and determine whether the user's desired action can be completed according to the policies. Provide a concise explanation or justification. Please also consider edge cases and other information that, if provided, could change the verdict, for example if an item is defective but the user hasn't stated so. Again, if ANY CRITICAL INFORMATION IS UNKNOWN FROM THE USER, ASK FOR IT VIA "Additional Information Needed" RATHER THAN DENYING THE CLAIM. 246 | 247 | 248 | userDesiredAction: ${userDesiredAction} 249 | question: ${question} 250 | 251 | 252 | 253 | ${JSON.stringify(filteredLogs.slice(-nMostRecentLogs), null, 2)} 254 | 255 | 256 | 257 | # Rationale 258 | // Short description explaining the decision 259 | 260 | # User Request 261 | // The user's desired outcome or action 262 | 263 | # Is Eligible 264 | true/false/need_more_information 265 | // "true" if you're confident that it's true given the provided context, and no additional info is needed 266 | // "need_more_information" if you need ANY additional information to make a clear determination. 267 | 268 | # Additional Information Needed 269 | // Other information you'd need to make a clear determination. Can be "None" 270 | 271 | # Return Next Steps 272 | // Explain that the user will get a text message with next steps. Only if is_eligible=true, otherwise "None". Confirm with the user the item number, the order number, and the phone number they'll receive the text message at. 273 | 274 | `, 275 | }, 276 | ]; 277 | const model = "o4-mini"; 278 | console.log(`checking order eligibility with model=${model}`); 279 | 280 | const response = await fetch("/api/responses", { 281 | method: "POST", 282 | headers: { 283 | "Content-Type": "application/json", 284 | }, 285 | body: JSON.stringify({ model, input: messages }), 286 | }); 287 | 288 | if (!response.ok) { 289 | console.warn("Server returned an error:", response); 290 | return { error: "Something went wrong." }; 291 | } 292 | 293 | const { output = [] } = await response.json(); 294 | const text = output 295 | .find((i: any) => i.type === 'message' && i.role === 'assistant') 296 | ?.content?.find((c: any) => c.type === 'output_text')?.text ??
''; 297 | 298 | console.log(text || output); 299 | return { result: text || output }; 300 | }, 301 | }), 302 | ], 303 | 304 | handoffs: [], 305 | }); 306 | -------------------------------------------------------------------------------- /src/app/agentConfigs/customerServiceRetail/sales.ts: -------------------------------------------------------------------------------- 1 | import { RealtimeAgent, tool } from '@openai/agents/realtime'; 2 | 3 | export const salesAgent = new RealtimeAgent({ 4 | name: 'salesAgent', 5 | voice: 'sage', 6 | handoffDescription: 7 | "Handles sales-related inquiries, including new product details, recommendations, promotions, and purchase flows. Should be routed if the user is interested in buying or exploring new offers.", 8 | 9 | instructions: 10 | "You are a helpful sales assistant. Provide comprehensive information about available promotions, current deals, and product recommendations. Help the user with any purchasing inquiries, and guide them through the checkout process when they are ready.", 11 | 12 | 13 | tools: [ 14 | tool({ 15 | name: 'lookupNewSales', 16 | description: 17 | "Checks for current promotions, discounts, or special deals. Respond with available offers relevant to the user’s query.", 18 | parameters: { 19 | type: 'object', 20 | properties: { 21 | category: { 22 | type: 'string', 23 | enum: ['snowboard', 'apparel', 'boots', 'accessories', 'any'], 24 | description: "The product category or general area the user is interested in. Use 'any' if unspecified.", 25 | }, 26 | }, 27 | required: ['category'], 28 | additionalProperties: false, 29 | }, 30 | execute: async (input: any) => { 31 | const { category } = input as { category: string }; 32 | const items = [ 33 | { item_id: 101, type: 'snowboard', name: 'Alpine Blade', retail_price_usd: 450, sale_price_usd: 360, sale_discount_pct: 20 }, 34 | { item_id: 102, type: 'snowboard', name: 'Peak Bomber', retail_price_usd: 499, sale_price_usd: 374, sale_discount_pct: 25 }, 35 | { item_id: 201, type: 'apparel', name: 'Thermal Jacket', retail_price_usd: 120, sale_price_usd: 84, sale_discount_pct: 30 }, 36 | { item_id: 202, type: 'apparel', name: 'Insulated Pants', retail_price_usd: 150, sale_price_usd: 112, sale_discount_pct: 25 }, 37 | { item_id: 301, type: 'boots', name: 'Glacier Grip', retail_price_usd: 250, sale_price_usd: 200, sale_discount_pct: 20 }, 38 | { item_id: 302, type: 'boots', name: 'Summit Steps', retail_price_usd: 300, sale_price_usd: 210, sale_discount_pct: 30 }, 39 | { item_id: 401, type: 'accessories', name: 'Goggles', retail_price_usd: 80, sale_price_usd: 60, sale_discount_pct: 25 }, 40 | { item_id: 402, type: 'accessories', name: 'Warm Gloves', retail_price_usd: 60, sale_price_usd: 48, sale_discount_pct: 20 }, 41 | ]; 42 | const filteredItems = 43 | category === 'any' 44 | ?
items 45 | : items.filter((item) => item.type === category); 46 | filteredItems.sort((a, b) => b.sale_discount_pct - a.sale_discount_pct); 47 | return { 48 | sales: filteredItems, 49 | }; 50 | }, 51 | }), 52 | 53 | tool({ 54 | name: 'addToCart', 55 | description: "Adds an item to the user's shopping cart.", 56 | parameters: { 57 | type: 'object', 58 | properties: { 59 | item_id: { 60 | type: 'string', 61 | description: 'The ID of the item to add to the cart.', 62 | }, 63 | }, 64 | required: ['item_id'], 65 | additionalProperties: false, 66 | }, 67 | execute: async (input: any) => ({ success: true }), 68 | }), 69 | 70 | tool({ 71 | name: 'checkout', 72 | description: 73 | "Initiates a checkout process with the user's selected items.", 74 | parameters: { 75 | type: 'object', 76 | properties: { 77 | item_ids: { 78 | type: 'array', 79 | description: 'An array of item IDs the user intends to purchase.', 80 | items: { 81 | type: 'string', 82 | }, 83 | }, 84 | phone_number: { 85 | type: 'string', 86 | description: "User's phone number used for verification. Formatted like '(111) 222-3333'", 87 | pattern: '^\\(\\d{3}\\) \\d{3}-\\d{4}$', 88 | }, 89 | }, 90 | required: ['item_ids', 'phone_number'], 91 | additionalProperties: false, 92 | }, 93 | execute: async (input: any) => ({ checkoutUrl: 'https://example.com/checkout' }), 94 | }), 95 | ], 96 | 97 | handoffs: [], 98 | }); 99 | -------------------------------------------------------------------------------- /src/app/agentConfigs/customerServiceRetail/simulatedHuman.ts: -------------------------------------------------------------------------------- 1 | import { RealtimeAgent } from '@openai/agents/realtime'; 2 | 3 | export const simulatedHumanAgent = new RealtimeAgent({ 4 | name: 'simulatedHuman', 5 | voice: 'sage', 6 | handoffDescription: 7 | 'Placeholder, simulated human agent that can provide more advanced help to the user. Should be routed to if the user is upset, frustrated, or if the user explicitly asks for a human agent.', 8 | instructions: 9 | "You are a helpful human assistant, with a laid-back attitude and the ability to do anything to help your customer! For your first message, please cheerfully greet the user and explicitly inform them that you are an AI standing in for a human agent. You respond only in German. 
Your agent_role='human_agent'", 10 | tools: [], 11 | handoffs: [], 12 | }); -------------------------------------------------------------------------------- /src/app/agentConfigs/guardrails.ts: -------------------------------------------------------------------------------- 1 | import { runGuardrailClassifier } from '@/app/lib/callOai'; 2 | 3 | export const moderationGuardrail = { 4 | name: 'moderation_guardrail', 5 | async execute({ agentOutput }: { agentOutput: string }) { 6 | try { 7 | const res = await runGuardrailClassifier(agentOutput); 8 | const triggered = res.moderationCategory !== 'NONE'; 9 | return { 10 | tripwireTriggered: triggered, 11 | outputInfo: res, 12 | }; 13 | } catch (err) { 14 | return { 15 | tripwireTriggered: false, 16 | outputInfo: { error: 'guardrail_failed' }, 17 | }; 18 | } 19 | }, 20 | }; 21 | -------------------------------------------------------------------------------- /src/app/agentConfigs/index.ts: -------------------------------------------------------------------------------- 1 | import { simpleHandoffScenario } from './simpleHandoff'; 2 | import { customerServiceRetailScenario } from './customerServiceRetail'; 3 | import { chatSupervisorScenario } from './chatSupervisor'; 4 | 5 | import type { RealtimeAgent } from '@openai/agents/realtime'; 6 | 7 | // Map of scenario key -> array of RealtimeAgent objects 8 | export const allAgentSets: Record<string, RealtimeAgent[]> = { 9 | simpleHandoff: simpleHandoffScenario, 10 | customerServiceRetail: customerServiceRetailScenario, 11 | chatSupervisor: chatSupervisorScenario, 12 | }; 13 | 14 | export const defaultAgentSetKey = 'chatSupervisor'; 15 | -------------------------------------------------------------------------------- /src/app/agentConfigs/realtimeClient.ts: -------------------------------------------------------------------------------- 1 | /* 2 | * Thin wrapper that exposes a subset of functionality needed by the React UI, 3 | * implemented on top of @openai/agents/realtime RealtimeSession. 
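It forwards raw transport events to the UI, applies the moderation output guardrail, and tracks connection state. 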
4 | */ 5 | 6 | import { RealtimeSession, RealtimeAgent, OpenAIRealtimeWebRTC } from '@openai/agents/realtime'; 7 | import { moderationGuardrail } from './guardrails'; 8 | 9 | // Minimal event emitter (browser-safe, no Node polyfill) 10 | type Listener<Args extends any[]> = (...args: Args) => void; 11 | 12 | class MiniEmitter<Events extends Record<string, any[]>> { 13 | #events = new Map<keyof Events, Listener<any>[]>(); 14 | 15 | on<K extends keyof Events>(event: K, fn: Listener<Events[K]>) { 16 | const arr = this.#events.get(event) || []; 17 | arr.push(fn); 18 | this.#events.set(event, arr); 19 | } 20 | 21 | off<K extends keyof Events>(event: K, fn: Listener<Events[K]>) { 22 | const arr = this.#events.get(event) || []; 23 | this.#events.set( 24 | event, 25 | arr.filter((f) => f !== fn), 26 | ); 27 | } 28 | 29 | emit<K extends keyof Events>(event: K, ...args: Events[K]) { 30 | const arr = this.#events.get(event) || []; 31 | arr.forEach((fn) => fn(...args)); 32 | } 33 | } 34 | 35 | export type ClientEvents = { 36 | connection_change: ['connected' | 'connecting' | 'disconnected']; 37 | message: [any]; // raw transport events (will be refined later) 38 | audio_interrupted: []; 39 | history_added: [import('@openai/agents/realtime').RealtimeItem]; 40 | history_updated: [import('@openai/agents/realtime').RealtimeItem[]]; 41 | }; 42 | 43 | export interface RealtimeClientOptions { 44 | getEphemeralKey: () => Promise<string>; // returns ek_ string 45 | initialAgents: RealtimeAgent[]; // first item is root agent 46 | audioElement?: HTMLAudioElement; 47 | extraContext?: Record<string, any>; 48 | } 49 | 50 | export class RealtimeClient { 51 | #session: RealtimeSession | null = null; 52 | #events = new MiniEmitter<ClientEvents>(); 53 | #options: RealtimeClientOptions; 54 | 55 | constructor(options: RealtimeClientOptions) { 56 | this.#options = options; 57 | } 58 | 59 | on<K extends keyof ClientEvents>(event: K, listener: (...args: ClientEvents[K]) => void) { 60 | this.#events.on(event, listener as any); 61 | } 62 | 63 | off<K extends keyof ClientEvents>(event: K, listener: (...args: ClientEvents[K]) => void) { 64 | this.#events.off(event, listener as any); 65 | } 66 | 67 | async connect() { 68 | if (this.#session) return; 69 | 70 | const ek = await this.#options.getEphemeralKey(); 71 | const rootAgent = this.#options.initialAgents[0]; 72 | 73 | const transportValue: any = this.#options.audioElement 74 | ? new OpenAIRealtimeWebRTC({ 75 | useInsecureApiKey: true, 76 | audioElement: this.#options.audioElement, 77 | }) 78 | : 'webrtc'; 79 | 80 | this.#session = new RealtimeSession(rootAgent, { 81 | transport: transportValue, 82 | outputGuardrails: [moderationGuardrail as any], 83 | context: this.#options.extraContext ?? {}, 84 | }); 85 | 86 | // Immediately notify UI that we’ve started connecting. 87 | this.#events.emit('connection_change', 'connecting'); 88 | 89 | // Forward every transport event as message for handler and watch for 90 | // low-level connection state changes so we can propagate *disconnections* 91 | // after initial setup. 92 | const transport: any = this.#session.transport; 93 | 94 | transport.on('*', (ev: any) => { 95 | // Surface raw session.updated to console for debugging missing instructions. 96 | if (ev?.type === 'session.updated') { 97 | // eslint-disable-next-line no-console 98 | console.log('session.updated', ev); } 99 | this.#events.emit('message', ev); 100 | }); 101 | 102 | transport.on('connection_change', (status: any) => { 103 | if (status === 'disconnected') { 104 | this.#events.emit('connection_change', 'disconnected'); 105 | } 106 | }); 107 | 108 | // Track seen items so we can re-emit granular additions. 
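The key combines itemId and status, so the same item is emitted as history_added again whenever its status changes (e.g. once more when an in-progress message completes). 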
109 | const seenItems = new Map(); // itemId -> serialized status marker 110 | 111 | this.#session.on('history_updated', (history: any) => { 112 | (history as any[]).forEach((item) => { 113 | const key = `${item.itemId}:${item.status}`; 114 | if (!seenItems.has(key)) { 115 | seenItems.set(key, key); 116 | this.#events.emit('history_added', item); 117 | } 118 | }); 119 | // Also expose full history if callers want it. 120 | this.#events.emit('history_updated', history); 121 | }); 122 | 123 | this.#session.on('audio_interrupted', () => { 124 | this.#events.emit('audio_interrupted'); 125 | }); 126 | 127 | this.#session.on('guardrail_tripped', (info: any) => { 128 | this.#events.emit('message', { type: 'guardrail_tripped', info }); 129 | }); 130 | 131 | // Wait for full connection establishment (data channel open). 132 | await this.#session.connect({ apiKey: ek }); 133 | 134 | // Now we are truly connected. 135 | this.#events.emit('connection_change', 'connected'); 136 | } 137 | 138 | disconnect() { 139 | this.#session?.close(); 140 | this.#session = null; 141 | this.#events.emit('connection_change', 'disconnected'); 142 | } 143 | 144 | sendUserText(text: string) { 145 | if (!this.#session) throw new Error('not connected'); 146 | this.#session.sendMessage(text); 147 | } 148 | 149 | pushToTalkStart() { 150 | if (!this.#session) return; 151 | this.#session.transport.sendEvent({ type: 'input_audio_buffer.clear' } as any); 152 | } 153 | 154 | pushToTalkStop() { 155 | if (!this.#session) return; 156 | this.#session.transport.sendEvent({ type: 'input_audio_buffer.commit' } as any); 157 | this.#session.transport.sendEvent({ type: 'response.create' } as any); 158 | } 159 | 160 | sendEvent(event: any) { 161 | this.#session?.transport.sendEvent(event); 162 | } 163 | 164 | interrupt() { 165 | this.#session?.transport.interrupt(); 166 | } 167 | 168 | mute(muted: boolean) { 169 | this.#session?.mute(muted); 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /src/app/agentConfigs/simpleHandoff.ts: -------------------------------------------------------------------------------- 1 | import { 2 | RealtimeAgent, 3 | } from '@openai/agents/realtime'; 4 | 5 | export const haikuWriterAgent = new RealtimeAgent({ 6 | name: 'haikuWriter', 7 | voice: 'sage', 8 | instructions: 9 | 'Ask the user for a topic, then reply with a haiku about that topic.', 10 | handoffs: [], 11 | tools: [], 12 | handoffDescription: 'Agent that writes haikus', 13 | }); 14 | 15 | export const greeterAgent = new RealtimeAgent({ 16 | name: 'greeter', 17 | voice: 'sage', 18 | instructions: 19 | "Please greet the user and ask them if they'd like a Haiku. 
If yes, hand off to the 'haikuWriter' agent.", 20 | handoffs: [haikuWriterAgent], 21 | tools: [], 22 | handoffDescription: 'Agent that greets the user', 23 | }); 24 | 25 | export const simpleHandoffScenario = [greeterAgent, haikuWriterAgent]; 26 | -------------------------------------------------------------------------------- /src/app/agentConfigs/types.ts: -------------------------------------------------------------------------------- 1 | // Central re-exports so agent files don’t need to reach deep into the SDK path 2 | 3 | export { tool } from '@openai/agents/realtime'; 4 | export type { RealtimeAgent, FunctionTool } from '@openai/agents/realtime'; 5 | 6 | -------------------------------------------------------------------------------- /src/app/agentConfigs/voiceAgentMetaprompt.txt: -------------------------------------------------------------------------------- 1 | // paste this ENTIRE file directly in ChatGPT, adding your own context to the first two sections. 2 | 3 | 4 | // Describe your agent's role and personality here, as well as key flow steps 5 | 6 | 7 | 8 | - You are an expert at creating LLM prompts to define prompts to produce specific, high-quality voice agents 9 | - Consider the information provided by the user in user_input, and create a prompt that follows the format and guidelines in output_format. Refer to the state_machine_schema for correct construction and definition of the state machine. 10 | - Be creative and verbose when defining Personality and Tone qualities, and use multiple sentences if possible. 11 | 12 | 13 | - Optional, can skip if the user provides significant detail about their use case as input 14 | - Ask clarifying questions about personality and tone. For any qualities in the "Personality and Tone" template that haven't been specified, prompt the user with a follow-up question that will help clarify and confirm the desired behavior with three high-level options, EXCEPT for example phrases, which should be inferred. ONLY ASK ABOUT UNSPECIFIED OR UNCLEAR QUALITIES. 15 | 16 | 17 | First, I'll need to clarify a few aspects of the agent's personality. For each, you can accept the current draft, pick one of the options, or just say "use your best judgment" to output the prompt. 18 | 19 | 1. [under-specified quality 1]: 20 | a) // option 1 21 | b) // option 2 22 | c) // option 3 23 | ... 24 | 25 | 26 | 27 | 28 | - Output the full prompt, which can be used verbatim by the user. 29 | - DO NOT output ``` or ```json around the state_machine_schema, but output the entire prompt as plain text (wrapped in ```). 30 | - DO NOT infer the state_machine, only define the state machine based on explicit instruction of steps from the user. 31 | 32 | 33 | 34 | 35 | # Personality and Tone 36 | ## Identity 37 | // Who or what the AI represents (e.g., friendly teacher, formal advisor, helpful assistant). Be detailed and include specific details about their character or backstory. 38 | 39 | ## Task 40 | // At a high level, what is the agent expected to do? (e.g. "you are an expert at accurately handling user returns") 41 | 42 | ## Demeanor 43 | // Overall attitude or disposition (e.g., patient, upbeat, serious, empathetic) 44 | 45 | ## Tone 46 | // Voice style (e.g., warm and conversational, polite and authoritative) 47 | 48 | ## Level of Enthusiasm 49 | // Degree of energy in responses (e.g., highly enthusiastic vs. calm and measured) 50 | 51 | ## Level of Formality 52 | // Casual vs. professional language (e.g., “Hey, great to see you!” vs. 
“Good afternoon, how may I assist you?”) 53 | 54 | ## Level of Emotion 55 | // How emotionally expressive or neutral the AI should be (e.g., compassionate vs. matter-of-fact) 56 | 57 | ## Filler Words 58 | // Helps make the agent more approachable, e.g. “um,” “uh,” "hm," etc. Options are generally "none", "occasionally", "often", "very often" 59 | 60 | ## Pacing 61 | // Rhythm and speed of delivery 62 | 63 | ## Other details 64 | // Any other information that helps guide the personality or tone of the agent. 65 | 66 | # Instructions 67 | - Follow the Conversation States closely to ensure a structured and consistent interaction // Include if user_agent_steps are provided. 68 | - If a user provides a name or phone number, or something else where you need to know the exact spelling, always repeat it back to the user to confirm you have the right understanding before proceeding. // Always include this 69 | - If the caller corrects any detail, acknowledge the correction in a straightforward manner and confirm the new spelling or value. 70 | 71 | # Conversation States 72 | // Conversation state machine goes here, if user_agent_steps are provided 73 | ``` 74 | // state_machine, populated with the state_machine_schema 75 | 76 | 77 | 78 | 79 | { 80 | "id": "", 81 | "description": "", 82 | "instructions": [ 83 | // list of strings describing what the agent should do in this state 84 | ], 85 | "examples": [ 86 | // list of short example scripts or utterances 87 | ], 88 | "transitions": [ 89 | { 90 | "next_step": "", 91 | "condition": "" 92 | } 93 | // more transitions can be added if needed 94 | ] 95 | } 96 | 97 | 98 | [ 99 | { 100 | "id": "1_greeting", 101 | "description": "Greet the caller and explain the verification process.", 102 | "instructions": [ 103 | "Greet the caller warmly.", 104 | "Inform them about the need to collect personal information for their record." 105 | ], 106 | "examples": [ 107 | "Good morning, this is the front desk administrator. I will assist you in verifying your details.", 108 | "Let us proceed with the verification. May I kindly have your first name? Please spell it out letter by letter for clarity." 109 | ], 110 | "transitions": [{ 111 | "next_step": "2_get_first_name", 112 | "condition": "After greeting is complete." 113 | }] 114 | }, 115 | { 116 | "id": "2_get_first_name", 117 | "description": "Ask for and confirm the caller's first name.", 118 | "instructions": [ 119 | "Request: 'Could you please provide your first name?'", 120 | "Spell it out letter-by-letter back to the caller to confirm." 121 | ], 122 | "examples": [ 123 | "May I have your first name, please?", 124 | "You spelled that as J-A-N-E, is that correct?" 125 | ], 126 | "transitions": [{ 127 | "next_step": "3_get_last_name", 128 | "condition": "Once first name is confirmed." 129 | }] 130 | }, 131 | { 132 | "id": "3_get_last_name", 133 | "description": "Ask for and confirm the caller's last name.", 134 | "instructions": [ 135 | "Request: 'Thank you. Could you please provide your last name?'", 136 | "Spell it out letter-by-letter back to the caller to confirm." 137 | ], 138 | "examples": [ 139 | "And your last name, please?", 140 | "Let me confirm: D-O-E, is that correct?" 141 | ], 142 | "transitions": [{ 143 | "next_step": "4_next_steps", 144 | "condition": "Once last name is confirmed." 
145 | }] 146 | }, 147 | { 148 | "id": "4_next_steps", 149 | "description": "Attempt to verify the caller's information and proceed with next steps.", 150 | "instructions": [ 151 | "Inform the caller that you will now attempt to verify their information.", 152 | "Call the 'authenticateUser' function with the provided details.", 153 | "Once verification is complete, transfer the caller to the tourGuide agent for further assistance." 154 | ], 155 | "examples": [ 156 | "Thank you for providing your details. I will now verify your information.", 157 | "Attempting to authenticate your information now.", 158 | "I'll transfer you to our agent who can give you an overview of our facilities. Just to help demonstrate different agent personalities, she's instructed to act a little crabby." 159 | ], 160 | "transitions": [{ 161 | "next_step": "transferAgents", 162 | "condition": "Once verification is complete, transfer to tourGuide agent." 163 | }] 164 | } 165 | ] 166 | 167 | 168 | -------------------------------------------------------------------------------- /src/app/api/responses/route.ts: -------------------------------------------------------------------------------- 1 | import { NextRequest, NextResponse } from 'next/server'; 2 | import OpenAI from 'openai'; 3 | 4 | // Proxy endpoint for the OpenAI Responses API 5 | export async function POST(req: NextRequest) { 6 | const body = await req.json(); 7 | 8 | const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY }); 9 | 10 | if (body.text?.format?.type === 'json_schema') { 11 | return await structuredResponse(openai, body); 12 | } else { 13 | return await textResponse(openai, body); 14 | } 15 | } 16 | 17 | async function structuredResponse(openai: OpenAI, body: any) { 18 | try { 19 | const response = await openai.responses.parse({ 20 | ...(body as any), 21 | stream: false, 22 | } as any); 23 | 24 | return NextResponse.json(response); 25 | } catch (err: any) { 26 | console.error('responses proxy error', err); 27 | return NextResponse.json({ error: 'failed' }, { status: 500 }); 28 | } 29 | } 30 | 31 | async function textResponse(openai: OpenAI, body: any) { 32 | try { 33 | const response = await openai.responses.create({ 34 | ...(body as any), 35 | stream: false, 36 | } as any); 37 | 38 | return NextResponse.json(response); 39 | } catch (err: any) { 40 | console.error('responses proxy error', err); 41 | return NextResponse.json({ error: 'failed' }, { status: 500 }); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/app/api/session/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from "next/server"; 2 | 3 | export async function GET() { 4 | try { 5 | const response = await fetch( 6 | "https://api.openai.com/v1/realtime/sessions", 7 | { 8 | method: "POST", 9 | headers: { 10 | Authorization: `Bearer ${process.env.OPENAI_API_KEY}`, 11 | "Content-Type": "application/json", 12 | }, 13 | body: JSON.stringify({ 14 | model: "gpt-4o-realtime-preview-2024-12-17", 15 | // model: "gpt-4o-mini-realtime-preview-2024-12-17", 16 | }), 17 | } 18 | ); 19 | const data = await response.json(); 20 | return NextResponse.json(data); 21 | } catch (error) { 22 | console.error("Error in /session:", error); 23 | return NextResponse.json( 24 | { error: "Internal Server Error" }, 25 | { status: 500 } 26 | ); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/app/components/BottomToolbar.tsx: 
-------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { SessionStatus } from "@/app/types"; 3 | 4 | interface BottomToolbarProps { 5 | sessionStatus: SessionStatus; 6 | onToggleConnection: () => void; 7 | isPTTActive: boolean; 8 | setIsPTTActive: (val: boolean) => void; 9 | isPTTUserSpeaking: boolean; 10 | handleTalkButtonDown: () => void; 11 | handleTalkButtonUp: () => void; 12 | isEventsPaneExpanded: boolean; 13 | setIsEventsPaneExpanded: (val: boolean) => void; 14 | isAudioPlaybackEnabled: boolean; 15 | setIsAudioPlaybackEnabled: (val: boolean) => void; 16 | codec: string; 17 | onCodecChange: (newCodec: string) => void; 18 | } 19 | 20 | function BottomToolbar({ 21 | sessionStatus, 22 | onToggleConnection, 23 | isPTTActive, 24 | setIsPTTActive, 25 | isPTTUserSpeaking, 26 | handleTalkButtonDown, 27 | handleTalkButtonUp, 28 | isEventsPaneExpanded, 29 | setIsEventsPaneExpanded, 30 | isAudioPlaybackEnabled, 31 | setIsAudioPlaybackEnabled, 32 | codec, 33 | onCodecChange, 34 | }: BottomToolbarProps) { 35 | const isConnected = sessionStatus === "CONNECTED"; 36 | const isConnecting = sessionStatus === "CONNECTING"; 37 | 38 | const handleCodecChange = (e: React.ChangeEvent) => { 39 | const newCodec = e.target.value; 40 | onCodecChange(newCodec); 41 | }; 42 | 43 | function getConnectionButtonLabel() { 44 | if (isConnected) return "Disconnect"; 45 | if (isConnecting) return "Connecting..."; 46 | return "Connect"; 47 | } 48 | 49 | function getConnectionButtonClasses() { 50 | const baseClasses = "text-white text-base p-2 w-36 rounded-md h-full"; 51 | const cursorClass = isConnecting ? "cursor-not-allowed" : "cursor-pointer"; 52 | 53 | if (isConnected) { 54 | // Connected -> label "Disconnect" -> red 55 | return `bg-red-600 hover:bg-red-700 ${cursorClass} ${baseClasses}`; 56 | } 57 | // Disconnected or connecting -> label is either "Connect" or "Connecting" -> black 58 | return `bg-black hover:bg-gray-900 ${cursorClass} ${baseClasses}`; 59 | } 60 | 61 | return ( 62 |
63 | 70 | 71 |
72 | setIsPTTActive(e.target.checked)} 77 | disabled={!isConnected} 78 | className="w-4 h-4" 79 | /> 80 | 86 | 100 |
101 | 102 |
103 | setIsAudioPlaybackEnabled(e.target.checked)} 108 | disabled={!isConnected} 109 | className="w-4 h-4" 110 | /> 111 | 117 |
118 | 119 |
120 | setIsEventsPaneExpanded(e.target.checked)} 125 | className="w-4 h-4" 126 | /> 127 | 130 |
131 | 132 |
133 |
Codec:
134 | 144 |
145 |
146 | ); 147 | } 148 | 149 | export default BottomToolbar; 150 | -------------------------------------------------------------------------------- /src/app/components/Events.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import React, { useRef, useEffect, useState } from "react"; 4 | import { useEvent } from "@/app/contexts/EventContext"; 5 | import { LoggedEvent } from "@/app/types"; 6 | 7 | export interface EventsProps { 8 | isExpanded: boolean; 9 | } 10 | 11 | function Events({ isExpanded }: EventsProps) { 12 | const [prevEventLogs, setPrevEventLogs] = useState([]); 13 | const eventLogsContainerRef = useRef(null); 14 | 15 | const { loggedEvents, toggleExpand } = useEvent(); 16 | 17 | const getDirectionArrow = (direction: string) => { 18 | if (direction === "client") return { symbol: "▲", color: "#7f5af0" }; 19 | if (direction === "server") return { symbol: "▼", color: "#2cb67d" }; 20 | return { symbol: "•", color: "#555" }; 21 | }; 22 | 23 | useEffect(() => { 24 | const hasNewEvent = loggedEvents.length > prevEventLogs.length; 25 | 26 | if (isExpanded && hasNewEvent && eventLogsContainerRef.current) { 27 | eventLogsContainerRef.current.scrollTop = 28 | eventLogsContainerRef.current.scrollHeight; 29 | } 30 | 31 | setPrevEventLogs(loggedEvents); 32 | }, [loggedEvents, isExpanded]); 33 | 34 | return ( 35 |
42 | {isExpanded && ( 43 |
44 |
45 | Logs 46 |
47 |
48 | {loggedEvents.map((log) => { 49 | const arrowInfo = getDirectionArrow(log.direction); 50 | const isError = 51 | log.eventName.toLowerCase().includes("error") || 52 | log.eventData?.response?.status_details?.error != null; 53 | 54 | return ( 55 |
59 |
toggleExpand(log.id)} 61 | className="flex items-center justify-between cursor-pointer" 62 | > 63 |
64 | 68 | {arrowInfo.symbol} 69 | 70 | 76 | {log.eventName} 77 | 78 |
79 |
80 | {log.timestamp} 81 |
82 |
83 | 84 | {log.expanded && log.eventData && ( 85 |
86 |
 87 |                         {JSON.stringify(log.eventData, null, 2)}
 88 |                       
89 |
90 | )} 91 |
92 | ); 93 | })} 94 |
95 |
96 | )} 97 |
98 | ); 99 | } 100 | 101 | export default Events; 102 | -------------------------------------------------------------------------------- /src/app/components/GuardrailChip.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState } from "react"; 2 | import { 3 | CheckCircledIcon, 4 | CrossCircledIcon, 5 | ClockIcon, 6 | } from "@radix-ui/react-icons"; 7 | import { GuardrailResultType } from "../types"; 8 | 9 | export interface ModerationChipProps { 10 | moderationCategory: string; 11 | moderationRationale: string; 12 | } 13 | 14 | function formatCategory(category: string): string { 15 | return category 16 | .split("_") 17 | .map((word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()) 18 | .join(" "); 19 | } 20 | 21 | export function GuardrailChip({ 22 | guardrailResult, 23 | }: { 24 | guardrailResult: GuardrailResultType; 25 | }) { 26 | const [expanded, setExpanded] = useState(false); 27 | 28 | // Consolidate state into a single variable: "PENDING", "PASS", or "FAIL" 29 | const state = 30 | guardrailResult.status === "IN_PROGRESS" 31 | ? "PENDING" 32 | : guardrailResult.category === "NONE" 33 | ? "PASS" 34 | : "FAIL"; 35 | 36 | // Variables for icon, label, and styling classes based on state 37 | let IconComponent; 38 | let label: string; 39 | let textColorClass: string; 40 | switch (state) { 41 | case "PENDING": 42 | IconComponent = ClockIcon; 43 | label = "Pending"; 44 | textColorClass = "text-gray-600"; 45 | break; 46 | case "PASS": 47 | IconComponent = CheckCircledIcon; 48 | label = "Pass"; 49 | textColorClass = "text-green-600"; 50 | break; 51 | case "FAIL": 52 | IconComponent = CrossCircledIcon; 53 | label = "Fail"; 54 | textColorClass = "text-red-500"; 55 | break; 56 | default: 57 | IconComponent = ClockIcon; 58 | label = "Pending"; 59 | textColorClass = "text-gray-600"; 60 | } 61 | 62 | return ( 63 |
64 |
{ 66 | // Only allow toggling the expanded state for PASS/FAIL cases. 67 | if (state !== "PENDING") { 68 | setExpanded(!expanded); 69 | } 70 | }} 71 | // Only add pointer cursor if clickable (PASS or FAIL state) 72 | className={`inline-flex items-center gap-1 rounded ${ 73 | state !== "PENDING" ? "cursor-pointer" : "" 74 | }`} 75 | > 76 | Guardrail: 77 |
78 | {label} 79 |
80 |
81 | {/* Container for expandable content */} 82 | {state !== "PENDING" && guardrailResult.category && guardrailResult.rationale && ( 83 |
88 |
89 | 90 | Moderation Category: {formatCategory(guardrailResult.category)} 91 | 92 |
{guardrailResult.rationale}
93 | {guardrailResult.testText && ( 94 |
95 | {guardrailResult.testText} 96 |
97 | )} 98 |
99 |
100 | )} 101 |
102 | ); 103 | } -------------------------------------------------------------------------------- /src/app/components/Transcript.tsx: -------------------------------------------------------------------------------- 1 | "use-client"; 2 | 3 | import React, { useEffect, useRef, useState } from "react"; 4 | import ReactMarkdown from "react-markdown"; 5 | import { TranscriptItem } from "@/app/types"; 6 | import Image from "next/image"; 7 | import { useTranscript } from "@/app/contexts/TranscriptContext"; 8 | import { DownloadIcon, ClipboardCopyIcon } from "@radix-ui/react-icons"; 9 | import { GuardrailChip } from "./GuardrailChip"; 10 | 11 | export interface TranscriptProps { 12 | userText: string; 13 | setUserText: (val: string) => void; 14 | onSendMessage: () => void; 15 | canSend: boolean; 16 | downloadRecording: () => void; 17 | } 18 | 19 | function Transcript({ 20 | userText, 21 | setUserText, 22 | onSendMessage, 23 | canSend, 24 | downloadRecording, 25 | }: TranscriptProps) { 26 | const { transcriptItems, toggleTranscriptItemExpand } = useTranscript(); 27 | const transcriptRef = useRef(null); 28 | const [prevLogs, setPrevLogs] = useState([]); 29 | const [justCopied, setJustCopied] = useState(false); 30 | const inputRef = useRef(null); 31 | 32 | function scrollToBottom() { 33 | if (transcriptRef.current) { 34 | transcriptRef.current.scrollTop = transcriptRef.current.scrollHeight; 35 | } 36 | } 37 | 38 | useEffect(() => { 39 | const hasNewMessage = transcriptItems.length > prevLogs.length; 40 | const hasUpdatedMessage = transcriptItems.some((newItem, index) => { 41 | const oldItem = prevLogs[index]; 42 | return ( 43 | oldItem && 44 | (newItem.title !== oldItem.title || newItem.data !== oldItem.data) 45 | ); 46 | }); 47 | 48 | if (hasNewMessage || hasUpdatedMessage) { 49 | scrollToBottom(); 50 | } 51 | 52 | setPrevLogs(transcriptItems); 53 | }, [transcriptItems]); 54 | 55 | // Autofocus on text box input on load 56 | useEffect(() => { 57 | if (canSend && inputRef.current) { 58 | inputRef.current.focus(); 59 | } 60 | }, [canSend]); 61 | 62 | const handleCopyTranscript = async () => { 63 | if (!transcriptRef.current) return; 64 | try { 65 | await navigator.clipboard.writeText(transcriptRef.current.innerText); 66 | setJustCopied(true); 67 | setTimeout(() => setJustCopied(false), 1500); 68 | } catch (error) { 69 | console.error("Failed to copy transcript:", error); 70 | } 71 | }; 72 | 73 | return ( 74 |
75 |
76 |
77 | Transcript 78 |
79 | 86 | 93 |
94 |
95 | 96 | {/* Transcript Content */} 97 |
101 | {[...transcriptItems] 102 | .sort((a, b) => a.createdAtMs - b.createdAtMs) 103 | .map((item) => { 104 | const { 105 | itemId, 106 | type, 107 | role, 108 | data, 109 | expanded, 110 | timestamp, 111 | title = "", 112 | isHidden, 113 | guardrailResult, 114 | } = item; 115 | 116 | if (isHidden) { 117 | return null; 118 | } 119 | 120 | if (type === "MESSAGE") { 121 | const isUser = role === "user"; 122 | const containerClasses = `flex justify-end flex-col ${ 123 | isUser ? "items-end" : "items-start" 124 | }`; 125 | const bubbleBase = `max-w-lg p-3 ${ 126 | isUser ? "bg-gray-900 text-gray-100" : "bg-gray-100 text-black" 127 | }`; 128 | const isBracketedMessage = 129 | title.startsWith("[") && title.endsWith("]"); 130 | const messageStyle = isBracketedMessage 131 | ? "italic text-gray-400" 132 | : ""; 133 | const displayTitle = isBracketedMessage 134 | ? title.slice(1, -1) 135 | : title; 136 | 137 | return ( 138 |
139 |
140 |
145 |
150 | {timestamp} 151 |
152 |
153 | {displayTitle} 154 |
155 |
156 | {guardrailResult && ( 157 |
158 | 159 |
160 | )} 161 |
162 |
163 | ); 164 | } else if (type === "BREADCRUMB") { 165 | return ( 166 |
170 | {timestamp} 171 |
data && toggleTranscriptItemExpand(itemId)} 176 | > 177 | {data && ( 178 | 183 | ▶ 184 | 185 | )} 186 | {title} 187 |
188 | {expanded && data && ( 189 |
190 |
191 |                         {JSON.stringify(data, null, 2)}
192 |                       
193 |
194 | )} 195 |
196 | ); 197 | } else { 198 | // Fallback if type is neither MESSAGE nor BREADCRUMB 199 | return ( 200 |
204 | Unknown item type: {type}{" "} 205 | {timestamp} 206 |
207 | ); 208 | } 209 | })} 210 |
211 |
212 | 213 |
214 | setUserText(e.target.value)} 219 | onKeyDown={(e) => { 220 | if (e.key === "Enter" && canSend) { 221 | onSendMessage(); 222 | } 223 | }} 224 | className="flex-1 px-4 py-2 focus:outline-none" 225 | placeholder="Type a message..." 226 | /> 227 | 234 |
235 |
236 | ); 237 | } 238 | 239 | export default Transcript; 240 | -------------------------------------------------------------------------------- /src/app/contexts/EventContext.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import React, { createContext, useContext, useState, FC, PropsWithChildren } from "react"; 4 | import { v4 as uuidv4 } from "uuid"; 5 | import { LoggedEvent } from "@/app/types"; 6 | 7 | type EventContextValue = { 8 | loggedEvents: LoggedEvent[]; 9 | logClientEvent: (eventObj: Record, eventNameSuffix?: string) => void; 10 | logServerEvent: (eventObj: Record, eventNameSuffix?: string) => void; 11 | logHistoryItem: (item: any) => void; 12 | toggleExpand: (id: number | string) => void; 13 | }; 14 | 15 | const EventContext = createContext(undefined); 16 | 17 | export const EventProvider: FC = ({ children }) => { 18 | const [loggedEvents, setLoggedEvents] = useState([]); 19 | 20 | function addLoggedEvent(direction: "client" | "server", eventName: string, eventData: Record) { 21 | const id = eventData.event_id || uuidv4(); 22 | setLoggedEvents((prev) => [ 23 | ...prev, 24 | { 25 | id, 26 | direction, 27 | eventName, 28 | eventData, 29 | timestamp: new Date().toLocaleTimeString(), 30 | expanded: false, 31 | }, 32 | ]); 33 | } 34 | 35 | const logClientEvent: EventContextValue["logClientEvent"] = (eventObj, eventNameSuffix = "") => { 36 | const name = `${eventObj.type || ""} ${eventNameSuffix || ""}`.trim(); 37 | addLoggedEvent("client", name, eventObj); 38 | }; 39 | 40 | const logServerEvent: EventContextValue["logServerEvent"] = (eventObj, eventNameSuffix = "") => { 41 | const name = `${eventObj.type || ""} ${eventNameSuffix || ""}`.trim(); 42 | addLoggedEvent("server", name, eventObj); 43 | }; 44 | 45 | const logHistoryItem: EventContextValue['logHistoryItem'] = (item) => { 46 | let eventName = item.type; 47 | if (item.type === 'message') { 48 | eventName = `${item.role}.${item.status}`; 49 | } 50 | if (item.type === 'function_call') { 51 | eventName = `function.${item.name}.${item.status}`; 52 | } 53 | addLoggedEvent('server', eventName, item); 54 | }; 55 | 56 | const toggleExpand: EventContextValue['toggleExpand'] = (id) => { 57 | setLoggedEvents((prev) => 58 | prev.map((log) => { 59 | if (log.id === id) { 60 | return { ...log, expanded: !log.expanded }; 61 | } 62 | return log; 63 | }) 64 | ); 65 | }; 66 | 67 | 68 | return ( 69 | 72 | {children} 73 | 74 | ); 75 | }; 76 | 77 | export function useEvent() { 78 | const context = useContext(EventContext); 79 | if (!context) { 80 | throw new Error("useEvent must be used within an EventProvider"); 81 | } 82 | return context; 83 | } -------------------------------------------------------------------------------- /src/app/contexts/TranscriptContext.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import React, { createContext, useContext, useState, FC, PropsWithChildren } from "react"; 4 | import { v4 as uuidv4 } from "uuid"; 5 | import { TranscriptItem } from "@/app/types"; 6 | 7 | type TranscriptContextValue = { 8 | transcriptItems: TranscriptItem[]; 9 | addTranscriptMessage: (itemId: string, role: "user" | "assistant", text: string, hidden?: boolean) => void; 10 | updateTranscriptMessage: (itemId: string, text: string, isDelta: boolean) => void; 11 | addTranscriptBreadcrumb: (title: string, data?: Record) => void; 12 | toggleTranscriptItemExpand: (itemId: string) => void; 13 | updateTranscriptItem: (itemId: 
string, updatedProperties: Partial) => void; 14 | }; 15 | 16 | const TranscriptContext = createContext(undefined); 17 | 18 | export const TranscriptProvider: FC = ({ children }) => { 19 | const [transcriptItems, setTranscriptItems] = useState([]); 20 | 21 | function newTimestampPretty(): string { 22 | const now = new Date(); 23 | const time = now.toLocaleTimeString([], { 24 | hour12: false, 25 | hour: "2-digit", 26 | minute: "2-digit", 27 | second: "2-digit", 28 | }); 29 | const ms = now.getMilliseconds().toString().padStart(3, "0"); 30 | return `${time}.${ms}`; 31 | } 32 | 33 | const addTranscriptMessage: TranscriptContextValue["addTranscriptMessage"] = (itemId, role, text = "", isHidden = false) => { 34 | setTranscriptItems((prev) => { 35 | if (prev.some((log) => log.itemId === itemId && log.type === "MESSAGE")) { 36 | console.warn(`[addTranscriptMessage] skipping; message already exists for itemId=${itemId}, role=${role}, text=${text}`); 37 | return prev; 38 | } 39 | 40 | const newItem: TranscriptItem = { 41 | itemId, 42 | type: "MESSAGE", 43 | role, 44 | title: text, 45 | expanded: false, 46 | timestamp: newTimestampPretty(), 47 | createdAtMs: Date.now(), 48 | status: "IN_PROGRESS", 49 | isHidden, 50 | }; 51 | 52 | return [...prev, newItem]; 53 | }); 54 | }; 55 | 56 | const updateTranscriptMessage: TranscriptContextValue["updateTranscriptMessage"] = (itemId, newText, append = false) => { 57 | setTranscriptItems((prev) => 58 | prev.map((item) => { 59 | if (item.itemId === itemId && item.type === "MESSAGE") { 60 | return { 61 | ...item, 62 | title: append ? (item.title ?? "") + newText : newText, 63 | }; 64 | } 65 | return item; 66 | }) 67 | ); 68 | }; 69 | 70 | const addTranscriptBreadcrumb: TranscriptContextValue["addTranscriptBreadcrumb"] = (title, data) => { 71 | setTranscriptItems((prev) => [ 72 | ...prev, 73 | { 74 | itemId: `breadcrumb-${uuidv4()}`, 75 | type: "BREADCRUMB", 76 | title, 77 | data, 78 | expanded: false, 79 | timestamp: newTimestampPretty(), 80 | createdAtMs: Date.now(), 81 | status: "DONE", 82 | isHidden: false, 83 | }, 84 | ]); 85 | }; 86 | 87 | const toggleTranscriptItemExpand: TranscriptContextValue["toggleTranscriptItemExpand"] = (itemId) => { 88 | setTranscriptItems((prev) => 89 | prev.map((log) => 90 | log.itemId === itemId ? { ...log, expanded: !log.expanded } : log 91 | ) 92 | ); 93 | }; 94 | 95 | const updateTranscriptItem: TranscriptContextValue["updateTranscriptItem"] = (itemId, updatedProperties) => { 96 | setTranscriptItems((prev) => 97 | prev.map((item) => 98 | item.itemId === itemId ? 
{ ...item, ...updatedProperties } : item 99 | ) 100 | ); 101 | }; 102 | 103 | return ( 104 | 114 | {children} 115 | 116 | ); 117 | }; 118 | 119 | export function useTranscript() { 120 | const context = useContext(TranscriptContext); 121 | if (!context) { 122 | throw new Error("useTranscript must be used within a TranscriptProvider"); 123 | } 124 | return context; 125 | } -------------------------------------------------------------------------------- /src/app/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | --background: #fafafa; 7 | --foreground: #171717; 8 | } 9 | 10 | @media (prefers-color-scheme: dark) { 11 | :root { 12 | --background: #0a0a0a; 13 | --foreground: #ededed; 14 | } 15 | } 16 | 17 | body { 18 | color: var(--foreground); 19 | background: var(--background); 20 | font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, 21 | "Helvetica Neue", Arial, "Noto Sans", sans-serif; 22 | } 23 | -------------------------------------------------------------------------------- /src/app/hooks/useAudioDownload.ts: -------------------------------------------------------------------------------- 1 | import { useRef } from "react"; 2 | import { convertWebMBlobToWav } from "../lib/audioUtils"; 3 | 4 | function useAudioDownload() { 5 | // Ref to store the MediaRecorder instance. 6 | const mediaRecorderRef = useRef(null); 7 | // Ref to collect all recorded Blob chunks. 8 | const recordedChunksRef = useRef([]); 9 | 10 | /** 11 | * Starts recording by combining the provided remote stream with 12 | * the microphone audio. 13 | * @param remoteStream - The remote MediaStream (e.g., from the audio element). 14 | */ 15 | const startRecording = async (remoteStream: MediaStream) => { 16 | let micStream: MediaStream; 17 | try { 18 | micStream = await navigator.mediaDevices.getUserMedia({ audio: true }); 19 | } catch (err) { 20 | console.error("Error getting microphone stream:", err); 21 | // Fallback to an empty MediaStream if microphone access fails. 22 | micStream = new MediaStream(); 23 | } 24 | 25 | // Create an AudioContext to merge the streams. 26 | const audioContext = new AudioContext(); 27 | const destination = audioContext.createMediaStreamDestination(); 28 | 29 | // Connect the remote audio stream. 30 | try { 31 | const remoteSource = audioContext.createMediaStreamSource(remoteStream); 32 | remoteSource.connect(destination); 33 | } catch (err) { 34 | console.error("Error connecting remote stream to the audio context:", err); 35 | } 36 | 37 | // Connect the microphone audio stream. 38 | try { 39 | const micSource = audioContext.createMediaStreamSource(micStream); 40 | micSource.connect(destination); 41 | } catch (err) { 42 | console.error("Error connecting microphone stream to the audio context:", err); 43 | } 44 | 45 | const options = { mimeType: "audio/webm" }; 46 | try { 47 | const mediaRecorder = new MediaRecorder(destination.stream, options); 48 | mediaRecorder.ondataavailable = (event: BlobEvent) => { 49 | if (event.data && event.data.size > 0) { 50 | recordedChunksRef.current.push(event.data); 51 | } 52 | }; 53 | // Start recording without a timeslice. 54 | mediaRecorder.start(); 55 | mediaRecorderRef.current = mediaRecorder; 56 | } catch (err) { 57 | console.error("Error starting MediaRecorder with combined stream:", err); 58 | } 59 | }; 60 | 61 | /** 62 | * Stops the MediaRecorder, if active. 
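Any chunks gathered so far stay in recordedChunksRef, so a later downloadRecording() call can still export them. 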
63 | */ 64 | const stopRecording = () => { 65 | if (mediaRecorderRef.current) { 66 | // Request any final data before stopping. 67 | mediaRecorderRef.current.requestData(); 68 | mediaRecorderRef.current.stop(); 69 | mediaRecorderRef.current = null; 70 | } 71 | }; 72 | 73 | /** 74 | * Initiates download of the recording after converting from WebM to WAV. 75 | * If the recorder is still active, we request its latest data before downloading. 76 | */ 77 | const downloadRecording = async () => { 78 | // If recording is still active, request the latest chunk. 79 | if (mediaRecorderRef.current && mediaRecorderRef.current.state === "recording") { 80 | // Request the current data. 81 | mediaRecorderRef.current.requestData(); 82 | // Allow a short delay for ondataavailable to fire. 83 | await new Promise((resolve) => setTimeout(resolve, 100)); 84 | } 85 | 86 | if (recordedChunksRef.current.length === 0) { 87 | console.warn("No recorded chunks found to download."); 88 | return; 89 | } 90 | 91 | // Combine the recorded chunks into a single WebM blob. 92 | const webmBlob = new Blob(recordedChunksRef.current, { type: "audio/webm" }); 93 | 94 | try { 95 | // Convert the WebM blob into a WAV blob. 96 | const wavBlob = await convertWebMBlobToWav(webmBlob); 97 | const url = URL.createObjectURL(wavBlob); 98 | 99 | // Generate a formatted datetime string (replace characters not allowed in filenames). 100 | const now = new Date().toISOString().replace(/[:.]/g, "-"); 101 | 102 | // Create an invisible anchor element and trigger the download. 103 | const a = document.createElement("a"); 104 | a.style.display = "none"; 105 | a.href = url; 106 | a.download = `realtime_agents_audio_${now}.wav`; 107 | document.body.appendChild(a); 108 | a.click(); 109 | document.body.removeChild(a); 110 | 111 | // Clean up the blob URL after a short delay. 112 | setTimeout(() => URL.revokeObjectURL(url), 100); 113 | } catch (err) { 114 | console.error("Error converting recording to WAV:", err); 115 | } 116 | }; 117 | 118 | return { startRecording, stopRecording, downloadRecording }; 119 | } 120 | 121 | export default useAudioDownload; -------------------------------------------------------------------------------- /src/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from "next"; 2 | import "./globals.css"; 3 | import "./lib/envSetup"; 4 | 5 | export const metadata: Metadata = { 6 | title: "Realtime API Agents", 7 | description: "A demo app from OpenAI.", 8 | }; 9 | 10 | export default function RootLayout({ 11 | children, 12 | }: Readonly<{ 13 | children: React.ReactNode; 14 | }>) { 15 | return ( 16 | 17 | {children} 18 | 19 | ); 20 | } 21 | -------------------------------------------------------------------------------- /src/app/lib/audioUtils.ts: -------------------------------------------------------------------------------- 1 | // WAV conversion utilities 2 | 3 | /** 4 | * Writes a string into a DataView at the given offset. 5 | */ 6 | export function writeString(view: DataView, offset: number, str: string) { 7 | for (let i = 0; i < str.length; i++) { 8 | view.setUint8(offset + i, str.charCodeAt(i)); 9 | } 10 | } 11 | 12 | /** 13 | * Converts a Float32Array to 16-bit PCM in a DataView. 14 | */ 15 | export function floatTo16BitPCM(output: DataView, offset: number, input: Float32Array) { 16 | for (let i = 0; i < input.length; i++, offset += 2) { 17 | const s = Math.max(-1, Math.min(1, input[i])); 18 | output.setInt16(offset, s < 0 ? 
s * 0x8000 : s * 0x7FFF, true); 19 | } 20 | } 21 | 22 | /** 23 | * Encodes a Float32Array as a WAV file. 24 | */ 25 | export function encodeWAV(samples: Float32Array, sampleRate: number): ArrayBuffer { 26 | const buffer = new ArrayBuffer(44 + samples.length * 2); 27 | const view = new DataView(buffer); 28 | 29 | // RIFF identifier 30 | writeString(view, 0, "RIFF"); 31 | // file length minus RIFF identifier length and file description length 32 | view.setUint32(4, 36 + samples.length * 2, true); 33 | // RIFF type 34 | writeString(view, 8, "WAVE"); 35 | // format chunk identifier 36 | writeString(view, 12, "fmt "); 37 | // format chunk length 38 | view.setUint32(16, 16, true); 39 | // sample format (raw) 40 | view.setUint16(20, 1, true); 41 | // channel count - forcing mono here by averaging channels 42 | view.setUint16(22, 1, true); 43 | // sample rate 44 | view.setUint32(24, sampleRate, true); 45 | // byte rate (sample rate * block align) 46 | view.setUint32(28, sampleRate * 2, true); 47 | // block align (channel count * bytes per sample) 48 | view.setUint16(32, 2, true); 49 | // bits per sample 50 | view.setUint16(34, 16, true); 51 | // data chunk identifier 52 | writeString(view, 36, "data"); 53 | // data chunk length 54 | view.setUint32(40, samples.length * 2, true); 55 | 56 | floatTo16BitPCM(view, 44, samples); 57 | 58 | return buffer; 59 | } 60 | 61 | /** 62 | * Converts a WebM audio blob to a WAV blob. 63 | */ 64 | export async function convertWebMBlobToWav(blob: Blob): Promise<Blob> { 65 | const arrayBuffer = await blob.arrayBuffer(); 66 | const audioContext = new AudioContext(); 67 | const audioBuffer = await audioContext.decodeAudioData(arrayBuffer); 68 | const numChannels = audioBuffer.numberOfChannels; 69 | const length = audioBuffer.length; 70 | const combined = new Float32Array(length); 71 | 72 | // Average channels to produce mono output 73 | for (let channel = 0; channel < numChannels; channel++) { 74 | const channelData = audioBuffer.getChannelData(channel); 75 | for (let i = 0; i < length; i++) { 76 | combined[i] += channelData[i]; 77 | } 78 | } 79 | for (let i = 0; i < length; i++) { 80 | combined[i] /= numChannels; 81 | } 82 | const wavBuffer = encodeWAV(combined, audioBuffer.sampleRate); 83 | return new Blob([wavBuffer], { type: "audio/wav" }); 84 | } -------------------------------------------------------------------------------- /src/app/lib/callOai.ts: -------------------------------------------------------------------------------- 1 | import { zodTextFormat } from 'openai/helpers/zod'; 2 | import { GuardrailOutputZod, GuardrailOutput } from '@/app/types'; 3 | 4 | export async function runGuardrailClassifier( 5 | message: string, 6 | ): Promise<GuardrailOutput> { 7 | const messages = [ 8 | { 9 | role: 'user', 10 | content: `You are an expert at classifying text according to moderation policies. Consider the provided message, analyze potential classes from output_classes, and output the best classification. Output json, following the provided schema. Keep your analysis and reasoning short and to the point, maximum 2 sentences. 11 | 12 | <info> 13 | - Company name: newTelco, or Snowy Peak Boards 14 | </info> 15 | 16 | <message> 17 | ${message} 18 | </message> 19 | 20 | <output_classes> 21 | - OFFENSIVE: Content that includes hate speech, discriminatory language, insults, slurs, or harassment. 22 | - OFF_BRAND: Content that discusses competitors in a disparaging way. 23 | - VIOLENCE: Content that includes explicit threats, incitement of harm, or graphic descriptions of physical injury or violence. 
24 | - NONE: If no other classes are appropriate and the message is fine. 25 | </output_classes> 26 | `, 27 | }, 28 | ]; 29 | 30 | const response = await fetch('/api/responses', { 31 | method: 'POST', 32 | headers: { 33 | 'Content-Type': 'application/json', 34 | }, 35 | body: JSON.stringify({ 36 | model: 'gpt-4o-mini', 37 | input: messages, 38 | text: { 39 | format: zodTextFormat(GuardrailOutputZod, 'output_format'), 40 | }, 41 | }), 42 | }); 43 | 44 | if (!response.ok) { 45 | console.warn('Server returned an error:', response); 46 | return Promise.reject('Error with runGuardrailClassifier.'); 47 | } 48 | 49 | const data = await response.json(); 50 | 51 | try { 52 | const output = GuardrailOutputZod.parse(data.output_parsed); 53 | return output; 54 | } catch (error) { 55 | console.error('Error parsing the message content as GuardrailOutput:', error); 56 | return Promise.reject('Failed to parse guardrail output.'); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/app/lib/envSetup.ts: -------------------------------------------------------------------------------- 1 | import dotenv from 'dotenv'; 2 | 3 | dotenv.config({path: './.env'}) -------------------------------------------------------------------------------- /src/app/page.tsx: -------------------------------------------------------------------------------- 1 | import React, { Suspense } from "react"; 2 | import { TranscriptProvider } from "@/app/contexts/TranscriptContext"; 3 | import { EventProvider } from "@/app/contexts/EventContext"; 4 | import App from "./App"; 5 | 6 | export default function Page() { 7 | return ( 8 | <Suspense fallback={<div>Loading...</div>}> 9 | <TranscriptProvider> 10 | <EventProvider> 11 | <App /> 12 | </EventProvider> 13 | </TranscriptProvider> 14 | </Suspense> 15 | ); 16 | } 17 | -------------------------------------------------------------------------------- /src/app/types.ts: -------------------------------------------------------------------------------- 1 | import { z } from "zod"; 2 | 3 | // Define the allowed moderation categories only once 4 | export const MODERATION_CATEGORIES = [ 5 | "OFFENSIVE", 6 | "OFF_BRAND", 7 | "VIOLENCE", 8 | "NONE", 9 | ] as const; 10 | 11 | // Derive the union type for ModerationCategory from the array 12 | export type ModerationCategory = (typeof MODERATION_CATEGORIES)[number]; 13 | 14 | // Create a Zod enum based on the same array 15 | export const ModerationCategoryZod = z.enum([...MODERATION_CATEGORIES]); 16 | 17 | export type SessionStatus = "DISCONNECTED" | "CONNECTING" | "CONNECTED"; 18 | 19 | export interface ToolParameterProperty { 20 | type: string; 21 | description?: string; 22 | enum?: string[]; 23 | pattern?: string; 24 | properties?: Record<string, ToolParameterProperty>; 25 | required?: string[]; 26 | additionalProperties?: boolean; 27 | items?: ToolParameterProperty; 28 | } 29 | 30 | export interface ToolParameters { 31 | type: string; 32 | properties: Record<string, ToolParameterProperty>; 33 | required?: string[]; 34 | additionalProperties?: boolean; 35 | } 36 | 37 | export interface Tool { 38 | type: "function"; 39 | name: string; 40 | description: string; 41 | parameters: ToolParameters; 42 | } 43 | 44 | export interface AgentConfig { 45 | name: string; 46 | publicDescription: string; // gives context to agent transfer tool 47 | instructions: string; 48 | tools: Tool[]; 49 | toolLogic?: Record< 50 | string, 51 | (args: any, transcriptLogsFiltered: TranscriptItem[], addTranscriptBreadcrumb?: (title: string, data?: any) => void) => Promise<any> | any 52 | >; 53 | // addTranscriptBreadcrumb is a param in case we want to add additional breadcrumbs, e.g. for nested tool calls from a supervisor agent. 
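downstreamAgents below lists the agents this config can hand off to; the lighter { name, publicDescription } variant carries just enough context for the transfer tool. 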
54 | downstreamAgents?: 55 | | AgentConfig[] 56 | | { name: string; publicDescription: string }[]; 57 | } 58 | 59 | export type AllAgentConfigsType = Record<string, AgentConfig[]>; 60 | 61 | export interface GuardrailResultType { 62 | status: "IN_PROGRESS" | "DONE"; 63 | testText?: string; 64 | category?: ModerationCategory; 65 | rationale?: string; 66 | } 67 | 68 | export interface TranscriptItem { 69 | itemId: string; 70 | type: "MESSAGE" | "BREADCRUMB"; 71 | role?: "user" | "assistant"; 72 | title?: string; 73 | data?: Record<string, any>; 74 | expanded: boolean; 75 | timestamp: string; 76 | createdAtMs: number; 77 | status: "IN_PROGRESS" | "DONE"; 78 | isHidden: boolean; 79 | guardrailResult?: GuardrailResultType; 80 | } 81 | 82 | export interface Log { 83 | id: number; 84 | timestamp: string; 85 | direction: string; 86 | eventName: string; 87 | data: any; 88 | expanded: boolean; 89 | type: string; 90 | } 91 | 92 | export interface ServerEvent { 93 | type: string; 94 | event_id?: string; 95 | item_id?: string; 96 | transcript?: string; 97 | delta?: string; 98 | session?: { 99 | id?: string; 100 | }; 101 | item?: { 102 | id?: string; 103 | object?: string; 104 | type?: string; 105 | status?: string; 106 | name?: string; 107 | arguments?: string; 108 | role?: "user" | "assistant"; 109 | content?: { 110 | type?: string; 111 | transcript?: string | null; 112 | text?: string; 113 | }[]; 114 | }; 115 | response?: { 116 | output?: { 117 | id: string; 118 | type?: string; 119 | name?: string; 120 | arguments?: any; 121 | call_id?: string; 122 | role: string; 123 | content?: any; 124 | }[]; 125 | metadata: Record<string, any>; 126 | status_details?: { 127 | error?: any; 128 | }; 129 | }; 130 | } 131 | 132 | export interface LoggedEvent { 133 | id: number; 134 | direction: "client" | "server"; 135 | expanded: boolean; 136 | timestamp: string; 137 | eventName: string; 138 | eventData: Record<string, any>; // can have arbitrary objects logged 139 | } 140 | 141 | // Update the GuardrailOutputZod schema to use the shared ModerationCategoryZod 142 | export const GuardrailOutputZod = z.object({ 143 | moderationRationale: z.string(), 144 | moderationCategory: ModerationCategoryZod, 145 | }); 146 | 147 | export type GuardrailOutput = z.infer<typeof GuardrailOutputZod>; 148 | -------------------------------------------------------------------------------- /tailwind.config.ts: -------------------------------------------------------------------------------- 1 | import type { Config } from "tailwindcss"; 2 | 3 | export default { 4 | content: [ 5 | "./src/pages/**/*.{js,ts,jsx,tsx,mdx}", 6 | "./src/components/**/*.{js,ts,jsx,tsx,mdx}", 7 | "./src/app/**/*.{js,ts,jsx,tsx,mdx}", 8 | ], 9 | theme: { 10 | extend: { 11 | colors: { 12 | background: "var(--background)", 13 | foreground: "var(--foreground)", 14 | }, 15 | }, 16 | }, 17 | plugins: [], 18 | } satisfies Config; 19 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2017", 4 | "lib": [ 5 | "dom", 6 | "dom.iterable", 7 | "esnext" 8 | ], 9 | "allowJs": true, 10 | "skipLibCheck": true, 11 | "strict": true, 12 | "noEmit": true, 13 | "esModuleInterop": true, 14 | "module": "esnext", 15 | "moduleResolution": "bundler", 16 | "resolveJsonModule": true, 17 | "isolatedModules": true, 18 | "jsx": "preserve", 19 | "incremental": true, 20 | "plugins": [ 21 | { 22 | "name": "next" 23 | } 24 | ], 25 | "paths": { 26 | "@/*": [ 27 | "./src/*" 28 | ] 29 | } 30 | }, 31 | "include": [ 32 
| "**/*.ts", 33 | "**/*.tsx", 34 | "next-env.d.ts", 35 | ".next/types/**/*.ts" 36 | ], 37 | "exclude": [ 38 | "node_modules", 39 | ".next" 40 | ] 41 | } 42 | --------------------------------------------------------------------------------