├── .env.example
├── .gitignore
├── README.md
├── app
    ├── api
    │   ├── cua
    │   │   ├── agent
    │   │   │   ├── agent.ts
    │   │   │   ├── base_playwright.ts
    │   │   │   ├── browserbase.ts
    │   │   │   └── types.ts
    │   │   ├── start
    │   │   │   └── route.ts
    │   │   ├── step
    │   │   │   ├── execute
    │   │   │   │   └── route.ts
    │   │   │   └── generate
    │   │   │   │   └── route.ts
    │   │   └── types.ts
    │   └── session
    │   │   ├── [sessionId]
    │   │       └── pages
    │   │       │   └── route.ts
    │   │   └── route.ts
    ├── components
    │   ├── AnimatedButton.tsx
    │   ├── BrowserSessionContainer.tsx
    │   ├── BrowserTabs.tsx
    │   ├── ChatBlock.tsx
    │   ├── ChatFeed.tsx
    │   ├── PosthogProvider.tsx
    │   ├── SessionControls.tsx
    │   └── ui
    │   │   └── sliding-number.tsx
    ├── favicon.ico
    ├── globals.css
    ├── layout.tsx
    └── page.tsx
├── components.json
├── eslint.config.mjs
├── fonts
    ├── PPNeueMontreal-Medium.otf
    └── PPSupplySans-Regular.otf
├── lib
    └── utils.ts
├── next-env.d.ts
├── next.config.ts
├── package-lock.json
├── package.json
├── postcss.config.mjs
├── public
    ├── agent_loop.png
    ├── agent_mess.png
    ├── favicon.svg
    ├── file.svg
    ├── github.svg
    ├── globe.svg
    ├── grid.svg
    ├── next.svg
    ├── og.png
    ├── stagehand_clean.png
    ├── vercel.svg
    └── window.svg
├── tailwind.config.ts
├── tsconfig.json
└── vercel.json


/.env.example:
--------------------------------------------------------------------------------
1 | # OpenAI API Configuration
2 | OPENAI_API_KEY=your_openai_api_key_here
3 | 
4 | # Browserbase Configuration
5 | BROWSERBASE_API_KEY=your_browserbase_api_key_here
6 | BROWSERBASE_PROJECT_ID=your_browserbase_project_id_here
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled source #
 2 | ###################
 3 | *.com
 4 | *.class
 5 | *.dll
 6 | *.exe
 7 | *.o
 8 | *.so
 9 | 
10 | # Packages #
11 | ############
12 | # it's better to unpack these files and commit the raw source
13 | # git has its own built in compression methods
14 | *.7z
15 | *.dmg
16 | *.gz
17 | *.iso
18 | *.jar
19 | *.rar
20 | *.tar
21 | *.zip
22 | 
23 | # Logs and databases #
24 | ######################
25 | *.log
26 | *.sql
27 | *.sqlite
28 | 
29 | # OS generated files #
30 | ######################
31 | .DS_Store
32 | .DS_Store?
33 | ._*
34 | .Spotlight-V100
35 | .Trashes
36 | ehthumbs.db
37 | Thumbs.db
38 | 
39 | # IDE and Editor folders #
40 | ##########################
41 | .idea/
42 | .vscode/
43 | *.swp
44 | *.swo
45 | *~
46 | 
47 | # Node.js #
48 | ###########
49 | node_modules/
50 | npm-debug.log
51 | .next
52 | 
53 | # Python #
54 | ##########
55 | *.py[cod]
56 | __pycache__/
57 | *.so
58 | 
59 | # Java #
60 | ########
61 | *.class
62 | *.jar
63 | *.war
64 | *.ear
65 | 
66 | # Gradle #
67 | ##########
68 | .gradle
69 | /build/
70 | 
71 | # Maven #
72 | #########
73 | target/
74 | 
75 | # Miscellaneous #
76 | #################
77 | *.bak
78 | *.tmp
79 | *.temp
80 | .env
81 | .env.local
82 | 
83 | # pnpm
84 | pnpm-lock.yaml
85 | 
86 | test/
87 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # CUA Browser
  2 | 
  3 | This is a playground for you to test, explore, and get inspired by the power of Browserbase and OpenAI's Computer Use Agent. This is free and always will be! It's not a product, just a demo playground.
  4 | 
  5 | [![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2Fbrowserbase%2Fcua-browser&env=OPENAI_API_KEY,BROWSERBASE_API_KEY,BROWSERBASE_PROJECT_ID&envDescription=API%20keys%20needed%20to%20run%20CUA%20Browser&envLink=https%3A%2F%2Fgithub.com%2Fbrowserbase%2Fcua-browser%23environment-variables)
  6 | 
  7 | ## Getting Started
  8 | 
  9 | This project uses TypeScript and requires Node.js. We recommend using Node.js version 14.x or later.
 10 | 
 11 | First, install the dependencies for this repository:
 12 | 
 13 | ```bash
 14 | npm install
 15 | ```
 16 | 
 17 | Next, copy the example environment variables:
 18 | 
 19 | ```bash
 20 | cp .env.example .env.local
 21 | ```
 22 | 
 23 | You'll need to set up your API keys:
 24 | 
 25 | 1. Get your OpenAI API key from [OpenAI's dashboard](https://platform.openai.com/api-keys)
 26 | 2. Get your Browserbase API key and project ID from [Browserbase](https://www.browserbase.com)
 27 | 
 28 | 1. Clone this repository:
 29 |    ```bash
 30 |    git clone https://github.com/browserbase/cua-browser.git
 31 |    cd cua-browser
 32 |    ```
 33 | 
 34 | 2. Install dependencies:
 35 |    ```bash
 36 |    npm install
 37 |    ```
 38 | 
 39 | 3. Create a `.env.local` file with your API keys. You can get your API keys from [OpenAI](https://platform.openai.com/api-keys) and [Browserbase](https://www.browserbase.com)
 40 |    ```
 41 |    OPENAI_API_KEY=your_openai_api_key
 42 |    OPENAI_ORG=your_openai_org_id (optional)
 43 |    BROWSERBASE_API_KEY=your_browserbase_api_key
 44 |    BROWSERBASE_PROJECT_ID=your_browserbase_project_id
 45 |    ```
 46 | 
 47 | 4. Start the development server:
 48 |    ```bash
 49 |    npm run dev
 50 |    ```
 51 | 
 52 | Open [http://localhost:3000](http://localhost:3000) with your browser to see CUA Browser in action. You can interact with the CUA Browser by typing natural language commands in the input field and observing the browser's actions in response.
 53 | 
 54 | ## Usage
 55 | 
 56 | Here's a basic example of how to implement the Browserbase Computer Use Agent:
 57 | 
 58 | ```typescript
 59 | import { Agent } from './app/api/cua/agent/agent';
 60 | import { BrowserbaseBrowser } from './app/api/cua/agent/browserbase';
 61 | 
 62 | async function main() {
 63 |   // Initialize the browser
 64 |   const browser = new BrowserbaseBrowser(1024, 768);
 65 |   await browser.connect();
 66 | 
 67 |   // Initialize the agent
 68 |   const agent = new Agent(
 69 |     "computer-use-preview",
 70 |     browser,
 71 |     (message) => {
 72 |       console.log(`Safety check: ${message}`);
 73 |       return true; // Acknowledge all safety checks
 74 |     }
 75 |   );
 76 | 
 77 |   // Prepare the input for the agent
 78 |   const inputItems = [
 79 |     {
 80 |       role: "user",
 81 |       content: [
 82 |         {
 83 |           type: "text",
 84 |           text: "Go to google.com and search for 'Browserbase'"
 85 |         }
 86 |       ]
 87 |     }
 88 |   ];
 89 | 
 90 |   // Get the action from the agent
 91 |   const { output, responseId } = await agent.getAction(inputItems, undefined);
 92 | 
 93 |   // Take the action
 94 |   const results = await agent.takeAction(output);
 95 | 
 96 |   // Print the results
 97 |   console.log("Action results:", results);
 98 | 
 99 |   // Store the response ID for potential future use
100 |   agent.lastResponseId = responseId;
101 | 
102 |   // Disconnect the browser
103 |   await browser.disconnect();
104 | }
105 | 
106 | main().catch(console.error);
107 | ```
108 | 
109 | This example demonstrates how to:
110 | 
111 | 1. Initialize the BrowserbaseBrowser with specific dimensions.
112 | 2. Create an Agent instance with the appropriate model and browser.
113 | 3. Prepare input items for the agent.
114 | 4. Get an action from the agent using the `getAction` method.
115 | 5. Execute the action using the `takeAction` method.
116 | 6. Handle the results of the action.
117 | 7. Store the response ID for potential future interactions.
118 | 
119 | Note that this example uses the `getAction` and `takeAction` methods separately, which allows for more granular control over the agent's behavior. You can expand on this basic example to create more complex interactions with the browser based on your specific use case.
120 | 
121 | ## Files
122 | 
123 | - `agent.ts`: The main Agent class that handles interactions with the OpenAI API
124 | - `base_playwright.ts`: Base class for Playwright-based browser automation
125 | - `browserbase.ts`: Implementation of the Browserbase browser
126 | - `utils.ts`: Utility functions for API calls and image handling
127 | 


--------------------------------------------------------------------------------
/app/api/cua/agent/agent.ts:
--------------------------------------------------------------------------------
  1 | import { BrowserbaseBrowser } from "./browserbase";
  2 | import OpenAI from "openai";
  3 | import {
  4 |   InputItem,
  5 |   Item,
  6 |   Message,
  7 |   FunctionToolCall,
  8 |   ComputerToolCall,
  9 |   ComputerCallOutput,
 10 |   FunctionOutput,
 11 |   Tool,
 12 |   RequestOptions,
 13 | } from "./types";
 14 | import { AxiosError } from "axios";
 15 | import axios from "axios";
 16 | import axiosRetry from 'axios-retry';
 17 | 
 18 | type AcknowledgeSafetyCheckCallback = (message: string) => boolean; // Return true to acknowledge a pending safety check; returning false makes the agent throw
 19 | 
 20 | /** Runs one step of the CUA loop: requests actions from the OpenAI Responses API and executes them on the browser. */
 21 | export class Agent {
 22 |   // Set once axios-retry has been installed on the shared axios instance.
 23 |   private static retryConfigured = false;
 24 |   private client: OpenAI;
 25 |   private model: string;
 26 |   private computer: BrowserbaseBrowser;
 27 |   private tools: Tool[];
 28 |   private printSteps: boolean = true;
 29 |   private acknowledgeSafetyCheckCallback: AcknowledgeSafetyCheckCallback;
 30 |   public lastResponseId: string | undefined = undefined;
 31 | 
 32 |   /**
 33 |    * @param model - Model name sent with every request.
 34 |    * @param computer - Browser the actions are executed on; its dimensions and environment are advertised to the model.
 35 |    * @param acknowledgeSafetyCheckCallback - Called with each pending safety check message; return false to abort.
 36 |    */
 37 |   constructor(
 38 |     model: string = "computer-use-preview",
 39 |     computer: BrowserbaseBrowser,
 40 |     acknowledgeSafetyCheckCallback: AcknowledgeSafetyCheckCallback = () => true
 41 |   ) {
 42 |     this.client = new OpenAI();
 43 |     this.model = model;
 44 |     this.computer = computer;
 45 |     this.acknowledgeSafetyCheckCallback = acknowledgeSafetyCheckCallback;
 46 | 
 47 |     this.tools = [
 48 |       {
 49 |         type: "computer-preview",
 50 |         display_width: computer.dimensions[0],
 51 |         display_height: computer.dimensions[1],
 52 |         environment: computer.environment,
 53 |       },
 54 |       {
 55 |         type: "function",
 56 |         name: "back",
 57 |         description: "Go back to the previous page.",
 58 |         parameters: {},
 59 |         strict: false,
 60 |       },
 61 |       {
 62 |         type: "function",
 63 |         name: "goto",
 64 |         description: "Go to a specific URL.",
 65 |         parameters: {
 66 |           type: "object",
 67 |           properties: {
 68 |             url: {
 69 |               type: "string",
 70 |               description: "Fully qualified URL to navigate to.",
 71 |             },
 72 |           },
 73 |           additionalProperties: false,
 74 |           required: ["url"],
 75 |         },
 76 |         strict: false,
 77 |       },
 78 |     ];
 79 |     /* Some additional tools, disabled as they seem to slow down model performance
 80 |       {
 81 |         type: "function",
 82 |         name: "refresh",
 83 |         description: "Refresh the current page.",
 84 |         parameters: {},
 85 |         strict: false,
 86 |       },
 87 |       {
 88 |         type: "function",
 89 |         name: "listTabs",
 90 |         description: "Get the list of tabs, including the current tab.",
 91 |         parameters: {},
 92 |         strict: false,
 93 |       },
 94 |       {
 95 |         type: "function",
 96 |         name: "changeTab",
 97 |         description: "Change to a specific tab.",
 98 |         parameters: {
 99 |           type: "object",
100 |           properties: {
101 |             tab: {
102 |               type: "string",
103 |               description: "The URL of the tab to change to.",
104 |             },
105 |           },
106 |           additionalProperties: false,
107 |           required: ["tab"],
108 |         },
109 |         strict: false,
110 |       },
111 |       */
112 |   }
113 | 
114 |   /**
115 |    * POSTs `options` to the OpenAI Responses endpoint and returns the response body.
116 |    * Failed requests are logged and the original error is rethrown.
117 |    */
118 |   private async createResponse(options: RequestOptions): Promise<Response> {
119 |     const url = "https://api.openai.com/v1/responses";
120 |     const headers: Record<string, string> = {
121 |       Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
122 |       'Content-Type': 'application/json',
123 |       'Openai-beta': 'responses=v1',
124 |     };
125 | 
126 |     const openaiOrg = process.env.OPENAI_ORG;
127 |     if (openaiOrg) {
128 |       headers['Openai-Organization'] = openaiOrg;
129 |     }
130 | 
131 |     // Install the retry interceptors only once: every axiosRetry() call adds another
132 |     // pair to the shared axios instance, so per-request setup would stack them.
133 |     if (!Agent.retryConfigured) {
134 |       axiosRetry(axios, {
135 |         retries: 3,
136 |         retryDelay: axiosRetry.exponentialDelay,
137 |         // POST is not treated as idempotent, so 5xx responses are retried explicitly.
138 |         retryCondition: (error: AxiosError): boolean =>
139 |           axiosRetry.isNetworkOrIdempotentRequestError(error) ||
140 |           (error.response?.status ? error.response.status >= 500 : false),
141 |       });
142 |       Agent.retryConfigured = true;
143 |     }
144 | 
145 |     try {
146 |       const response = await axios.post(url, options, { headers });
147 |       return response.data;
148 |     } catch (error) {
149 |       const axiosError = error as AxiosError;
150 |       console.error(`Error: ${axiosError.response?.status} ${axiosError.response?.data || axiosError.message}`);
151 |       console.error(`${JSON.stringify(axiosError.response?.data)}`);
152 |       throw error;
153 |     }
154 |   }
155 | 
156 |   /**
157 |    * Requests the next batch of output items from the model.
158 |    * @returns The response's output items and its id (usable as `previousResponseId` on the next call).
159 |    */
160 |   async getAction(
161 |     inputItems: InputItem[],
162 |     previousResponseId: string | undefined
163 |   ): Promise<{
164 |     output: Item[];
165 |     responseId: string;
166 |   }> {
167 |     const response = await this.createResponse({
168 |       model: this.model,
169 |       input: inputItems,
170 |       tools: this.tools,
171 |       truncation: "auto",
172 |       ...(previousResponseId
173 |         ? { previous_response_id: previousResponseId }
174 |         : {}),
175 |     });
176 | 
177 |     console.log("response", response);
178 | 
179 |     return {
180 |       output: response.output as Item[],
181 |       responseId: response.id as string,
182 |     };
183 |   }
184 | 
185 |   /**
186 |    * Executes every actionable item in `output`, in order, and returns their outputs.
187 |    */
188 |   async takeAction(
189 |     output: Item[]
190 |   ): Promise<(Message | ComputerCallOutput | FunctionOutput)[]> {
191 |     // All actions share one page, so run them one at a time; starting them
192 |     // together lets one action's screenshot race with the next action.
193 |     const results: (Message | ComputerCallOutput | FunctionOutput)[] = [];
194 |     for (const item of output) {
195 |       // "message" items carry no action and are skipped.
196 |       if (item.type === "computer_call") {
197 |         results.push(await this.takeComputerAction(item as ComputerToolCall));
198 |       } else if (item.type === "function_call") {
199 |         results.push(await this.takeFunctionAction(item as FunctionToolCall));
200 |       }
201 |     }
202 | 
203 |     return results;
204 |   }
205 | 
206 |   /** Logs the first content part of a message (when step printing is on) and returns the message unchanged. */
207 |   async takeMessageAction(messageItem: Message): Promise<Message> {
208 |     if (this.printSteps && messageItem.content?.[0]) {
209 |       console.log(messageItem.content[0]);
210 |     }
211 |     return messageItem;
212 |   }
213 | 
214 |   /**
215 |    * Executes one computer_call on the browser and returns a fresh screenshot as its output.
216 |    * @throws If a pending safety check is not acknowledged or the action type is unsupported.
217 |    */
218 |   async takeComputerAction(
219 |     computerItem: ComputerToolCall
220 |   ): Promise<ComputerCallOutput> {
221 |     const action = computerItem.action;
222 |     const actionType = action.type;
223 |     const actionArgs = Object.fromEntries(
224 |       Object.entries(action).filter(([key]) => key !== "type")
225 |     );
226 | 
227 |     if (this.printSteps) {
228 |       console.log(`${actionType}(${JSON.stringify(actionArgs)})`);
229 |     }
230 | 
231 |     if (!this.computer) {
232 |       throw new Error("Computer not initialized");
233 |     }
234 | 
235 |     // Resolve safety checks before touching the browser, so a rejected check
236 |     // never leaves its action already executed.
237 |     const pendingChecks = computerItem.pending_safety_checks || [];
238 |     for (const check of pendingChecks) {
239 |       const message = check.message;
240 |       if (!this.acknowledgeSafetyCheckCallback(message)) {
241 |         throw new Error(
242 |           `Safety check failed: ${message}. Cannot continue with unacknowledged safety checks.`
243 |         );
244 |       }
245 |     }
246 | 
247 |     const method = (this.computer as unknown as Record<string, unknown>)[
248 |       actionType
249 |     ];
250 |     if (typeof method !== "function") {
251 |       throw new Error(`Unsupported computer action: ${actionType}`);
252 |     }
253 |     // Arguments are passed positionally, in the key order of the action object.
254 |     await method.apply(this.computer, Object.values(actionArgs));
255 | 
256 |     const screenshot = await this.computer.screenshot();
257 | 
258 |     return {
259 |       type: "computer_call_output",
260 |       call_id: computerItem.call_id,
261 |       acknowledged_safety_checks: pendingChecks,
262 |       output: {
263 |         type: "input_image",
264 |         image_url: `data:image/png;base64,${screenshot}`,
265 |       },
266 |     };
267 |   }
268 | 
269 |   /**
270 |    * Executes a function_call by invoking the browser method of the same name (e.g. `goto`, `back`).
271 |    */
272 |   async takeFunctionAction(
273 |     functionItem: FunctionToolCall
274 |   ): Promise<FunctionOutput> {
275 |     const name = functionItem.name;
276 |     // Fall back to an empty object if the call arrives without an argument string.
277 |     const args = JSON.parse(functionItem.arguments || "{}");
278 |     if (this.printSteps) {
279 |       console.log(`${name}(${JSON.stringify(args)})`);
280 |     }
281 | 
282 |     if (
283 |       this.computer &&
284 |       typeof (this.computer as unknown as Record<string, unknown>)[name] ===
285 |         "function"
286 |     ) {
287 |       const method = (this.computer as unknown as Record<string, unknown>)[
288 |         name
289 |       ] as (...args: unknown[]) => unknown;
290 |       await method.apply(this.computer, Object.values(args));
291 |     }
292 | 
293 |     return {
294 |       type: "function_call_output",
295 |       call_id: functionItem.call_id,
296 |       output: "success", // hard-coded output for demo
297 |     };
298 |   }
299 | }
300 | 


--------------------------------------------------------------------------------
/app/api/cua/agent/base_playwright.ts:
--------------------------------------------------------------------------------
  1 | import { Browser, Page } from 'playwright';
  2 | 
  3 | // Maps lowercase CUA-style key names to Playwright key names; keypress() lowercases a key before looking it up here.
  4 | const CUA_KEY_TO_PLAYWRIGHT_KEY: Record<string, string> = {
  5 |   "/": "Divide",
  6 |   "\\": "Backslash",
  7 |   "alt": "Alt",
  8 |   "arrowdown": "ArrowDown",
  9 |   "arrowleft": "ArrowLeft",
 10 |   "arrowright": "ArrowRight",
 11 |   "arrowup": "ArrowUp",
 12 |   "up": "ArrowUp",
 13 |   "down": "ArrowDown",
 14 |   "left": "ArrowLeft",
 15 |   "right": "ArrowRight",
 16 |   "backspace": "Backspace",
 17 |   "capslock": "CapsLock",
 18 |   "cmd": "Meta",
 19 |   "command": "Meta",
 20 |   "ctrl": "Control",
 21 |   "control": "Control",
 22 |   "delete": "Delete",
 23 |   "end": "End",
 24 |   "enter": "Enter",
 25 |   "esc": "Escape",
 26 |   "home": "Home",
 27 |   "insert": "Insert",
 28 |   "option": "Alt",
 29 |   "pagedown": "PageDown",
 30 |   "pageup": "PageUp",
 31 |   "shift": "Shift",
 32 |   "space": " ",
 33 |   "super": "Meta",
 34 |   "tab": "Tab",
 35 |   "win": "Meta",
 36 | };
 37 | 
 38 | const HOTKEYS: Record<string, string> = { // Lowercase modifier aliases that keypress() holds down while pressing the remaining keys
 39 |   "alt": "Alt", "option": "Alt",
 40 |   "ctrl": "Control",
 41 |   "control": "Control",
 42 |   "shift": "Shift",
 43 |   "meta": "Meta", "cmd": "Meta",
 44 |   "command": "Meta",
 45 |   "win": "Meta", "super": "Meta",
 46 | }
 47 | 
 48 | export type Environment = "browser"; // The only environment value this base class declares
 49 | 
 50 | /**
 51 |  * Abstract base for Playwright-based computers:
 52 |  * 
 53 |  * - Subclasses override `_getBrowserAndPage()` to do local or remote connection,
 54 |  *   returning [Browser, Page].
 55 |  * - This base class handles context creation (`connect`/`disconnect`),
 56 |  *   plus standard "Computer" actions like click, scroll, etc.
 57 |  * - We also have extra browser actions: `goto(url)` and `back()`.
 58 |  */
 59 | export abstract class BasePlaywrightComputer {
 60 |   environment: Environment = "browser";
 61 |   dimensions: [number, number] = [1024, 768];
 62 | 
 63 |   protected _browser: Browser | null = null;
 64 |   protected _page: Page | null = null;
 65 | 
 66 |   constructor() {
 67 |     this._browser = null;
 68 |     this._page = null;
 69 |   }
 70 | 
 71 |   /** Obtains the browser and page from the subclass hook and stores them on this instance. */
 72 |   async connect(): Promise<this> {
 73 |     const [browser, page] = await this._getBrowserAndPage();
 74 |     this._browser = browser;
 75 |     this._page = page;
 76 |     return this;
 77 |   }
 78 | 
 79 |   /** Closes the browser, if one is connected. */
 80 |   async disconnect(): Promise<void> {
 81 |     if (this._browser) {
 82 |       await this._browser.close();
 83 |     }
 84 |   }
 85 | 
 86 |   // --- Common "Computer" actions ---
 87 | 
 88 |   /** Captures the current viewport (not the full page) as a base64-encoded string. */
 89 |   async screenshot(): Promise<string> {
 90 |     if (!this._page) throw new Error("Page not initialized");
 91 |     const buffer = await this._page.screenshot({ fullPage: false });
 92 |     return buffer.toString('base64');
 93 |   }
 94 | 
 95 |   /**
 96 |    * Clicks at (x, y) with the given mouse button. String coordinates are parsed as base-10 integers.
 97 |    * A `button` of "wheel" dispatches a wheel event with (x, y) as the scroll deltas instead of clicking.
 98 |    *
 99 |    * @throws If either coordinate is not a number after parsing.
100 |    */
101 |   async click(button: string = "left", x: number | string, y: number | string): Promise<void> {
102 |     if (!this._page) throw new Error("Page not initialized");
103 |     const parsedX = typeof x === 'string' ? parseInt(x, 10) : x;
104 |     const parsedY = typeof y === 'string' ? parseInt(y, 10) : y;
105 |     if (isNaN(parsedX) || isNaN(parsedY)) {
106 |       throw new Error(`Invalid x or y coordinate: x=${x}, y=${y}`);
107 |     }
108 |     if (button === "wheel") {
109 |       await this._page.mouse.wheel(parsedX, parsedY);
110 |     } else {
111 |       await this._page.mouse.click(parsedX, parsedY, { button: button as "left" | "right" | "middle" });
112 |     }
113 |   }
114 | 
115 |   /** Double-clicks at (x, y). */
116 |   async double_click(x: number, y: number): Promise<void> {
117 |     if (!this._page) throw new Error("Page not initialized");
118 |     await this._page.mouse.dblclick(x, y);
119 |   }
120 | 
121 |   /** Scrolls by (scrollX, scrollY) with the pointer placed at (x, y). */
122 |   async scroll(x: number, y: number, scrollX: number, scrollY: number): Promise<void> {
123 |     if (!this._page) throw new Error("Page not initialized");
124 |     // Position the pointer first: the wheel event is delivered at the current mouse
125 |     // location, so wheeling before moving scrolls whatever was under the old position.
126 |     await this._page.mouse.move(x, y);
127 |     await this._page.mouse.wheel(scrollX, scrollY);
128 |   }
129 | 
130 |   /** Types `text` through the keyboard. */
131 |   async type(text: string): Promise<void> {
132 |     if (!this._page) throw new Error("Page not initialized");
133 |     await this._page.keyboard.type(text);
134 |   }
135 | 
136 |   /** Pauses for `ms` milliseconds. */
137 |   async wait(ms: number = 250): Promise<void> {
138 |     await new Promise(resolve => setTimeout(resolve, ms));
139 |   }
140 | 
141 |   /** Moves the pointer to (x, y). */
142 |   async move(x: number, y: number): Promise<void> {
143 |     if (!this._page) throw new Error("Page not initialized");
144 |     await this._page.mouse.move(x, y);
145 |   }
146 | 
147 |   /**
148 |    * Presses a key combination. Leading keys that are modifiers (per HOTKEYS) are held down
149 |    * while the remaining keys are pressed in order, then released. An empty list is a no-op.
150 |    */
151 |   async keypress(keys: string[]): Promise<void> {
152 |     if (!this._page) throw new Error("Page not initialized");
153 |     if (!keys.length) return;
154 |     const page = this._page;
155 | 
156 |     // Playwright key names are case-sensitive, so translate CUA-style names before pressing.
157 |     const toPlaywrightKey = (key: string): string =>
158 |       CUA_KEY_TO_PLAYWRIGHT_KEY[key.toLowerCase()] || key;
159 | 
160 |     // Collect every leading modifier so combos such as ctrl+shift+t hold both keys.
161 |     const held: string[] = [];
162 |     let index = 0;
163 |     while (index < keys.length && HOTKEYS[keys[index].toLowerCase()]) {
164 |       held.push(HOTKEYS[keys[index].toLowerCase()]);
165 |       index++;
166 |     }
167 | 
168 |     try {
169 |       for (const modifier of held) {
170 |         await page.keyboard.down(modifier);
171 |       }
172 |       for (const key of keys.slice(index)) {
173 |         await page.keyboard.press(toPlaywrightKey(key));
174 |       }
175 |     } finally {
176 |       // Release in reverse order, even if a press failed, so no modifier stays stuck.
177 |       for (const modifier of held.reverse()) {
178 |         await page.keyboard.up(modifier);
179 |       }
180 |     }
181 |   }
182 | 
183 |   /** Drags with the default mouse button from the first point of `path` through each following point. */
184 |   async drag(path: {x: number, y: number}[]): Promise<void> {
185 |     if (!this._page) throw new Error("Page not initialized");
186 |     if (!path.length) return;
187 | 
188 |     await this._page.mouse.move(path[0].x, path[0].y);
189 |     await this._page.mouse.down();
190 | 
191 |     for (let i = 1; i < path.length; i++) {
192 |       await this._page.mouse.move(path[i].x, path[i].y);
193 |     }
194 | 
195 |     await this._page.mouse.up();
196 |   }
197 | 
198 |   // --- Extra browser-oriented actions ---
199 | 
200 |   /** Navigates to `url`, waiting until the "domcontentloaded" state is reached. */
201 |   async goto(url: string): Promise<void> {
202 |     if (!this._page) throw new Error("Page not initialized");
203 |     await this._page.goto(url, { waitUntil: "domcontentloaded" });
204 |   }
205 | 
206 |   /** Navigates back one entry in the page history. */
207 |   async back(): Promise<void> {
208 |     if (!this._page) throw new Error("Page not initialized");
209 |     await this._page.goBack();
210 |   }
211 | 
212 |   // --- Subclass hook ---
213 | 
214 |   /** Establishes the connection and returns the browser and page to drive. */
215 |   protected abstract _getBrowserAndPage(): Promise<[Browser, Page]>;
216 | }


--------------------------------------------------------------------------------
/app/api/cua/agent/browserbase.ts:
--------------------------------------------------------------------------------
  1 | import * as dotenv from "dotenv";
  2 | import { Browser, Page, chromium } from "playwright";
  3 | import { BasePlaywrightComputer } from "./base_playwright";
  4 | import Browserbase from "@browserbasehq/sdk";
  5 | import { SessionCreateResponse } from "@browserbasehq/sdk/resources/sessions/sessions.mjs";
  6 | import axios from "axios";
  7 | 
  8 | dotenv.config(); // Load variables from a .env file into process.env before the class below reads them
  9 | 
 10 | // The SDK's session-create response plus the `connectUrl` this file passes to `chromium.connectOverCDP`.
 11 | interface BrowserbaseSession extends SessionCreateResponse {
 12 |   connectUrl: string;
 13 | }
 14 | 
 15 | // Shape of the payload this file passes to `bb.sessions.create` when starting a new session.
 16 | interface SessionCreateParams {
 17 |   projectId: string;
 18 |   browserSettings: {
 19 |     viewport: {
 20 |       width: number;
 21 |       height: number;
 22 |     };
 23 |     blockAds: boolean;
 24 |   };
 25 |   region: "us-west-2" | "us-east-1" | "eu-central-1" | "ap-southeast-1";
 26 |   proxies: boolean;
 27 |   keepAlive: boolean;
 28 | }
 29 | 
 30 | export class BrowserbaseBrowser extends BasePlaywrightComputer {
 31 |   /**
 32 |    * Browserbase is a headless browser platform that offers a remote browser API. You can use it to control thousands of browsers from anywhere.
 33 |    * With Browserbase, you can watch and control a browser in real-time, record and replay sessions, and use built-in proxies for more reliable browsing.
 34 |    * You can find more information about Browserbase at https://docs.browserbase.com/ or view our OpenAI CUA Quickstart at https://docs.browserbase.com/integrations/openai-cua/introduction.
 35 |    */
 36 | 
 37 |   private bb: Browserbase;
 38 |   private projectId: string;
 39 |   private session: BrowserbaseSession | null = null;
 40 |   private region: string;
 41 |   private proxies: boolean;
 42 |   private sessionId: string | null;
 43 | 
 44 |   constructor(
 45 |     width: number = 1024,
 46 |     height: number = 768,
 47 |     region: string = "us-east-1",
 48 |     proxies: boolean = true,
 49 |     sessionId: string | null = null
 50 |   ) {
 51 |     /**
 52 |      * Initialize the Browserbase instance. Additional configuration options for features such as persistent cookies, ad blockers, file downloads and more can be found in the Browserbase API documentation: https://docs.browserbase.com/reference/api/create-a-session
 53 |      *
 54 |      * @param width - The width of the browser viewport. Default is 1024.
 55 |      * @param height - The height of the browser viewport. Default is 768.
 56 |      * @param region - The region for the Browserbase session. Default is "us-east-1". Pick a region close to you for better performance. https://docs.browserbase.com/guides/multi-region
 57 |      * @param proxies - Whether to use a proxy for the session. Default is true. Turn on proxies if your browsing is frequently interrupted. https://docs.browserbase.com/features/proxies
 58 |      * @param sessionId - Optional. If provided, use an existing session instead of creating a new one.
 59 |      */
 60 |     super();
 61 |     // We're using a dynamic import here as a workaround since we don't have the actual types
 62 |     // In a real project, you would install the proper types and import correctly
 63 |     this.bb = new Browserbase({ apiKey: process.env.BROWSERBASE_API_KEY});
 64 |     this.projectId = process.env.BROWSERBASE_PROJECT_ID!;
 65 |     this.session = null;
 66 |     this.dimensions = [width, height];
 67 |     this.region = region;
 68 |     this.proxies = proxies;
 69 |     this.sessionId = sessionId;
 70 |   }
 71 | 
 72 |   protected async _getBrowserAndPage(): Promise<[Browser, Page]> {
 73 |     /**
 74 |      * Create a Browserbase session and connect to it, or connect to an existing session if a session ID is provided.
 75 |      *
 76 |      * @returns A tuple containing the connected browser and page objects.
 77 |      */
 78 |     if (this.sessionId) {
 79 |       // TODO: replace with this when we ship connectUrl via session GET to the SDK
 80 |       const response = await axios.get(
 81 |         `https://api.browserbase.com/v1/sessions/${this.sessionId}`,
 82 |         {
 83 |           headers: {
 84 |             "X-BB-API-Key": process.env.BROWSERBASE_API_KEY,
 85 |           },
 86 |         }
 87 |       );
 88 |       this.session = {
 89 |         connectUrl: response.data.connectUrl,
 90 |       } as unknown as BrowserbaseSession;
 91 |     } else {
 92 |       // Create a new session on Browserbase with specified parameters
 93 |       const [width, height] = this.dimensions;
 94 |       const sessionParams: SessionCreateParams = {
 95 |         projectId: this.projectId,
 96 |         browserSettings: {
 97 |           blockAds: true,
 98 |           viewport: {
 99 |             width,
100 |             height,
101 |           },
102 |         },
103 |         region: this.region as
104 |           | "us-west-2"
105 |           | "us-east-1"
106 |           | "eu-central-1"
107 |           | "ap-southeast-1",
108 |         proxies: true,
109 |         keepAlive: true,
110 |       };
111 | 
112 |       this.session = (await this.bb.sessions.create(
113 |         sessionParams
114 |       )) as unknown as BrowserbaseSession;
115 |     }
116 | 
117 |     if (!this.session) {
118 |       throw new Error("Failed to create or retrieve session");
119 |     }
120 | 
121 |     // Connect to the remote session
122 |     const browser = await chromium.connectOverCDP(this.session.connectUrl, {
123 |       timeout: 1000 * 60,
124 |     });
125 |     const context = browser.contexts()[0];
126 |     // Inject inline cursor-rendering script globally for every page
127 |     const pages = context.pages();
128 |     const page = pages[pages.length - 1];
129 |     page
130 |       .evaluate(() => {
131 |         const CURSOR_ID = "__cursor__";
132 | 
133 |         // Check if cursor element already exists
134 |         if (document.getElementById(CURSOR_ID)) return;
135 | 
136 |         const cursor = document.createElement("div");
137 |         cursor.id = CURSOR_ID;
138 |         Object.assign(cursor.style, {
139 |           position: "fixed",
140 |           top: "0px",
141 |           left: "0px",
142 |           width: "20px",
143 |           height: "20px",
144 |           backgroundImage:
145 |             "url(\"data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='black' stroke='white' stroke-width='1' stroke-linejoin='round' stroke-linecap='round'><polygon points='2,2 2,22 8,16 14,22 17,19 11,13 20,13'/></svg>\")",
146 |           backgroundSize: "cover",
147 |           pointerEvents: "none",
148 |           zIndex: "99999",
149 |           transform: "translate(-2px, -2px)",
150 |         });
151 | 
152 |         document.body.appendChild(cursor);
153 | 
154 |         document.addEventListener("mousemove", (e) => {
155 |           cursor.style.top = `${e.clientY}px`;
156 |           cursor.style.left = `${e.clientX}px`;
157 |         });
158 |         document.addEventListener("mousedown", (e) => {
159 |           cursor.style.top = `${e.clientY}px`;
160 |           cursor.style.left = `${e.clientX}px`;
161 |         });
162 |       })
163 |       .catch((error) => {
164 |         console.error("Error injecting cursor-rendering script:", error);
165 |       });
166 | 
167 |     // Only navigate to Google if it's a new session
168 |     if (!this.sessionId) {
169 |       await page.goto("https://www.google.com");
170 |     }
171 | 
172 |     return [browser, page];
173 |   }
174 | 
175 |   async disconnect(): Promise<void> {
176 |     /**
177 |      * Currently a no-op: the cleanup below is commented out, so calling this
178 |      * closes neither the page nor the browser and logs nothing.
179 |      *
180 |      * NOTE(review): presumably disabled so the remote session can be reused
181 |      * by later requests - confirm before re-enabling.
182 |      */
183 |     /* Disabled cleanup, kept for reference:
184 |     if (this._page) { await this._page.close(); }
185 |     if (this._browser) { await this._browser.close(); }
186 |     if (this.session) {
187 |       console.log(`Session completed. View replay at https://browserbase.com/sessions/${this.session.id}`);
188 |     }*/
189 |   }
190 | 
191 |   async screenshot(): Promise<string> {
192 |     /**
193 |      * Capture the current viewport as a base64-encoded PNG.
194 |      *
195 |      * A raw CDP capture is tried first; if anything on that path fails, the
196 |      * error is logged and Playwright's own screenshot is used instead.
197 |      * @returns The PNG image as a base64 string.
198 |      * @throws Error if no page has been initialized.
199 |      */
200 |     const page = this._page;
201 |     if (!page) {
202 |       throw new Error("Page not initialized");
203 |     }
204 |     try {
205 |       const cdpSession = await page.context().newCDPSession(page);
206 |       try {
207 |         const { data } = await cdpSession.send("Page.captureScreenshot", {
208 |           format: "png",
209 |           fromSurface: true,
210 |         });
211 |         return data; // CDP already returns base64 encoded string
212 |       } finally {
213 |         // A session is opened on every call; detach it so they do not pile up.
214 |         await cdpSession.detach().catch(() => undefined);
215 |       }
216 |     } catch (error) {
217 |       console.warn("CDP screenshot failed, falling back to standard screenshot:", error);
218 |       const buffer = await page.screenshot({ type: "png" });
219 |       return buffer.toString("base64");
220 |     }
221 |   }
222 | 
223 |   async refresh(): Promise<void> {
224 |     /**
225 |      * Reload the page this instance is tracking; throws when none is set.
226 |      */
227 |     const activePage = this._page;
228 |     if (!activePage) {
229 |       throw new Error("Page not initialized");
230 |     }
231 |     await activePage.reload();
232 |   }
233 | 
234 |   async listTabs(): Promise<string[]> {
235 |     /**
236 |      * Return the URL of every page in the browser context, followed by the
237 |      * URL of the active page (which is therefore listed again at the end).
238 |      */
239 |     const active = this._page;
240 |     if (!active) {
241 |       throw new Error("Page not initialized");
242 |     }
243 |     const openPages = await active.context().pages();
244 |     const urls = openPages.map((openPage) => openPage.url());
245 |     return urls.concat(active.url());
246 |   }
247 | 
248 |   async changeTab(tabUrl: string): Promise<void> {
249 |     /**
250 |      * Switch the active page to the first open page whose URL equals `tabUrl`;
251 |      * throws if no page is initialized or no open page matches.
252 |      */
253 |     if (!this._page) {
254 |       throw new Error("Page not initialized");
255 |     }
256 |     const openPages = await this._page.context().pages();
257 |     const [match] = openPages.filter((candidate) => candidate.url() === tabUrl);
258 |     if (!match) {
259 |       throw new Error(`Tab with URL ${tabUrl} not found`);
260 |     }
261 |     await match.bringToFront();
262 |     this._page = match;
263 |   }
264 | }
265 | 


--------------------------------------------------------------------------------
/app/api/cua/agent/types.ts:
--------------------------------------------------------------------------------
  1 | export type Includable = "output[*].file_search_call.search_results"; // element type of RequestOptions.include
  2 | // Input item carrying the string output for the function call named by `call_id`.
  3 | export type FunctionOutput = {
  4 |   type: "function_call_output";
  5 |   call_id: string;
  6 |   output: string;
  7 | };
  8 | // Input item answering a computer call with an image (presumably a screenshot - confirm).
  9 | export type ComputerCallOutput = {
 10 |   type: "computer_call_output";
 11 |   call_id: string;
 12 |   output: { type: "input_image"; image_url: string };
 13 |   acknowledged_safety_checks: SafetyCheck[];
 14 |   current_url?: string;
 15 | };
 16 | // Role-tagged message whose content is plain text or a list of input parts.
 17 | export type EasyMessage = {
 18 |   role: "system" | "user" | "assistant" | "developer";
 19 |   content: string | InputContent[];
 20 | };
 21 | // Reference to an item by id; note it is not a member of the InputItem union below.
 22 | export type ItemReference = {
 23 |   type: "item_reference";
 24 |   id: string;
 25 | };
 26 | // Everything that may appear in RequestOptions.input when it is an array.
 27 | export type InputItem = EasyMessage | FunctionOutput | ComputerCallOutput;
 28 | // Tools that can be listed in a request (and are echoed on Response.tools).
 29 | export type Tool = FunctionTool | ComputerTool;
 30 | // Computer tool description: display size plus the environment being driven.
 31 | export type ComputerTool = {
 32 |   type: "computer-preview";
 33 |   display_width: number;
 34 |   display_height: number;
 35 |   environment: "mac" | "windows" | "linux" | "browser";
 36 | };
 37 | // Function tool description; `parameters` is an untyped object (presumably a JSON schema - confirm).
 38 | export type FunctionTool = {
 39 |   type: "function";
 40 |   name: string;
 41 |   description: string | null;
 42 |   parameters: object;
 43 |   strict: boolean;
 44 | };
 45 | 
 46 | export type Item = Message | FunctionToolCall | ComputerToolCall | Reasoning; // element type of Response.output
 47 | // Message item with an id, a role and a list of content parts.
 48 | export type Message = {
 49 |   id: string;
 50 |   type: "message";
 51 |   role: "user" | "assistant" | "developer" | "system";
 52 |   content: Content[];
 53 | };
 54 | // Reasoning item; `content` is typed as an empty tuple, so it can never hold entries.
 55 | export type Reasoning = {
 56 |   id: string;
 57 |   type: "reasoning";
 58 |   content: [];
 59 | };
 60 | // Function call item; `arguments` is a raw string (presumably JSON text - confirm).
 61 | export type FunctionToolCall = {
 62 |   type: "function_call";
 63 |   id: string;
 64 |   call_id: string;
 65 |   name: string;
 66 |   arguments: string;
 67 |   output: Content[] | null;
 68 | };
 69 | // Every action shape a computer call can carry, discriminated by `type`.
 70 | export type ComputerAction =
 71 |   | Click
 72 |   | DoubleClick
 73 |   | Drag
 74 |   | Screenshot
 75 |   | KeyPress
 76 |   | Move
 77 |   | Scroll
 78 |   | Type
 79 |   | Wait;
 80 | // Computer call item: one action plus the safety checks still pending for it.
 81 | export type ComputerToolCall = {
 82 |   type: "computer_call";
 83 |   id: string;
 84 |   call_id: string;
 85 |   action: ComputerAction;
 86 |   pending_safety_checks: SafetyCheck[];
 87 | };
 88 | // Click with a named button at (x, y); presumably viewport pixels - confirm.
 89 | export type Click = {
 90 |   type: "click";
 91 |   button: "left" | "right" | "wheel" | "back" | "forward";
 92 |   x: number;
 93 |   y: number;
 94 | };
 95 | // Double click at (x, y); unlike Click it carries no button.
 96 | export type DoubleClick = {
 97 |   type: "double_click";
 98 |   x: number;
 99 |   y: number;
100 | };
101 | // Scroll by (scroll_x, scroll_y) with the pointer position given as (x, y).
102 | export type Scroll = {
103 |   type: "scroll";
104 |   x: number;
105 |   y: number;
106 |   scroll_x: number;
107 |   scroll_y: number;
108 | };
109 | // Type the given text.
110 | export type Type = {
111 |   type: "type";
112 |   text: string;
113 | };
114 | // Wait action; carries no duration field.
115 | export type Wait = {
116 |   type: "wait";
117 | };
118 | // Key press described by a list of key names.
119 | export type KeyPress = {
120 |   type: "keypress";
121 |   keys: string[];
122 | };
123 | // Drag along an ordered list of points.
124 | export type Drag = {
125 |   type: "drag";
126 |   path: {
127 |     x: number;
128 |     y: number;
129 |   }[];
130 | };
131 | // Screenshot request; carries no parameters.
132 | export type Screenshot = {
133 |   type: "screenshot";
134 | };
135 | // Move the pointer to (x, y).
136 | export type Move = {
137 |   type: "move";
138 |   x: number;
139 |   y: number;
140 | };
141 | // Safety check entry; used for both pending and acknowledged checks in this file.
142 | export type SafetyCheck = {
143 |   id: string;
144 |   code: string;
145 |   message: string;
146 | };
147 | 
148 | export type InputContent = InputText | InputImage | InputFile; // parts allowed in EasyMessage.content
149 | // Parts produced as output: text or a refusal.
150 | export type OutputContent = OutputText | Refusal;
151 | // Any content part; note the union also admits Reasoning items.
152 | export type Content = InputContent | OutputContent | Reasoning;
153 | // Plain text input part.
154 | export type InputText = {
155 |   type: "input_text";
156 |   text: string;
157 | };
158 | // Text output part with required annotations and optional log probabilities.
159 | export type OutputText = {
160 |   type: "output_text";
161 |   text: string;
162 |   logprobs?: LogProb[] | null;
163 |   annotations: Annotation[];
164 | };
165 | // Refusal part carrying the refusal text.
166 | export type Refusal = {
167 |   type: "refusal";
168 |   refusal: string;
169 | };
170 | // Image input part; both `image_url` and `file_id` are optional in this type.
171 | export type InputImage = {
172 |   type: "input_image";
173 |   image_url?: string;
174 |   file_id?: string;
175 |   detail: "high" | "low" | "auto";
176 | };
177 | // File input part; all three fields are required but nullable.
178 | export type InputFile = {
179 |   type: "input_file";
180 |   file_id: string | null;
181 |   filename: string | null;
182 |   file_data: string | null;
183 | };
184 | // Log-probability record for one token; `top_logprobs` nests the same shape.
185 | export type LogProb = {
186 |   token: string;
187 |   logprob: number;
188 |   bytes: number[];
189 |   top_logprobs?: LogProb[];
190 | };
191 | // Annotation citing a file by id and filename.
192 | export type FileCitation = {
193 |   type: "file_citation";
194 |   index: number;
195 |   file_id: string;
196 |   filename: string;
197 | };
198 | // Annotation pointing at a file by id.
199 | export type FilePath = {
200 |   type: "file_path";
201 |   file_id: string;
202 |   index: number;
203 | };
204 | // Annotation kinds that OutputText.annotations may contain.
205 | export type Annotation = FileCitation | FilePath;
206 | 
207 | export type RequestOptions = { // request shape; only `model` is required
208 |   model: string;
209 |   input?: string | InputItem[];
210 |   previous_response_id?: string;
211 |   include?: Includable[];
212 |   tools?: Tool[];
213 |   // NOTE(review): the output format lives under `text.format` here but `text.response_format` on Response - confirm.
214 |   metadata?: Record<string, string>;
215 |   tool_choice?:
216 |     | "none"
217 |     | "auto" // default
218 |     | "required"
219 |     | { type: "file_search" }
220 |     | { type: "computer" }
221 |     | { type: "function"; name: string };
222 |   text?: {
223 |     format?:
224 |       | { type: "text" } // default
225 |       | { type: "json_object" }
226 |       | {
227 |           type: "json_schema";
228 |           schema: object;
229 |           name: string;
230 |           description?: string;
231 |           strict?: boolean; // default true
232 |         };
233 |   };
234 |   temperature?: number; // default 1
235 |   top_p?: number; // default 1
236 |   truncation?: "auto" | "disabled";
237 |   parallel_tool_calls?: boolean; // default true
238 |   stream?: boolean;
239 |   reasoning?: { effort?: "low" | "medium" | "high" };
240 | };
241 | // Response shape. `Error` below is the global built-in type: this file declares no type of that name.
242 | export type Response = {
243 |   id: string;
244 |   object: "response";
245 |   created_at: number;
246 |   completed_at: number | null;
247 |   error: Error | null;
248 |   model: string;
249 |   tools: Tool[];
250 |   tool_choice: // lists "code_interpreter" where RequestOptions.tool_choice lists "computer"
251 |     | "none"
252 |     | "auto"
253 |     | "required"
254 |     | { type: "file_search" }
255 |     | { type: "code_interpreter" }
256 |     | { type: "function"; name: string };
257 |   text: {
258 |     response_format:
259 |       | { type: "text" } // default
260 |       | { type: "json_object" }
261 |       | {
262 |           type: "json_schema";
263 |           schema: object;
264 |           name: string;
265 |           description?: string;
266 |           strict: boolean | null;
267 |         };
268 |   };
269 |   previous_response_id: string | null;
270 |   output: Item[];
271 |   metadata: Record<string, string>;
272 |   // eslint-disable-next-line @typescript-eslint/no-explicit-any
273 |   usage: any | null;
274 | };
275 | 


--------------------------------------------------------------------------------
/app/api/cua/start/route.ts:
--------------------------------------------------------------------------------
 1 | import { NextResponse } from 'next/server';
 2 | import { Agent } from '../agent/agent';
 3 | import { BrowserbaseBrowser } from '../agent/browserbase';
 4 | import { InputItem } from '../agent/types';
 5 | 
 6 | export async function POST(request: Request) { // first turn of a task: returns a JSON array of agent steps
 7 |   let computer: BrowserbaseBrowser | null = null;
 8 |   let agent: Agent | null = null;
 9 |   // Anything thrown inside the try is logged and reported as a generic 500.
10 |   try {
11 |     const body = await request.json();
12 |     const { sessionId, userInput } = body;
13 |     // Note: both objects are constructed before the request body is validated.
14 |     computer = new BrowserbaseBrowser(1024, 768, "us-west-2", false, sessionId);
15 |     agent = new Agent("computer-use-preview", computer);
16 |     if (!sessionId || !userInput) {
17 |         return NextResponse.json(
18 |           { error: 'Missing sessionId or userInput in request body' },
19 |           { status: 400 }
20 |         );
21 |       }
22 | 
23 |       await computer.connect();
24 | 
25 |       // Detect a URL in userInput (http(s)://... or a bare domain with a listed TLD). This handler does not navigate to it itself; the match only selects the prompt and flow used below.
26 |       const urlPattern = /(https?:\/\/[^\s]+)|(?:^|\s)([a-zA-Z0-9-]+\.(?:com|org|edu|gov|net|io|ai|app|dev|co|me|info|biz)\b)/;
27 |       const urlMatch = userInput.match(urlPattern);
28 |       // With a URL present, the first user turn is a stand-in question instead of the real request.
29 |       const initialMessages: InputItem[] = [
30 |         {
31 |           "role": "developer",
32 |           "content": "You are a helpful assistant that can use a web browser to accomplish tasks. Your starting point is the Google search page. If you see nothing, trying going to Google."
33 |         },
34 |         {
35 |           "role": "user",
36 |           "content": urlMatch ? "What page are we on? Can you take a screenshot to confirm?" : userInput
37 |         }
38 |       ];
39 | 
40 |       // Ask the agent for its first step (no previous response id yet)
41 |       let stepResult = await agent.getAction(initialMessages, undefined);
42 |       // If the first reply already contains a message, return it without executing anything.
43 |       if (stepResult.output.length > 0 && stepResult.output.find(item => item.type === "message")) {
44 |         return NextResponse.json([stepResult]);
45 |       }
46 |       // Otherwise execute the returned items.
47 |       const actions = await agent.takeAction(stepResult.output);
48 | 
49 |       // This is a hack because function calling doesn't work if it's the first call made by the LLM.
50 |       if (urlMatch) {
51 |         let fakeAction;
52 |         let fakeStep;
53 |         let done = false;
54 |         // Alternate getAction/takeAction until a reply contains a message. NOTE(review): there is no iteration cap.
55 |         do {
56 |           if (fakeStep) {
57 |             fakeAction = await agent.getAction(fakeStep.filter(item => item.type === "computer_call_output"), fakeAction!.responseId);
58 |           } else {
59 |             fakeAction = await agent.getAction(actions.filter(item => item.type === "computer_call_output"), stepResult.responseId);
60 |           }
61 |           stepResult = fakeAction;
62 |           if (fakeAction.output.length > 0 && fakeAction.output.find(item => item.type === "message") != null) {
63 |             done = true;
64 |           } else {
65 |             fakeStep = await agent.takeAction(fakeAction.output);
66 |           }
67 |         } while (!done);
68 |         // Now send the user's real request as a follow-up to the last response.
69 |         stepResult = await agent.getAction([{
70 |           "role": "user",
71 |           "content": "Let's continue."
72 |         },{
73 |           "role": "user",
74 |           "content": userInput
75 |         }], stepResult.responseId);
76 |         return NextResponse.json([stepResult]);
77 |       }
78 |       // No URL in the input: build one step per executed action.
79 |       const nextStep = [];
80 |       // Messages are passed through as-is; every other result gets its own follow-up getAction call.
81 |       for (const action of actions) {
82 |         if ('type' in action && action.type === 'message') {
83 |           nextStep.push({output: [action], responseId: stepResult.responseId});
84 |         } else {
85 |           const nextStepResult = await agent.getAction([action], stepResult.responseId);
86 |           nextStep.push(nextStepResult);
87 |         }
88 |       }
89 | 
90 |       return NextResponse.json(nextStep);
91 |   } catch (error) {
92 |     console.error('Error in cua endpoint:', error);
93 |     return NextResponse.json(
94 |       { success: false, error: 'Failed to process request' },
95 |       { status: 500 }
96 |     );
97 |   }
98 | } 


--------------------------------------------------------------------------------
/app/api/cua/step/execute/route.ts:
--------------------------------------------------------------------------------
 1 | import { NextResponse } from 'next/server';
 2 | import { Agent } from '../../agent/agent';
 3 | import { BrowserbaseBrowser } from '../../agent/browserbase';
 4 | 
 5 | /**
 6 |  * POST handler: connects to the Browserbase session named in the body, runs
 7 |  * `agent.takeAction` on `output.output`, and replies with the result as JSON.
 8 |  * Responds 400 when `sessionId` is missing and 500 on any thrown error.
 9 |  */
10 | export async function POST(request: Request) {
11 |   try {
12 |     const payload = await request.json();
13 |     const { sessionId, output } = payload;
14 |     console.log("output", output);
15 | 
16 |     // The browser wrapper and agent are constructed before validation.
17 |     const computer = new BrowserbaseBrowser(1024, 768, "us-west-2", false, sessionId);
18 |     const agent = new Agent("computer-use-preview", computer);
19 |     if (!sessionId) {
20 |       return NextResponse.json(
21 |         { error: 'Missing sessionId in request body' },
22 |         { status: 400 }
23 |       );
24 |     }
25 | 
26 |     await computer.connect();
27 | 
28 |     const actionResults = await agent.takeAction(output.output);
29 |     return NextResponse.json(actionResults);
30 |   } catch (error) {
31 |     console.error('Error in cua endpoint:', error);
32 |     const failure = { success: false, error: 'Failed to process request' };
33 |     return NextResponse.json(failure, { status: 500 });
34 |   }
35 | }
36 | 


--------------------------------------------------------------------------------
/app/api/cua/step/generate/route.ts:
--------------------------------------------------------------------------------
 1 | import { NextResponse } from "next/server";
 2 | import { Agent } from "../../agent/agent";
 3 | import { BrowserbaseBrowser } from "../../agent/browserbase";
 4 | import { ComputerToolCall } from "../../agent/types";
 5 | 
 6 | /**
 7 |  * POST handler: asks the agent for its next step given `input` and the
 8 |  * previous `responseId`, and answers with a one-element JSON array.
 9 |  *
10 |  * Two follow-ups happen server-side before replying:
11 |  *  - a requested screenshot is taken immediately and fed back to the agent;
12 |  *  - a reply consisting of a single reasoning item is retried until the
13 |  *    agent returns something else.
14 |  * Responds 400 when `sessionId` is missing and 500 on any thrown error.
15 |  */
16 | export async function POST(request: Request) {
17 |   try {
18 |     const payload = await request.json();
19 |     const { sessionId, responseId, input } = payload;
20 |     console.log("input", input);
21 | 
22 |     const computer = new BrowserbaseBrowser(1024, 768, "us-west-2", false, sessionId);
23 |     const agent = new Agent("computer-use-preview", computer);
24 |     if (!sessionId) {
25 |       return NextResponse.json(
26 |         { error: "Missing sessionId in request body" },
27 |         { status: 400 }
28 |       );
29 |     }
30 | 
31 |     let result = await agent.getAction(input, responseId);
32 | 
33 |     // Handle a screenshot request right here to save the client a round trip.
34 |     const computerCall = result.output.find(
35 |       (item) => item.type === "computer_call"
36 |     ) as ComputerToolCall | undefined;
37 |     if (computerCall && computerCall.action.type === "screenshot") {
38 |       await computer.connect();
39 | 
40 |       const screenshotAction = await agent.takeAction(result.output);
41 |       const followUp = screenshotAction.filter((item) => item.type != "message");
42 |       result = await agent.getAction(followUp, result.responseId);
43 |     }
44 | 
45 |     // True while the latest reply is exactly one item and that item is reasoning.
46 |     const isReasoningOnly = () =>
47 |       result.output.length == 1 &&
48 |       result.output.some((item) => item.type === "reasoning");
49 | 
50 |     // Nudge the agent until it produces more than bare reasoning.
51 |     while (isReasoningOnly()) {
52 |       result = await agent.getAction(
53 |         [
54 |           { role: "user", content: "Please continue with the task." },
55 |         ],
56 |         result.responseId
57 |       );
58 |     }
59 | 
60 |     return NextResponse.json([result]);
61 |   } catch (error) {
62 |     console.error("Error in cua endpoint:", error);
63 |     return NextResponse.json(
64 |       { success: false, error: "Failed to process request" },
65 |       { status: 500 }
66 |     );
67 |   }
68 | }
69 | 


--------------------------------------------------------------------------------
/app/api/cua/types.ts:
--------------------------------------------------------------------------------
 1 | // types.ts - loosely typed request/response shapes. NOTE(review): nothing here is imported or exported, so these look like global script declarations (`Response`/`Message` may merge with same-named globals) - confirm.
 2 | // eslint-disable-next-line @typescript-eslint/no-explicit-any
 3 | type Json = Record<string, any>;
 4 | 
 5 | // Request options; unlike most fields, `model` and `input` are required.
 6 | // eslint-disable-next-line @typescript-eslint/no-unused-vars
 7 | interface ResponseOptions {
 8 |     model: string;
 9 |     previous_response_id?: string;
10 |     input: string | Json[];
11 |     include?: string[];
12 |     // eslint-disable-next-line @typescript-eslint/no-explicit-any
13 |     tools?: any[];
14 |     metadata?: Json;
15 |     temperature?: number;
16 |     top_p?: number;
17 |     parallel_tool_calls?: boolean;
18 |     stream?: boolean;
19 |     response_format?: Json;
20 |     tool_choice?: Json;
21 |     truncation?: string;
22 | }
23 | // Computer call output item; every action field besides `type` is optional.
24 | interface ComputerCall {
25 |     type: 'computer_call';
26 |     id: string;
27 |     action: {
28 |         type: string;
29 |         x?: number;
30 |         y?: number;
31 |         text?: string;
32 |         keys?: string[];
33 |         scroll_x?: number;
34 |         scroll_y?: number;
35 |     };
36 | }
37 | // Function call output item; `arguments` is a raw string.
38 | interface FunctionCall {
39 |     type: 'function_call';
40 |     id: string;
41 |     name: string;
42 |     arguments: string;
43 | }
44 | // Text output part.
45 | interface OutputText {
46 |     type: 'output_text';
47 |     text: string;
48 | }
49 | // Message item; `content` is a one-element tuple holding exactly one OutputText.
50 | interface Message {
51 |     type: 'message';
52 |     content: [OutputText];
53 | }
54 | // Response with an id and a list of output items.
55 | // eslint-disable-next-line @typescript-eslint/no-unused-vars
56 | interface Response {
57 |     id: string;
58 |     output: (ComputerCall | Message | FunctionCall | OutputText)[];
59 | }


--------------------------------------------------------------------------------
/app/api/session/[sessionId]/pages/route.ts:
--------------------------------------------------------------------------------
 1 | import Browserbase from "@browserbasehq/sdk";
 2 | import { NextResponse } from "next/server";
 3 | 
 4 | // Ask Browserbase for the session's debug info and return its `pages` list.
 5 | async function getOpenPages(sessionId: string) {
 6 |   const apiKey = process.env.BROWSERBASE_API_KEY!;
 7 |   const client = new Browserbase({ apiKey });
 8 |   const { pages } = await client.sessions.debug(sessionId);
 9 |   return pages;
10 | }
11 | 
12 | // GET handler: respond with `{ pages }` for the session id in the route params.
13 | export async function GET(
14 |   request: Request,
15 |   context: { params: Promise<{ sessionId: string }> }
16 | ) {
17 |   const routeParams = await context.params;
18 |   return NextResponse.json({ pages: await getOpenPages(routeParams.sessionId) });
19 | }
20 | 


--------------------------------------------------------------------------------
/app/api/session/route.ts:
--------------------------------------------------------------------------------
  1 | import { NextResponse } from "next/server";
  2 | import Browserbase from "@browserbasehq/sdk";
  3 | import { chromium } from "playwright-core";
  4 | 
  5 | type BrowserbaseRegion = // the region identifiers this module can return
  6 |   | "us-west-2"
  7 |   | "us-east-1"
  8 |   | "eu-central-1"
  9 |   | "ap-southeast-1";
 10 | 
 11 | // Exact timezone names mapped straight to us-east-1; consulted before the prefix table. Note that America/Chicago is included too.
 12 | const exactTimezoneMap: Record<string, BrowserbaseRegion> = {
 13 |   "America/New_York": "us-east-1",
 14 |   "America/Detroit": "us-east-1",
 15 |   "America/Toronto": "us-east-1",
 16 |   "America/Montreal": "us-east-1",
 17 |   "America/Boston": "us-east-1",
 18 |   "America/Chicago": "us-east-1",
 19 | };
 20 | 
 21 | // Region keyed by the part of the timezone name before the first "/" (e.g. "Europe")
 22 | const prefixToRegion: Record<string, BrowserbaseRegion> = {
 23 |   America: "us-west-2",
 24 |   US: "us-west-2",
 25 |   Canada: "us-west-2",
 26 |   Europe: "eu-central-1",
 27 |   Africa: "eu-central-1",
 28 |   Asia: "ap-southeast-1",
 29 |   Australia: "ap-southeast-1",
 30 |   Pacific: "ap-southeast-1",
 31 | };
 32 | 
 33 | // Hour-offset ranges to regions (inclusive bounds). Values between ranges, such as -3.5 or 4.5, match no entry.
 34 | const offsetRanges: {
 35 |   min: number;
 36 |   max: number;
 37 |   region: BrowserbaseRegion;
 38 | }[] = [
 39 |   { min: -24, max: -4, region: "us-west-2" }, // UTC-24 to UTC-4
 40 |   { min: -3, max: 4, region: "eu-central-1" }, // UTC-3 to UTC+4
 41 |   { min: 5, max: 24, region: "ap-southeast-1" }, // UTC+5 to UTC+24
 42 | ];
 43 | 
 44 | function getClosestRegion(timezone?: string): BrowserbaseRegion {
 45 |   // Region for an IANA timezone name: exact match, then the part before "/",
 46 |   // then the zone's current UTC offset. Anything else yields "us-west-2".
 47 |   try {
 48 |     if (!timezone) {
 49 |       return "us-west-2"; // Default if no timezone provided
 50 |     }
 51 | 
 52 |     // Own-property lookups, so inherited keys like "constructor" never match.
 53 |     const owns = (table: object, key: string) => Object.prototype.hasOwnProperty.call(table, key);
 54 |     if (owns(exactTimezoneMap, timezone)) {
 55 |       return exactTimezoneMap[timezone];
 56 |     }
 57 |     const prefix = timezone.split("/")[0];
 58 |     if (owns(prefixToRegion, prefix)) {
 59 |       return prefixToRegion[prefix];
 60 |     }
 61 | 
 62 |     // Offset fallback: take the zone's wall-clock fields for "now", rebuild them
 63 |     // as if they were UTC and diff against the real instant. (Re-parsing a
 64 |     // formatted date-only string does not give a UTC offset.)
 65 |     const now = new Date();
 66 |     const parts = new Intl.DateTimeFormat("en-US", {
 67 |       timeZone: timezone, hourCycle: "h23", year: "numeric", month: "numeric",
 68 |       day: "numeric", hour: "numeric", minute: "numeric", second: "numeric",
 69 |     }).formatToParts(now);
 70 |     const field = (type: string) => Number(parts.find((part) => part.type === type)?.value);
 71 |     const wallClock = Date.UTC(field("year"), field("month") - 1, field("day"), field("hour"), field("minute"), field("second"));
 72 |     const hourOffset = (wallClock - Math.floor(now.getTime() / 1000) * 1000) / 3600000;
 73 |     const matchingRange = offsetRanges.find((range) => hourOffset >= range.min && hourOffset <= range.max);
 74 |     return matchingRange?.region ?? "us-west-2";
 75 |   } catch {
 76 |     return "us-west-2"; // e.g. Intl rejects an unknown timezone name
 77 |   }
 78 | }
 79 | 
 80 | // Create a keep-alive Browserbase session in the region chosen for `timezone`
 81 | // and return it wrapped as `{ session }`.
 82 | async function createSession(timezone?: string) {
 83 |   const apiKey = process.env.BROWSERBASE_API_KEY!;
 84 |   const client = new Browserbase({ apiKey });
 85 | 
 86 |   // Debug output: the raw timezone and the region it resolves to.
 87 |   console.log("timezone ", timezone);
 88 |   console.log("getClosestRegion(timezone)", getClosestRegion(timezone));
 89 | 
 90 |   const session = await client.sessions.create({
 91 |     projectId: process.env.BROWSERBASE_PROJECT_ID!,
 92 |     browserSettings: {
 93 |       viewport: {
 94 |         width: 1024,
 95 |         height: 768,
 96 |       },
 97 |       blockAds: true,
 98 |     },
 99 |     keepAlive: true,
100 |     region: getClosestRegion(timezone),
101 |     proxies: true,
102 |     timeout: 600,
103 |   });
104 | 
105 |   return { session };
106 | }
107 | 
108 | // Request release of the given Browserbase session by updating its status
109 | // to "REQUEST_RELEASE".
110 | async function endSession(sessionId: string) {
111 |   const apiKey = process.env.BROWSERBASE_API_KEY!;
112 |   const projectId = process.env.BROWSERBASE_PROJECT_ID!;
113 |   const client = new Browserbase({ apiKey });
114 | 
115 |   await client.sessions.update(sessionId, { projectId, status: "REQUEST_RELEASE" });
116 | }
117 | 
118 | // Look up the session's debug info and return its fullscreen debugger URL.
119 | async function getDebugUrl(sessionId: string) {
120 |   const apiKey = process.env.BROWSERBASE_API_KEY!;
121 |   const client = new Browserbase({ apiKey });
122 |   const { debuggerFullscreenUrl } = await client.sessions.debug(sessionId);
123 |   return debuggerFullscreenUrl;
124 | }
125 | 
126 | // POST handler: create a session, point its first page at Google, and reply
127 | // with the session id plus its live-view and connect URLs (500 on failure).
128 | export async function POST(request: Request) {
129 |   try {
130 |     const { timezone } = (await request.json()) as { timezone: string };
131 |     const { session } = await createSession(timezone);
132 | 
133 |     const browser = await chromium.connectOverCDP(session.connectUrl);
134 |     const [defaultContext] = browser.contexts();
135 |     const [firstPage] = defaultContext.pages();
136 |     await firstPage.goto("https://www.google.com", {
137 |       waitUntil: "domcontentloaded",
138 |     });
139 | 
140 |     return NextResponse.json({
141 |       success: true,
142 |       sessionId: session.id,
143 |       sessionUrl: await getDebugUrl(session.id),
144 |       connectUrl: session.connectUrl,
145 |     });
146 |   } catch (error) {
147 |     console.error("Error creating session:", error);
148 |     const failure = { success: false, error: "Failed to create session" };
149 |     return NextResponse.json(failure, { status: 500 });
150 |   }
151 | }
152 | 
153 | // DELETE handler: release the session named by `sessionId` in the JSON body.
154 | export async function DELETE(request: Request) {
155 |   const { sessionId } = (await request.json()) as { sessionId: string };
156 |   await endSession(sessionId);
157 |   return NextResponse.json({ success: true });
158 | }
159 | 


--------------------------------------------------------------------------------
/app/components/AnimatedButton.tsx:
--------------------------------------------------------------------------------
 1 | import { motion } from "framer-motion";
 2 | 
 3 | interface AnimatedButtonProps { // props accepted by AnimatedButton
 4 |   type?: "button" | "submit"; // the component defaults this to "button"
 5 |   onClick?: () => void; // click handler, forwarded to the button element
 6 |   className?: string; // extra classes inserted into the built-in class list
 7 |   children: React.ReactNode; // button label/content, rendered before the shortcut hint
 8 | }
 9 | 
10 | /**
11 |  * Absolutely positioned call-to-action button that scales with a spring on
12 |  * hover and tap. After `children` it shows a "⌘+" hint and an arrow icon,
13 |  * both hidden below the `sm` breakpoint.
14 |  */
15 | export default function AnimatedButton({
16 |   type = "button",
17 |   onClick,
18 |   className = "",
19 |   children
20 | }: AnimatedButtonProps) {
21 |   return (
22 |     <motion.button
23 |       type={type}
24 |       onClick={onClick}
25 |       className={`absolute right-2 sm:right-5  px-2 py-1 sm:px-3 sm:py-1.5 md:px-4 md:py-2 bg-[#FF3B00] hover:bg-[#FF2200] text-white font-medium transition-colors ${className} group`}
26 |       whileHover={{ scale: 1.02 }}
27 |       whileTap={{ scale: 0.95 }}
28 |       transition={{ type: "spring", stiffness: 400, damping: 17 }}
29 |     >
30 |       <span className="flex items-center gap-1 font-ppsupply">
31 |         {children}
32 |         <span className="hidden sm:inline text-sm opacity-80 group-hover:opacity-100 transition-opacity">⌘+</span>
33 |         {/* A span (not a div): a button may only contain phrasing content. */}
34 |         <span className="hidden sm:block w-3 h-3 opacity-80 group-hover:opacity-100 transition-opacity">
35 |           <svg viewBox="0 0 18 19">
36 |             <path
37 |               d="M2.40088 13.2758H13.6766C15.2909 13.2758 16.5995 11.9672 16.5995 10.353V1M5.121 9.55976L1.40088 13.2799L5.121 17"
38 |               stroke="currentColor" fill="none"
39 |               strokeWidth="1.5"
40 |               strokeLinecap="square" strokeLinejoin="bevel"
41 |             />
42 |           </svg>
43 |         </span>
44 |       </span>
45 |     </motion.button>
46 |   );
47 | } 


--------------------------------------------------------------------------------
/app/components/BrowserSessionContainer.tsx:
--------------------------------------------------------------------------------
  1 | "use client";
  2 | 
  3 | import React, { useState, useEffect } from "react";
  4 | import { motion, AnimatePresence } from "framer-motion";
  5 | import { SessionControls } from "./SessionControls";
  6 | import { RotateCcwIcon } from "lucide-react";
  7 | 
  8 | interface BrowserSessionContainerProps { // props of BrowserSessionContainer
  9 |   sessionUrl: string | null; // null while no session URL is available yet
 10 |   isVisible: boolean; // the container is only rendered while this is true
 11 |   isCompleted: boolean; // true once the session has finished; the curtains then close
 12 |   initialMessage: string | undefined;
 13 |   sessionTime?: number; // the component defaults this to 0
 14 |   onStop?: () => void; // the component defaults this to a no-op
 15 |   onRestart?: () => void; // the component defaults this to a no-op
 16 | }
 17 | 
 18 | const containerVariants = { // animation states of the outer container: hidden -> visible -> exit
 19 |   hidden: { // starting state: transparent, shifted 20 down, slightly shrunk
 20 |     opacity: 0,
 21 |     y: 20,
 22 |     scale: 0.98,
 23 |   },
 24 |   visible: { // resting state, reached with a spring after a 0.2 delay
 25 |     opacity: 1,
 26 |     y: 0,
 27 |     scale: 1,
 28 |     transition: {
 29 |       type: "spring",
 30 |       stiffness: 300,
 31 |       damping: 30,
 32 |       mass: 1,
 33 |       delay: 0.2,
 34 |     },
 35 |   },
 36 |   exit: { // leaving state: fades out while moving 20 up, over 0.3 with ease-in-out
 37 |     opacity: 0,
 38 |     y: -20,
 39 |     scale: 0.98,
 40 |     transition: {
 41 |       duration: 0.3,
 42 |       ease: "easeInOut",
 43 |     },
 44 |   },
 45 | };
 46 | 
 47 | const leftCurtainVariants = { // left curtain: only `close` brings it on screen (x: 0%)
 48 |   hidden: { x: "-100%" }, // shifted fully to the left
 49 |   visible: { // same off-screen position, applied instantly (duration 0)
 50 |     x: "-100%",
 51 |     transition: {
 52 |       duration: 0,
 53 |     },
 54 |   },
 55 |   open: { // slides out to the left with a spring after a 0.2 delay
 56 |     x: "-100%",
 57 |     transition: {
 58 |       type: "spring",
 59 |       stiffness: 120,
 60 |       damping: 20,
 61 |       delay: 0.2,
 62 |     },
 63 |   },
 64 |   close: { // slides back to its natural position with the same spring, no delay
 65 |     x: "0%",
 66 |     transition: {
 67 |       type: "spring",
 68 |       stiffness: 120,
 69 |       damping: 20,
 70 |     },
 71 |   },
 72 | };
 73 | 
 74 | const rightCurtainVariants = { // right curtain: same states as the left one, with x at +100%
 75 |   hidden: { x: "100%" }, // shifted fully to the right
 76 |   visible: { // same off-screen position, applied instantly (duration 0)
 77 |     x: "100%",
 78 |     transition: {
 79 |       duration: 0,
 80 |     },
 81 |   },
 82 |   open: { // slides out to the right with a spring after a 0.2 delay
 83 |     x: "100%",
 84 |     transition: {
 85 |       type: "spring",
 86 |       stiffness: 120,
 87 |       damping: 20,
 88 |       delay: 0.2,
 89 |     },
 90 |   },
 91 |   close: { // slides back to its natural position with the same spring, no delay
 92 |     x: "0%",
 93 |     transition: {
 94 |       type: "spring",
 95 |       stiffness: 120,
 96 |       damping: 20,
 97 |     },
 98 |   },
 99 | };
100 | 
101 | /**
102 |  * Browser session frame: an iframe behind two sliding curtains, a loading
103 |  * placeholder until `sessionUrl` is set, a completion overlay once
104 |  * `isCompleted`, and session controls underneath from the md breakpoint up.
105 |  * `sessionTime`/`onStop` are forwarded to SessionControls; `onRestart` is the
106 |  * Restart button's click handler. Renders nothing while `isVisible` is false.
107 |  */
108 | const BrowserSessionContainer: React.FC<BrowserSessionContainerProps> = ({
109 |   sessionUrl,
110 |   isVisible,
111 |   isCompleted,
112 |   initialMessage,
113 |   sessionTime = 0,
114 |   onStop = () => {},
115 |   onRestart = () => {},
116 | }) => {
117 |   // Track the animation state of curtains
118 |   const [curtainState, setCurtainState] = useState<
119 |     "closed" | "opening" | "open" | "closing"
120 |   >("closed");
121 | 
122 |   // Handle curtain animation based on session state
123 |   useEffect(() => {
124 |     if (!isVisible) return;
125 |     if (isCompleted) {
126 |       // Session is completed, close the curtains
127 |       setCurtainState("closing");
128 |       return;
129 |     }
130 |     if (!sessionUrl) {
131 |       // Session is starting, curtains closed initially
132 |       setCurtainState("closed");
133 |       return;
134 |     }
135 |     // Session URL is available: wait 1s, start opening, then mark fully open
136 |     // 800ms later. A function returned from a setTimeout callback is never
137 |     // called, so both timers are cleared by the effect's own cleanup instead.
138 |     let openCompleteTimer: ReturnType<typeof setTimeout> | undefined;
139 |     const openTimer = setTimeout(() => {
140 |       setCurtainState("opening");
141 |       openCompleteTimer = setTimeout(() => setCurtainState("open"), 800);
142 |     }, 1000);
143 |     return () => {
144 |       clearTimeout(openTimer);
145 |       clearTimeout(openCompleteTimer);
146 |     };
147 |   }, [isVisible, sessionUrl, isCompleted]);
148 | 
149 |   // Both curtains animate to the same variant name.
150 |   const curtainVariant =
151 |     curtainState === "opening" || curtainState === "open" ? "open" : "close";
152 | 
153 |   return (
154 |     <AnimatePresence mode="wait">
155 |       {isVisible && (
156 |         <motion.div
157 |           className="w-full max-w-[1000px] mx-auto flex flex-col md:justify-center"
158 |           style={{ minHeight: "auto" }}
159 |           variants={containerVariants}
160 |           initial="hidden"
161 |           animate="visible"
162 |           exit="exit"
163 |           key={isCompleted ? "completed" : "active"}
164 |         >
165 |           {/* Browser frame */}
166 |           <div
167 |             className="w-full h-[250px] md:h-[600px] flex items-center justify-center overflow-hidden border border-[#CAC8C7] shadow-sm relative"
168 |             style={{
169 |               backgroundColor: "rgba(245, 240, 255, 0.75)",
170 |               backdropFilter: "blur(8px)",
171 |             }}
172 |           >
173 |             {/* Left Curtain */}
174 |             <motion.div
175 |               className="absolute top-0 left-0 w-1/2 h-full z-10"
176 |               style={{ backgroundColor: "#2E191E" }}
177 |               variants={leftCurtainVariants}
178 |               initial="visible"
179 |               animate={curtainVariant}
180 |             />
181 | 
182 |             {/* Right Curtain */}
183 |             <motion.div
184 |               className="absolute top-0 right-0 w-1/2 h-full z-10"
185 |               style={{ backgroundColor: "#2E191E" }}
186 |               variants={rightCurtainVariants}
187 |               initial="visible"
188 |               animate={curtainVariant}
189 |             />
190 |             {/* Browser Content */}
191 |             {!isCompleted ? (
192 |               sessionUrl ? (
193 |                 <iframe
194 |                   src={sessionUrl}
195 |                   className="w-full h-full border-none"
196 |                   sandbox="allow-same-origin allow-scripts allow-forms"
197 |                   allow="clipboard-read; clipboard-write"
198 |                   loading="lazy"
199 |                   referrerPolicy="no-referrer"
200 |                   title="Browser Session"
201 |                 />
202 |               ) : (
203 |                 <div
204 |                   className="w-full h-full flex flex-col items-center justify-center"
205 |                   style={{ backgroundColor: "rgba(245, 240, 255, 0.4)" }}
206 |                 >
207 |                   {/* Simple loading animation that will always show when session URL is not available */}
208 |                   <div className="flex flex-col items-center space-y-6 w-full animate-in fade-in slide-in-from-bottom-5 duration-500">
209 |                     <h2 className="text-2xl font-semibold text-white z-10 animate-in fade-in duration-700 delay-500">
210 |                       Starting CUA Browser
211 |                     </h2>
212 |                     <div className="flex flex-col items-center space-y-4 w-full animate-in fade-in duration-700 delay-500">
213 |                       <div className="mt-4 flex justify-center">
214 |                         <div className=" bg-gray-200 h-16 w-16 animate-pulse"></div>
215 |                       </div>
216 |                     </div>
217 |                   </div>
218 |                 </div>
219 |               )
220 |             ) : null}
221 | 
222 |             {/* Completion Message with AnimatePresence for fade in/out */}
223 |             <AnimatePresence>
224 |               {isCompleted && (
225 |                 <motion.div
226 |                   className="absolute inset-0 z-20 flex flex-col items-center justify-center p-3 md:p-8"
227 |                   style={{
228 |                     backdropFilter: "blur(3px)",
229 |                     backgroundColor: "rgba(46, 25, 30, 0.2)",
230 |                   }}
231 |                   initial={{ opacity: 0 }}
232 |                   animate={{ opacity: 1 }}
233 |                   exit={{ opacity: 0 }}
234 |                   transition={{ duration: 0.5 }}
235 |                 >
236 |                   <motion.div
237 |                     className="flex flex-col items-center space-y-3 md:space-y-6 w-full max-w-[95%] md:max-w-[80%] text-center bg-[rgba(46,25,30,0.7)] p-4 rounded-lg backdrop-blur-sm"
238 |                     initial={{ opacity: 0, y: 20 }}
239 |                     animate={{ opacity: 1, y: 0 }}
240 |                     exit={{ opacity: 0, y: -20 }}
241 |                     transition={{ duration: 0.6, delay: 0.2 }}
242 |                   >
243 |                     <motion.span
244 |                       className="text-lg md:text-3xl font-semibold text-white"
245 |                       initial={{ opacity: 0 }}
246 |                       animate={{ opacity: 1 }}
247 |                       transition={{ delay: 0.4 }}
248 |                     >
249 |                       Task completed
250 |                     </motion.span>
251 |                     <motion.span
252 |                       className="text-sm md:text-xl italic text-white break-words max-h-[150px] md:max-h-none overflow-y-auto px-2"
253 |                       initial={{ opacity: 0 }}
254 |                       animate={{ opacity: 1 }}
255 |                       transition={{ delay: 0.6 }}
256 |                     >
257 |                       &quot;{initialMessage}&quot;
258 |                     </motion.span>
259 | 
260 |                     <motion.a
261 |                       href="https://www.browserbase.com/sign-up"
262 |                       target="_blank"
263 |                       rel="noopener noreferrer"
264 |                       className="px-4 md:px-6 py-2 md:py-3 text-white text-base md:text-lg font-medium mt-4 md:mt-8 inline-block text-center"
265 |                       style={{
266 |                         background: "#F14A1C",
267 |                         backdropFilter: "blur(12px)",
268 |                         border: "1px solid rgba(255, 255, 255, 0.3)",
269 |                       }}
270 |                       initial={{ opacity: 0, scale: 0.9 }}
271 |                       animate={{ opacity: 1, scale: 1 }}
272 |                       transition={{ delay: 0.8 }}
273 |                       whileHover={{
274 |                         scale: 0.95,
275 |                         background: "#F14A1C",
276 |                       }}
277 |                       whileTap={{ scale: 0.98 }}
278 |                     >
279 |                       Want to try Browserbase?
280 |                     </motion.a>
281 |                     <motion.button
282 |                       type="button"
283 |                       onClick={onRestart}
284 |                       className="flex gap-x-2 text-white px-2 py-1 items-center"
285 |                     >
286 |                       <RotateCcwIcon className="size-4" />
287 |                       Restart
288 |                     </motion.button>
289 |                   </motion.div>
290 |                 </motion.div>
291 |               )}
292 |             </AnimatePresence>
293 |           </div>
294 |           {/* Timer below iframe on desktop - always reserve the space */}
295 |           <div className="h-[42px] mt-4 hidden md:block">
296 |             {!isCompleted && sessionUrl && (
297 |               <motion.div
298 |                 className="w-full flex justify-center items-center space-x-1 text-sm text-[#2E191E]"
299 |                 initial={{ opacity: 0 }}
300 |                 animate={{ opacity: 1 }}
301 |                 transition={{
302 |                   delay: 1.5,
303 |                   duration: 0.5,
304 |                   type: "spring",
305 |                   stiffness: 300,
306 |                   damping: 25,
307 |                 }}
308 |               >
309 |                 <SessionControls sessionTime={sessionTime} onStop={onStop} />
310 |               </motion.div>
311 |             )}
312 |           </div>
313 |         </motion.div>
314 |       )}
315 |     </AnimatePresence>
316 |   );
317 | };
318 | 
319 | export default BrowserSessionContainer;
320 | 


--------------------------------------------------------------------------------
/app/components/BrowserTabs.tsx:
--------------------------------------------------------------------------------
  1 | import { cn } from "@/lib/utils";
  2 | import { SessionLiveURLs } from "@browserbasehq/sdk/resources/index.mjs";
  3 | import { useEffect, useState } from "react";
  4 | 
  5 | // Shared across calls: the in-flight request's controller and a failure streak.
  6 | let abortController: AbortController | null = null;
  7 | let errors = 0;
  8 | /**
  9 |  * Fetches a session's pages, aborting any request that is still in flight.
 10 |  * Never rejects: aborted, failed or malformed responses all resolve to [].
 11 |  * Non-OK responses are logged only once more than 3 occur in a row.
 12 |  */
 13 | async function getPages(sessionId: string): Promise<SessionLiveURLs.Page[]> {
 14 |   // abort any previous requests
 15 |   abortController?.abort("Aborted previous request");
 16 |   abortController = new AbortController();
 17 |   try {
 18 |     const res = await fetch(`/api/session/${sessionId}/pages`, {
 19 |       signal: abortController.signal,
 20 |     });
 21 |     if (!res.ok) {
 22 |       errors++;
 23 |       if (errors > 3) {
 24 |         const failure = new Error("Failed to fetch pages");
 25 |         console.error("Error fetching pages:", failure);
 26 |       }
 27 |       return [];
 28 |     }
 29 |     const data: unknown = await res.json();
 30 |     errors = 0;
 31 |     // Tolerate a payload without a `pages` array so callers can read .length.
 32 |     const pages = (data as { pages?: unknown } | null)?.pages;
 33 |     return Array.isArray(pages) ? (pages as SessionLiveURLs.Page[]) : [];
 34 |   } catch (error: unknown) {
 35 |     // abort error is expected when the request is aborted
 36 |     const aborted =
 37 |       (error instanceof Error && error.name === "AbortError") ||
 38 |       error === "Aborted previous request";
 39 |     if (!aborted) console.error("Error fetching pages:", error);
 40 |     return [];
 41 |   }
 42 | }
 43 | 
 44 | // Polling period for the page list, in milliseconds.
 45 | const refetchInterval = 5000;
 46 | 
 47 | /**
 48 |  * Tab strip for a session's open pages. Polls getPages every
 49 |  * `refetchInterval` ms, activates the newest page whenever the list grows,
 50 |  * and renders nothing until there are at least two pages and an active one.
 51 |  */
 52 | export default function BrowserTabs({
 53 |   sessionId,
 54 |   activePage,
 55 |   setActivePage,
 56 | }: {
 57 |   sessionId: string;
 58 |   activePage: SessionLiveURLs.Page | null;
 59 |   setActivePage: (page: SessionLiveURLs.Page) => void;
 60 | }) {
 61 |   const [pages, setPages] = useState<SessionLiveURLs.Page[]>([]);
 62 | 
 63 |   useEffect(() => {
 64 |     // Set on cleanup so a late (or aborted) response cannot overwrite state.
 65 |     let cancelled = false;
 66 |     const refetchPages = async () => {
 67 |       const p = await getPages(sessionId);
 68 |       if (cancelled) return;
 69 |       // when a new page is added, set the active page to the last page
 70 |       if (p.length > pages.length) setActivePage(p[p.length - 1]);
 71 |       setPages(p);
 72 |     };
 73 | 
 74 |     refetchPages();
 75 |     const interval = setInterval(refetchPages, refetchInterval);
 76 |     return () => {
 77 |       cancelled = true;
 78 |       clearInterval(interval);
 79 |     };
 80 |   }, [pages.length, sessionId, setActivePage]);
 81 | 
 82 |   // fallback to first page if activePageId is not found
 83 |   useEffect(() => {
 84 |     if (!activePage && pages.length > 0) setActivePage(pages[0]);
 85 |   }, [activePage, pages, setActivePage]);
 86 | 
 87 |   // hide tabs if there is only one page (or none), or no page is active yet
 88 |   if (pages.length < 2 || !activePage) return null;
 89 | 
 90 |   const tabLoading = (t: SessionLiveURLs.Page) => !(t.title || t.url);
 91 | 
 92 |   return (
 93 |     <div className="w-full overflow-x-auto max-w-[1000px] [&::-webkit-scrollbar]:hidden [-ms-overflow-style:none] [scrollbar-width:none]">
 94 |       <div
 95 |         className="grid gap-2 w-full justify-start"
 96 |         style={{
 97 |           gridTemplateColumns: `repeat(${pages.length}, minmax(100px,300px))`,
 98 |         }}
 99 |       >
100 |         {pages.map((page) => (
101 |           <div
102 |             key={page.id}
103 |             onClick={() => setActivePage(page)}
104 |             className={cn(
105 |               "bg-[rgb(248,248,255)] rounded-[2px] text-gray-500 border border-[rgb(245,235,255)] text-sm flex gap-x-1 py-1 px-1.5 max-w-[300px] cursor-pointer hover:border-gray-400",
106 |               {
107 |                 "bg-[rgb(245,240,255)] text-gray-800 border-[rgb(179,170,170)]":
108 |                   page.id === activePage?.id,
109 |               }
110 |             )}
111 |           >
112 |             {page.faviconUrl && (
113 |               // eslint-disable-next-line @next/next/no-img-element
114 |               <img src={page.faviconUrl} alt={page.title} className="size-3" />
115 |             )}
116 |             {tabLoading(page) ? (
117 |               <span className="text-gray-400 animate-pulse">Loading...</span>
118 |             ) : (
119 |               <span className="truncate text-ellipsis whitespace-nowrap">
120 |                 {page.title || page.url}
121 |               </span>
122 |             )}
123 |           </div>
124 |         ))}
125 |       </div>
126 |     </div>
127 |   );
128 | }
129 | 


--------------------------------------------------------------------------------
/app/components/ChatBlock.tsx:
--------------------------------------------------------------------------------
  1 | "use client";
  2 | 
  3 | import { AnimatePresence, motion } from "framer-motion";
  4 | import { useState, useCallback, useEffect } from "react";
  5 | import { useWindowSize } from "usehooks-ts";
  6 | 
  7 | interface ChatBlockProps { // Props of the ChatBlock overlay.
  8 |   isVisible: boolean; // Gates both rendering and the keyboard shortcuts.
  9 |   onClose: () => void; // Called on Escape and by the Close button.
 10 |   initialMessage?: string; // Seeds the first message and the iframe's search query.
 11 | }
 12 | 
 13 | export interface Message { // One chat entry kept in ChatBlock's `messages` state.
 14 |   id: string; // ChatBlock fills this with Date.now().toString().
 15 |   content: string; // Message text (the typed input or the initial message).
 16 |   role: "user" | "assistant"; // ChatBlock itself only creates "user" messages.
 17 |   timestamp: Date; // Set to new Date() when the message is created.
 18 | }
 19 | 
 20 | /**
 21 |  * Full-screen overlay styled as a browser window whose iframe shows a Google
 22 |  * search for `initialMessage` (or about:blank). While `isVisible`, Escape
 23 |  * calls `onClose`, Cmd/Ctrl+Enter submits the pending input and Cmd/Ctrl+K
 24 |  * toggles the sidebar flag.
 25 |  */
 26 | export default function ChatBlock({
 27 |   isVisible,
 28 |   onClose,
 29 |   initialMessage,
 30 | }: ChatBlockProps) {
 31 |   const [messages, setMessages] = useState<Message[]>([]);
 32 |   const [input, setInput] = useState("");
 33 |   const [isSidebarOpen, setIsSidebarOpen] = useState(true);
 34 |   const { width: windowWidth } = useWindowSize();
 35 |   const isMobile = windowWidth ? windowWidth < 768 : false;
 36 |   const [currentUrl] = useState("");
 37 | 
 38 |   // Spring configuration for smoother animations
 39 |   const springConfig = {
 40 |     type: "spring",
 41 |     stiffness: 350,
 42 |     damping: 30,
 43 |   };
 44 | 
 45 |   // Animation variants for the main container
 46 |   const containerVariants = {
 47 |     hidden: { opacity: 0, scale: 0.8 },
 48 |     visible: {
 49 |       opacity: 1,
 50 |       scale: 1,
 51 |       transition: springConfig,
 52 |     },
 53 |     exit: { opacity: 0, scale: 0.8, transition: { duration: 0.2 } },
 54 |   };
 55 | 
 56 |   // Appends the pending input as a user message and clears it. The event is
 57 |   // typed structurally so both React form events and DOM keyboard events fit
 58 |   // without a cast.
 59 |   const handleSubmit = useCallback(
 60 |     (e: { preventDefault: () => void }) => {
 61 |       e.preventDefault();
 62 |       if (!input.trim()) return;
 63 | 
 64 |       const newMessage: Message = {
 65 |         id: Date.now().toString(),
 66 |         content: input,
 67 |         role: "user",
 68 |         timestamp: new Date(),
 69 |       };
 70 | 
 71 |       setMessages((prev) => [...prev, newMessage]);
 72 |       setInput("");
 73 |     },
 74 |     [input]
 75 |   );
 76 | 
 77 |   // Seed the conversation with the initial message whenever the block is
 78 |   // visible and the history is still empty.
 79 |   useEffect(() => {
 80 |     if (isVisible && initialMessage && messages.length === 0) {
 81 |       const newMessage: Message = {
 82 |         id: Date.now().toString(),
 83 |         content: initialMessage,
 84 |         role: "user",
 85 |         timestamp: new Date(),
 86 |       };
 87 |       setMessages([newMessage]);
 88 |     }
 89 |   }, [isVisible, initialMessage, messages.length]);
 90 | 
 91 |   useEffect(() => {
 92 |     const handleKeyDown = (e: KeyboardEvent) => {
 93 |       if (!isVisible) return; // Only handle shortcuts when chat is visible
 94 | 
 95 |       // Handle ESC to close
 96 |       if (e.key === "Escape") {
 97 |         e.preventDefault();
 98 |         onClose();
 99 |       }
100 | 
101 |       // Handle CMD+Enter or CTRL+Enter to submit
102 |       if ((e.metaKey || e.ctrlKey) && e.key === "Enter" && input.trim()) {
103 |         handleSubmit(e);
104 |       }
105 | 
106 |       // Handle CMD+K or CTRL+K to toggle sidebar
107 |       if ((e.metaKey || e.ctrlKey) && e.key === "k") {
108 |         e.preventDefault();
109 |         // Functional update: the listener needs no re-subscription per toggle.
110 |         setIsSidebarOpen((open) => !open);
111 |       }
112 |     };
113 | 
114 |     window.addEventListener("keydown", handleKeyDown);
115 |     return () => window.removeEventListener("keydown", handleKeyDown);
116 |   }, [onClose, handleSubmit, input, isVisible]);
117 | 
118 |   return (
119 |     <AnimatePresence mode="sync">
120 |       {isVisible && (
121 |         <motion.div
122 |           className="flex flex-row h-dvh w-dvw fixed top-0 left-0 z-50 bg-white"
123 |           initial={{ opacity: 0 }}
124 |           animate={{ opacity: 1 }}
125 |           exit={{ opacity: 0 }}
126 |         >
127 | 
128 |           <motion.div
129 |             className="fixed bg-white h-dvh flex flex-col"
130 |             variants={containerVariants}
131 |             initial="hidden"
132 |             animate="visible"
133 |             exit="exit"
134 |             layoutId="chat-content"
135 |           >
136 |             <motion.div
137 |               className="p-6 flex justify-between items-center border-b border-gray-200"
138 |               initial={{ opacity: 0 }}
139 |               animate={{ opacity: 1 }}
140 |               transition={{ delay: 0.2 }}
141 |             >
142 |               <div className="flex items-center gap-4">
143 |                 {!isMobile && (
144 |                   <motion.button
145 |                     onClick={() => setIsSidebarOpen(!isSidebarOpen)}
146 |                     className="p-2 hover:bg-gray-100 text-gray-600 hover:text-gray-900 transition-colors "
147 |                     whileHover={{ scale: 1.05 }}
148 |                     whileTap={{ scale: 0.95 }}
149 |                   >
150 |                     {isSidebarOpen ? "←" : "→"}
151 |                   </motion.button>
152 |                 )}
153 |                 <h2 className="text-lg font-ppneue text-gray-900">Browser</h2>
154 |               </div>
155 |               <motion.button
156 |                 onClick={onClose}
157 |                 className="p-2 hover:bg-gray-100 text-gray-600 hover:text-gray-900 transition-colors font-ppsupply"
158 |                 whileHover={{ scale: 1.05 }}
159 |                 whileTap={{ scale: 0.95 }}
160 |               >
161 |                 Close
162 |               </motion.button>
163 |             </motion.div>
164 | 
165 |             <motion.div
166 |               className="flex-1 p-8"
167 |               initial={{ opacity: 0 }}
168 |               animate={{ opacity: 1 }}
169 |               transition={{ delay: 0.3 }}
170 |             >
171 |               <div className="w-full h-full bg-gray-50 border border-gray-200 shadow-sm  overflow-hidden flex flex-col">
172 |                 {/* Browser Chrome */}
173 |                 <div className="w-full bg-white border-b border-gray-200">
174 |                   {/* Window Controls */}
175 |                   <div className="h-12 flex items-center px-4 border-b border-gray-200">
176 |                     <div className="flex items-center gap-2">
177 |                       <div className="w-3 h-3 bg-red-500" />
178 |                       <div className="w-3 h-3 bg-yellow-500" />
179 |                       <div className="w-3 h-3 bg-green-500" />
180 |                     </div>
181 |                   </div>
182 |                   {/* Navigation Bar */}
183 |                   <div className="h-12 flex items-center px-4 gap-4">
184 |                     <button className="text-gray-400 hover:text-gray-600 font-ppsupply">
185 |                       ←
186 |                     </button>
187 |                     <button className="text-gray-400 hover:text-gray-600 font-ppsupply">
188 |                       →
189 |                     </button>
190 |                     <button className="text-gray-400 hover:text-gray-600 font-ppsupply">
191 |                       ↻
192 |                     </button>
193 |                     <div className="flex-1 px-4 py-1.5 bg-gray-100 text-sm text-gray-600 font-ppsupply">
194 |                       {currentUrl || "about:blank"}
195 |                     </div>
196 |                   </div>
197 |                 </div>
198 | 
199 |                 {/* Browser Content */}
200 |                 <div className="flex-1 bg-white">
201 |                   <iframe
202 |                     className="w-full h-full border-none"
203 |                     src={
204 |                       initialMessage
205 |                         ? `https://www.google.com/search?q=${encodeURIComponent(
206 |                             initialMessage
207 |                           )}`
208 |                         : "about:blank"
209 |                     }
210 |                     sandbox="allow-same-origin allow-scripts allow-popups allow-forms allow-top-navigation"
211 |                     allow="clipboard-read; clipboard-write"
212 |                     title="Browser Content"
213 |                   />
214 |                 </div>
215 |               </div>
216 |             </motion.div>
217 |           </motion.div>
218 |         </motion.div>
219 |       )}
220 |     </AnimatePresence>
221 |   );
222 | }
223 | 


--------------------------------------------------------------------------------
/app/components/ChatFeed.tsx:
--------------------------------------------------------------------------------
   1 | "use client";
   2 | 
   3 | import { motion } from "framer-motion";
   4 | import { useState, useEffect, useCallback, useRef } from "react";
   5 | import React from "react";
   6 | import { useWindowSize } from "usehooks-ts";
   7 | import Image from "next/image";
   8 | import posthog from "posthog-js";
   9 | import {
  10 |   FunctionOutput,
  11 |   Item,
  12 |   ComputerCallOutput,
  13 |   OutputText,
  14 | } from "../api/cua/agent/types";
  15 | // import { SlidingNumber } from "../components/ui/sliding-number";
  16 | import { Layers, Pin } from "lucide-react";
  17 | import { SessionControls } from "./SessionControls";
  18 | import BrowserSessionContainer from "./BrowserSessionContainer";
  19 | import { SessionLiveURLs } from "@browserbasehq/sdk/resources/index.mjs";
  20 | import BrowserTabs from "./BrowserTabs";
  21 | 
  22 | interface ChatFeedProps { // Props of the chat feed component exported from this file.
  23 |   initialMessage?: string;
  24 |   onClose: () => void;
  25 |   url?: string; // NOTE(review): not destructured by LegacyChatFeed's parameter list; confirm it is still needed.
  26 | }
  27 | 
  28 | export interface BrowserStep { // Element type of the `steps` arrays in AgentState and the UI state.
  29 |   text: string;
  30 |   reasoning: string; // NOTE(review): presumably the explanatory sentence for the step; confirm against the producer.
  31 |   tool: // Upper-case tag naming the kind of step; one of the literals below.
  32 |     | "GOTO"
  33 |     | "ACT"
  34 |     | "EXTRACT"
  35 |     | "OBSERVE"
  36 |     | "CLOSE"
  37 |     | "WAIT"
  38 |     | "NAVBACK"
  39 |     | "MESSAGE"
  40 |     | "CLICK"
  41 |     | "TYPE"
  42 |     | "KEYPRESS"
  43 |     | "SCROLL"
  44 |     | "DOUBLECLICK"
  45 |     | "DRAG"
  46 |     | "SCREENSHOT"
  47 |     | "MOVE";
  48 |   instruction: string;
  49 |   stepNumber?: number; // Optional; NOTE(review): presumably the step's position in the run; verify.
  50 |   messageId?: string; // Optional; NOTE(review): presumably links the step to a chat message; verify.
  51 | }
  52 | 
  53 | interface AgentState { // Shape of the value held in `agentStateRef` inside LegacyChatFeed.
  54 |   sessionId: string | null; // Initialised to null in the ref.
  55 |   sessionUrl: string | null; // Initialised to null in the ref.
  56 |   connectUrl: string | null; // Initialised to null in the ref.
  57 |   steps: BrowserStep[]; // Initialised to an empty array in the ref.
  58 |   isLoading: boolean; // Initialised to false in the ref.
  59 | }
  60 | 
  61 | // formatTime moved to SessionControls component
  62 | 
  63 | /**
  64 |  * Expands an action into a longer reasoning sentence: starts from
  65 |  * `createTaskDescription(action, actionType)` and adds a per-type rationale
  66 |  * (some branches replace the description), citing `contextClues.goal` if set.
  67 |  * @param action - Raw action payload; `keys`, `url`, `path` and `text` are read.
  68 |  * @param actionType - Discriminator such as "click", "type" or "goto".
  69 |  * @param contextClues - Context bag; only `goal` is consulted.
  70 |  * @param createTaskDescription - Produces the short description; called once.
  71 |  * @returns The sentence for `actionType`, or a generic one for unknown types.
  72 |  */
  73 | const generateDetailedReasoning = (
  74 |   action: Record<string, unknown>,
  75 |   actionType: string,
  76 |   contextClues: Record<string, unknown>,
  77 |   createTaskDescription: (
  78 |     action: Record<string, unknown>,
  79 |     actionType: string
  80 |   ) => string
  81 | ): string => {
  82 |   // Get basic description first
  83 |   const basicDescription = createTaskDescription(action, actionType);
  84 |   const goal = contextClues.goal;
  85 | 
  86 |   switch (actionType) {
  87 |     case "click":
  88 |       return goal
  89 |         ? `${basicDescription} to begin searching for information about ${goal}. This interaction initiates the search process.`
  90 |         : `${basicDescription} to interact with the page interface. This helps navigate through the content to find the requested information.`;
  91 | 
  92 |     case "type":
  93 |       return goal
  94 |         ? `${basicDescription} to search for specific information about ${goal}. Entering these search terms will help retrieve relevant results.`
  95 |         : `${basicDescription} to provide input needed for this search. This text will help narrow down the results to find the specific information requested.`;
  96 | 
  97 |     case "keypress": {
  98 |       const keys = Array.isArray(action.keys) ? action.keys.join(", ") : "";
  99 |       if (keys.includes("ENTER")) {
 100 |         return `Submitting the search query to find information about ${
 101 |           goal || "the requested topic"
 102 |         }. This will execute the search and retrieve relevant results.`;
 103 |       }
 104 |       return `${basicDescription} to efficiently interact with the page. This keyboard interaction helps streamline the navigation process.`;
 105 |     }
 106 | 
 107 |     case "scroll":
 108 |       return `${basicDescription} to view additional content that might contain the requested information about ${
 109 |         goal || "the topic"
 110 |       }. Scrolling allows examining more search results or content.`;
 111 | 
 112 |     case "goto":
 113 |       // The parsed URL never reaches the sentence; parsing is kept only so a
 114 |       // malformed `action.url` is still reported on the console.
 115 |       try {
 116 |         if (action.url) new URL(action.url as string);
 117 |       } catch (e) {
 118 |         console.error("Error parsing URL:", e);
 119 |       }
 120 |       return `${basicDescription} to find information about ${
 121 |         goal || "the requested topic"
 122 |       }. This website likely contains relevant data or search capabilities needed.`;
 123 | 
 124 |     case "back":
 125 |       return `${basicDescription} to return to previous content. This helps with navigation when the current page doesn't contain the needed information.`;
 126 | 
 127 |     case "wait":
 128 |       return `${basicDescription} while the page loads the requested information. This ensures all content is properly displayed before proceeding.`;
 129 | 
 130 |     case "double_click":
 131 |       return `${basicDescription} to interact with this element. Double-clicking often opens or expands content that may contain relevant information.`;
 132 | 
 133 |     case "drag": {
 134 |       // Get start and end points from the path if available
 135 |       let startPoint = { x: 0, y: 0 };
 136 |       let endPoint = { x: 0, y: 0 };
 137 |       if (Array.isArray(action.path) && action.path.length > 0) {
 138 |         startPoint = action.path[0] as typeof startPoint;
 139 |         endPoint = action.path[action.path.length - 1] as typeof endPoint;
 140 |       }
 141 |       return `${basicDescription} to adjust the view or interact with content. Dragging from (${startPoint.x}, ${startPoint.y}) to (${endPoint.x}, ${endPoint.y}) helps reveal or organize information in a more useful way.`;
 142 |     }
 143 | 
 144 |     case "screenshot":
 145 |       return `${basicDescription} to capture the visual information displayed. This preserves the current state of the information for reference.`;
 146 | 
 147 |     case "move":
 148 |       return `${basicDescription} to prepare for the next interaction. Positioning the cursor is necessary before clicking or selecting content.`;
 149 | 
 150 |     case "message": {
 151 |       const text = action.text;
 152 |       const isReply =
 153 |         typeof text === "string" &&
 154 |         (text.startsWith("yes") || text.startsWith("no") || text.includes("?"));
 155 |       if (isReply) {
 156 |         return `Providing additional input to refine the search for information about ${
 157 |           goal || "the requested topic"
 158 |         }. This clarification helps the assistant provide more relevant results.`;
 159 |       }
 160 |       return `Communicating with the assistant about ${
 161 |         goal || "the requested information"
 162 |       }. This exchange helps clarify needs and receive appropriate information.`;
 163 |     }
 164 | 
 165 |     default:
 166 |       return `${basicDescription} to progress in finding information about ${
 167 |         goal || "the requested topic"
 168 |       }. This action is part of the process to retrieve the relevant data.`;
 169 |   }
 170 | };
 171 | 
 172 | export default function LegacyChatFeed({
 173 |   initialMessage,
 174 |   onClose,
 175 | }: ChatFeedProps) {
 176 |   const [activePage, setActivePage] = useState<SessionLiveURLs.Page | null>(
 177 |     null
 178 |   );
 179 |   // eslint-disable-next-line @typescript-eslint/no-unused-vars
 180 |   const [isLoading, setIsLoading] = useState(false);
 181 |   const [sessionTime, setSessionTime] = useState(0);
 182 |   const { width } = useWindowSize();
 183 |   const isMobile = width ? width < 768 : false;
 184 |   const initializationRef = useRef(false);
 185 |   const chatContainerRef = useRef<HTMLDivElement>(null);
 186 |   const [isScrolled, setIsScrolled] = useState(false);
 187 |   // eslint-disable-next-line @typescript-eslint/no-unused-vars
 188 |   const [isMounted, setIsMounted] = useState(false);
 189 |   const [isAgentFinished, setIsAgentFinished] = useState(false);
 190 |   const agentStateRef = useRef<AgentState>({
 191 |     sessionId: null,
 192 |     sessionUrl: null,
 193 |     connectUrl: null,
 194 |     steps: [],
 195 |     isLoading: false,
 196 |   });
 197 | 
 198 |   const [uiState, setUiState] = useState<{
 199 |     sessionId: string | null;
 200 |     sessionUrl: string | null;
 201 |     connectUrl: string | null;
 202 |     steps: BrowserStep[];
 203 |   }>({
 204 |     sessionId: null,
 205 |     sessionUrl: null,
 206 |     connectUrl: null,
 207 |     steps: [],
 208 |   });
 209 | 
 210 |   // generate the debugger URL for the current tab
 211 |   const activePageUrl = (
 212 |     activePage?.debuggerFullscreenUrl ??
 213 |     uiState.sessionUrl ??
 214 |     ""
 215 |   ).replace(
 216 |     "https://www.browserbase.com/devtools-fullscreen/inspector.html",
 217 |     "https://www.browserbase.com/devtools-internal-compiled/index.html"
 218 |   );
 219 | 
 220 |   const [userInput, setUserInput] = useState("");
 221 |   const [isWaitingForInput, setIsWaitingForInput] = useState(false);
 222 |   const inputRef = useRef<HTMLInputElement>(null);
 223 | 
 224 |   const scrollToBottom = useCallback(() => {
 225 |     if (chatContainerRef.current) {
 226 |       chatContainerRef.current.scrollTop =
 227 |         chatContainerRef.current.scrollHeight;
 228 |     }
 229 |   }, []);
 230 | 
 231 |   // Set mounted state after hydration is complete
 232 |   useEffect(() => {
 233 |     setIsMounted(true);
 234 |   }, []);
 235 | 
 236 |   // Auto-focus input field when waiting for input
 237 |   useEffect(() => {
 238 |     if (isWaitingForInput && inputRef.current) {
 239 |       // Try multiple times with increasing delays to ensure focus works
 240 |       const focusAttempts = [10, 100, 300, 500];
 241 | 
 242 |       focusAttempts.forEach((delay) => {
 243 |         setTimeout(() => {
 244 |           if (inputRef.current) {
 245 |             inputRef.current.focus();
 246 |             console.log(`Attempting to focus input at ${delay}ms`);
 247 |           }
 248 |         }, delay);
 249 |       });
 250 |     }
 251 |   }, [isWaitingForInput]);
 252 | 
 253 |   // Track scroll position to apply conditional margin
 254 |   useEffect(() => {
 255 |     const handleScroll = () => {
 256 |       if (chatContainerRef.current) {
 257 |         setIsScrolled(chatContainerRef.current.scrollTop > 10);
 258 |       }
 259 |     };
 260 | 
 261 |     const container = chatContainerRef.current;
 262 |     if (container) {
 263 |       container.addEventListener("scroll", handleScroll);
 264 |       return () => container.removeEventListener("scroll", handleScroll);
 265 |     }
 266 |   }, []);
 267 | 
 268 |   useEffect(() => {
 269 |     let timer: NodeJS.Timeout | null = null;
 270 | 
 271 |     if (uiState.sessionId) {
 272 |       // Reset timer when a new session starts
 273 |       setSessionTime(0);
 274 | 
 275 |       // Start the timer
 276 |       timer = setInterval(() => {
 277 |         setSessionTime((prevTime) => prevTime + 1);
 278 |       }, 1000);
 279 |     }
 280 | 
 281 |     return () => {
 282 |       if (timer) clearInterval(timer);
 283 |     };
 284 |   }, [uiState.sessionId]);
 285 | 
 286 |   useEffect(() => {
 287 |     if (
 288 |       uiState.steps.length > 0 &&
 289 |       uiState.steps[uiState.steps.length - 1].tool === "CLOSE"
 290 |     ) {
 291 |       setIsAgentFinished(true);
 292 |     }
 293 |   }, [uiState.sessionId, uiState.steps]);
 294 | 
 295 |   // Watch for isAgentFinished state changes to terminate the session when stop button is clicked
 296 |   useEffect(() => {
 297 |     if (isAgentFinished && uiState.sessionId) {
 298 |       console.log(
 299 |         "Terminating session due to agent finished state:",
 300 |         uiState.sessionId
 301 |       );
 302 | 
 303 |       // Set a flag to prevent further API calls
 304 |       const abortController = new AbortController();
 305 |       // eslint-disable-next-line @typescript-eslint/no-unused-vars
 306 |       const signal = abortController.signal;
 307 | 
 308 |       // Cancel any pending requests
 309 |       abortController.abort();
 310 | 
 311 |       // Wait a short delay to allow any in-progress operations to complete
 312 |       setTimeout(() => {
 313 |         fetch("/api/session", {
 314 |           method: "DELETE",
 315 |           headers: {
 316 |             "Content-Type": "application/json",
 317 |           },
 318 |           body: JSON.stringify({
 319 |             sessionId: uiState.sessionId,
 320 |           }),
 321 |         }).catch((error) => {
 322 |           // Ignore errors during session termination
 323 |           console.log(
 324 |             "Error during session termination (can be ignored):",
 325 |             error
 326 |           );
 327 |         });
 328 |       }, 500);
 329 |     }
 330 |   }, [isAgentFinished, uiState.sessionId]);
 331 | 
 332 |   useEffect(() => {
 333 |     scrollToBottom();
 334 |   }, [uiState.steps, scrollToBottom]);
 335 | 
 336 |   // Add a new function to process a single step
 337 |   const processStep = useCallback(
 338 |     async (
 339 |       stepData: {
 340 |         output: Item[];
 341 |         responseId: string;
 342 |       }[],
 343 |       sessionId?: string,
 344 |       stepNumber = 1
 345 |     ) => {
 346 |       // Ensure stepData is an array before using array methods
 347 |       if (!Array.isArray(stepData)) {
 348 |         console.error("stepData is not an array:", stepData);
 349 |         // Add an error message to the UI
 350 |         const errorStep: BrowserStep = {
 351 |           text: `There was an error processing the request. Please try again.`,
 352 |           reasoning: `API returned invalid data: ${JSON.stringify(stepData)}`,
 353 |           tool: "MESSAGE",
 354 |           instruction: "",
 355 |           stepNumber: stepNumber++,
 356 |         };
 357 | 
 358 |         agentStateRef.current = {
 359 |           ...agentStateRef.current,
 360 |           steps: [...agentStateRef.current.steps, errorStep],
 361 |           isLoading: false,
 362 |         };
 363 | 
 364 |         setUiState((prev) => ({
 365 |           ...prev,
 366 |           steps: agentStateRef.current.steps,
 367 |           isLoading: false,
 368 |         }));
 369 | 
 370 |         setIsWaitingForInput(true);
 371 |         return;
 372 |       }
 373 | 
 374 |       const hasMessage = stepData.find((step) =>
 375 |         step.output.find((item) => item.type === "message")
 376 |       );
 377 |       const hasComputerCall = stepData.find((step) =>
 378 |         step.output.find((item) => item.type === "computer_call")
 379 |       );
 380 |       const hasFunctionCall = stepData.find((step) =>
 381 |         step.output.find((item) => item.type === "function_call")
 382 |       );
 383 | 
 384 |       const messageItem = hasMessage?.output.find(
 385 |         (item) => item.type === "message"
 386 |       );
 387 |       const computerItem = hasComputerCall?.output.find(
 388 |         (item) => item.type === "computer_call"
 389 |       );
 390 |       const functionItem = hasFunctionCall?.output.find(
 391 |         (item) => item.type === "function_call"
 392 |       );
 393 | 
 394 |       // Extract context from message content
 395 |       const contextClues = {
 396 |         website: "",
 397 |         action: "",
 398 |         subject: "",
 399 |         location: "",
 400 |         filter: "",
 401 |         selection: "",
 402 |         goal: "", // The overall user goal
 403 |         lastAction: "", // Keep track of the previous action
 404 |       };
 405 | 
 406 |       // Extract context from message content if available
 407 |       if (
 408 |         messageItem &&
 409 |         messageItem.type === "message" &&
 410 |         messageItem.content
 411 |       ) {
 412 |         // Extract text from content items
 413 |         const messageText =
 414 |           messageItem.content
 415 |             .filter((content) => content.type === "output_text")
 416 |             .map((content) => (content as OutputText).text)
 417 |             .join(" ") || "";
 418 | 
 419 |         // Look for goal statements
 420 |         const goalPatterns = [
 421 |           /(?:I want to|I'd like to|I need to|Can you|Please)\s+([^.?!]+)[.?!]/i,
 422 |           /(?:find|search|look up|tell me|show me)\s+([^.?!]+)[.?!]/i,
 423 |           /(?:what is|how much|how many|where is|when is)\s+([^.?!]+)[?]/i,
 424 |         ];
 425 | 
 426 |         // Extract website names
 427 |         const websitePatterns = [
 428 |           /(?:on|to|using|visit|open|access|browse)\s+([A-Za-z0-9]+(?:\.[A-Za-z0-9]+)+)/i,
 429 |           /([A-Za-z0-9]+(?:\.[A-Za-z0-9]+)+)\s+(?:website|site|page)/i,
 430 |           /(?:website|site|page)\s+([A-Za-z0-9]+(?:\.[A-Za-z0-9]+)+)/i,
 431 |         ];
 432 | 
 433 |         // Extract search terms
 434 |         const searchPatterns = [
 435 |           /(?:search|look|find)(?:\s+for)?\s+([^.,;]+)/i,
 436 |           /searching\s+for\s+([^.,;]+)/i,
 437 |         ];
 438 | 
 439 |         // Extract location information
 440 |         const locationPatterns = [
 441 |           /(?:in|near|at|around)\s+([A-Za-z\s]+(?:City|Town|Village|County|State|Province|District|Area|Region))/i,
 442 |           /location\s+(?:in|near|at|to)\s+([^.,;]+)/i,
 443 |           /([A-Za-z\s]+(?:City|Town|Village|County|State|Province|District|Area|Region))/i,
 444 |         ];
 445 | 
 446 |         // Extract filter information
 447 |         const filterPatterns = [
 448 |           /filter\s+(?:by|for|with)\s+([^.,;]+)/i,
 449 |           /(?:set|adjust|change)\s+(?:the)?\s+([^\s]+)\s+(?:filter|setting|option)\s+(?:to|for)?\s+([^.,;]+)/i,
 450 |         ];
 451 | 
 452 |         // Extract selection information
 453 |         const selectionPatterns = [
 454 |           /(?:select|choose|pick)\s+(?:the)?\s+([^.,;]+)/i,
 455 |           /selecting\s+(?:the)?\s+([^.,;]+)/i,
 456 |         ];
 457 | 
 458 |         // Apply all patterns to extract context
 459 |         for (const pattern of goalPatterns) {
 460 |           const match = messageText.match(pattern);
 461 |           if (match && match[1]) {
 462 |             contextClues.goal = match[1].trim();
 463 |             break;
 464 |           }
 465 |         }
 466 | 
 467 |         for (const pattern of websitePatterns) {
 468 |           const match = messageText.match(pattern);
 469 |           if (match && match[1]) {
 470 |             contextClues.website = match[1].trim();
 471 |             break;
 472 |           }
 473 |         }
 474 | 
 475 |         for (const pattern of searchPatterns) {
 476 |           const match = messageText.match(pattern);
 477 |           if (match && match[1]) {
 478 |             contextClues.subject = match[1].trim();
 479 |             break;
 480 |           }
 481 |         }
 482 | 
 483 |         for (const pattern of locationPatterns) {
 484 |           const match = messageText.match(pattern);
 485 |           if (match && match[1]) {
 486 |             contextClues.location = match[1].trim();
 487 |             break;
 488 |           }
 489 |         }
 490 | 
 491 |         for (const pattern of filterPatterns) {
 492 |           const match = messageText.match(pattern);
 493 |           if (match && match[1]) {
 494 |             contextClues.filter = match[1].trim();
 495 |             if (match[2]) contextClues.filter += " " + match[2].trim();
 496 |             break;
 497 |           }
 498 |         }
 499 | 
 500 |         for (const pattern of selectionPatterns) {
 501 |           const match = messageText.match(pattern);
 502 |           if (match && match[1]) {
 503 |             contextClues.selection = match[1].trim();
 504 |             break;
 505 |           }
 506 |         }
 507 | 
 508 |         // Determine the main action from the message
 509 |         if (messageText.match(/search|find|look/i)) {
 510 |           contextClues.action = "searching";
 511 |         } else if (messageText.match(/select|choose|pick/i)) {
 512 |           contextClues.action = "selecting";
 513 |         } else if (messageText.match(/filter|adjust|set/i)) {
 514 |           contextClues.action = "filtering";
 515 |         } else if (messageText.match(/click|press|tap/i)) {
 516 |           contextClues.action = "clicking";
 517 |         } else if (messageText.match(/type|enter|input|fill/i)) {
 518 |           contextClues.action = "entering";
 519 |         } else if (messageText.match(/scroll|move/i)) {
 520 |           contextClues.action = "scrolling";
 521 |         }
 522 |       }
 523 | 
 524 |       // Create a concise, task-oriented reasoning description
 525 |       const createTaskDescription = (
 526 |         action: Record<string, unknown>,
 527 |         actionType: string
 528 |       ): string => {
 529 |         // Default descriptions based on action type
 530 |         const defaultDescriptions: Record<string, string> = {
 531 |           click: "Clicking on an element",
 532 |           type: "Entering text",
 533 |           keypress: "Pressing keyboard keys",
 534 |           scroll: "Scrolling the page",
 535 |           goto: "Navigating to a website",
 536 |           back: "Going back to previous page",
 537 |           wait: "Waiting for page to load",
 538 |           double_click: "Double-clicking on an element",
 539 |           drag: "Dragging an element",
 540 |           screenshot: "Taking a screenshot",
 541 |           move: "Moving the cursor",
 542 |           message: "Sending a message",
 543 |         };
 544 | 
 545 |         // Get domain from URL for goto actions
 546 |         let domain = "";
 547 |         if (actionType === "goto" && typeof action.url === "string") {
 548 |           try {
 549 |             domain = new URL(action.url).hostname.replace("www.", "");
 550 |           } catch (e: unknown) {
 551 |             // If URL parsing fails, just use the default
 552 |             console.error("Error parsing URL:", e);
 553 |           }
 554 |         }
 555 | 
 556 |         // Create specific descriptions based on context
 557 |         switch (actionType) {
 558 |           case "click":
 559 |             // Try to infer what's being clicked based on common UI patterns
 560 |             const x = typeof action.x === "number" ? action.x : 0;
 561 |             const y = typeof action.y === "number" ? action.y : 0;
 562 | 
 563 |             if (typeof action.x === "number" && typeof action.y === "number") {
 564 |               // Check if clicking in top-left corner (often navigation/menu)
 565 |               if (x < 100 && y < 100) {
 566 |                 return "Opening navigation menu";
 567 |               }
 568 |               // Check if clicking in top-right corner (often account/settings)
 569 |               else if (x > 900 && y < 100) {
 570 |                 return "Accessing account options";
 571 |               }
 572 |               // Check if clicking near bottom of page (often pagination/load more)
 573 |               else if (y > 500) {
 574 |                 return "Loading more content";
 575 |               }
 576 |             }
 577 | 
 578 |             return "Selecting an interactive element";
 579 |           case "type":
 580 |             const text = typeof action.text === "string" ? action.text : "";
 581 |             if (text.includes("@") && text.includes("."))
 582 |               return "Entering email address";
 583 |             if (text.length > 20) return "Entering detailed information";
 584 |             if (/^\d+$/.test(text)) return "Entering numeric value";
 585 |             return text
 586 |               ? `Typing "${text.substring(0, 15)}${
 587 |                   text.length > 15 ? "..." : ""
 588 |                 }"`
 589 |               : defaultDescriptions.type;
 590 |           case "keypress":
 591 |             const keys = Array.isArray(action.keys)
 592 |               ? action.keys.join(", ")
 593 |               : "";
 594 |             if (keys.includes("Enter")) return "Submitting form";
 595 |             if (keys.includes("Tab")) return "Moving to next field";
 596 |             if (keys.includes("Escape")) return "Closing dialog";
 597 |             return defaultDescriptions.keypress;
 598 |           case "scroll":
 599 |             const scrollY =
 600 |               typeof action.scroll_y === "number" ? action.scroll_y : 0;
 601 |             return scrollY > 0
 602 |               ? "Scrolling down to see more results"
 603 |               : "Scrolling up to previous content";
 604 |           case "goto":
 605 |             return domain ? `Accessing ${domain}` : defaultDescriptions.goto;
 606 |           case "back":
 607 |             return "Going back to previous page";
 608 |           case "wait":
 609 |             // Provide more specific wait descriptions
 610 |             if (contextClues.action === "searching") {
 611 |               return `Waiting for search results to load`;
 612 |             } else if (contextClues.website) {
 613 |               return `Waiting for ${contextClues.website} page to load`;
 614 |             } else if (contextClues.subject) {
 615 |               return `Waiting for ${contextClues.subject} content to appear`;
 616 |             }
 617 |             return "Waiting for page to respond";
 618 |           default:
 619 |             // For other action types, try to be more specific based on context
 620 |             if (actionType === "doubleclick" && contextClues.selection) {
 621 |               return `Opening ${contextClues.selection}`;
 622 |             } else if (actionType === "drag" && contextClues.action) {
 623 |               return `Adjusting ${contextClues.action} by dragging`;
 624 |             } else if (actionType === "screenshot") {
 625 |               return "Capturing screenshot of current view";
 626 |             } else if (actionType === "move" && contextClues.action) {
 627 |               return `Positioning cursor for ${contextClues.action}`;
 628 |             }
 629 |             return (
 630 |               defaultDescriptions[actionType] ||
 631 |               `Performing ${actionType} action`
 632 |             );
 633 |         }
 634 |       };
 635 | 
 636 |       if (
 637 |         !hasComputerCall &&
 638 |         !hasFunctionCall &&
 639 |         messageItem &&
 640 |         messageItem.type === "message" &&
 641 |         messageItem.content[0].type === "output_text"
 642 |       ) {
 643 |         const newStep: BrowserStep = {
 644 |           text: messageItem.content?.[0].text || "",
 645 |           reasoning: "Processing message",
 646 |           tool: "MESSAGE",
 647 |           instruction: "",
 648 |           stepNumber: stepNumber++,
 649 |           messageId: messageItem.id,
 650 |         };
 651 | 
 652 |         // Only add the step if we haven't seen this messageId before
 653 |         const isDuplicate = agentStateRef.current.steps.some(
 654 |           (step) =>
 655 |             step.messageId === messageItem.id && messageItem.id !== undefined
 656 |         );
 657 | 
 658 |         if (!isDuplicate) {
 659 |           agentStateRef.current = {
 660 |             ...agentStateRef.current,
 661 |             steps: [...agentStateRef.current.steps, newStep],
 662 |           };
 663 | 
 664 |           setUiState((prev) => ({
 665 |             ...prev,
 666 |             steps: agentStateRef.current.steps,
 667 |           }));
 668 |         }
 669 | 
 670 |         setIsWaitingForInput(true);
 671 |         currentResponseRef.current = {
 672 |           id: stepData[0].responseId,
 673 |         };
 674 | 
 675 |         // Focus the input when it becomes visible
 676 |         if (inputRef.current) {
 677 |           inputRef.current.focus();
 678 |         }
 679 |       } else if (computerItem || functionItem) {
 680 |         if (
 681 |           messageItem &&
 682 |           messageItem.type === "message" &&
 683 |           messageItem.content[0].type === "output_text"
 684 |         ) {
 685 |           const newStep: BrowserStep = {
 686 |             text: messageItem.content?.[0].text || "",
 687 |             reasoning: "Processing message",
 688 |             tool: "MESSAGE",
 689 |             instruction: "",
 690 |             stepNumber: stepNumber++,
 691 |             messageId: messageItem.id,
 692 |           };
 693 | 
 694 |           // Only add the step if we haven't seen this messageId before
 695 |           const isDuplicate = agentStateRef.current.steps.some(
 696 |             (step) =>
 697 |               step.messageId === messageItem.id && messageItem.id !== undefined
 698 |           );
 699 | 
 700 |           if (!isDuplicate) {
 701 |             agentStateRef.current = {
 702 |               ...agentStateRef.current,
 703 |               steps: [...agentStateRef.current.steps, newStep],
 704 |             };
 705 | 
 706 |             setUiState((prev) => ({
 707 |               ...prev,
 708 |               steps: agentStateRef.current.steps,
 709 |             }));
 710 |           }
 711 |         }
 712 |         let actionStep: BrowserStep | null = null;
 713 | 
 714 |         if (computerItem) {
 715 |           const action = computerItem.action;
 716 | 
 717 |           switch (action.type) {
 718 |             case "click":
 719 |               actionStep = {
 720 |                 text: `Clicking at position (${action.x}, ${action.y})`,
 721 |                 reasoning: generateDetailedReasoning(
 722 |                   action,
 723 |                   "click",
 724 |                   contextClues,
 725 |                   createTaskDescription
 726 |                 ),
 727 |                 tool: "CLICK",
 728 |                 instruction: `click(${action.x}, ${action.y})`,
 729 |                 stepNumber: stepNumber++,
 730 |               };
 731 |               break;
 732 |             case "type":
 733 |               actionStep = {
 734 |                 text: `Typing text: "${action.text}"`,
 735 |                 reasoning: generateDetailedReasoning(
 736 |                   action,
 737 |                   "type",
 738 |                   contextClues,
 739 |                   createTaskDescription
 740 |                 ),
 741 |                 tool: "TYPE",
 742 |                 instruction: action.text || "",
 743 |                 stepNumber: stepNumber++,
 744 |               };
 745 |               break;
 746 |             case "keypress":
 747 |               actionStep = {
 748 |                 text: `Pressing keys: ${action.keys?.join(", ")}`,
 749 |                 reasoning: generateDetailedReasoning(
 750 |                   action,
 751 |                   "keypress",
 752 |                   contextClues,
 753 |                   createTaskDescription
 754 |                 ),
 755 |                 tool: "KEYPRESS",
 756 |                 instruction: action.keys?.join(", ") || "",
 757 |                 stepNumber: stepNumber++,
 758 |               };
 759 |               break;
 760 |             case "scroll":
 761 |               actionStep = {
 762 |                 text: `Scrolling by (${action.scroll_x}, ${action.scroll_y})`,
 763 |                 reasoning: generateDetailedReasoning(
 764 |                   action,
 765 |                   "scroll",
 766 |                   contextClues,
 767 |                   createTaskDescription
 768 |                 ),
 769 |                 tool: "SCROLL",
 770 |                 instruction: `scroll(${action.scroll_x}, ${action.scroll_y})`,
 771 |                 stepNumber: stepNumber++,
 772 |               };
 773 |               break;
 774 |             default:
 775 |               // Create more specific text descriptions for different action types
 776 |               let actionText = `Performing ${action.type} action`;
 777 | 
 778 |               if (action.type === "wait") {
 779 |                 actionText = "Waiting for page to respond";
 780 |               } else if (action.type === "double_click") {
 781 |                 actionText = `Double-clicking at position (${action.x || 0}, ${
 782 |                   action.y || 0
 783 |                 })`;
 784 |               } else if (action.type === "drag") {
 785 |                 // Drag has a path array with start and end points
 786 |                 const startPoint = action.path?.[0] || { x: 0, y: 0 };
 787 |                 const endPoint = action.path?.[action.path?.length - 1] || {
 788 |                   x: 0,
 789 |                   y: 0,
 790 |                 };
 791 |                 actionText = `Dragging from (${startPoint.x}, ${startPoint.y}) to (${endPoint.x}, ${endPoint.y})`;
 792 |               } else if (action.type === "screenshot") {
 793 |                 actionText = "Taking screenshot of current page";
 794 |               } else if (action.type === "move") {
 795 |                 actionText = `Moving cursor to position (${action.x || 0}, ${
 796 |                   action.y || 0
 797 |                 })`;
 798 |               }
 799 | 
 800 |               actionStep = {
 801 |                 text: actionText,
 802 |                 reasoning: generateDetailedReasoning(
 803 |                   action,
 804 |                   action.type,
 805 |                   contextClues,
 806 |                   createTaskDescription
 807 |                 ),
 808 |                 tool: action.type.toUpperCase() as unknown as
 809 |                   | "GOTO"
 810 |                   | "ACT"
 811 |                   | "EXTRACT"
 812 |                   | "OBSERVE"
 813 |                   | "CLOSE"
 814 |                   | "WAIT"
 815 |                   | "NAVBACK"
 816 |                   | "MESSAGE"
 817 |                   | "CLICK"
 818 |                   | "TYPE"
 819 |                   | "KEYPRESS"
 820 |                   | "SCROLL"
 821 |                   | "DOUBLECLICK"
 822 |                   | "DRAG"
 823 |                   | "SCREENSHOT"
 824 |                   | "MOVE",
 825 |                 instruction: action.type,
 826 |                 stepNumber: stepNumber++,
 827 |               };
 828 |           }
 829 |         } else if (functionItem) {
 830 |           switch (functionItem.name) {
 831 |             case "back":
 832 |               actionStep = {
 833 |                 text: "Going back to the previous page",
 834 |                 reasoning: generateDetailedReasoning(
 835 |                   {},
 836 |                   "back",
 837 |                   contextClues,
 838 |                   createTaskDescription
 839 |                 ),
 840 |                 tool: "NAVBACK",
 841 |                 instruction: "back()",
 842 |                 stepNumber: stepNumber++,
 843 |               };
 844 |               break;
 845 |             case "goto":
 846 |               const gotoArgs = JSON.parse(functionItem.arguments);
 847 |               actionStep = {
 848 |                 text: `Navigating to ${gotoArgs.url}`,
 849 |                 reasoning: generateDetailedReasoning(
 850 |                   gotoArgs,
 851 |                   "goto",
 852 |                   contextClues,
 853 |                   createTaskDescription
 854 |                 ),
 855 |                 tool: "GOTO",
 856 |                 instruction: `goto(${gotoArgs.url})`,
 857 |                 stepNumber: stepNumber++,
 858 |               };
 859 |               break;
 860 |           }
 861 |         }
 862 |         agentStateRef.current = {
 863 |           ...agentStateRef.current,
 864 |           steps: [
 865 |             ...agentStateRef.current.steps,
 866 |             actionStep ?? {
 867 |               text: "Unknown action",
 868 |               reasoning: "Default action",
 869 |               tool: "ACT",
 870 |               instruction: "",
 871 |               stepNumber: stepNumber++,
 872 |             },
 873 |           ],
 874 |         };
 875 | 
 876 |         setUiState((prev) => ({
 877 |           ...prev,
 878 |           steps: agentStateRef.current.steps,
 879 |         }));
 880 | 
 881 |         // Handle computer call
 882 |         const computerCallResponse = await fetch("/api/cua/step/execute", {
 883 |           method: "POST",
 884 |           headers: {
 885 |             "Content-Type": "application/json",
 886 |           },
 887 |           body: JSON.stringify({
 888 |             sessionId,
 889 |             output: hasComputerCall ?? hasFunctionCall,
 890 |           }),
 891 |         });
 892 | 
 893 |         const computerCallData: (
 894 |           | Message
 895 |           | FunctionOutput
 896 |           | ComputerCallOutput
 897 |         )[] = await computerCallResponse.json();
 898 | 
 899 |         const nextStepResponse = await fetch("/api/cua/step/generate", {
 900 |           method: "POST",
 901 |           headers: {
 902 |             "Content-Type": "application/json",
 903 |           },
 904 |           body: JSON.stringify({
 905 |             sessionId,
 906 |             input: computerCallData,
 907 |             responseId: stepData[0]?.responseId || null,
 908 |           }),
 909 |         });
 910 | 
 911 |         // abort here if generate fails
 912 |         if (!nextStepResponse.ok) {
 913 |           console.error("API error:", nextStepResponse);
 914 |           return;
 915 |         }
 916 | 
 917 |         const responseData = await nextStepResponse.json();
 918 | 
 919 |         // Log error if we got an invalid response
 920 |         if (!Array.isArray(responseData)) {
 921 |           console.error("API returned non-array data:", responseData);
 922 |         }
 923 | 
 924 |         // Ensure nextStepData is always an array
 925 |         const nextStepData = Array.isArray(responseData) ? responseData : [];
 926 | 
 927 |         // Handle reasoning-only responses by adding a message item if needed
 928 |         if (
 929 |           nextStepData[0]?.output?.length === 1 &&
 930 |           nextStepData[0]?.output[0]?.type === "reasoning"
 931 |         ) {
 932 |           console.log("Detected reasoning-only response, adding message item");
 933 |           // Add a message item to ensure the reasoning is followed by another item
 934 |           nextStepData[0].output.push({
 935 |             id: `msg_fallback_${nextStepData[0]?.responseId || "default"}`,
 936 |             type: "message",
 937 |             role: "assistant",
 938 |             content: [
 939 |               {
 940 |                 type: "output_text",
 941 |                 text: "I'll continue with the task.",
 942 |                 annotations: [],
 943 |               },
 944 |             ],
 945 |           });
 946 |         }
 947 | 
 948 |         currentResponseRef.current = {
 949 |           id: nextStepData[0]?.responseId || null,
 950 |         };
 951 | 
 952 |         // Process the next step recursively - ensure nextStepData is an array first
 953 |         if (Array.isArray(nextStepData)) {
 954 |           return processStep(nextStepData, sessionId, stepNumber);
 955 |         } else {
 956 |           console.error("stepData is not an array:", nextStepData);
 957 |           // Return gracefully instead of causing an error
 958 |           return;
 959 |         }
 960 |       } else {
 961 |         console.log("No message or computer call output");
 962 |         console.log("messageItem", messageItem);
 963 |         console.log("computerItem", computerItem);
 964 |       }
 965 |     },
 966 |     []
 967 |   );
 968 | 
 969 |   // Sends a follow-up user message to the agent and resumes the step loop.
 970 |   // The message is appended to the chat and the input box is hidden right
 971 |   // away; on failure an error step is appended and the input box returns.
 972 |   // Resolves to processStep's result, or null when the request failed.
 973 |   const handleUserInput = useCallback(
 974 |     async (input: string) => {
 975 |       if (!input.trim()) return;
 976 | 
 977 |       // Add user message to chat
 978 |       const userStep: BrowserStep = {
 979 |         text: input,
 980 |         reasoning: "User input",
 981 |         tool: "MESSAGE",
 982 |         instruction: "",
 983 |         stepNumber: agentStateRef.current.steps.length + 1,
 984 |       };
 985 | 
 986 |       agentStateRef.current = {
 987 |         ...agentStateRef.current,
 988 |         steps: [...agentStateRef.current.steps, userStep],
 989 |       };
 990 | 
 991 |       setUiState((prev) => ({
 992 |         ...prev,
 993 |         steps: agentStateRef.current.steps,
 994 |       }));
 995 | 
 996 |       setIsWaitingForInput(false);
 997 |       setUserInput("");
 998 | 
 999 |       const sessionId = agentStateRef.current.sessionId;
1000 | 
1001 |       // Posts content to the generate endpoint and returns the validated,
1002 |       // non-empty response array, recording its response id for the next call.
1003 |       const requestNextStep = async (content: string, isRetry = false) => {
1004 |         const response = await fetch("/api/cua/step/generate", {
1005 |           // Only the first attempt is time-boxed (15 second timeout)
1006 |           signal: isRetry ? undefined : AbortSignal.timeout(15000),
1007 |           method: "POST",
1008 |           headers: {
1009 |             "Content-Type": "application/json",
1010 |           },
1011 |           body: JSON.stringify({
1012 |             sessionId,
1013 |             responseId: currentResponseRef.current?.id,
1014 |             input: [
1015 |               {
1016 |                 role: "user",
1017 |                 content,
1018 |               },
1019 |             ],
1020 |           }),
1021 |         });
1022 | 
1023 |         if (!response.ok) {
1024 |           // Carry the raw body in the message so the reasoning-item check in
1025 |           // the catch block below can match on the server's error text
1026 |           const detail = await response.text().catch(() => "");
1027 |           throw new Error(`API error: ${response.status} ${detail}`.trim());
1028 |         }
1029 | 
1030 |         const stepData = await response.json();
1031 | 
1032 |         // Anything other than a non-empty array cannot be processed
1033 |         if (!Array.isArray(stepData) || stepData.length === 0) {
1034 |           console.error("API returned non-array data:", stepData);
1035 |           throw new Error("Step generation returned no usable data");
1036 |         }
1037 | 
1038 |         // Handle reasoning-only responses by adding a message item so the
1039 |         // reasoning is always followed by another item
1040 |         const firstItem = stepData[0];
1041 |         if (
1042 |           firstItem?.output?.length === 1 &&
1043 |           firstItem.output[0]?.type === "reasoning"
1044 |         ) {
1045 |           console.log(
1046 |             isRetry
1047 |               ? "Still got reasoning-only response, adding message item"
1048 |               : "Detected reasoning-only response, adding message item"
1049 |           );
1050 |           firstItem.output.push({
1051 |             id: `msg_fallback_${firstItem.responseId || "default"}`,
1052 |             type: "message",
1053 |             role: "assistant",
1054 |             content: [
1055 |               {
1056 |                 type: "output_text",
1057 |                 text: "I'll help you with that task.",
1058 |                 annotations: [],
1059 |               },
1060 |             ],
1061 |           });
1062 |         }
1063 | 
1064 |         currentResponseRef.current = {
1065 |           id: firstItem.responseId,
1066 |         };
1067 | 
1068 |         return stepData;
1069 |       };
1070 | 
1071 |       try {
1072 |         // Without a session there is nothing to continue; fail loudly rather
1073 |         // than leaving the input hidden with no request in flight
1074 |         if (!sessionId) {
1075 |           throw new Error("No active session");
1076 |         }
1077 | 
1078 |         // Continue the conversation
1079 |         const nextStepData = await requestNextStep(input);
1080 |         const stepNumber = agentStateRef.current.steps.length + 1;
1081 | 
1082 |         // Process the next step recursively
1083 |         // NOTE(review): returned without await, so a rejection from
1084 |         // processStep bypasses the catch below - confirm this is intended
1085 |         return processStep(nextStepData, sessionId, stepNumber);
1086 |       } catch (error) {
1087 |         console.error("Error handling user input:", error);
1088 | 
1089 |         // Check if this is a reasoning item error
1090 |         if (
1091 |           sessionId &&
1092 |           error instanceof Error &&
1093 |           (error.message.includes("reasoning") ||
1094 |             error.message.includes("without its required following item"))
1095 |         ) {
1096 |           console.log(
1097 |             "Handling reasoning item error, retrying with modified request"
1098 |           );
1099 |           try {
1100 |             // Try again with a more specific instruction
1101 |             const retryData = await requestNextStep(
1102 |               input + " Please take a specific action.",
1103 |               true
1104 |             );
1105 |             const stepNumber = agentStateRef.current.steps.length + 1;
1106 | 
1107 |             // Process the retry step
1108 |             return processStep(retryData, sessionId, stepNumber);
1109 |           } catch (retryError) {
1110 |             console.error("Error during retry:", retryError);
1111 |             // Fall through to the default error handling
1112 |           }
1113 |         }
1114 | 
1115 |         // Default error handling
1116 |         const errorStep: BrowserStep = {
1117 |           text: "Sorry, there was an error processing your request. Please try again.",
1118 |           reasoning: "Error handling user input",
1119 |           tool: "MESSAGE",
1120 |           instruction: "",
1121 |           stepNumber: agentStateRef.current.steps.length + 1,
1122 |         };
1123 | 
1124 |         agentStateRef.current = {
1125 |           ...agentStateRef.current,
1126 |           steps: [...agentStateRef.current.steps, errorStep],
1127 |         };
1128 | 
1129 |         setUiState((prev) => ({
1130 |           ...prev,
1131 |           steps: agentStateRef.current.steps,
1132 |         }));
1133 | 
1134 |         setUserInput("");
1135 | 
1136 |         setIsWaitingForInput(true);
1137 |         return null;
1138 |       }
1139 |     },
1140 |     [processStep]
1141 |   );
1170 | 
1171 |   // Holds the id of the most recent step response; sent back as responseId on the next request
1172 |   const currentResponseRef = useRef<{ id: string } | null>(null);
1173 | 
1174 |   // Creates the browser session and starts the agent on the initial message.
1175 |   // initializationRef guards against running the sequence more than once.
1176 |   useEffect(() => {
1177 |     console.log("useEffect called");
1178 |     const initializeSession = async () => {
1179 |       if (initializationRef.current) return;
1180 |       // Latch the guard only once startup can proceed; latching before this
1181 |       // check would skip startup forever if initialMessage arrived late.
1182 |       if (!initialMessage || agentStateRef.current.sessionId) return;
1183 |       initializationRef.current = true;
1184 | 
1185 |       setIsLoading(true);
1186 |       try {
1187 |         const sessionResponse = await fetch("/api/session", {
1188 |           method: "POST",
1189 |           headers: {
1190 |             "Content-Type": "application/json",
1191 |           },
1192 |           body: JSON.stringify({
1193 |             timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
1194 |           }),
1195 |         });
1196 |         const sessionData = await sessionResponse.json();
1197 | 
1198 |         if (!sessionData.success) {
1199 |           throw new Error(sessionData.error || "Failed to create session");
1200 |         }
1201 | 
1202 |         agentStateRef.current = {
1203 |           ...agentStateRef.current,
1204 |           sessionId: sessionData.sessionId,
1205 |           sessionUrl: sessionData.sessionUrl,
1206 |           connectUrl: sessionData.connectUrl,
1207 |         };
1208 | 
1209 |         setUiState({
1210 |           sessionId: sessionData.sessionId,
1211 |           sessionUrl: sessionData.sessionUrl,
1212 |           connectUrl: sessionData.connectUrl,
1213 |           steps: [],
1214 |         });
1215 | 
1216 |         // Start the cua session
1217 |         const startResponse = await fetch("/api/cua/start", {
1218 |           method: "POST",
1219 |           headers: {
1220 |             "Content-Type": "application/json",
1221 |           },
1222 |           body: JSON.stringify({
1223 |             sessionId: sessionData.sessionId,
1224 |             userInput: initialMessage,
1225 |           }),
1226 |         });
1227 | 
1228 |         const responseData = await startResponse.json();
1229 | 
1230 |         posthog.capture("cua_start", {
1231 |           goal: initialMessage,
1232 |           sessionId: sessionData.sessionId,
1233 |         });
1234 | 
1235 |         // Ensure startData is always an array
1236 |         const startData = Array.isArray(responseData) ? responseData : [];
1237 | 
1238 |         // Log error if we got an invalid response
1239 |         if (!Array.isArray(responseData)) {
1240 |           console.error(
1241 |             "API returned non-array data from /api/cua/start:",
1242 |             responseData
1243 |           );
1244 |         }
1245 | 
1246 |         if (startData.length > 0) {
1247 |           // Process the first step and continue with subsequent steps
1248 |           await processStep(startData, sessionData.sessionId, 1);
1249 |         }
1250 |       } catch (error) {
1251 |         console.error("Session initialization error:", error);
1252 |       } finally {
1253 |         setIsLoading(false);
1254 |       }
1255 |     };
1256 | 
1257 |     void initializeSession();
1258 |   }, [initialMessage, processStep]);
1259 | 
1260 |   // Spring settings reused by the container's "visible" transition
1261 |   const springConfig = { type: "spring", stiffness: 350, damping: 30 };
1262 | 
1263 |   // Root container states: fully opaque at scale 1 when visible (spring
1264 |   // transition with staggered children), transparent at scale 0.95 otherwise
1265 |   const containerVariants = {
1266 |     hidden: { opacity: 0, scale: 0.95 },
1267 |     visible: {
1268 |       opacity: 1,
1269 |       scale: 1,
1270 |       transition: { ...springConfig, staggerChildren: 0.1 },
1271 |     },
1272 |     exit: { opacity: 0, scale: 0.95, transition: { duration: 0.2 } },
1273 |   };
1274 | 
1275 |   // Chat message states: transparent at y 20 while hidden, opaque at y 0
1276 |   // when visible, transparent at y -20 on exit
1277 |   const messageVariants = {
1278 |     hidden: { opacity: 0, y: 20 },
1279 |     visible: { opacity: 1, y: 0 },
1280 |     exit: { opacity: 0, y: -20 },
1281 |   };
1289 | 
1290 |   return (
1291 |     <motion.div
1292 |       className="min-h-screen bg-gray-50 flex flex-col"
1293 |       variants={containerVariants}
1294 |       initial="hidden"
1295 |       animate="visible"
1296 |       exit="exit"
1297 |     >
1298 |       <motion.nav
1299 |         className="flex justify-between items-center px-4 pt-4 sm:px-8 sm:py-4 bg-white sm:border-b border-[#CAC8C7] shadow-sm relative z-10"
1300 |         initial={{ y: -20, opacity: 0 }}
1301 |         animate={{ y: 0, opacity: 1 }}
1302 |         transition={{ delay: 0.2 }}
1303 |         style={{
1304 |           backgroundColor: "#ffffff",
1305 |         }}
1306 |       >
1307 |         <div className="flex items-center gap-2">
1308 |           <a
1309 |             href="https://browserbase.com/computer-use"
1310 |             target="_blank"
1311 |             rel="noopener noreferrer"
1312 |             className="flex items-center gap-3 hover:opacity-90 transition-opacity duration-200"
1313 |           >
1314 |             <Image
1315 |               src="/favicon.svg"
1316 |               alt="CUA Browser"
1317 |               className="w-8 h-8"
1318 |               width={32}
1319 |               height={32}
1320 |             />
1321 |             <span className="font-ppsupply text-xl font-bold text-[#100D0D]">
1322 |               CUA Browser
1323 |             </span>
1324 |           </a>
1325 |         </div>
1326 |         <div className="flex items-center gap-2">
1327 |           <a
1328 |             href="https://browserbase.com/computer-use"
1329 |             target="_blank"
1330 |             rel="noopener noreferrer"
1331 |           >
1332 |             <button className=" flex items-center justify-center px-3 py-2 bg-white gap-1 text-sm font-medium border border-[#F14A1C] transition-all duration-200 hover:bg-[#F14A1C] group h-full">
1333 |               <Layers
1334 |                 size={20}
1335 |                 className="sm:mr-2 text-[#F14A1C] group-hover:text-white transition-colors duration-200"
1336 |                 strokeWidth={2}
1337 |                 strokeLinecap="square"
1338 |                 strokeLinejoin="miter"
1339 |               />
1340 |               <span className="hidden sm:inline text-[#F14A1C] group-hover:text-white transition-colors  duration-200">
1341 |                 Deploy
1342 |               </span>
1343 |             </button>
1344 |           </a>
1345 |           <motion.button
1346 |             onClick={onClose}
1347 |             className="flex items-center justify-center px-3 py-2 bg-[#F6F5F5] gap-1 text-sm font-medium border border-[#CAC8C7] transition-all duration-200 hover:bg-gray-100 h-full"
1348 |             whileTap={{ scale: 0.98 }}
1349 |           >
1350 |             <span className="flex items-center text-[#10100D]">
1351 |               Close
1352 |               {!isMobile && (
1353 |                 <kbd className="px-1.5 text-xs bg-gray-100 ml-2 border border-[#CAC8C7]">
1354 |                   ESC
1355 |                 </kbd>
1356 |               )}
1357 |             </span>
1358 |           </motion.button>
1359 |         </div>
1360 |       </motion.nav>
1361 |       <main
1362 |         className="flex-1 flex flex-col items-center sm:p-4 md:p-6 relative overflow-hidden"
1363 |         style={{ backgroundColor: "#FCFCFC" }}
1364 |       >
1365 |         <div
1366 |           className="absolute inset-0 z-0 overflow-hidden pointer-events-none"
1367 |           style={{
1368 |             backgroundImage: "url(/grid.svg)",
1369 |             backgroundSize: "25%",
1370 |             backgroundPosition: "center",
1371 |             backgroundRepeat: "repeat",
1372 |             opacity: 0.8,
1373 |             position: "fixed",
1374 |           }}
1375 |         ></div>
1376 |         <motion.div
1377 |           className="w-full max-w-[1600px] bg-white md:border border-[#CAC8C7] shadow-sm overflow-hidden mx-auto relative z-10"
1378 |           style={{ height: isMobile ? "calc(100vh - 56px)" : "auto" }}
1379 |           initial={{ y: 20, opacity: 0 }}
1380 |           animate={{ y: 0, opacity: 1 }}
1381 |           transition={{ delay: 0.3 }}
1382 |         >
1383 |           <div className="flex flex-col md:flex-row h-full overflow-hidden">
1384 |             {/* Main browser area */}
1385 |             <div className="w-full md:flex-[2] gap-y-2 p-4 md:p-6 md:border-l border-[#CAC8C7] order-first md:order-last flex flex-col items-center justify-center sticky top-0 z-20 bg-white">
1386 |               {/* Tabs */}
1387 |               {!isAgentFinished && uiState.sessionId && (
1388 |                 <BrowserTabs
1389 |                   sessionId={uiState.sessionId}
1390 |                   activePage={activePage}
1391 |                   setActivePage={setActivePage}
1392 |                 />
1393 |               )}
1394 | 
1395 |               <BrowserSessionContainer
1396 |                 sessionUrl={activePageUrl}
1397 |                 isVisible={true}
1398 |                 isCompleted={isAgentFinished}
1399 |                 initialMessage={initialMessage}
1400 |                 sessionTime={sessionTime}
1401 |                 onStop={() => setIsAgentFinished(true)}
1402 |                 onRestart={onClose}
1403 |               />
1404 | 
1405 |               {!isAgentFinished && (
1406 |                 <div className="mt-4 md:hidden flex justify-center items-center space-x-1 text-sm text-[#2E191E]">
1407 |                   <SessionControls
1408 |                     sessionTime={sessionTime}
1409 |                     onStop={() => setIsAgentFinished(true)}
1410 |                   />
1411 |                 </div>
1412 |               )}
1413 |             </div>
1414 | 
1415 |             {/* Chat sidebar */}
1416 |             <div
1417 |               className="w-full md:w-[450px] min-w-0 md:min-w-[360px] px-4 pb-4 md:p-6 flex flex-col flex-1 overflow-hidden"
1418 |               style={{
1419 |                 height: isMobile
1420 |                   ? "calc(100vh - 300px)"
1421 |                   : "calc(100vh - 12rem)",
1422 |                 position: "relative",
1423 |               }}
1424 |             >
1425 |               {/* Pinned Goal Message */}
1426 |               {initialMessage && (
1427 |                 <div className="relative">
1428 |                   {/* Blur effect behind the goal message */}
1429 |                   <div
1430 |                     className="absolute pointer-events-none"
1431 |                     style={{
1432 |                       background: "rgba(245, 240, 255, 0.4)",
1433 |                       filter: "blur(20px)",
1434 |                       width: "130%",
1435 |                       height: "130%",
1436 |                       left: "-15%",
1437 |                       right: "-15%",
1438 |                       top: "-15%",
1439 |                       bottom: "-15%",
1440 |                       zIndex: 1,
1441 |                       borderRadius: "12px",
1442 |                     }}
1443 |                   ></div>
1444 |                   <motion.div
1445 |                     variants={messageVariants}
1446 |                     className={`p-4 font-ppsupply sticky top-0 z-10 w-full ${
1447 |                       !isScrolled ? "mb-4" : ""
1448 |                     }`}
1449 |                     style={{
1450 |                       backgroundColor: "rgba(245, 240, 255, 0.75)",
1451 |                       backdropFilter: "blur(8px)",
1452 |                       border: "1px solid #CAC8C7",
1453 |                       width: "100%",
1454 |                       maxWidth: "100%",
1455 |                       marginLeft: 0,
1456 |                       marginRight: 0,
1457 |                       position: "relative",
1458 |                       zIndex: 2,
1459 |                     }}
1460 |                   >
1461 |                     <div
1462 |                       className="absolute pointer-events-none"
1463 |                       style={{
1464 |                         background:
1465 |                           "linear-gradient(to bottom, rgba(245, 240, 255, 0.85), rgba(245, 240, 255, 0))",
1466 |                         opacity: 0.6,
1467 |                         filter: "blur(2px)",
1468 |                         width: "150%",
1469 |                         height: "32px",
1470 |                         left: "-25%",
1471 |                         right: "-25%",
1472 |                         bottom: "-24px",
1473 |                         zIndex: 0,
1474 |                       }}
1475 |                     ></div>
1476 | 
1477 |                     <div className="absolute right-2">
1478 |                       <Pin
1479 |                         color="#2E191E"
1480 |                         size={17}
1481 |                         strokeWidth={2}
1482 |                         style={{ transform: "rotate(30deg)" }}
1483 |                       />
1484 |                     </div>
1485 |                     <p className="font-semibold pr-6">Goal:</p>
1486 | 
1487 |                     <p className="break-words overflow-hidden text-ellipsis max-w-full">
1488 |                       {initialMessage}
1489 |                     </p>
1490 |                   </motion.div>
1491 |                 </div>
1492 |               )}
1493 | 
1494 |               <div
1495 |                 ref={chatContainerRef}
1496 |                 className="flex-1 overflow-y-auto overflow-x-hidden space-y-4 hide-scrollbar"
1497 |                 style={{
1498 |                   height: isMobile
1499 |                     ? "calc(100vh - 400px)"
1500 |                     : "calc(100% - 100px)",
1501 |                   flex: "1 1 auto",
1502 |                   position: "relative",
1503 |                 }}
1504 |               >
1505 |                 {uiState.steps.map((step, index) => {
1506 |                   // Determine if this is a system message (like stock price info)
1507 |                   const isSystemMessage =
1508 |                     step.tool === "MESSAGE" &&
1509 |                     step.reasoning === "Processing message";
1510 |                   // Determine if this is a user input message
1511 |                   const isUserInput =
1512 |                     step.tool === "MESSAGE" && step.reasoning === "User input";
1513 |                   return (
1514 |                     <motion.div
1515 |                       key={index}
1516 |                       variants={messageVariants}
1517 |                       className={`p-4 ${
1518 |                         isUserInput
1519 |                           ? "bg-white"
1520 |                           : isSystemMessage
1521 |                           ? "bg-[#2E191E] text-white"
1522 |                           : "bg-[#FCFCFC]"
1523 |                       } border border-[#B3B1B0] font-ppsupply space-y-2`}
1524 |                     >
1525 |                       <div className="flex justify-between items-center">
1526 |                         {/* Step number */}
1527 |                         <span
1528 |                           className={`text-sm ${
1529 |                             isSystemMessage
1530 |                               ? "text-[gray-200]"
1531 |                               : "text-[#2E191E]"
1532 |                           }`}
1533 |                         >
1534 |                           Step {step.stepNumber}
1535 |                         </span>
1536 |                         {/* Tool name */}
1537 |                         <span
1538 |                           className={`px-2 py-1 ${
1539 |                             isSystemMessage
1540 |                               ? " text-gray-200"
1541 |                               : " text-white-200"
1542 |                           } border border-[#CAC8C7] text-xs`}
1543 |                         >
1544 |                           {step.tool}
1545 |                         </span>
1546 |                       </div>
1547 |                       <div className="font-medium">
1548 |                         {isSystemMessage && step.tool === "MESSAGE" ? (
1549 |                           <>
1550 |                             {(() => {
1551 |                               // Check if this is a message with a question
1552 |                               if (step.text.includes("?")) {
1553 |                                 // Find all sentences that end with a question mark
1554 |                                 const sentences = step.text.match(
1555 |                                   /[^.!?]+[.!?]+/g
1556 |                                 ) || [step.text];
1557 | 
1558 |                                 // Separate questions from non-questions
1559 |                                 const questions = sentences.filter((s) =>
1560 |                                   s.trim().endsWith("?")
1561 |                                 );
1562 |                                 const nonQuestions = sentences.filter(
1563 |                                   (s) => !s.trim().endsWith("?")
1564 |                                 );
1565 | 
1566 |                                 // Join non-questions as the answer
1567 |                                 const answerText = nonQuestions
1568 |                                   .join(" ")
1569 |                                   .trim();
1570 | 
1571 |                                 // Join questions as the question
1572 |                                 const questionText = questions.join(" ").trim();
1573 | 
1574 |                                 // Check if the entire message is just a question
1575 |                                 // eslint-disable-next-line @typescript-eslint/no-unused-vars
1576 |                                 const isOnlyQuestion =
1577 |                                   step.text.trim() === questionText;
1578 | 
1579 |                                 // Extract answer content from the message or find it in previous steps
1580 |                                 let displayAnswerText = answerText;
1581 | 
1582 |                                 // If there's no answer content but there is a question
1583 |                                 if (!displayAnswerText && questionText) {
1584 |                                   // First, check if this step has a specific answer marker
1585 |                                   if (step.text.includes("ANSWER:")) {
1586 |                                     const answerParts =
1587 |                                       step.text.split("ANSWER:");
1588 |                                     if (answerParts.length > 1) {
1589 |                                       // Extract the text after "ANSWER:" and before any "QUESTION" marker
1590 |                                       let extractedAnswer =
1591 |                                         answerParts[1].trim();
1592 |                                       if (
1593 |                                         extractedAnswer.includes("QUESTION")
1594 |                                       ) {
1595 |                                         extractedAnswer = extractedAnswer
1596 |                                           .split("QUESTION")[0]
1597 |                                           .trim();
1598 |                                       }
1599 |                                       if (extractedAnswer) {
1600 |                                         displayAnswerText = extractedAnswer;
1601 |                                       }
1602 |                                     }
1603 |                                   }
1604 | 
1605 |                                   // If we still don't have an answer, look for the first message step
1606 |                                   if (!displayAnswerText) {
1607 |                                     // Look for relevant information in previous steps
1608 |                                     const previousSteps = uiState.steps.slice(
1609 |                                       0,
1610 |                                       index
1611 |                                     );
1612 | 
1613 |                                     // Find the first informative MESSAGE step that's not a question
1614 |                                     const infoStep = previousSteps.find(
1615 |                                       (s) =>
1616 |                                         s.tool === "MESSAGE" &&
1617 |                                         s.text &&
1618 |                                         !s.text.includes("?") && // Not a question
1619 |                                         s.text.length > 10
1620 |                                     );
1621 | 
1622 |                                     if (infoStep) {
1623 |                                       // Use the content from the informative step
1624 |                                       displayAnswerText = infoStep.text;
1625 |                                     } else {
1626 |                                       // Default message if no relevant info found
1627 |                                       displayAnswerText =
1628 |                                         "I'm currently searching for this information. The results will be displayed here when available.";
1629 |                                     }
1630 |                                   }
1631 |                                 } else if (!displayAnswerText) {
1632 |                                   // For other cases with no answer content
1633 |                                   displayAnswerText = step.text;
1634 |                                 }
1635 | 
1636 |                                 // Only render the answer part in this message block
1637 |                                 return (
1638 |                                   <div className="mb-3">
1639 |                                     <div className="text-xs font-semibold text-gray-200 mb-1">
1640 |                                       ANSWER:
1641 |                                     </div>
1642 |                                     <div className="p-2">
1643 |                                       <span>{displayAnswerText}</span>
1644 |                                     </div>
1645 |                                   </div>
1646 |                                 );
1647 |                               } else {
1648 |                                 // For regular messages without questions, format them as answers
1649 |                                 return (
1650 |                                   <div className="mb-3">
1651 |                                     {/* <div className="text-xs font-semibold text-gray-200 mb-1">
1652 |                                       ANSWER:
1653 |                                     </div> */}
1654 |                                     <div className="p-2 ">
1655 |                                       <span>{step.text}</span>
1656 |                                     </div>
1657 |                                   </div>
1658 |                                 );
1659 |                               }
1660 |                             })()}
1661 |                           </>
1662 |                         ) : (
1663 |                           step.text
1664 |                         )}
1665 |                       </div>
1666 |                       {/* Show reasoning for all steps except the last one */}
1667 |                       {(!isSystemMessage ||
1668 |                         index < uiState.steps.length - 1) && (
1669 |                         <p className="text-sm text-white-200">
1670 |                           <span className="font-semibold">Reasoning: </span>
1671 |                           {step.reasoning}
1672 |                         </p>
1673 |                       )}
1674 |                     </motion.div>
1675 |                   );
1676 |                 })}
1677 | 
1678 |                 {/* Add a separate question message if the last message had a question */}
1679 |                 {uiState.steps.length > 0 &&
1680 |                   (() => {
1681 |                     const lastStep = uiState.steps[uiState.steps.length - 1];
1682 |                     if (
1683 |                       lastStep.tool === "MESSAGE" &&
1684 |                       lastStep.text.includes("?")
1685 |                     ) {
1686 |                       // Find all sentences that end with a question mark
1687 |                       const sentences = lastStep.text.match(
1688 |                         /[^.!?]+[.!?]+/g
1689 |                       ) || [lastStep.text];
1690 | 
1691 |                       // Extract questions
1692 |                       const questions = sentences.filter((s) =>
1693 |                         s.trim().endsWith("?")
1694 |                       );
1695 |                       const questionText = questions.join(" ").trim();
1696 | 
1697 |                       // Check if the entire message is just a question
1698 |                       // eslint-disable-next-line @typescript-eslint/no-unused-vars
1699 |                       const isOnlyQuestion =
1700 |                         lastStep.text.trim() === questionText;
1701 | 
1702 |                       if (questionText) {
1703 |                         return (
1704 |                           <motion.div
1705 |                             variants={messageVariants}
1706 |                             initial={{ opacity: 0, y: 10 }}
1707 |                             animate={{ opacity: 1, y: 0 }}
1708 |                             transition={{ delay: 0.5, duration: 0.3 }}
1709 |                             className={`p-4 bg-[#2E191E] text-white font-ppsupply space-y-2 mt-2`}
1710 |                           >
1711 |                             <div className="flex justify-between items-center">
1712 |                               {/* <span className="text-sm text-gray-200">
1713 |                               {isOnlyQuestion ? "Question" : "Follow-up"}
1714 |                             </span> */}
1715 |                               {/* <span className="px-2 py-1 text-gray-200 rounded text-xs">
1716 |                               QUESTION
1717 |                             </span> */}
1718 |                             </div>
1719 |                             <div className="font-medium">
1720 |                               <div className="p-2 border-l-2 ">
1721 |                                 <span>{questionText}</span>
1722 |                               </div>
1723 |                             </div>
1724 |                           </motion.div>
1725 |                         );
1726 |                       }
1727 |                     }
1728 |                     return null;
1729 |                   })()}
1730 |               </div>
1731 | 
1732 |               {/* Chat Input */}
1733 |               {isWaitingForInput && !isAgentFinished && (
1734 |                 <motion.form
1735 |                   initial={{ opacity: 0, y: 20 }}
1736 |                   animate={{ opacity: 1, y: 0 }}
1737 |                   transition={{ type: "spring", stiffness: 300, damping: 30 }}
1738 |                   onAnimationComplete={() => {
1739 |                     // Focus input when animation completes
1740 |                     if (inputRef.current) {
1741 |                       inputRef.current.focus();
1742 |                       console.log("Animation complete, focusing input");
1743 |                     }
1744 |                   }}
1745 |                   onSubmit={async (e) => {
1746 |                     e.preventDefault();
1747 |                     if (
1748 |                       ["quit", "exit", "bye"].includes(userInput.toLowerCase())
1749 |                     ) {
1750 |                       setIsAgentFinished(true);
1751 |                       return;
1752 |                     }
1753 |                     await handleUserInput(userInput);
1754 |                   }}
1755 |                   className="mt-4 flex gap-2 w-full"
1756 |                 >
1757 |                   <input
1758 |                     ref={inputRef}
1759 |                     type="text"
1760 |                     value={userInput}
1761 |                     onChange={(e) => setUserInput(e.target.value)}
1762 |                     placeholder="Type your message..."
1763 |                     className="flex-1 px-2 sm:px-4 py-2 border focus:outline-none focus:ring-1 focus:ring-[#FF3B00] focus:border-transparent font-ppsupply transition-all text-sm sm:text-base"
1764 |                     style={{
1765 |                       // backgroundColor: "rgba(245, 240, 255, 0.75)",
1766 |                       backdropFilter: "blur(8px)",
1767 |                       borderColor: "rgba(255, 59, 0, 0.5)",
1768 |                       borderWidth: "2px",
1769 |                     }}
1770 |                   />
1771 |                   <button
1772 |                     type="submit"
1773 |                     disabled={!userInput.trim()}
1774 |                     className="px-2 sm:px-4 py-2 bg-[#FF3B00] text-white font-ppsupply disabled:opacity-50 disabled:cursor-not-allowed hover:bg-[#E63500] transition-colors text-sm sm:text-base whitespace-nowrap"
1775 |                   >
1776 |                     Send
1777 |                   </button>
1778 |                 </motion.form>
1779 |               )}
1780 |             </div>
1781 |           </div>
1782 |         </motion.div>
1783 |       </main>
1784 |     </motion.div>
1785 |   );
1786 | }
1787 | 


--------------------------------------------------------------------------------
/app/components/PosthogProvider.tsx:
--------------------------------------------------------------------------------
 1 | "use client";
 2 | 
 3 | import posthog from "posthog-js";
 4 | import { PostHogProvider as PHProvider } from "posthog-js/react";
 5 | import { useEffect } from "react";
 6 | 
 7 | export function PostHogProvider({ children }: { children: React.ReactNode }) {
 8 |   useEffect(() => {
 9 |     try {
10 |       if (
11 |         typeof window === "undefined" ||
12 |         !process.env.NEXT_PUBLIC_POSTHOG_KEY ||
13 |         !process.env.NEXT_PUBLIC_POSTHOG_HOST
14 |       )
15 |         return;
16 |       posthog.init(process.env.NEXT_PUBLIC_POSTHOG_KEY, {
17 |         api_host: process.env.NEXT_PUBLIC_POSTHOG_HOST,
18 |         person_profiles: "always",
19 |       });
20 |     } catch (e) {
21 |       console.error(e);
22 |     }
23 |   }, []);
24 | 
25 |   return <PHProvider client={posthog}>{children}</PHProvider>;
26 | }
27 | 


--------------------------------------------------------------------------------
/app/components/SessionControls.tsx:
--------------------------------------------------------------------------------
 1 | "use client";
 2 | 
 3 | import React, { useState, useEffect } from "react";
 4 | import { motion } from "framer-motion";
 5 | 
/** Props accepted by the `SessionControls` component. */
interface SessionControlsProps {
  /** Elapsed session time; passed to `formatTime`, which treats it as seconds. */
  sessionTime: number;
  /** Called when the "Stop" button is clicked. */
  onStop: () => void;
}
10 | 
/**
 * Formats an elapsed duration as `m:ss / <totalTime>`.
 *
 * The elapsed value is first normalised to a whole, non-negative number of
 * seconds: fractional input is floored, and negative or non-finite input
 * (NaN, ±Infinity) is treated as 0. Without this, values such as `61.5` or
 * `-1` would render as `1:1.5` or `-1:-1`.
 *
 * @param seconds - Elapsed time in seconds.
 * @param totalTime - Pre-formatted total duration appended after the slash.
 * @returns The display string, e.g. `"1:05 / 5:00"`.
 */
const formatTime = (seconds: number, totalTime: string): string => {
  const wholeSeconds = Number.isFinite(seconds)
    ? Math.max(0, Math.floor(seconds))
    : 0;

  // Always show minutes:seconds format (minutes are not capped at 59).
  const minutes = Math.floor(wholeSeconds / 60);
  const remainingSeconds = wholeSeconds % 60;

  return `${minutes}:${String(remainingSeconds).padStart(2, "0")} / ${totalTime}`;
};
19 | 
/**
 * Compact toolbar showing the elapsed session time next to a "Stop" button.
 *
 * The formatted time is only rendered after the component has mounted on the
 * client; until then a fixed "0:00" placeholder is shown to avoid a hydration
 * mismatch.
 */
export const SessionControls: React.FC<SessionControlsProps> = ({
  sessionTime,
  onStop,
}) => {
  // Flipped to true by the mount effect below; false on the first render.
  const [hasMounted, setHasMounted] = useState(false);

  useEffect(() => {
    setHasMounted(true);
  }, []);

  // Placeholder before mount, real elapsed/total label afterwards.
  const timeLabel = hasMounted ? formatTime(sessionTime, "5:00") : "0:00";

  return (
    <div className="flex flex-row items-center gap-2 bg-[#F6F5F5] px-2 py-1 border border-[#CAC8C7]">
      <div className="flex flex-row items-center gap-1">
        {/* Clock icon */}
        <svg
          className="w-4 h-4"
          xmlns="http://www.w3.org/2000/svg"
          fill="none"
          viewBox="0 0 24 24"
          stroke="currentColor"
          strokeWidth={1.5}
        >
          <circle cx="12" cy="12" r="10" />
          <polyline points="12 6 12 12 16 14" />
        </svg>
        <div className="flex items-center px-1 py-1 text-sm text-[#2E191E]">
          <span className="font-medium">Session time:</span>{" "}
          <span className="ml-1 min-w-[80px] text-center">{timeLabel}</span>
        </div>
      </div>

      <motion.button
        className="flex items-center justify-center px-2 py-0.5 text-sm text-white bg-[#FF3B00] hover:bg-[#E63500] transition-colors"
        whileHover={{ scale: 1.05 }}
        whileTap={{ scale: 0.95 }}
        onClick={onStop}
      >
        {/* Filled square "stop" icon */}
        <svg
          className="w-4 h-4 mr-1"
          xmlns="http://www.w3.org/2000/svg"
          fill="none"
          viewBox="0 0 24 24"
          stroke="currentColor"
        >
          <rect x="6" y="6" width="12" height="12" rx="1" fill="currentColor" />
        </svg>
        Stop
      </motion.button>
    </div>
  );
};
73 | 


--------------------------------------------------------------------------------
/app/components/ui/sliding-number.tsx:
--------------------------------------------------------------------------------
  1 | 'use client';
  2 | import { useEffect, useId } from 'react';
  3 | import {
  4 |   MotionValue,
  5 |   motion,
  6 |   useSpring,
  7 |   useTransform,
  8 |   motionValue,
  9 | } from 'motion/react';
 10 | import useMeasure from 'react-use-measure';
 11 | 
/**
 * Spring settings shared by this module: passed to `useSpring` in `Digit`
 * and used as the `transition` prop of the animated span in `Number`.
 */
const TRANSITION = {
  type: 'spring',
  stiffness: 280,
  damping: 18,
  mass: 0.3,
};
 18 | 
 19 | function Digit({ value, place }: { value: number; place: number }) {
 20 |   const valueRoundedToPlace = Math.floor(value / place) % 10;
 21 |   const initial = motionValue(valueRoundedToPlace);
 22 |   const animatedValue = useSpring(initial, TRANSITION);
 23 | 
 24 |   useEffect(() => {
 25 |     animatedValue.set(valueRoundedToPlace);
 26 |   }, [animatedValue, valueRoundedToPlace]);
 27 | 
 28 |   return (
 29 |     <div className='relative inline-block w-[1ch] overflow-x-visible overflow-y-clip leading-none tabular-nums'>
 30 |       <div className='invisible'>0</div>
 31 |       {Array.from({ length: 10 }, (_, i) => (
 32 |         <Number key={i} mv={animatedValue} number={i} />
 33 |       ))}
 34 |     </div>
 35 |   );
 36 | }
 37 | 
 38 | function Number({ mv, number }: { mv: MotionValue<number>; number: number }) {
 39 |   const uniqueId = useId();
 40 |   const [ref, bounds] = useMeasure();
 41 | 
 42 |   const y = useTransform(mv, (latest) => {
 43 |     if (!bounds.height) return 0;
 44 |     const placeValue = latest % 10;
 45 |     const offset = (10 + number - placeValue) % 10;
 46 |     let memo = offset * bounds.height;
 47 | 
 48 |     if (offset > 5) {
 49 |       memo -= 10 * bounds.height;
 50 |     }
 51 | 
 52 |     return memo;
 53 |   });
 54 | 
 55 |   // don't render the animated number until we know the height
 56 |   if (!bounds.height) {
 57 |     return (
 58 |       <span ref={ref} className='invisible absolute'>
 59 |         {number}
 60 |       </span>
 61 |     );
 62 |   }
 63 | 
 64 |   return (
 65 |     <motion.span
 66 |       style={{ y }}
 67 |       layoutId={`${uniqueId}-${number}`}
 68 |       className='absolute inset-0 flex items-center justify-center'
 69 |       transition={TRANSITION}
 70 |       ref={ref}
 71 |     >
 72 |       {number}
 73 |     </motion.span>
 74 |   );
 75 | }
 76 | 
/** Props accepted by the `SlidingNumber` component. */
type SlidingNumberProps = {
  /** Number to display; a leading '-' is rendered when it is below zero. */
  value: number;
  /** When true and the integer part is below 10, a leading "0" is prepended. Defaults to false. */
  padStart?: boolean;
  /** Text rendered between the integer and decimal digits. Defaults to '.'. */
  decimalSeparator?: string;
};
 82 | 
 83 | export function SlidingNumber({
 84 |   value,
 85 |   padStart = false,
 86 |   decimalSeparator = '.',
 87 | }: SlidingNumberProps) {
 88 |   const absValue = Math.abs(value);
 89 |   const [integerPart, decimalPart] = absValue.toString().split('.');
 90 |   const integerValue = parseInt(integerPart, 10);
 91 |   const paddedInteger =
 92 |     padStart && integerValue < 10 ? `0${integerPart}` : integerPart;
 93 |   const integerDigits = paddedInteger.split('');
 94 |   const integerPlaces = integerDigits.map((_, i) =>
 95 |     Math.pow(10, integerDigits.length - i - 1)
 96 |   );
 97 | 
 98 |   return (
 99 |     <div className='flex items-center'>
100 |       {value < 0 && '-'}
101 |       {integerDigits.map((_, index) => (
102 |         <Digit
103 |           key={`pos-${integerPlaces[index]}`}
104 |           value={integerValue}
105 |           place={integerPlaces[index]}
106 |         />
107 |       ))}
108 |       {decimalPart && (
109 |         <>
110 |           <span>{decimalSeparator}</span>
111 |           {decimalPart.split('').map((_, index) => (
112 |             <Digit
113 |               key={`decimal-${index}`}
114 |               value={parseInt(decimalPart, 10)}
115 |               place={Math.pow(10, decimalPart.length - index - 1)}
116 |             />
117 |           ))}
118 |         </>
119 |       )}
120 |     </div>
121 |   );
122 | }
123 | 


--------------------------------------------------------------------------------
/app/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/cua-browser/e0de1c7456cbe60c1a01c665aac785435a991297/app/favicon.ico


--------------------------------------------------------------------------------
/app/globals.css:
--------------------------------------------------------------------------------
  1 | @tailwind base;
  2 | @tailwind components;
  3 | @tailwind utilities;
  4 | 
/* Custom utility classes: .hide-scrollbar and .pulse-glow-red */
@layer utilities {
  /* .hide-scrollbar, part 1: IE/Edge and Firefox properties */
  .hide-scrollbar {
    -ms-overflow-style: none;  /* IE and Edge */
    scrollbar-width: none;  /* Firefox */
  }
  
  /* .hide-scrollbar, part 2: Chrome, Safari and Opera (WebKit pseudo-element) */
  .hide-scrollbar::-webkit-scrollbar {
    display: none;
  }

  /* Pulsing glow animation: box-shadow grows from 5px to 20px and back */
  @keyframes pulse-glow {
    0% {
      box-shadow: 0 0 5px rgba(255, 59, 0, 0.5);
    }
    50% {
      box-shadow: 0 0 20px rgba(255, 59, 0, 0.7);
    }
    100% {
      box-shadow: 0 0 5px rgba(255, 59, 0, 0.5);
    }
  }
  
  /* Runs the pulse-glow keyframes on a 2s loop, forever */
  .pulse-glow-red {
    animation: pulse-glow 2s infinite;
  }
}
 35 | 
/*
 * Theme tokens as CSS custom properties. :root holds the default values and
 * .dark overrides them for any subtree carrying the `dark` class.
 * Colour values are bare space-separated triples (e.g. `240 10% 3.9%`) -
 * presumably H S% L% components meant to be wrapped in hsl() by the consumer;
 * confirm against the Tailwind config.
 */
@layer base {
  /* Default (light) theme values */
  :root {

    --background: 0 0% 100%;

    --foreground: 240 10% 3.9%;

    --card: 0 0% 100%;

    --card-foreground: 240 10% 3.9%;

    --popover: 0 0% 100%;

    --popover-foreground: 240 10% 3.9%;

    --primary: 240 5.9% 10%;

    --primary-foreground: 0 0% 98%;

    --secondary: 240 4.8% 95.9%;

    --secondary-foreground: 240 5.9% 10%;

    --muted: 240 4.8% 95.9%;

    --muted-foreground: 240 3.8% 46.1%;

    --accent: 240 4.8% 95.9%;

    --accent-foreground: 240 5.9% 10%;

    --destructive: 0 84.2% 60.2%;

    --destructive-foreground: 0 0% 98%;

    --border: 240 5.9% 90%;

    --input: 240 5.9% 90%;

    --ring: 240 10% 3.9%;

    --chart-1: 12 76% 61%;

    --chart-2: 173 58% 39%;

    --chart-3: 197 37% 24%;

    --chart-4: 43 74% 66%;

    --chart-5: 27 87% 67%;

    --radius: 0.5rem
  }
  /* Dark theme overrides (colour tokens only; --radius is not redefined) */
  .dark {

    --background: 240 10% 3.9%;

    --foreground: 0 0% 98%;

    --card: 240 10% 3.9%;

    --card-foreground: 0 0% 98%;

    --popover: 240 10% 3.9%;

    --popover-foreground: 0 0% 98%;

    --primary: 0 0% 98%;

    --primary-foreground: 240 5.9% 10%;

    --secondary: 240 3.7% 15.9%;

    --secondary-foreground: 0 0% 98%;

    --muted: 240 3.7% 15.9%;

    --muted-foreground: 240 5% 64.9%;

    --accent: 240 3.7% 15.9%;

    --accent-foreground: 0 0% 98%;

    --destructive: 0 62.8% 30.6%;

    --destructive-foreground: 0 0% 98%;

    --border: 240 3.7% 15.9%;

    --input: 240 3.7% 15.9%;

    --ring: 240 4.9% 83.9%;

    --chart-1: 220 70% 50%;

    --chart-2: 160 60% 45%;

    --chart-3: 30 80% 55%;

    --chart-4: 280 65% 60%;

    --chart-5: 340 75% 55%
  }
}
140 | 
141 | 
142 | 
/*
 * Element defaults built from Tailwind utilities via @apply.
 * The border-border / bg-background / text-foreground utilities presumably
 * map to the --border, --background and --foreground variables defined in
 * this file; confirm against the Tailwind config.
 */
@layer base {
  /* Every element gets the border-border utility */
  * {
    @apply border-border;
  }
  /* Page background and text colour */
  body {
    @apply bg-background text-foreground;
  }
}
151 | 


--------------------------------------------------------------------------------
/app/layout.tsx:
--------------------------------------------------------------------------------
 1 | import { Analytics } from "@vercel/analytics/next";
 2 | import type { Metadata } from "next";
 3 | import { Inter } from "next/font/google";
 4 | import localFont from "next/font/local";
 5 | import "./globals.css";
 6 | import { PostHogProvider } from "./components/PosthogProvider";
 7 | 
// Inter from next/font/google (latin subset), exposed as the CSS variable
// `--font-inter` through `inter.variable`.
const inter = Inter({
  subsets: ["latin"],
  variable: "--font-inter",
});

// PP Neue Montreal Medium loaded from the repository's fonts directory,
// exposed as the CSS variable `--font-pp-neue` through `ppNeue.variable`.
const ppNeue = localFont({
  src: "../fonts/PPNeueMontreal-Medium.otf",
  variable: "--font-pp-neue",
});

// NOTE(review): the PP Supply Sans loader below is disabled, so no
// `--font-pp-supply` variable is defined by this file. Verify how the
// `font-ppsupply` class used by components resolves.
// const ppSupply = localFont({
//   src: "../fonts/PPSupplySans-Regular.otf",
//   variable: "--font-pp-supply",
// });
22 | 
/**
 * Site-wide metadata exported for Next.js: page title and description,
 * Open Graph preview (image, title, description, canonical URL) and the
 * favicon set (SVG first, then the .ico file).
 */
export const metadata: Metadata = {
  title: "Computer Use Browser",
  description: "Watch AI browse the web, for free",
  openGraph: {
    images: ["/og.png"],
    title: "Computer Use Browser",
    description: "Watch AI browse the web, for free",
    url: "https://cua.browserbase.com",
  },
  icons: {
    icon: [
      { url: "/favicon.svg", type: "image/svg+xml" },
      { url: "/favicon.ico" },
    ],
  },
};
39 | 
/**
 * Root layout: renders the `<html>`/`<body>` shell, attaches the font CSS
 * variables and base text/background classes to `<body>`, wraps the page
 * content in `PostHogProvider`, and mounts Vercel `Analytics` after it.
 *
 * @param children - The page content to render inside the shell.
 */
export default function RootLayout({
  children,
}: {
  children: React.ReactNode;
}) {
  // Font variable classes first, then the global typography/colour utilities.
  const bodyClassName = `${inter.variable} ${ppNeue.variable} font-sans antialiased bg-white text-gray-900`;

  return (
    <html lang="en">
      <head>
        <link rel="icon" href="/favicon.svg" type="image/svg+xml" />
        {/* Fallback for browsers that don't support SVG favicons */}
        <link rel="alternate icon" href="/favicon.ico" />
      </head>
      <body className={bodyClassName}>
        <PostHogProvider>{children}</PostHogProvider>
        <Analytics />
      </body>
    </html>
  );
}
61 | 


--------------------------------------------------------------------------------
/app/page.tsx:
--------------------------------------------------------------------------------
  1 | "use client";
  2 | 
  3 | import { useState, useEffect, useCallback, useRef } from "react";
  4 | import { AnimatePresence, motion } from "framer-motion";
  5 | import AnimatedButton from "./components/AnimatedButton";
  6 | import Image from "next/image";
  7 | import posthog from "posthog-js";
  8 | import ChatFeed from "./components/ChatFeed";
  9 | import { Code, Gamepad2, Layers, Table, TrendingUp } from "lucide-react";
 10 | 
 11 | const Tooltip = ({
 12 |   children,
 13 |   text,
 14 | }: {
 15 |   children: React.ReactNode;
 16 |   text: string;
 17 | }) => {
 18 |   const [isHovered, setIsHovered] = useState(false);
 19 | 
 20 |   return (
 21 |     <div
 22 |       className="relative"
 23 |       onMouseEnter={() => setIsHovered(true)}
 24 |       onMouseLeave={() => setIsHovered(false)}
 25 |     >
 26 |       {children}
 27 |       <AnimatePresence>
 28 |         {isHovered && (
 29 |           <motion.span
 30 |             initial={{ opacity: 0, y: 10, scale: 0.9 }}
 31 |             animate={{ opacity: 1, y: 3, scale: 1 }}
 32 |             exit={{ opacity: 0, y: 10, scale: 0.9 }}
 33 |             transition={{
 34 |               duration: 0.2,
 35 |               type: "spring",
 36 |               stiffness: 400,
 37 |               damping: 17,
 38 |             }}
 39 |             className="absolute w-auto px-3 py-2 min-w-max left-1/2 -translate-x-1/2 bg-[#2E191E] text-white text-xs font-ppsupply z-50 backdrop-blur-sm"
 40 |           >
 41 |             {text}
 42 |           </motion.span>
 43 |         )}
 44 |       </AnimatePresence>
 45 |     </div>
 46 |   );
 47 | };
 48 | 
 49 | export default function Home() {
 50 |   const [isChatVisible, setIsChatVisible] = useState(false);
 51 |   const [initialMessage, setInitialMessage] = useState("");
 52 |   const inputRef = useRef<HTMLInputElement>(null);
 53 | 
 54 |   useEffect(() => {
 55 |     const handleKeyDown = (e: KeyboardEvent) => {
 56 |       // Handle CMD+Enter to submit the form when chat is not visible
 57 |       if (!isChatVisible && (e.metaKey || e.ctrlKey) && e.key === "Enter") {
 58 |         e.preventDefault();
 59 |         const form = document.querySelector("form") as HTMLFormElement;
 60 |         if (form) {
 61 |           form.requestSubmit();
 62 |         }
 63 |       }
 64 | 
 65 |       // Handle CMD+K to focus input when chat is not visible
 66 |       if (!isChatVisible && (e.metaKey || e.ctrlKey) && e.key === "k") {
 67 |         e.preventDefault();
 68 |         const input = document.querySelector(
 69 |           'input[name="message"]'
 70 |         ) as HTMLInputElement;
 71 |         if (input) {
 72 |           input.focus();
 73 |         }
 74 |       }
 75 | 
 76 |       // Handle ESC to close chat when visible
 77 |       if (isChatVisible && e.key === "Escape") {
 78 |         e.preventDefault();
 79 |         setIsChatVisible(false);
 80 |       }
 81 |     };
 82 | 
 83 |     window.addEventListener("keydown", handleKeyDown);
 84 |     return () => window.removeEventListener("keydown", handleKeyDown);
 85 |   }, [isChatVisible]);
 86 | 
 87 |   const startChat = useCallback(
 88 |     (finalMessage: string) => {
 89 |       setInitialMessage(finalMessage);
 90 |       setIsChatVisible(true);
 91 | 
 92 |       try {
 93 |         posthog.capture("submit_message", {
 94 |           message: finalMessage,
 95 |         });
 96 |       } catch (e) {
 97 |         console.error(e);
 98 |       }
 99 |     },
100 |     [setInitialMessage, setIsChatVisible]
101 |   );
102 | 
103 |   return (
104 |     <AnimatePresence mode="wait">
105 |       {!isChatVisible ? (
106 |         <div className="min-h-screen bg-gray-50 flex flex-col relative">
107 |           {/* Grid Background */}
108 |           <div
109 |             className="absolute inset-0 z-0 overflow-hidden pointer-events-none"
110 |             style={{
111 |               backgroundImage: "url(/grid.svg)",
112 |               backgroundSize: "25%",
113 |               backgroundPosition: "center",
114 |               backgroundRepeat: "repeat",
115 |               opacity: 0.8,
116 |               position: "fixed",
117 |             }}
118 |           ></div>
119 |           {/* Top Navigation */}
120 |           <nav className="flex justify-between items-center px-8 py-4 bg-white border-b border-[#CAC8C7] z-10">
121 |             <div className="flex items-center gap-3">
122 |               <a
123 |                 href="https://www.browserbase.com/computer-use"
124 |                 target="_blank"
125 |                 rel="noopener noreferrer"
126 |                 className="flex items-center gap-3 hover:opacity-90 transition-opacity duration-200"
127 |               >
128 |                 <Image
129 |                   src="/favicon.svg"
130 |                   alt="CUA Browser"
131 |                   className="w-8 h-8"
132 |                   width={32}
133 |                   height={32}
134 |                 />
135 |                 <span className="font-ppsupply text-gray-900 font-semibold">
136 |                   CUA Browser
137 |                 </span>
138 |               </a>
139 |             </div>
140 |             <div className="flex items-center gap-2">
141 |               <a
142 |                 href="https://www.browserbase.com/computer-use"
143 |                 target="_blank"
144 |                 rel="noopener noreferrer"
145 |               >
146 |                 <button className="h-fit flex items-center justify-center px-3 py-2 bg-white gap-1 text-sm font-medium border border-[#F14A1C] transition-all duration-200 hover:bg-[#F14A1C] group">
147 |                   <Layers
148 |                     size={20}
149 |                     className="sm:mr-2 text-[#F14A1C] group-hover:text-white transition-colors duration-200"
150 |                     strokeWidth={2}
151 |                     strokeLinecap="square"
152 |                     strokeLinejoin="miter"
153 |                   />
154 |                   <span className="hidden sm:inline text-[#F14A1C] group-hover:text-white transition-colors duration-200">Deploy</span>
155 |                 </button>
156 |               </a>
157 |               <a
158 |                 href="https://github.com/browserbase/cua-browser"
159 |                 target="_blank"
160 |                 rel="noopener noreferrer"
161 |               >
162 |                 <button className="h-fit flex items-center justify-center px-3 py-2 bg-[#1b2128] hover:bg-[#1d232b] gap-1 text-sm font-medium text-white border border-[#CAC8C7]  transition-colors duration-200">
163 |                   <Image
164 |                     src="/github.svg"
165 |                     alt="GitHub"
166 |                     width={20}
167 |                     height={20}
168 |                     className="sm:mr-2"
169 |                   />
170 |                   <span className="hidden sm:inline">View GitHub</span>
171 |                 </button>
172 |               </a>
173 |             </div>
174 |           </nav>
175 | 
176 |           {/* Main Content */}
177 |           <main className="flex-1 flex flex-col items-center pt-12 md:pt-16 lg:pt-20 pb-16 md:pb-24 lg:pb-32 px-6 z-10">
178 |             <div className="w-full max-w-[640px] md:max-w-[800px] lg:max-w-[960px] bg-white border border-[#CAC8C7] shadow-sm z-10">
179 |               <div className="w-full h-12 md:h-16 bg-white border-b border-[#CAC8C7] flex items-center px-4 md:px-6">
180 |                 <div className="flex items-center gap-2">
181 |                   <Tooltip text="why would you want to close this?">
182 |                     <div className="w-3 h-3  bg-red-500 hover:scale-110 transition-transform" />
183 |                   </Tooltip>
184 |                   <Tooltip text="s/o to the 🅱️rowserbase devs">
185 |                     <div className="w-3 h-3  bg-yellow-500 hover:scale-110 transition-transform" />
186 |                   </Tooltip>
187 |                   <Tooltip text="@pk_iv, @alexdphan, @nosajio were here">
188 |                     <div className="w-3 h-3  bg-green-500 hover:scale-110 transition-transform" />
189 |                   </Tooltip>
190 |                 </div>
191 |               </div>
192 | 
193 |               <div className="p-8 md:p-10 lg:p-12 flex flex-col items-center gap-8 md:gap-10">
194 |                 <div className="flex flex-col items-center gap-3 md:gap-5">
195 |                   <h1 className="text-2xl md:text-3xl lg:text-4xl font-ppneue text-gray-900 text-center">
196 |                     Computer Use Browser
197 |                   </h1>
198 |                   <p className="text-base md:text-lg font-ppsupply text-gray-500 text-center">
199 |                     Hit run to watch AI browse the web.
200 |                   </p>
201 |                 </div>
202 | 
203 |                 <form
204 |                   onSubmit={(e) => {
205 |                     e.preventDefault();
206 |                     const formData = new FormData(e.currentTarget);
207 |                     const input = e.currentTarget.querySelector(
208 |                       'input[name="message"]'
209 |                     ) as HTMLInputElement;
210 |                     const message = (formData.get("message") as string).trim();
211 |                     const finalMessage = message || input.placeholder;
212 |                     startChat(finalMessage);
213 |                   }}
214 |                   className="w-full max-w-[720px] md:max-w-[880px] lg:max-w-[1040px] flex flex-col items-center gap-3 md:gap-5"
215 |                 >
216 |                   <div className="relative w-full">
217 |                     <input
218 |                       ref={inputRef}
219 |                       name="message"
220 |                       type="text"
221 |                       placeholder="What's the price of NVIDIA stock?"
222 |                       className="w-full px-4 py-3 sm:pr-[140px] pr-[100px] border border-[#CAC8C7] text-gray-900 placeholder:text-gray-400 focus:outline-none focus:ring-0 focus:border-[#FF3B00] font-ppsupply text-sm md:text-base md:py-5 lg:py-6 transition-all duration-300 focus:pulse-glow-red focus:backdrop-blur-sm focus:bg-opacity-95 focus:bg-white"
223 |                       style={{
224 |                         textOverflow: "ellipsis",
225 |                         whiteSpace: "nowrap",
226 |                         overflow: "hidden",
227 |                         backdropFilter: "blur(8px)",
228 |                       }}
229 |                     />
230 |                     <div className="absolute inset-y-0 right-0 flex items-center pr-3">
231 |                       <AnimatedButton type="submit">Run</AnimatedButton>
232 |                     </div>
233 |                   </div>
234 |                 </form>
235 |                 <div className="grid grid-cols-2 gap-3 md:gap-4 lg:gap-5 w-full">
236 |                   <motion.button
237 |                     initial={{ opacity: 0, y: 20 }}
238 |                     animate={{ opacity: 1, y: 0 }}
239 |                     transition={{ duration: 0.3, delay: 0.1 }}
240 |                     onClick={() =>
241 |                       startChat(
242 |                         "Find the most recently opened non-draft PR on Github for Browserbase's Stagehand project and make sure the combination-evals in the PR validation passed."
243 |                       )
244 |                     }
245 |                     className="p-3 md:p-5 lg:p-6 text-sm md:text-base lg:text-xl text-[#2E191E] border border-[#CAC8C7] hover:border-[#FF3B00] hover:text-[#FF3B00] transition-colors font-ppsupply font-medium text-center overflow-hidden text-ellipsis break-words whitespace-normal md:min-h-[100px] lg:min-h-[120px] flex items-center justify-center backdrop-blur-sm bg-opacity-60 bg-[rgba(245,240,255,0.15)] hover:bg-[rgba(255,59,0,0.05)] rounded-none"
246 |                   >
247 |                     <div className="w-full h-full flex flex-row justify-between items-start px-3 py-2 md:px-4 md:py-3 space-y-3">
248 |                       <span className="text-left">Review a pull request<br />on Github</span>
249 |                       <Code className="" size={20} strokeWidth={1.5} />
250 |                     </div>
251 |                   
252 |                   </motion.button>
253 |                   
254 |                   <motion.button
255 |                     initial={{ opacity: 0, y: 20 }}
256 |                     animate={{ opacity: 1, y: 0 }}
257 |                     transition={{ duration: 0.3, delay: 0.3 }}
258 |                     onClick={() =>
259 |                       startChat(
260 |                         "Play a game of 2048 on https://www.2048.org/. Just try to win and I'll watch. Good luck!"
261 |                       )
262 |                     }
263 |                     className="p-3 md:p-5 lg:p-6 text-sm md:text-base lg:text-xl text-[#2E191E] border border-[#CAC8C7] hover:border-[#FF3B00] hover:text-[#FF3B00] transition-colors font-ppsupply font-medium text-center overflow-hidden text-ellipsis break-words whitespace-normal md:min-h-[100px] lg:min-h-[120px] flex items-center justify-center backdrop-blur-sm bg-opacity-60 bg-[rgba(245,240,255,0.15)] hover:bg-[rgba(255,59,0,0.05)] rounded-none"
264 |                   >
265 |                     <div className="w-full h-full flex flex-row justify-between items-start px-3 py-2 md:px-4 md:py-3">
266 |                       <span className="text-left">Play a challenging<br />game of 2048</span>
267 |                       <Gamepad2 size={20} strokeWidth={1.5} className="rounded-none" />
268 |                     </div>
269 |                   </motion.button>
270 |                   <motion.button
271 |                     initial={{ opacity: 0, y: 20 }}
272 |                     animate={{ opacity: 1, y: 0 }}
273 |                     transition={{ duration: 0.3, delay: 0.5 }}
274 |                     onClick={() =>
275 |                       startChat(
276 |                         "Please visit https://docs.google.com/spreadsheets/d/16fFgY7y4B2AnZLLFx4ajbBh-cuaXE-PU2ldQx-H-CcA/edit?gid=0#gid=0 and analyze the data in the sheet."
277 |                       )
278 |                     }
279 |                     className="p-3 md:p-5 lg:p-6 text-sm md:text-base lg:text-xl text-[#2E191E] border border-[#CAC8C7] hover:border-[#FF3B00] hover:text-[#FF3B00] transition-colors font-ppsupply font-medium text-center overflow-hidden text-ellipsis break-words whitespace-normal md:min-h-[100px] lg:min-h-[120px] flex items-center justify-center backdrop-blur-sm bg-opacity-60 bg-[rgba(245,240,255,0.15)] hover:bg-[rgba(255,59,0,0.05)] rounded-none"
280 |                   >
281 |                     <div className="w-full h-full flex flex-row justify-between items-start px-3 py-2 md:px-4 md:py-3 space-y-3">
282 |                       <span className="text-left">Analyze a<br />spreadsheet</span>
283 |                       <Table size={20} strokeWidth={1.5} className="rounded-none" />
284 |                     </div>
285 |                   </motion.button>
286 |                   <motion.button
287 |                     initial={{ opacity: 0, y: 20 }}
288 |                     animate={{ opacity: 1, y: 0 }}
289 |                     transition={{ duration: 0.3, delay: 0.7 }}
290 |                     onClick={() => startChat("How much is NVIDIA stock?")}
291 |                     className="p-3 md:p-5 lg:p-6 text-sm md:text-base lg:text-xl text-[#2E191E] border border-[#CAC8C7] hover:border-[#FF3B00] hover:text-[#FF3B00] transition-colors font-ppsupply font-medium text-center overflow-hidden text-ellipsis break-words whitespace-normal md:min-h-[100px] lg:min-h-[120px] flex items-center justify-center backdrop-blur-sm bg-opacity-60 bg-[rgba(245,240,255,0.15)] hover:bg-[rgba(255,59,0,0.05)] rounded-none"
292 |                   >
293 |                     <div className="w-full h-full flex flex-row justify-between items-start px-3 py-2 md:px-4 md:py-3 space-y-3">
294 |                       <span className="text-left">Check the price<br />of NVIDIA stock</span>
295 |                       <TrendingUp size={20} strokeWidth={1.5} className="rounded-none" />
296 |                     </div>
297 |                   </motion.button>
298 |                 </div>
299 |                 <motion.div
300 |                   initial={{ opacity: 0, y: 20 }}
301 |                   animate={{ opacity: 1, y: 0 }}
302 |                   transition={{ duration: 0.4, delay: 1.0 }}
303 |                   className="text-center text-xs text-gray-500"
304 |                   onAnimationComplete={() => {
305 |                     // Focus the input field after the last animation completes
306 |                     setTimeout(() => {
307 |                       inputRef.current?.focus();
308 |                     }, 100);
309 |                   }}
310 |                 >
311 |                   <p>Or type your own request</p>
312 |                 </motion.div>
313 |               </div>
314 |             </div>
315 |             <motion.div
316 |               initial={{ opacity: 0, y: 20, scale: 0.95 }}
317 |               animate={{ opacity: 1, y: 0, scale: 1 }}
318 |               transition={{
319 |                 duration: 0.5,
320 |                 delay: 0.2,
321 |                 ease: [0.25, 0.1, 0.25, 1.0],
322 |                 staggerChildren: 0.1,
323 |               }}
324 |               className="bg-[#28171B] p-4 md:p-6 lg:p-8 mt-8 md:mt-10 w-full max-w-[640px] md:max-w-[800px] lg:max-w-[960px] relative overflow-hidden backdrop-blur-sm bg-opacity-90"
325 |             >
326 |               {/* Tech animation background elements */}
327 |               <motion.div
328 |                 className="absolute inset-0 opacity-10"
329 |                 initial={{ opacity: 0 }}
330 |                 animate={{ opacity: 0.1 }}
331 |                 transition={{ duration: 0.5 }}
332 |               >
333 |                 <div className="absolute top-0 right-0 sm:w-20 sm:h-20 w-16 h-16 bg-[#FF3B00] rounded-full blur-xl"></div>
334 |                 <div className="absolute bottom-0 left-0 w-16 h-16 bg-blue-400 rounded-full blur-xl"></div>
335 |               </motion.div>
336 | 
337 |               <motion.p
338 |                 className="text-base md:text-lg font-ppsupply text-center text-white relative z-10 font-semibold"
339 |                 initial={{ opacity: 0 }}
340 |                 animate={{ opacity: 1 }}
341 |                 transition={{ duration: 0.5, delay: 0.5 }}
342 |               >
343 |                 Powered by{" "}
344 |                 <motion.a
345 |                   href="https://browserbase.com"
346 |                   className="text-[#FF3B00] hover:underline relative"
347 |                   whileHover={{ scale: 1.05 }}
348 |                 >
349 |                   🅱️ Browserbase
350 |                 </motion.a>{" "}
351 |                 and OpenAI&apos;s computer-use model preview.
352 |               </motion.p>
353 |             </motion.div>
354 |           </main>
355 |         </div>
356 |       ) : (
357 |         <ChatFeed
358 |           initialMessage={initialMessage}
359 |           onClose={() => setIsChatVisible(false)}
360 |         />
361 |       )}
362 |     </AnimatePresence>
363 |   );
364 | }
365 | 


--------------------------------------------------------------------------------
/components.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://ui.shadcn.com/schema.json",
 3 |   "style": "new-york",
 4 |   "rsc": true,
 5 |   "tsx": true,
 6 |   "tailwind": {
 7 |     "config": "tailwind.config.ts",
 8 |     "css": "app/globals.css",
 9 |     "baseColor": "zinc",
10 |     "cssVariables": true,
11 |     "prefix": ""
12 |   },
13 |   "aliases": {
14 |     "components": "@/app/components",
15 |     "utils": "@/lib/utils",
16 |     "ui": "@/app/components/ui",
17 |     "lib": "@/lib",
18 |     "hooks": "@/hooks"
19 |   },
20 |   "iconLibrary": "lucide"
21 | }
22 | 


--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
 1 | import { dirname } from "path";
 2 | import { fileURLToPath } from "url";
 3 | import { FlatCompat } from "@eslint/eslintrc";
 4 | // ES modules do not provide __filename/__dirname, so both are derived from import.meta.url.
 5 | const __filename = fileURLToPath(import.meta.url);
 6 | const __dirname = dirname(__filename);
 7 | // FlatCompat allows eslintrc-style shareable configs to be used from this flat config file.
 8 | const compat = new FlatCompat({
 9 |   baseDirectory: __dirname,
10 | });
11 | // The exported config consists solely of the "next/core-web-vitals" and "next/typescript" presets.
12 | const eslintConfig = [
13 |   ...compat.extends("next/core-web-vitals", "next/typescript"),
14 | ];
15 | 
16 | export default eslintConfig;
17 | 


--------------------------------------------------------------------------------
/fonts/PPNeueMontreal-Medium.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/cua-browser/e0de1c7456cbe60c1a01c665aac785435a991297/fonts/PPNeueMontreal-Medium.otf


--------------------------------------------------------------------------------
/fonts/PPSupplySans-Regular.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/cua-browser/e0de1c7456cbe60c1a01c665aac785435a991297/fonts/PPSupplySans-Regular.otf


--------------------------------------------------------------------------------
/lib/utils.ts:
--------------------------------------------------------------------------------
1 | import { clsx, type ClassValue } from "clsx"
2 | import { twMerge } from "tailwind-merge"
3 | // Builds one className string: clsx joins the given class values, then twMerge post-processes the joined string.
4 | export function cn(...inputs: ClassValue[]) {
5 |   const joined = clsx(inputs)
6 |   return twMerge(joined)
7 | }


--------------------------------------------------------------------------------
/next-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="next" />
2 | /// <reference types="next/image-types/global" />
3 | 
4 | // NOTE: This file should not be edited
5 | // see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
6 | 


--------------------------------------------------------------------------------
/next.config.ts:
--------------------------------------------------------------------------------
1 | import type { NextConfig } from "next";
2 | // Next.js configuration object; it is currently empty, so no options are overridden.
3 | const nextConfig: NextConfig = {
4 |   /* config options here */
5 | };
6 | 
7 | export default nextConfig;
8 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "frontend",
 3 |   "version": "0.1.0",
 4 |   "private": true,
 5 |   "scripts": {
 6 |     "dev": "next dev --turbopack",
 7 |     "build": "next build",
 8 |     "start": "next start",
 9 |     "lint": "next lint"
10 |   },
11 |   "dependencies": {
12 |     "@ai-sdk/openai": "^1.1.2",
13 |     "@ai-sdk/provider": "^1.0.6",
14 |     "@browserbasehq/sdk": "^2.0.0",
15 |     "@vercel/analytics": "^1.4.1",
16 |     "axios": "^1.8.2",
17 |     "axios-retry": "^4.5.0",
18 |     "class-variance-authority": "^0.7.1",
19 |     "clsx": "^2.1.1",
20 |     "dotenv": "^16.4.7",
21 |     "framer-motion": "^11.0.3",
22 |     "jotai": "^2.11.1",
23 |     "lucide": "^0.479.0",
24 |     "lucide-react": "^0.479.0",
25 |     "motion": "^12.4.11",
26 |     "next": "15.1.6",
27 |     "openai": "^4.86.2",
28 |     "playwright": "^1.50.0",
29 |     "playwright-core": "^1.50.0",
30 |     "posthog-js": "^1.209.3",
31 |     "react": "^19.0.0",
32 |     "react-dom": "^19.0.0",
33 |     "react-use-measure": "^2.1.7",
34 |     "sharp": "^0.33.5",
35 |     "tailwind-merge": "^3.0.2",
36 |     "tailwindcss-animate": "^1.0.7",
37 |     "usehooks-ts": "^3.1.1"
38 |   },
39 |   "devDependencies": {
40 |     "@eslint/eslintrc": "^3",
41 |     "@types/node": "^20",
42 |     "@types/react": "^19",
43 |     "@types/react-dom": "^19",
44 |     "eslint": "^9",
45 |     "eslint-config-next": "15.1.6",
46 |     "postcss": "^8",
47 |     "tailwindcss": "^3.4.1",
48 |     "typescript": "^5"
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/postcss.config.mjs:
--------------------------------------------------------------------------------
1 | /** @type {import('postcss-load-config').Config} */
2 | const config = {
3 |   plugins: {
4 |     tailwindcss: {}, // the only PostCSS plugin configured; the empty object passes no options
5 |   },
6 | };
7 | 
8 | export default config;
9 | 


--------------------------------------------------------------------------------
/public/agent_loop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/cua-browser/e0de1c7456cbe60c1a01c665aac785435a991297/public/agent_loop.png


--------------------------------------------------------------------------------
/public/agent_mess.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/cua-browser/e0de1c7456cbe60c1a01c665aac785435a991297/public/agent_mess.png


--------------------------------------------------------------------------------
/public/favicon.svg:
--------------------------------------------------------------------------------
 1 | <svg width="100" height="100" viewBox="0 0 100 100" fill="none" xmlns="http://www.w3.org/2000/svg">
 2 | 	<g clip-path="url(#clip0_2212_248)">
 3 | 		<path d="M0 0H100V100H0V0Z" fill="#F03603" />
 4 | 		<path
 5 | 			d="M36 72.2222V27.7778H51.2381C57.5873 27.7778 62.6667 32.8571 62.6667 39.2063V41.746C62.6667 44.6667 61.5873 47.3968 59.7461 49.3651C62.2858 51.4603 63.9366 54.6349 63.9366 58.254V60.7936C63.9366 67.1428 58.8572 72.2222 52.508 72.2222H36ZM42.3493 65.873H52.508C55.3651 65.873 57.5873 63.6508 57.5873 60.7936V58.254C57.5873 55.3968 55.3651 53.1746 52.508 53.1746H42.3493V65.873ZM42.3493 46.8254H51.2381C54.0953 46.8254 56.3175 44.6032 56.3175 41.746V39.2063C56.3175 36.3492 54.0953 34.127 51.2381 34.127H42.3493V46.8254Z"
 6 | 			fill="white" />
 7 | 	</g>
 8 | 	<defs>
 9 | 		<clipPath id="clip0_2212_248">
10 | 			<rect width="100" height="100" fill="white" />
11 | 		</clipPath>
12 | 	</defs>
13 | </svg>
14 | 	


--------------------------------------------------------------------------------
/public/file.svg:
--------------------------------------------------------------------------------
1 | <svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>


--------------------------------------------------------------------------------
/public/github.svg:
--------------------------------------------------------------------------------
 1 | <svg width="25" height="24" viewBox="0 0 25 24" fill="none" xmlns="http://www.w3.org/2000/svg">
 2 | 	<g clip-path="url(#clip0_2196_27)">
 3 | 		<path fill-rule="evenodd" clip-rule="evenodd"
 4 | 			d="M12.4642 0C5.84833 0 0.5 5.5 0.5 12.3042C0.5 17.7432 3.92686 22.3472 8.68082 23.9767C9.27518 24.0992 9.4929 23.712 9.4929 23.3862C9.4929 23.101 9.47331 22.1232 9.47331 21.1045C6.14514 21.838 5.45208 19.6378 5.45208 19.6378C4.91722 18.2118 4.12473 17.8452 4.12473 17.8452C3.03543 17.0915 4.20408 17.0915 4.20408 17.0915C5.41241 17.173 6.04645 18.3545 6.04645 18.3545C7.11592 20.2285 8.83926 19.699 9.53257 19.373C9.63151 18.5785 9.94865 18.0285 10.2854 17.723C7.63094 17.4377 4.83812 16.3785 4.83812 11.6523C4.83812 10.3078 5.31322 9.20775 6.06604 8.35225C5.94727 8.04675 5.53118 6.7835 6.18506 5.09275C6.18506 5.09275 7.19527 4.76675 9.47306 6.35575C10.4483 6.08642 11.454 5.9494 12.4642 5.94825C13.4744 5.94825 14.5042 6.091 15.4552 6.35575C17.7332 4.76675 18.7434 5.09275 18.7434 5.09275C19.3973 6.7835 18.981 8.04675 18.8622 8.35225C19.6349 9.20775 20.0904 10.3078 20.0904 11.6523C20.0904 16.3785 17.2976 17.4172 14.6233 17.723C15.0592 18.11 15.4353 18.8433 15.4353 20.0045C15.4353 21.6545 15.4158 22.9788 15.4158 23.386C15.4158 23.712 15.6337 24.0992 16.2278 23.977C20.9818 22.347 24.4087 17.7432 24.4087 12.3042C24.4282 5.5 19.0603 0 12.4642 0Z"
 5 | 			fill="white" />
 6 | 	</g>
 7 | 	<defs>
 8 | 		<clipPath id="clip0_2196_27">
 9 | 			<rect width="24" height="24" fill="white" transform="translate(0.5)" />
10 | 		</clipPath>
11 | 	</defs>
12 | </svg>


--------------------------------------------------------------------------------
/public/globe.svg:
--------------------------------------------------------------------------------
1 | <svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>


--------------------------------------------------------------------------------
/public/grid.svg:
--------------------------------------------------------------------------------
 1 | <svg width="1199" height="653" viewBox="0 0 1199 653" fill="none" xmlns="http://www.w3.org/2000/svg">
 2 | <g style="mix-blend-mode:multiply" opacity="0.1">
 3 | <path d="M-1 68.0005L1599 68.0005" stroke="#480512"/>
 4 | <path d="M-1 156.001L1599 156.001" stroke="#480512"/>
 5 | <path d="M-1 244.001L1599 244.001" stroke="#480512"/>
 6 | <path d="M-1 332.001L1599 332.001" stroke="#480512"/>
 7 | <path d="M-1 420.001L1599 420.001" stroke="#480512"/>
 8 | <path d="M-1 508.001L1599 508.001" stroke="#480512"/>
 9 | <path d="M-1 596.001L1599 596.001" stroke="#480512"/>
10 | <path d="M7 850L7.00004 -49" stroke="#480512"/>
11 | <path d="M95 850L95 -49" stroke="#480512"/>
12 | <path d="M183 850L183 -49" stroke="#480512"/>
13 | <path d="M271 850L271 -49" stroke="#480512"/>
14 | <path d="M359 850L359 -49" stroke="#480512"/>
15 | <path d="M447 850L447 -49" stroke="#480512"/>
16 | <path d="M535 850L535 -49" stroke="#480512"/>
17 | <path d="M623 850L623 -49" stroke="#480512"/>
18 | <path d="M711 850L711 -49" stroke="#480512"/>
19 | <path d="M799 850L799 -49" stroke="#480512"/>
20 | <path d="M887 850L887 -49" stroke="#480512"/>
21 | <path d="M975.001 850L975.001 -49" stroke="#480512"/>
22 | <path d="M1063 850L1063 -49" stroke="#480512"/>
23 | <path d="M1151 850L1151 -49" stroke="#480512"/>
24 | </g>
25 | </svg>
26 | 


--------------------------------------------------------------------------------
/public/next.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>


--------------------------------------------------------------------------------
/public/og.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/cua-browser/e0de1c7456cbe60c1a01c665aac785435a991297/public/og.png


--------------------------------------------------------------------------------
/public/stagehand_clean.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/browserbase/cua-browser/e0de1c7456cbe60c1a01c665aac785435a991297/public/stagehand_clean.png


--------------------------------------------------------------------------------
/public/vercel.svg:
--------------------------------------------------------------------------------
1 | <svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>


--------------------------------------------------------------------------------
/public/window.svg:
--------------------------------------------------------------------------------
1 | <svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.5 2.5h13v10a1 1 0 0 1-1 1h-11a1 1 0 0 1-1-1zM0 1h16v11.5a2.5 2.5 0 0 1-2.5 2.5h-11A2.5 2.5 0 0 1 0 12.5zm3.75 4.5a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5M7 4.75a.75.75 0 1 1-1.5 0 .75.75 0 0 1 1.5 0m1.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5" fill="#666"/></svg>


--------------------------------------------------------------------------------
/tailwind.config.ts:
--------------------------------------------------------------------------------
 1 | import type { Config } from "tailwindcss";
 2 | // Tailwind CSS configuration: class-based dark mode, theme extensions backed by CSS custom properties, and the tailwindcss-animate plugin.
 3 | export default {
 4 |     darkMode: ["class"],
 5 |     content: [
 6 |     "./pages/**/*.{js,ts,jsx,tsx,mdx}",
 7 |     "./components/**/*.{js,ts,jsx,tsx,mdx}",
 8 |     "./app/**/*.{js,ts,jsx,tsx,mdx}",
 9 |   ],
10 |   theme: {
11 |   	extend: {
12 |   		colors: { // every color resolves to an HSL CSS custom property, e.g. hsl(var(--background))
13 |   			background: 'hsl(var(--background))',
14 |   			foreground: 'hsl(var(--foreground))',
15 |   			card: {
16 |   				DEFAULT: 'hsl(var(--card))',
17 |   				foreground: 'hsl(var(--card-foreground))'
18 |   			},
19 |   			popover: {
20 |   				DEFAULT: 'hsl(var(--popover))',
21 |   				foreground: 'hsl(var(--popover-foreground))'
22 |   			},
23 |   			primary: {
24 |   				DEFAULT: 'hsl(var(--primary))',
25 |   				foreground: 'hsl(var(--primary-foreground))'
26 |   			},
27 |   			secondary: {
28 |   				DEFAULT: 'hsl(var(--secondary))',
29 |   				foreground: 'hsl(var(--secondary-foreground))'
30 |   			},
31 |   			muted: {
32 |   				DEFAULT: 'hsl(var(--muted))',
33 |   				foreground: 'hsl(var(--muted-foreground))'
34 |   			},
35 |   			accent: {
36 |   				DEFAULT: 'hsl(var(--accent))',
37 |   				foreground: 'hsl(var(--accent-foreground))'
38 |   			},
39 |   			destructive: {
40 |   				DEFAULT: 'hsl(var(--destructive))',
41 |   				foreground: 'hsl(var(--destructive-foreground))'
42 |   			},
43 |   			border: 'hsl(var(--border))',
44 |   			input: 'hsl(var(--input))',
45 |   			ring: 'hsl(var(--ring))',
46 |   			chart: {
47 |   				'1': 'hsl(var(--chart-1))',
48 |   				'2': 'hsl(var(--chart-2))',
49 |   				'3': 'hsl(var(--chart-3))',
50 |   				'4': 'hsl(var(--chart-4))',
51 |   				'5': 'hsl(var(--chart-5))'
52 |   			}
53 |   		},
54 | 			keyframes: {
55 | 				fadeIn: {
56 | 					'0%': { opacity: '0', transform: 'translateY(10px)' },
57 | 					'100%': { opacity: '1', transform: 'translateY(3px)' }, // NOTE(review): ends at translateY(3px), not 0 — confirm the residual offset is intended
58 | 				},
59 | 				pulse: {
60 | 					'0%, 100%': { opacity: '0.6' },
61 | 					'50%': { opacity: '1' },
62 | 				},
63 | 			},
64 | 			animation: { // animation utilities that reference the keyframes declared above by name
65 | 				fadeIn: 'fadeIn 0.3s ease-in-out forwards',
66 | 				pulse: 'pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite',
67 | 			},
68 |   		fontFamily: { // each family reads a CSS variable first, then falls back to system-ui and sans-serif
69 |   			sans: [
70 |   				'var(--font-inter)',
71 |   				'system-ui',
72 |   				'sans-serif'
73 |   			],
74 |   			ppneue: [
75 |   				'var(--font-pp-neue)',
76 |   				'system-ui',
77 |   				'sans-serif'
78 |   			],
79 |   			ppsupply: [
80 |   				'var(--font-pp-supply)',
81 |   				'system-ui',
82 |   				'sans-serif'
83 |   			]
84 |   		},
85 |   		borderRadius: { // radii are all derived from the --radius CSS variable
86 |   			lg: 'var(--radius)',
87 |   			md: 'calc(var(--radius) - 2px)',
88 |   			sm: 'calc(var(--radius) - 4px)'
89 |   		}
90 |   	}
91 |   },
92 |   // eslint-disable-next-line @typescript-eslint/no-require-imports
93 |   plugins: [require("tailwindcss-animate")],
94 | } satisfies Config;
95 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2017",
 4 |     "lib": ["dom", "dom.iterable", "esnext"],
 5 |     "allowJs": true,
 6 |     "skipLibCheck": true,
 7 |     "strict": true,
 8 |     "noEmit": true,
 9 |     "esModuleInterop": true,
10 |     "module": "esnext",
11 |     "moduleResolution": "bundler",
12 |     "resolveJsonModule": true,
13 |     "isolatedModules": true,
14 |     "jsx": "preserve",
15 |     "incremental": true,
16 |     "plugins": [
17 |       {
18 |         "name": "next"
19 |       }
20 |     ],
21 |     "paths": {
22 |       "@/*": ["./*"]
23 |     }
24 |   },
25 |   "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
26 |   "exclude": ["node_modules"]
27 | }
28 | 


--------------------------------------------------------------------------------
/vercel.json:
--------------------------------------------------------------------------------
1 | {
2 |     "functions": {
3 |       "app/api/**/*": {
4 |         "maxDuration": 300
5 |       }
6 |     }
7 |   }
8 |   


--------------------------------------------------------------------------------