├── .env.example ├── .gitignore ├── README.md ├── app ├── api │ ├── cua │ │ ├── agent │ │ │ ├── agent.ts │ │ │ ├── base_playwright.ts │ │ │ ├── browserbase.ts │ │ │ └── types.ts │ │ ├── start │ │ │ └── route.ts │ │ ├── step │ │ │ ├── execute │ │ │ │ └── route.ts │ │ │ └── generate │ │ │ │ └── route.ts │ │ └── types.ts │ └── session │ │ ├── [sessionId] │ │ └── pages │ │ │ └── route.ts │ │ └── route.ts ├── components │ ├── AnimatedButton.tsx │ ├── BrowserSessionContainer.tsx │ ├── BrowserTabs.tsx │ ├── ChatBlock.tsx │ ├── ChatFeed.tsx │ ├── PosthogProvider.tsx │ ├── SessionControls.tsx │ └── ui │ │ └── sliding-number.tsx ├── favicon.ico ├── globals.css ├── layout.tsx └── page.tsx ├── components.json ├── eslint.config.mjs ├── fonts ├── PPNeueMontreal-Medium.otf └── PPSupplySans-Regular.otf ├── lib └── utils.ts ├── next-env.d.ts ├── next.config.ts ├── package-lock.json ├── package.json ├── postcss.config.mjs ├── public ├── agent_loop.png ├── agent_mess.png ├── favicon.svg ├── file.svg ├── github.svg ├── globe.svg ├── grid.svg ├── next.svg ├── og.png ├── stagehand_clean.png ├── vercel.svg └── window.svg ├── tailwind.config.ts ├── tsconfig.json └── vercel.json /.env.example: -------------------------------------------------------------------------------- 1 | # OpenAI API Configuration 2 | OPENAI_API_KEY=your_openai_api_key_here 3 | 4 | # Browserbase Configuration 5 | BROWSERBASE_API_KEY=your_browserbase_api_key_here 6 | BROWSERBASE_PROJECT_ID=your_browserbase_project_id_here 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | 10 | # Packages # 11 | ############ 12 | # it's better to unpack these files and commit the raw source 13 | # git has its own built in compression methods 14 | *.7z 15 | *.dmg 16 | *.gz 17 | *.iso 18 | *.jar 19 | *.rar 20 | *.tar 21 | *.zip 22 | 23 | # Logs and databases # 24 | ###################### 25 | *.log 26 | *.sql 27 | *.sqlite 28 | 29 | # OS generated files # 30 | ###################### 31 | .DS_Store 32 | .DS_Store? 33 | ._* 34 | .Spotlight-V100 35 | .Trashes 36 | ehthumbs.db 37 | Thumbs.db 38 | 39 | # IDE and Editor folders # 40 | ########################## 41 | .idea/ 42 | .vscode/ 43 | *.swp 44 | *.swo 45 | *~ 46 | 47 | # Node.js # 48 | ########### 49 | node_modules/ 50 | npm-debug.log 51 | .next 52 | 53 | # Python # 54 | ########## 55 | *.py[cod] 56 | __pycache__/ 57 | *.so 58 | 59 | # Java # 60 | ######## 61 | *.class 62 | *.jar 63 | *.war 64 | *.ear 65 | 66 | # Gradle # 67 | ########## 68 | .gradle 69 | /build/ 70 | 71 | # Maven # 72 | ######### 73 | target/ 74 | 75 | # Miscellaneous # 76 | ################# 77 | *.bak 78 | *.tmp 79 | *.temp 80 | .env 81 | .env.local 82 | 83 | # pnpm 84 | pnpm-lock.yaml 85 | 86 | test/ 87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CUA Browser 2 | 3 | This is a playground for you to test, explore, and get inspired by the power of Browserbase and Open AI's Computer Use Agent. This is free and always will be! It's not a product, just a demo playground 4 | 5 | [![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2Fbrowserbase%2Fcua-browser&env=OPENAI_API_KEY,BROWSERBASE_API_KEY,BROWSERBASE_PROJECT_ID&envDescription=API%20keys%20needed%20to%20run%20CUA%20Browser&envLink=https%3A%2F%2Fgithub.com%2Fbrowserbase%2Fcua-browser%23environment-variables) 6 | 7 | ## Getting Started 8 | 9 | This project uses TypeScript and requires Node.js. We recommend using Node.js version 14.x or later. 10 | 11 | First, install the dependencies for this repository: 12 | 13 | ```bash 14 | npm install 15 | ``` 16 | 17 | Next, copy the example environment variables: 18 | 19 | ```bash 20 | cp .env.example .env.local 21 | ``` 22 | 23 | You'll need to set up your API keys: 24 | 25 | 1. Get your OpenAI API key from [OpenAI's dashboard](https://platform.openai.com/api-keys) 26 | 2. Get your Browserbase API key and project ID from [Browserbase](https://www.browserbase.com) 27 | 28 | 1. Clone this repository: 29 | ```bash 30 | git clone https://github.com/browserbase/cua-browser.git 31 | cd cua-browser 32 | ``` 33 | 34 | 2. Install dependencies: 35 | ```bash 36 | npm install 37 | ``` 38 | 39 | 3. Create a `.env.local` file with your API keys. You can get your API keys from [OpenAI](https://platform.openai.com/api-keys) and [Browserbase](https://www.browserbase.com) 40 | ``` 41 | OPENAI_API_KEY=your_openai_api_key 42 | OPENAI_ORG=your_openai_org_id (optional) 43 | BROWSERBASE_API_KEY=your_browserbase_api_key 44 | BROWSERBASE_PROJECT_ID=your_browserbase_project_id 45 | ``` 46 | 47 | 4. Start the development server: 48 | ```bash 49 | npm run dev 50 | ``` 51 | 52 | Open [http://localhost:3000](http://localhost:3000) with your browser to see CUA Browser in action. You can interact with the CUA Browser by typing natural language commands in the input field and observing the browser's actions in response. 53 | 54 | ## Usage 55 | 56 | Here's a basic example of how to implement the Browserbase Compute Use Agent: 57 | 58 | ```typescript 59 | import { Agent } from './app/api/agent/agent'; 60 | import { BrowserbaseBrowser } from './app/api/agent/browserbase'; 61 | 62 | async function main() { 63 | // Initialize the browser 64 | const browser = new BrowserbaseBrowser(1024, 768); 65 | await browser.connect(); 66 | 67 | // Initialize the agent 68 | const agent = new Agent( 69 | "computer-use-preview", 70 | browser, 71 | (message) => { 72 | console.log(`Safety check: ${message}`); 73 | return true; // Acknowledge all safety checks 74 | } 75 | ); 76 | 77 | // Prepare the input for the agent 78 | const inputItems = [ 79 | { 80 | role: "user", 81 | content: [ 82 | { 83 | type: "text", 84 | text: "Go to google.com and search for 'Browserbase'" 85 | } 86 | ] 87 | } 88 | ]; 89 | 90 | // Get the action from the agent 91 | const { output, responseId } = await agent.getAction(inputItems, undefined); 92 | 93 | // Take the action 94 | const results = await agent.takeAction(output); 95 | 96 | // Print the results 97 | console.log("Action results:", results); 98 | 99 | // Store the response ID for potential future use 100 | agent.lastResponseId = responseId; 101 | 102 | // Disconnect the browser 103 | await browser.disconnect(); 104 | } 105 | 106 | main().catch(console.error); 107 | ``` 108 | 109 | This example demonstrates how to: 110 | 111 | 1. Initialize the BrowserbaseBrowser with specific dimensions. 112 | 2. Create an Agent instance with the appropriate model and browser. 113 | 3. Prepare input items for the agent. 114 | 4. Get an action from the agent using the `getAction` method. 115 | 5. Execute the action using the `takeAction` method. 116 | 6. Handle the results of the action. 117 | 7. Store the response ID for potential future interactions. 118 | 119 | Note that this example uses the `getAction` and `takeAction` methods separately, which allows for more granular control over the agent's behavior. You can expand on this basic example to create more complex interactions with the browser based on your specific use case. 120 | 121 | ## Files 122 | 123 | - `agent.ts`: The main Agent class that handles interactions with the OpenAI API 124 | - `base_playwright.ts`: Base class for Playwright-based browser automation 125 | - `browserbase.ts`: Implementation of the Browserbase browser 126 | - `utils.ts`: Utility functions for API calls and image handling 127 | -------------------------------------------------------------------------------- /app/api/cua/agent/agent.ts: -------------------------------------------------------------------------------- 1 | import { BrowserbaseBrowser } from "./browserbase"; 2 | import OpenAI from "openai"; 3 | import { 4 | InputItem, 5 | Item, 6 | Message, 7 | FunctionToolCall, 8 | ComputerToolCall, 9 | ComputerCallOutput, 10 | FunctionOutput, 11 | Tool, 12 | RequestOptions, 13 | } from "./types"; 14 | import { AxiosError } from "axios"; 15 | import axios from "axios"; 16 | import axiosRetry from 'axios-retry'; 17 | 18 | type AcknowledgeSafetyCheckCallback = (message: string) => boolean; 19 | 20 | export class Agent { 21 | private client: OpenAI; 22 | private model: string; 23 | private computer: BrowserbaseBrowser; 24 | private tools: Tool[]; 25 | private printSteps: boolean = true; 26 | private acknowledgeSafetyCheckCallback: AcknowledgeSafetyCheckCallback; 27 | public lastResponseId: string | undefined = undefined; 28 | 29 | constructor( 30 | model: string = "computer-use-preview", 31 | computer: BrowserbaseBrowser, 32 | acknowledgeSafetyCheckCallback: AcknowledgeSafetyCheckCallback = () => true 33 | ) { 34 | this.client = new OpenAI(); 35 | this.model = model; 36 | this.computer = computer; 37 | this.acknowledgeSafetyCheckCallback = acknowledgeSafetyCheckCallback; 38 | 39 | this.tools = [ 40 | { 41 | type: "computer-preview", 42 | display_width: computer.dimensions[0], 43 | display_height: computer.dimensions[1], 44 | environment: computer.environment, 45 | }, 46 | { 47 | type: "function", 48 | name: "back", 49 | description: "Go back to the previous page.", 50 | parameters: {}, 51 | strict: false, 52 | }, 53 | { 54 | type: "function", 55 | name: "goto", 56 | description: "Go to a specific URL.", 57 | parameters: { 58 | type: "object", 59 | properties: { 60 | url: { 61 | type: "string", 62 | description: "Fully qualified URL to navigate to.", 63 | }, 64 | }, 65 | additionalProperties: false, 66 | required: ["url"], 67 | }, 68 | strict: false, 69 | }, 70 | ]; 71 | /* Some additional tools, disabled as they seem to slow down model performance 72 | { 73 | type: "function", 74 | name: "refresh", 75 | description: "Refresh the current page.", 76 | parameters: {}, 77 | strict: false, 78 | }, 79 | { 80 | type: "function", 81 | name: "listTabs", 82 | description: "Get the list of tabs, including the current tab.", 83 | parameters: {}, 84 | strict: false, 85 | }, 86 | { 87 | type: "function", 88 | name: "changeTab", 89 | description: "Change to a specific tab.", 90 | parameters: { 91 | type: "object", 92 | properties: { 93 | tab: { 94 | type: "string", 95 | description: "The URL of the tab to change to.", 96 | }, 97 | }, 98 | additionalProperties: false, 99 | required: ["tab"], 100 | }, 101 | strict: false, 102 | }, 103 | */ 104 | } 105 | 106 | private async createResponse(options: RequestOptions): Promise { 107 | const url = "https://api.openai.com/v1/responses"; 108 | const headers: Record = { 109 | Authorization: `Bearer ${process.env.OPENAI_API_KEY}`, 110 | 'Content-Type': 'application/json', 111 | 'Openai-beta': 'responses=v1', 112 | }; 113 | 114 | const openaiOrg = process.env.OPENAI_ORG; 115 | if (openaiOrg) { 116 | headers['Openai-Organization'] = openaiOrg; 117 | } 118 | 119 | // Configure retry behavior 120 | axiosRetry(axios, { 121 | retries: 3, 122 | retryDelay: axiosRetry.exponentialDelay, 123 | retryCondition: (error: AxiosError): boolean => { 124 | return axiosRetry.isNetworkOrIdempotentRequestError(error) || 125 | (error.response?.status ? error.response.status >= 500 : false); 126 | } 127 | }); 128 | 129 | try { 130 | const response = await axios.post(url, options, { headers }); 131 | return response.data; 132 | } catch (error) { 133 | const axiosError = error as AxiosError; 134 | 135 | console.error(`Error: ${axiosError.response?.status} ${axiosError.response?.data || axiosError.message}`); 136 | console.error(`${JSON.stringify(axiosError.response?.data)}`); 137 | throw error; 138 | } 139 | } 140 | 141 | async getAction( 142 | inputItems: InputItem[], 143 | previousResponseId: string | undefined 144 | ): Promise<{ 145 | output: Item[]; 146 | responseId: string; 147 | }> { 148 | const response = await this.createResponse({ 149 | model: this.model, 150 | input: inputItems, 151 | tools: this.tools, 152 | truncation: "auto", 153 | ...(previousResponseId 154 | ? { previous_response_id: previousResponseId } 155 | : {}), 156 | }); 157 | 158 | console.log("response", response); 159 | 160 | return { 161 | output: response.output as Item[], 162 | responseId: response.id as string, 163 | }; 164 | } 165 | 166 | async takeAction( 167 | output: Item[] 168 | ): Promise<(Message | ComputerCallOutput | FunctionOutput)[]> { 169 | const actions: Promise[] = 170 | []; 171 | for (const item of output) { 172 | if (item.type === "message") { 173 | // Do nothing 174 | } 175 | if (item.type === "computer_call") { 176 | actions.push(this.takeComputerAction(item as ComputerToolCall)); 177 | } 178 | if (item.type === "function_call") { 179 | actions.push(this.takeFunctionAction(item as FunctionToolCall)); 180 | } 181 | } 182 | 183 | const results = await Promise.all(actions); 184 | return results; 185 | } 186 | 187 | async takeMessageAction(messageItem: Message): Promise { 188 | if (this.printSteps && messageItem.content?.[0]) { 189 | console.log(messageItem.content[0]); 190 | } 191 | return messageItem; 192 | } 193 | 194 | async takeComputerAction( 195 | computerItem: ComputerToolCall 196 | ): Promise { 197 | const action = computerItem.action; 198 | const actionType = action.type; 199 | const actionArgs = Object.fromEntries( 200 | Object.entries(action).filter(([key]) => key !== "type") 201 | ); 202 | 203 | if (this.printSteps) { 204 | console.log(`${actionType}(${JSON.stringify(actionArgs)})`); 205 | } 206 | 207 | if (!this.computer) { 208 | throw new Error("Computer not initialized"); 209 | } 210 | 211 | const method = (this.computer as unknown as Record)[ 212 | actionType 213 | ] as (...args: unknown[]) => unknown; 214 | await method.apply(this.computer, Object.values(actionArgs)); 215 | 216 | const screenshot = await this.computer.screenshot(); 217 | 218 | // Handle safety checks 219 | const pendingChecks = computerItem.pending_safety_checks || []; 220 | for (const check of pendingChecks) { 221 | const message = check.message; 222 | if (!this.acknowledgeSafetyCheckCallback(message)) { 223 | throw new Error( 224 | `Safety check failed: ${message}. Cannot continue with unacknowledged safety checks.` 225 | ); 226 | } 227 | } 228 | 229 | return { 230 | type: "computer_call_output", 231 | call_id: computerItem.call_id, 232 | acknowledged_safety_checks: pendingChecks, 233 | output: { 234 | type: "input_image", 235 | image_url: `data:image/png;base64,${screenshot}`, 236 | }, 237 | }; 238 | } 239 | 240 | async takeFunctionAction( 241 | functionItem: FunctionToolCall 242 | ): Promise { 243 | const name = functionItem.name; 244 | const args = JSON.parse(functionItem.arguments); 245 | if (this.printSteps) { 246 | console.log(`${name}(${JSON.stringify(args)})`); 247 | } 248 | 249 | if ( 250 | this.computer && 251 | typeof (this.computer as unknown as Record)[name] === 252 | "function" 253 | ) { 254 | const method = (this.computer as unknown as Record)[ 255 | name 256 | ] as (...args: unknown[]) => unknown; 257 | await method.apply(this.computer, Object.values(args)); 258 | } 259 | 260 | return { 261 | type: "function_call_output", 262 | call_id: functionItem.call_id, 263 | output: "success", // hard-coded output for demo 264 | }; 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /app/api/cua/agent/base_playwright.ts: -------------------------------------------------------------------------------- 1 | import { Browser, Page } from 'playwright'; 2 | 3 | // Optional: key mapping if your model uses "CUA" style keys 4 | const CUA_KEY_TO_PLAYWRIGHT_KEY: Record = { 5 | "/": "Divide", 6 | "\\": "Backslash", 7 | "alt": "Alt", 8 | "arrowdown": "ArrowDown", 9 | "arrowleft": "ArrowLeft", 10 | "arrowright": "ArrowRight", 11 | "arrowup": "ArrowUp", 12 | "up": "ArrowUp", 13 | "down": "ArrowDown", 14 | "left": "ArrowLeft", 15 | "right": "ArrowRight", 16 | "backspace": "Backspace", 17 | "capslock": "CapsLock", 18 | "cmd": "Meta", 19 | "command": "Meta", 20 | "ctrl": "Control", 21 | "control": "Control", 22 | "delete": "Delete", 23 | "end": "End", 24 | "enter": "Enter", 25 | "esc": "Escape", 26 | "home": "Home", 27 | "insert": "Insert", 28 | "option": "Alt", 29 | "pagedown": "PageDown", 30 | "pageup": "PageUp", 31 | "shift": "Shift", 32 | "space": " ", 33 | "super": "Meta", 34 | "tab": "Tab", 35 | "win": "Meta", 36 | }; 37 | 38 | const HOTKEYS: Record = { 39 | "alt": "Alt", 40 | "ctrl": "Control", 41 | "control": "Control", 42 | "shift": "Shift", 43 | "meta": "Meta", 44 | "command": "Meta", 45 | "win": "Meta", 46 | } 47 | 48 | export type Environment = "browser"; 49 | 50 | /** 51 | * Abstract base for Playwright-based computers: 52 | * 53 | * - Subclasses override `_getBrowserAndPage()` to do local or remote connection, 54 | * returning [Browser, Page]. 55 | * - This base class handles context creation (`connect`/`disconnect`), 56 | * plus standard "Computer" actions like click, scroll, etc. 57 | * - We also have extra browser actions: `goto(url)` and `back()`. 58 | */ 59 | export abstract class BasePlaywrightComputer { 60 | environment: Environment = "browser"; 61 | dimensions: [number, number] = [1024, 768]; 62 | 63 | protected _browser: Browser | null = null; 64 | protected _page: Page | null = null; 65 | 66 | constructor() { 67 | this._browser = null; 68 | this._page = null; 69 | } 70 | 71 | async connect(): Promise { 72 | // Start Playwright and call the subclass hook for getting browser/page 73 | const [browser, page] = await this._getBrowserAndPage(); 74 | this._browser = browser; 75 | this._page = page; 76 | return this; 77 | } 78 | 79 | async disconnect(): Promise { 80 | if (this._browser) { 81 | await this._browser.close(); 82 | } 83 | } 84 | 85 | // --- Common "Computer" actions --- 86 | async screenshot(): Promise { 87 | /** 88 | * Capture only the viewport (not full_page). 89 | */ 90 | if (!this._page) throw new Error("Page not initialized"); 91 | const buffer = await this._page.screenshot({ fullPage: false }); 92 | return buffer.toString('base64'); 93 | } 94 | 95 | async click(button: string = "left", x: number | string, y: number | string): Promise { 96 | if (!this._page) throw new Error("Page not initialized"); 97 | const parsedX = typeof x === 'string' ? parseInt(x, 10) : x; 98 | const parsedY = typeof y === 'string' ? parseInt(y, 10) : y; 99 | if (isNaN(parsedX) || isNaN(parsedY)) { 100 | throw new Error(`Invalid x or y coordinate: x=${x}, y=${y}`); 101 | } 102 | if (button == "wheel") { 103 | await this._page.mouse.wheel(parsedX, parsedY); 104 | } else { 105 | await this._page.mouse.click(parsedX, parsedY, { button: button as "left" | "right" | "middle" }); 106 | } 107 | } 108 | 109 | async double_click(x: number, y: number): Promise { 110 | if (!this._page) throw new Error("Page not initialized"); 111 | await this._page.mouse.dblclick(x, y); 112 | } 113 | 114 | async scroll(x: number, y: number, scrollX: number, scrollY: number): Promise { 115 | if (!this._page) throw new Error("Page not initialized"); 116 | await this._page.mouse.wheel(scrollX, scrollY); 117 | await this._page.mouse.move(x, y); 118 | } 119 | 120 | async type(text: string): Promise { 121 | if (!this._page) throw new Error("Page not initialized"); 122 | await this._page.keyboard.type(text); 123 | } 124 | 125 | async wait(ms: number = 250): Promise { 126 | await new Promise(resolve => setTimeout(resolve, ms)); 127 | } 128 | 129 | async move(x: number, y: number): Promise { 130 | if (!this._page) throw new Error("Page not initialized"); 131 | await this._page.mouse.move(x, y); 132 | } 133 | 134 | async keypress(keys: string[]): Promise { 135 | if (!this._page) throw new Error("Page not initialized"); 136 | 137 | console.log("HOT KEY", HOTKEYS[keys[0].toLowerCase()]); 138 | // Support for hotkeys 139 | if (HOTKEYS[keys[0].toLowerCase()]) { 140 | await this._page.keyboard.down(HOTKEYS[keys[0].toLowerCase()]); 141 | console.log("DOWN", HOTKEYS[keys[0].toLowerCase()]); 142 | for (let i = 1; i < keys.length; i++) { 143 | await this._page.keyboard.press(keys[i]); 144 | console.log("PRESS", keys[i]); 145 | } 146 | await this._page.keyboard.up(HOTKEYS[keys[0].toLowerCase()]); 147 | console.log("UP", HOTKEYS[keys[0].toLowerCase()]); 148 | } else { 149 | for (const key of keys) { 150 | const mappedKey = CUA_KEY_TO_PLAYWRIGHT_KEY[key.toLowerCase()] || key; 151 | await this._page.keyboard.press(mappedKey); 152 | } 153 | } 154 | } 155 | 156 | async drag(path: {x: number, y: number}[]): Promise { 157 | if (!this._page) throw new Error("Page not initialized"); 158 | if (!path.length) return; 159 | 160 | await this._page.mouse.move(path[0].x, path[0].y); 161 | await this._page.mouse.down(); 162 | 163 | for (let i = 1; i < path.length; i++) { 164 | await this._page.mouse.move(path[i].x, path[i].y); 165 | } 166 | 167 | await this._page.mouse.up(); 168 | } 169 | 170 | // --- Extra browser-oriented actions --- 171 | async goto(url: string): Promise { 172 | if (!this._page) throw new Error("Page not initialized"); 173 | await this._page.goto(url, { waitUntil: "domcontentloaded" }); 174 | } 175 | 176 | async back(): Promise { 177 | if (!this._page) throw new Error("Page not initialized"); 178 | await this._page.goBack(); 179 | } 180 | 181 | // --- Subclass hook --- 182 | protected abstract _getBrowserAndPage(): Promise<[Browser, Page]>; 183 | } -------------------------------------------------------------------------------- /app/api/cua/agent/browserbase.ts: -------------------------------------------------------------------------------- 1 | import * as dotenv from "dotenv"; 2 | import { Browser, Page, chromium } from "playwright"; 3 | import { BasePlaywrightComputer } from "./base_playwright"; 4 | import Browserbase from "@browserbasehq/sdk"; 5 | import { SessionCreateResponse } from "@browserbasehq/sdk/resources/sessions/sessions.mjs"; 6 | import axios from "axios"; 7 | 8 | dotenv.config(); 9 | 10 | // Define a custom type that includes all necessary properties 11 | interface BrowserbaseSession extends SessionCreateResponse { 12 | connectUrl: string; 13 | } 14 | 15 | // Define the type for session creation parameters 16 | interface SessionCreateParams { 17 | projectId: string; 18 | browserSettings: { 19 | viewport: { 20 | width: number; 21 | height: number; 22 | }; 23 | blockAds: boolean; 24 | }; 25 | region: "us-west-2" | "us-east-1" | "eu-central-1" | "ap-southeast-1"; 26 | proxies: boolean; 27 | keepAlive: boolean; 28 | } 29 | 30 | export class BrowserbaseBrowser extends BasePlaywrightComputer { 31 | /** 32 | * Browserbase is a headless browser platform that offers a remote browser API. You can use it to control thousands of browsers from anywhere. 33 | * With Browserbase, you can watch and control a browser in real-time, record and replay sessions, and use built-in proxies for more reliable browsing. 34 | * You can find more information about Browserbase at https://docs.browserbase.com/ or view our OpenAI CUA Quickstart at https://docs.browserbase.com/integrations/openai-cua/introduction. 35 | */ 36 | 37 | private bb: Browserbase; 38 | private projectId: string; 39 | private session: BrowserbaseSession | null = null; 40 | private region: string; 41 | private proxies: boolean; 42 | private sessionId: string | null; 43 | 44 | constructor( 45 | width: number = 1024, 46 | height: number = 768, 47 | region: string = "us-east-1", 48 | proxies: boolean = true, 49 | sessionId: string | null = null 50 | ) { 51 | /** 52 | * Initialize the Browserbase instance. Additional configuration options for features such as persistent cookies, ad blockers, file downloads and more can be found in the Browserbase API documentation: https://docs.browserbase.com/reference/api/create-a-session 53 | * 54 | * @param width - The width of the browser viewport. Default is 1024. 55 | * @param height - The height of the browser viewport. Default is 768. 56 | * @param region - The region for the Browserbase session. Default is "us-west-2". Pick a region close to you for better performance. https://docs.browserbase.com/guides/multi-region 57 | * @param proxies - Whether to use a proxy for the session. Default is False. Turn on proxies if you're browsing is frequently interrupted. https://docs.browserbase.com/features/proxies 58 | * @param sessionId - Optional. If provided, use an existing session instead of creating a new one. 59 | */ 60 | super(); 61 | // We're using a dynamic import here as a workaround since we don't have the actual types 62 | // In a real project, you would install the proper types and import correctly 63 | this.bb = new Browserbase({ apiKey: process.env.BROWSERBASE_API_KEY}); 64 | this.projectId = process.env.BROWSERBASE_PROJECT_ID!; 65 | this.session = null; 66 | this.dimensions = [width, height]; 67 | this.region = region; 68 | this.proxies = proxies; 69 | this.sessionId = sessionId; 70 | } 71 | 72 | protected async _getBrowserAndPage(): Promise<[Browser, Page]> { 73 | /** 74 | * Create a Browserbase session and connect to it, or connect to an existing session if a session ID is provided. 75 | * 76 | * @returns A tuple containing the connected browser and page objects. 77 | */ 78 | if (this.sessionId) { 79 | // TODO: replace with this when we ship connectUrl via session GET to the SDK 80 | const response = await axios.get( 81 | `https://api.browserbase.com/v1/sessions/${this.sessionId}`, 82 | { 83 | headers: { 84 | "X-BB-API-Key": process.env.BROWSERBASE_API_KEY, 85 | }, 86 | } 87 | ); 88 | this.session = { 89 | connectUrl: response.data.connectUrl, 90 | } as unknown as BrowserbaseSession; 91 | } else { 92 | // Create a new session on Browserbase with specified parameters 93 | const [width, height] = this.dimensions; 94 | const sessionParams: SessionCreateParams = { 95 | projectId: this.projectId, 96 | browserSettings: { 97 | blockAds: true, 98 | viewport: { 99 | width, 100 | height, 101 | }, 102 | }, 103 | region: this.region as 104 | | "us-west-2" 105 | | "us-east-1" 106 | | "eu-central-1" 107 | | "ap-southeast-1", 108 | proxies: true, 109 | keepAlive: true, 110 | }; 111 | 112 | this.session = (await this.bb.sessions.create( 113 | sessionParams 114 | )) as unknown as BrowserbaseSession; 115 | } 116 | 117 | if (!this.session) { 118 | throw new Error("Failed to create or retrieve session"); 119 | } 120 | 121 | // Connect to the remote session 122 | const browser = await chromium.connectOverCDP(this.session.connectUrl, { 123 | timeout: 1000 * 60, 124 | }); 125 | const context = browser.contexts()[0]; 126 | // Inject inline cursor-rendering script globally for every page 127 | const pages = context.pages(); 128 | const page = pages[pages.length - 1]; 129 | page 130 | .evaluate(() => { 131 | const CURSOR_ID = "__cursor__"; 132 | 133 | // Check if cursor element already exists 134 | if (document.getElementById(CURSOR_ID)) return; 135 | 136 | const cursor = document.createElement("div"); 137 | cursor.id = CURSOR_ID; 138 | Object.assign(cursor.style, { 139 | position: "fixed", 140 | top: "0px", 141 | left: "0px", 142 | width: "20px", 143 | height: "20px", 144 | backgroundImage: 145 | "url(\"data:image/svg+xml;utf8,\")", 146 | backgroundSize: "cover", 147 | pointerEvents: "none", 148 | zIndex: "99999", 149 | transform: "translate(-2px, -2px)", 150 | }); 151 | 152 | document.body.appendChild(cursor); 153 | 154 | document.addEventListener("mousemove", (e) => { 155 | cursor.style.top = `${e.clientY}px`; 156 | cursor.style.left = `${e.clientX}px`; 157 | }); 158 | document.addEventListener("mousedown", (e) => { 159 | cursor.style.top = `${e.clientY}px`; 160 | cursor.style.left = `${e.clientX}px`; 161 | }); 162 | }) 163 | .catch((error) => { 164 | console.error("Error injecting cursor-rendering script:", error); 165 | }); 166 | 167 | // Only navigate to Google if it's a new session 168 | if (!this.sessionId) { 169 | await page.goto("https://www.google.com"); 170 | } 171 | 172 | return [browser, page]; 173 | } 174 | 175 | async disconnect(): Promise { 176 | /** 177 | * Clean up resources when exiting the context manager. 178 | */ 179 | /*if (this._page) { 180 | await this._page.close(); 181 | } 182 | if (this._browser) { 183 | await this._browser.close(); 184 | } 185 | 186 | if (this.session) { 187 | console.log(`Session completed. View replay at https://browserbase.com/sessions/${this.session.id}`); 188 | }*/ 189 | } 190 | 191 | async screenshot(): Promise { 192 | /** 193 | * Capture a screenshot of the current viewport using CDP. 194 | * 195 | * @returns A base64 encoded string of the screenshot. 196 | */ 197 | if (!this._page) { 198 | throw new Error("Page not initialized"); 199 | } 200 | 201 | try { 202 | // Get CDP session from the page 203 | const cdpSession = await this._page.context().newCDPSession(this._page); 204 | 205 | // Capture screenshot using CDP 206 | const { data } = await cdpSession.send("Page.captureScreenshot", { 207 | format: "png", 208 | fromSurface: true, 209 | }); 210 | 211 | return data; // CDP already returns base64 encoded string 212 | } catch (error) { 213 | console.warn( 214 | "CDP screenshot failed, falling back to standard screenshot:", 215 | error 216 | ); 217 | // Fall back to standard Playwright screenshot 218 | const buffer = await this._page.screenshot({ type: "png" }); 219 | return buffer.toString("base64"); 220 | } 221 | } 222 | 223 | async refresh(): Promise { 224 | /** 225 | * Refresh the current page. 226 | */ 227 | if (!this._page) { 228 | throw new Error("Page not initialized"); 229 | } 230 | 231 | await this._page.reload(); 232 | } 233 | 234 | async listTabs(): Promise { 235 | /** 236 | * Get the list of tabs, including the current tab. 237 | */ 238 | if (!this._page) { 239 | throw new Error("Page not initialized"); 240 | } 241 | 242 | const tabs = await this._page.context().pages(); 243 | const tabUrls = tabs.map((tab) => tab.url()); 244 | const currentTab = this._page.url(); 245 | return [...tabUrls, currentTab]; 246 | } 247 | 248 | async changeTab(tabUrl: string): Promise { 249 | /** 250 | * Change to a specific tab. 251 | */ 252 | if (!this._page) { 253 | throw new Error("Page not initialized"); 254 | } 255 | 256 | const tabs = await this._page.context().pages(); 257 | const tab = tabs.find((t) => t.url() === tabUrl); 258 | if (!tab) { 259 | throw new Error(`Tab with URL ${tabUrl} not found`); 260 | } 261 | await tab.bringToFront(); 262 | this._page = tab; 263 | } 264 | } 265 | -------------------------------------------------------------------------------- /app/api/cua/agent/types.ts: -------------------------------------------------------------------------------- 1 | export type Includable = "output[*].file_search_call.search_results"; 2 | 3 | export type FunctionOutput = { 4 | type: "function_call_output"; 5 | call_id: string; 6 | output: string; 7 | }; 8 | 9 | export type ComputerCallOutput = { 10 | type: "computer_call_output"; 11 | call_id: string; 12 | output: { type: "input_image"; image_url: string }; 13 | acknowledged_safety_checks: SafetyCheck[]; 14 | current_url?: string; 15 | }; 16 | 17 | export type EasyMessage = { 18 | role: "system" | "user" | "assistant" | "developer"; 19 | content: string | InputContent[]; 20 | }; 21 | 22 | export type ItemReference = { 23 | type: "item_reference"; 24 | id: string; 25 | }; 26 | 27 | export type InputItem = EasyMessage | FunctionOutput | ComputerCallOutput; 28 | 29 | export type Tool = FunctionTool | ComputerTool; 30 | 31 | export type ComputerTool = { 32 | type: "computer-preview"; 33 | display_width: number; 34 | display_height: number; 35 | environment: "mac" | "windows" | "linux" | "browser"; 36 | }; 37 | 38 | export type FunctionTool = { 39 | type: "function"; 40 | name: string; 41 | description: string | null; 42 | parameters: object; 43 | strict: boolean; 44 | }; 45 | 46 | export type Item = Message | FunctionToolCall | ComputerToolCall | Reasoning; 47 | 48 | export type Message = { 49 | id: string; 50 | type: "message"; 51 | role: "user" | "assistant" | "developer" | "system"; 52 | content: Content[]; 53 | }; 54 | 55 | export type Reasoning = { 56 | id: string; 57 | type: "reasoning"; 58 | content: []; 59 | }; 60 | 61 | export type FunctionToolCall = { 62 | type: "function_call"; 63 | id: string; 64 | call_id: string; 65 | name: string; 66 | arguments: string; 67 | output: Content[] | null; 68 | }; 69 | 70 | export type ComputerAction = 71 | | Click 72 | | DoubleClick 73 | | Drag 74 | | Screenshot 75 | | KeyPress 76 | | Move 77 | | Scroll 78 | | Type 79 | | Wait; 80 | 81 | export type ComputerToolCall = { 82 | type: "computer_call"; 83 | id: string; 84 | call_id: string; 85 | action: ComputerAction; 86 | pending_safety_checks: SafetyCheck[]; 87 | }; 88 | 89 | export type Click = { 90 | type: "click"; 91 | button: "left" | "right" | "wheel" | "back" | "forward"; 92 | x: number; 93 | y: number; 94 | }; 95 | 96 | export type DoubleClick = { 97 | type: "double_click"; 98 | x: number; 99 | y: number; 100 | }; 101 | 102 | export type Scroll = { 103 | type: "scroll"; 104 | x: number; 105 | y: number; 106 | scroll_x: number; 107 | scroll_y: number; 108 | }; 109 | 110 | export type Type = { 111 | type: "type"; 112 | text: string; 113 | }; 114 | 115 | export type Wait = { 116 | type: "wait"; 117 | }; 118 | 119 | export type KeyPress = { 120 | type: "keypress"; 121 | keys: string[]; 122 | }; 123 | 124 | export type Drag = { 125 | type: "drag"; 126 | path: { 127 | x: number; 128 | y: number; 129 | }[]; 130 | }; 131 | 132 | export type Screenshot = { 133 | type: "screenshot"; 134 | }; 135 | 136 | export type Move = { 137 | type: "move"; 138 | x: number; 139 | y: number; 140 | }; 141 | 142 | export type SafetyCheck = { 143 | id: string; 144 | code: string; 145 | message: string; 146 | }; 147 | 148 | export type InputContent = InputText | InputImage | InputFile; 149 | 150 | export type OutputContent = OutputText | Refusal; 151 | 152 | export type Content = InputContent | OutputContent | Reasoning; 153 | 154 | export type InputText = { 155 | type: "input_text"; 156 | text: string; 157 | }; 158 | 159 | export type OutputText = { 160 | type: "output_text"; 161 | text: string; 162 | logprobs?: LogProb[] | null; 163 | annotations: Annotation[]; 164 | }; 165 | 166 | export type Refusal = { 167 | type: "refusal"; 168 | refusal: string; 169 | }; 170 | 171 | export type InputImage = { 172 | type: "input_image"; 173 | image_url?: string; 174 | file_id?: string; 175 | detail: "high" | "low" | "auto"; 176 | }; 177 | 178 | export type InputFile = { 179 | type: "input_file"; 180 | file_id: string | null; 181 | filename: string | null; 182 | file_data: string | null; 183 | }; 184 | 185 | export type LogProb = { 186 | token: string; 187 | logprob: number; 188 | bytes: number[]; 189 | top_logprobs?: LogProb[]; 190 | }; 191 | 192 | export type FileCitation = { 193 | type: "file_citation"; 194 | index: number; 195 | file_id: string; 196 | filename: string; 197 | }; 198 | 199 | export type FilePath = { 200 | type: "file_path"; 201 | file_id: string; 202 | index: number; 203 | }; 204 | 205 | export type Annotation = FileCitation | FilePath; 206 | 207 | export type RequestOptions = { 208 | model: string; 209 | input?: string | InputItem[]; 210 | previous_response_id?: string; 211 | include?: Includable[]; 212 | tools?: Tool[]; 213 | 214 | metadata?: Record; 215 | tool_choice?: 216 | | "none" 217 | | "auto" // default 218 | | "required" 219 | | { type: "file_search" } 220 | | { type: "computer" } 221 | | { type: "function"; name: string }; 222 | text?: { 223 | format?: 224 | | { type: "text" } // default 225 | | { type: "json_object" } 226 | | { 227 | type: "json_schema"; 228 | schema: object; 229 | name: string; 230 | description?: string; 231 | strict?: boolean; // default true 232 | }; 233 | }; 234 | temperature?: number; // default 1 235 | top_p?: number; // default 1 236 | truncation?: "auto" | "disabled"; 237 | parallel_tool_calls?: boolean; // default true 238 | stream?: boolean; 239 | reasoning?: { effort?: "low" | "medium" | "high" }; 240 | }; 241 | 242 | export type Response = { 243 | id: string; 244 | object: "response"; 245 | created_at: number; 246 | completed_at: number | null; 247 | error: Error | null; 248 | model: string; 249 | tools: Tool[]; 250 | tool_choice: 251 | | "none" 252 | | "auto" 253 | | "required" 254 | | { type: "file_search" } 255 | | { type: "code_interpreter" } 256 | | { type: "function"; name: string }; 257 | text: { 258 | response_format: 259 | | { type: "text" } // default 260 | | { type: "json_object" } 261 | | { 262 | type: "json_schema"; 263 | schema: object; 264 | name: string; 265 | description?: string; 266 | strict: boolean | null; 267 | }; 268 | }; 269 | previous_response_id: string | null; 270 | output: Item[]; 271 | metadata: Record; 272 | // eslint-disable-next-line @typescript-eslint/no-explicit-any 273 | usage: any | null; 274 | }; 275 | -------------------------------------------------------------------------------- /app/api/cua/start/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { Agent } from '../agent/agent'; 3 | import { BrowserbaseBrowser } from '../agent/browserbase'; 4 | import { InputItem } from '../agent/types'; 5 | 6 | export async function POST(request: Request) { 7 | let computer: BrowserbaseBrowser | null = null; 8 | let agent: Agent | null = null; 9 | 10 | try { 11 | const body = await request.json(); 12 | const { sessionId, userInput } = body; 13 | 14 | computer = new BrowserbaseBrowser(1024, 768, "us-west-2", false, sessionId); 15 | agent = new Agent("computer-use-preview", computer); 16 | if (!sessionId || !userInput) { 17 | return NextResponse.json( 18 | { error: 'Missing sessionId or userInput in request body' }, 19 | { status: 400 } 20 | ); 21 | } 22 | 23 | await computer.connect(); 24 | 25 | // Check if userInput contains a URL and navigate to it 26 | const urlPattern = /(https?:\/\/[^\s]+)|(?:^|\s)([a-zA-Z0-9-]+\.(?:com|org|edu|gov|net|io|ai|app|dev|co|me|info|biz)\b)/; 27 | const urlMatch = userInput.match(urlPattern); 28 | 29 | const initialMessages: InputItem[] = [ 30 | { 31 | "role": "developer", 32 | "content": "You are a helpful assistant that can use a web browser to accomplish tasks. Your starting point is the Google search page. If you see nothing, trying going to Google." 33 | }, 34 | { 35 | "role": "user", 36 | "content": urlMatch ? "What page are we on? Can you take a screenshot to confirm?" : userInput 37 | } 38 | ]; 39 | 40 | // Initialize the agent with the first step 41 | let stepResult = await agent.getAction(initialMessages, undefined); 42 | 43 | if (stepResult.output.length > 0 && stepResult.output.find(item => item.type === "message")) { 44 | return NextResponse.json([stepResult]); 45 | } 46 | 47 | const actions = await agent.takeAction(stepResult.output); 48 | 49 | // This is a hack because function calling doesn't work if it's the first call made by the LLM. 50 | if (urlMatch) { 51 | let fakeAction; 52 | let fakeStep; 53 | let done = false; 54 | 55 | do { 56 | if (fakeStep) { 57 | fakeAction = await agent.getAction(fakeStep.filter(item => item.type === "computer_call_output"), fakeAction!.responseId); 58 | } else { 59 | fakeAction = await agent.getAction(actions.filter(item => item.type === "computer_call_output"), stepResult.responseId); 60 | } 61 | stepResult = fakeAction; 62 | if (fakeAction.output.length > 0 && fakeAction.output.find(item => item.type === "message") != null) { 63 | done = true; 64 | } else { 65 | fakeStep = await agent.takeAction(fakeAction.output); 66 | } 67 | } while (!done); 68 | 69 | stepResult = await agent.getAction([{ 70 | "role": "user", 71 | "content": "Let's continue." 72 | },{ 73 | "role": "user", 74 | "content": userInput 75 | }], stepResult.responseId); 76 | return NextResponse.json([stepResult]); 77 | } 78 | 79 | const nextStep = []; 80 | 81 | for (const action of actions) { 82 | if ('type' in action && action.type === 'message') { 83 | nextStep.push({output: [action], responseId: stepResult.responseId}); 84 | } else { 85 | const nextStepResult = await agent.getAction([action], stepResult.responseId); 86 | nextStep.push(nextStepResult); 87 | } 88 | } 89 | 90 | return NextResponse.json(nextStep); 91 | } catch (error) { 92 | console.error('Error in cua endpoint:', error); 93 | return NextResponse.json( 94 | { success: false, error: 'Failed to process request' }, 95 | { status: 500 } 96 | ); 97 | } 98 | } -------------------------------------------------------------------------------- /app/api/cua/step/execute/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { Agent } from '../../agent/agent'; 3 | import { BrowserbaseBrowser } from '../../agent/browserbase'; 4 | 5 | export async function POST(request: Request) { 6 | let computer: BrowserbaseBrowser | null = null; 7 | let agent: Agent | null = null; 8 | 9 | try { 10 | const body = await request.json(); 11 | const { sessionId, output } = body; 12 | console.log("output", output); 13 | 14 | computer = new BrowserbaseBrowser(1024, 768, "us-west-2", false, sessionId); 15 | agent = new Agent("computer-use-preview", computer); 16 | if (!sessionId) { 17 | return NextResponse.json( 18 | { error: 'Missing sessionId in request body' }, 19 | { status: 400 } 20 | ); 21 | } 22 | 23 | await computer.connect(); 24 | 25 | const result = await agent.takeAction(output.output); 26 | 27 | return NextResponse.json(result); 28 | } catch (error) { 29 | console.error('Error in cua endpoint:', error); 30 | return NextResponse.json( 31 | { success: false, error: 'Failed to process request' }, 32 | { status: 500 } 33 | ); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /app/api/cua/step/generate/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from "next/server"; 2 | import { Agent } from "../../agent/agent"; 3 | import { BrowserbaseBrowser } from "../../agent/browserbase"; 4 | import { ComputerToolCall } from "../../agent/types"; 5 | 6 | export async function POST(request: Request) { 7 | let computer: BrowserbaseBrowser | null = null; 8 | let agent: Agent | null = null; 9 | 10 | try { 11 | const body = await request.json(); 12 | const { sessionId, responseId, input } = body; 13 | console.log("input", input); 14 | 15 | computer = new BrowserbaseBrowser(1024, 768, "us-west-2", false, sessionId); 16 | agent = new Agent("computer-use-preview", computer); 17 | if (!sessionId) { 18 | return NextResponse.json( 19 | { error: "Missing sessionId in request body" }, 20 | { status: 400 } 21 | ); 22 | } 23 | 24 | let result = await agent.getAction(input, responseId); 25 | 26 | // If there's a screenshot returned, just handle it right here so we don't have to make a round trip. 27 | if (result.output.find((item) => item.type === "computer_call")) { 28 | const computerCall = result.output.find( 29 | (item) => item.type === "computer_call" 30 | ) as ComputerToolCall; 31 | if (computerCall.action.type === "screenshot") { 32 | await computer.connect(); 33 | 34 | const screenshotAction = await agent.takeAction(result.output); 35 | result = await agent.getAction( 36 | screenshotAction.filter((item) => item.type != "message"), 37 | result.responseId 38 | ); 39 | } 40 | } 41 | 42 | // If the generated action is only reasoning, let's request a real action. 43 | if ( 44 | result.output.length == 1 && 45 | result.output.find((item) => item.type === "reasoning") 46 | ) { 47 | do { 48 | result = await agent.getAction( 49 | [ 50 | { 51 | role: "user", 52 | content: "Please continue with the task.", 53 | }, 54 | ], 55 | result.responseId 56 | ); 57 | } while (result.output.length == 1 && result.output.find((item) => item.type === "reasoning")); 58 | } 59 | 60 | return NextResponse.json([result]); 61 | } catch (error) { 62 | console.error("Error in cua endpoint:", error); 63 | return NextResponse.json( 64 | { success: false, error: "Failed to process request" }, 65 | { status: 500 } 66 | ); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /app/api/cua/types.ts: -------------------------------------------------------------------------------- 1 | // types.ts 2 | // eslint-disable-next-line @typescript-eslint/no-explicit-any 3 | type Json = Record; 4 | 5 | 6 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 7 | interface ResponseOptions { 8 | model: string; 9 | previous_response_id?: string; 10 | input: string | Json[]; 11 | include?: string[]; 12 | // eslint-disable-next-line @typescript-eslint/no-explicit-any 13 | tools?: any[]; 14 | metadata?: Json; 15 | temperature?: number; 16 | top_p?: number; 17 | parallel_tool_calls?: boolean; 18 | stream?: boolean; 19 | response_format?: Json; 20 | tool_choice?: Json; 21 | truncation?: string; 22 | } 23 | 24 | interface ComputerCall { 25 | type: 'computer_call'; 26 | id: string; 27 | action: { 28 | type: string; 29 | x?: number; 30 | y?: number; 31 | text?: string; 32 | keys?: string[]; 33 | scroll_x?: number; 34 | scroll_y?: number; 35 | }; 36 | } 37 | 38 | interface FunctionCall { 39 | type: 'function_call'; 40 | id: string; 41 | name: string; 42 | arguments: string; 43 | } 44 | 45 | interface OutputText { 46 | type: 'output_text'; 47 | text: string; 48 | } 49 | 50 | interface Message { 51 | type: 'message'; 52 | content: [OutputText]; 53 | } 54 | 55 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 56 | interface Response { 57 | id: string; 58 | output: (ComputerCall | Message | FunctionCall | OutputText)[]; 59 | } -------------------------------------------------------------------------------- /app/api/session/[sessionId]/pages/route.ts: -------------------------------------------------------------------------------- 1 | import Browserbase from "@browserbasehq/sdk"; 2 | import { NextResponse } from "next/server"; 3 | 4 | async function getOpenPages(sessionId: string) { 5 | const bb = new Browserbase({ 6 | apiKey: process.env.BROWSERBASE_API_KEY!, 7 | }); 8 | const debug = await bb.sessions.debug(sessionId); 9 | return debug.pages; 10 | } 11 | 12 | export async function GET( 13 | request: Request, 14 | { params }: { params: Promise<{ sessionId: string }> } 15 | ) { 16 | const { sessionId } = await params; 17 | const pages = await getOpenPages(sessionId); 18 | return NextResponse.json({ pages }); 19 | } 20 | -------------------------------------------------------------------------------- /app/api/session/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from "next/server"; 2 | import Browserbase from "@browserbasehq/sdk"; 3 | import { chromium } from "playwright-core"; 4 | 5 | type BrowserbaseRegion = 6 | | "us-west-2" 7 | | "us-east-1" 8 | | "eu-central-1" 9 | | "ap-southeast-1"; 10 | 11 | // Exact timezone matches for east coast cities 12 | const exactTimezoneMap: Record = { 13 | "America/New_York": "us-east-1", 14 | "America/Detroit": "us-east-1", 15 | "America/Toronto": "us-east-1", 16 | "America/Montreal": "us-east-1", 17 | "America/Boston": "us-east-1", 18 | "America/Chicago": "us-east-1", 19 | }; 20 | 21 | // Prefix-based region mapping 22 | const prefixToRegion: Record = { 23 | America: "us-west-2", 24 | US: "us-west-2", 25 | Canada: "us-west-2", 26 | Europe: "eu-central-1", 27 | Africa: "eu-central-1", 28 | Asia: "ap-southeast-1", 29 | Australia: "ap-southeast-1", 30 | Pacific: "ap-southeast-1", 31 | }; 32 | 33 | // Offset ranges to regions (inclusive bounds) 34 | const offsetRanges: { 35 | min: number; 36 | max: number; 37 | region: BrowserbaseRegion; 38 | }[] = [ 39 | { min: -24, max: -4, region: "us-west-2" }, // UTC-24 to UTC-4 40 | { min: -3, max: 4, region: "eu-central-1" }, // UTC-3 to UTC+4 41 | { min: 5, max: 24, region: "ap-southeast-1" }, // UTC+5 to UTC+24 42 | ]; 43 | 44 | function getClosestRegion(timezone?: string): BrowserbaseRegion { 45 | try { 46 | if (!timezone) { 47 | return "us-west-2"; // Default if no timezone provided 48 | } 49 | 50 | // Check exact matches first 51 | if (timezone in exactTimezoneMap) { 52 | return exactTimezoneMap[timezone]; 53 | } 54 | 55 | // Check prefix matches 56 | const prefix = timezone.split("/")[0]; 57 | if (prefix in prefixToRegion) { 58 | return prefixToRegion[prefix]; 59 | } 60 | 61 | // Use offset-based fallback 62 | const date = new Date(); 63 | // Create a date formatter for the given timezone 64 | const formatter = new Intl.DateTimeFormat("en-US", { timeZone: timezone }); 65 | // Get the timezone offset in minutes 66 | const timeString = formatter.format(date); 67 | const testDate = new Date(timeString); 68 | const hourOffset = (testDate.getTime() - date.getTime()) / (1000 * 60 * 60); 69 | 70 | const matchingRange = offsetRanges.find( 71 | (range) => hourOffset >= range.min && hourOffset <= range.max 72 | ); 73 | 74 | return matchingRange?.region ?? "us-west-2"; 75 | } catch { 76 | return "us-west-2"; 77 | } 78 | } 79 | 80 | async function createSession(timezone?: string) { 81 | const bb = new Browserbase({ 82 | apiKey: process.env.BROWSERBASE_API_KEY!, 83 | }); 84 | 85 | console.log("timezone ", timezone); 86 | console.log("getClosestRegion(timezone)", getClosestRegion(timezone)); 87 | 88 | const browserSettings = { 89 | viewport: { 90 | width: 1024, 91 | height: 768, 92 | }, 93 | blockAds: true, 94 | }; 95 | const session = await bb.sessions.create({ 96 | projectId: process.env.BROWSERBASE_PROJECT_ID!, 97 | browserSettings, 98 | keepAlive: true, 99 | region: getClosestRegion(timezone), 100 | proxies: true, 101 | timeout: 600, 102 | }); 103 | return { 104 | session, 105 | }; 106 | } 107 | 108 | async function endSession(sessionId: string) { 109 | const bb = new Browserbase({ 110 | apiKey: process.env.BROWSERBASE_API_KEY!, 111 | }); 112 | await bb.sessions.update(sessionId, { 113 | projectId: process.env.BROWSERBASE_PROJECT_ID!, 114 | status: "REQUEST_RELEASE", 115 | }); 116 | } 117 | 118 | async function getDebugUrl(sessionId: string) { 119 | const bb = new Browserbase({ 120 | apiKey: process.env.BROWSERBASE_API_KEY!, 121 | }); 122 | const session = await bb.sessions.debug(sessionId); 123 | return session.debuggerFullscreenUrl; 124 | } 125 | 126 | export async function POST(request: Request) { 127 | try { 128 | const body = await request.json(); 129 | const timezone = body.timezone as string; 130 | const { session } = await createSession(timezone); 131 | const browser = await chromium.connectOverCDP(session.connectUrl); 132 | const defaultContext = browser.contexts()[0]; 133 | const page = defaultContext.pages()[0]; 134 | await page.goto("https://www.google.com", { 135 | waitUntil: "domcontentloaded", 136 | }); 137 | const liveUrl = await getDebugUrl(session.id); 138 | return NextResponse.json({ 139 | success: true, 140 | sessionId: session.id, 141 | sessionUrl: liveUrl, 142 | connectUrl: session.connectUrl, 143 | }); 144 | } catch (error) { 145 | console.error("Error creating session:", error); 146 | return NextResponse.json( 147 | { success: false, error: "Failed to create session" }, 148 | { status: 500 } 149 | ); 150 | } 151 | } 152 | 153 | export async function DELETE(request: Request) { 154 | const body = await request.json(); 155 | const sessionId = body.sessionId as string; 156 | await endSession(sessionId); 157 | return NextResponse.json({ success: true }); 158 | } 159 | -------------------------------------------------------------------------------- /app/components/AnimatedButton.tsx: -------------------------------------------------------------------------------- 1 | import { motion } from "framer-motion"; 2 | 3 | interface AnimatedButtonProps { 4 | type?: "button" | "submit"; 5 | onClick?: () => void; 6 | className?: string; 7 | children: React.ReactNode; 8 | } 9 | 10 | export default function AnimatedButton({ 11 | type = "button", 12 | onClick, 13 | className = "", 14 | children 15 | }: AnimatedButtonProps) { 16 | return ( 17 | 29 | 30 | {children} 31 | ⌘+ 32 |
33 | 34 | 42 | 43 |
44 |
45 |
46 | ); 47 | } -------------------------------------------------------------------------------- /app/components/BrowserSessionContainer.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import React, { useState, useEffect } from "react"; 4 | import { motion, AnimatePresence } from "framer-motion"; 5 | import { SessionControls } from "./SessionControls"; 6 | import { RotateCcwIcon } from "lucide-react"; 7 | 8 | interface BrowserSessionContainerProps { 9 | sessionUrl: string | null; 10 | isVisible: boolean; 11 | isCompleted: boolean; 12 | initialMessage: string | undefined; 13 | sessionTime?: number; 14 | onStop?: () => void; 15 | onRestart?: () => void; 16 | } 17 | 18 | const containerVariants = { 19 | hidden: { 20 | opacity: 0, 21 | y: 20, 22 | scale: 0.98, 23 | }, 24 | visible: { 25 | opacity: 1, 26 | y: 0, 27 | scale: 1, 28 | transition: { 29 | type: "spring", 30 | stiffness: 300, 31 | damping: 30, 32 | mass: 1, 33 | delay: 0.2, 34 | }, 35 | }, 36 | exit: { 37 | opacity: 0, 38 | y: -20, 39 | scale: 0.98, 40 | transition: { 41 | duration: 0.3, 42 | ease: "easeInOut", 43 | }, 44 | }, 45 | }; 46 | 47 | const leftCurtainVariants = { 48 | hidden: { x: "-100%" }, 49 | visible: { 50 | x: "-100%", 51 | transition: { 52 | duration: 0, 53 | }, 54 | }, 55 | open: { 56 | x: "-100%", 57 | transition: { 58 | type: "spring", 59 | stiffness: 120, 60 | damping: 20, 61 | delay: 0.2, 62 | }, 63 | }, 64 | close: { 65 | x: "0%", 66 | transition: { 67 | type: "spring", 68 | stiffness: 120, 69 | damping: 20, 70 | }, 71 | }, 72 | }; 73 | 74 | const rightCurtainVariants = { 75 | hidden: { x: "100%" }, 76 | visible: { 77 | x: "100%", 78 | transition: { 79 | duration: 0, 80 | }, 81 | }, 82 | open: { 83 | x: "100%", 84 | transition: { 85 | type: "spring", 86 | stiffness: 120, 87 | damping: 20, 88 | delay: 0.2, 89 | }, 90 | }, 91 | close: { 92 | x: "0%", 93 | transition: { 94 | type: "spring", 95 | stiffness: 120, 96 | damping: 20, 97 | }, 98 | }, 99 | }; 100 | 101 | const BrowserSessionContainer: React.FC = ({ 102 | sessionUrl, 103 | isVisible, 104 | isCompleted, 105 | initialMessage, 106 | sessionTime = 0, 107 | onStop = () => {}, 108 | onRestart = () => {}, 109 | }) => { 110 | // Track the animation state of curtains 111 | const [curtainState, setCurtainState] = useState< 112 | "closed" | "opening" | "open" | "closing" 113 | >("closed"); 114 | 115 | // Handle curtain animation based on session state 116 | useEffect(() => { 117 | if (isVisible) { 118 | if (!sessionUrl && !isCompleted) { 119 | // Session is starting, curtains closed initially 120 | setCurtainState("closed"); 121 | } else if (sessionUrl && !isCompleted) { 122 | // Session URL is available, but wait 1 second before opening the curtains 123 | const openTimer = setTimeout(() => { 124 | setCurtainState("opening"); 125 | // After animation delay, set to fully open 126 | const openCompleteTimer = setTimeout( 127 | () => setCurtainState("open"), 128 | 800 129 | ); 130 | return () => clearTimeout(openCompleteTimer); 131 | }, 1000); // Wait 1 second before starting to open 132 | 133 | return () => clearTimeout(openTimer); 134 | } else if (isCompleted) { 135 | // Session is completed, close the curtains 136 | setCurtainState("closing"); 137 | } 138 | } 139 | }, [isVisible, sessionUrl, isCompleted]); 140 | 141 | return ( 142 | 143 | {isVisible && ( 144 | 153 | {/* Browser frame */} 154 |
161 | {/* Left Curtain */} 162 | 175 | 176 | {/* Right Curtain */} 177 | 190 | {/* Browser Content */} 191 | {!isCompleted ? ( 192 | sessionUrl ? ( 193 |