├── .eslintrc.json ├── app ├── favicon.ico ├── actions.ts ├── layout.tsx ├── api │ └── chat │ │ └── route.ts └── page.tsx ├── .env.example ├── assets └── preview.png ├── readme-assets ├── surf-dark.png └── surf-light.png ├── next.config.mjs ├── postcss.config.mjs ├── lib ├── utils.ts ├── config.ts ├── logger.ts ├── streaming │ ├── index.ts │ ├── openai.ts │ ├── resolution.ts │ ├── anthropic.ts │ └── resolution.test.ts └── chat-context.tsx ├── public ├── anthropic.svg ├── groq.svg ├── xai.svg ├── google.svg ├── openai.svg └── mistral.svg ├── components ├── providers.tsx ├── ui │ ├── skeleton.tsx │ ├── input.tsx │ ├── separator.tsx │ ├── badge.tsx │ ├── shared-menu-styles.ts │ ├── scroll-area.tsx │ ├── grid-pattern.tsx │ ├── card.tsx │ ├── cctv.tsx │ ├── button.tsx │ ├── keyboard.tsx │ ├── route.tsx │ ├── cursor-click.tsx │ └── select.tsx ├── frame.tsx ├── use-scroll-to-bottom.ts ├── chat │ ├── loader.tsx │ ├── message-list.tsx │ ├── example-prompts.tsx │ ├── input.tsx │ └── message.tsx ├── surfing.tsx ├── scanline.tsx ├── repo-banner.tsx ├── markdown.tsx ├── logo.tsx ├── loader.tsx └── icons.tsx ├── components.json ├── .gitignore ├── tsconfig.json ├── styles ├── globals.css ├── variables.css └── theme.css ├── package.json ├── types ├── chat.ts ├── api.ts └── anthropic.ts ├── README.md └── LICENSE /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "next/core-web-vitals" 3 | } 4 | -------------------------------------------------------------------------------- /app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/e2b-dev/surf/HEAD/app/favicon.ico -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | E2B_API_KEY=your_e2b_api_key 2 | OPENAI_API_KEY=your_openai_api_key 
-------------------------------------------------------------------------------- /assets/preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/e2b-dev/surf/HEAD/assets/preview.png -------------------------------------------------------------------------------- /readme-assets/surf-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/e2b-dev/surf/HEAD/readme-assets/surf-dark.png -------------------------------------------------------------------------------- /readme-assets/surf-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/e2b-dev/surf/HEAD/readme-assets/surf-light.png -------------------------------------------------------------------------------- /next.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = {}; 3 | 4 | export default nextConfig; 5 | -------------------------------------------------------------------------------- /postcss.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('postcss-load-config').Config} */ 2 | const config = { 3 | plugins: { 4 | "@tailwindcss/postcss": {}, 5 | }, 6 | }; 7 | 8 | export default config; 9 | -------------------------------------------------------------------------------- /lib/utils.ts: -------------------------------------------------------------------------------- 1 | import { clsx, type ClassValue } from "clsx" 2 | import { twMerge } from "tailwind-merge" 3 | 4 | export function cn(...inputs: ClassValue[]) { 5 | return twMerge(clsx(inputs)) 6 | } 7 | -------------------------------------------------------------------------------- /public/anthropic.svg: -------------------------------------------------------------------------------- 
1 | Anthropic 2 | -------------------------------------------------------------------------------- /components/providers.tsx: -------------------------------------------------------------------------------- 1 | 'use client' 2 | 3 | import { ThemeProvider } from 'next-themes' 4 | 5 | export function Providers({ children }: { children: React.ReactNode }) { 6 | return ( 7 | 8 | {children} 9 | 10 | ) 11 | } -------------------------------------------------------------------------------- /components/ui/skeleton.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils" 2 | 3 | function Skeleton({ 4 | className, 5 | ...props 6 | }: React.HTMLAttributes) { 7 | return ( 8 |
12 | ) 13 | } 14 | 15 | export { Skeleton } 16 | -------------------------------------------------------------------------------- /public/groq.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /public/xai.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "default", 4 | "rsc": true, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "tailwind.config.ts", 8 | "css": "app/globals.css", 9 | "baseColor": "neutral", 10 | "cssVariables": true, 11 | "prefix": "" 12 | }, 13 | "aliases": { 14 | "components": "@/components", 15 | "utils": "@/lib/utils", 16 | "ui": "@/components/ui", 17 | "lib": "@/lib", 18 | "hooks": "@/hooks" 19 | }, 20 | "iconLibrary": "lucide" 21 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | .yarn/install-state.gz 8 | 9 | # testing 10 | /coverage 11 | 12 | # next.js 13 | /.next/ 14 | /out/ 15 | 16 | # production 17 | /build 18 | 19 | # misc 20 | .DS_Store 21 | *.pem 22 | 23 | # debug 24 | npm-debug.log* 25 | yarn-debug.log* 26 | yarn-error.log* 27 | 28 | # local env files 29 | .env*.local 30 | .env 31 | 32 | # vercel 33 | .vercel 34 | 35 | # typescript 36 | *.tsbuildinfo 37 | next-env.d.ts 38 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ESNext", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "noEmit": true, 9 | "esModuleInterop": true, 10 | "module": "esnext", 11 | "moduleResolution": "bundler", 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "jsx": "preserve", 15 | "incremental": true, 16 | "plugins": [ 17 | { 18 | "name": "next" 19 | } 20 | ], 21 | "paths": { 22 | "@/*": ["./*"] 23 | } 24 | }, 25 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 26 | "exclude": ["node_modules"] 27 | } 28 | -------------------------------------------------------------------------------- /lib/config.ts: -------------------------------------------------------------------------------- 1 | export const SANDBOX_TIMEOUT_MS = 300_000; // 5 minutes in milliseconds 2 | 3 | // Resolution boundaries for performance optimization 4 | // The sandbox will run at full resolution, but screenshots sent to the LLM API 5 | // will be scaled down to these dimensions to reduce bandwidth and tokens. 
6 | export const MAX_RESOLUTION_WIDTH = 1024; 7 | export const MAX_RESOLUTION_HEIGHT = 768; 8 | export const MIN_RESOLUTION_WIDTH = 640; 9 | export const MIN_RESOLUTION_HEIGHT = 480; 10 | 11 | // Default resolution used when none is specified 12 | // NOTE: This should be within the max/min bounds defined above, 13 | // otherwise it will be scaled automatically 14 | export const DEFAULT_RESOLUTION: [number, number] = [1024, 720]; 15 | -------------------------------------------------------------------------------- /public/google.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /app/actions.ts: -------------------------------------------------------------------------------- 1 | "use server"; 2 | 3 | import { SANDBOX_TIMEOUT_MS } from "@/lib/config"; 4 | import { Sandbox } from "@e2b/desktop"; 5 | 6 | export async function increaseTimeout(sandboxId: string) { 7 | try { 8 | const desktop = await Sandbox.connect(sandboxId); 9 | await desktop.setTimeout(SANDBOX_TIMEOUT_MS); // 5 minutes 10 | return true; 11 | } catch (error) { 12 | console.error("Failed to increase timeout:", error); 13 | return false; 14 | } 15 | } 16 | 17 | export async function stopSandboxAction(sandboxId: string) { 18 | try { 19 | const desktop = await Sandbox.connect(sandboxId); 20 | await desktop.kill(); 21 | return true; 22 | } catch (error) { 23 | console.error("Failed to stop sandbox:", error); 24 | return false; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /components/ui/input.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react"; 2 | 3 | import { cn } from "@/lib/utils"; 4 | 5 | function Input({ className, type, ...props }: React.ComponentProps<"input">) { 6 | return ( 7 | 22 | ); 23 | } 24 | 25 | export { Input }; 26 | 
-------------------------------------------------------------------------------- /components/frame.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils"; 2 | import Scanline from "@/components/scanline"; 3 | 4 | interface FrameProps { 5 | children: React.ReactNode; 6 | classNames?: { 7 | wrapper?: string; 8 | frame?: string; 9 | }; 10 | } 11 | 12 | export default function Frame({ children, classNames }: FrameProps) { 13 | return ( 14 |
17 |
18 | 19 |
20 |
26 | {children} 27 |
28 |
29 | ); 30 | } 31 | -------------------------------------------------------------------------------- /components/ui/separator.tsx: -------------------------------------------------------------------------------- 1 | "use client" 2 | 3 | import * as React from "react" 4 | import * as SeparatorPrimitive from "@radix-ui/react-separator" 5 | 6 | import { cn } from "@/lib/utils" 7 | 8 | function Separator({ 9 | className, 10 | orientation = "horizontal", 11 | decorative = true, 12 | ...props 13 | }: React.ComponentProps) { 14 | return ( 15 | 25 | ) 26 | } 27 | 28 | export { Separator } 29 | -------------------------------------------------------------------------------- /components/use-scroll-to-bottom.ts: -------------------------------------------------------------------------------- 1 | import { useEffect, useRef, RefObject } from "react"; 2 | 3 | export function useScrollToBottom(): [ 4 | RefObject, 5 | RefObject, 6 | ] { 7 | const containerRef = useRef(null); 8 | const endRef = useRef(null); 9 | 10 | useEffect(() => { 11 | const container = containerRef.current; 12 | const end = endRef.current; 13 | 14 | if (container && end) { 15 | const observer = new MutationObserver(() => { 16 | end.scrollIntoView({ behavior: "smooth" }); 17 | }); 18 | 19 | observer.observe(container, { 20 | childList: true, 21 | subtree: true, 22 | }); 23 | 24 | return () => observer.disconnect(); 25 | } 26 | }, []); 27 | 28 | return [containerRef as RefObject, endRef as RefObject]; 29 | } 30 | -------------------------------------------------------------------------------- /styles/globals.css: -------------------------------------------------------------------------------- 1 | @import 'tailwindcss'; 2 | 3 | @import './variables.css'; 4 | @import './theme.css'; 5 | 6 | @layer theme, base, components, utilities; 7 | 8 | @layer base { 9 | * { 10 | @apply border-border ring-ring; 11 | } 12 | 13 | body { 14 | @apply bg-gradient-to-tr from-bg to-bg-100 text-fg tracking-wide antialiased; 15 | } 16 | 
17 | h1, 18 | h2, 19 | h3, 20 | h4, 21 | h5, 22 | h6 { 23 | @apply font-mono font-light uppercase; 24 | } 25 | 26 | /* Selection styling */ 27 | ::selection { 28 | @apply bg-accent/20; 29 | } 30 | 31 | /* Scrollbar styling */ 32 | ::-webkit-scrollbar { 33 | width: 8px; 34 | height: 8px; 35 | } 36 | 37 | ::-webkit-scrollbar-track { 38 | background: transparent; 39 | } 40 | 41 | ::-webkit-scrollbar-thumb { 42 | @apply bg-bg-300 rounded-full; 43 | } 44 | 45 | ::-webkit-scrollbar-thumb:hover { 46 | @apply bg-bg-400; 47 | } 48 | } 49 | 50 | .lucide { 51 | stroke-width: 1.5px; 52 | } 53 | -------------------------------------------------------------------------------- /components/chat/loader.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect, ReactNode } from "react"; 2 | import { cn } from "@/lib/utils"; 3 | 4 | interface ChatLoaderProps { 5 | text?: ReactNode; 6 | className?: string; 7 | dotClassName?: string; 8 | interval?: number; 9 | } 10 | 11 | export function ChatLoader({ 12 | text = "surfing", 13 | className, 14 | dotClassName, 15 | interval = 200, 16 | }: ChatLoaderProps) { 17 | const [dots, setDots] = useState(1); 18 | 19 | useEffect(() => { 20 | const timer = setInterval(() => { 21 | setDots((prev) => (prev % 3) + 1); 22 | }, interval); 23 | 24 | return () => clearInterval(timer); 25 | }, [interval]); 26 | 27 | return ( 28 |
29 | {text} 30 | 31 | {".".repeat(dots)} 32 | {".".repeat(3 - dots)} 33 | 34 |
35 | ); 36 | } 37 | -------------------------------------------------------------------------------- /components/surfing.tsx: -------------------------------------------------------------------------------- 1 | interface SurfingProps { 2 | className?: string; 3 | } 4 | 5 | export function Surfing({ className }: SurfingProps) { 6 | return ( 7 |
 8 |       {`
 9 |                        ..
10 |                    .+....:
11 |                    :~.~+=*
12 |                   .:....~+       ..
13 |               +++++**..=}}[())<)(:
14 |           ~^)(][[[[[[}{{{)}}}]^
15 |         -(]}[)-){#{{{{{{~
16 |        <[{(   -)#######(
17 |       ..^     *[{{{{#{}(<>^.
18 |      ..       ^(](]}#{}[(<><)
19 |               ++<><(   :){{{{.   .~:
20 |               -=^*>)     .##{=.--   .~::
21 |                *^><)  :===}#{-    ~::+*
22 |               +^]][]===~..:*^-..-.-++
23 |             -^({{{>=~ --    := :=++
24 |            =^}}<=.:-.    :-.:~=++
25 |          =~=-- -~     :~.:===++
26 |       :++~ -...   .~- -====*~
27 |     .~-:~-  :::-~-:~===++=
28 |    -+~     :=~ ~====+*~
29 |   .    :~---=++**+:
30 |   ~+++=++**+:
31 |         `}
32 |     
33 | ); 34 | } 35 | -------------------------------------------------------------------------------- /lib/logger.ts: -------------------------------------------------------------------------------- 1 | import ansis from "ansis"; 2 | 3 | export const logger = console; 4 | 5 | const stringifyArg = (arg: unknown) => 6 | typeof arg === "object" ? JSON.stringify(arg, null, 2) : String(arg); 7 | 8 | export const logError = (...args: Parameters) => { 9 | console.error( 10 | ansis.bgRedBright.white(" ERROR "), 11 | ansis.redBright(args.map(stringifyArg).join(" ")) 12 | ); 13 | }; 14 | 15 | export const logDebug = (...args: Parameters) => { 16 | console.debug( 17 | ansis.bgBlueBright.white(" DEBUG "), 18 | ansis.blueBright(args.map(stringifyArg).join(" ")) 19 | ); 20 | }; 21 | 22 | export const logSuccess = (...args: Parameters) => { 23 | console.log( 24 | ansis.bgGreenBright.white(" SUCCESS "), 25 | ansis.greenBright(args.map(stringifyArg).join(" ")) 26 | ); 27 | }; 28 | 29 | export const logWarning = (...args: Parameters) => { 30 | console.warn( 31 | ansis.bgYellowBright.white(" WARNING "), 32 | ansis.yellowBright(args.map(stringifyArg).join(" ")) 33 | ); 34 | }; 35 | -------------------------------------------------------------------------------- /components/scanline.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils"; 2 | 3 | interface ScanlineProps { 4 | className?: string; 5 | } 6 | 7 | export default function Scanline({ className }: ScanlineProps) { 8 | return ( 9 |
15 | 22 | 23 | 29 | 37 | 38 | 39 | 40 | 41 |
42 | ); 43 | } 44 | -------------------------------------------------------------------------------- /components/chat/message-list.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import React, { useRef, useEffect } from "react"; 4 | import { ChatMessage } from "@/components/chat/message"; 5 | import { ChatMessage as ChatMessageType } from "@/types/chat"; 6 | import { cn } from "@/lib/utils"; 7 | 8 | interface ChatListProps { 9 | messages: ChatMessageType[]; 10 | className?: string; 11 | } 12 | 13 | export function ChatList({ messages, className }: ChatListProps) { 14 | const messagesEndRef = useRef(null); 15 | const containerRef = useRef(null); 16 | 17 | useEffect(() => { 18 | if (messagesEndRef.current) { 19 | messagesEndRef.current.scrollIntoView({ behavior: "smooth" }); 20 | } 21 | }, [messages]); 22 | 23 | return ( 24 |
28 | {messages.length !== 0 && 29 | messages.map((message) => ( 30 | 35 | ))} 36 |
37 |
38 | ); 39 | } 40 | -------------------------------------------------------------------------------- /components/ui/badge.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react"; 2 | import { cva, type VariantProps } from "class-variance-authority"; 3 | 4 | import { cn } from "@/lib/utils"; 5 | 6 | const badgeVariants = cva( 7 | "inline-flex gap-1 items-center px-2 rounded-sm py-1 text-xs font-mono font-light transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2", 8 | { 9 | variants: { 10 | variant: { 11 | default: "border-transparent bg-fg text-bg hover:bg-fg-200", 12 | muted: "bg-bg-200 text-fg-500", 13 | success: "bg-success/20 text-success", 14 | warning: "bg-warning/20 text-warning", 15 | error: "bg-error/20 text-error", 16 | accent: "bg-accent/15 text-accent", 17 | "contrast-1": "bg-contrast-1/20 text-contrast-1", 18 | "contrast-2": "bg-contrast-2/20 text-contrast-2", 19 | }, 20 | defaultVariants: { 21 | variant: "default", 22 | }, 23 | }, 24 | } 25 | ); 26 | 27 | export interface BadgeProps 28 | extends React.HTMLAttributes, 29 | VariantProps {} 30 | 31 | function Badge({ className, variant, ...props }: BadgeProps) { 32 | return ( 33 |
34 | ); 35 | } 36 | 37 | export { Badge, badgeVariants }; 38 | -------------------------------------------------------------------------------- /public/openai.svg: -------------------------------------------------------------------------------- 1 | OpenAI 2 | -------------------------------------------------------------------------------- /lib/streaming/index.ts: -------------------------------------------------------------------------------- 1 | import { Sandbox } from "@e2b/desktop"; 2 | import { SSEEvent, ActionResponse } from "@/types/api"; 3 | import { ResolutionScaler } from "./resolution"; 4 | import { logDebug } from "../logger"; 5 | 6 | export function formatSSE(event: SSEEvent): string { 7 | return `data: ${JSON.stringify(event)}\n\n`; 8 | } 9 | 10 | export interface ComputerInteractionStreamerFacadeStreamProps { 11 | signal: AbortSignal; 12 | messages: { role: "user" | "assistant"; content: string }[]; 13 | } 14 | 15 | export abstract class ComputerInteractionStreamerFacade { 16 | abstract instructions: string; 17 | abstract desktop: Sandbox; 18 | abstract resolutionScaler: ResolutionScaler; 19 | 20 | abstract stream( 21 | props: ComputerInteractionStreamerFacadeStreamProps 22 | ): AsyncGenerator; 23 | 24 | // action type is specific to the streamer implementation 25 | abstract executeAction(action: unknown): Promise; 26 | } 27 | 28 | export function createStreamingResponse( 29 | generator: AsyncGenerator 30 | ): Response { 31 | const stream = new ReadableStream({ 32 | async start(controller) { 33 | for await (const chunk of generator) { 34 | controller.enqueue(new TextEncoder().encode(formatSSE(chunk))); 35 | } 36 | controller.close(); 37 | }, 38 | }); 39 | 40 | return new Response(stream, { 41 | headers: { 42 | "Content-Type": "text/event-stream", 43 | "Cache-Control": "no-cache", 44 | Connection: "keep-alive", 45 | }, 46 | }); 47 | } 48 | -------------------------------------------------------------------------------- /app/layout.tsx: 
-------------------------------------------------------------------------------- 1 | import "@/styles/globals.css"; 2 | 3 | import { Metadata } from "next"; 4 | import { Toaster } from "sonner"; 5 | import { Providers } from "../components/providers"; 6 | import { IBM_Plex_Sans, IBM_Plex_Mono } from "next/font/google"; 7 | import { ChatProvider } from "@/lib/chat-context"; 8 | import { Analytics } from "@vercel/analytics/react"; 9 | 10 | const ibmPlexSans = IBM_Plex_Sans({ 11 | subsets: ["latin"], 12 | weight: ["400", "500", "600", "700"], 13 | variable: "--font-ibm-plex-sans", 14 | }); 15 | 16 | const ibmPlexMono = IBM_Plex_Mono({ 17 | subsets: ["latin"], 18 | weight: ["400", "500", "600", "700"], 19 | variable: "--font-ibm-plex-mono", 20 | }); 21 | 22 | export const metadata: Metadata = { 23 | title: "Surf - E2B Computer Use Agent", 24 | description: 25 | "AI agent that interacts with a virtual desktop environment through natural language instructions", 26 | keywords: [ 27 | "AI", 28 | "desktop", 29 | "automation", 30 | "E2B", 31 | "OpenAI", 32 | "virtual desktop", 33 | "sandbox", 34 | ], 35 | authors: [{ name: "E2B", url: "https://e2b.dev" }], 36 | }; 37 | 38 | export default function RootLayout({ 39 | children, 40 | }: { 41 | children: React.ReactNode; 42 | }) { 43 | return ( 44 | 45 | 49 | 50 | 51 | 52 | {children} 53 | 54 | 55 | 56 | 57 | 58 | ); 59 | } 60 | -------------------------------------------------------------------------------- /components/repo-banner.tsx: -------------------------------------------------------------------------------- 1 | import { GitHubIcon } from "./icons"; 2 | import { buttonVariants } from "./ui/button"; 3 | import { Separator } from "./ui/separator"; 4 | import { cn } from "@/lib/utils"; 5 | import { StarFilledIcon } from "@radix-ui/react-icons"; 6 | 7 | const REPO_URL = "https://github.com/e2b-dev/surf"; 8 | 9 | export function RepoBanner() { 10 | return ( 11 | 24 | 44 | ); 45 | } 46 | 
-------------------------------------------------------------------------------- /components/ui/shared-menu-styles.ts: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils"; 2 | import { cva } from "class-variance-authority"; 3 | import { cardVariants } from "./card"; 4 | 5 | export const menuItemVariants = cva( 6 | [ 7 | "relative flex cursor-pointer rounded-sm select-none items-center gap-2", 8 | "px-2 py-1.5", 9 | "font-mono text-xs", 10 | "outline-none", 11 | "data-[disabled]:pointer-events-none data-[disabled]:opacity-50", 12 | ], 13 | { 14 | variants: { 15 | variant: { 16 | default: "focus:bg-accent/10 focus:text-accent", 17 | error: "text-red-500 focus:bg-red-500/10 focus:text-red-500", 18 | success: "text-green-500 focus:bg-green-500/10 focus:text-green-500", 19 | warning: "text-yellow-500 focus:bg-yellow-500/10 focus:text-yellow-500", 20 | }, 21 | }, 22 | defaultVariants: { 23 | variant: "default", 24 | }, 25 | } 26 | ); 27 | 28 | export const menuContentStyles = cn( 29 | "z-50 min-w-[8rem] overflow-hidden rounded-sm p-2", 30 | cardVariants({ variant: "layer" }), 31 | "shadow-sm", 32 | "data-[state=open]:animate-in data-[state=closed]:animate-out", 33 | "data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0", 34 | "data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95", 35 | "data-[side=bottom]:slide-in-from-top-2", 36 | "data-[side=left]:slide-in-from-right-2", 37 | "data-[side=right]:slide-in-from-left-2", 38 | "data-[side=top]:slide-in-from-bottom-2" 39 | ); 40 | 41 | export const menuLabelStyles = cn("font-mono text-xs uppercase", "text-fg-500"); 42 | 43 | export const menuSeparatorStyles = cn( 44 | "-mx-2 my-2", 45 | "border-t border-dashed border-border-200" 46 | ); 47 | 48 | export const menuViewportStyles = cn("p-1"); 49 | 50 | export const menuGroupStyles = cn("flex flex-col gap-0.5 pt-2 first:pt-0"); 51 | 
-------------------------------------------------------------------------------- /components/ui/scroll-area.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import * as React from "react"; 4 | import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area"; 5 | 6 | import { cn } from "@/lib/utils"; 7 | 8 | const ScrollArea = React.forwardRef< 9 | React.ElementRef, 10 | React.ComponentPropsWithoutRef 11 | >(({ className, children, ...props }, ref) => ( 12 | 17 | 18 | {children} 19 | 20 | 21 | 22 | 23 | )); 24 | ScrollArea.displayName = ScrollAreaPrimitive.Root.displayName; 25 | 26 | const ScrollBar = React.forwardRef< 27 | React.ElementRef, 28 | React.ComponentPropsWithoutRef 29 | >(({ className, orientation = "vertical", ...props }, ref) => ( 30 | 43 | 44 | 45 | )); 46 | ScrollBar.displayName = ScrollAreaPrimitive.ScrollAreaScrollbar.displayName; 47 | 48 | export { ScrollArea, ScrollBar }; 49 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "surf", 3 | "version": "1.0.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@ai-sdk/anthropic": "1.1.0", 13 | "@ai-sdk/google": "1.1.10", 14 | "@ai-sdk/groq": "1.1.9", 15 | "@ai-sdk/mistral": "1.1.11", 16 | "@ai-sdk/openai": "1.0.10", 17 | "@ai-sdk/ui-utils": "1.1.10", 18 | "@ai-sdk/xai": "1.1.10", 19 | "@anthropic-ai/sdk": "^0.39.0", 20 | "@e2b/desktop": "^1.8.1", 21 | "@gradio/client": "^1.10.0", 22 | "@phosphor-icons/react": "^2.1.7", 23 | "@radix-ui/react-icons": "^1.3.2", 24 | "@radix-ui/react-scroll-area": "^1.2.3", 25 | "@radix-ui/react-select": "^2.1.5", 26 | "@radix-ui/react-separator": "^1.1.2", 27 | "@vercel/analytics": "^1.5.0", 28 | "@vercel/kv": "^3.0.0", 29 | "ai": "4.1.25", 
30 | "ansis": "^3.17.0", 31 | "class-variance-authority": "^0.7.1", 32 | "clsx": "^2.1.1", 33 | "e2b": "^1.3.0", 34 | "hls.js": "^1.5.19", 35 | "lucide-react": "^0.474.0", 36 | "motion": "^12.5.0", 37 | "next": "^15.5.9", 38 | "next-themes": "^0.4.4", 39 | "openai": "4.87.2", 40 | "react": "^19.2.3", 41 | "react-dom": "^19.2.3", 42 | "react-markdown": "^9.0.1", 43 | "react-use": "^17.6.0", 44 | "remark-gfm": "^4.0.0", 45 | "sharp": "^0.33.3", 46 | "sonner": "^1.7.2", 47 | "tailwind-merge": "^2.6.0", 48 | "tailwindcss-animate": "^1.0.7", 49 | "zod": "^3.24.1" 50 | }, 51 | "devDependencies": { 52 | "@tailwindcss/postcss": "^4.0.13", 53 | "@types/node": "^22.10.2", 54 | "@types/react": "^19.0.2", 55 | "@types/react-dom": "^19.0.2", 56 | "eslint": "^9.17.0", 57 | "eslint-config-next": "15.1.2", 58 | "postcss": "^8.4.49", 59 | "tailwindcss": "^4.0.13", 60 | "typescript": "^5.7.2" 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /components/markdown.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import ReactMarkdown from "react-markdown"; 3 | import remarkGfm from "remark-gfm"; 4 | 5 | const NonMemoizedMarkdown = ({ children }: { children: string }) => { 6 | const components = { 7 | code: ({ node, inline, className, children, ...props }: any) => { 8 | const match = /language-(\w+)/.exec(className || ""); 9 | return !inline && match ? ( 10 |
14 |           {children}
15 |         
16 | ) : ( 17 | 21 | {children} 22 | 23 | ); 24 | }, 25 | ol: ({ node, children, ...props }: any) => ( 26 |
    27 | {children} 28 |
29 | ), 30 | li: ({ node, children, ...props }: any) => ( 31 |
  • 32 | {children} 33 |
  • 34 | ), 35 | ul: ({ node, children, ...props }: any) => ( 36 |
      37 | {children} 38 |
    39 | ), 40 | strong: ({ node, children, ...props }: any) => ( 41 | 42 | {children} 43 | 44 | ), 45 | p: ({ children }: any) => { 46 | return ( 47 |

    48 | {children} 49 |

    50 | ); 51 | } 52 | }; 53 | 54 | return ( 55 | 56 | {children} 57 | 58 | ); 59 | }; 60 | 61 | export const Markdown = React.memo( 62 | NonMemoizedMarkdown, 63 | (prevProps, nextProps) => prevProps.children === nextProps.children 64 | ); -------------------------------------------------------------------------------- /public/mistral.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /components/chat/example-prompts.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import React from "react"; 4 | import { Terminal } from "lucide-react"; 5 | import { Button } from "@/components/ui/button"; 6 | import { cn } from "@/lib/utils"; 7 | 8 | interface ExamplePromptProps { 9 | text: string; 10 | onClick: () => void; 11 | disabled?: boolean; 12 | } 13 | 14 | /** 15 | * Individual example prompt button 16 | */ 17 | export function ExamplePrompt({ text, onClick, disabled }: ExamplePromptProps) { 18 | return ( 19 | 28 | ); 29 | } 30 | 31 | interface ExamplePromptsProps { 32 | onPromptClick: (prompt: string) => void; 33 | prompts?: Array<{ text: string; prompt: string }>; 34 | disabled?: boolean; 35 | className?: string; 36 | } 37 | 38 | /** 39 | * Example prompts container with default prompts 40 | */ 41 | export function ExamplePrompts({ 42 | onPromptClick, 43 | prompts = [ 44 | { 45 | text: "Create a JavaScript script", 46 | prompt: 47 | "Create a simple JavaScript script that calculates the Fibonacci sequence and save it to a file", 48 | }, 49 | { 50 | text: "Edit a document in VS Code", 51 | prompt: 52 | "Open VS Code and create a simple React component that displays a counter", 53 | }, 54 | { 55 | text: "Browse GitHub", 56 | prompt: 57 | "Open Firefox and go to GitHub to search for popular machine learning repositories", 58 | }, 59 | { 60 | text: "Create a spreadsheet", 61 | prompt: 62 | 
"Open LibreOffice Calc and create a simple budget spreadsheet with formulas", 63 | }, 64 | ], 65 | disabled = false, 66 | className, 67 | }: ExamplePromptsProps) { 68 | return ( 69 |
    75 |
    76 | 77 | Try these examples 78 |
    79 |
    80 | {prompts.map((item, index) => ( 81 | onPromptClick(item.prompt)} 85 | disabled={disabled} 86 | /> 87 | ))} 88 |
    89 |
    90 | ); 91 | } 92 | -------------------------------------------------------------------------------- /types/chat.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Type definitions for chat messages and related functionality 3 | */ 4 | import { ResponseComputerToolCall } from "openai/resources/responses/responses.mjs"; 5 | import { ActionEvent, ComputerModel, SSEEventType } from "./api"; 6 | import { ComputerAction } from "@/types/anthropic"; 7 | 8 | /** 9 | * Role of a chat message 10 | */ 11 | export type MessageRole = "user" | "assistant" | "system" | "action"; 12 | 13 | /** 14 | * Base interface for all chat messages 15 | */ 16 | export interface BaseChatMessage { 17 | id: string; 18 | role: MessageRole; 19 | } 20 | 21 | /** 22 | * User message in the chat 23 | */ 24 | export interface UserChatMessage extends BaseChatMessage { 25 | role: "user"; 26 | content: string; 27 | } 28 | 29 | /** 30 | * Assistant message in the chat 31 | */ 32 | export interface AssistantChatMessage extends BaseChatMessage { 33 | role: "assistant"; 34 | content: string; 35 | model: ComputerModel; 36 | } 37 | 38 | /** 39 | * System message in the chat 40 | */ 41 | export interface SystemChatMessage extends BaseChatMessage { 42 | role: "system"; 43 | content: string; 44 | isError?: boolean; 45 | } 46 | 47 | /** 48 | * Action message in the chat 49 | */ 50 | export interface ActionChatMessage 51 | extends BaseChatMessage { 52 | role: "action"; 53 | action: T extends "openai" 54 | ? 
ResponseComputerToolCall["action"] 55 | : ComputerAction; 56 | status?: "pending" | "completed" | "failed"; 57 | model: ComputerModel; 58 | } 59 | 60 | /** 61 | * Union type for all chat messages 62 | */ 63 | export type ChatMessage = 64 | | UserChatMessage 65 | | AssistantChatMessage 66 | | SystemChatMessage 67 | | ActionChatMessage; 68 | 69 | /** 70 | * Chat state interface 71 | */ 72 | export interface ChatState { 73 | messages: ChatMessage[]; 74 | isLoading: boolean; 75 | error: string | null; 76 | } 77 | 78 | /** 79 | * Parsed SSE event from the server 80 | */ 81 | export interface ParsedSSEEvent { 82 | type: SSEEventType; 83 | content?: any; 84 | action?: ActionEvent["action"]; 85 | callId?: string; 86 | sandboxId?: string; 87 | vncUrl?: string; 88 | } 89 | 90 | /** 91 | * Chat API request parameters 92 | */ 93 | export interface ChatApiRequest { 94 | messages: { role: MessageRole; content: string }[]; 95 | sandboxId?: string; 96 | environment?: string; 97 | resolution: [number, number]; 98 | model?: ComputerModel; 99 | } 100 | 101 | /** 102 | * Options for sending a message 103 | */ 104 | export interface SendMessageOptions { 105 | content: string; 106 | sandboxId?: string; 107 | environment?: string; 108 | resolution: [number, number]; 109 | model?: ComputerModel; 110 | } 111 | -------------------------------------------------------------------------------- /types/api.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Type definitions for Surf Computer API and SSE events 3 | */ 4 | import { ComputerAction } from "@/types/anthropic"; 5 | import { ResponseComputerToolCall } from "openai/resources/responses/responses.mjs"; 6 | 7 | /** 8 | * Model types supported by Surf 9 | */ 10 | export type ComputerModel = "openai" | "anthropic"; 11 | 12 | /** 13 | * SSE event types for client communication 14 | */ 15 | export enum SSEEventType { 16 | UPDATE = "update", 17 | ACTION = "action", 18 | REASONING = "reasoning", 19 
| DONE = "done", 20 | ERROR = "error", 21 | SANDBOX_CREATED = "sandbox_created", 22 | ACTION_COMPLETED = "action_completed", 23 | } 24 | 25 | /** 26 | * Base interface for all SSE events 27 | */ 28 | export interface BaseSSEEvent { 29 | type: SSEEventType; 30 | } 31 | 32 | /** 33 | * Action event with details about computer action being performed 34 | */ 35 | export interface ActionEvent extends BaseSSEEvent { 36 | type: SSEEventType.ACTION; 37 | action: T extends "openai" 38 | ? ResponseComputerToolCall["action"] 39 | : ComputerAction; 40 | } 41 | 42 | /** 43 | * Reasoning event with AI's explanation for an action 44 | */ 45 | export interface ReasoningEvent extends BaseSSEEvent { 46 | type: SSEEventType.REASONING; 47 | content: string; 48 | } 49 | 50 | /** 51 | * Done event indicating completion 52 | */ 53 | export interface DoneEvent extends BaseSSEEvent { 54 | type: SSEEventType.DONE; 55 | content?: string; // Final OpenAI response output 56 | } 57 | 58 | /** 59 | * Error event with error details 60 | */ 61 | export interface ErrorEvent extends BaseSSEEvent { 62 | type: SSEEventType.ERROR; 63 | content: string; 64 | } 65 | 66 | /** 67 | * Sandbox created event with sandbox details 68 | */ 69 | export interface SandboxCreatedEvent extends BaseSSEEvent { 70 | type: SSEEventType.SANDBOX_CREATED; 71 | sandboxId: string; 72 | vncUrl: string; 73 | } 74 | 75 | /** 76 | * Action completed event with details about the completed action 77 | */ 78 | export interface ActionCompletedEvent extends BaseSSEEvent { 79 | type: SSEEventType.ACTION_COMPLETED; 80 | } 81 | 82 | /** 83 | * Union type of all possible SSE events 84 | */ 85 | export type SSEEvent = 86 | | ActionEvent 87 | | ReasoningEvent 88 | | DoneEvent 89 | | ErrorEvent 90 | | SandboxCreatedEvent 91 | | ActionCompletedEvent; 92 | 93 | /** 94 | * Response from action execution 95 | */ 96 | export type ActionResponse = { 97 | action: string; 98 | data: { 99 | type: "computer_screenshot"; 100 | image_url: string; 101 | 
}; 102 | }; 103 | 104 | /** 105 | * Helper function to sleep for a specified duration 106 | */ 107 | export async function sleep(ms: number): Promise { 108 | return new Promise((resolve) => setTimeout(resolve, ms)); 109 | } 110 | -------------------------------------------------------------------------------- /components/ui/grid-pattern.tsx: -------------------------------------------------------------------------------- 1 | import { useId } from "react"; 2 | 3 | import { cn } from "@/lib/utils"; 4 | 5 | interface GridPatternProps { 6 | width?: number; 7 | height?: number; 8 | x?: number; 9 | y?: number; 10 | squares?: Array<[x: number, y: number]>; 11 | strokeDasharray?: string; 12 | className?: string; 13 | gradientFrom?: string; 14 | gradientVia?: string; 15 | gradientTo?: string; 16 | gradientDegrees?: number; 17 | [key: string]: unknown; 18 | } 19 | 20 | export function GridPattern({ 21 | width = 50, 22 | height = 50, 23 | x = -1, 24 | y = -1, 25 | strokeDasharray = "0", 26 | squares, 27 | className, 28 | gradientFrom = "rgba(255,255,255,0.3)", 29 | gradientVia = "rgba(255,255,255,0.15)", 30 | gradientTo = "rgba(255,255,255,0)", 31 | gradientDegrees = 180, 32 | ...props 33 | }: GridPatternProps) { 34 | const id = useId(); 35 | const gradientId = `gradient-${id}`; 36 | const maskId = `mask-${id}`; 37 | 38 | return ( 39 | 99 | ); 100 | } 101 | 102 | export default GridPattern; 103 | -------------------------------------------------------------------------------- /components/ui/card.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react"; 2 | import { cn } from "@/lib/utils"; 3 | import { cva, VariantProps } from "class-variance-authority"; 4 | 5 | export const cardVariants = cva("rounded-sm", { 6 | variants: { 7 | variant: { 8 | default: "bg-bg text-fg", 9 | layer: "bg-bg-200/60 backdrop-blur-lg border border-border", 10 | slate: "", 11 | }, 12 | }, 13 | defaultVariants: { 14 | variant: "default", 
15 | }, 16 | }); 17 | 18 | interface CardProps 19 | extends React.HTMLAttributes, 20 | VariantProps { 21 | hideUnderline?: boolean; 22 | } 23 | 24 | const Card = React.forwardRef( 25 | ({ className, hideUnderline = false, variant = "slate", ...props }, ref) => ( 26 |
    31 | ) 32 | ); 33 | Card.displayName = "Card"; 34 | 35 | const CardHeader = React.forwardRef< 36 | HTMLDivElement, 37 | React.HTMLAttributes 38 | >(({ className, ...props }, ref) => ( 39 |
    44 | )); 45 | CardHeader.displayName = "CardHeader"; 46 | 47 | const CardTitle = React.forwardRef< 48 | HTMLParagraphElement, 49 | React.HTMLAttributes 50 | >(({ className, ...props }, ref) => ( 51 |

    60 | )); 61 | CardTitle.displayName = "CardTitle"; 62 | 63 | const CardDescription = React.forwardRef< 64 | HTMLParagraphElement, 65 | React.HTMLAttributes 66 | >(({ className, ...props }, ref) => ( 67 |

    72 | )); 73 | CardDescription.displayName = "CardDescription"; 74 | 75 | const CardContent = React.forwardRef< 76 | HTMLDivElement, 77 | React.HTMLAttributes 78 | >(({ className, ...props }, ref) => ( 79 |

    84 | )); 85 | CardContent.displayName = "CardContent"; 86 | 87 | const CardFooter = React.forwardRef< 88 | HTMLDivElement, 89 | React.HTMLAttributes 90 | >(({ className, ...props }, ref) => ( 91 |
    100 | )); 101 | CardFooter.displayName = "CardFooter"; 102 | 103 | export { 104 | Card, 105 | CardHeader, 106 | CardFooter, 107 | CardTitle, 108 | CardDescription, 109 | CardContent, 110 | }; 111 | -------------------------------------------------------------------------------- /styles/variables.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --bg: hsl(0 0% 100%); 3 | --bg-100: hsl(0 0% 98%); 4 | --bg-200: hsl(0 0% 96%); 5 | --bg-300: hsl(0 0% 94%); 6 | --bg-400: hsl(0 0% 92%); 7 | --bg-500: hsl(0 0% 90%); 8 | 9 | --fg: hsl(0 0% 5%); 10 | --fg-100: hsl(0 0% 13%); 11 | --fg-200: hsl(0 0% 21%); 12 | --fg-300: hsl(0 0% 29%); 13 | --fg-400: hsl(0 0% 37%); 14 | --fg-500: hsl(0 0% 45%); 15 | 16 | --accent: hsl(32 100% 50%); 17 | --accent-100: hsl(32 95% 55%); 18 | --accent-200: hsl(32 90% 60%); 19 | --accent-300: hsl(32 85% 65%); 20 | --accent-400: hsl(32 80% 70%); 21 | --accent-500: hsl(32 75% 75%); 22 | 23 | --accent-fg: hsl(45 20% 95%); 24 | 25 | --border: hsl(0 0% 90%); 26 | --border-100: hsl(0 0% 85%); 27 | --border-200: hsl(0 0% 80%); 28 | --border-300: hsl(0 0% 75%); 29 | --border-400: hsl(0 0% 70%); 30 | --border-500: hsl(0 0% 65%); 31 | 32 | --ring: hsl(32 85% 45%); 33 | 34 | --chart-1: hsl(32 85% 55%); 35 | --chart-2: hsl(212 85% 55%); 36 | --chart-3: hsl(152 85% 55%); 37 | --chart-4: hsl(272 85% 55%); 38 | --chart-5: hsl(332 85% 55%); 39 | 40 | --contrast-1: hsl(200 85% 50%); 41 | --contrast-2: hsl(235 86% 65%); 42 | 43 | --error: hsl(0 75% 50%); 44 | --error-100: hsl(0 75% 55%); 45 | --error-200: hsl(0 75% 60%); 46 | --error-300: hsl(0 75% 65%); 47 | --error-400: hsl(0 75% 70%); 48 | --error-500: hsl(0 75% 75%); 49 | 50 | --error-fg: hsl(45 20% 95%); 51 | 52 | --warning: hsl(35 80% 40%); 53 | --success: hsl(140 95% 30%); 54 | 55 | /* Commons */ 56 | --radius: 0.375rem; 57 | 58 | --shadow: hsl(0 0% 85%); 59 | --shadow-strength: 0.2; 60 | } 61 | 62 | .dark { 63 | --bg: hsl(0 0% 3%); 64 | --bg-100: hsl(0 
0% 5%); 65 | --bg-200: hsl(0 0% 7%); 66 | --bg-300: hsl(0 0% 9%); 67 | --bg-400: hsl(0 0% 11%); 68 | --bg-500: hsl(0 0% 13%); 69 | 70 | --fg: hsl(0 0% 98%); 71 | --fg-100: hsl(0 0% 90%); 72 | --fg-200: hsl(0 0% 82%); 73 | --fg-300: hsl(0 0% 74%); 74 | --fg-400: hsl(0 0% 66%); 75 | --fg-500: hsl(0 0% 60%); 76 | 77 | --accent: hsl(32 100% 45%); 78 | --accent-100: hsl(32 100% 40%); 79 | --accent-200: hsl(32 100% 30%); 80 | --accent-300: hsl(32 100% 20%); 81 | --accent-400: hsl(32 100% 10%); 82 | --accent-500: hsl(32 100% 5%); 83 | 84 | --accent-fg: hsl(0 0% 100%); 85 | 86 | --border: hsl(0 0% 10%); 87 | --border-100: hsl(0 0% 13%); 88 | --border-200: hsl(0 0% 16%); 89 | --border-300: hsl(0 0% 19%); 90 | --border-400: hsl(0 0% 22%); 91 | --border-500: hsl(0 0% 25%); 92 | 93 | --ring: hsl(32 100% 50%); 94 | 95 | --chart-1: hsl(220 70% 50%); 96 | --chart-2: hsl(160 60% 45%); 97 | --chart-3: hsl(30 80% 55%); 98 | --chart-4: hsl(280 65% 60%); 99 | --chart-5: hsl(340 75% 55%); 100 | 101 | --error: hsl(0 100% 65%); 102 | --error-100: hsl(0 90% 60%); 103 | --error-200: hsl(0 85% 55%); 104 | --error-300: hsl(0 80% 50%); 105 | --error-400: hsl(0 75% 45%); 106 | --error-500: hsl(0 70% 40%); 107 | 108 | --error-fg: hsl(0 0% 100%); 109 | 110 | --warning: hsl(60 100% 45%); 111 | --success: hsl(160 100% 43%); 112 | 113 | --contrast-1: hsl(200 85% 65%); 114 | --contrast-2: hsl(235 86% 75%); 115 | 116 | --shadow: hsl(0 0% 0%); 117 | --shadow-strength: 0.2; 118 | } 119 | -------------------------------------------------------------------------------- /components/ui/cctv.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import type { Variants } from 'motion/react'; 4 | import { motion, useAnimation } from 'motion/react'; 5 | import type { HTMLAttributes } from 'react'; 6 | import { forwardRef, useCallback, useImperativeHandle, useRef } from 'react'; 7 | 8 | export interface CctvIconHandle { 9 | startAnimation: () => void; 10 
| stopAnimation: () => void; 11 | } 12 | 13 | const dotVariants: Variants = { 14 | normal: { opacity: 1 }, 15 | animate: { 16 | opacity: [1, 0, 1], 17 | transition: { 18 | duration: 1, 19 | repeat: Infinity, 20 | }, 21 | }, 22 | }; 23 | 24 | const cctvVariants: Variants = { 25 | normal: { rotate: 0 }, 26 | animate: { 27 | rotate: [0, -15, 10, 0], 28 | originX: '9px', 29 | originY: '15px', 30 | transition: { 31 | duration: 2, 32 | ease: 'easeInOut', 33 | }, 34 | }, 35 | }; 36 | 37 | const CctvIcon = forwardRef>( 38 | ({ onMouseEnter, onMouseLeave, ...props }, ref) => { 39 | const controls = useAnimation(); 40 | const isControlledRef = useRef(false); 41 | 42 | useImperativeHandle(ref, () => { 43 | isControlledRef.current = true; 44 | 45 | return { 46 | startAnimation: () => controls.start('animate'), 47 | stopAnimation: () => controls.start('normal'), 48 | }; 49 | }); 50 | 51 | const handleMouseEnter = useCallback( 52 | (e: React.MouseEvent) => { 53 | if (!isControlledRef.current) { 54 | controls.start('animate'); 55 | } else { 56 | onMouseEnter?.(e); 57 | } 58 | }, 59 | [controls, onMouseEnter] 60 | ); 61 | 62 | const handleMouseLeave = useCallback( 63 | (e: React.MouseEvent) => { 64 | if (!isControlledRef.current) { 65 | controls.start('normal'); 66 | } else { 67 | onMouseLeave?.(e); 68 | } 69 | }, 70 | [controls, onMouseLeave] 71 | ); 72 | return ( 73 |
    79 | 90 | 91 | 92 | 93 | 98 | 99 | 100 | 101 | 102 |
    103 | ); 104 | } 105 | ); 106 | 107 | CctvIcon.displayName = 'CctvIcon'; 108 | 109 | export { CctvIcon }; 110 | -------------------------------------------------------------------------------- /components/ui/button.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react"; 2 | import { Slot } from "@radix-ui/react-slot"; 3 | import { cva, type VariantProps } from "class-variance-authority"; 4 | import { cn } from "@/lib/utils"; 5 | import { Loader } from "@/components/loader"; 6 | 7 | const buttonVariants = cva( 8 | [ 9 | "inline-flex items-center cursor-pointer gap-2 rounded-sm justify-center whitespace-nowrap", 10 | "font-mono uppercase tracking-wider text-sm", 11 | "transition-colors duration-150", 12 | "focus-visible:outline-none ", 13 | "disabled:pointer-events-none disabled:opacity-50", 14 | ].join(" "), 15 | { 16 | variants: { 17 | variant: { 18 | default: [ 19 | "bg-fg text-bg", 20 | "hover:bg-fg-100 focus:bg-fg-100", 21 | "active:translate-y-[1px] active:shadow-none", 22 | ].join(" "), 23 | accent: [ 24 | "bg-accent/10 text-accent", 25 | "hover:bg-accent/20 focus:bg-accent/20", 26 | "active:translate-y-[1px] active:shadow-none", 27 | ].join(" "), 28 | ghost: [ 29 | "bg-transparent", 30 | "hover:bg-transparent focus:bg-transparent", 31 | "active:translate-y-[1px] active:shadow-none", 32 | ].join(" "), 33 | muted: [ 34 | "border border-border-200 bg-bg-200 text-fg-300 hover:text-fg", 35 | "hover:bg-bg-200/90 focus:bg-bg-200/90", 36 | "active:translate-y-[1px] active:shadow-none", 37 | ].join(" "), 38 | error: [ 39 | "bg-error/10 text-error", 40 | "hover:bg-error/20 focus:bg-error/20", 41 | "active:translate-y-[1px] active:shadow-none", 42 | ].join(" "), 43 | outline: [ 44 | "border border-border bg-transparent", 45 | "hover:bg-bg-300/80 focus:bg-bg-300/80", 46 | "active:translate-y-[1px] active:shadow-none", 47 | ].join(" "), 48 | link: [ 49 | "text-accent underline-offset-4", 50 | 
"hover:underline hover:bg-transparent", 51 | "focus:ring-0 focus:underline focus:bg-transparent", 52 | "shadow-none", 53 | ].join(" "), 54 | }, 55 | size: { 56 | default: "h-8 px-3", 57 | sm: "h-7 px-2", 58 | lg: "h-10 px-4", 59 | icon: "h-8 w-8", 60 | iconSm: "h-7 w-7", 61 | iconLg: "h-10 w-10 text-xl", 62 | slate: "h-auto px-0 py-0", 63 | }, 64 | }, 65 | defaultVariants: { 66 | variant: "default", 67 | size: "default", 68 | }, 69 | } 70 | ); 71 | 72 | export interface ButtonProps 73 | extends React.ButtonHTMLAttributes, 74 | VariantProps { 75 | asChild?: boolean; 76 | loading?: boolean; 77 | } 78 | 79 | const Button = React.forwardRef( 80 | ( 81 | { className, variant, size, asChild = false, loading = false, ...props }, 82 | ref 83 | ) => { 84 | const Comp = asChild ? Slot : "button"; 85 | return ( 86 | 92 | {loading ? ( 93 |
    94 | {props.children} 95 | 96 |
    97 | ) : ( 98 | props.children 99 | )} 100 |
    101 | ); 102 | } 103 | ); 104 | Button.displayName = "Button"; 105 | 106 | export { Button, buttonVariants }; 107 | -------------------------------------------------------------------------------- /app/api/chat/route.ts: -------------------------------------------------------------------------------- 1 | import { Sandbox } from "@e2b/desktop"; 2 | import { ComputerModel, SSEEvent, SSEEventType } from "@/types/api"; 3 | import { 4 | ComputerInteractionStreamerFacade, 5 | createStreamingResponse, 6 | } from "@/lib/streaming"; 7 | import { SANDBOX_TIMEOUT_MS } from "@/lib/config"; 8 | import { OpenAIComputerStreamer } from "@/lib/streaming/openai"; 9 | import { logError } from "@/lib/logger"; 10 | import { ResolutionScaler } from "@/lib/streaming/resolution"; 11 | 12 | export const maxDuration = 800; 13 | 14 | class StreamerFactory { 15 | static getStreamer( 16 | model: ComputerModel, 17 | desktop: Sandbox, 18 | resolution: [number, number] 19 | ): ComputerInteractionStreamerFacade { 20 | const resolutionScaler = new ResolutionScaler(desktop, resolution); 21 | 22 | switch (model) { 23 | case "anthropic": 24 | // currently not implemented 25 | /* return new AnthropicComputerStreamer(desktop, resolutionScaler); */ 26 | case "openai": 27 | default: 28 | return new OpenAIComputerStreamer(desktop, resolutionScaler); 29 | } 30 | } 31 | } 32 | 33 | export async function POST(request: Request) { 34 | const abortController = new AbortController(); 35 | const { signal } = abortController; 36 | 37 | request.signal.addEventListener("abort", () => { 38 | abortController.abort(); 39 | }); 40 | 41 | const { 42 | messages, 43 | sandboxId, 44 | resolution, 45 | model = "openai", 46 | } = await request.json(); 47 | 48 | const apiKey = process.env.E2B_API_KEY; 49 | 50 | if (!apiKey) { 51 | return new Response("E2B API key not found", { status: 500 }); 52 | } 53 | 54 | let desktop: Sandbox | undefined; 55 | let activeSandboxId = sandboxId; 56 | let vncUrl: string | undefined; 57 | 58 
| try { 59 | if (!activeSandboxId) { 60 | const newSandbox = await Sandbox.create({ 61 | resolution, 62 | dpi: 96, 63 | timeoutMs: SANDBOX_TIMEOUT_MS, 64 | }); 65 | 66 | await newSandbox.stream.start(); 67 | 68 | activeSandboxId = newSandbox.sandboxId; 69 | vncUrl = newSandbox.stream.getUrl(); 70 | desktop = newSandbox; 71 | } else { 72 | desktop = await Sandbox.connect(activeSandboxId); 73 | } 74 | 75 | if (!desktop) { 76 | return new Response("Failed to connect to sandbox", { status: 500 }); 77 | } 78 | 79 | desktop.setTimeout(SANDBOX_TIMEOUT_MS); 80 | 81 | try { 82 | const streamer = StreamerFactory.getStreamer( 83 | model as ComputerModel, 84 | desktop, 85 | resolution 86 | ); 87 | 88 | if (!sandboxId && activeSandboxId && vncUrl) { 89 | async function* stream(): AsyncGenerator> { 90 | yield { 91 | type: SSEEventType.SANDBOX_CREATED, 92 | sandboxId: activeSandboxId, 93 | vncUrl: vncUrl as string, 94 | }; 95 | 96 | yield* streamer.stream({ messages, signal }); 97 | } 98 | 99 | return createStreamingResponse(stream()); 100 | } else { 101 | return createStreamingResponse(streamer.stream({ messages, signal })); 102 | } 103 | } catch (error) { 104 | logError("Error from streaming service:", error); 105 | 106 | return new Response( 107 | "An error occurred with the AI service. 
Please try again.", 108 | { status: 500 } 109 | ); 110 | } 111 | } catch (error) { 112 | logError("Error connecting to sandbox:", error); 113 | return new Response("Failed to connect to sandbox", { status: 500 }); 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /components/ui/keyboard.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { AnimatePresence, motion, useAnimation } from 'motion/react'; 4 | import { useEffect, useState } from 'react'; 5 | import type { HTMLAttributes } from 'react'; 6 | import { forwardRef, useCallback, useImperativeHandle, useRef } from 'react'; 7 | 8 | export interface KeyboardIconHandle { 9 | startAnimation: () => void; 10 | stopAnimation: () => void; 11 | } 12 | 13 | const KEYBOARD_PATHS = [ 14 | { id: 'key1', d: 'M10 8h.01' }, 15 | { id: 'key2', d: 'M12 12h.01' }, 16 | { id: 'key3', d: 'M14 8h.01' }, 17 | { id: 'key4', d: 'M16 12h.01' }, 18 | { id: 'key5', d: 'M18 8h.01' }, 19 | { id: 'key6', d: 'M6 8h.01' }, 20 | { id: 'key7', d: 'M7 16h10' }, 21 | { id: 'key8', d: 'M8 12h.01' }, 22 | ]; 23 | 24 | const KeyboardIcon = forwardRef< 25 | KeyboardIconHandle, 26 | HTMLAttributes 27 | >(({ onMouseEnter, onMouseLeave, ...props }, ref) => { 28 | const [isHovered, setIsHovered] = useState(false); 29 | const controls = useAnimation(); 30 | 31 | const isControlledRef = useRef(false); 32 | 33 | useImperativeHandle(ref, () => { 34 | isControlledRef.current = true; 35 | 36 | return { 37 | startAnimation: () => setIsHovered(true), 38 | stopAnimation: () => setIsHovered(false), 39 | }; 40 | }); 41 | 42 | const handleMouseEnter = useCallback( 43 | (e: React.MouseEvent) => { 44 | if (!isControlledRef.current) { 45 | setIsHovered(true); 46 | } else { 47 | onMouseEnter?.(e); 48 | } 49 | }, 50 | [onMouseEnter] 51 | ); 52 | 53 | const handleMouseLeave = useCallback( 54 | (e: React.MouseEvent) => { 55 | if (!isControlledRef.current) { 
56 | setIsHovered(false); 57 | } else { 58 | onMouseLeave?.(e); 59 | } 60 | }, 61 | [onMouseLeave] 62 | ); 63 | 64 | useEffect(() => { 65 | const animateKeys = async () => { 66 | if (isHovered) { 67 | await controls.start((i) => ({ 68 | opacity: [1, 0.2, 1], 69 | transition: { 70 | duration: 1.5, 71 | times: [0, 0.5, 1], 72 | delay: i * 0.2 * Math.random(), 73 | repeat: 1, 74 | repeatType: 'reverse', 75 | }, 76 | })); 77 | } else { 78 | controls.stop(); 79 | controls.set({ opacity: 1 }); 80 | } 81 | }; 82 | 83 | animateKeys(); 84 | }, [isHovered, controls]); 85 | 86 | return ( 87 |
    93 | 104 | 105 | 106 | {KEYBOARD_PATHS.map((path, index) => ( 107 | 114 | ))} 115 | 116 | 117 |
    118 | ); 119 | }); 120 | 121 | KeyboardIcon.displayName = 'KeyboardIcon'; 122 | 123 | export { KeyboardIcon }; 124 | -------------------------------------------------------------------------------- /components/ui/route.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import type { Transition, Variants } from 'motion/react'; 4 | import { motion, useAnimation } from 'motion/react'; 5 | import type { HTMLAttributes } from 'react'; 6 | import { forwardRef, useCallback, useImperativeHandle, useRef } from 'react'; 7 | 8 | export interface RouteIconHandle { 9 | startAnimation: () => void; 10 | stopAnimation: () => void; 11 | } 12 | 13 | const circleTransition: Transition = { 14 | duration: 0.3, 15 | delay: 0.1, 16 | opacity: { delay: 0.15 }, 17 | }; 18 | 19 | const circleVariants: Variants = { 20 | normal: { 21 | pathLength: 1, 22 | opacity: 1, 23 | }, 24 | animate: { 25 | pathLength: [0, 1], 26 | opacity: [0, 1], 27 | }, 28 | }; 29 | 30 | const RouteIcon = forwardRef>( 31 | ({ onMouseEnter, onMouseLeave, ...props }, ref) => { 32 | const controls = useAnimation(); 33 | const isControlledRef = useRef(false); 34 | 35 | useImperativeHandle(ref, () => { 36 | isControlledRef.current = true; 37 | 38 | return { 39 | startAnimation: () => controls.start('animate'), 40 | stopAnimation: () => controls.start('normal'), 41 | }; 42 | }); 43 | 44 | const handleMouseEnter = useCallback( 45 | (e: React.MouseEvent) => { 46 | if (!isControlledRef.current) { 47 | controls.start('animate'); 48 | } else { 49 | onMouseEnter?.(e); 50 | } 51 | }, 52 | [controls, onMouseEnter] 53 | ); 54 | 55 | const handleMouseLeave = useCallback( 56 | (e: React.MouseEvent) => { 57 | if (!isControlledRef.current) { 58 | controls.start('normal'); 59 | } else { 60 | onMouseLeave?.(e); 61 | } 62 | }, 63 | [controls, onMouseLeave] 64 | ); 65 | 66 | return ( 67 |
    73 | 84 | 92 | 109 | 117 | 118 |
    119 | ); 120 | } 121 | ); 122 | 123 | RouteIcon.displayName = 'RouteIcon'; 124 | 125 | export { RouteIcon }; 126 | -------------------------------------------------------------------------------- /components/chat/input.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import React, { useMemo } from "react"; 4 | import { OpenAiLogo } from "@phosphor-icons/react"; 5 | import { ChevronsRight, StopCircle } from "lucide-react"; 6 | import { Button } from "@/components/ui/button"; 7 | import { cn } from "@/lib/utils"; 8 | import { 9 | Select, 10 | SelectContent, 11 | SelectGroup, 12 | SelectItem, 13 | SelectLabel, 14 | SelectTrigger, 15 | } from "../ui/select"; 16 | import { useChat } from "@/lib/chat-context"; 17 | import { Input } from "../ui/input"; 18 | import { AnthropicLogo } from "../icons"; 19 | import { motion } from "motion/react"; 20 | 21 | interface ChatInputProps { 22 | input: string; 23 | setInput: (input: string) => void; 24 | onSubmit: (e: React.FormEvent) => void; 25 | isLoading: boolean; 26 | onStop: () => void; 27 | disabled?: boolean; 28 | placeholder?: string; 29 | className?: string; 30 | } 31 | 32 | /** 33 | * Chat input component with submit and stop buttons 34 | */ 35 | export function ChatInput({ 36 | input, 37 | setInput, 38 | onSubmit, 39 | isLoading, 40 | onStop, 41 | disabled = false, 42 | placeholder = "What are we surfing today?", 43 | className, 44 | }: ChatInputProps) { 45 | const { model, setModel } = useChat(); 46 | 47 | const isInputEmpty = useMemo(() => input.trim() === "", [input]); 48 | 49 | return ( 50 |
    51 |
    52 |
    53 | {/* CURRENTLY NOT USED */} 54 | {/* */} 73 | setInput(e.target.value)} 77 | autoFocus 78 | required 79 | disabled={disabled} 80 | className="w-full pr-16" 81 | /> 82 |
    83 | {isLoading ? ( 84 | 94 | ) : ( 95 | 115 | )} 116 |
    117 |
    118 |
    119 |
    120 | ); 121 | } 122 | -------------------------------------------------------------------------------- /components/logo.tsx: -------------------------------------------------------------------------------- 1 | export type LogoStyle = "e2b" | "fragments"; 2 | 3 | export default function Logo({ 4 | style = "e2b", 5 | ...props 6 | }: { style?: LogoStyle } & React.SVGProps) { 7 | return style === "fragments" ? ( 8 | 14 | 18 | 19 | ) : ( 20 | 26 | 32 | 33 | ); 34 | } 35 | -------------------------------------------------------------------------------- /components/ui/cursor-click.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import type { Variants } from 'motion/react'; 4 | import { motion, useAnimation } from 'motion/react'; 5 | import type { HTMLAttributes } from 'react'; 6 | import { forwardRef, useCallback, useImperativeHandle, useRef } from 'react'; 7 | 8 | export interface CursorClickIconHandle { 9 | startAnimation: () => void; 10 | stopAnimation: () => void; 11 | } 12 | 13 | const cursorVariants: Variants = { 14 | initial: { x: 0, y: 0 }, 15 | hover: { 16 | x: [0, 0, -3, 0], 17 | y: [0, -4, 0, 0], 18 | transition: { 19 | duration: 1, 20 | bounce: 0.3, 21 | }, 22 | }, 23 | }; 24 | 25 | const lineVariants: Variants = { 26 | initial: { opacity: 1, x: 0, y: 0 }, 27 | spread: (custom: { x: number; y: number }) => ({ 28 | opacity: [0, 1, 0, 0, 0, 0, 1], 29 | x: [0, custom.x, 0, 0], 30 | y: [0, custom.y, 0, 0], 31 | transition: { 32 | type: 'spring', 33 | stiffness: 70, 34 | damping: 10, 35 | mass: 0.4, 36 | }, 37 | }), 38 | }; 39 | 40 | const CursorClickIcon = forwardRef< 41 | CursorClickIconHandle, 42 | HTMLAttributes 43 | >(({ onMouseEnter, onMouseLeave, ...props }, ref) => { 44 | const clickControls = useAnimation(); 45 | const cursorControls = useAnimation(); 46 | const isControlledRef = useRef(false); 47 | 48 | useImperativeHandle(ref, () => { 49 | isControlledRef.current = true; 50 | 51 | 
return { 52 | startAnimation: () => { 53 | cursorControls.start('hover'); 54 | clickControls.start('spread', { delay: 1.3 }); 55 | }, 56 | stopAnimation: () => { 57 | cursorControls.start('initial'); 58 | clickControls.start('initial'); 59 | }, 60 | }; 61 | }); 62 | 63 | const handleMouseEnter = useCallback( 64 | (e: React.MouseEvent) => { 65 | if (!isControlledRef.current) { 66 | cursorControls.start('hover'); 67 | clickControls.start('spread', { delay: 1.3 }); 68 | } else { 69 | onMouseEnter?.(e); 70 | } 71 | }, 72 | [clickControls, cursorControls, onMouseEnter] 73 | ); 74 | 75 | const handleMouseLeave = useCallback( 76 | (e: React.MouseEvent) => { 77 | if (!isControlledRef.current) { 78 | cursorControls.start('initial'); 79 | clickControls.start('initial'); 80 | } else { 81 | onMouseLeave?.(e); 82 | } 83 | }, 84 | [cursorControls, clickControls, onMouseLeave] 85 | ); 86 | 87 | return ( 88 |
    94 | 105 | 110 | 116 | 122 | 128 | 134 | 135 |
    136 | ); 137 | }); 138 | 139 | CursorClickIcon.displayName = 'CursorClickIcon'; 140 | 141 | export { CursorClickIcon }; 142 | -------------------------------------------------------------------------------- /types/anthropic.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Type definitions for Anthropic-related functionality 3 | */ 4 | import { 5 | BetaMessageParam, 6 | BetaToolResultBlockParam, 7 | BetaToolUseBlock, 8 | } from "@anthropic-ai/sdk/resources/beta/messages/messages.mjs"; 9 | 10 | type Coordinate = [number, number]; 11 | 12 | interface ComputerActionBase { 13 | action: string; 14 | } 15 | 16 | interface KeyAction extends ComputerActionBase { 17 | action: "key"; 18 | text: string; 19 | } 20 | 21 | interface HoldKeyAction extends ComputerActionBase { 22 | action: "hold_key"; 23 | text: string; 24 | duration: number; 25 | } 26 | 27 | interface TypeAction extends ComputerActionBase { 28 | action: "type"; 29 | text: string; 30 | } 31 | 32 | interface CursorPositionAction extends ComputerActionBase { 33 | action: "cursor_position"; 34 | } 35 | 36 | interface MouseMoveAction extends ComputerActionBase { 37 | action: "mouse_move"; 38 | coordinate: Coordinate; 39 | } 40 | 41 | interface LeftMouseDownAction extends ComputerActionBase { 42 | action: "left_mouse_down"; 43 | } 44 | 45 | interface LeftMouseUpAction extends ComputerActionBase { 46 | action: "left_mouse_up"; 47 | } 48 | 49 | interface LeftClickAction extends ComputerActionBase { 50 | action: "left_click"; 51 | coordinate: Coordinate; 52 | text?: string; 53 | } 54 | 55 | interface LeftClickDragAction extends ComputerActionBase { 56 | action: "left_click_drag"; 57 | start_coordinate: Coordinate; 58 | coordinate: Coordinate; 59 | } 60 | 61 | interface RightClickAction extends ComputerActionBase { 62 | action: "right_click"; 63 | coordinate: Coordinate; 64 | text?: string; 65 | } 66 | 67 | interface MiddleClickAction extends ComputerActionBase { 68 | 
action: "middle_click"; 69 | coordinate: Coordinate; 70 | text?: string; 71 | } 72 | 73 | interface DoubleClickAction extends ComputerActionBase { 74 | action: "double_click"; 75 | coordinate: Coordinate; 76 | text?: string; 77 | } 78 | 79 | interface TripleClickAction extends ComputerActionBase { 80 | action: "triple_click"; 81 | coordinate: Coordinate; 82 | text?: string; 83 | } 84 | 85 | interface ScrollAction extends ComputerActionBase { 86 | action: "scroll"; 87 | coordinate: Coordinate; 88 | scroll_direction: "up" | "down" | "left" | "right"; 89 | scroll_amount: number; 90 | text?: string; 91 | } 92 | 93 | interface WaitAction extends ComputerActionBase { 94 | action: "wait"; 95 | duration: number; 96 | } 97 | 98 | interface ScreenshotAction extends ComputerActionBase { 99 | action: "screenshot"; 100 | } 101 | 102 | export type ComputerAction = 103 | | KeyAction 104 | | HoldKeyAction 105 | | TypeAction 106 | | CursorPositionAction 107 | | MouseMoveAction 108 | | LeftMouseDownAction 109 | | LeftMouseUpAction 110 | | LeftClickAction 111 | | LeftClickDragAction 112 | | RightClickAction 113 | | MiddleClickAction 114 | | DoubleClickAction 115 | | TripleClickAction 116 | | ScrollAction 117 | | WaitAction 118 | | ScreenshotAction; 119 | 120 | interface TextEditorCommandBase { 121 | command: string; 122 | path: string; 123 | } 124 | 125 | interface ViewCommand extends TextEditorCommandBase { 126 | command: "view"; 127 | view_range?: [number, number]; 128 | } 129 | 130 | interface CreateCommand extends TextEditorCommandBase { 131 | command: "create"; 132 | file_text: string; 133 | } 134 | 135 | interface StrReplaceCommand extends TextEditorCommandBase { 136 | command: "str_replace"; 137 | old_str: string; 138 | new_str?: string; 139 | } 140 | 141 | interface InsertCommand extends TextEditorCommandBase { 142 | command: "insert"; 143 | insert_line: number; 144 | new_str: string; 145 | } 146 | 147 | interface UndoEditCommand extends TextEditorCommandBase { 148 | command: 
"undo_edit"; 149 | } 150 | 151 | export type TextEditorCommand = 152 | | ViewCommand 153 | | CreateCommand 154 | | StrReplaceCommand 155 | | InsertCommand 156 | | UndoEditCommand; 157 | 158 | export type BashCommand = 159 | | { 160 | command: string; 161 | restart?: never; 162 | } 163 | | { 164 | command?: never; 165 | restart: true; 166 | }; 167 | 168 | export type ToolInput = 169 | | { name: "computer"; input: ComputerAction } 170 | | { name: "str_replace_editor"; input: TextEditorCommand } 171 | | { name: "bash"; input: BashCommand }; 172 | -------------------------------------------------------------------------------- /components/loader.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { cn } from "@/lib/utils"; 4 | import { useEffect, useState } from "react"; 5 | 6 | // Add this before the Button component 7 | const LOADER_VARIANTS = { 8 | line: ["|", "/", "─", "\\"], 9 | progress: ["▰▱▱▱▱▱", "▰▰▱▱▱▱", "▰▰▰▱▱▱", "▰▰▰▰▱▱", "▰▰▰▰▰▱", "▰▰▰▰▰▰"], 10 | compute: ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"], 11 | dots: [". ", ".. 
", "...", " ..", " .", " "], 12 | clock: [ 13 | "🕐", 14 | "🕑", 15 | "🕒", 16 | "🕓", 17 | "🕔", 18 | "🕕", 19 | "🕖", 20 | "🕗", 21 | "🕘", 22 | "🕙", 23 | "🕚", 24 | "🕛", 25 | ], 26 | bounce: ["⠁", "⠂", "⠄", "⠂"], 27 | wave: ["⠀", "⠄", "⠆", "⠇", "⠋", "⠙", "⠸", "⠰", "⠠", "⠀"], 28 | square: ["◰", "◳", "◲", "◱"], 29 | pulse: ["□", "◊", "○", "◊"], 30 | } as const; 31 | 32 | export const Loader = ({ 33 | variant = "square", 34 | interval = 150, 35 | className, 36 | }: { 37 | variant?: keyof typeof LOADER_VARIANTS; 38 | interval?: number; 39 | className?: string; 40 | }) => { 41 | const [index, setIndex] = useState(0); 42 | const chars = LOADER_VARIANTS[variant]; 43 | 44 | useEffect(() => { 45 | const timer = setInterval(() => { 46 | setIndex((i) => (i + 1) % chars.length); 47 | }, interval); 48 | return () => clearInterval(timer); 49 | }, [chars, interval]); 50 | 51 | return {chars[index]}; 52 | }; 53 | 54 | export const AssemblyLoader = ({ 55 | interval = 20, 56 | className, 57 | filledChar = "█", 58 | emptyChar = "░", 59 | gridWidth = 5, 60 | gridHeight = 3, 61 | }: { 62 | interval?: number; 63 | className?: string; 64 | filledChar?: string; 65 | emptyChar?: string; 66 | gridWidth?: number; 67 | gridHeight?: number; 68 | }) => { 69 | // Grid state: true means filled 70 | const [grid, setGrid] = useState( 71 | Array(gridHeight) 72 | .fill(null) 73 | .map(() => Array(gridWidth).fill(false)) 74 | ); 75 | 76 | // Current falling block position 77 | const [block, setBlock] = useState<{ x: number; y: number } | null>(null); 78 | 79 | // Check if block can move down 80 | const canMoveDown = (x: number, y: number) => { 81 | if (y + 1 >= gridHeight) return false; // Bottom boundary 82 | if (grid[y + 1][x]) return false; // Block below 83 | return true; 84 | }; 85 | 86 | // Check if block can move left 87 | const canMoveLeft = (x: number, y: number) => { 88 | if (x - 1 < 0) return false; // Left boundary 89 | if (grid[y][x - 1]) return false; // Block to left 90 | return true; 91 | }; 
92 | 93 | // Place block in grid 94 | const placeBlock = (x: number, y: number) => { 95 | setGrid((prev) => { 96 | const newGrid = prev.map((row) => [...row]); 97 | newGrid[y][x] = true; 98 | return newGrid; 99 | }); 100 | }; 101 | 102 | // Spawn new block - always at rightmost column 103 | const spawnBlock = () => { 104 | // Check if grid is completely full 105 | if (grid.every((row) => row.every((cell) => cell))) { 106 | return null; 107 | } 108 | return { x: gridWidth - 1, y: 0 }; 109 | }; 110 | 111 | useEffect(() => { 112 | const timer = setInterval(() => { 113 | setBlock((current) => { 114 | if (!current) { 115 | return spawnBlock(); 116 | } 117 | 118 | const { x, y } = current; 119 | 120 | // If can move down, do it 121 | if (canMoveDown(x, y)) { 122 | return { x, y: y + 1 }; 123 | } 124 | 125 | // If can't move down, try to move left 126 | if (canMoveLeft(x, y)) { 127 | return { x: x - 1, y }; 128 | } 129 | 130 | // Can't move anymore, place block 131 | placeBlock(x, y); 132 | 133 | // Spawn new block 134 | return spawnBlock(); 135 | }); 136 | }, interval); 137 | 138 | return () => clearInterval(timer); 139 | }, [interval, grid]); 140 | 141 | return ( 142 |
    143 | {grid.map((row, y) => ( 144 |
    145 | {row.map((cell, x) => ( 146 | 147 | {cell || (block && block.x === x && block.y === y) 148 | ? filledChar 149 | : emptyChar} 150 | 151 | ))} 152 |
    153 | ))} 154 |
    155 | ); 156 | }; 157 | -------------------------------------------------------------------------------- /styles/theme.css: -------------------------------------------------------------------------------- 1 | @custom-variant dark (&:where(.dark, .dark *)); 2 | 3 | @theme inline { 4 | /* Border Radius */ 5 | --radius: 0.375rem; 6 | 7 | /* Colors */ 8 | --color-border: var(--border); 9 | --color-border-100: var(--border-100); 10 | --color-border-200: var(--border-200); 11 | --color-border-300: var(--border-300); 12 | --color-border-400: var(--border-400); 13 | --color-border-500: var(--border-500); 14 | 15 | --color-bg: var(--bg); 16 | --color-bg-100: var(--bg-100); 17 | --color-bg-200: var(--bg-200); 18 | --color-bg-300: var(--bg-300); 19 | --color-bg-400: var(--bg-400); 20 | --color-bg-500: var(--bg-500); 21 | 22 | --color-fg: var(--fg); 23 | --color-fg-100: var(--fg-100); 24 | --color-fg-200: var(--fg-200); 25 | --color-fg-300: var(--fg-300); 26 | --color-fg-400: var(--fg-400); 27 | --color-fg-500: var(--fg-500); 28 | 29 | --color-accent: var(--accent); 30 | --color-accent-100: var(--accent-100); 31 | --color-accent-200: var(--accent-200); 32 | --color-accent-300: var(--accent-300); 33 | --color-accent-400: var(--accent-400); 34 | --color-accent-500: var(--accent-500); 35 | 36 | --color-error: var(--error); 37 | --color-error-100: var(--error-100); 38 | --color-error-200: var(--error-200); 39 | --color-error-300: var(--error-300); 40 | --color-error-400: var(--error-400); 41 | --color-error-500: var(--error-500); 42 | 43 | --color-success: var(--success); 44 | --color-warning: var(--warning); 45 | 46 | --color-ring: var(--ring); 47 | 48 | --color-chart-1: var(--chart-1); 49 | --color-chart-2: var(--chart-2); 50 | --color-chart-3: var(--chart-3); 51 | --color-chart-4: var(--chart-4); 52 | --color-chart-5: var(--chart-5); 53 | 54 | --color-contrast-1: var(--contrast-1); 55 | --color-contrast-2: var(--contrast-2); 56 | 57 | --shadow-xs: 0 1px 2px hsl(from 
var(--shadow) h s l / var(--shadow-strength)), 0 1px 1px hsl(from var(--shadow) h s l / var(--shadow-strength)); 58 | --shadow-sm: 0 1px 3px hsl(from var(--shadow) h s l / var(--shadow-strength)), 0 1px 2px hsl(from var(--shadow) h s l / var(--shadow-strength)); 59 | --shadow-md: 0 4px 6px hsl(from var(--shadow) h s l / var(--shadow-strength)), 0 2px 4px hsl(from var(--shadow) h s l / var(--shadow-strength)); 60 | --shadow-lg: 0 10px 15px hsl(from var(--shadow) h s l / var(--shadow-strength)), 0 4px 6px hsl(from var(--shadow) h s l / var(--shadow-strength)); 61 | --shadow-xl: 0 20px 25px hsl(from var(--shadow) h s l / var(--shadow-strength)), 0 8px 10px hsl(from var(--shadow) h s l / var(--shadow-strength)); 62 | --shadow-2xl: 0 25px 50px hsl(from var(--shadow) h s l / var(--shadow-strength)); 63 | --shadow-inner: inset 0 2px 4px hsl(from var(--shadow) h s l / var(--shadow-strength)); 64 | 65 | --color-fd-background: var(--bg); 66 | --color-fd-foreground: var(--fg); 67 | --color-fd-muted: var(--bg-100); 68 | --color-fd-muted-foreground: var(--fg-300); 69 | --color-fd-popover: var(--bg); 70 | --color-fd-popover-foreground: var(--fg); 71 | --color-fd-card: var(--bg); 72 | --color-fd-card-foreground: var(--fg); 73 | --color-fd-border: var(--border); 74 | --color-fd-primary: var(--accent); 75 | --color-fd-primary-foreground: var(--accent-fg); 76 | --color-fd-secondary: var(--bg-100); 77 | --color-fd-secondary-foreground: var(--fg); 78 | --color-fd-accent: var(--bg-100); 79 | --color-fd-accent-foreground: var(--fg); 80 | --color-fd-ring: var(--ring); 81 | 82 | /* Fonts */ 83 | --font-sans: "IBM Plex Sans", sans-serif; 84 | --font-mono: "IBM Plex Mono", monospace; 85 | 86 | /* Animations */ 87 | @keyframes accordion-down { 88 | from { 89 | height: 0; 90 | } 91 | to { 92 | height: var(--radix-accordion-content-height); 93 | } 94 | } 95 | 96 | @keyframes accordion-up { 97 | from { 98 | height: var(--radix-accordion-content-height); 99 | } 100 | to { 101 | height: 0; 102 | 
} 103 | } 104 | 105 | @keyframes wave { 106 | 0% { 107 | transform: translateX(0%); 108 | } 109 | 100% { 110 | transform: translateX(-50%); 111 | } 112 | } 113 | 114 | @keyframes shimmer { 115 | 0% { 116 | background-position: 100% 0; 117 | } 118 | 100% { 119 | background-position: -100% 0; 120 | } 121 | } 122 | 123 | @keyframes grid { 124 | 0% { 125 | transform: translateY(-50%); 126 | } 127 | 100% { 128 | transform: translateY(0); 129 | } 130 | } 131 | 132 | @keyframes fade-slide-in-from-bottom { 133 | 0% { 134 | transform: translateY(5px); 135 | } 136 | 100% { 137 | transform: translateY(0); 138 | } 139 | } 140 | 141 | --animate-accordion-down: accordion-down 0.2s ease-out; 142 | --animate-accordion-up: accordion-up 0.2s ease-out; 143 | --animate-shimmer: shimmer 1s ease-in-out infinite; 144 | --animate-grid: grid 30s linear infinite; 145 | --animate-wave: wave 2s linear infinite; 146 | --animate-fade-slide-in: fade-slide-in-from-bottom 0.1s cubic-bezier(0.16, 1, 0.3, 1); 147 | } 148 | 149 | @utility container { 150 | margin-inline: auto; 151 | padding-inline: 2rem; 152 | max-width: 1400px; 153 | } -------------------------------------------------------------------------------- /components/chat/message.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import React from "react"; 4 | import { 5 | ChatMessage as ChatMessageType, 6 | ActionChatMessage, 7 | AssistantChatMessage, 8 | } from "@/types/chat"; 9 | import { Card, CardContent } from "@/components/ui/card"; 10 | import { cn } from "@/lib/utils"; 11 | import { cva, VariantProps } from "class-variance-authority"; 12 | import { 13 | Terminal, 14 | AlertCircle, 15 | CheckCircle, 16 | Clock, 17 | User, 18 | Info, 19 | } from "lucide-react"; 20 | import { useChat } from "@/lib/chat-context"; 21 | import { Badge } from "../ui/badge"; 22 | import { OpenAiLogo } from "@phosphor-icons/react"; 23 | import { AnthropicLogo } from "../icons"; 24 | 25 | 
const messageVariants = cva("", { 26 | variants: { 27 | role: { 28 | user: "bg-accent/15 text-accent-fg border-accent-300", 29 | assistant: "bg-bg-100 text-fg border-border-100", 30 | system: "bg-bg-100 text-fg-300 border-border italic", 31 | }, 32 | }, 33 | defaultVariants: { 34 | role: "system", 35 | }, 36 | }); 37 | 38 | interface ChatMessageProps extends VariantProps { 39 | message: ChatMessageType; 40 | className?: string; 41 | } 42 | 43 | function ActionMessageDisplay({ 44 | message, 45 | className, 46 | }: { 47 | message: ActionChatMessage; 48 | className?: string; 49 | }) { 50 | const { action, status } = message; 51 | 52 | const formatAction = (action: any): string => { 53 | if (!action) return "No action details"; 54 | 55 | try { 56 | return JSON.stringify(action, null, 2); 57 | } catch (e) { 58 | return "Unable to display action details"; 59 | } 60 | }; 61 | 62 | const getStatusIcon = () => { 63 | switch (status) { 64 | case "completed": 65 | return ; 66 | case "failed": 67 | return ; 68 | case "pending": 69 | return ; 70 | default: 71 | return null; 72 | } 73 | }; 74 | 75 | return ( 76 |
    77 | 81 | 82 |
    83 | 84 | Action 85 | {status && ( 86 |
    87 | {getStatusIcon()} 88 | {status} 89 |
    90 | )} 91 |
    92 | 93 |
    94 | {formatAction(action)} 95 |
    96 |
    97 |
    98 |
    99 | ); 100 | } 101 | 102 | export function ChatMessage({ message, className }: ChatMessageProps) { 103 | const role = message.role; 104 | 105 | const isUser = role === "user"; 106 | const isAssistant = role === "assistant"; 107 | const isAction = role === "action"; 108 | const isSystem = role === "system"; 109 | const isError = "isError" in message && message.isError; 110 | 111 | const { model } = useChat(); 112 | 113 | if (isSystem) { 114 | return ( 115 |
    116 | {message.content} 117 |
    118 | ); 119 | } 120 | 121 | if (isAction) { 122 | return ( 123 | } 125 | className={className} 126 | /> 127 | ); 128 | } 129 | 130 | const getRoleIcon = () => { 131 | if (isUser) return ; 132 | if (isAssistant) { 133 | if ((message as AssistantChatMessage).model === "openai") { 134 | return ; 135 | } else { 136 | return ; 137 | } 138 | } 139 | return ; 140 | }; 141 | 142 | const roleLabel = isUser ? "You" : isAssistant ? "Assistant" : "System"; 143 | 144 | return ( 145 |
    152 | 159 | 160 |
    161 | {getRoleIcon()} 162 | {roleLabel} 163 |
    164 |
    165 | {message.content} 166 |
    167 |
    168 |
    169 |
    170 | ); 171 | } 172 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![E2B Surf Preview Light](/readme-assets/surf-light.png#gh-light-mode-only) 2 | ![E2B Surf Preview Dark](/readme-assets/surf-dark.png#gh-dark-mode-only) 3 | 4 | # 🏄 Surf - OpenAI's Computer Use Agent + E2B Desktop 5 | 6 | A Next.js application that allows AI to interact with a virtual desktop environment. This project integrates [E2B's desktop sandbox](https://github.com/e2b-dev/desktop) with OpenAI's API to create an AI agent that can perform tasks on a virtual computer through natural language instructions. 7 | 8 | [E2B](https://e2b.dev) is an open source isolated virtual computer in the cloud made for AI use cases. 9 | 10 | ## Overview 11 | 12 | The Computer Use App provides a web interface where users can: 13 | 14 | 1. Start a virtual desktop sandbox environment 15 | 2. Send natural language instructions to an AI agent 16 | 3. Watch as the AI agent performs actions on the virtual desktop 17 | 4. Interact with the AI through a chat interface 18 | 19 | The application uses Server-Sent Events (SSE) to stream AI responses and actions in real-time, providing a seamless experience. 20 | 21 | ## How It Works 22 | 23 | ### Architecture 24 | 25 | The application consists of several key components: 26 | 27 | 1. **Frontend UI (Next.js)**: Provides the user interface with a virtual desktop view and chat interface 28 | 2. [**E2B Desktop Sandbox**](https://github.com/e2b-dev/desktop): Creates and manages virtual desktop environments 29 | 3. [**OpenAI Computer Use**](https://platform.openai.com/docs/guides/tools-computer-use): Processes user instructions and generates actions for the AI agent 30 | 4. **Streaming API**: Handles real-time communication between the frontend and backend 31 | 32 | ### Core Flow 33 | 34 | 1. User starts a new sandbox instance 35 | 2. 
E2B creates a virtual desktop and provides a URL for streaming 36 | 3. User sends instructions via the chat interface 37 | 4. Backend processes the instructions using OpenAI's API 38 | 5. AI generates actions (clicks, typing, etc.) to perform on the virtual desktop 39 | 6. Actions are executed on the sandbox and streamed back to the frontend 40 | 7. The process repeats as the user continues to provide instructions 41 | 42 | ## Prerequisites 43 | 44 | Before starting, you'll need: 45 | 46 | 1. [Node.js](https://nodejs.org/) (version specified in package.json) 47 | 2. [npm](https://www.npmjs.com/) (comes with Node.js) 48 | 3. An [E2B API key](https://e2b.dev/docs/getting-started/api-key) 49 | 4. An [OpenAI API key](https://platform.openai.com/api-keys) 50 | 51 | ## Setup Instructions 52 | 53 | 1. **Clone the repository** 54 | ```bash 55 | git clone https://github.com/e2b-dev/surf 56 | cd surf 57 | ``` 58 | 59 | 2. **Install dependencies** 60 | ```bash 61 | npm install 62 | ``` 63 | 64 | 3. **Set up environment variables** 65 | 66 | Create a `.env.local` file in the root directory based on the provided `.env.example`: 67 | 68 | ```env 69 | E2B_API_KEY=your_e2b_api_key 70 | OPENAI_API_KEY=your_openai_api_key 71 | ``` 72 | 73 | 4. **Start the development server** 74 | ```bash 75 | npm run dev 76 | ``` 77 | 78 | 5. **Open the application** 79 | 80 | Navigate to [http://localhost:3000](http://localhost:3000) in your browser. 81 | 82 | ## Usage 83 | 84 | 1. **Start a Sandbox Instance** 85 | - Click the "Start new Sandbox" button to initialize a virtual desktop environment 86 | - Wait for the sandbox to start (this may take a few seconds) 87 | 88 | 2. **Send Instructions** 89 | - Type your instructions in the chat input (e.g., "Open Firefox and go to google.com") 90 | - Press Enter or click the send button 91 | - You can also select from example prompts if available 92 | 93 | 3. 
**Watch AI Actions** 94 | - The AI will process your instructions and perform actions on the virtual desktop 95 | - You can see the AI's reasoning and actions in the chat interface 96 | - The virtual desktop will update in real-time as actions are performed 97 | 98 | 4. **Manage the Sandbox** 99 | - The timer shows the remaining time for your sandbox instance 100 | - You can stop the sandbox at any time by clicking the "Stop" button 101 | - The sandbox will automatically extend its time when it's about to expire 102 | 103 | ## Features 104 | 105 | - **Virtual Desktop Environment**: Runs a Linux-based desktop in a sandbox 106 | - **AI-Powered Interaction**: Uses OpenAI's API to understand and execute user instructions 107 | - **Real-Time Streaming**: Shows AI actions and responses as they happen 108 | - **Chat Interface**: Provides a conversational interface for interacting with the AI 109 | - **Example Prompts**: Offers pre-defined instructions to help users get started 110 | - **Dark/Light Mode**: Supports both dark and light themes 111 | 112 | ## Technical Details 113 | 114 | ### Dependencies 115 | 116 | The application uses several key dependencies: 117 | 118 | - **Next.js**: React framework for the frontend 119 | - **@e2b/desktop**: SDK for creating and managing desktop sandbox environments 120 | - **OpenAI**: SDK for interacting with OpenAI's API 121 | - **Tailwind CSS**: Utility-first CSS framework for styling 122 | - **Framer Motion**: Library for animations 123 | 124 | See `package.json` for a complete list of dependencies. 
125 | 126 | ### API Endpoints 127 | 128 | - **/api/chat**: Handles chat messages and streams AI responses and actions 129 | 130 | ### Server Actions 131 | 132 | - **createSandbox**: Creates a new sandbox instance 133 | - **increaseTimeout**: Extends the sandbox timeout 134 | - **stopSandboxAction**: Stops a running sandbox instance 135 | 136 | ## Troubleshooting 137 | 138 | - **Sandbox not starting**: Verify your E2B API key is correct in `.env.local` 139 | - **AI not responding**: Check that your OpenAI API key is valid and has access to the required models 140 | - **Actions not working**: Ensure the sandbox is running and the AI has proper instructions 141 | 142 | ## Contributing 143 | 144 | Contributions are welcome! Please feel free to submit a Pull Request. 145 | 146 | ## License 147 | 148 | This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details. 149 | 150 | ## Support 151 | 152 | If you encounter any issues or have questions: 153 | - Check the [E2B Documentation](https://e2b.dev/docs) 154 | - Join the [E2B Discord](https://discord.gg/U7KEcGErtQ) 155 | - Open an [issue](https://github.com/e2b-dev/surf/issues) 156 | -------------------------------------------------------------------------------- /components/ui/select.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import * as React from "react"; 4 | import * as SelectPrimitive from "@radix-ui/react-select"; 5 | 6 | import { cn } from "@/lib/utils"; 7 | import { ChevronsUpDown } from "lucide-react"; 8 | import { 9 | menuContentStyles, 10 | menuLabelStyles, 11 | menuSeparatorStyles, 12 | menuViewportStyles, 13 | menuGroupStyles, 14 | menuItemVariants, 15 | } from "./shared-menu-styles"; 16 | 17 | const Select = SelectPrimitive.Root; 18 | 19 | const SelectGroup = React.forwardRef< 20 | React.ElementRef, 21 | React.ComponentPropsWithoutRef 22 | >(({ className, ...props }, ref) => ( 23 | 28 | )); 
29 | SelectGroup.displayName = SelectPrimitive.Group.displayName; 30 | 31 | const SelectValue = SelectPrimitive.Value; 32 | 33 | const SelectTrigger = React.forwardRef< 34 | React.ElementRef, 35 | React.ComponentPropsWithoutRef & { 36 | withIcon?: boolean; 37 | } 38 | >(({ className, children, withIcon = true, ...props }, ref) => ( 39 | span]:line-clamp-1", 49 | "data-[placeholder]:text-fg-300", 50 | className 51 | )} 52 | {...props} 53 | > 54 | {children} 55 | {withIcon && } 56 | 57 | )); 58 | SelectTrigger.displayName = SelectPrimitive.Trigger.displayName; 59 | 60 | const SelectScrollUpButton = React.forwardRef< 61 | React.ElementRef, 62 | React.ComponentPropsWithoutRef 63 | >(({ className, ...props }, ref) => ( 64 | 72 | ▲ 73 | 74 | )); 75 | SelectScrollUpButton.displayName = SelectPrimitive.ScrollUpButton.displayName; 76 | 77 | const SelectScrollDownButton = React.forwardRef< 78 | React.ElementRef, 79 | React.ComponentPropsWithoutRef 80 | >(({ className, ...props }, ref) => ( 81 | 89 | ▼ 90 | 91 | )); 92 | SelectScrollDownButton.displayName = 93 | SelectPrimitive.ScrollDownButton.displayName; 94 | 95 | const SelectContent = React.forwardRef< 96 | React.ElementRef, 97 | React.ComponentPropsWithoutRef 98 | >(({ className, children, position = "popper", ...props }, ref) => ( 99 | 100 | 106 | 107 | 114 | {children} 115 | 116 | 117 | 118 | 119 | )); 120 | SelectContent.displayName = SelectPrimitive.Content.displayName; 121 | 122 | const SelectLabel = React.forwardRef< 123 | React.ElementRef, 124 | React.ComponentPropsWithoutRef & { 125 | inset?: boolean; 126 | } 127 | >(({ className, inset, ...props }, ref) => ( 128 | 134 | 139 | 140 | )); 141 | SelectLabel.displayName = SelectPrimitive.Label.displayName; 142 | 143 | const SelectItem = React.forwardRef< 144 | React.ElementRef, 145 | React.ComponentPropsWithoutRef & { 146 | variant?: "default" | "error" | "success" | "warning"; 147 | } 148 | >(({ className, children, variant = "default", ...props }, ref) => ( 149 | 
154 | 155 | {"<<"} 156 | 157 | {children} 158 | 159 | )); 160 | SelectItem.displayName = SelectPrimitive.Item.displayName; 161 | 162 | const SelectSeparator = React.forwardRef< 163 | React.ElementRef, 164 | React.ComponentPropsWithoutRef 165 | >(({ className, ...props }, ref) => ( 166 | 171 | )); 172 | SelectSeparator.displayName = SelectPrimitive.Separator.displayName; 173 | 174 | export { 175 | Select, 176 | SelectGroup, 177 | SelectValue, 178 | SelectTrigger, 179 | SelectContent, 180 | SelectLabel, 181 | SelectItem, 182 | SelectSeparator, 183 | SelectScrollUpButton, 184 | SelectScrollDownButton, 185 | }; 186 | -------------------------------------------------------------------------------- /lib/streaming/openai.ts: -------------------------------------------------------------------------------- 1 | import { Sandbox } from "@e2b/desktop"; 2 | import OpenAI from "openai"; 3 | import { SSEEventType, SSEEvent } from "@/types/api"; 4 | import { 5 | ResponseComputerToolCall, 6 | ResponseInput, 7 | ResponseInputItem, 8 | Tool, 9 | } from "openai/resources/responses/responses.mjs"; 10 | import { 11 | ComputerInteractionStreamerFacade, 12 | ComputerInteractionStreamerFacadeStreamProps, 13 | } from "@/lib/streaming"; 14 | import { ActionResponse } from "@/types/api"; 15 | import { logDebug, logError, logWarning } from "../logger"; 16 | import { ResolutionScaler } from "./resolution"; 17 | 18 | const INSTRUCTIONS = ` 19 | You are Surf, a helpful assistant that can use a computer to help the user with their tasks. 20 | You can use the computer to search the web, write code, and more. 21 | 22 | Surf is built by E2B, which provides an open source isolated virtual computer in the cloud made for AI use cases. 23 | This application integrates E2B's desktop sandbox with OpenAI's API to create an AI agent that can perform tasks 24 | on a virtual computer through natural language instructions. 
25 | 26 | The screenshots that you receive are from a running sandbox instance, allowing you to see and interact with a real 27 | virtual computer environment in real-time. 28 | 29 | Since you are operating in a secure, isolated sandbox micro VM, you can execute most commands and operations without 30 | worrying about security concerns. This environment is specifically designed for AI experimentation and task execution. 31 | 32 | The sandbox is based on Ubuntu 22.04 and comes with many pre-installed applications including: 33 | - Firefox browser 34 | - Visual Studio Code 35 | - LibreOffice suite 36 | - Python 3 with common libraries 37 | - Terminal with standard Linux utilities 38 | - File manager (PCManFM) 39 | - Text editor (Gedit) 40 | - Calculator and other basic utilities 41 | 42 | IMPORTANT: It is okay to run terminal commands at any point without confirmation, as long as they are required to fulfill the task the user has given. You should execute commands immediately when needed to complete the user's request efficiently. 43 | 44 | IMPORTANT: When typing commands in the terminal, ALWAYS send a KEYPRESS ENTER action immediately after typing the command to execute it. Terminal commands will not run until you press Enter. 45 | 46 | IMPORTANT: When editing files, prefer to use Visual Studio Code (VS Code) as it provides a better editing experience with syntax highlighting, code completion, and other helpful features. 
47 | `; 48 | 49 | export class OpenAIComputerStreamer 50 | implements ComputerInteractionStreamerFacade 51 | { 52 | public instructions: string; 53 | public desktop: Sandbox; 54 | public resolutionScaler: ResolutionScaler; 55 | 56 | private openai: OpenAI; 57 | 58 | constructor(desktop: Sandbox, resolutionScaler: ResolutionScaler) { 59 | this.desktop = desktop; 60 | this.resolutionScaler = resolutionScaler; 61 | this.openai = new OpenAI(); 62 | this.instructions = INSTRUCTIONS; 63 | } 64 | 65 | async executeAction( 66 | action: ResponseComputerToolCall["action"] 67 | ): Promise { 68 | const desktop = this.desktop; 69 | 70 | switch (action.type) { 71 | case "screenshot": { 72 | break; 73 | } 74 | case "double_click": { 75 | const coordinate = this.resolutionScaler.scaleToOriginalSpace([ 76 | action.x, 77 | action.y, 78 | ]); 79 | 80 | await desktop.doubleClick(coordinate[0], coordinate[1]); 81 | break; 82 | } 83 | case "click": { 84 | const coordinate = this.resolutionScaler.scaleToOriginalSpace([ 85 | action.x, 86 | action.y, 87 | ]); 88 | 89 | if (action.button === "left") { 90 | await desktop.leftClick(coordinate[0], coordinate[1]); 91 | } else if (action.button === "right") { 92 | await desktop.rightClick(coordinate[0], coordinate[1]); 93 | } else if (action.button === "wheel") { 94 | await desktop.middleClick(coordinate[0], coordinate[1]); 95 | } 96 | break; 97 | } 98 | case "type": { 99 | await desktop.write(action.text); 100 | break; 101 | } 102 | case "keypress": { 103 | await desktop.press(action.keys); 104 | break; 105 | } 106 | case "move": { 107 | const coordinate = this.resolutionScaler.scaleToOriginalSpace([ 108 | action.x, 109 | action.y, 110 | ]); 111 | 112 | await desktop.moveMouse(coordinate[0], coordinate[1]); 113 | break; 114 | } 115 | case "scroll": { 116 | if (action.scroll_y < 0) { 117 | await desktop.scroll("up", Math.abs(action.scroll_y)); 118 | } else if (action.scroll_y > 0) { 119 | await desktop.scroll("down", action.scroll_y); 120 | } 
121 | break; 122 | } 123 | case "wait": { 124 | break; 125 | } 126 | case "drag": { 127 | const startCoordinate = this.resolutionScaler.scaleToOriginalSpace([ 128 | action.path[0].x, 129 | action.path[0].y, 130 | ]); 131 | 132 | const endCoordinate = this.resolutionScaler.scaleToOriginalSpace([ 133 | action.path[1].x, 134 | action.path[1].y, 135 | ]); 136 | 137 | await desktop.drag(startCoordinate, endCoordinate); 138 | break; 139 | } 140 | default: { 141 | logWarning("Unknown action type:", action); 142 | } 143 | } 144 | } 145 | 146 | async *stream( 147 | props: ComputerInteractionStreamerFacadeStreamProps 148 | ): AsyncGenerator> { 149 | const { messages, signal } = props; 150 | 151 | try { 152 | const modelResolution = this.resolutionScaler.getScaledResolution(); 153 | 154 | const computerTool: Tool = { 155 | // @ts-ignore 156 | type: "computer_use_preview", 157 | display_width: modelResolution[0], 158 | display_height: modelResolution[1], 159 | // @ts-ignore 160 | environment: "linux", 161 | }; 162 | 163 | let response = await this.openai.responses.create({ 164 | model: "computer-use-preview", 165 | tools: [computerTool], 166 | input: [...(messages as ResponseInput)], 167 | truncation: "auto", 168 | instructions: this.instructions, 169 | reasoning: { 170 | effort: "medium", 171 | generate_summary: "concise", 172 | }, 173 | }); 174 | 175 | while (true) { 176 | if (signal.aborted) { 177 | yield { 178 | type: SSEEventType.DONE, 179 | content: "Generation stopped by user", 180 | }; 181 | break; 182 | } 183 | 184 | const computerCalls = response.output.filter( 185 | (item) => item.type === "computer_call" 186 | ); 187 | 188 | if (computerCalls.length === 0) { 189 | yield { 190 | type: SSEEventType.REASONING, 191 | content: response.output_text, 192 | }; 193 | yield { 194 | type: SSEEventType.DONE, 195 | }; 196 | break; 197 | } 198 | 199 | const computerCall = computerCalls[0]; 200 | const callId = computerCall.call_id; 201 | const action = computerCall.action; 202 | 
203 | const reasoningItems = response.output.filter( 204 | (item) => item.type === "message" && "content" in item 205 | ); 206 | 207 | if (reasoningItems.length > 0 && "content" in reasoningItems[0]) { 208 | const content = reasoningItems[0].content; 209 | 210 | // Log to debug why content is not a string 211 | logDebug("Reasoning content structure:", content); 212 | 213 | yield { 214 | type: SSEEventType.REASONING, 215 | content: 216 | reasoningItems[0].content[0].type === "output_text" 217 | ? reasoningItems[0].content[0].text 218 | : JSON.stringify(reasoningItems[0].content), 219 | }; 220 | } 221 | 222 | yield { 223 | type: SSEEventType.ACTION, 224 | action, 225 | }; 226 | 227 | await this.executeAction(action); 228 | 229 | yield { 230 | type: SSEEventType.ACTION_COMPLETED, 231 | }; 232 | 233 | const newScreenshotData = await this.resolutionScaler.takeScreenshot(); 234 | const newScreenshotBase64 = 235 | Buffer.from(newScreenshotData).toString("base64"); 236 | 237 | const computerCallOutput: ResponseInputItem = { 238 | call_id: callId, 239 | type: "computer_call_output", 240 | output: { 241 | // @ts-ignore 242 | type: "input_image", 243 | image_url: `data:image/png;base64,${newScreenshotBase64}`, 244 | }, 245 | }; 246 | 247 | response = await this.openai.responses.create({ 248 | model: "computer-use-preview", 249 | previous_response_id: response.id, 250 | instructions: this.instructions, 251 | tools: [computerTool], 252 | input: [computerCallOutput], 253 | truncation: "auto", 254 | reasoning: { 255 | effort: "medium", 256 | generate_summary: "concise", 257 | }, 258 | }); 259 | } 260 | } catch (error) { 261 | logError("OPENAI_STREAMER", error); 262 | if (error instanceof OpenAI.APIError && error.status === 429) { 263 | // since hitting rate limits is not expected, we optimistically assume we hit our quota limit (both have the same status code) 264 | yield { 265 | type: SSEEventType.ERROR, 266 | content: 267 | "Our usage quota ran out for this month. 
Please visit GitHub, self host the repository and use your own API keys to continue.", 268 | }; 269 | yield { 270 | type: SSEEventType.DONE, 271 | }; 272 | return; 273 | } 274 | yield { 275 | type: SSEEventType.ERROR, 276 | content: "An error occurred with the AI service. Please try again.", 277 | }; 278 | } 279 | } 280 | } 281 | -------------------------------------------------------------------------------- /components/icons.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils"; 2 | 3 | export const BotIcon = () => { 4 | return ( 5 | 12 | 18 | 19 | ); 20 | }; 21 | 22 | export const UserIcon = () => { 23 | return ( 24 | 32 | 38 | 39 | ); 40 | }; 41 | 42 | export const AttachmentIcon = () => { 43 | return ( 44 | 51 | 57 | 58 | ); 59 | }; 60 | 61 | export const VercelIcon = ({ size = 17 }) => { 62 | return ( 63 | 70 | 76 | 77 | ); 78 | }; 79 | 80 | export const MasonryIcon = () => { 81 | return ( 82 | 89 | 95 | 96 | ); 97 | }; 98 | 99 | interface IconProps { 100 | className?: string; 101 | } 102 | 103 | export const GitHubIcon = ({ className }: IconProps) => ( 104 | 110 | 111 | 112 | ); 113 | 114 | export const BoxIcon = ({ size = 16 }: { size: number }) => { 115 | return ( 116 | 123 | 129 | 130 | ); 131 | }; 132 | 133 | export const HomeIcon = ({ size = 16 }: { size: number }) => { 134 | return ( 135 | 142 | 148 | 149 | ); 150 | }; 151 | 152 | export const GPSIcon = ({ size = 16 }: { size: number }) => { 153 | return ( 154 | 161 | 169 | 170 | ); 171 | }; 172 | 173 | export const InvoiceIcon = ({ size = 16 }: { size: number }) => { 174 | return ( 175 | 182 | 188 | 189 | ); 190 | }; 191 | 192 | export const AnthropicLogo = ({ 193 | size = 16, 194 | className, 195 | }: { 196 | size?: number; 197 | className?: string; 198 | }) => { 199 | return ( 200 | 212 | 213 | 218 | 219 | ); 220 | }; 221 | -------------------------------------------------------------------------------- /lib/chat-context.tsx: 
-------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import React, { 4 | createContext, 5 | useContext, 6 | useState, 7 | useRef, 8 | useCallback, 9 | } from "react"; 10 | import { 11 | ChatMessage, 12 | ChatState, 13 | ParsedSSEEvent, 14 | SendMessageOptions, 15 | ActionChatMessage, 16 | UserChatMessage, 17 | AssistantChatMessage, 18 | SystemChatMessage, 19 | } from "@/types/chat"; 20 | import { ComputerModel, SSEEventType } from "@/types/api"; 21 | import { logDebug, logError } from "./logger"; 22 | 23 | interface ChatContextType extends ChatState { 24 | sendMessage: (options: SendMessageOptions) => Promise; 25 | stopGeneration: () => void; 26 | clearMessages: () => void; 27 | setInput: (input: string) => void; 28 | input: string; 29 | handleSubmit: (e: React.FormEvent) => string | undefined; 30 | onSandboxCreated: ( 31 | callback: (sandboxId: string, vncUrl: string) => void 32 | ) => void; 33 | model: ComputerModel; 34 | setModel: (model: ComputerModel) => void; 35 | } 36 | 37 | const ChatContext = createContext(undefined); 38 | 39 | interface ChatProviderProps { 40 | children: React.ReactNode; 41 | } 42 | 43 | export function ChatProvider({ children }: ChatProviderProps) { 44 | const [messages, setMessages] = useState([]); 45 | const [isLoading, setIsLoading] = useState(false); 46 | const [error, setError] = useState(null); 47 | const [input, setInput] = useState(""); 48 | const abortControllerRef = useRef(null); 49 | const onSandboxCreatedRef = useRef< 50 | ((sandboxId: string, vncUrl: string) => void) | undefined 51 | >(undefined); 52 | const [model, setModel] = useState("openai"); 53 | 54 | const parseSSEEvent = (data: string): ParsedSSEEvent | null => { 55 | try { 56 | if (!data || data.trim() === "") { 57 | return null; 58 | } 59 | 60 | if (data.startsWith("data: ")) { 61 | const jsonStr = data.substring(6).trim(); 62 | 63 | if (!jsonStr) { 64 | return null; 65 | } 66 | 67 | return JSON.parse(jsonStr); 
68 | } 69 | 70 | const match = data.match(/data: ({.*})/); 71 | if (match && match[1]) { 72 | return JSON.parse(match[1]); 73 | } 74 | 75 | return JSON.parse(data); 76 | } catch (e) { 77 | logError( 78 | "Error parsing SSE event:", 79 | e, 80 | "Data:", 81 | data.substring(0, 200) + (data.length > 200 ? "..." : "") 82 | ); 83 | return null; 84 | } 85 | }; 86 | 87 | const sendMessage = async ({ 88 | content, 89 | sandboxId, 90 | environment, 91 | resolution, 92 | }: SendMessageOptions) => { 93 | if (isLoading) return; 94 | 95 | setIsLoading(true); 96 | setError(null); 97 | 98 | const userMessage: ChatMessage = { 99 | role: "user", 100 | content, 101 | id: Date.now().toString(), 102 | }; 103 | 104 | setMessages((prev) => [...prev, userMessage]); 105 | 106 | abortControllerRef.current = new AbortController(); 107 | 108 | try { 109 | const apiMessages = messages 110 | .concat(userMessage) 111 | .filter((msg) => msg.role === "user" || msg.role === "assistant") 112 | .map((msg) => { 113 | const typedMsg = msg as UserChatMessage | AssistantChatMessage; 114 | return { 115 | role: typedMsg.role, 116 | content: typedMsg.content, 117 | }; 118 | }); 119 | 120 | const response = await fetch("/api/chat", { 121 | method: "POST", 122 | headers: { "Content-Type": "application/json" }, 123 | body: JSON.stringify({ 124 | messages: apiMessages, 125 | sandboxId, 126 | environment, 127 | resolution, 128 | model, 129 | }), 130 | signal: abortControllerRef.current.signal, 131 | }); 132 | 133 | if (!response.ok) { 134 | throw new Error(`HTTP error! 
status: ${response.status}`); 135 | } 136 | 137 | const reader = response.body?.getReader(); 138 | if (!reader) throw new Error("Response body is null"); 139 | 140 | setMessages((prev) => [ 141 | ...prev, 142 | { 143 | role: "system", 144 | id: `system-message-${Date.now()}`, 145 | content: "Task started", 146 | }, 147 | ]); 148 | 149 | const decoder = new TextDecoder(); 150 | let assistantMessage = ""; 151 | let buffer = ""; 152 | 153 | while (true) { 154 | const { done, value } = await reader.read(); 155 | 156 | if (done) { 157 | if (buffer.trim()) { 158 | const parsedEvent = parseSSEEvent(buffer); 159 | if (parsedEvent) { 160 | if (parsedEvent.type === SSEEventType.DONE) { 161 | setMessages((prev) => { 162 | const systemMessage: SystemChatMessage = { 163 | role: "system", 164 | id: `system-${Date.now()}`, 165 | content: "Task completed", 166 | }; 167 | 168 | return [...prev, systemMessage]; 169 | }); 170 | setIsLoading(false); 171 | } 172 | } 173 | } 174 | break; 175 | } 176 | 177 | const chunk = decoder.decode(value, { stream: true }); 178 | buffer += chunk; 179 | 180 | const events = buffer.split("\n\n"); 181 | 182 | buffer = events.pop() || ""; 183 | 184 | for (const event of events) { 185 | if (!event.trim()) continue; 186 | 187 | const parsedEvent = parseSSEEvent(event); 188 | if (!parsedEvent) continue; 189 | 190 | if (process.env.NODE_ENV === "development") { 191 | logDebug("Parsed event:", parsedEvent); 192 | } 193 | 194 | switch (parsedEvent.type) { 195 | case SSEEventType.ACTION: 196 | if (parsedEvent.action) { 197 | const actionMessage: ActionChatMessage = { 198 | role: "action", 199 | id: `action-${Date.now()}`, 200 | action: parsedEvent.action, 201 | status: "pending", 202 | model, 203 | }; 204 | 205 | setMessages((prev) => [...prev, actionMessage]); 206 | } 207 | break; 208 | 209 | case SSEEventType.REASONING: 210 | if (typeof parsedEvent.content === "string") { 211 | assistantMessage = parsedEvent.content; 212 | const reasoningMessage: 
AssistantChatMessage = { 213 | role: "assistant", 214 | id: `assistant-${Date.now()}-${messages.length}`, 215 | content: assistantMessage, 216 | model, 217 | }; 218 | setMessages((prev) => [...prev, reasoningMessage]); 219 | } 220 | break; 221 | 222 | case SSEEventType.DONE: 223 | setMessages((prev) => { 224 | const systemMessage: SystemChatMessage = { 225 | role: "system", 226 | id: `system-${Date.now()}`, 227 | content: parsedEvent.content || "Task completed", 228 | }; 229 | 230 | return [...prev, systemMessage]; 231 | }); 232 | setIsLoading(false); 233 | break; 234 | 235 | case SSEEventType.ERROR: 236 | setError(parsedEvent.content); 237 | setMessages((prev) => [ 238 | ...prev, 239 | { 240 | role: "system", 241 | id: `system-${Date.now()}`, 242 | content: parsedEvent.content, 243 | isError: true, 244 | }, 245 | ]); 246 | setIsLoading(false); 247 | break; 248 | 249 | case SSEEventType.SANDBOX_CREATED: 250 | if ( 251 | parsedEvent.sandboxId && 252 | parsedEvent.vncUrl && 253 | onSandboxCreatedRef.current 254 | ) { 255 | onSandboxCreatedRef.current( 256 | parsedEvent.sandboxId, 257 | parsedEvent.vncUrl 258 | ); 259 | } 260 | break; 261 | 262 | case SSEEventType.ACTION_COMPLETED: 263 | setMessages((prev) => { 264 | const lastActionIndex = [...prev] 265 | .reverse() 266 | .findIndex((msg) => msg.role === "action"); 267 | 268 | if (lastActionIndex !== -1) { 269 | const actualIndex = prev.length - 1 - lastActionIndex; 270 | 271 | return prev.map((msg, index) => 272 | index === actualIndex 273 | ? { ...msg, status: "completed" } 274 | : msg 275 | ); 276 | } 277 | 278 | return prev; 279 | }); 280 | break; 281 | } 282 | } 283 | } 284 | } catch (error) { 285 | logError("Error sending message:", error); 286 | setError(error instanceof Error ? 
error.message : "An error occurred"); 287 | setIsLoading(false); 288 | } 289 | }; 290 | 291 | const stopGeneration = useCallback(() => { 292 | if (abortControllerRef.current) { 293 | try { 294 | abortControllerRef.current.abort( 295 | new DOMException("Generation stopped by user", "AbortError") 296 | ); 297 | setIsLoading(false); 298 | } catch (error) { 299 | logError("Error stopping generation:", error); 300 | setIsLoading(false); 301 | } 302 | } 303 | }, []); 304 | 305 | const clearMessages = useCallback(() => { 306 | setMessages([]); 307 | setError(null); 308 | }, []); 309 | 310 | const handleSubmit = useCallback( 311 | (e: React.FormEvent): string | undefined => { 312 | e.preventDefault(); 313 | if (!input.trim()) return; 314 | 315 | const content = input.trim(); 316 | setInput(""); 317 | return content; 318 | }, 319 | [input] 320 | ); 321 | 322 | const value = { 323 | messages, 324 | isLoading, 325 | error, 326 | input, 327 | setInput, 328 | sendMessage, 329 | stopGeneration, 330 | clearMessages, 331 | handleSubmit, 332 | model, 333 | setModel, 334 | onSandboxCreated: ( 335 | callback: (sandboxId: string, vncUrl: string) => void 336 | ) => { 337 | onSandboxCreatedRef.current = callback; 338 | }, 339 | }; 340 | 341 | return {children}; 342 | } 343 | 344 | export function useChat() { 345 | const context = useContext(ChatContext); 346 | if (context === undefined) { 347 | throw new Error("useChat must be used within a ChatProvider"); 348 | } 349 | return context; 350 | } 351 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2025 FoundryLabs, Inc. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /lib/streaming/resolution.ts: -------------------------------------------------------------------------------- 1 | import { Sandbox } from "@e2b/desktop"; 2 | import { 3 | MAX_RESOLUTION_WIDTH, 4 | MAX_RESOLUTION_HEIGHT, 5 | MIN_RESOLUTION_WIDTH, 6 | MIN_RESOLUTION_HEIGHT, 7 | } from "@/lib/config"; 8 | import sharp from "sharp"; 9 | 10 | /** 11 | * ResolutionScaler handles resolution scaling between the original desktop 12 | * resolution and the scaled model resolution, including coordinate transformations 13 | * and screenshot scaling. 14 | * 15 | * This class maintains aspect ratio consistency while ensuring the resolution 16 | * stays within configured boundaries, providing accurate coordinate mapping 17 | * between different resolution spaces. 
18 | */ 19 | export class ResolutionScaler { 20 | // Private properties 21 | private desktop: Sandbox; 22 | private originalResolution: [number, number]; 23 | private scaledResolution: [number, number]; 24 | private scaleFactor: number; 25 | private originalAspectRatio: number; 26 | private scaledAspectRatio: number; 27 | 28 | /** 29 | * Creates a new ResolutionScaler 30 | * 31 | * @param desktop - The sandbox instance used for taking screenshots 32 | * @param originalResolution - The original desktop resolution [width, height] 33 | */ 34 | constructor(desktop: Sandbox, originalResolution: [number, number]) { 35 | this.desktop = desktop; 36 | this.originalResolution = originalResolution; 37 | this.originalAspectRatio = originalResolution[0] / originalResolution[1]; 38 | 39 | // Calculate scaled resolution and scale factor immediately on instantiation 40 | const { scaledResolution, scaleFactor } = 41 | this.calculateScaledResolution(originalResolution); 42 | this.scaledResolution = scaledResolution; 43 | this.scaleFactor = scaleFactor; 44 | this.scaledAspectRatio = scaledResolution[0] / scaledResolution[1]; 45 | 46 | // Validate coordinate scaling accuracy 47 | this.validateCoordinateScaling(); 48 | } 49 | 50 | /** 51 | * Get the original desktop resolution 52 | * 53 | * @returns The original resolution as [width, height] 54 | */ 55 | public getOriginalResolution(): [number, number] { 56 | return this.originalResolution; 57 | } 58 | 59 | /** 60 | * Get the scaled resolution used for model interactions 61 | * 62 | * @returns The scaled resolution as [width, height] 63 | */ 64 | public getScaledResolution(): [number, number] { 65 | return this.scaledResolution; 66 | } 67 | 68 | /** 69 | * Get the scale factor between original and scaled resolutions 70 | * 71 | * @returns The scale factor 72 | */ 73 | public getScaleFactor(): number { 74 | return this.scaleFactor; 75 | } 76 | 77 | /** 78 | * Get the aspect ratio of the original resolution 79 | * 80 | * @returns The 
original aspect ratio (width/height) 81 | */ 82 | public getOriginalAspectRatio(): number { 83 | return this.originalAspectRatio; 84 | } 85 | 86 | /** 87 | * Get the aspect ratio of the scaled resolution 88 | * 89 | * @returns The scaled aspect ratio (width/height) 90 | */ 91 | public getScaledAspectRatio(): number { 92 | return this.scaledAspectRatio; 93 | } 94 | 95 | /** 96 | * Validates coordinate scaling functions by performing round-trip tests 97 | * on several key positions across the screen 98 | */ 99 | private validateCoordinateScaling(): void { 100 | // Test points at corners, center, and edges 101 | const testPoints: Array<{ name: string; point: [number, number] }> = [ 102 | { name: "Top-left corner", point: [0, 0] }, 103 | { name: "Top-right corner", point: [this.originalResolution[0] - 1, 0] }, 104 | { 105 | name: "Bottom-left corner", 106 | point: [0, this.originalResolution[1] - 1], 107 | }, 108 | { 109 | name: "Bottom-right corner", 110 | point: [this.originalResolution[0] - 1, this.originalResolution[1] - 1], 111 | }, 112 | { 113 | name: "Center", 114 | point: [ 115 | Math.floor(this.originalResolution[0] / 2), 116 | Math.floor(this.originalResolution[1] / 2), 117 | ], 118 | }, 119 | { name: "Small target (10px)", point: [10, 10] }, // Small target test 120 | ]; 121 | 122 | for (const { point } of testPoints) { 123 | this.testCoordinateRoundTrip(point); 124 | } 125 | } 126 | 127 | /** 128 | * Converts coordinates from model space to original desktop space. 129 | * Use this when the model sends coordinates (based on scaled screenshot) 130 | * that need to be converted to the original desktop space for actual interaction. 
131 | * 132 | * @param coordinate - Coordinates in model's scaled space [x, y] 133 | * @returns Coordinates in original desktop space [x, y] 134 | */ 135 | public scaleToOriginalSpace(coordinate: [number, number]): [number, number] { 136 | // Store the exact scaled values before rounding 137 | const exactScaledX = coordinate[0] / this.scaleFactor; 138 | const exactScaledY = coordinate[1] / this.scaleFactor; 139 | 140 | // Round only at the final step for pixel-perfect positioning 141 | const finalX = Math.round(exactScaledX); 142 | const finalY = Math.round(exactScaledY); 143 | 144 | return [finalX, finalY]; 145 | } 146 | 147 | /** 148 | * Converts coordinates from original desktop space to model space. 149 | * Use this when desktop coordinates need to be represented in the model's scaled space. 150 | * 151 | * @param coordinate - Coordinates in original desktop space [x, y] 152 | * @returns Coordinates in model's scaled space [x, y] 153 | */ 154 | public scaleToModelSpace(coordinate: [number, number]): [number, number] { 155 | // Store the exact scaled values before rounding 156 | const exactScaledX = coordinate[0] * this.scaleFactor; 157 | const exactScaledY = coordinate[1] * this.scaleFactor; 158 | 159 | // Round only at the final step for pixel-perfect representation 160 | const finalX = Math.round(exactScaledX); 161 | const finalY = Math.round(exactScaledY); 162 | 163 | return [finalX, finalY]; 164 | } 165 | 166 | /** 167 | * Tests the round-trip accuracy of coordinate scaling. 168 | * Helps identify potential precision issues with coordinate transformations. 
169 | * 170 | * @param originalCoordinate - A coordinate in original space to test 171 | * @returns Object containing the original, model space, and round-trip coordinates with error 172 | */ 173 | public testCoordinateRoundTrip(originalCoordinate: [number, number]): { 174 | original: [number, number]; 175 | modelSpace: [number, number]; 176 | roundTrip: [number, number]; 177 | error: [number, number]; 178 | } { 179 | const modelSpace = this.scaleToModelSpace(originalCoordinate); 180 | const roundTrip = this.scaleToOriginalSpace(modelSpace); 181 | 182 | const error: [number, number] = [ 183 | roundTrip[0] - originalCoordinate[0], 184 | roundTrip[1] - originalCoordinate[1], 185 | ]; 186 | 187 | return { original: originalCoordinate, modelSpace, roundTrip, error }; 188 | } 189 | 190 | /** 191 | * Takes a screenshot at the scaled resolution suitable for model consumption. 192 | * The screenshot is automatically scaled to the target resolution while 193 | * preserving aspect ratio. 194 | * 195 | * @returns A buffer containing the scaled screenshot 196 | */ 197 | public async takeScreenshot(): Promise { 198 | // Take the original screenshot 199 | const originalScreenshot = await this.desktop.screenshot(); 200 | 201 | // If no scaling is needed, return the original 202 | if (this.scaleFactor === 1) { 203 | return Buffer.from(originalScreenshot); 204 | } 205 | 206 | // Scale the screenshot - use high quality settings for better small target visibility 207 | const scaledScreenshot = await this.scaleScreenshot( 208 | originalScreenshot, 209 | this.scaledResolution 210 | ); 211 | 212 | return scaledScreenshot; 213 | } 214 | 215 | /** 216 | * Calculates a scaled resolution that maintains aspect ratio and fits within boundaries. 217 | * This ensures the resolution stays within MIN and MAX resolution constraints. 
218 | * 219 | * @param originalResolution - The original resolution to scale 220 | * @returns The scaled resolution and scale factor 221 | */ 222 | private calculateScaledResolution(originalResolution: [number, number]): { 223 | scaledResolution: [number, number]; 224 | scaleFactor: number; 225 | } { 226 | const [width, height] = originalResolution; 227 | const originalAspectRatio = width / height; 228 | 229 | // If resolution is already within bounds, return it as is 230 | if ( 231 | width <= MAX_RESOLUTION_WIDTH && 232 | width >= MIN_RESOLUTION_WIDTH && 233 | height <= MAX_RESOLUTION_HEIGHT && 234 | height >= MIN_RESOLUTION_HEIGHT 235 | ) { 236 | return { 237 | scaledResolution: [width, height], 238 | scaleFactor: 1, 239 | }; 240 | } 241 | 242 | // Calculate scale factors for width and height 243 | let widthScaleFactor = 1; 244 | if (width > MAX_RESOLUTION_WIDTH) { 245 | widthScaleFactor = MAX_RESOLUTION_WIDTH / width; 246 | } else if (width < MIN_RESOLUTION_WIDTH) { 247 | widthScaleFactor = MIN_RESOLUTION_WIDTH / width; 248 | } 249 | 250 | let heightScaleFactor = 1; 251 | if (height > MAX_RESOLUTION_HEIGHT) { 252 | heightScaleFactor = MAX_RESOLUTION_HEIGHT / height; 253 | } else if (height < MIN_RESOLUTION_HEIGHT) { 254 | heightScaleFactor = MIN_RESOLUTION_HEIGHT / height; 255 | } 256 | 257 | // Use the appropriate scale factor to ensure both dimensions are within bounds 258 | let scaleFactor; 259 | if (widthScaleFactor < 1 || heightScaleFactor < 1) { 260 | // We need to scale down, use the smaller factor 261 | scaleFactor = Math.min(widthScaleFactor, heightScaleFactor); 262 | } else { 263 | // We need to scale up, use the larger factor 264 | scaleFactor = Math.max(widthScaleFactor, heightScaleFactor); 265 | } 266 | 267 | // Calculate new dimensions - store exact values before rounding 268 | const exactScaledWidth = width * scaleFactor; 269 | const exactScaledHeight = height * scaleFactor; 270 | 271 | // Round to integer pixels at the final step 272 | const 
scaledWidth = Math.round(exactScaledWidth); 273 | const scaledHeight = Math.round(exactScaledHeight); 274 | 275 | // Recalculate the final scale factor based on the rounded dimensions 276 | // This ensures more accurate coordinate scaling when using these dimensions 277 | const finalWidthScaleFactor = scaledWidth / width; 278 | const finalHeightScaleFactor = scaledHeight / height; 279 | 280 | // Using geometric mean for scale factor to better preserve aspect ratio 281 | const finalScaleFactor = Math.sqrt( 282 | finalWidthScaleFactor * finalHeightScaleFactor 283 | ); 284 | 285 | return { 286 | scaledResolution: [scaledWidth, scaledHeight], 287 | scaleFactor: finalScaleFactor, 288 | }; 289 | } 290 | 291 | /** 292 | * Scales a screenshot to the specified resolution. 293 | * Uses high-quality scaling to preserve UI details. 294 | * 295 | * @param screenshot - The original screenshot buffer 296 | * @param targetResolution - The target resolution to scale to [width, height] 297 | * @returns A buffer containing the scaled screenshot 298 | */ 299 | private async scaleScreenshot( 300 | screenshot: Buffer | Uint8Array, 301 | targetResolution: [number, number] 302 | ): Promise { 303 | const [width, height] = targetResolution; 304 | 305 | try { 306 | // Use higher quality settings to preserve small UI elements better 307 | const result = await sharp(screenshot) 308 | .resize(width, height, { 309 | fit: "fill", 310 | kernel: "lanczos3", // Higher quality resampling kernel 311 | fastShrinkOnLoad: false, // Disable fast shrink for higher quality 312 | }) 313 | .toBuffer(); 314 | 315 | return result; 316 | } catch (error) { 317 | // Return original if scaling fails, ensuring it's a Buffer 318 | return Buffer.from(screenshot); 319 | } 320 | } 321 | } 322 | -------------------------------------------------------------------------------- /lib/streaming/anthropic.ts: -------------------------------------------------------------------------------- 1 | import { Sandbox } from 
"@e2b/desktop"; 2 | import Anthropic from "@anthropic-ai/sdk"; 3 | import { SSEEventType, SSEEvent, sleep } from "@/types/api"; 4 | import { 5 | ComputerInteractionStreamerFacade, 6 | ComputerInteractionStreamerFacadeStreamProps, 7 | } from "@/lib/streaming"; 8 | import { ActionResponse } from "@/types/api"; 9 | import { 10 | BetaMessageParam, 11 | BetaToolResultBlockParam, 12 | BetaToolUseBlock, 13 | } from "@anthropic-ai/sdk/resources/beta/messages/messages.mjs"; 14 | import { ResolutionScaler } from "./resolution"; 15 | import { ComputerAction, ToolInput } from "@/types/anthropic"; 16 | import { logError } from "../logger"; 17 | 18 | const INSTRUCTIONS = ` 19 | You are Surf, a helpful assistant that can use a computer to help the user with their tasks. 20 | You can use the computer to search the web, write code, and more. 21 | 22 | Surf is built by E2B, which provides an open source isolated virtual computer in the cloud made for AI use cases. 23 | This application integrates E2B's desktop sandbox with Anthropic's API to create an AI agent that can perform tasks 24 | on a virtual computer through natural language instructions. 25 | 26 | The screenshots that you receive are from a running sandbox instance, allowing you to see and interact with a real 27 | virtual computer environment in real-time. 28 | 29 | Since you are operating in a secure, isolated sandbox micro VM, you can execute most commands and operations without 30 | worrying about security concerns. This environment is specifically designed for AI experimentation and task execution. 31 | 32 | IMPORTANT NOTES: 33 | 1. You automatically receive a screenshot after each action you take. You DO NOT need to request screenshots separately. 34 | 2. When a user asks you to run a command in the terminal, ALWAYS press Enter immediately after typing the command. 35 | 3. When the user explicitly asks you to press any key (Enter, Tab, Ctrl+C, etc.) in any application or interface, 36 | you MUST do so immediately. 
37 | 4. Remember: In terminal environments, commands DO NOT execute until Enter is pressed. 38 | 5. When working on complex tasks, continue to completion without stopping to ask for confirmation. 39 | Break down complex tasks into steps and execute them fully. 40 | 41 | Please help the user effectively by observing the current state of the computer and taking appropriate actions. 42 | `; 43 | 44 | export class AnthropicComputerStreamer 45 | implements ComputerInteractionStreamerFacade 46 | { 47 | public instructions: string; 48 | public desktop: Sandbox; 49 | public resolutionScaler: ResolutionScaler; 50 | private anthropic: Anthropic; 51 | 52 | constructor(desktop: Sandbox, resolutionScaler: ResolutionScaler) { 53 | if (!process.env.ANTHROPIC_API_KEY) { 54 | throw new Error("ANTHROPIC_API_KEY is not set"); 55 | } 56 | 57 | this.desktop = desktop; 58 | this.resolutionScaler = resolutionScaler; 59 | this.anthropic = new Anthropic({ 60 | apiKey: process.env.ANTHROPIC_API_KEY, 61 | }); 62 | this.instructions = INSTRUCTIONS; 63 | } 64 | 65 | async executeAction( 66 | tool: BetaToolUseBlock & ToolInput 67 | ): Promise { 68 | const desktop = this.desktop; 69 | 70 | if (tool.name === "str_replace_editor") { 71 | const editorCommand = tool.input; 72 | 73 | switch (editorCommand.command) { 74 | default: { 75 | } 76 | } 77 | return; 78 | } 79 | 80 | if (tool.name === "bash") { 81 | const bashCommand = tool.input; 82 | 83 | switch (bashCommand.command) { 84 | case "command": { 85 | desktop.commands.run(bashCommand.command); 86 | return; 87 | } 88 | 89 | default: { 90 | } 91 | } 92 | 93 | return; 94 | } 95 | 96 | const action = tool.input; 97 | 98 | switch (action.action) { 99 | case "screenshot": { 100 | // that explicit screenshot actions are no longer necessary 101 | break; 102 | } 103 | 104 | case "double_click": { 105 | const [x, y] = this.resolutionScaler.scaleToOriginalSpace( 106 | action.coordinate 107 | ); 108 | if (action.text) { 109 | await desktop.moveMouse(x, y); 
110 | await desktop.press(action.text); 111 | } 112 | await desktop.doubleClick(x, y); 113 | break; 114 | } 115 | 116 | case "triple_click": { 117 | const [x, y] = this.resolutionScaler.scaleToOriginalSpace( 118 | action.coordinate 119 | ); 120 | 121 | await desktop.moveMouse(x, y); 122 | if (action.text) { 123 | await desktop.press(action.text); 124 | } 125 | await desktop.leftClick(); 126 | await desktop.leftClick(); 127 | await desktop.leftClick(); 128 | break; 129 | } 130 | 131 | case "left_click": { 132 | const [x, y] = this.resolutionScaler.scaleToOriginalSpace( 133 | action.coordinate 134 | ); 135 | 136 | if (action.text) { 137 | await desktop.moveMouse(x, y); 138 | await desktop.press(action.text); 139 | } 140 | await desktop.leftClick(x, y); 141 | break; 142 | } 143 | 144 | case "right_click": { 145 | const [x, y] = this.resolutionScaler.scaleToOriginalSpace( 146 | action.coordinate 147 | ); 148 | 149 | if (action.text) { 150 | await desktop.moveMouse(x, y); 151 | await desktop.press(action.text); 152 | } 153 | await desktop.rightClick(x, y); 154 | break; 155 | } 156 | 157 | case "middle_click": { 158 | const [x, y] = this.resolutionScaler.scaleToOriginalSpace( 159 | action.coordinate 160 | ); 161 | 162 | if (action.text) { 163 | await desktop.moveMouse(x, y); 164 | await desktop.press(action.text); 165 | } 166 | await desktop.middleClick(x, y); 167 | break; 168 | } 169 | 170 | case "type": { 171 | await desktop.write(action.text); 172 | break; 173 | } 174 | 175 | case "key": { 176 | await desktop.press(action.text); 177 | break; 178 | } 179 | 180 | case "hold_key": { 181 | await desktop.press(action.text); 182 | break; 183 | } 184 | 185 | case "mouse_move": { 186 | const [x, y] = this.resolutionScaler.scaleToOriginalSpace( 187 | action.coordinate 188 | ); 189 | 190 | await desktop.moveMouse(x, y); 191 | break; 192 | } 193 | 194 | case "left_mouse_down": { 195 | break; 196 | } 197 | 198 | case "left_mouse_up": { 199 | break; 200 | } 201 | 202 | case 
"left_click_drag": { 203 | const start = this.resolutionScaler.scaleToOriginalSpace( 204 | action.start_coordinate 205 | ); 206 | const end = this.resolutionScaler.scaleToOriginalSpace( 207 | action.coordinate 208 | ); 209 | 210 | await desktop.drag(start, end); 211 | break; 212 | } 213 | 214 | case "scroll": { 215 | const [x, y] = this.resolutionScaler.scaleToOriginalSpace( 216 | action.coordinate 217 | ); 218 | 219 | const direction = action.scroll_direction; 220 | const amount = action.scroll_amount; 221 | 222 | await desktop.moveMouse(x, y); 223 | 224 | if (action.text) { 225 | await desktop.press(action.text); 226 | } 227 | 228 | await desktop.scroll(direction === "up" ? "up" : "down", amount); 229 | break; 230 | } 231 | 232 | case "wait": { 233 | await sleep(action.duration * 1000); 234 | break; 235 | } 236 | 237 | case "cursor_position": { 238 | break; 239 | } 240 | 241 | default: { 242 | } 243 | } 244 | } 245 | 246 | async *stream( 247 | props: ComputerInteractionStreamerFacadeStreamProps 248 | ): AsyncGenerator> { 249 | const { messages, signal } = props; 250 | 251 | const anthropicMessages: BetaMessageParam[] = messages.map((msg) => ({ 252 | role: msg.role as "user" | "assistant", 253 | content: [{ type: "text", text: msg.content }], 254 | })); 255 | 256 | try { 257 | while (true) { 258 | if (signal?.aborted) { 259 | yield { 260 | type: SSEEventType.DONE, 261 | content: "Generation stopped by user", 262 | }; 263 | break; 264 | } 265 | 266 | const modelResolution = this.resolutionScaler.getScaledResolution(); 267 | 268 | const response = await this.anthropic.beta.messages.create({ 269 | model: "claude-3-7-sonnet-latest", 270 | max_tokens: 4096, 271 | messages: anthropicMessages, 272 | system: this.instructions, 273 | tools: [ 274 | { 275 | type: "computer_20250124", 276 | name: "computer", 277 | display_width_px: modelResolution[0], 278 | display_height_px: modelResolution[1], 279 | }, 280 | { 281 | type: "bash_20250124", 282 | name: "bash", 283 | }, 284 | 
], 285 | betas: ["computer-use-2025-01-24"], 286 | thinking: { type: "enabled", budget_tokens: 1024 }, 287 | }); 288 | 289 | const toolUseBlocks: BetaToolUseBlock[] = []; 290 | let reasoningText = ""; 291 | 292 | for (const block of response.content) { 293 | if (block.type === "tool_use") { 294 | toolUseBlocks.push(block); 295 | } else if (block.type === "text") { 296 | reasoningText += block.text; 297 | } else if (block.type === "thinking" && block.thinking) { 298 | yield { 299 | type: SSEEventType.REASONING, 300 | content: block.thinking, 301 | }; 302 | } 303 | } 304 | 305 | if (reasoningText) { 306 | yield { 307 | type: SSEEventType.REASONING, 308 | content: reasoningText, 309 | }; 310 | } 311 | 312 | if (toolUseBlocks.length === 0) { 313 | yield { 314 | type: SSEEventType.DONE, 315 | }; 316 | break; 317 | } 318 | 319 | const assistantMessage: BetaMessageParam = { 320 | role: "assistant", 321 | content: response.content, 322 | }; 323 | anthropicMessages.push(assistantMessage); 324 | 325 | const toolResults: BetaToolResultBlockParam[] = []; 326 | 327 | for await (const toolUse of toolUseBlocks) { 328 | yield { 329 | type: SSEEventType.ACTION, 330 | action: toolUse.input as ComputerAction, 331 | }; 332 | 333 | await this.executeAction(toolUse as BetaToolUseBlock & ToolInput); 334 | 335 | yield { 336 | type: SSEEventType.ACTION_COMPLETED, 337 | }; 338 | 339 | // Always take a screenshot after each action 340 | const screenshotData = await this.resolutionScaler.takeScreenshot(); 341 | const screenshotBase64 = 342 | Buffer.from(screenshotData).toString("base64"); 343 | 344 | const toolResultContent: BetaToolResultBlockParam["content"] = [ 345 | { 346 | type: "image", 347 | source: { 348 | type: "base64", 349 | media_type: "image/png", 350 | data: screenshotBase64, 351 | }, 352 | }, 353 | ]; 354 | 355 | const toolResult: BetaToolResultBlockParam = { 356 | type: "tool_result", 357 | tool_use_id: toolUse.id, 358 | content: toolResultContent, 359 | is_error: false, 360 | 
}; 361 | 362 | toolResults.push(toolResult); 363 | } 364 | 365 | if (toolResults.length > 0) { 366 | const userMessage: BetaMessageParam = { 367 | role: "user", 368 | content: toolResults, 369 | }; 370 | anthropicMessages.push(userMessage); 371 | } 372 | } 373 | } catch (error) { 374 | logError("ANTHROPIC_STREAMER", error); 375 | yield { 376 | type: SSEEventType.ERROR, 377 | content: "An error occurred with the AI service. Please try again.", 378 | }; 379 | } 380 | } 381 | } 382 | -------------------------------------------------------------------------------- /app/page.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { useRef, useState, useEffect } from "react"; 4 | import { 5 | MoonIcon, 6 | SunIcon, 7 | Timer, 8 | Power, 9 | Menu, 10 | X, 11 | ArrowUpRight, 12 | } from "lucide-react"; 13 | import { useTheme } from "next-themes"; 14 | import { toast } from "sonner"; 15 | import { increaseTimeout, stopSandboxAction } from "@/app/actions"; 16 | import { motion, AnimatePresence } from "framer-motion"; 17 | import { ChatList } from "@/components/chat/message-list"; 18 | import { ChatInput } from "@/components/chat/input"; 19 | import { ExamplePrompts } from "@/components/chat/example-prompts"; 20 | import { useChat } from "@/lib/chat-context"; 21 | import Frame from "@/components/frame"; 22 | import { Button } from "@/components/ui/button"; 23 | import { Loader, AssemblyLoader } from "@/components/loader"; 24 | import Link from "next/link"; 25 | import Logo from "@/components/logo"; 26 | import { RepoBanner } from "@/components/repo-banner"; 27 | import { SANDBOX_TIMEOUT_MS } from "@/lib/config"; 28 | import { Surfing } from "@/components/surfing"; 29 | 30 | export default function Home() { 31 | const [sandboxId, setSandboxId] = useState(null); 32 | const [isLoading, setIsLoading] = useState(false); 33 | const [vncUrl, setVncUrl] = useState(null); 34 | const { theme, setTheme } = useTheme(); 35 | 
const [timeRemaining, setTimeRemaining] = useState( 36 | SANDBOX_TIMEOUT_MS / 1000 37 | ); 38 | const [isTabVisible, setIsTabVisible] = useState(true); 39 | const iframeRef = useRef(null); 40 | const iFrameWrapperRef = useRef(null); 41 | const [mobileMenuOpen, setMobileMenuOpen] = useState(false); 42 | 43 | const { 44 | messages, 45 | isLoading: chatLoading, 46 | input, 47 | setInput, 48 | sendMessage, 49 | stopGeneration, 50 | clearMessages, 51 | handleSubmit, 52 | onSandboxCreated, 53 | } = useChat(); 54 | 55 | useEffect(() => { 56 | const handleVisibilityChange = () => { 57 | setIsTabVisible(document.visibilityState === "visible"); 58 | }; 59 | 60 | setIsTabVisible(document.visibilityState === "visible"); 61 | 62 | document.addEventListener("visibilitychange", handleVisibilityChange); 63 | 64 | return () => { 65 | document.removeEventListener("visibilitychange", handleVisibilityChange); 66 | }; 67 | }, []); 68 | 69 | const stopSandbox = async () => { 70 | if (sandboxId) { 71 | try { 72 | stopGeneration(); 73 | const success = await stopSandboxAction(sandboxId); 74 | if (success) { 75 | setSandboxId(null); 76 | setVncUrl(null); 77 | clearMessages(); 78 | setTimeRemaining(SANDBOX_TIMEOUT_MS / 1000); 79 | toast("Sandbox instance stopped"); 80 | } else { 81 | toast.error("Failed to stop sandbox instance"); 82 | } 83 | } catch (error) { 84 | console.error("Failed to stop sandbox:", error); 85 | toast.error("Failed to stop sandbox"); 86 | } 87 | } 88 | }; 89 | 90 | const handleIncreaseTimeout = async () => { 91 | if (!sandboxId) return; 92 | 93 | try { 94 | await increaseTimeout(sandboxId); 95 | setTimeRemaining(SANDBOX_TIMEOUT_MS / 1000); 96 | toast.success("Instance time increased"); 97 | } catch (error) { 98 | console.error("Failed to increase time:", error); 99 | toast.error("Failed to increase time"); 100 | } 101 | }; 102 | 103 | const onSubmit = (e: React.FormEvent) => { 104 | const content = handleSubmit(e); 105 | if (content) { 106 | const width = 107 | 
iFrameWrapperRef.current?.clientWidth || 108 | (window.innerWidth < 768 ? window.innerWidth - 32 : 1024); 109 | const height = 110 | iFrameWrapperRef.current?.clientHeight || 111 | (window.innerWidth < 768 112 | ? Math.min(window.innerHeight * 0.4, 400) 113 | : 768); 114 | 115 | sendMessage({ 116 | content, 117 | sandboxId: sandboxId || undefined, 118 | environment: "linux", 119 | resolution: [width, height], 120 | }); 121 | } 122 | }; 123 | 124 | const handleExampleClick = (prompt: string) => { 125 | const width = 126 | iFrameWrapperRef.current?.clientWidth || 127 | (window.innerWidth < 768 ? window.innerWidth - 32 : 1024); 128 | const height = 129 | iFrameWrapperRef.current?.clientHeight || 130 | (window.innerWidth < 768 ? Math.min(window.innerHeight * 0.4, 400) : 768); 131 | 132 | sendMessage({ 133 | content: prompt, 134 | sandboxId: sandboxId || undefined, 135 | environment: "linux", 136 | resolution: [width, height], 137 | }); 138 | }; 139 | 140 | const handleSandboxCreated = (newSandboxId: string, newVncUrl: string) => { 141 | setSandboxId(newSandboxId); 142 | setVncUrl(newVncUrl); 143 | setTimeRemaining(SANDBOX_TIMEOUT_MS / 1000); 144 | toast.success("Sandbox instance created"); 145 | }; 146 | 147 | const handleClearChat = () => { 148 | clearMessages(); 149 | toast.success("Chat cleared"); 150 | }; 151 | 152 | const ThemeToggle = () => ( 153 | 165 | ); 166 | 167 | useEffect(() => { 168 | if (!sandboxId) return; 169 | const interval = setInterval(() => { 170 | if (isTabVisible) { 171 | setTimeRemaining((prev) => (prev > 0 ? 
prev - 1 : 0)); 172 | } 173 | }, 1000); 174 | return () => clearInterval(interval); 175 | }, [sandboxId, isTabVisible]); 176 | 177 | useEffect(() => { 178 | if (!sandboxId) return; 179 | 180 | if (timeRemaining === 10 && isTabVisible) { 181 | handleIncreaseTimeout(); 182 | } 183 | 184 | if (timeRemaining === 0) { 185 | setSandboxId(null); 186 | setVncUrl(null); 187 | clearMessages(); 188 | stopGeneration(); 189 | toast.error("Instance time expired"); 190 | setTimeRemaining(SANDBOX_TIMEOUT_MS / 1000); 191 | } 192 | }, [timeRemaining, sandboxId, stopGeneration, clearMessages, isTabVisible]); 193 | 194 | useEffect(() => { 195 | onSandboxCreated((newSandboxId: string, newVncUrl: string) => { 196 | handleSandboxCreated(newSandboxId, newVncUrl); 197 | }); 198 | }, [onSandboxCreated]); 199 | 200 | return ( 201 |
    202 | 208 |
    209 |
    210 | 215 | 216 |

    Surf - Computer Agent by

    217 | 218 | 223 | E2B 224 | 225 |
    226 | 227 |
    228 | 240 |
    241 | 242 |
    243 | 244 | 245 | 246 | 247 | {sandboxId && ( 248 | 255 | 279 | 280 | 288 | 289 | )} 290 | 291 |
    292 | 293 |
    294 | 295 | {sandboxId && ( 296 | 303 | 328 | 329 | 337 | 338 | )} 339 | 340 |
    341 |
    342 | 343 | 344 | {mobileMenuOpen && ( 345 | 352 |
    353 | 354 | 355 |
    356 |
    357 | )} 358 |
    359 | 360 |
    361 |
    365 | {isLoading || (chatLoading && !sandboxId) ? ( 366 |
    367 |
    368 |

    369 | {isLoading ? "Starting instance" : "Creating sandbox..."} 370 |

    371 | 372 |
    373 | 374 | 381 | 382 |

    383 | {isLoading 384 | ? "Preparing your sandbox environment..." 385 | : "Creating a new sandbox for your request..."} 386 |

    387 |
    388 | ) : sandboxId && vncUrl ? ( 389 |