├── frontend
│   ├── src
│   │   ├── vite-env.d.ts
│   │   ├── lib
│   │   │   ├── utils.ts
│   │   │   ├── api.ts
│   │   │   └── useExperiment.ts
│   │   ├── main.tsx
│   │   ├── App.tsx
│   │   ├── App.css
│   │   ├── components
│   │   │   ├── StatusBadge.tsx
│   │   │   ├── Console.tsx
│   │   │   ├── StreamingMarkdown.tsx
│   │   │   ├── Notebook
│   │   │   │   ├── NotebookCell.tsx
│   │   │   │   ├── AgentNotebook.tsx
│   │   │   │   └── ResearchPaper.tsx
│   │   │   ├── FindingsRail.tsx
│   │   │   ├── CredentialPrompt.tsx
│   │   │   └── LabNotebook.tsx
│   │   ├── index.css
│   │   └── assets
│   │       └── react.svg
│   ├── vite.config.d.ts
│   ├── postcss.config.js
│   ├── vite.config.ts
│   ├── vite.config.js
│   ├── .gitignore
│   ├── tsconfig.tsbuildinfo
│   ├── eslint.config.js
│   ├── index.html
│   ├── tsconfig.app.json
│   ├── tsconfig.node.json
│   ├── tsconfig.json
│   ├── package.json
│   ├── public
│   │   └── vite.svg
│   ├── tailwind.config.js
│   └── README.md
├── requirements.txt
├── .env.example
├── railway.json
├── api_guide.md
├── Dockerfile
├── experiment.py
├── LICENSE
├── logger.py
├── README.md
├── modal_guide.md
├── run_app.py
├── main.py
├── gemini_3_pro_guide.md
├── insights.py
└── api_server.py

/frontend/src/vite-env.d.ts:
--------------------------------------------------------------------------------
 1 | /// <reference types="vite/client" />
 2 | 
--------------------------------------------------------------------------------
/frontend/vite.config.d.ts:
--------------------------------------------------------------------------------
 1 | declare const _default: import("vite").UserConfig;
 2 | export default _default;
 3 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | google-genai
 2 | anthropic
 3 | modal
 4 | python-dotenv
 5 | rich
 6 | fastapi
 7 | uvicorn[standard]
 8 | 
--------------------------------------------------------------------------------
/frontend/postcss.config.js:
--------------------------------------------------------------------------------
 1 | export default {
 2 |   plugins: {
 3 |     "@tailwindcss/postcss": {},
 4 |     autoprefixer: {},
 5 |   },
 6 | }
 7 | 
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
 1 | GOOGLE_API_KEY=your_google_api_key_here
 2 | ANTHROPIC_API_KEY=your_anthropic_api_key_here
 3 | MODAL_TOKEN_ID=your_modal_token_id_here
 4 | MODAL_TOKEN_SECRET=your_modal_token_secret_here
 5 | 
--------------------------------------------------------------------------------
/frontend/src/lib/utils.ts:
--------------------------------------------------------------------------------
 1 | import { type ClassValue, clsx } from "clsx"
 2 | import { twMerge } from "tailwind-merge"
 3 | 
 4 | export function cn(...inputs: ClassValue[]) {
 5 |   return twMerge(clsx(inputs))
 6 | }
 7 | 
--------------------------------------------------------------------------------
/frontend/src/main.tsx:
--------------------------------------------------------------------------------
 1 | import { StrictMode } from 'react'
 2 | import { createRoot } from 'react-dom/client'
 3 | import './index.css'
 4 | import App from './App.tsx'
 5 | 
 6 | createRoot(document.getElementById('root')!).render(
 7 |   <StrictMode>
 8 |     <App />
 9 |   </StrictMode>,
10 | )
11 | 
--------------------------------------------------------------------------------
/railway.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://railway.app/railway.schema.json",
 3 |   "build": {
 4 |     "builder": "DOCKERFILE",
 5 |     "dockerfilePath": "Dockerfile"
 6 |   },
 7 |   "deploy": {
"healthcheckPath": "/api/health", 9 | "restartPolicyType": "ON_FAILURE" 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /frontend/src/App.tsx: -------------------------------------------------------------------------------- 1 | import { LabNotebook } from "@/components/LabNotebook"; 2 | 3 | function App() { 4 | return ( 5 |
6 | 7 |
 8 |   );
 9 | }
10 | 
11 | export default App;
12 | 
--------------------------------------------------------------------------------
/frontend/vite.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig } from 'vite'
 2 | import react from '@vitejs/plugin-react'
 3 | import path from 'path'
 4 | 
 5 | // https://vitejs.dev/config/
 6 | export default defineConfig({
 7 |   plugins: [react()],
 8 |   resolve: {
 9 |     alias: {
10 |       "@": path.resolve(__dirname, "./src"),
11 |     },
12 |   },
13 | })
14 | 
--------------------------------------------------------------------------------
/frontend/vite.config.js:
--------------------------------------------------------------------------------
 1 | import { defineConfig } from 'vite';
 2 | import react from '@vitejs/plugin-react';
 3 | import path from 'path';
 4 | // https://vitejs.dev/config/
 5 | export default defineConfig({
 6 |     plugins: [react()],
 7 |     resolve: {
 8 |         alias: {
 9 |             "@": path.resolve(__dirname, "./src"),
10 |         },
11 |     },
12 | });
13 | 
--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
 1 | # Logs
 2 | logs
 3 | *.log
 4 | npm-debug.log*
 5 | yarn-debug.log*
 6 | yarn-error.log*
 7 | pnpm-debug.log*
 8 | lerna-debug.log*
 9 | 
10 | node_modules
11 | dist
12 | dist-ssr
13 | *.local
14 | 
15 | # Editor directories and files
16 | .vscode/*
17 | !.vscode/extensions.json
18 | .idea
19 | .DS_Store
20 | *.suo
21 | *.ntvs*
22 | *.njsproj
23 | *.sln
24 | *.sw?
25 | 
--------------------------------------------------------------------------------
/api_guide.md:
--------------------------------------------------------------------------------
 1 | # AI Researcher HTTP API
 2 | 
 3 | This document describes the lightweight HTTP API that wraps the existing CLI entrypoint (`main.py`).
 4 | 
 5 | The API **does not** change any of the research logic – it simply spawns the current CLI in a subprocess and streams back everything it prints, so you can build a rich front-end on top.
 6 | 
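A minimal client sketch of consuming that stream (the route name and the per-line event shape here are assumptions inferred from the `SingleExperimentRequest` and `LogEvent` types in `frontend/src/lib/api.ts`; check `api_server.py` for the real endpoint definitions):

```python
import json
import requests

# Hypothetical route: api_server.py defines the real one. The payload mirrors
# the SingleExperimentRequest type in frontend/src/lib/api.ts.
resp = requests.post(
    "http://localhost:8000/api/experiments",
    json={"task": "Does label smoothing improve ViT-Base on CIFAR-10?",
          "gpu": "any", "model": "gemini-3-pro-preview"},
    stream=True,
)
for raw in resp.iter_lines():
    if not raw:
        continue
    event = json.loads(raw)  # assumed: one JSON-encoded LogEvent per line
    if event.get("type") == "line":
        print(event.get("plain") or event.get("raw", ""))
```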
 7 | ---
 8 | 
 9 | ## Getting started
10 | 
11 | 1. Install dependencies:
12 | 
13 | ```bash
14 | pip install -r requirements.txt
15 | ```
--------------------------------------------------------------------------------
/frontend/tsconfig.tsbuildinfo:
--------------------------------------------------------------------------------
 1 | {"root":["./src/app.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/console.tsx","./src/components/credentialprompt.tsx","./src/components/findingsrail.tsx","./src/components/labnotebook.tsx","./src/components/statusbadge.tsx","./src/components/streamingmarkdown.tsx","./src/components/notebook/agentnotebook.tsx","./src/components/notebook/notebookcell.tsx","./src/components/notebook/researchpaper.tsx","./src/lib/api.ts","./src/lib/useexperiment.ts","./src/lib/utils.ts"],"version":"5.9.3"}
--------------------------------------------------------------------------------
/frontend/eslint.config.js:
--------------------------------------------------------------------------------
 1 | import js from '@eslint/js'
 2 | import globals from 'globals'
 3 | import reactHooks from 'eslint-plugin-react-hooks'
 4 | import reactRefresh from 'eslint-plugin-react-refresh'
 5 | import tseslint from 'typescript-eslint'
 6 | import { defineConfig, globalIgnores } from 'eslint/config'
 7 | 
 8 | export default defineConfig([
 9 |   globalIgnores(['dist']),
10 |   {
11 |     files: ['**/*.{ts,tsx}'],
12 |     extends: [
13 |       js.configs.recommended,
14 |       tseslint.configs.recommended,
15 |       reactHooks.configs.flat.recommended,
16 |       reactRefresh.configs.vite,
17 |     ],
18 |     languageOptions: {
19 |       ecmaVersion: 2020,
20 |       globals: globals.browser,
21 |     },
22 |   },
23 | ])
24 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Build stage for frontend
 2 | FROM node:20-slim AS frontend-builder
 3 | WORKDIR /app/frontend
 4 | COPY frontend/package*.json ./
 5 | RUN npm ci
 6 | COPY frontend/ ./
 7 | RUN npm run build
 8 | 
 9 | # Production stage
10 | FROM python:3.11-slim
11 | WORKDIR /app
12 | 
13 | # Install dependencies
14 | COPY requirements.txt ./
15 | RUN pip install --no-cache-dir -r requirements.txt
16 | 
17 | # Copy all Python files from root (not recursively into venv)
18 | COPY api_server.py main.py agent.py orchestrator.py logger.py insights.py ./
19 | 
20 | # Copy built frontend
21 | COPY --from=frontend-builder /app/frontend/dist ./frontend/dist
22 | 
23 | # Expose port (Railway sets PORT env var)
24 | EXPOSE 8000
25 | 
26 | # Start the server
27 | CMD ["python", "api_server.py"]
28 | 
--------------------------------------------------------------------------------
/frontend/index.html:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | 
 5 | 
 6 | 
 7 | 
 8 | AI Researcher
 9 | 
10 | 
11 | 
14 | 
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
--------------------------------------------------------------------------------
/frontend/src/App.css:
--------------------------------------------------------------------------------
 1 | #root {
 2 |   max-width: 1280px;
 3 |   margin: 0 auto;
 4 |   padding: 2rem;
 5 |   text-align: center;
 6 | }
 7 | 
 8 | .logo {
 9 |   height: 6em;
10 |   padding: 1.5em;
11 |   will-change: filter;
12 |   transition: filter 300ms;
13 | }
14 | .logo:hover {
15 |   filter: drop-shadow(0 0 2em #646cffaa);
16 | }
17 | .logo.react:hover {
18 |   filter: drop-shadow(0 0 2em #61dafbaa);
19 | }
20 | 
21 | @keyframes logo-spin {
22 |   from {
23 |     transform: rotate(0deg);
24 |   }
25 |   to {
26 |     transform: rotate(360deg);
27 |   }
28 | }
29 | 
30 | @media (prefers-reduced-motion: no-preference) {
31 |   a:nth-of-type(2) .logo {
32 |     animation: logo-spin infinite 20s linear;
33 |   }
34 | }
35 | 
36 | .card {
37 |   padding: 2em;
38 | }
39 | 
40 | .read-the-docs {
41 |   color: #888;
42 | }
43 | 
--------------------------------------------------------------------------------
/frontend/tsconfig.app.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
 4 |     "target": "ES2022",
 5 |     "useDefineForClassFields": true,
 6 |     "lib": ["ES2022", "DOM", "DOM.Iterable"],
 7 |     "module": "ESNext",
 8 |     "types": ["vite/client"],
 9 |     "skipLibCheck": true,
10 | 
11 |     /* Bundler mode */
12 |     "moduleResolution": "bundler",
13 |     "allowImportingTsExtensions": true,
14 |     "verbatimModuleSyntax": true,
15 |     "moduleDetection": "force",
16 |     "noEmit": true,
17 |     "jsx": "react-jsx",
18 | 
19 |     /* Linting */
20 |     "strict": true,
21 |     "noUnusedLocals": true,
22 |     "noUnusedParameters": true,
23 |     "erasableSyntaxOnly": true,
24 |     "noFallthroughCasesInSwitch": true,
25 |     "noUncheckedSideEffectImports": true
26 |   },
27 |   "include": ["src"]
28 | }
29 | 
--------------------------------------------------------------------------------
/frontend/tsconfig.node.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
 4 |     "target": "ES2023",
 5 |     "lib": [
 6 |       "ES2023"
 7 |     ],
 8 |     "module": "ESNext",
 9 |     "types": [
10 |       "node"
11 |     ],
12 |     "skipLibCheck": true,
13 |     /* Bundler mode */
14 |     "moduleResolution": "bundler",
15 |     "verbatimModuleSyntax": true,
16 |     "moduleDetection": "force",
17 |     "composite": true,
18 |     "noEmit": false,
19 |     /* Linting */
20 |     "strict": true,
21 |     "noUnusedLocals": true,
22 |     "noUnusedParameters": true,
23 |     "erasableSyntaxOnly": true,
24 |     "noFallthroughCasesInSwitch": true,
25 |     "noUncheckedSideEffectImports": true
26 |   },
27 |   "include": [
28 |     "vite.config.ts"
29 |   ]
30 | }
--------------------------------------------------------------------------------
/frontend/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2020",
 4 |     "useDefineForClassFields": true,
 5 |     "lib": [
 6 |       "ES2020",
 7 |       "DOM",
 8 |       "DOM.Iterable"
 9 |     ],
10 |     "module": "ESNext",
11 |     "skipLibCheck": true,
12 |     /* Bundler mode */
13 |     "moduleResolution": "bundler",
14 |     "allowImportingTsExtensions": true,
15 |     "resolveJsonModule": true,
16 |     "isolatedModules": true,
17 |     "noEmit": true,
18 |     "jsx": "react-jsx",
19 |     /* Linting */
20 |     "strict": true,
"noUnusedLocals": true, 22 | "noUnusedParameters": true, 23 | "noFallthroughCasesInSwitch": true, 24 | "baseUrl": ".", 25 | "paths": { 26 | "@/*": [ 27 | "./src/*" 28 | ] 29 | } 30 | }, 31 | "include": [ 32 | "src" 33 | ], 34 | "references": [ 35 | { 36 | "path": "./tsconfig.node.json", 37 | "composite": true, 38 | "allow": true 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /experiment.py: -------------------------------------------------------------------------------- 1 | import modal 2 | 3 | app = modal.App("fib-gpu-test-1") 4 | 5 | image = modal.Image.debian_slim().pip_install("torch") 6 | 7 | @app.function(image=image, gpu="any") 8 | def calculate_fib_iterative(): 9 | import torch 10 | print(f"Using device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}") 11 | 12 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 13 | 14 | n = 10 15 | # Initializing tensors on GPU 16 | t0 = torch.tensor(0, device=device) 17 | t1 = torch.tensor(1, device=device) 18 | 19 | if n == 0: 20 | res = t0 21 | elif n == 1: 22 | res = t1 23 | else: 24 | for i in range(2, n + 1): 25 | temp = t0 + t1 26 | t0 = t1 27 | t1 = temp 28 | res = t1 29 | 30 | print(f"Fibonacci({n}) calculated on {device}: {res.item()}") 31 | return res.item() 32 | 33 | @app.local_entrypoint() 34 | def main(): 35 | print("Starting Test 1: Iterative Approach") 36 | result = calculate_fib_iterative.remote() 37 | print(f"Result 1: {result}") 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 mshumer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "private": true, 4 | "version": "0.0.0", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "tsc -b && vite build", 9 | "lint": "eslint .", 10 | "preview": "vite preview" 11 | }, 12 | "dependencies": { 13 | "clsx": "^2.1.1", 14 | "date-fns": "^4.1.0", 15 | "framer-motion": "^12.23.24", 16 | "lucide-react": "^0.554.0", 17 | "react": "^19.2.0", 18 | "react-dom": "^19.2.0", 19 | "react-markdown": "^10.1.0", 20 | "remark-gfm": "^4.0.1", 21 | "tailwind-merge": "^3.4.0" 22 | }, 23 | "devDependencies": { 24 | "@eslint/js": "^9.39.1", 25 | "@tailwindcss/postcss": "^4.1.17", 26 | "@tailwindcss/typography": "^0.5.19", 27 | "@types/node": "^24.10.1", 28 | "@types/react": "^19.2.5", 29 | "@types/react-dom": "^19.2.3", 30 | "@vitejs/plugin-react": "^5.1.1", 31 | "autoprefixer": "^10.4.22", 32 | "eslint": "^9.39.1", 33 | "eslint-plugin-react-hooks": "^7.0.1", 34 | "eslint-plugin-react-refresh": "^0.4.24", 35 | "globals": "^16.5.0", 36 | "postcss": "^8.5.6", 37 | "tailwindcss": "^4.1.17", 38 | "typescript": "~5.9.3", 39 | "typescript-eslint": "^8.46.4", 40 | "vite": "^7.2.4" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /frontend/src/components/StatusBadge.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils"; 2 | 3 | interface StatusBadgeProps { 4 | status: "idle" | "running" | "completed" | "failed" | "planning"; 5 | className?: string; 6 | } 7 | 8 | export function StatusBadge({ status, className }: StatusBadgeProps) { 9 | const config: Record = { 10 | idle: { 11 | text: "Idle", 12 | dotColor: "bg-[#333]", 13 | }, 14 | running: { 15 | text: "Running", 16 | dotColor: "bg-blue-500", 17 | animate: true, 18 | }, 19 | planning: { 20 | text: "Planning", 21 | dotColor: "bg-purple-500", 22 | animate: true, 23 | }, 24 | completed: { 25 | text: "Done", 26 | dotColor: "bg-green-500", 27 | }, 28 | failed: { 29 | text: "Failed", 30 | dotColor: "bg-red-500", 31 | }, 32 | }; 33 | 34 | const { text, dotColor, animate } = config[status]; 35 | 36 | return ( 37 |
38 |
45 | 46 | {text} 47 | 48 |
49 | ); 50 | } 51 | -------------------------------------------------------------------------------- /frontend/public/vite.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from rich.console import Console 3 | from rich.panel import Panel 4 | from rich.logging import RichHandler 5 | from rich.theme import Theme 6 | 7 | # Custom theme for the console 8 | custom_theme = Theme({ 9 | "info": "dim cyan", 10 | "warning": "magenta", 11 | "error": "bold red", 12 | "success": "bold green", 13 | "thought": "italic cyan", 14 | "code": "bold yellow", 15 | "result": "white" 16 | }) 17 | 18 | console = Console(theme=custom_theme) 19 | 20 | def setup_logging(): 21 | """Sets up logging to both file and console.""" 22 | logging.basicConfig( 23 | level=logging.INFO, 24 | format="%(asctime)s - %(levelname)s - %(message)s", 25 | handlers=[ 26 | logging.FileHandler("agent.log"), 27 | # We don't add RichHandler here because we want manual control over console output 28 | # to keep it "elegant" and not just a stream of logs. 29 | ] 30 | ) 31 | # Create a separate logger for the file that doesn't propagate to root 32 | file_logger = logging.getLogger("agent_file") 33 | file_logger.setLevel(logging.DEBUG) 34 | return file_logger 35 | 36 | # Global file logger instance 37 | logger = setup_logging() 38 | 39 | def log_step(step_name, status="INFO"): 40 | """Logs a step to the file.""" 41 | logger.info(f"[{step_name}] {status}") 42 | 43 | def print_panel(content, title, style="info"): 44 | """Prints a rich panel to the console.""" 45 | console.print(Panel(content, title=title, border_style=style, expand=False)) 46 | 47 | def print_status(message, style="info"): 48 | """Prints a status message.""" 49 | console.print(f"[{style}]{message}[/{style}]") 50 | -------------------------------------------------------------------------------- /frontend/src/components/Console.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect, useRef } from "react"; 2 | import { LogEvent } from "@/lib/api"; 3 | import { cn } from "@/lib/utils"; 4 | 5 | interface ConsoleProps { 6 | logs: LogEvent[]; 7 | className?: string; 8 | } 9 | 10 | export function Console({ logs, className }: ConsoleProps) { 11 | const bottomRef = useRef(null); 12 | 13 | useEffect(() => { 14 | bottomRef.current?.scrollIntoView({ behavior: "smooth" }); 15 | }, [logs]); 16 | 17 | return ( 18 |
24 |
25 | Console Output 26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | {logs.length === 0 && ( 34 |
35 | Waiting for process output... 36 |
37 | )} 38 | {logs 39 | .filter((log) => !((log.plain ?? log.raw ?? "").includes("::EVENT::"))) 40 | .map((log, i) => ( 41 |
42 | 43 | {new Date(log.timestamp).toLocaleTimeString([], { 44 | hour12: false, 45 | hour: "2-digit", 46 | minute: "2-digit", 47 | second: "2-digit", 48 | })} 49 | 50 | 55 | {log.plain || log.raw} 56 | 57 |
58 | ))} 59 |
60 |
61 |
62 | ); 63 | } 64 | -------------------------------------------------------------------------------- /frontend/src/components/StreamingMarkdown.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect, useState } from "react"; 2 | import ReactMarkdown from "react-markdown"; 3 | import remarkGfm from "remark-gfm"; 4 | import { cn } from "@/lib/utils"; 5 | 6 | interface StreamingMarkdownProps { 7 | content: string; 8 | /** 9 | * A key that identifies a logical block of streaming content. 10 | * When this changes (e.g. new thought / new cell), we restart the animation. 11 | */ 12 | animateKey?: string | number; 13 | /** 14 | * Tailwind / CSS classes applied to the markdown body wrapper. 15 | */ 16 | markdownClassName?: string; 17 | /** 18 | * Optional classes for the outer container (animation wrapper). 19 | */ 20 | wrapperClassName?: string; 21 | } 22 | 23 | /** 24 | * StreamingMarkdown 25 | * 26 | * Renders markdown that is being updated incrementally (streamed tokens). 27 | * Every time the content changes, we softly re-trigger a fade-in animation 28 | * on the entire block using the global `.stream-fade` styles. 29 | * 30 | * This keeps the effect subtle but ensures *all* new streamed chunks 31 | * participate in the animation, not just the initial thought. 32 | */ 33 | export function StreamingMarkdown({ 34 | content, 35 | animateKey, 36 | markdownClassName, 37 | wrapperClassName, 38 | }: StreamingMarkdownProps) { 39 | const [isAnimating, setIsAnimating] = useState(false); 40 | 41 | useEffect(() => { 42 | if (typeof window === "undefined") return; 43 | 44 | let timeoutId: number | null = null; 45 | const rafId = window.requestAnimationFrame(() => { 46 | setIsAnimating(true); 47 | timeoutId = window.setTimeout(() => setIsAnimating(false), 420); // match CSS duration 48 | }); 49 | 50 | return () => { 51 | window.cancelAnimationFrame(rafId); 52 | if (timeoutId !== null) window.clearTimeout(timeoutId); 53 | }; 54 | }, [content, animateKey]); 55 | 56 | return ( 57 |
64 |
65 | 66 | {content} 67 | 68 |
69 |
70 | ); 71 | } 72 | -------------------------------------------------------------------------------- /frontend/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | export default { 3 | content: [ 4 | "./index.html", 5 | "./src/**/*.{js,ts,jsx,tsx}", 6 | ], 7 | theme: { 8 | extend: { 9 | colors: { 10 | background: "hsl(var(--background))", 11 | foreground: "hsl(var(--foreground))", 12 | card: { 13 | DEFAULT: "hsl(var(--card))", 14 | foreground: "hsl(var(--card-foreground))", 15 | }, 16 | popover: { 17 | DEFAULT: "hsl(var(--popover))", 18 | foreground: "hsl(var(--popover-foreground))", 19 | }, 20 | primary: { 21 | DEFAULT: "hsl(var(--primary))", 22 | foreground: "hsl(var(--primary-foreground))", 23 | }, 24 | secondary: { 25 | DEFAULT: "hsl(var(--secondary))", 26 | foreground: "hsl(var(--secondary-foreground))", 27 | }, 28 | muted: { 29 | DEFAULT: "hsl(var(--muted))", 30 | foreground: "hsl(var(--muted-foreground))", 31 | }, 32 | accent: { 33 | DEFAULT: "hsl(var(--accent))", 34 | foreground: "hsl(var(--accent-foreground))", 35 | }, 36 | destructive: { 37 | DEFAULT: "hsl(var(--destructive))", 38 | foreground: "hsl(var(--destructive-foreground))", 39 | }, 40 | border: "hsl(var(--border))", 41 | input: "hsl(var(--input))", 42 | ring: "hsl(var(--ring))", 43 | chart: { 44 | "1": "hsl(var(--chart-1))", 45 | "2": "hsl(var(--chart-2))", 46 | "3": "hsl(var(--chart-3))", 47 | "4": "hsl(var(--chart-4))", 48 | "5": "hsl(var(--chart-5))", 49 | }, 50 | }, 51 | borderRadius: { 52 | lg: "var(--radius)", 53 | md: "calc(var(--radius) - 2px)", 54 | sm: "calc(var(--radius) - 4px)", 55 | }, 56 | fontFamily: { 57 | sans: ['Inter', 'sans-serif'], 58 | mono: ['JetBrains Mono', 'monospace'], 59 | serif: ['Newsreader', 'serif'], 60 | }, 61 | }, 62 | }, 63 | plugins: [ 64 | require('@tailwindcss/typography'), 65 | ], 66 | } 67 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # React + TypeScript + Vite 2 | 3 | This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules. 4 | 5 | Currently, two official plugins are available: 6 | 7 | - [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) (or [oxc](https://oxc.rs) when used in [rolldown-vite](https://vite.dev/guide/rolldown)) for Fast Refresh 8 | - [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh 9 | 10 | ## React Compiler 11 | 12 | The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation). 13 | 14 | ## Expanding the ESLint configuration 15 | 16 | If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules: 17 | 18 | ```js 19 | export default defineConfig([ 20 | globalIgnores(['dist']), 21 | { 22 | files: ['**/*.{ts,tsx}'], 23 | extends: [ 24 | // Other configs... 
25 | 26 | // Remove tseslint.configs.recommended and replace with this 27 | tseslint.configs.recommendedTypeChecked, 28 | // Alternatively, use this for stricter rules 29 | tseslint.configs.strictTypeChecked, 30 | // Optionally, add this for stylistic rules 31 | tseslint.configs.stylisticTypeChecked, 32 | 33 | // Other configs... 34 | ], 35 | languageOptions: { 36 | parserOptions: { 37 | project: ['./tsconfig.node.json', './tsconfig.app.json'], 38 | tsconfigRootDir: import.meta.dirname, 39 | }, 40 | // other options... 41 | }, 42 | }, 43 | ]) 44 | ``` 45 | 46 | You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules: 47 | 48 | ```js 49 | // eslint.config.js 50 | import reactX from 'eslint-plugin-react-x' 51 | import reactDom from 'eslint-plugin-react-dom' 52 | 53 | export default defineConfig([ 54 | globalIgnores(['dist']), 55 | { 56 | files: ['**/*.{ts,tsx}'], 57 | extends: [ 58 | // Other configs... 59 | // Enable lint rules for React 60 | reactX.configs['recommended-typescript'], 61 | // Enable lint rules for React DOM 62 | reactDom.configs.recommended, 63 | ], 64 | languageOptions: { 65 | parserOptions: { 66 | project: ['./tsconfig.node.json', './tsconfig.app.json'], 67 | tsconfigRootDir: import.meta.dirname, 68 | }, 69 | // other options... 70 | }, 71 | }, 72 | ]) 73 | ``` 74 | -------------------------------------------------------------------------------- /frontend/src/index.css: -------------------------------------------------------------------------------- 1 | @import "tailwindcss"; 2 | @config "../tailwind.config.js"; 3 | 4 | @layer base { 5 | :root { 6 | --background: 240 10% 3.9%; 7 | --foreground: 0 0% 98%; 8 | --card: 240 10% 3.9%; 9 | --card-foreground: 0 0% 98%; 10 | --popover: 240 10% 3.9%; 11 | --popover-foreground: 0 0% 98%; 12 | --primary: 0 0% 98%; 13 | --primary-foreground: 240 5.9% 10%; 14 | --secondary: 240 3.7% 15.9%; 15 | --secondary-foreground: 0 0% 98%; 16 | --muted: 240 3.7% 15.9%; 17 | --muted-foreground: 240 5% 64.9%; 18 | --accent: 240 3.7% 15.9%; 19 | --accent-foreground: 0 0% 98%; 20 | --destructive: 0 62.8% 30.6%; 21 | --destructive-foreground: 0 0% 98%; 22 | --border: 240 3.7% 15.9%; 23 | --input: 240 3.7% 15.9%; 24 | --ring: 240 4.9% 83.9%; 25 | --chart-1: 220 70% 50%; 26 | --chart-2: 160 60% 45%; 27 | --chart-3: 30 80% 55%; 28 | --chart-4: 280 65% 60%; 29 | --chart-5: 340 75% 55%; 30 | --radius: 0.5rem; 31 | } 32 | } 33 | 34 | @layer base { 35 | * { 36 | @apply border-border; 37 | } 38 | 39 | body { 40 | @apply bg-background text-foreground; 41 | font-feature-settings: "rlig" 1, "calt" 1; 42 | } 43 | } 44 | 45 | /* Custom Scrollbar Global (Dark) */ 46 | ::-webkit-scrollbar { 47 | width: 8px; 48 | height: 8px; 49 | } 50 | 51 | ::-webkit-scrollbar-track { 52 | background: transparent; 53 | } 54 | 55 | ::-webkit-scrollbar-thumb { 56 | @apply bg-muted rounded-full; 57 | } 58 | 59 | ::-webkit-scrollbar-thumb:hover { 60 | @apply bg-muted-foreground/50; 61 | } 62 | 63 | /* Light Theme Scrollbar override */ 64 | .custom-scrollbar-light::-webkit-scrollbar-thumb { 65 | @apply bg-gray-300; 66 | } 67 | 68 | .custom-scrollbar-light::-webkit-scrollbar-thumb:hover { 69 | @apply bg-gray-400; 70 | } 71 | 72 | /* Print Styles */ 73 | @media print { 74 | @page { 75 | margin: 0; 76 | size: auto; 77 | } 78 | 79 | body * { 80 | visibility: hidden; 
 81 |   }
 82 | 
 83 |   #printable-paper-content,
 84 |   #printable-paper-content * {
 85 |     visibility: visible;
 86 |   }
 87 | 
 88 |   #printable-paper-content {
 89 |     position: fixed;
 90 |     left: 0;
 91 |     top: 0;
 92 |     width: 100vw;
 93 |     height: auto;
 94 |     margin: 0;
 95 |     padding: 1in;
 96 |     background: white !important;
 97 |     color: black !important;
 98 |     box-shadow: none !important;
 99 |     overflow: visible !important;
100 |     -webkit-print-color-adjust: exact;
101 |     print-color-adjust: exact;
102 |   }
103 | 
104 |   /* Hide footer in print */
105 |   .no-print {
106 |     display: none !important;
107 |   }
108 | }
109 | 
110 | /* Subtle fade for streamed tokens */
111 | .stream-fade {
112 |   will-change: opacity, transform, filter;
113 | }
114 | 
115 | .stream-fade--active {
116 |   animation: streamFade 0.42s ease-out;
117 |   animation-fill-mode: backwards;
118 | }
119 | 
120 | @keyframes streamFade {
121 |   from {
122 |     opacity: 0.2;
123 |     transform: translateY(6px);
124 |     filter: blur(8px);
125 |   }
126 |   65% {
127 |     opacity: 1;
128 |     transform: translateY(0);
129 |     filter: blur(0);
130 |   }
131 |   to {
132 |     opacity: 1;
133 |   }
134 | }
135 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AI Researcher
 2 | [![Twitter Follow](https://img.shields.io/twitter/follow/mattshumer_?style=social)](https://twitter.com/mattshumer_)
 3 | 
 4 | [Be the first to know when I publish new AI builds + demos!](https://tally.so/r/w2M17p)
 5 | 
 6 | An autonomous AI researcher. It takes a research objective, breaks it into experiments, spins up separate agents with access to their own GPUs to run these experiments, and delivers a paper-style writeup with findings.
 7 | 
 8 | ## How it Works
 9 | - Decomposes your prompt into experiments and assigns them to specialist researcher agents.
10 | - Each agent can launch GPU-enabled sandboxes to train models/run inference/etc., evaluate, and collect evidence.
11 | - Based on the results of these experiments, the orchestrator can decide to finalize, or run more experiments.
12 | - The orchestrator goes over all of the results and turns them into a coherent "paper".
13 | 
14 | ## Run it (web notebook, one command)
15 | The fastest way to use it:
16 | ```
17 | python run_app.py
18 | ```
19 | This installs missing deps, starts the API + frontend, and opens the notebook. If Google/Modal keys aren’t set, the UI will prompt you and save them locally before the run starts.
20 | 
21 | ## Keys Needed
22 | - **LLM key** (at least one):
23 |   - Google AI Studio: `GOOGLE_API_KEY` (for Gemini 3 Pro)
24 |   - Anthropic: `ANTHROPIC_API_KEY` (for Claude Opus 4.5)
25 | - **Modal tokens**: `MODAL_TOKEN_ID` and `MODAL_TOKEN_SECRET` (for GPU sandboxes)
26 | - Add them to `.env` in the repo root, or paste them into the web prompt when asked.
27 | 
28 | ## Model Selection
29 | Choose between **Gemini 3 Pro** and **Claude Opus 4.5** from the dropdown in the web UI, or via CLI with `--model`.
30 | 
31 | ## Optional CLI
32 | Prefer the terminal?
33 | ```
34 | python -m venv venv && source venv/bin/activate
35 | pip install -r requirements.txt
36 | python main.py "Does label smoothing improve ViT-Base on CIFAR-10?" --mode single --gpu any --model gemini-3-pro-preview
37 | ```
38 | Orchestrator (multi-agent):
39 | ```
40 | python main.py "Characterize scaling laws for sparse attention transformers" \
41 |   --mode orchestrator --num-agents 3 --max-rounds 3 --max-parallel 2 --gpu any
42 | ```
43 | Dry run:
44 | ```
45 | python main.py "Sanity check the pipeline" --mode orchestrator --test-mode
46 | ```
47 | 
48 | ## Deploy to Railway
49 | 
50 | [![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/new/template?template=https://github.com/mattshumer/ai-researcher&referralCode=mattshumer)
51 | 
52 | **Steps:**
53 | 1. Click the button above (or go to Railway and select "Deploy from GitHub repo")
54 | 2. Connect your GitHub account and select this repo (or your fork)
55 | 3. Railway will automatically detect the Dockerfile and build the app
56 | 4. Once deployed, open the app URL and enter your API keys in the UI
57 | 
58 | **Optional environment variables** (if you want server-side defaults):
59 | - `GOOGLE_API_KEY` - Google AI Studio key for Gemini 3 Pro
60 | - `ANTHROPIC_API_KEY` - Anthropic key for Claude Opus 4.5
61 | - `MODAL_TOKEN_ID` and `MODAL_TOKEN_SECRET` - For GPU sandboxes
62 | 
63 | Note: Users can also enter their own keys directly in the web UI without setting environment variables.
64 | 
65 | ## Status/Contribution
66 | This is a super-early, experimental harness. There are a number of improvements to be worked out (e.g., dataset sharing between agents, key management, literature search) that would make this way more capable. If anyone wants to add these in, feel free!
--------------------------------------------------------------------------------
/frontend/src/assets/react.svg:
--------------------------------------------------------------------------------
 1 | 
--------------------------------------------------------------------------------
/frontend/src/components/Notebook/NotebookCell.tsx:
--------------------------------------------------------------------------------
 1 | import { Terminal, ArrowRight } from "lucide-react";
 2 | import { ExperimentStep } from "@/lib/useExperiment";
 3 | import { StreamingMarkdown } from "../StreamingMarkdown";
 4 | 
 5 | interface NotebookCellProps {
 6 |     step: ExperimentStep;
 7 | }
 8 | 
 9 | export function NotebookCell({ step }: NotebookCellProps) {
10 |     const { type, content, id } = step;
11 | 
12 |     if (type === "thought") {
13 |         return (
14 | 
15 |
16 |
17 | Thinking 18 |
19 |
20 | 25 |
26 |
27 | ); 28 | } 29 | 30 | if (type === "code") { 31 | return ( 32 |
33 |
34 | 35 | Command 36 |
37 |
38 |
39 |                         {content}
40 |                     
41 |
42 |
43 | ); 44 | } 45 | 46 | if (type === "result") { 47 | // Handle carriage returns (\r) for progress bars (like tqdm). 48 | // We want to simulate the terminal behavior where \r moves the cursor 49 | // to the start of the line, allowing subsequent text to overwrite. 50 | // We split by \r and take the last segment for the current line context if it's a pure overwrite, 51 | // but \r can be mixed with \n. 52 | // A simple approximation is: split by \n, and for each line, process \r. 53 | 54 | const processCarriageReturns = (text: string) => { 55 | const lines = text.split('\n'); 56 | const processedLines = lines.map(line => { 57 | // If line has \r, usually we just want the text AFTER the last \r 58 | // unless that \r is followed by nothing? 59 | // Standard terminal: "Loading... 10%\rLoading... 20%" -> "Loading... 20%" 60 | // "Item 1\rItem 2" -> "Item 2" 61 | 62 | if (line.includes('\r')) { 63 | const parts = line.split('\r'); 64 | return parts[parts.length - 1]; 65 | } 66 | return line; 67 | }); 68 | return processedLines.join('\n'); 69 | }; 70 | 71 | return ( 72 |
73 |
74 | 75 | Output 76 |
77 |
78 |
79 |                         {processCarriageReturns(content)}
80 |                     
81 |
82 |
83 | ); 84 | } 85 | 86 | return null; 87 | } 88 | -------------------------------------------------------------------------------- /modal_guide.md: -------------------------------------------------------------------------------- 1 | # Modal Guide for AI Agents 2 | 3 | This guide provides a concise, accurate reference for using Modal to spin up GPUs, run code, track costs, and manage data. 4 | 5 | ## 1. Core Concepts 6 | - **App**: The unit of deployment. Defined as `app = modal.App("my-app-name")`. 7 | - **Image**: The environment (OS + Python packages). 8 | - **Function**: The code that runs remotely. Decorated with `@app.function`. 9 | - **Volume**: Persistent storage for large files (datasets, models). 10 | 11 | ## 2. Project Structure 12 | A standard Modal project is a single Python file (e.g., `experiment.py`) or a module. 13 | 14 | ```python 15 | import modal 16 | 17 | app = modal.App("experiment-01") 18 | 19 | # Define the environment 20 | image = ( 21 | modal.Image.debian_slim() 22 | .pip_install("torch", "transformers", "numpy") 23 | ) 24 | 25 | @app.function(image=image, gpu="A100") 26 | def run_experiment(params: dict): 27 | import torch 28 | print(f"Running on {torch.cuda.get_device_name(0)}") 29 | # ... experiment logic ... 30 | return {"status": "success", "loss": 0.01} 31 | 32 | @app.local_entrypoint() 33 | def main(): 34 | print("Starting remote experiment...") 35 | result = run_experiment.remote({"learning_rate": 0.001}) 36 | print(f"Result: {result}") 37 | ``` 38 | 39 | ## 3. Spinning Up GPUs 40 | Specify the `gpu` argument in the `@app.function` decorator. 41 | 42 | ### GPU Types 43 | - **H100**: `gpu="H100"` (Most powerful, scarce) 44 | - **A100**: `gpu="A100"` (Standard for LLM training/inference) 45 | - `gpu="A100-80GB"` (Force 80GB memory) 46 | - **A10G**: `gpu="A10G"` (Good price/performance for inference) 47 | - **T4**: `gpu="T4"` (Cheap, older) 48 | - **Any**: `gpu="any"` (Lowest availability latency) 49 | 50 | ### Multi-GPU 51 | Append `:N` to the type string. 52 | ```python 53 | @app.function(gpu="A100:4") # Request 4 A100s 54 | ``` 55 | 56 | ## 4. Persistent Storage (Volumes) 57 | Use `modal.Volume` to persist data across runs. 58 | 59 | ### Creating & Mounting 60 | ```python 61 | # Create/Get volume 62 | volume = modal.Volume.from_name("my-dataset-vol", create_if_missing=True) 63 | 64 | @app.function(volumes={"/data": volume}) 65 | def process_data(): 66 | # Read 67 | with open("/data/input.txt", "r") as f: 68 | data = f.read() 69 | 70 | # Write 71 | with open("/data/output.txt", "w") as f: 72 | f.write("processed") 73 | 74 | # CRITICAL: Commit changes to persist them! 75 | volume.commit() 76 | ``` 77 | 78 | ### Reloading 79 | If another function updates the volume, reload it to see changes: 80 | ```python 81 | volume.reload() 82 | ``` 83 | 84 | ## 5. Secrets & API Keys 85 | Inject environment variables securely. 86 | 87 | 1. **Create Secret** (CLI or Dashboard): 88 | `modal secret create my-huggingface-secret HF_TOKEN=hf_...` 89 | 90 | 2. **Use in Code**: 91 | ```python 92 | @app.function(secrets=[modal.Secret.from_name("my-huggingface-secret")]) 93 | def download_model(): 94 | import os 95 | token = os.environ["HF_TOKEN"] 96 | ``` 97 | 98 | ## 6. Running Code 99 | ### CLI 100 | Run the local entrypoint: 101 | ```bash 102 | modal run experiment.py 103 | ``` 104 | 105 | ### Remote Execution 106 | - **`func.remote(args)`**: Synchronous call. Returns the result. 107 | - **`func.spawn(args)`**: Asynchronous call. Returns a `FunctionCall` object. 
108 | ```python 109 | job = run_experiment.spawn(params) 110 | # ... do other work ... 111 | result = job.get() 112 | ``` 113 | 114 | ## 7. Web Endpoints 115 | Expose a function as a web endpoint (useful for agent-to-agent communication). 116 | 117 | ```python 118 | @app.function() 119 | @modal.web_endpoint(method="POST") 120 | def webhook(data: dict): 121 | return {"received": data} 122 | ``` 123 | *URL is printed to stdout on deploy.* 124 | 125 | ## 8. Observability & Costs 126 | ### Logs 127 | - **Live**: Streamed to your terminal during `modal run`. 128 | - **Dashboard**: View full logs at `https://modal.com/apps`. 129 | - **Programmatic**: Currently, logs are best consumed via the dashboard or by redirecting stdout in the function to a file on a Volume. 130 | 131 | ### Cost Tracking 132 | - **Pricing**: Usage-based (per second). 133 | - A100: ~$0.000694/sec 134 | - H100: ~$0.001097/sec 135 | - CPU: Very cheap 136 | - **Dashboard**: View "Usage & Billing" in the Modal dashboard for exact costs per app/function. 137 | - **Optimization**: 138 | - Use `modal.Image` caching (builds are cached). 139 | - Scale down to 0 automatically (serverless). 140 | 141 | ## 9. Quick Reference Checklist 142 | - [ ] **Decorator**: `@app.function(image=..., gpu=..., volumes=..., secrets=...)` 143 | - [ ] **Entrypoint**: `@app.local_entrypoint()` 144 | - [ ] **Run**: `modal run file.py` 145 | - [ ] **Persist**: `volume.commit()` after writes. 146 | - [ ] **Secrets**: `modal.Secret.from_name("name")` 147 | -------------------------------------------------------------------------------- /run_app.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | import time 4 | import webbrowser 5 | import os 6 | import signal 7 | from pathlib import Path 8 | 9 | def print_status(msg, color="white"): 10 | # ANSI colors 11 | colors = { 12 | "white": "\033[97m", 13 | "cyan": "\033[96m", 14 | "green": "\033[92m", 15 | "yellow": "\033[93m", 16 | "red": "\033[91m", 17 | "magenta": "\033[95m", 18 | } 19 | end = "\033[0m" 20 | print(f"{colors.get(color, colors['white'])}[{color.upper()}] {msg}{end}") 21 | 22 | def kill_port(port): 23 | """Kill any process listening on the specified port.""" 24 | try: 25 | # Find PID using lsof 26 | result = subprocess.run( 27 | ["lsof", "-t", "-i", f":{port}"], 28 | capture_output=True, 29 | text=True 30 | ) 31 | pids = result.stdout.strip().split('\n') 32 | 33 | for pid in pids: 34 | if pid: 35 | print_status(f"Killing existing process on port {port} (PID: {pid})...", "yellow") 36 | subprocess.run(["kill", "-9", pid], check=False) 37 | except Exception as e: 38 | # lsof might not be installed or other error, just ignore 39 | pass 40 | 41 | def ensure_venv(root_dir): 42 | """Ensure a virtual environment exists and is used.""" 43 | venv_dir = root_dir / "venv" 44 | venv_python = venv_dir / "bin" / "python" 45 | 46 | # If we are already running in the venv, continue 47 | if sys.prefix == str(venv_dir): 48 | return sys.executable 49 | 50 | print_status("Checking environment...", "cyan") 51 | 52 | # Create venv if it doesn't exist 53 | if not venv_dir.exists(): 54 | print_status("Creating virtual environment...", "yellow") 55 | subprocess.run([sys.executable, "-m", "venv", "venv"], cwd=root_dir, check=True) 56 | 57 | # Install requirements 58 | print_status("Installing backend dependencies...", "yellow") 59 | subprocess.run([str(venv_python), "-m", "pip", "install", "-r", "requirements.txt"], cwd=root_dir, check=True) 60 | 61 | # 
Re-execute this script using the venv python 62 | print_status("Switching to virtual environment...", "cyan") 63 | os.execv(str(venv_python), [str(venv_python)] + sys.argv) 64 | 65 | def main(): 66 | # Paths 67 | root_dir = Path(__file__).parent.resolve() 68 | frontend_dir = root_dir / "frontend" 69 | 70 | # 0. Ensure Venv 71 | ensure_venv(root_dir) 72 | 73 | # 1. Cleanup Ports 74 | kill_port(8000) 75 | kill_port(5173) 76 | 77 | # 2. Install Frontend Dependencies if needed 78 | if not (frontend_dir / "node_modules").exists(): 79 | print_status("Installing frontend dependencies...", "cyan") 80 | subprocess.run(["npm", "install"], cwd=frontend_dir, check=True) 81 | 82 | # 3. Start Backend 83 | print_status("Starting Backend API...", "green") 84 | backend_env = os.environ.copy() 85 | # Ensure backend sees the venv 86 | backend_env["VIRTUAL_ENV"] = str(root_dir / "venv") 87 | backend_env["PATH"] = f"{root_dir}/venv/bin:{backend_env['PATH']}" 88 | 89 | backend_process = subprocess.Popen( 90 | [sys.executable, "api_server.py"], 91 | cwd=root_dir, 92 | env=backend_env 93 | ) 94 | 95 | # 4. Start Frontend 96 | print_status("Starting Frontend Dev Server...", "green") 97 | frontend_process = subprocess.Popen( 98 | ["npm", "run", "dev", "--", "--port", "5173"], 99 | cwd=frontend_dir, 100 | stdout=subprocess.PIPE, 101 | stderr=subprocess.PIPE, 102 | text=True 103 | ) 104 | 105 | # Wait a bit for servers to spin up 106 | time.sleep(3) 107 | 108 | # 5. Open Browser 109 | print_status("Opening Fractal Notebook...", "magenta") 110 | webbrowser.open("http://localhost:5173") 111 | 112 | print_status("System Running. Press Ctrl+C to stop.", "cyan") 113 | 114 | try: 115 | while True: 116 | time.sleep(1) 117 | if backend_process.poll() is not None: 118 | print_status("Backend process exited unexpectedly.", "red") 119 | break 120 | if frontend_process.poll() is not None: 121 | print_status("Frontend process exited unexpectedly.", "red") 122 | # Print frontend error if it failed 123 | if frontend_process.stderr: 124 | print(frontend_process.stderr.read()) 125 | break 126 | except KeyboardInterrupt: 127 | print_status("\nStopping system...", "yellow") 128 | finally: 129 | backend_process.terminate() 130 | frontend_process.terminate() 131 | try: 132 | backend_process.wait(timeout=5) 133 | frontend_process.wait(timeout=5) 134 | except subprocess.TimeoutExpired: 135 | backend_process.kill() 136 | frontend_process.kill() 137 | print_status("Shutdown complete.", "green") 138 | 139 | if __name__ == "__main__": 140 | main() 141 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | from dotenv import load_dotenv 5 | 6 | from agent import run_experiment_loop 7 | from logger import print_status 8 | 9 | 10 | def main(): 11 | # Load environment variables from .env file 12 | load_dotenv() 13 | 14 | # Debug: show which credentials are available 15 | import sys 16 | google_key = os.environ.get("GOOGLE_API_KEY", "") 17 | anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "") 18 | modal_id = os.environ.get("MODAL_TOKEN_ID", "") 19 | modal_secret = os.environ.get("MODAL_TOKEN_SECRET", "") 20 | print(f"[DEBUG] Credentials check: GOOGLE_API_KEY={'set' if google_key else 'missing'} (len={len(google_key)}), " 21 | f"ANTHROPIC_API_KEY={'set' if anthropic_key else 'missing'}, " 22 | f"MODAL_TOKEN={'set' if modal_id and modal_secret else 'missing'}", file=sys.stderr) 
23 | 24 | parser = argparse.ArgumentParser( 25 | description="AI Experiment Agent CLI (single-agent and orchestrator modes)" 26 | ) 27 | parser.add_argument( 28 | "task", 29 | type=str, 30 | help=( 31 | "In 'single' mode: the hypothesis to verify.\n" 32 | "In 'orchestrator' mode: the high-level research task to investigate." 33 | ), 34 | ) 35 | parser.add_argument( 36 | "--gpu", 37 | type=str, 38 | default=None, 39 | help="GPU type to request (e.g., 'T4', 'A10G', 'A100', 'any').", 40 | ) 41 | parser.add_argument( 42 | "--mode", 43 | type=str, 44 | choices=["single", "orchestrator"], 45 | default="single", 46 | help=( 47 | "Execution mode: " 48 | "'single' runs a single-researcher agent (original behavior); " 49 | "'orchestrator' runs the higher-level multi-agent orchestrator." 50 | ), 51 | ) 52 | parser.add_argument( 53 | "--num-agents", 54 | type=int, 55 | default=3, 56 | help="(orchestrator) Number of initial single-researcher agents to launch.", 57 | ) 58 | parser.add_argument( 59 | "--max-rounds", 60 | type=int, 61 | default=3, 62 | help="(orchestrator) Maximum number of orchestration rounds.", 63 | ) 64 | parser.add_argument( 65 | "--max-parallel", 66 | type=int, 67 | default=2, 68 | help=( 69 | "(orchestrator) Maximum number of experiments to run in parallel " 70 | "in a single wave of tool calls." 71 | ), 72 | ) 73 | parser.add_argument( 74 | "--test-mode", 75 | action="store_true", 76 | help="Run in test mode with mock data (no LLM/GPU usage).", 77 | ) 78 | parser.add_argument( 79 | "--model", 80 | type=str, 81 | choices=["gemini-3-pro-preview", "claude-opus-4-5"], 82 | default="gemini-3-pro-preview", 83 | help=( 84 | "LLM model to use: " 85 | "'gemini-3-pro-preview' (default) or 'claude-opus-4-5'." 86 | ), 87 | ) 88 | 89 | args = parser.parse_args() 90 | 91 | # Preserve existing behavior by default: single agent mode. 92 | if args.mode == "single": 93 | print_status("Initializing Single Researcher Agent...", "bold cyan") 94 | 95 | try: 96 | # Record GPU preference globally for sandbox creation 97 | import agent as agent_module 98 | 99 | agent_module._selected_gpu = args.gpu 100 | run_experiment_loop(args.task, test_mode=args.test_mode, model=args.model) 101 | except KeyboardInterrupt: 102 | print_status("\nExperiment interrupted by user.", "bold red") 103 | sys.exit(0) 104 | except Exception as e: 105 | import traceback 106 | print_status(f"\nFatal Error: {e}", "bold red") 107 | print(f"[ERROR] Fatal Error: {e}", file=sys.stderr) 108 | traceback.print_exc(file=sys.stderr) 109 | sys.exit(1) 110 | else: 111 | # Multi-agent orchestrator mode. 
112 |         print_status("Initializing Orchestrator Agent...", "bold cyan")
113 | 
114 |         try:
115 |             from orchestrator import run_orchestrator_loop
116 | 
117 |             run_orchestrator_loop(
118 |                 research_task=args.task,
119 |                 num_initial_agents=args.num_agents,
120 |                 max_rounds=args.max_rounds,
121 |                 default_gpu=args.gpu,
122 |                 max_parallel_experiments=args.max_parallel,
123 |                 test_mode=args.test_mode,
124 |                 model=args.model,
125 |             )
126 |         except KeyboardInterrupt:
127 |             print_status("\nOrchestrated experiment interrupted by user.", "bold red")
128 |             sys.exit(0)
129 |         except Exception as e:
130 |             import traceback
131 |             print_status(f"\nFatal Error (orchestrator mode): {e}", "bold red")
132 |             print(f"[ERROR] Fatal Error (orchestrator): {e}", file=sys.stderr)
133 |             traceback.print_exc(file=sys.stderr)
134 |             sys.exit(1)
135 | 
136 | 
137 | if __name__ == "__main__":
138 |     main()
139 | 
--------------------------------------------------------------------------------
/gemini_3_pro_guide.md:
--------------------------------------------------------------------------------
 1 | # Gemini 3 Pro: The Complete Developer's Guide
 2 | 
 3 | Gemini 3 Pro is Google's most advanced AI model, featuring state-of-the-art reasoning capabilities. This guide covers everything you need to build agents, run the model, and track costs using the `google-genai` SDK.
 4 | 
 5 | ## 1. Setup
 6 | 
 7 | To use Gemini 3 Pro, you must use the `google-genai` SDK (version 1.51.0 or higher).
 8 | 
 9 | ```bash
10 | pip install -U google-genai
11 | ```
12 | 
13 | Get your API key from [Google AI Studio](https://aistudio.google.com/).
14 | 
15 | ## 2. Running the Model
16 | 
17 | Gemini 3 Pro introduces a new parameter: `thinking_level`. This controls the depth of the model's internal reasoning process.
18 | 
19 | * **`high`** (Default): Maximum reasoning depth. Best for complex tasks, coding, and math. Higher latency and cost.
20 | * **`low`**: Faster, lower cost. Good for simple instruction following and chat.
21 | 
22 | > [!IMPORTANT]
23 | > You cannot disable "thinking" completely for Gemini 3 Pro.
24 | 
25 | ### Basic Example
26 | 
27 | ```python
28 | from google import genai
29 | from google.genai import types
30 | import os
31 | 
32 | client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
33 | 
34 | response = client.models.generate_content(
35 |     model="gemini-3-pro-preview",
36 |     contents="Explain the concept of quantum entanglement to a 5-year-old.",
37 |     config=types.GenerateContentConfig(
38 |         thinking_level="HIGH",  # Options: "LOW", "HIGH" (default)
39 |     )
40 | )
41 | 
42 | print(response.text)
43 | ```
44 | 
45 | ## 3. Building an Agent (Tool Use)
46 | 
47 | Gemini 3 Pro supports advanced tool use (function calling). You define Python functions, pass them to the model, and the model decides when to call them.
48 | 
49 | ### Step-by-Step Agent Example
50 | 
51 | ```python
52 | from google import genai
53 | from google.genai import types
54 | import os
55 | 
56 | # 1. Define the tools
57 | def get_weather(location: str):
58 |     """Get the current weather for a given location."""
59 |     # In a real app, call a weather API here
60 |     return {"location": location, "temperature": "72", "condition": "Sunny"}
61 | 
62 | def get_stock_price(ticker: str):
63 |     """Get the current stock price for a given ticker symbol."""
64 |     # In a real app, call a stock API here
65 |     return {"ticker": ticker, "price": "150.25", "currency": "USD"}
66 | 
67 | # 2. Initialize Client
68 | client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
69 | 
70 | # 3. Create the tool configuration
71 | tools = [get_weather, get_stock_price]
72 | 
73 | # 4. Run the model with tools
74 | response = client.models.generate_content(
75 |     model="gemini-3-pro-preview",
76 |     contents="What's the weather in New York and how is Google's stock doing?",
77 |     config=types.GenerateContentConfig(
78 |         tools=tools,
79 |         thinking_level="HIGH"
80 |     )
81 | )
82 | 
83 | # 5. Handle the response (Automatic function calling is handled by the SDK in many cases,
84 | # but here is how you inspect the tool calls if you need to execute them manually or debug)
85 | for part in response.candidates[0].content.parts:
86 |     if part.function_call:
87 |         print(f"Model requested tool: {part.function_call.name}")
88 |         print(f"Arguments: {part.function_call.args}")
89 | 
90 |         # Execute the tool (simplified logic)
91 |         tool_name = part.function_call.name
92 |         tool_args = part.function_call.args
93 | 
94 |         if tool_name == "get_weather":
95 |             result = get_weather(**tool_args)
96 |         elif tool_name == "get_stock_price":
97 |             result = get_stock_price(**tool_args)
98 | 
99 |         print(f"Tool Result: {result}")
100 | ```
101 | 
102 | > [!TIP]
103 | > For a fully autonomous agent, you would feed the tool results back into the model in a loop until the model generates a final text response.
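A minimal sketch of that loop, reusing `get_weather` and `client` from the example above. The request/response plumbing follows the `google-genai` SDK, but treat it as an outline rather than a drop-in implementation:

```python
# Feed tool results back until the model stops requesting tools.
contents = [
    types.Content(role="user",
                  parts=[types.Part.from_text(text="What's the weather in New York?")])
]

while True:
    response = client.models.generate_content(
        model="gemini-3-pro-preview",
        contents=contents,
        config=types.GenerateContentConfig(tools=[get_weather]),
    )
    part = response.candidates[0].content.parts[0]
    if not part.function_call:
        print(response.text)  # final text answer
        break
    # Execute the requested tool, then append both the model's call and the result.
    result = get_weather(**part.function_call.args)
    contents.append(response.candidates[0].content)
    contents.append(types.Content(
        role="user",
        parts=[types.Part.from_function_response(
            name=part.function_call.name, response=result)],
    ))
```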
Summary of Key Differences 141 | 142 | | Feature | Gemini 1.5 Pro | Gemini 3 Pro | 143 | | :--- | :--- | :--- | 144 | | **Reasoning** | Standard | **Advanced (Thinking Process)** | 145 | | **Thinking Control** | N/A | `thinking_level="LOW" | "HIGH"` | 146 | | **Token Metadata** | Standard | Includes `thoughts_token_count` | 147 | | **SDK Requirement** | Older versions OK | Requires `google-genai >= 1.51.0` | 148 | -------------------------------------------------------------------------------- /insights.py: -------------------------------------------------------------------------------- 1 | """Lightweight sidebar summarizer for streaming agent thoughts. 2 | 3 | This helper stays **separate** from the main agents/orchestrator logic. 4 | It only consumes the recent public transcript (last ~5 steps) and asks a 5 | cheaper Gemini model (no thinking mode) to condense it into a tiny finding 6 | plus an optional chart spec the frontend can render. 7 | """ 8 | 9 | from __future__ import annotations 10 | 11 | import json 12 | import os 13 | import logging 14 | from typing import Any, Dict, List, Optional 15 | 16 | from google import genai 17 | from google.genai import types 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | _client: Optional[genai.Client] = None 23 | 24 | 25 | def _get_client() -> genai.Client: 26 | """Lazily create a single Gemini client (re-used across requests).""" 27 | 28 | global _client 29 | if _client is None: 30 | api_key = os.environ.get("GOOGLE_API_KEY") 31 | if not api_key: 32 | raise RuntimeError("GOOGLE_API_KEY is not set") 33 | _client = genai.Client(api_key=api_key) 34 | return _client 35 | 36 | 37 | def _build_prompt(history: List[Dict[str, str]]) -> str: 38 | """Format the last few steps into a compact textual context.""" 39 | 40 | lines: List[str] = [] 41 | for item in history[-5:]: # hard cap: last 5 turns only 42 | role = (item.get("type") or "text").upper() 43 | content = (item.get("content") or "").strip() 44 | # Trim individual snippets to keep context small and cheap 45 | if len(content) > 1600: 46 | content = content[:1600] + "\n...[truncated]" 47 | lines.append(f"[{role}]\n{content}") 48 | 49 | return "\n\n".join(lines) 50 | 51 | 52 | def summarize_agent_findings( 53 | agent_id: str, 54 | history: List[Dict[str, str]], 55 | ) -> Dict[str, Any]: 56 | """Return a JSON-friendly finding + optional chart for a single agent. 57 | 58 | Args: 59 | agent_id: Identifier of the sub-agent (for logging only). 60 | history: List of dicts with at least ``type`` and ``content`` keys. 61 | Only the 5 most recent entries are used. 62 | 63 | Returns: 64 | {"summary": str, "chart": Optional[dict]} 65 | """ 66 | prompt = _build_prompt(history) 67 | 68 | if not prompt.strip(): 69 | return {"summary": "Waiting for agent output...", "chart": None} 70 | 71 | system_instruction = ( 72 | "You distill an autonomous research agent's most recent scratch notes " 73 | "into crisp sidebar findings. Keep it short (<=120 words), prefer " 74 | "bullets, surface concrete numbers, and call out the next action.\n" 75 | "If you can see numeric progressions (loss/accuracy/score vs step), " 76 | "add a compact chart spec. Use simple types only: line or bar.\n" 77 | "Respond as JSON with keys: summary (markdown-safe string) and optional " 78 | "chart. Chart shape: {\"title\": str, \"type\": \"line\"|\"bar\", " 79 | "\"labels\": [str], \"series\":[{\"name\": str, \"values\": [number]}]}. " 80 | "Omit chart if no numeric series are present." 
81 |     )
82 | 
83 |     client = _get_client()
84 | 
85 |     try:
86 |         response = client.models.generate_content(
87 |             model="gemini-3-pro-preview",  # no thinking config: keeps this sidebar call fast and cheap
88 |             contents=[
89 |                 types.Content(
90 |                     role="user",
91 |                     parts=[types.Part.from_text(text=prompt)],
92 |                 )
93 |             ],
94 |             config=types.GenerateContentConfig(
95 |                 system_instruction=system_instruction,
96 |                 temperature=0.2,
97 |                 max_output_tokens=4000,
98 |             ),
99 |         )
100 |     except Exception as e:
101 |         logger.error("Gemini summarize failed for agent %s: %s", agent_id, e)
102 |         raise
103 | 
104 |     raw_text = ""
105 |     try:
106 |         # Prefer the convenience accessor if available
107 |         raw_text = getattr(response, "text", "") or ""
108 |         if not raw_text:
109 |             candidate = response.candidates[0]
110 |             if candidate.content and candidate.content.parts:
111 |                 for part in candidate.content.parts:
112 |                     if getattr(part, "text", None):
113 |                         raw_text += part.text
114 |                     elif getattr(part, "inline_data", None) and getattr(part.inline_data, "data", None):
115 |                         try:
116 |                             raw_text += part.inline_data.data.decode("utf-8", errors="ignore")
117 |                         except Exception:
118 |                             pass
119 |         raw_text = raw_text.strip()
120 |     except Exception as e:
121 |         logger.warning("Failed to extract text for agent %s: %s", agent_id, e)
122 | 
123 |     result: Dict[str, Any]
124 |     try:
125 |         result = json.loads(raw_text)
126 |     except Exception as json_err:
127 |         logger.debug(
128 |             "summarize_agent: json decode failed for agent=%s err=%s raw_sample=%s",
129 |             agent_id,
130 |             json_err,
131 |             (raw_text[:200] + ("..." if len(raw_text) > 200 else "")),
132 |         )
133 |         # Heuristic: try to salvage a JSON-ish blob between the first { and last }
134 |         salvaged = None
135 |         if "{" in raw_text and "}" in raw_text:
136 |             candidate_blob = raw_text[raw_text.find("{") : raw_text.rfind("}") + 1]
137 |             try:
138 |                 salvaged = json.loads(candidate_blob)
139 |             except Exception:
140 |                 pass
141 | 
142 |         if salvaged and isinstance(salvaged, dict):
143 |             result = salvaged
144 |         else:
145 |             # Fallback: treat the raw text as the summary string.
146 |             result = {"summary": raw_text or "No summary produced", "chart": None}
147 | 
148 |     # Ensure required fields exist and are JSON-serializable
149 |     if "summary" not in result or not isinstance(result.get("summary"), str):
150 |         result["summary"] = raw_text or "No summary produced"
151 |     if "chart" in result and result["chart"] is not None:
152 |         if not isinstance(result["chart"], dict):
153 |             result["chart"] = None
154 | 
155 |     # Trim overly verbose summaries so the rail stays tight
156 |     if result.get("summary") and len(result["summary"]) > 800:
157 |         result["summary"] = result["summary"][:800] + "..."
158 | 
159 |     return result
160 | 
--------------------------------------------------------------------------------
/frontend/src/lib/api.ts:
--------------------------------------------------------------------------------
1 | // In production (Railway), the API is served from the same origin
2 | // In development, we use localhost:8000
3 | export const API_BASE_URL = import.meta.env.DEV ?
"http://localhost:8000" : ""; 4 | 5 | // LocalStorage key for user credentials 6 | const CREDENTIALS_STORAGE_KEY = "ai_researcher_credentials"; 7 | 8 | export interface UserCredentials { 9 | google_api_key?: string; 10 | anthropic_api_key?: string; 11 | modal_token_id?: string; 12 | modal_token_secret?: string; 13 | } 14 | 15 | export interface SingleExperimentRequest { 16 | task: string; 17 | gpu?: string; 18 | model?: string; 19 | test_mode?: boolean; 20 | credentials?: UserCredentials; 21 | } 22 | 23 | export interface OrchestratorExperimentRequest { 24 | task: string; 25 | gpu?: string; 26 | model?: string; 27 | num_agents: number; 28 | max_rounds: number; 29 | max_parallel: number; 30 | test_mode?: boolean; 31 | credentials?: UserCredentials; 32 | } 33 | 34 | export type ExperimentRequest = 35 | | SingleExperimentRequest 36 | | OrchestratorExperimentRequest; 37 | 38 | export interface LogEvent { 39 | type: "line" | "summary"; 40 | stream?: "stdout" | "stderr"; 41 | timestamp: string; 42 | raw?: string; 43 | plain?: string; 44 | exit_code?: number; 45 | duration_seconds?: number; 46 | } 47 | 48 | export interface ChartSeries { 49 | name: string; 50 | values: number[]; 51 | } 52 | 53 | export interface ChartSpec { 54 | title?: string; 55 | type: "line" | "bar"; 56 | labels: string[]; 57 | series: ChartSeries[]; 58 | } 59 | 60 | export interface AgentSummaryRequest { 61 | agent_id: string; 62 | history: { type: "thought" | "code" | "result" | "text"; content: string }[]; 63 | } 64 | 65 | export interface AgentSummaryResponse { 66 | summary: string; 67 | chart?: ChartSpec | null; 68 | } 69 | 70 | export interface CredentialStatus { 71 | hasGoogleApiKey: boolean; 72 | hasAnthropicApiKey: boolean; 73 | hasModalToken: boolean; 74 | } 75 | 76 | export interface CredentialUpdatePayload { 77 | googleApiKey?: string; 78 | anthropicApiKey?: string; 79 | modalTokenId?: string; 80 | modalTokenSecret?: string; 81 | } 82 | 83 | // --------------------------------------------------------------------------- 84 | // Local credential storage (browser localStorage) 85 | // --------------------------------------------------------------------------- 86 | 87 | export function getStoredCredentials(): UserCredentials { 88 | try { 89 | const stored = localStorage.getItem(CREDENTIALS_STORAGE_KEY); 90 | if (stored) { 91 | return JSON.parse(stored); 92 | } 93 | } catch (e) { 94 | console.warn("Failed to read stored credentials:", e); 95 | } 96 | return {}; 97 | } 98 | 99 | export function storeCredentials(creds: CredentialUpdatePayload): void { 100 | const toStore: UserCredentials = { 101 | google_api_key: creds.googleApiKey, 102 | anthropic_api_key: creds.anthropicApiKey, 103 | modal_token_id: creds.modalTokenId, 104 | modal_token_secret: creds.modalTokenSecret, 105 | }; 106 | // Only store non-empty values 107 | const filtered = Object.fromEntries( 108 | Object.entries(toStore).filter(([, v]) => v && v.trim()) 109 | ); 110 | localStorage.setItem(CREDENTIALS_STORAGE_KEY, JSON.stringify(filtered)); 111 | } 112 | 113 | export function getLocalCredentialStatus(): CredentialStatus { 114 | const creds = getStoredCredentials(); 115 | return { 116 | hasGoogleApiKey: Boolean(creds.google_api_key?.trim()), 117 | hasAnthropicApiKey: Boolean(creds.anthropic_api_key?.trim()), 118 | hasModalToken: Boolean(creds.modal_token_id?.trim() && creds.modal_token_secret?.trim()), 119 | }; 120 | } 121 | 122 | // --------------------------------------------------------------------------- 123 | // API functions 124 | // 
---------------------------------------------------------------------------
125 | 
126 | export async function streamExperiment(
127 |   endpoint: "/api/experiments/single/stream" | "/api/experiments/orchestrator/stream",
128 |   payload: ExperimentRequest,
129 |   onData: (data: LogEvent) => void,
130 |   onError: (error: Error) => void,
131 |   onComplete: () => void
132 | ) {
133 |   try {
134 |     // Attach stored credentials to the request
135 |     const credentials = getStoredCredentials();
136 |     const payloadWithCreds = { ...payload, credentials };
137 | 
138 |     const response = await fetch(`${API_BASE_URL}${endpoint}`, {
139 |       method: "POST",
140 |       headers: {
141 |         "Content-Type": "application/json",
142 |       },
143 |       body: JSON.stringify(payloadWithCreds),
144 |     });
145 | 
146 |     if (!response.ok) {
147 |       throw new Error(`API Error: ${response.status} ${response.statusText}`);
148 |     }
149 | 
150 |     if (!response.body) {
151 |       throw new Error("No response body");
152 |     }
153 | 
154 |     const reader = response.body.getReader();
155 |     const decoder = new TextDecoder();
156 |     let buffer = "";
157 | 
158 |     while (true) {
159 |       const { done, value } = await reader.read();
160 |       if (done) break;
161 | 
162 |       buffer += decoder.decode(value, { stream: true });
163 |       const lines = buffer.split("\n");
164 |       buffer = lines.pop() || "";
165 | 
166 |       for (const line of lines) {
167 |         if (!line.trim()) continue;
168 |         try {
169 |           const event = JSON.parse(line);
170 |           onData(event);
171 |         } catch (e) {
172 |           console.warn("Failed to parse JSON line:", line, e);
173 |         }
174 |       }
175 |     }
176 | 
177 |     onComplete();
178 |   } catch (error) {
179 |     onError(error instanceof Error ? error : new Error(String(error)));
180 |   }
181 | }
182 | 
183 | export async function summarizeAgent(payload: AgentSummaryRequest): Promise<AgentSummaryResponse> {
184 |   const response = await fetch(`${API_BASE_URL}/api/agents/summarize`, {
185 |     method: "POST",
186 |     headers: { "Content-Type": "application/json" },
187 |     body: JSON.stringify(payload),
188 |   });
189 | 
190 |   if (!response.ok) {
191 |     throw new Error(`Summarizer error: ${response.status} ${response.statusText}`);
192 |   }
193 | 
194 |   const data = (await response.json()) as AgentSummaryResponse;
195 |   return data;
196 | }
197 | 
198 | export async function fetchCredentialStatus(): Promise<CredentialStatus> {
199 |   // In multi-user mode, we check localStorage instead of server
200 |   // Server credentials are only used as fallback
201 |   const localStatus = getLocalCredentialStatus();
202 | 
203 |   // Also check server for fallback (e.g., if server has env vars set)
204 |   try {
205 |     const response = await fetch(`${API_BASE_URL}/api/credentials/status`);
206 |     if (response.ok) {
207 |       const data = await response.json();
208 |       return {
209 |         hasGoogleApiKey: localStatus.hasGoogleApiKey || Boolean(data.has_google_api_key),
210 |         hasAnthropicApiKey: localStatus.hasAnthropicApiKey || Boolean(data.has_anthropic_api_key),
211 |         hasModalToken: localStatus.hasModalToken || Boolean(data.has_modal_token),
212 |       };
213 |     }
214 |   } catch {
215 |     // Server check failed, just use local status
216 |   }
217 | 
218 |   return localStatus;
219 | }
220 | 
221 | export async function saveCredentials(payload: CredentialUpdatePayload): Promise<CredentialStatus> {
222 |   // Store credentials locally in the browser
223 |   storeCredentials(payload);
224 | 
225 |   // Return the new status based on what we just stored + what was already there
226 |   return fetchCredentialStatus();
227 | }
228 | 
--------------------------------------------------------------------------------
/frontend/src/components/Notebook/AgentNotebook.tsx:
--------------------------------------------------------------------------------
1 | import { useEffect, useRef, useState } from "react";
2 | import { createPortal } from "react-dom";
3 | import { Maximize2, Minimize2 } from "lucide-react";
4 | import { AgentState } from "@/lib/useExperiment";
5 | import { NotebookCell } from "./NotebookCell";
6 | import { StatusBadge } from "../StatusBadge";
7 | import { cn } from "@/lib/utils";
8 | 
9 | interface AgentNotebookProps {
10 |   agent: AgentState;
11 | }
12 | 
13 | function AgentNotebookContent({
14 |   agent,
15 |   isExpanded,
16 |   onToggleExpand,
17 |   isModal = false
18 | }: {
19 |   agent: AgentState;
20 |   isExpanded: boolean;
21 |   onToggleExpand: () => void;
22 |   isModal?: boolean;
23 | }) {
24 |   const scrollRef = useRef<HTMLDivElement | null>(null);
25 |   const autoScrollEnabledRef = useRef(true);
26 |   const isProgrammaticScrollRef = useRef(false);
27 |   const timeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);
28 | 
29 |   const scrollToBottom = () => {
30 |     if (scrollRef.current) {
31 |       isProgrammaticScrollRef.current = true;
32 |       scrollRef.current.scrollTop = scrollRef.current.scrollHeight;
33 |       // Small timeout to ensure the onScroll event fired by this change
34 |       // is ignored by our handler.
35 |       setTimeout(() => {
36 |         isProgrammaticScrollRef.current = false;
37 |       }, 50);
38 |     }
39 |   };
40 | 
41 |   // Auto-scroll effect
42 |   useEffect(() => {
43 |     // We depend on the entire agent object to catch streaming updates
44 |     if (autoScrollEnabledRef.current) {
45 |       scrollToBottom();
46 |     }
47 |   }, [agent]);
48 | 
49 |   const handleScroll = () => {
50 |     if (!scrollRef.current) return;
51 | 
52 |     // Ignore scroll events triggered by our auto-scroll
53 |     if (isProgrammaticScrollRef.current) {
54 |       return;
55 |     }
56 | 
57 |     const { scrollTop, scrollHeight, clientHeight } = scrollRef.current;
58 |     const isAtBottom = scrollHeight - scrollTop - clientHeight < 50;
59 | 
60 |     if (isAtBottom) {
61 |       // User is at the bottom, resume auto-scroll
62 |       autoScrollEnabledRef.current = true;
63 |       if (timeoutRef.current) {
64 |         clearTimeout(timeoutRef.current);
65 |         timeoutRef.current = null;
66 |       }
67 |     } else {
68 |       // User scrolled away
69 |       autoScrollEnabledRef.current = false;
70 | 
71 |       // Set/Reset 10s timeout to resume auto-scroll
72 |       if (timeoutRef.current) {
73 |         clearTimeout(timeoutRef.current);
74 |       }
75 | 
76 |       timeoutRef.current = setTimeout(() => {
77 |         autoScrollEnabledRef.current = true;
78 |         // Optional: snap back to bottom immediately when timer fires?
79 |         // The requirement says "goes back to auto-scroll-to-bottom",
80 |         // which we interpret as re-enabling the behavior.
81 |         // We'll also snap to bottom to make it clear the mode is back.
82 |         scrollToBottom();
83 |       }, 10000);
84 |     }
85 |   };
86 | 
87 |   return (
88 | 
92 | {/* Header - Ultra Minimal */} 93 |
94 |
95 | 96 | Agent {agent.id} 97 | 98 |
99 | 100 | {agent.gpu || "CPU"} 101 | 102 |
103 |
104 | 105 | 111 |
112 |
113 | 114 | {/* Hypothesis - Clean & Typography focused */} 115 | {agent.hypothesis && ( 116 |
117 |
118 | Objective 119 |
120 |
124 | {agent.hypothesis} 125 |
126 |
127 | )} 128 | 129 | {/* Notebook Content */} 130 |
138 |
142 | {agent.steps.length === 0 ? ( 143 |
144 |
145 | Initializing Environment 146 |
147 | ) : ( 148 | agent.steps.map((step) => ( 149 | 150 | )) 151 | )} 152 |
153 |
154 |
155 | ); 156 | } 157 | 158 | export function AgentNotebook({ agent }: AgentNotebookProps) { 159 | const [isExpanded, setIsExpanded] = useState(false); 160 | 161 | // Effect to handle body scroll locking when expanded 162 | useEffect(() => { 163 | if (isExpanded) { 164 | document.body.style.overflow = "hidden"; 165 | } else { 166 | document.body.style.overflow = "unset"; 167 | } 168 | return () => { 169 | document.body.style.overflow = "unset"; 170 | }; 171 | }, [isExpanded]); 172 | 173 | return ( 174 | <> 175 | {/* Default View */} 176 |
177 | setIsExpanded(!isExpanded)} 181 | /> 182 |
183 | 184 | {/* Expanded Modal View */} 185 | {isExpanded && createPortal( 186 |
187 |
188 | setIsExpanded(!isExpanded)} 192 | isModal={true} 193 | /> 194 |
195 |
, 196 | document.body 197 | )} 198 | 199 | ); 200 | } 201 | -------------------------------------------------------------------------------- /frontend/src/components/FindingsRail.tsx: -------------------------------------------------------------------------------- 1 | import { AgentState, AgentInsight } from "@/lib/useExperiment"; 2 | import { ChartSpec } from "@/lib/api"; 3 | import { formatDistanceToNow } from "date-fns"; 4 | import { cn } from "@/lib/utils"; 5 | import ReactMarkdown from "react-markdown"; 6 | import remarkGfm from "remark-gfm"; 7 | import { motion, AnimatePresence } from "framer-motion"; 8 | 9 | type InsightWithAgent = AgentInsight & { agentId: string; gpu?: string }; 10 | 11 | interface FindingsRailProps { 12 | agents: Record; 13 | } 14 | 15 | function MiniChart({ chart }: { chart: ChartSpec }) { 16 | const width = 260; 17 | const height = 140; // slightly taller to fit axis labels 18 | const padding = { top: 14, right: 10, bottom: 28, left: 46 }; 19 | 20 | const series = chart.series?.[0]; 21 | if (!series || !Array.isArray(series.values) || series.values.length === 0) return null; 22 | 23 | const values = series.values.map((v) => Number(v)).filter((v) => Number.isFinite(v)); 24 | if (values.length === 0) return null; 25 | 26 | const min = Math.min(...values); 27 | const max = Math.max(...values); 28 | const span = max - min || 1; 29 | 30 | const innerWidth = width - padding.left - padding.right; 31 | const innerHeight = height - padding.top - padding.bottom; 32 | 33 | const formatNumber = (val: number) => { 34 | const abs = Math.abs(val); 35 | if (abs >= 1_000_000_000) return `${(val / 1_000_000_000).toFixed(1)}b`; 36 | if (abs >= 1_000_000) return `${(val / 1_000_000).toFixed(1)}m`; 37 | if (abs >= 1_000) return `${(val / 1_000).toFixed(1)}k`; 38 | if (abs >= 100) return val.toFixed(0); 39 | if (abs >= 1) return val.toFixed(2); 40 | return val.toPrecision(2); 41 | }; 42 | 43 | const pts = values.map((v, idx) => { 44 | const x = padding.left + (idx / Math.max(values.length - 1, 1)) * innerWidth; 45 | const y = padding.top + (1 - (v - min) / span) * innerHeight; 46 | return `${x},${y}`; 47 | }); 48 | 49 | // Respect labels if they match the series length; otherwise fall back to indices. 50 | const xLabels = 51 | Array.isArray(chart.labels) && chart.labels.length === values.length 52 | ? chart.labels 53 | : values.map((_, idx) => `${idx + 1}`); 54 | 55 | const xTicks = (() => { 56 | const maxTicks = 4; // keep tiny chart readable 57 | const step = Math.max(1, Math.ceil(xLabels.length / maxTicks)); 58 | const ticks: { idx: number; label: string; x: number }[] = []; 59 | for (let i = 0; i < xLabels.length; i += step) { 60 | const x = padding.left + (i / Math.max(xLabels.length - 1, 1)) * innerWidth; 61 | ticks.push({ idx: i, label: xLabels[i], x }); 62 | } 63 | // Always include the last label for clarity 64 | if (ticks[ticks.length - 1]?.idx !== xLabels.length - 1) { 65 | const i = xLabels.length - 1; 66 | const x = padding.left + (i / Math.max(xLabels.length - 1, 1)) * innerWidth; 67 | ticks.push({ idx: i, label: xLabels[i], x }); 68 | } 69 | return ticks; 70 | })(); 71 | 72 | const yTicks = [0, 0.5, 1].map((t) => ({ 73 | value: min + span * t, 74 | y: padding.top + (1 - t) * innerHeight, 75 | })); 76 | 77 | const bars = chart.type === "bar"; 78 | 79 | return ( 80 |
81 |
82 | {chart.title || "Signal"} 83 | {series.name || "metric"} 84 |
85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | {/* Grid + axes */} 95 | 96 | {yTicks.map(({ y }, i) => ( 97 | 98 | ))} 99 | 100 | 101 | 102 | 103 | {/* Y-axis labels */} 104 | 105 | {yTicks.map(({ y, value }, i) => ( 106 | 107 | {formatNumber(value)} 108 | 109 | ))} 110 | 111 | 112 | {/* X-axis labels */} 113 | 114 | {xTicks.map(({ x, label }, i) => ( 115 | 116 | {label} 117 | 118 | ))} 119 | 120 | 121 | {bars ? ( 122 | values.map((v, idx) => { 123 | const barWidth = innerWidth / Math.max(values.length * 1.4, 1); 124 | const x = padding.left + idx * (innerWidth / Math.max(values.length - 1, 1)); 125 | const y = padding.top + (1 - (v - min) / span) * innerHeight; 126 | const h = height - padding.bottom - y; 127 | return ( 128 | 137 | ); 138 | }) 139 | ) : ( 140 | 148 | )} 149 | 150 |
151 | ); 152 | } 153 | 154 | export function FindingsRail({ agents }: FindingsRailProps) { 155 | const insights: InsightWithAgent[] = Object.values(agents) 156 | .flatMap((agent) => (agent.insights || []).map((insight) => ({ ...insight, agentId: agent.id, gpu: agent.gpu }))) 157 | .sort((a, b) => b.timestamp - a.timestamp) 158 | .slice(0, 24); // Cap to avoid ever-growing rail 159 | 160 | return ( 161 | 162 | {insights.length > 0 && ( 163 | 170 |
171 |
172 |
173 |

Findings

174 |

Live distillations of sub-agents—posted whenever they think.

175 |
176 | 177 |
178 | {insights.map((insight) => ( 179 |
186 |
187 | Agent {insight.agentId} 188 | 189 | {formatDistanceToNow(new Date(insight.timestamp), { addSuffix: true })} 190 | 191 |
192 |
193 | 194 | {insight.summary} 195 | 196 |
197 | {insight.chart && insight.chart.series?.length ? ( 198 |
199 | 200 |
201 | ) : null} 202 |
203 | ))} 204 |
205 |
206 | 207 | )} 208 | 209 | ); 210 | } 211 | -------------------------------------------------------------------------------- /frontend/src/components/CredentialPrompt.tsx: -------------------------------------------------------------------------------- 1 | import { CredentialStatus } from "@/lib/api"; 2 | import { AnimatePresence, motion } from "framer-motion"; 3 | import { ExternalLink, KeyRound, Loader2, ShieldCheck, Sparkles, X } from "lucide-react"; 4 | 5 | export type CredentialFormState = { 6 | googleApiKey: string; 7 | anthropicApiKey: string; 8 | modalTokenId: string; 9 | modalTokenSecret: string; 10 | }; 11 | 12 | type CredentialPromptProps = { 13 | open: boolean; 14 | status: CredentialStatus | null; 15 | selectedModel: "gemini-3-pro-preview" | "claude-opus-4-5"; 16 | form: CredentialFormState; 17 | onChange: (field: keyof CredentialFormState, value: string) => void; 18 | onSubmit: () => void; 19 | onClose: () => void; 20 | isSaving: boolean; 21 | error?: string | null; 22 | }; 23 | 24 | export function CredentialPrompt({ 25 | open, 26 | status, 27 | selectedModel, 28 | form, 29 | onChange, 30 | onSubmit, 31 | onClose, 32 | isSaving, 33 | error, 34 | }: CredentialPromptProps) { 35 | const googleReady = !!status?.hasGoogleApiKey; 36 | const anthropicReady = !!status?.hasAnthropicApiKey; 37 | const modalReady = !!status?.hasModalToken; 38 | 39 | // Determine which key is needed based on selected model 40 | const needsGoogleKey = selectedModel === "gemini-3-pro-preview"; 41 | const needsAnthropicKey = selectedModel === "claude-opus-4-5"; 42 | 43 | const requiredKeyName = needsGoogleKey ? "Google API key" : "Anthropic API key"; 44 | const hasRequiredKey = needsGoogleKey ? googleReady : anthropicReady; 45 | 46 | const readinessCopy = 47 | hasRequiredKey && modalReady 48 | ? "All set — keys already saved locally." 49 | : `Needed: ${[ 50 | !hasRequiredKey ? requiredKeyName : null, 51 | modalReady ? null : "Modal token (id + secret)", 52 | ] 53 | .filter(Boolean) 54 | .join(" + ")}`; 55 | 56 | const googleProvided = !!form.googleApiKey.trim(); 57 | const anthropicProvided = !!form.anthropicApiKey.trim(); 58 | const modalProvided = !!form.modalTokenId.trim() && !!form.modalTokenSecret.trim(); 59 | 60 | // Check if the required key for the selected model is available or provided 61 | const hasRequiredLLMKey = needsGoogleKey 62 | ? (googleReady || googleProvided) 63 | : (anthropicReady || anthropicProvided); 64 | const hasModalCredentials = modalReady || modalProvided; 65 | 66 | const disableSubmit = 67 | isSaving || !hasRequiredLLMKey || !hasModalCredentials; 68 | 69 | return ( 70 | 71 | {open && ( 72 | 78 |
79 | 86 |
87 |
88 |
89 | 90 |
91 |
92 |
93 |
94 | 95 | Environment check 96 |
97 |

98 | Add your API keys to launch the run 99 |

100 |

101 | We need at least one LLM key (Google for Gemini or Anthropic for Claude) and a Modal token pair to spin up research sandboxes. Keys are stored locally in your .env. 102 |

103 |
104 | 111 |
112 | 113 |
114 |
115 |
116 | 117 |
118 |

Credentials status

119 |

120 | {hasRequiredKey && modalReady 121 | ? "Ready to launch." 122 | : "Add the missing keys to continue."} 123 |

124 |
125 |
126 | 127 |
128 | 133 | 138 | 139 |
140 | 141 | 173 |
174 | 175 |
176 | onChange("googleApiKey", value)} 181 | status={googleReady ? "ok" : needsGoogleKey ? "missing" : "optional"} 182 | helper="Used for Gemini 3 Pro (stored locally)." 183 | /> 184 | onChange("anthropicApiKey", value)} 189 | status={anthropicReady ? "ok" : needsAnthropicKey ? "missing" : "optional"} 190 | helper="Used for Claude Opus 4.5 (stored locally)." 191 | /> 192 | onChange("modalTokenId", value)} 197 | status={modalReady ? "ok" : "missing"} 198 | helper="Pair with the secret to deploy sandboxes." 199 | /> 200 | onChange("modalTokenSecret", value)} 205 | status={modalReady ? "ok" : "missing"} 206 | helper="Kept locally in your .env file." 207 | /> 208 | 209 | {error && ( 210 |
211 | {error} 212 |
213 | )} 214 | 215 |
216 |

{readinessCopy}

217 | 234 |
235 |
236 |
237 |
238 | 239 | 240 | )} 241 | 242 | ); 243 | } 244 | 245 | type StatusPillProps = { 246 | label: string; 247 | ok: boolean; 248 | required?: boolean; 249 | }; 250 | 251 | function StatusPill({ label, ok, required = true }: StatusPillProps) { 252 | // If not required and not ok, show as "optional" (gray/neutral) 253 | const isOptional = !required && !ok; 254 | 255 | return ( 256 |
257 | {label} 258 | 267 |
276 | {ok ? "Ready" : isOptional ? "Optional" : "Missing"} 277 | 278 |
279 | ); 280 | } 281 | 282 | type FieldProps = { 283 | label: string; 284 | placeholder: string; 285 | value: string; 286 | onChange: (value: string) => void; 287 | status: "ok" | "missing" | "optional"; 288 | helper?: string; 289 | }; 290 | 291 | function Field({ label, placeholder, value, onChange, status, helper }: FieldProps) { 292 | const statusText = status === "ok" ? "Optional (already set)" : status === "optional" ? "Optional" : "Required"; 293 | const statusColor = status === "ok" ? "text-emerald-300" : status === "optional" ? "text-blue-300" : "text-amber-200"; 294 | 295 | return ( 296 | 311 | ); 312 | } 313 | -------------------------------------------------------------------------------- /frontend/src/components/Notebook/ResearchPaper.tsx: -------------------------------------------------------------------------------- 1 | import { useMemo, useState } from "react"; 2 | import { createPortal } from "react-dom"; 3 | import { FileText, X, Download, Printer, BookOpen } from "lucide-react"; 4 | import ReactMarkdown from "react-markdown"; 5 | import remarkGfm from "remark-gfm"; 6 | import { ChartSpec } from "@/lib/api"; 7 | 8 | interface ResearchPaperProps { 9 | content: string; 10 | charts?: ChartSpec[]; 11 | } 12 | 13 | function PaperChart({ chart, compact = false }: { chart: ChartSpec; compact?: boolean }) { 14 | const width = compact ? 340 : 640; 15 | const height = compact ? 140 : 240; 16 | const padding = compact 17 | ? { top: 14, right: 10, bottom: 28, left: 46 } 18 | : { top: 18, right: 18, bottom: 42, left: 64 }; 19 | 20 | const series = chart.series?.[0]; 21 | const values = useMemo( 22 | () => (series?.values || []).map((v) => Number(v)).filter((v) => Number.isFinite(v)), 23 | [series], 24 | ); 25 | if (!series || !values.length) return null; 26 | 27 | const min = Math.min(...values); 28 | const max = Math.max(...values); 29 | const span = max - min || 1; 30 | 31 | const innerWidth = width - padding.left - padding.right; 32 | const innerHeight = height - padding.top - padding.bottom; 33 | 34 | const formatNumber = (val: number) => { 35 | const abs = Math.abs(val); 36 | if (abs >= 1_000_000_000) return `${(val / 1_000_000_000).toFixed(1)}b`; 37 | if (abs >= 1_000_000) return `${(val / 1_000_000).toFixed(1)}m`; 38 | if (abs >= 1_000) return `${(val / 1_000).toFixed(1)}k`; 39 | if (abs >= 100) return val.toFixed(0); 40 | if (abs >= 1) return val.toFixed(2); 41 | return val.toPrecision(2); 42 | }; 43 | 44 | const xLabels = 45 | Array.isArray(chart.labels) && chart.labels.length === values.length 46 | ? chart.labels 47 | : values.map((_, idx) => `${idx + 1}`); 48 | 49 | const xTicks = useMemo(() => { 50 | const maxTicks = compact ? 
4 : 6; 51 | const step = Math.max(1, Math.ceil(xLabels.length / maxTicks)); 52 | const ticks: { idx: number; label: string; x: number }[] = []; 53 | for (let i = 0; i < xLabels.length; i += step) { 54 | const x = padding.left + (i / Math.max(xLabels.length - 1, 1)) * innerWidth; 55 | ticks.push({ idx: i, label: xLabels[i], x }); 56 | } 57 | if (ticks[ticks.length - 1]?.idx !== xLabels.length - 1) { 58 | const i = xLabels.length - 1; 59 | const x = padding.left + (i / Math.max(xLabels.length - 1, 1)) * innerWidth; 60 | ticks.push({ idx: i, label: xLabels[i], x }); 61 | } 62 | return ticks; 63 | }, [xLabels, padding.left, innerWidth, compact]); 64 | 65 | const yTicks = [0, 0.5, 1].map((t) => ({ 66 | value: min + span * t, 67 | y: padding.top + (1 - t) * innerHeight, 68 | })); 69 | 70 | const points = values.map((v, idx) => { 71 | const x = padding.left + (idx / Math.max(values.length - 1, 1)) * innerWidth; 72 | const y = padding.top + (1 - (v - min) / span) * innerHeight; 73 | return `${x},${y}`; 74 | }); 75 | 76 | const bars = chart.type === "bar"; 77 | 78 | return ( 79 |
80 |
81 | {chart.title || "Generated Chart"} 82 | 83 | {series.name || "Series"} · {chart.type === "bar" ? "Bar" : "Line"} 84 | 85 |
86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | {/* Grid + axes */} 96 | 97 | {yTicks.map(({ y }, i) => ( 98 | 99 | ))} 100 | 101 | 102 | 103 | 104 | 105 | {yTicks.map(({ y, value }, i) => ( 106 | 107 | {formatNumber(value)} 108 | 109 | ))} 110 | 111 | 112 | 113 | {xTicks.map(({ x, label }, i) => ( 114 | 115 | {label} 116 | 117 | ))} 118 | 119 | 120 | {bars ? ( 121 | values.map((v, idx) => { 122 | const barWidth = innerWidth / Math.max(values.length * 1.15, 1); 123 | const x = padding.left + idx * (innerWidth / Math.max(values.length - 1, 1)); 124 | const y = padding.top + (1 - (v - min) / span) * innerHeight; 125 | const h = height - padding.bottom - y; 126 | return ( 127 | 136 | ); 137 | }) 138 | ) : ( 139 | 147 | )} 148 | 149 |
150 | {chart.title || "Figure"} — {series.name || "metric"} over steps. Labels: {xLabels.slice(0, 3).join(", ")} 151 | {xLabels.length > 3 ? "…" : ""}. 152 |
153 |
154 | ); 155 | } 156 | 157 | export function ResearchPaper({ content, charts }: ResearchPaperProps) { 158 | const [isFullView, setIsFullView] = useState(false); 159 | 160 | return ( 161 | <> 162 | {/* Minimal Inline View */} 163 |
164 |
165 | {/* Header */} 166 |
167 |
168 |
169 | 170 | Final Manuscript 171 | 172 |
173 | 182 |
183 | 184 | {/* Preview Content */} 185 |
186 |
187 | 188 | {content} 189 | 190 |
191 | {charts && charts.length > 0 && ( 192 |
193 |

Figures (preview)

194 |
195 | {charts.slice(0, 3).map((chart, idx) => ( 196 |
197 | 198 |
199 | ))} 200 |
201 |
202 | )} 203 | {/* Gradient Fade */} 204 |
205 |
206 |
207 |
208 | 209 | {/* Full View Overlay */} 210 | {isFullView && createPortal( 211 |
212 |
213 | {/* Clean Header Bar */} 214 |
215 |
216 |
217 |
218 | 219 |
220 | Research Preview 221 |
222 |
223 | Final Draft 224 |
225 |
226 |
227 | 231 | 238 |
239 | 245 |
246 |
247 | 248 | {/* Paper Content Area - "The Page" */} 249 |
250 |
251 |
252 |
253 | {/* Style Overrides for "Paper" look */} 254 | 303 | 304 |

, 308 | }} 309 | > 310 | {content} 311 | 312 | 313 | {charts && charts.length > 0 && ( 314 |
315 |

Figures

316 |
317 | {charts.map((chart, idx) => ( 318 |
319 |
320 | Figure {idx + 1}. {chart.title || "Generated chart"} 321 |
322 | 323 |
324 | ))} 325 |
326 |
327 | )} 328 | 329 | {/* Footer */} 330 |
331 |

Preprint generated by AI Researcher

332 |
333 |

334 |
335 |
336 |
337 |
338 |
,
339 | document.body
340 | )}
341 | 
342 | );
343 | }
344 | 
--------------------------------------------------------------------------------
/frontend/src/lib/useExperiment.ts:
--------------------------------------------------------------------------------
1 | import { useState, useRef } from "react";
2 | import { streamExperiment, LogEvent, summarizeAgent, ChartSpec } from "./api";
3 | 
4 | export type StepType = "thought" | "code" | "result" | "text";
5 | 
6 | export interface ExperimentStep {
7 |   id: string;
8 |   type: StepType;
9 |   content: string;
10 |   metadata?: Record<string, any>;
11 |   timestamp: number;
12 | }
13 | 
14 | export interface AgentInsight {
15 |   id: string;
16 |   summary: string;
17 |   chart?: ChartSpec | null;
18 |   timestamp: number;
19 | }
20 | 
21 | export interface AgentState {
22 |   id: string;
23 |   status: "idle" | "running" | "completed" | "failed";
24 |   hypothesis?: string;
25 |   gpu?: string;
26 |   logs: string[];
27 |   exitCode?: number;
28 |   steps: ExperimentStep[];
29 |   insights: AgentInsight[];
30 | }
31 | 
32 | export type TimelineItem =
33 |   | { type: "thought"; content: string; timestamp: number }
34 |   | { type: "agents"; agentIds: string[]; timestamp: number }
35 |   | { type: "paper"; content: string; charts?: ChartSpec[]; timestamp: number };
36 | 
37 | export interface OrchestratorState {
38 |   status: "idle" | "planning" | "running" | "completed";
39 |   thoughts: string[];
40 |   plan: string[];
41 |   timeline: TimelineItem[];
42 | }
43 | 
44 | export function useExperiment() {
45 |   const [isRunning, setIsRunning] = useState(false);
46 |   const [logs, setLogs] = useState<LogEvent[]>([]);
47 |   const [agents, setAgents] = useState<Record<string, AgentState>>({});
48 |   const [orchestrator, setOrchestrator] = useState<OrchestratorState>({
49 |     status: "idle",
50 |     thoughts: [],
51 |     plan: [],
52 |     timeline: [],
53 |   });
54 |   const [error, setError] = useState<string | null>(null);
55 | 
56 |   // Keep track of the latest agents state to update it functionally
57 |   const agentsRef = useRef<Record<string, AgentState>>({});
58 |   const summaryTimersRef = useRef<Record<string, ReturnType<typeof setTimeout> | null>>({});
59 |   const summaryInflightRef = useRef<Record<string, boolean>>({});
60 | 
61 |   const updateAgent = (id: string, update: Partial<AgentState>) => {
62 |     setAgents((prev) => {
63 |       const current = prev[id] || { id, status: "idle", logs: [], steps: [], insights: [] };
64 |       const next = {
65 |         ...prev,
66 |         [id]: { ...current, ...update },
67 |       };
68 |       agentsRef.current = next;
69 |       return next;
70 |     });
71 |   };
72 | 
73 |   const addAgentStep = (id: string, step: Omit<ExperimentStep, "id" | "timestamp">) => {
74 |     setAgents((prev) => {
75 |       const current = prev[id] || { id, status: "idle", logs: [], steps: [], insights: [] };
76 |       const newStep: ExperimentStep = {
77 |         ...step,
78 |         id: Math.random().toString(36).substring(7),
79 |         timestamp: Date.now(),
80 |       };
81 | 
82 |       const next = {
83 |         ...prev,
84 |         [id]: {
85 |           ...current,
86 |           steps: [...current.steps, newStep],
87 |         },
88 |       };
89 |       agentsRef.current = next;
90 |       return next;
91 |     });
92 |   };
93 | 
94 |   const appendToLatestAgentStep = (id: string, type: StepType, chunk: string) => {
95 |     setAgents((prev) => {
96 |       const current = prev[id];
97 |       if (!current) return prev;
98 | 
99 |       const steps = [...current.steps];
100 | 
101 |       const newStep: ExperimentStep = {
102 |         id: Math.random().toString(36).substring(7),
103 |         type,
104 |         content: chunk,
105 |         timestamp: Date.now(),
106 |       };
107 | 
108 |       // If there are no steps yet, create the first one so we can stream into it.
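      // Later chunks of the same step type are merged into that cell; a type change pushes a fresh cell (see below).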
109 | if (steps.length === 0) { 110 | const next = { 111 | ...prev, 112 | [id]: { 113 | ...current, 114 | steps: [newStep], 115 | }, 116 | }; 117 | agentsRef.current = next; 118 | return next; 119 | } 120 | 121 | const lastStep = steps[steps.length - 1]; 122 | 123 | // If the last step matches the type, append to it. 124 | if (lastStep.type === type) { 125 | steps[steps.length - 1] = { 126 | ...lastStep, 127 | content: lastStep.content + chunk, 128 | }; 129 | } else { 130 | // Fallback: create new step if types mismatch 131 | steps.push(newStep); 132 | } 133 | 134 | const next = { 135 | ...prev, 136 | [id]: { ...current, steps }, 137 | }; 138 | agentsRef.current = next; 139 | return next; 140 | }); 141 | }; 142 | 143 | const addAgentInsight = (id: string, insight: Omit & { id?: string; timestamp?: number }) => { 144 | setAgents((prev) => { 145 | const current = prev[id]; 146 | if (!current) return prev; 147 | 148 | const nextInsight: AgentInsight = { 149 | id: insight.id || Math.random().toString(36).substring(7), 150 | timestamp: insight.timestamp || Date.now(), 151 | summary: insight.summary, 152 | chart: insight.chart, 153 | }; 154 | 155 | const next = { 156 | ...prev, 157 | [id]: { 158 | ...current, 159 | insights: [...(current.insights || []), nextInsight], 160 | }, 161 | }; 162 | 163 | agentsRef.current = next; 164 | return next; 165 | }); 166 | }; 167 | 168 | const appendToLatestOrchestratorStep = (type: "thought" | "text", chunk: string) => { 169 | setOrchestrator((prev) => { 170 | const timeline = [...prev.timeline]; 171 | const lastItem = timeline[timeline.length - 1]; 172 | 173 | // Check if we can append to the last item 174 | if (lastItem && lastItem.type === type) { 175 | timeline[timeline.length - 1] = { 176 | ...lastItem, 177 | content: lastItem.content + chunk 178 | }; 179 | 180 | // Also update thoughts array if it's a thought 181 | let thoughts = prev.thoughts; 182 | if (type === "thought") { 183 | thoughts = [...prev.thoughts]; 184 | if (thoughts.length > 0) { 185 | thoughts[thoughts.length - 1] = thoughts[thoughts.length - 1] + chunk; 186 | } else { 187 | thoughts.push(chunk); 188 | } 189 | } 190 | 191 | return { ...prev, timeline, thoughts }; 192 | } else { 193 | // Create new item 194 | const newItem: TimelineItem = { 195 | type: type as any, // 'text' isn't in TimelineItem type explicitly? let's check 196 | content: chunk, 197 | timestamp: Date.now() 198 | }; 199 | 200 | // TimelineItem is: thought | agents | paper. 201 | // If 'text' is meant to be something else, we might need to adjust. 202 | // But 'thought' is definitely supported. 203 | if (type !== "thought") { 204 | // For now orchestrator only really supports 'thought' and 'paper' and 'agents' in timeline 205 | // If we have general text messages, maybe treat as thoughts or ignore? 206 | // Orchestrator messages are usually shown as 'info' panels in CLI. 207 | // In frontend timeline, we map 'thought' to the Orchestrator block. 208 | // Let's assume 'thought' for now. 
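        // Non-'thought' chunks therefore stop at this early return and are intentionally dropped.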
209 | return prev; 210 | } 211 | 212 | return { 213 | ...prev, 214 | timeline: [...timeline, newItem], 215 | thoughts: [...prev.thoughts, chunk] 216 | }; 217 | } 218 | }); 219 | }; 220 | 221 | const runAgentSummary = async (agentId: string) => { 222 | // Clear pending timer marker 223 | summaryTimersRef.current[agentId] = null; 224 | 225 | if (summaryInflightRef.current[agentId]) return; 226 | 227 | const agent = agentsRef.current[agentId]; 228 | if (!agent || agent.steps.length === 0) return; 229 | 230 | summaryInflightRef.current[agentId] = true; 231 | 232 | try { 233 | const recentSteps = agent.steps.slice(-5).map((step) => ({ 234 | type: step.type, 235 | content: step.content.slice(-2000), 236 | })); 237 | 238 | const resp = await summarizeAgent({ 239 | agent_id: agentId, 240 | history: recentSteps, 241 | }); 242 | 243 | addAgentInsight(agentId, { 244 | summary: resp.summary, 245 | chart: resp.chart, 246 | }); 247 | } catch (err) { 248 | console.warn("Failed to summarize agent", agentId, err); 249 | } finally { 250 | summaryInflightRef.current[agentId] = false; 251 | } 252 | }; 253 | 254 | const scheduleAgentSummary = (agentId: string) => { 255 | if (!agentId) return; 256 | 257 | // debounce to wait for the end of a thought stream 258 | const timers = summaryTimersRef.current; 259 | if (timers[agentId]) { 260 | clearTimeout(timers[agentId]!); 261 | } 262 | 263 | timers[agentId] = setTimeout(() => runAgentSummary(agentId), 900); 264 | }; 265 | 266 | const startExperiment = async ( 267 | mode: "single" | "orchestrator", 268 | config: { 269 | task: string; 270 | gpu?: string; 271 | model?: string; 272 | num_agents?: number; 273 | max_rounds?: number; 274 | max_parallel?: number; 275 | test_mode?: boolean; 276 | } 277 | ) => { 278 | // Reset any pending sidebar summary timers between runs 279 | Object.values(summaryTimersRef.current).forEach((timer) => timer && clearTimeout(timer)); 280 | summaryTimersRef.current = {}; 281 | summaryInflightRef.current = {}; 282 | 283 | setIsRunning(true); 284 | setError(null); 285 | setAgents({}); 286 | setOrchestrator({ thoughts: [], plan: [], timeline: [], status: "running" }); 287 | 288 | try { 289 | const endpoint = mode === "single" 290 | ? "/api/experiments/single/stream" 291 | : "/api/experiments/orchestrator/stream"; 292 | 293 | const payload = mode === "single" 294 | ? { task: config.task, gpu: config.gpu, model: config.model, test_mode: config.test_mode } 295 | : { 296 | task: config.task, 297 | gpu: config.gpu, 298 | model: config.model, 299 | num_agents: config.num_agents || 3, 300 | max_rounds: config.max_rounds || 3, 301 | max_parallel: config.max_parallel || 2, 302 | test_mode: config.test_mode 303 | }; 304 | 305 | await streamExperiment( 306 | endpoint, 307 | payload, 308 | (event) => { 309 | setLogs((prev) => [...prev, event]); 310 | 311 | // Check for subprocess completion with error 312 | if (event.type === "summary" && event.exit_code !== 0) { 313 | // Find any error messages from recent logs (check both stderr and stdout for tracebacks) 314 | setLogs((prevLogs) => { 315 | // Look for stderr first 316 | const recentStderr = prevLogs 317 | .filter((l) => l.stream === "stderr" && l.plain) 318 | .slice(-10) 319 | .map((l) => l.plain?.trim()) 320 | .filter(Boolean) 321 | .join("\n"); 322 | 323 | // Also check stdout for error-like content (tracebacks, Fatal Error, etc.) 
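        // Some runners interleave tracebacks into stdout, so scanning stderr alone can miss the real cause.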
324 | const recentStdout = prevLogs 325 | .filter((l) => l.stream === "stdout" && l.plain && 326 | (l.plain.includes("Error") || l.plain.includes("Traceback") || l.plain.includes("Exception"))) 327 | .slice(-10) 328 | .map((l) => l.plain?.trim()) 329 | .filter(Boolean) 330 | .join("\n"); 331 | 332 | // If no specific error found, show last few lines of any output 333 | const anyOutput = prevLogs 334 | .filter((l) => l.type === "line" && l.plain) 335 | .slice(-5) 336 | .map((l) => l.plain?.trim()) 337 | .filter(Boolean) 338 | .join("\n"); 339 | 340 | const errorMsg = recentStderr || recentStdout || anyOutput || `Process exited with code ${event.exit_code}. No output captured.`; 341 | setError(errorMsg); 342 | return prevLogs; 343 | }); 344 | } 345 | 346 | if (event.type === "line" && event.plain) { 347 | // Check for ::EVENT:: marker 348 | const eventIndex = event.plain.indexOf("::EVENT::"); 349 | if (eventIndex !== -1) { 350 | try { 351 | const jsonStr = event.plain.substring(eventIndex + "::EVENT::".length); 352 | const payload = JSON.parse(jsonStr); 353 | 354 | // Try to extract Agent ID from prefix if present: "[Agent 1] ::EVENT::..." 355 | let inferredAgentId: string | undefined; 356 | const prefix = event.plain.substring(0, eventIndex); 357 | const match = prefix.match(/\[Agent (\d+)\]/); 358 | if (match) { 359 | inferredAgentId = match[1]; 360 | } 361 | 362 | handleStructuredEvent(payload, inferredAgentId); 363 | } catch (e) { 364 | console.warn("Failed to parse structured event:", e); 365 | } 366 | } 367 | } 368 | }, 369 | (err) => { 370 | setError(err.message); 371 | setIsRunning(false); 372 | }, 373 | () => { 374 | setIsRunning(false); 375 | } 376 | ); 377 | } catch (err) { 378 | setError(err instanceof Error ? err.message : "Failed to start experiment"); 379 | setIsRunning(false); 380 | } 381 | }; 382 | 383 | const handleStructuredEvent = (event: any, inferredAgentId?: string) => { 384 | const { type, data } = event; 385 | 386 | switch (type) { 387 | case "AGENT_START": 388 | updateAgent(data.agent_id, { 389 | status: "running", 390 | hypothesis: data.hypothesis, 391 | gpu: data.gpu, 392 | }); 393 | 394 | // Add agent to the current 'agents' timeline item if it exists, or create new one 395 | setOrchestrator((prev) => { 396 | const lastItem = prev.timeline[prev.timeline.length - 1]; 397 | if (lastItem && lastItem.type === "agents") { 398 | // Check if agent is already in the list to avoid dupes 399 | if (lastItem.agentIds.includes(data.agent_id)) { 400 | return prev; 401 | } 402 | // Update the last item in place (immutably) 403 | const newTimeline = [...prev.timeline]; 404 | newTimeline[newTimeline.length - 1] = { 405 | ...lastItem, 406 | agentIds: [...lastItem.agentIds, data.agent_id] 407 | }; 408 | return { ...prev, timeline: newTimeline }; 409 | } else { 410 | // Create new agents group 411 | return { 412 | ...prev, 413 | timeline: [ 414 | ...prev.timeline, 415 | { type: "agents", agentIds: [data.agent_id], timestamp: Date.now() } 416 | ] 417 | }; 418 | } 419 | }); 420 | break; 421 | 422 | case "AGENT_THOUGHT": 423 | if (inferredAgentId) { 424 | addAgentStep(inferredAgentId, { 425 | type: "thought", 426 | content: data.thought, 427 | }); 428 | scheduleAgentSummary(inferredAgentId); 429 | } 430 | break; 431 | 432 | case "AGENT_THOUGHT_STREAM": 433 | if (inferredAgentId && typeof data?.chunk === "string") { 434 | appendToLatestAgentStep(inferredAgentId, "thought", data.chunk); 435 | scheduleAgentSummary(inferredAgentId); 436 | } 437 | break; 438 | 439 | case "AGENT_TOOL": 440 | 
if (inferredAgentId) { 441 | addAgentStep(inferredAgentId, { 442 | type: "code", 443 | content: `${data.tool}(${JSON.stringify(data.args, null, 2)})`, 444 | metadata: { tool: data.tool, args: data.args }, 445 | }); 446 | } 447 | break; 448 | 449 | case "AGENT_TOOL_RESULT": 450 | if (inferredAgentId) { 451 | // Update the latest step if it's a result block (from streaming), 452 | // otherwise create a new one. 453 | setAgents((prev) => { 454 | const current = prev[inferredAgentId]; 455 | if (!current) return prev; 456 | 457 | const steps = [...current.steps]; 458 | const lastStep = steps[steps.length - 1]; 459 | 460 | if (lastStep && lastStep.type === "result") { 461 | // Update existing result block with the final full content 462 | steps[steps.length - 1] = { 463 | ...lastStep, 464 | content: data.result, 465 | metadata: { ...lastStep.metadata, tool: data.tool } 466 | }; 467 | } else { 468 | // Create new result block 469 | steps.push({ 470 | id: Math.random().toString(36).substring(7), 471 | type: "result", 472 | content: data.result, 473 | metadata: { tool: data.tool }, 474 | timestamp: Date.now(), 475 | }); 476 | } 477 | 478 | return { 479 | ...prev, 480 | [inferredAgentId]: { ...current, steps } 481 | }; 482 | }); 483 | 484 | scheduleAgentSummary(inferredAgentId); 485 | } 486 | break; 487 | 488 | case "AGENT_STREAM": 489 | if (inferredAgentId && typeof data?.chunk === "string") { 490 | // Stream incremental sandbox output into the latest result cell. 491 | // NotebookCell already handles carriage returns (\r) to render 492 | // tqdm-style progress bars cleanly. 493 | appendToLatestAgentStep(inferredAgentId, "result", data.chunk); 494 | } 495 | break; 496 | 497 | case "AGENT_COMPLETE": 498 | updateAgent(data.agent_id, { 499 | status: "completed", 500 | exitCode: data.exit_code, 501 | }); 502 | scheduleAgentSummary(data.agent_id); 503 | break; 504 | 505 | case "ORCH_THOUGHT": 506 | setOrchestrator((prev) => ({ 507 | ...prev, 508 | thoughts: [...prev.thoughts, data.thought], 509 | timeline: [ 510 | ...prev.timeline, 511 | { type: "thought", content: data.thought, timestamp: Date.now() } 512 | ] 513 | })); 514 | break; 515 | 516 | case "ORCH_THOUGHT_STREAM": 517 | if (typeof data?.chunk === "string") { 518 | appendToLatestOrchestratorStep("thought", data.chunk); 519 | } 520 | break; 521 | 522 | case "ORCH_PAPER": 523 | // Capture any charts that agents have produced so we can surface them alongside the paper. 524 | const charts: ChartSpec[] = Object.values(agentsRef.current) 525 | .flatMap((agent) => (agent.insights || []).map((insight) => insight.chart)) 526 | .filter(Boolean) as ChartSpec[]; 527 | 528 | // Deduplicate loosely by title + type + first series name + length of labels. 
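        // A loose key is enough here: repeated summaries yield near-identical charts that would only clutter the paper.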
529 |     const seen = new Set<string>();
530 |     const uniqueCharts: ChartSpec[] = [];
531 |     for (const chart of charts) {
532 |       const key = [
533 |         chart.title || "untitled",
534 |         chart.type,
535 |         chart.series?.[0]?.name || "series",
536 |         chart.labels?.length || 0,
537 |         chart.series?.[0]?.values?.length || 0,
538 |       ].join("|");
539 |       if (seen.has(key)) continue;
540 |       seen.add(key);
541 |       uniqueCharts.push(chart);
542 |     }
543 | 
544 |     setOrchestrator((prev) => ({
545 |       ...prev,
546 |       timeline: [
547 |         ...prev.timeline,
548 |         {
549 |           type: "paper",
550 |           content: data.content,
551 |           charts: uniqueCharts.slice(0, 6), // keep it concise
552 |           timestamp: Date.now(),
553 |         },
554 |       ],
555 |     }));
556 |     break;
557 | 
558 |   case "ORCH_TOOL":
559 |     // We could also track orchestrator steps if we wanted a notebook for it
560 |     break;
561 |   }
562 | };
563 | 
564 | // We need a way to parse the agent ID from the line if it exists.
565 | // The orchestrator prefixes: `[Agent {id}] `
566 | 
567 | const clearError = () => setError(null);
568 | 
569 | return {
570 |   isRunning,
571 |   logs,
572 |   agents,
573 |   orchestrator,
574 |   error,
575 |   startExperiment,
576 |   clearError,
577 | };
578 | }
579 | 
--------------------------------------------------------------------------------
/frontend/src/components/LabNotebook.tsx:
--------------------------------------------------------------------------------
1 | import { useState, useEffect, useRef } from "react";
2 | import { Loader2, Play } from "lucide-react";
3 | import { motion } from "framer-motion";
4 | import { useExperiment } from "@/lib/useExperiment";
5 | import { FindingsRail } from "./FindingsRail";
6 | import { AgentNotebook } from "./Notebook/AgentNotebook";
7 | import { ResearchPaper } from "./Notebook/ResearchPaper";
8 | import { cn } from "@/lib/utils";
9 | import { StreamingMarkdown } from "./StreamingMarkdown";
10 | import { CredentialPrompt, CredentialFormState } from "./CredentialPrompt";
11 | import { CredentialStatus, fetchCredentialStatus, saveCredentials } from "@/lib/api";
12 | 
13 | type PendingRun = {
14 |   mode: "single" | "orchestrator";
15 |   config: {
16 |     task: string;
17 |     gpu?: string;
18 |     model?: string;
19 |     num_agents?: number;
20 |     max_rounds?: number;
21 |     max_parallel?: number;
22 |     test_mode?: boolean;
23 |   };
24 | };
25 | 
26 | export function LabNotebook() {
27 |   const { isRunning, agents, orchestrator, error: experimentError, startExperiment, clearError } = useExperiment();
28 |   const [task, setTask] = useState("");
29 |   const [mode, setMode] = useState<"single" | "orchestrator">("orchestrator");
30 |   const [testMode, setTestMode] = useState(false);
31 |   const bottomRef = useRef<HTMLDivElement | null>(null);
32 |   const prevTimelineLengthRef = useRef(0);
33 |   const [credentialStatus, setCredentialStatus] = useState<CredentialStatus | null>(null);
34 |   const [credentialForm, setCredentialForm] = useState<CredentialFormState>({
35 |     googleApiKey: "",
36 |     anthropicApiKey: "",
37 |     modalTokenId: "",
38 |     modalTokenSecret: "",
39 |   });
40 |   const [selectedModel, setSelectedModel] = useState<"gemini-3-pro-preview" | "claude-opus-4-5">("gemini-3-pro-preview");
41 |   const [showCredentialPrompt, setShowCredentialPrompt] = useState(false);
42 |   const [pendingRun, setPendingRun] = useState<PendingRun | null>(null);
43 |   const [isCheckingCredentials, setIsCheckingCredentials] = useState(false);
44 |   const [isSavingCredentials, setIsSavingCredentials] = useState(false);
45 |   const [prereqError, setPrereqError] = useState<string | null>(null);
46 |   const [credentialPromptError, setCredentialPromptError] = useState<string | null>(null);
47 | 
48 |   // Check credentials
once on load so we can prompt proactively. 49 | useEffect(() => { 50 | fetchCredentialStatus() 51 | .then(setCredentialStatus) 52 | .catch(() => { 53 | // silently ignore so we don't block the UI if the backend isn't ready yet 54 | }); 55 | }, []); 56 | 57 | // Auto-scroll effect 58 | useEffect(() => { 59 | const currentLength = orchestrator.timeline.length; 60 | const prevLength = prevTimelineLengthRef.current; 61 | 62 | if (currentLength > prevLength) { 63 | const lastItem = orchestrator.timeline[currentLength - 1]; 64 | if (lastItem.type === "agents" || lastItem.type === "paper" || currentLength === 1) { 65 | // Scroll slightly above the new element to keep context 66 | // We do this by scrolling to the bottom ref, but with 'start' block alignment if possible, 67 | // or just letting the padding handle it. 68 | // Actually, let's scroll to the *element itself* if we could, but since we use bottomRef, 69 | // let's just scroll smoothly to it. 70 | // The user said it "goes a little too far", implying it might be scrolling past the top of the new content. 71 | // Or maybe it scrolls so the bottom is at the bottom of the screen? 72 | // "scrollIntoView" aligns the element to the top or bottom. 73 | 74 | // Let's try aligning the bottomRef to the 'end' of the view, but give it some breathing room. 75 | bottomRef.current?.scrollIntoView({ behavior: "smooth", block: "end" }); 76 | } 77 | } 78 | 79 | prevTimelineLengthRef.current = currentLength; 80 | }, [orchestrator.timeline]); 81 | 82 | const handleStart = async () => { 83 | if (!task.trim() || isCheckingCredentials) return; 84 | 85 | const config = { 86 | task, 87 | gpu: "any", 88 | model: selectedModel, 89 | num_agents: 3, 90 | max_rounds: 3, 91 | max_parallel: 2, 92 | test_mode: testMode, 93 | }; 94 | 95 | setPrereqError(null); 96 | setCredentialPromptError(null); 97 | setIsCheckingCredentials(true); 98 | 99 | try { 100 | const status = await fetchCredentialStatus(); 101 | setCredentialStatus(status); 102 | 103 | // Check if the required key for the selected model is available 104 | const needsGoogleKey = selectedModel === "gemini-3-pro-preview" && !status.hasGoogleApiKey; 105 | const needsAnthropicKey = selectedModel === "claude-opus-4-5" && !status.hasAnthropicApiKey; 106 | const needsModalToken = !status.hasModalToken; 107 | 108 | if (needsGoogleKey || needsAnthropicKey || needsModalToken) { 109 | setPendingRun({ mode, config }); 110 | setShowCredentialPrompt(true); 111 | return; 112 | } 113 | 114 | startExperiment(mode, config); 115 | } catch (err) { 116 | setPrereqError(err instanceof Error ? err.message : "Unable to verify API keys."); 117 | } finally { 118 | setIsCheckingCredentials(false); 119 | } 120 | }; 121 | 122 | const handleCredentialFieldChange = (field: keyof CredentialFormState, value: string) => { 123 | setCredentialForm((prev) => ({ ...prev, [field]: value })); 124 | }; 125 | 126 | const handleSaveCredentials = async () => { 127 | setCredentialPromptError(null); 128 | setIsSavingCredentials(true); 129 | 130 | try { 131 | const status = await saveCredentials({ 132 | googleApiKey: credentialForm.googleApiKey || undefined, 133 | anthropicApiKey: credentialForm.anthropicApiKey || undefined, 134 | modalTokenId: credentialForm.modalTokenId || undefined, 135 | modalTokenSecret: credentialForm.modalTokenSecret || undefined, 136 | }); 137 | setCredentialStatus(status); 138 | 139 | // Check if we have the required key for the selected model 140 | const hasRequiredLLMKey = selectedModel === "gemini-3-pro-preview" 141 | ? 
status.hasGoogleApiKey 142 | : status.hasAnthropicApiKey; 143 | 144 | if (hasRequiredLLMKey && status.hasModalToken) { 145 | setShowCredentialPrompt(false); 146 | setCredentialForm({ googleApiKey: "", anthropicApiKey: "", modalTokenId: "", modalTokenSecret: "" }); 147 | const nextRun = pendingRun; 148 | setPendingRun(null); 149 | if (nextRun) { 150 | startExperiment(nextRun.mode, nextRun.config); 151 | } 152 | } else { 153 | const modelName = selectedModel === "gemini-3-pro-preview" ? "Google" : "Anthropic"; 154 | setCredentialPromptError(`We still need the ${modelName} API key and Modal token to start a run with the selected model.`); 155 | } 156 | } catch (err) { 157 | setCredentialPromptError(err instanceof Error ? err.message : "Unable to save credentials."); 158 | } finally { 159 | setIsSavingCredentials(false); 160 | } 161 | }; 162 | 163 | const handleCloseCredentialPrompt = () => { 164 | setShowCredentialPrompt(false); 165 | setPendingRun(null); 166 | }; 167 | 168 | const isStartDisabled = !task.trim() || isCheckingCredentials; 169 | 170 | return ( 171 |
172 | {/* Fixed API Keys Button - Top Right */} 173 | 182 | 183 |
184 | 185 | {/* Main Content Area */} 186 |
187 | 188 | {/* Sticky Header for Active Research */} 189 | {orchestrator.timeline.length > 0 && ( 190 |
191 |
192 | 193 | Objective 194 | 195 |

196 | {task} 197 |

198 |
199 |
200 | )} 201 | 202 | {/* Scrollable Timeline */} 203 |
204 |
205 | 206 | {/* Initial Input State (Only visible when timeline is empty, not running, and no error) */} 207 | {orchestrator.timeline.length === 0 && !isRunning && !experimentError && ( 208 |
209 |
210 |

211 | Research Objective 212 |

213 |

214 | Describe your scientific query. The orchestrator will decompose it into hypotheses and launch autonomous agents to investigate. 215 |

216 |
217 | 218 |
219 |
220 |
221 |