├── frontend
│   ├── src
│   │   ├── vite-env.d.ts
│   │   ├── lib
│   │   │   ├── utils.ts
│   │   │   ├── api.ts
│   │   │   └── useExperiment.ts
│   │   ├── main.tsx
│   │   ├── App.tsx
│   │   ├── App.css
│   │   ├── components
│   │   │   ├── StatusBadge.tsx
│   │   │   ├── Console.tsx
│   │   │   ├── StreamingMarkdown.tsx
│   │   │   ├── Notebook
│   │   │   │   ├── NotebookCell.tsx
│   │   │   │   ├── AgentNotebook.tsx
│   │   │   │   └── ResearchPaper.tsx
│   │   │   ├── FindingsRail.tsx
│   │   │   ├── CredentialPrompt.tsx
│   │   │   └── LabNotebook.tsx
│   │   ├── index.css
│   │   └── assets
│   │       └── react.svg
│   ├── vite.config.d.ts
│   ├── postcss.config.js
│   ├── vite.config.ts
│   ├── vite.config.js
│   ├── .gitignore
│   ├── tsconfig.tsbuildinfo
│   ├── eslint.config.js
│   ├── index.html
│   ├── tsconfig.app.json
│   ├── tsconfig.node.json
│   ├── tsconfig.json
│   ├── package.json
│   ├── public
│   │   └── vite.svg
│   ├── tailwind.config.js
│   └── README.md
├── requirements.txt
├── .env.example
├── railway.json
├── api_guide.md
├── Dockerfile
├── experiment.py
├── LICENSE
├── logger.py
├── README.md
├── modal_guide.md
├── run_app.py
├── main.py
├── gemini_3_pro_guide.md
├── insights.py
└── api_server.py

/frontend/src/vite-env.d.ts:
--------------------------------------------------------------------------------
 1 | /// <reference types="vite/client" />
 2 | 
--------------------------------------------------------------------------------
/frontend/vite.config.d.ts:
--------------------------------------------------------------------------------
 1 | declare const _default: import("vite").UserConfig;
 2 | export default _default;
 3 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | google-genai
 2 | anthropic
 3 | modal
 4 | python-dotenv
 5 | rich
 6 | fastapi
 7 | uvicorn[standard]
 8 | 
--------------------------------------------------------------------------------
/frontend/postcss.config.js:
--------------------------------------------------------------------------------
 1 | export default {
 2 |   plugins: {
 3 |     "@tailwindcss/postcss": {},
 4 |     autoprefixer: {},
 5 |   },
 6 | }
 7 | 
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
 1 | GOOGLE_API_KEY=your_google_api_key_here
 2 | ANTHROPIC_API_KEY=your_anthropic_api_key_here
 3 | MODAL_TOKEN_ID=your_modal_token_id_here
 4 | MODAL_TOKEN_SECRET=your_modal_token_secret_here
 5 | 
--------------------------------------------------------------------------------
/frontend/src/lib/utils.ts:
--------------------------------------------------------------------------------
 1 | import { type ClassValue, clsx } from "clsx"
 2 | import { twMerge } from "tailwind-merge"
 3 | 
 4 | export function cn(...inputs: ClassValue[]) {
 5 |   return twMerge(clsx(inputs))
 6 | }
 7 | 
--------------------------------------------------------------------------------
/frontend/src/main.tsx:
--------------------------------------------------------------------------------
 1 | import { StrictMode } from 'react'
 2 | import { createRoot } from 'react-dom/client'
 3 | import './index.css'
 4 | import App from './App.tsx'
 5 | 
 6 | createRoot(document.getElementById('root')!).render(
 7 |   <StrictMode>
 8 |     <App />
 9 |   </StrictMode>,
10 | )
11 | 
--------------------------------------------------------------------------------
/railway.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://railway.app/railway.schema.json",
 3 |   "build": {
 4 |     "builder": "DOCKERFILE",
 5 |     "dockerfilePath": "Dockerfile"
 6 |   },
 7 |   "deploy": {
"healthcheckPath": "/api/health", 9 | "restartPolicyType": "ON_FAILURE" 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /frontend/src/App.tsx: -------------------------------------------------------------------------------- 1 | import { LabNotebook } from "@/components/LabNotebook"; 2 | 3 | function App() { 4 | return ( 5 |
6 | 7 |
 8 |   );
 9 | }
10 | 
11 | export default App;
12 | 
--------------------------------------------------------------------------------
/frontend/vite.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig } from 'vite'
 2 | import react from '@vitejs/plugin-react'
 3 | import path from 'path'
 4 | 
 5 | // https://vitejs.dev/config/
 6 | export default defineConfig({
 7 |   plugins: [react()],
 8 |   resolve: {
 9 |     alias: {
10 |       "@": path.resolve(__dirname, "./src"),
11 |     },
12 |   },
13 | })
14 | 
--------------------------------------------------------------------------------
/frontend/vite.config.js:
--------------------------------------------------------------------------------
 1 | import { defineConfig } from 'vite';
 2 | import react from '@vitejs/plugin-react';
 3 | import path from 'path';
 4 | // https://vitejs.dev/config/
 5 | export default defineConfig({
 6 |     plugins: [react()],
 7 |     resolve: {
 8 |         alias: {
 9 |             "@": path.resolve(__dirname, "./src"),
10 |         },
11 |     },
12 | });
13 | 
--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
 1 | # Logs
 2 | logs
 3 | *.log
 4 | npm-debug.log*
 5 | yarn-debug.log*
 6 | yarn-error.log*
 7 | pnpm-debug.log*
 8 | lerna-debug.log*
 9 | 
10 | node_modules
11 | dist
12 | dist-ssr
13 | *.local
14 | 
15 | # Editor directories and files
16 | .vscode/*
17 | !.vscode/extensions.json
18 | .idea
19 | .DS_Store
20 | *.suo
21 | *.ntvs*
22 | *.njsproj
23 | *.sln
24 | *.sw?
25 | 
--------------------------------------------------------------------------------
/api_guide.md:
--------------------------------------------------------------------------------
 1 | # AI Researcher HTTP API
 2 | 
 3 | This document describes the lightweight HTTP API that wraps the existing CLI entrypoint (`main.py`).
 4 | 
 5 | The API **does not** change any of the research logic – it simply spawns the current CLI in a subprocess and streams back everything it prints, so you can build a rich front-end on top.
 6 | 
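A minimal client sketch of consuming that stream (the route name and the per-line event shape here are assumptions inferred from the `SingleExperimentRequest` and `LogEvent` types in `frontend/src/lib/api.ts`; check `api_server.py` for the real endpoint definitions):

```python
import json
import requests

# Hypothetical route: api_server.py defines the real one. The payload mirrors
# the SingleExperimentRequest type in frontend/src/lib/api.ts.
resp = requests.post(
    "http://localhost:8000/api/experiments",
    json={"task": "Does label smoothing improve ViT-Base on CIFAR-10?",
          "gpu": "any", "model": "gemini-3-pro-preview"},
    stream=True,
)
for raw in resp.iter_lines():
    if not raw:
        continue
    event = json.loads(raw)  # assumed: one JSON-encoded LogEvent per line
    if event.get("type") == "line":
        print(event.get("plain") or event.get("raw", ""))
```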
 7 | ---
 8 | 
 9 | ## Getting started
10 | 
11 | 1. Install dependencies:
12 | 
13 | ```bash
14 | pip install -r requirements.txt
15 | ```
--------------------------------------------------------------------------------
/frontend/tsconfig.tsbuildinfo:
--------------------------------------------------------------------------------
 1 | {"root":["./src/app.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/console.tsx","./src/components/credentialprompt.tsx","./src/components/findingsrail.tsx","./src/components/labnotebook.tsx","./src/components/statusbadge.tsx","./src/components/streamingmarkdown.tsx","./src/components/notebook/agentnotebook.tsx","./src/components/notebook/notebookcell.tsx","./src/components/notebook/researchpaper.tsx","./src/lib/api.ts","./src/lib/useexperiment.ts","./src/lib/utils.ts"],"version":"5.9.3"}
--------------------------------------------------------------------------------
/frontend/eslint.config.js:
--------------------------------------------------------------------------------
 1 | import js from '@eslint/js'
 2 | import globals from 'globals'
 3 | import reactHooks from 'eslint-plugin-react-hooks'
 4 | import reactRefresh from 'eslint-plugin-react-refresh'
 5 | import tseslint from 'typescript-eslint'
 6 | import { defineConfig, globalIgnores } from 'eslint/config'
 7 | 
 8 | export default defineConfig([
 9 |   globalIgnores(['dist']),
10 |   {
11 |     files: ['**/*.{ts,tsx}'],
12 |     extends: [
13 |       js.configs.recommended,
14 |       tseslint.configs.recommended,
15 |       reactHooks.configs.flat.recommended,
16 |       reactRefresh.configs.vite,
17 |     ],
18 |     languageOptions: {
19 |       ecmaVersion: 2020,
20 |       globals: globals.browser,
21 |     },
22 |   },
23 | ])
24 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Build stage for frontend
 2 | FROM node:20-slim AS frontend-builder
 3 | WORKDIR /app/frontend
 4 | COPY frontend/package*.json ./
 5 | RUN npm ci
 6 | COPY frontend/ ./
 7 | RUN npm run build
 8 | 
 9 | # Production stage
10 | FROM python:3.11-slim
11 | WORKDIR /app
12 | 
13 | # Install dependencies
14 | COPY requirements.txt ./
15 | RUN pip install --no-cache-dir -r requirements.txt
16 | 
17 | # Copy all Python files from root (not recursively into venv)
18 | COPY api_server.py main.py agent.py orchestrator.py logger.py insights.py ./
19 | 
20 | # Copy built frontend
21 | COPY --from=frontend-builder /app/frontend/dist ./frontend/dist
22 | 
23 | # Expose port (Railway sets PORT env var)
24 | EXPOSE 8000
25 | 
26 | # Start the server
27 | CMD ["python", "api_server.py"]
28 | 
--------------------------------------------------------------------------------
/frontend/index.html:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | 
 5 | 
 6 | 
 7 | 
 8 | AI Researcher
 9 | 
10 | 
11 | 
14 | 
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
--------------------------------------------------------------------------------
/frontend/src/App.css:
--------------------------------------------------------------------------------
 1 | #root {
 2 |   max-width: 1280px;
 3 |   margin: 0 auto;
 4 |   padding: 2rem;
 5 |   text-align: center;
 6 | }
 7 | 
 8 | .logo {
 9 |   height: 6em;
10 |   padding: 1.5em;
11 |   will-change: filter;
12 |   transition: filter 300ms;
13 | }
14 | .logo:hover {
15 |   filter: drop-shadow(0 0 2em #646cffaa);
16 | }
17 | .logo.react:hover {
18 |   filter: drop-shadow(0 0 2em #61dafbaa);
19 | }
20 | 
21 | @keyframes logo-spin {
22 |   from {
23 |     transform: rotate(0deg);
24 |   }
25 |   to {
26 |     transform: rotate(360deg);
27 |   }
28 | }
29 | 
30 | @media (prefers-reduced-motion: no-preference) {
31 |   a:nth-of-type(2) .logo {
32 |     animation: logo-spin infinite 20s linear;
33 |   }
34 | }
35 | 
36 | .card {
37 |   padding: 2em;
38 | }
39 | 
40 | .read-the-docs {
41 |   color: #888;
42 | }
43 | 
--------------------------------------------------------------------------------
/frontend/tsconfig.app.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
 4 |     "target": "ES2022",
 5 |     "useDefineForClassFields": true,
 6 |     "lib": ["ES2022", "DOM", "DOM.Iterable"],
 7 |     "module": "ESNext",
 8 |     "types": ["vite/client"],
 9 |     "skipLibCheck": true,
10 | 
11 |     /* Bundler mode */
12 |     "moduleResolution": "bundler",
13 |     "allowImportingTsExtensions": true,
14 |     "verbatimModuleSyntax": true,
15 |     "moduleDetection": "force",
16 |     "noEmit": true,
17 |     "jsx": "react-jsx",
18 | 
19 |     /* Linting */
20 |     "strict": true,
21 |     "noUnusedLocals": true,
22 |     "noUnusedParameters": true,
23 |     "erasableSyntaxOnly": true,
24 |     "noFallthroughCasesInSwitch": true,
25 |     "noUncheckedSideEffectImports": true
26 |   },
27 |   "include": ["src"]
28 | }
29 | 
--------------------------------------------------------------------------------
/frontend/tsconfig.node.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
 4 |     "target": "ES2023",
 5 |     "lib": [
 6 |       "ES2023"
 7 |     ],
 8 |     "module": "ESNext",
 9 |     "types": [
10 |       "node"
11 |     ],
12 |     "skipLibCheck": true,
13 |     /* Bundler mode */
14 |     "moduleResolution": "bundler",
15 |     "verbatimModuleSyntax": true,
16 |     "moduleDetection": "force",
17 |     "composite": true,
18 |     "noEmit": false,
19 |     /* Linting */
20 |     "strict": true,
21 |     "noUnusedLocals": true,
22 |     "noUnusedParameters": true,
23 |     "erasableSyntaxOnly": true,
24 |     "noFallthroughCasesInSwitch": true,
25 |     "noUncheckedSideEffectImports": true
26 |   },
27 |   "include": [
28 |     "vite.config.ts"
29 |   ]
30 | }
--------------------------------------------------------------------------------
/frontend/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2020",
 4 |     "useDefineForClassFields": true,
 5 |     "lib": [
 6 |       "ES2020",
 7 |       "DOM",
 8 |       "DOM.Iterable"
 9 |     ],
10 |     "module": "ESNext",
11 |     "skipLibCheck": true,
12 |     /* Bundler mode */
13 |     "moduleResolution": "bundler",
14 |     "allowImportingTsExtensions": true,
15 |     "resolveJsonModule": true,
16 |     "isolatedModules": true,
17 |     "noEmit": true,
18 |     "jsx": "react-jsx",
19 |     /* Linting */
20 |     "strict": true,
"noUnusedLocals": true, 22 | "noUnusedParameters": true, 23 | "noFallthroughCasesInSwitch": true, 24 | "baseUrl": ".", 25 | "paths": { 26 | "@/*": [ 27 | "./src/*" 28 | ] 29 | } 30 | }, 31 | "include": [ 32 | "src" 33 | ], 34 | "references": [ 35 | { 36 | "path": "./tsconfig.node.json", 37 | "composite": true, 38 | "allow": true 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /experiment.py: -------------------------------------------------------------------------------- 1 | import modal 2 | 3 | app = modal.App("fib-gpu-test-1") 4 | 5 | image = modal.Image.debian_slim().pip_install("torch") 6 | 7 | @app.function(image=image, gpu="any") 8 | def calculate_fib_iterative(): 9 | import torch 10 | print(f"Using device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}") 11 | 12 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 13 | 14 | n = 10 15 | # Initializing tensors on GPU 16 | t0 = torch.tensor(0, device=device) 17 | t1 = torch.tensor(1, device=device) 18 | 19 | if n == 0: 20 | res = t0 21 | elif n == 1: 22 | res = t1 23 | else: 24 | for i in range(2, n + 1): 25 | temp = t0 + t1 26 | t0 = t1 27 | t1 = temp 28 | res = t1 29 | 30 | print(f"Fibonacci({n}) calculated on {device}: {res.item()}") 31 | return res.item() 32 | 33 | @app.local_entrypoint() 34 | def main(): 35 | print("Starting Test 1: Iterative Approach") 36 | result = calculate_fib_iterative.remote() 37 | print(f"Result 1: {result}") 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 mshumer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "private": true, 4 | "version": "0.0.0", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "tsc -b && vite build", 9 | "lint": "eslint .", 10 | "preview": "vite preview" 11 | }, 12 | "dependencies": { 13 | "clsx": "^2.1.1", 14 | "date-fns": "^4.1.0", 15 | "framer-motion": "^12.23.24", 16 | "lucide-react": "^0.554.0", 17 | "react": "^19.2.0", 18 | "react-dom": "^19.2.0", 19 | "react-markdown": "^10.1.0", 20 | "remark-gfm": "^4.0.1", 21 | "tailwind-merge": "^3.4.0" 22 | }, 23 | "devDependencies": { 24 | "@eslint/js": "^9.39.1", 25 | "@tailwindcss/postcss": "^4.1.17", 26 | "@tailwindcss/typography": "^0.5.19", 27 | "@types/node": "^24.10.1", 28 | "@types/react": "^19.2.5", 29 | "@types/react-dom": "^19.2.3", 30 | "@vitejs/plugin-react": "^5.1.1", 31 | "autoprefixer": "^10.4.22", 32 | "eslint": "^9.39.1", 33 | "eslint-plugin-react-hooks": "^7.0.1", 34 | "eslint-plugin-react-refresh": "^0.4.24", 35 | "globals": "^16.5.0", 36 | "postcss": "^8.5.6", 37 | "tailwindcss": "^4.1.17", 38 | "typescript": "~5.9.3", 39 | "typescript-eslint": "^8.46.4", 40 | "vite": "^7.2.4" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /frontend/src/components/StatusBadge.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils"; 2 | 3 | interface StatusBadgeProps { 4 | status: "idle" | "running" | "completed" | "failed" | "planning"; 5 | className?: string; 6 | } 7 | 8 | export function StatusBadge({ status, className }: StatusBadgeProps) { 9 | const config: Record = { 10 | idle: { 11 | text: "Idle", 12 | dotColor: "bg-[#333]", 13 | }, 14 | running: { 15 | text: "Running", 16 | dotColor: "bg-blue-500", 17 | animate: true, 18 | }, 19 | planning: { 20 | text: "Planning", 21 | dotColor: "bg-purple-500", 22 | animate: true, 23 | }, 24 | completed: { 25 | text: "Done", 26 | dotColor: "bg-green-500", 27 | }, 28 | failed: { 29 | text: "Failed", 30 | dotColor: "bg-red-500", 31 | }, 32 | }; 33 | 34 | const { text, dotColor, animate } = config[status]; 35 | 36 | return ( 37 |
38 |
45 | 46 | {text} 47 | 48 |
49 | ); 50 | } 51 | -------------------------------------------------------------------------------- /frontend/public/vite.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from rich.console import Console 3 | from rich.panel import Panel 4 | from rich.logging import RichHandler 5 | from rich.theme import Theme 6 | 7 | # Custom theme for the console 8 | custom_theme = Theme({ 9 | "info": "dim cyan", 10 | "warning": "magenta", 11 | "error": "bold red", 12 | "success": "bold green", 13 | "thought": "italic cyan", 14 | "code": "bold yellow", 15 | "result": "white" 16 | }) 17 | 18 | console = Console(theme=custom_theme) 19 | 20 | def setup_logging(): 21 | """Sets up logging to both file and console.""" 22 | logging.basicConfig( 23 | level=logging.INFO, 24 | format="%(asctime)s - %(levelname)s - %(message)s", 25 | handlers=[ 26 | logging.FileHandler("agent.log"), 27 | # We don't add RichHandler here because we want manual control over console output 28 | # to keep it "elegant" and not just a stream of logs. 29 | ] 30 | ) 31 | # Create a separate logger for the file that doesn't propagate to root 32 | file_logger = logging.getLogger("agent_file") 33 | file_logger.setLevel(logging.DEBUG) 34 | return file_logger 35 | 36 | # Global file logger instance 37 | logger = setup_logging() 38 | 39 | def log_step(step_name, status="INFO"): 40 | """Logs a step to the file.""" 41 | logger.info(f"[{step_name}] {status}") 42 | 43 | def print_panel(content, title, style="info"): 44 | """Prints a rich panel to the console.""" 45 | console.print(Panel(content, title=title, border_style=style, expand=False)) 46 | 47 | def print_status(message, style="info"): 48 | """Prints a status message.""" 49 | console.print(f"[{style}]{message}[/{style}]") 50 | -------------------------------------------------------------------------------- /frontend/src/components/Console.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect, useRef } from "react"; 2 | import { LogEvent } from "@/lib/api"; 3 | import { cn } from "@/lib/utils"; 4 | 5 | interface ConsoleProps { 6 | logs: LogEvent[]; 7 | className?: string; 8 | } 9 | 10 | export function Console({ logs, className }: ConsoleProps) { 11 | const bottomRef = useRef(null); 12 | 13 | useEffect(() => { 14 | bottomRef.current?.scrollIntoView({ behavior: "smooth" }); 15 | }, [logs]); 16 | 17 | return ( 18 |
24 |
25 | Console Output 26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | {logs.length === 0 && ( 34 |
35 | Waiting for process output... 36 |
37 | )} 38 | {logs 39 | .filter((log) => !((log.plain ?? log.raw ?? "").includes("::EVENT::"))) 40 | .map((log, i) => ( 41 |
42 | 43 | {new Date(log.timestamp).toLocaleTimeString([], { 44 | hour12: false, 45 | hour: "2-digit", 46 | minute: "2-digit", 47 | second: "2-digit", 48 | })} 49 | 50 | 55 | {log.plain || log.raw} 56 | 57 |
58 | ))} 59 |
60 |
61 |
62 | ); 63 | } 64 | -------------------------------------------------------------------------------- /frontend/src/components/StreamingMarkdown.tsx: -------------------------------------------------------------------------------- 1 | import { useEffect, useState } from "react"; 2 | import ReactMarkdown from "react-markdown"; 3 | import remarkGfm from "remark-gfm"; 4 | import { cn } from "@/lib/utils"; 5 | 6 | interface StreamingMarkdownProps { 7 | content: string; 8 | /** 9 | * A key that identifies a logical block of streaming content. 10 | * When this changes (e.g. new thought / new cell), we restart the animation. 11 | */ 12 | animateKey?: string | number; 13 | /** 14 | * Tailwind / CSS classes applied to the markdown body wrapper. 15 | */ 16 | markdownClassName?: string; 17 | /** 18 | * Optional classes for the outer container (animation wrapper). 19 | */ 20 | wrapperClassName?: string; 21 | } 22 | 23 | /** 24 | * StreamingMarkdown 25 | * 26 | * Renders markdown that is being updated incrementally (streamed tokens). 27 | * Every time the content changes, we softly re-trigger a fade-in animation 28 | * on the entire block using the global `.stream-fade` styles. 29 | * 30 | * This keeps the effect subtle but ensures *all* new streamed chunks 31 | * participate in the animation, not just the initial thought. 32 | */ 33 | export function StreamingMarkdown({ 34 | content, 35 | animateKey, 36 | markdownClassName, 37 | wrapperClassName, 38 | }: StreamingMarkdownProps) { 39 | const [isAnimating, setIsAnimating] = useState(false); 40 | 41 | useEffect(() => { 42 | if (typeof window === "undefined") return; 43 | 44 | let timeoutId: number | null = null; 45 | const rafId = window.requestAnimationFrame(() => { 46 | setIsAnimating(true); 47 | timeoutId = window.setTimeout(() => setIsAnimating(false), 420); // match CSS duration 48 | }); 49 | 50 | return () => { 51 | window.cancelAnimationFrame(rafId); 52 | if (timeoutId !== null) window.clearTimeout(timeoutId); 53 | }; 54 | }, [content, animateKey]); 55 | 56 | return ( 57 |
64 |
65 | 66 | {content} 67 | 68 |
69 |
70 | ); 71 | } 72 | -------------------------------------------------------------------------------- /frontend/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | export default { 3 | content: [ 4 | "./index.html", 5 | "./src/**/*.{js,ts,jsx,tsx}", 6 | ], 7 | theme: { 8 | extend: { 9 | colors: { 10 | background: "hsl(var(--background))", 11 | foreground: "hsl(var(--foreground))", 12 | card: { 13 | DEFAULT: "hsl(var(--card))", 14 | foreground: "hsl(var(--card-foreground))", 15 | }, 16 | popover: { 17 | DEFAULT: "hsl(var(--popover))", 18 | foreground: "hsl(var(--popover-foreground))", 19 | }, 20 | primary: { 21 | DEFAULT: "hsl(var(--primary))", 22 | foreground: "hsl(var(--primary-foreground))", 23 | }, 24 | secondary: { 25 | DEFAULT: "hsl(var(--secondary))", 26 | foreground: "hsl(var(--secondary-foreground))", 27 | }, 28 | muted: { 29 | DEFAULT: "hsl(var(--muted))", 30 | foreground: "hsl(var(--muted-foreground))", 31 | }, 32 | accent: { 33 | DEFAULT: "hsl(var(--accent))", 34 | foreground: "hsl(var(--accent-foreground))", 35 | }, 36 | destructive: { 37 | DEFAULT: "hsl(var(--destructive))", 38 | foreground: "hsl(var(--destructive-foreground))", 39 | }, 40 | border: "hsl(var(--border))", 41 | input: "hsl(var(--input))", 42 | ring: "hsl(var(--ring))", 43 | chart: { 44 | "1": "hsl(var(--chart-1))", 45 | "2": "hsl(var(--chart-2))", 46 | "3": "hsl(var(--chart-3))", 47 | "4": "hsl(var(--chart-4))", 48 | "5": "hsl(var(--chart-5))", 49 | }, 50 | }, 51 | borderRadius: { 52 | lg: "var(--radius)", 53 | md: "calc(var(--radius) - 2px)", 54 | sm: "calc(var(--radius) - 4px)", 55 | }, 56 | fontFamily: { 57 | sans: ['Inter', 'sans-serif'], 58 | mono: ['JetBrains Mono', 'monospace'], 59 | serif: ['Newsreader', 'serif'], 60 | }, 61 | }, 62 | }, 63 | plugins: [ 64 | require('@tailwindcss/typography'), 65 | ], 66 | } 67 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # React + TypeScript + Vite 2 | 3 | This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules. 4 | 5 | Currently, two official plugins are available: 6 | 7 | - [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) (or [oxc](https://oxc.rs) when used in [rolldown-vite](https://vite.dev/guide/rolldown)) for Fast Refresh 8 | - [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh 9 | 10 | ## React Compiler 11 | 12 | The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation). 13 | 14 | ## Expanding the ESLint configuration 15 | 16 | If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules: 17 | 18 | ```js 19 | export default defineConfig([ 20 | globalIgnores(['dist']), 21 | { 22 | files: ['**/*.{ts,tsx}'], 23 | extends: [ 24 | // Other configs... 
25 | 26 | // Remove tseslint.configs.recommended and replace with this 27 | tseslint.configs.recommendedTypeChecked, 28 | // Alternatively, use this for stricter rules 29 | tseslint.configs.strictTypeChecked, 30 | // Optionally, add this for stylistic rules 31 | tseslint.configs.stylisticTypeChecked, 32 | 33 | // Other configs... 34 | ], 35 | languageOptions: { 36 | parserOptions: { 37 | project: ['./tsconfig.node.json', './tsconfig.app.json'], 38 | tsconfigRootDir: import.meta.dirname, 39 | }, 40 | // other options... 41 | }, 42 | }, 43 | ]) 44 | ``` 45 | 46 | You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules: 47 | 48 | ```js 49 | // eslint.config.js 50 | import reactX from 'eslint-plugin-react-x' 51 | import reactDom from 'eslint-plugin-react-dom' 52 | 53 | export default defineConfig([ 54 | globalIgnores(['dist']), 55 | { 56 | files: ['**/*.{ts,tsx}'], 57 | extends: [ 58 | // Other configs... 59 | // Enable lint rules for React 60 | reactX.configs['recommended-typescript'], 61 | // Enable lint rules for React DOM 62 | reactDom.configs.recommended, 63 | ], 64 | languageOptions: { 65 | parserOptions: { 66 | project: ['./tsconfig.node.json', './tsconfig.app.json'], 67 | tsconfigRootDir: import.meta.dirname, 68 | }, 69 | // other options... 70 | }, 71 | }, 72 | ]) 73 | ``` 74 | -------------------------------------------------------------------------------- /frontend/src/index.css: -------------------------------------------------------------------------------- 1 | @import "tailwindcss"; 2 | @config "../tailwind.config.js"; 3 | 4 | @layer base { 5 | :root { 6 | --background: 240 10% 3.9%; 7 | --foreground: 0 0% 98%; 8 | --card: 240 10% 3.9%; 9 | --card-foreground: 0 0% 98%; 10 | --popover: 240 10% 3.9%; 11 | --popover-foreground: 0 0% 98%; 12 | --primary: 0 0% 98%; 13 | --primary-foreground: 240 5.9% 10%; 14 | --secondary: 240 3.7% 15.9%; 15 | --secondary-foreground: 0 0% 98%; 16 | --muted: 240 3.7% 15.9%; 17 | --muted-foreground: 240 5% 64.9%; 18 | --accent: 240 3.7% 15.9%; 19 | --accent-foreground: 0 0% 98%; 20 | --destructive: 0 62.8% 30.6%; 21 | --destructive-foreground: 0 0% 98%; 22 | --border: 240 3.7% 15.9%; 23 | --input: 240 3.7% 15.9%; 24 | --ring: 240 4.9% 83.9%; 25 | --chart-1: 220 70% 50%; 26 | --chart-2: 160 60% 45%; 27 | --chart-3: 30 80% 55%; 28 | --chart-4: 280 65% 60%; 29 | --chart-5: 340 75% 55%; 30 | --radius: 0.5rem; 31 | } 32 | } 33 | 34 | @layer base { 35 | * { 36 | @apply border-border; 37 | } 38 | 39 | body { 40 | @apply bg-background text-foreground; 41 | font-feature-settings: "rlig" 1, "calt" 1; 42 | } 43 | } 44 | 45 | /* Custom Scrollbar Global (Dark) */ 46 | ::-webkit-scrollbar { 47 | width: 8px; 48 | height: 8px; 49 | } 50 | 51 | ::-webkit-scrollbar-track { 52 | background: transparent; 53 | } 54 | 55 | ::-webkit-scrollbar-thumb { 56 | @apply bg-muted rounded-full; 57 | } 58 | 59 | ::-webkit-scrollbar-thumb:hover { 60 | @apply bg-muted-foreground/50; 61 | } 62 | 63 | /* Light Theme Scrollbar override */ 64 | .custom-scrollbar-light::-webkit-scrollbar-thumb { 65 | @apply bg-gray-300; 66 | } 67 | 68 | .custom-scrollbar-light::-webkit-scrollbar-thumb:hover { 69 | @apply bg-gray-400; 70 | } 71 | 72 | /* Print Styles */ 73 | @media print { 74 | @page { 75 | margin: 0; 76 | size: auto; 77 | } 78 | 79 | body * { 80 | visibility: hidden; 
 81 |   }
 82 | 
 83 |   #printable-paper-content,
 84 |   #printable-paper-content * {
 85 |     visibility: visible;
 86 |   }
 87 | 
 88 |   #printable-paper-content {
 89 |     position: fixed;
 90 |     left: 0;
 91 |     top: 0;
 92 |     width: 100vw;
 93 |     height: auto;
 94 |     margin: 0;
 95 |     padding: 1in;
 96 |     background: white !important;
 97 |     color: black !important;
 98 |     box-shadow: none !important;
 99 |     overflow: visible !important;
100 |     -webkit-print-color-adjust: exact;
101 |     print-color-adjust: exact;
102 |   }
103 | 
104 |   /* Hide footer in print */
105 |   .no-print {
106 |     display: none !important;
107 |   }
108 | }
109 | 
110 | /* Subtle fade for streamed tokens */
111 | .stream-fade {
112 |   will-change: opacity, transform, filter;
113 | }
114 | 
115 | .stream-fade--active {
116 |   animation: streamFade 0.42s ease-out;
117 |   animation-fill-mode: backwards;
118 | }
119 | 
120 | @keyframes streamFade {
121 |   from {
122 |     opacity: 0.2;
123 |     transform: translateY(6px);
124 |     filter: blur(8px);
125 |   }
126 |   65% {
127 |     opacity: 1;
128 |     transform: translateY(0);
129 |     filter: blur(0);
130 |   }
131 |   to {
132 |     opacity: 1;
133 |   }
134 | }
135 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AI Researcher
 2 | [![Twitter Follow](https://img.shields.io/twitter/follow/mattshumer_?style=social)](https://twitter.com/mattshumer_)
 3 | 
 4 | [Be the first to know when I publish new AI builds + demos!](https://tally.so/r/w2M17p)
 5 | 
 6 | An autonomous AI researcher. It takes a research objective, breaks it into experiments, spins up separate agents with access to their own GPUs to run these experiments, and delivers a paper-style writeup with findings.
 7 | 
 8 | ## How it Works
 9 | - Decomposes your prompt into experiments and assigns them to specialist researcher agents.
10 | - Each agent can launch GPU-enabled sandboxes to train models/run inference/etc., evaluate, and collect evidence.
11 | - Based on the results of these experiments, the orchestrator can decide to finalize, or run more experiments.
12 | - The orchestrator goes over all of the results and turns them into a coherent "paper".
13 | 
14 | ## Run it (web notebook, one command)
15 | The fastest way to use it:
16 | ```
17 | python run_app.py
18 | ```
19 | This installs missing deps, starts the API + frontend, and opens the notebook. If Google/Modal keys aren’t set, the UI will prompt you and save them locally before the run starts.
20 | 
21 | ## Keys Needed
22 | - **LLM key** (at least one):
23 |   - Google AI Studio: `GOOGLE_API_KEY` (for Gemini 3 Pro)
24 |   - Anthropic: `ANTHROPIC_API_KEY` (for Claude Opus 4.5)
25 | - **Modal tokens**: `MODAL_TOKEN_ID` and `MODAL_TOKEN_SECRET` (for GPU sandboxes)
26 | - Add them to `.env` in the repo root, or paste them into the web prompt when asked.
27 | 
28 | ## Model Selection
29 | Choose between **Gemini 3 Pro** and **Claude Opus 4.5** from the dropdown in the web UI, or via CLI with `--model`.
30 | 
31 | ## Optional CLI
32 | Prefer the terminal?
33 | ```
34 | python -m venv venv && source venv/bin/activate
35 | pip install -r requirements.txt
36 | python main.py "Does label smoothing improve ViT-Base on CIFAR-10?" --mode single --gpu any --model gemini-3-pro-preview
37 | ```
38 | Orchestrator (multi-agent):
39 | ```
40 | python main.py "Characterize scaling laws for sparse attention transformers" \
41 |   --mode orchestrator --num-agents 3 --max-rounds 3 --max-parallel 2 --gpu any
42 | ```
43 | Dry run:
44 | ```
45 | python main.py "Sanity check the pipeline" --mode orchestrator --test-mode
46 | ```
47 | 
48 | ## Deploy to Railway
49 | 
50 | [![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/new/template?template=https://github.com/mattshumer/ai-researcher&referralCode=mattshumer)
51 | 
52 | **Steps:**
53 | 1. Click the button above (or go to Railway and select "Deploy from GitHub repo")
54 | 2. Connect your GitHub account and select this repo (or your fork)
55 | 3. Railway will automatically detect the Dockerfile and build the app
56 | 4. Once deployed, open the app URL and enter your API keys in the UI
57 | 
58 | **Optional environment variables** (if you want server-side defaults):
59 | - `GOOGLE_API_KEY` - Google AI Studio key for Gemini 3 Pro
60 | - `ANTHROPIC_API_KEY` - Anthropic key for Claude Opus 4.5
61 | - `MODAL_TOKEN_ID` and `MODAL_TOKEN_SECRET` - For GPU sandboxes
62 | 
63 | Note: Users can also enter their own keys directly in the web UI without setting environment variables.
64 | 
65 | ## Status/Contribution
66 | This is a super-early, experimental harness. There are a number of improvements to be worked out (e.g., dataset sharing between agents, key management, literature search) that would make this way more capable. If anyone wants to add these in, feel free!
--------------------------------------------------------------------------------
/frontend/src/assets/react.svg:
--------------------------------------------------------------------------------
 1 | 
--------------------------------------------------------------------------------
/frontend/src/components/Notebook/NotebookCell.tsx:
--------------------------------------------------------------------------------
 1 | import { Terminal, ArrowRight } from "lucide-react";
 2 | import { ExperimentStep } from "@/lib/useExperiment";
 3 | import { StreamingMarkdown } from "../StreamingMarkdown";
 4 | 
 5 | interface NotebookCellProps {
 6 |     step: ExperimentStep;
 7 | }
 8 | 
 9 | export function NotebookCell({ step }: NotebookCellProps) {
10 |     const { type, content, id } = step;
11 | 
12 |     if (type === "thought") {
13 |         return (
14 | 
15 |
16 |
17 | Thinking 18 |
19 |
20 | 25 |
26 |
27 | ); 28 | } 29 | 30 | if (type === "code") { 31 | return ( 32 |
33 |
34 | 35 | Command 36 |
37 |
38 |
39 |                         {content}
40 |                     
41 |
42 |
43 | ); 44 | } 45 | 46 | if (type === "result") { 47 | // Handle carriage returns (\r) for progress bars (like tqdm). 48 | // We want to simulate the terminal behavior where \r moves the cursor 49 | // to the start of the line, allowing subsequent text to overwrite. 50 | // We split by \r and take the last segment for the current line context if it's a pure overwrite, 51 | // but \r can be mixed with \n. 52 | // A simple approximation is: split by \n, and for each line, process \r. 53 | 54 | const processCarriageReturns = (text: string) => { 55 | const lines = text.split('\n'); 56 | const processedLines = lines.map(line => { 57 | // If line has \r, usually we just want the text AFTER the last \r 58 | // unless that \r is followed by nothing? 59 | // Standard terminal: "Loading... 10%\rLoading... 20%" -> "Loading... 20%" 60 | // "Item 1\rItem 2" -> "Item 2" 61 | 62 | if (line.includes('\r')) { 63 | const parts = line.split('\r'); 64 | return parts[parts.length - 1]; 65 | } 66 | return line; 67 | }); 68 | return processedLines.join('\n'); 69 | }; 70 | 71 | return ( 72 |
73 |
74 | 75 | Output 76 |
77 |
78 |
79 |                         {processCarriageReturns(content)}
80 |                     
81 |
82 |
83 | ); 84 | } 85 | 86 | return null; 87 | } 88 | -------------------------------------------------------------------------------- /modal_guide.md: -------------------------------------------------------------------------------- 1 | # Modal Guide for AI Agents 2 | 3 | This guide provides a concise, accurate reference for using Modal to spin up GPUs, run code, track costs, and manage data. 4 | 5 | ## 1. Core Concepts 6 | - **App**: The unit of deployment. Defined as `app = modal.App("my-app-name")`. 7 | - **Image**: The environment (OS + Python packages). 8 | - **Function**: The code that runs remotely. Decorated with `@app.function`. 9 | - **Volume**: Persistent storage for large files (datasets, models). 10 | 11 | ## 2. Project Structure 12 | A standard Modal project is a single Python file (e.g., `experiment.py`) or a module. 13 | 14 | ```python 15 | import modal 16 | 17 | app = modal.App("experiment-01") 18 | 19 | # Define the environment 20 | image = ( 21 | modal.Image.debian_slim() 22 | .pip_install("torch", "transformers", "numpy") 23 | ) 24 | 25 | @app.function(image=image, gpu="A100") 26 | def run_experiment(params: dict): 27 | import torch 28 | print(f"Running on {torch.cuda.get_device_name(0)}") 29 | # ... experiment logic ... 30 | return {"status": "success", "loss": 0.01} 31 | 32 | @app.local_entrypoint() 33 | def main(): 34 | print("Starting remote experiment...") 35 | result = run_experiment.remote({"learning_rate": 0.001}) 36 | print(f"Result: {result}") 37 | ``` 38 | 39 | ## 3. Spinning Up GPUs 40 | Specify the `gpu` argument in the `@app.function` decorator. 41 | 42 | ### GPU Types 43 | - **H100**: `gpu="H100"` (Most powerful, scarce) 44 | - **A100**: `gpu="A100"` (Standard for LLM training/inference) 45 | - `gpu="A100-80GB"` (Force 80GB memory) 46 | - **A10G**: `gpu="A10G"` (Good price/performance for inference) 47 | - **T4**: `gpu="T4"` (Cheap, older) 48 | - **Any**: `gpu="any"` (Lowest availability latency) 49 | 50 | ### Multi-GPU 51 | Append `:N` to the type string. 52 | ```python 53 | @app.function(gpu="A100:4") # Request 4 A100s 54 | ``` 55 | 56 | ## 4. Persistent Storage (Volumes) 57 | Use `modal.Volume` to persist data across runs. 58 | 59 | ### Creating & Mounting 60 | ```python 61 | # Create/Get volume 62 | volume = modal.Volume.from_name("my-dataset-vol", create_if_missing=True) 63 | 64 | @app.function(volumes={"/data": volume}) 65 | def process_data(): 66 | # Read 67 | with open("/data/input.txt", "r") as f: 68 | data = f.read() 69 | 70 | # Write 71 | with open("/data/output.txt", "w") as f: 72 | f.write("processed") 73 | 74 | # CRITICAL: Commit changes to persist them! 75 | volume.commit() 76 | ``` 77 | 78 | ### Reloading 79 | If another function updates the volume, reload it to see changes: 80 | ```python 81 | volume.reload() 82 | ``` 83 | 84 | ## 5. Secrets & API Keys 85 | Inject environment variables securely. 86 | 87 | 1. **Create Secret** (CLI or Dashboard): 88 | `modal secret create my-huggingface-secret HF_TOKEN=hf_...` 89 | 90 | 2. **Use in Code**: 91 | ```python 92 | @app.function(secrets=[modal.Secret.from_name("my-huggingface-secret")]) 93 | def download_model(): 94 | import os 95 | token = os.environ["HF_TOKEN"] 96 | ``` 97 | 98 | ## 6. Running Code 99 | ### CLI 100 | Run the local entrypoint: 101 | ```bash 102 | modal run experiment.py 103 | ``` 104 | 105 | ### Remote Execution 106 | - **`func.remote(args)`**: Synchronous call. Returns the result. 107 | - **`func.spawn(args)`**: Asynchronous call. Returns a `FunctionCall` object. 
108 | ```python 109 | job = run_experiment.spawn(params) 110 | # ... do other work ... 111 | result = job.get() 112 | ``` 113 | 114 | ## 7. Web Endpoints 115 | Expose a function as a web endpoint (useful for agent-to-agent communication). 116 | 117 | ```python 118 | @app.function() 119 | @modal.web_endpoint(method="POST") 120 | def webhook(data: dict): 121 | return {"received": data} 122 | ``` 123 | *URL is printed to stdout on deploy.* 124 | 125 | ## 8. Observability & Costs 126 | ### Logs 127 | - **Live**: Streamed to your terminal during `modal run`. 128 | - **Dashboard**: View full logs at `https://modal.com/apps`. 129 | - **Programmatic**: Currently, logs are best consumed via the dashboard or by redirecting stdout in the function to a file on a Volume. 130 | 131 | ### Cost Tracking 132 | - **Pricing**: Usage-based (per second). 133 | - A100: ~$0.000694/sec 134 | - H100: ~$0.001097/sec 135 | - CPU: Very cheap 136 | - **Dashboard**: View "Usage & Billing" in the Modal dashboard for exact costs per app/function. 137 | - **Optimization**: 138 | - Use `modal.Image` caching (builds are cached). 139 | - Scale down to 0 automatically (serverless). 140 | 141 | ## 9. Quick Reference Checklist 142 | - [ ] **Decorator**: `@app.function(image=..., gpu=..., volumes=..., secrets=...)` 143 | - [ ] **Entrypoint**: `@app.local_entrypoint()` 144 | - [ ] **Run**: `modal run file.py` 145 | - [ ] **Persist**: `volume.commit()` after writes. 146 | - [ ] **Secrets**: `modal.Secret.from_name("name")` 147 | -------------------------------------------------------------------------------- /run_app.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | import time 4 | import webbrowser 5 | import os 6 | import signal 7 | from pathlib import Path 8 | 9 | def print_status(msg, color="white"): 10 | # ANSI colors 11 | colors = { 12 | "white": "\033[97m", 13 | "cyan": "\033[96m", 14 | "green": "\033[92m", 15 | "yellow": "\033[93m", 16 | "red": "\033[91m", 17 | "magenta": "\033[95m", 18 | } 19 | end = "\033[0m" 20 | print(f"{colors.get(color, colors['white'])}[{color.upper()}] {msg}{end}") 21 | 22 | def kill_port(port): 23 | """Kill any process listening on the specified port.""" 24 | try: 25 | # Find PID using lsof 26 | result = subprocess.run( 27 | ["lsof", "-t", "-i", f":{port}"], 28 | capture_output=True, 29 | text=True 30 | ) 31 | pids = result.stdout.strip().split('\n') 32 | 33 | for pid in pids: 34 | if pid: 35 | print_status(f"Killing existing process on port {port} (PID: {pid})...", "yellow") 36 | subprocess.run(["kill", "-9", pid], check=False) 37 | except Exception as e: 38 | # lsof might not be installed or other error, just ignore 39 | pass 40 | 41 | def ensure_venv(root_dir): 42 | """Ensure a virtual environment exists and is used.""" 43 | venv_dir = root_dir / "venv" 44 | venv_python = venv_dir / "bin" / "python" 45 | 46 | # If we are already running in the venv, continue 47 | if sys.prefix == str(venv_dir): 48 | return sys.executable 49 | 50 | print_status("Checking environment...", "cyan") 51 | 52 | # Create venv if it doesn't exist 53 | if not venv_dir.exists(): 54 | print_status("Creating virtual environment...", "yellow") 55 | subprocess.run([sys.executable, "-m", "venv", "venv"], cwd=root_dir, check=True) 56 | 57 | # Install requirements 58 | print_status("Installing backend dependencies...", "yellow") 59 | subprocess.run([str(venv_python), "-m", "pip", "install", "-r", "requirements.txt"], cwd=root_dir, check=True) 60 | 61 | # 
Re-execute this script using the venv python 62 | print_status("Switching to virtual environment...", "cyan") 63 | os.execv(str(venv_python), [str(venv_python)] + sys.argv) 64 | 65 | def main(): 66 | # Paths 67 | root_dir = Path(__file__).parent.resolve() 68 | frontend_dir = root_dir / "frontend" 69 | 70 | # 0. Ensure Venv 71 | ensure_venv(root_dir) 72 | 73 | # 1. Cleanup Ports 74 | kill_port(8000) 75 | kill_port(5173) 76 | 77 | # 2. Install Frontend Dependencies if needed 78 | if not (frontend_dir / "node_modules").exists(): 79 | print_status("Installing frontend dependencies...", "cyan") 80 | subprocess.run(["npm", "install"], cwd=frontend_dir, check=True) 81 | 82 | # 3. Start Backend 83 | print_status("Starting Backend API...", "green") 84 | backend_env = os.environ.copy() 85 | # Ensure backend sees the venv 86 | backend_env["VIRTUAL_ENV"] = str(root_dir / "venv") 87 | backend_env["PATH"] = f"{root_dir}/venv/bin:{backend_env['PATH']}" 88 | 89 | backend_process = subprocess.Popen( 90 | [sys.executable, "api_server.py"], 91 | cwd=root_dir, 92 | env=backend_env 93 | ) 94 | 95 | # 4. Start Frontend 96 | print_status("Starting Frontend Dev Server...", "green") 97 | frontend_process = subprocess.Popen( 98 | ["npm", "run", "dev", "--", "--port", "5173"], 99 | cwd=frontend_dir, 100 | stdout=subprocess.PIPE, 101 | stderr=subprocess.PIPE, 102 | text=True 103 | ) 104 | 105 | # Wait a bit for servers to spin up 106 | time.sleep(3) 107 | 108 | # 5. Open Browser 109 | print_status("Opening Fractal Notebook...", "magenta") 110 | webbrowser.open("http://localhost:5173") 111 | 112 | print_status("System Running. Press Ctrl+C to stop.", "cyan") 113 | 114 | try: 115 | while True: 116 | time.sleep(1) 117 | if backend_process.poll() is not None: 118 | print_status("Backend process exited unexpectedly.", "red") 119 | break 120 | if frontend_process.poll() is not None: 121 | print_status("Frontend process exited unexpectedly.", "red") 122 | # Print frontend error if it failed 123 | if frontend_process.stderr: 124 | print(frontend_process.stderr.read()) 125 | break 126 | except KeyboardInterrupt: 127 | print_status("\nStopping system...", "yellow") 128 | finally: 129 | backend_process.terminate() 130 | frontend_process.terminate() 131 | try: 132 | backend_process.wait(timeout=5) 133 | frontend_process.wait(timeout=5) 134 | except subprocess.TimeoutExpired: 135 | backend_process.kill() 136 | frontend_process.kill() 137 | print_status("Shutdown complete.", "green") 138 | 139 | if __name__ == "__main__": 140 | main() 141 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | from dotenv import load_dotenv 5 | 6 | from agent import run_experiment_loop 7 | from logger import print_status 8 | 9 | 10 | def main(): 11 | # Load environment variables from .env file 12 | load_dotenv() 13 | 14 | # Debug: show which credentials are available 15 | import sys 16 | google_key = os.environ.get("GOOGLE_API_KEY", "") 17 | anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "") 18 | modal_id = os.environ.get("MODAL_TOKEN_ID", "") 19 | modal_secret = os.environ.get("MODAL_TOKEN_SECRET", "") 20 | print(f"[DEBUG] Credentials check: GOOGLE_API_KEY={'set' if google_key else 'missing'} (len={len(google_key)}), " 21 | f"ANTHROPIC_API_KEY={'set' if anthropic_key else 'missing'}, " 22 | f"MODAL_TOKEN={'set' if modal_id and modal_secret else 'missing'}", file=sys.stderr) 
23 | 24 | parser = argparse.ArgumentParser( 25 | description="AI Experiment Agent CLI (single-agent and orchestrator modes)" 26 | ) 27 | parser.add_argument( 28 | "task", 29 | type=str, 30 | help=( 31 | "In 'single' mode: the hypothesis to verify.\n" 32 | "In 'orchestrator' mode: the high-level research task to investigate." 33 | ), 34 | ) 35 | parser.add_argument( 36 | "--gpu", 37 | type=str, 38 | default=None, 39 | help="GPU type to request (e.g., 'T4', 'A10G', 'A100', 'any').", 40 | ) 41 | parser.add_argument( 42 | "--mode", 43 | type=str, 44 | choices=["single", "orchestrator"], 45 | default="single", 46 | help=( 47 | "Execution mode: " 48 | "'single' runs a single-researcher agent (original behavior); " 49 | "'orchestrator' runs the higher-level multi-agent orchestrator." 50 | ), 51 | ) 52 | parser.add_argument( 53 | "--num-agents", 54 | type=int, 55 | default=3, 56 | help="(orchestrator) Number of initial single-researcher agents to launch.", 57 | ) 58 | parser.add_argument( 59 | "--max-rounds", 60 | type=int, 61 | default=3, 62 | help="(orchestrator) Maximum number of orchestration rounds.", 63 | ) 64 | parser.add_argument( 65 | "--max-parallel", 66 | type=int, 67 | default=2, 68 | help=( 69 | "(orchestrator) Maximum number of experiments to run in parallel " 70 | "in a single wave of tool calls." 71 | ), 72 | ) 73 | parser.add_argument( 74 | "--test-mode", 75 | action="store_true", 76 | help="Run in test mode with mock data (no LLM/GPU usage).", 77 | ) 78 | parser.add_argument( 79 | "--model", 80 | type=str, 81 | choices=["gemini-3-pro-preview", "claude-opus-4-5"], 82 | default="gemini-3-pro-preview", 83 | help=( 84 | "LLM model to use: " 85 | "'gemini-3-pro-preview' (default) or 'claude-opus-4-5'." 86 | ), 87 | ) 88 | 89 | args = parser.parse_args() 90 | 91 | # Preserve existing behavior by default: single agent mode. 92 | if args.mode == "single": 93 | print_status("Initializing Single Researcher Agent...", "bold cyan") 94 | 95 | try: 96 | # Record GPU preference globally for sandbox creation 97 | import agent as agent_module 98 | 99 | agent_module._selected_gpu = args.gpu 100 | run_experiment_loop(args.task, test_mode=args.test_mode, model=args.model) 101 | except KeyboardInterrupt: 102 | print_status("\nExperiment interrupted by user.", "bold red") 103 | sys.exit(0) 104 | except Exception as e: 105 | import traceback 106 | print_status(f"\nFatal Error: {e}", "bold red") 107 | print(f"[ERROR] Fatal Error: {e}", file=sys.stderr) 108 | traceback.print_exc(file=sys.stderr) 109 | sys.exit(1) 110 | else: 111 | # Multi-agent orchestrator mode. 
112 |         print_status("Initializing Orchestrator Agent...", "bold cyan")
113 | 
114 |         try:
115 |             from orchestrator import run_orchestrator_loop
116 | 
117 |             run_orchestrator_loop(
118 |                 research_task=args.task,
119 |                 num_initial_agents=args.num_agents,
120 |                 max_rounds=args.max_rounds,
121 |                 default_gpu=args.gpu,
122 |                 max_parallel_experiments=args.max_parallel,
123 |                 test_mode=args.test_mode,
124 |                 model=args.model,
125 |             )
126 |         except KeyboardInterrupt:
127 |             print_status("\nOrchestrated experiment interrupted by user.", "bold red")
128 |             sys.exit(0)
129 |         except Exception as e:
130 |             import traceback
131 |             print_status(f"\nFatal Error (orchestrator mode): {e}", "bold red")
132 |             print(f"[ERROR] Fatal Error (orchestrator): {e}", file=sys.stderr)
133 |             traceback.print_exc(file=sys.stderr)
134 |             sys.exit(1)
135 | 
136 | 
137 | if __name__ == "__main__":
138 |     main()
139 | 
--------------------------------------------------------------------------------
/gemini_3_pro_guide.md:
--------------------------------------------------------------------------------
 1 | # Gemini 3 Pro: The Complete Developer's Guide
 2 | 
 3 | Gemini 3 Pro is Google's most advanced AI model, featuring state-of-the-art reasoning capabilities. This guide covers everything you need to build agents, run the model, and track costs using the `google-genai` SDK.
 4 | 
 5 | ## 1. Setup
 6 | 
 7 | To use Gemini 3 Pro, you must use the `google-genai` SDK (version 1.51.0 or higher).
 8 | 
 9 | ```bash
10 | pip install -U google-genai
11 | ```
12 | 
13 | Get your API key from [Google AI Studio](https://aistudio.google.com/).
14 | 
15 | ## 2. Running the Model
16 | 
17 | Gemini 3 Pro introduces a new parameter: `thinking_level`. This controls the depth of the model's internal reasoning process.
18 | 
19 | * **`high`** (Default): Maximum reasoning depth. Best for complex tasks, coding, and math. Higher latency and cost.
20 | * **`low`**: Faster, lower cost. Good for simple instruction following and chat.
21 | 
22 | > [!IMPORTANT]
23 | > You cannot disable "thinking" completely for Gemini 3 Pro.
24 | 
25 | ### Basic Example
26 | 
27 | ```python
28 | from google import genai
29 | from google.genai import types
30 | import os
31 | 
32 | client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
33 | 
34 | response = client.models.generate_content(
35 |     model="gemini-3-pro-preview",
36 |     contents="Explain the concept of quantum entanglement to a 5-year-old.",
37 |     config=types.GenerateContentConfig(
38 |         thinking_level="HIGH",  # Options: "LOW", "HIGH" (default)
39 |     )
40 | )
41 | 
42 | print(response.text)
43 | ```
44 | 
45 | ## 3. Building an Agent (Tool Use)
46 | 
47 | Gemini 3 Pro supports advanced tool use (function calling). You define Python functions, pass them to the model, and the model decides when to call them.
48 | 
49 | ### Step-by-Step Agent Example
50 | 
51 | ```python
52 | from google import genai
53 | from google.genai import types
54 | import os
55 | 
56 | # 1. Define the tools
57 | def get_weather(location: str):
58 |     """Get the current weather for a given location."""
59 |     # In a real app, call a weather API here
60 |     return {"location": location, "temperature": "72", "condition": "Sunny"}
61 | 
62 | def get_stock_price(ticker: str):
63 |     """Get the current stock price for a given ticker symbol."""
64 |     # In a real app, call a stock API here
65 |     return {"ticker": ticker, "price": "150.25", "currency": "USD"}
66 | 
67 | # 2. Initialize Client
68 | client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
69 | 
70 | # 3. Create the tool configuration
71 | tools = [get_weather, get_stock_price]
72 | 
73 | # 4. Run the model with tools
74 | response = client.models.generate_content(
75 |     model="gemini-3-pro-preview",
76 |     contents="What's the weather in New York and how is Google's stock doing?",
77 |     config=types.GenerateContentConfig(
78 |         tools=tools,
79 |         thinking_level="HIGH"
80 |     )
81 | )
82 | 
83 | # 5. Handle the response (Automatic function calling is handled by the SDK in many cases,
84 | # but here is how you inspect the tool calls if you need to execute them manually or debug)
85 | for part in response.candidates[0].content.parts:
86 |     if part.function_call:
87 |         print(f"Model requested tool: {part.function_call.name}")
88 |         print(f"Arguments: {part.function_call.args}")
89 | 
90 |         # Execute the tool (simplified logic)
91 |         tool_name = part.function_call.name
92 |         tool_args = part.function_call.args
93 | 
94 |         if tool_name == "get_weather":
95 |             result = get_weather(**tool_args)
96 |         elif tool_name == "get_stock_price":
97 |             result = get_stock_price(**tool_args)
98 | 
99 |         print(f"Tool Result: {result}")
100 | ```
101 | 
102 | > [!TIP]
103 | > For a fully autonomous agent, you would feed the tool results back into the model in a loop until the model generates a final text response.
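A minimal sketch of that loop, reusing `get_weather` and `client` from the example above. The request/response plumbing follows the `google-genai` SDK, but treat it as an outline rather than a drop-in implementation:

```python
# Feed tool results back until the model stops requesting tools.
contents = [
    types.Content(role="user",
                  parts=[types.Part.from_text(text="What's the weather in New York?")])
]

while True:
    response = client.models.generate_content(
        model="gemini-3-pro-preview",
        contents=contents,
        config=types.GenerateContentConfig(tools=[get_weather]),
    )
    part = response.candidates[0].content.parts[0]
    if not part.function_call:
        print(response.text)  # final text answer
        break
    # Execute the requested tool, then append both the model's call and the result.
    result = get_weather(**part.function_call.args)
    contents.append(response.candidates[0].content)
    contents.append(types.Content(
        role="user",
        parts=[types.Part.from_function_response(
            name=part.function_call.name, response=result)],
    ))
```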
Summary of Key Differences 141 | 142 | | Feature | Gemini 1.5 Pro | Gemini 3 Pro | 143 | | :--- | :--- | :--- | 144 | | **Reasoning** | Standard | **Advanced (Thinking Process)** | 145 | | **Thinking Control** | N/A | `thinking_level="LOW" | "HIGH"` | 146 | | **Token Metadata** | Standard | Includes `thoughts_token_count` | 147 | | **SDK Requirement** | Older versions OK | Requires `google-genai >= 1.51.0` | 148 | -------------------------------------------------------------------------------- /insights.py: -------------------------------------------------------------------------------- 1 | """Lightweight sidebar summarizer for streaming agent thoughts. 2 | 3 | This helper stays **separate** from the main agents/orchestrator logic. 4 | It only consumes the recent public transcript (last ~5 steps) and asks a 5 | cheaper Gemini model (no thinking mode) to condense it into a tiny finding 6 | plus an optional chart spec the frontend can render. 7 | """ 8 | 9 | from __future__ import annotations 10 | 11 | import json 12 | import os 13 | import logging 14 | from typing import Any, Dict, List, Optional 15 | 16 | from google import genai 17 | from google.genai import types 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | _client: Optional[genai.Client] = None 23 | 24 | 25 | def _get_client() -> genai.Client: 26 | """Lazily create a single Gemini client (re-used across requests).""" 27 | 28 | global _client 29 | if _client is None: 30 | api_key = os.environ.get("GOOGLE_API_KEY") 31 | if not api_key: 32 | raise RuntimeError("GOOGLE_API_KEY is not set") 33 | _client = genai.Client(api_key=api_key) 34 | return _client 35 | 36 | 37 | def _build_prompt(history: List[Dict[str, str]]) -> str: 38 | """Format the last few steps into a compact textual context.""" 39 | 40 | lines: List[str] = [] 41 | for item in history[-5:]: # hard cap: last 5 turns only 42 | role = (item.get("type") or "text").upper() 43 | content = (item.get("content") or "").strip() 44 | # Trim individual snippets to keep context small and cheap 45 | if len(content) > 1600: 46 | content = content[:1600] + "\n...[truncated]" 47 | lines.append(f"[{role}]\n{content}") 48 | 49 | return "\n\n".join(lines) 50 | 51 | 52 | def summarize_agent_findings( 53 | agent_id: str, 54 | history: List[Dict[str, str]], 55 | ) -> Dict[str, Any]: 56 | """Return a JSON-friendly finding + optional chart for a single agent. 57 | 58 | Args: 59 | agent_id: Identifier of the sub-agent (for logging only). 60 | history: List of dicts with at least ``type`` and ``content`` keys. 61 | Only the 5 most recent entries are used. 62 | 63 | Returns: 64 | {"summary": str, "chart": Optional[dict]} 65 | """ 66 | prompt = _build_prompt(history) 67 | 68 | if not prompt.strip(): 69 | return {"summary": "Waiting for agent output...", "chart": None} 70 | 71 | system_instruction = ( 72 | "You distill an autonomous research agent's most recent scratch notes " 73 | "into crisp sidebar findings. Keep it short (<=120 words), prefer " 74 | "bullets, surface concrete numbers, and call out the next action.\n" 75 | "If you can see numeric progressions (loss/accuracy/score vs step), " 76 | "add a compact chart spec. Use simple types only: line or bar.\n" 77 | "Respond as JSON with keys: summary (markdown-safe string) and optional " 78 | "chart. Chart shape: {\"title\": str, \"type\": \"line\"|\"bar\", " 79 | "\"labels\": [str], \"series\":[{\"name\": str, \"values\": [number]}]}. " 80 | "Omit chart if no numeric series are present." 
81 |     )
82 | 
83 |     client = _get_client()
84 | 
85 |     try:
86 |         response = client.models.generate_content(
87 |             model="gemini-3-pro-preview",  # no thinking config: keeps this sidebar call fast and cheap
88 |             contents=[
89 |                 types.Content(
90 |                     role="user",
91 |                     parts=[types.Part.from_text(text=prompt)],
92 |                 )
93 |             ],
94 |             config=types.GenerateContentConfig(
95 |                 system_instruction=system_instruction,
96 |                 temperature=0.2,
97 |                 max_output_tokens=4000,
98 |             ),
99 |         )
100 |     except Exception as e:
101 |         logger.error("Gemini summarize failed for agent %s: %s", agent_id, e)
102 |         raise
103 | 
104 |     raw_text = ""
105 |     try:
106 |         # Prefer the convenience accessor if available
107 |         raw_text = getattr(response, "text", "") or ""
108 |         if not raw_text:
109 |             candidate = response.candidates[0]
110 |             if candidate.content and candidate.content.parts:
111 |                 for part in candidate.content.parts:
112 |                     if getattr(part, "text", None):
113 |                         raw_text += part.text
114 |                     elif getattr(part, "inline_data", None) and getattr(part.inline_data, "data", None):
115 |                         try:
116 |                             raw_text += part.inline_data.data.decode("utf-8", errors="ignore")
117 |                         except Exception:
118 |                             pass
119 |         raw_text = raw_text.strip()
120 |     except Exception as e:
121 |         logger.warning("Failed to extract text for agent %s: %s", agent_id, e)
122 | 
123 |     result: Dict[str, Any]
124 |     try:
125 |         result = json.loads(raw_text)
126 |     except Exception as json_err:
127 |         logger.debug(
128 |             "summarize_agent: json decode failed for agent=%s err=%s raw_sample=%s",
129 |             agent_id,
130 |             json_err,
131 |             (raw_text[:200] + ("..." if len(raw_text) > 200 else "")),
132 |         )
133 |         # Heuristic: try to salvage a JSON-ish blob between the first { and last }
134 |         salvaged = None
135 |         if "{" in raw_text and "}" in raw_text:
136 |             candidate_blob = raw_text[raw_text.find("{") : raw_text.rfind("}") + 1]
137 |             try:
138 |                 salvaged = json.loads(candidate_blob)
139 |             except Exception:
140 |                 pass
141 | 
142 |         if salvaged and isinstance(salvaged, dict):
143 |             result = salvaged
144 |         else:
145 |             # Fallback: treat the raw text as the summary string.
146 |             result = {"summary": raw_text or "No summary produced", "chart": None}
147 | 
148 |     # Ensure required fields exist and are JSON-serializable
149 |     if "summary" not in result or not isinstance(result.get("summary"), str):
150 |         result["summary"] = raw_text or "No summary produced"
151 |     if "chart" in result and result["chart"] is not None:
152 |         if not isinstance(result["chart"], dict):
153 |             result["chart"] = None
154 | 
155 |     # Trim overly verbose summaries so the rail stays tight
156 |     if result.get("summary") and len(result["summary"]) > 800:
157 |         result["summary"] = result["summary"][:800] + "..."
158 | 
159 |     return result
160 | 
--------------------------------------------------------------------------------
/frontend/src/lib/api.ts:
--------------------------------------------------------------------------------
1 | // In production (Railway), the API is served from the same origin
2 | // In development, we use localhost:8000
3 | export const API_BASE_URL = import.meta.env.DEV ?
"http://localhost:8000" : ""; 4 | 5 | // LocalStorage key for user credentials 6 | const CREDENTIALS_STORAGE_KEY = "ai_researcher_credentials"; 7 | 8 | export interface UserCredentials { 9 | google_api_key?: string; 10 | anthropic_api_key?: string; 11 | modal_token_id?: string; 12 | modal_token_secret?: string; 13 | } 14 | 15 | export interface SingleExperimentRequest { 16 | task: string; 17 | gpu?: string; 18 | model?: string; 19 | test_mode?: boolean; 20 | credentials?: UserCredentials; 21 | } 22 | 23 | export interface OrchestratorExperimentRequest { 24 | task: string; 25 | gpu?: string; 26 | model?: string; 27 | num_agents: number; 28 | max_rounds: number; 29 | max_parallel: number; 30 | test_mode?: boolean; 31 | credentials?: UserCredentials; 32 | } 33 | 34 | export type ExperimentRequest = 35 | | SingleExperimentRequest 36 | | OrchestratorExperimentRequest; 37 | 38 | export interface LogEvent { 39 | type: "line" | "summary"; 40 | stream?: "stdout" | "stderr"; 41 | timestamp: string; 42 | raw?: string; 43 | plain?: string; 44 | exit_code?: number; 45 | duration_seconds?: number; 46 | } 47 | 48 | export interface ChartSeries { 49 | name: string; 50 | values: number[]; 51 | } 52 | 53 | export interface ChartSpec { 54 | title?: string; 55 | type: "line" | "bar"; 56 | labels: string[]; 57 | series: ChartSeries[]; 58 | } 59 | 60 | export interface AgentSummaryRequest { 61 | agent_id: string; 62 | history: { type: "thought" | "code" | "result" | "text"; content: string }[]; 63 | } 64 | 65 | export interface AgentSummaryResponse { 66 | summary: string; 67 | chart?: ChartSpec | null; 68 | } 69 | 70 | export interface CredentialStatus { 71 | hasGoogleApiKey: boolean; 72 | hasAnthropicApiKey: boolean; 73 | hasModalToken: boolean; 74 | } 75 | 76 | export interface CredentialUpdatePayload { 77 | googleApiKey?: string; 78 | anthropicApiKey?: string; 79 | modalTokenId?: string; 80 | modalTokenSecret?: string; 81 | } 82 | 83 | // --------------------------------------------------------------------------- 84 | // Local credential storage (browser localStorage) 85 | // --------------------------------------------------------------------------- 86 | 87 | export function getStoredCredentials(): UserCredentials { 88 | try { 89 | const stored = localStorage.getItem(CREDENTIALS_STORAGE_KEY); 90 | if (stored) { 91 | return JSON.parse(stored); 92 | } 93 | } catch (e) { 94 | console.warn("Failed to read stored credentials:", e); 95 | } 96 | return {}; 97 | } 98 | 99 | export function storeCredentials(creds: CredentialUpdatePayload): void { 100 | const toStore: UserCredentials = { 101 | google_api_key: creds.googleApiKey, 102 | anthropic_api_key: creds.anthropicApiKey, 103 | modal_token_id: creds.modalTokenId, 104 | modal_token_secret: creds.modalTokenSecret, 105 | }; 106 | // Only store non-empty values 107 | const filtered = Object.fromEntries( 108 | Object.entries(toStore).filter(([, v]) => v && v.trim()) 109 | ); 110 | localStorage.setItem(CREDENTIALS_STORAGE_KEY, JSON.stringify(filtered)); 111 | } 112 | 113 | export function getLocalCredentialStatus(): CredentialStatus { 114 | const creds = getStoredCredentials(); 115 | return { 116 | hasGoogleApiKey: Boolean(creds.google_api_key?.trim()), 117 | hasAnthropicApiKey: Boolean(creds.anthropic_api_key?.trim()), 118 | hasModalToken: Boolean(creds.modal_token_id?.trim() && creds.modal_token_secret?.trim()), 119 | }; 120 | } 121 | 122 | // --------------------------------------------------------------------------- 123 | // API functions 124 | // 
---------------------------------------------------------------------------
125 | 
126 | export async function streamExperiment(
127 |   endpoint: "/api/experiments/single/stream" | "/api/experiments/orchestrator/stream",
128 |   payload: ExperimentRequest,
129 |   onData: (data: LogEvent) => void,
130 |   onError: (error: Error) => void,
131 |   onComplete: () => void
132 | ) {
133 |   try {
134 |     // Attach stored credentials to the request
135 |     const credentials = getStoredCredentials();
136 |     const payloadWithCreds = { ...payload, credentials };
137 | 
138 |     const response = await fetch(`${API_BASE_URL}${endpoint}`, {
139 |       method: "POST",
140 |       headers: {
141 |         "Content-Type": "application/json",
142 |       },
143 |       body: JSON.stringify(payloadWithCreds),
144 |     });
145 | 
146 |     if (!response.ok) {
147 |       throw new Error(`API Error: ${response.status} ${response.statusText}`);
148 |     }
149 | 
150 |     if (!response.body) {
151 |       throw new Error("No response body");
152 |     }
153 | 
154 |     const reader = response.body.getReader();
155 |     const decoder = new TextDecoder();
156 |     let buffer = "";
157 | 
158 |     while (true) {
159 |       const { done, value } = await reader.read();
160 |       if (done) break;
161 | 
162 |       buffer += decoder.decode(value, { stream: true });
163 |       const lines = buffer.split("\n");
164 |       buffer = lines.pop() || "";
165 | 
166 |       for (const line of lines) {
167 |         if (!line.trim()) continue;
168 |         try {
169 |           const event = JSON.parse(line);
170 |           onData(event);
171 |         } catch (e) {
172 |           console.warn("Failed to parse JSON line:", line, e);
173 |         }
174 |       }
175 |     }
176 | 
177 |     onComplete();
178 |   } catch (error) {
179 |     onError(error instanceof Error ? error : new Error(String(error)));
180 |   }
181 | }
182 | 
183 | export async function summarizeAgent(payload: AgentSummaryRequest): Promise<AgentSummaryResponse> {
184 |   const response = await fetch(`${API_BASE_URL}/api/agents/summarize`, {
185 |     method: "POST",
186 |     headers: { "Content-Type": "application/json" },
187 |     body: JSON.stringify(payload),
188 |   });
189 | 
190 |   if (!response.ok) {
191 |     throw new Error(`Summarizer error: ${response.status} ${response.statusText}`);
192 |   }
193 | 
194 |   const data = (await response.json()) as AgentSummaryResponse;
195 |   return data;
196 | }
197 | 
198 | export async function fetchCredentialStatus(): Promise<CredentialStatus> {
199 |   // In multi-user mode, we check localStorage instead of server
200 |   // Server credentials are only used as fallback
201 |   const localStatus = getLocalCredentialStatus();
202 | 
203 |   // Also check server for fallback (e.g., if server has env vars set)
204 |   try {
205 |     const response = await fetch(`${API_BASE_URL}/api/credentials/status`);
206 |     if (response.ok) {
207 |       const data = await response.json();
208 |       return {
209 |         hasGoogleApiKey: localStatus.hasGoogleApiKey || Boolean(data.has_google_api_key),
210 |         hasAnthropicApiKey: localStatus.hasAnthropicApiKey || Boolean(data.has_anthropic_api_key),
211 |         hasModalToken: localStatus.hasModalToken || Boolean(data.has_modal_token),
212 |       };
213 |     }
214 |   } catch {
215 |     // Server check failed, just use local status
216 |   }
217 | 
218 |   return localStatus;
219 | }
220 | 
221 | export async function saveCredentials(payload: CredentialUpdatePayload): Promise<CredentialStatus> {
222 |   // Store credentials locally in the browser
223 |   storeCredentials(payload);
224 | 
225 |   // Return the new status based on what we just stored + what was already there
226 |   return fetchCredentialStatus();
227 | }
228 | 
--------------------------------------------------------------------------------
/frontend/src/components/Notebook/AgentNotebook.tsx:
--------------------------------------------------------------------------------
1 | import { useEffect, useRef, useState } from "react";
2 | import { createPortal } from "react-dom";
3 | import { Maximize2, Minimize2 } from "lucide-react";
4 | import { AgentState } from "@/lib/useExperiment";
5 | import { NotebookCell } from "./NotebookCell";
6 | import { StatusBadge } from "../StatusBadge";
7 | import { cn } from "@/lib/utils";
8 | 
9 | interface AgentNotebookProps {
10 |   agent: AgentState;
11 | }
12 | 
13 | function AgentNotebookContent({
14 |   agent,
15 |   isExpanded,
16 |   onToggleExpand,
17 |   isModal = false
18 | }: {
19 |   agent: AgentState;
20 |   isExpanded: boolean;
21 |   onToggleExpand: () => void;
22 |   isModal?: boolean;
23 | }) {
24 |   const scrollRef = useRef<HTMLDivElement | null>(null);
25 |   const autoScrollEnabledRef = useRef(true);
26 |   const isProgrammaticScrollRef = useRef(false);
27 |   const timeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);
28 | 
29 |   const scrollToBottom = () => {
30 |     if (scrollRef.current) {
31 |       isProgrammaticScrollRef.current = true;
32 |       scrollRef.current.scrollTop = scrollRef.current.scrollHeight;
33 |       // Small timeout to ensure the onScroll event fired by this change
34 |       // is ignored by our handler.
35 |       setTimeout(() => {
36 |         isProgrammaticScrollRef.current = false;
37 |       }, 50);
38 |     }
39 |   };
40 | 
41 |   // Auto-scroll effect
42 |   useEffect(() => {
43 |     // We depend on the entire agent object to catch streaming updates
44 |     if (autoScrollEnabledRef.current) {
45 |       scrollToBottom();
46 |     }
47 |   }, [agent]);
48 | 
49 |   const handleScroll = () => {
50 |     if (!scrollRef.current) return;
51 | 
52 |     // Ignore scroll events triggered by our auto-scroll
53 |     if (isProgrammaticScrollRef.current) {
54 |       return;
55 |     }
56 | 
57 |     const { scrollTop, scrollHeight, clientHeight } = scrollRef.current;
58 |     const isAtBottom = scrollHeight - scrollTop - clientHeight < 50;
59 | 
60 |     if (isAtBottom) {
61 |       // User is at the bottom, resume auto-scroll
62 |       autoScrollEnabledRef.current = true;
63 |       if (timeoutRef.current) {
64 |         clearTimeout(timeoutRef.current);
65 |         timeoutRef.current = null;
66 |       }
67 |     } else {
68 |       // User scrolled away
69 |       autoScrollEnabledRef.current = false;
70 | 
71 |       // Set/Reset 10s timeout to resume auto-scroll
72 |       if (timeoutRef.current) {
73 |         clearTimeout(timeoutRef.current);
74 |       }
75 | 
76 |       timeoutRef.current = setTimeout(() => {
77 |         autoScrollEnabledRef.current = true;
78 |         // Optional: snap back to bottom immediately when timer fires?
79 |         // The requirement says "goes back to auto-scroll-to-bottom",
80 |         // which we interpret as re-enabling the behavior.
81 |         // We'll also snap to bottom to make it clear the mode is back.
82 |         scrollToBottom();
83 |       }, 10000);
84 |     }
85 |   };
86 | 
87 |   return (
88 | 
92 | {/* Header - Ultra Minimal */} 93 |
94 |
95 | 96 | Agent {agent.id} 97 | 98 |
99 | 100 | {agent.gpu || "CPU"} 101 | 102 |
103 |
104 | 105 | 111 |
112 |
113 | 114 | {/* Hypothesis - Clean & Typography focused */} 115 | {agent.hypothesis && ( 116 |
117 |
118 | Objective 119 |
120 |
124 | {agent.hypothesis} 125 |
126 |
127 | )} 128 | 129 | {/* Notebook Content */} 130 |
138 |
142 | {agent.steps.length === 0 ? ( 143 |
144 |
145 | Initializing Environment 146 |
147 | ) : ( 148 | agent.steps.map((step) => ( 149 | 150 | )) 151 | )} 152 |
153 |
154 |
155 | ); 156 | } 157 | 158 | export function AgentNotebook({ agent }: AgentNotebookProps) { 159 | const [isExpanded, setIsExpanded] = useState(false); 160 | 161 | // Effect to handle body scroll locking when expanded 162 | useEffect(() => { 163 | if (isExpanded) { 164 | document.body.style.overflow = "hidden"; 165 | } else { 166 | document.body.style.overflow = "unset"; 167 | } 168 | return () => { 169 | document.body.style.overflow = "unset"; 170 | }; 171 | }, [isExpanded]); 172 | 173 | return ( 174 | <> 175 | {/* Default View */} 176 |
177 | setIsExpanded(!isExpanded)} 181 | /> 182 |
183 | 184 | {/* Expanded Modal View */} 185 | {isExpanded && createPortal( 186 |
187 |
188 | setIsExpanded(!isExpanded)} 192 | isModal={true} 193 | /> 194 |
195 |
, 196 | document.body 197 | )} 198 | 199 | ); 200 | } 201 | -------------------------------------------------------------------------------- /frontend/src/components/FindingsRail.tsx: -------------------------------------------------------------------------------- 1 | import { AgentState, AgentInsight } from "@/lib/useExperiment"; 2 | import { ChartSpec } from "@/lib/api"; 3 | import { formatDistanceToNow } from "date-fns"; 4 | import { cn } from "@/lib/utils"; 5 | import ReactMarkdown from "react-markdown"; 6 | import remarkGfm from "remark-gfm"; 7 | import { motion, AnimatePresence } from "framer-motion"; 8 | 9 | type InsightWithAgent = AgentInsight & { agentId: string; gpu?: string }; 10 | 11 | interface FindingsRailProps { 12 | agents: Record; 13 | } 14 | 15 | function MiniChart({ chart }: { chart: ChartSpec }) { 16 | const width = 260; 17 | const height = 140; // slightly taller to fit axis labels 18 | const padding = { top: 14, right: 10, bottom: 28, left: 46 }; 19 | 20 | const series = chart.series?.[0]; 21 | if (!series || !Array.isArray(series.values) || series.values.length === 0) return null; 22 | 23 | const values = series.values.map((v) => Number(v)).filter((v) => Number.isFinite(v)); 24 | if (values.length === 0) return null; 25 | 26 | const min = Math.min(...values); 27 | const max = Math.max(...values); 28 | const span = max - min || 1; 29 | 30 | const innerWidth = width - padding.left - padding.right; 31 | const innerHeight = height - padding.top - padding.bottom; 32 | 33 | const formatNumber = (val: number) => { 34 | const abs = Math.abs(val); 35 | if (abs >= 1_000_000_000) return `${(val / 1_000_000_000).toFixed(1)}b`; 36 | if (abs >= 1_000_000) return `${(val / 1_000_000).toFixed(1)}m`; 37 | if (abs >= 1_000) return `${(val / 1_000).toFixed(1)}k`; 38 | if (abs >= 100) return val.toFixed(0); 39 | if (abs >= 1) return val.toFixed(2); 40 | return val.toPrecision(2); 41 | }; 42 | 43 | const pts = values.map((v, idx) => { 44 | const x = padding.left + (idx / Math.max(values.length - 1, 1)) * innerWidth; 45 | const y = padding.top + (1 - (v - min) / span) * innerHeight; 46 | return `${x},${y}`; 47 | }); 48 | 49 | // Respect labels if they match the series length; otherwise fall back to indices. 50 | const xLabels = 51 | Array.isArray(chart.labels) && chart.labels.length === values.length 52 | ? chart.labels 53 | : values.map((_, idx) => `${idx + 1}`); 54 | 55 | const xTicks = (() => { 56 | const maxTicks = 4; // keep tiny chart readable 57 | const step = Math.max(1, Math.ceil(xLabels.length / maxTicks)); 58 | const ticks: { idx: number; label: string; x: number }[] = []; 59 | for (let i = 0; i < xLabels.length; i += step) { 60 | const x = padding.left + (i / Math.max(xLabels.length - 1, 1)) * innerWidth; 61 | ticks.push({ idx: i, label: xLabels[i], x }); 62 | } 63 | // Always include the last label for clarity 64 | if (ticks[ticks.length - 1]?.idx !== xLabels.length - 1) { 65 | const i = xLabels.length - 1; 66 | const x = padding.left + (i / Math.max(xLabels.length - 1, 1)) * innerWidth; 67 | ticks.push({ idx: i, label: xLabels[i], x }); 68 | } 69 | return ticks; 70 | })(); 71 | 72 | const yTicks = [0, 0.5, 1].map((t) => ({ 73 | value: min + span * t, 74 | y: padding.top + (1 - t) * innerHeight, 75 | })); 76 | 77 | const bars = chart.type === "bar"; 78 | 79 | return ( 80 |
81 |
82 | {chart.title || "Signal"} 83 | {series.name || "metric"} 84 |
85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | {/* Grid + axes */} 95 | 96 | {yTicks.map(({ y }, i) => ( 97 | 98 | ))} 99 | 100 | 101 | 102 | 103 | {/* Y-axis labels */} 104 | 105 | {yTicks.map(({ y, value }, i) => ( 106 | 107 | {formatNumber(value)} 108 | 109 | ))} 110 | 111 | 112 | {/* X-axis labels */} 113 | 114 | {xTicks.map(({ x, label }, i) => ( 115 | 116 | {label} 117 | 118 | ))} 119 | 120 | 121 | {bars ? ( 122 | values.map((v, idx) => { 123 | const barWidth = innerWidth / Math.max(values.length * 1.4, 1); 124 | const x = padding.left + idx * (innerWidth / Math.max(values.length - 1, 1)); 125 | const y = padding.top + (1 - (v - min) / span) * innerHeight; 126 | const h = height - padding.bottom - y; 127 | return ( 128 | 137 | ); 138 | }) 139 | ) : ( 140 | 148 | )} 149 | 150 |
151 | ); 152 | } 153 | 154 | export function FindingsRail({ agents }: FindingsRailProps) { 155 | const insights: InsightWithAgent[] = Object.values(agents) 156 | .flatMap((agent) => (agent.insights || []).map((insight) => ({ ...insight, agentId: agent.id, gpu: agent.gpu }))) 157 | .sort((a, b) => b.timestamp - a.timestamp) 158 | .slice(0, 24); // Cap to avoid ever-growing rail 159 | 160 | return ( 161 | 162 | {insights.length > 0 && ( 163 | 170 |
171 |
172 |
173 |

Findings

174 |

Live distillations of sub-agents—posted whenever they think.

175 |
176 | 177 |
178 | {insights.map((insight) => ( 179 |
186 |
187 | Agent {insight.agentId} 188 | 189 | {formatDistanceToNow(new Date(insight.timestamp), { addSuffix: true })} 190 | 191 |
192 |
193 | 194 | {insight.summary} 195 | 196 |
197 | {insight.chart && insight.chart.series?.length ? ( 198 |
199 | 200 |
201 | ) : null} 202 |
203 | ))} 204 |
205 |
206 | 207 | )} 208 | 209 | ); 210 | } 211 | -------------------------------------------------------------------------------- /frontend/src/components/CredentialPrompt.tsx: -------------------------------------------------------------------------------- 1 | import { CredentialStatus } from "@/lib/api"; 2 | import { AnimatePresence, motion } from "framer-motion"; 3 | import { ExternalLink, KeyRound, Loader2, ShieldCheck, Sparkles, X } from "lucide-react"; 4 | 5 | export type CredentialFormState = { 6 | googleApiKey: string; 7 | anthropicApiKey: string; 8 | modalTokenId: string; 9 | modalTokenSecret: string; 10 | }; 11 | 12 | type CredentialPromptProps = { 13 | open: boolean; 14 | status: CredentialStatus | null; 15 | selectedModel: "gemini-3-pro-preview" | "claude-opus-4-5"; 16 | form: CredentialFormState; 17 | onChange: (field: keyof CredentialFormState, value: string) => void; 18 | onSubmit: () => void; 19 | onClose: () => void; 20 | isSaving: boolean; 21 | error?: string | null; 22 | }; 23 | 24 | export function CredentialPrompt({ 25 | open, 26 | status, 27 | selectedModel, 28 | form, 29 | onChange, 30 | onSubmit, 31 | onClose, 32 | isSaving, 33 | error, 34 | }: CredentialPromptProps) { 35 | const googleReady = !!status?.hasGoogleApiKey; 36 | const anthropicReady = !!status?.hasAnthropicApiKey; 37 | const modalReady = !!status?.hasModalToken; 38 | 39 | // Determine which key is needed based on selected model 40 | const needsGoogleKey = selectedModel === "gemini-3-pro-preview"; 41 | const needsAnthropicKey = selectedModel === "claude-opus-4-5"; 42 | 43 | const requiredKeyName = needsGoogleKey ? "Google API key" : "Anthropic API key"; 44 | const hasRequiredKey = needsGoogleKey ? googleReady : anthropicReady; 45 | 46 | const readinessCopy = 47 | hasRequiredKey && modalReady 48 | ? "All set — keys already saved locally." 49 | : `Needed: ${[ 50 | !hasRequiredKey ? requiredKeyName : null, 51 | modalReady ? null : "Modal token (id + secret)", 52 | ] 53 | .filter(Boolean) 54 | .join(" + ")}`; 55 | 56 | const googleProvided = !!form.googleApiKey.trim(); 57 | const anthropicProvided = !!form.anthropicApiKey.trim(); 58 | const modalProvided = !!form.modalTokenId.trim() && !!form.modalTokenSecret.trim(); 59 | 60 | // Check if the required key for the selected model is available or provided 61 | const hasRequiredLLMKey = needsGoogleKey 62 | ? (googleReady || googleProvided) 63 | : (anthropicReady || anthropicProvided); 64 | const hasModalCredentials = modalReady || modalProvided; 65 | 66 | const disableSubmit = 67 | isSaving || !hasRequiredLLMKey || !hasModalCredentials; 68 | 69 | return ( 70 | 71 | {open && ( 72 | 78 |
79 | 86 |
87 |
88 |
89 | 90 |
91 |
92 |
93 |
94 | 95 | Environment check 96 |
97 |

98 | Add your API keys to launch the run 99 |

100 |

101 | We need at least one LLM key (Google for Gemini or Anthropic for Claude) and a Modal token pair to spin up research sandboxes. Keys are stored locally in your .env. 102 |

103 |
104 | 111 |
112 | 113 |
114 |
115 |
116 | 117 |
118 |

Credentials status

119 |

120 | {hasRequiredKey && modalReady 121 | ? "Ready to launch." 122 | : "Add the missing keys to continue."} 123 |

124 |
125 |
126 | 127 |
128 | 133 | 138 | 139 |
140 | 141 | 173 |
174 | 175 |
176 | onChange("googleApiKey", value)} 181 | status={googleReady ? "ok" : needsGoogleKey ? "missing" : "optional"} 182 | helper="Used for Gemini 3 Pro (stored locally)." 183 | /> 184 | onChange("anthropicApiKey", value)} 189 | status={anthropicReady ? "ok" : needsAnthropicKey ? "missing" : "optional"} 190 | helper="Used for Claude Opus 4.5 (stored locally)." 191 | /> 192 | onChange("modalTokenId", value)} 197 | status={modalReady ? "ok" : "missing"} 198 | helper="Pair with the secret to deploy sandboxes." 199 | /> 200 | onChange("modalTokenSecret", value)} 205 | status={modalReady ? "ok" : "missing"} 206 | helper="Kept locally in your .env file." 207 | /> 208 | 209 | {error && ( 210 |
211 | {error} 212 |
213 | )} 214 | 215 |
216 |

{readinessCopy}

217 | 234 |
235 |
236 |
237 |
238 | 239 | 240 | )} 241 | 242 | ); 243 | } 244 | 245 | type StatusPillProps = { 246 | label: string; 247 | ok: boolean; 248 | required?: boolean; 249 | }; 250 | 251 | function StatusPill({ label, ok, required = true }: StatusPillProps) { 252 | // If not required and not ok, show as "optional" (gray/neutral) 253 | const isOptional = !required && !ok; 254 | 255 | return ( 256 |
257 | {label} 258 | 267 |
276 | {ok ? "Ready" : isOptional ? "Optional" : "Missing"} 277 | 278 |
279 | ); 280 | } 281 | 282 | type FieldProps = { 283 | label: string; 284 | placeholder: string; 285 | value: string; 286 | onChange: (value: string) => void; 287 | status: "ok" | "missing" | "optional"; 288 | helper?: string; 289 | }; 290 | 291 | function Field({ label, placeholder, value, onChange, status, helper }: FieldProps) { 292 | const statusText = status === "ok" ? "Optional (already set)" : status === "optional" ? "Optional" : "Required"; 293 | const statusColor = status === "ok" ? "text-emerald-300" : status === "optional" ? "text-blue-300" : "text-amber-200"; 294 | 295 | return ( 296 | 311 | ); 312 | } 313 | -------------------------------------------------------------------------------- /frontend/src/components/Notebook/ResearchPaper.tsx: -------------------------------------------------------------------------------- 1 | import { useMemo, useState } from "react"; 2 | import { createPortal } from "react-dom"; 3 | import { FileText, X, Download, Printer, BookOpen } from "lucide-react"; 4 | import ReactMarkdown from "react-markdown"; 5 | import remarkGfm from "remark-gfm"; 6 | import { ChartSpec } from "@/lib/api"; 7 | 8 | interface ResearchPaperProps { 9 | content: string; 10 | charts?: ChartSpec[]; 11 | } 12 | 13 | function PaperChart({ chart, compact = false }: { chart: ChartSpec; compact?: boolean }) { 14 | const width = compact ? 340 : 640; 15 | const height = compact ? 140 : 240; 16 | const padding = compact 17 | ? { top: 14, right: 10, bottom: 28, left: 46 } 18 | : { top: 18, right: 18, bottom: 42, left: 64 }; 19 | 20 | const series = chart.series?.[0]; 21 | const values = useMemo( 22 | () => (series?.values || []).map((v) => Number(v)).filter((v) => Number.isFinite(v)), 23 | [series], 24 | ); 25 | if (!series || !values.length) return null; 26 | 27 | const min = Math.min(...values); 28 | const max = Math.max(...values); 29 | const span = max - min || 1; 30 | 31 | const innerWidth = width - padding.left - padding.right; 32 | const innerHeight = height - padding.top - padding.bottom; 33 | 34 | const formatNumber = (val: number) => { 35 | const abs = Math.abs(val); 36 | if (abs >= 1_000_000_000) return `${(val / 1_000_000_000).toFixed(1)}b`; 37 | if (abs >= 1_000_000) return `${(val / 1_000_000).toFixed(1)}m`; 38 | if (abs >= 1_000) return `${(val / 1_000).toFixed(1)}k`; 39 | if (abs >= 100) return val.toFixed(0); 40 | if (abs >= 1) return val.toFixed(2); 41 | return val.toPrecision(2); 42 | }; 43 | 44 | const xLabels = 45 | Array.isArray(chart.labels) && chart.labels.length === values.length 46 | ? chart.labels 47 | : values.map((_, idx) => `${idx + 1}`); 48 | 49 | const xTicks = useMemo(() => { 50 | const maxTicks = compact ? 
4 : 6; 51 | const step = Math.max(1, Math.ceil(xLabels.length / maxTicks)); 52 | const ticks: { idx: number; label: string; x: number }[] = []; 53 | for (let i = 0; i < xLabels.length; i += step) { 54 | const x = padding.left + (i / Math.max(xLabels.length - 1, 1)) * innerWidth; 55 | ticks.push({ idx: i, label: xLabels[i], x }); 56 | } 57 | if (ticks[ticks.length - 1]?.idx !== xLabels.length - 1) { 58 | const i = xLabels.length - 1; 59 | const x = padding.left + (i / Math.max(xLabels.length - 1, 1)) * innerWidth; 60 | ticks.push({ idx: i, label: xLabels[i], x }); 61 | } 62 | return ticks; 63 | }, [xLabels, padding.left, innerWidth, compact]); 64 | 65 | const yTicks = [0, 0.5, 1].map((t) => ({ 66 | value: min + span * t, 67 | y: padding.top + (1 - t) * innerHeight, 68 | })); 69 | 70 | const points = values.map((v, idx) => { 71 | const x = padding.left + (idx / Math.max(values.length - 1, 1)) * innerWidth; 72 | const y = padding.top + (1 - (v - min) / span) * innerHeight; 73 | return `${x},${y}`; 74 | }); 75 | 76 | const bars = chart.type === "bar"; 77 | 78 | return ( 79 |
80 |
81 | {chart.title || "Generated Chart"} 82 | 83 | {series.name || "Series"} · {chart.type === "bar" ? "Bar" : "Line"} 84 | 85 |
86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | {/* Grid + axes */} 96 | 97 | {yTicks.map(({ y }, i) => ( 98 | 99 | ))} 100 | 101 | 102 | 103 | 104 | 105 | {yTicks.map(({ y, value }, i) => ( 106 | 107 | {formatNumber(value)} 108 | 109 | ))} 110 | 111 | 112 | 113 | {xTicks.map(({ x, label }, i) => ( 114 | 115 | {label} 116 | 117 | ))} 118 | 119 | 120 | {bars ? ( 121 | values.map((v, idx) => { 122 | const barWidth = innerWidth / Math.max(values.length * 1.15, 1); 123 | const x = padding.left + idx * (innerWidth / Math.max(values.length - 1, 1)); 124 | const y = padding.top + (1 - (v - min) / span) * innerHeight; 125 | const h = height - padding.bottom - y; 126 | return ( 127 | 136 | ); 137 | }) 138 | ) : ( 139 | 147 | )} 148 | 149 |
150 | {chart.title || "Figure"} — {series.name || "metric"} over steps. Labels: {xLabels.slice(0, 3).join(", ")} 151 | {xLabels.length > 3 ? "…" : ""}. 152 |
153 |
154 | ); 155 | } 156 | 157 | export function ResearchPaper({ content, charts }: ResearchPaperProps) { 158 | const [isFullView, setIsFullView] = useState(false); 159 | 160 | return ( 161 | <> 162 | {/* Minimal Inline View */} 163 |
164 |
165 | {/* Header */} 166 |
167 |
168 |
169 | 170 | Final Manuscript 171 | 172 |
173 | 182 |
183 | 184 | {/* Preview Content */} 185 |
186 |
187 | 188 | {content} 189 | 190 |
191 | {charts && charts.length > 0 && ( 192 |
193 |

Figures (preview)

194 |
195 | {charts.slice(0, 3).map((chart, idx) => ( 196 |
197 | 198 |
199 | ))} 200 |
201 |
202 | )} 203 | {/* Gradient Fade */} 204 |
205 |
206 |
207 |
208 | 209 | {/* Full View Overlay */} 210 | {isFullView && createPortal( 211 |
212 |
213 | {/* Clean Header Bar */} 214 |
215 |
216 |
217 |
218 | 219 |
220 | Research Preview 221 |
222 |
223 | Final Draft 224 |
225 |
226 |
227 | 231 | 238 |
239 | 245 |
246 |
247 | 248 | {/* Paper Content Area - "The Page" */} 249 |
250 |
251 |
252 |
253 | {/* Style Overrides for "Paper" look */} 254 | 303 | 304 |

, 308 | }} 309 | > 310 | {content} 311 | 312 | 313 | {charts && charts.length > 0 && ( 314 |
315 |

Figures

316 |
317 | {charts.map((chart, idx) => ( 318 |
319 |
320 | Figure {idx + 1}. {chart.title || "Generated chart"} 321 |
322 | 323 |
324 | ))} 325 |
326 |
327 | )} 328 | 329 | {/* Footer */} 330 |
331 |

Preprint generated by AI Researcher

332 |
333 |

334 |
335 |
336 |
337 |
338 |
,
339 | document.body
340 | )}
341 | 
342 | );
343 | }
344 | 
--------------------------------------------------------------------------------
/frontend/src/lib/useExperiment.ts:
--------------------------------------------------------------------------------
1 | import { useState, useRef } from "react";
2 | import { streamExperiment, LogEvent, summarizeAgent, ChartSpec } from "./api";
3 | 
4 | export type StepType = "thought" | "code" | "result" | "text";
5 | 
6 | export interface ExperimentStep {
7 |   id: string;
8 |   type: StepType;
9 |   content: string;
10 |   metadata?: Record<string, any>;
11 |   timestamp: number;
12 | }
13 | 
14 | export interface AgentInsight {
15 |   id: string;
16 |   summary: string;
17 |   chart?: ChartSpec | null;
18 |   timestamp: number;
19 | }
20 | 
21 | export interface AgentState {
22 |   id: string;
23 |   status: "idle" | "running" | "completed" | "failed";
24 |   hypothesis?: string;
25 |   gpu?: string;
26 |   logs: string[];
27 |   exitCode?: number;
28 |   steps: ExperimentStep[];
29 |   insights: AgentInsight[];
30 | }
31 | 
32 | export type TimelineItem =
33 |   | { type: "thought"; content: string; timestamp: number }
34 |   | { type: "agents"; agentIds: string[]; timestamp: number }
35 |   | { type: "paper"; content: string; charts?: ChartSpec[]; timestamp: number };
36 | 
37 | export interface OrchestratorState {
38 |   status: "idle" | "planning" | "running" | "completed";
39 |   thoughts: string[];
40 |   plan: string[];
41 |   timeline: TimelineItem[];
42 | }
43 | 
44 | export function useExperiment() {
45 |   const [isRunning, setIsRunning] = useState(false);
46 |   const [logs, setLogs] = useState<LogEvent[]>([]);
47 |   const [agents, setAgents] = useState<Record<string, AgentState>>({});
48 |   const [orchestrator, setOrchestrator] = useState<OrchestratorState>({
49 |     status: "idle",
50 |     thoughts: [],
51 |     plan: [],
52 |     timeline: [],
53 |   });
54 |   const [error, setError] = useState<string | null>(null);
55 | 
56 |   // Keep track of the latest agents state to update it functionally
57 |   const agentsRef = useRef<Record<string, AgentState>>({});
58 |   const summaryTimersRef = useRef<Record<string, ReturnType<typeof setTimeout> | null>>({});
59 |   const summaryInflightRef = useRef<Record<string, boolean>>({});
60 | 
61 |   const updateAgent = (id: string, update: Partial<AgentState>) => {
62 |     setAgents((prev) => {
63 |       const current = prev[id] || { id, status: "idle", logs: [], steps: [], insights: [] };
64 |       const next = {
65 |         ...prev,
66 |         [id]: { ...current, ...update },
67 |       };
68 |       agentsRef.current = next;
69 |       return next;
70 |     });
71 |   };
72 | 
73 |   const addAgentStep = (id: string, step: Omit<ExperimentStep, "id" | "timestamp">) => {
74 |     setAgents((prev) => {
75 |       const current = prev[id] || { id, status: "idle", logs: [], steps: [], insights: [] };
76 |       const newStep: ExperimentStep = {
77 |         ...step,
78 |         id: Math.random().toString(36).substring(7),
79 |         timestamp: Date.now(),
80 |       };
81 | 
82 |       const next = {
83 |         ...prev,
84 |         [id]: {
85 |           ...current,
86 |           steps: [...current.steps, newStep],
87 |         },
88 |       };
89 |       agentsRef.current = next;
90 |       return next;
91 |     });
92 |   };
93 | 
94 |   const appendToLatestAgentStep = (id: string, type: StepType, chunk: string) => {
95 |     setAgents((prev) => {
96 |       const current = prev[id];
97 |       if (!current) return prev;
98 | 
99 |       const steps = [...current.steps];
100 | 
101 |       const newStep: ExperimentStep = {
102 |         id: Math.random().toString(36).substring(7),
103 |         type,
104 |         content: chunk,
105 |         timestamp: Date.now(),
106 |       };
107 | 
108 |       // If there are no steps yet, create the first one so we can stream into it.
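      // Later chunks of the same step type are merged into that cell; a type change pushes a fresh cell (see below).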
109 | if (steps.length === 0) { 110 | const next = { 111 | ...prev, 112 | [id]: { 113 | ...current, 114 | steps: [newStep], 115 | }, 116 | }; 117 | agentsRef.current = next; 118 | return next; 119 | } 120 | 121 | const lastStep = steps[steps.length - 1]; 122 | 123 | // If the last step matches the type, append to it. 124 | if (lastStep.type === type) { 125 | steps[steps.length - 1] = { 126 | ...lastStep, 127 | content: lastStep.content + chunk, 128 | }; 129 | } else { 130 | // Fallback: create new step if types mismatch 131 | steps.push(newStep); 132 | } 133 | 134 | const next = { 135 | ...prev, 136 | [id]: { ...current, steps }, 137 | }; 138 | agentsRef.current = next; 139 | return next; 140 | }); 141 | }; 142 | 143 | const addAgentInsight = (id: string, insight: Omit & { id?: string; timestamp?: number }) => { 144 | setAgents((prev) => { 145 | const current = prev[id]; 146 | if (!current) return prev; 147 | 148 | const nextInsight: AgentInsight = { 149 | id: insight.id || Math.random().toString(36).substring(7), 150 | timestamp: insight.timestamp || Date.now(), 151 | summary: insight.summary, 152 | chart: insight.chart, 153 | }; 154 | 155 | const next = { 156 | ...prev, 157 | [id]: { 158 | ...current, 159 | insights: [...(current.insights || []), nextInsight], 160 | }, 161 | }; 162 | 163 | agentsRef.current = next; 164 | return next; 165 | }); 166 | }; 167 | 168 | const appendToLatestOrchestratorStep = (type: "thought" | "text", chunk: string) => { 169 | setOrchestrator((prev) => { 170 | const timeline = [...prev.timeline]; 171 | const lastItem = timeline[timeline.length - 1]; 172 | 173 | // Check if we can append to the last item 174 | if (lastItem && lastItem.type === type) { 175 | timeline[timeline.length - 1] = { 176 | ...lastItem, 177 | content: lastItem.content + chunk 178 | }; 179 | 180 | // Also update thoughts array if it's a thought 181 | let thoughts = prev.thoughts; 182 | if (type === "thought") { 183 | thoughts = [...prev.thoughts]; 184 | if (thoughts.length > 0) { 185 | thoughts[thoughts.length - 1] = thoughts[thoughts.length - 1] + chunk; 186 | } else { 187 | thoughts.push(chunk); 188 | } 189 | } 190 | 191 | return { ...prev, timeline, thoughts }; 192 | } else { 193 | // Create new item 194 | const newItem: TimelineItem = { 195 | type: type as any, // 'text' isn't in TimelineItem type explicitly? let's check 196 | content: chunk, 197 | timestamp: Date.now() 198 | }; 199 | 200 | // TimelineItem is: thought | agents | paper. 201 | // If 'text' is meant to be something else, we might need to adjust. 202 | // But 'thought' is definitely supported. 203 | if (type !== "thought") { 204 | // For now orchestrator only really supports 'thought' and 'paper' and 'agents' in timeline 205 | // If we have general text messages, maybe treat as thoughts or ignore? 206 | // Orchestrator messages are usually shown as 'info' panels in CLI. 207 | // In frontend timeline, we map 'thought' to the Orchestrator block. 208 | // Let's assume 'thought' for now. 
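        // Non-'thought' chunks therefore stop at this early return and are intentionally dropped.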
209 | return prev; 210 | } 211 | 212 | return { 213 | ...prev, 214 | timeline: [...timeline, newItem], 215 | thoughts: [...prev.thoughts, chunk] 216 | }; 217 | } 218 | }); 219 | }; 220 | 221 | const runAgentSummary = async (agentId: string) => { 222 | // Clear pending timer marker 223 | summaryTimersRef.current[agentId] = null; 224 | 225 | if (summaryInflightRef.current[agentId]) return; 226 | 227 | const agent = agentsRef.current[agentId]; 228 | if (!agent || agent.steps.length === 0) return; 229 | 230 | summaryInflightRef.current[agentId] = true; 231 | 232 | try { 233 | const recentSteps = agent.steps.slice(-5).map((step) => ({ 234 | type: step.type, 235 | content: step.content.slice(-2000), 236 | })); 237 | 238 | const resp = await summarizeAgent({ 239 | agent_id: agentId, 240 | history: recentSteps, 241 | }); 242 | 243 | addAgentInsight(agentId, { 244 | summary: resp.summary, 245 | chart: resp.chart, 246 | }); 247 | } catch (err) { 248 | console.warn("Failed to summarize agent", agentId, err); 249 | } finally { 250 | summaryInflightRef.current[agentId] = false; 251 | } 252 | }; 253 | 254 | const scheduleAgentSummary = (agentId: string) => { 255 | if (!agentId) return; 256 | 257 | // debounce to wait for the end of a thought stream 258 | const timers = summaryTimersRef.current; 259 | if (timers[agentId]) { 260 | clearTimeout(timers[agentId]!); 261 | } 262 | 263 | timers[agentId] = setTimeout(() => runAgentSummary(agentId), 900); 264 | }; 265 | 266 | const startExperiment = async ( 267 | mode: "single" | "orchestrator", 268 | config: { 269 | task: string; 270 | gpu?: string; 271 | model?: string; 272 | num_agents?: number; 273 | max_rounds?: number; 274 | max_parallel?: number; 275 | test_mode?: boolean; 276 | } 277 | ) => { 278 | // Reset any pending sidebar summary timers between runs 279 | Object.values(summaryTimersRef.current).forEach((timer) => timer && clearTimeout(timer)); 280 | summaryTimersRef.current = {}; 281 | summaryInflightRef.current = {}; 282 | 283 | setIsRunning(true); 284 | setError(null); 285 | setAgents({}); 286 | setOrchestrator({ thoughts: [], plan: [], timeline: [], status: "running" }); 287 | 288 | try { 289 | const endpoint = mode === "single" 290 | ? "/api/experiments/single/stream" 291 | : "/api/experiments/orchestrator/stream"; 292 | 293 | const payload = mode === "single" 294 | ? { task: config.task, gpu: config.gpu, model: config.model, test_mode: config.test_mode } 295 | : { 296 | task: config.task, 297 | gpu: config.gpu, 298 | model: config.model, 299 | num_agents: config.num_agents || 3, 300 | max_rounds: config.max_rounds || 3, 301 | max_parallel: config.max_parallel || 2, 302 | test_mode: config.test_mode 303 | }; 304 | 305 | await streamExperiment( 306 | endpoint, 307 | payload, 308 | (event) => { 309 | setLogs((prev) => [...prev, event]); 310 | 311 | // Check for subprocess completion with error 312 | if (event.type === "summary" && event.exit_code !== 0) { 313 | // Find any error messages from recent logs (check both stderr and stdout for tracebacks) 314 | setLogs((prevLogs) => { 315 | // Look for stderr first 316 | const recentStderr = prevLogs 317 | .filter((l) => l.stream === "stderr" && l.plain) 318 | .slice(-10) 319 | .map((l) => l.plain?.trim()) 320 | .filter(Boolean) 321 | .join("\n"); 322 | 323 | // Also check stdout for error-like content (tracebacks, Fatal Error, etc.) 
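        // Some runners interleave tracebacks into stdout, so scanning stderr alone can miss the real cause.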
324 | const recentStdout = prevLogs 325 | .filter((l) => l.stream === "stdout" && l.plain && 326 | (l.plain.includes("Error") || l.plain.includes("Traceback") || l.plain.includes("Exception"))) 327 | .slice(-10) 328 | .map((l) => l.plain?.trim()) 329 | .filter(Boolean) 330 | .join("\n"); 331 | 332 | // If no specific error found, show last few lines of any output 333 | const anyOutput = prevLogs 334 | .filter((l) => l.type === "line" && l.plain) 335 | .slice(-5) 336 | .map((l) => l.plain?.trim()) 337 | .filter(Boolean) 338 | .join("\n"); 339 | 340 | const errorMsg = recentStderr || recentStdout || anyOutput || `Process exited with code ${event.exit_code}. No output captured.`; 341 | setError(errorMsg); 342 | return prevLogs; 343 | }); 344 | } 345 | 346 | if (event.type === "line" && event.plain) { 347 | // Check for ::EVENT:: marker 348 | const eventIndex = event.plain.indexOf("::EVENT::"); 349 | if (eventIndex !== -1) { 350 | try { 351 | const jsonStr = event.plain.substring(eventIndex + "::EVENT::".length); 352 | const payload = JSON.parse(jsonStr); 353 | 354 | // Try to extract Agent ID from prefix if present: "[Agent 1] ::EVENT::..." 355 | let inferredAgentId: string | undefined; 356 | const prefix = event.plain.substring(0, eventIndex); 357 | const match = prefix.match(/\[Agent (\d+)\]/); 358 | if (match) { 359 | inferredAgentId = match[1]; 360 | } 361 | 362 | handleStructuredEvent(payload, inferredAgentId); 363 | } catch (e) { 364 | console.warn("Failed to parse structured event:", e); 365 | } 366 | } 367 | } 368 | }, 369 | (err) => { 370 | setError(err.message); 371 | setIsRunning(false); 372 | }, 373 | () => { 374 | setIsRunning(false); 375 | } 376 | ); 377 | } catch (err) { 378 | setError(err instanceof Error ? err.message : "Failed to start experiment"); 379 | setIsRunning(false); 380 | } 381 | }; 382 | 383 | const handleStructuredEvent = (event: any, inferredAgentId?: string) => { 384 | const { type, data } = event; 385 | 386 | switch (type) { 387 | case "AGENT_START": 388 | updateAgent(data.agent_id, { 389 | status: "running", 390 | hypothesis: data.hypothesis, 391 | gpu: data.gpu, 392 | }); 393 | 394 | // Add agent to the current 'agents' timeline item if it exists, or create new one 395 | setOrchestrator((prev) => { 396 | const lastItem = prev.timeline[prev.timeline.length - 1]; 397 | if (lastItem && lastItem.type === "agents") { 398 | // Check if agent is already in the list to avoid dupes 399 | if (lastItem.agentIds.includes(data.agent_id)) { 400 | return prev; 401 | } 402 | // Update the last item in place (immutably) 403 | const newTimeline = [...prev.timeline]; 404 | newTimeline[newTimeline.length - 1] = { 405 | ...lastItem, 406 | agentIds: [...lastItem.agentIds, data.agent_id] 407 | }; 408 | return { ...prev, timeline: newTimeline }; 409 | } else { 410 | // Create new agents group 411 | return { 412 | ...prev, 413 | timeline: [ 414 | ...prev.timeline, 415 | { type: "agents", agentIds: [data.agent_id], timestamp: Date.now() } 416 | ] 417 | }; 418 | } 419 | }); 420 | break; 421 | 422 | case "AGENT_THOUGHT": 423 | if (inferredAgentId) { 424 | addAgentStep(inferredAgentId, { 425 | type: "thought", 426 | content: data.thought, 427 | }); 428 | scheduleAgentSummary(inferredAgentId); 429 | } 430 | break; 431 | 432 | case "AGENT_THOUGHT_STREAM": 433 | if (inferredAgentId && typeof data?.chunk === "string") { 434 | appendToLatestAgentStep(inferredAgentId, "thought", data.chunk); 435 | scheduleAgentSummary(inferredAgentId); 436 | } 437 | break; 438 | 439 | case "AGENT_TOOL": 440 | 
if (inferredAgentId) { 441 | addAgentStep(inferredAgentId, { 442 | type: "code", 443 | content: `${data.tool}(${JSON.stringify(data.args, null, 2)})`, 444 | metadata: { tool: data.tool, args: data.args }, 445 | }); 446 | } 447 | break; 448 | 449 | case "AGENT_TOOL_RESULT": 450 | if (inferredAgentId) { 451 | // Update the latest step if it's a result block (from streaming), 452 | // otherwise create a new one. 453 | setAgents((prev) => { 454 | const current = prev[inferredAgentId]; 455 | if (!current) return prev; 456 | 457 | const steps = [...current.steps]; 458 | const lastStep = steps[steps.length - 1]; 459 | 460 | if (lastStep && lastStep.type === "result") { 461 | // Update existing result block with the final full content 462 | steps[steps.length - 1] = { 463 | ...lastStep, 464 | content: data.result, 465 | metadata: { ...lastStep.metadata, tool: data.tool } 466 | }; 467 | } else { 468 | // Create new result block 469 | steps.push({ 470 | id: Math.random().toString(36).substring(7), 471 | type: "result", 472 | content: data.result, 473 | metadata: { tool: data.tool }, 474 | timestamp: Date.now(), 475 | }); 476 | } 477 | 478 | return { 479 | ...prev, 480 | [inferredAgentId]: { ...current, steps } 481 | }; 482 | }); 483 | 484 | scheduleAgentSummary(inferredAgentId); 485 | } 486 | break; 487 | 488 | case "AGENT_STREAM": 489 | if (inferredAgentId && typeof data?.chunk === "string") { 490 | // Stream incremental sandbox output into the latest result cell. 491 | // NotebookCell already handles carriage returns (\r) to render 492 | // tqdm-style progress bars cleanly. 493 | appendToLatestAgentStep(inferredAgentId, "result", data.chunk); 494 | } 495 | break; 496 | 497 | case "AGENT_COMPLETE": 498 | updateAgent(data.agent_id, { 499 | status: "completed", 500 | exitCode: data.exit_code, 501 | }); 502 | scheduleAgentSummary(data.agent_id); 503 | break; 504 | 505 | case "ORCH_THOUGHT": 506 | setOrchestrator((prev) => ({ 507 | ...prev, 508 | thoughts: [...prev.thoughts, data.thought], 509 | timeline: [ 510 | ...prev.timeline, 511 | { type: "thought", content: data.thought, timestamp: Date.now() } 512 | ] 513 | })); 514 | break; 515 | 516 | case "ORCH_THOUGHT_STREAM": 517 | if (typeof data?.chunk === "string") { 518 | appendToLatestOrchestratorStep("thought", data.chunk); 519 | } 520 | break; 521 | 522 | case "ORCH_PAPER": 523 | // Capture any charts that agents have produced so we can surface them alongside the paper. 524 | const charts: ChartSpec[] = Object.values(agentsRef.current) 525 | .flatMap((agent) => (agent.insights || []).map((insight) => insight.chart)) 526 | .filter(Boolean) as ChartSpec[]; 527 | 528 | // Deduplicate loosely by title + type + first series name + length of labels. 
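        // A loose key is enough here: repeated summaries yield near-identical charts that would only clutter the paper.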
529 |     const seen = new Set<string>();
530 |     const uniqueCharts: ChartSpec[] = [];
531 |     for (const chart of charts) {
532 |       const key = [
533 |         chart.title || "untitled",
534 |         chart.type,
535 |         chart.series?.[0]?.name || "series",
536 |         chart.labels?.length || 0,
537 |         chart.series?.[0]?.values?.length || 0,
538 |       ].join("|");
539 |       if (seen.has(key)) continue;
540 |       seen.add(key);
541 |       uniqueCharts.push(chart);
542 |     }
543 | 
544 |     setOrchestrator((prev) => ({
545 |       ...prev,
546 |       timeline: [
547 |         ...prev.timeline,
548 |         {
549 |           type: "paper",
550 |           content: data.content,
551 |           charts: uniqueCharts.slice(0, 6), // keep it concise
552 |           timestamp: Date.now(),
553 |         },
554 |       ],
555 |     }));
556 |     break;
557 | 
558 |   case "ORCH_TOOL":
559 |     // We could also track orchestrator steps if we wanted a notebook for it
560 |     break;
561 |   }
562 | };
563 | 
564 | // We need a way to parse the agent ID from the line if it exists.
565 | // The orchestrator prefixes: `[Agent {id}] `
566 | 
567 | const clearError = () => setError(null);
568 | 
569 | return {
570 |   isRunning,
571 |   logs,
572 |   agents,
573 |   orchestrator,
574 |   error,
575 |   startExperiment,
576 |   clearError,
577 | };
578 | }
579 | 
--------------------------------------------------------------------------------
/frontend/src/components/LabNotebook.tsx:
--------------------------------------------------------------------------------
1 | import { useState, useEffect, useRef } from "react";
2 | import { Loader2, Play } from "lucide-react";
3 | import { motion } from "framer-motion";
4 | import { useExperiment } from "@/lib/useExperiment";
5 | import { FindingsRail } from "./FindingsRail";
6 | import { AgentNotebook } from "./Notebook/AgentNotebook";
7 | import { ResearchPaper } from "./Notebook/ResearchPaper";
8 | import { cn } from "@/lib/utils";
9 | import { StreamingMarkdown } from "./StreamingMarkdown";
10 | import { CredentialPrompt, CredentialFormState } from "./CredentialPrompt";
11 | import { CredentialStatus, fetchCredentialStatus, saveCredentials } from "@/lib/api";
12 | 
13 | type PendingRun = {
14 |   mode: "single" | "orchestrator";
15 |   config: {
16 |     task: string;
17 |     gpu?: string;
18 |     model?: string;
19 |     num_agents?: number;
20 |     max_rounds?: number;
21 |     max_parallel?: number;
22 |     test_mode?: boolean;
23 |   };
24 | };
25 | 
26 | export function LabNotebook() {
27 |   const { isRunning, agents, orchestrator, error: experimentError, startExperiment, clearError } = useExperiment();
28 |   const [task, setTask] = useState("");
29 |   const [mode, setMode] = useState<"single" | "orchestrator">("orchestrator");
30 |   const [testMode, setTestMode] = useState(false);
31 |   const bottomRef = useRef<HTMLDivElement | null>(null);
32 |   const prevTimelineLengthRef = useRef(0);
33 |   const [credentialStatus, setCredentialStatus] = useState<CredentialStatus | null>(null);
34 |   const [credentialForm, setCredentialForm] = useState<CredentialFormState>({
35 |     googleApiKey: "",
36 |     anthropicApiKey: "",
37 |     modalTokenId: "",
38 |     modalTokenSecret: "",
39 |   });
40 |   const [selectedModel, setSelectedModel] = useState<"gemini-3-pro-preview" | "claude-opus-4-5">("gemini-3-pro-preview");
41 |   const [showCredentialPrompt, setShowCredentialPrompt] = useState(false);
42 |   const [pendingRun, setPendingRun] = useState<PendingRun | null>(null);
43 |   const [isCheckingCredentials, setIsCheckingCredentials] = useState(false);
44 |   const [isSavingCredentials, setIsSavingCredentials] = useState(false);
45 |   const [prereqError, setPrereqError] = useState<string | null>(null);
46 |   const [credentialPromptError, setCredentialPromptError] = useState<string | null>(null);
47 | 
48 |   // Check credentials
once on load so we can prompt proactively. 49 | useEffect(() => { 50 | fetchCredentialStatus() 51 | .then(setCredentialStatus) 52 | .catch(() => { 53 | // silently ignore so we don't block the UI if the backend isn't ready yet 54 | }); 55 | }, []); 56 | 57 | // Auto-scroll effect 58 | useEffect(() => { 59 | const currentLength = orchestrator.timeline.length; 60 | const prevLength = prevTimelineLengthRef.current; 61 | 62 | if (currentLength > prevLength) { 63 | const lastItem = orchestrator.timeline[currentLength - 1]; 64 | if (lastItem.type === "agents" || lastItem.type === "paper" || currentLength === 1) { 65 | // Scroll slightly above the new element to keep context 66 | // We do this by scrolling to the bottom ref, but with 'start' block alignment if possible, 67 | // or just letting the padding handle it. 68 | // Actually, let's scroll to the *element itself* if we could, but since we use bottomRef, 69 | // let's just scroll smoothly to it. 70 | // The user said it "goes a little too far", implying it might be scrolling past the top of the new content. 71 | // Or maybe it scrolls so the bottom is at the bottom of the screen? 72 | // "scrollIntoView" aligns the element to the top or bottom. 73 | 74 | // Let's try aligning the bottomRef to the 'end' of the view, but give it some breathing room. 75 | bottomRef.current?.scrollIntoView({ behavior: "smooth", block: "end" }); 76 | } 77 | } 78 | 79 | prevTimelineLengthRef.current = currentLength; 80 | }, [orchestrator.timeline]); 81 | 82 | const handleStart = async () => { 83 | if (!task.trim() || isCheckingCredentials) return; 84 | 85 | const config = { 86 | task, 87 | gpu: "any", 88 | model: selectedModel, 89 | num_agents: 3, 90 | max_rounds: 3, 91 | max_parallel: 2, 92 | test_mode: testMode, 93 | }; 94 | 95 | setPrereqError(null); 96 | setCredentialPromptError(null); 97 | setIsCheckingCredentials(true); 98 | 99 | try { 100 | const status = await fetchCredentialStatus(); 101 | setCredentialStatus(status); 102 | 103 | // Check if the required key for the selected model is available 104 | const needsGoogleKey = selectedModel === "gemini-3-pro-preview" && !status.hasGoogleApiKey; 105 | const needsAnthropicKey = selectedModel === "claude-opus-4-5" && !status.hasAnthropicApiKey; 106 | const needsModalToken = !status.hasModalToken; 107 | 108 | if (needsGoogleKey || needsAnthropicKey || needsModalToken) { 109 | setPendingRun({ mode, config }); 110 | setShowCredentialPrompt(true); 111 | return; 112 | } 113 | 114 | startExperiment(mode, config); 115 | } catch (err) { 116 | setPrereqError(err instanceof Error ? err.message : "Unable to verify API keys."); 117 | } finally { 118 | setIsCheckingCredentials(false); 119 | } 120 | }; 121 | 122 | const handleCredentialFieldChange = (field: keyof CredentialFormState, value: string) => { 123 | setCredentialForm((prev) => ({ ...prev, [field]: value })); 124 | }; 125 | 126 | const handleSaveCredentials = async () => { 127 | setCredentialPromptError(null); 128 | setIsSavingCredentials(true); 129 | 130 | try { 131 | const status = await saveCredentials({ 132 | googleApiKey: credentialForm.googleApiKey || undefined, 133 | anthropicApiKey: credentialForm.anthropicApiKey || undefined, 134 | modalTokenId: credentialForm.modalTokenId || undefined, 135 | modalTokenSecret: credentialForm.modalTokenSecret || undefined, 136 | }); 137 | setCredentialStatus(status); 138 | 139 | // Check if we have the required key for the selected model 140 | const hasRequiredLLMKey = selectedModel === "gemini-3-pro-preview" 141 | ? 
status.hasGoogleApiKey 142 | : status.hasAnthropicApiKey; 143 | 144 | if (hasRequiredLLMKey && status.hasModalToken) { 145 | setShowCredentialPrompt(false); 146 | setCredentialForm({ googleApiKey: "", anthropicApiKey: "", modalTokenId: "", modalTokenSecret: "" }); 147 | const nextRun = pendingRun; 148 | setPendingRun(null); 149 | if (nextRun) { 150 | startExperiment(nextRun.mode, nextRun.config); 151 | } 152 | } else { 153 | const modelName = selectedModel === "gemini-3-pro-preview" ? "Google" : "Anthropic"; 154 | setCredentialPromptError(`We still need the ${modelName} API key and Modal token to start a run with the selected model.`); 155 | } 156 | } catch (err) { 157 | setCredentialPromptError(err instanceof Error ? err.message : "Unable to save credentials."); 158 | } finally { 159 | setIsSavingCredentials(false); 160 | } 161 | }; 162 | 163 | const handleCloseCredentialPrompt = () => { 164 | setShowCredentialPrompt(false); 165 | setPendingRun(null); 166 | }; 167 | 168 | const isStartDisabled = !task.trim() || isCheckingCredentials; 169 | 170 | return ( 171 |
172 | {/* Fixed API Keys Button - Top Right */} 173 | 182 | 183 |
184 | 185 | {/* Main Content Area */} 186 |
187 | 188 | {/* Sticky Header for Active Research */} 189 | {orchestrator.timeline.length > 0 && ( 190 |
191 |
192 | 193 | Objective 194 | 195 |

196 | {task} 197 |

198 |
199 |
200 | )} 201 | 202 | {/* Scrollable Timeline */} 203 |
204 |
205 | 206 | {/* Initial Input State (Only visible when timeline is empty, not running, and no error) */} 207 | {orchestrator.timeline.length === 0 && !isRunning && !experimentError && ( 208 |
209 |
210 |

211 | Research Objective 212 |

213 |

214 | Describe your scientific query. The orchestrator will decompose it into hypotheses and launch autonomous agents to investigate. 215 |

216 |
217 | 218 |
219 |
220 |
221 |