├── .gitignore ├── README.md ├── pyproject.toml ├── vibetest.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt ├── entry_points.txt ├── requires.txt └── top_level.txt └── vibetest ├── __init__.py ├── agents.py └── mcp_server.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Build artifacts 2 | 3 | build/ 4 | dist/ 5 | __pycache__/ 6 | *.pyc 7 | *.pyo 8 | 9 | # QA test artifacts 10 | qa_artifacts/ 11 | 12 | # Environment files 13 | .env 14 | .venv/ 15 | venv/ 16 | 17 | # IDE files 18 | .vscode/ 19 | .idea/ 20 | *.swp 21 | *.swo 22 | 23 | # OS files 24 | .DS_Store 25 | Thumbs.db 26 | 27 | # Logs 28 | *.log -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Vibetest Use 2 | 3 | 4 | https://github.com/user-attachments/assets/9558d051-78bc-45fd-8694-9ac80eaf9494 5 | 6 | 7 | An MCP server that launches multiple Browser-Use agents to test a vibe-coded website for UI bugs, broken links, accessibility issues, and other technical problems. 8 | 9 | Perfect for testing both live websites and localhost development sites. 10 | 11 | Vibecode and vibetest until your website works. 12 | 13 | ## Quick Start 14 | 15 | ```bash 16 | # Install dependencies 17 | uv venv 18 | source .venv/bin/activate 19 | uv pip install -e . 20 | ``` 21 | 22 | ### 1) Claude Code 23 | 24 | ```bash 25 | # Add MCP server via CLI 26 | claude mcp add vibetest /full/path/to/vibetest-use/.venv/bin/vibetest-mcp -e GOOGLE_API_KEY="your_api_key" 27 | 28 | # Test in Claude Code 29 | > claude 30 | 31 | > /mcp 32 | ⎿  MCP Server Status 33 | 34 | • vibetest: connected 35 | ``` 36 | 37 | ### 2) Cursor (manually) 38 | 39 | 1. 
**Install via MCP Settings UI:** 40 | - Open Cursor Settings 41 | - Click on "MCP" in the left sidebar 42 | - Click "Add Server" or the "+" button 43 | - Manually edit config: 44 | 45 | ```json 46 | { 47 | "mcpServers": { 48 | "vibetest": { 49 | "command": "/full/path/to/vibetest-use/.venv/bin/vibetest-mcp", 50 | "env": { 51 | "GOOGLE_API_KEY": "your_api_key" 52 | } 53 | } 54 | } 55 | } 56 | 57 | ``` 58 | 59 | ### Basic Prompts 60 | ``` 61 | > Vibetest my website with 5 agents: browser-use.com 62 | > Run vibetest on localhost:3000 63 | > Run a headless vibetest on localhost:8080 with 10 agents 64 | ``` 65 | 66 | ### Parameters You Can Specify 67 | - **URL**: Any website (`https://example.com`, `localhost:3000`, `http://dev.mysite.com`) 68 | - **Number of agents**: `3` (default), `5 agents`, `2 agents` - more agents = more thorough testing 69 | - **Headless mode**: `non-headless` (default) or `headless` 70 | 71 | ## Requirements 72 | 73 | - Python 3.11+ 74 | - Google API key ([get one](https://aistudio.google.com/app/apikey)) (we support gemini-2.0-flash) 75 | - Cursor/Claude with MCP support 76 | 77 | ## Full Demo 78 | 79 | 80 | https://github.com/user-attachments/assets/6450b5b7-10e5-4019-82a4-6d726dbfbe1f 81 | 82 | 83 | 84 | ## License 85 | 86 | MIT 87 | 88 | --- 89 | 90 | Powered by [Browser Use](https://github.com/browser-use/browser-use) 91 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "vibetest" 3 | version = "0.1.0" 4 | description = "Browser-agent QA swarm with MCP interface for testing AI-generated websites" 5 | requires-python = ">=3.11" 6 | dependencies = [ 7 | "mcp[cli]>=1.0.0", 8 | "browser-use", 9 | "playwright", 10 | "langchain_google_genai", 11 | "langchain_core", 12 | "pydantic", 13 | "screeninfo", 14 | ] 15 | 16 | [tool.setuptools.packages.find] 17 | include = ["vibetest*"] 18 | 19 | [project.scripts] 20 | vibetest-mcp = "vibetest.mcp_server:run" -------------------------------------------------------------------------------- /vibetest.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.4 2 | Name: vibetest 3 | Version: 0.1.0 4 | Summary: Browser-agent QA swarm with MCP interface for testing AI-generated websites 5 | Requires-Python: >=3.11 6 | Requires-Dist: mcp[cli]>=1.0.0 7 | Requires-Dist: browser-use 8 | Requires-Dist: playwright 9 | Requires-Dist: langchain_google_genai 10 | Requires-Dist: langchain_core 11 | Requires-Dist: pydantic 12 | Requires-Dist: screeninfo 13 | -------------------------------------------------------------------------------- /vibetest.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | README.md 2 | pyproject.toml 3 | vibetest/__init__.py 4 | vibetest/agents.py 5 | vibetest/mcp_server.py 6 | vibetest.egg-info/PKG-INFO 7 | vibetest.egg-info/SOURCES.txt 8 | vibetest.egg-info/dependency_links.txt 9 | vibetest.egg-info/entry_points.txt 10 | vibetest.egg-info/requires.txt 11 | vibetest.egg-info/top_level.txt -------------------------------------------------------------------------------- /vibetest.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /vibetest.egg-info/entry_points.txt: 
-------------------------------------------------------------------------------- 1 | [console_scripts] 2 | vibetest-mcp = vibetest.mcp_server:run 3 | -------------------------------------------------------------------------------- /vibetest.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | mcp[cli]>=1.0.0 2 | browser-use 3 | playwright 4 | langchain_google_genai 5 | langchain_core 6 | pydantic 7 | screeninfo 8 | -------------------------------------------------------------------------------- /vibetest.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | vibetest 2 | -------------------------------------------------------------------------------- /vibetest/__init__.py: -------------------------------------------------------------------------------- 1 | """Web QA tool using Browser-Use agents for testing AI-generated websites.""" 2 | 3 | __version__ = "0.1.0" -------------------------------------------------------------------------------- /vibetest/agents.py: -------------------------------------------------------------------------------- 1 | import asyncio, os, uuid, json, time 2 | from browser_use import Agent, BrowserSession, BrowserProfile 3 | from langchain_google_genai import ChatGoogleGenerativeAI 4 | 5 | GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") 6 | 7 | if not GOOGLE_API_KEY: 8 | raise ValueError("GOOGLE_API_KEY environment variable is required. Set it in your MCP config or environment.") 9 | 10 | _test_results = {} 11 | 12 | def get_screen_dimensions(): 13 | """Get screen dimensions with fallback for headless environments""" 14 | try: 15 | import screeninfo 16 | screen = screeninfo.get_monitors()[0] 17 | return screen.width, screen.height 18 | except Exception: 19 | return 1920, 1080 20 | 21 | async def run_pool(base_url: str, num_agents: int = 3, headless: bool = False) -> str: 22 | test_id = str(uuid.uuid4()) 23 | start_time = time.time() 24 | 25 | qa_tasks = await scout_page(base_url) 26 | 27 | llm = ChatGoogleGenerativeAI( 28 | model="gemini-2.0-flash", 29 | temperature=0.9, 30 | google_api_key=GOOGLE_API_KEY 31 | ) 32 | 33 | async def run_single_agent(i: int): 34 | task_description = qa_tasks[i % len(qa_tasks)] 35 | 36 | try: 37 | # browser configuration 38 | browser_args = ['--disable-gpu', '--no-sandbox', '--disable-dev-shm-usage'] 39 | if headless: 40 | browser_args.append('--headless=new') 41 | 42 | window_config = {} 43 | 44 | if not headless: 45 | # window positioning for non-headless mode 46 | screen_width, screen_height = get_screen_dimensions() 47 | 48 | window_width = 300 49 | window_height = 400 50 | viewport_width = 280 51 | viewport_height = 350 52 | 53 | margin = 10 54 | spacing = 15 55 | 56 | usable_width = screen_width - (2 * margin) 57 | windows_per_row = max(1, usable_width // (window_width + spacing)) 58 | 59 | row = i // windows_per_row 60 | col = i % windows_per_row 61 | 62 | x_offset = margin + col * (window_width + spacing) 63 | y_offset = margin + row * (window_height + spacing) 64 | 65 | if x_offset + window_width > screen_width: 66 | x_offset = screen_width - window_width - margin 67 | if y_offset + window_height > screen_height: 68 | y_offset = screen_height - window_height - margin 69 | 70 | window_config = { 71 | "window_size": {"width": window_width, "height": window_height}, 72 | "window_position": {"width": x_offset, "height": y_offset}, 73 | "viewport": {"width": viewport_width, "height": viewport_height} 74 | } 75 | 76 | 
browser_profile = BrowserProfile( 77 | headless=headless, 78 | disable_security=True, 79 | user_data_dir=None, 80 | args=browser_args, 81 | ignore_default_args=['--enable-automation'], 82 | wait_for_network_idle_page_load_time=2.0, 83 | maximum_wait_page_load_time=8.0, 84 | wait_between_actions=0.5, 85 | **window_config 86 | ) 87 | 88 | browser_session = BrowserSession( 89 | browser_profile=browser_profile, 90 | headless=headless 91 | ) 92 | 93 | # zoom setup for non-headless mode 94 | if not headless: 95 | try: 96 | page = browser_session.page 97 | if page: 98 | async def apply_zoom(page): 99 | try: 100 | await asyncio.sleep(0.5) 101 | await page.evaluate(""" 102 | document.body.style.zoom = '0.25'; 103 | document.documentElement.style.zoom = '0.25'; 104 | """) 105 | except Exception: 106 | pass 107 | 108 | page.on("load", lambda: asyncio.create_task(apply_zoom(page))) 109 | page.on("domcontentloaded", lambda: asyncio.create_task(apply_zoom(page))) 110 | except Exception: 111 | pass 112 | 113 | # run agent 114 | agent = Agent( 115 | task=task_description, 116 | llm=llm, 117 | browser_session=browser_session, 118 | use_vision=True 119 | ) 120 | 121 | history = await agent.run() 122 | await browser_session.close() 123 | 124 | result_text = str(history.final_result()) if hasattr(history, 'final_result') else str(history) 125 | 126 | return { 127 | "agent_id": i, 128 | "task": task_description, 129 | "result": result_text, 130 | "timestamp": time.time(), 131 | "status": "success" 132 | } 133 | 134 | except Exception as e: 135 | try: 136 | if 'browser_session' in locals(): 137 | await browser_session.close() 138 | except: 139 | pass 140 | 141 | return { 142 | "agent_id": i, 143 | "task": task_description, 144 | "error": str(e), 145 | "timestamp": time.time(), 146 | "status": "error" 147 | } 148 | 149 | # run agents in parallel 150 | semaphore = asyncio.Semaphore(min(num_agents, 10)) 151 | 152 | async def run_agent_with_semaphore(i: int): 153 | async with semaphore: 154 | return await run_single_agent(i) 155 | 156 | results = await asyncio.gather( 157 | *[run_agent_with_semaphore(i) for i in range(num_agents)], 158 | return_exceptions=True 159 | ) 160 | 161 | end_time = time.time() 162 | 163 | # cleanup lingering browser processes 164 | try: 165 | import subprocess 166 | import platform 167 | if platform.system() == 'Darwin': 168 | await asyncio.sleep(1) 169 | subprocess.run(['pkill', '-f', 'chromium'], capture_output=True, check=False) 170 | except Exception: 171 | pass 172 | 173 | # store results 174 | test_data = { 175 | "test_id": test_id, 176 | "url": base_url, 177 | "agents": num_agents, 178 | "start_time": start_time, 179 | "end_time": end_time, 180 | "duration": end_time - start_time, 181 | "results": [r for r in results if not isinstance(r, Exception)], 182 | "status": "completed" 183 | } 184 | 185 | _test_results[test_id] = test_data 186 | 187 | return test_id 188 | 189 | 190 | # === Standardized summarization with severity classification === 191 | def summarize_bug_reports(test_id: str) -> dict: 192 | if test_id not in _test_results: 193 | return {"error": f"Test ID {test_id} not found"} 194 | 195 | test_data = _test_results[test_id] 196 | 197 | # separate results and prepare for analysis 198 | agent_results = [] 199 | bug_reports = [] 200 | errors = [] 201 | 202 | for result in test_data["results"]: 203 | if result["status"] == "success": 204 | agent_results.append(result) 205 | if "result" in result and result["result"]: 206 | bug_reports.append({ 207 | "agent_id": 
result["agent_id"], 208 | "task": result["task"], 209 | "findings": result["result"], 210 | "timestamp": result["timestamp"] 211 | }) 212 | else: 213 | errors.append(result) 214 | 215 | bug_reports_text = "\n\n".join([ 216 | f"Agent {report['agent_id']} Report:\nTask: {report['task']}\nFindings: {report['findings']}" 217 | for report in bug_reports 218 | ]) 219 | 220 | summary = { 221 | "test_id": test_id, 222 | "total_agents": len(agent_results) + len(errors), 223 | "successful_agents": len(agent_results), 224 | "failed_agents": len(errors), 225 | "errors": errors, 226 | "summary_generated": time.time() 227 | } 228 | 229 | # llm analysis of findings 230 | if bug_reports and GOOGLE_API_KEY: 231 | try: 232 | from langchain_google_genai import ChatGoogleGenerativeAI 233 | 234 | client = ChatGoogleGenerativeAI( 235 | model="gemini-1.5-flash", 236 | google_api_key=GOOGLE_API_KEY, 237 | temperature=0.1, 238 | ) 239 | 240 | prompt = f""" 241 | You are an objective QA analyst. Review the following test reports from agents that explored the website {test_data['url']}. 242 | 243 | Identify only actual functional issues, broken features, or technical problems. Do NOT classify subjective opinions, missing features that may be intentional, or design preferences as issues. 244 | 245 | Only report issues if they represent: 246 | - Broken functionality (buttons that don't work, forms that fail) 247 | - Technical errors (404s, JavaScript errors, broken links) 248 | - Accessibility violations (missing alt text, poor contrast) 249 | - Performance problems (very slow loading, timeouts) 250 | 251 | IMPORTANT: For each issue you identify, provide SPECIFIC and DETAILED descriptions including: 252 | - The exact element that was tested (button name, link text, form field, etc.) 253 | - The specific action taken (clicked, typed, submitted, etc.) 254 | - The exact result or error observed (404 error, no response, broken redirect, etc.) 255 | - Any relevant context from the agent's testing 256 | 257 | DO NOT use vague descriptions like "broken link" or "404 error". Instead use specific descriptions like: 258 | - "Upon clicking the 'Contact Us' button in the header navigation, the page redirected to a 404 error" 259 | - "When submitting the newsletter signup form with a valid email, the form displayed 'Server Error 500' instead of confirmation" 260 | 261 | Here are the test reports: 262 | {bug_reports_text} 263 | 264 | Format the output as JSON with the following structure: 265 | {{ 266 | "high_severity": [ 267 | {{ "category": "category_name", "description": "specific detailed description with exact steps and results" }}, 268 | ... 269 | ], 270 | "medium_severity": [ 271 | {{ "category": "category_name", "description": "specific detailed description with exact steps and results" }}, 272 | ... 273 | ], 274 | "low_severity": [ 275 | {{ "category": "category_name", "description": "specific detailed description with exact steps and results" }}, 276 | ... 277 | ] 278 | }} 279 | 280 | Only include real issues found during testing. Provide clear, concise descriptions. Deduplicate similar issues. 
281 | """ 282 | 283 | response = client.invoke(prompt) 284 | 285 | # parse json response and calculate severity 286 | try: 287 | import re 288 | json_match = re.search(r'\{.*\}', response.content, re.DOTALL) 289 | if json_match: 290 | severity_analysis = json.loads(json_match.group()) 291 | else: 292 | severity_analysis = { 293 | "high_severity": [], 294 | "medium_severity": [], 295 | "low_severity": [] 296 | } 297 | except: 298 | severity_analysis = { 299 | "high_severity": [], 300 | "medium_severity": [], 301 | "low_severity": [] 302 | } 303 | 304 | total_issues = ( 305 | len(severity_analysis.get("high_severity", [])) + 306 | len(severity_analysis.get("medium_severity", [])) + 307 | len(severity_analysis.get("low_severity", [])) 308 | ) 309 | 310 | # determine overall status 311 | if len(severity_analysis.get("high_severity", [])) > 0: 312 | overall_status = "high-severity" 313 | status_emoji = "🔴" 314 | status_description = "Critical issues found that need immediate attention" 315 | elif len(severity_analysis.get("medium_severity", [])) > 0: 316 | overall_status = "medium-severity" 317 | status_emoji = "🟠" 318 | status_description = "Moderate issues found that should be addressed" 319 | elif len(severity_analysis.get("low_severity", [])) > 0: 320 | overall_status = "low-severity" 321 | status_emoji = "🟡" 322 | status_description = "Minor issues found that could be improved" 323 | else: 324 | overall_status = "passing" 325 | status_emoji = "✅" 326 | status_description = "No technical issues detected during testing" 327 | 328 | summary.update({ 329 | "overall_status": overall_status, 330 | "status_emoji": status_emoji, 331 | "status_description": status_description, 332 | "total_issues": total_issues, 333 | "severity_breakdown": severity_analysis, 334 | "llm_analysis": { 335 | "raw_response": response.content, 336 | "model_used": "gemini-1.5-flash" 337 | } 338 | }) 339 | 340 | except Exception as e: 341 | # fallback analysis 342 | summary.update({ 343 | "overall_status": "low-severity" if bug_reports else "passing", 344 | "status_emoji": "🟡" if bug_reports else "✅", 345 | "status_description": f"Found {len(bug_reports)} potential issues requiring manual review" if bug_reports else "No technical issues detected during testing", 346 | "total_issues": len(bug_reports), 347 | "severity_breakdown": { 348 | "high_severity": [], 349 | "medium_severity": [], 350 | "low_severity": [{"category": "general", "description": f"Found {len(bug_reports)} potential issues requiring manual review"}] if bug_reports else [] 351 | }, 352 | "llm_analysis_error": str(e) 353 | }) 354 | else: 355 | # no llm analysis available 356 | summary.update({ 357 | "overall_status": "low-severity" if bug_reports else "passing", 358 | "status_emoji": "🟡" if bug_reports else "✅", 359 | "status_description": f"Found {len(bug_reports)} potential issues requiring manual review" if bug_reports else "No technical issues detected during testing", 360 | "total_issues": len(bug_reports), 361 | "severity_breakdown": { 362 | "high_severity": [], 363 | "medium_severity": [], 364 | "low_severity": [{"category": "general", "description": f"Found {len(bug_reports)} potential issues requiring manual review"}] if bug_reports else [] 365 | } 366 | }) 367 | 368 | return summary 369 | 370 | async def scout_page(base_url: str) -> list: 371 | """Scout agent that identifies all interactive elements on the page""" 372 | try: 373 | llm = ChatGoogleGenerativeAI( 374 | model="gemini-1.5-flash", 375 | temperature=0.1, 376 | 
google_api_key=GOOGLE_API_KEY 377 | ) 378 | 379 | browser_profile = BrowserProfile( 380 | headless=True, 381 | disable_security=True, 382 | user_data_dir=None, 383 | args=['--disable-gpu', '--no-sandbox', '--disable-dev-shm-usage', '--headless=new'], 384 | wait_for_network_idle_page_load_time=2.0, 385 | maximum_wait_page_load_time=8.0, 386 | wait_between_actions=0.5 387 | ) 388 | 389 | browser_session = BrowserSession(browser_profile=browser_profile, headless=True) 390 | 391 | scout_task = f"""Visit {base_url} and identify ALL interactive elements on the page. Do NOT click anything, just observe and catalog what's available. List buttons, links, forms, input fields, menus, dropdowns, and any other clickable elements you can see. Provide a comprehensive inventory.""" 392 | 393 | agent = Agent( 394 | task=scout_task, 395 | llm=llm, 396 | browser_session=browser_session, 397 | use_vision=True 398 | ) 399 | 400 | history = await agent.run() 401 | await browser_session.close() 402 | 403 | scout_result = str(history.final_result()) if hasattr(history, 'final_result') else str(history) 404 | 405 | # partition elements with llm 406 | partition_prompt = f""" 407 | Based on this scout report of interactive elements found on {base_url}: 408 | 409 | {scout_result} 410 | 411 | Create a list of specific testing tasks, each focusing on different elements. Each task should specify exactly which elements to test (by their text, location, or description). Aim for 6-8 distinct tasks that cover different elements without overlap. 412 | 413 | Format as JSON array: 414 | [ 415 | "Test the [specific element description] - click on [exact button/link text or location]", 416 | "Test the [different specific element] - interact with [exact description]", 417 | ... 418 | ] 419 | 420 | Make each task very specific about which exact elements to test. 
421 | """ 422 | 423 | partition_response = llm.invoke(partition_prompt) 424 | 425 | # parse response 426 | import re 427 | json_match = re.search(r'\[.*\]', partition_response.content, re.DOTALL) 428 | if json_match: 429 | element_tasks = json.loads(json_match.group()) 430 | else: 431 | # fallback tasks 432 | element_tasks = [ 433 | f"Test navigation elements in the header area of {base_url}", 434 | f"Test main content links and buttons in {base_url}", 435 | f"Test footer links and elements in {base_url}", 436 | f"Test any form elements found in {base_url}", 437 | f"Test sidebar or secondary navigation in {base_url}", 438 | f"Test any remaining interactive elements in {base_url}" 439 | ] 440 | 441 | return element_tasks 442 | 443 | except Exception as e: 444 | # fallback tasks if scouting fails 445 | return [ 446 | f"Test navigation elements in the header area of {base_url}", 447 | f"Test main content links and buttons in {base_url}", 448 | f"Test footer links and elements in {base_url}", 449 | f"Test any form elements found in {base_url}", 450 | f"Test sidebar or secondary navigation in {base_url}", 451 | f"Test any remaining interactive elements in {base_url}" 452 | ] 453 | -------------------------------------------------------------------------------- /vibetest/mcp_server.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import os 4 | import sys 5 | 6 | # Completely disable ALL logging to prevent JSON-RPC interference 7 | logging.disable(logging.CRITICAL) 8 | os.environ['ANONYMIZED_TELEMETRY'] = 'false' 9 | os.environ['BROWSER_USE_LOGGING_LEVEL'] = 'CRITICAL' 10 | 11 | # Redirect stderr to devnull to suppress any remaining output 12 | if hasattr(sys.stderr, 'close'): 13 | sys.stderr = open(os.devnull, 'w') 14 | 15 | from mcp.server.fastmcp import FastMCP 16 | from .agents import run_pool, summarize_bug_reports 17 | 18 | # Create FastMCP instance 19 | mcp = FastMCP("vibetest") 20 | 21 | @mcp.tool() 22 | async def start(url: str, num_agents: int = 3, headless: bool = False) -> str: 23 | """Launch browser agents to test a website for UI bugs and issues. 24 | 25 | Args: 26 | url: The website URL to test 27 | num_agents: Number of QA agents to spawn (default: 3) 28 | headless: Whether to run browsers in headless mode (default: True) 29 | 30 | Returns: 31 | test_id: Unique identifier for this test run 32 | """ 33 | try: 34 | test_id = await run_pool(url, num_agents, headless=headless) 35 | return test_id 36 | except Exception as e: 37 | return f"Error starting test: {str(e)}" 38 | 39 | @mcp.tool() 40 | def results(test_id: str) -> dict: 41 | """Get the consolidated bug report for a test run. 
42 | 43 | Args: 44 | test_id: The test ID returned from start 45 | 46 | Returns: 47 | dict: Complete test results with detailed findings 48 | """ 49 | try: 50 | summary = summarize_bug_reports(test_id) 51 | 52 | if "error" in summary: 53 | return summary 54 | 55 | # Get test data to access duration 56 | from .agents import _test_results 57 | test_data = _test_results.get(test_id, {}) 58 | 59 | # Add duration to the summary 60 | duration_seconds = test_data.get('duration', 0) 61 | if duration_seconds > 0: 62 | summary['duration_seconds'] = duration_seconds 63 | if duration_seconds < 60: 64 | summary['duration_formatted'] = f"{duration_seconds:.0f}s" 65 | else: 66 | minutes = int(duration_seconds // 60) 67 | seconds = int(duration_seconds % 60) 68 | summary['duration_formatted'] = f"{minutes}m {seconds}s" 69 | else: 70 | summary['duration_formatted'] = "unknown" 71 | 72 | return summary 73 | 74 | except Exception as e: 75 | return {"error": f"Error getting results: {str(e)}"} 76 | 77 | def run(): 78 | """Entry point for the MCP server""" 79 | try: 80 | mcp.run() 81 | return 0 82 | except Exception as e: 83 | return 1 84 | 85 | if __name__ == "__main__": 86 | run() 87 | --------------------------------------------------------------------------------
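
For quick local debugging, the agent pool in `vibetest/agents.py` can also be driven directly, without going through the MCP server — this is essentially what the `start` and `results` tools do under the hood. The snippet below is a minimal sketch, assuming the package has been installed with `uv pip install -e .` and that `GOOGLE_API_KEY` is exported (importing `vibetest.agents` raises a `ValueError` otherwise); the URL and agent count are illustrative.

```python
# Minimal sketch: drive the vibetest agent pool without the MCP server.
# Assumes GOOGLE_API_KEY is set in the environment and the package is installed.
import asyncio
import json

from vibetest.agents import run_pool, summarize_bug_reports

async def main() -> None:
    # Launch 3 headless agents against a local dev server (same defaults as the `start` tool).
    test_id = await run_pool("http://localhost:3000", num_agents=3, headless=True)
    # Consolidate per-agent findings into the severity-ranked summary the `results` tool returns.
    summary = summarize_bug_reports(test_id)
    print(json.dumps(summary, indent=2, default=str))

if __name__ == "__main__":
    asyncio.run(main())
```

The dict printed here has the same shape as the `results` MCP tool's response, minus the `duration_seconds`/`duration_formatted` fields that `mcp_server.results` adds on top of the summary.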
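
To check that the packaged `vibetest-mcp` entry point itself starts and responds outside of Cursor or Claude, the server can be exercised with the MCP Python SDK's stdio client (the `mcp` package is already a declared dependency). This is only a sketch under assumptions: the binary path and API key are placeholders copied from the README, and it relies on the SDK's standard `StdioServerParameters`/`stdio_client`/`ClientSession` interface rather than anything shipped in this repo.

```python
# Sketch (placeholder path and key): call the vibetest-mcp tools over stdio.
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

server = StdioServerParameters(
    command="/full/path/to/vibetest-use/.venv/bin/vibetest-mcp",
    env={"GOOGLE_API_KEY": "your_api_key"},
)

async def main() -> None:
    async with stdio_client(server) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # `start` returns the test_id as plain text content.
            started = await session.call_tool(
                "start", {"url": "http://localhost:3000", "num_agents": 3, "headless": True}
            )
            test_id = started.content[0].text
            # `results` returns the consolidated, severity-ranked report.
            report = await session.call_tool("results", {"test_id": test_id})
            print(report.content)

asyncio.run(main())
```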