├── .gitignore ├── README.md ├── pyproject.toml ├── vibetest.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt ├── entry_points.txt ├── requires.txt └── top_level.txt └── vibetest ├── __init__.py ├── agents.py └── mcp_server.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Build artifacts 2 | 3 | build/ 4 | dist/ 5 | __pycache__/ 6 | *.pyc 7 | *.pyo 8 | 9 | # QA test artifacts 10 | qa_artifacts/ 11 | 12 | # Environment files 13 | .env 14 | .venv/ 15 | venv/ 16 | 17 | # IDE files 18 | .vscode/ 19 | .idea/ 20 | *.swp 21 | *.swo 22 | 23 | # OS files 24 | .DS_Store 25 | Thumbs.db 26 | 27 | # Logs 28 | *.log -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Vibetest Use 2 | 3 | 4 | https://github.com/user-attachments/assets/9558d051-78bc-45fd-8694-9ac80eaf9494 5 | 6 | 7 | An MCP server that launches multiple Browser-Use agents to test a vibe-coded website for UI bugs, broken links, accessibility issues, and other technical problems. 8 | 9 | Perfect for testing both live websites and localhost development sites. 10 | 11 | Vibecode and vibetest until your website works. 12 | 13 | ## Quick Start 14 | 15 | ```bash 16 | # Install dependencies 17 | uv venv 18 | source .venv/bin/activate 19 | uv pip install -e . 20 | ``` 21 | 22 | ### 1) Claude Code 23 | 24 | ```bash 25 | # Add MCP server via CLI 26 | claude mcp add vibetest /full/path/to/vibetest-use/.venv/bin/vibetest-mcp -e GOOGLE_API_KEY="your_api_key" 27 | 28 | # Test in Claude Code 29 | > claude 30 | 31 | > /mcp 32 | ⎿  MCP Server Status 33 | 34 | • vibetest: connected 35 | ``` 36 | 37 | ### 2) Cursor (manually) 38 | 39 | 1. 
**Install via MCP Settings UI:** 40 | - Open Cursor Settings 41 | - Click on "MCP" in the left sidebar 42 | - Click "Add Server" or the "+" button 43 | - Manually edit config: 44 | 45 | ```json 46 | { 47 | "mcpServers": { 48 | "vibetest": { 49 | "command": "/full/path/to/vibetest-use/.venv/bin/vibetest-mcp", 50 | "env": { 51 | "GOOGLE_API_KEY": "your_api_key" 52 | } 53 | } 54 | } 55 | } 56 | 57 | ``` 58 | 59 | ### Basic Prompts 60 | ``` 61 | > Vibetest my website with 5 agents: browser-use.com 62 | > Run vibetest on localhost:3000 63 | > Run a headless vibetest on localhost:8080 with 10 agents 64 | ``` 65 | 66 | ### Parameters You Can Specify 67 | - **URL**: Any website (`https://example.com`, `localhost:3000`, `http://dev.mysite.com`) 68 | - **Number of agents**: `3` (default), `5 agents`, `2 agents` - more agents = more thorough testing 69 | - **Headless mode**: `non-headless` (default) or `headless` 70 | 71 | ## Requirements 72 | 73 | - Python 3.11+ 74 | - Google API key ([get one](https://aistudio.google.com/app/apikey)) (we support gemini-2.0-flash) 75 | - Cursor/Claude with MCP support 76 | 77 | ## Full Demo 78 | 79 | 80 | https://github.com/user-attachments/assets/6450b5b7-10e5-4019-82a4-6d726dbfbe1f 81 | 82 | 83 | 84 | ## License 85 | 86 | MIT 87 | 88 | --- 89 | 90 | Powered by [Browser Use](https://github.com/browser-use/browser-use) 91 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "vibetest" 3 | version = "0.1.0" 4 | description = "Browser-agent QA swarm with MCP interface for testing AI-generated websites" 5 | requires-python = ">=3.11" 6 | dependencies = [ 7 | "mcp[cli]>=1.0.0", 8 | "browser-use", 9 | "playwright", 10 | "langchain_google_genai", 11 | "langchain_core", 12 | "pydantic", 13 | "screeninfo", 14 | ] 15 | 16 | [tool.setuptools.packages.find] 17 | include = ["vibetest*"] 18 | 19 | [project.scripts] 20 | vibetest-mcp = "vibetest.mcp_server:run" -------------------------------------------------------------------------------- /vibetest.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.4 2 | Name: vibetest 3 | Version: 0.1.0 4 | Summary: Browser-agent QA swarm with MCP interface for testing AI-generated websites 5 | Requires-Python: >=3.11 6 | Requires-Dist: mcp[cli]>=1.0.0 7 | Requires-Dist: browser-use 8 | Requires-Dist: playwright 9 | Requires-Dist: langchain_google_genai 10 | Requires-Dist: langchain_core 11 | Requires-Dist: pydantic 12 | Requires-Dist: screeninfo 13 | -------------------------------------------------------------------------------- /vibetest.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | README.md 2 | pyproject.toml 3 | vibetest/__init__.py 4 | vibetest/agents.py 5 | vibetest/mcp_server.py 6 | vibetest.egg-info/PKG-INFO 7 | vibetest.egg-info/SOURCES.txt 8 | vibetest.egg-info/dependency_links.txt 9 | vibetest.egg-info/entry_points.txt 10 | vibetest.egg-info/requires.txt 11 | vibetest.egg-info/top_level.txt -------------------------------------------------------------------------------- /vibetest.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /vibetest.egg-info/entry_points.txt: 
-------------------------------------------------------------------------------- 1 | [console_scripts] 2 | vibetest-mcp = vibetest.mcp_server:run 3 | -------------------------------------------------------------------------------- /vibetest.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | mcp[cli]>=1.0.0 2 | browser-use 3 | playwright 4 | langchain_google_genai 5 | langchain_core 6 | pydantic 7 | screeninfo 8 | -------------------------------------------------------------------------------- /vibetest.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | vibetest 2 | -------------------------------------------------------------------------------- /vibetest/__init__.py: -------------------------------------------------------------------------------- 1 | """Web QA tool using Browser-Use agents for testing AI-generated websites.""" 2 | 3 | __version__ = "0.1.0" -------------------------------------------------------------------------------- /vibetest/agents.py: -------------------------------------------------------------------------------- 1 | import asyncio, os, uuid, json, time 2 | from browser_use import Agent, BrowserSession, BrowserProfile 3 | from langchain_google_genai import ChatGoogleGenerativeAI 4 | 5 | GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") 6 | 7 | if not GOOGLE_API_KEY: 8 | raise ValueError("GOOGLE_API_KEY environment variable is required. Set it in your MCP config or environment.") 9 | 10 | _test_results = {} 11 | 12 | def get_screen_dimensions(): 13 | """Get screen dimensions with fallback for headless environments""" 14 | try: 15 | import screeninfo 16 | screen = screeninfo.get_monitors()[0] 17 | return screen.width, screen.height 18 | except Exception: 19 | return 1920, 1080 20 | 21 | async def run_pool(base_url: str, num_agents: int = 3, headless: bool = False) -> str: 22 | test_id = str(uuid.uuid4()) 23 | start_time = time.time() 24 | 25 | qa_tasks = await scout_page(base_url) 26 | 27 | llm = ChatGoogleGenerativeAI( 28 | model="gemini-2.0-flash", 29 | temperature=0.9, 30 | google_api_key=GOOGLE_API_KEY 31 | ) 32 | 33 | async def run_single_agent(i: int): 34 | task_description = qa_tasks[i % len(qa_tasks)] 35 | 36 | try: 37 | # browser configuration 38 | browser_args = ['--disable-gpu', '--no-sandbox', '--disable-dev-shm-usage'] 39 | if headless: 40 | browser_args.append('--headless=new') 41 | 42 | window_config = {} 43 | 44 | if not headless: 45 | # window positioning for non-headless mode 46 | screen_width, screen_height = get_screen_dimensions() 47 | 48 | window_width = 300 49 | window_height = 400 50 | viewport_width = 280 51 | viewport_height = 350 52 | 53 | margin = 10 54 | spacing = 15 55 | 56 | usable_width = screen_width - (2 * margin) 57 | windows_per_row = max(1, usable_width // (window_width + spacing)) 58 | 59 | row = i // windows_per_row 60 | col = i % windows_per_row 61 | 62 | x_offset = margin + col * (window_width + spacing) 63 | y_offset = margin + row * (window_height + spacing) 64 | 65 | if x_offset + window_width > screen_width: 66 | x_offset = screen_width - window_width - margin 67 | if y_offset + window_height > screen_height: 68 | y_offset = screen_height - window_height - margin 69 | 70 | window_config = { 71 | "window_size": {"width": window_width, "height": window_height}, 72 | "window_position": {"width": x_offset, "height": y_offset}, 73 | "viewport": {"width": viewport_width, "height": viewport_height} 74 | } 75 | 76 | 
browser_profile = BrowserProfile( 77 | headless=headless, 78 | disable_security=True, 79 | user_data_dir=None, 80 | args=browser_args, 81 | ignore_default_args=['--enable-automation'], 82 | wait_for_network_idle_page_load_time=2.0, 83 | maximum_wait_page_load_time=8.0, 84 | wait_between_actions=0.5, 85 | **window_config 86 | ) 87 | 88 | browser_session = BrowserSession( 89 | browser_profile=browser_profile, 90 | headless=headless 91 | ) 92 | 93 | # zoom setup for non-headless mode 94 | if not headless: 95 | try: 96 | page = browser_session.page 97 | if page: 98 | async def apply_zoom(page): 99 | try: 100 | await asyncio.sleep(0.5) 101 | await page.evaluate(""" 102 | document.body.style.zoom = '0.25'; 103 | document.documentElement.style.zoom = '0.25'; 104 | """) 105 | except Exception: 106 | pass 107 | 108 | page.on("load", lambda: asyncio.create_task(apply_zoom(page))) 109 | page.on("domcontentloaded", lambda: asyncio.create_task(apply_zoom(page))) 110 | except Exception: 111 | pass 112 | 113 | # run agent 114 | agent = Agent( 115 | task=task_description, 116 | llm=llm, 117 | browser_session=browser_session, 118 | use_vision=True 119 | ) 120 | 121 | history = await agent.run() 122 | await browser_session.close() 123 | 124 | result_text = str(history.final_result()) if hasattr(history, 'final_result') else str(history) 125 | 126 | return { 127 | "agent_id": i, 128 | "task": task_description, 129 | "result": result_text, 130 | "timestamp": time.time(), 131 | "status": "success" 132 | } 133 | 134 | except Exception as e: 135 | try: 136 | if 'browser_session' in locals(): 137 | await browser_session.close() 138 | except: 139 | pass 140 | 141 | return { 142 | "agent_id": i, 143 | "task": task_description, 144 | "error": str(e), 145 | "timestamp": time.time(), 146 | "status": "error" 147 | } 148 | 149 | # run agents in parallel 150 | semaphore = asyncio.Semaphore(min(num_agents, 10)) 151 | 152 | async def run_agent_with_semaphore(i: int): 153 | async with semaphore: 154 | return await run_single_agent(i) 155 | 156 | results = await asyncio.gather( 157 | *[run_agent_with_semaphore(i) for i in range(num_agents)], 158 | return_exceptions=True 159 | ) 160 | 161 | end_time = time.time() 162 | 163 | # cleanup lingering browser processes 164 | try: 165 | import subprocess 166 | import platform 167 | if platform.system() == 'Darwin': 168 | await asyncio.sleep(1) 169 | subprocess.run(['pkill', '-f', 'chromium'], capture_output=True, check=False) 170 | except Exception: 171 | pass 172 | 173 | # store results 174 | test_data = { 175 | "test_id": test_id, 176 | "url": base_url, 177 | "agents": num_agents, 178 | "start_time": start_time, 179 | "end_time": end_time, 180 | "duration": end_time - start_time, 181 | "results": [r for r in results if not isinstance(r, Exception)], 182 | "status": "completed" 183 | } 184 | 185 | _test_results[test_id] = test_data 186 | 187 | return test_id 188 | 189 | 190 | # === Standardized summarization with severity classification === 191 | def summarize_bug_reports(test_id: str) -> dict: 192 | if test_id not in _test_results: 193 | return {"error": f"Test ID {test_id} not found"} 194 | 195 | test_data = _test_results[test_id] 196 | 197 | # separate results and prepare for analysis 198 | agent_results = [] 199 | bug_reports = [] 200 | errors = [] 201 | 202 | for result in test_data["results"]: 203 | if result["status"] == "success": 204 | agent_results.append(result) 205 | if "result" in result and result["result"]: 206 | bug_reports.append({ 207 | "agent_id": 
result["agent_id"], 208 | "task": result["task"], 209 | "findings": result["result"], 210 | "timestamp": result["timestamp"] 211 | }) 212 | else: 213 | errors.append(result) 214 | 215 | bug_reports_text = "\n\n".join([ 216 | f"Agent {report['agent_id']} Report:\nTask: {report['task']}\nFindings: {report['findings']}" 217 | for report in bug_reports 218 | ]) 219 | 220 | summary = { 221 | "test_id": test_id, 222 | "total_agents": len(agent_results) + len(errors), 223 | "successful_agents": len(agent_results), 224 | "failed_agents": len(errors), 225 | "errors": errors, 226 | "summary_generated": time.time() 227 | } 228 | 229 | # llm analysis of findings 230 | if bug_reports and GOOGLE_API_KEY: 231 | try: 232 | from langchain_google_genai import ChatGoogleGenerativeAI 233 | 234 | client = ChatGoogleGenerativeAI( 235 | model="gemini-1.5-flash", 236 | google_api_key=GOOGLE_API_KEY, 237 | temperature=0.1, 238 | ) 239 | 240 | prompt = f""" 241 | You are an objective QA analyst. Review the following test reports from agents that explored the website {test_data['url']}. 242 | 243 | Identify only actual functional issues, broken features, or technical problems. Do NOT classify subjective opinions, missing features that may be intentional, or design preferences as issues. 244 | 245 | Only report issues if they represent: 246 | - Broken functionality (buttons that don't work, forms that fail) 247 | - Technical errors (404s, JavaScript errors, broken links) 248 | - Accessibility violations (missing alt text, poor contrast) 249 | - Performance problems (very slow loading, timeouts) 250 | 251 | IMPORTANT: For each issue you identify, provide SPECIFIC and DETAILED descriptions including: 252 | - The exact element that was tested (button name, link text, form field, etc.) 253 | - The specific action taken (clicked, typed, submitted, etc.) 254 | - The exact result or error observed (404 error, no response, broken redirect, etc.) 255 | - Any relevant context from the agent's testing 256 | 257 | DO NOT use vague descriptions like "broken link" or "404 error". Instead use specific descriptions like: 258 | - "Upon clicking the 'Contact Us' button in the header navigation, the page redirected to a 404 error" 259 | - "When submitting the newsletter signup form with a valid email, the form displayed 'Server Error 500' instead of confirmation" 260 | 261 | Here are the test reports: 262 | {bug_reports_text} 263 | 264 | Format the output as JSON with the following structure: 265 | {{ 266 | "high_severity": [ 267 | {{ "category": "category_name", "description": "specific detailed description with exact steps and results" }}, 268 | ... 269 | ], 270 | "medium_severity": [ 271 | {{ "category": "category_name", "description": "specific detailed description with exact steps and results" }}, 272 | ... 273 | ], 274 | "low_severity": [ 275 | {{ "category": "category_name", "description": "specific detailed description with exact steps and results" }}, 276 | ... 277 | ] 278 | }} 279 | 280 | Only include real issues found during testing. Provide clear, concise descriptions. Deduplicate similar issues. 
281 | """ 282 | 283 | response = client.invoke(prompt) 284 | 285 | # parse json response and calculate severity 286 | try: 287 | import re 288 | json_match = re.search(r'\{.*\}', response.content, re.DOTALL) 289 | if json_match: 290 | severity_analysis = json.loads(json_match.group()) 291 | else: 292 | severity_analysis = { 293 | "high_severity": [], 294 | "medium_severity": [], 295 | "low_severity": [] 296 | } 297 | except: 298 | severity_analysis = { 299 | "high_severity": [], 300 | "medium_severity": [], 301 | "low_severity": [] 302 | } 303 | 304 | total_issues = ( 305 | len(severity_analysis.get("high_severity", [])) + 306 | len(severity_analysis.get("medium_severity", [])) + 307 | len(severity_analysis.get("low_severity", [])) 308 | ) 309 | 310 | # determine overall status 311 | if len(severity_analysis.get("high_severity", [])) > 0: 312 | overall_status = "high-severity" 313 | status_emoji = "🔴" 314 | status_description = "Critical issues found that need immediate attention" 315 | elif len(severity_analysis.get("medium_severity", [])) > 0: 316 | overall_status = "medium-severity" 317 | status_emoji = "🟠" 318 | status_description = "Moderate issues found that should be addressed" 319 | elif len(severity_analysis.get("low_severity", [])) > 0: 320 | overall_status = "low-severity" 321 | status_emoji = "🟡" 322 | status_description = "Minor issues found that could be improved" 323 | else: 324 | overall_status = "passing" 325 | status_emoji = "✅" 326 | status_description = "No technical issues detected during testing" 327 | 328 | summary.update({ 329 | "overall_status": overall_status, 330 | "status_emoji": status_emoji, 331 | "status_description": status_description, 332 | "total_issues": total_issues, 333 | "severity_breakdown": severity_analysis, 334 | "llm_analysis": { 335 | "raw_response": response.content, 336 | "model_used": "gemini-1.5-flash" 337 | } 338 | }) 339 | 340 | except Exception as e: 341 | # fallback analysis 342 | summary.update({ 343 | "overall_status": "low-severity" if bug_reports else "passing", 344 | "status_emoji": "🟡" if bug_reports else "✅", 345 | "status_description": f"Found {len(bug_reports)} potential issues requiring manual review" if bug_reports else "No technical issues detected during testing", 346 | "total_issues": len(bug_reports), 347 | "severity_breakdown": { 348 | "high_severity": [], 349 | "medium_severity": [], 350 | "low_severity": [{"category": "general", "description": f"Found {len(bug_reports)} potential issues requiring manual review"}] if bug_reports else [] 351 | }, 352 | "llm_analysis_error": str(e) 353 | }) 354 | else: 355 | # no llm analysis available 356 | summary.update({ 357 | "overall_status": "low-severity" if bug_reports else "passing", 358 | "status_emoji": "🟡" if bug_reports else "✅", 359 | "status_description": f"Found {len(bug_reports)} potential issues requiring manual review" if bug_reports else "No technical issues detected during testing", 360 | "total_issues": len(bug_reports), 361 | "severity_breakdown": { 362 | "high_severity": [], 363 | "medium_severity": [], 364 | "low_severity": [{"category": "general", "description": f"Found {len(bug_reports)} potential issues requiring manual review"}] if bug_reports else [] 365 | } 366 | }) 367 | 368 | return summary 369 | 370 | async def scout_page(base_url: str) -> list: 371 | """Scout agent that identifies all interactive elements on the page""" 372 | try: 373 | llm = ChatGoogleGenerativeAI( 374 | model="gemini-1.5-flash", 375 | temperature=0.1, 376 | 
google_api_key=GOOGLE_API_KEY 377 | ) 378 | 379 | browser_profile = BrowserProfile( 380 | headless=True, 381 | disable_security=True, 382 | user_data_dir=None, 383 | args=['--disable-gpu', '--no-sandbox', '--disable-dev-shm-usage', '--headless=new'], 384 | wait_for_network_idle_page_load_time=2.0, 385 | maximum_wait_page_load_time=8.0, 386 | wait_between_actions=0.5 387 | ) 388 | 389 | browser_session = BrowserSession(browser_profile=browser_profile, headless=True) 390 | 391 | scout_task = f"""Visit {base_url} and identify ALL interactive elements on the page. Do NOT click anything, just observe and catalog what's available. List buttons, links, forms, input fields, menus, dropdowns, and any other clickable elements you can see. Provide a comprehensive inventory.""" 392 | 393 | agent = Agent( 394 | task=scout_task, 395 | llm=llm, 396 | browser_session=browser_session, 397 | use_vision=True 398 | ) 399 | 400 | history = await agent.run() 401 | await browser_session.close() 402 | 403 | scout_result = str(history.final_result()) if hasattr(history, 'final_result') else str(history) 404 | 405 | # partition elements with llm 406 | partition_prompt = f""" 407 | Based on this scout report of interactive elements found on {base_url}: 408 | 409 | {scout_result} 410 | 411 | Create a list of specific testing tasks, each focusing on different elements. Each task should specify exactly which elements to test (by their text, location, or description). Aim for 6-8 distinct tasks that cover different elements without overlap. 412 | 413 | Format as JSON array: 414 | [ 415 | "Test the [specific element description] - click on [exact button/link text or location]", 416 | "Test the [different specific element] - interact with [exact description]", 417 | ... 418 | ] 419 | 420 | Make each task very specific about which exact elements to test. 
421 | """ 422 | 423 | partition_response = llm.invoke(partition_prompt) 424 | 425 | # parse response 426 | import re 427 | json_match = re.search(r'\[.*\]', partition_response.content, re.DOTALL) 428 | if json_match: 429 | element_tasks = json.loads(json_match.group()) 430 | else: 431 | # fallback tasks 432 | element_tasks = [ 433 | f"Test navigation elements in the header area of {base_url}", 434 | f"Test main content links and buttons in {base_url}", 435 | f"Test footer links and elements in {base_url}", 436 | f"Test any form elements found in {base_url}", 437 | f"Test sidebar or secondary navigation in {base_url}", 438 | f"Test any remaining interactive elements in {base_url}" 439 | ] 440 | 441 | return element_tasks 442 | 443 | except Exception as e: 444 | # fallback tasks if scouting fails 445 | return [ 446 | f"Test navigation elements in the header area of {base_url}", 447 | f"Test main content links and buttons in {base_url}", 448 | f"Test footer links and elements in {base_url}", 449 | f"Test any form elements found in {base_url}", 450 | f"Test sidebar or secondary navigation in {base_url}", 451 | f"Test any remaining interactive elements in {base_url}" 452 | ] 453 | -------------------------------------------------------------------------------- /vibetest/mcp_server.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import os 4 | import sys 5 | 6 | # Completely disable ALL logging to prevent JSON-RPC interference 7 | logging.disable(logging.CRITICAL) 8 | os.environ['ANONYMIZED_TELEMETRY'] = 'false' 9 | os.environ['BROWSER_USE_LOGGING_LEVEL'] = 'CRITICAL' 10 | 11 | # Redirect stderr to devnull to suppress any remaining output 12 | if hasattr(sys.stderr, 'close'): 13 | sys.stderr = open(os.devnull, 'w') 14 | 15 | from mcp.server.fastmcp import FastMCP 16 | from .agents import run_pool, summarize_bug_reports 17 | 18 | # Create FastMCP instance 19 | mcp = FastMCP("vibetest") 20 | 21 | @mcp.tool() 22 | async def start(url: str, num_agents: int = 3, headless: bool = False) -> str: 23 | """Launch browser agents to test a website for UI bugs and issues. 24 | 25 | Args: 26 | url: The website URL to test 27 | num_agents: Number of QA agents to spawn (default: 3) 28 | headless: Whether to run browsers in headless mode (default: True) 29 | 30 | Returns: 31 | test_id: Unique identifier for this test run 32 | """ 33 | try: 34 | test_id = await run_pool(url, num_agents, headless=headless) 35 | return test_id 36 | except Exception as e: 37 | return f"Error starting test: {str(e)}" 38 | 39 | @mcp.tool() 40 | def results(test_id: str) -> dict: 41 | """Get the consolidated bug report for a test run. 
42 | 43 | Args: 44 | test_id: The test ID returned from start 45 | 46 | Returns: 47 | dict: Complete test results with detailed findings 48 | """ 49 | try: 50 | summary = summarize_bug_reports(test_id) 51 | 52 | if "error" in summary: 53 | return summary 54 | 55 | # Get test data to access duration 56 | from .agents import _test_results 57 | test_data = _test_results.get(test_id, {}) 58 | 59 | # Add duration to the summary 60 | duration_seconds = test_data.get('duration', 0) 61 | if duration_seconds > 0: 62 | summary['duration_seconds'] = duration_seconds 63 | if duration_seconds < 60: 64 | summary['duration_formatted'] = f"{duration_seconds:.0f}s" 65 | else: 66 | minutes = int(duration_seconds // 60) 67 | seconds = int(duration_seconds % 60) 68 | summary['duration_formatted'] = f"{minutes}m {seconds}s" 69 | else: 70 | summary['duration_formatted'] = "unknown" 71 | 72 | return summary 73 | 74 | except Exception as e: 75 | return {"error": f"Error getting results: {str(e)}"} 76 | 77 | def run(): 78 | """Entry point for the MCP server""" 79 | try: 80 | mcp.run() 81 | return 0 82 | except Exception as e: 83 | return 1 84 | 85 | if __name__ == "__main__": 86 | run() 87 | --------------------------------------------------------------------------------
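
For quick local debugging, the agent pool in `vibetest/agents.py` can also be driven directly, without going through the MCP server — this is essentially what the `start` and `results` tools do under the hood. The snippet below is a minimal sketch, assuming the package has been installed with `uv pip install -e .` and that `GOOGLE_API_KEY` is exported (importing `vibetest.agents` raises a `ValueError` otherwise); the URL and agent count are illustrative.

```python
# Minimal sketch: drive the vibetest agent pool without the MCP server.
# Assumes GOOGLE_API_KEY is set in the environment and the package is installed.
import asyncio
import json

from vibetest.agents import run_pool, summarize_bug_reports

async def main() -> None:
    # Launch 3 headless agents against a local dev server (same defaults as the `start` tool).
    test_id = await run_pool("http://localhost:3000", num_agents=3, headless=True)
    # Consolidate per-agent findings into the severity-ranked summary the `results` tool returns.
    summary = summarize_bug_reports(test_id)
    print(json.dumps(summary, indent=2, default=str))

if __name__ == "__main__":
    asyncio.run(main())
```

The dict printed here has the same shape as the `results` MCP tool's response, minus the `duration_seconds`/`duration_formatted` fields that `mcp_server.results` adds on top of the summary.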
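
To check that the packaged `vibetest-mcp` entry point itself starts and responds outside of Cursor or Claude, the server can be exercised with the MCP Python SDK's stdio client (the `mcp` package is already a declared dependency). This is only a sketch under assumptions: the binary path and API key are placeholders copied from the README, and it relies on the SDK's standard `StdioServerParameters`/`stdio_client`/`ClientSession` interface rather than anything shipped in this repo.

```python
# Sketch (placeholder path and key): call the vibetest-mcp tools over stdio.
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

server = StdioServerParameters(
    command="/full/path/to/vibetest-use/.venv/bin/vibetest-mcp",
    env={"GOOGLE_API_KEY": "your_api_key"},
)

async def main() -> None:
    async with stdio_client(server) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # `start` returns the test_id as plain text content.
            started = await session.call_tool(
                "start", {"url": "http://localhost:3000", "num_agents": 3, "headless": True}
            )
            test_id = started.content[0].text
            # `results` returns the consolidated, severity-ranked report.
            report = await session.call_tool("results", {"test_id": test_id})
            print(report.content)

asyncio.run(main())
```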