├── .python-version
├── tests
├── __pycache__
│ └── test_server.cpython-311-pytest-8.0.0.pyc
└── test_server.py
├── src
└── web_browser_mcp_server
│ ├── __init__.py
│ ├── config.py
│ └── server.py
├── smithery.yaml
├── .github
└── workflows
│ └── tests.yml
├── LICENSE
├── pyproject.toml
├── Dockerfile
├── .gitignore
└── README.md
/.python-version:
--------------------------------------------------------------------------------
1 | 3.11
--------------------------------------------------------------------------------
/tests/__pycache__/test_server.cpython-311-pytest-8.0.0.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blazickjp/web-browser-mcp-server/HEAD/tests/__pycache__/test_server.cpython-311-pytest-8.0.0.pyc
--------------------------------------------------------------------------------
/src/web_browser_mcp_server/__init__.py:
--------------------------------------------------------------------------------
1 | from . import server
2 | import asyncio
3 |
4 |
def main():
    """Console-script entry point: run the async server until it finishes."""
    server_coroutine = server.main()
    asyncio.run(server_coroutine)
8 |
9 |
10 | # Optionally expose other important items at package level
11 | __all__ = ["main", "server"]
12 |
--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------
1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
2 |
3 | startCommand:
4 | type: stdio
5 | configSchema:
6 | # JSON Schema defining the configuration options for the MCP.
7 | type: object
8 | required:
9 | - requestTimeout
10 | properties:
11 | requestTimeout:
12 | type: string
13 | description: Webpage request timeout in seconds
14 | commandFunction:
15 | # A function that produces the CLI command to start the MCP on stdio.
16 | |-
17 | (config) => ({ command: 'uv', args: ['tool', 'run', 'web-browser-mcp-server'], env: { REQUEST_TIMEOUT: config.requestTimeout } })
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | pull_request:
7 | branches: [ main ]
8 |
9 | jobs:
10 | test:
11 | runs-on: ubuntu-latest
12 | strategy:
13 | matrix:
14 | python-version: ["3.11"]
15 |
16 | steps:
17 | - uses: actions/checkout@v4
18 |
19 | - name: Set up Python ${{ matrix.python-version }}
20 | uses: actions/setup-python@v4
21 | with:
22 | python-version: ${{ matrix.python-version }}
23 |
24 | - name: Install dependencies
25 | run: |
26 | python -m pip install --upgrade pip
27 | pip install -e ".[test]"
28 |
29 | - name: Run tests
30 | run: |
31 | pytest --cov=web_browser_mcp_server
32 |
33 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Your Name
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "web-browser-mcp-server"
3 | version = "0.2.1"
4 | description = "Transform your AI applications with advanced web browsing capabilities through this Model Context Protocol (MCP) server"
5 | readme = "README.md"
6 | requires-python = ">=3.11"
7 | dependencies = [
8 | "beautifulsoup4==4.12.2",
9 | "requests==2.31.0",
10 | "fastapi>=0.110.0",
11 | "uvicorn==0.27.0",
12 | "pydantic>=2.8.0",
13 | "aiohttp==3.9.1",
14 | "python-dotenv==1.0.0",
15 | "pydantic-settings>=2.1.0",
16 | "click==8.1.7",
17 | "mcp>=1.1.2",
18 | ]
19 | license = { text = "MIT" } # Fixed: license needs to be a table with 'text' key
20 |
21 | [project.urls]
22 | Repository = "https://github.com/blazickjp/web-browser-mcp-server"
23 |
24 | [project.optional-dependencies]
25 | test = [
26 | "pytest==8.0.0",
27 | "pytest-asyncio==0.23.5",
28 | "pytest-cov==4.1.0",
29 | "httpx>=0.27.0",
30 | "pluggy==1.3.0",
31 | "aioresponses==0.7.6"
32 | ]
33 | [project.scripts]
34 | web-browser-mcp-server = "web_browser_mcp_server:main"
35 |
36 | [build-system]
37 | requires = ["hatchling"]
38 | build-backend = "hatchling.build"
39 |
40 | [tool.hatch.build.targets.wheel]
41 | packages = ["src/web_browser_mcp_server"]
42 |
43 | [tool.hatch.metadata]
44 | allow-direct-references = true
45 |
46 | [tool.pytest.ini_options]
47 | asyncio_mode = "auto"
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile
2 | # Use a Python image with uv pre-installed
3 | FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim AS uv
4 |
5 | # Install the project into /app
6 | WORKDIR /app
7 |
8 | # Enable bytecode compilation
9 | ENV UV_COMPILE_BYTECODE=1
10 |
11 | # Copy from the cache instead of linking since it's a mounted volume
12 | ENV UV_LINK_MODE=copy
13 |
14 | # Install the project's dependencies using the lockfile and settings
15 | RUN --mount=type=cache,target=/root/.cache/uv --mount=type=bind,source=uv.lock,target=uv.lock --mount=type=bind,source=pyproject.toml,target=pyproject.toml uv sync --frozen --no-install-project --no-dev --no-editable
16 |
17 | # Then, add the rest of the project source code and install it
18 | # Installing separately from its dependencies allows optimal layer caching
19 | ADD . /app
20 | RUN --mount=type=cache,target=/root/.cache/uv uv sync --frozen --no-dev --no-editable
21 |
22 | FROM python:3.11-slim-bookworm
23 |
24 | WORKDIR /app
25 |
26 | COPY --from=uv /root/.local /root/.local
27 | COPY --from=uv --chown=app:app /app/.venv /app/.venv
28 |
29 | # Place executables in the environment at the front of the path
30 | ENV PATH="/app/.venv/bin:$PATH"
31 |
# Run the console script installed into /app/.venv/bin (already first on PATH).
# This runtime stage is based on plain python:3.11-slim-bookworm and only
# receives /root/.local and the virtualenv from the build stage, so `uv` is
# not expected to be available here.
ENTRYPOINT ["web-browser-mcp-server"]
--------------------------------------------------------------------------------
/tests/test_server.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from web_browser_mcp_server.server import list_tools, call_tool
3 | from aioresponses import aioresponses
4 |
5 |
@pytest.mark.asyncio
async def test_list_tools():
    """The server advertises exactly one tool, browse_webpage, taking a url."""
    advertised = await list_tools()
    assert len(advertised) == 1

    tool = advertised[0]
    assert tool.name == "browse_webpage"
    assert "url" in tool.inputSchema["properties"]
12 |
13 |
@pytest.mark.asyncio
async def test_call_tool_invalid():
    """An unrecognised tool name produces a single error reply."""
    replies = await call_tool("invalid_tool", {})
    assert len(replies) == 1

    message = replies[0].text
    assert "Error: Unknown tool" in message
19 |
20 |
@pytest.mark.asyncio
async def test_browse_webpage():
    """browse_webpage returns the title, link and selector matches of a page.

    The HTTP request is intercepted by ``aioresponses``, so no network access
    takes place. The mocked body carries real markup so that title extraction,
    link collection and the ``.content`` selector each have something to match.
    """
    mock_html = """
    <html>
        <head><title>Test Page</title></head>
        <body>
            <a href="https://test.com/link">Test Link</a>
            <div class="content">Test Content</div>
        </body>
    </html>
    """

    with aioresponses() as m:
        m.get("https://test.com", status=200, body=mock_html)

        result = await call_tool(
            "browse_webpage",
            {"url": "https://test.com", "selectors": {"content": ".content"}},
        )

    assert len(result) == 1
    content = result[0].text
    assert "Test Page" in content
    assert "Test Link" in content
    assert "Test Content" in content
    # The link's href and the selector result prove that the markup was
    # actually parsed, not merely echoed back as flat page text.
    assert "https://test.com/link" in content
    assert "'content': ['Test Content']" in content
46 |
--------------------------------------------------------------------------------
/src/web_browser_mcp_server/config.py:
--------------------------------------------------------------------------------
1 | """
2 | Configuration Settings for Web Browser MCP Server
3 | ==============================================
4 |
5 | This module defines the settings and configuration options for the web browser MCP server
6 | using pydantic for settings management and validation.
7 |
8 | Settings include:
9 | - Application name and version
10 | - Logging level
11 | - User agent string for HTTP requests
12 | - Request timeout duration
13 | - Maximum number of retries
14 | """
15 |
16 | from pydantic_settings import BaseSettings
17 | from pydantic import ConfigDict
18 |
19 |
class Settings(BaseSettings):
    """
    Application settings class using pydantic_settings.

    Values are declared with defaults below; the model is configured to also
    read a UTF-8 encoded ``.env`` file (see ``model_config``).

    Attributes:
        APP_NAME (str): Name of the application
        APP_VERSION (str): Current version of the application
        LOG_LEVEL (str): Logging level (default: "info")
        USER_AGENT (str): User agent string for HTTP requests
        REQUEST_TIMEOUT (int): Timeout for HTTP requests in seconds
        MAX_RETRIES (int): Maximum number of retry attempts for failed requests
    """

    APP_NAME: str = "web-browser-mcp-server"
    # Keep in sync with the `version` field in pyproject.toml; this value is
    # what the server reports as its version during MCP initialization.
    APP_VERSION: str = "0.2.1"
    LOG_LEVEL: str = "info"
    USER_AGENT: str = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )
    REQUEST_TIMEOUT: int = 30
    # NOTE(review): declared but not referenced by server.py at present.
    MAX_RETRIES: int = 3

    model_config = ConfigDict(env_file=".env", env_file_encoding="utf-8")
43 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 | *.lock
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | cover/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | .python-version
87 |
88 | # pipenv
89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
91 | # having no cross-platform support, pipfile.lock can be added to .gitignore.
92 | Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # pytype static type analyzer
132 | .pytype/
133 |
134 | # Cython debug symbols
135 | cython_debug/
136 |
137 | # FastAPI
138 | *.db
139 |
140 | # Local environment variables
141 | .env.local
142 | .env.*.local
143 | .env.test
144 | .env.*.test
145 | .env.production
146 | .env.*.production
147 |
148 | # Hatch
149 | .hatch/
150 | .hatch.env/
151 | .hatch.env.json
152 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://twitter.com/JoeBlazick)
2 | [](https://smithery.ai/server/web-browser-mcp-server)
3 | [](https://www.python.org/downloads/)
4 | [](https://opensource.org/licenses/MIT)
5 | [](https://pypi.org/project/web-browser-mcp-server/)
6 | [](https://pypi.org/project/web-browser-mcp-server/)
7 |
8 |
9 |
10 | ## ✨ Features
11 |
12 | > 🌐 Enable AI assistants to browse and extract content from the web through a simple MCP interface.
13 |
14 | The Web Browser MCP Server provides AI models with the ability to browse websites, extract content, and understand web pages through the Model Context Protocol (MCP). It enables smart content extraction with CSS selectors and robust error handling.
15 |
16 |
17 |
18 | 🤝 **[Contribute](https://github.com/blazickjp/web-browser-mcp-server/blob/main/CONTRIBUTING.md)** •
19 | 📝 **[Report Bug](https://github.com/blazickjp/web-browser-mcp-server/issues)**
20 |
21 |
22 |
23 | ## ✨ Core Features
24 |
25 | - 🎯 **Smart Content Extraction**: Target exactly what you need with CSS selectors
26 | - ⚡ **Lightning Fast**: Built with async processing for optimal performance
27 | - 📊 **Rich Metadata**: Capture titles, links, and structured content
28 | - 🛡️ **Robust & Reliable**: Built-in error handling and timeout management
29 | - 🌍 **Cross-Platform**: Works everywhere Python runs
30 |
31 | ## 🚀 Quick Start
32 |
33 | ### Installing via Smithery
34 |
35 | To install Web Browser Server for Claude Desktop automatically via [Smithery](https://smithery.ai/server/web-browser-mcp-server):
36 |
37 | ```bash
38 | npx -y @smithery/cli install web-browser-mcp-server --client claude
39 | ```
40 |
41 | ### Installing Manually
42 | Install using uv:
43 |
44 | ```bash
45 | uv tool install web-browser-mcp-server
46 | ```
47 |
48 | For development:
49 |
50 | ```bash
51 | # Clone and set up development environment
52 | git clone https://github.com/blazickjp/web-browser-mcp-server.git
53 | cd web-browser-mcp-server
54 |
55 | # Create and activate virtual environment
56 | uv venv
57 | source .venv/bin/activate
58 |
59 | # Install with test dependencies
60 | uv pip install -e ".[test]"
61 | ```
62 |
63 | ### 🔌 MCP Integration
64 |
65 | Add this configuration to your MCP client config file:
66 |
67 | ```json
68 | {
69 | "mcpServers": {
70 | "web-browser-mcp-server": {
71 | "command": "uv",
72 | "args": [
73 | "tool",
74 | "run",
75 | "web-browser-mcp-server"
76 | ],
77 | "env": {
78 | "REQUEST_TIMEOUT": "30"
79 | }
80 | }
81 | }
82 | }
83 | ```
84 |
85 | For Development:
86 |
87 | ```json
88 | {
89 | "mcpServers": {
90 | "web-browser-mcp-server": {
91 | "command": "uv",
92 | "args": [
93 | "--directory",
94 | "path/to/cloned/web-browser-mcp-server",
95 | "run",
96 | "web-browser-mcp-server"
97 | ],
98 | "env": {
99 | "REQUEST_TIMEOUT": "30"
100 | }
101 | }
102 | }
103 | }
104 | ```
105 |
106 | ## 💡 Available Tools
107 |
108 | The server provides a powerful web browsing tool:
109 |
110 | ### browse_webpage
111 | Browse and extract content from web pages with optional CSS selectors:
112 |
113 | ```python
114 | # Basic webpage fetch
115 | result = await call_tool("browse_webpage", {
116 | "url": "https://example.com"
117 | })
118 |
119 | # Target specific content with CSS selectors
120 | result = await call_tool("browse_webpage", {
121 | "url": "https://example.com",
122 | "selectors": {
123 | "headlines": "h1, h2",
124 | "main_content": "article.content",
125 | "navigation": "nav a"
126 | }
127 | })
128 | ```
129 |
130 | ## ⚙️ Configuration
131 |
132 | Configure through environment variables:
133 |
134 | | Variable | Purpose | Default |
135 | |----------|---------|---------|
136 | | `REQUEST_TIMEOUT` | Webpage request timeout in seconds | 30 |
137 |
138 | ## 🧪 Testing
139 |
140 | Run the test suite:
141 |
142 | ```bash
143 | python -m pytest
144 | ```
145 |
146 | ## 📄 License
147 |
148 | Released under the MIT License. See the LICENSE file for details.
149 |
150 | ---
151 |
152 |
153 |
154 | Made with ❤️ by the Pear Labs Team
155 |
156 |

157 |
--------------------------------------------------------------------------------
/src/web_browser_mcp_server/server.py:
--------------------------------------------------------------------------------
1 | """
2 | Web Browser MCP Server
3 | =====================
4 |
5 | This module implements an MCP (Model Context Protocol) server that provides web browsing capabilities.
6 | It allows clients to fetch and parse webpage content using specified CSS selectors.
7 |
8 | Key Features:
9 | - Fetches webpage content with configurable timeout and user agent
10 | - Extracts basic page information (title, text, links)
11 | - Supports custom CSS selectors for targeted content extraction
12 | - Handles various error conditions gracefully
13 | """
14 |
15 | import aiohttp
16 | import mcp
17 | import asyncio
18 | from bs4 import BeautifulSoup
19 | from typing import Dict, Any, List
20 | from .config import Settings
21 | import mcp.types as types
22 | from mcp.server import Server, NotificationOptions
23 | from mcp.server.models import InitializationOptions
24 |
25 | # Initialize server settings and create server instance
26 | settings = Settings()
27 | server = Server(settings.APP_NAME)
28 |
29 |
@server.list_tools()
async def list_tools() -> List[types.Tool]:
    """
    Advertise the tools exposed by this server.

    Returns:
        List[types.Tool]: A one-element list describing ``browse_webpage``,
        whose input schema requires ``url`` and optionally accepts a
        ``selectors`` object mapping names to CSS selector strings.
    """
    url_property = {
        "type": "string",
        "description": "The URL of the webpage to browse",
    }
    selectors_property = {
        "type": "object",
        "additionalProperties": {"type": "string"},
        "description": "Optional CSS selectors to extract specific content",
    }

    browse_webpage = types.Tool(
        name="browse_webpage",
        description="Extract content from a webpage with optional CSS selectors for specific elements",
        inputSchema={
            "type": "object",
            "properties": {
                "url": url_property,
                "selectors": selectors_property,
            },
            "required": ["url"],
        },
    )
    return [browse_webpage]
59 |
60 |
def _error_response(message: str) -> List[types.TextContent]:
    """Wrap *message* in the single-item ``Error: ...`` reply used by call_tool."""
    return [types.TextContent(type="text", text=f"Error: {message}")]


def _extract_page_data(html: str, selectors: Dict[str, str]) -> Dict[str, Any]:
    """Parse *html* and collect its title, text, links and per-selector matches."""
    soup = BeautifulSoup(html, "html.parser")

    page_data: Dict[str, Any] = {
        "title": soup.title.string if soup.title else None,
        "text": soup.get_text(strip=True),
        "links": [
            {"text": link.text.strip(), "href": link.get("href")}
            for link in soup.find_all("a", href=True)
        ],
    }

    # Selector results are stored under the caller-chosen key; a key named
    # "title", "text" or "links" therefore replaces the built-in entry.
    for key, selector in selectors.items():
        page_data[key] = [elem.get_text(strip=True) for elem in soup.select(selector)]

    return page_data


@server.call_tool()
async def call_tool(name: str, arguments: Dict[str, Any]) -> List[types.TextContent]:
    """
    Handle tool calls for web browsing functionality.

    Args:
        name (str): The name of the tool to call (must be 'browse_webpage')
        arguments (Dict[str, Any]): Tool arguments including 'url' and optional 'selectors'

    Returns:
        List[types.TextContent]: A single text item holding either the string
        form of the extracted page data or a message starting with "Error: ".

    The function performs the following steps:
    1. Validates the tool name and the presence of the 'url' argument
    2. Fetches the webpage content with configured timeout and user agent
    3. Parses the HTML using BeautifulSoup
    4. Extracts basic page information (title, text, links)
    5. Applies any provided CSS selectors for specific content
    6. Reports failures (timeout, HTTP errors, etc.) as error text instead of raising
    """
    if name != "browse_webpage":
        return _error_response(f"Unknown tool {name}")

    # Validate up front so a missing URL is reported the same way as every
    # other failure, rather than escaping as a KeyError.
    arguments = arguments or {}
    url = arguments.get("url")
    if not url:
        return _error_response("Missing required argument 'url'")

    # An explicit null for "selectors" is treated the same as omitting it.
    selectors = arguments.get("selectors") or {}

    async with aiohttp.ClientSession() as session:
        try:
            headers = {"User-Agent": settings.USER_AGENT}
            timeout = aiohttp.ClientTimeout(total=settings.REQUEST_TIMEOUT)

            async with session.get(url, headers=headers, timeout=timeout) as response:
                if response.status >= 400:
                    return _error_response(
                        f"HTTP {response.status} - Failed to fetch webpage"
                    )
                html = await response.text()

            page_data = _extract_page_data(html, selectors)
            return [types.TextContent(type="text", text=str(page_data))]

        except asyncio.TimeoutError:
            return _error_response("Request timed out while fetching webpage")
        except Exception as e:
            # Tool boundary: aiohttp.ClientError and any parsing/selector
            # failure are all surfaced to the client as error text.
            return _error_response(str(e))
132 |
133 |
async def main():
    """
    Main entry point for the web browser MCP server.
    Sets up and runs the server using stdin/stdout streams.
    """
    # Import the submodule explicitly: attribute access via `mcp.server.stdio`
    # only works if something else has already imported that submodule.
    from mcp.server.stdio import stdio_server

    # Run the server using stdin/stdout streams
    async with stdio_server() as (read_stream, write_stream):
        await server.run(
            read_stream,
            write_stream,
            InitializationOptions(
                server_name=settings.APP_NAME,
                server_version=settings.APP_VERSION,
                capabilities=server.get_capabilities(
                    notification_options=NotificationOptions(),
                    experimental_capabilities={},
                ),
            ),
        )
153 |
--------------------------------------------------------------------------------