├── .env.template ├── .gitignore ├── .vscode └── launch.json ├── Dockerfile ├── LICENSE ├── README.md ├── chatgui.py ├── client_bridge ├── __init__.py ├── bridge.py ├── config.py ├── llm_client.py ├── llm_config.py └── mcp_client.py ├── client_test.py ├── doc ├── chatgui_gpt_generate.png └── globe_icon.png ├── pyproject.toml ├── server ├── browser_manager.py └── browser_navigator_server.py └── smithery.yaml /.env.template: -------------------------------------------------------------------------------- 1 | AZURE_OPEN_AI_ENDPOINT= 2 | AZURE_OPEN_AI_API_KEY= 3 | AZURE_OPEN_AI_DEPLOYMENT_MODEL= 4 | AZURE_OPEN_AI_API_VERSION= -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python-generated files 2 | __pycache__/ 3 | *.py[oc] 4 | build/ 5 | dist/ 6 | wheels/ 7 | *.egg-info 8 | 9 | # Virtual environments 10 | .venv 11 | .env 12 | venv 13 | 14 | # Byte-compiled / optimized / DLL files 15 | __pycache__/ 16 | *.py[cod] 17 | *$py.class 18 | 19 | # C extensions 20 | *.so 21 | 22 | # Distribution / packaging 23 | .Python 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | wheels/ 36 | share/python-wheels/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | MANIFEST 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .nox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | *.py,cover 63 | .hypothesis/ 64 | .pytest_cache/ 65 | cover/ 66 | 67 | # Translations 68 | *.mo 69 | *.pot 70 | 71 | # Django stuff: 72 | *.log 73 | local_settings.py 74 | db.sqlite3 75 | db.sqlite3-journal 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | .pybuilder/ 89 | target/ 90 | 91 | # Jupyter Notebook 92 | .ipynb_checkpoints 93 | 94 | # IPython 95 | profile_default/ 96 | ipython_config.py 97 | 98 | # pyenv 99 | # For a library or package, you might want to ignore these files since the code is 100 | # intended to run in multiple environments; otherwise, check them in: 101 | # .python-version 102 | 103 | # pipenv 104 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 105 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 106 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 107 | # install all needed dependencies. 108 | #Pipfile.lock 109 | 110 | # UV 111 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 112 | # This is especially recommended for binary packages to ensure reproducibility, and is more 113 | # commonly ignored for libraries. 114 | #uv.lock 115 | 116 | # poetry 117 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 118 | # This is especially recommended for binary packages to ensure reproducibility, and is more 119 | # commonly ignored for libraries. 
120 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 121 | #poetry.lock 122 | 123 | # pdm 124 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 125 | #pdm.lock 126 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 127 | # in version control. 128 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 129 | .pdm.toml 130 | .pdm-python 131 | .pdm-build/ 132 | 133 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 134 | __pypackages__/ 135 | 136 | # Celery stuff 137 | celerybeat-schedule 138 | celerybeat.pid 139 | 140 | # SageMath parsed files 141 | *.sage.py 142 | 143 | # Environments 144 | .env 145 | .venv 146 | env/ 147 | venv/ 148 | ENV/ 149 | env.bak/ 150 | venv.bak/ 151 | 152 | # Spyder project settings 153 | .spyderproject 154 | .spyproject 155 | 156 | # Rope project settings 157 | .ropeproject 158 | 159 | # mkdocs documentation 160 | /site 161 | 162 | # mypy 163 | .mypy_cache/ 164 | .dmypy.json 165 | dmypy.json 166 | 167 | # Pyre type checker 168 | .pyre/ 169 | 170 | # pytype static type analyzer 171 | .pytype/ 172 | 173 | # Cython debug symbols 174 | cython_debug/ 175 | 176 | # PyCharm 177 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 178 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 179 | # and can be added to the global gitignore or merged into this file. For a more nuclear 180 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 181 | #.idea/ 182 | /old 183 | uv.lock 184 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python Debugger: Current File", 9 | "type": "debugpy", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal" 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile 2 | # Use a Python image with uv pre-installed 3 | FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS uv 4 | 5 | # Set the working directory 6 | WORKDIR /app 7 | 8 | # Copy the pyproject.toml and the lock file if available 9 | COPY pyproject.toml ./ 10 | 11 | # Copy the entire app directory 12 | COPY . . 
13 | 
14 | # Install the project's dependencies using uv
15 | RUN --mount=type=cache,target=/root/.cache/uv uv sync --frozen --no-install-project --no-dev --no-editable
16 | 
17 | # Note: do not run `fastmcp dev` at build time; it starts the MCP Inspector and would
18 | # block the build. The server is launched by the ENTRYPOINT below instead.
19 | 
20 | FROM python:3.12-slim-bookworm
21 | 
22 | WORKDIR /app
23 | 
24 | COPY --from=uv /root/.local /root/.local
25 | COPY --from=uv /app/.venv /app/.venv
26 | 
27 | # Place executables in the environment at the front of the path
28 | ENV PATH="/app/.venv/bin:$PATH"
29 | 
30 | # Dockerfile ENV does not support shell substitution such as $(cat .env | xargs);
31 | # provide the Azure OpenAI variables at runtime instead, e.g. `docker run --env-file .env ...`.
32 | 
33 | # Set the entrypoint command
34 | ENTRYPOINT ["uv", "run", "fastmcp", "dev", "./server/browser_navigator_server.py:app"]
35 | 
36 | # Expose the necessary port
37 | EXPOSE 5173
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2025 kimtth
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## MCP Server & Client implementation for using Azure OpenAI
2 | 
3 | 
4 | 
5 | - A minimal server/client application implementation utilizing the Model Context Protocol (MCP) and Azure OpenAI.
6 | 
7 | 1. The MCP server is built with `FastMCP`.
8 | 2. `Playwright` is an open-source, end-to-end testing framework by Microsoft for modern web applications.
9 | 3. The MCP server's tool definitions are converted to the OpenAI function-calling format.
10 | 4. The bridge that performs this conversion is a customised version of the `MCP-LLM Bridge` implementation.
11 | 5. To ensure a stable connection, the server object is passed directly into the bridge.
12 | 
13 | ## Model Context Protocol (MCP)
14 | 
15 | **Model Context Protocol (MCP)** is an open protocol that enables secure, controlled interactions between AI applications and local or remote resources.
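In practice, a client and server exchange JSON-RPC 2.0 messages over a transport such as `stdio`. As an illustrative sketch (the exact message contents here are hypothetical), listing the server's tools looks roughly like this:

```json
{"jsonrpc": "2.0", "id": 1, "method": "tools/list"}
{"jsonrpc": "2.0", "id": 1, "result": {"tools": [{"name": "playwright_navigate", "description": "Navigate to a URL.", "inputSchema": {"type": "object", "properties": {"url": {"type": "string"}}}}]}}
```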
16 | 
17 | ### Official Repositories
18 | 
19 | - [MCP Python SDK](https://github.com/modelcontextprotocol/python-sdk)
20 | - [Create Python Server](https://github.com/modelcontextprotocol/create-python-server)
21 | - [MCP Servers](https://github.com/modelcontextprotocol/servers)
22 | 
23 | ### Community Resources
24 | 
25 | - [Awesome MCP Servers](https://github.com/punkpeye/awesome-mcp-servers)
26 | - [MCP on Reddit](https://www.reddit.com/r/mcp/)
27 | 
28 | ### Related Projects
29 | 
30 | - [FastMCP](https://github.com/jlowin/fastmcp): The fast, Pythonic way to build MCP servers.
31 | - [Chat MCP](https://github.com/daodao97/chatmcp): MCP client
32 | - [MCP-LLM Bridge](https://github.com/bartolli/mcp-llm-bridge): MCP implementation that enables communication between MCP servers and OpenAI-compatible LLMs
33 | 
34 | ### MCP Playwright
35 | 
36 | - [MCP Playwright server](https://github.com/executeautomation/mcp-playwright)
37 | - [Microsoft Playwright for Python](https://github.com/microsoft/playwright-python)
38 | 
39 | ### Configuration
40 | 
41 | As of December 2024, this project should be initialized with `uv`. Other dependency managers, such as `pip` and `poetry`, are not yet fully supported by the MCP CLI.
42 | 
43 | 1. Rename `.env.template` to `.env`, then fill in the values in `.env` for Azure OpenAI:
44 | 
45 | ```bash
46 | AZURE_OPEN_AI_ENDPOINT=
47 | AZURE_OPEN_AI_API_KEY=
48 | AZURE_OPEN_AI_DEPLOYMENT_MODEL=
49 | AZURE_OPEN_AI_API_VERSION=
50 | ```
51 | 
52 | 1. Install `uv` for Python library management:
53 | 
54 | ```bash
55 | pip install uv
56 | uv sync
57 | ```
58 | 
59 | 1. Execute `python chatgui.py`
60 | 
61 |    - The sample screen shows the client launching a browser to navigate to the URL.
62 | 
63 | ![chatgui](doc/chatgui_gpt_generate.png)
64 | 
65 | ### w.r.t. 'stdio'
66 | 
67 | `stdio` is a **transport layer** (raw data flow), while **JSON-RPC** is an **application protocol** (structured communication). They are distinct layers that are often combined, e.g., "JSON-RPC over stdio".
68 | 
69 | ### Tool description
70 | 
71 | ```python
72 | @self.mcp.tool()
73 | async def playwright_navigate(url: str, timeout=30000, wait_until="load"):
74 |     """Navigate to a URL."""  # The docstring supplies the tool description, which is used for LLM function calling.
75 | 
76 | # Output
77 | Tool(name='playwright_navigate', description='Navigate to a URL.', inputSchema={'properties': {'url': {'title': 'Url', 'type': 'string'}, 'timeout': {'default': 30000, 'title': 'timeout', 'type': 'string'}
78 | ```
79 | 
80 | ### Tip: uv
81 | 
82 | - [features](https://docs.astral.sh/uv/getting-started/features)
83 | 
84 | ```
85 | uv run: Run a script.
86 | uv venv: Create a new virtual environment. By default, '.venv'.
87 | uv add: Add a dependency to the project.
88 | uv remove: Remove a dependency from the project.
89 | uv sync: Sync (install) the project's dependencies with the environment.
90 | ```
91 | 
92 | ### Tip
93 | 
94 | - `taskkill` command to terminate all `python.exe` processes (Windows):
95 | 
96 | ```cmd
97 | taskkill /IM python.exe /F
98 | ```
99 | - VS Code Python Debugger: debugging with launch.json starts the debugger using the configuration in `.vscode/launch.json`.
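
### Example: MCP tool to OpenAI function-calling format

The conversion implemented in `client_bridge/bridge.py` boils down to the sketch below. `SimpleNamespace` is only a stand-in for a real MCP `Tool` object, and the field values are illustrative:

```python
from types import SimpleNamespace

def to_openai_tool(tool) -> dict:
    """Map an MCP tool (name/description/inputSchema) to an OpenAI tool entry."""
    # OpenAI tool names must not contain dashes or spaces, so sanitize them
    name = tool.name.replace("-", "_").replace(" ", "_").lower()
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": tool.description,
            "parameters": getattr(
                tool, "inputSchema", {"type": "object", "properties": {}, "required": []}
            ),
        },
    }

# Stand-in for a Tool object returned by list_tools()
tool = SimpleNamespace(
    name="playwright_navigate",
    description="Navigate to a URL.",
    inputSchema={"type": "object", "properties": {"url": {"type": "string"}}, "required": ["url"]},
)
print(to_openai_tool(tool))
```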
100 | 101 | 104 | 105 | -------------------------------------------------------------------------------- /chatgui.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | from tkinter import ( 4 | Tk, 5 | Text, 6 | Button, 7 | Scrollbar, 8 | VERTICAL, 9 | RIGHT, 10 | Y, 11 | END, 12 | Frame, 13 | PhotoImage, 14 | ) 15 | from dotenv import load_dotenv 16 | from client_bridge.config import BridgeConfig 17 | from client_bridge.bridge import BridgeManager 18 | from client_bridge.llm_config import get_default_llm_config 19 | from server.browser_navigator_server import BrowserNavigationServer 20 | from loguru import logger 21 | import threading 22 | 23 | # Load environment variables 24 | load_dotenv() 25 | 26 | 27 | class ClientBridgeGUI: 28 | def __init__(self, master): 29 | self.master: Tk = master 30 | self.master.title("Client Bridge GUI") 31 | 32 | # Set application icon 33 | current_dir = os.path.dirname(os.path.abspath(__file__)) 34 | icon_path = os.path.join(current_dir, "doc", "globe_icon.png") 35 | icon_image = PhotoImage(file=icon_path) 36 | self.master.iconphoto(False, icon_image) 37 | 38 | # Frame for the text area and scrollbar 39 | self.chat_frame = Frame(master) 40 | self.chat_frame.pack(padx=10, pady=10, fill="both", expand=True) 41 | 42 | # Set up the text area for chat history (readonly) 43 | self.text_area = Text( 44 | self.chat_frame, wrap="word", height=20, width=50, state="disabled" 45 | ) 46 | self.text_area.pack(side="left", fill="both", expand=True) 47 | 48 | # Create a tag for response text with specific color 49 | self.text_area.tag_configure("response", foreground="#3377ff") 50 | 51 | # Scrollbar for the text area 52 | self.scrollbar = Scrollbar( 53 | self.chat_frame, command=self.text_area.yview, orient=VERTICAL 54 | ) 55 | self.scrollbar.pack(side=RIGHT, fill=Y) 56 | self.text_area.config(yscrollcommand=self.scrollbar.set) 57 | 58 | # Frame for the user input and button 59 | self.input_frame = Frame(master) 60 | self.input_frame.pack(padx=10, pady=10, fill="x") 61 | 62 | # Text widget for the user input (editable) 63 | self.user_input = Text(self.input_frame, height=3, wrap="word", width=50) 64 | self.user_input.pack(side="left", fill="x", expand=True) 65 | 66 | # Send button 67 | self.send_button = Button( 68 | self.input_frame, text="Send", command=self.process_input 69 | ) 70 | self.send_button.pack(side="right") 71 | 72 | # Set up configuration for server and bridge 73 | self.server = BrowserNavigationServer() 74 | self.config = BridgeConfig( 75 | mcp=self.server, 76 | llm_config=get_default_llm_config(), 77 | system_prompt="You are a helpful assistant that can use tools to help answer questions.", 78 | ) 79 | 80 | logger.info(f"Starting bridge with model: {self.config.llm_config.deploy_name}") 81 | 82 | # Initialize the asyncio event loop in a separate thread 83 | self.loop = asyncio.new_event_loop() 84 | threading.Thread(target=self.start_event_loop, daemon=True).start() 85 | 86 | # Initialize the bridge asynchronously 87 | asyncio.run_coroutine_threadsafe(self.initialize_bridge(), self.loop) 88 | 89 | # Bind the close event to the close method 90 | self.master.protocol("WM_DELETE_WINDOW", self.close) 91 | 92 | def start_event_loop(self): 93 | """Start the asyncio event loop.""" 94 | asyncio.set_event_loop(self.loop) 95 | self.loop.run_forever() 96 | 97 | async def initialize_bridge(self): 98 | """Initialize the bridge manager for communication.""" 99 | async with BridgeManager(self.config) as 
bridge: 100 | self.bridge = bridge 101 | logger.info("Bridge initialized successfully.") 102 | 103 | async def process_message(self, user_input): 104 | """Process the message using the bridge and return the response.""" 105 | response = await self.bridge.process_message(user_input) 106 | return response 107 | 108 | def process_input(self): 109 | """Handle user input and trigger asynchronous processing.""" 110 | user_input = self.user_input.get("1.0", END).strip() 111 | if user_input: 112 | # Display the user input in the chat area 113 | self.display_message(f"You: {user_input}\n") 114 | self.user_input.delete("1.0", END) 115 | 116 | # Run the asynchronous input handler in the event loop 117 | asyncio.run_coroutine_threadsafe(self.handle_input(user_input), self.loop) 118 | 119 | async def handle_input(self, user_input): 120 | """Handle user input asynchronously and display response.""" 121 | try: 122 | response = await self.process_message(user_input) 123 | # Schedule the UI update in the main thread 124 | self.master.after(0, self.display_response, f"Response: {response}\n") 125 | except Exception as e: 126 | logger.error(f"Error occurred: {e}") 127 | self.master.after(0, self.display_message, f"Error: {e}\n") 128 | 129 | def display_message(self, message): 130 | """Display a message in the chat area.""" 131 | self.text_area.config(state="normal") # Enable editing temporarily 132 | self.text_area.insert(END, message) 133 | self.text_area.config(state="disabled") # Disable editing 134 | 135 | # Automatically scroll to the latest message 136 | self.text_area.yview(END) 137 | 138 | def display_response(self, message): 139 | """Display a response message in the chat area with specific color.""" 140 | self.text_area.config(state="normal") # Enable editing temporarily 141 | self.text_area.insert(END, message, "response") 142 | self.text_area.config(state="disabled") # Disable editing 143 | 144 | # Automatically scroll to the latest message 145 | self.text_area.yview(END) 146 | 147 | def close(self): 148 | """Handle closing of the application and cleanup.""" 149 | logger.info("Closing application and cleaning up resources.") 150 | self.loop.call_soon_threadsafe(self.loop.stop) 151 | self.master.destroy() 152 | 153 | 154 | if __name__ == "__main__": 155 | root = Tk() 156 | app = ClientBridgeGUI(root) 157 | root.mainloop() 158 | -------------------------------------------------------------------------------- /client_bridge/__init__.py: -------------------------------------------------------------------------------- 1 | # src/mcp_llm_bridge/__init__.py 2 | from .mcp_client import MCPClient 3 | from .bridge import MCPLLMBridge, BridgeManager 4 | from .config import BridgeConfig, LLMConfig 5 | from .llm_client import LLMClient 6 | 7 | __all__ = ['MCPClient', 'MCPLLMBridge', 'BridgeManager', 'BridgeConfig', 'LLMConfig', 'LLMClient'] -------------------------------------------------------------------------------- /client_bridge/bridge.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict, List, Any, Optional 3 | from client_bridge.mcp_client import MCPClient 4 | from client_bridge.llm_client import LLMClient 5 | from client_bridge.config import BridgeConfig 6 | from loguru import logger 7 | 8 | 9 | class MCPLLMBridge: 10 | """Bridge between MCP protocol and LLM client""" 11 | 12 | def __init__(self, config: BridgeConfig): 13 | self.config = config 14 | self.mcp_client_session = MCPClient(config.mcp) 15 | self.llm_client = 
LLMClient(config.llm_config)
16 | 
17 |         self.llm_client.system_prompt = config.system_prompt
18 | 
19 |         self.available_tools: List[Any] = []
20 |         self.tool_name_mapping: Dict[str, str] = (
21 |             {}
22 |         )  # Maps OpenAI tool names to MCP tool names
23 | 
24 |     async def initialize(self):
25 |         """Initialize both clients and set up tools"""
26 |         try:
27 |             # Connect MCP client
28 |             await self.mcp_client_session.connect()
29 | 
30 |             # Get available tools from MCP
31 |             mcp_tools = await self.mcp_client_session.get_available_tools()
32 |             if hasattr(mcp_tools, "tools"):
33 |                 self.available_tools = [
34 |                     *mcp_tools.tools
35 |                 ]
36 |             else:
37 |                 self.available_tools = [*mcp_tools]
38 | 
39 |             logger.debug(f"MCP Tools received: {self.available_tools}")
40 | 
41 |             # Convert and register tools with LLM client
42 |             converted_tools = self._convert_mcp_tools_to_openai_format(
43 |                 self.available_tools
44 |             )
45 |             logger.debug(f"Converted tools for OpenAI: {converted_tools}")
46 |             self.llm_client.tools = converted_tools
47 | 
48 |             return True
49 |         except Exception as e:
50 |             logger.error(f"Bridge initialization failed: {str(e)}", exc_info=True)
51 |             return False
52 | 
53 |     def _convert_mcp_tools_to_openai_format(
54 |         self, mcp_tools: List[Any]
55 |     ) -> List[Dict[str, Any]]:
56 |         """Convert MCP tool format to OpenAI tool format"""
57 |         openai_tools = []
58 | 
59 |         logger.debug(f"Input mcp_tools type: {type(mcp_tools)}")
60 |         logger.debug(f"Input mcp_tools: {mcp_tools}")
61 | 
62 |         # Extract tools from the response
63 |         if hasattr(mcp_tools, "tools"):
64 |             tools_list = mcp_tools.tools
65 |             logger.debug("Found ListToolsResult, extracting tools attribute")
66 |         elif isinstance(mcp_tools, dict):
67 |             tools_list = mcp_tools.get("tools", [])
68 |             logger.debug("Found dict, extracting 'tools' key")
69 |         else:
70 |             tools_list = mcp_tools
71 |             logger.debug("Using mcp_tools directly as list")
72 | 
73 |         logger.debug(f"Tools list type: {type(tools_list)}")
74 |         logger.debug(f"Tools list: {tools_list}")
75 | 
76 |         # Process each tool in the list
77 |         if isinstance(tools_list, list):
78 |             logger.debug(f"Processing {len(tools_list)} tools")
79 |             for tool in tools_list:
80 |                 logger.debug(f"Processing tool: {tool}, type: {type(tool)}")
81 |                 if hasattr(tool, "name") and hasattr(tool, "description"):
82 |                     openai_name = self._sanitize_tool_name(tool.name)
83 |                     self.tool_name_mapping[openai_name] = tool.name
84 |                     logger.debug(f"Tool has required attributes. 
Name: {tool.name}") 85 | 86 | tool_schema = getattr( 87 | tool, 88 | "inputSchema", 89 | {"type": "object", "properties": {}, "required": []}, 90 | ) 91 | 92 | openai_tool = { 93 | "type": "function", 94 | "function": { 95 | "name": openai_name, 96 | "description": tool.description, 97 | "parameters": tool_schema, 98 | }, 99 | } 100 | openai_tools.append(openai_tool) 101 | logger.debug(f"Converted tool {tool.name} to OpenAI format") 102 | else: 103 | logger.debug( 104 | f"Tool missing required attributes: has name = {hasattr(tool, 'name')}, has description = {hasattr(tool, 'description')}" 105 | ) 106 | else: 107 | logger.debug(f"Tools list is not a list, it's a {type(tools_list)}") 108 | 109 | return openai_tools 110 | 111 | def _sanitize_tool_name(self, name: str) -> str: 112 | """Sanitize tool name for OpenAI compatibility""" 113 | # Replace any characters that might cause issues 114 | return name.replace("-", "_").replace(" ", "_").lower() 115 | 116 | async def process_message(self, message: str) -> str: 117 | """Process a user message through the bridge""" 118 | try: 119 | # Send message to LLM 120 | logger.debug(f"Sending message to LLM: {message}") 121 | response = await self.llm_client.invoke_with_prompt(message) 122 | logger.debug(f"LLM Response: {response}") 123 | 124 | # Keep processing tool calls until we get a final response 125 | while response.is_tool_call: 126 | if not response.tool_calls: 127 | break 128 | 129 | logger.debug(f"Tool calls detected: {response.tool_calls}") 130 | tool_responses = await self._handle_tool_calls(response.tool_calls) 131 | logger.debug(f"Tool responses: {tool_responses}") 132 | 133 | # Continue the conversation with tool results 134 | response = await self.llm_client.invoke(tool_responses) 135 | logger.debug(f"Next LLM response: {response}") 136 | 137 | return response.content 138 | except Exception as e: 139 | logger.error(f"Error processing message: {str(e)}", exc_info=True) 140 | return f"Error processing message: {str(e)}" 141 | 142 | async def _handle_tool_calls( 143 | self, tool_calls: List[Dict[str, Any]] 144 | ) -> List[Dict[str, Any]]: 145 | """Handle tool calls through MCP""" 146 | tool_responses = [] 147 | 148 | for tool_call in tool_calls: 149 | try: 150 | logger.debug(f"Processing tool call: {tool_call}") 151 | # Get original MCP tool name 152 | openai_name = tool_call.function.name 153 | mcp_name = self.tool_name_mapping.get(openai_name) 154 | 155 | if not mcp_name: 156 | raise ValueError(f"Unknown tool: {openai_name}") 157 | 158 | # Parse arguments 159 | arguments = json.loads(tool_call.function.arguments) 160 | logger.debug(f"Tool arguments: {arguments}") 161 | 162 | # Execute through MCP 163 | result = await self.mcp_client_session.call_tool(mcp_name, arguments) 164 | logger.debug(f"Raw MCP result: {result}") 165 | 166 | # Format response - handle both string and structured results 167 | if isinstance(result, str): 168 | output = result 169 | elif hasattr(result, "content") and isinstance(result.content, list): 170 | # Handle MCP CallToolResult format 171 | output = " ".join( 172 | content.text 173 | for content in result.content 174 | if hasattr(content, "text") 175 | ) 176 | else: 177 | output = str(result) # Use str() instead of json.dumps() 178 | 179 | logger.debug(f"Formatted output: {output}") 180 | 181 | # Format response 182 | tool_responses.append({"tool_call_id": tool_call.id, "output": output}) 183 | 184 | except Exception as e: 185 | logger.error(f"Tool execution failed: {str(e)}", exc_info=True) 186 | 
tool_responses.append( 187 | {"tool_call_id": tool_call.id, "output": f"Error: {str(e)}"} 188 | ) 189 | 190 | return tool_responses 191 | 192 | 193 | class BridgeManager: 194 | """Manager class for handling the bridge lifecycle""" 195 | 196 | def __init__(self, config: BridgeConfig): 197 | self.config = config 198 | self.bridge: Optional[MCPLLMBridge] = None 199 | 200 | async def __aenter__(self) -> MCPLLMBridge: 201 | """Context manager entry""" 202 | self.bridge = MCPLLMBridge(self.config) 203 | await self.bridge.initialize() 204 | return self.bridge 205 | 206 | async def __aexit__(self, exc_type, exc_val, exc_tb): 207 | """Context manager exit""" 208 | logger.debug("Context manager exit") 209 | -------------------------------------------------------------------------------- /client_bridge/config.py: -------------------------------------------------------------------------------- 1 | from fastmcp import FastMCP 2 | from pydantic import BaseModel 3 | from typing import Optional 4 | 5 | class LLMConfig(BaseModel): 6 | """Configuration for LLM client""" 7 | api_key: str 8 | model: Optional[str] = None 9 | base_url: Optional[str] = None 10 | temperature: float = 0.7 11 | max_tokens: int = 2000 12 | # Azure OpenAI specific parameters 13 | api_version: Optional[str] = None 14 | azure_endpoint: Optional[str] = None 15 | deploy_name: Optional[str] = None 16 | 17 | class BridgeConfig(BaseModel): 18 | """Configuration for the MCP-LLM Bridge""" 19 | mcp: FastMCP 20 | llm_config: LLMConfig 21 | system_prompt: Optional[str] = None 22 | 23 | class Config: 24 | arbitrary_types_allowed = True 25 | 26 | 27 | -------------------------------------------------------------------------------- /client_bridge/llm_client.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Any, Optional 2 | from openai import AzureOpenAI, OpenAI 3 | from .config import LLMConfig 4 | from loguru import logger 5 | 6 | 7 | class LLMResponse: 8 | """Standardized response format focusing on tool handling""" 9 | def __init__(self, completion: Any): 10 | self.completion = completion 11 | self.choice = completion.choices[0] 12 | self.message = self.choice.message 13 | self.stop_reason = self.choice.finish_reason 14 | self.is_tool_call = self.stop_reason == "tool_calls" 15 | 16 | # Format content for bridge compatibility 17 | self.content = self.message.content if self.message.content is not None else "" 18 | self.tool_calls = self.message.tool_calls if hasattr(self.message, "tool_calls") else None 19 | 20 | # Debug logging 21 | logger.debug(f"Raw completion: {completion}") 22 | logger.debug(f"Message content: {self.content}") 23 | logger.debug(f"Tool calls: {self.tool_calls}") 24 | 25 | def get_message(self) -> Dict[str, Any]: 26 | """Get standardized message format""" 27 | return { 28 | "role": "assistant", 29 | "content": self.content, 30 | "tool_calls": self.tool_calls 31 | } 32 | 33 | class LLMClient: 34 | """Client for interacting with OpenAI-compatible LLMs""" 35 | 36 | def __init__(self, config: LLMConfig): 37 | self.config = config 38 | if hasattr(config, 'azure_endpoint') and config.azure_endpoint: 39 | self.client = AzureOpenAI( 40 | api_version=config.api_version, 41 | azure_endpoint=config.azure_endpoint, 42 | api_key=config.api_key, 43 | ) 44 | else: 45 | self.client = OpenAI( 46 | api_key=config.api_key, 47 | base_url=config.base_url 48 | ) 49 | self.tools = [] 50 | self.messages = [] 51 | self.system_prompt = None 52 | 53 | def _prepare_messages(self) -> 
List[Dict[str, Any]]:
54 |         """Prepare messages for API call"""
55 |         formatted_messages = []
56 | 
57 |         if self.system_prompt:
58 |             formatted_messages.append({
59 |                 "role": "system",
60 |                 "content": self.system_prompt
61 |             })
62 | 
63 |         formatted_messages.extend(self.messages)
64 |         return formatted_messages
65 | 
66 |     async def invoke_with_prompt(self, prompt: str) -> LLMResponse:
67 |         """Send a single prompt to the LLM"""
68 |         self.messages.append({
69 |             "role": "user",
70 |             "content": prompt
71 |         })
72 | 
73 |         return await self.invoke([])
74 | 
75 |     async def invoke(self, tool_results: Optional[List[Dict[str, Any]]] = None) -> LLMResponse:
76 |         """Invoke the LLM with optional tool results"""
77 |         if tool_results:
78 |             for result in tool_results:
79 |                 self.messages.append({
80 |                     "role": "tool",
81 |                     "content": str(result.get("output", "")),  # Convert to string and provide default
82 |                     "tool_call_id": result["tool_call_id"]
83 |                 })
84 | 
85 |         completion = self.client.chat.completions.create(
86 |             # Use the Azure deployment name only when an Azure endpoint is configured
87 |             model=self.config.deploy_name if self.config.azure_endpoint else self.config.model,
88 |             messages=self._prepare_messages(),
89 |             tools=self.tools if self.tools else None,
90 |             temperature=self.config.temperature,
91 |             max_tokens=self.config.max_tokens
92 |         )
93 | 
94 |         response = LLMResponse(completion)
95 |         self.messages.append(response.get_message())
96 | 
97 |         return response
--------------------------------------------------------------------------------
/client_bridge/llm_config.py:
--------------------------------------------------------------------------------
1 | import os
2 | from client_bridge.config import LLMConfig
3 | 
4 | 
5 | def get_default_llm_config():
6 |     """Set default LLM configuration"""
7 |     return LLMConfig(
8 |         azure_endpoint=os.getenv("AZURE_OPEN_AI_ENDPOINT"),
9 |         api_version=os.getenv("AZURE_OPEN_AI_API_VERSION"),
10 |         api_key=os.getenv("AZURE_OPEN_AI_API_KEY"),
11 |         deploy_name=os.getenv("AZURE_OPEN_AI_DEPLOYMENT_MODEL"),
12 |     )
13 | 
--------------------------------------------------------------------------------
/client_bridge/mcp_client.py:
--------------------------------------------------------------------------------
1 | from typing import Any, List
2 | from loguru import logger
3 | from fastmcp import FastMCP
4 | from mcp.shared.memory import (
5 |     create_connected_server_and_client_session as client_session,
6 | )
7 | 
8 | 
9 | class MCPClient:
10 |     """Client for interacting with MCP servers"""
11 | 
12 |     def __init__(self, mcp: FastMCP):
13 |         self.mcp = mcp
14 | 
15 |     async def connect(self):
16 |         """Establishes connection to MCP server"""
17 |         logger.debug("Connecting to MCP server...")
18 |         try:
19 |             async with client_session(self.mcp._mcp_server) as client:
20 |                 logger.debug("Connected to MCP server successfully")
21 |         except Exception as e:
22 |             logger.error(f"Failed to connect to MCP server: {e}")
23 |             raise
24 | 
25 |     async def get_available_tools(self) -> List[Any]:
26 |         """List available tools"""
27 |         logger.debug("Requesting available tools from MCP server")
28 |         try:
29 |             async with client_session(self.mcp._mcp_server) as client:
30 |                 tools = await client.list_tools()
31 |                 logger.debug(f"Received tools from MCP server: {tools}")
32 |                 return tools
33 |         except Exception as e:
34 |             logger.error(f"Failed to get available tools from MCP server: {e}")
35 |             raise
36 | 
37 |     async def call_tool(self, tool_name: str, arguments: dict) -> Any:
38 |         """Call a tool with given arguments"""
39 |         try:
40 |             async with 
client_session(self.mcp._mcp_server) as client:
41 |                 result = await client.call_tool(tool_name, arguments=arguments)
42 |                 logger.debug(f"Tool result: {result}")
43 |                 return result
44 |         except Exception as e:
45 |             logger.error(f"Failed to call tool '{tool_name}' with arguments {arguments}: {e}")
46 |             raise
47 | 
--------------------------------------------------------------------------------
/client_test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import asyncio
3 | from dotenv import load_dotenv
4 | from client_bridge.config import BridgeConfig, LLMConfig
5 | from client_bridge.bridge import BridgeManager
6 | from server.browser_navigator_server import BrowserNavigationServer
7 | from loguru import logger
8 | 
9 | 
10 | async def main():
11 |     # Load environment variables
12 |     load_dotenv()
13 | 
14 |     # Configure bridge
15 |     server = BrowserNavigationServer()
16 |     config = BridgeConfig(
17 |         mcp=server,
18 |         llm_config=LLMConfig(
19 |             azure_endpoint=os.getenv("AZURE_OPEN_AI_ENDPOINT"),
20 |             api_version=os.getenv("AZURE_OPEN_AI_API_VERSION"),
21 |             api_key=os.getenv("AZURE_OPEN_AI_API_KEY"),
22 |             deploy_name=os.getenv("AZURE_OPEN_AI_DEPLOYMENT_MODEL")
23 |         ),
24 |         system_prompt="You are a helpful assistant that can use tools to help answer questions."
25 |     )
26 | 
27 |     logger.info(f"Starting bridge with model: {config.llm_config.deploy_name}")
28 | 
29 |     # Use bridge with context manager
30 |     async with BridgeManager(config) as bridge:
31 |         while True:
32 |             try:
33 |                 user_input = input("\nEnter your prompt (or 'quit' to exit): ")
34 |                 if user_input.lower() in ['quit', 'exit', 'q']:
35 |                     break
36 | 
37 |                 response = await bridge.process_message(user_input)
38 |                 print(f"\nResponse: {response}")
39 | 
40 |             except KeyboardInterrupt:
41 |                 logger.info("\nExiting...")
42 |                 break
43 |             except Exception as e:
44 |                 logger.error(f"\nError occurred: {e}")
45 | 
46 | if __name__ == "__main__":
47 |     try:
48 |         asyncio.run(main())
49 |     except Exception as e:
50 |         logger.error(f"Unhandled exception: {e}")
--------------------------------------------------------------------------------
/doc/chatgui_gpt_generate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kimtth/mcp-aoai-web-browsing/9c0d0a511fefa924ba879fdeff73a8c375b4dcbc/doc/chatgui_gpt_generate.png
--------------------------------------------------------------------------------
/doc/globe_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kimtth/mcp-aoai-web-browsing/9c0d0a511fefa924ba879fdeff73a8c375b4dcbc/doc/globe_icon.png
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "mcp-web-auto"
3 | version = "0.1.0"
4 | description = ""
5 | authors = [{ name = "kimtth" }]
6 | readme = "README.md"
7 | requires-python = ">=3.11"
8 | dependencies = [
9 |     "aiohttp>=3.11.10",
10 |     "asyncio>=3.4.3",
11 |     "fastmcp>=0.4.1",
12 |     "loguru>=0.7.3",
13 |     "mcp>=1.1.1",
14 |     "openai>=1.57.1",
15 |     "pydantic>=2.10.3",
16 |     "pytest-playwright>=0.6.2",
17 |     "python-dotenv>=1.0.1",
18 |     "typing-extensions>=4.12.2",
19 | ]
20 | 
21 | 
--------------------------------------------------------------------------------
/server/browser_manager.py:
--------------------------------------------------------------------------------
1 | from playwright.async_api 
import async_playwright
2 | 
3 | class BrowserManager:
4 |     def __init__(self):
5 |         self.browser = None
6 |         self.page = None
7 |         self.console_logs = []
8 |         self.screenshots = {}
9 | 
10 |     async def ensure_browser(self):
11 |         if not self.browser:
12 |             playwright = await async_playwright().start()
13 |             self.browser = await playwright.chromium.launch(headless=False)
14 |             context = await self.browser.new_context(
15 |                 viewport={"width": 1920, "height": 1080},
16 |                 device_scale_factor=1,
17 |             )
18 |             self.page = await context.new_page()
19 | 
20 |             async def handle_console_message(msg):
21 |                 log_entry = f"[{msg.type}] {msg.text}"
22 |                 self.console_logs.append(log_entry)
23 |                 # Simulate a server notification
24 |                 print({
25 |                     "method": "notifications/resources/updated",
26 |                     "params": {"uri": "console://logs"},
27 |                 })
28 | 
29 |             self.page.on("console", handle_console_message)
30 | 
31 |         return self.page
32 | 
--------------------------------------------------------------------------------
/server/browser_navigator_server.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import json
3 | from fastmcp import Context, FastMCP
4 | from mcp.types import TextContent, ImageContent
5 | from playwright.async_api import Page
6 | from client_bridge.llm_client import LLMClient, LLMResponse
7 | from client_bridge.llm_config import get_default_llm_config
8 | from server.browser_manager import BrowserManager
9 | 
10 | 
11 | class BrowserNavigationServer(FastMCP):
12 |     def __init__(self, server_name="browser-navigator-server"):
13 |         super().__init__(server_name)
14 |         self.mcp = self
15 |         self.browser_manager = BrowserManager()
16 |         self.llm_config = get_default_llm_config()
17 |         self.llm_client = LLMClient(self.llm_config)
18 |         self.screenshots = dict()
19 |         self.register_tools()
20 |         self.register_resources()
21 |         self.register_prompts()
22 | 
23 |     def register_tools(self):
24 |         @self.mcp.tool()
25 |         async def playwright_navigate(url: str, timeout=30000, wait_until="load"):
26 |             """Navigate to a URL."""
27 |             try:
28 |                 page: Page = await self.browser_manager.ensure_browser()
29 |                 await page.goto(url, timeout=timeout, wait_until=wait_until)
30 |                 return f"Navigated to {url} with {wait_until} wait"
31 |             except Exception as e:
32 |                 raise ValueError(f"Navigation failed: {e}")
33 | 
34 |         @self.mcp.tool()
35 |         async def playwright_screenshot(
36 |             name: str, selector: str = None, width: int = 800, height: int = 600
37 |         ):
38 |             """Take a screenshot of the current page or a specific element."""
39 |             try:
40 |                 page: Page = await self.browser_manager.ensure_browser()
41 |                 element = await page.query_selector(selector) if selector else None
42 |                 if selector and not element:
43 |                     return f"Element not found: {selector}"
44 | 
45 |                 # page.screenshot() has no "element" option: capture the element via
46 |                 # element.screenshot() when a selector is given, otherwise the full page.
47 |                 # ("mask" support is still TODO.)
48 |                 if element:
49 |                     screenshot = await element.screenshot(type="png")
50 |                 else:
51 |                     screenshot = await page.screenshot(type="png", full_page=True)
52 | 
53 |                 # Convert the screenshot to a base64 string
54 |                 screenshot_base64 = base64.b64encode(screenshot).decode("utf-8")
55 |                 self.screenshots[name] = screenshot_base64
56 |                 return [
57 |                     TextContent(type="text", text=f"Screenshot {name} taken"),
58 |                     ImageContent(
59 |                         type="image", data=screenshot_base64, mimeType="image/png"
60 |                     ),
61 |                 ]
62 |             except Exception as e:
63 |                 raise ValueError(f"Screenshot failed: {e}")
64 | 
65 |         @self.mcp.tool()
66 |         async def playwright_click(selector: str):
67 |             """Click an element on the page."""
68 |             try:
69 |                 page: Page = await 
self.browser_manager.ensure_browser() 70 | await page.click(selector) 71 | return f"Clicked on {selector}" 72 | except Exception as e: 73 | raise ValueError(f"Failed to click: {e}") 74 | 75 | @self.mcp.tool() 76 | async def playwright_fill(selector: str, value: str): 77 | """Fill out an input field.""" 78 | try: 79 | page: Page = await self.browser_manager.ensure_browser() 80 | await page.wait_for_selector(selector) 81 | await page.fill(selector, value) 82 | return f"Filled {selector} with {value}" 83 | except Exception as e: 84 | raise ValueError(f"Failed to fill: {e}") 85 | 86 | @self.mcp.tool() 87 | async def playwright_select(selector: str, value: str): 88 | """Select an element on the page with a Select tag.""" 89 | try: 90 | page: Page = await self.browser_manager.ensure_browser() 91 | await page.wait_for_selector(selector) 92 | await page.select_option(selector, value) 93 | return f"Selected {value} in {selector}" 94 | except Exception as e: 95 | raise ValueError(f"Failed to select: {e}") 96 | 97 | @self.mcp.tool() 98 | async def playwright_hover(selector: str): 99 | """Hover over an element on the page.""" 100 | try: 101 | page: Page = await self.browser_manager.ensure_browser() 102 | await page.wait_for_selector(selector) 103 | await page.hover(selector) 104 | return f"Hovered over {selector}" 105 | except Exception as e: 106 | raise ValueError(f"Failed to hover: {e}") 107 | 108 | @self.mcp.tool() 109 | async def playwright_evaluate(script: str): 110 | """Execute JavaScript in the browser console.""" 111 | try: 112 | page: Page = await self.browser_manager.ensure_browser() 113 | script_result = await page.evaluate( 114 | """ 115 | (script) => { 116 | const logs = []; 117 | const originalConsole = { ...console }; 118 | 119 | ['log', 'info', 'warn', 'error'].forEach(method => { 120 | console[method] = (...args) => { 121 | logs.push(`[${method}] ${args.join(' ')}`); 122 | originalConsole[method](...args); 123 | }; 124 | }); 125 | 126 | try { 127 | const result = eval(script); 128 | Object.assign(console, originalConsole); 129 | return { result, logs }; 130 | } catch (error) { 131 | Object.assign(console, originalConsole); 132 | throw error; 133 | } 134 | } 135 | """, 136 | script, 137 | ) 138 | # Parentheses allow grouping multiple expressions in one line, 139 | # often used for long strings, tuples, or function arguments 140 | # that span multiple lines. 141 | return_string = ( 142 | "Execution result:\n" 143 | + json.dumps(script_result["result"], indent=2) 144 | + "\n\n" 145 | + "Console output:\n" 146 | + "\n".join(script_result["logs"]) 147 | ) 148 | return return_string 149 | except Exception as e: 150 | raise ValueError(f"Script execution failed: {e}") 151 | 152 | @self.mcp.tool() 153 | async def extract_selector_by_page_content(user_message: str) -> str: 154 | """Try to find a css selector by current page content.""" 155 | # Ensure the browser page is available 156 | page = await self.browser_manager.ensure_browser() 157 | 158 | # Get the HTML content of the page 159 | html_content = await page.content() 160 | 161 | # Prepare the prompt for the LLM 162 | prompt = ( 163 | "Given the following HTML content of a web page:\n\n" 164 | f"{html_content}\n\n" 165 | f"User request: '{user_message}'\n\n" 166 | "Provide the CSS selector that best matches the user's request. Return only the CSS selector." 
167 |             )
168 | 
169 |             # Use the LLM client to generate the selector
170 |             llm_response: LLMResponse = await self.llm_client.invoke_with_prompt(prompt)
171 |             selector: str = llm_response.content  # LLMResponse exposes the text via .content
172 | 
173 |             # Return the selector
174 |             return selector.strip()
175 | 
176 |         # Long-running example to read all screenshots from a list of file names
177 |         @self.mcp.tool()
178 |         async def read_all_screenshots(file_name_list: list[str], ctx: Context) -> str:
179 |             """Read all screenshots from a list of file names."""
180 |             for i, file_name in enumerate(file_name_list):
181 |                 ctx.info(f"Processing {file_name}...")
182 |                 await ctx.report_progress(i, len(file_name_list))
183 | 
184 |                 # Read another resource if needed
185 |                 data = await ctx.read_resource(f"screenshot://{file_name}")
186 | 
187 |             return "Processing complete"
188 | 
189 |     def register_resources(self):
190 |         @self.mcp.resource("console://logs")
191 |         async def get_console_logs() -> TextContent:
192 |             """Get the browser console logs"""
193 |             return TextContent(
194 |                 type="text", text="\n".join(self.browser_manager.console_logs)
195 |             )
196 | 
197 |         @self.mcp.resource("screenshot://{name}")
198 |         async def get_screenshot(name: str) -> ImageContent:
199 |             """Get a screenshot by name"""
200 |             screenshot_base64 = self.screenshots.get(name)
201 |             if screenshot_base64:
202 |                 return ImageContent(
203 |                     type="image",
204 |                     data=screenshot_base64,
205 |                     mimeType="image/png",
206 |                     uri=f"screenshot://{name}",
207 |                 )
208 |             else:
209 |                 raise ValueError(f"Screenshot {name} not found")
210 | 
211 |     def register_prompts(self):
212 |         @self.mcp.prompt()
213 |         async def hello_world(code: str) -> str:
214 |             return f"Hello world:\n\n{code}"
215 | 
216 | 
217 | """
218 | When executing the MCP Inspector in a terminal, use the following command:
219 | 
220 | ```bash
221 | cmd> fastmcp dev ./server/browser_navigator_server.py:app
222 | ```
223 | 
224 | app = BrowserNavigationServer()
225 | 
226 | - `server/browser_navigator_server.py` specifies the file path.
227 | - `app` refers to the server object created by `BrowserNavigationServer`.
228 | 
229 | After running the command, the following message will be displayed:
230 | 
231 | ```
232 | > Starting MCP Inspector...
233 | > 🔍 MCP Inspector is up and running at http://localhost:5173 🚀
234 | ```
235 | 
236 | **Important:** Do not use `__main__` to launch the MCP Inspector. This will result in the following error:
237 | 
238 | No server object found in **.py. Please either:
239 | 1. Use a standard variable name (mcp, server, or app)
240 | 2. Specify the object name with file:object syntax
241 | """
242 | # app = BrowserNavigationServer()
243 | print("BrowserNavigationServer is running...")
244 | # print all attributes of the mcp
245 | # print(dir(app))
246 | 
247 | # if __name__ == "__main__":
248 | # app = BrowserNavigationServer()
249 | # app.run()
250 | # print("BrowserNavigationServer is running...")
--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------
1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
2 | 
3 | startCommand:
4 |   type: stdio
5 |   configSchema:
6 |     # JSON Schema defining the configuration options for the MCP.
7 |     type: object
8 |     required:
9 |       - azureOpenAiEndpoint
10 |       - azureOpenAiApiKey
11 |       - azureOpenAiDeploymentModel
12 |       - azureOpenAiApiVersion
13 |     properties:
14 |       azureOpenAiEndpoint:
15 |         type: string
16 |         description: The endpoint for Azure OpenAI. 
17 | azureOpenAiApiKey: 18 | type: string 19 | description: The API key for Azure OpenAI. 20 | azureOpenAiDeploymentModel: 21 | type: string 22 | description: The deployment model for Azure OpenAI. 23 | azureOpenAiApiVersion: 24 | type: string 25 | description: The API version for Azure OpenAI. 26 | commandFunction: 27 | # A function that produces the CLI command to start the MCP on stdio. 28 | |- 29 | (config) => ({ command: 'uv', args: ['run', 'fastmcp', 'dev', './server/browser_navigator_server.py:app'], env: { AZURE_OPEN_AI_ENDPOINT: config.azureOpenAiEndpoint, AZURE_OPEN_AI_API_KEY: config.azureOpenAiApiKey, AZURE_OPEN_AI_DEPLOYMENT_MODEL: config.azureOpenAiDeploymentModel, AZURE_OPEN_AI_API_VERSION: config.azureOpenAiApiVersion } }) --------------------------------------------------------------------------------
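For reference, the `commandFunction` above produces an invocation equivalent to the following shell command (the four values are placeholders filled in from the user's config):

```bash
AZURE_OPEN_AI_ENDPOINT=<endpoint> \
AZURE_OPEN_AI_API_KEY=<api-key> \
AZURE_OPEN_AI_DEPLOYMENT_MODEL=<deployment> \
AZURE_OPEN_AI_API_VERSION=<api-version> \
uv run fastmcp dev ./server/browser_navigator_server.py:app
```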