├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── images └── nova_act_img.jpg ├── mcp_examples ├── README.md ├── hello_world_mcp_client.py ├── hello_world_mcp_server.py ├── nova_act_mcp_client.py └── nova_act_mcp_server.py ├── nova_act_examples ├── README.md ├── get_coffee_maker.py └── multi_monitor.py ├── requirements.txt ├── strands_examples ├── mcp_docs_diag.py ├── nova_act_mcp_server.py ├── nova_act_strands.py └── weather_word_count.py └── streamlit_examples ├── README.md └── video_game_research_st.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Added 2 | nova_act_examples/logs/ 3 | streamlit_examples/game_searches/ 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # UV 102 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 105 | #uv.lock 106 | 107 | # poetry 108 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 109 | # This is especially recommended for binary packages to ensure reproducibility, and is more 110 | # commonly ignored for libraries. 111 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 112 | #poetry.lock 113 | 114 | # pdm 115 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 116 | #pdm.lock 117 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 118 | # in version control. 119 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 120 | .pdm.toml 121 | .pdm-python 122 | .pdm-build/ 123 | 124 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 125 | __pypackages__/ 126 | 127 | # Celery stuff 128 | celerybeat-schedule 129 | celerybeat.pid 130 | 131 | # SageMath parsed files 132 | *.sage.py 133 | 134 | # Environments 135 | .env 136 | .venv 137 | env/ 138 | venv/ 139 | ENV/ 140 | env.bak/ 141 | venv.bak/ 142 | 143 | # Spyder project settings 144 | .spyderproject 145 | .spyproject 146 | 147 | # Rope project settings 148 | .ropeproject 149 | 150 | # mkdocs documentation 151 | /site 152 | 153 | # mypy 154 | .mypy_cache/ 155 | .dmypy.json 156 | dmypy.json 157 | 158 | # Pyre type checker 159 | .pyre/ 160 | 161 | # pytype static type analyzer 162 | .pytype/ 163 | 164 | # Cython debug symbols 165 | cython_debug/ 166 | 167 | # PyCharm 168 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 169 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 170 | # and can be added to the global gitignore or merged into this file. For a more nuclear 171 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 172 | #.idea/ 173 | 174 | # Ruff stuff: 175 | .ruff_cache/ 176 | 177 | # PyPI configuration file 178 | .pypirc -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Building Agents with Amazon Nova Act and MCP 2 | 3 | This repository demonstrates how to build intelligent web automation agents using Amazon Nova Act integrated with MCP (Model Context Protocol). MCP provides a standardized way to connect AI models to different data sources and tools - think of it like a "USB-C port for AI applications." 4 | 5 | ![Amazon Nova Act](./images/nova_act_img.jpg) 6 | 7 | ## Overview 8 | 9 | This project showcases the integration between: 10 | 11 | - **Amazon Nova Act**: A powerful web automation tool for creating agents that interact with web pages 12 | - **MCP (Model Context Protocol)**: An open protocol that standardizes how applications provide context to LLMs 13 | - **Amazon Bedrock**: Managed service that provides foundation models through a unified API 14 | 15 | ### Architecture 16 | 17 | The project follows MCP's client-server architecture: 18 | 19 | - **MCP Host**: Claude Desktop or other AI tools that want to access Nova Act capabilities 20 | - **MCP Client**: Protocol clients maintaining 1:1 connections with the Nova Act server 21 | - **MCP Server**: A Nova Act server exposing web automation capabilities through the standardized protocol 22 | - **Remote Services**: Web services that Nova Act interacts with through browser automation 23 | 24 | ### Key Features 25 | 26 | - Standardized LLM integration through MCP 27 | - Automated web navigation and interaction via Nova Act 28 | - Parallel execution of multiple browser tasks 29 | - Data extraction and processing 30 | - Screenshot capture and visual verification 31 | - Session management and browser control 32 | - Seamless integration with Claude Desktop 33 | - Access to foundation models via Amazon Bedrock 34 | 35 | ## Prerequisites 36 | 37 | - Operating System: MacOS or Ubuntu (Nova Act requirements) 38 | - Python 3.10 or higher 39 | - A valid Nova Act API key (obtain from https://nova.amazon.com/act) 40 | - Node.js (for Claude Desktop integration) 41 | - Amazon Bedrock access: 42 | - [Amazon Bedrock enabled in your AWS account](https://docs.aws.amazon.com/bedrock/latest/userguide/getting-started.html) 43 | - Claude 3.5 Sonnet V2 model enabled (this is default for this example) 44 | - AWS credentials and region properly configured - [AWS CLI Quickstart Guide](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-quickstart.html) 45 | 46 | ## Project Structure 47 | 48 | The repository contains two main example directories: 49 | 50 | ### 1. Streamlit Examples (`streamlit_examples/`) 51 | - `video_game_research_st.py`: A comprehensive tool that: 52 | - Finds top games for any selected gaming system 53 | - Searches Amazon in parallel for pricing and details 54 | - Creates interactive result tables 55 | - Saves research data for future reference 56 | 57 | ### 2. MCP Examples (`mcp_examples/`) 58 | - `nova_act_mcp_server.py`: MCP-compliant server implementation exposing Nova Act capabilities 59 | - `nova_act_mcp_client.py`: Example MCP client implementation 60 | - Demonstrates integration with Claude Desktop and other MCP hosts 61 | 62 | ### 3. Nova Act Examples (`nova_act_examples/`) 63 | - `get_coffee_maker.py`: Simple example demonstrating how to: 64 | - Initialize Nova Act with a starting web page 65 | - Perform basic web search and selection actions 66 | - Extract information from product pages 67 | - `multi_monitor.py`: Advanced example showing: 68 | - Parallel execution of web tasks using ThreadPoolExecutor 69 | - Searching and comparing multiple products simultaneously 70 | - Structured data extraction and comparison 71 | - Error handling for robust web automation 72 | 73 | ## Getting Started 74 | 75 | 1. Clone this repository 76 | 77 | 2. Install dependencies: 78 | ```bash 79 | pip install -r requirements.txt 80 | ``` 81 | 82 | 3. Set your Nova Act API key: 83 | ```bash 84 | export NOVA_ACT_API_KEY="your_api_key" 85 | ``` 86 | 87 | 4. Make sure your environment has AWS configured correctly - https://docs.aws.amazon.com/cli/latest/userguide/getting-started-quickstart.html 88 | 89 | ## Usage 90 | 91 | ### Running Streamlit Examples 92 | ```bash 93 | cd streamlit_examples 94 | streamlit run video_game_research_st.py 95 | ``` 96 | 97 | ### Running the MCP Server and Client 98 | ```bash 99 | cd mcp_examples 100 | python nova_act_mcp_client.py nova_act_mcp_server.py 101 | ``` 102 | 103 | The command above will: 104 | 1. Start the MCP server that exposes Nova Act capabilities 105 | 2. Launch the MCP client that connects to the server 106 | 3. Enable communication between Claude and the Nova Act browser automation 107 | 108 | ### Claude Desktop Integration 109 | 110 | For setting up and using this server with Claude Desktop, please follow the official [Claude Desktop MCP Setup Guide](https://modelcontextprotocol.io/quickstart/user). The guide covers: 111 | 112 | - Installing and configuring Claude Desktop 113 | - Setting up MCP servers in Claude Desktop 114 | - Troubleshooting common issues 115 | - Example usage and best practices 116 | 117 | ## Best Practices 118 | 119 | 1. Follow MCP's standardized approach for exposing capabilities 120 | 2. Always close browser sessions when done 121 | 3. Use headless mode for automated tasks not requiring visual feedback 122 | 4. Break down complex actions into smaller, specific instructions 123 | 5. Use schemas when expecting structured data responses 124 | 6. Save important results to files for persistence 125 | 7. Handle errors appropriately in your code 126 | 127 | ## Security 128 | 129 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 130 | 131 | ## License 132 | 133 | This library is licensed under the MIT-0 License. See the LICENSE file. 134 | 135 | ## Additional Resources 136 | 137 | - [MCP Documentation](https://modelcontextprotocol.io/introduction) 138 | - [Nova Act Documentation](https://nova.amazon.com/act) 139 | - [Amazon Bedrock Documentation](https://docs.aws.amazon.com/bedrock/) -------------------------------------------------------------------------------- /images/nova_act_img.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-agents-with-nova-act-and-mcp/32efe840913cedcfbe4bc4ef13bd4d6515f410f9/images/nova_act_img.jpg -------------------------------------------------------------------------------- /mcp_examples/README.md: -------------------------------------------------------------------------------- 1 | # MCP Nova Act Examples 2 | 3 | An MCP (Model Context Protocol) server implementation for Amazon Nova Act, allowing LLMs to control web browsers through a standardized MCP interface. 4 | 5 | ## Features 6 | 7 | - Execute browser actions through natural language instructions 8 | - Run parallel tasks across multiple browser instances 9 | - Capture and store action results 10 | - Take screenshots of browser sessions 11 | - Save results to JSON files for later analysis 12 | 13 | ## Prerequisites 14 | 15 | 1. Operating System: MacOS or Ubuntu (Nova Act requirements) 16 | 2. Python 3.10 or above 17 | 3. A valid Nova Act API key (obtain from https://nova.amazon.com/act) 18 | 4. Amazon Bedrock access: 19 | - [Amazon Bedrock enabled in your AWS account](https://docs.aws.amazon.com/bedrock/latest/userguide/getting-started.html) 20 | - Claude 3.5 Sonnet V2 model enabled (this is the default for this example) 21 | - AWS credentials and region properly configured 22 | 23 | ## Installation 24 | 25 | 1. Install dependencies from main folder: 26 | ```bash 27 | pip install -r requirements.txt 28 | ``` 29 | 30 | 2. Set your Nova Act API key as an environment variable: 31 | ```bash 32 | export NOVA_ACT_API_KEY="your_api_key" 33 | ``` 34 | 35 | 3. Configure AWS credentials following the [AWS CLI Quickstart Guide](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-quickstart.html) 36 | 37 | ## Usage 38 | 39 | ### Starting the Client and Server 40 | 41 | ```bash 42 | python nova_act_mcp_client.py nova_act_mcp_server.py 43 | ``` 44 | 45 | This command will: 46 | 1. Start the MCP server that exposes Nova Act capabilities 47 | 2. Launch the MCP client that connects to the server 48 | 3. Enable communication between Claude and the Nova Act browser automation 49 | 50 | ## Claude Desktop Integration 51 | 52 | For setting up and using this server with Claude Desktop, please follow the official [Claude Desktop MCP Setup Guide](https://modelcontextprotocol.io/quickstart/user). 53 | 54 | 55 | ## Troubleshooting 56 | 57 | - If browser sessions fail to start, check your Nova Act API key 58 | - For parallel execution issues, try reducing the number of concurrent tasks 59 | - Browser performance problems may indicate insufficient system resources 60 | - If actions are not working as expected, try making them more specific and explicit 61 | - Might need to install chrome with this command: `playwright install chrome` 62 | -------------------------------------------------------------------------------- /mcp_examples/hello_world_mcp_client.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import sys 4 | from contextlib import AsyncExitStack 5 | from typing import Any, List 6 | 7 | import boto3 8 | from mcp import ClientSession, StdioServerParameters 9 | from mcp.client.stdio import stdio_client 10 | 11 | 12 | class HelloWorldBedrockAgent: 13 | def __init__(self): 14 | # Initialize session and client objects 15 | self.session = None 16 | self.exit_stack = AsyncExitStack() 17 | self.model_id = "us.anthropic.claude-3-5-haiku-20241022-v1:0" 18 | self.bedrock_runtime = None 19 | 20 | async def connect_to_server(self, server_script_path: str): 21 | """Connect to an MCP server""" 22 | if not server_script_path.endswith(".py"): 23 | raise ValueError("Server script must be a Python file with .py extension") 24 | 25 | print(f"Starting the MCP server: {server_script_path}...") 26 | 27 | # Use environment variables for server process 28 | env = os.environ.copy() 29 | 30 | # Start the server as a subprocess 31 | server_params = StdioServerParameters( 32 | command="python3", args=[server_script_path], env=env 33 | ) 34 | 35 | stdio_transport = await self.exit_stack.enter_async_context( 36 | stdio_client(server_params) 37 | ) 38 | self.stdio, self.write = stdio_transport 39 | self.session = await self.exit_stack.enter_async_context( 40 | ClientSession(self.stdio, self.write) 41 | ) 42 | 43 | # Initialize the session 44 | await self.session.initialize() 45 | 46 | # List available tools 47 | response = await self.session.list_tools() 48 | print( 49 | "\nConnected to server with tools:", [tool.name for tool in response.tools] 50 | ) 51 | return response.tools 52 | 53 | async def initialize_bedrock(self): 54 | """Initialize the Amazon Bedrock client""" 55 | print("Initializing Amazon Bedrock client...") 56 | try: 57 | self.bedrock_runtime = boto3.client( 58 | "bedrock-runtime", region_name="us-west-2" 59 | ) 60 | print(f"Using model: {self.model_id}") 61 | return True 62 | except Exception as e: 63 | print(f"Error initializing Bedrock client: {str(e)}") 64 | print( 65 | "Make sure you have the necessary AWS credentials and permissions set up." 66 | ) 67 | return False 68 | 69 | def extract_tool_result(self, tool_result): 70 | """Extract content from a CallToolResult object or other result types""" 71 | try: 72 | # If it's a string, number, bool, etc., return it directly 73 | if isinstance(tool_result, (str, int, float, bool)): 74 | return tool_result 75 | 76 | # If it has a content attribute (like CallToolResult) 77 | if hasattr(tool_result, "content"): 78 | content = tool_result.content 79 | 80 | # If content is a list (like TextContent objects) 81 | if isinstance(content, list) and content: 82 | # If the first item has a text attribute 83 | if hasattr(content[0], "text"): 84 | return content[0].text 85 | # Otherwise return the list 86 | return content 87 | 88 | # For other content types 89 | return str(content) 90 | 91 | # If it's already a dict or list, return as is 92 | if isinstance(tool_result, (dict, list)): 93 | return tool_result 94 | 95 | # Fallback to string representation 96 | return str(tool_result) 97 | 98 | except Exception as e: 99 | print(f"Error extracting tool result: {e}") 100 | return str(tool_result) 101 | 102 | async def process_query(self, query: str, available_tools: List[Any]): 103 | """Process a user query using Bedrock and the MCP tools""" 104 | if not self.bedrock_runtime: 105 | return "Bedrock client not initialized" 106 | 107 | # Format tools for Bedrock 108 | tool_list = [ 109 | { 110 | "toolSpec": { 111 | "name": tool.name, 112 | "description": tool.description, 113 | "inputSchema": {"json": tool.inputSchema}, 114 | } 115 | } 116 | for tool in available_tools 117 | ] 118 | 119 | # Create the system message 120 | system_prompt = """You are a helpful assistant with access to calculator and greeting tools. 121 | When asked about calculations, use the calculator tools like add, subtract, multiply, and divide. 122 | When asked to greet someone, use the greet tool. 123 | When asked for a joke, use the tell_joke tool. 124 | Always include the full text of any joke or greeting in your response to make sure the user can see it. 125 | Respond in a friendly and helpful manner. Keep your answers brief but informative.""" 126 | 127 | # Initialize messages array - exactly like in nova_act_mcp_client.py 128 | messages = [{"role": "user", "content": [{"text": query}]}] 129 | 130 | try: 131 | # Call Amazon Bedrock with the user query - exactly like in nova_act_mcp_client.py 132 | print("Sending query to Bedrock...") 133 | response = self.bedrock_runtime.converse( 134 | modelId=self.model_id, 135 | messages=messages, 136 | inferenceConfig={"temperature": 0.7}, 137 | toolConfig={"tools": tool_list}, 138 | system=[{"text": system_prompt}], 139 | ) 140 | 141 | # Extract the assistant's response - exactly like in nova_act_mcp_client.py 142 | response_message = response["output"]["message"] 143 | final_responses = [] 144 | tool_results = {} 145 | 146 | # Process each content block in the response - exactly like in nova_act_mcp_client.py 147 | for content_block in response_message["content"]: 148 | if "text" in content_block: 149 | # Add text responses to our final output 150 | final_responses.append(content_block["text"]) 151 | 152 | elif "toolUse" in content_block: 153 | # Handle tool usage 154 | tool_use = content_block["toolUse"] 155 | tool_name = tool_use["name"] 156 | tool_input = tool_use["input"] 157 | tool_use_id = tool_use["toolUseId"] 158 | 159 | print(f"Calling tool: {tool_name} with input: {tool_input}") 160 | final_responses.append(f"[Calling tool {tool_name}]") 161 | 162 | # Call the tool through MCP session 163 | raw_tool_result = await self.session.call_tool( 164 | tool_name, tool_input 165 | ) 166 | 167 | # Extract the actual content from the tool result 168 | extracted_result = self.extract_tool_result(raw_tool_result) 169 | print(f"Raw tool result type: {type(raw_tool_result)}") 170 | print(f"Extracted result: {extracted_result}") 171 | 172 | # Save the result for later display 173 | tool_results[tool_name] = extracted_result 174 | 175 | # Create follow-up message with tool result 176 | tool_result_message = { 177 | "role": "user", 178 | "content": [ 179 | { 180 | "toolResult": { 181 | "toolUseId": tool_use_id, 182 | "content": [{"json": {"result": extracted_result}}], 183 | } 184 | } 185 | ], 186 | } 187 | 188 | # Add the AI message and tool result to messages 189 | messages.append(response_message) 190 | messages.append(tool_result_message) 191 | 192 | # Make another call to get the final response 193 | follow_up_response = self.bedrock_runtime.converse( 194 | modelId=self.model_id, 195 | messages=messages, 196 | inferenceConfig={"temperature": 0.7}, 197 | toolConfig={"tools": tool_list}, 198 | system=[{"text": system_prompt}], 199 | ) 200 | 201 | # Add the follow-up response to our final output 202 | follow_up_text = follow_up_response["output"]["message"]["content"][ 203 | 0 204 | ]["text"] 205 | final_responses.append(follow_up_text) 206 | 207 | # Compose the final response with explicit tool results 208 | final_text = "\n".join(final_responses) 209 | 210 | # If we have tool results but they're not obviously included in the response, 211 | # add them explicitly 212 | for tool_name, result in tool_results.items(): 213 | if tool_name == "tell_joke" and result not in final_text: 214 | final_text += f"\n\nJoke: {result}" 215 | elif tool_name == "greet" and result not in final_text: 216 | final_text += f"\n\nGreeting: {result}" 217 | elif ( 218 | tool_name in ["add", "subtract", "multiply", "divide"] 219 | and str(result) not in final_text 220 | ): 221 | final_text += f"\n\nCalculation result: {result}" 222 | 223 | return final_text 224 | 225 | except Exception as e: 226 | print(f"Error in Bedrock API call: {str(e)}") 227 | import traceback 228 | 229 | traceback.print_exc() 230 | return f"Error: {str(e)}" 231 | 232 | async def chat_loop(self, available_tools: List[Any]): 233 | """Run an interactive chat loop""" 234 | print("\nYou can now chat with the agent. Type 'exit' to quit.") 235 | 236 | while True: 237 | try: 238 | # Get user input 239 | user_query = input("\nYou: ") 240 | if user_query.lower() in ["exit", "quit"]: 241 | break 242 | 243 | # Process the query 244 | response = await self.process_query(user_query, available_tools) 245 | print("\nAssistant:", response) 246 | 247 | except Exception as e: 248 | print(f"\nError: {str(e)}") 249 | 250 | async def cleanup(self): 251 | """Clean up resources""" 252 | await self.exit_stack.aclose() 253 | print("\nShutting down and cleaning up resources...") 254 | 255 | 256 | async def main(): 257 | if len(sys.argv) < 2: 258 | print("Usage: python hello_world_mcp_client.py ") 259 | print("Example: python hello_world_mcp_client.py hello_world_mcp_server.py") 260 | sys.exit(1) 261 | 262 | server_script_path = sys.argv[1] 263 | agent = HelloWorldBedrockAgent() 264 | 265 | try: 266 | # Connect to the MCP server and get available tools 267 | available_tools = await agent.connect_to_server(server_script_path) 268 | 269 | # Initialize the Bedrock client 270 | if await agent.initialize_bedrock(): 271 | # Run the chat loop 272 | await agent.chat_loop(available_tools) 273 | except Exception as e: 274 | print(f"Error: {str(e)}") 275 | finally: 276 | # Clean up resources 277 | await agent.cleanup() 278 | 279 | 280 | if __name__ == "__main__": 281 | asyncio.run(main()) 282 | -------------------------------------------------------------------------------- /mcp_examples/hello_world_mcp_server.py: -------------------------------------------------------------------------------- 1 | from mcp.server.fastmcp import FastMCP 2 | 3 | # Initialize FastMCP server 4 | mcp = FastMCP("hello-world-server") 5 | 6 | 7 | # Define tools 8 | @mcp.tool() 9 | async def greet(name: str) -> str: 10 | """Greet a person with their name. 11 | 12 | Parameters: 13 | name: The name of the person to greet 14 | 15 | Returns: 16 | A greeting message 17 | """ 18 | return f"Hello, {name}! Welcome to MCP." 19 | 20 | 21 | @mcp.tool() 22 | async def add(a: float, b: float) -> float: 23 | """Add two numbers together. 24 | 25 | Parameters: 26 | a: First number 27 | b: Second number 28 | 29 | Returns: 30 | The sum of the two numbers 31 | """ 32 | return a + b 33 | 34 | 35 | @mcp.tool() 36 | async def subtract(a: float, b: float) -> float: 37 | """Subtract the second number from the first. 38 | 39 | Parameters: 40 | a: First number 41 | b: Second number to subtract from the first 42 | 43 | Returns: 44 | The difference between the two numbers 45 | """ 46 | return a - b 47 | 48 | 49 | @mcp.tool() 50 | async def multiply(a: float, b: float) -> float: 51 | """Multiply two numbers together. 52 | 53 | Parameters: 54 | a: First number 55 | b: Second number 56 | 57 | Returns: 58 | The product of the two numbers 59 | """ 60 | return a * b 61 | 62 | 63 | @mcp.tool() 64 | async def divide(a: float, b: float) -> float: 65 | """Divide the first number by the second. 66 | 67 | Parameters: 68 | a: Numerator 69 | b: Denominator 70 | 71 | Returns: 72 | The result of the division 73 | """ 74 | if b == 0: 75 | raise ValueError("Cannot divide by zero") 76 | return a / b 77 | 78 | 79 | @mcp.tool() 80 | async def tell_joke() -> str: 81 | """Tell a programming joke. 82 | 83 | Returns: 84 | A programming joke 85 | """ 86 | return "Why do programmers prefer dark mode? Because light attracts bugs!" 87 | 88 | 89 | # Run the server 90 | if __name__ == "__main__": 91 | print("Starting Hello World MCP Server...") 92 | print("This server provides simple greeting and calculator tools.") 93 | print("Connect to this server using an MCP client to interact with these tools.") 94 | print("Press Ctrl+C to stop the server.") 95 | 96 | # Start the server 97 | mcp.run() 98 | -------------------------------------------------------------------------------- /mcp_examples/nova_act_mcp_client.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import os 4 | from contextlib import AsyncExitStack 5 | from typing import Optional 6 | 7 | import boto3 8 | from mcp import ClientSession, StdioServerParameters 9 | from mcp.client.stdio import stdio_client 10 | 11 | NOVA_ACT_API_KEY = os.getenv("NOVA_ACT_API_KEY") 12 | 13 | bedrock_runtime = boto3.client( 14 | service_name="bedrock-runtime", 15 | region_name="us-west-2", 16 | ) 17 | 18 | 19 | class NovaActMCPClient: 20 | def __init__(self): 21 | # Initialize session and client objects 22 | self.session: Optional[ClientSession] = None 23 | self.exit_stack = AsyncExitStack() 24 | 25 | async def connect_to_server(self, server_script_path: str): 26 | """Connect to an MCP server""" 27 | is_python = server_script_path.endswith(".py") 28 | is_js = server_script_path.endswith(".js") 29 | if not (is_python or is_js): 30 | raise ValueError("Server script must be a .py or .js file") 31 | 32 | command = "python3" if is_python else "node" 33 | 34 | # Set environment variables including the API key 35 | env = os.environ.copy() 36 | if NOVA_ACT_API_KEY: 37 | env["NOVA_ACT_API_KEY"] = NOVA_ACT_API_KEY 38 | else: 39 | print("Warning: NOVA_ACT_API_KEY environment variable not set") 40 | raise ValueError("NOVA_ACT_API_KEY environment variable not set") 41 | 42 | server_params = StdioServerParameters( 43 | command=command, args=[server_script_path], env=env 44 | ) 45 | 46 | stdio_transport = await self.exit_stack.enter_async_context( 47 | stdio_client(server_params) 48 | ) 49 | self.stdio, self.write = stdio_transport 50 | self.session = await self.exit_stack.enter_async_context( 51 | ClientSession(self.stdio, self.write) 52 | ) 53 | 54 | await self.session.initialize() 55 | 56 | # List available tools 57 | response = await self.session.list_tools() 58 | tools = response.tools 59 | print("\nConnected to server with tools:", [tool.name for tool in tools]) 60 | 61 | async def process_query(self, query: str) -> str: 62 | """Process a query using Bedrock and available tools""" 63 | # Get available tools 64 | response = await self.session.list_tools() 65 | if not response.tools: 66 | return "No tools available on the server." 67 | 68 | available_tools = [ 69 | { 70 | "name": tool.name, 71 | "description": tool.description, 72 | "input_schema": tool.inputSchema, 73 | } 74 | for tool in response.tools 75 | ] 76 | 77 | # Prepare messages and tools for Bedrock 78 | messages = [{"role": "user", "content": [{"text": query}]}] 79 | 80 | # Format tools for Bedrock 81 | tool_list = [ 82 | { 83 | "toolSpec": { 84 | "name": tool["name"], 85 | "description": tool["description"], 86 | "inputSchema": {"json": tool["input_schema"]}, 87 | } 88 | } 89 | for tool in available_tools 90 | ] 91 | 92 | # Set system prompt 93 | system_prompt = "You are an AI assistant capable of using tools to help users. Use the provided tools when necessary." 94 | 95 | # Generate conversation with Bedrock 96 | model_id = "anthropic.claude-3-5-sonnet-20241022-v2:0" 97 | try: 98 | # Make the API call to Bedrock 99 | response = bedrock_runtime.converse( 100 | modelId=model_id, 101 | messages=messages, 102 | inferenceConfig={"temperature": 0.7}, 103 | toolConfig={"tools": tool_list}, 104 | system=[{"text": system_prompt}], 105 | ) 106 | 107 | # Process the response 108 | final_responses = [] 109 | response_message = response["output"]["message"] 110 | 111 | # Process each content block in the response 112 | for content_block in response_message["content"]: 113 | if "text" in content_block: 114 | # Add text responses to our final output 115 | final_responses.append(content_block["text"]) 116 | 117 | elif "toolUse" in content_block: 118 | # Handle tool usage 119 | tool_use = content_block["toolUse"] 120 | tool_name = tool_use["name"] 121 | tool_input = tool_use["input"] 122 | tool_use_id = tool_use["toolUseId"] 123 | 124 | print(f"Calling tool: {tool_name} with input: {tool_input}") 125 | final_responses.append(f"[Calling tool {tool_name}]") 126 | 127 | # Call the tool through MCP session 128 | tool_result = await self.session.call_tool(tool_name, tool_input) 129 | 130 | # Extract the content from the tool result and convert to JSON if needed 131 | try: 132 | # Check what we actually got back 133 | # print(f"Raw tool result type: {type(tool_result)}") 134 | # print(f"Raw tool result: {tool_result}") 135 | 136 | # Convert the content to a proper JSON object if it's a string 137 | if hasattr(tool_result, "content"): 138 | content_val = tool_result.content 139 | print(f"Content type: {type(content_val)}") 140 | 141 | # Handle different types of content 142 | if isinstance(content_val, list): 143 | # Handle list of TextContent objects 144 | if len(content_val) > 0 and hasattr( 145 | content_val[0], "text" 146 | ): 147 | # Extract text from the first TextContent object 148 | text_content = content_val[0].text 149 | try: 150 | # Try to parse as JSON 151 | result_content = json.loads(text_content) 152 | except json.JSONDecodeError: 153 | # If not valid JSON, use as text 154 | result_content = {"text": text_content} 155 | else: 156 | # Just a list of normal values 157 | result_content = {"items": content_val} 158 | elif isinstance(content_val, str): 159 | try: 160 | # First try to parse as JSON 161 | result_content = json.loads(content_val) 162 | except json.JSONDecodeError: 163 | # If not JSON, use as plain string 164 | result_content = {"text": content_val} 165 | else: 166 | # If it's already a dict/list, use directly 167 | result_content = content_val 168 | else: 169 | # If no content attribute, convert the whole object to string 170 | result_content = {"text": str(tool_result)} 171 | except Exception as e: 172 | print(f"Error processing tool result: {e}") 173 | result_content = {"error": str(e)} 174 | 175 | # print(f"Result content type: {type(result_content)}") 176 | # print(f"Result content: {result_content}") 177 | 178 | # Ensure we have a properly structured result for Bedrock 179 | # Bedrock expects a simple JSON object, not a nested structure 180 | if ( 181 | isinstance(result_content, dict) 182 | and "starting_page" in result_content 183 | ): 184 | # For browser_session results, extract the most useful info 185 | 186 | # Check if we have a response in the final result 187 | final_result = result_content.get("final_result", {}) 188 | response_text = final_result.get("response") 189 | final_page = final_result.get("final_page") 190 | 191 | if result_content.get("collected_data"): 192 | result_for_bedrock = { 193 | "result": result_content.get("collected_data") 194 | } 195 | elif response_text: 196 | # If we have a text response from the tool, use it 197 | result_for_bedrock = { 198 | "result": response_text, 199 | "url": final_page, 200 | "action": final_result.get("action", ""), 201 | } 202 | elif final_page: 203 | result_for_bedrock = { 204 | "result": f"Successfully loaded page: {final_page}", 205 | "url": final_page, 206 | } 207 | else: 208 | result_for_bedrock = { 209 | "result": f"Opened {result_content.get('starting_page', 'page')}", 210 | } 211 | 212 | # Include all results for context if available 213 | if result_content.get("all_results"): 214 | all_results = [] 215 | for res in result_content.get("all_results", []): 216 | if res.get("response"): 217 | all_results.append( 218 | { 219 | "action": res.get("action", ""), 220 | "response": res.get("response", ""), 221 | } 222 | ) 223 | if all_results: 224 | result_for_bedrock["all_results"] = all_results 225 | else: 226 | # For other results, use as is 227 | result_for_bedrock = {"result": result_content} 228 | 229 | # Create follow-up message with tool result 230 | tool_result_message = { 231 | "role": "user", 232 | "content": [ 233 | { 234 | "toolResult": { 235 | "toolUseId": tool_use_id, 236 | "content": [{"json": result_for_bedrock}], 237 | } 238 | } 239 | ], 240 | } 241 | 242 | # Add the AI message and tool result to messages 243 | messages.append(response_message) 244 | messages.append(tool_result_message) 245 | 246 | # Make another call to get the final response 247 | follow_up_response = bedrock_runtime.converse( 248 | modelId=model_id, 249 | messages=messages, 250 | inferenceConfig={"temperature": 0.7}, 251 | toolConfig={"tools": tool_list}, 252 | system=[{"text": system_prompt}], 253 | ) 254 | 255 | # Add the follow-up response to our final output 256 | follow_up_text = follow_up_response["output"]["message"]["content"][ 257 | 0 258 | ]["text"] 259 | final_responses.append(follow_up_text) 260 | 261 | return "\n".join(final_responses) 262 | 263 | except Exception as e: 264 | print(f"Error in Bedrock API call: {e}") 265 | return f"Error: {str(e)}" 266 | 267 | async def chat_loop(self): 268 | """Run an interactive chat loop""" 269 | print("\nHello World MCP Client Started!") 270 | print("Type your queries or 'quit' to exit.") 271 | 272 | while True: 273 | try: 274 | query = input("\nQuery: ").strip() 275 | 276 | if query.lower() == "quit": 277 | break 278 | 279 | response = await self.process_query(query) 280 | print("\n" + response) 281 | 282 | except Exception as e: 283 | print(f"\nError: {str(e)}") 284 | 285 | async def cleanup(self): 286 | """Clean up resources""" 287 | await self.exit_stack.aclose() 288 | 289 | 290 | async def main(): 291 | if len(sys.argv) < 2: 292 | print("Usage: python test_client.py ") 293 | sys.exit(1) 294 | 295 | client = NovaActMCPClient() 296 | try: 297 | await client.connect_to_server(sys.argv[1]) 298 | await client.chat_loop() 299 | finally: 300 | await client.cleanup() 301 | 302 | 303 | if __name__ == "__main__": 304 | import sys 305 | 306 | asyncio.run(main()) 307 | -------------------------------------------------------------------------------- /mcp_examples/nova_act_mcp_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import asyncio 4 | import json 5 | import multiprocessing 6 | import os 7 | import tempfile 8 | import threading 9 | from typing import Any, Dict, List, Optional 10 | 11 | from mcp.server.fastmcp import FastMCP 12 | from nova_act import ActError, NovaAct 13 | 14 | # Initialize FastMCP server 15 | mcp = FastMCP("nova-act-server") 16 | 17 | # Global variables for session and results 18 | nova_act_instance = None 19 | results_store = {} 20 | session_lock = threading.Lock() 21 | results_lock = threading.Lock() 22 | 23 | 24 | # Helper functions 25 | def generate_id(prefix: str) -> str: 26 | """Generate a unique ID for results""" 27 | import uuid 28 | 29 | return f"{prefix}_{uuid.uuid4().hex[:8]}" 30 | 31 | 32 | def save_results_to_file( 33 | file_path: str, result_ids: Optional[List[str]] = None 34 | ) -> bool: 35 | """Save selected results to a JSON file""" 36 | with results_lock: 37 | if result_ids: 38 | data = { 39 | rid: results_store[rid] for rid in result_ids if rid in results_store 40 | } 41 | else: 42 | data = dict(results_store) 43 | 44 | try: 45 | os.makedirs(os.path.dirname(os.path.abspath(file_path)), exist_ok=True) 46 | with open(file_path, "w") as f: 47 | json.dump(data, f, indent=2) 48 | return True 49 | except Exception as e: 50 | return False 51 | 52 | 53 | def execute_nova_act_task(task_args, result_file=None): 54 | """ 55 | Execute a single Nova Act task in an isolated process. 56 | This function runs in a separate process for each task. 57 | """ 58 | starting_page = task_args.get("starting_page") 59 | actions_input = task_args.get("actions", []) 60 | headless = task_args.get("headless", False) 61 | 62 | # Convert action strings to action objects if needed 63 | actions = [] 64 | for action in actions_input: 65 | if isinstance(action, str): 66 | # Convert string to action object 67 | actions.append({"action": action}) 68 | else: 69 | # Already an action object 70 | actions.append(action) 71 | 72 | task_results = [] 73 | 74 | try: 75 | # Create and start NovaAct instance 76 | with NovaAct( 77 | starting_page=starting_page, 78 | headless=headless, 79 | ) as nova_act: 80 | # Execute each action in sequence 81 | for action_params in actions: 82 | action_text = action_params.get("action") 83 | schema = action_params.get("schema") 84 | max_steps = action_params.get("max_steps") 85 | 86 | kwargs = {} 87 | if schema: 88 | kwargs["schema"] = schema 89 | if max_steps: 90 | kwargs["max_steps"] = max_steps 91 | 92 | try: 93 | result = nova_act.act(action_text, **kwargs) 94 | 95 | # Create a result object 96 | result_id = generate_id("result") 97 | result_data = { 98 | "result_id": result_id, 99 | "action": action_text, 100 | "starting_page": starting_page, 101 | "final_page": nova_act.page.url, 102 | "response": result.response, 103 | "parsed_response": ( 104 | result.parsed_response 105 | if hasattr(result, "parsed_response") 106 | else None 107 | ), 108 | "valid_json": ( 109 | result.valid_json if hasattr(result, "valid_json") else None 110 | ), 111 | "matches_schema": ( 112 | result.matches_schema 113 | if hasattr(result, "matches_schema") 114 | else None 115 | ), 116 | "metadata": ( 117 | { 118 | "num_steps_executed": result.metadata.num_steps_executed, 119 | "start_time": str(result.metadata.start_time), 120 | "end_time": str(result.metadata.end_time), 121 | "prompt": str(result.metadata.prompt), 122 | } 123 | if hasattr(result, "metadata") 124 | else {} 125 | ), 126 | } 127 | 128 | task_results.append(result_data) 129 | except Exception as e: 130 | task_results.append({"action": action_text, "error": str(e)}) 131 | 132 | # Write results to file if specified 133 | if result_file: 134 | with open(result_file, "w") as f: 135 | json.dump({"starting_page": starting_page, "results": task_results}, f) 136 | 137 | return {"starting_page": starting_page, "results": task_results} 138 | except Exception as e: 139 | result = { 140 | "starting_page": starting_page, 141 | "error": str(e), 142 | "results": task_results, 143 | } 144 | if result_file: 145 | with open(result_file, "w") as f: 146 | json.dump(result, f) 147 | return result 148 | 149 | 150 | # MCP tools 151 | @mcp.tool() 152 | async def browser_session( 153 | starting_page: str, 154 | actions: List[str], 155 | headless: bool = False, 156 | ) -> Dict[str, Any]: 157 | """Start a browser and perform a sequence of actions in a single command. 158 | 159 | USE THIS TOOL for tasks that need to be executed in sequence within the same browser session, where each action depends on the state created by previous actions. 160 | 161 | For multiple independent tasks that can run in parallel, use execute_parallel_browser_tasks instead. 162 | 163 | When writing actions for Nova Act: 164 | 165 | 1. Be prescriptive and succinct - tell the agent exactly what to do 166 | ✅ "Click the hamburger menu icon, go to Order History" 167 | ❌ "Find my order history" 168 | 169 | 2. Break complex tasks into smaller actions 170 | ✅ "Search for hotels in Houston", then "Sort by avg customer review" 171 | ❌ "Find the highest rated hotel in Houston" 172 | 173 | 3. Be specific about UI elements and navigation patterns 174 | ✅ "Scroll down until you see 'add to cart' and then click it" 175 | ❌ "Add the item to cart" 176 | 177 | 4. For searches, be explicit about interaction patterns 178 | ✅ "Type 'coffee maker' in the search box and press enter" 179 | ❌ "Search for coffee makers" 180 | 181 | 5. For date selection, use absolute dates 182 | ✅ "Select dates March 23 to March 28" 183 | ❌ "Book for next week" 184 | 185 | 6. For extracting information, use a dedicated action 186 | ✅ "Return a list of all visible product names and prices" 187 | ❌ "Find the cheapest option and tell me about it" 188 | 189 | Important limitations: 190 | - Nova Act cannot interact with elements hidden behind mouseovers 191 | - Nova Act cannot interact with browser windows/modals 192 | - Nova Act works best with short, specific instructions 193 | 194 | This function returns a structured response with: 195 | - starting_page: The URL where the browser started 196 | - collected_data: The parsed response data 197 | """ 198 | global nova_act_instance 199 | 200 | # Close any existing browser session 201 | with session_lock: 202 | if nova_act_instance: 203 | try: 204 | nova_act_instance.stop() 205 | except: 206 | raise ActError("Failed to stop existing browser session") 207 | 208 | # Start a new NovaAct session in a separate thread 209 | def run_browser_session(): 210 | try: 211 | # Convert simple action strings to action objects 212 | action_objects = [{"action": action_text} for action_text in actions] 213 | 214 | # Create and use a NovaAct instance with context manager 215 | results = [] 216 | final_response = {} 217 | 218 | with NovaAct( 219 | starting_page=starting_page, 220 | headless=headless, 221 | ) as nova_act: 222 | # Keep a reference to the nova_act instance 223 | with session_lock: 224 | global nova_act_instance 225 | nova_act_instance = nova_act 226 | 227 | # Execute each action in sequence 228 | for i, action_text in enumerate(actions): 229 | try: 230 | result = nova_act.act(action_text) 231 | 232 | # Store the result 233 | result_id = generate_id("result") 234 | result_data = { 235 | "result_id": result_id, 236 | "action": action_text, 237 | "starting_page": starting_page, 238 | "final_page": nova_act.page.url, 239 | "response": result.response, 240 | "parsed_response": ( 241 | result.parsed_response 242 | if hasattr(result, "parsed_response") 243 | else None 244 | ), 245 | "valid_json": ( 246 | result.valid_json 247 | if hasattr(result, "valid_json") 248 | else None 249 | ), 250 | "matches_schema": ( 251 | result.matches_schema 252 | if hasattr(result, "matches_schema") 253 | else None 254 | ), 255 | "metadata": ( 256 | { 257 | "num_steps_executed": result.metadata.num_steps_executed, 258 | "start_time": str(result.metadata.start_time), 259 | "end_time": str(result.metadata.end_time), 260 | "prompt": str(result.metadata.prompt), 261 | } 262 | if hasattr(result, "metadata") 263 | else {} 264 | ), 265 | } 266 | 267 | with results_lock: 268 | results_store[result_id] = result_data 269 | 270 | # Store action result 271 | result_item = { 272 | "result_id": result_id, 273 | "action": action_text, 274 | "starting_page": starting_page, 275 | "final_page": nova_act.page.url, 276 | "response": result.response, 277 | "parsed_response": ( 278 | result.parsed_response 279 | if hasattr(result, "parsed_response") 280 | else None 281 | ), 282 | "valid_json": ( 283 | result.valid_json 284 | if hasattr(result, "valid_json") 285 | else None 286 | ), 287 | "matches_schema": ( 288 | result.matches_schema 289 | if hasattr(result, "matches_schema") 290 | else None 291 | ), 292 | } 293 | 294 | results.append(result_item) 295 | 296 | # If this is the last action, it usually contains the data we want 297 | # Store it in final_response for easier access 298 | if i == len(actions) - 1: 299 | final_response = result_item 300 | 301 | except Exception as e: 302 | error_result = {"action": action_text, "error": str(e)} 303 | results.append(error_result) 304 | 305 | # If this was the last action, store the error in final_response 306 | if i == len(actions) - 1: 307 | final_response = error_result 308 | 309 | # Do not close the browser here - leave it open for further interaction 310 | # The context manager will have stopped the browser when exiting 311 | with session_lock: 312 | nova_act_instance = None 313 | 314 | # Return a structured response with both complete results and the final response 315 | return { 316 | "starting_page": starting_page, 317 | "all_results": results, 318 | "final_result": final_response, 319 | "collected_data": final_response.get( 320 | "parsed_response", final_response.get("response", None) 321 | ), 322 | } 323 | except Exception as e: 324 | with session_lock: 325 | nova_act_instance = None 326 | return {"error": str(e)} 327 | 328 | return await asyncio.to_thread(run_browser_session) 329 | 330 | 331 | @mcp.tool() 332 | async def browser_action( 333 | action: str, 334 | schema: Optional[Dict[str, Any]] = None, 335 | max_steps: Optional[int] = None, 336 | ) -> Dict[str, Any]: 337 | """Perform a single action in the Nova Act browser""" 338 | global nova_act_instance 339 | 340 | with session_lock: 341 | if not nova_act_instance: 342 | raise ValueError("Browser session not started. Use browser_session first.") 343 | act = nova_act_instance 344 | 345 | # Execute the action in a separate thread 346 | def execute_action(): 347 | try: 348 | kwargs = {} 349 | if schema: 350 | kwargs["schema"] = schema 351 | if max_steps: 352 | kwargs["max_steps"] = max_steps 353 | 354 | result = act.act(action, **kwargs) 355 | 356 | # Store the result 357 | result_id = generate_id("result") 358 | result_data = { 359 | "result_id": result_id, 360 | "action": action, 361 | "final_page": act.page.url, 362 | "response": result.response, 363 | "parsed_response": ( 364 | result.parsed_response 365 | if hasattr(result, "parsed_response") 366 | else None 367 | ), 368 | "valid_json": ( 369 | result.valid_json if hasattr(result, "valid_json") else None 370 | ), 371 | "matches_schema": ( 372 | result.matches_schema if hasattr(result, "matches_schema") else None 373 | ), 374 | "metadata": ( 375 | { 376 | "num_steps_executed": result.metadata.num_steps_executed, 377 | "start_time": result.metadata.start_time, 378 | "end_time": result.metadata.end_time, 379 | "prompt": result.metadata.prompt, 380 | } 381 | if hasattr(result, "metadata") 382 | else {} 383 | ), 384 | } 385 | 386 | with results_lock: 387 | results_store[result_id] = result_data 388 | 389 | return { 390 | "result_id": result_id, 391 | "final_page": act.page.url, 392 | "response": result.response, 393 | "parsed_response": ( 394 | result.parsed_response 395 | if hasattr(result, "parsed_response") 396 | else None 397 | ), 398 | "valid_json": ( 399 | result.valid_json if hasattr(result, "valid_json") else None 400 | ), 401 | "matches_schema": ( 402 | result.matches_schema if hasattr(result, "matches_schema") else None 403 | ), 404 | } 405 | except Exception as e: 406 | print(f"Error executing action: {e}") 407 | raise 408 | 409 | return await asyncio.to_thread(execute_action) 410 | 411 | 412 | @mcp.tool() 413 | async def execute_parallel_browser_tasks( 414 | browser_tasks: List[Dict[str, Any]], 415 | ) -> List[Dict[str, Any]]: 416 | """Execute multiple sequences of actions in parallel across different browser sessions. 417 | 418 | WHEN TO USE: 419 | - For tasks that don't depend on each other's results 420 | - When you need to collect information from multiple sources simultaneously 421 | - When you want to significantly speed up web workflows through parallelization 422 | 423 | EXAMPLES: 424 | - Comparing multiple products on Amazon (each product in a separate browser) 425 | - Searching for information across different websites 426 | - Checking apartment listings and calculating distances to locations (as shown in the documentation) 427 | 428 | Each browser_task should include: 429 | - starting_page: URL to start the browser 430 | - actions: List of actions to perform, following these guidelines: 431 | 1. Make each action prescriptive and succinct 432 | ✅ "Click the hamburger menu icon" 433 | ❌ "Find the menu" 434 | 435 | 2. Be specific about UI elements 436 | ✅ "Scroll down until you see 'Reviews' and click it" 437 | ❌ "Go to the reviews section" 438 | 439 | 3. For data extraction, use a dedicated action with schema 440 | ✅ {"action": "Return the current price and rating", "schema": {...}} 441 | ❌ "Tell me about this product" 442 | 443 | Format options for actions: 444 | 1. Simple string format: ["Search for iPhone", "Click on the first result"] 445 | 2. Object format for data extraction: [{"action": "Return product details", "schema": {...}}] 446 | 447 | Browser configuration options: 448 | - headless: Run browsers without visible UI (default: False) 449 | - user_data_dir: Path to browser profile (note: each session requires its own) 450 | - quiet: Suppress logs (default: False) 451 | 452 | IMPORTANT NOTES: 453 | - Each task runs in its own isolated browser - they cannot interact with each other 454 | - For authentication, each session needs its own user_data_dir 455 | - Nova Act cannot interact with elements hidden behind mouseovers 456 | - Data extraction works best with clear schemas 457 | 458 | Returns a list of task results, each containing: 459 | - starting_page: The URL where the browser started 460 | - final_result: The result of the last action (usually the most relevant) 461 | """ 462 | 463 | # Create a temporary directory for result files 464 | temp_dir = tempfile.mkdtemp(prefix="nova_act_results_") 465 | 466 | # Prepare processes for each task 467 | processes = [] 468 | result_files = [] 469 | 470 | for i, task in enumerate(browser_tasks): 471 | # Create a file to store the result 472 | result_file = os.path.join(temp_dir, f"task_{i}_result.json") 473 | result_files.append(result_file) 474 | 475 | # Create and start a new process for this task 476 | process = multiprocessing.Process( 477 | target=execute_nova_act_task, args=(task, result_file) 478 | ) 479 | processes.append(process) 480 | process.start() 481 | 482 | # Wait for all processes to complete 483 | for process in processes: 484 | process.join() 485 | 486 | # Collect results 487 | all_results = [] 488 | 489 | for result_file in result_files: 490 | try: 491 | if os.path.exists(result_file): 492 | with open(result_file, "r") as f: 493 | task_result = json.load(f) 494 | 495 | # Process the task result to add final_result and collected_data 496 | if "results" in task_result and task_result["results"]: 497 | # Get the last result (final action) 498 | final_result = task_result["results"][-1] 499 | 500 | # Add final_result to the task_result 501 | task_result["final_result"] = final_result 502 | 503 | # Add collected_data extracted from the final_result 504 | if "parsed_response" in final_result: 505 | task_result["collected_data"] = final_result["parsed_response"] 506 | elif "response" in final_result: 507 | task_result["collected_data"] = final_result["response"] 508 | 509 | all_results.append(task_result) 510 | 511 | # Store results in the results_store 512 | if "results" in task_result: 513 | with results_lock: 514 | for result in task_result["results"]: 515 | if "result_id" in result: 516 | results_store[result["result_id"]] = result 517 | 518 | all_results.append({"error": "Result file not found"}) 519 | 520 | except Exception as e: 521 | all_results.append({"error": str(e)}) 522 | 523 | # Clean up temporary files 524 | try: 525 | for file in result_files: 526 | if os.path.exists(file): 527 | os.remove(file) 528 | os.rmdir(temp_dir) 529 | except Exception as e: 530 | raise ValueError(f"Error cleaning up temporary files: {e}") 531 | 532 | return all_results 533 | 534 | 535 | @mcp.tool() 536 | async def list_results() -> List[Dict[str, Any]]: 537 | """List all stored results""" 538 | result_infos = [] 539 | with results_lock: 540 | for result_id, result_data in results_store.items(): 541 | result_infos.append( 542 | { 543 | "result_id": result_id, 544 | "action": result_data.get("action", ""), 545 | "response": result_data.get("response", ""), 546 | } 547 | ) 548 | return result_infos 549 | 550 | 551 | @mcp.tool() 552 | async def get_result(result_id: str) -> Dict[str, Any]: 553 | """Get a specific result by ID""" 554 | with results_lock: 555 | if result_id not in results_store: 556 | raise ValueError(f"Result {result_id} not found") 557 | return results_store[result_id] 558 | 559 | 560 | @mcp.tool() 561 | async def save_results(file_path: str, result_ids: Optional[List[str]] = None) -> bool: 562 | """Save results to a file""" 563 | return save_results_to_file(file_path, result_ids) 564 | 565 | 566 | @mcp.tool() 567 | async def take_screenshot(save_path: Optional[str] = None) -> str: 568 | """Take a screenshot in the browser session""" 569 | global nova_act_instance 570 | 571 | with session_lock: 572 | if not nova_act_instance: 573 | raise ValueError("Browser session not started. Use browser_session first.") 574 | act = nova_act_instance 575 | 576 | # Take screenshot in a separate thread 577 | def capture_screenshot(): 578 | try: 579 | screenshot_bytes = act.page.screenshot() 580 | if save_path: 581 | os.makedirs(os.path.dirname(os.path.abspath(save_path)), exist_ok=True) 582 | with open(save_path, "wb") as f: 583 | f.write(screenshot_bytes) 584 | return save_path 585 | else: 586 | import tempfile 587 | 588 | temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False) 589 | temp_file.write(screenshot_bytes) 590 | temp_file.close() 591 | return temp_file.name 592 | except Exception as e: 593 | raise 594 | 595 | return await asyncio.to_thread(capture_screenshot) 596 | 597 | 598 | @mcp.tool() 599 | async def close_browser() -> bool: 600 | """Close the browser session""" 601 | global nova_act_instance 602 | 603 | with session_lock: 604 | if not nova_act_instance: 605 | return False 606 | 607 | act = nova_act_instance 608 | 609 | # Stop NovaAct in a separate thread 610 | def stop_session(act_instance): 611 | try: 612 | act_instance.stop() 613 | return True 614 | except Exception as e: 615 | return False 616 | 617 | success = await asyncio.to_thread(stop_session, act) 618 | 619 | if success: 620 | with session_lock: 621 | nova_act_instance = None 622 | return True 623 | else: 624 | return False 625 | 626 | 627 | # Run the server when the script is executed directly 628 | if __name__ == "__main__": 629 | # Register multiprocessing start method 630 | multiprocessing.set_start_method("spawn", force=True) 631 | 632 | # Ensure clean shutdown on exit 633 | import atexit 634 | 635 | def cleanup(): 636 | # Close the browser session if it exists 637 | global nova_act_instance 638 | if nova_act_instance: 639 | try: 640 | nova_act_instance.stop() 641 | except: 642 | raise ValueError("Error closing browser session") 643 | 644 | atexit.register(cleanup) 645 | 646 | # Start the server 647 | mcp.run() 648 | -------------------------------------------------------------------------------- /nova_act_examples/README.md: -------------------------------------------------------------------------------- 1 | # Nova Act Examples 2 | 3 | This directory contains examples demonstrating how to use Amazon Nova Act for web automation. 4 | 5 | ## Overview 6 | 7 | Amazon Nova Act is a powerful web automation tool that enables you to create agents that interact with web pages. These examples showcase how to use Nova Act for various automation tasks, from basic web interactions to advanced parallel processing. 8 | 9 | ## Examples 10 | 11 | ### 1. Basic Example (`get_coffee_maker.py`) 12 | 13 | A simple example that demonstrates the fundamentals of Nova Act: 14 | 15 | - Initializing Nova Act with a starting web page (Amazon.com) 16 | - Performing a simple search for "coffee maker" 17 | - Selecting a search result 18 | - Extracting information from a product page 19 | 20 | Run this example to get familiar with the basic Nova Act workflow. 21 | 22 | ### 2. Advanced Example (`multi_monitor.py`) 23 | 24 | A more complex example that demonstrates advanced Nova Act capabilities: 25 | 26 | - Parallel execution of web tasks using ThreadPoolExecutor 27 | - Searching for multiple monitor models simultaneously 28 | - Extracting structured data (price, rating, size) from product pages 29 | - Error handling for robust web automation 30 | - Comparing results from multiple searches 31 | 32 | This example shows how to scale web automation tasks for efficiency. 33 | 34 | ## Prerequisites 35 | 36 | - Python 3.10 or higher 37 | - A valid Nova Act API key (obtain from https://nova.amazon.com/act) 38 | - Required Python packages (install from the main project's requirements.txt) 39 | 40 | ## Running the Examples 41 | 42 | 1. Make sure your Nova Act API key is set in your environment: 43 | ```bash 44 | export NOVA_ACT_API_KEY="your_api_key" 45 | ``` 46 | 47 | 2. Run the basic example: 48 | ```bash 49 | python get_coffee_maker.py 50 | ``` 51 | 52 | 3. Run the advanced example: 53 | ```bash 54 | python multi_monitor.py 55 | ``` 56 | 57 | ## Key Concepts 58 | 59 | - **Nova Act Initialization**: Each example demonstrates how to initialize Nova Act with appropriate parameters (starting page, video recording, headless mode). 60 | - **Act Commands**: The examples use natural language commands with the `act()` method to instruct the browser. 61 | - **Data Extraction**: Both examples show how to extract and process data from web pages. 62 | - **Session Management**: The examples demonstrate proper handling of Nova Act sessions using context managers. 63 | - **Parallel Execution**: The advanced example shows how to run multiple Nova Act instances in parallel for efficiency. 64 | 65 | ## Important Notes 66 | 67 | - Nova Act creates browser logs and (optionally) video recordings in the `./logs` directory 68 | - Headless mode runs browsers without a visible UI, which is useful for background automation 69 | - For parallel execution, be mindful of resource usage when setting the number of workers -------------------------------------------------------------------------------- /nova_act_examples/get_coffee_maker.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from nova_act import NovaAct 4 | 5 | # make logs directory 6 | os.makedirs("./logs", exist_ok=True) 7 | 8 | # Initialize Nova Act with Amazon as the starting page 9 | with NovaAct( 10 | starting_page="https://www.amazon.com", record_video=True, logs_directory="./logs" 11 | ) as n: 12 | # Perform a simple action - search for a product 13 | n.act("search for a coffee maker") 14 | # Click on a result 15 | n.act("select the first result") 16 | # Print the title of the page 17 | title = n.act("What is the title of this product page?") 18 | print(f"Product title: {title.response}") 19 | -------------------------------------------------------------------------------- /nova_act_examples/multi_monitor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import uuid 4 | from concurrent.futures import ThreadPoolExecutor, as_completed 5 | 6 | from nova_act import NovaAct 7 | 8 | # make logs directory 9 | os.makedirs("./logs", exist_ok=True) 10 | 11 | 12 | def check_monitor_on_amazon(monitor_model, headless=True): 13 | """Search for a specific monitor model on Amazon and extract information""" 14 | # Create a unique ID for this thread's browser session 15 | session_id = uuid.uuid4().hex[:8] 16 | print(f"[Thread {session_id}] Starting search for {monitor_model}") 17 | 18 | results = {"model": monitor_model, "price": "N/A", "rating": "N/A", "size": "N/A"} 19 | 20 | try: 21 | # Each thread gets its own isolated browser session 22 | with NovaAct( 23 | starting_page="https://www.amazon.com", 24 | record_video=True, 25 | headless=headless, 26 | logs_directory="./logs", 27 | ) as n: 28 | print(f"[Thread {session_id}] Browser session started") 29 | 30 | # Search for the specific monitor model 31 | n.act(f"search for '{monitor_model}'") 32 | 33 | # Click on the first result 34 | n.act("click on the first search result") 35 | 36 | # Get the price 37 | price_result = n.act("What is the current price of this monitor?") 38 | results["price"] = price_result.response or "N/A" # Default to "N/A" if None 39 | 40 | # Get the rating 41 | rating_result = n.act("What is the average rating of this monitor?") 42 | results["rating"] = rating_result.response or "N/A" # Default to "N/A" if None 43 | 44 | # Get screen size 45 | size_result = n.act("What is the screen size of this monitor?") 46 | results["size"] = size_result.response or "N/A" # Default to "N/A" if None 47 | 48 | print(f"[Thread {session_id}] ✅ Completed search for {monitor_model}") 49 | except Exception as e: 50 | print(f"[Thread {session_id}] ❌ Error searching for {monitor_model}: {e}") 51 | 52 | return results 53 | 54 | 55 | def parallel_monitor_comparison(): 56 | # List of popular monitor models to check 57 | monitor_models = [ 58 | "Dell S2722QC 27-inch 4K USB-C Monitor", 59 | "LG 27GP850-B 27-inch Ultragear Gaming Monitor", 60 | "Samsung Odyssey G7 32-inch Gaming Monitor", 61 | ] 62 | 63 | all_results = [] 64 | start_time = time.time() 65 | 66 | # On machines with fewer cores, running too many browser instances can cause failures 67 | max_workers = min(3, len(monitor_models)) 68 | print(f"Starting parallel execution with {max_workers} workers") 69 | 70 | with ThreadPoolExecutor(max_workers=max_workers) as executor: 71 | # Submit search tasks with a delay between submissions 72 | future_to_model = {} 73 | for model in monitor_models: 74 | # Submit the task 75 | future = executor.submit(check_monitor_on_amazon, model, True) 76 | future_to_model[future] = model 77 | 78 | # Add a delay before submitting the next task to avoid resource spikes 79 | time.sleep(5) # Give 5 seconds between browser startups 80 | 81 | # Process results as they complete 82 | for future in as_completed(future_to_model): 83 | model = future_to_model[future] 84 | try: 85 | results = future.result() 86 | if results: # Make sure results isn't None 87 | all_results.append(results) 88 | except Exception as e: 89 | print(f"❌ Error processing results for {model}: {e}") 90 | 91 | elapsed = time.time() - start_time 92 | print(f"Parallel execution completed in {elapsed:.2f} seconds") 93 | 94 | # Print comparison table 95 | print("\nMonitor Comparison:") 96 | print("-" * 80) 97 | print(f"{'Model':<30} {'Price':<10} {'Rating':<10} {'Size':<10}") 98 | print("-" * 80) 99 | 100 | for result in all_results: 101 | # Ensure all values are strings to prevent formatting errors 102 | model = str(result.get('model', 'Unknown'))[:28] if result.get('model') is not None else 'Unknown' 103 | price = str(result.get('price', 'N/A')) if result.get('price') is not None else 'N/A' 104 | rating = str(result.get('rating', 'N/A')) if result.get('rating') is not None else 'N/A' 105 | size = str(result.get('size', 'N/A')) if result.get('size') is not None else 'N/A' 106 | 107 | print(f"{model:<30} {price:<10} {rating:<10} {size:<10}") 108 | 109 | return all_results 110 | 111 | 112 | if __name__ == "__main__": 113 | parallel_monitor_comparison() 114 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mcp[cli] 2 | nova-act 3 | streamlit 4 | pandas 5 | boto3 6 | strands-agents 7 | strands-agents-tools 8 | uv -------------------------------------------------------------------------------- /strands_examples/mcp_docs_diag.py: -------------------------------------------------------------------------------- 1 | from mcp import StdioServerParameters, stdio_client 2 | from strands import Agent 3 | from strands.models import BedrockModel 4 | from strands.tools.mcp import MCPClient 5 | 6 | aws_docs_client = MCPClient( 7 | lambda: stdio_client( 8 | StdioServerParameters( 9 | command="uvx", args=["awslabs.aws-documentation-mcp-server@latest"] 10 | ) 11 | ) 12 | ) 13 | 14 | aws_diag_client = MCPClient( 15 | lambda: stdio_client( 16 | StdioServerParameters( 17 | command="uvx", args=["awslabs.aws-diagram-mcp-server@latest"] 18 | ) 19 | ) 20 | ) 21 | 22 | 23 | bedrock_model = BedrockModel( 24 | model_id="us.anthropic.claude-3-5-haiku-20241022-v1:0", 25 | temperature=0.7, 26 | ) 27 | 28 | SYSTEM_PROMPT = """ 29 | You are an expert AWS Certified Solutions Architect. Your role is to help customers understand best practices on building on AWS. You can querying the AWS Documentation and generate diagrams. Make sure to tell the customer the full file path of the diagram. 30 | """ 31 | 32 | with aws_diag_client, aws_docs_client: 33 | all_tools = aws_diag_client.list_tools_sync() + aws_docs_client.list_tools_sync() 34 | agent = Agent(tools=all_tools, model=bedrock_model, system_prompt=SYSTEM_PROMPT) 35 | 36 | response = agent( 37 | "Get the documentation for AWS Lambda then create a diagram of a website that uses AWS Lambda for a static website hosted on S3" 38 | ) 39 | -------------------------------------------------------------------------------- /strands_examples/nova_act_mcp_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import asyncio 4 | import json 5 | import multiprocessing 6 | import os 7 | import tempfile 8 | import threading 9 | from typing import Any, Dict, List, Optional 10 | 11 | from mcp.server.fastmcp import FastMCP 12 | from nova_act import ActError, NovaAct 13 | 14 | # Initialize FastMCP server 15 | mcp = FastMCP("nova-act-server") 16 | 17 | # Global variables for session and results 18 | nova_act_instance = None 19 | results_store = {} 20 | session_lock = threading.Lock() 21 | results_lock = threading.Lock() 22 | 23 | 24 | # Helper functions 25 | def generate_id(prefix: str) -> str: 26 | """Generate a unique ID for results""" 27 | import uuid 28 | 29 | return f"{prefix}_{uuid.uuid4().hex[:8]}" 30 | 31 | 32 | def save_results_to_file( 33 | file_path: str, result_ids: Optional[List[str]] = None 34 | ) -> bool: 35 | """Save selected results to a JSON file""" 36 | with results_lock: 37 | if result_ids: 38 | data = { 39 | rid: results_store[rid] for rid in result_ids if rid in results_store 40 | } 41 | else: 42 | data = dict(results_store) 43 | 44 | try: 45 | os.makedirs(os.path.dirname(os.path.abspath(file_path)), exist_ok=True) 46 | with open(file_path, "w") as f: 47 | json.dump(data, f, indent=2) 48 | return True 49 | except Exception as e: 50 | return False 51 | 52 | 53 | def execute_nova_act_task(task_args, result_file=None): 54 | """ 55 | Execute a single Nova Act task in an isolated process. 56 | This function runs in a separate process for each task. 57 | """ 58 | starting_page = task_args.get("starting_page") 59 | actions_input = task_args.get("actions", []) 60 | headless = task_args.get("headless", False) 61 | 62 | # Convert action strings to action objects if needed 63 | actions = [] 64 | for action in actions_input: 65 | if isinstance(action, str): 66 | # Convert string to action object 67 | actions.append({"action": action}) 68 | else: 69 | # Already an action object 70 | actions.append(action) 71 | 72 | task_results = [] 73 | 74 | try: 75 | # Create and start NovaAct instance 76 | with NovaAct( 77 | starting_page=starting_page, 78 | headless=headless, 79 | ) as nova_act: 80 | # Execute each action in sequence 81 | for action_params in actions: 82 | action_text = action_params.get("action") 83 | schema = action_params.get("schema") 84 | max_steps = action_params.get("max_steps") 85 | 86 | kwargs = {} 87 | if schema: 88 | kwargs["schema"] = schema 89 | if max_steps: 90 | kwargs["max_steps"] = max_steps 91 | 92 | try: 93 | result = nova_act.act(action_text, **kwargs) 94 | 95 | # Create a result object 96 | result_id = generate_id("result") 97 | result_data = { 98 | "result_id": result_id, 99 | "action": action_text, 100 | "starting_page": starting_page, 101 | "final_page": nova_act.page.url, 102 | "response": result.response, 103 | "parsed_response": ( 104 | result.parsed_response 105 | if hasattr(result, "parsed_response") 106 | else None 107 | ), 108 | "valid_json": ( 109 | result.valid_json if hasattr(result, "valid_json") else None 110 | ), 111 | "matches_schema": ( 112 | result.matches_schema 113 | if hasattr(result, "matches_schema") 114 | else None 115 | ), 116 | "metadata": ( 117 | { 118 | "num_steps_executed": result.metadata.num_steps_executed, 119 | "start_time": str(result.metadata.start_time), 120 | "end_time": str(result.metadata.end_time), 121 | "prompt": str(result.metadata.prompt), 122 | } 123 | if hasattr(result, "metadata") 124 | else {} 125 | ), 126 | } 127 | 128 | task_results.append(result_data) 129 | except Exception as e: 130 | task_results.append({"action": action_text, "error": str(e)}) 131 | 132 | # Write results to file if specified 133 | if result_file: 134 | with open(result_file, "w") as f: 135 | json.dump({"starting_page": starting_page, "results": task_results}, f) 136 | 137 | return {"starting_page": starting_page, "results": task_results} 138 | except Exception as e: 139 | result = { 140 | "starting_page": starting_page, 141 | "error": str(e), 142 | "results": task_results, 143 | } 144 | if result_file: 145 | with open(result_file, "w") as f: 146 | json.dump(result, f) 147 | return result 148 | 149 | 150 | # MCP tools 151 | @mcp.tool() 152 | async def browser_session( 153 | starting_page: str, 154 | actions: List[str], 155 | headless: bool = False, 156 | ) -> Dict[str, Any]: 157 | """Start a browser and perform a sequence of actions in a single command. 158 | 159 | USE THIS TOOL for tasks that need to be executed in sequence within the same browser session, where each action depends on the state created by previous actions. 160 | 161 | For multiple independent tasks that can run in parallel, use execute_parallel_browser_tasks instead. 162 | 163 | When writing actions for Nova Act: 164 | 165 | 1. Be prescriptive and succinct - tell the agent exactly what to do 166 | ✅ "Click the hamburger menu icon, go to Order History" 167 | ❌ "Find my order history" 168 | 169 | 2. Break complex tasks into smaller actions 170 | ✅ "Search for hotels in Houston", then "Sort by avg customer review" 171 | ❌ "Find the highest rated hotel in Houston" 172 | 173 | 3. Be specific about UI elements and navigation patterns 174 | ✅ "Scroll down until you see 'add to cart' and then click it" 175 | ❌ "Add the item to cart" 176 | 177 | 4. For searches, be explicit about interaction patterns 178 | ✅ "Type 'coffee maker' in the search box and press enter" 179 | ❌ "Search for coffee makers" 180 | 181 | 5. For date selection, use absolute dates 182 | ✅ "Select dates March 23 to March 28" 183 | ❌ "Book for next week" 184 | 185 | 6. For extracting information, use a dedicated action 186 | ✅ "Return a list of all visible product names and prices" 187 | ❌ "Find the cheapest option and tell me about it" 188 | 189 | Important limitations: 190 | - Nova Act cannot interact with elements hidden behind mouseovers 191 | - Nova Act cannot interact with browser windows/modals 192 | - Nova Act works best with short, specific instructions 193 | 194 | This function returns a structured response with: 195 | - starting_page: The URL where the browser started 196 | - collected_data: The parsed response data 197 | """ 198 | global nova_act_instance 199 | 200 | # Close any existing browser session 201 | with session_lock: 202 | if nova_act_instance: 203 | try: 204 | nova_act_instance.stop() 205 | except: 206 | raise ActError("Failed to stop existing browser session") 207 | 208 | # Start a new NovaAct session in a separate thread 209 | def run_browser_session(): 210 | try: 211 | # Convert simple action strings to action objects 212 | action_objects = [{"action": action_text} for action_text in actions] 213 | 214 | # Create and use a NovaAct instance with context manager 215 | results = [] 216 | final_response = {} 217 | 218 | with NovaAct( 219 | starting_page=starting_page, 220 | headless=headless, 221 | ) as nova_act: 222 | # Keep a reference to the nova_act instance 223 | with session_lock: 224 | global nova_act_instance 225 | nova_act_instance = nova_act 226 | 227 | # Execute each action in sequence 228 | for i, action_text in enumerate(actions): 229 | try: 230 | result = nova_act.act(action_text) 231 | 232 | # Store the result 233 | result_id = generate_id("result") 234 | result_data = { 235 | "result_id": result_id, 236 | "action": action_text, 237 | "starting_page": starting_page, 238 | "final_page": nova_act.page.url, 239 | "response": result.response, 240 | "parsed_response": ( 241 | result.parsed_response 242 | if hasattr(result, "parsed_response") 243 | else None 244 | ), 245 | "valid_json": ( 246 | result.valid_json 247 | if hasattr(result, "valid_json") 248 | else None 249 | ), 250 | "matches_schema": ( 251 | result.matches_schema 252 | if hasattr(result, "matches_schema") 253 | else None 254 | ), 255 | "metadata": ( 256 | { 257 | "num_steps_executed": result.metadata.num_steps_executed, 258 | "start_time": str(result.metadata.start_time), 259 | "end_time": str(result.metadata.end_time), 260 | "prompt": str(result.metadata.prompt), 261 | } 262 | if hasattr(result, "metadata") 263 | else {} 264 | ), 265 | } 266 | 267 | with results_lock: 268 | results_store[result_id] = result_data 269 | 270 | # Store action result 271 | result_item = { 272 | "result_id": result_id, 273 | "action": action_text, 274 | "starting_page": starting_page, 275 | "final_page": nova_act.page.url, 276 | "response": result.response, 277 | "parsed_response": ( 278 | result.parsed_response 279 | if hasattr(result, "parsed_response") 280 | else None 281 | ), 282 | "valid_json": ( 283 | result.valid_json 284 | if hasattr(result, "valid_json") 285 | else None 286 | ), 287 | "matches_schema": ( 288 | result.matches_schema 289 | if hasattr(result, "matches_schema") 290 | else None 291 | ), 292 | } 293 | 294 | results.append(result_item) 295 | 296 | # If this is the last action, it usually contains the data we want 297 | # Store it in final_response for easier access 298 | if i == len(actions) - 1: 299 | final_response = result_item 300 | 301 | except Exception as e: 302 | error_result = {"action": action_text, "error": str(e)} 303 | results.append(error_result) 304 | 305 | # If this was the last action, store the error in final_response 306 | if i == len(actions) - 1: 307 | final_response = error_result 308 | 309 | # Do not close the browser here - leave it open for further interaction 310 | # The context manager will have stopped the browser when exiting 311 | with session_lock: 312 | nova_act_instance = None 313 | 314 | # Return a structured response with both complete results and the final response 315 | return { 316 | "starting_page": starting_page, 317 | "all_results": results, 318 | "final_result": final_response, 319 | "collected_data": final_response.get( 320 | "parsed_response", final_response.get("response", None) 321 | ), 322 | } 323 | except Exception as e: 324 | with session_lock: 325 | nova_act_instance = None 326 | return {"error": str(e)} 327 | 328 | return await asyncio.to_thread(run_browser_session) 329 | 330 | 331 | @mcp.tool() 332 | async def browser_action( 333 | action: str, 334 | schema: Optional[Dict[str, Any]] = None, 335 | max_steps: Optional[int] = None, 336 | ) -> Dict[str, Any]: 337 | """Perform a single action in the Nova Act browser""" 338 | global nova_act_instance 339 | 340 | with session_lock: 341 | if not nova_act_instance: 342 | raise ValueError("Browser session not started. Use browser_session first.") 343 | act = nova_act_instance 344 | 345 | # Execute the action in a separate thread 346 | def execute_action(): 347 | try: 348 | kwargs = {} 349 | if schema: 350 | kwargs["schema"] = schema 351 | if max_steps: 352 | kwargs["max_steps"] = max_steps 353 | 354 | result = act.act(action, **kwargs) 355 | 356 | # Store the result 357 | result_id = generate_id("result") 358 | result_data = { 359 | "result_id": result_id, 360 | "action": action, 361 | "final_page": act.page.url, 362 | "response": result.response, 363 | "parsed_response": ( 364 | result.parsed_response 365 | if hasattr(result, "parsed_response") 366 | else None 367 | ), 368 | "valid_json": ( 369 | result.valid_json if hasattr(result, "valid_json") else None 370 | ), 371 | "matches_schema": ( 372 | result.matches_schema if hasattr(result, "matches_schema") else None 373 | ), 374 | "metadata": ( 375 | { 376 | "num_steps_executed": result.metadata.num_steps_executed, 377 | "start_time": result.metadata.start_time, 378 | "end_time": result.metadata.end_time, 379 | "prompt": result.metadata.prompt, 380 | } 381 | if hasattr(result, "metadata") 382 | else {} 383 | ), 384 | } 385 | 386 | with results_lock: 387 | results_store[result_id] = result_data 388 | 389 | return { 390 | "result_id": result_id, 391 | "final_page": act.page.url, 392 | "response": result.response, 393 | "parsed_response": ( 394 | result.parsed_response 395 | if hasattr(result, "parsed_response") 396 | else None 397 | ), 398 | "valid_json": ( 399 | result.valid_json if hasattr(result, "valid_json") else None 400 | ), 401 | "matches_schema": ( 402 | result.matches_schema if hasattr(result, "matches_schema") else None 403 | ), 404 | } 405 | except Exception as e: 406 | print(f"Error executing action: {e}") 407 | raise 408 | 409 | return await asyncio.to_thread(execute_action) 410 | 411 | 412 | @mcp.tool() 413 | async def execute_parallel_browser_tasks( 414 | browser_tasks: List[Dict[str, Any]], 415 | ) -> List[Dict[str, Any]]: 416 | """Execute multiple sequences of actions in parallel across different browser sessions. 417 | 418 | WHEN TO USE: 419 | - For tasks that don't depend on each other's results 420 | - When you need to collect information from multiple sources simultaneously 421 | - When you want to significantly speed up web workflows through parallelization 422 | 423 | EXAMPLES: 424 | - Comparing multiple products on Amazon (each product in a separate browser) 425 | - Searching for information across different websites 426 | - Checking apartment listings and calculating distances to locations (as shown in the documentation) 427 | 428 | Each browser_task should include: 429 | - starting_page: URL to start the browser 430 | - actions: List of actions to perform, following these guidelines: 431 | 1. Make each action prescriptive and succinct 432 | ✅ "Click the hamburger menu icon" 433 | ❌ "Find the menu" 434 | 435 | 2. Be specific about UI elements 436 | ✅ "Scroll down until you see 'Reviews' and click it" 437 | ❌ "Go to the reviews section" 438 | 439 | 3. For data extraction, use a dedicated action with schema 440 | ✅ {"action": "Return the current price and rating", "schema": {...}} 441 | ❌ "Tell me about this product" 442 | 443 | Format options for actions: 444 | 1. Simple string format: ["Search for iPhone", "Click on the first result"] 445 | 2. Object format for data extraction: [{"action": "Return product details", "schema": {...}}] 446 | 447 | Browser configuration options: 448 | - headless: Run browsers without visible UI (default: False) 449 | - user_data_dir: Path to browser profile (note: each session requires its own) 450 | - quiet: Suppress logs (default: False) 451 | 452 | IMPORTANT NOTES: 453 | - Each task runs in its own isolated browser - they cannot interact with each other 454 | - For authentication, each session needs its own user_data_dir 455 | - Nova Act cannot interact with elements hidden behind mouseovers 456 | - Data extraction works best with clear schemas 457 | 458 | Returns a list of task results, each containing: 459 | - starting_page: The URL where the browser started 460 | - final_result: The result of the last action (usually the most relevant) 461 | """ 462 | 463 | # Create a temporary directory for result files 464 | temp_dir = tempfile.mkdtemp(prefix="nova_act_results_") 465 | 466 | # Prepare processes for each task 467 | processes = [] 468 | result_files = [] 469 | 470 | for i, task in enumerate(browser_tasks): 471 | # Create a file to store the result 472 | result_file = os.path.join(temp_dir, f"task_{i}_result.json") 473 | result_files.append(result_file) 474 | 475 | # Create and start a new process for this task 476 | process = multiprocessing.Process( 477 | target=execute_nova_act_task, args=(task, result_file) 478 | ) 479 | processes.append(process) 480 | process.start() 481 | 482 | # Wait for all processes to complete 483 | for process in processes: 484 | process.join() 485 | 486 | # Collect results 487 | all_results = [] 488 | 489 | for result_file in result_files: 490 | try: 491 | if os.path.exists(result_file): 492 | with open(result_file, "r") as f: 493 | task_result = json.load(f) 494 | 495 | # Process the task result to add final_result and collected_data 496 | if "results" in task_result and task_result["results"]: 497 | # Get the last result (final action) 498 | final_result = task_result["results"][-1] 499 | 500 | # Add final_result to the task_result 501 | task_result["final_result"] = final_result 502 | 503 | # Add collected_data extracted from the final_result 504 | if "parsed_response" in final_result: 505 | task_result["collected_data"] = final_result["parsed_response"] 506 | elif "response" in final_result: 507 | task_result["collected_data"] = final_result["response"] 508 | 509 | all_results.append(task_result) 510 | 511 | # Store results in the results_store 512 | if "results" in task_result: 513 | with results_lock: 514 | for result in task_result["results"]: 515 | if "result_id" in result: 516 | results_store[result["result_id"]] = result 517 | 518 | all_results.append({"error": "Result file not found"}) 519 | 520 | except Exception as e: 521 | all_results.append({"error": str(e)}) 522 | 523 | # Clean up temporary files 524 | try: 525 | for file in result_files: 526 | if os.path.exists(file): 527 | os.remove(file) 528 | os.rmdir(temp_dir) 529 | except Exception as e: 530 | raise ValueError(f"Error cleaning up temporary files: {e}") 531 | 532 | return all_results 533 | 534 | 535 | @mcp.tool() 536 | async def list_results() -> List[Dict[str, Any]]: 537 | """List all stored results""" 538 | result_infos = [] 539 | with results_lock: 540 | for result_id, result_data in results_store.items(): 541 | result_infos.append( 542 | { 543 | "result_id": result_id, 544 | "action": result_data.get("action", ""), 545 | "response": result_data.get("response", ""), 546 | } 547 | ) 548 | return result_infos 549 | 550 | 551 | @mcp.tool() 552 | async def get_result(result_id: str) -> Dict[str, Any]: 553 | """Get a specific result by ID""" 554 | with results_lock: 555 | if result_id not in results_store: 556 | raise ValueError(f"Result {result_id} not found") 557 | return results_store[result_id] 558 | 559 | 560 | @mcp.tool() 561 | async def save_results(file_path: str, result_ids: Optional[List[str]] = None) -> bool: 562 | """Save results to a file""" 563 | return save_results_to_file(file_path, result_ids) 564 | 565 | 566 | @mcp.tool() 567 | async def take_screenshot(save_path: Optional[str] = None) -> str: 568 | """Take a screenshot in the browser session""" 569 | global nova_act_instance 570 | 571 | with session_lock: 572 | if not nova_act_instance: 573 | raise ValueError("Browser session not started. Use browser_session first.") 574 | act = nova_act_instance 575 | 576 | # Take screenshot in a separate thread 577 | def capture_screenshot(): 578 | try: 579 | screenshot_bytes = act.page.screenshot() 580 | if save_path: 581 | os.makedirs(os.path.dirname(os.path.abspath(save_path)), exist_ok=True) 582 | with open(save_path, "wb") as f: 583 | f.write(screenshot_bytes) 584 | return save_path 585 | else: 586 | import tempfile 587 | 588 | temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False) 589 | temp_file.write(screenshot_bytes) 590 | temp_file.close() 591 | return temp_file.name 592 | except Exception as e: 593 | raise 594 | 595 | return await asyncio.to_thread(capture_screenshot) 596 | 597 | 598 | @mcp.tool() 599 | async def close_browser() -> bool: 600 | """Close the browser session""" 601 | global nova_act_instance 602 | 603 | with session_lock: 604 | if not nova_act_instance: 605 | return False 606 | 607 | act = nova_act_instance 608 | 609 | # Stop NovaAct in a separate thread 610 | def stop_session(act_instance): 611 | try: 612 | act_instance.stop() 613 | return True 614 | except Exception as e: 615 | return False 616 | 617 | success = await asyncio.to_thread(stop_session, act) 618 | 619 | if success: 620 | with session_lock: 621 | nova_act_instance = None 622 | return True 623 | else: 624 | return False 625 | 626 | 627 | # Run the server when the script is executed directly 628 | if __name__ == "__main__": 629 | # Register multiprocessing start method 630 | multiprocessing.set_start_method("spawn", force=True) 631 | 632 | # Ensure clean shutdown on exit 633 | import atexit 634 | 635 | def cleanup(): 636 | # Close the browser session if it exists 637 | global nova_act_instance 638 | if nova_act_instance: 639 | try: 640 | nova_act_instance.stop() 641 | except: 642 | raise ValueError("Error closing browser session") 643 | 644 | atexit.register(cleanup) 645 | 646 | # Start the server 647 | mcp.run() 648 | -------------------------------------------------------------------------------- /strands_examples/nova_act_strands.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | from mcp import StdioServerParameters, stdio_client 5 | from strands import Agent 6 | from strands.models import BedrockModel 7 | from strands.tools.mcp import MCPClient 8 | 9 | # Step 1: Define MCP stdio parameters 10 | NOVA_ACT_API_KEY = os.getenv("NOVA_ACT_API_KEY") 11 | 12 | # Find the available Python command 13 | python_cmd = "python" 14 | if shutil.which("python3") is not None: 15 | python_cmd = "python3" 16 | elif shutil.which("python") is not None: 17 | python_cmd = "python" 18 | else: 19 | raise RuntimeError("Neither 'python' nor 'python3' command was found in PATH") 20 | 21 | nova_act_client = MCPClient( 22 | lambda: stdio_client( 23 | StdioServerParameters( 24 | command=python_cmd, 25 | args=["nova_act_mcp_server.py"], 26 | env={"NOVA_ACT_API_KEY": NOVA_ACT_API_KEY}, 27 | ), 28 | ) 29 | ) 30 | 31 | bedrock_model = BedrockModel( 32 | model_id="us.anthropic.claude-3-5-haiku-20241022-v1:0", 33 | temperature=0.7, 34 | ) 35 | 36 | with nova_act_client: 37 | agent = Agent( 38 | tools=nova_act_client.list_tools_sync(), 39 | model=bedrock_model, 40 | ) 41 | 42 | response = agent("Find the first backpack on amazon.com use headless mode") 43 | -------------------------------------------------------------------------------- /strands_examples/weather_word_count.py: -------------------------------------------------------------------------------- 1 | from strands import Agent, tool 2 | from strands.models import BedrockModel 3 | from strands_tools import http_request 4 | 5 | # Define a weather-focused system prompt 6 | WEATHER_SYSTEM_PROMPT = """You are a weather assistant with HTTP capabilities. You can: 7 | 8 | 1. Make HTTP requests to the National Weather Service API 9 | 2. Process and display weather forecast data 10 | 3. Provide weather information for locations in the United States 11 | 12 | When retrieving weather information: 13 | 1. First get the coordinates or grid information using https://api.weather.gov/points/{latitude},{longitude} or https://api.weather.gov/points/{zipcode} 14 | 2. Then use the returned forecast URL to get the actual forecast 15 | 16 | When displaying responses: 17 | - Format weather data in a human-readable way 18 | - Highlight important information like temperature, precipitation, and alerts 19 | - Handle errors appropriately 20 | - Convert technical terms to user-friendly language 21 | 22 | Always explain the weather conditions clearly and provide context for the forecast. 23 | """ 24 | 25 | 26 | @tool 27 | def word_count(text: str) -> int: 28 | """Count words in text.""" 29 | return len(text.split()) 30 | 31 | 32 | # Bedrock 33 | bedrock_model = BedrockModel( 34 | model_id="us.anthropic.claude-3-5-haiku-20241022-v1:0", 35 | temperature=0.3, 36 | ) 37 | 38 | agent = Agent( 39 | system_prompt=WEATHER_SYSTEM_PROMPT, 40 | tools=[word_count, http_request], 41 | model=bedrock_model, 42 | ) 43 | response = agent( 44 | "What's the weather like in Seattle? Also how many words are in the response?" 45 | ) 46 | -------------------------------------------------------------------------------- /streamlit_examples/README.md: -------------------------------------------------------------------------------- 1 | # Streamlit Nova Act Examples 2 | 3 | This repository contains example applications demonstrating the power of Amazon Nova Act, a Python SDK for building reliable web automation agents. 4 | 5 | ## Video Game Research Tool 6 | 7 | The `video_game_research_st.py` example demonstrates how to use Nova Act to automate game research and price comparison. This tool: 8 | 9 | 1. Finds top games for any selected gaming system 10 | 2. Searches Amazon in parallel for each game to find prices and descriptions 11 | 3. Compiles results into a beautiful, interactive table 12 | 4. Saves all research for future reference 13 | 14 | ### Features 15 | 16 | - **Parallel Processing**: Search Amazon for multiple games simultaneously 17 | - **Interactive UI**: Built with Streamlit for a user-friendly experience 18 | - **Data Persistence**: Save research results for future reference 19 | - **Detailed Information**: Get comprehensive game details including: 20 | - Title and system 21 | - Release date 22 | - Genre 23 | - Developer 24 | - Rating 25 | - Description 26 | - Amazon price and link 27 | 28 | ### How It Works 29 | 30 | 1. **GameFAQs Research**: 31 | - Finds top games for the selected system 32 | - Extracts game information using Nova Act's schema support 33 | 34 | 2. **Amazon Research**: 35 | - Searches Amazon for each game 36 | - Extracts pricing and detailed descriptions 37 | - Takes screenshots of search results and product pages 38 | 39 | 3. **Results Compilation**: 40 | - Combines data from both sources 41 | - Creates an interactive table view 42 | - Saves all research data for future reference 43 | 44 | ### Usage 45 | 46 | Visit https://nova.amazon.com/act to generate an API key 47 | 48 | 1. Install dependencies from main folder: 49 | ```bash 50 | pip install -r requirements.txt 51 | ``` 52 | 53 | 2. Run the application: 54 | ```bash 55 | streamlit run video_game_research_st.py 56 | ``` 57 | 58 | 3. Use the interface to: 59 | - Select a gaming system 60 | - Choose number of games to research 61 | - Configure search parameters 62 | - View and save results 63 | 64 | ### Data Storage 65 | 66 | Research results are saved in the `game_searches` directory, organized by run ID. Each run includes: 67 | - Metadata about the search 68 | - Game information 69 | - Screenshots 70 | - Amazon product details 71 | 72 | ## About Nova Act 73 | 74 | Amazon Nova Act is an early research preview of an SDK + model for building agents designed to reliably take actions in web browsers. It enables developers to: 75 | 76 | - Break down complex workflows into smaller, reliable commands 77 | - Add more detail where needed 78 | - Call APIs 79 | - Intersperse direct browser manipulation 80 | - Interleave Python code for tests, breakpoints, asserts, or threadpooling 81 | 82 | For more information, visit: https://labs.amazon.science/blog/nova-act -------------------------------------------------------------------------------- /streamlit_examples/video_game_research_st.py: -------------------------------------------------------------------------------- 1 | import concurrent.futures 2 | import json 3 | import os 4 | import time 5 | import uuid 6 | from datetime import datetime 7 | from typing import Any, Dict, List, Optional 8 | 9 | import pandas as pd 10 | import streamlit as st 11 | from nova_act import NovaAct 12 | from pydantic import BaseModel 13 | 14 | 15 | # Data models for game information 16 | class GameInfo(BaseModel): 17 | title: str 18 | system: str 19 | release_date: Optional[str] = None 20 | genre: Optional[str] = None 21 | developer: Optional[str] = None 22 | rating: Optional[float] = None 23 | description: Optional[str] = None 24 | amazon_url: Optional[str] = None 25 | amazon_price: Optional[str] = None 26 | image_url: Optional[str] = None 27 | 28 | 29 | class GameSearchResults(BaseModel): 30 | run_id: str 31 | search_params: Dict[str, Any] 32 | games: List[GameInfo] 33 | 34 | 35 | # Create searches directory if it doesn't exist 36 | os.makedirs("game_searches", exist_ok=True) 37 | 38 | 39 | # Function to find top games for a system from GameFAQs 40 | def find_top_games(system, num_games=5, headless=False, status_update_callback=None): 41 | """ 42 | Find the top N games for a specified system on GameFAQs 43 | """ 44 | if status_update_callback: 45 | status_update_callback( 46 | f"Searching for top {num_games} games for {system} on GameFAQs..." 47 | ) 48 | 49 | with NovaAct( 50 | starting_page="https://gamefaqs.gamespot.com/games/systems", headless=headless 51 | ) as n: 52 | 53 | # Click on the system link 54 | print("Current URL:", n.page.url) 55 | n.act(f"Click on the link for the '{system}'") 56 | 57 | n.act("Scroll down to view the list of top games") 58 | 59 | # Extract the top N games 60 | result = n.act( 61 | f"Extract information for the top {num_games} games from this list. For each game, include the title, release date, genre, and rating if available.", 62 | schema={"type": "array", "items": GameInfo.model_json_schema()}, 63 | ) 64 | 65 | if not result.matches_schema: 66 | if status_update_callback: 67 | status_update_callback( 68 | f"Failed to extract game information for {system}" 69 | ) 70 | return [] 71 | 72 | # Parse the results 73 | games = [GameInfo.model_validate(g) for g in result.parsed_response] 74 | 75 | # Make sure each game has the system field populated 76 | for game in games: 77 | game.system = system 78 | 79 | if status_update_callback: 80 | status_update_callback(f"Found {len(games)} games for {system}") 81 | 82 | return games 83 | 84 | 85 | # Function to search for a game on Amazon and get details 86 | def search_amazon_for_game(game, run_id, headless=False): 87 | """ 88 | Search for a game on Amazon and extract product information 89 | """ 90 | search_dir = os.path.join("game_searches", run_id) 91 | game_dir = os.path.join( 92 | search_dir, game.title.replace(" ", "_").replace("/", "_").replace(":", "") 93 | ) 94 | os.makedirs(game_dir, exist_ok=True) 95 | 96 | with NovaAct(starting_page="https://www.amazon.com", headless=headless) as n: 97 | # Search for the game with system name for better results 98 | n.act(f"Search for '{game.title} {game.system}'") 99 | 100 | # Take screenshot of search results 101 | screenshot = n.page.screenshot() 102 | with open(os.path.join(game_dir, "amazon_search.png"), "wb") as f: 103 | f.write(screenshot) 104 | 105 | # Click on the most relevant result 106 | n.act( 107 | f"Find and click on the most relevant search result for '{game.title}' for {game.system}" 108 | ) 109 | 110 | # Take screenshot of the product page 111 | screenshot = n.page.screenshot() 112 | with open(os.path.join(game_dir, "amazon_product.png"), "wb") as f: 113 | f.write(screenshot) 114 | 115 | # Extract product information 116 | result = n.act( 117 | f"Extract the following information about this {game.title} product: current price, detailed description, and the current URL. Return the data in JSON format.", 118 | schema={ 119 | "type": "object", 120 | "properties": { 121 | "amazon_price": {"type": "string"}, 122 | "description": {"type": "string"}, 123 | }, 124 | }, 125 | ) 126 | 127 | game.amazon_url = n.page.url 128 | 129 | if result.matches_schema: 130 | # Update the game object with Amazon information 131 | if "amazon_price" in result.parsed_response: 132 | game.amazon_price = result.parsed_response["amazon_price"] 133 | if "description" in result.parsed_response: 134 | game.description = result.parsed_response["description"] 135 | 136 | # Save the game data 137 | with open(os.path.join(game_dir, "game_data.json"), "w") as f: 138 | f.write(game.model_dump_json(indent=2)) 139 | 140 | return game 141 | 142 | 143 | # Function to run game search 144 | def run_game_search(system, headless=False, max_threads=5, num_games=5): 145 | # Generate a unique run ID 146 | run_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}" 147 | 148 | # Create run directory 149 | search_dir = os.path.join("game_searches", run_id) 150 | os.makedirs(search_dir, exist_ok=True) 151 | 152 | # Save search parameters 153 | search_params = { 154 | "system": system, 155 | "num_games": num_games, 156 | "max_threads": max_threads, 157 | } 158 | 159 | metadata = { 160 | "run_id": run_id, 161 | "timestamp": datetime.now().isoformat(), 162 | "search_params": search_params, 163 | } 164 | 165 | with open(os.path.join(search_dir, "metadata.json"), "w") as f: 166 | json.dump(metadata, f, indent=2) 167 | 168 | results = {"run_id": run_id, "search_params": search_params, "games": []} 169 | 170 | # Status update function 171 | def update_status(message): 172 | status_container.info(message) 173 | 174 | # Step 1: Find top games on GameFAQs 175 | update_status(f"Finding top {num_games} games for {system} on GameFAQs...") 176 | progress_bar.progress(0.1) 177 | 178 | top_games = find_top_games( 179 | system=system, 180 | num_games=num_games, 181 | headless=headless, 182 | status_update_callback=update_status, 183 | ) 184 | 185 | if not top_games: 186 | update_status(f"No games found for {system}") 187 | return results 188 | 189 | progress_bar.progress(0.4) 190 | 191 | # Step 2: Search Amazon for each game in parallel 192 | update_status("Searching Amazon for game information in parallel...") 193 | 194 | # Ensure we have at least 1 worker 195 | total_games = len(top_games) 196 | max_workers = max(1, min(total_games, max_threads)) 197 | 198 | detailed_games = [] 199 | 200 | with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: 201 | # Submit all Amazon search tasks 202 | future_to_game = { 203 | executor.submit(search_amazon_for_game, game, run_id, headless): game 204 | for game in top_games 205 | } 206 | 207 | # Collect results as they complete 208 | for i, future in enumerate( 209 | concurrent.futures.as_completed(future_to_game.keys()) 210 | ): 211 | game = future_to_game[future] 212 | try: 213 | detailed_game = future.result() 214 | detailed_games.append(detailed_game) 215 | 216 | # Update progress 217 | update_status( 218 | f"Completed Amazon research for {game.title} ({i+1}/{total_games})" 219 | ) 220 | progress_bar.progress(0.4 + (0.5 * (i + 1) / total_games)) 221 | except Exception as exc: 222 | print(f"Game {game.title} research failed: {exc}") 223 | # Fall back to original game data 224 | detailed_games.append(game) 225 | 226 | # Save the results 227 | results["games"] = detailed_games 228 | 229 | with open(os.path.join(search_dir, "results.json"), "w") as f: 230 | results_json = { 231 | "run_id": run_id, 232 | "search_params": search_params, 233 | "games": [g.model_dump() for g in detailed_games], 234 | } 235 | json.dump(results_json, f, indent=2) 236 | 237 | update_status("Game search complete!") 238 | progress_bar.progress(1.0) 239 | 240 | return results 241 | 242 | 243 | # Function to load previous search data 244 | def load_search_data(run_id): 245 | search_dir = os.path.join("game_searches", run_id) 246 | 247 | # Load metadata 248 | try: 249 | with open(os.path.join(search_dir, "metadata.json"), "r") as f: 250 | metadata = json.load(f) 251 | except: 252 | return None, None 253 | 254 | # Load game results 255 | try: 256 | with open(os.path.join(search_dir, "results.json"), "r") as f: 257 | results_json = json.load(f) 258 | results = {"run_id": run_id} 259 | results["games"] = [ 260 | GameInfo.model_validate(g) for g in results_json["games"] 261 | ] 262 | results["search_params"] = results_json["search_params"] 263 | except: 264 | return None, metadata 265 | 266 | return results, metadata 267 | 268 | 269 | # Function to get available search IDs 270 | def get_available_searches(): 271 | searches = [] 272 | if os.path.exists("game_searches"): 273 | for run_id in os.listdir("game_searches"): 274 | search_dir = os.path.join("game_searches", run_id) 275 | if os.path.isdir(search_dir) and os.path.exists( 276 | os.path.join(search_dir, "metadata.json") 277 | ): 278 | try: 279 | with open(os.path.join(search_dir, "metadata.json"), "r") as f: 280 | metadata = json.load(f) 281 | 282 | # Create a display name with timestamp and system 283 | timestamp = metadata.get("timestamp", "").split("T")[0] 284 | search_params = metadata.get("search_params", {}) 285 | system = search_params.get("system", "Unknown") 286 | num_games = search_params.get("num_games", 5) 287 | 288 | display_name = ( 289 | f"{timestamp} - {system} (Top {num_games}) ({run_id})" 290 | ) 291 | 292 | searches.append( 293 | {"id": run_id, "display": display_name, "metadata": metadata} 294 | ) 295 | except: 296 | searches.append({"id": run_id, "display": run_id, "metadata": {}}) 297 | 298 | # Sort by timestamp (newest first) 299 | searches.sort(key=lambda x: x["metadata"].get("timestamp", ""), reverse=True) 300 | return searches 301 | 302 | 303 | # App layout 304 | st.set_page_config(layout="wide", page_title="Video Game Research Tool") 305 | st.title("Video Game Research Tool") 306 | st.subheader("Find Top Games and Amazon Information") 307 | 308 | # Create two main tabs - one for new search and one for browsing past searches 309 | main_tabs = st.tabs(["New Search", "Browse Previous Searches"]) 310 | 311 | with main_tabs[0]: # New Search tab 312 | # Sidebar for inputs 313 | with st.sidebar: 314 | st.header("Search Parameters") 315 | 316 | system = st.selectbox( 317 | "Gaming System", 318 | [ 319 | "PlayStation 5", 320 | "PlayStation 4", 321 | "Xbox Series X", 322 | "Nintendo Switch", 323 | "PC", 324 | ], 325 | key="new_system", 326 | ) 327 | 328 | num_games = st.slider( 329 | "Number of Games", min_value=1, max_value=10, value=5, key="num_games" 330 | ) 331 | 332 | headless = st.checkbox("Run Browser Headless", value=False, key="new_headless") 333 | 334 | max_threads = st.slider( 335 | "Max Parallel Searches", 336 | min_value=1, 337 | max_value=10, 338 | value=5, 339 | key="max_threads", 340 | ) 341 | 342 | search_button = st.button( 343 | "Search Games", type="primary", key="new_search_button" 344 | ) 345 | 346 | # Search workflow row 347 | st.header("Search Workflow") 348 | workflow_container = st.container() 349 | with workflow_container: 350 | status_container = st.empty() 351 | progress_bar = st.progress(0) 352 | 353 | # Create a separator 354 | st.divider() 355 | 356 | # Results container 357 | results_container = st.container() 358 | 359 | with main_tabs[1]: # Browse Previous Searches tab 360 | # Get and display available searches 361 | available_searches = get_available_searches() 362 | 363 | if not available_searches: 364 | st.info("No previous searches found. Run a new search to create one.") 365 | else: 366 | st.subheader("Select a Previous Search") 367 | 368 | # Create a dropdown to select a search 369 | search_options = { 370 | search["display"]: search["id"] for search in available_searches 371 | } 372 | selected_search_display = st.selectbox( 373 | "Available Searches", 374 | options=list(search_options.keys()), 375 | key="browse_search_select", 376 | ) 377 | 378 | selected_search_id = search_options[selected_search_display] 379 | 380 | # Display search info 381 | st.success(f"Loaded Search: {selected_search_id}") 382 | 383 | # Load the search data 384 | search_data, search_metadata = load_search_data(selected_search_id) 385 | 386 | if search_data: 387 | # Display metadata 388 | search_params = search_data.get("search_params", {}) 389 | col1, col2 = st.columns(2) 390 | with col1: 391 | st.metric("System", search_params.get("system", "Unknown")) 392 | with col2: 393 | st.metric("Number of Games", search_params.get("num_games", 5)) 394 | 395 | # Display the games table 396 | if "games" in search_data and search_data["games"]: 397 | st.subheader("Top Games") 398 | 399 | # Convert to DataFrame for display 400 | df = pd.DataFrame( 401 | [ 402 | { 403 | "Title": game.title, 404 | "Genre": game.genre or "Unknown", 405 | "Release Date": game.release_date or "Unknown", 406 | "Rating": game.rating or "N/A", 407 | "Price": game.amazon_price or "N/A", 408 | "Amazon Link": game.amazon_url or "Not Found", 409 | } 410 | for game in search_data["games"] 411 | ] 412 | ) 413 | 414 | # Display the table 415 | st.dataframe(df, use_container_width=True) 416 | 417 | # Display detailed game information 418 | st.subheader("Game Details") 419 | for game in search_data["games"]: 420 | with st.expander(f"{game.title} - Details"): 421 | col1, col2 = st.columns([2, 1]) 422 | 423 | with col1: 424 | st.markdown(f"**Title:** {game.title}") 425 | st.markdown(f"**System:** {game.system}") 426 | if game.release_date: 427 | st.markdown(f"**Release Date:** {game.release_date}") 428 | if game.genre: 429 | st.markdown(f"**Genre:** {game.genre}") 430 | if game.developer: 431 | st.markdown(f"**Developer:** {game.developer}") 432 | if game.rating: 433 | st.markdown(f"**Rating:** {game.rating}") 434 | 435 | st.markdown("### Description") 436 | if game.description: 437 | st.markdown(game.description) 438 | else: 439 | st.markdown("_No description available_") 440 | 441 | with col2: 442 | if game.amazon_price: 443 | st.markdown(f"**Price:** {game.amazon_price}") 444 | if game.amazon_url: 445 | st.markdown(f"[View on Amazon]({game.amazon_url})") 446 | 447 | else: 448 | st.error( 449 | "Could not load search data. The search directory may be corrupted or incomplete." 450 | ) 451 | 452 | # Run the search when button is clicked 453 | if search_button: 454 | start_time = time.time() 455 | 456 | with st.spinner(f"Searching for top games for {system}..."): 457 | results = run_game_search( 458 | system, headless=headless, max_threads=max_threads, num_games=num_games 459 | ) 460 | 461 | end_time = time.time() 462 | 463 | # Display results 464 | with results_container: 465 | st.header("Search Results") 466 | 467 | if "games" in results and results["games"]: 468 | games = results["games"] 469 | 470 | # Convert to DataFrame for display 471 | df = pd.DataFrame( 472 | [ 473 | { 474 | "Title": game.title, 475 | "Genre": game.genre or "Unknown", 476 | "Release Date": game.release_date or "Unknown", 477 | "Rating": game.rating or "N/A", 478 | "Price": game.amazon_price or "N/A", 479 | "Amazon Link": game.amazon_url or "Not Found", 480 | } 481 | for game in games 482 | ] 483 | ) 484 | 485 | # Display the table 486 | st.dataframe(df, use_container_width=True) 487 | 488 | # Display detailed game information 489 | st.subheader("Game Details") 490 | for game in games: 491 | with st.expander(f"{game.title} - Details"): 492 | col1, col2 = st.columns([2, 1]) 493 | 494 | with col1: 495 | st.markdown(f"**Title:** {game.title}") 496 | st.markdown(f"**System:** {game.system}") 497 | if game.release_date: 498 | st.markdown(f"**Release Date:** {game.release_date}") 499 | if game.genre: 500 | st.markdown(f"**Genre:** {game.genre}") 501 | if game.developer: 502 | st.markdown(f"**Developer:** {game.developer}") 503 | if game.rating: 504 | st.markdown(f"**Rating:** {game.rating}") 505 | 506 | st.markdown("### Description") 507 | if game.description: 508 | st.markdown(game.description) 509 | else: 510 | st.markdown("_No description available_") 511 | 512 | with col2: 513 | if game.amazon_price: 514 | st.markdown(f"**Price:** {game.amazon_price}") 515 | if game.amazon_url: 516 | st.markdown(f"[View on Amazon]({game.amazon_url})") 517 | else: 518 | st.warning(f"No games found for {system}. Try another system.") 519 | 520 | # Display time saved and run ID 521 | st.success( 522 | f"Search completed in {end_time - start_time:.1f} seconds (would take ~15-30 minutes manually)" 523 | ) 524 | st.info(f"Search Run ID: {results['run_id']}") 525 | 526 | # Create a summary section at the bottom 527 | st.divider() 528 | st.subheader("Search Summary") 529 | st.write(f"Completed game search for {system}") 530 | st.write(f"Number of games: {num_games}") 531 | st.write(f"Data saved to game_searches/{results['run_id']}/") 532 | --------------------------------------------------------------------------------