├── NOTICE ├── examples └── strands_math_agent │ ├── .env.example │ ├── pyproject.toml │ ├── basic_app.py │ ├── .dockerignore │ ├── rl_app.py │ ├── reward.py │ └── README.md ├── .env.example ├── src └── agentcore_rl_toolkit │ ├── frameworks │ └── strands │ │ ├── __init__.py │ │ ├── app.py │ │ └── rollout_collector.py │ ├── __init__.py │ ├── reward_function.py │ └── app.py ├── .gitignore ├── CODE_OF_CONDUCT.md ├── .vscode └── settings.json ├── .pre-commit-config.yaml ├── pyproject.toml ├── .dockerignore ├── .bedrock_agentcore ├── examples_strands_math_agent_basic_app │ └── Dockerfile └── examples_strands_math_agent_rl_app │ └── Dockerfile ├── CONTRIBUTING.md ├── scripts └── build_docker_image_and_push_to_ecr.sh ├── README.md └── LICENSE /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /examples/strands_math_agent/.env.example: -------------------------------------------------------------------------------- 1 | BASE_URL=http://localhost:4000/v1 2 | MODEL_ID=Qwen/Qwen3-4B-Instruct-2507 3 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | AWS_REGION=your-aws-account-region 2 | AWS_ACCOUNT=your-aws-account-id 3 | ECR_REPO_NAME=your-ecr-repo-name 4 | -------------------------------------------------------------------------------- /src/agentcore_rl_toolkit/frameworks/strands/__init__.py: -------------------------------------------------------------------------------- 1 | from .app import StrandsAgentCoreRLApp 2 | 3 | __all__ = ["StrandsAgentCoreRLApp"] 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Virtual 
environments 2 | .venv/ 3 | .venv-*/ 4 | 5 | # Python cache 6 | __pycache__/ 7 | .ruff_cache/ 8 | 9 | # Build artifacts 10 | *.egg-info/ 11 | dist/ 12 | build/ 13 | 14 | # Environment variables 15 | .env 16 | 17 | # Claude Code configuration 18 | .claude/ 19 | 20 | .bedrock_agentcore.yaml 21 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.exclude": { 3 | "**/*.egg-info": true, 4 | "**/__pycache__": true, 5 | "**/dist": true, 6 | "**/build": true, 7 | }, 8 | "search.exclude": { 9 | "**/*.egg-info": true, 10 | "**/__pycache__": true, 11 | "**/dist": true, 12 | "**/build": true, 13 | "**/.venv": true 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/agentcore_rl_toolkit/__init__.py: -------------------------------------------------------------------------------- 1 | from .app import AgentCoreRLApp 2 | from .frameworks.strands import StrandsAgentCoreRLApp 3 | from .frameworks.strands.rollout_collector import RolloutCollector as StrandsRolloutCollector 4 | from .reward_function import RewardFunction 5 | 6 | __all__ = ["AgentCoreRLApp", "StrandsAgentCoreRLApp", "StrandsRolloutCollector", "RewardFunction"] 7 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: 
-------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: check-toml 6 | - id: check-yaml 7 | - id: end-of-file-fixer 8 | - id: trailing-whitespace 9 | 10 | - repo: https://github.com/astral-sh/ruff-pre-commit 11 | rev: v0.1.5 12 | hooks: 13 | - id: ruff 14 | args: [--fix, --exit-non-zero-on-fix, --show-fixes] 15 | - id: ruff-format 16 | -------------------------------------------------------------------------------- /examples/strands_math_agent/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "strands-math-agent-example" 3 | version = "0.1.0" 4 | description = "Example: Strands Math Agent using Bedrock AgentCore RL Toolkit" 5 | readme = "README.md" 6 | requires-python = ">=3.11" 7 | dependencies = [ 8 | "bedrock-agentcore>=1.0.3", 9 | "bedrock-agentcore-starter-toolkit>=0.1.34", 10 | "boto3>=1.40.55", 11 | "python-dotenv>=1.0.0", 12 | # TODO: replace the above dependencies with agentcore-rl-toolkit>=0.1.0 after PyPI indexing 13 | "strands-agents[openai]>=1.18.0", 14 | "strands-agents-tools>=0.2.16", 15 | ] 16 | 17 | [tool.setuptools] 18 | py-modules = ["basic_app", "rl_app", "reward"] 19 | -------------------------------------------------------------------------------- /src/agentcore_rl_toolkit/frameworks/strands/app.py: -------------------------------------------------------------------------------- 1 | from ...app import AgentCoreRLApp 2 | 3 | 4 | class StrandsAgentCoreRLApp(AgentCoreRLApp): 5 | def create_openai_compatible_model(self, **kwargs): 6 | """Create Strands OpenAI-compatible model for vLLM/SGLang server.""" 7 | try: 8 | from strands.models.openai import OpenAIModel 9 | except ImportError: 10 | raise ImportError("Strands not installed. 
Install with: uv pip install strands-agents[openai]") from None 11 | 12 | base_url, model_id = self._get_model_config() 13 | 14 | return OpenAIModel(client_args={"api_key": "dummy", "base_url": base_url}, model_id=model_id, **kwargs) 15 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "agentcore-rl-toolkit" 7 | version = "0.1.0" 8 | description = "Toolkit for Seamlessly Enabling RL Training with Bedrock AgentCore." 9 | readme = "README.md" 10 | requires-python = ">=3.11" 11 | dependencies = [ 12 | "bedrock-agentcore>=1.0.3", 13 | "bedrock-agentcore-starter-toolkit>=0.1.34", 14 | "boto3>=1.40.55", 15 | "python-dotenv>=1.0.0", 16 | ] 17 | 18 | [project.optional-dependencies] 19 | dev = [ 20 | "pytest>=7.0", 21 | "mypy>=1.0", 22 | "pre-commit>=3.0", 23 | ] 24 | 25 | [tool.ruff] 26 | line-length = 120 27 | select = [ 28 | "F", # pyflakes rules 29 | "E", # pycodestyle error rules 30 | "W", # pycodestyle warning rules 31 | "B", # flake8-bugbear rules 32 | "I", # isort rules 33 | ] 34 | 35 | [tool.ruff.lint.isort] 36 | known-first-party = ["agentcore_rl_toolkit"] 37 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Build artifacts 2 | build/ 3 | dist/ 4 | *.egg-info/ 5 | *.egg 6 | 7 | # Python cache 8 | __pycache__/ 9 | __pycache__* 10 | *.py[cod] 11 | *$py.class 12 | *.so 13 | .Python 14 | 15 | # Virtual environments 16 | .venv/ 17 | .env 18 | venv/ 19 | env/ 20 | ENV/ 21 | 22 | # Testing 23 | .pytest_cache/ 24 | .coverage 25 | .coverage* 26 | htmlcov/ 27 | .tox/ 28 | *.cover 29 | .hypothesis/ 30 | .mypy_cache/ 31 | .ruff_cache/ 32 | 33 | # Development 34 | *.log 35 | *.bak 36 | *.swp 37 | *.swo 
38 | *~ 39 | .DS_Store 40 | 41 | # IDEs 42 | .vscode/ 43 | .idea/ 44 | 45 | # Version control 46 | .git/ 47 | .gitignore 48 | .gitattributes 49 | 50 | # Documentation 51 | docs/ 52 | 53 | # CI/CD 54 | .github/ 55 | .gitlab-ci.yml 56 | .travis.yml 57 | 58 | # Project specific 59 | tests/ 60 | 61 | # Bedrock AgentCore specific - keep config but exclude runtime files 62 | .bedrock_agentcore.yaml 63 | .dockerignore 64 | .bedrock_agentcore/ 65 | 66 | # Keep wheelhouse for offline installations 67 | # wheelhouse/ 68 | -------------------------------------------------------------------------------- /examples/strands_math_agent/basic_app.py: -------------------------------------------------------------------------------- 1 | from bedrock_agentcore.runtime import BedrockAgentCoreApp 2 | from dotenv import load_dotenv 3 | from strands import Agent 4 | from strands.models import BedrockModel 5 | from strands_tools import calculator 6 | 7 | app = BedrockAgentCoreApp() 8 | 9 | load_dotenv() 10 | 11 | model = BedrockModel(model_id="us.anthropic.claude-sonnet-4-20250514-v1:0") 12 | 13 | agent = Agent( 14 | model=model, 15 | tools=[calculator], 16 | system_prompt=( 17 | "Your task is to solve the math problem. " 18 | + "Use calculator when applicable. " 19 | + 'Let\'s think step by step and output the final answer after "####".' 
20 | ), 21 | ) 22 | 23 | 24 | @app.entrypoint 25 | async def invoke_agent(payload): 26 | """ 27 | Invoke the agent with a payload 28 | """ 29 | user_input = payload.get("prompt") 30 | 31 | print("User input:", user_input) 32 | 33 | response = await agent.invoke_async(user_input) 34 | return response.message["content"][0]["text"] 35 | 36 | 37 | if __name__ == "__main__": 38 | app.run() 39 | -------------------------------------------------------------------------------- /.bedrock_agentcore/examples_strands_math_agent_basic_app/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim 2 | WORKDIR /app 3 | 4 | # All environment variables in one layer 5 | ENV UV_SYSTEM_PYTHON=1 \ 6 | UV_COMPILE_BYTECODE=1 \ 7 | UV_NO_PROGRESS=1 \ 8 | PYTHONUNBUFFERED=1 \ 9 | DOCKER_CONTAINER=1 \ 10 | AWS_REGION=us-west-2 \ 11 | AWS_DEFAULT_REGION=us-west-2 12 | 13 | 14 | 15 | COPY examples/strands_math_agent examples/strands_math_agent 16 | # Install from pyproject.toml directory 17 | RUN cd examples/strands_math_agent && uv pip install . 18 | 19 | 20 | 21 | 22 | RUN uv pip install aws-opentelemetry-distro==0.12.2 23 | 24 | 25 | # Signal that this is running in Docker for host binding logic 26 | ENV DOCKER_CONTAINER=1 27 | 28 | # Create non-root user 29 | RUN useradd -m -u 1000 bedrock_agentcore 30 | USER bedrock_agentcore 31 | 32 | EXPOSE 9000 33 | EXPOSE 8000 34 | EXPOSE 8080 35 | 36 | # Copy entire project (respecting .dockerignore) 37 | COPY . . 
38 | 39 | # Use the full module path 40 | 41 | CMD ["opentelemetry-instrument", "python", "-m", "basic_app"] 42 | -------------------------------------------------------------------------------- /examples/strands_math_agent/.dockerignore: -------------------------------------------------------------------------------- 1 | # Build artifacts 2 | build/ 3 | dist/ 4 | *.egg-info/ 5 | *.egg 6 | 7 | # Python cache 8 | __pycache__/ 9 | __pycache__* 10 | *.py[cod] 11 | *$py.class 12 | *.so 13 | .Python 14 | 15 | # Virtual environments 16 | .venv/ 17 | .env 18 | venv/ 19 | env/ 20 | ENV/ 21 | 22 | # Testing 23 | .pytest_cache/ 24 | .coverage 25 | .coverage* 26 | htmlcov/ 27 | .tox/ 28 | *.cover 29 | .hypothesis/ 30 | .mypy_cache/ 31 | .ruff_cache/ 32 | 33 | # Development 34 | *.log 35 | *.bak 36 | *.swp 37 | *.swo 38 | *~ 39 | .DS_Store 40 | 41 | # IDEs 42 | .vscode/ 43 | .idea/ 44 | 45 | # Version control 46 | .git/ 47 | .gitignore 48 | .gitattributes 49 | 50 | # Documentation 51 | docs/ 52 | 53 | # CI/CD 54 | .github/ 55 | .gitlab-ci.yml 56 | .travis.yml 57 | 58 | # Project specific 59 | tests/ 60 | 61 | # Bedrock AgentCore specific - keep config but exclude runtime files 62 | .bedrock_agentcore.yaml 63 | .dockerignore 64 | .bedrock_agentcore/ 65 | 66 | # Keep wheelhouse for offline installations 67 | # wheelhouse/ 68 | 69 | # Monorepo directories 70 | cdk/ 71 | terraform/ 72 | mcp/lambda/ 73 | -------------------------------------------------------------------------------- /.bedrock_agentcore/examples_strands_math_agent_rl_app/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim 2 | WORKDIR /app 3 | 4 | # All environment variables in one layer 5 | ENV UV_SYSTEM_PYTHON=1 \ 6 | UV_COMPILE_BYTECODE=1 \ 7 | UV_NO_PROGRESS=1 \ 8 | PYTHONUNBUFFERED=1 \ 9 | DOCKER_CONTAINER=1 \ 10 | AWS_REGION=us-west-2 \ 11 | AWS_DEFAULT_REGION=us-west-2 12 | 13 | 14 | 15 | COPY 
examples/strands_math_agent examples/strands_math_agent 16 | # Install from pyproject.toml directory 17 | RUN cd examples/strands_math_agent && uv pip install . 18 | 19 | 20 | 21 | 22 | RUN uv pip install aws-opentelemetry-distro==0.12.2 23 | 24 | # [TODO]: remove the following lines after being able to add this agentcore-rl-toolkit as 25 | # a dependency in examples/strands_math_agent/pyproject.toml 26 | COPY . . 27 | RUN uv pip install --force-reinstall --no-deps . 28 | 29 | 30 | # Signal that this is running in Docker for host binding logic 31 | ENV DOCKER_CONTAINER=1 32 | 33 | # Create non-root user 34 | RUN useradd -m -u 1000 bedrock_agentcore 35 | USER bedrock_agentcore 36 | 37 | EXPOSE 9000 38 | EXPOSE 8000 39 | EXPOSE 8080 40 | 41 | # Copy entire project (respecting .dockerignore) 42 | COPY . . 43 | 44 | # Use the full module path 45 | 46 | CMD ["opentelemetry-instrument", "python", "-m", "rl_app"] 47 | -------------------------------------------------------------------------------- /src/agentcore_rl_toolkit/reward_function.py: -------------------------------------------------------------------------------- 1 | """ 2 | Base reward function interface for pure reward computation in RL training. 3 | 4 | Reward functions only compute rewards - the app framework handles all validation and formatting. 5 | """ 6 | 7 | from abc import ABC, abstractmethod 8 | 9 | 10 | class RewardFunction(ABC): 11 | """ 12 | Base class for reward functions focused purely on reward computation. 13 | 14 | Users implement compute_reward() and can return: 15 | - float: Single reward value 16 | - list of floats: Per-turn rewards or single-element list for outcome rewards 17 | 18 | The app framework handles all validation, normalization, and formatting automatically. 19 | Right now, this class mostly defines a contract, but we might add some more shared utilities 20 | in the future. 
21 | """ 22 | 23 | @abstractmethod 24 | def __call__(self, **kwargs): 25 | """ 26 | Compute reward(s) for the rollout. 27 | 28 | Args: 29 | **kwargs: Flexible arguments for reward computation, such as: 30 | - response_text: Agent's response text 31 | - ground_truth: Correct answer 32 | - user_input: Original user input 33 | - Any other context needed for reward computation 34 | 35 | Returns: 36 | float: Single reward value, or 37 | list[float]: Per-turn rewards or single-element list for outcome rewards 38 | """ 39 | pass 40 | -------------------------------------------------------------------------------- /examples/strands_math_agent/rl_app.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | from reward import GSM8KReward 3 | from strands import Agent 4 | from strands_tools import calculator 5 | 6 | from agentcore_rl_toolkit import StrandsAgentCoreRLApp, StrandsRolloutCollector 7 | 8 | app = StrandsAgentCoreRLApp() 9 | 10 | load_dotenv() 11 | 12 | model = app.create_openai_compatible_model() 13 | 14 | rollout_collector = StrandsRolloutCollector() 15 | agent = Agent( 16 | model=model, 17 | tools=[calculator], 18 | system_prompt=( 19 | "Your task is to solve the math problem. " 20 | + "Use the calculator tool to compute all mathematical expressions. " 21 | + 'Let\'s think step by step and output the final answer after "####".' 22 | ), 23 | hooks=[rollout_collector], 24 | ) 25 | reward_fn = GSM8KReward() 26 | 27 | 28 | @app.rollout_entrypoint 29 | async def invoke_agent(payload, context): 30 | """ 31 | Invoke the math agent with a payload using the rollout_entrypoint decorator. 
32 | 33 | For RL training, the following fields are expected: 34 | - prompt: question from gsm8k 35 | - answer: ground truth (str) 36 | 37 | The @rollout_entrypoint decorator automatically: 38 | - Handles asyncio.create_task() for non-blocking execution 39 | - Saves rollout data to S3 and notifies SQS when returned 40 | - Handles errors and saves error rollouts for client notification 41 | - Works with both sync and async functions 42 | """ 43 | user_input = payload.get("prompt") 44 | answer = payload.get("answer") # used for computing reward 45 | 46 | print("User input:", user_input) 47 | 48 | # Hooks auto collecting rollout data while agent is running 49 | response = await agent.invoke_async(user_input) 50 | 51 | # Gather rollouts from the collector 52 | rollout_data = rollout_collector.get_rollout_data() 53 | 54 | # Compute rewards 55 | rewards = reward_fn(response_text=response.message["content"][0]["text"], ground_truth=answer) 56 | 57 | # Return expected structure (dict with `rollout_data` and `rewards` keys) 58 | # Framework validates and normalizes values automatically 59 | return {"rollout_data": rollout_data, "rewards": rewards} 60 | 61 | 62 | if __name__ == "__main__": 63 | app.run() 64 | -------------------------------------------------------------------------------- /examples/strands_math_agent/reward.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from agentcore_rl_toolkit import RewardFunction 4 | 5 | 6 | class GSM8KReward(RewardFunction): 7 | @staticmethod 8 | def extract_solution(solution_str, method="strict"): 9 | """ 10 | This is taken from https://github.com/volcengine/verl/blob/4da0d3d3/verl/utils/reward_score/gsm8k.py#L20 11 | """ 12 | _SOLUTION_CLIP_CHARS = 300 13 | 14 | assert method in ["strict", "flexible"] 15 | 16 | # Optimization: Regular expression matching on very long strings can be slow. 17 | # For math problems, the final answer is usually at the end. 
18 | # We only match on the last 300 characters, which is a safe approximation for 300 tokens. 19 | if len(solution_str) > _SOLUTION_CLIP_CHARS: 20 | solution_str = solution_str[-_SOLUTION_CLIP_CHARS:] 21 | 22 | if method == "strict": 23 | # this also tests the formatting of the model 24 | solutions = re.findall("#### (\\-?[0-9\\.\\,]+)", solution_str) 25 | if len(solutions) == 0: 26 | final_answer = None 27 | else: 28 | # take the last solution 29 | final_answer = solutions[-1].replace(",", "").replace("$", "") 30 | elif method == "flexible": 31 | answer = re.findall("(\\-?[0-9\\.\\,]+)", solution_str) 32 | final_answer = None 33 | if len(answer) == 0: 34 | # no reward is there is no answer 35 | pass 36 | else: 37 | invalid_str = ["", "."] 38 | # find the last number that is not '.' 39 | for final_answer in reversed(answer): 40 | if final_answer not in invalid_str: 41 | break 42 | return final_answer 43 | 44 | def __call__( 45 | self, 46 | response_text="", 47 | ground_truth="", 48 | method="strict", 49 | format_score=0.0, 50 | score=1.0, 51 | **kwargs, 52 | ): 53 | answer = self.extract_solution(solution_str=response_text, method=method) 54 | if answer is None: 55 | reward = 0 56 | else: 57 | if answer == ground_truth: 58 | reward = score 59 | else: 60 | reward = format_score 61 | return reward 62 | -------------------------------------------------------------------------------- /src/agentcore_rl_toolkit/frameworks/strands/rollout_collector.py: -------------------------------------------------------------------------------- 1 | """Base rollout collector for Strands framework with hooks-based data collection.""" 2 | 3 | 4 | class RolloutCollector: 5 | """Base rollout collector using Strands hooks to collect conversation data and compute rewards.""" 6 | 7 | def __init__(self): 8 | self.turns = [] 9 | 10 | def register_hooks(self, registry): 11 | """Register hooks for rollout collection with Strands HookRegistry.""" 12 | try: 13 | from strands.experimental.hooks 
import BeforeModelInvocationEvent 14 | from strands.hooks import AfterInvocationEvent 15 | except ImportError: 16 | raise ImportError("Strands not installed. Install with: uv pip install strands-agents[openai]") from None 17 | 18 | registry.add_callback(BeforeModelInvocationEvent, self.collect_messages) 19 | registry.add_callback(AfterInvocationEvent, self.prepare_rollout) 20 | 21 | def collect_messages(self, event: "BeforeModelInvocationEvent"): # noqa: F821 22 | """Collect messages before model invocation.""" 23 | 24 | agent = event.agent 25 | tool_specs = agent.tool_registry.get_all_tool_specs() 26 | formatted_request = agent.model.format_request(agent.messages, tool_specs, agent.system_prompt) 27 | 28 | # Store the complete formatted messages for this turn 29 | self.turns.append( 30 | { 31 | "turn_id": len(self.turns), 32 | "formatted_request": formatted_request, 33 | } 34 | ) 35 | 36 | def prepare_rollout(self, event: "AfterInvocationEvent"): # noqa: F821 37 | if len(self.turns) == 0: 38 | return 39 | 40 | # Since hook is triggered before model invocation, all turns end with the user message 41 | # This loop turns [[u1], [u1, a1, u2], [u1, a1, u2, a2, u3], ..., [u1, ...a(n-1), u(n)]] into 42 | # [[u1, a1], [u1, a1, u2, a2], [u1, a1, u2, a2, u3, a3], ..., [u1, ...a(n-1), u(n)]] 43 | for i in range(1, len(self.turns)): 44 | self.turns[i - 1]["formatted_request"]["messages"].append( 45 | self.turns[i]["formatted_request"]["messages"][-2], # second to last is assistant message 46 | ) 47 | 48 | # Gather final response 49 | agent = event.agent 50 | if agent.messages[-1]["role"] == "assistant": # successful invocation 51 | tool_specs = agent.tool_registry.get_all_tool_specs() 52 | formatted_request = agent.model.format_request(agent.messages, tool_specs, agent.system_prompt) 53 | final_response = formatted_request["messages"][-1] 54 | self.turns[-1]["formatted_request"]["messages"].append(final_response) 55 | 56 | def get_rollout_data(self) -> list: 57 | """Return 
collected rollout data without computing rewards.""" 58 | return self.turns 59 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. 
If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 
60 | -------------------------------------------------------------------------------- /scripts/build_docker_image_and_push_to_ecr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Initialize variables 5 | DOCKERFILE_PATH="" 6 | DOCKER_TAG="" 7 | 8 | # Parse command line arguments 9 | while [[ $# -gt 0 ]]; do 10 | case $1 in 11 | --dockerfile=*) 12 | DOCKERFILE_PATH="${1#*=}" 13 | shift 14 | ;; 15 | --tag=*) 16 | DOCKER_TAG="${1#*=}" 17 | shift 18 | ;; 19 | *) 20 | echo "Unknown option: $1" 21 | echo "Usage: $0 --dockerfile=PATH --tag=TAG" 22 | echo " --dockerfile Path to Dockerfile (required)" 23 | echo " --tag Docker image tag to use (required)" 24 | exit 1 25 | ;; 26 | esac 27 | done 28 | 29 | # Check if required parameters are provided 30 | if [ -z "$DOCKERFILE_PATH" ]; then 31 | echo "Error: --dockerfile parameter is required" 32 | echo "Usage: $0 --dockerfile=PATH --tag=TAG" 33 | exit 1 34 | fi 35 | 36 | if [ -z "$DOCKER_TAG" ]; then 37 | echo "Error: --tag parameter is required" 38 | echo "Usage: $0 --dockerfile=PATH --tag=TAG" 39 | exit 1 40 | fi 41 | 42 | echo "Building and pushing Docker image to ECR" 43 | echo "==========================================" 44 | echo "Dockerfile: ${DOCKERFILE_PATH}" 45 | echo "Tag: ${DOCKER_TAG}" 46 | echo "==========================================" 47 | 48 | # Load and validate environment 49 | if [ ! -f .env ]; then 50 | echo "Error: .env file not found!" 51 | exit 1 52 | fi 53 | 54 | # ECR_REPO_NAME, AWS_REGION, and AWS_ACCOUNT should be specified in .env 55 | source .env 56 | 57 | required_vars=("ECR_REPO_NAME" "AWS_REGION" "AWS_ACCOUNT") 58 | for var in "${required_vars[@]}"; do 59 | if [ -z "${!var}" ]; then 60 | echo "Error: $var is not set in .env file" 61 | exit 1 62 | fi 63 | done 64 | 65 | # Ensure buildx builder exists 66 | echo "" 67 | echo "Checking docker buildx..." 68 | if ! 
docker buildx inspect >/dev/null 2>&1; then 69 | echo "Creating buildx builder for multi-platform builds..." 70 | docker buildx create --use 71 | else 72 | echo "Buildx builder already configured" 73 | fi 74 | 75 | # Create ECR repository if it doesn't exist 76 | echo "" 77 | echo "Checking ECR repository..." 78 | if ! aws ecr describe-repositories --repository-names ${ECR_REPO_NAME} --region ${AWS_REGION} >/dev/null 2>&1; then 79 | echo "Creating ECR repository: ${ECR_REPO_NAME}" 80 | aws ecr create-repository --repository-name ${ECR_REPO_NAME} --region ${AWS_REGION} 81 | else 82 | echo "ECR repository ${ECR_REPO_NAME} already exists" 83 | fi 84 | 85 | # Login to ECR 86 | echo "" 87 | echo "Logging in to ECR..." 88 | aws ecr get-login-password --region ${AWS_REGION} | docker login --username AWS --password-stdin ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com 89 | 90 | # Build and push to ECR 91 | echo "" 92 | echo "Building and pushing Docker image..." 93 | echo "Image: ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO_NAME}:${DOCKER_TAG}" 94 | # Build to arm64 as required by AgentCore runtime, which uses AWS Graviton 95 | docker buildx build --platform linux/arm64 -f ${DOCKERFILE_PATH} -t ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO_NAME}:${DOCKER_TAG} --push . 96 | 97 | # Verify the image 98 | echo "" 99 | echo "Verifying pushed image..." 100 | aws ecr describe-images --repository-name ${ECR_REPO_NAME} --region ${AWS_REGION} 101 | 102 | echo "" 103 | echo "Successfully built and pushed image: ${ECR_REPO_NAME}:${DOCKER_TAG}" 104 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # AgentCore RL Toolkit (ART) 3 | 4 | Toolkit for Seamlessly Enabling RL Training on Any Agent with Bedrock AgentCore. 
5 | 6 | ## Repo Structure 7 | 8 | - **Main package**: `agentcore-rl-toolkit` is a thin wrapper around [bedrock-agentcore-sdk-python](https://github.com/aws/bedrock-agentcore-sdk-python/tree/main) that allows developers to start RL training their production agent with only a few lines of code changes. 9 | - **Examples**: Located in the `examples/` directory, each with their own `pyproject.toml` and dependencies. Their corresponding docker files are located in `.bedrock_agentcore`, most of which have been generated automatically (see [instructions below](#prepare-docker-file)). 10 | 11 | 12 | ## Start Training on Example Agents 13 | AgentCore runtime is currently supported by the following training library: 14 | - [verl](https://github.com/volcengine/verl): related [PR](https://github.com/volcengine/verl/pull/4216). 15 | 16 | Before training, build the Docker image for the RL-ready application and upload it to ECR. To do this, follow the steps below: 17 | 18 | ### Setup Credentials and Environment Variables 19 | 20 | First, make sure `aws sts get-caller-identity` returns the right identity. If not, follow the [developer guide](https://docs.aws.amazon.com/en_us/serverless-application-model/latest/developerguide/serverless-getting-started-set-up-credentials.html) to set up AWS Credentials. After setup, run `aws sts get-caller-identity` again to verify. 21 | 22 | Next, the build script requires info related to your AWS account. 
Create a `.env` file from the example: 23 | 24 | ```bash 25 | cp .env.example .env 26 | ``` 27 | 28 | Then edit `.env` and fill in your values: 29 | - `AWS_REGION`: Your AWS region (e.g., `us-west-2`) 30 | - `AWS_ACCOUNT`: Your AWS account ID 31 | - `ECR_REPO_NAME`: Your ECR repository name 32 | 33 | ### Build and Push Docker Image 34 | 35 | ```bash 36 | # Use examples/strands_math_agent as an example 37 | chmod +x scripts/build_docker_image_and_push_to_ecr.sh 38 | bash ./scripts/build_docker_image_and_push_to_ecr.sh --dockerfile=.bedrock_agentcore/examples_strands_math_agent_rl_app/Dockerfile --tag=dev 39 | ``` 40 | 41 | Then, go to the training library of your choice and simply provide AgentCore-specific config args to start training. 42 | 43 | 44 | ## Development 45 | 46 | ### Installation 47 | 48 | This project uses [uv](https://docs.astral.sh/uv/) for dependency management. If you haven't installed uv already, follow the installation [guide](https://docs.astral.sh/uv/getting-started/installation/#standalone-installer). 49 | 50 | ### For Package Development 51 | 52 | If you're developing or contributing to the `agentcore-rl-toolkit` package itself: 53 | 54 | ```bash 55 | # Enter the repository 56 | cd agentcore-rl-toolkit 57 | 58 | # Create and activate uv environment 59 | uv venv --python 3.13 60 | source .venv/bin/activate 61 | 62 | # Install with development dependencies 63 | uv pip install -e ".[dev]" 64 | 65 | # Install pre-commit hooks 66 | pre-commit install 67 | ``` 68 | 69 | Additionally, when co-developing the toolkit together with examples, add the following to the example app's docker file so that changes to the toolkit are reflected in the container. 70 | 71 | ```bash 72 | COPY . . 73 | RUN uv pip install --force-reinstall --no-deps . 74 | ``` 75 | 76 | ### For Running Examples 77 | 78 | Each example has its own dependencies and can be installed independently. 
Follow the README of the specific example (e.g., `examples/strands_math_agent/README.md`). 79 | 80 | ## Appendix 81 | 82 | ### Prepare Docker file 83 | 84 | The Docker file for most examples can be generated automatically with the `agentcore` CLI. Use `examples/strands_math_agent` as an example: 85 | 86 | ```bash 87 | agentcore configure --entrypoint examples/strands_math_agent/rl_app.py --requirements-file examples/strands_math_agent/pyproject.toml --deployment-type container --disable-memory --non-interactive 88 | ``` 89 | 90 | Make sure to run the command from the project root. 91 | 92 | ## Security 93 | 94 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 95 | 96 | ## License 97 | 98 | This project is licensed under the Apache-2.0 License. 99 | -------------------------------------------------------------------------------- /examples/strands_math_agent/README.md: -------------------------------------------------------------------------------- 1 | # Strands Math Agent 2 | 3 | ## Installation 4 | 5 | ```bash 6 | cd examples/strands_math_agent 7 | 8 | # Option A: Use the main project's venv (if already activated) 9 | uv pip install -e . 10 | 11 | # Option B: Create a separate venv for this example 12 | uv venv --python 3.13 13 | source .venv/bin/activate 14 | uv pip install -e . 15 | uv pip install -e ../../ --force-reinstall --no-deps . # install the parent repo 16 | 17 | ``` 18 | 19 | ## Run Basic App With Bedrock API 20 | ```bash 21 | cd examples/strands_math_agent 22 | 23 | # start the server in one terminal 24 | python basic_app.py 25 | 26 | # submit the following request in another terminal 27 | curl -X POST http://localhost:8080/invocations \ 28 | -H "Content-Type: application/json" \ 29 | -d '{"prompt": "Toula went to the bakery and bought various types of pastries. She bought 3 dozen donuts which cost $68 per dozen, 2 dozen mini cupcakes which cost $80 per dozen, and 6 dozen mini cheesecakes for $55 per dozen. 
How much was the total cost?"}' 30 | ``` 31 | 32 | ## Run Basic App Inside Docker Locally 33 | 34 | ### Build Docker 35 | 36 | ```bash 37 | # Make sure you are at the project root. 38 | cd ../../ 39 | 40 | # Build Docker 41 | docker build -t math:dev --load . -f .bedrock_agentcore/examples_strands_math_agent_basic_app/Dockerfile 42 | 43 | # Run Docker 44 | # Note that we override the docker CMD to avoid cluttering error logs due to missing OTLP collector, which is not set up locally. 45 | docker run -p 8080:8080 --env-file examples/strands_math_agent/.env math:dev python -m basic_app 46 | 47 | # Submit request 48 | curl -X POST http://localhost:8080/invocations \ 49 | -H "Content-Type: application/json" \ 50 | -d '{"prompt": "Toula went to the bakery and bought various types of pastries. She bought 3 dozen donuts which cost $68 per dozen, 2 dozen mini cupcakes which cost $80 per dozen, and 6 dozen mini cheesecakes for $55 per dozen. How much was the total cost?"}' 51 | ``` 52 | 53 | ## Run RL App With a Local vLLM Server 54 | ```bash 55 | 56 | # Start vLLM server (assume access to GPU) 57 | CUDA_VISIBLE_DEVICES=0 vllm serve Qwen/Qwen3-4B-Instruct-2507 --max-model-len 8192 --port 4000 --enable-auto-tool-choice --tool-call-parser hermes 58 | 59 | # Create .env file from examples/strands_math_agent/.env.example 60 | cp .env.example .env 61 | 62 | # Update the following env vars in .env if needed 63 | BASE_URL=http://localhost:4000/v1 64 | MODEL_ID=Qwen/Qwen3-4B-Instruct-2507 65 | 66 | # Submit request 67 | # Note: the main difference between this request to RL app and that to basic app is the "_training" 68 | # field. This field will be prepared automatically by the training framework (veRL) during RL training, 69 | # but when we test it out locally, we will need to specify them, especially s3 bucket name and sqs url. 70 | # You will need to create a sqs queue and s3 bucket if you don't have existing ones. 
71 | 72 | curl -X POST http://localhost:8080/invocations \ 73 | -H "Content-Type: application/json" \ 74 | -d '{ 75 | "prompt": "Toula went to the bakery and bought various types of pastries. She bought 3 dozen donuts which cost $68 per dozen, 2 dozen mini cupcakes which cost $80 per dozen, and 6 dozen mini cheesecakes for $55 per dozen. How much was the total cost?", 76 | "answer": "694", 77 | "_training": { 78 | "exp_id": "test", 79 | "sqs_url": "https://sqs.{region}.amazonaws.com/{account_id}/{queue_name}", 80 | "s3_bucket": "{bucket_name}", 81 | "session_id": "session_123", 82 | "input_id": "prompt_123" 83 | } 84 | }' 85 | ``` 86 | 87 | ## Run RL App Inside Docker Locally 88 | 89 | ### Build Docker 90 | ```bash 91 | # Make sure you are at the project root. 92 | cd ../../ 93 | 94 | # Build Docker 95 | docker build -t math_rl:dev --load . -f .bedrock_agentcore/examples_strands_math_agent_rl_app/Dockerfile 96 | 97 | # Run Docker 98 | # In addition to overriding the docker CMD, we also directly use the host's network so that the agent 99 | # can access the locally hosted model via http://localhost:4000/v1. Alternatively, replace `localhost` 100 | # with IP of your machine in BASE_URL and keep the port mapping (-p 8080:8080) 101 | docker run --network host --env-file examples/strands_math_agent/.env math_rl:dev python -m rl_app 102 | 103 | # Submit request 104 | curl -X POST http://localhost:8080/invocations \ 105 | -H "Content-Type: application/json" \ 106 | -d '{ 107 | "prompt": "Toula went to the bakery and bought various types of pastries. She bought 3 dozen donuts which cost $68 per dozen, 2 dozen mini cupcakes which cost $80 per dozen, and 6 dozen mini cheesecakes for $55 per dozen. 
import asyncio
import json
import logging
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime, timezone
from functools import wraps

import boto3
from bedrock_agentcore.runtime import BedrockAgentCoreApp


@dataclass
class TrainingConfig:
    """Training configuration for rollout collection and storage."""

    exp_id: str
    session_id: str
    input_id: str
    sqs_url: str
    s3_bucket: str

    @classmethod
    def from_dict(cls, data: dict) -> "TrainingConfig":
        """Create a TrainingConfig from a dictionary with validation.

        Args:
            data: Mapping holding the keys ``exp_id``, ``session_id``,
                ``input_id``, ``sqs_url`` and ``s3_bucket``.

        Returns:
            A populated TrainingConfig.

        Raises:
            ValueError: If a required key is missing, or if ``data`` cannot be
                indexed by string keys at all (e.g. ``None`` or a list).
        """
        try:
            return cls(
                exp_id=data["exp_id"],
                session_id=data["session_id"],
                input_id=data["input_id"],
                sqs_url=data["sqs_url"],
                s3_bucket=data["s3_bucket"],
            )
        except KeyError as e:
            raise ValueError(f"Missing required training config field: {e}") from e
        except TypeError as e:
            # Non-mapping input used to leak a bare TypeError, which callers
            # that only handle ValueError did not log as a config problem.
            raise ValueError(f"Training config must be a dictionary, got {type(data).__name__}") from e


class AgentCoreRLApp(BedrockAgentCoreApp, ABC):
    """BedrockAgentCoreApp subclass that stores rollouts in S3 and announces them on SQS."""

    def __init__(self):
        """Initialise the base app and create the S3 and SQS boto3 clients."""
        super().__init__()
        self.s3_client = boto3.client("s3")
        self.sqs_client = boto3.client("sqs")

    @abstractmethod
    def create_openai_compatible_model(self, **kwargs):
        """Create an OpenAI-compatible model for this framework.

        Must be implemented by framework-specific subclasses.

        Args:
            **kwargs: Framework-specific model parameters

        Returns:
            Framework-specific model instance configured for vLLM server
        """
        pass

    def _get_model_config(self):
        """Get and validate model configuration from environment.

        Returns:
            Tuple ``(base_url, model_id)`` read from the ``BASE_URL`` and
            ``MODEL_ID`` environment variables.

        Raises:
            ValueError: If either variable is unset or empty.
        """
        base_url = os.getenv("BASE_URL")
        model_id = os.getenv("MODEL_ID")

        if not base_url or not model_id:
            raise ValueError(
                "Missing required environment variables: BASE_URL, MODEL_ID. Make sure to call load_dotenv()."
            )

        return base_url, model_id

    def _validate_and_normalize_rollout(self, rollout_dict: dict) -> dict:
        """
        Validate and normalize rollout data structure.

        Ensures the return value from user functions has the expected format:
        {"rollout_data": [...], "rewards": [...]}

        The dictionary is updated in place (``rewards`` is replaced by its
        normalized list form) and the same object is returned.

        Args:
            rollout_dict: Dictionary returned from user function

        Returns:
            Normalized rollout dictionary with validated structure

        Raises:
            ValueError: If structure is invalid or rewards don't match rollout length
        """
        # Require both fields to exist
        if "rollout_data" not in rollout_dict:
            raise ValueError("Return value must include 'rollout_data' field")
        if "rewards" not in rollout_dict:
            raise ValueError("Return value must include 'rewards' field")

        rollout_data = rollout_dict["rollout_data"]
        rewards = rollout_dict["rewards"]

        # Validate rollout_data
        if not isinstance(rollout_data, list) or len(rollout_data) == 0:
            raise ValueError("rollout_data must be a list with length >= 1")

        # Normalize rewards to a list. A tuple is already a sequence of rewards;
        # wrapping it as [tuple] would make it pass as a single outcome reward.
        if isinstance(rewards, tuple):
            rewards = list(rewards)
        elif not isinstance(rewards, list):
            rewards = [rewards]

        # Validate rewards length
        if len(rewards) != 1 and len(rewards) != len(rollout_data):
            raise ValueError(
                f"rewards must be length 1 (outcome reward) or "
                f"match rollout_data length {len(rollout_data)} (per-step reward)"
            )

        # Update with normalized rewards
        rollout_dict["rewards"] = rewards
        return rollout_dict

    def save_rollout_and_notify(self, rollout_data: dict, training_config: dict):
        """
        Save rollout data to S3 and notify SQS queue.

        ``rollout_data`` is modified in place: ``status_code`` (200) and
        ``stop_reason`` ("end_turn") are filled in when absent, and
        ``input_id`` is always set from the training configuration.

        Args:
            rollout_data: The prepared rollout data
            training_config: Training configuration dict containing:
                - s3_bucket: S3 bucket name
                - sqs_url: SQS queue URL for notifications
                - exp_id: Experiment ID for organizing data
                - session_id: Session id for the current task
                - input_id: id for discriminating different input data examples

        Raises:
            ValueError: If the training configuration is invalid.
            Exception: Any error raised by the S3 upload or the SQS send is
                logged and re-raised unchanged.
        """
        # Validate and extract training configuration
        try:
            config = TrainingConfig.from_dict(training_config)
        except ValueError as e:
            logging.error(f"Invalid training configuration: {e}")
            raise

        result_key = f"{config.exp_id}/{config.input_id}_{config.session_id}.json"

        rollout_data.setdefault("status_code", 200)
        rollout_data.setdefault("stop_reason", "end_turn")

        # Return the input id identifying rollouts of the same input data (prompt) example
        # for advantage computation.
        rollout_data["input_id"] = config.input_id

        # Save to S3
        try:
            self.s3_client.put_object(
                Bucket=config.s3_bucket,
                Key=result_key,
                Body=json.dumps(rollout_data, indent=2),
                ContentType="application/json",
            )
            logging.info(f"Stored complete results at {result_key}")
        except Exception as e:
            logging.error(f"Failed to store results in S3: {e}")
            raise

        # Send SQS notification (mimic S3 notification format)
        try:
            sqs_message = {
                "Records": [
                    {
                        "eventSource": "rollout:collector",
                        "eventName": "ObjectCreated:Put",
                        "eventTime": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
                        "s3": {"bucket": {"name": config.s3_bucket}, "object": {"key": result_key}},
                    }
                ]
            }

            self.sqs_client.send_message(QueueUrl=config.sqs_url, MessageBody=json.dumps(sqs_message))
            logging.info(f"Sent SQS notification for {result_key}")
        except Exception as e:
            logging.error(f"Failed to send SQS notification for {result_key}: {e}")
            raise

    def rollout_entrypoint(self, func):
        """
        Decorator for RL training that handles asyncio.create_task and rollout saving automatically.

        This decorator:
        1. Handles both sync and async user functions using BedrockAgentCoreApp's infrastructure
        2. Automatically saves rollout data when user returns it
        3. Handles errors and saves error rollouts for client notification
        4. Returns immediately with {"status": "processing"} for non-blocking behavior

        Usage:
            @app.rollout_entrypoint
            def invoke_agent(payload, context):  # Can be sync or async
                # Framework-specific rollout collection
                rollout_data = collect_rollout(...)
                return rollout_data  # Automatically saved!

        Args:
            func: The user function that handles agent logic and rollout collection

        Returns:
            Decorated function registered as entrypoint
        """
        # Strong references to in-flight background tasks. The event loop only
        # keeps weak references, so a task whose handle is dropped can be
        # garbage-collected before it finishes.
        background_tasks = set()

        def _on_background_task_done(task):
            """Drop the finished task and surface any exception it raised."""
            background_tasks.discard(task)
            if task.cancelled():
                return
            # Calling exception() marks it as retrieved, avoiding the
            # "Task exception was never retrieved" warning at collection time.
            error = task.exception()
            if error is not None:
                logging.error(f"Rollout background task failed for function: {func.__name__}: {error}")

        async def rollout_background_task(payload, context):
            """Background task that does the actual agent work and rollout saving."""
            training_config = payload.get("_training")

            # Register with async task tracking system for logging and ping status
            task_id = self.add_async_task(f"{func.__name__}")

            try:
                # Use BedrockAgentCoreApp's _invoke_handler for sync/async compatibility
                # This automatically runs sync functions in thread pool to avoid blocking
                result = await self._invoke_handler(func, context, self._takes_context(func), payload)

                # If this is an RL training run, validate and normalize the rollout structure
                if training_config:
                    if not isinstance(result, dict):
                        raise ValueError("RL training runs must return a dictionary")
                    result = self._validate_and_normalize_rollout(result)

                # Save rollout data if we have training config
                if isinstance(result, dict) and training_config:
                    # boto3 calls block; run them off the event loop so other
                    # requests keep being served during the upload.
                    await asyncio.to_thread(
                        self.save_rollout_and_notify, rollout_data=result, training_config=training_config
                    )
                    logging.info(f"Rollout data saved for function: {func.__name__}")

                return result

            except Exception as e:
                # Always save error rollout for client notification
                if training_config:
                    error_rollout = {"status_code": 500, "stop_reason": str(e)}
                    try:
                        await asyncio.to_thread(
                            self.save_rollout_and_notify,
                            rollout_data=error_rollout,
                            training_config=training_config,
                        )
                        logging.error(f"Error rollout saved for function: {func.__name__}: {e}")
                    except Exception:
                        # Do not let a failed notification hide the original error.
                        logging.exception(f"Failed to save error rollout for function: {func.__name__}")
                raise
            finally:
                # Complete the async task for logging and ping status
                self.complete_async_task(task_id)

        @wraps(func)
        async def rollout_entrypoint_wrapper(payload, context=None):
            """Entrypoint that starts background task and returns immediately."""
            # NOTE(review): `context` defaults to None because @wraps exposes the user
            # function's signature; if the base class inspects it and `func` takes only
            # `payload`, it presumably calls this wrapper without a context — confirm.
            task = asyncio.create_task(rollout_background_task(payload, context))
            background_tasks.add(task)
            task.add_done_callback(_on_background_task_done)
            return {"status": "processing"}

        # Register using existing BedrockAgentCoreApp entrypoint infrastructure
        return self.entrypoint(rollout_entrypoint_wrapper)
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | --------------------------------------------------------------------------------