├── NOTICE
├── examples
    └── strands_math_agent
    │   ├── .env.example
    │   ├── pyproject.toml
    │   ├── basic_app.py
    │   ├── .dockerignore
    │   ├── rl_app.py
    │   ├── reward.py
    │   └── README.md
├── .env.example
├── src
    └── agentcore_rl_toolkit
    │   ├── frameworks
    │       └── strands
    │       │   ├── __init__.py
    │       │   ├── app.py
    │       │   └── rollout_collector.py
    │   ├── __init__.py
    │   ├── reward_function.py
    │   └── app.py
├── .gitignore
├── CODE_OF_CONDUCT.md
├── .vscode
    └── settings.json
├── .pre-commit-config.yaml
├── pyproject.toml
├── .dockerignore
├── .bedrock_agentcore
    ├── examples_strands_math_agent_basic_app
    │   └── Dockerfile
    └── examples_strands_math_agent_rl_app
    │   └── Dockerfile
├── CONTRIBUTING.md
├── scripts
    └── build_docker_image_and_push_to_ecr.sh
├── README.md
└── LICENSE


/NOTICE:
--------------------------------------------------------------------------------
1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | 


--------------------------------------------------------------------------------
/examples/strands_math_agent/.env.example:
--------------------------------------------------------------------------------
1 | BASE_URL=http://localhost:4000/v1
2 | MODEL_ID=Qwen/Qwen3-4B-Instruct-2507
3 | 


--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | AWS_REGION=your-aws-account-region
2 | AWS_ACCOUNT=your-aws-account-id
3 | ECR_REPO_NAME=your-ecr-repo-name
4 | 


--------------------------------------------------------------------------------
/src/agentcore_rl_toolkit/frameworks/strands/__init__.py:
--------------------------------------------------------------------------------
1 | from .app import StrandsAgentCoreRLApp
2 | 
3 | __all__ = ["StrandsAgentCoreRLApp"]
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Virtual environments
 2 | .venv/
 3 | .venv-*/
 4 | 
 5 | # Python cache
 6 | __pycache__/
 7 | .ruff_cache/
 8 | 
 9 | # Build artifacts
10 | *.egg-info/
11 | dist/
12 | build/
13 | 
14 | # Environment variables
15 | .env
16 | 
17 | # Claude Code configuration
18 | .claude/
19 | 
20 | .bedrock_agentcore.yaml
21 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "files.exclude": {
 3 |     "**/*.egg-info": true,
 4 |     "**/__pycache__": true,
 5 |     "**/dist": true,
 6 |     "**/build": true
 7 |   },
 8 |   "search.exclude": {
 9 |     "**/*.egg-info": true,
10 |     "**/__pycache__": true,
11 |     "**/dist": true,
12 |     "**/build": true,
13 |     "**/.venv": true
14 |   }
15 | }
16 | 


--------------------------------------------------------------------------------
/src/agentcore_rl_toolkit/__init__.py:
--------------------------------------------------------------------------------
1 | from .app import AgentCoreRLApp
2 | from .frameworks.strands import StrandsAgentCoreRLApp
3 | from .frameworks.strands.rollout_collector import RolloutCollector as StrandsRolloutCollector
4 | from .reward_function import RewardFunction
5 | 
6 | __all__ = ["AgentCoreRLApp", "StrandsAgentCoreRLApp", "StrandsRolloutCollector", "RewardFunction"]
7 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v4.5.0
 4 |     hooks:
 5 |       - id: check-toml
 6 |       - id: check-yaml
 7 |       - id: end-of-file-fixer
 8 |       - id: trailing-whitespace
 9 | 
10 |   - repo: https://github.com/astral-sh/ruff-pre-commit
11 |     rev: v0.1.5
12 |     hooks:
13 |       - id: ruff
14 |         args: [--fix, --exit-non-zero-on-fix, --show-fixes]
15 |       - id: ruff-format
16 | 


--------------------------------------------------------------------------------
/examples/strands_math_agent/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "strands-math-agent-example"
 3 | version = "0.1.0"
 4 | description = "Example: Strands Math Agent using Bedrock AgentCore RL Toolkit"
 5 | readme = "README.md"
 6 | requires-python = ">=3.11"
 7 | dependencies = [
 8 |     "bedrock-agentcore>=1.0.3",
 9 |     "bedrock-agentcore-starter-toolkit>=0.1.34",
10 |     "boto3>=1.40.55",
11 |     "python-dotenv>=1.0.0",
12 |     # TODO: replace the above dependencies with agentcore-rl-toolkit>=0.1.0 after PyPI indexing
13 |     "strands-agents[openai]>=1.18.0",
14 |     "strands-agents-tools>=0.2.16",
15 | ]
16 | 
17 | [tool.setuptools]
18 | py-modules = ["basic_app", "rl_app", "reward"]
19 | 


--------------------------------------------------------------------------------
/src/agentcore_rl_toolkit/frameworks/strands/app.py:
--------------------------------------------------------------------------------
 1 | from ...app import AgentCoreRLApp
 2 | 
 3 | 
 4 | class StrandsAgentCoreRLApp(AgentCoreRLApp):
 5 |     def create_openai_compatible_model(self, **kwargs):
 6 |         """Create Strands OpenAI-compatible model for vLLM/SGLang server."""
 7 |         try:
 8 |             from strands.models.openai import OpenAIModel
 9 |         except ImportError:
10 |             raise ImportError("Strands not installed. Install with: uv pip install strands-agents[openai]") from None
11 | 
12 |         base_url, model_id = self._get_model_config()
13 | 
14 |         return OpenAIModel(client_args={"api_key": "dummy", "base_url": base_url}, model_id=model_id, **kwargs)
15 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["hatchling"]
 3 | build-backend = "hatchling.build"
 4 | 
 5 | [project]
 6 | name = "agentcore-rl-toolkit"
 7 | version = "0.1.0"
 8 | description = "Toolkit for Seamlessly Enabling RL Training with Bedrock AgentCore."
 9 | readme = "README.md"
10 | requires-python = ">=3.11"
11 | dependencies = [
12 |     "bedrock-agentcore>=1.0.3",
13 |     "bedrock-agentcore-starter-toolkit>=0.1.34",
14 |     "boto3>=1.40.55",
15 |     "python-dotenv>=1.0.0",
16 | ]
17 | 
18 | [project.optional-dependencies]
19 | dev = [
20 |     "pytest>=7.0",
21 |     "mypy>=1.0",
22 |     "pre-commit>=3.0",
23 | ]
24 | 
25 | [tool.ruff]
26 | line-length = 120
27 | select = [
28 |     "F",  # pyflakes rules
29 |     "E",  # pycodestyle error rules
30 |     "W",  # pycodestyle warning rules
31 |     "B",  # flake8-bugbear rules
32 |     "I",  # isort rules
33 | ]
34 | 
35 | [tool.ruff.lint.isort]
36 | known-first-party = ["agentcore_rl_toolkit"]
37 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | # Build artifacts
 2 | build/
 3 | dist/
 4 | *.egg-info/
 5 | *.egg
 6 | 
 7 | # Python cache
 8 | __pycache__/
 9 | __pycache__*
10 | *.py[cod]
11 | *$py.class
12 | *.so
13 | .Python
14 | 
15 | # Virtual environments
16 | .venv/
17 | .env
18 | venv/
19 | env/
20 | ENV/
21 | 
22 | # Testing
23 | .pytest_cache/
24 | .coverage
25 | .coverage*
26 | htmlcov/
27 | .tox/
28 | *.cover
29 | .hypothesis/
30 | .mypy_cache/
31 | .ruff_cache/
32 | 
33 | # Development
34 | *.log
35 | *.bak
36 | *.swp
37 | *.swo
38 | *~
39 | .DS_Store
40 | 
41 | # IDEs
42 | .vscode/
43 | .idea/
44 | 
45 | # Version control
46 | .git/
47 | .gitignore
48 | .gitattributes
49 | 
50 | # Documentation
51 | docs/
52 | 
53 | # CI/CD
54 | .github/
55 | .gitlab-ci.yml
56 | .travis.yml
57 | 
58 | # Project specific
59 | tests/
60 | 
61 | # Bedrock AgentCore specific - keep config but exclude runtime files
62 | .bedrock_agentcore.yaml
63 | .dockerignore
64 | .bedrock_agentcore/
65 | 
66 | # Keep wheelhouse for offline installations
67 | # wheelhouse/
68 | 


--------------------------------------------------------------------------------
/examples/strands_math_agent/basic_app.py:
--------------------------------------------------------------------------------
 1 | from bedrock_agentcore.runtime import BedrockAgentCoreApp
 2 | from dotenv import load_dotenv
 3 | from strands import Agent
 4 | from strands.models import BedrockModel
 5 | from strands_tools import calculator
 6 | 
 7 | app = BedrockAgentCoreApp()
 8 | 
 9 | load_dotenv()
10 | 
11 | model = BedrockModel(model_id="us.anthropic.claude-sonnet-4-20250514-v1:0")
12 | 
13 | agent = Agent(
14 |     model=model,
15 |     tools=[calculator],
16 |     system_prompt=(
17 |         "Your task is to solve the math problem. "
18 |         + "Use calculator when applicable. "
19 |         + 'Let\'s think step by step and output the final answer after "####".'
20 |     ),
21 | )
22 | 
23 | 
@app.entrypoint
async def invoke_agent(payload):
    """
    Run the agent on ``payload["prompt"]`` and return the reply text.

    The returned value is the ``"text"`` field of the first content block of
    the agent's response message.
    """
    prompt = payload.get("prompt")
    print("User input:", prompt)

    result = await agent.invoke_async(prompt)
    first_block = result.message["content"][0]
    return first_block["text"]
35 | 
36 | 
37 | if __name__ == "__main__":
38 |     app.run()
39 | 


--------------------------------------------------------------------------------
/.bedrock_agentcore/examples_strands_math_agent_basic_app/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim
 2 | WORKDIR /app
 3 | 
 4 | # All environment variables in one layer
 5 | ENV UV_SYSTEM_PYTHON=1 \
 6 |     UV_COMPILE_BYTECODE=1 \
 7 |     UV_NO_PROGRESS=1 \
 8 |     PYTHONUNBUFFERED=1 \
 9 |     DOCKER_CONTAINER=1 \
10 |     AWS_REGION=us-west-2 \
11 |     AWS_DEFAULT_REGION=us-west-2
12 | 
13 | 
14 | 
15 | COPY examples/strands_math_agent examples/strands_math_agent
16 | # Install from pyproject.toml directory
17 | RUN cd examples/strands_math_agent && uv pip install .
18 | 
19 | 
20 | 
21 | 
22 | RUN uv pip install aws-opentelemetry-distro==0.12.2
23 | 
24 | 
25 | # Signal that this is running in Docker for host binding logic
26 | ENV DOCKER_CONTAINER=1
27 | 
28 | # Create non-root user
29 | RUN useradd -m -u 1000 bedrock_agentcore
30 | USER bedrock_agentcore
31 | 
32 | EXPOSE 9000
33 | EXPOSE 8000
34 | EXPOSE 8080
35 | 
36 | # Copy entire project (respecting .dockerignore)
37 | COPY . .
38 | 
39 | # Use the full module path
40 | 
41 | CMD ["opentelemetry-instrument", "python", "-m", "basic_app"]
42 | 


--------------------------------------------------------------------------------
/examples/strands_math_agent/.dockerignore:
--------------------------------------------------------------------------------
 1 | # Build artifacts
 2 | build/
 3 | dist/
 4 | *.egg-info/
 5 | *.egg
 6 | 
 7 | # Python cache
 8 | __pycache__/
 9 | __pycache__*
10 | *.py[cod]
11 | *$py.class
12 | *.so
13 | .Python
14 | 
15 | # Virtual environments
16 | .venv/
17 | .env
18 | venv/
19 | env/
20 | ENV/
21 | 
22 | # Testing
23 | .pytest_cache/
24 | .coverage
25 | .coverage*
26 | htmlcov/
27 | .tox/
28 | *.cover
29 | .hypothesis/
30 | .mypy_cache/
31 | .ruff_cache/
32 | 
33 | # Development
34 | *.log
35 | *.bak
36 | *.swp
37 | *.swo
38 | *~
39 | .DS_Store
40 | 
41 | # IDEs
42 | .vscode/
43 | .idea/
44 | 
45 | # Version control
46 | .git/
47 | .gitignore
48 | .gitattributes
49 | 
50 | # Documentation
51 | docs/
52 | 
53 | # CI/CD
54 | .github/
55 | .gitlab-ci.yml
56 | .travis.yml
57 | 
58 | # Project specific
59 | tests/
60 | 
61 | # Bedrock AgentCore specific - keep config but exclude runtime files
62 | .bedrock_agentcore.yaml
63 | .dockerignore
64 | .bedrock_agentcore/
65 | 
66 | # Keep wheelhouse for offline installations
67 | # wheelhouse/
68 | 
69 | # Monorepo directories
70 | cdk/
71 | terraform/
72 | mcp/lambda/
73 | 


--------------------------------------------------------------------------------
/.bedrock_agentcore/examples_strands_math_agent_rl_app/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim
 2 | WORKDIR /app
 3 | 
 4 | # All environment variables in one layer
 5 | ENV UV_SYSTEM_PYTHON=1 \
 6 |     UV_COMPILE_BYTECODE=1 \
 7 |     UV_NO_PROGRESS=1 \
 8 |     PYTHONUNBUFFERED=1 \
 9 |     DOCKER_CONTAINER=1 \
10 |     AWS_REGION=us-west-2 \
11 |     AWS_DEFAULT_REGION=us-west-2
12 | 
13 | 
14 | 
15 | COPY examples/strands_math_agent examples/strands_math_agent
16 | # Install from pyproject.toml directory
17 | RUN cd examples/strands_math_agent && uv pip install .
18 | 
19 | 
20 | 
21 | 
22 | RUN uv pip install aws-opentelemetry-distro==0.12.2
23 | 
24 | # [TODO]: remove the following lines after being able to add this agentcore-rl-toolkit as
25 | # a dependency in examples/strands_math_agent/pyproject.toml
26 | COPY . .
27 | RUN uv pip install --force-reinstall --no-deps .
28 | 
29 | 
30 | # Signal that this is running in Docker for host binding logic
31 | ENV DOCKER_CONTAINER=1
32 | 
33 | # Create non-root user
34 | RUN useradd -m -u 1000 bedrock_agentcore
35 | USER bedrock_agentcore
36 | 
37 | EXPOSE 9000
38 | EXPOSE 8000
39 | EXPOSE 8080
40 | 
41 | # Copy entire project (respecting .dockerignore)
42 | COPY . .
43 | 
44 | # Use the full module path
45 | 
46 | CMD ["opentelemetry-instrument", "python", "-m", "rl_app"]
47 | 


--------------------------------------------------------------------------------
/src/agentcore_rl_toolkit/reward_function.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Base reward function interface for pure reward computation in RL training.
 3 | 
 4 | Reward functions only compute rewards - the app framework handles all validation and formatting.
 5 | """
 6 | 
 7 | from abc import ABC, abstractmethod
 8 | 
 9 | 
class RewardFunction(ABC):
    """
    Base class for reward functions focused purely on reward computation.

    Subclasses implement __call__() (the only abstract method) and can return:
    - float: Single reward value
    - list of floats: Per-turn rewards or single-element list for outcome rewards

    The app framework handles all validation, normalization, and formatting automatically.
    Right now, this class mostly defines a contract, but we might add some more shared utilities
    in the future.
    """

    @abstractmethod
    def __call__(self, **kwargs):
        """
        Compute reward(s) for the rollout.

        Instances are invoked like functions, e.g. ``reward_fn(response_text=..., ground_truth=...)``.

        Args:
            **kwargs: Flexible arguments for reward computation, such as:
                     - response_text: Agent's response text
                     - ground_truth: Correct answer
                     - user_input: Original user input
                     - Any other context needed for reward computation

        Returns:
            float: Single reward value, or
            list[float]: Per-turn rewards or single-element list for outcome rewards
        """
        pass
40 | 


--------------------------------------------------------------------------------
/examples/strands_math_agent/rl_app.py:
--------------------------------------------------------------------------------
 1 | from dotenv import load_dotenv
 2 | from reward import GSM8KReward
 3 | from strands import Agent
 4 | from strands_tools import calculator
 5 | 
 6 | from agentcore_rl_toolkit import StrandsAgentCoreRLApp, StrandsRolloutCollector
 7 | 
 8 | app = StrandsAgentCoreRLApp()
 9 | 
10 | load_dotenv()
11 | 
12 | model = app.create_openai_compatible_model()
13 | 
14 | rollout_collector = StrandsRolloutCollector()
15 | agent = Agent(
16 |     model=model,
17 |     tools=[calculator],
18 |     system_prompt=(
19 |         "Your task is to solve the math problem. "
20 |         + "Use the calculator tool to compute all mathematical expressions. "
21 |         + 'Let\'s think step by step and output the final answer after "####".'
22 |     ),
23 |     hooks=[rollout_collector],
24 | )
25 | reward_fn = GSM8KReward()
26 | 
27 | 
@app.rollout_entrypoint
async def invoke_agent(payload, context):
    """
    Run the math agent on one training example and return rollout data plus reward.

    Expected payload fields for RL training:
    - prompt: question from gsm8k
    - answer: ground truth (str)

    The @rollout_entrypoint decorator is implemented in agentcore_rl_toolkit
    (not visible from this file); it is described as:
    - Handling asyncio.create_task() for non-blocking execution
    - Saving rollout data to S3 and notifying SQS when the function returns
    - Handling errors and saving error rollouts for client notification
    - Working with both sync and async functions

    Returns:
        dict with `rollout_data` and `rewards` keys.
    """
    question = payload.get("prompt")
    ground_truth = payload.get("answer")  # only needed for the reward

    print("User input:", question)

    # The rollout collector hook records every turn while the agent runs.
    result = await agent.invoke_async(question)

    collected = rollout_collector.get_rollout_data()

    # Score the text of the first content block against the ground truth.
    final_text = result.message["content"][0]["text"]
    reward = reward_fn(response_text=final_text, ground_truth=ground_truth)

    return {"rollout_data": collected, "rewards": reward}
60 | 
61 | 
62 | if __name__ == "__main__":
63 |     app.run()
64 | 


--------------------------------------------------------------------------------
/examples/strands_math_agent/reward.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | from agentcore_rl_toolkit import RewardFunction
 4 | 
 5 | 
 6 | class GSM8KReward(RewardFunction):
 7 |     @staticmethod
 8 |     def extract_solution(solution_str, method="strict"):
 9 |         """
10 |         This is taken from https://github.com/volcengine/verl/blob/4da0d3d3/verl/utils/reward_score/gsm8k.py#L20
11 |         """
12 |         _SOLUTION_CLIP_CHARS = 300
13 | 
14 |         assert method in ["strict", "flexible"]
15 | 
16 |         # Optimization: Regular expression matching on very long strings can be slow.
17 |         # For math problems, the final answer is usually at the end.
18 |         # We only match on the last 300 characters, which is a safe approximation for 300 tokens.
19 |         if len(solution_str) > _SOLUTION_CLIP_CHARS:
20 |             solution_str = solution_str[-_SOLUTION_CLIP_CHARS:]
21 | 
22 |         if method == "strict":
23 |             # this also tests the formatting of the model
24 |             solutions = re.findall("#### (\\-?[0-9\\.\\,]+)", solution_str)
25 |             if len(solutions) == 0:
26 |                 final_answer = None
27 |             else:
28 |                 # take the last solution
29 |                 final_answer = solutions[-1].replace(",", "").replace("$", "")
30 |         elif method == "flexible":
31 |             answer = re.findall("(\\-?[0-9\\.\\,]+)", solution_str)
32 |             final_answer = None
33 |             if len(answer) == 0:
34 |                 # no reward is there is no answer
35 |                 pass
36 |             else:
37 |                 invalid_str = ["", "."]
38 |                 # find the last number that is not '.'
39 |                 for final_answer in reversed(answer):
40 |                     if final_answer not in invalid_str:
41 |                         break
42 |         return final_answer
43 | 
44 |     def __call__(
45 |         self,
46 |         response_text="",
47 |         ground_truth="",
48 |         method="strict",
49 |         format_score=0.0,
50 |         score=1.0,
51 |         **kwargs,
52 |     ):
53 |         answer = self.extract_solution(solution_str=response_text, method=method)
54 |         if answer is None:
55 |             reward = 0
56 |         else:
57 |             if answer == ground_truth:
58 |                 reward = score
59 |             else:
60 |                 reward = format_score
61 |         return reward
62 | 


--------------------------------------------------------------------------------
/src/agentcore_rl_toolkit/frameworks/strands/rollout_collector.py:
--------------------------------------------------------------------------------
 1 | """Base rollout collector for Strands framework with hooks-based data collection."""
 2 | 
 3 | 
 4 | class RolloutCollector:
 5 |     """Base rollout collector using Strands hooks to collect conversation data and compute rewards."""
 6 | 
 7 |     def __init__(self):
 8 |         self.turns = []
 9 | 
10 |     def register_hooks(self, registry):
11 |         """Register hooks for rollout collection with Strands HookRegistry."""
12 |         try:
13 |             from strands.experimental.hooks import BeforeModelInvocationEvent
14 |             from strands.hooks import AfterInvocationEvent
15 |         except ImportError:
16 |             raise ImportError("Strands not installed. Install with: uv pip install strands-agents[openai]") from None
17 | 
18 |         registry.add_callback(BeforeModelInvocationEvent, self.collect_messages)
19 |         registry.add_callback(AfterInvocationEvent, self.prepare_rollout)
20 | 
21 |     def collect_messages(self, event: "BeforeModelInvocationEvent"):  # noqa: F821
22 |         """Collect messages before model invocation."""
23 | 
24 |         agent = event.agent
25 |         tool_specs = agent.tool_registry.get_all_tool_specs()
26 |         formatted_request = agent.model.format_request(agent.messages, tool_specs, agent.system_prompt)
27 | 
28 |         # Store the complete formatted messages for this turn
29 |         self.turns.append(
30 |             {
31 |                 "turn_id": len(self.turns),
32 |                 "formatted_request": formatted_request,
33 |             }
34 |         )
35 | 
36 |     def prepare_rollout(self, event: "AfterInvocationEvent"):  # noqa: F821
37 |         if len(self.turns) == 0:
38 |             return
39 | 
40 |         # Since hook is triggered before model invocation, all turns end with the user message
41 |         # This loop turns [[u1], [u1, a1, u2], [u1, a1, u2, a2, u3], ..., [u1, ...a(n-1), u(n)]] into
42 |         # [[u1, a1], [u1, a1, u2, a2], [u1, a1, u2, a2, u3, a3], ..., [u1, ...a(n-1), u(n)]]
43 |         for i in range(1, len(self.turns)):
44 |             self.turns[i - 1]["formatted_request"]["messages"].append(
45 |                 self.turns[i]["formatted_request"]["messages"][-2],  # second to last is assistant message
46 |             )
47 | 
48 |         # Gather final response
49 |         agent = event.agent
50 |         if agent.messages[-1]["role"] == "assistant":  # successful invocation
51 |             tool_specs = agent.tool_registry.get_all_tool_specs()
52 |             formatted_request = agent.model.format_request(agent.messages, tool_specs, agent.system_prompt)
53 |             final_response = formatted_request["messages"][-1]
54 |             self.turns[-1]["formatted_request"]["messages"].append(final_response)
55 | 
56 |     def get_rollout_data(self) -> list:
57 |         """Return collected rollout data without computing rewards."""
58 |         return self.turns
59 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing Guidelines
 2 | 
 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
 4 | documentation, we greatly value feedback and contributions from our community.
 5 | 
 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
 7 | information to effectively respond to your bug report or contribution.
 8 | 
 9 | 
10 | ## Reporting Bugs/Feature Requests
11 | 
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 | 
14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 | 
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 | 
22 | 
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 | 
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 | 
30 | To send us a pull request, please:
31 | 
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 | 
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 | 
42 | 
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 | 
46 | 
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 | 
52 | 
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.
55 | 
56 | 
57 | ## Licensing
58 | 
59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 | 


--------------------------------------------------------------------------------
/scripts/build_docker_image_and_push_to_ecr.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | set -e
  3 | 
  4 | # Initialize variables
  5 | DOCKERFILE_PATH=""
  6 | DOCKER_TAG=""
  7 | 
  8 | # Parse command line arguments
  9 | while [[ $# -gt 0 ]]; do
 10 |   case $1 in
 11 |     --dockerfile=*)
 12 |       DOCKERFILE_PATH="${1#*=}"
 13 |       shift
 14 |       ;;
 15 |     --tag=*)
 16 |       DOCKER_TAG="${1#*=}"
 17 |       shift
 18 |       ;;
 19 |     *)
 20 |       echo "Unknown option: $1"
 21 |       echo "Usage: $0 --dockerfile=PATH --tag=TAG"
 22 |       echo "  --dockerfile Path to Dockerfile (required)"
 23 |       echo "  --tag        Docker image tag to use (required)"
 24 |       exit 1
 25 |       ;;
 26 |   esac
 27 | done
 28 | 
 29 | # Check if required parameters are provided
 30 | if [ -z "$DOCKERFILE_PATH" ]; then
 31 |   echo "Error: --dockerfile parameter is required"
 32 |   echo "Usage: $0 --dockerfile=PATH --tag=TAG"
 33 |   exit 1
 34 | fi
 35 | 
 36 | if [ -z "$DOCKER_TAG" ]; then
 37 |   echo "Error: --tag parameter is required"
 38 |   echo "Usage: $0 --dockerfile=PATH --tag=TAG"
 39 |   exit 1
 40 | fi
 41 | 
 42 | echo "Building and pushing Docker image to ECR"
 43 | echo "=========================================="
 44 | echo "Dockerfile: ${DOCKERFILE_PATH}"
 45 | echo "Tag:        ${DOCKER_TAG}"
 46 | echo "=========================================="
 47 | 
 48 | # Load and validate environment
 49 | if [ ! -f .env ]; then
 50 |     echo "Error: .env file not found!"
 51 |     exit 1
 52 | fi
 53 | 
 54 | # ECR_REPO_NAME, AWS_REGION, and AWS_ACCOUNT should be specified in .env
 55 | source .env
 56 | 
 57 | required_vars=("ECR_REPO_NAME" "AWS_REGION" "AWS_ACCOUNT")
 58 | for var in "${required_vars[@]}"; do
 59 |     if [ -z "${!var}" ]; then
 60 |         echo "Error: $var is not set in .env file"
 61 |         exit 1
 62 |     fi
 63 | done
 64 | 
 65 | # Ensure buildx builder exists
 66 | echo ""
 67 | echo "Checking docker buildx..."
 68 | if ! docker buildx inspect >/dev/null 2>&1; then
 69 |     echo "Creating buildx builder for multi-platform builds..."
 70 |     docker buildx create --use
 71 | else
 72 |     echo "Buildx builder already configured"
 73 | fi
 74 | 
 75 | # Create ECR repository if it doesn't exist
 76 | echo ""
 77 | echo "Checking ECR repository..."
 78 | if ! aws ecr describe-repositories --repository-names ${ECR_REPO_NAME} --region ${AWS_REGION} >/dev/null 2>&1; then
 79 |     echo "Creating ECR repository: ${ECR_REPO_NAME}"
 80 |     aws ecr create-repository --repository-name ${ECR_REPO_NAME} --region ${AWS_REGION}
 81 | else
 82 |     echo "ECR repository ${ECR_REPO_NAME} already exists"
 83 | fi
 84 | 
 85 | # Login to ECR
 86 | echo ""
 87 | echo "Logging in to ECR..."
 88 | aws ecr get-login-password --region ${AWS_REGION} | docker login --username AWS --password-stdin ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com
 89 | 
 90 | # Build and push to ECR
 91 | echo ""
 92 | echo "Building and pushing Docker image..."
 93 | echo "Image: ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO_NAME}:${DOCKER_TAG}"
 94 | # Build to arm64 as required by AgentCore runtime, which uses AWS Graviton
 95 | docker buildx build --platform linux/arm64 -f ${DOCKERFILE_PATH} -t ${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO_NAME}:${DOCKER_TAG} --push .
 96 | 
 97 | # Verify the image
 98 | echo ""
 99 | echo "Verifying pushed image..."
100 | aws ecr describe-images --repository-name ${ECR_REPO_NAME} --region ${AWS_REGION}
101 | 
102 | echo ""
103 | echo "Successfully built and pushed image: ${ECR_REPO_NAME}:${DOCKER_TAG}"
104 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # AgentCore RL Toolkit (ART)
 3 | 
 4 | Toolkit for Seamlessly Enabling RL Training on Any Agent with Bedrock AgentCore.
 5 | 
 6 | ## Repo Structure
 7 | 
 8 | - **Main package**: `agentcore-rl-toolkit` is a thin wrapper around [bedrock-agentcore-sdk-python](https://github.com/aws/bedrock-agentcore-sdk-python/tree/main) that allows developers to start RL training their production agent with only a few lines of code changes.
 9 | - **Examples**: Located in `examples/` directory, each with their own `pyproject.toml` and dependencies. Their corresponding docker files are located in `.bedrock_agentcore`, most of which have been generated automatically (see [instructions below](#prepare-docker-file)).
10 | 
11 | 
12 | ## Start Training on Example Agents
13 | AgentCore runtime is currently supported by the following training libraries:
14 | - [verl](https://github.com/volcengine/verl): related [PR](https://github.com/volcengine/verl/pull/4216).
15 | 
16 | Before training, build the Docker image for the RL-ready application and push it to ECR. To do this, follow the steps below:
17 | 
18 | ### Setup Credentials and Environment Variables
19 | 
20 | First, make sure `aws sts get-caller-identity` returns the right identity. If not, follow the [developer guide](https://docs.aws.amazon.com/en_us/serverless-application-model/latest/developerguide/serverless-getting-started-set-up-credentials.html) to set up AWS Credentials. After setup, run `aws sts get-caller-identity` again to verify.
21 | 
22 | Next, the build script requires info related to your AWS account. Create a `.env` file from the example:
23 | 
24 | ```bash
25 | cp .env.example .env
26 | ```
27 | 
28 | Then edit `.env` and fill in your values:
29 | - `AWS_REGION`: Your AWS region (e.g., `us-west-2`)
30 | - `AWS_ACCOUNT`: Your AWS account ID
31 | - `ECR_REPO_NAME`: Your ECR repository name
32 | 
33 | ### Build and Push Docker Image
34 | 
35 | ```bash
36 | # Use examples/strands_math_agent as an example
37 | chmod +x scripts/build_docker_image_and_push_to_ecr.sh
38 | bash ./scripts/build_docker_image_and_push_to_ecr.sh --dockerfile=.bedrock_agentcore/examples_strands_math_agent_rl_app/Dockerfile --tag=dev
39 | ```
40 | 
41 | Then, go to the training library of your choice and simply provide agentcore specific config args to start training.
42 | 
43 | 
44 | ## Development
45 | 
46 | ### Installation
47 | 
48 | This project uses [uv](https://docs.astral.sh/uv/) for dependency management. If you haven't installed uv yet, follow the [installation guide](https://docs.astral.sh/uv/getting-started/installation/#standalone-installer).
49 | 
50 | ### For Package Development
51 | 
52 | If you're developing or contributing to the `agentcore-rl-toolkit` package itself:
53 | 
54 | ```bash
55 | # Enter the repository
56 | cd agentcore-rl-toolkit
57 | 
58 | # Create and activate uv environment
59 | uv venv --python 3.13
60 | source .venv/bin/activate
61 | 
62 | # Install with development dependencies
63 | uv pip install -e ".[dev]"
64 | 
65 | # Install pre-commit hooks
66 | pre-commit install
67 | ```
68 | 
69 | Additionally, when co-developing the toolkit together with examples, add the following to the example app's Dockerfile so that changes to the toolkit are reflected in the container.
70 | 
71 | ```bash
72 | COPY . .
73 | RUN uv pip install --force-reinstall --no-deps .
74 | ```
75 | 
76 | ### For Running Examples
77 | 
78 | Each example has its own dependencies and can be installed independently. Follow the README for specific examples there (e.g., `examples/strands_math_agent/README.md`).
79 | 
80 | ## Appendix
81 | 
82 | ### Prepare Docker file
83 | 
84 | The Dockerfile for most examples can be generated automatically with the `agentcore` CLI. Using `examples/strands_math_agent` as an example:
85 | 
86 | ```bash
87 | agentcore configure --entrypoint examples/strands_math_agent/rl_app.py --requirements-file examples/strands_math_agent/pyproject.toml --deployment-type container --disable-memory --non-interactive
88 | ```
89 | 
90 | Make sure to run the command from the project root.
91 | 
92 | ## Security
93 | 
94 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
95 | 
96 | ## License
97 | 
98 | This project is licensed under the Apache-2.0 License.
99 | 


--------------------------------------------------------------------------------
/examples/strands_math_agent/README.md:
--------------------------------------------------------------------------------
  1 | # Strands Math Agent
  2 | 
  3 | ## Installation
  4 | 
  5 | ```bash
  6 | cd examples/strands_math_agent
  7 | 
  8 | # Option A: Use the main project's venv (if already activated)
  9 | uv pip install -e .
 10 | 
 11 | # Option B: Create a separate venv for this example
 12 | uv venv --python 3.13
 13 | source .venv/bin/activate
 14 | uv pip install -e .
 15 | uv pip install -e ../../ --force-reinstall --no-deps # install the parent repo
 16 | 
 17 | ```
 18 | 
 19 | ## Run Basic App With Bedrock API
 20 | ```bash
 21 | cd examples/strands_math_agent
 22 | 
 23 | # start the server in one terminal
 24 | python basic_app.py
 25 | 
 26 | # submit the following request in another terminal
 27 | curl -X POST http://localhost:8080/invocations \
 28 |      -H "Content-Type: application/json" \
 29 |      -d '{"prompt": "Toula went to the bakery and bought various types of pastries. She bought 3 dozen donuts which cost $68 per dozen, 2 dozen mini cupcakes which cost $80 per dozen, and 6 dozen mini cheesecakes for $55 per dozen. How much was the total cost?"}'
 30 | ```
 31 | 
 32 | ## Run Basic App Inside Docker Locally
 33 | 
 34 | ### Build Docker
 35 | 
 36 | ```bash
 37 | # Make sure you are at the project root.
 38 | cd ../../
 39 | 
 40 | # Build Docker
 41 | docker build -t math:dev --load . -f .bedrock_agentcore/examples_strands_math_agent_basic_app/Dockerfile
 42 | 
 43 | # Run Docker
 44 | # Note that we override the docker CMD to avoid cluttering error logs due to missing OTLP collector, which is not set up locally.
 45 | docker run -p 8080:8080 --env-file examples/strands_math_agent/.env math:dev python -m basic_app
 46 | 
 47 | # Submit request
 48 | curl -X POST http://localhost:8080/invocations \
 49 |      -H "Content-Type: application/json" \
 50 |      -d '{"prompt": "Toula went to the bakery and bought various types of pastries. She bought 3 dozen donuts which cost $68 per dozen, 2 dozen mini cupcakes which cost $80 per dozen, and 6 dozen mini cheesecakes for $55 per dozen. How much was the total cost?"}'
 51 | ```
 52 | 
 53 | ## Run RL App With a Local vLLM Server
 54 | ```bash
 55 | 
 56 | # Start vLLM server (assume access to GPU)
 57 | CUDA_VISIBLE_DEVICES=0 vllm serve Qwen/Qwen3-4B-Instruct-2507 --max-model-len 8192 --port 4000 --enable-auto-tool-choice --tool-call-parser hermes
 58 | 
 59 | # Create .env file from examples/strands_math_agent/.env.example
 60 | cp .env.example .env
 61 | 
 62 | # Update the following env vars in .env if needed
 63 | BASE_URL=http://localhost:4000/v1
 64 | MODEL_ID=Qwen/Qwen3-4B-Instruct-2507
 65 | 
 66 | # Submit request
 67 | # Note: the main difference between this request to the RL app and the one to the basic app is the "_training"
 68 | # field. This field is prepared automatically by the training framework (veRL) during RL training,
 69 | # but when testing locally we need to specify it ourselves, especially the S3 bucket name and SQS URL.
 70 | # You will need to create an SQS queue and an S3 bucket if you don't have existing ones.
 71 | 
 72 | curl -X POST http://localhost:8080/invocations \
 73 |      -H "Content-Type: application/json" \
 74 |      -d '{
 75 |        "prompt": "Toula went to the bakery and bought various types of pastries. She bought 3 dozen donuts which cost $68 per dozen, 2 dozen mini cupcakes which cost $80 per dozen, and 6 dozen mini cheesecakes for $55 per dozen. How much was the total cost?",
 76 |        "answer": "694",
 77 |        "_training": {
 78 |          "exp_id": "test",
 79 |          "sqs_url": "https://sqs.{region}.amazonaws.com/{account_id}/{queue_name}",
 80 |          "s3_bucket": "{bucket_name}",
 81 |          "session_id": "session_123",
 82 |          "input_id": "prompt_123"
 83 |        }
 84 |      }'
 85 | ```
 86 | 
 87 | ## Run RL App Inside Docker Locally
 88 | 
 89 | ### Build Docker
 90 | ```bash
 91 | # Make sure you are at the project root.
 92 | cd ../../
 93 | 
 94 | # Build Docker
 95 | docker build -t math_rl:dev --load . -f .bedrock_agentcore/examples_strands_math_agent_rl_app/Dockerfile
 96 | 
 97 | # Run Docker
 98 | # In addition to overriding the docker CMD, we also directly use the host's network so that the agent
 99 | # can access the locally hosted model via http://localhost:4000/v1. Alternatively, replace `localhost`
100 | # with IP of your machine in BASE_URL and keep the port mapping (-p 8080:8080)
101 | docker run --network host --env-file examples/strands_math_agent/.env math_rl:dev python -m rl_app
102 | 
103 | # Submit request
104 | curl -X POST http://localhost:8080/invocations \
105 |      -H "Content-Type: application/json" \
106 |      -d '{
107 |        "prompt": "Toula went to the bakery and bought various types of pastries. She bought 3 dozen donuts which cost $68 per dozen, 2 dozen mini cupcakes which cost $80 per dozen, and 6 dozen mini cheesecakes for $55 per dozen. How much was the total cost?",
108 |        "answer": "694",
109 |        "_training": {
110 |          "exp_id": "test",
111 |          "sqs_url": "https://sqs.{region}.amazonaws.com/{account_id}/{queue_name}",
112 |          "s3_bucket": "{bucket_name}",
113 |          "session_id": "session_123",
114 |          "input_id": "prompt_123"
115 |        }
116 |      }'
117 | ```
118 | 


--------------------------------------------------------------------------------
/src/agentcore_rl_toolkit/app.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import json
  3 | import logging
  4 | import os
  5 | from abc import ABC, abstractmethod
  6 | from dataclasses import dataclass
  7 | from datetime import datetime, timezone
  8 | from functools import wraps
  9 | 
 10 | import boto3
 11 | from bedrock_agentcore.runtime import BedrockAgentCoreApp
 12 | 
 13 | 
 14 | @dataclass
 15 | class TrainingConfig:
 16 |     """Training configuration for rollout collection and storage."""
 17 | 
 18 |     exp_id: str
 19 |     session_id: str
 20 |     input_id: str
 21 |     sqs_url: str
 22 |     s3_bucket: str
 23 | 
 24 |     @classmethod
 25 |     def from_dict(cls, data: dict) -> "TrainingConfig":
 26 |         """Create TrainingConfig from dictionary with validation."""
 27 |         try:
 28 |             return cls(
 29 |                 exp_id=data["exp_id"],
 30 |                 session_id=data["session_id"],
 31 |                 input_id=data["input_id"],
 32 |                 sqs_url=data["sqs_url"],
 33 |                 s3_bucket=data["s3_bucket"],
 34 |             )
 35 |         except KeyError as e:
 36 |             raise ValueError(f"Missing required training config field: {e}") from e
 37 | 
 38 | 
 39 | class AgentCoreRLApp(BedrockAgentCoreApp, ABC):
 40 |     def __init__(self):
 41 |         super().__init__()
 42 |         self.s3_client = boto3.client("s3")
 43 |         self.sqs_client = boto3.client("sqs")
 44 | 
 45 |     @abstractmethod
 46 |     def create_openai_compatible_model(self, **kwargs):
 47 |         """Create an OpenAI-compatible model for this framework.
 48 | 
 49 |         Must be implemented by framework-specific subclasses.
 50 | 
 51 |         Args:
 52 |             **kwargs: Framework-specific model parameters
 53 | 
 54 |         Returns:
 55 |             Framework-specific model instance configured for vLLM server
 56 |         """
 57 |         pass
 58 | 
 59 |     def _get_model_config(self):
 60 |         """Get and validate model configuration from environment."""
 61 |         base_url = os.getenv("BASE_URL")
 62 |         model_id = os.getenv("MODEL_ID")
 63 | 
 64 |         if not base_url or not model_id:
 65 |             raise ValueError(
 66 |                 "Missing required environment variables: BASE_URL, MODEL_ID. " "Make sure to call load_dotenv()."
 67 |             )
 68 | 
 69 |         return base_url, model_id
 70 | 
 71 |     def _validate_and_normalize_rollout(self, rollout_dict: dict) -> dict:
 72 |         """
 73 |         Validate and normalize rollout data structure.
 74 | 
 75 |         Ensures the return value from user functions has the expected format:
 76 |         {"rollout_data": [...], "rewards": [...]}
 77 | 
 78 |         Args:
 79 |             rollout_dict: Dictionary returned from user function
 80 | 
 81 |         Returns:
 82 |             Normalized rollout dictionary with validated structure
 83 | 
 84 |         Raises:
 85 |             ValueError: If structure is invalid or rewards don't match rollout length
 86 |         """
 87 |         # Require both fields to exist
 88 |         if "rollout_data" not in rollout_dict:
 89 |             raise ValueError("Return value must include 'rollout_data' field")
 90 |         if "rewards" not in rollout_dict:
 91 |             raise ValueError("Return value must include 'rewards' field")
 92 | 
 93 |         rollout_data = rollout_dict["rollout_data"]
 94 |         rewards = rollout_dict["rewards"]
 95 | 
 96 |         # Validate rollout_data
 97 |         if not isinstance(rollout_data, list) or len(rollout_data) == 0:
 98 |             raise ValueError("rollout_data must be a list with length >= 1")
 99 | 
100 |         # Normalize rewards to list if not already
101 |         if not isinstance(rewards, list):
102 |             rewards = [rewards]
103 | 
104 |         # Validate rewards length
105 |         if len(rewards) != 1 and len(rewards) != len(rollout_data):
106 |             raise ValueError(
107 |                 f"rewards must be length 1 (outcome reward) or "
108 |                 f"match rollout_data length {len(rollout_data)} (per-step reward)"
109 |             )
110 | 
111 |         # Update with normalized rewards
112 |         rollout_dict["rewards"] = rewards
113 |         return rollout_dict
114 | 
115 |     def save_rollout_and_notify(self, rollout_data: dict, training_config: dict):
116 |         """
117 |         Save rollout data to S3 and notify SQS queue.
118 | 
119 |         Args:
120 |             rollout_data: The prepared rollout data
121 |             training_config: Training configuration dict containing:
122 |                 - s3_bucket: S3 bucket name
123 |                 - sqs_url: SQS queue URL for notifications
124 |                 - exp_id: Experiment ID for organizing data
125 |                 - session_id: Session id for the current task
126 |                 - input_id: id for discriminating different input data examples
127 |         """
128 |         # Validate and extract training configuration
129 |         try:
130 |             config = TrainingConfig.from_dict(training_config)
131 |         except ValueError as e:
132 |             logging.error(f"Invalid training configuration: {e}")
133 |             raise
134 | 
135 |         result_key = f"{config.exp_id}/{config.input_id}_{config.session_id}.json"
136 | 
137 |         if "status_code" not in rollout_data:
138 |             rollout_data["status_code"] = 200
139 | 
140 |         if "stop_reason" not in rollout_data:
141 |             rollout_data["stop_reason"] = "end_turn"
142 | 
143 |         # Return the input id identifying rollouts of the same input data (prompt) example
144 |         # for advantage computation.
145 |         rollout_data["input_id"] = config.input_id
146 | 
147 |         # Save to S3
148 |         try:
149 |             self.s3_client.put_object(
150 |                 Bucket=config.s3_bucket,
151 |                 Key=result_key,
152 |                 Body=json.dumps(rollout_data, indent=2),
153 |                 ContentType="application/json",
154 |             )
155 |             logging.info(f"Stored complete results at {result_key}")
156 |         except Exception as e:
157 |             logging.error(f"Failed to store results in S3: {e}")
158 |             raise
159 | 
160 |         # Send SQS notification (mimic S3 notification format)
161 |         try:
162 |             sqs_message = {
163 |                 "Records": [
164 |                     {
165 |                         "eventSource": "rollout:collector",
166 |                         "eventName": "ObjectCreated:Put",
167 |                         "eventTime": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
168 |                         "s3": {"bucket": {"name": config.s3_bucket}, "object": {"key": result_key}},
169 |                     }
170 |                 ]
171 |             }
172 | 
173 |             self.sqs_client.send_message(QueueUrl=config.sqs_url, MessageBody=json.dumps(sqs_message))
174 |             logging.info(f"Sent SQS notification for {result_key}")
175 |         except Exception as e:
176 |             logging.error(f"Failed to send SQS notification for {result_key}: {e}")
177 |             raise
178 | 
179 |     def rollout_entrypoint(self, func):
180 |         """
181 |         Decorator for RL training that handles asyncio.create_task and rollout saving automatically.
182 | 
183 |         This decorator:
184 |         1. Handles both sync and async user functions using BedrockAgentCoreApp's infrastructure
185 |         2. Automatically saves rollout data when user returns it
186 |         3. Handles errors and saves error rollouts for client notification
187 |         4. Returns immediately with {"status": "processing"} for non-blocking behavior
188 | 
189 |         Usage:
190 |             @app.rollout_entrypoint
191 |             def invoke_agent(payload, context):  # Can be sync or async
192 |                 # Framework-specific rollout collection
193 |                 rollout_data = collect_rollout(...)
194 |                 return rollout_data  # Automatically saved!
195 | 
196 |         Args:
197 |             func: The user function that handles agent logic and rollout collection
198 | 
199 |         Returns:
200 |             Decorated function registered as entrypoint
201 |         """
202 | 
203 |         async def rollout_background_task(payload, context):
204 |             """Background task that does the actual agent work and rollout saving."""
205 |             training_config = payload.get("_training")
206 | 
207 |             # Register with async task tracking system for logging and ping status
208 |             task_id = self.add_async_task(f"{func.__name__}")
209 | 
210 |             try:
211 |                 # Use BedrockAgentCoreApp's _invoke_handler for sync/async compatibility
212 |                 # This automatically runs sync functions in thread pool to avoid blocking
213 |                 result = await self._invoke_handler(func, context, self._takes_context(func), payload)
214 | 
215 |                 # If this is an RL training run, validate and normalize the rollout structure
216 |                 if training_config:
217 |                     if not isinstance(result, dict):
218 |                         raise ValueError("RL training runs must return a dictionary")
219 |                     result = self._validate_and_normalize_rollout(result)
220 | 
221 |                 # Save rollout data if we have training config
222 |                 if isinstance(result, dict) and training_config:
223 |                     self.save_rollout_and_notify(rollout_data=result, training_config=training_config)
224 |                     logging.info(f"Rollout data saved for function: {func.__name__}")
225 | 
226 |                 return result
227 | 
228 |             except Exception as e:
229 |                 # Always save error rollout for client notification
230 |                 if training_config:
231 |                     error_rollout = {"status_code": 500, "stop_reason": str(e)}
232 |                     self.save_rollout_and_notify(rollout_data=error_rollout, training_config=training_config)
233 |                     logging.error(f"Error rollout saved for function: {func.__name__}: {e}")
234 |                 raise
235 |             finally:
236 |                 # Complete the async task for logging and ping status
237 |                 self.complete_async_task(task_id)
238 | 
239 |         @wraps(func)
240 |         async def rollout_entrypoint_wrapper(payload, context):
241 |             """Entrypoint that starts background task and returns immediately."""
242 |             # Start background task without waiting
243 |             asyncio.create_task(rollout_background_task(payload, context))
244 |             return {"status": "processing"}
245 | 
246 |         # Register using existing BedrockAgentCoreApp entrypoint infrastructure
247 |         return self.entrypoint(rollout_entrypoint_wrapper)
248 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 


--------------------------------------------------------------------------------