├── .gitignore ├── sgr-agent-store ├── .gitignore ├── requirements.txt ├── README.md ├── main.py └── store_agent.py ├── sgr-agent-erc3 ├── .gitignore ├── requirements.txt ├── README.md ├── main.py └── agent.py ├── sgr-agent-erc3-prod ├── .gitignore ├── requirements.txt ├── main.py ├── lib.py ├── README.md └── agent.py ├── sgr-agent-erc3-test ├── .gitignore ├── requirements.txt ├── lib.py ├── main.py ├── README.md └── agent.py ├── res ├── log.png ├── console.png └── session.png └── README.MD /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | .envrc 3 | -------------------------------------------------------------------------------- /sgr-agent-store/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | -------------------------------------------------------------------------------- /sgr-agent-erc3/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | venv 3 | -------------------------------------------------------------------------------- /sgr-agent-erc3-prod/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | venv 3 | *.json 4 | -------------------------------------------------------------------------------- /sgr-agent-erc3-test/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | venv 3 | *.json 4 | -------------------------------------------------------------------------------- /res/log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trustbit/erc3-agents/HEAD/res/log.png -------------------------------------------------------------------------------- /res/console.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trustbit/erc3-agents/HEAD/res/console.png 
-------------------------------------------------------------------------------- /res/session.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trustbit/erc3-agents/HEAD/res/session.png -------------------------------------------------------------------------------- /sgr-agent-erc3/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://erc.timetoact-group.at/ 2 | erc3>=1.2.0 3 | openai>=2.8.1 4 | -------------------------------------------------------------------------------- /sgr-agent-store/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://erc.timetoact-group.at/ 2 | erc3>=1.2.0 3 | openai>=2.8.1 4 | -------------------------------------------------------------------------------- /sgr-agent-erc3-prod/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://erc.timetoact-group.at/ 2 | erc3>=1.2.0 3 | openai>=2.8.1 4 | -------------------------------------------------------------------------------- /sgr-agent-erc3-test/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://erc.timetoact-group.at/ 2 | erc3>=1.2.0 3 | openai>=2.8.1 4 | -------------------------------------------------------------------------------- /sgr-agent-store/README.md: -------------------------------------------------------------------------------- 1 | # Sample Agent for STORE benchmark at ERC platform 2 | 3 | This agent demonstrates how to build a simple chatbot capable of automating processes in an online store. It is not designed to be state of the art, but rather be something readable and compact, to show how to get started. 
4 | 5 | 6 | Check out [STORE benchmark](https://erc.timetoact-group.at/benchmarks/store) for the leaderboard and more information about the benchmark. Check out [SDK README.md](../README.MD) for more details about this project and SDK 7 | 8 | This agent doesn't use any external libraries aside from OpenAI SDK and ERC3 SDK. Files: 9 | 10 | - [requirements.txt](requirements.txt) - dependencies. 11 | - [main.py](main.py) - entry point that connects to the ERC platform and gets a list of tasks 12 | - [store_agent.py](store_agent.py) - agent itself. It uses [Schema-Guided Reasoning](https://abdullin.com/schema-guided-reasoning/) and is based on simple [SGR NextStep architecture](https://abdullin.com/schema-guided-reasoning/demo) 13 | -------------------------------------------------------------------------------- /sgr-agent-erc3/README.md: -------------------------------------------------------------------------------- 1 | # Sample Agent for ERC3 benchmark at ERC platform 2 | 3 | This agent demonstrates how to build a simple chatbot capable of automating enterprise APIs in a complex company. It is not designed to be state of the art, but rather something readable and compact. 4 | 5 | Read [Project README.md](../README.MD) for more details about this repository. Benchmarks and their leaderboards: 6 | 7 | - [ERC3-DEV](https://erc.timetoact-group.at/benchmarks/erc3-dev) - get started with this one 8 | - [ERC3-TEST](https://erc.timetoact-group.at/benchmarks/erc3-test) - more complex, includes subtle changes in companies 9 | - ERC3-PROD - Coming soon, December 9th! 10 | 11 | This agent doesn't use any external libraries aside from OpenAI SDK and ERC3 SDK. Files: 12 | 13 | - [requirements.txt](requirements.txt) - dependencies. 14 | - [main.py](main.py) - entry point that connects to the ERC platform and gets a list of tasks 15 | - [agent.py](agent.py) - agent itself. 
"""Entry point for the STORE benchmark sample agent.

Connects to the ERC platform, starts a session, runs the SGR agent on
every task in the session, and submits the results.
"""
import textwrap

from store_agent import run_agent
from erc3 import ERC3

core = ERC3()
MODEL_ID = "gpt-4o"

# Start session with metadata so the run is labeled on the leaderboard.
res = core.start_session(
    benchmark="store",
    workspace="my",
    name=f"Simple SGR Agent ({MODEL_ID})",
    architecture="NextStep SGR Agent with OpenAI",
    # can also set to compete_budget, compete_speed and/or compete_local
    flags=["compete_accuracy"],
)

status = core.session_status(res.session_id)
print(f"Session has {len(status.tasks)} tasks")

for task in status.tasks:
    print("=" * 40)
    print(f"Starting Task: {task.task_id} ({task.spec_id}): {task.task_text}")
    # start the task
    core.start_task(task)
    try:
        run_agent(MODEL_ID, core, task)
    except Exception as e:
        # Keep going: one failing task should not abort the whole session.
        print(f"Task {task.task_id} failed: {e}")
    # complete_task runs even on failure so the platform can still score it
    result = core.complete_task(task)
    if result.eval:
        explain = textwrap.indent(result.eval.logs, "  ")
        print(f"\nSCORE: {result.eval.score}\n{explain}\n")

core.submit_session(res.session_id)
benchmark="erc3-dev", 13 | workspace="my", 14 | name=f"NextStep SGR Agent ({MODEL_ID}) from ERC3 Samples", 15 | architecture="NextStep SGR Agent with OpenAI", 16 | # can also set to compete_budget, compete_speed and/or compete_local 17 | flags=["compete_accuracy"] 18 | ) 19 | 20 | status = core.session_status(res.session_id) 21 | print(f"Session has {len(status.tasks)} tasks") 22 | 23 | for task in status.tasks: 24 | print("="*40) 25 | print(f"Starting Task: {task.task_id} ({task.spec_id}): {task.task_text}") 26 | # start the task 27 | core.start_task(task) 28 | try: 29 | run_agent(MODEL_ID, core, task) 30 | except Exception as e: 31 | print(e) 32 | result = core.complete_task(task) 33 | if result.eval: 34 | explain = textwrap.indent(result.eval.logs, " ") 35 | print(f"\nSCORE: {result.eval.score}\n{explain}\n") 36 | 37 | core.submit_session(res.session_id) 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /sgr-agent-erc3-prod/main.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | 3 | from openai import OpenAI 4 | from agent import run_agent 5 | from erc3 import ERC3 6 | 7 | core = ERC3() 8 | MODEL_ID = "gpt-4.1" 9 | 10 | # Debugging a single task 11 | # task = core.start_new_task("erc3-test", "project_check_by_member") 12 | #run_agent(MODEL_ID, core, task) 13 | 14 | # Start session with metadata 15 | res = core.start_session( 16 | benchmark="erc3-prod", 17 | workspace="my", 18 | name=f"NextStep SGR ({MODEL_ID}) from ERC3 Samples +pipelined", 19 | architecture="NextStep SGR Agent with OpenAI", 20 | # can also set to compete_budget, compete_speed and/or compete_local 21 | flags=["compete_accuracy"] 22 | ) 23 | 24 | status = core.session_status(res.session_id) 25 | print(f"Session has {len(status.tasks)} tasks") 26 | 27 | for task in status.tasks: 28 | print("="*40) 29 | print(f"Starting Task: {task.task_id} 
import time
from typing import List, Optional, Type, TypeVar

from erc3 import ERC3, TaskInfo
from openai import OpenAI
from pydantic import BaseModel

T = TypeVar('T', bound=BaseModel)


class MyLLM:
    """Thin wrapper around OpenAI structured-output completions that also
    reports every call to the ERC platform for benchmark accounting.
    """

    client: OpenAI
    api: ERC3
    task: TaskInfo
    model: str
    max_tokens: int

    def __init__(self, api: ERC3, model: str, task: TaskInfo, max_tokens: int = 40000) -> None:
        self.api = api
        self.model = model          # default model, can be overridden per query
        self.task = task            # task the LLM usage is billed against
        self.max_tokens = max_tokens
        self.client = OpenAI()

    # BUG FIX: `model` defaulted to None but was annotated plain `str`;
    # Optional[str] is the correct annotation.
    def query(self, messages: List, response_format: Type[T], model: Optional[str] = None) -> T:
        """Run one structured completion and log usage to the ERC platform.

        Args:
            messages: chat history in the OpenAI messages format.
            response_format: pydantic model the completion must parse into.
            model: optional per-call override of the default model.

        Returns:
            The parsed pydantic object of type ``response_format``.
        """
        started = time.time()
        resp = self.client.beta.chat.completions.parse(
            messages=messages,
            model=model or self.model,
            response_format=response_format,
            max_completion_tokens=self.max_tokens,
        )

        # Report token usage and latency so the benchmark can score budget/speed.
        self.api.log_llm(
            task_id=self.task.task_id,
            model=model or self.model,
            duration_sec=time.time() - started,
            completion=resp.choices[0].message.content,
            prompt_tokens=resp.usage.prompt_tokens,
            completion_tokens=resp.usage.completion_tokens,
            cached_prompt_tokens=resp.usage.prompt_tokens_details.cached_tokens,
        )

        return resp.choices[0].message.parsed
import time
from typing import List, Type, TypeVar

from erc3 import ERC3, TaskInfo
from openai import OpenAI
from pydantic import BaseModel

T = TypeVar('T', bound=BaseModel)


class MyLLM:
    """Wraps OpenAI structured completions and mirrors usage stats to ERC3."""

    client: OpenAI
    api: ERC3
    task: TaskInfo
    model: str
    max_tokens: int

    def __init__(self, api: ERC3, model: str, task: TaskInfo, max_tokens=40000) -> None:
        self.api = api
        self.model = model
        self.task = task
        self.max_tokens = max_tokens
        # a request timeout keeps one stuck call from stalling the whole session
        self.client = OpenAI(timeout=30)

    def query(self, messages: List, response_format: Type[T], model: str = None) -> T:
        """Run a single structured completion; usage is logged against the task."""
        chosen = model or self.model
        t0 = time.time()
        completion = self.client.beta.chat.completions.parse(
            messages=messages,
            model=chosen,
            response_format=response_format,
            max_completion_tokens=self.max_tokens,
        )
        choice = completion.choices[0]
        usage = completion.usage
        self.api.log_llm(
            task_id=self.task.task_id,
            model=chosen,
            duration_sec=time.time() - t0,
            completion=choice.message.content,
            prompt_tokens=usage.prompt_tokens,
            completion_tokens=usage.completion_tokens,
            cached_prompt_tokens=usage.prompt_tokens_details.cached_tokens,
        )
        return choice.message.parsed
benchmark="erc3-test", 21 | workspace="my", 22 | name=f"NextStep SGR ({MODEL_ID}) from ERC3 Samples +pipelined", 23 | architecture="NextStep SGR Agent with OpenAI", 24 | # can also set to compete_budget, compete_speed and/or compete_local 25 | flags=["compete_accuracy"] 26 | ) 27 | 28 | status = core.session_status(res.session_id) 29 | print(f"Session has {len(status.tasks)} tasks") 30 | 31 | for task in status.tasks: 32 | print("="*40) 33 | print(f"Starting Task: {task.task_id} ({task.spec_id}): {task.task_text}") 34 | # start the task 35 | core.start_task(task) 36 | try: 37 | run_agent(MODEL_ID, core, task) 38 | except Exception as e: 39 | print(e) 40 | result = core.complete_task(task) 41 | if result.eval: 42 | explain = textwrap.indent(result.eval.logs, " ") 43 | print(f"\nSCORE: {result.eval.score}\n{explain}\n") 44 | 45 | core.submit_session(res.session_id) 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /sgr-agent-erc3-test/README.md: -------------------------------------------------------------------------------- 1 | # Sample Agent for ERC3-TEST benchmark at ERC platform 2 | 3 | This agent demonstrates how to build a simple chatbot capable of automating enterprise APIs in a complex company. It is not designed to be state of the art, but rather something readable and compact. 4 | 5 | It is an extended version of an agent from ERC3-DEV benchmark it adds: 6 | 7 | 1. Automatic detection of changed knowledge base (wiki) and rule distillation. 8 | 2. Preflight checks to short-cut security violations 9 | 3. Demo of adding custom tools and custom tool handlers 10 | 11 | So the pipeline looks like this (each step uses Schema-Guided Reasoning): 12 | 13 | 1. Extract rules from the knowledge base (reuse cache) 14 | 2. Check request for obvious security violations 15 | 3. 
Start NextStep SGR agent 16 | 17 | 18 | Read [Project README.md](../README.MD) for more details about this repository. Benchmarks and their leaderboards: 19 | 20 | - [ERC3-DEV](https://erc.timetoact-group.at/benchmarks/erc3-dev) - get started with this one 21 | - [ERC3-TEST](https://erc.timetoact-group.at/benchmarks/erc3-test) - more complex, includes subtle changes in companies 22 | - ERC3-PROD - Coming soon, December 9th! 23 | 24 | This agent doesn't use any external libraries aside from OpenAI SDK and ERC3 SDK. Files: 25 | 26 | - [requirements.txt](requirements.txt) - dependencies. 27 | - [main.py](main.py) - entry point that connects to the ERC platform and gets a list of tasks 28 | - [agent.py](agent.py) - agent itself. It uses [Schema-Guided Reasoning](https://abdullin.com/schema-guided-reasoning/) and is based on simple [SGR NextStep architecture](https://abdullin.com/schema-guided-reasoning/demo) 29 | -------------------------------------------------------------------------------- /sgr-agent-erc3-prod/README.md: -------------------------------------------------------------------------------- 1 | # Sample Agent for ERC3-PROD benchmark at ERC platform 2 | 3 | This agent demonstrates how to build a simple chatbot capable of automating enterprise APIs in a complex company. It is not designed to be state of the art, but rather something readable and compact. 4 | 5 | It is an extended version of an agent from ERC3-DEV benchmark it adds: 6 | 7 | 1. Automatic detection of changed knowledge base (wiki) and rule distillation. 8 | 2. Preflight checks to short-cut security violations 9 | 3. Demo of adding custom tools and custom tool handlers 10 | 11 | So the pipeline looks like this (each step uses Schema-Guided Reasoning): 12 | 13 | 1. Extract rules from the knowledge base (reuse cache) 14 | 2. Check request for obvious security violations 15 | 3. Start NextStep SGR agent 16 | 17 | 18 | Read [Project README.md](../README.MD) for more details about this repository. 
Benchmarks and their leaderboards: 19 | 20 | - [ERC3-DEV](https://erc.timetoact-group.at/benchmarks/erc3-dev) - get started with this one 21 | - [ERC3-TEST](https://erc.timetoact-group.at/benchmarks/erc3-test) - more complex, includes subtle changes in companies 22 | - [ERC3-PROD](https://erc.timetoact-group.at/benchmarks/erc3-prod) - the real challenge 23 | 24 | This agent doesn't use any external libraries aside from OpenAI SDK and ERC3 SDK. Files: 25 | 26 | - [requirements.txt](requirements.txt) - dependencies. 27 | - [main.py](main.py) - entry point that connects to the ERC platform and gets a list of tasks 28 | - [agent.py](agent.py) - agent itself. It uses [Schema-Guided Reasoning](https://abdullin.com/schema-guided-reasoning/) and is based on simple [SGR NextStep architecture](https://abdullin.com/schema-guided-reasoning/demo) 29 | -------------------------------------------------------------------------------- /README.MD: -------------------------------------------------------------------------------- 1 | # ERC3 Sample Agents 2 | 3 | Sample agents demonstrating how to participate in the ERC3: AI Agents in Action competition. 4 | 5 | ## Getting Started 6 | 7 | ### 1. Get Your API Key 8 | 9 | To use these agents, you'll need an ERC3 API key: 10 | 11 | 1. Visit https://erc.timetoact-group.at/ 12 | 2. Enter the email address you used during registration 13 | 3. Your API key will be displayed 14 | 15 | Note: If you haven't registered yet, https://www.timetoact-group.at/events/enterprise-rag-challenge-part-3 and allow 24 hours for your 16 | registration to be processed. 17 | 18 | ### 2. 
Prerequisites 19 | 20 | All agents require: 21 | - ERC3 SDK - for connecting to the platform and accessing benchmarks 22 | - ERC3_API_KEY - your competition API key 23 | - LLM API Key - such as OPENAI_API_KEY or equivalent (depending on the agent) 24 | 25 | ## Running an Agent 26 | 27 | Here's an example of running the sgr-agent-store (a simple agent that solves the store benchmark): 28 | 29 | ``` 30 | # Set up your environment variables 31 | export OPENAI_API_KEY=sk-... 32 | export ERC3_API_KEY=key-... 33 | 34 | # Navigate to the agent directory 35 | cd sgr-agent-store 36 | 37 | # Activate your virtual environment (optional but recommended) 38 | # python3 -m venv venv 39 | # source venv/bin/activate # On Windows: venv\Scripts\activate 40 | 41 | # Install dependencies 42 | pip install -r requirements.txt 43 | 44 | # Run the agent 45 | python3 main.py 46 | ``` 47 | 48 | this should look like this: 49 | 50 | ![res/console.png](res/console.png) 51 | 52 | you can also go to the [Web UI]() and view your running session there: 53 | 54 | ![res/session.png](res/session.png) 55 | 56 | Task execution view allows to see interactions of agent with the Benchmark in more detail: 57 | 58 | ![res/log.png](res/log.png) 59 | 60 | 61 | 62 | # Available Agents 63 | 64 | - sgr-agent-store - A simple agent implementation for [STORE benchmark](https://erc.timetoact-group.at/benchmarks/store). It relies on [Schema-Guided Reasoning](https://abdullin.com/schema-guided-reasoning/) to provide adaptive thinking capabilities with a single recursive prompt and gpt-4o. 65 | - sgr-agent-erc3 - A simple [SGR](https://abdullin.com/schema-guided-reasoning/) NextStep agent for [ERC3-DEV benchmark](https://erc.timetoact-group.at/benchmarks/erc3-dev). 
import time
from typing import Annotated, List, Union, Literal
from annotated_types import MaxLen, MinLen
from pydantic import BaseModel, Field
from erc3 import store, ApiException, TaskInfo, ERC3
from openai import OpenAI

client = OpenAI()

# Safety cap on reasoning iterations per task.
# BUG FIX: the old comment claimed a limit of 20 while the loop ran 30 times;
# the cap is now a named constant so comment and code cannot drift apart.
MAX_STEPS = 30


class ReportTaskCompletion(BaseModel):
    """Terminal tool: the agent reports that the task finished (or failed)."""
    tool: Literal["report_completion"]
    completed_steps_laconic: List[str]
    code: Literal["completed", "failed"]


class NextStep(BaseModel):
    """One Schema-Guided Reasoning step: reflect, plan, then pick a tool."""
    current_state: str
    # we'll use only the first step, discarding all the rest.
    plan_remaining_steps_brief: Annotated[List[str], MinLen(1), MaxLen(5)] = Field(..., description="explain your thoughts on how to accomplish - what steps to execute")
    # now let's continue the cascade and check with LLM if the task is done
    task_completed: bool
    # Routing to one of the tools to execute the first remaining step
    # if task is completed, model will pick ReportTaskCompletion
    function: Union[
        ReportTaskCompletion,
        store.Req_ListProducts,
        store.Req_ViewBasket,
        store.Req_ApplyCoupon,
        store.Req_RemoveCoupon,
        store.Req_AddProductToBasket,
        store.Req_RemoveItemFromBasket,
        store.Req_CheckoutBasket,
    ] = Field(..., description="execute first remaining step")


system_prompt = """
You are a business assistant helping customers of OnlineStore.

- Clearly report when tasks are done.
- If ListProducts returns non-zero "NextOffset", it means there are more products available.
- You can apply coupon codes to get discounts. Use ViewBasket to see current discount and total.
- Only one coupon can be applied at a time. Apply a new coupon to replace the current one, or remove it explicitly.
"""

CLI_RED = "\x1B[31m"
CLI_GREEN = "\x1B[32m"
CLI_BLUE = "\x1B[34m"
CLI_CLR = "\x1B[0m"


def run_agent(model: str, api: ERC3, task: TaskInfo):
    """Drive one STORE task to completion with a NextStep SGR loop.

    Args:
        model: OpenAI model id to use for every reasoning step.
        api: ERC3 platform client (used for the store API and usage logging).
        task: the benchmark task to solve.
    """
    store_api = api.get_store_client(task)

    # log will contain conversation context for the agent within task
    log = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": task.task_text},
    ]

    # limit the number of reasoning steps, just to be safe
    for i in range(MAX_STEPS):
        step = f"step_{i + 1}"
        print(f"Next {step}... ", end="")

        started = time.time()

        resp = client.beta.chat.completions.parse(
            model=model,
            response_format=NextStep,
            messages=log,
            max_completion_tokens=16384,
        )

        api.log_llm(
            task_id=task.task_id,
            model=model,  # should match model slug from OpenRouter
            duration_sec=time.time() - started,
            completion=resp.choices[0].message.content,
            prompt_tokens=resp.usage.prompt_tokens,
            completion_tokens=resp.usage.completion_tokens,
            cached_prompt_tokens=resp.usage.prompt_tokens_details.cached_tokens,
        )

        job = resp.choices[0].message.parsed

        # if SGR wants to finish, then quit loop
        if isinstance(job.function, ReportTaskCompletion):
            # BUG FIX: the original printed literal rich-style "[blue]...[/blue]"
            # markup through plain print(); use the ANSI codes like the rest
            # of this file does.
            print(f"{CLI_BLUE}agent {job.function.code}{CLI_CLR}. Summary:")
            for s in job.function.completed_steps_laconic:
                print(f"- {s}")
            break

        # print next step for debugging
        print(job.plan_remaining_steps_brief[0], f"\n  {job.function}")

        # Add the tool request to conversation history as if OpenAI asked for it.
        # a shorter way would be to just append `job.model_dump_json()` entirely
        log.append({
            "role": "assistant",
            "content": job.plan_remaining_steps_brief[0],
            "tool_calls": [{
                "type": "function",
                "id": step,
                "function": {
                    "name": job.function.__class__.__name__,
                    "arguments": job.function.model_dump_json(),
                }}]
        })

        # now execute the tool by dispatching command to our handler
        try:
            result = store_api.dispatch(job.function)
            txt = result.model_dump_json(exclude_none=True, exclude_unset=True)
            print(f"{CLI_GREEN}OUT{CLI_CLR}: {txt}")
        except ApiException as e:
            txt = e.detail
            # print to console in red
            print(f"{CLI_RED}ERR: {e.api_error.error}{CLI_CLR}")

        # add results back to the conversation history so the agent
        # can act on them in the next reasoning step.
        log.append({"role": "tool", "content": txt, "tool_call_id": step})
def run_agent(model: str, api: ERC3, task: TaskInfo):
    """Drive one ERC3-DEV task to completion with a NextStep SGR loop.

    Args:
        model: OpenAI model id to use for every reasoning step.
        api: ERC3 platform client (used for the ERC API and usage logging).
        task: the benchmark task to solve.
    """
    store_api = api.get_erc_client(task)
    about = store_api.who_am_i()

    system_prompt = f"""
You are a business assistant helping customers of Aetherion.

When interacting with Aetherion's internal systems, always operate strictly within the user's access level (Executives have broad access, project leads can write with the projects they lead, team members can read). For guests (public access, no user account) respond exclusively with public-safe data, refuse sensitive queries politely, and never reveal internal details or identities. Responses must always include a clear outcome status and explicit entity links.

To confirm project access - get or find project (and get after finding)
When updating entry - fill all fields to keep with old values from being erased
When task is done or can't be done - Req_ProvideAgentResponse.

# Current user info:
{about.model_dump_json()}
"""
    # enrich the prompt with the caller's employee record when one exists
    if about.current_user:
        usr = store_api.get_employee(about.current_user)
        system_prompt += f"\n{usr.model_dump_json()}"

    # log will contain conversation context for the agent within task
    log = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": task.task_text},
    ]

    # let's limit number of reasoning steps by 20, just to be safe
    for i in range(20):
        step = f"step_{i + 1}"
        print(f"Next {step}... ", end="")

        started = time.time()

        resp = client.beta.chat.completions.parse(
            model=model,
            response_format=NextStep,
            messages=log,
            max_completion_tokens=16384,
        )

        api.log_llm(
            task_id=task.task_id,
            model=model,  # should match model slug from OpenRouter
            duration_sec=time.time() - started,
            completion=resp.choices[0].message.content,
            prompt_tokens=resp.usage.prompt_tokens,
            completion_tokens=resp.usage.completion_tokens,
            cached_prompt_tokens=resp.usage.prompt_tokens_details.cached_tokens,
        )

        job = resp.choices[0].message.parsed

        # print next step for debugging
        print(job.plan_remaining_steps_brief[0], f"\n  {job.function}")

        # Add the tool request to conversation history as if OpenAI asked for it.
        # a shorter way would be to just append `job.model_dump_json()` entirely
        log.append({
            "role": "assistant",
            "content": job.plan_remaining_steps_brief[0],
            "tool_calls": [{
                "type": "function",
                "id": step,
                "function": {
                    "name": job.function.__class__.__name__,
                    "arguments": job.function.model_dump_json(),
                }}]
        })

        # now execute the tool by dispatching command to our handler
        try:
            result = store_api.dispatch(job.function)
            txt = result.model_dump_json(exclude_none=True, exclude_unset=True)
            print(f"{CLI_GREEN}OUT{CLI_CLR}: {txt}")
        except ApiException as e:
            txt = e.detail
            # print to console in red
            print(f"{CLI_RED}ERR: {e.api_error.error}{CLI_CLR}")

        # if SGR wants to finish, then quit loop
        if isinstance(job.function, dev.Req_ProvideAgentResponse):
            print(f"{CLI_BLUE}agent {job.function.outcome}{CLI_CLR}. Summary:\n{job.function.message}")

            # BUG FIX: `links` can be None when the model provides no entity
            # links; iterating None raised TypeError. Guard with `or []`,
            # matching the style used elsewhere in these samples.
            for link in job.function.links or []:
                print(f" - link {link.kind}: {link.id}")

            break

        # add results back to the conversation history so the agent
        # can act on them in the next reasoning step.
        log.append({"role": "tool", "content": txt, "tool_call_id": step})
44 | plan_remaining_steps_brief: Annotated[List[str], MinLen(1), MaxLen(5)] = Field(..., description="explain your thoughts on how to accomplish - what steps to execute") 45 | # now let's continue the cascade and check with LLM if the task is done 46 | task_completed: bool 47 | # Routing to one of the tools to execute the first remaining step 48 | # if task is completed, model will pick ReportTaskCompletion 49 | first_step_from_plan: Union[ 50 | dev.Req_ProvideAgentResponse, 51 | dev.Req_ListProjects, 52 | dev.Req_SearchProjects, 53 | Req_ListAllProjectsForUser, 54 | dev.Req_GetProject, 55 | dev.Req_UpdateProjectTeam, 56 | dev.Req_UpdateProjectStatus, 57 | dev.Req_ListEmployees, 58 | dev.Req_SearchEmployees, 59 | dev.Req_GetEmployee, 60 | dev.Req_UpdateEmployeeInfo, 61 | dev.Req_ListCustomers, 62 | Req_ListAllCustomersForUser, 63 | dev.Req_GetCustomer, 64 | dev.Req_SearchCustomers, 65 | dev.Req_SearchTimeEntries, 66 | GetTimesheetReportByProject, 67 | dev.Req_TimeSummaryByEmployee, 68 | dev.Req_GetTimeEntry, 69 | CreateTimesheetEntryForUser, 70 | dev.Req_UpdateTimeEntry, 71 | Req_DeleteWikiPage, 72 | ] = Field(..., description="first step from plan above") 73 | 74 | CLI_RED = "\x1B[31m" 75 | CLI_GREEN = "\x1B[32m" 76 | CLI_BLUE = "\x1B[34m" 77 | CLI_CLR = "\x1B[0m" 78 | 79 | # custom tool to list my projects 80 | def list_my_projects(api: Erc3Client, user: str) -> Resp_ListAllProjectsForUser: 81 | page_limit = 32 82 | next_offset = 0 83 | lead_in = [] 84 | member_of = [] 85 | while True: 86 | try: 87 | prjs = api.search_projects(offset=next_offset, limit=page_limit, include_archived=True, team=dict(employee_id=user)) 88 | 89 | for p in prjs.projects or []: 90 | detail = api.get_project(p.id).project 91 | role = [t for t in detail.team if t.employee == user][0].role 92 | 93 | if role == "Lead": 94 | lead_in.append(detail) 95 | else: 96 | member_of.append(detail) 97 | 98 | next_offset = prjs.next_offset 99 | if next_offset == -1: 100 | return 
Resp_ListAllProjectsForUser(lead_in=lead_in, member_of=member_of) 101 | except ApiException as e: 102 | if "page limit exceeded" in str(e): 103 | page_limit /= 2 104 | if page_limit <= 2: 105 | raise 106 | def list_my_customers(api: Erc3Client, user: str) -> Resp_ListAllCustomersForUser: 107 | page_limit = 32 108 | next_offset = 0 109 | loaded = [] 110 | while True: 111 | try: 112 | custs = api.search_customers(offset=next_offset, limit=page_limit, account_managers=[user]) 113 | 114 | for p in custs.companies or []: 115 | loaded.append(api.get_customer(p.id).company) 116 | 117 | next_offset = custs.next_offset 118 | if next_offset == -1: 119 | return Resp_ListAllCustomersForUser(customers=loaded) 120 | except ApiException as e: 121 | if "page limit exceeded" in str(e): 122 | page_limit /= 2 123 | if page_limit <= 2: 124 | raise 125 | 126 | # Tool do automatically distill wiki rules 127 | def distill_rules(api: Erc3Client, llm: MyLLM, about: dev.Resp_WhoAmI) -> str: 128 | 129 | context_id = about.wiki_sha1 130 | 131 | loc = Path(f"context_{context_id}_v2.json") 132 | 133 | Category = Literal["applies_to_guests", "applies_to_users", "other"] 134 | 135 | class Rule(BaseModel): 136 | why_relevant_summary: str = Field(...) 137 | category: Category = Field(...) 138 | compact_rule: str 139 | 140 | class DistillWikiRules(BaseModel): 141 | company_name: str 142 | company_locations: List[str] = Field(..., description="list of locations where company operates") 143 | company_execs: List[str] 144 | rules: List[Rule] 145 | 146 | if not loc.exists(): 147 | print("New context discovered. Distilling rules once") 148 | schema = json.dumps(NextStep.model_json_schema()) 149 | prompt = f""" 150 | Carefully review the wiki below and identify most important security/scoping/data rules that will be highly relevant for the agent or user that are automating APIs of this company. 151 | 152 | Pay attention to the rules that mention AI Agent or Public ChatBot. 
When talking about Public Chatbot use - applies_to_guests 153 | 154 | Rules must be compact RFC-style, ok to use pseudo code for compactness. They will be used by an agent that operates following APIs: {schema} 155 | """.strip() 156 | 157 | for path in api.list_wiki().paths: 158 | content = api.load_wiki(path) 159 | prompt += f"\n---- start of {path} ----\n\n{content}\n\n ---- end of {path} ----\n" 160 | 161 | 162 | messages = [{ "role": "system", "content": prompt}] 163 | 164 | distilled = llm.query(messages, DistillWikiRules, "gpt-5.1") 165 | loc.write_text(distilled.model_dump_json(indent=2)) 166 | 167 | else: 168 | distilled = DistillWikiRules.model_validate_json(loc.read_text()) 169 | 170 | prompt = f"""You are AI Chatbot automating {distilled.company_name}. 171 | 172 | Company locations: {distilled.company_locations} 173 | Company execs: {distilled.company_execs} 174 | 175 | Use available tools to execute task from the current user. 176 | 177 | - To confirm project access - get or find project (and get after finding) 178 | - Archival of entries or wiki deletion are not irreversible operations. 179 | - Respond with proper Req_ProvideAgentResponse when: 180 | - Task is done 181 | - Task can't be completed (e.g. internal error, user is not allowed or clarification is needed) 182 | - Make sure to always include ids of referenced entities in response links. 
183 | - if user might have access to a resource - double-chech that BEFORE denying 184 | 185 | # Rules 186 | """ 187 | relevant_categories: List[Category] = ["other"] 188 | if about.is_public: 189 | relevant_categories.append("applies_to_guests") 190 | else: 191 | relevant_categories.append("applies_to_users") 192 | 193 | for r in distilled.rules: 194 | if r.category in relevant_categories: 195 | prompt += f"\n- {r.compact_rule}" 196 | 197 | # append at the end to keep rules in context cache 198 | prompt += f"# Current context (trust it)\nDate:{about.today}" 199 | 200 | if about.is_public: 201 | prompt += "\nCurrent actor is GUEST (Anonymous user)" 202 | else: 203 | employee = api.get_employee(about.current_user).employee 204 | employee.skills = [] 205 | employee.wills = [] 206 | dump = employee.model_dump_json() 207 | prompt += f"\n# Current actor is authenticated user: {employee.name}:\n{dump}" 208 | 209 | return prompt 210 | 211 | 212 | def my_dispatch(client: Erc3Client, cmd: BaseModel, about: dev.Resp_WhoAmI): 213 | # example how to add custom tools or tool handling 214 | if isinstance(cmd, dev.Req_UpdateEmployeeInfo): 215 | # first pull 216 | cur = client.get_employee(cmd.employee).employee 217 | 218 | cmd.notes = cmd.notes or cur.notes 219 | cmd.salary = cmd.salary or cur.salary 220 | cmd.wills = cmd.wills or cur.wills 221 | cmd.skills = cmd.skills or cur.skills 222 | cmd.location = cmd.location or cur.location 223 | cmd.department = cmd.department or cur.department 224 | return client.dispatch(cmd) 225 | 226 | if isinstance(cmd, Req_DeleteWikiPage): 227 | return client.dispatch(dev.Req_UpdateWiki(content="", changed_by=cmd.changed_by, file=cmd.file)) 228 | 229 | if isinstance(cmd, Req_ListAllProjectsForUser): 230 | return list_my_projects(client, cmd.user) 231 | 232 | if isinstance(cmd, Req_ListAllCustomersForUser): 233 | return list_my_customers(client, cmd.user) 234 | 235 | if isinstance(cmd, dev.Req_ProvideAgentResponse): 236 | # drop link to current 
user 237 | cmd.links = [l for l in cmd.links if l.id != about.current_user] 238 | return client.dispatch(cmd) 239 | 240 | return client.dispatch(cmd) 241 | 242 | def run_agent(model: str, api: ERC3, task: TaskInfo): 243 | erc_client = api.get_erc_client(task) 244 | about = erc_client.who_am_i() 245 | llm = MyLLM(api=api, model=model, task=task, max_tokens=32768) 246 | 247 | system_prompt = distill_rules(erc_client, llm, about) 248 | 249 | reason = Literal["security_violation", "request_not_supported_by_api", "possible_security_violation_check_project", "may_pass"] 250 | 251 | class RequestPreflightCheck(BaseModel): 252 | current_actor: str 253 | preflight_check_explanation_brief: Optional[str] 254 | denial_reason: reason 255 | outcome_confidence_1_to_5: Annotated[int, Gt(0), Lt(6)] 256 | 257 | # log will contain conversation context for the agent within task 258 | log = [ 259 | {"role": "system", "content": system_prompt}, 260 | {"role": "user", "content": f"Request: '{task.task_text}'"}, 261 | ] 262 | 263 | preflight_check = llm.query(log, RequestPreflightCheck) 264 | confidence = preflight_check.outcome_confidence_1_to_5 265 | 266 | if confidence >=4: 267 | print(f"PREFLIGHT {confidence}: {preflight_check.preflight_check_explanation_brief}") 268 | if preflight_check.denial_reason == "request_not_supported_by_api": 269 | erc_client.provide_agent_response("Not supported", outcome="none_unsupported") 270 | return 271 | if preflight_check.denial_reason == "security_violation": 272 | erc_client.provide_agent_response("Security check failed", outcome="denied_security") 273 | return 274 | 275 | log.append({"role": "system", "content": preflight_check.preflight_check_explanation_brief}) 276 | 277 | # let's limit number of reasoning steps by 20, just to be safe 278 | for i in range(20): 279 | step = f"step_{i + 1}" 280 | print(f"Next {step}... 
", end="") 281 | 282 | job = llm.query(log, NextStep) 283 | 284 | # print next sep for debugging 285 | print(job.plan_remaining_steps_brief[0], f"\n {job.first_step_from_plan}") 286 | 287 | # Let's add tool request to conversation history as if OpenAI asked for it. 288 | # a shorter way would be to just append `job.model_dump_json()` entirely 289 | log.append({ 290 | "role": "assistant", 291 | "content": job.plan_remaining_steps_brief[0], 292 | "tool_calls": [{ 293 | "type": "function", 294 | "id": step, 295 | "function": { 296 | "name": job.first_step_from_plan.__class__.__name__, 297 | "arguments": job.first_step_from_plan.model_dump_json(), 298 | }}] 299 | }) 300 | 301 | # now execute the tool by dispatching command to our handler 302 | try: 303 | result = my_dispatch(erc_client, job.first_step_from_plan, about) 304 | txt = result.model_dump_json(exclude_none=True, exclude_unset=True) 305 | print(f"{CLI_GREEN}OUT{CLI_CLR}: {txt}") 306 | txt = "DONE: " + txt 307 | except ApiException as e: 308 | txt = e.detail 309 | # print to console as ascii red 310 | print(f"{CLI_RED}ERR: {e.api_error.error}{CLI_CLR}") 311 | 312 | txt = "ERROR: " + txt 313 | 314 | # if SGR wants to finish, then quit loop 315 | if isinstance(job.first_step_from_plan, dev.Req_ProvideAgentResponse): 316 | print(f"{CLI_BLUE}agent {job.first_step_from_plan.outcome}{CLI_CLR}. Summary:\n{job.first_step_from_plan.message}") 317 | 318 | for link in job.first_step_from_plan.links: 319 | print(f" - link {link.kind}: {link.id}") 320 | break 321 | 322 | # and now we add results back to the convesation history, so that agent 323 | # we'll be able to act on the results in the next reasoning step. 
# (tail of sgr-agent-erc3-prod/agent.py in this dump):
#     log.append({"role": "tool", "content": txt, "tool_call_id": step})
# --------------------------------------------------------------------------
# sgr-agent-erc3-test/agent.py
# --------------------------------------------------------------------------
import json
from pathlib import Path
from typing import Annotated, List, Union, Literal, Optional
from annotated_types import MaxLen, MinLen, Gt, Lt
from erc3.erc3 import ProjectDetail
from pydantic import BaseModel, Field
from erc3 import erc3 as dev, ApiException, TaskInfo, ERC3, Erc3Client

from lib import MyLLM

# This is how you can add custom tools that work slightly better than the
# stock API requests: simpler shapes are easier for the LLM to fill in.
class Req_DeleteWikiPage(BaseModel):
    """Delete a wiki page (dispatched as an update with empty content)."""
    tool: Literal["/wiki/delete"] = "/wiki/delete"
    file: str
    changed_by: Optional[dev.EmployeeID] = None

class Req_ListAllProjectsForUser(BaseModel):
    """List every project a user participates in, split by role."""
    tool: Literal["/all-projects-for-user"] = "/all-projects-for-user"
    user: dev.EmployeeID

class Resp_ListAllProjectsForUser(BaseModel):
    # projects where the user has the "Lead" role
    lead_in: List[ProjectDetail]
    # projects where the user is a regular team member
    member_of: List[ProjectDetail]

class Req_ListAllCustomersForUser(BaseModel):
    """List every customer the user is an account manager for."""
    tool: Literal["/all-customers-for-user"] = "/all-customers-for-user"
    user: dev.EmployeeID

class Resp_ListAllCustomersForUser(BaseModel):
    customers: List[dev.CompanyDetail]

# Wrap stock requests with more descriptive names to avoid confusing the LLM.
class GetTimesheetReportByProject(dev.Req_TimeSummaryByProject):
    pass
class GetTimesheetReportByEmployee(dev.Req_TimeSummaryByEmployee):
    pass
class CreateTimesheetEntryForUser(dev.Req_LogTimeEntry):
    pass

# Next-step planner (Schema-Guided Reasoning cascade): the model restates the
# current state, plans the remaining steps, decides whether the task is done,
# and finally routes the FIRST planned step to exactly one tool.
class NextStep(BaseModel):
    current_state: str
    # we'll use only the first step, discarding all the rest.
    plan_remaining_steps_brief: Annotated[List[str], MinLen(1), MaxLen(5)] = Field(..., description="explain your thoughts on how to accomplish - what steps to execute")
    # now let's continue the cascade and check with LLM if the task is done
    task_completed: bool
    # Routing to one of the tools to execute the first remaining step.
    # If the task is completed, the model will pick Req_ProvideAgentResponse.
    first_step_from_plan: Union[
        dev.Req_ProvideAgentResponse,
        dev.Req_ListProjects,
        dev.Req_SearchProjects,
        Req_ListAllProjectsForUser,
        dev.Req_GetProject,
        dev.Req_UpdateProjectTeam,
        dev.Req_UpdateProjectStatus,
        dev.Req_ListEmployees,
        dev.Req_SearchEmployees,
        dev.Req_GetEmployee,
        dev.Req_UpdateEmployeeInfo,
        dev.Req_ListCustomers,
        Req_ListAllCustomersForUser,
        dev.Req_GetCustomer,
        dev.Req_SearchCustomers,
        dev.Req_SearchTimeEntries,
        GetTimesheetReportByProject,
        # BUGFIX: was the raw dev.Req_TimeSummaryByEmployee although the
        # descriptive wrapper was defined above exactly for this purpose.
        GetTimesheetReportByEmployee,
        dev.Req_GetTimeEntry,
        CreateTimesheetEntryForUser,
        dev.Req_UpdateTimeEntry,
        Req_DeleteWikiPage,
    ] = Field(..., description="first step from plan above")

# ANSI colors for console output
CLI_RED = "\x1B[31m"
CLI_GREEN = "\x1B[32m"
CLI_BLUE = "\x1B[34m"
CLI_CLR = "\x1B[0m"

# custom tool to list my projects
def list_my_projects(api: Erc3Client, user: str) -> Resp_ListAllProjectsForUser:
    """Page through all projects (incl. archived) that `user` is on.

    Halves the page size and retries when the server rejects the requested
    limit; any other API error is re-raised immediately.
    """
    page_limit = 32
    next_offset = 0
    lead_in: List[ProjectDetail] = []
    member_of: List[ProjectDetail] = []
    while True:
        try:
            prjs = api.search_projects(offset=next_offset, limit=page_limit, include_archived=True, team=dict(employee_id=user))

            for p in prjs.projects or []:
                detail = api.get_project(p.id).project
                role = [t for t in detail.team if t.employee == user][0].role

                if role == "Lead":
                    lead_in.append(detail)
                else:
                    member_of.append(detail)

            next_offset = prjs.next_offset
            if next_offset == -1:  # -1 marks the last page
                return Resp_ListAllProjectsForUser(lead_in=lead_in, member_of=member_of)
        except ApiException as e:
            # BUGFIX: the original swallowed unrelated ApiExceptions (infinite
            # retry of the same request) and halved the limit with float
            # division. Only retry the known "page limit exceeded" case and
            # keep the limit an integer.
            if "page limit exceeded" not in str(e):
                raise
            page_limit //= 2
            if page_limit <= 2:
                raise

def list_my_customers(api: Erc3Client, user: str) -> Resp_ListAllCustomersForUser:
    """Page through all customers managed by `user` (same retry strategy as
    list_my_projects)."""
    page_limit = 32
    next_offset = 0
    loaded: List[dev.CompanyDetail] = []
    while True:
        try:
            custs = api.search_customers(offset=next_offset, limit=page_limit, account_managers=[user])

            for p in custs.companies or []:
                loaded.append(api.get_customer(p.id).company)

            next_offset = custs.next_offset
            if next_offset == -1:  # -1 marks the last page
                return Resp_ListAllCustomersForUser(customers=loaded)
        except ApiException as e:
            # BUGFIX: see list_my_projects — re-raise unknown errors, use
            # integer halving for the page size.
            if "page limit exceeded" not in str(e):
                raise
            page_limit //= 2
            if page_limit <= 2:
                raise

# Tool to automatically distill wiki rules into a compact system prompt.
# Results are cached on disk per wiki content hash (about.wiki_sha1).
def distill_rules(api: Erc3Client, llm: MyLLM, about: dev.Resp_WhoAmI) -> str:

    context_id = about.wiki_sha1

    loc = Path(f"context_{context_id}_v2.json")

    Category = Literal["applies_to_guests", "applies_to_users", "other"]

    class Rule(BaseModel):
        why_relevant_summary: str = Field(...)
        category: Category = Field(...)
        compact_rule: str

    class DistillWikiRules(BaseModel):
        company_name: str
        company_locations: List[str] = Field(..., description="list of locations where company operates")
        company_execs: List[str]
        rules: List[Rule]

    if not loc.exists():
        print("New context discovered. Distilling rules once")
        schema = json.dumps(NextStep.model_json_schema())
        prompt = f"""
Carefully review the wiki below and identify most important security/scoping/data rules that will be highly relevant for the agent or user that are automating APIs of this company.

Pay attention to the rules that mention AI Agent or Public ChatBot. When talking about Public Chatbot use - applies_to_guests

Rules must be compact RFC-style, ok to use pseudo code for compactness. They will be used by an agent that operates following APIs: {schema}
""".strip()

        # inline the full wiki so the distiller sees everything at once
        for path in api.list_wiki().paths:
            content = api.load_wiki(path)
            prompt += f"\n---- start of {path} ----\n\n{content}\n\n ---- end of {path} ----\n"

        messages = [{"role": "system", "content": prompt}]

        distilled = llm.query(messages, DistillWikiRules, "gpt-5.1")
        loc.write_text(distilled.model_dump_json(indent=2))

    else:
        distilled = DistillWikiRules.model_validate_json(loc.read_text())

    prompt = f"""You are AI Chatbot automating {distilled.company_name}.

Company locations: {distilled.company_locations}
Company execs: {distilled.company_execs}

Use available tools to execute task from the current user.

- To confirm project access - get or find project (and get after finding)
- Archival of entries or wiki deletion are not irreversible operations.
- Respond with proper Req_ProvideAgentResponse when:
  - Task is done
  - Task can't be completed (e.g. internal error, user is not allowed or clarification is needed)
- Make sure to always include ids of referenced entities in response links.
- if user might have access to a resource - double-check that BEFORE denying

# Rules
"""
    # pick only the rule categories relevant to the current actor type
    relevant_categories: List[Category] = ["other"]
    if about.is_public:
        relevant_categories.append("applies_to_guests")
    else:
        relevant_categories.append("applies_to_users")

    for r in distilled.rules:
        if r.category in relevant_categories:
            prompt += f"\n- {r.compact_rule}"

    # append at the end to keep rules in context cache
    # BUGFIX: the original was missing the leading newline, fusing this
    # header onto the last rule line.
    prompt += f"\n\n# Current context (trust it)\nDate:{about.today}"

    if about.is_public:
        prompt += "\nCurrent actor is GUEST (Anonymous user)"
    else:
        employee = api.get_employee(about.current_user).employee
        # drop bulky fields that are irrelevant for the actor description
        employee.skills = []
        employee.wills = []
        dump = employee.model_dump_json()
        prompt += f"\n# Current actor is authenticated user: {employee.name}:\n{dump}"

    return prompt


def my_dispatch(client: Erc3Client, cmd: BaseModel, about: dev.Resp_WhoAmI):
    """Dispatch a tool command, intercepting the custom tools and patching
    partial updates before forwarding to the API client."""
    # example how to add custom tools or tool handling
    if isinstance(cmd, dev.Req_UpdateEmployeeInfo):
        # first pull current state so unset fields are not wiped by the update
        cur = client.get_employee(cmd.employee).employee

        cmd.notes = cmd.notes or cur.notes
        cmd.salary = cmd.salary or cur.salary
        cmd.wills = cmd.wills or cur.wills
        cmd.skills = cmd.skills or cur.skills
        cmd.location = cmd.location or cur.location
        cmd.department = cmd.department or cur.department
        return client.dispatch(cmd)

    if isinstance(cmd, Req_DeleteWikiPage):
        # deletion is modeled as overwriting the page with empty content
        return client.dispatch(dev.Req_UpdateWiki(content="", changed_by=cmd.changed_by, file=cmd.file))

    if isinstance(cmd, Req_ListAllProjectsForUser):
        return list_my_projects(client, cmd.user)

    if isinstance(cmd, Req_ListAllCustomersForUser):
        return list_my_customers(client, cmd.user)

    if isinstance(cmd, dev.Req_ProvideAgentResponse):
        # drop link to current user — it is implicit in the response
        cmd.links = [l for l in cmd.links if l.id != about.current_user]
        return client.dispatch(cmd)

    return client.dispatch(cmd)

def run_agent(model: str, api: ERC3, task: TaskInfo):
    """Run the SGR agent loop for one task: distill rules, run a preflight
    security check, then iterate plan→tool-call→observe up to 20 steps."""
    erc_client = api.get_erc_client(task)
    about = erc_client.who_am_i()
    llm = MyLLM(api=api, model=model, task=task, max_tokens=32768)

    system_prompt = distill_rules(erc_client, llm, about)

    reason = Literal["security_violation", "request_not_supported_by_api", "possible_security_violation_check_project", "may_pass"]

    class RequestPreflightCheck(BaseModel):
        current_actor: str
        preflight_check_explanation_brief: Optional[str]
        denial_reason: reason
        outcome_confidence_1_to_5: Annotated[int, Gt(0), Lt(6)]

    # log will contain conversation context for the agent within task
    log = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Request: '{task.task_text}'"},
    ]

    preflight_check = llm.query(log, RequestPreflightCheck)
    confidence = preflight_check.outcome_confidence_1_to_5

    # only short-circuit on confident verdicts; low-confidence checks fall
    # through to the full reasoning loop below
    if confidence >= 4:
        print(f"PREFLIGHT {confidence}: {preflight_check.preflight_check_explanation_brief}")
        if preflight_check.denial_reason == "request_not_supported_by_api":
            erc_client.provide_agent_response("Not supported", outcome="none_unsupported")
            return
        if preflight_check.denial_reason == "security_violation":
            erc_client.provide_agent_response("Security check failed", outcome="denied_security")
            return

    # BUGFIX: explanation is Optional — don't append a None content message
    if preflight_check.preflight_check_explanation_brief:
        log.append({"role": "system", "content": preflight_check.preflight_check_explanation_brief})

    # let's limit number of reasoning steps by 20, just to be safe
    for i in range(20):
        step = f"step_{i + 1}"
        print(f"Next {step}... ", end="")

        job = llm.query(log, NextStep)

        # print next step for debugging
        print(job.plan_remaining_steps_brief[0], f"\n  {job.first_step_from_plan}")

        # Let's add tool request to conversation history as if OpenAI asked for it.
        # A shorter way would be to just append `job.model_dump_json()` entirely.
        log.append({
            "role": "assistant",
            "content": job.plan_remaining_steps_brief[0],
            "tool_calls": [{
                "type": "function",
                "id": step,
                "function": {
                    "name": job.first_step_from_plan.__class__.__name__,
                    "arguments": job.first_step_from_plan.model_dump_json(),
                }}]
        })

        # now execute the tool by dispatching command to our handler
        try:
            result = my_dispatch(erc_client, job.first_step_from_plan, about)
            txt = result.model_dump_json(exclude_none=True, exclude_unset=True)
            print(f"{CLI_GREEN}OUT{CLI_CLR}: {txt}")
            txt = "DONE: " + txt
        except ApiException as e:
            # print to console in red
            print(f"{CLI_RED}ERR: {e.api_error.error}{CLI_CLR}")
            txt = "ERROR: " + e.detail

        # if SGR wants to finish, then quit loop
        if isinstance(job.first_step_from_plan, dev.Req_ProvideAgentResponse):
            print(f"{CLI_BLUE}agent {job.first_step_from_plan.outcome}{CLI_CLR}. Summary:\n{job.first_step_from_plan.message}")

            for link in job.first_step_from_plan.links:
                print(f" - link {link.kind}: {link.id}")
            break

        # and now we add results back to the conversation history, so that the
        # agent will be able to act on the results in the next reasoning step.
        log.append({"role": "tool", "content": txt, "tool_call_id": step})