├── .github └── workflows │ └── documentation.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── agents ├── CHANGELOG.rst ├── CMakeLists.txt ├── agents │ ├── __init__.py │ ├── callbacks.py │ ├── clients │ │ ├── __init__.py │ │ ├── db_base.py │ │ ├── model_base.py │ │ ├── ollama.py │ │ └── roboml.py │ ├── components │ │ ├── __init__.py │ │ ├── component_base.py │ │ ├── imagestovideo.py │ │ ├── llm.py │ │ ├── map_encoding.py │ │ ├── mllm.py │ │ ├── model_component.py │ │ ├── semantic_router.py │ │ ├── speechtotext.py │ │ ├── texttospeech.py │ │ └── vision.py │ ├── config.py │ ├── models.py │ ├── publisher.py │ ├── resources │ │ ├── test.jpeg │ │ └── test.wav │ ├── ros.py │ ├── utils │ │ ├── __init__.py │ │ ├── pluralize.py │ │ ├── utils.py │ │ └── voice.py │ └── vectordbs.py ├── msg │ ├── Bbox2D.msg │ ├── Detection2D.msg │ ├── Detections2D.msg │ ├── Point2D.msg │ ├── Tracking.msg │ ├── Trackings.msg │ └── Video.msg ├── package.xml ├── scripts │ ├── chainlit_client │ │ ├── app.py │ │ ├── chainlit.md │ │ └── tiny_web_client │ └── executable └── tests │ └── test_clients.py ├── docs ├── _static │ ├── ROS_AGENTS.png │ ├── ROS_AGENTS_DARK.png │ ├── automatika-logo.png │ ├── complete_dark.png │ └── complete_light.png ├── basics.md ├── conf.py ├── examples │ ├── complete.md │ ├── conversational.md │ ├── goto.md │ ├── index.md │ ├── multiprocessing.md │ ├── prompt_engineering.md │ ├── semantic_map.md │ ├── semantic_router.md │ └── tool_calling.md ├── index.md ├── installation.md ├── intro.md └── quickstart.md ├── examples ├── complete_agent.py ├── complete_agent_multiprocessing.py ├── conversational_agent_with_audio.py ├── go_to_x.py ├── prompt_engineering.py ├── semantic_map.py ├── semantic_router.py └── tool_calling.py ├── interrogate_badge.svg └── pyproject.toml /.github/workflows/documentation.yml: -------------------------------------------------------------------------------- 1 | name: documentation 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | permissions: 6 | contents: write 7 | 8 | jobs: 9 | docs: 10 | runs-on: ubuntu-24.04 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: actions/setup-python@v5 14 | - name: Install dependencies 15 | run: | 16 | pip install --break-system-packages sphinx myst_parser sphinx-copybutton sphinx-autodoc2 sphinx-book-theme linkify-it-py 17 | - name: Sphinx build 18 | run: | 19 | sphinx-build docs _build 20 | - name: Deploy to GitHub Pages 21 | uses: peaceiris/actions-gh-pages@v3 22 | if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} 23 | with: 24 | publish_branch: gh-pages 25 | github_token: ${{ secrets.GITHUB_TOKEN }} 26 | publish_dir: _build/ 27 | force_orphan: true 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | .venv/ 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | # MAC 94 | .DS_Store 95 | 96 | # VSCode 97 | .vscode 98 | 99 | # ROS 100 | log/ 101 | install/ 102 | src/ 103 | 104 | # custom 105 | shared/ 106 | logdir/ 107 | data/ 108 | logs/ 109 | tmp/ 110 | *.csv 111 | *.h5 112 | *.npz 113 | *.zip 114 | *.ods 115 | *.xyz 116 | *.off 117 | *.obj 118 | 119 | # Ignores for web client 120 | .chainlit/ 121 | 122 | # Ignores for Docs 123 | docs/Makefile 124 | docs/make.bat 125 | docs/apidocs/ 126 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - id: check-docstring-first 9 | - id: check-toml 10 | - repo: https://github.com/astral-sh/ruff-pre-commit 11 | # Ruff version. 12 | rev: v0.5.4 13 | hooks: 14 | # linter. 15 | - id: ruff 16 | types_or: [ python, pyi, jupyter ] 17 | # formatter. 
18 | - id: ruff-format 19 | types_or: [ python, pyi, jupyter ] 20 | - repo: https://github.com/econchick/interrogate 21 | rev: 1.7.0 22 | hooks: 23 | # docstring coverage 24 | - id: interrogate 25 | args: [-vv, --fail-under=80, -c, pyproject.toml] 26 | pass_filenames: false 27 | 28 | 29 | ## Uncomment mypy for type-checking errors in pre-commit 30 | 31 | # - repo: https://github.com/pre-commit/mirrors-mypy 32 | # rev: v1.5.0 33 | # hooks: 34 | # - id: mypy 35 | # additional_dependencies: [tokenize-rt==3.2.0, 'types-PyYAML'] 36 | # exclude: ^tests/ 37 | # args: 38 | # [ 39 | # "--ignore-missing-imports", 40 | # "--check-untyped-defs", 41 | # "--warn-redundant-casts", 42 | # "--no-implicit-optional", 43 | # "--warn-return-any" 44 | # ] 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Automatika Robotics 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ROS Agents Logo. 5 | 6 | 7 | ROS Agents is a fully-loaded framework for creating interactive embodied agents that can understand, remember, and act upon contextual information from their environment. 8 | 9 | - **Agents in the real world:** Designed to be used with autonomous robot systems that operate in dynamic environments, specifically AMRs. 10 | - **Intuitive API**: Simple pythonic API to utilize local or cloud based ML models (specifically **Multimodal LLMs** and other **Transformer Architectures**) on robots. 11 | - **Semantic Memory**: Integrates vector databases, semantic routing and other supporting components to quickly build arbitrarily complex graphs for agentic information flow. No need to utilize bloated "GenAI" frameworks on your robot. 12 | - **Made in ROS2**: Utilizes ROS2 as the underlying middleware. Theoretically, all devices that provide a ROS2 package can be utilized to send data to ML models, as long as the datatype callback has been implemented. 
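The last point is easier to see with a concrete example: supporting a new sensor type mostly amounts to implementing a small callback that converts its ROS message into something a model can consume. The sketch below follows the pattern of the callbacks shipped in `agents/callbacks.py`; the `LaserScanCallback` name and its text summary are illustrative assumptions, not part of the released API.

```python
# A sketch of a custom datatype callback, modeled on agents/callbacks.py.
# Assumption: the LaserScan example and its summary format are illustrative only.
import math
from typing import Optional

from ros_sugar.io import GenericCallback


class LaserScanCallback(GenericCallback):
    """Summarizes sensor_msgs/LaserScan messages as text usable by an LLM/MLLM."""

    def _get_output(self, **_) -> Optional[str]:
        # self.msg holds the latest ROS message received on the subscribed topic
        if not self.msg:
            return None
        readings = [r for r in self.msg.ranges if math.isfinite(r)]
        if not readings:
            return None
        return (
            f"Nearest obstacle at {min(readings):.2f} m, "
            f"farthest valid reading at {max(readings):.2f} m."
        )
```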
13 |
14 | Check out the [Installation Instructions](https://automatika-robotics.github.io/ros-agents/installation.html) 🛠️
15 |
16 | Get started with the [Quickstart Guide](https://automatika-robotics.github.io/ros-agents/quickstart.html) 🚀
17 |
18 | Get familiar with [Basic Concepts](https://automatika-robotics.github.io/ros-agents/basics.html) 📚
19 |
20 | Dive right in with [Examples](https://automatika-robotics.github.io/ros-agents/examples/index.html) ✨
21 |
22 | ## Installation 🛠️
23 |
24 | ### Pre-Requisites
25 |
26 | #### Install ROS
27 |
28 | ROS Agents is built to be used with ROS2. All ROS distributions starting from _Iron_ are supported. Install ROS2 by following the instructions on the [official site](https://docs.ros.org/en/iron/Installation.html).
29 |
30 | #### Install a model serving platform
31 |
32 | The core of ROS Agents is agnostic to model serving platforms. It currently supports [Ollama](https://ollama.com) and [RoboML](https://github.com/automatika-robotics/robo-ml). Please install either of these by following the instructions provided by the respective projects. Support for new platforms will be added continuously. If you would like support for a particular platform to be added, please open an issue/PR.
33 |
34 | ### Install ROS Agents (Ubuntu)
35 |
36 | **Binary packages for Ubuntu will be released soon. Check this space.**
37 |
38 | ### Install ROS Agents from source
39 |
40 | #### Get Dependencies
41 |
42 | Install Python dependencies:
43 |
44 | ```shell
45 | pip install numpy opencv-python-headless 'attrs>=23.2.0' jinja2 httpx setproctitle msgpack msgpack-numpy numpy-quaternion platformdirs
46 | ```
47 |
48 | Download ROS Sugar:
49 |
50 | ```shell
51 | git clone https://github.com/automatika-robotics/ros-sugar
52 | ```
53 |
54 | #### Install ROS Agents
55 |
56 | ```shell
57 | git clone https://github.com/automatika-robotics/ros-agents.git
58 | cd ..
59 | colcon build
60 | source install/setup.bash
61 | python your_script.py
62 | ```
63 |
64 | ## Quick Start 🚀
65 |
66 | Unlike other ROS packages, ROS Agents provides a purely pythonic way of describing the node graph using [ROS Sugar](https://www.github.com/automatika-robotics/ros-sugar). Copy the following code into a Python script and run it.
67 |
68 | ```python
69 | from agents.clients.ollama import OllamaClient
70 | from agents.components import MLLM
71 | from agents.models import Llava
72 | from agents.ros import Topic, Launcher
73 |
74 | # Define input and output topics (pay attention to msg_type)
75 | text0 = Topic(name="text0", msg_type="String")
76 | image0 = Topic(name="image_raw", msg_type="Image")
77 | text1 = Topic(name="text1", msg_type="String")
78 |
79 | # Define a model client (working with Ollama in this case)
80 | llava = Llava(name="llava")
81 | llava_client = OllamaClient(llava)
82 |
83 | # Define an MLLM component (a component represents a node with a particular functionality)
84 | mllm = MLLM(
85 |     inputs=[text0, image0],
86 |     outputs=[text1],
87 |     model_client=llava_client,
88 |     trigger=[text0],
89 |     component_name="vqa"
90 | )
91 | # Additional prompt settings
92 | mllm.set_topic_prompt(text0, template="""You are an amazing and funny robot.
93 | Answer the following about this image: {{ text0 }}"""
94 | )
95 | # Launch the component
96 | launcher = Launcher()
97 | launcher.add_pkg(components=[mllm])
98 | launcher.bringup()
99 | ```
100 |
101 | And just like that, we have an agent that can answer questions like **'What do you see?'**. To interact with this agent, ROS Agents includes a tiny web client.
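If you would rather poke at the agent from code, the following minimal sketch (not part of the library) publishes a question on the `text0` topic defined above and prints whatever the agent answers on `text1`. It assumes the quickstart script is already running, that a camera driver is publishing on `image_raw`, and that the String topics map to `std_msgs/String`.

```python
# Minimal sketch: query the quickstart agent over its ROS topics.
# Assumptions: the agent above is running, a camera publishes on "image_raw",
# and "text0"/"text1" carry std_msgs/String messages.
import rclpy
from rclpy.node import Node
from std_msgs.msg import String


class AgentQuery(Node):
    def __init__(self) -> None:
        super().__init__("agent_query")
        # "text0" is the trigger topic of the MLLM component, "text1" its output
        self.pub = self.create_publisher(String, "text0", 10)
        self.sub = self.create_subscription(String, "text1", self.on_answer, 10)
        # publish the question once, after giving discovery a moment
        self.timer = self.create_timer(1.0, self.ask)

    def ask(self) -> None:
        self.pub.publish(String(data="What do you see?"))
        self.timer.cancel()

    def on_answer(self, msg: String) -> None:
        self.get_logger().info(f"Agent: {msg.data}")


def main() -> None:
    rclpy.init()
    rclpy.spin(AgentQuery())
    rclpy.shutdown()


if __name__ == "__main__":
    main()
```

The same round trip can also be done from the command line with `ros2 topic pub` and `ros2 topic echo`.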
Checkout the [Quick Start Guide](https://automatika-robotics.github.io/ros-agents/quickstart.html) to learn more about how components and models work together. 102 | 103 | ## Elaborate Embodied Agents 104 | The quickstart example above is just an amuse-bouche of what is possible with ROS Agents. In ROS Agents we can create arbitrarily sophisticated component graphs. And furthermore our system can be configured to even change or reconfigure itself based on events internal or external to the system. Check out the code for the following agent [here](https://automatika-robotics.github.io/ros-agents/examples/complete.html). 105 | 106 | 107 | 108 | 109 | Elaborate Agent 110 | 111 | 112 | ## Copyright 113 | 114 | The code in this distribution is Copyright (c) 2024 Automatika Robotics unless explicitly indicated otherwise. 115 | 116 | ROS Agents is made available under the MIT license. Details can be found in the [LICENSE](LICENSE) file. 117 | 118 | ## Contributions 119 | 120 | ROS Agents has been developed in collaboration betweeen [Automatika Robotics](https://automatikarobotics.com/) and [Inria](https://inria.fr/). Contributions from the community are most welcome. 121 | -------------------------------------------------------------------------------- /agents/CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 2 | Changelog for package automatika_embodied_agents 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | 5 | 0.3.3 (2025-01-28) 6 | ------------------ 7 | * (fix) Removes python dependencies from package manifest until package names merged in rosdistro 8 | * Contributors: ahr 9 | 10 | 0.3.2 (2025-01-28) 11 | ------------------ 12 | * (docs) Updates docs for conversational agent and SpeechToTextConfig 13 | * (feature) Adds vad, audio feautres and wakeword classification classes based local onnx models 14 | * (feature) Adds utility function for downloading models and status classes for speech processing 15 | * (feature) Adds configuration for wakeword detections in speechtotext component 16 | * (fix) Fixes error in ollama client where tool calls are received without output content 17 | * (fix) Adds a fix to map encoding where it can start with a single detections layer 18 | * (refactor) Makes component name non-optional in components to avoid name conflicts 19 | * (fix) Fixes error for long prompts when checking if prompt is a filename 20 | * (refactor) Removes pytorch as a dependency and runs VAD model with onnxruntime 21 | * (refactor) Makes warmup a property of model components that defaults to false 22 | * (feature) Adds utility method to download onnx model files 23 | * (refactor) Replaces info with debug to reduce logging spam 24 | * (fix) Fixes getting logging severity level for jazzy onwards 25 | * (fix) Adds minor improvements to branching for llm and mllm components 26 | * (chore) Cleansup dependencies for packaging 27 | * (chore) Adds dependency for sugar and removes unnecessary python dependencies from packaging 28 | * (fix) Corrects import of Topic class 29 | * (docs) Removes redefinition of Topic and corrects links to ROS Sugar 30 | * (fix) Changes topic in base component to be directly inherited from ROS Sugar for consistency accross packages 31 | * (feature) Adds warmup functions to all model based components 32 | * (refactor) Removes pillow as a dependancy 33 | * (refactor) Removes overrrides from components and adds custom meathods instead 34 | * (feature) Adds warmup to 
vision component for displaying stats on init 35 | * (fix) Adds fix for correct colors in cv2 visualization 36 | * (fix) Adds node name as window name for visualization in vision component 37 | * (feature) Adds cv2 based visualization option to vision component 38 | * (refactor) Reduces branching in execution step for components 39 | * (chore) Combines agents and agents_interfaces to one package 40 | * (chore) Changes deb package name 41 | * (fix) Fixes raising error in model initialization for roboml clients 42 | * (refactor) Adds passing additional agent types to ros sugar 43 | * (fix) Fixes error messages when wrong component inputs/outputs are passed 44 | * (feature) Adds support for CompressedImage msg type in components 45 | * (feature) Adds option to deploy vision models using tensorrt 46 | Works with roboml 47 | * (fix) Fixes check on sufficient topics in component validation 48 | * (fix) Fixes a bug in topic validation 49 | * (fix) Fixes validation of topics in components 50 | * (refactor) Changes handling of image messages for publication 51 | - Adds support for CompressedImage messages 52 | - Gathers image messages directly in vision component instead of getting them back from clients 53 | * (feature) Adds frame_id to trackings publisher and updates msg and callback 54 | * (feature) Adds boxes to vision tracking message 55 | * Contributors: ahr, mkabtoul 56 | 57 | 0.3.1 (2024-10-29) 58 | ------------------ 59 | * (chore) bump version 0.3.0 -> 0.3.1 60 | * (feature) Adds support for using tool calling in LLM components in multiprocess execution 61 | * Contributors: ahr 62 | 63 | 0.3.0 (2024-10-28) 64 | ------------------ 65 | * (chore) bump version 0.2.0 -> 0.3.0 66 | * (chore) Adds bumpver config 67 | * Merge pull request `#14 `_ from automatika-robotics/feature/external_processors 68 | Adds support for running components as separate processes 69 | * (docs) Updates docs based on ROS Sugar version update 70 | * (fix) Fixes bug in registering triggers with components 71 | * (refactor) Simplifies by adding direct serialization of clients and triggers 72 | * (refactor) Removes gratuitous logging from utils 73 | * (fix) Minor bug fixes for components to run in multiprocessing 74 | - Fixes trigger assignment for components 75 | - Handles private attributes of attrs classes 76 | - Fixes component and config init in common executable 77 | * (fix) Fixes serializing log level in clients 78 | * (fix) Fixes minor bugs in utils, components, configs and models 79 | * (feature) Adds support for running components in multiple processes 80 | - Adds common executable to the package for ROS Sugar launcher 81 | - Refactors components to be serializable 82 | - Adds serialization to clients 83 | - Minor type hint changes for compatibility with older versions of ROS 84 | * (fix) Adds the correct check for external processors given new ros-sugar implementation 85 | * Contributors: ahr 86 | 87 | 0.2.0 (2024-09-28) 88 | ------------------ 89 | * (chore) Bump up the version 90 | * Merge pull request `#13 `_ from automatika-robotics/feature/better_clients 91 | Adds enhanced functionality in clients specifically for LLM and MLLM components 92 | * (feature) Adds tool calling for LLM component using the OllamaClient 93 | * (fix) Fixes rag results in templated inputs to LLMs which do not contain input 94 | * (refactor) Makes named models subclasses of TransformersLLM and TransformersMLLM for easier handling in roboml client 95 | * (fix) Fixes key error in ollama client response retreival 96 | * (fix) Adds flag 
for chat history for chat history reset and fixes logging 97 | * (feature) Adds TransformersLLM and TransformersMLLM models for roboml clients 98 | * (fix) Removes history reset phrase from model definitions and add system prompt for LLMs and derivates 99 | * (refactor) Changes model component to have execution step as an abstract method implemented by child components 100 | * (fix) Changes ollama client inference call to use chat endpoint 101 | * (feature) Adds chat history management to llm and mllm components 102 | * (docs) Clarifies handling of RAG results for llm component 103 | * (fix) Fixes bug in rag result handling for llm component 104 | * (fix) Removes default init_timeout from models 105 | * (refactor) Moves roboml resp client dependancies inside the client initialization 106 | * (fix) Explicity exposes QoSConfig in ros module 107 | * (refactor) Replaces map_meta_data parameter with map_topic for MapEncoding component 108 | * (refactor) Removes direct dependancy on pypdf 109 | * (fix) Changes map meta data topic to type OccupancyGrid 110 | * (feature) Adds audio options to chainlit client 111 | * (fix) Removes unused imports 112 | * (fix) Fixes the initialization of map encoding and semantic router components 113 | * (refactor) Fixes imports and refactors code according to latest version of ROS sugar 114 | * (fix) Fixes passing the config in components to parent base component 115 | * (fix) Fixes ROS sugar import for BaseTopic 116 | * (refactor) Removes auto_ros as a dependency 117 | * (feature) Adds init_on_activation flag to all implemented clientsc 118 | * (feature) Seperates abstract methods from callable methods in db client base 119 | * (feature) Seperates callable methods, from abstract methods in client base class 120 | * Contributors: ahr 121 | 122 | 0.1.1 (2024-09-05) 123 | ------------------ 124 | * (feature) Adds component action for adding points to map collection (`#12 `_) 125 | * Makes version compliant with ROS convention 126 | * (chore) Adds license declaration in setup.py 127 | * Bumps version number and adds license information 128 | * Initial release 0.1.1a 129 | * Contributors: ahr, mkabtoul 130 | -------------------------------------------------------------------------------- /agents/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | project(automatika_embodied_agents) 3 | 4 | if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") 5 | add_compile_options(-Wall -Wextra -Wpedantic) 6 | endif() 7 | 8 | # find dependencies 9 | find_package(ament_cmake REQUIRED) 10 | find_package(ament_cmake_python REQUIRED) 11 | 12 | find_package(rclcpp REQUIRED) 13 | find_package(rclpy REQUIRED) 14 | find_package(rosidl_default_generators REQUIRED) 15 | find_package(builtin_interfaces REQUIRED) 16 | find_package(std_msgs REQUIRED) 17 | find_package(sensor_msgs REQUIRED) 18 | 19 | file(GLOB_RECURSE MSG_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "msg/*.msg" ) 20 | 21 | rosidl_generate_interfaces(${PROJECT_NAME} 22 | ${MSG_FILES} 23 | DEPENDENCIES builtin_interfaces std_msgs sensor_msgs 24 | ) 25 | 26 | ament_export_dependencies(rosidl_default_runtime) 27 | 28 | # Install Python module 29 | ament_python_install_package(agents) 30 | # Add executables 31 | install(PROGRAMS 32 | scripts/executable 33 | scripts/chainlit_client/tiny_web_client 34 | scripts/chainlit_client/app.py # chainlit app definition 35 | scripts/chainlit_client/chainlit.md # readme picked by chainlit client 
36 | DESTINATION lib/${PROJECT_NAME} 37 | ) 38 | 39 | ament_package() 40 | -------------------------------------------------------------------------------- /agents/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automatika-robotics/ros-agents/1fbf24d683bc5bcb9398c8ab3146ffd8197020fc/agents/agents/__init__.py -------------------------------------------------------------------------------- /agents/agents/callbacks.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import os 3 | import cv2 4 | import numpy as np 5 | from ros_sugar.io import ( 6 | GenericCallback, 7 | TextCallback, 8 | get_logger, 9 | ) 10 | 11 | from ros_sugar.io.utils import image_pre_processing, read_compressed_image 12 | 13 | from .utils import create_detection_context 14 | 15 | __all__ = ["GenericCallback", "TextCallback"] 16 | 17 | 18 | class VideoCallback(GenericCallback): 19 | """ 20 | Video Callback class. Its get method saves a video as list of bytes 21 | """ 22 | 23 | def __init__(self, input_topic, node_name: Optional[str] = None) -> None: 24 | """ 25 | Constructs a new instance. 26 | :param input_topic: Subscription topic 27 | :type input_topic: Input 28 | """ 29 | super().__init__(input_topic, node_name) 30 | # fixed video needs to be a path to cv2 readable video 31 | if hasattr(input_topic, "fixed"): 32 | if os.path.isfile(input_topic.fixed): 33 | try: 34 | # read all video frames 35 | video = [] 36 | cap = cv2.VideoCapture(input_topic.fixed) 37 | if not cap.isOpened(): 38 | raise TypeError() 39 | while cap.isOpened(): 40 | ret, frame = cap.read() 41 | if ret: 42 | video.append(frame) 43 | else: 44 | break 45 | # Convert frame list to ndarray 46 | self.msg = np.array(video) 47 | except Exception: 48 | get_logger(self.node_name).error( 49 | f"Fixed path {self.msg} provided for Vidoe topic is not readable Video file" 50 | ) 51 | else: 52 | get_logger(self.node_name).error( 53 | f"Fixed path {self.msg} provided for Video topic is not a valid file path" 54 | ) 55 | 56 | def _get_output(self, **_) -> Optional[np.ndarray]: 57 | """ 58 | Gets video as a numpy array. 59 | :returns: Video as nd_array 60 | :rtype: np.ndarray 61 | """ 62 | if not self.msg: 63 | return None 64 | 65 | # return np.ndarray if fixed video has been read 66 | if isinstance(self.msg, np.ndarray): 67 | return self.msg 68 | else: 69 | # pre-process in case of weird encodings and reshape ROS topic 70 | video = [] 71 | for img in self.msg.frames: 72 | video.append(image_pre_processing(img)) 73 | for img in self.msg.compressed_frames: 74 | video.append(read_compressed_image(img)) 75 | return np.array(video) 76 | 77 | 78 | class ObjectDetectionCallback(GenericCallback): 79 | """ 80 | Object detection Callback class. 81 | Its get method returns the bounding box data 82 | """ 83 | 84 | def __init__(self, input_topic, node_name: Optional[str] = None) -> None: 85 | """ 86 | Constructs a new instance. 
87 | 88 | :param input_topic: Subscription topic 89 | :type input_topic: str 90 | """ 91 | super().__init__(input_topic, node_name) 92 | self.msg = input_topic.fixed if hasattr(input_topic, "fixed") else None 93 | 94 | def _get_output(self, **_) -> Optional[str]: 95 | """ 96 | Processes labels and returns a context string for 97 | prompt engineering 98 | 99 | :returns: Comma separated classnames 100 | :rtype: str 101 | """ 102 | if not self.msg: 103 | return None 104 | # send fixed list of labels if it exists 105 | if isinstance(self.msg, list): 106 | return create_detection_context(self.msg) 107 | # send labels from ROS message 108 | else: 109 | label_list = [ 110 | label for detection in self.msg.detections for label in detection.labels 111 | ] 112 | detections_string = create_detection_context(label_list) 113 | return detections_string 114 | -------------------------------------------------------------------------------- /agents/agents/clients/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clients are standard interfaces for components to interact with ML models or vector DBs served by various platforms. Currently ROS Agents provides the following clients, which cover the most popular open source model deployment platforms. Simple clients can be easily implemented for other platforms and the use of heavy duct-tape "AI" frameworks on the robot is discouraged 😅. 3 | 4 | ```{note} 5 | Some clients might need additional dependacies, which are provided in the following table. If missing the user will also be prompted for them at runtime. 6 | ``` 7 | 8 | ```{list-table} 9 | :widths: 20 20 60 10 | :header-rows: 1 11 | * - Platform 12 | - Client 13 | - Description 14 | 15 | * - **RoboML** 16 | - [HTTPModelClient](agents.clients.roboml.HTTPModelClient) 17 | - An HTTP client for interaction with ML models served on RoboML. 18 | 19 | * - **RoboML** 20 | - [HTTPDBClient](agents.clients.roboml.HTTPDBClient) 21 | - An HTTP client for interaction with vector DBs served on RoboML. 22 | 23 | * - **RoboML** 24 | - [RESPModelClient](agents.clients.roboml.RESPModelClient) 25 | - A Redis Serialization Protocol (RESP) based client for interaction with ML models served on RoboML. **Note:** In order to use this client, please install dependancies with `pip install redis[hiredis] msgpack msgpack-numpy` 26 | 27 | * - **RoboML** 28 | - [RESPDBClient](agents.clients.roboml.RESPDBClient) 29 | - A Redis Serialization Protocol (RESP) based client for interaction with vector DBs served on RoboML. **Note:** In order to use this client, please install dependancies with `pip install redis[hiredis] msgpack msgpack-numpy` 30 | 31 | * - **Ollama** 32 | - [OllamaClient](agents.clients.ollama.OllamaClient) 33 | - An HTTP client for interaction with ML models served on Ollama. 
**Note:** In order to use this client, please install dependancies with `pip install ollama` 34 | 35 | """ 36 | 37 | from .ollama import OllamaClient 38 | from .roboml import HTTPDBClient, HTTPModelClient, RESPDBClient, RESPModelClient 39 | 40 | 41 | __all__ = [ 42 | "OllamaClient", 43 | "HTTPDBClient", 44 | "HTTPModelClient", 45 | "RESPDBClient", 46 | "RESPModelClient", 47 | ] 48 | -------------------------------------------------------------------------------- /agents/agents/clients/db_base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any, Optional, Dict, Union 3 | 4 | from rclpy import logging 5 | 6 | from ..vectordbs import DB 7 | from ..utils import validate_func_args 8 | 9 | 10 | class DBClient(ABC): 11 | """DBClient.""" 12 | 13 | @validate_func_args 14 | def __init__( 15 | self, 16 | db: Union[DB, Dict], 17 | host: Optional[str] = None, 18 | port: Optional[int] = None, 19 | response_timeout: int = 30, 20 | init_on_activation: bool = True, 21 | logging_level: str = "info", 22 | **_, 23 | ): 24 | """__init__. 25 | :param db: 26 | :type db: DB 27 | :param host: 28 | :type host: Optional[str] 29 | :param port: 30 | :type port: Optional[int] 31 | :param init_on_activation: 32 | :type init_on_activation: bool 33 | :param logging_level: 34 | :type logging_level: str 35 | """ 36 | if isinstance(db, DB): 37 | self.db_type = db.__class__.__name__ 38 | self.db_name = db.name 39 | self.init_timeout = db.init_timeout 40 | self.db_init_params = db._get_init_params() 41 | 42 | else: 43 | self.db_type = db["db_type"] 44 | self.db_name = db["db_name"] 45 | self.init_timeout = db["init_timeout"] 46 | self.db_init_params = db["db_init_params"] 47 | 48 | self.host = host 49 | self.port = port 50 | self.init_on_activation = init_on_activation 51 | self.logger = logging.get_logger(self.db_name) 52 | logging.set_logger_level( 53 | self.db_name, logging.get_logging_severity_from_string(logging_level) 54 | ) 55 | self.response_timeout = response_timeout 56 | 57 | def serialize(self) -> Dict: 58 | """Get client json 59 | :rtype: Dict 60 | """ 61 | db = { 62 | "db_name": self.db_name, 63 | "db_type": self.db_type, 64 | "init_timeout": self.init_timeout, 65 | "db_init_params": self.db_init_params, 66 | } 67 | 68 | return { 69 | "client_type": self.__class__.__name__, 70 | "db": db, 71 | "host": self.host, 72 | "port": self.port, 73 | "init_on_activation": self.init_on_activation, 74 | "logging_level": self.logger.get_effective_level().name, 75 | "response_timeout": self.response_timeout, 76 | } 77 | 78 | def check_connection(self) -> None: 79 | """initialize. 80 | :rtype: None 81 | """ 82 | self._check_connection() 83 | 84 | def initialize(self) -> None: 85 | """initialize. 86 | :rtype: None 87 | """ 88 | if self.init_on_activation: 89 | self._initialize() 90 | 91 | def add(self, db_input: Dict[str, Any]) -> Optional[Dict]: 92 | """add data. 93 | :param db_input: 94 | :type db_input: dict[str, Any] 95 | :rtype: dict | None 96 | """ 97 | return self._add(db_input) 98 | 99 | def conditional_add(self, db_input: Dict[str, Any]) -> Optional[Dict]: 100 | """add data if given ids dont exist. Update metadatas of the ids that exist 101 | :param db_input: 102 | :type db_input: dict[str, Any] 103 | :rtype: dict | None 104 | """ 105 | return self._conditional_add(db_input) 106 | 107 | def metadata_query(self, db_input: Dict[str, Any]) -> Optional[Dict]: 108 | """Query based on given metadata. 
109 | :param db_input: 110 | :type db_input: dict[str, Any] 111 | :rtype: dict | None 112 | """ 113 | return self._metadata_query(db_input) 114 | 115 | def query(self, db_input: Dict[str, Any]) -> Optional[Dict]: 116 | """Query based on query string. 117 | :param db_input: 118 | :type db_input: dict[str, Any] 119 | :rtype: dict | None 120 | """ 121 | return self._query(db_input) 122 | 123 | def deinitialize(self) -> None: 124 | """deinitialize.""" 125 | # TODO: Add check for db initialization by keeping db 126 | # state in client 127 | if self.init_on_activation: 128 | self._deinitialize() 129 | 130 | @abstractmethod 131 | def _check_connection(self) -> None: 132 | """check_connection. 133 | :rtype: None 134 | """ 135 | raise NotImplementedError( 136 | "This method needs to be implemented in a child class" 137 | ) 138 | 139 | @abstractmethod 140 | def _initialize(self) -> None: 141 | """initialize. 142 | :rtype: None 143 | """ 144 | raise NotImplementedError( 145 | "This method needs to be implemented in a child class" 146 | ) 147 | 148 | @abstractmethod 149 | def _add(self, db_input: Dict[str, Any]) -> Optional[Dict]: 150 | """add data. 151 | :param db_input: 152 | :type db_input: dict[str, Any] 153 | :rtype: dict | None 154 | """ 155 | raise NotImplementedError( 156 | "This method needs to be implemented in a child class" 157 | ) 158 | 159 | @abstractmethod 160 | def _conditional_add(self, db_input: Dict[str, Any]) -> Optional[Dict]: 161 | """add data if given ids dont exist. Update metadatas of the ids that exist 162 | :param db_input: 163 | :type db_input: dict[str, Any] 164 | :rtype: dict | None 165 | """ 166 | raise NotImplementedError( 167 | "This method needs to be implemented in a child class" 168 | ) 169 | 170 | @abstractmethod 171 | def _metadata_query(self, db_input: Dict[str, Any]) -> Optional[Dict]: 172 | """Query based on given metadata. 173 | :param db_input: 174 | :type db_input: dict[str, Any] 175 | :rtype: dict | None 176 | """ 177 | raise NotImplementedError( 178 | "This method needs to be implemented in a child class" 179 | ) 180 | 181 | @abstractmethod 182 | def _query(self, db_input: Dict[str, Any]) -> Optional[Dict]: 183 | """Query based on query string. 184 | :param db_input: 185 | :type db_input: dict[str, Any] 186 | :rtype: dict | None 187 | """ 188 | raise NotImplementedError( 189 | "This method needs to be implemented in a child class" 190 | ) 191 | 192 | @abstractmethod 193 | def _deinitialize(self) -> None: 194 | """deinitialize.""" 195 | raise NotImplementedError( 196 | "This method needs to be implemented in a child class" 197 | ) 198 | -------------------------------------------------------------------------------- /agents/agents/clients/model_base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any, Optional, Dict, Union 3 | 4 | from rclpy import logging 5 | 6 | from ..models import Model 7 | from ..utils import validate_func_args 8 | 9 | 10 | class ModelClient(ABC): 11 | """MLClient.""" 12 | 13 | @validate_func_args 14 | def __init__( 15 | self, 16 | model: Union[Model, Dict], 17 | host: Optional[str] = None, 18 | port: Optional[int] = None, 19 | inference_timeout: int = 30, 20 | init_on_activation: bool = True, 21 | logging_level: str = "info", 22 | **_, 23 | ): 24 | """__init__. 
25 | :param model: 26 | :type model: Model 27 | :param host: 28 | :type host: Optional[str] 29 | :param port: 30 | :type port: Optional[int] 31 | :param inference_timeout: 32 | :type inference_timeout: int 33 | :param logging_level: 34 | :type logging_level: str 35 | """ 36 | if isinstance(model, Model): 37 | self._model = model 38 | self.model_type = model.__class__.__name__ 39 | self.model_name = model.name 40 | self.init_timeout = model.init_timeout 41 | self.model_init_params = model._get_init_params() 42 | 43 | else: 44 | self.model_type = model["model_type"] 45 | self.model_name = model["model_name"] 46 | self.init_timeout = model["init_timeout"] 47 | self.model_init_params = model["model_init_params"] 48 | 49 | self.host = host 50 | self.port = port 51 | self.init_on_activation = init_on_activation 52 | self.logger = logging.get_logger(self.model_name) 53 | logging.set_logger_level( 54 | self.model_name, logging.get_logging_severity_from_string(logging_level) 55 | ) 56 | self.inference_timeout = inference_timeout 57 | 58 | def serialize(self) -> Dict: 59 | """Get client json 60 | :rtype: Dict 61 | """ 62 | model = { 63 | "model_name": self.model_name, 64 | "model_type": self.model_type, 65 | "init_timeout": self.init_timeout, 66 | "model_init_params": self.model_init_params, 67 | } 68 | 69 | return { 70 | "client_type": self.__class__.__name__, 71 | "model": model, 72 | "host": self.host, 73 | "port": self.port, 74 | "init_on_activation": self.init_on_activation, 75 | "logging_level": self.logger.get_effective_level().name, 76 | "inference_timeout": self.inference_timeout, 77 | } 78 | 79 | def check_connection(self) -> None: 80 | """initialize. 81 | :rtype: None 82 | """ 83 | self._check_connection() 84 | 85 | def initialize(self) -> None: 86 | """initialize. 87 | :rtype: None 88 | """ 89 | if self.init_on_activation: 90 | self._initialize() 91 | 92 | def inference(self, inference_input: Dict[str, Any]) -> Optional[Dict]: 93 | """inference. 94 | :param inference_input: 95 | :type inference_input: dict[str, Any] 96 | :rtype: dict | None 97 | """ 98 | return self._inference(inference_input) 99 | 100 | def deinitialize(self): 101 | """deinitialize.""" 102 | # TODO: Add check for model initialization by keeping model 103 | # state in client 104 | if self.init_on_activation: 105 | self._deinitialize() 106 | 107 | @abstractmethod 108 | def _check_connection(self) -> None: 109 | """check_connection. 110 | :rtype: None 111 | """ 112 | raise NotImplementedError( 113 | "This method needs to be implemented in a child class" 114 | ) 115 | 116 | @abstractmethod 117 | def _initialize(self) -> None: 118 | """initialize. 119 | :rtype: None 120 | """ 121 | raise NotImplementedError( 122 | "This method needs to be implemented in a child class" 123 | ) 124 | 125 | @abstractmethod 126 | def _inference(self, inference_input: Dict[str, Any]) -> Optional[Dict]: 127 | """inference. 
128 | :param inference_input: 129 | :type inference_input: dict[str, Any] 130 | :rtype: dict | None 131 | """ 132 | raise NotImplementedError( 133 | "This method needs to be implemented in a child class" 134 | ) 135 | 136 | @abstractmethod 137 | def _deinitialize(self): 138 | """deinitialize.""" 139 | raise NotImplementedError( 140 | "This method needs to be implemented in a child class" 141 | ) 142 | -------------------------------------------------------------------------------- /agents/agents/clients/ollama.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional, Dict, Union 2 | 3 | import httpx 4 | 5 | from ..models import LLM 6 | from ..utils import encode_arr_base64 7 | from .model_base import ModelClient 8 | 9 | __all__ = ["OllamaClient"] 10 | 11 | 12 | class OllamaClient(ModelClient): 13 | """An HTTP client for interaction with ML models served on ollama""" 14 | 15 | def __init__( 16 | self, 17 | model: Union[LLM, Dict], 18 | host: str = "127.0.0.1", 19 | port: int = 11434, 20 | inference_timeout: int = 30, 21 | init_on_activation: bool = True, 22 | logging_level: str = "info", 23 | **kwargs, 24 | ): 25 | if isinstance(model, LLM): 26 | model._set_ollama_checkpoint() 27 | try: 28 | from ollama import Client 29 | 30 | self.client = Client(host=f"{host}:{port}") 31 | except ModuleNotFoundError as e: 32 | raise ModuleNotFoundError( 33 | "In order to use the OllamaClient, you need ollama-python package installed. You can install it with 'pip install ollama'" 34 | ) from e 35 | super().__init__( 36 | model=model, 37 | host=host, 38 | port=port, 39 | inference_timeout=inference_timeout, 40 | init_on_activation=init_on_activation, 41 | logging_level=logging_level, 42 | **kwargs, 43 | ) 44 | self._check_connection() 45 | 46 | def _check_connection(self) -> None: 47 | """Check if the platfrom is being served on specified IP and port""" 48 | # Ping remote server to check connection 49 | self.logger.info("Checking connection with remote_host Ollama") 50 | try: 51 | httpx.get(f"http://{self.host}:{self.port}").raise_for_status() 52 | except Exception as e: 53 | self.logger.error(str(e)) 54 | raise 55 | 56 | def _initialize(self) -> None: 57 | """ 58 | Initialize the model on platform using the paramters provided in the model specification class 59 | """ 60 | self.logger.info(f"Initializing {self.model_name} on ollama") 61 | try: 62 | # set timeout on underlying httpx client 63 | self.client._client.timeout = self.init_timeout 64 | r = self.client.pull(self.model_init_params["checkpoint"]) 65 | if r.get("status") != "success": # type: ignore 66 | raise Exception( 67 | f"Could not pull model {self.model_init_params['checkpoint']}" 68 | ) 69 | # load model in memory with empty request 70 | self.client.generate( 71 | model=self.model_init_params["checkpoint"], keep_alive=10 72 | ) 73 | self.logger.info(f"{self.model_name} model initialized") 74 | except Exception as e: 75 | self.logger.error(str(e)) 76 | return None 77 | 78 | def _inference(self, inference_input: Dict[str, Any]) -> Optional[Dict]: 79 | """Call inference on the model using data and inference parameters from the component""" 80 | if not (query := inference_input.get("query")): 81 | raise TypeError( 82 | "OllamaClient can only be used with LLM and MLLM components" 83 | ) 84 | # create input 85 | input = { 86 | "model": self.model_init_params["checkpoint"], 87 | "messages": query, 88 | } 89 | inference_input.pop("query") 90 | 91 | # make images part of the latest 
message in message list 92 | if images := inference_input.get("images"): 93 | input["messages"][-1]["images"] = [encode_arr_base64(img) for img in images] 94 | inference_input.pop("images") 95 | 96 | # Add tools as part of input, if available 97 | if tools := inference_input.get("tools"): 98 | input["tools"] = tools 99 | inference_input.pop("tools") 100 | 101 | # ollama uses num_predict for max_new_tokens 102 | if inference_input.get("max_new_tokens"): 103 | inference_input["num_predict"] = inference_input["max_new_tokens"] 104 | inference_input.pop("max_new_tokens") 105 | input["options"] = inference_input 106 | 107 | # call inference method 108 | try: 109 | # set timeout on underlying httpx client 110 | self.client._client.timeout = self.inference_timeout 111 | ollama_result = self.client.chat(**input) 112 | except Exception as e: 113 | self.logger.error(str(e)) 114 | return None 115 | 116 | self.logger.debug(str(ollama_result)) 117 | 118 | # make result part of the input 119 | if output := ollama_result["message"].get("content"): 120 | input["output"] = output # type: ignore 121 | # if tool calls exist 122 | if tool_calls := ollama_result["message"].get("tool_calls"): # type: ignore 123 | input["tool_calls"] = tool_calls 124 | return input 125 | else: 126 | # if tool calls exist 127 | if tool_calls := ollama_result["message"].get("tool_calls"): # type: ignore 128 | input["output"] = "" # Add empty output for tool calls 129 | input["tool_calls"] = tool_calls 130 | return input 131 | 132 | # no output or tool calls 133 | self.logger.debug("Output not received") 134 | return 135 | 136 | def _deinitialize(self): 137 | """Deinitialize the model on the platform""" 138 | 139 | self.logger.error(f"Deinitializing {self.model_name} model on ollama") 140 | try: 141 | self.client.generate( 142 | model=self.model_init_params["checkpoint"], keep_alive=0 143 | ) 144 | except Exception as e: 145 | self.logger.error(str(e)) 146 | return None 147 | -------------------------------------------------------------------------------- /agents/agents/components/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | A Component is the main execution unit in ROS Agents and in essence each component is synctactic sugar over a ROS2 Lifecycle Node. ROS Agents provides the following components. These components can be arbitrarily combined to form an embodied agent graph. 3 | 4 | ```{list-table} 5 | :widths: 20 80 6 | :header-rows: 1 7 | * - Component Name 8 | - Description 9 | 10 | * - **[LLM](agents.components.llm.md)** 11 | - This component utilizes large language models (e.g LLama) that can be used to process text data. 12 | 13 | * - **[MLLM](agents.components.mllm.md)** 14 | - This component utilizes multi-modal large language models (e.g. Llava) that can be used to process text and image data. 15 | 16 | * - **[SpeechToText](agents.components.speechtotext.md)** 17 | - This component takes in audio input and outputs a text representation of the audio using Speech-to-Text models (e.g. Whisper). 18 | 19 | * - **[TextToSpeech](agents.components.texttospeech.md)** 20 | - This component takes in text input and outputs an audio representation of the text using TTS models (e.g. SpeechT5). The generated audio can be played using any audio playback device available on the agent. 21 | 22 | * - **[MapEncoding](agents.components.map_encoding.md)** 23 | - Map encoding component that encodes text information as a semantic map based on the robots localization. 
It takes in map layers, position topic, map meta data topic, and a vector database client. Map layers can be arbitrary text based outputs from other components such as MLLMs or Vision. 24 | 25 | * - **[SemanticRouter](agents.components.semantic_router.md)** 26 | - A component that routes semantic information from input topics to output topics based on pre-defined routes. The Semantic Router takes in a list of input topics, a list of routes, an optional default route, and a configuration object. It uses the database client to store and retrieve routing information. 27 | 28 | * - **[Vision](agents.components.vision.md)** 29 | - This component performs object detection and tracking on input images and outputs a list of detected objects, along with their bounding boxes and confidence scores. 30 | 31 | * - **[VideoMessageMaker](agents.components.imagestovideo.md)** 32 | - This component generates ROS video messages from input image messages. A video message is a collection of image messages that have a perceivable motion. I.e. the primary task of this component is to make intentionality decisions about what sequence of consecutive images should be treated as one coherent temporal sequence. The motion estimation method used for selecting images for a video can be configured in component config. 33 | ``` 34 | """ 35 | 36 | from .component_base import Component 37 | from .imagestovideo import VideoMessageMaker 38 | from .llm import LLM 39 | from .map_encoding import MapEncoding 40 | from .mllm import MLLM 41 | from .model_component import ModelComponent 42 | from .semantic_router import SemanticRouter 43 | from .speechtotext import SpeechToText 44 | from .texttospeech import TextToSpeech 45 | from .vision import Vision 46 | 47 | __all__ = [ 48 | "Component", 49 | "ModelComponent", 50 | "MapEncoding", 51 | "MLLM", 52 | "LLM", 53 | "SpeechToText", 54 | "TextToSpeech", 55 | "Vision", 56 | "VideoMessageMaker", 57 | "SemanticRouter", 58 | ] 59 | -------------------------------------------------------------------------------- /agents/agents/components/component_base.py: -------------------------------------------------------------------------------- 1 | import json 2 | from abc import abstractmethod 3 | from copy import deepcopy 4 | from typing import Optional, Sequence, Union, List, Dict, Type 5 | 6 | from ..ros import BaseComponent, ComponentRunType, FixedInput, SupportedType, Topic 7 | from ..config import BaseComponentConfig 8 | 9 | 10 | class Component(BaseComponent): 11 | """Component.""" 12 | 13 | def __init__( 14 | self, 15 | inputs: Optional[Sequence[Union[Topic, FixedInput]]] = None, 16 | outputs: Optional[Sequence[Topic]] = None, 17 | config: Optional[BaseComponentConfig] = None, 18 | trigger: Union[Topic, List[Topic], float] = 1.0, 19 | callback_group=None, 20 | component_name: str = "agents_component", 21 | **kwargs, 22 | ): 23 | self.config: BaseComponentConfig = ( 24 | deepcopy(config) if config else BaseComponentConfig() 25 | ) 26 | self.allowed_inputs: Dict[str, List[Type[SupportedType]]] 27 | self.allowed_outputs: Dict[str, List[Type[SupportedType]]] 28 | 29 | # setup inputs and outputs 30 | if inputs: 31 | self.validate_topics( 32 | inputs, 33 | allowed_topic_types=self.allowed_inputs, 34 | topics_direction="Inputs", 35 | ) 36 | 37 | if outputs: 38 | if hasattr(self, "allowed_outputs"): 39 | self.validate_topics( 40 | outputs, 41 | allowed_topic_types=self.allowed_outputs, 42 | topics_direction="Outputs", 43 | ) 44 | 45 | # Initialize Parent Component 46 | super().__init__( 47 | 
component_name=component_name, 48 | inputs=inputs, 49 | outputs=outputs, 50 | config=self.config, 51 | callback_group=callback_group, 52 | enable_health_broadcast=False, 53 | **kwargs, 54 | ) 55 | 56 | # setup component run type and triggers 57 | self.trigger(trigger) 58 | 59 | def custom_on_activate(self): 60 | """ 61 | Custom configuration for creating triggers. 62 | """ 63 | # Setup trigger based callback or frequency based timer 64 | if self.run_type is ComponentRunType.EVENT: 65 | self.activate_all_triggers() 66 | 67 | def create_all_subscribers(self): 68 | """ 69 | Override to handle trigger topics and fixed inputs. 70 | Called by parent BaseComponent 71 | """ 72 | self.get_logger().info("STARTING ALL SUBSCRIBERS") 73 | all_callbacks = ( 74 | list(self.callbacks.values()) + list(self.trig_callbacks.values()) 75 | if self.run_type is ComponentRunType.EVENT 76 | else self.callbacks.values() 77 | ) 78 | for callback in all_callbacks: 79 | callback.set_node_name(self.node_name) 80 | if hasattr(callback.input_topic, "fixed"): 81 | self.get_logger().debug( 82 | f"Fixed input specified for topic: {callback.input_topic} of type {callback.input_topic.msg_type}" 83 | ) 84 | else: 85 | callback.set_subscriber(self._add_ros_subscriber(callback)) 86 | 87 | def activate_all_triggers(self) -> None: 88 | """ 89 | Activates component triggers by attaching execution step to callbacks 90 | """ 91 | self.get_logger().info("ACTIVATING TRIGGER TOPICS") 92 | if hasattr(self, "trig_callbacks"): 93 | for callback in self.trig_callbacks.values(): 94 | # Add execution step of the node as a post callback function 95 | callback.on_callback_execute(self._execution_step) 96 | 97 | def destroy_all_subscribers(self) -> None: 98 | """ 99 | Destroys all node subscribers 100 | """ 101 | self.get_logger().info("DESTROYING ALL SUBSCRIBERS") 102 | all_callbacks = ( 103 | list(self.callbacks.values()) + list(self.trig_callbacks.values()) 104 | if self.run_type is ComponentRunType.EVENT 105 | else self.callbacks.values() 106 | ) 107 | for callback in all_callbacks: 108 | if callback._subscriber: 109 | self.destroy_subscription(callback._subscriber) 110 | 111 | def trigger(self, trigger: Union[Topic, List[Topic], float]) -> None: 112 | """ 113 | Set component trigger 114 | """ 115 | if isinstance(trigger, list): 116 | for t in trigger: 117 | if t.name not in self.callbacks: 118 | raise TypeError( 119 | f"Invalid configuration for component trigger {t.name} - A trigger needs to be one of the inputs already defined in component inputs." 120 | ) 121 | self.run_type = ComponentRunType.EVENT 122 | self.trig_callbacks = {} 123 | for t in trigger: 124 | self.trig_callbacks[t.name] = self.callbacks[t.name] 125 | # remove trigger inputs from self.callbacks 126 | del self.callbacks[t.name] 127 | 128 | elif isinstance(trigger, Topic): 129 | if trigger.name not in self.callbacks: 130 | raise TypeError( 131 | f"Invalid configuration for component trigger {trigger.name} - A trigger needs to be one of the inputs already defined in component inputs." 
132 | ) 133 | self.run_type = ComponentRunType.EVENT 134 | self.trig_callbacks = {trigger.name: self.callbacks[trigger.name]} 135 | del self.callbacks[trigger.name] 136 | 137 | else: 138 | self.run_type = ComponentRunType.TIMED 139 | # Set component loop_rate (Hz) 140 | self.config.loop_rate = 1 / trigger 141 | 142 | self.trig_topic: Union[Topic, list[Topic], float] = trigger 143 | 144 | def validate_topics( 145 | self, 146 | topics: Sequence[Union[Topic, FixedInput]], 147 | allowed_topic_types: Optional[Dict[str, List[Type[SupportedType]]]] = None, 148 | topics_direction: str = "Topics", 149 | ): 150 | """ 151 | Verify component specific inputs or outputs using allowed topics if provided 152 | """ 153 | # type validation 154 | correct_type = all(isinstance(i, (Topic, FixedInput)) for i in topics) 155 | if not correct_type: 156 | raise TypeError( 157 | f"{topics_direction} to a component can only be of type Topic" 158 | ) 159 | 160 | # Check that only the allowed topics (or their subtypes) have been given 161 | if not allowed_topic_types: 162 | return 163 | 164 | all_msg_types = {topic.msg_type for topic in topics} 165 | all_topic_types = allowed_topic_types["Required"] + ( 166 | allowed_topic_types.get("Optional") or [] 167 | ) 168 | 169 | if msg_type := next( 170 | ( 171 | topic 172 | for topic in all_msg_types 173 | if not any( 174 | issubclass(topic, allowed_t) for allowed_t in all_topic_types 175 | ) 176 | ), 177 | None, 178 | ): 179 | raise TypeError( 180 | f"{topics_direction} to the component of type {self.__class__.__name__} can only be of the allowed datatypes: {[topic.__name__ for topic in all_topic_types]} or their subclasses. A topic of type {msg_type.__name__} cannot be given to this component." 181 | ) 182 | 183 | # Check that all required topics (or subtypes) have been given 184 | sufficient_topics = all( 185 | any(issubclass(m_type, allowed_type) for m_type in all_msg_types) 186 | for allowed_type in allowed_topic_types["Required"] 187 | ) 188 | 189 | if not sufficient_topics: 190 | raise TypeError( 191 | f"{self.__class__.__name__} component {topics_direction} should have at least one topic of each datatype in the following list: {[topic.__name__ for topic in allowed_topic_types['Required']]}" 192 | ) 193 | 194 | @abstractmethod 195 | def _execution_step(self, **kwargs): 196 | """_execution_step. 197 | 198 | :param args: 199 | :param kwargs: 200 | """ 201 | raise NotImplementedError( 202 | "This method needs to be implemented by child components." 
203 | ) 204 | 205 | def _update_cmd_args_list(self): 206 | """ 207 | Update launch command arguments 208 | """ 209 | super()._update_cmd_args_list() 210 | 211 | self.launch_cmd_args = [ 212 | "--trigger", 213 | self._get_trigger_json(), 214 | ] 215 | 216 | def _get_trigger_json(self) -> Union[str, bytes, bytearray]: 217 | """ 218 | Serialize component routes to json 219 | 220 | :return: Serialized inputs 221 | :rtype: str | bytes | bytearray 222 | """ 223 | if isinstance(self.trig_topic, Topic): 224 | return self.trig_topic.to_json() 225 | elif isinstance(self.trig_topic, List): 226 | return json.dumps([t.to_json() for t in self.trig_topic]) 227 | else: 228 | return json.dumps(self.trig_topic) 229 | -------------------------------------------------------------------------------- /agents/agents/components/imagestovideo.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Optional, Union, List 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | from ..config import VideoMessageMakerConfig 8 | from ..ros import Image, Topic, Video, ROSImage, ROSCompressedImage 9 | from ..utils import validate_func_args 10 | from .component_base import Component 11 | 12 | 13 | class VideoMessageMaker(Component): 14 | """ 15 | This component generates ROS video messages from input image messages. A video message is a collection of image messages that have a perceivable motion. 16 | I.e. the primary task of this component is to make intentionality decisions about what sequence of consecutive images should be treated as one coherent temporal sequence. 17 | The motion estimation method used for selecting images for a video can be configured in component config. 18 | 19 | :param inputs: The input topics for the object detection. 20 | This should be a list of Topic objects or FixedInput objects, limited to Image type. 21 | :type inputs: list[Topic] 22 | :param outputs: The output topics for the object detection. 23 | This should be a list of Topic objects, Video type. 24 | :type outputs: list[Topic] 25 | :param config: The configuration for the video message generation. 26 | This should be an instance of VideoMessageMakerConfig. 27 | :type config: VideoMessageMakerConfig 28 | :param trigger: The trigger value or topic for the object detection. 29 | This can be a single Topic object or a list of Topic objects. 30 | :type trigger: Union[Topic, list[Topic]] 31 | :param callback_group: An optional callback group for the video message generation. 32 | If provided, this should be a string. Otherwise, it defaults to None. 33 | :type callback_group: str 34 | :param component_name: The name of the video message generation component. 35 | This should be a string and defaults to "video_maker_component". 
36 | :type component_name: str 37 | 38 | Example usage: 39 | ```python 40 | image_topic = Topic(name="image", msg_type="Image") 41 | video_topic = Topic(name="video", msg_type="Video") 42 | config = VideoMessageMakerConfig() 43 | video_message_maker = VideoMessageMaker( 44 | inputs=[image_topic], 45 | outputs=[video_topic], 46 | config=config, 47 | component_name="video_message_maker", 48 | ) 49 | ``` 50 | """ 51 | 52 | @validate_func_args 53 | def __init__( 54 | self, 55 | *, 56 | inputs: List[Topic], 57 | outputs: List[Topic], 58 | config: Optional[VideoMessageMakerConfig] = None, 59 | trigger: Union[Topic, List[Topic]], 60 | component_name: str, 61 | callback_group=None, 62 | **kwargs, 63 | ): 64 | if isinstance(trigger, float): 65 | raise TypeError( 66 | "VideoMessageMaker component needs to be given a valid trigger topic. It cannot be started as a timed component." 67 | ) 68 | 69 | self.config: VideoMessageMakerConfig = config or VideoMessageMakerConfig() 70 | self.allowed_inputs = {"Required": [Image]} 71 | self.allowed_outputs = {"Required": [Video]} 72 | 73 | super().__init__( 74 | inputs, 75 | outputs, 76 | self.config, 77 | trigger, 78 | callback_group, 79 | component_name, 80 | **kwargs, 81 | ) 82 | 83 | self._frames: Union[List[ROSImage], List[ROSCompressedImage]] = [] 84 | self._last_frame: Optional[np.ndarray] = None 85 | self._capture: bool = False 86 | 87 | def _motion_estimation(self, current_frame: np.ndarray) -> bool: 88 | """Motion estimation methods between two frames. 89 | :param current_frame: 90 | :type current_frame: np.ndarray 91 | :rtype: bool 92 | """ 93 | # get gray scale image 94 | gray = cv2.cvtColor(current_frame, cv2.COLOR_RGB2GRAY) 95 | if self.config.motion_estimation_func == "frame_difference": 96 | return self._frame_difference(gray, self.config.threshold) 97 | elif self.config.motion_estimation_func == "optical_flow": 98 | return self._optical_flow( 99 | gray, self.config.threshold, **self.config.flow_kwargs 100 | ) 101 | else: 102 | return True 103 | 104 | def _frame_difference(self, img: np.ndarray, threshold: float) -> bool: 105 | """Calculates difference between two frames and returns true 106 | if difference is greater than defined threshold. 107 | :param img: 108 | :type img: np.ndarray 109 | :param threshold: 110 | :type threshold: int 111 | :rtype: bool 112 | """ 113 | # calculate frame difference 114 | diff = cv2.subtract(img, self._last_frame) 115 | # apply blur to improve thresholding 116 | diff = cv2.medianBlur(diff, 3) 117 | # apply adaptive thresholding 118 | mask = cv2.adaptiveThreshold( 119 | diff, 1, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2 120 | ) 121 | return True if mask.sum() > (threshold * math.prod(img.shape) / 100) else False 122 | 123 | def _optical_flow(self, img: np.ndarray, threshold: float, **kwargs) -> bool: 124 | """Calculates optical flow between two frames and returns true 125 | if flow is greater than defined threshold. 
126 | :param img: 127 | :type img: np.ndarray 128 | :param threshold: 129 | :type threshold: int 130 | :rtype: bool 131 | """ 132 | # calculate optical flow 133 | flow = cv2.calcOpticalFlowFarneback(self._last_frame, img, None, **kwargs) 134 | mask = np.uint8(flow > 1) / 10 135 | return True if mask.sum() > (threshold * math.prod(img.shape) / 100) else False 136 | 137 | def _execution_step(self, *_, **kwargs) -> None: 138 | """Collects incoming image messages until a criteria is met 139 | When met, publishes image messages as video 140 | :param args: 141 | :param kwargs: 142 | """ 143 | msg = kwargs.get("msg") 144 | topic = kwargs.get("topic") 145 | if msg and topic: 146 | output = self.trig_callbacks[topic.name].get_output() 147 | if self._last_frame is not None: 148 | # calculate motion estimation for start and stop 149 | self._capture = ( 150 | True 151 | if self._motion_estimation(output) 152 | and len(self._frames) < self.config.max_video_frames 153 | else False 154 | ) 155 | if self._capture: 156 | self._frames.append(msg) 157 | self._last_frame = cv2.cvtColor(output, cv2.COLOR_RGB2GRAY) 158 | 159 | # publish if video capture finished 160 | if ( 161 | self.publishers_dict 162 | and (not self._capture) 163 | and len(self._frames) >= self.config.min_video_frames 164 | ): 165 | self.get_logger().debug(f"Sending out video of {len(self._frames)} frames") 166 | for publisher in self.publishers_dict.values(): 167 | publisher.publish(output=self._frames) 168 | self._frames = [] 169 | -------------------------------------------------------------------------------- /agents/agents/components/mllm.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Union, Optional, List, Dict 2 | 3 | from ..clients.db_base import DBClient 4 | from ..clients.model_base import ModelClient 5 | from ..config import MLLMConfig 6 | from ..ros import FixedInput, Image, String, Topic, Detections 7 | from ..utils import validate_func_args 8 | from .llm import LLM 9 | 10 | 11 | class MLLM(LLM): 12 | """ 13 | This component utilizes multi-modal large language models (e.g. Llava) that can be used to process text and image data. 14 | 15 | :param inputs: The input topics or fixed inputs for the MLLM component. 16 | This should be a list of Topic objects or FixedInput instances, limited to String and Image types. 17 | :type inputs: list[Topic | FixedInput] 18 | :param outputs: The output topics for the MLLM component. 19 | This should be a list of Topic objects. String type is handled automatically. 20 | :type outputs: list[Topic] 21 | :param model_client: The model client for the MLLM component. 22 | This should be an instance of ModelClient. 23 | :type model_client: ModelClient 24 | :param config: Optional configuration for the MLLM component. 25 | This should be an instance of MLLMConfig. If not provided, defaults to MLLMConfig(). 26 | :type config: MLLMConfig 27 | :param trigger: The trigger value or topic for the MLLM component. 28 | This can be a single Topic object, a list of Topic objects, or a float value for a timed component. Defaults to 1. 29 | :type trigger: Union[Topic, list[Topic], float] 30 | :param callback_group: An optional callback group for the MLLM component. 31 | If provided, this should be a string. Otherwise, it defaults to None. 32 | :type callback_group: str 33 | :param component_name: The name of the MLLM component. 34 | This should be a string and defaults to "mllm_component". 
35 | :type component_name: str 36 | 37 | Example usage: 38 | ```python 39 | text0 = Topic(name="text0", msg_type="String") 40 | image0 = Topic(name="image0", msg_type="Image") 41 | text0 = Topic(name="text1", msg_type="String") 42 | config = MLLMConfig() 43 | model = TransformersMLLM(name='idefics') 44 | model_client = ModelClient(model=model) 45 | mllm_component = MLLM(inputs=[text0, image0], 46 | outputs=[text1], 47 | model_client=model_client, 48 | config=config, 49 | component_name='mllm_component') 50 | ``` 51 | """ 52 | 53 | @validate_func_args 54 | def __init__( 55 | self, 56 | *, 57 | inputs: List[Union[Topic, FixedInput]], 58 | outputs: List[Topic], 59 | model_client: ModelClient, 60 | config: Optional[MLLMConfig] = None, 61 | db_client: Optional[DBClient] = None, 62 | trigger: Union[Topic, List[Topic], float] = 1.0, 63 | component_name: str, 64 | callback_group=None, 65 | **kwargs, 66 | ): 67 | self.allowed_inputs = {"Required": [String, Image], "Optional": [Detections]} 68 | 69 | config = config or MLLMConfig() 70 | 71 | super().__init__( 72 | inputs=inputs, 73 | outputs=outputs, 74 | model_client=model_client, 75 | config=config, 76 | db_client=db_client, 77 | trigger=trigger, 78 | callback_group=callback_group, 79 | component_name=component_name, 80 | allowed_inputs=self.allowed_inputs, 81 | **kwargs, 82 | ) 83 | 84 | def _create_input(self, *_, **kwargs) -> Optional[Dict[str, Any]]: 85 | """Create inference input for MLLM models 86 | :param args: 87 | :param kwargs: 88 | :rtype: dict[str, Any] 89 | """ 90 | images = [] 91 | # context dict to gather all String inputs for use in system prompt 92 | context = {} 93 | # set mllm query as trigger 94 | if trigger := kwargs.get("topic"): 95 | query = self.trig_callbacks[trigger.name].get_output() 96 | context[trigger.name] = query 97 | 98 | # handle chat reset 99 | if ( 100 | self.config.chat_history 101 | and query.strip().lower() == self.config.history_reset_phrase 102 | ): 103 | self.messages = [] 104 | return None 105 | 106 | else: 107 | query = None 108 | 109 | # aggregate all inputs that are available 110 | for i in self.callbacks.values(): 111 | if (item := i.get_output()) is not None: 112 | # set trigger equal to a topic with type String if trigger not found 113 | if i.input_topic.msg_type is String: 114 | if not query: 115 | query = item 116 | context[i.input_topic.name] = item 117 | elif i.input_topic.msg_type is Detections: 118 | context[i.input_topic.name] = item 119 | # get images from image topics 120 | if issubclass(i.input_topic.msg_type, Image): 121 | images.append(item) 122 | 123 | if not query or not images: 124 | return None 125 | 126 | # get RAG results if enabled in config and if docs retreived 127 | rag_result = self._handle_rag_query(query) if self.config.enable_rag else None 128 | 129 | # set system prompt template 130 | query = ( 131 | self.component_prompt.render(context) if self.component_prompt else query 132 | ) 133 | 134 | # get RAG results if enabled in config and if docs retreived 135 | query = f"{rag_result}\n{query}" if rag_result else query 136 | 137 | message = {"role": "user", "content": query} 138 | self._handle_chat_history(message) 139 | 140 | self.get_logger().debug(f"Input from component: {self.messages}") 141 | 142 | input = { 143 | "query": self.messages, 144 | "images": images, 145 | **self.config._get_inference_params(), 146 | } 147 | 148 | # Add any tools, if registered 149 | if self.config._tool_descriptions: 150 | input["tools"] = self.config._tool_descriptions 151 | 152 | return 
input 153 | 154 | def _warmup(self): 155 | """Warm up and stat check""" 156 | import time 157 | from pathlib import Path 158 | import cv2 159 | 160 | image = cv2.imread(str(Path(__file__).parents[1] / Path("resources/test.jpeg"))) 161 | 162 | message = {"role": "user", "content": "What do you see?"} 163 | inference_input = { 164 | "query": [message], 165 | "images": [image], 166 | **self.config._get_inference_params(), 167 | } 168 | 169 | # Run inference once to warm up and once to measure time 170 | self.model_client.inference(inference_input) 171 | 172 | inference_input = { 173 | "query": [message], 174 | "images": [image], 175 | **self.config._get_inference_params(), 176 | } 177 | start_time = time.time() 178 | result = self.model_client.inference(inference_input) 179 | elapsed_time = time.time() - start_time 180 | 181 | self.get_logger().warning(f"Model Output: {result['output']}") 182 | self.get_logger().warning(f"Approximate Inference time: {elapsed_time} seconds") 183 | -------------------------------------------------------------------------------- /agents/agents/components/model_component.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | import inspect 3 | import json 4 | from typing import Any, Optional, Sequence, Union, List, Dict, Type 5 | 6 | from ..clients.model_base import ModelClient 7 | from ..config import ModelComponentConfig 8 | from ..ros import FixedInput, Topic, SupportedType 9 | from .component_base import Component 10 | 11 | 12 | class ModelComponent(Component): 13 | """ModelComponent.""" 14 | 15 | def __init__( 16 | self, 17 | inputs: Optional[Sequence[Union[Topic, FixedInput]]] = None, 18 | outputs: Optional[Sequence[Topic]] = None, 19 | model_client: Optional[ModelClient] = None, 20 | config: Optional[ModelComponentConfig] = None, 21 | trigger: Union[Topic, List[Topic], float] = 1.0, 22 | callback_group=None, 23 | component_name: str = "model_component", 24 | **kwargs, 25 | ): 26 | # setup model client 27 | self.model_client = model_client if model_client else None 28 | 29 | self.handled_outputs: List[Type[SupportedType]] 30 | 31 | if not config: 32 | self.config = ModelComponentConfig() 33 | 34 | # Initialize Component 35 | super().__init__( 36 | inputs, 37 | outputs, 38 | config, 39 | trigger, 40 | callback_group, 41 | component_name, 42 | **kwargs, 43 | ) 44 | 45 | def custom_on_configure(self): 46 | """ 47 | Create model client if provided and initialize model. 48 | """ 49 | self.get_logger().debug(f"Current Status: {self.health_status.value}") 50 | 51 | # validate output topics if handled_outputs exist 52 | self.get_logger().info("Validating Model Component Output Topics") 53 | self._validate_output_topics() 54 | 55 | # Initialize model 56 | if self.model_client: 57 | self.model_client.check_connection() 58 | self.model_client.initialize() 59 | if self.config.warmup: 60 | try: 61 | self._warmup() 62 | except Exception as e: 63 | self.get_logger().error(f"Error encountered in warmup: {e}") 64 | 65 | def custom_on_deactivate(self): 66 | """ 67 | Destroy model client if it exists 68 | """ 69 | # Deinitialize model 70 | if self.model_client: 71 | self.model_client.check_connection() 72 | self.model_client.deinitialize() 73 | 74 | def _validate_output_topics(self) -> None: 75 | """ 76 | Verify that output topics that are not handled, have pre-processing functions provided. 
We just check that there is a pre-processing function and do not check whether the functions have output of the corresponding type. 77 | """ 78 | 79 | if hasattr(self, "publishers_dict") and hasattr(self, "handled_outputs"): 80 | for name, pub in self.publishers_dict.items(): 81 | if pub.output_topic.msg_type not in self.handled_outputs and ( 82 | not self._external_processors 83 | ): 84 | func_body = inspect.getsource(pub.output_topic.msg_type.convert) 85 | raise TypeError(f"""{type(self).__name__} components can only handle output topics of type(s) {self.handled_outputs} automatically. Topic {name} is of type {pub.output_topic.msg_type}. EITHER provide a pre-processing function for this topic and attach it to the topic by calling the `add_publisher_preprocessor` on the component {self.node_name} OR provide a tool call that can provide structured inference output and attach it by calling `register_tool` on {self.node_name}. Make sure the output can be passed as parameter `output` to the following function: 86 | {func_body}""") 87 | 88 | @property 89 | def warmup(self) -> bool: 90 | """Enable warmup of the model.""" 91 | return self.config.warmup 92 | 93 | @warmup.setter 94 | def warmup(self, value: bool) -> None: 95 | """Enable warmup of the model.""" 96 | self.config.warmup = value 97 | 98 | @abstractmethod 99 | def _create_input(self, *args, **kwargs) -> Union[Dict[str, Any], None]: 100 | """_create_input. 101 | 102 | :param args: 103 | :param kwargs: 104 | :rtype: dict[str, Any] | None 105 | """ 106 | raise NotImplementedError( 107 | "_create_input method needs to be implemented by child components." 108 | ) 109 | 110 | @abstractmethod 111 | def _execution_step(self, *args, **kwargs): 112 | """_execution_step. 113 | 114 | :param args: 115 | :param kwargs: 116 | """ 117 | raise NotImplementedError( 118 | "_execution_step method needs to be implemented by child components." 119 | ) 120 | 121 | @abstractmethod 122 | def _warmup(self, *args, **kwargs): 123 | """_warmup. 124 | 125 | :param args: 126 | :param kwargs: 127 | """ 128 | raise NotImplementedError( 129 | "_warmup method needs to be implemented by child components." 130 | ) 131 | 132 | def _update_cmd_args_list(self): 133 | """ 134 | Update launch command arguments 135 | """ 136 | super()._update_cmd_args_list() 137 | 138 | self.launch_cmd_args = [ 139 | "--model_client", 140 | self._get_model_client_json(), 141 | ] 142 | 143 | def _get_model_client_json(self) -> Union[str, bytes, bytearray]: 144 | """ 145 | Serialize component routes to json 146 | 147 | :return: Serialized inputs 148 | :rtype: str | bytes | bytearray 149 | """ 150 | if not self.model_client: 151 | return "" 152 | return json.dumps(self.model_client.serialize()) 153 | -------------------------------------------------------------------------------- /agents/agents/components/semantic_router.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, Union 2 | import json 3 | 4 | from ..clients.db_base import DBClient 5 | from ..config import SemanticRouterConfig 6 | from ..publisher import Publisher 7 | from ..ros import String, Topic, Route 8 | from ..utils import validate_func_args 9 | from .component_base import Component 10 | 11 | 12 | class SemanticRouter(Component): 13 | """A component that routes semantic information from input topics to output topics based on pre-defined routes. 
The Semantic Router takes in a list of input topics, a list of routes, an optional default route, and a configuration object. It uses the database client to store and retrieve routing information. 14 | 15 | :param inputs: 16 | A list of input text topics that this component will subscribe to. 17 | :type inputs: list[Topic] 18 | :param routes: 19 | A list of pre-defined routes that publish incoming input to the routed output topics. 20 | :type routes: list[Route] 21 | :param default_route: 22 | An optional route that specifies the default behavior when no specific route matches up to a threshold. If not provided, the component will use the first route in the list. 23 | :type default_route: Optional[Route] 24 | :param config: 25 | The configuration object for this Semantic Router component. 26 | :type config: SemanticRouterConfig 27 | :param db_client: 28 | A database client that is used to store and retrieve routing information. 29 | :type db_client: DBClient 30 | :param callback_group: 31 | An optional callback group for this component. 32 | :param component_name: 33 | The name of this Semantic Router component (default: "router_component"). 34 | :type component_name: str 35 | :param kwargs: 36 | Additional keyword arguments. 37 | 38 | Example usage: 39 | ```python 40 | input_text = Topic(name="text0", msg_type="String") 41 | goto_route = Route( 42 | routes_to=goto, # where goto is an input topic to another component 43 | samples=[ 44 | "Go to the door", 45 | "Go to the kitchen", 46 | "Get me a glass", 47 | "Fetch a ball", 48 | "Go to hallway", 49 | "Go over there", 50 | ], 51 | ) 52 | mllm_route = Route( 53 | routes_to=mllm_input, # where mllm_input is an input topic to another component 54 | samples=[ 55 | "Are we indoors or outdoors", 56 | "What do you see?", 57 | "Whats in front of you?", 58 | "Where are we", 59 | "Do you see any people?", 60 | "How many things are infront of you?", 61 | "Is this room occupied?", 62 | ], 63 | ) 64 | config = SemanticRouterConfig(router_name="my_router") 65 | db_client = DBClient(db=ChromaDB("database_name")) 66 | semantic_router = SemanticRouter( 67 | inputs=[input_text], 68 | routes=[route1, route2], 69 | default_route=None, 70 | config=config, 71 | db_client=db_client 72 | component_name = "router" 73 | ) 74 | ``` 75 | """ 76 | 77 | @validate_func_args 78 | def __init__( 79 | self, 80 | *, 81 | inputs: List[Topic], 82 | routes: List[Route], 83 | config: SemanticRouterConfig, 84 | db_client: DBClient, 85 | default_route: Optional[Route] = None, 86 | component_name: str, 87 | callback_group=None, 88 | **kwargs, 89 | ): 90 | self.config: SemanticRouterConfig = config 91 | self.allowed_inputs = {"Required": [String]} 92 | self.allowed_outputs = {"Required": [String]} 93 | self.db_client = db_client 94 | 95 | super().__init__( 96 | inputs, 97 | None, 98 | self.config, 99 | inputs, 100 | callback_group, 101 | component_name, 102 | **kwargs, 103 | ) 104 | 105 | # create routes 106 | self._routes(routes) 107 | 108 | if default_route: 109 | if default_route.routes_to.name not in self.routes_dict: 110 | raise TypeError("default_route must be one of the specified routes") 111 | self.default_route = self.config._default_route = default_route 112 | 113 | def custom_on_configure(self): 114 | self.get_logger().debug(f"Current Status: {self.health_status.value}") 115 | 116 | # configure the rest 117 | super().custom_on_configure() 118 | 119 | # initialize db client 120 | self.db_client.check_connection() 121 | self.db_client.initialize() 122 | 123 | # initialize 
routes 124 | self._initialize_routes() 125 | 126 | def deactivate(self): 127 | # deactivate db client 128 | self.db_client.check_connection() 129 | self.db_client.deinitialize() 130 | 131 | def _initialize_routes(self): 132 | """Create routes by saving route samples in the database.""" 133 | self.get_logger().info("Initializing all routes") 134 | for idx, (name, route) in enumerate(self.routes_dict.items()): 135 | route_to_add = { 136 | "collection_name": self.config.router_name, 137 | "distance_func": self.config.distance_func, 138 | "documents": route.samples, 139 | "metadatas": [{"route_name": name} for _ in range(len(route.samples))], 140 | "ids": [f"{name}.{i}" for i in range(len(route.samples))], 141 | } 142 | # reset collection on the addition of first route if it exists 143 | if idx == 0: 144 | route_to_add["reset_collection"] = True 145 | 146 | self.db_client.add(route_to_add) 147 | 148 | def _execution_step(self, **kwargs): 149 | """Execution step for Semantic Router component. 150 | :param args: 151 | :param kwargs: 152 | """ 153 | trigger = kwargs.get("topic") 154 | if not trigger: 155 | return 156 | 157 | self.get_logger().debug(f"Received trigger on {trigger.name}") 158 | trigger_query = self.trig_callbacks[trigger.name].get_output() 159 | # get route 160 | db_input = { 161 | "collection_name": self.config.router_name, 162 | "query": trigger_query, 163 | "n_results": 1, 164 | } 165 | result = self.db_client.query(db_input) 166 | 167 | # TODO: Add treatment of multiple results by using an averaging function 168 | if result: 169 | distance = result["output"]["distances"][0][0] 170 | # if default route is specified and distance is less than min 171 | # threshold, redirect to default route 172 | route = ( 173 | self.default_route.routes_to.name 174 | if self.default_route and distance > self.config.maximum_distance 175 | else result["output"]["metadatas"][0][0]["route_name"] 176 | ) 177 | 178 | self.publishers_dict[route].publish(trigger_query) 179 | else: 180 | self.health_status.set_failure() 181 | 182 | def _routes(self, routes: List[Route]): 183 | """ 184 | Set component Routes (topics) 185 | """ 186 | self.routes_dict = {route.routes_to.name: route for route in routes} 187 | route_topics: List[Topic] = [route.routes_to for route in routes] # type: ignore 188 | self.validate_topics(route_topics, self.allowed_outputs, "Outputs") 189 | self.publishers_dict = { 190 | route_topic.name: Publisher(route_topic) for route_topic in route_topics 191 | } 192 | 193 | def _update_cmd_args_list(self): 194 | """ 195 | Update launch command arguments 196 | """ 197 | super()._update_cmd_args_list() 198 | 199 | self.launch_cmd_args = [ 200 | "--routes", 201 | self._get_routes_json(), 202 | ] 203 | 204 | self.launch_cmd_args = [ 205 | "--db_client", 206 | self._get_db_client_json(), 207 | ] 208 | 209 | def _get_routes_json(self) -> Union[str, bytes, bytearray]: 210 | """ 211 | Serialize component routes to json 212 | 213 | :return: Serialized inputs 214 | :rtype: str | bytes | bytearray 215 | """ 216 | if not hasattr(self, "routes_dict"): 217 | return "[]" 218 | return json.dumps([route.to_json() for route in self.routes_dict.values()]) 219 | 220 | def _get_db_client_json(self) -> Union[str, bytes, bytearray]: 221 | """ 222 | Serialize component routes to json 223 | 224 | :return: Serialized inputs 225 | :rtype: str | bytes | bytearray 226 | """ 227 | if not self.db_client: 228 | return "" 229 | return json.dumps(self.db_client.serialize()) 230 | 
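The docstring example above sketches the intended wiring; a more complete, self-contained sketch of constructing a `SemanticRouter` with a default route follows. It assumes a roboml server reachable on localhost:8000 as the vector DB backend, and all topic, route, and collection names are illustrative placeholders rather than values taken from this repository.

```python
from agents.components import SemanticRouter
from agents.config import SemanticRouterConfig
from agents.clients.roboml import HTTPDBClient
from agents.vectordbs import ChromaDB
from agents.ros import Topic, Route

# Text topic carrying user queries into the router
query_topic = Topic(name="text0", msg_type="String")

# Topics that feed two downstream components (illustrative names)
goto_in = Topic(name="goto_in", msg_type="String")
mllm_in = Topic(name="mllm_in", msg_type="String")

# Each Route maps sample phrases to the topic the query should be re-published on
goto_route = Route(
    routes_to=goto_in,
    samples=["Go to the door", "Go to the kitchen", "Go over there"],
)
mllm_route = Route(
    routes_to=mllm_in,
    samples=["What do you see?", "Are we indoors or outdoors?", "Is this room occupied?"],
)

# SemanticRouterConfig.maximum_distance decides when the best match is too weak
# and the default route is used instead (see _execution_step above)
config = SemanticRouterConfig(router_name="my_router")

# Route samples are stored and queried through a vector DB client
db_client = HTTPDBClient(ChromaDB(name="routing_db"), port=8000)

router = SemanticRouter(
    inputs=[query_topic],
    routes=[goto_route, mllm_route],
    default_route=mllm_route,  # must be one of the routes above
    config=config,
    db_client=db_client,
    component_name="router",
)
```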
-------------------------------------------------------------------------------- /agents/agents/publisher.py: -------------------------------------------------------------------------------- 1 | from ros_sugar.io import Publisher 2 | 3 | __all__ = ["Publisher"] 4 | -------------------------------------------------------------------------------- /agents/agents/resources/test.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automatika-robotics/ros-agents/1fbf24d683bc5bcb9398c8ab3146ffd8197020fc/agents/agents/resources/test.jpeg -------------------------------------------------------------------------------- /agents/agents/resources/test.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automatika-robotics/ros-agents/1fbf24d683bc5bcb9398c8ab3146ffd8197020fc/agents/agents/resources/test.wav -------------------------------------------------------------------------------- /agents/agents/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import ( 2 | create_detection_context, 3 | validate_kwargs, 4 | validate_func_args, 5 | PDFReader, 6 | get_prompt_template, 7 | encode_arr_base64, 8 | VADStatus, 9 | WakeWordStatus, 10 | load_model, 11 | ) 12 | 13 | __all__ = [ 14 | "create_detection_context", 15 | "validate_kwargs", 16 | "validate_func_args", 17 | "PDFReader", 18 | "get_prompt_template", 19 | "encode_arr_base64", 20 | "VADStatus", 21 | "WakeWordStatus", 22 | "load_model", 23 | ] 24 | -------------------------------------------------------------------------------- /agents/agents/vectordbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | The following vector DB specification classes are meant to define a common interface for initialization of vector DBs. Currently the only supported vector DB is Chroma. 3 | """ 4 | 5 | from typing import Optional, Dict 6 | 7 | from attrs import define, field 8 | from .ros import BaseAttrs 9 | from .models import Encoder 10 | 11 | __all__ = ["ChromaDB"] 12 | 13 | 14 | @define(kw_only=True) 15 | class DB(BaseAttrs): 16 | """This class describes a database initialization configuration.""" 17 | 18 | name: str 19 | db_location: str = field(default="./data") 20 | username: Optional[str] = field(default=None) 21 | password: Optional[str] = field(default=None) 22 | encoder: Optional[Encoder] = field(default=None) 23 | init_timeout: int = field(default=600) # 10 minutes 24 | host: str = field(default="127.0.0.1") 25 | port: Optional[int] = field(default=None) 26 | 27 | def _get_init_params(self) -> Dict: 28 | params = { 29 | "username": self.username, 30 | "password": self.password, 31 | "db_location": self.db_location, 32 | } 33 | if self.encoder: 34 | params["encoder"] = self.encoder._get_init_params() 35 | return params 36 | 37 | 38 | @define(kw_only=True) 39 | class ChromaDB(DB): 40 | """[Chroma](https://www.trychroma.com/) is the open-source AI application database. It provides embeddings, vector search, document storage, full-text search, metadata filtering, and multi-modal retrieval support. 41 | 42 | :param name: An arbitrary name given to the database. 43 | :type name: str 44 | :param db_location: The on-disk location where the database will be initialized. Defaults to "./data". 45 | :type db_location: str, optional 46 | :param username: The username for authentication. Defaults to None. 
47 | :type username: Optional[str], optional 48 | :param password: The password for authentication. Defaults to None. 49 | :type password: Optional[str], optional 50 | :param encoder: An optional encoder model to use for text encoding. Defaults to None. 51 | :type encoder: Optional[Encoder], optional 52 | :param init_timeout: The timeout in seconds for the initialization process. Defaults to 10 minutes (600 seconds). 53 | :type init_timeout: int, optional 54 | :param host: The hostname or IP address of the database server. Defaults to "127.0.0.1". 55 | :type host: str, optional 56 | :param port: The port number to connect to the database server. Defaults to None. 57 | :type port: Optional[int], optional 58 | 59 | Example usage: 60 | ```python 61 | from agents.models import Encoder 62 | db_config = DB(name='my_database', username='user123', password='pass123') 63 | db_config.db_location = '/path/to/new/location' 64 | db_config.encoder = Encoder(checkpoint="BAAI/bge-small-en") 65 | ``` 66 | """ 67 | 68 | pass 69 | -------------------------------------------------------------------------------- /agents/msg/Bbox2D.msg: -------------------------------------------------------------------------------- 1 | float64 top_left_x 2 | float64 top_left_y 3 | float64 bottom_right_x 4 | float64 bottom_right_y 5 | -------------------------------------------------------------------------------- /agents/msg/Detection2D.msg: -------------------------------------------------------------------------------- 1 | std_msgs/Header header 2 | 3 | float64[] scores 4 | string[] labels 5 | Bbox2D[] boxes 6 | 7 | # Either an image or compressed image 8 | sensor_msgs/Image image 9 | sensor_msgs/CompressedImage compressed_image 10 | -------------------------------------------------------------------------------- /agents/msg/Detections2D.msg: -------------------------------------------------------------------------------- 1 | std_msgs/Header header 2 | 3 | Detection2D[] detections 4 | -------------------------------------------------------------------------------- /agents/msg/Point2D.msg: -------------------------------------------------------------------------------- 1 | float64 x 2 | float64 y 3 | -------------------------------------------------------------------------------- /agents/msg/Tracking.msg: -------------------------------------------------------------------------------- 1 | std_msgs/Header header 2 | 3 | Point2D[] centroids 4 | string[] labels 5 | Bbox2D[] boxes 6 | int8[] ids 7 | Point2D[] estimated_velocities 8 | 9 | # Either an image or compressed image 10 | sensor_msgs/Image image 11 | sensor_msgs/CompressedImage compressed_image 12 | -------------------------------------------------------------------------------- /agents/msg/Trackings.msg: -------------------------------------------------------------------------------- 1 | std_msgs/Header header 2 | 3 | Tracking[] trackings 4 | -------------------------------------------------------------------------------- /agents/msg/Video.msg: -------------------------------------------------------------------------------- 1 | std_msgs/Header header 2 | 3 | # Eithen a list of images or compressed images 4 | sensor_msgs/Image[] frames 5 | sensor_msgs/CompressedImage[] compressed_frames 6 | -------------------------------------------------------------------------------- /agents/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | automatika_embodied_agents 5 | 0.3.3 6 | agents 7 | Automatika Robotics 8 | MIT 9 | 10 
| builtin_interfaces 11 | std_msgs 12 | sensor_msgs 13 | python3-tqdm 14 | python3-httpx 15 | automatika_ros_sugar 16 | 17 | ament_cmake 18 | ament_cmake_python 19 | rosidl_default_generators 20 | rosidl_default_runtime 21 | rosidl_interface_packages 22 | 23 | python3-pytest 24 | 25 | 26 | ament_cmake 27 | 28 | 29 | -------------------------------------------------------------------------------- /agents/scripts/chainlit_client/app.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from typing import Union, Optional, List 3 | from enum import Enum 4 | 5 | import chainlit as cl 6 | from chainlit.element import ElementBased 7 | from chainlit.input_widget import TextInput 8 | 9 | import rclpy 10 | from rclpy.node import Node 11 | from std_msgs.msg import ByteMultiArray, String 12 | 13 | 14 | class Status(Enum): 15 | INIT = 0 16 | RECEIVED_TEXT = 1 17 | RECEIVED_AUDIO = 2 18 | TIMEOUT = 3 19 | 20 | 21 | class ClientNode(Node): 22 | """ 23 | Cli based text client with a publisher and subscriber. 24 | """ 25 | 26 | def __init__(self) -> None: 27 | """ 28 | Constructs a new instance. 29 | """ 30 | super().__init__("cli_client") 31 | self.msg: Optional[Union[str, bytes]] = None 32 | # Start with defaults 33 | self.set_trigger("text0", "audio0") 34 | self.set_target("text1", "audio1") 35 | 36 | def publish(self, prompt: Union[str, bytes]) -> None: 37 | """ 38 | Publish to the trigger topics and listen to the target topics 39 | 40 | :param prompt: The prompt/question 41 | :type prompt: {str, bytes} 42 | 43 | :returns: None 44 | :rtype: None 45 | """ 46 | 47 | # set timeout flag 48 | self.msg_received = Status.INIT 49 | # Check for publishers on available topic and quit if none available 50 | if isinstance(prompt, bytes): 51 | if not self.count_subscribers(self.audio_trigger) > 0: 52 | self.get_logger().info( 53 | f"No one is listening to {self.audio_trigger}, so I am timing out" 54 | ) 55 | self.timer = self.create_timer(0, self.timer_callback) 56 | return None 57 | msg = ByteMultiArray() 58 | msg.data = prompt 59 | self.audio_publisher.publish(msg) 60 | self.get_logger().info(f"Publishing to {self.audio_trigger}") 61 | else: 62 | if not self.count_subscribers(self.text_trigger) > 0: 63 | self.get_logger().info( 64 | f"No one is listening to {self.text_trigger}, so I am timing out" 65 | ) 66 | self.timer = self.create_timer(0, self.timer_callback) 67 | return None 68 | # Create and publish message 69 | msg = String() 70 | msg.data = prompt 71 | self.text_publisher.publish(msg) 72 | self.get_logger().info(f"Publishing to {self.text_trigger}") 73 | 74 | self.get_logger().info("Now listening..") 75 | 76 | def listener_callback(self, msg: Union[String, ByteMultiArray]) -> None: 77 | """ 78 | Listener callback 79 | 80 | :param msg: The message 81 | :type msg: {ROS Message} 82 | """ 83 | if isinstance(msg, String): 84 | self.msg_received = Status.RECEIVED_TEXT 85 | self.get_logger().info(f"A: {msg.data}") 86 | self.msg = msg.data 87 | elif isinstance(msg, ByteMultiArray): 88 | self.msg_received = Status.RECEIVED_AUDIO 89 | self.get_logger().info("A: Audio bytes") 90 | self.msg = b"".join(msg.data) 91 | else: 92 | self.get_logger().error( 93 | "Something went wrong. 
Received message is neither String nor ByteMultiArray" 94 | ) 95 | 96 | def timer_callback(self): 97 | """ 98 | Timer Callback just for destroying the time and end node spin_once 99 | """ 100 | # the timer should be destroyed once utilized 101 | self.destroy_timer(self.timer) 102 | self.msg_received = Status.TIMEOUT 103 | 104 | def set_trigger(self, text_trigger: str, audio_trigger: str): 105 | """ 106 | Set topic to send messages to 107 | """ 108 | if hasattr(self, "text_publisher"): 109 | self.destroy_publisher(self.text_publisher) 110 | self.text_trigger = text_trigger 111 | self.text_publisher = self.create_publisher(String, self.text_trigger, 1) 112 | 113 | if hasattr(self, "audio_publisher"): 114 | self.destroy_publisher(self.audio_publisher) 115 | self.audio_trigger = audio_trigger 116 | self.audio_publisher = self.create_publisher( 117 | ByteMultiArray, self.audio_trigger, 1 118 | ) 119 | 120 | def set_target(self, text_target: str, audio_target: str): 121 | """ 122 | Set topic to receive messages from 123 | """ 124 | if hasattr(self, "text_subscription"): 125 | self.destroy_subscription(self.text_subscription) 126 | self.text_target = text_target 127 | self.text_subscription = self.create_subscription( 128 | String, self.text_target, self.listener_callback, 1 129 | ) 130 | 131 | if hasattr(self, "audio_subscription"): 132 | self.destroy_subscription(self.audio_subscription) 133 | self.audio_target = audio_target 134 | self.audio_subscription = self.create_subscription( 135 | ByteMultiArray, self.audio_target, self.listener_callback, 1 136 | ) 137 | 138 | 139 | @cl.on_chat_start 140 | async def on_chat_start(): 141 | """ 142 | On chat start, specify default settings 143 | """ 144 | # Init rclpy 145 | if not rclpy.ok(): 146 | rclpy.init() 147 | await cl.ChatSettings([ 148 | TextInput( 149 | id="text_trigger", 150 | label="String topic to send message to", 151 | initial="text0", 152 | ), 153 | TextInput( 154 | id="text_target", 155 | label="String topic to listen to for response", 156 | initial="text1", 157 | ), 158 | TextInput( 159 | id="audio_trigger", 160 | label="Audio topic to send message to", 161 | initial="audio0", 162 | ), 163 | TextInput( 164 | id="audio_target", 165 | label="Audio topic to listen to for response", 166 | initial="audio1", 167 | ), 168 | TextInput(id="timeout", label="Timeout (sec)", initial="30"), 169 | ]).send() 170 | cl.user_session.set("timeout", 30) 171 | client: ClientNode = ClientNode() 172 | cl.user_session.set("client", client) 173 | await cl.Message( 174 | content="Welcome to Leibniz ROS client. Set the input/output topics in settings. Then type your message or press `P` to send audio!" 175 | ).send() 176 | 177 | 178 | @cl.on_settings_update 179 | async def setup_ros_node(settings): 180 | """ 181 | On settings update, update nodes 182 | """ 183 | client: ClientNode = cl.user_session.get("client") 184 | client.set_trigger(settings["text_trigger"], settings["audio_trigger"]) 185 | client.set_target(settings["text_target"], settings["audio_target"]) 186 | if not settings["timeout"].isdigit(): 187 | return 188 | cl.user_session.set("timeout", int(settings["timeout"])) 189 | 190 | 191 | @cl.step(type="run") 192 | def publish_on_ros(msg: Union[str, bytes]): 193 | """Publish input to the ROS Client node. 
194 | :param msg: 195 | :type msg: Union[str, bytes] 196 | """ 197 | timeout: int = cl.user_session.get("timeout") 198 | client: ClientNode = cl.user_session.get("client") 199 | client.publish(msg) 200 | rclpy.spin_once(client, timeout_sec=timeout) 201 | 202 | 203 | @cl.step(type="run") 204 | async def handle_output(msg_type: type): 205 | """Handle Output from the ROS Client node. 206 | :param msg_type: 207 | :type msg_type: type 208 | """ 209 | client: ClientNode = cl.user_session.get("client") 210 | if client.msg_received is Status.INIT: 211 | await cl.Message( 212 | content=f"I did not receive a message on **{client.text_target}** or **{client.audio_target}**. Timedout.", 213 | ).send() 214 | elif client.msg_received is Status.RECEIVED_TEXT: 215 | await cl.Message( 216 | content=f"{client.msg}", 217 | ).send() 218 | elif client.msg_received is Status.RECEIVED_AUDIO: 219 | output_audio_el = cl.Audio(content=client.msg, name="Response Audio") 220 | await cl.Message( 221 | author="Robot", 222 | type="assistant_message", 223 | content="", 224 | elements=[output_audio_el], 225 | ).send() 226 | else: 227 | trig = client.audio_trigger if msg_type is bytes else client.text_trigger 228 | await cl.Message( 229 | content=f"There is no one listening on **{trig}**. Is this the correct topic. If not, set the correct trigger and response topics in the settings.", 230 | ).send() 231 | 232 | 233 | @cl.on_message 234 | async def on_message(msg: cl.Message): 235 | """ 236 | On message, handle text message 237 | """ 238 | publish_on_ros(msg.content) 239 | await handle_output(type(msg)) 240 | 241 | 242 | @cl.on_audio_chunk 243 | async def on_audio_chunk(chunk: cl.AudioChunk): 244 | """Receive audio chunks 245 | :param chunk: 246 | :type chunk: cl.AudioChunk 247 | """ 248 | if chunk.isStart: 249 | # Initialize new audio buffer 250 | buffer = BytesIO() 251 | buffer.name = "input_audio" 252 | cl.user_session.set("audio_buffer", buffer) 253 | cl.user_session.set("audio_mime_type", chunk.mimeType) 254 | 255 | # write chunks to buffer 256 | cl.user_session.get("audio_buffer").write(chunk.data) 257 | 258 | 259 | @cl.on_audio_end 260 | async def on_audio_end(elements: List[ElementBased]): 261 | """Publish audio to the topic. 262 | :param elements: 263 | :type elements: list[ElementBased] 264 | """ 265 | audio_buffer: BytesIO = cl.user_session.get("audio_buffer") 266 | audio_buffer.seek(0) 267 | audio_mime_type: str = cl.user_session.get("audio_mime_type") 268 | audio_bytes = audio_buffer.read() 269 | 270 | # Add users audio to the chat 271 | input_audio_el = cl.Audio( 272 | mime=audio_mime_type, content=audio_bytes, name="User Audio" 273 | ) 274 | await cl.Message( 275 | author="User", 276 | type="user_message", 277 | content="", 278 | elements=[input_audio_el, *elements], 279 | ).send() 280 | 281 | # publish using ROS client 282 | publish_on_ros(audio_bytes) 283 | await handle_output(type(audio_bytes)) 284 | 285 | 286 | @cl.on_chat_end 287 | async def on_chat_end(): 288 | """ 289 | On chat end destroy client nodes 290 | """ 291 | if rclpy.ok(): 292 | client: ClientNode = cl.user_session.get("client") 293 | client.destroy_node() 294 | rclpy.shutdown() 295 | -------------------------------------------------------------------------------- /agents/scripts/chainlit_client/chainlit.md: -------------------------------------------------------------------------------- 1 | # Tiny Web Client for ROS Agents 2 | 3 | This client is based on chainlit. In order to use it, run the following in order. 
4 | 5 | `pip install chainlit` 6 | 7 | `ros2 run automatika_embodied_agents tiny_web_client` 8 | 9 | The client displays a web UI on **localhost:8000**. Open this link from browser. 10 | 11 | ROS input and output topic settings for text and audio topics can be configured from the web UI by pressing the settings icon. 12 | -------------------------------------------------------------------------------- /agents/scripts/chainlit_client/tiny_web_client: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from pathlib import Path 3 | from chainlit.cli import run_chainlit 4 | from chainlit.config import config 5 | 6 | 7 | def main(): 8 | """Run from ROS""" 9 | # TODO: Add chainlit option handling via ROS 10 | 11 | root_path = Path(__file__).parent / Path("app.py") 12 | 13 | # Set general config options 14 | config.run.headless = True 15 | config.project.enable_telemetry = False 16 | config.root = str(root_path.parent) 17 | 18 | # Set audio config options 19 | config.features.audio.sample_rate = 16000 # type: ignore 20 | config.features.audio.initial_silence_timeout = 2000 # type: ignore 21 | config.features.audio.silence_timeout = 1000 # type: ignore 22 | 23 | run_chainlit(str(root_path)) 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | -------------------------------------------------------------------------------- /agents/scripts/executable: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import argparse 4 | from typing import List, Dict, Union 5 | 6 | import rclpy 7 | import setproctitle 8 | from rclpy.executors import MultiThreadedExecutor 9 | from rclpy.utilities import try_shutdown 10 | 11 | from agents import config as all_configs 12 | from agents import components as all_components 13 | from agents import clients 14 | from agents.ros import Topic, FixedInput, MapLayer, Route 15 | 16 | 17 | def _parse_args() -> tuple[argparse.Namespace, List[str]]: 18 | """Parse arguments.""" 19 | parser = argparse.ArgumentParser(description="Component Executable Config") 20 | parser.add_argument( 21 | "--config_type", type=str, help="Component configuration class name" 22 | ) 23 | parser.add_argument("--component_type", type=str, help="Component class name") 24 | parser.add_argument( 25 | "--node_name", 26 | type=str, 27 | help="Component ROS2 node name", 28 | ) 29 | parser.add_argument("--config", type=str, help="Component configuration object") 30 | parser.add_argument( 31 | "--inputs", 32 | type=str, 33 | help="Component input topics", 34 | ) 35 | parser.add_argument( 36 | "--outputs", 37 | type=str, 38 | help="Component output topics", 39 | ) 40 | parser.add_argument( 41 | "--routes", 42 | type=str, 43 | help="Semantic router routes", 44 | ) 45 | parser.add_argument( 46 | "--layers", 47 | type=str, 48 | help="Map Encoding layers", 49 | ) 50 | parser.add_argument( 51 | "--trigger", 52 | type=str, 53 | help="Component trigger", 54 | ) 55 | parser.add_argument( 56 | "--model_client", 57 | type=str, 58 | help="Model Client", 59 | ) 60 | parser.add_argument( 61 | "--db_client", 62 | type=str, 63 | help="DB Client", 64 | ) 65 | parser.add_argument( 66 | "--config_file", type=str, help="Path to configuration YAML file" 67 | ) 68 | parser.add_argument( 69 | "--events", type=str, help="Events to be monitored by the component" 70 | ) 71 | parser.add_argument( 72 | "--actions", type=str, help="Actions associated with the component Events" 73 | ) 74 | 
parser.add_argument( 75 | "--external_processors", 76 | type=str, 77 | help="External processors associated with the component input and output topics", 78 | ) 79 | return parser.parse_known_args() 80 | 81 | 82 | def _parse_component_config( 83 | args: argparse.Namespace, 84 | ) -> all_configs.BaseComponentConfig: 85 | """Parse the component config object 86 | 87 | :param args: Command line arguments 88 | :type args: argparse.Namespace 89 | 90 | :return: Component config object 91 | :rtype: object 92 | """ 93 | config_type = args.config_type or None 94 | if not config_type: 95 | raise ValueError("config_type must be provided") 96 | 97 | # Get config type and update from json arg 98 | config_class = getattr(all_configs, config_type) 99 | if not config_class: 100 | raise TypeError( 101 | f"Unknown config_type '{config_type}'. Known types are {all_configs.__all__}" 102 | ) 103 | 104 | config = config_class(**json.loads(args.config)) 105 | 106 | return config 107 | 108 | 109 | def _parse_trigger(trigger_str: str) -> Union[Topic, List[Topic], float]: 110 | """Parse component trigger json string 111 | 112 | :param trigger_str: Trigger JSON string 113 | :type trigger_str: str 114 | 115 | :return: Trigger topics or float 116 | :rtype: Topic | List[Topic] | float 117 | """ 118 | trigger_json = json.loads(trigger_str) 119 | if isinstance(trigger_json, List): 120 | return [Topic(**json.loads(t)) for t in trigger_json] 121 | elif isinstance(trigger_json, Dict): 122 | return Topic(**trigger_json) 123 | else: 124 | # return float 125 | return trigger_json 126 | 127 | 128 | def _deserialize_topics(serialized_topics: str) -> List[Dict]: 129 | list_of_str = json.loads(serialized_topics) 130 | return [json.loads(t) for t in list_of_str] 131 | 132 | 133 | def _parse_ros_args(args_names: List[str]) -> List[str]: 134 | """Parse ROS arguments from command line arguments 135 | 136 | :param args_names: List of all parsed arguments 137 | :type args_names: list[str] 138 | 139 | :return: List ROS parsed arguments 140 | :rtype: list[str] 141 | """ 142 | # Look for --ros-args in ros_args 143 | ros_args_start = None 144 | if "--ros-args" in args_names: 145 | ros_args_start = args_names.index("--ros-args") 146 | 147 | if ros_args_start is not None: 148 | ros_specific_args = args_names[ros_args_start:] 149 | else: 150 | ros_specific_args = [] 151 | return ros_specific_args 152 | 153 | 154 | def main(): 155 | """Executable main function to run a component as a ROS2 node in a new process. 156 | Used to start a node using ROS Sugar Launcher. Extends functionality from ROS Sugar 157 | 158 | :param list_of_components: List of all known Component classes in the package 159 | :type list_of_components: List[Type] 160 | :param list_of_configs: List of all known ComponentConfig classes in the package 161 | :type list_of_configs: List[Type] 162 | :raises ValueError: If component or component config are unknown classes 163 | :raises ValueError: If component cannot be started with provided arguments 164 | """ 165 | args, args_names = _parse_args() 166 | 167 | # Initialize rclpy with the ros-specific arguments 168 | rclpy.init(args=_parse_ros_args(args_names)) 169 | 170 | component_type = args.component_type or None 171 | 172 | if not component_type: 173 | raise ValueError("Cannot launch without providing a component_type") 174 | 175 | comp_class = getattr(all_components, component_type) 176 | 177 | if not comp_class: 178 | raise ValueError( 179 | f"Cannot launch unknown component type '{component_type}'. 
Known types are: '{all_components.__all__}'" 180 | ) 181 | 182 | # Get name 183 | component_name = args.node_name or None 184 | 185 | if not component_name: 186 | raise ValueError("Cannot launch component without specifying a name") 187 | 188 | # SET PROCESS NAME 189 | setproctitle.setproctitle(component_name) 190 | 191 | config = _parse_component_config(args) 192 | 193 | # Get Yaml config file if provided 194 | config_file = args.config_file or None 195 | 196 | # Get inputs/outputs/layers/routes 197 | inputs = ( 198 | [ 199 | FixedInput(**i) if i.get("fixed") else Topic(**i) 200 | for i in _deserialize_topics(args.inputs) 201 | ] 202 | if args.inputs 203 | else None 204 | ) 205 | outputs = ( 206 | [Topic(**o) for o in _deserialize_topics(args.outputs)] 207 | if args.outputs 208 | else None 209 | ) 210 | layers = ( 211 | [MapLayer(**i) for i in _deserialize_topics(args.layers)] 212 | if args.layers 213 | else None 214 | ) 215 | routes = ( 216 | [Route(**r) for r in _deserialize_topics(args.routes)] if args.routes else None 217 | ) 218 | 219 | # Get triggers 220 | trigger = _parse_trigger(args.trigger) 221 | 222 | # Init the component 223 | # Semantic Router Component 224 | if component_type == all_components.SemanticRouter.__name__: 225 | db_client_json = json.loads(args.db_client) 226 | db_client = getattr(clients, db_client_json["client_type"])(**db_client_json) 227 | component = comp_class( 228 | inputs=inputs, 229 | routes=routes, 230 | db_client=db_client, 231 | config=config, 232 | default_route=config._default_route, 233 | component_name=component_name, 234 | config_file=config_file, 235 | ) 236 | # Map Encoding Component 237 | elif component_type == all_components.MapEncoding.__name__: 238 | db_client_json = json.loads(args.db_client) 239 | db_client = getattr(clients, db_client_json["client_type"])(**db_client_json) 240 | component = comp_class( 241 | layers=layers, 242 | position=config._position, 243 | map_topic=config._map_topic, 244 | db_client=db_client, 245 | config=config, 246 | trigger=trigger, 247 | component_name=component_name, 248 | config_file=config_file, 249 | ) 250 | 251 | # All other components 252 | else: 253 | if args.model_client: 254 | model_client_json = json.loads(args.model_client) 255 | model_client = getattr(clients, model_client_json["client_type"])( 256 | **model_client_json 257 | ) 258 | else: 259 | model_client = None 260 | if args.db_client: 261 | db_client_json = json.loads(args.db_client) 262 | db_client = getattr(clients, db_client_json["client_type"])( 263 | **db_client_json 264 | ) 265 | else: 266 | db_client = None 267 | 268 | component = comp_class( 269 | inputs=inputs, 270 | outputs=outputs, 271 | model_client=model_client, 272 | db_client=db_client, 273 | trigger=trigger, 274 | config=config, 275 | component_name=component_name, 276 | config_file=config_file, 277 | ) 278 | 279 | # Init the node with rclpy 280 | component.rclpy_init_node() 281 | 282 | # Set events/actions 283 | events_json = args.events or None 284 | actions_json = args.actions or None 285 | 286 | if events_json and actions_json: 287 | component._events_json = events_json 288 | component._actions_json = actions_json 289 | 290 | # Set external processors 291 | external_processors = args.external_processors or None 292 | if external_processors: 293 | component._external_processors_json = external_processors 294 | 295 | executor = MultiThreadedExecutor() 296 | 297 | executor.add_node(component) 298 | 299 | try: 300 | executor.spin() 301 | 302 | except KeyboardInterrupt: 303 | 
pass 304 | 305 | finally: 306 | executor.remove_node(component) 307 | try_shutdown() 308 | 309 | 310 | if __name__ == "__main__": 311 | main() 312 | -------------------------------------------------------------------------------- /agents/tests/test_clients.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | import subprocess 4 | import shutil 5 | 6 | import cv2 7 | import pytest 8 | from agents.models import Idefics2, OllamaModel 9 | from agents.vectordbs import ChromaDB 10 | from agents.clients.roboml import ( 11 | HTTPModelClient, 12 | HTTPDBClient, 13 | RESPDBClient, 14 | RESPModelClient, 15 | ) 16 | from agents.clients.ollama import OllamaClient 17 | 18 | HOST = "http://localhost" 19 | RAY_PORT = 8000 20 | RESP_PORT = 6379 21 | 22 | 23 | @pytest.fixture(scope="class") 24 | def http_clients(): 25 | """Fixture to run roboml ray and make its clients before tests are run""" 26 | 27 | # start server 28 | p = subprocess.Popen(["roboml"]) 29 | # give it 20 seconds to start before sending request 30 | time.sleep(20) 31 | model = Idefics2(name="idefics") 32 | model_client = HTTPModelClient(model, port=RAY_PORT, logging_level="debug") 33 | db = ChromaDB(name="chroma", db_location="./http_data") 34 | db_client = HTTPDBClient(db, port=RAY_PORT, logging_level="debug") 35 | 36 | yield {"model": model_client, "db": db_client} 37 | 38 | # terminate server process - kill to remove ray monitoring child 39 | p.kill() 40 | shutil.rmtree("./http_data") 41 | 42 | 43 | @pytest.fixture(scope="class") 44 | def resp_clients(): 45 | """Fixture to run roboml-resp and make its clients before tests are run""" 46 | 47 | # start server 48 | p = subprocess.Popen(["roboml-resp"]) 49 | # give it 20 seconds to start before sending request 50 | time.sleep(20) 51 | model = Idefics2(name="idefics") 52 | model_client = RESPModelClient(model, logging_level="debug") 53 | db = ChromaDB(name="chroma", db_location="./resp_data") 54 | db_client = RESPDBClient(db, logging_level="debug") 55 | 56 | yield {"model": model_client, "db": db_client} 57 | 58 | # terminate server process 59 | p.terminate() 60 | shutil.rmtree("./resp_data") 61 | 62 | 63 | @pytest.fixture(scope="class") 64 | def ollama_client(): 65 | """Fixture to create client ollama before tests are run""" 66 | 67 | model = OllamaModel(name="llava", checkpoint="llava") 68 | ollama_client = OllamaClient(model, logging_level="debug") 69 | yield ollama_client 70 | 71 | 72 | @pytest.fixture 73 | def loaded_img(): 74 | """Fixture to load test image""" 75 | return cv2.imread("agents/resources/test.jpeg", cv2.COLOR_BGR2RGB) 76 | 77 | 78 | @pytest.fixture 79 | def data(): 80 | return { 81 | "ids": ["a"], 82 | "metadatas": [{"something": "about a"}], 83 | "documents": ["description of a"], 84 | "collection_name": "alphabets", 85 | } 86 | 87 | 88 | class TestRobomlHTTPClient: 89 | """ 90 | Test roboml http client 91 | """ 92 | 93 | def test_model_init(self, http_clients): 94 | """ 95 | Test roboml http model client init 96 | """ 97 | try: 98 | http_clients["model"].check_connection() 99 | except Exception: 100 | logging.error( 101 | "Make sure roboml is installed on this machine before running these tests. 
roboml can be installed with `pip install roboml`" 102 | ) 103 | raise 104 | http_clients["model"].initialize() 105 | 106 | def test_model_inference(self, http_clients, loaded_img): 107 | """ 108 | Test roboml http model client inference 109 | """ 110 | inference_input = {"query": "What do you see?", "images": [loaded_img]} 111 | result = http_clients["model"].inference(inference_input) 112 | assert result is not None 113 | assert result["output"] is not None 114 | logging.info(result["output"]) 115 | 116 | def test_model_deinit(self, http_clients): 117 | """ 118 | Test roboml http model client deinit 119 | """ 120 | http_clients["model"].deinitialize() 121 | 122 | def test_db_init(self, http_clients): 123 | """ 124 | Test roboml http db client init 125 | """ 126 | http_clients["db"].check_connection() 127 | http_clients["db"].initialize() 128 | 129 | def test_db_add(self, http_clients, data): 130 | """ 131 | Test roboml http db client add 132 | """ 133 | result = http_clients["db"].add(data) 134 | assert result is not None 135 | assert result["output"] is not None 136 | logging.info(result["output"]) 137 | 138 | def test_db_conditional_add(self, http_clients, data): 139 | """ 140 | Test roboml http db client conditional add 141 | """ 142 | result = http_clients["db"].conditional_add(data) 143 | assert result is not None 144 | assert result["output"] is not None 145 | logging.info(result["output"]) 146 | 147 | def test_db_metadata_query(self, http_clients, data): 148 | """ 149 | Test roboml http db client metadata query 150 | """ 151 | metadata_query = { 152 | "metadatas": data["metadatas"], 153 | "collection_name": data["collection_name"], 154 | } 155 | result = http_clients["db"].metadata_query(metadata_query) 156 | assert result is not None 157 | assert result["output"] is not None 158 | logging.info(result["output"]) 159 | 160 | def test_db_query(self, http_clients, data): 161 | """ 162 | Test roboml http db client query 163 | """ 164 | metadata_query = { 165 | "query": "what is a", 166 | "collection_name": data["collection_name"], 167 | } 168 | result = http_clients["db"].query(metadata_query) 169 | assert result is not None 170 | assert result["output"] is not None 171 | logging.info(result["output"]) 172 | 173 | def test_db_deinit(self, http_clients): 174 | """ 175 | Test roboml http db client deinit 176 | """ 177 | http_clients["db"].deinitialize() 178 | 179 | 180 | class TestRobomlRESPClient: 181 | """ 182 | Test roboml resp client 183 | """ 184 | 185 | def test_model_init(self, resp_clients): 186 | """ 187 | Test roboml resp model client init 188 | """ 189 | try: 190 | resp_clients["model"].check_connection() 191 | except Exception: 192 | logging.error( 193 | "Make sure roboml is installed on this machine before running these tests. 
roboml can be installed with `pip install roboml`" 194 | ) 195 | raise 196 | resp_clients["model"].initialize() 197 | 198 | def test_model_inference(self, resp_clients, loaded_img): 199 | """ 200 | Test roboml resp model client inference 201 | """ 202 | inference_input = {"query": "What do you see?", "images": [loaded_img]} 203 | result = resp_clients["model"].inference(inference_input) 204 | assert result is not None 205 | assert result["output"] is not None 206 | logging.info(result["output"]) 207 | 208 | def test_model_deinit(self, resp_clients): 209 | """ 210 | Test roboml resp model client deinit 211 | """ 212 | resp_clients["model"].deinitialize() 213 | 214 | def test_db_init(self, resp_clients): 215 | """ 216 | Test roboml resp db client init 217 | """ 218 | resp_clients["db"].check_connection() 219 | resp_clients["db"].initialize() 220 | 221 | def test_db_add(self, resp_clients, data): 222 | """ 223 | Test roboml resp db client add 224 | """ 225 | result = resp_clients["db"].add(data) 226 | assert result is not None 227 | assert result["output"] is not None 228 | logging.info(result["output"]) 229 | 230 | def test_db_conditional_add(self, resp_clients, data): 231 | """ 232 | Test roboml resp db client conditional add 233 | """ 234 | result = resp_clients["db"].conditional_add(data) 235 | assert result is not None 236 | assert result["output"] is not None 237 | logging.info(result["output"]) 238 | 239 | def test_db_metadata_query(self, resp_clients, data): 240 | """ 241 | Test roboml resp db client metadata query 242 | """ 243 | metadata_query = { 244 | "metadatas": data["metadatas"], 245 | "collection_name": data["collection_name"], 246 | } 247 | result = resp_clients["db"].metadata_query(metadata_query) 248 | assert result is not None 249 | assert result["output"] is not None 250 | logging.info(result["output"]) 251 | 252 | def test_db_query(self, resp_clients, data): 253 | """ 254 | Test roboml resp db client query 255 | """ 256 | metadata_query = { 257 | "query": "what is a", 258 | "collection_name": data["collection_name"], 259 | } 260 | result = resp_clients["db"].query(metadata_query) 261 | assert result is not None 262 | assert result["output"] is not None 263 | logging.info(result["output"]) 264 | 265 | def test_db_deinit(self, resp_clients): 266 | """ 267 | Test roboml resp db client deinit 268 | """ 269 | resp_clients["db"].deinitialize() 270 | 271 | 272 | class TestOllamaClient: 273 | """ 274 | Test ollama client 275 | """ 276 | 277 | def test_model_init(self, ollama_client): 278 | """ 279 | Test ollama model client init 280 | """ 281 | try: 282 | ollama_client.check_connection() 283 | except Exception: 284 | logging.error( 285 | "Make sure Ollama is installed on this machine before running these tests. Visit https://ollama.com for installation instructions." 
286 | ) 287 | raise 288 | ollama_client.initialize() 289 | 290 | def test_model_inference(self, ollama_client, loaded_img): 291 | """ 292 | Test ollama model client inference 293 | """ 294 | inference_input = {"query": "What do you see?", "images": [loaded_img]} 295 | result = ollama_client.inference(inference_input) 296 | assert result is not None 297 | assert result["output"] is not None 298 | logging.info(result["output"]) 299 | 300 | def test_model_deinit(self, ollama_client): 301 | """ 302 | Test ollama model client deinit 303 | """ 304 | ollama_client.deinitialize() 305 | -------------------------------------------------------------------------------- /docs/_static/ROS_AGENTS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automatika-robotics/ros-agents/1fbf24d683bc5bcb9398c8ab3146ffd8197020fc/docs/_static/ROS_AGENTS.png -------------------------------------------------------------------------------- /docs/_static/ROS_AGENTS_DARK.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automatika-robotics/ros-agents/1fbf24d683bc5bcb9398c8ab3146ffd8197020fc/docs/_static/ROS_AGENTS_DARK.png -------------------------------------------------------------------------------- /docs/_static/automatika-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automatika-robotics/ros-agents/1fbf24d683bc5bcb9398c8ab3146ffd8197020fc/docs/_static/automatika-logo.png -------------------------------------------------------------------------------- /docs/_static/complete_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automatika-robotics/ros-agents/1fbf24d683bc5bcb9398c8ab3146ffd8197020fc/docs/_static/complete_dark.png -------------------------------------------------------------------------------- /docs/_static/complete_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automatika-robotics/ros-agents/1fbf24d683bc5bcb9398c8ab3146ffd8197020fc/docs/_static/complete_light.png -------------------------------------------------------------------------------- /docs/basics.md: -------------------------------------------------------------------------------- 1 | # Basic Concepts 📚 2 | 3 | The following is an overview of basic building blocks of ROS Agents. You can follow the links in each subsection to dig deeper. 4 | 5 | ## Component 6 | 7 | A Component is the main execution unit in ROS Agents and in essence each component is synctactic sugar over a ROS2 Lifecycle Node. All the functionalities implemented in ROS2 nodes can be found in the component. Components take a single Topic or a list of Topics as inputs and ouputs. Depending on the components functionality, certain types of Topics might be mandatory. 8 | 9 | ```{note} 10 | To learn more about components, checkout [ROS Sugar Documentation](https://automatika-robotics.github.io/ros-sugar/). 11 | ``` 12 | 13 | ### Components Available in ROS Agents 14 | 15 | ROS Agents provides various ready to use components. You can see their details [here](apidocs/agents/agents.components). 16 | 17 | ### Component Config 18 | 19 | Each component can take in an optional config. 
Configs are generally [attrs](https://www.attrs.org/en/stable/) classes, and for components that use ML models, configs are also the place where inference parameters are defined. You can see the default options for the configs of each available component [here](apidocs/agents/agents.config). 20 | 21 | ### Component RunType 22 | 23 | In ROS Agents, components can be of the following two types: 24 | 25 | ```{list-table} 26 | :widths: 10 80 27 | * - **Timed** 28 | - Execute the main execution function in a timed loop. 29 | * - **Event** 30 | - Execute the main execution function based on a trigger topic/event. 31 | ``` 32 | 33 | ### Health Check and Fallback 34 | 35 | Each component maintains a health status, based on which one can configure various fallback options for the component, allowing it to recover from failures or shut down gracefully. This aspect can be significant in embodied autonomous agents, not just in terms of safety but for generally coherent and reliable performance. To learn more about these topics, check out the [ROS Sugar Documentation](https://automatika-robotics.github.io/ros-sugar/). 36 | 37 | ## Topic 38 | 39 | A [topic](apidocs/agents/agents.ros) is an idiomatic wrapper for a ROS2 topic. Topics can be given as inputs or outputs to components. When given as inputs, components automatically create listeners for the topics upon their activation. When given as outputs, components create publishers for publishing to the topic. Each topic has a name and a data type, defining its listening callback and publishing behavior. The data type can be provided to the topic as a string. Check out the list of supported data types [here](https://automatika-robotics.github.io/ros-sugar/advanced/types.html). 40 | 41 | ```{note} 42 | Learn more about Topics in [ROS Sugar](https://automatika-robotics.github.io/ros-sugar/). 43 | ``` 44 | 45 | ## Model/DB Client 46 | 47 | Certain components in ROS Agents deal with ML models, vector DBs or both. These components take in a model or DB client as one of their initialization parameters. The reason for this separate abstraction is to enforce _separation of concerns_. An ML model can be running on the edge hardware itself, on a powerful compute node in the network, or in the cloud; the components running on the robot can always use the model (or DB) via a client in a standardized way. This also makes the components independent of the model serving platforms, which can implement various inference optimizations that are usually model specific. Thus one can choose an ML serving platform with the best latency/accuracy tradeoff, depending on the concerns of the application. 48 | 49 | All clients implement a connection check. ML clients must implement a model inference method and, optionally, model initialization and deinitialization methods (since an embodied agent can initialize different models, or fine-tuned versions of the same model, for the same component, depending on some event in the environment). Similarly, vector DB clients implement standard CRUD methods for vector DBs. Check out the list of available clients [here](apidocs/agents/agents.clients). 50 | 51 | ## Models/DBs 52 | 53 | The clients we mentioned above take as input a model or vector database specification. These are in the form of [attrs](https://www.attrs.org/en/stable/) classes and define initialization parameters, such as quantization for ML models or the choice of encoding model for vector DBs, among others.
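As a minimal sketch (using the same model, database and client classes that appear in the examples later in this documentation), a specification is simply instantiated and handed to a client:

```python
from agents.models import OllamaModel
from agents.vectordbs import ChromaDB
from agents.clients.ollama import OllamaClient
from agents.clients.roboml import HTTPDBClient

# a model specification served via Ollama (checkpoint names a model available on Ollama)
llava = OllamaModel(name="llava", checkpoint="llava")
llava_client = OllamaClient(llava)

# a vector DB specification served via RoboML over HTTP
chroma = ChromaDB(name="MainDB")
chroma_client = HTTPDBClient(db=chroma)
```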
The available models and databases that can be instantiated on a particular model serving platform usually depend on the platform itself. However, with these model and vector DB specifications, we aim to standardize the model initialization specifications across platforms. Check the list of [models](apidocs/agents/agents.models) and [vector DBs](apidocs/agents/agents.vectordbs) that are available. 54 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | import os 3 | import sys 4 | from datetime import date 5 | import xml.etree.ElementTree as ET 6 | 7 | sys.path.insert(0, os.path.abspath("..")) 8 | version = ET.parse("../agents/package.xml").getroot()[1].text 9 | print("Found version:", version) 10 | 11 | project = "ROS Agents" 12 | copyright = f"{date.today().year}, Automatika Robotics" 13 | author = "Automatika Robotics" 14 | release = version 15 | 16 | extensions = [ 17 | "sphinx.ext.viewcode", 18 | "sphinx.ext.doctest", 19 | "sphinx_copybutton", # install with `pip install sphinx-copybutton` 20 | "autodoc2", # install with `pip install sphinx-autodoc2` 21 | "myst_parser", # install with `pip install myst-parser` 22 | ] 23 | 24 | autodoc2_packages = [ 25 | { 26 | "module": "agents", 27 | "path": "../agents/agents", 28 | "exclude_dirs": ["__pycache__", "utils"], 29 | "exclude_files": [ 30 | "callbacks.py", 31 | "publisher.py", 32 | "component_base.py", 33 | "model_component.py", 34 | "model_base.py", 35 | "db_base.py", 36 | "executable.py", 37 | ], 38 | }, 39 | ] 40 | 41 | autodoc2_docstrings = "all" 42 | autodoc2_class_docstring = "both" # bug in autodoc2, should be `merge` 43 | autodoc2_render_plugin = "myst" 44 | autodoc2_hidden_objects = ["private", "dunder", "undoc"] 45 | autodoc2_module_all_regexes = [ 46 | r"agents.config", 47 | r"agents.models", 48 | r"agents.vectordbs", 49 | r"agents.ros", 50 | r"agents.clients\.[^\.]+", 51 | ] 52 | 53 | templates_path = ["_templates"] 54 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 55 | 56 | myst_enable_extensions = [ 57 | "amsmath", 58 | "attrs_inline", 59 | "colon_fence", 60 | "deflist", 61 | "dollarmath", 62 | "fieldlist", 63 | "html_admonition", 64 | "html_image", 65 | "linkify", 66 | "replacements", 67 | "smartquotes", 68 | "strikethrough", 69 | "substitution", 70 | "tasklist", 71 | ] 72 | language = "en" 73 | myst_html_meta = { 74 | "google-site-verification": "cQVj-BaADcGVOGB7GOvfbkgJjxni10C2fYWCZ03jOeo" 75 | } 76 | 77 | 78 | html_theme = "sphinx_book_theme" # install with `pip install sphinx-book-theme` 79 | html_static_path = ["_static"] 80 | html_theme_options = { 81 | "logo": { 82 | "image_light": "_static/ROS_AGENTS_DARK.png", 83 | "image_dark": "_static/ROS_AGENTS.png", 84 | }, 85 | "icon_links": [ 86 | { 87 | "name": "Automatika", 88 | "url": "https://automatikarobotics.com/", 89 | "icon": "_static/automatika-logo.png", 90 | "type": "local", 91 | }, 92 | { 93 | "name": "GitHub", 94 | "url": "https://github.com/automatika-robotics/ros-agents", 95 | "icon": "fa-brands fa-github", 96 | }, 97 | { 98 | "name": "Discord", 99 | "url": "https://discord.gg/cAW3BWwt", 100 | "icon": "fa-brands fa-discord", 101 | }, 102 | ], 103 | "path_to_docs": "docs", 104 | "repository_url": "https://github.com/automatika-robotics/ros-agents", 105 | "repository_branch": "main", 106 | "use_source_button": True, 107 | "use_issues_button": True, 108 | 
"use_edit_page_button": True, 109 | "show_navbar_depth": 2, 110 | } 111 | -------------------------------------------------------------------------------- /docs/examples/complete.md: -------------------------------------------------------------------------------- 1 | # Bringing it all together 🤖 2 | 3 | In this example we will combine everything we implemented in the previous examples to create one big graph of components. Afterwards we will analyze what we have accomplished. Here is what the code looks like: 4 | 5 | ```python 6 | import numpy as np 7 | import json 8 | from typing import Optional 9 | from agents.components import MLLM, SpeechToText, TextToSpeech, LLM, Vision, MapEncoding, SemanticRouter 10 | from agents.config import SpeechToTextConfig, TextToSpeechConfig 11 | from agents.clients.roboml import HTTPModelClient, RESPModelClient, HTTPDBClient 12 | from agents.clients.ollama import OllamaClient 13 | from agents.models import Whisper, SpeechT5, Llava, Llama3_1, VisionModel 14 | from agents.vectordbs import ChromaDB 15 | from agents.config import VisionConfig, LLMConfig, MapConfig, SemanticRouterConfig 16 | from agents.ros import Topic, Launcher, FixedInput, MapLayer, Route 17 | 18 | 19 | ### Setup our models and vectordb ### 20 | whisper = Whisper(name="whisper") 21 | whisper_client = HTTPModelClient(whisper) 22 | speecht5 = SpeechT5(name="speecht5") 23 | speecht5_client = HTTPModelClient(speecht5) 24 | object_detection_model = VisionModel(name="dino_4scale", 25 | checkpoint="dino-4scale_r50_8xb2-12e_coco") 26 | detection_client = RESPModelClient(object_detection_model) 27 | llava = Llava(name="llava") 28 | llava_client = OllamaClient(llava) 29 | llama = Llama3_1(name="llama") 30 | llama_client = OllamaClient(llama) 31 | chroma = ChromaDB(name="MainDB") 32 | chroma_client = HTTPDBClient(db=chroma) 33 | 34 | ### Setup our components ### 35 | # Setup a speech to text component 36 | audio_in = Topic(name="audio0", msg_type="Audio") 37 | query_topic = Topic(name="question", msg_type="String") 38 | 39 | speech_to_text = SpeechToText( 40 | inputs=[audio_in], 41 | outputs=[query_topic], 42 | model_client=whisper_client, 43 | trigger=audio_in, 44 | config=SpeechToTextConfig(enable_vad=True), # option to always listen for speech through the microphone 45 | component_name="speech_to_text" 46 | ) 47 | 48 | # Setup a text to speech component 49 | query_answer = Topic(name="answer", msg_type="String") 50 | 51 | t2s_config = TextToSpeechConfig(play_on_device=True) 52 | 53 | text_to_speech = TextToSpeech( 54 | inputs=[query_answer], 55 | trigger=query_answer, 56 | model_client=speecht5_client, 57 | config=t2s_config, 58 | component_name="text_to_speech", 59 | ) 60 | 61 | # Setup a vision component for object detection 62 | image0 = Topic(name="image_raw", msg_type="Image") 63 | detections_topic = Topic(name="detections", msg_type="Detections") 64 | 65 | detection_config = VisionConfig(threshold=0.5) 66 | vision = Vision( 67 | inputs=[image0], 68 | outputs=[detections_topic], 69 | trigger=image0, 70 | config=detection_config, 71 | model_client=detection_client, 72 | component_name="object_detection", 73 | ) 74 | 75 | # Define a generic mllm component for vqa 76 | mllm_query = Topic(name="mllm_query", msg_type="String") 77 | 78 | mllm = MLLM( 79 | inputs=[mllm_query, image0, detections_topic], 80 | outputs=[query_answer], 81 | model_client=llava_client, 82 | trigger=mllm_query, 83 | component_name="visual_q_and_a" 84 | ) 85 | 86 | mllm.set_component_prompt( 87 | template="""Imagine you are a 
robot. 88 | This image has following items: {{ detections }}. 89 | Answer the following about this image: {{ text0 }}""" 90 | ) 91 | 92 | # Define a fixed input mllm component that does introspection 93 | introspection_query = FixedInput( 94 | name="introspection_query", msg_type="String", 95 | fixed="What kind of a room is this? Is it an office, a bedroom or a kitchen? Give a one word answer, out of the given choices") 96 | introspection_answer = Topic(name="introspection_answer", msg_type="String") 97 | 98 | introspector = MLLM( 99 | inputs=[introspection_query, image0], 100 | outputs=[introspection_answer], 101 | model_client=llava_client, 102 | trigger=15.0, 103 | component_name="introspector", 104 | ) 105 | 106 | 107 | def introspection_validation(output: str) -> Optional[str]: 108 | for option in ["office", "bedroom", "kitchen"]: 109 | if option in output.lower(): 110 | return option 111 | 112 | 113 | introspector.add_publisher_preprocessor(introspection_answer, introspection_validation) 114 | 115 | # Define a semantic map using MapEncoding component 116 | layer1 = MapLayer(subscribes_to=detections_topic, temporal_change=True) 117 | layer2 = MapLayer(subscribes_to=introspection_answer, resolution_multiple=3) 118 | 119 | position = Topic(name="odom", msg_type="Odometry") 120 | map_topic = Topic(name="map", msg_type="OccupancyGrid") 121 | 122 | map_conf = MapConfig(map_name="map") 123 | map = MapEncoding( 124 | layers=[layer1, layer2], 125 | position=position, 126 | map_topic=map_topic, 127 | config=map_conf, 128 | db_client=chroma_client, 129 | trigger=15.0, 130 | component_name="map_encoder" 131 | ) 132 | 133 | # Define a generic LLM component 134 | llm_query = Topic(name="llm_query", msg_type="String") 135 | 136 | llm = LLM( 137 | inputs=[llm_query], 138 | outputs=[query_answer], 139 | model_client=llama_client, 140 | trigger=[llm_query], 141 | component_name="general_q_and_a" 142 | ) 143 | 144 | # Define a Go-to-X component using LLM 145 | goto_query = Topic(name="goto_query", msg_type="String") 146 | goal_point = Topic(name="goal_point", msg_type="PoseStamped") 147 | 148 | goto_config = LLMConfig( 149 | enable_rag=True, 150 | collection_name="map", 151 | distance_func="l2", 152 | n_results=1, 153 | add_metadata=True, 154 | ) 155 | 156 | goto = LLM( 157 | inputs=[goto_query], 158 | outputs=[goal_point], 159 | model_client=llama_client, 160 | config=goto_config, 161 | db_client=chroma_client, 162 | trigger=goto_query, 163 | component_name="go_to_x", 164 | ) 165 | 166 | goto.set_component_prompt( 167 | template="""From the given metadata, extract coordinates and provide 168 | the coordinates in the following json format:\n {"position": coordinates}""" 169 | ) 170 | 171 | 172 | # pre-process the output before publishing to a topic of msg_type PoseStamped 173 | def llm_answer_to_goal_point(output: str) -> Optional[np.ndarray]: 174 | # extract the json part of the output string (including brackets) 175 | # one can use sophisticated regex parsing here but we'll keep it simple 176 | json_string = output[output.find("{") : output.rfind("}") + 1] 177 | # load the string as a json and extract position coordinates 178 | # if there is an error, return None, i.e. 
no output would be published to goal_point 179 | try: 180 | json_dict = json.loads(json_string) 181 | coordinates = np.fromstring(json_dict["position"], sep=',', dtype=np.float64) 182 | print('Coordinates Extracted:', coordinates) 183 | if coordinates.shape[0] < 2 or coordinates.shape[0] > 3: 184 | return 185 | elif coordinates.shape[0] == 2: # sometimes LLMs avoid adding the zeros of z-dimension 186 | coordinates = np.append(coordinates, 0) 187 | return coordinates 188 | except Exception: 189 | return 190 | 191 | 192 | goto.add_publisher_preprocessor(goal_point, llm_answer_to_goal_point) 193 | 194 | # Define a semantic router between a generic LLM component, VQA MLLM component and Go-to-X component 195 | goto_route = Route(routes_to=goto_query, 196 | samples=["Go to the door", "Go to the kitchen", 197 | "Get me a glass", "Fetch a ball", "Go to hallway"]) 198 | 199 | llm_route = Route(routes_to=llm_query, 200 | samples=["What is the capital of France?", "Is there life on Mars?", 201 | "How many tablespoons in a cup?", "How are you today?", "Whats up?"]) 202 | 203 | mllm_route = Route(routes_to=mllm_query, 204 | samples=["Are we indoors or outdoors", "What do you see?", "Whats in front of you?", 205 | "Where are we", "Do you see any people?", "How many things are infront of you?", 206 | "Is this room occupied?"]) 207 | 208 | router_config = SemanticRouterConfig(router_name="go-to-router", distance_func="l2") 209 | # Initialize the router component 210 | router = SemanticRouter( 211 | inputs=[query_topic], 212 | routes=[llm_route, goto_route, mllm_route], 213 | default_route=llm_route, 214 | config=router_config, 215 | db_client=chroma_client, 216 | component_name='router' 217 | ) 218 | 219 | # Launch the components 220 | launcher = Launcher() 221 | launcher.add_pkg( 222 | components=[ 223 | mllm, 224 | llm, 225 | goto, 226 | introspector, 227 | map, 228 | router, 229 | speech_to_text, 230 | text_to_speech, 231 | vision 232 | ] 233 | ) 234 | launcher.bringup() 235 | ``` 236 | ```{note} 237 | Note how we use the same model for _general_q_and_a_ and _goto_to_x_ components. Similarly _visual_q_and_a_ and _introspector_ components share a multimodal LLM model. 238 | ``` 239 | 240 | In this small code block above, we have setup a fairly sophisticated embodied agent with the following capabilities. 241 | 242 | - A conversational interface using speech-to-text and text-to-speech models that uses the robots microphone and playback speaker. 243 | - The ability to answer contextual queries based on the robots camera, using an MLLM model. 244 | - The ability to answer generic queries, using an LLM model. 245 | - A semantic map of the robots observations, that acts as a spatio-temporal memory. 246 | - The ability to respond to Go-to-X commands utilizing the semantic map. 247 | - A single input interface that routes the input to different models based on its content. 
248 | 249 | We can visualize the complete graph in the following diagram: 250 | ```{figure} ../_static/complete_dark.png 251 | :class: only-dark 252 | :alt: Complete embodied agent 253 | :align: center 254 | Complete embodied agent graph 255 | ``` 256 | ```{figure} ../_static/complete_light.png 257 | :class: only-light 258 | :alt: Complete embodied agent 259 | :align: center 260 | Complete embodied agent graph 261 | ``` 262 | -------------------------------------------------------------------------------- /docs/examples/goto.md: -------------------------------------------------------------------------------- 1 | # Create a Go-to-X component using map data 2 | 3 | In the previous [example](semantic_map.md) we created a semantic map using the MapEncoding component. Intuitively one can imagine that using the map data would require some form of RAG. Let us suppose that we want to create a Go-to-X component which, when given a command like 'Go to the yellow door', would retrieve the coordinates of the _yellow door_ from the map and publish them to a goal point topic of type _PoseStamped_, to be handled by our robot's navigation system. We will create our Go-to-X component using the LLM component provided by ROS Agents. We will start by initializing the component and configuring it to use RAG. 4 | 5 | ## Initialize the component 6 | 7 | ```python 8 | from agents.components import LLM 9 | from agents.models import Llama3_1 10 | from agents.config import LLMConfig 11 | from agents.clients.ollama import OllamaClient 12 | from agents.ros import Topic 13 | 14 | # Start a Llama3.1 based llm component using ollama client 15 | llama = Llama3_1(name="llama") 16 | llama_client = OllamaClient(llama) 17 | 18 | # Define LLM input and output topics including goal_point topic of type PoseStamped 19 | goto_in = Topic(name="goto_in", msg_type="String") 20 | goal_point = Topic(name="goal_point", msg_type="PoseStamped") 21 | ``` 22 | 23 | In order to configure the component to use RAG, we will set the following options in its config. 24 | 25 | ```python 26 | config = LLMConfig(enable_rag=True, 27 | collection_name="map", 28 | distance_func="l2", 29 | n_results=1, 30 | add_metadata=True) 31 | ``` 32 | 33 | Note that the _collection_name_ parameter is the same as the map name we set in the previous [example](semantic_map.md). We have also set the _add_metadata_ parameter to true to make sure that our metadata is included in the RAG result, as the spatial coordinates we want to get are part of the metadata. Let us have a quick look at the metadata stored in the map by the MapEncoding component. 34 | 35 | ``` 36 | { 37 | "coordinates": [1.1, 2.2, 0.0], 38 | "layer_name": "Topic_Name", # same as topic name that the layer is subscribed to 39 | "timestamp": 1234567, 40 | "temporal_change": True 41 | } 42 | ``` 43 | 44 | With this information, we will first initialize our component. 45 | ```{caution} 46 | In the following code block we are using the same DB client that was set up in the previous [example](semantic_map.md).
47 | ``` 48 | 49 | ```python 50 | # initialize the component 51 | goto = LLM( 52 | inputs=[goto_in], 53 | outputs=[goal_point], 54 | model_client=llama_client, 55 | db_client=chroma_client, # check the previous example where we set up this database client 56 | trigger=goto_in, 57 | config=config, 58 | component_name='go_to_x' 59 | ) 60 | ``` 61 | 62 | ## Pre-process the model output before publishing 63 | 64 | Knowing that the output of retrieval will be appended to the beginning of our query as context, we will set up a component-level prompt for our LLM. 65 | 66 | ```python 67 | # set a component prompt 68 | goto.set_component_prompt( 69 | template="""From the given metadata, extract coordinates and provide 70 | the coordinates in the following json format:\n {"position": coordinates}""" 71 | ) 72 | ``` 73 | 74 | ```{note} 75 | One might notice that we have not used an input topic name in our prompt. This is because we only need the input topic to fetch data from the vector DB during the RAG step. The query to the LLM in this case would only be composed of the data fetched from the DB and our prompt. 76 | ``` 77 | 78 | As the LLM output will contain text other than the _json_ string that we have asked for, we need to add a pre-processing function to the output topic that extracts the required part of the text and returns the output in a format that can be published to a _PoseStamped_ topic, i.e. a numpy array of floats. 79 | 80 | ```python 81 | from typing import Optional 82 | import json 83 | import numpy as np 84 | 85 | # pre-process the output before publishing to a topic of msg_type PoseStamped 86 | def llm_answer_to_goal_point(output: str) -> Optional[np.ndarray]: 87 | # extract the json part of the output string (including brackets) 88 | # one can use sophisticated regex parsing here but we'll keep it simple 89 | json_string = output[output.find("{") : output.rfind("}") + 1] 90 | # load the string as a json and extract position coordinates 91 | # if there is an error, return None, i.e. no output would be published to goal_point 92 | try: 93 | json_dict = json.loads(json_string) 94 | coordinates = np.fromstring(json_dict["position"], sep=',', dtype=np.float64) 95 | print('Coordinates Extracted:', coordinates) 96 | if coordinates.shape[0] < 2 or coordinates.shape[0] > 3: 97 | return 98 | elif coordinates.shape[0] == 2: # sometimes LLMs avoid adding the zeros of z-dimension 99 | coordinates = np.append(coordinates, 0) 100 | return coordinates 101 | except Exception: 102 | return 103 | 104 | # add the pre-processing function to the goal_point output topic 105 | goto.add_publisher_preprocessor(goal_point, llm_answer_to_goal_point) 106 | ``` 107 | 108 | ## Launching the Components 109 | 110 | Finally, we will launch our Go-to-X component. 111 | 112 | ```python 113 | from agents.ros import Launcher 114 | 115 | # Launch the component 116 | launcher = Launcher() 117 | launcher.add_pkg( 118 | components=[goto] 119 | ) 120 | launcher.bringup() 121 | ``` 122 | 123 | And that is all. Our Go-to-X component is ready.
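To build intuition for what the pre-processor does, here is a quick, purely illustrative run on a made-up LLM reply (the exact wording of real replies will vary):

```python
# a made-up LLM reply; the text around the json is illustrative only
sample_reply = 'Sure, here you go: {"position": "1.1, 2.2"}'
goal = llm_answer_to_goal_point(sample_reply)
# goal is now array([1.1, 2.2, 0.]) -- the missing z-coordinate is padded with zero,
# so the result can be published on the goal_point topic of msg_type PoseStamped
```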
The complete code for this example is given below: 124 | 125 | ```{code-block} python 126 | :caption: Go-to-X Component 127 | :linenos: 128 | from typing import Optional 129 | import json 130 | import numpy as np 131 | from agents.components import LLM 132 | from agents.models import Llama3_1 133 | from agents.vectordbs import ChromaDB 134 | from agents.config import LLMConfig 135 | from agents.clients.roboml import HTTPDBClient 136 | from agents.clients.ollama import OllamaClient 137 | from agents.ros import Launcher, Topic 138 | 139 | # Start a Llama3.1 based llm component using ollama client 140 | llama = Llama3_1(name="llama") 141 | llama_client = OllamaClient(llama) 142 | 143 | # Initialize a vector DB that will store our routes 144 | chroma = ChromaDB(name="MainDB") 145 | chroma_client = HTTPDBClient(db=chroma) 146 | 147 | # Define LLM input and output topics including goal_point topic of type PoseStamped 148 | goto_in = Topic(name="goto_in", msg_type="String") 149 | goal_point = Topic(name="goal_point", msg_type="PoseStamped") 150 | 151 | config = LLMConfig(enable_rag=True, 152 | collection_name="map", 153 | distance_func="l2", 154 | n_results=1, 155 | add_metadata=True) 156 | 157 | # initialize the component 158 | goto = LLM( 159 | inputs=[goto_in], 160 | outputs=[goal_point], 161 | model_client=llama_client, 162 | db_client=chroma_client, # check the previous example where we setup this database client 163 | trigger=goto_in, 164 | config=config, 165 | component_name='go_to_x' 166 | ) 167 | 168 | # set a component prompt 169 | goto.set_component_prompt( 170 | template="""From the given metadata, extract coordinates and provide 171 | the coordinates in the following json format:\n {"position": coordinates}""" 172 | ) 173 | 174 | 175 | # pre-process the output before publishing to a topic of msg_type PoseStamped 176 | def llm_answer_to_goal_point(output: str) -> Optional[np.ndarray]: 177 | # extract the json part of the output string (including brackets) 178 | # one can use sophisticated regex parsing here but we'll keep it simple 179 | json_string = output[output.find("{") : output.rfind("}") + 1] 180 | # load the string as a json and extract position coordinates 181 | # if there is an error, return None, i.e. no output would be published to goal_point 182 | try: 183 | json_dict = json.loads(json_string) 184 | coordinates = np.fromstring(json_dict["position"], sep=',', dtype=np.float64) 185 | print('Coordinates Extracted:', coordinates) 186 | if coordinates.shape[0] < 2 or coordinates.shape[0] > 3: 187 | return 188 | elif coordinates.shape[0] == 2: # sometimes LLMs avoid adding the zeros of z-dimension 189 | coordinates = np.append(coordinates, 0) 190 | return coordinates 191 | except Exception: 192 | return 193 | 194 | 195 | # add the pre-processing function to the goal_point output topic 196 | goto.add_publisher_preprocessor(goal_point, llm_answer_to_goal_point) 197 | 198 | # Launch the component 199 | launcher = Launcher() 200 | launcher.add_pkg( 201 | components=[goto] 202 | ) 203 | launcher.bringup() 204 | ``` 205 | -------------------------------------------------------------------------------- /docs/examples/index.md: -------------------------------------------------------------------------------- 1 | # Examples ✨ 2 | 3 | In this section you will find basic examples of ROS Agents usage in the form of short tutorials. These examples would show you how ROS Agents' components can be used to create real world embodied agent capabilities in robots. 
It is recommended to go through the examples in sequence. 4 | 5 | ```{toctree} 6 | :maxdepth: 1 7 | 8 | conversational 9 | prompt_engineering 10 | semantic_map 11 | goto 12 | tool_calling 13 | semantic_router 14 | complete 15 | multiprocessing 16 | ``` 17 | -------------------------------------------------------------------------------- /docs/examples/prompt_engineering.md: -------------------------------------------------------------------------------- 1 | # Prompt engineering for LLMs/MLLMs using vision models 2 | 3 | In this example we will use the output of an object detection component to enrich the prompt of an MLLM component. Let us start by importing the components. 4 | ```python 5 | from agents.components import Vision, MLLM 6 | ``` 7 | 8 | ## Setting up the Object Detection Component 9 | For object detection and tracking, ROS Agents provides a unified Vision component. This component takes as input an image topic published by a camera device onboard our robot. The output of this component can be a _detections_ topic in the case of object detection or a _trackings_ topic in the case of object tracking. In this example we will use a _detections_ topic. 10 | 11 | ```python 12 | from agents.ros import Topic 13 | 14 | # Define the image input topic 15 | image0 = Topic(name="image_raw", msg_type="Image") 16 | # Create a detection topic 17 | detections_topic = Topic(name="detections", msg_type="Detections") 18 | ``` 19 | Additionally, the component requires a model client with an object detection model. We will use the RESP client for RoboML and VisionModel, a convenient model class made available in ROS Agents for initializing all vision models available in the open-source [mmdetection](https://github.com/open-mmlab/mmdetection) library. We will select the model we want to use by setting the checkpoint attribute. 20 | 21 | ```{note} 22 | Learn about setting up RoboML with vision [here](https://github.com/automatika-robotics/roboml/blob/main/README.md#for-vision-models-support). 23 | ``` 24 | ```{seealso} 25 | Check out all available mmdetection models and their benchmarking results in the [mmdetection model zoo](https://github.com/open-mmlab/mmdetection?tab=readme-ov-file#overview-of-benchmark-and-model-zoo). 26 | ``` 27 | 28 | ```python 29 | from agents.models import VisionModel 30 | from agents.clients.roboml import RESPModelClient, HTTPModelClient 31 | from agents.config import VisionConfig 32 | 33 | # Add an object detection model 34 | object_detection = VisionModel(name="object_detection", 35 | checkpoint="dino-4scale_r50_8xb2-12e_coco") 36 | roboml_detection = RESPModelClient(object_detection) 37 | 38 | # Initialize the Vision component 39 | detection_config = VisionConfig(threshold=0.5) 40 | vision = Vision( 41 | inputs=[image0], 42 | outputs=[detections_topic], 43 | trigger=image0, 44 | config=detection_config, 45 | model_client=roboml_detection, 46 | component_name="detection_component", 47 | ) 48 | ``` 49 | 50 | ```{tip} 51 | Notice that we passed in an optional config to the component. Component configs can be used to set up various parameters in the component. If the component calls an ML model, then the inference parameters for the model can be set in the component config. 52 | ``` 53 | 54 | ## Setting up the MLLM Component 55 | 56 | For the MLLM component, we will provide an additional text input topic, which will listen to our queries. The output of the component will be another text topic.
We will use the RoboML HTTP client with the multimodal LLM Idefics2 by the good folks at HuggingFace for this example. 57 | 58 | ```python 59 | from agents.models import Idefics2 60 | 61 | # Define MLLM input and output text topics 62 | text_query = Topic(name="text0", msg_type="String") 63 | text_answer = Topic(name="text1", msg_type="String") 64 | 65 | # Define a model client (working with roboml in this case) 66 | idefics = Idefics2(name="idefics_model") 67 | idefics_client = HTTPModelClient(idefics) 68 | 69 | # Define an MLLM component 70 | # We can pass in the detections topic which we defined previously directly as an optional input 71 | # to the MLLM component in addition to its other required inputs 72 | mllm = MLLM( 73 | inputs=[text_query, image0, detections_topic], 74 | outputs=[text_answer], 75 | model_client=idefics_client, 76 | trigger=text_query, 77 | component_name="mllm_component" 78 | ) 79 | ``` 80 | Next we will set up a component-level prompt to ensure that our text query and the output of the detections topic are sent to the model as we intend. We will do this by passing a jinja2 template to the **set_component_prompt** function. 81 | ```python 82 | mllm.set_component_prompt( 83 | template="""Imagine you are a robot. 84 | This image has following items: {{ detections }}. 85 | Answer the following about this image: {{ text0 }}""" 86 | ) 87 | ``` 88 | ```{caution} 89 | The names of the topics used in the jinja2 template are the same as the name parameters set when creating the Topic objects. 90 | ``` 91 | 92 | ## Launching the Components 93 | 94 | Finally, we will launch our components as we did in the previous example. 95 | 96 | ```python 97 | from agents.ros import Launcher 98 | 99 | # Launch the components 100 | launcher = Launcher() 101 | launcher.add_pkg( 102 | components=[vision, mllm] 103 | ) 104 | launcher.bringup() 105 | ``` 106 | 107 | And there we have it. The complete code for this example is provided below. 108 | 109 | ```{code-block} python 110 | :caption: Prompt Engineering with Object Detection 111 | :linenos: 112 | from agents.components import Vision, MLLM 113 | from agents.models import VisionModel, Idefics2 114 | from agents.clients.roboml import RESPModelClient, HTTPModelClient 115 | from agents.config import VisionConfig 116 | from agents.ros import Topic, Launcher 117 | 118 | image0 = Topic(name="image_raw", msg_type="Image") 119 | detections_topic = Topic(name="detections", msg_type="Detections") 120 | 121 | object_detection = VisionModel(name="object_detection", 122 | checkpoint="dino-4scale_r50_8xb2-12e_coco") 123 | roboml_detection = RESPModelClient(object_detection) 124 | 125 | detection_config = VisionConfig(threshold=0.5) 126 | vision = Vision( 127 | inputs=[image0], 128 | outputs=[detections_topic], 129 | trigger=image0, 130 | config=detection_config, 131 | model_client=roboml_detection, 132 | component_name="detection_component", 133 | ) 134 | 135 | text_query = Topic(name="text0", msg_type="String") 136 | text_answer = Topic(name="text1", msg_type="String") 137 | 138 | idefics = Idefics2(name="idefics_model") 139 | idefics_client = HTTPModelClient(idefics) 140 | 141 | mllm = MLLM( 142 | inputs=[text_query, image0, detections_topic], 143 | outputs=[text_answer], 144 | model_client=idefics_client, 145 | trigger=text_query, 146 | component_name="mllm_component" 147 | ) 148 | 149 | mllm.set_component_prompt( 150 | template="""Imagine you are a robot. 151 | This image has following items: {{ detections }}.
152 | Answer the following about this image: {{ text0 }}""" 153 | ) 154 | launcher = Launcher() 155 | launcher.add_pkg( 156 | components=[vision, mllm] 157 | ) 158 | launcher.bringup() 159 | ``` 160 | -------------------------------------------------------------------------------- /docs/examples/semantic_router.md: -------------------------------------------------------------------------------- 1 | # Create a semantic router to route text queries between different components 2 | 3 | While semantic routing can be implemented with an LLM component, ROS Agents also provides a convenient SemanticRouter component that works directly with text encoding distances and can be utilized with a vector DB. 4 | 5 | In this example we will use the SemanticRouter component to route text queries between two components, a general purpose LLM and a Go-to-X component that we built in the previous [example](goto.md). Lets start by setting up our components. 6 | 7 | ## Setting up the components 8 | 9 | In the following code snippet we will setup our two components. 10 | 11 | ```python 12 | from agents.components import LLM 13 | from agents.clients.ollama import OllamaClient 14 | from agents.clients.roboml import HTTPModelClient 15 | from agents.models import Idefics2, Llama3_1 16 | from agents.config import LLMConfig 17 | from agents.ros import Topic 18 | 19 | # Create a llama3.1 client using Ollama 20 | llama = Llama3_1(name="llama") 21 | ollama_client = OllamaClient(llama) 22 | 23 | # Make a generic LLM component using the Llama3_1 model 24 | llm_in = Topic(name="llm_in", msg_type="String") 25 | llm_out = Topic(name="llm_out", msg_type="String") 26 | 27 | llm = LLM( 28 | inputs=[llm_in], 29 | outputs=[llm_out], 30 | model_client=llama_client, 31 | trigger=[llm_in], 32 | ) 33 | 34 | # Make a Go-to-X component using the same Llama3_1 model 35 | goto_in = Topic(name="goto_in", msg_type="String") 36 | goal_point = Topic(name="goal_point", msg_type="PoseStamped") 37 | 38 | config = LLMConfig(enable_rag=True, 39 | collection_name="map", 40 | distance_func="l2", 41 | n_results=1, 42 | add_metadata=True) 43 | 44 | goto = LLM( 45 | inputs=[goto_in], 46 | outputs=[goal_point], 47 | model_client=llama_client, 48 | db_client=chroma_client, 49 | trigger=goto_in, 50 | config=config, 51 | component_name='go_to_x' 52 | ) 53 | 54 | # set a component prompt 55 | goto.set_component_prompt( 56 | template="""From the given metadata, extract coordinates and provide 57 | the coordinates in the following json format:\n {"position": coordinates}""" 58 | ) 59 | 60 | # pre-process the output before publishing to a topic of msg_type PoseStamped 61 | def llm_answer_to_goal_point(output: str) -> Optional[np.ndarray]: 62 | # extract the json part of the output string (including brackets) 63 | # one can use sophisticated regex parsing here but we'll keep it simple 64 | json_string = output[output.find("{"):output.find("}") + 1] 65 | 66 | # load the string as a json and extract position coordinates 67 | # if there is an error, return None, i.e. no output would be published to goal_point 68 | try: 69 | json_dict = json.loads(json_string) 70 | return np.array(json_dict['position']) 71 | except Exception: 72 | return 73 | 74 | # add the pre-processing function to the goal_point output topic 75 | goto.add_publisher_preprocessor(goal_point, llm_answer_to_goal_point) 76 | ``` 77 | 78 | ```{note} 79 | Note that we have reused the same model and its client for both components. 
80 | ``` 81 | 82 | ```{note} 83 | For a detailed explanation of the code for setting up the Go-to-X component, check the previous [example](goto.md). 84 | ``` 85 | 86 | ```{caution} 87 | In the code block above we are using the same DB client that was setup in this [example](semantic_map.md). 88 | ``` 89 | 90 | ## Creating the SemanticRouter 91 | 92 | The SemanticRouter takes an input _String_ topic and sends whatever is published on that topic to a _Route_. A _Route_ is a thin wrapper around _Topic_ and takes in the name of a topic to publish on and example queries, that would match a potential query that should be published to a particular topic. For example, if we ask our robot a general question, like "Whats the capital of France?", we do not want that question to be routed to a Go-to-X component, but to a generic LLM. Thus in its route, we would provide examples of general questions. The SemanticRouter component works by storing these examples in a vector DB. Distance is calculated between an incoming query's embedding and the embeddings of example queries to determine which _Route_(_Topic_) the query should be sent on. Lets start by creating our routes for the input topics of the two components above. 93 | 94 | ```python 95 | from agents.ros import Route 96 | 97 | # Create the input topic for the router 98 | query_topic = Topic(name="question", msg_type="String") 99 | 100 | # Define a route to a topic that processes go-to-x commands 101 | goto_route = Route(routes_to=goto_in, 102 | samples=["Go to the door", "Go to the kitchen", 103 | "Get me a glass", "Fetch a ball", "Go to hallway"]) 104 | 105 | # Define a route to a topic that is input to an LLM component 106 | llm_route = Route(routes_to=llm_in, 107 | samples=["What is the capital of France?", "Is there life on Mars?", 108 | "How many tablespoons in a cup?", "How are you today?", "Whats up?"]) 109 | ``` 110 | 111 | For the database client we will use the ChromaDB client setup in [this example](semantic_map.md). We will specify a router name in our router config, which will act as a _collection_name_ in the database. 112 | 113 | ```python 114 | from agents.components import SemanticRouter 115 | from agents.config import SemanticRouterConfig 116 | 117 | router_config = SemanticRouterConfig(router_name="go-to-router", distance_func="l2") 118 | # Initialize the router component 119 | router = SemanticRouter( 120 | inputs=[query_topic], 121 | routes=[llm_route, goto_route], 122 | default_route=llm_route, # If none of the routes fall within a distance threshold 123 | config=router_config, 124 | db_client=chroma_client, # reusing the db_client from the previous example 125 | component_name="router" 126 | ) 127 | ``` 128 | 129 | And that is it. Whenever something is published on the input topic **question**, it will be routed, either to a Go-to-X component or an LLM component. We can now expose this topic to our command interface. 
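As a quick smoke test (a minimal sketch using plain `rclpy`, assuming the `String` msg_type above maps to `std_msgs/msg/String`), one could publish a couple of test queries on the router's input topic from a separate script:

```python
import time
import rclpy
from std_msgs.msg import String

rclpy.init()
node = rclpy.create_node("router_smoke_test")
pub = node.create_publisher(String, "question", 10)
time.sleep(1.0)  # give discovery a moment before publishing
pub.publish(String(data="Go to the kitchen"))       # should be routed to goto_in
pub.publish(String(data="Is there life on Mars?"))  # should be routed to llm_in
node.destroy_node()
rclpy.shutdown()
```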
The complete code for setting up the router is given below: 130 | 131 | ```{code-block} python 132 | :caption: Semantic Routing 133 | :linenos: 134 | from typing import Optional 135 | import json 136 | import numpy as np 137 | from agents.components import LLM, SemanticRouter 138 | from agents.models import Llama3_1 139 | from agents.vectordbs import ChromaDB 140 | from agents.config import LLMConfig, SemanticRouterConfig 141 | from agents.clients.roboml import HTTPDBClient 142 | from agents.clients.ollama import OllamaClient 143 | from agents.ros import Launcher, Topic, Route 144 | 145 | 146 | # Start a Llama3.1 based llm component using ollama client 147 | llama = Llama3_1(name="llama") 148 | llama_client = OllamaClient(llama) 149 | 150 | # Initialize a vector DB that will store our routes 151 | chroma = ChromaDB(name="MainDB") 152 | chroma_client = HTTPDBClient(db=chroma) 153 | 154 | 155 | # Make a generic LLM component using the Llama3_1 model 156 | llm_in = Topic(name="llm_in", msg_type="String") 157 | llm_out = Topic(name="llm_out", msg_type="String") 158 | 159 | llm = LLM( 160 | inputs=[llm_in], 161 | outputs=[llm_out], 162 | model_client=llama_client, 163 | trigger=llm_in 164 | ) 165 | 166 | 167 | # Define LLM input and output topics including goal_point topic of type PoseStamped 168 | goto_in = Topic(name="goto_in", msg_type="String") 169 | goal_point = Topic(name="goal_point", msg_type="PoseStamped") 170 | 171 | config = LLMConfig(enable_rag=True, 172 | collection_name="map", 173 | distance_func="l2", 174 | n_results=1, 175 | add_metadata=True) 176 | 177 | # initialize the component 178 | goto = LLM( 179 | inputs=[goto_in], 180 | outputs=[goal_point], 181 | model_client=llama_client, 182 | db_client=chroma_client, # check the previous example where we setup this database client 183 | trigger=goto_in, 184 | config=config, 185 | component_name='go_to_x' 186 | ) 187 | 188 | # set a component prompt 189 | goto.set_component_prompt( 190 | template="""From the given metadata, extract coordinates and provide 191 | the coordinates in the following json format:\n {"position": coordinates}""" 192 | ) 193 | 194 | 195 | # pre-process the output before publishing to a topic of msg_type PoseStamped 196 | def llm_answer_to_goal_point(output: str) -> Optional[np.ndarray]: 197 | # extract the json part of the output string (including brackets) 198 | # one can use sophisticated regex parsing here but we'll keep it simple 199 | json_string = output[output.find("{"):output.find("}") + 1] 200 | 201 | # load the string as a json and extract position coordinates 202 | # if there is an error, return None, i.e. 
no output would be published to goal_point 203 | try: 204 | json_dict = json.loads(json_string) 205 | return np.array(json_dict['position']) 206 | except Exception: 207 | return 208 | 209 | 210 | # add the pre-processing function to the goal_point output topic 211 | goto.add_publisher_preprocessor(goal_point, llm_answer_to_goal_point) 212 | 213 | # Create the input topic for the router 214 | query_topic = Topic(name="question", msg_type="String") 215 | 216 | # Define a route to a topic that processes go-to-x commands 217 | goto_route = Route(routes_to=goto_in, 218 | samples=["Go to the door", "Go to the kitchen", 219 | "Get me a glass", "Fetch a ball", "Go to hallway"]) 220 | 221 | # Define a route to a topic that is input to an LLM component 222 | llm_route = Route(routes_to=llm_in, 223 | samples=["What is the capital of France?", "Is there life on Mars?", 224 | "How many tablespoons in a cup?", "How are you today?", "Whats up?"]) 225 | 226 | router_config = SemanticRouterConfig(router_name="go-to-router", distance_func="l2") 227 | # Initialize the router component 228 | router = SemanticRouter( 229 | inputs=[query_topic], 230 | routes=[llm_route, goto_route], 231 | default_route=llm_route, # If none of the routes fall within a distance threshold 232 | config=router_config, 233 | db_client=chroma_client, # reusing the db_client from the previous example 234 | component_name="router", 235 | ) 236 | 237 | # Launch the components 238 | launcher = Launcher() 239 | launcher.add_pkg( 240 | components=[llm, goto, router] 241 | ) 242 | launcher.bringup() 243 | ``` 244 | -------------------------------------------------------------------------------- /docs/examples/tool_calling.md: -------------------------------------------------------------------------------- 1 | # Use Tool Calling in Go-to-X 2 | 3 | In the previous [example](goto.md) we created a Go-to-X component using basic text manipulation on LLM output. However, for models that have been specifically trained for tool calling, one can get better results for structured outputs by invoking tool calling. At the same time tool calling can be useful to generate responses which require intermediate use of tools by the LLM before providing a final answer. In this example we will utilize tool calling for the former utility of getting a better structured output from the LLM, by reimplementing the Go-to-X component. 4 | 5 | ## Register a tool (function) to be called by the LLM 6 | To utilize tool calling we will change our strategy of doing pre-processing to LLM text output, and instead ask the LLM to provide structured input to a function (tool). The output of this function will then be sent for publishing to the output topic. Lets see what this will look like in the following code snippets. 7 | 8 | First we will modify the component level prompt for our LLM. 9 | 10 | ```python 11 | # set a component prompt 12 | goto.set_component_prompt( 13 | template="""What are the position coordinates in the given metadata?""" 14 | ) 15 | ``` 16 | Next we will replace our pre-processing function, with a much simpler function that takes in a list and provides a numpy array. The LLM will be expected to call this function with the appropriate output. This strategy generally works better than getting text input from LLM and trying to parse it with an arbitrary function. To register the function as a tool, we will also need to create its description in a format that is explanatory for the LLM. This format has been specified by the _Ollama_ client. 
17 | 18 | ```{caution} 19 | Tool calling is currently available only when components utilize the OllamaClient. 20 | ``` 21 | ```{seealso} 22 | To see a list of models that work for tool calling using the OllamaClient, check [here](https://ollama.com/search?c=tools) 23 | ``` 24 | ```python 25 | # pre-process the output before publishing to a topic of msg_type PoseStamped 26 | def get_coordinates(position: list[float]) -> np.ndarray: 27 | """Get position coordinates""" 28 | return np.array(position, dtype=float) 29 | 30 | 31 | function_description = { 32 | "type": "function", 33 | "function": { 34 | "name": "get_coordinates", 35 | "description": "Get position coordinates", 36 | "parameters": { 37 | "type": "object", 38 | "properties": { 39 | "position": { 40 | "type": "list[float]", 41 | "description": "The position coordinates in x, y and z", 42 | } 43 | }, 44 | }, 45 | "required": ["position"], 46 | }, 47 | } 48 | 49 | # add the pre-processing function to the goal_point output topic 50 | goto.register_tool( 51 | tool=get_coordinates, 52 | tool_description=function_description, 53 | send_tool_response_to_model=False, 54 | ) 55 | ``` 56 | In the code above, the flag _send_tool_response_to_model_ has been set to False. This means that the function output will be sent directly for publication, since our usage of the tool in this example is limited to forcing the model to provide a structured output. If this flag was set to True, the output of the tool (function) will be sent back to the model to produce the final output, which will then be published. This latter usage is employed when a tool like a calculator, browser or code interpreter can be provided to the model for generating better answers. 57 | 58 | ## Launching the Components 59 | 60 | And as before, we will launch our Go-to-X component. 
61 | 62 | ```python 63 | from agents.ros import Launcher 64 | 65 | # Launch the component 66 | launcher = Launcher() 67 | launcher.add_pkg(components=[goto]) 68 | launcher.bringup() 69 | ``` 70 | 71 | The complete code for this example is given below: 72 | 73 | ```{code-block} python 74 | :caption: Go-to-X Component 75 | :linenos: 76 | import numpy as np 77 | from agents.components import LLM 78 | from agents.models import Llama3_1 79 | from agents.vectordbs import ChromaDB 80 | from agents.config import LLMConfig 81 | from agents.clients.roboml import HTTPDBClient 82 | from agents.clients.ollama import OllamaClient 83 | from agents.ros import Launcher, Topic 84 | 85 | # Start a Llama3.1 based llm component using ollama client 86 | llama = Llama3_1(name="llama") 87 | llama_client = OllamaClient(llama) 88 | 89 | # Initialize a vector DB that will store our routes 90 | chroma = ChromaDB(name="MainDB") 91 | chroma_client = HTTPDBClient(db=chroma) 92 | 93 | # Define LLM input and output topics including goal_point topic of type PoseStamped 94 | goto_in = Topic(name="goto_in", msg_type="String") 95 | goal_point = Topic(name="goal_point", msg_type="PoseStamped") 96 | 97 | config = LLMConfig( 98 | enable_rag=True, 99 | collection_name="map", 100 | distance_func="l2", 101 | n_results=1, 102 | add_metadata=True, 103 | ) 104 | 105 | # initialize the component 106 | goto = LLM( 107 | inputs=[goto_in], 108 | outputs=[goal_point], 109 | model_client=llama_client, 110 | db_client=chroma_client, # check the previous example where we setup this database client 111 | trigger=goto_in, 112 | config=config, 113 | component_name="go_to_x", 114 | ) 115 | 116 | # set a component prompt 117 | goto.set_component_prompt( 118 | template="""What are the position coordinates in the given metadata?""" 119 | ) 120 | 121 | 122 | # pre-process the output before publishing to a topic of msg_type PoseStamped 123 | def get_coordinates(position: list[float]) -> np.ndarray: 124 | """Get position coordinates""" 125 | return np.array(position, dtype=float) 126 | 127 | 128 | function_description = { 129 | "type": "function", 130 | "function": { 131 | "name": "get_coordinates", 132 | "description": "Get position coordinates", 133 | "parameters": { 134 | "type": "object", 135 | "properties": { 136 | "position": { 137 | "type": "list[float]", 138 | "description": "The position coordinates in x, y and z", 139 | } 140 | }, 141 | }, 142 | "required": ["position"], 143 | }, 144 | } 145 | 146 | # add the pre-processing function to the goal_point output topic 147 | goto.register_tool( 148 | tool=get_coordinates, 149 | tool_description=function_description, 150 | send_tool_response_to_model=False, 151 | ) 152 | 153 | # Launch the component 154 | launcher = Launcher() 155 | launcher.add_pkg(components=[goto]) 156 | launcher.bringup() 157 | ``` 158 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: ROS Agents Documentation 3 | --- 4 | 5 | 6 | ```{include} intro.md 7 | ``` 8 | 9 | ## Table of Contents 10 | 11 | ```{toctree} 12 | :maxdepth: 2 13 | 14 | intro 15 | installation 16 | quickstart 17 | basics 18 | examples/index 19 | apidocs/index 20 | ``` 21 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 🛠️ 2 | 3 | ## Pre-Requisits 4 | 5 | ### 
Install ROS 6 | 7 | ROS Agents is built to be used with ROS2. All ROS distributions starting from _Iron_ are supported. Install ROS2 by following the instructions on the [official site](https://docs.ros.org/en/iron/Installation.html). 8 | 9 | ### Install a model serving platform 10 | 11 | The core of ROS Agents is agnostic to model serving platforms. It currently supports [Ollama](https://ollama.com) and [RoboML](https://github.com/automatika-robotics/RoboML). Please install either of these by following the instructions provided by the respective projects. Support for new platforms will be continuously added. If you would like to see support for a particular platform, please open an issue/PR. 12 | 13 | ```{tip} 14 | When utilizing larger models, it is recommended that model serving platforms are not installed directly on the robot (or the edge device) but on a GPU-powered machine on the local network (or one of the cloud providers). 15 | ``` 16 | 17 | ## Install ROS Agents (Ubuntu) 18 | 19 | **Binary packages for Ubuntu will be released soon. Check this space.** 20 | 21 | ## Install ROS Agents from source 22 | 23 | Create your ROS workspace. 24 | ```shell 25 | mkdir -p agents_ws/src 26 | cd agents_ws/src 27 | ``` 28 | ### Get Dependencies 29 | 30 | Install python dependencies. 31 | ```shell 32 | pip install numpy opencv-python-headless 'attrs>=23.2.0' jinja2 httpx setproctitle msgpack msgpack-numpy numpy-quaternion platformdirs 33 | ``` 34 | 35 | Download ROS Sugar. 36 | ```shell 37 | git clone https://github.com/automatika-robotics/ros-sugar 38 | ``` 39 | ### Install ROS Agents 40 | ```shell 41 | git clone https://github.com/automatika-robotics/ros-agents.git 42 | cd .. 43 | colcon build 44 | source install/setup.bash 45 | python your_script.py 46 | ``` 47 | -------------------------------------------------------------------------------- /docs/intro.md: -------------------------------------------------------------------------------- 1 | ![Logo](_static/ROS_AGENTS_DARK.png) 2 | 3 | # ROS Agents 🤖 4 | 5 | ROS Agents is a fully-loaded framework for creating interactive embodied agents that can understand, remember, and act upon contextual information from their environment. 6 | 7 | - **Agents in the real world:** Designed to be used with autonomous robot systems that operate in dynamic environments, specifically AMRs. 8 | - **Intuitive API**: Simple pythonic API to utilize local or cloud based ML models (specifically **Multimodal LLMs** and other **Transformer Architectures**) on robots. 9 | - **Semantic Memory**: Integrates vector databases, semantic routing and other supporting components to quickly build arbitrarily complex graphs for agentic information flow. No need to utilize bloated "GenAI" frameworks on your robot. 10 | - **Made in ROS2**: Utilizes ROS2 as the underlying distributed communications backbone. In theory, any device that provides a ROS2 package can be used to send data to ML models, as long as a callback for its datatype has been implemented. 11 | 12 | Check out the [Installation Instructions](installation.md) 🛠️ 13 | 14 | Get started with the [Quickstart Guide](quickstart.md) 🚀 15 | 16 | Get familiar with [Basic Concepts](basics.md) 📚 17 | 18 | Dive right in with [Examples](examples/index.md) ✨ 19 | 20 | ## Contributions 21 | 22 | ROS Agents has been developed in collaboration between [Automatika Robotics](https://automatikarobotics.com/) and [Inria](https://inria.fr/). Contributions from the community are most welcome.
23 | -------------------------------------------------------------------------------- /docs/quickstart.md: -------------------------------------------------------------------------------- 1 | # Quick Start 🚀 2 | 3 | Unlike other ROS packages, ROS Agents provides a purely pythonic way of describing the node graph using [ROS Sugar](https://automatika-robotics.github.io/ros-sugar/). Copy the following code into a python script and run it. 4 | 5 | ```python 6 | from agents.clients.ollama import OllamaClient 7 | from agents.components import MLLM 8 | from agents.models import Llava 9 | from agents.ros import Topic, Launcher 10 | 11 | # Define input and output topics (pay attention to msg_type) 12 | text0 = Topic(name="text0", msg_type="String") 13 | image0 = Topic(name="image_raw", msg_type="Image") 14 | text1 = Topic(name="text1", msg_type="String") 15 | 16 | # Define a model client (working with Ollama in this case) 17 | llava = Llava(name="llava") 18 | llava_client = OllamaClient(llava) 19 | 20 | # Define an MLLM component (A component represents a node with a particular functionality) 21 | mllm = MLLM( 22 | inputs=[text0, image0], 23 | outputs=[text1], 24 | model_client=llava_client, 25 | trigger=[text0], 26 | component_name="vqa" 27 | ) 28 | # Additional prompt settings 29 | mllm.set_topic_prompt(text0, template="""You are an amazing and funny robot. 30 | Answer the following about this image: {{ text0 }}""" 31 | ) 32 | # Launch the component 33 | launcher = Launcher() 34 | launcher.add_pkg(components=[mllm]) 35 | launcher.bringup() 36 | ``` 37 | 38 | Now let us see step-by-step what we have done in this code. First, we defined inputs and outputs to our component in the form of ROS Topics. Components automatically create listeners for input topics and publishers for output topics. 39 | 40 | ```python 41 | # Define input and output topics (pay attention to msg_type) 42 | text0 = Topic(name="text0", msg_type="String") 43 | image0 = Topic(name="image_raw", msg_type="Image") 44 | text1 = Topic(name="text1", msg_type="String") 45 | ``` 46 | 47 | ````{important} 48 | If you are running ROS Agents on a robot, make sure you change the topic name in the following line to the topic on which the robot's camera publishes RGB images. 49 | 50 | ```python 51 | image0 = Topic(name="NAME_OF_THE_TOPIC", msg_type="Image") 52 | ```` 53 | 54 | ```{note} 55 | If you are running ROS Agents on a testing machine, and the machine has a webcam, you can install the [**ROS2 USB Cam**](https://github.com/klintan/ros2_usb_camera) driver. Make sure you use the correct name of the image topic as above. 56 | ``` 57 | 58 | Then we will create a multimodal LLM component. Components are functional units in ROS Agents. To learn more about them, check out [Basic Concepts](basics.md). Other than input/output topics, the MLLM component expects a model client. So first we will create a model client that can utilize a [Llava](https://ollama.com/library/llava) model on [Ollama](https://ollama.com) as its model serving platform.
59 | 60 | ```python 61 | # Define a model client (working with Ollama in this case) 62 | llava = Llava(name="llava") 63 | llava_client = OllamaClient(llava) 64 | ``` 65 | 66 | ````{important} 67 | If you are not running Ollama on the same machine (robot) on which you are running ROS Agents, you can define access to the machine running Ollama using the host and port parameters in this line: 68 | ```python 69 | llava_client = OllamaClient(llava, host="127.0.0.1", port=8000) 70 | ```` 71 | 72 | ```{note} 73 | If the use of Ollama as a model serving platform is unclear, check out the [installation instructions](installation.md). 74 | ``` 75 | 76 | Now we are ready to set up our component. 77 | 78 | ```python 79 | # Define an MLLM component (A component represents a node with a particular functionality) 80 | mllm = MLLM( 81 | inputs=[text0, image0], 82 | outputs=[text1], 83 | model_client=llava_client, 84 | trigger=[text0], 85 | component_name="vqa" 86 | ) 87 | # Additional prompt settings 88 | mllm.set_topic_prompt(text0, template="""You are an amazing and funny robot. 89 | Answer the following about this image: {{ text0 }}""" 90 | ) 91 | ``` 92 | 93 | Note how the MLLM type of component also allows us to set a topic or component level prompt, where a jinja2 template defines how our input string should be embedded. Finally, we will launch the component. 94 | 95 | ```python 96 | # Launch the component 97 | launcher = Launcher() 98 | launcher.add_pkg(components=[mllm]) 99 | launcher.bringup() 100 | ``` 101 | 102 | Now we can check that our component is running by using familiar ROS2 commands from a new terminal. We should see our component running as a ROS node and its input and output topics in the topic list. 103 | 104 | ```shell 105 | ros2 node list 106 | ros2 topic list 107 | ``` 108 | 109 | In order to interact with our component we can use the tiny web client that is bundled with ROS Agents. We can launch the client by running: 110 | 111 | ```shell 112 | ros2 run automatika_embodied_agents tiny_web_client 113 | ``` 114 | 115 | The client displays a web UI on http://localhost:8000. Open this address in a browser. The ROS settings for the text input and output topics can be configured from the web UI by pressing the settings icon. Send a question to your ROS Agent and you should get a reply generated by the Llava model.
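Alternatively, we can talk to the component straight from ROS2, without the web client. The snippet below is a minimal sketch and not part of the package: it assumes the topic names `text0` and `text1` from the code above, and that the `String` msg_type corresponds to `std_msgs/msg/String` on the wire. The node name `quickstart_probe` is arbitrary.

```python
import rclpy
from rclpy.node import Node
from std_msgs.msg import String


class QuickstartProbe(Node):
    """Sends a single question to the vqa component and prints any replies."""

    def __init__(self):
        super().__init__("quickstart_probe")
        # publisher for the component's input topic and subscriber for its output topic
        self.pub = self.create_publisher(String, "text0", 1)
        self.sub = self.create_subscription(String, "text1", self.on_answer, 1)
        # wait a moment for discovery, then publish the question once
        self.timer = self.create_timer(2.0, self.ask_once)

    def ask_once(self):
        msg = String()
        msg.data = "What do you see in this image?"
        self.pub.publish(msg)
        self.get_logger().info("Question sent, waiting for an answer...")
        self.timer.cancel()

    def on_answer(self, msg: String):
        self.get_logger().info(f"Answer: {msg.data}")


def main():
    rclpy.init()
    rclpy.spin(QuickstartProbe())


if __name__ == "__main__":
    main()
```

Run this in a sourced terminal while the component is up; the answer from the model should appear in the log after a few seconds, depending on your hardware.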
116 | -------------------------------------------------------------------------------- /examples/complete_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | from typing import Optional 4 | from agents.components import ( 5 | MLLM, 6 | SpeechToText, 7 | TextToSpeech, 8 | LLM, 9 | Vision, 10 | MapEncoding, 11 | SemanticRouter, 12 | ) 13 | from agents.config import TextToSpeechConfig 14 | from agents.clients.roboml import HTTPModelClient, RESPModelClient, HTTPDBClient 15 | from agents.clients.ollama import OllamaClient 16 | from agents.models import Whisper, SpeechT5, Llava, Llama3_1, VisionModel 17 | from agents.vectordbs import ChromaDB 18 | from agents.config import VisionConfig, LLMConfig, MapConfig, SemanticRouterConfig 19 | from agents.ros import Topic, Launcher, FixedInput, MapLayer, Route 20 | 21 | 22 | ### Setup our models and vectordb ### 23 | whisper = Whisper(name="whisper") 24 | whisper_client = HTTPModelClient(whisper) 25 | speecht5 = SpeechT5(name="speecht5") 26 | speecht5_client = HTTPModelClient(speecht5) 27 | object_detection_model = VisionModel( 28 | name="dino_4scale", checkpoint="dino-4scale_r50_8xb2-12e_coco" 29 | ) 30 | detection_client = RESPModelClient(object_detection_model) 31 | llava = Llava(name="llava") 32 | llava_client = OllamaClient(llava) 33 | llama = Llama3_1(name="llama") 34 | llama_client = OllamaClient(llama) 35 | chroma = ChromaDB(name="MainDB") 36 | chroma_client = HTTPDBClient(db=chroma) 37 | 38 | ### Setup our components ### 39 | # Setup a speech to text component 40 | audio_in = Topic(name="audio0", msg_type="Audio") 41 | query_topic = Topic(name="question", msg_type="String") 42 | 43 | speech_to_text = SpeechToText( 44 | inputs=[audio_in], 45 | outputs=[query_topic], 46 | model_client=whisper_client, 47 | trigger=audio_in, 48 | component_name="speech_to_text", 49 | ) 50 | 51 | # Setup a text to speech component 52 | query_answer = Topic(name="answer", msg_type="String") 53 | 54 | t2s_config = TextToSpeechConfig(play_on_device=True) 55 | 56 | text_to_speech = TextToSpeech( 57 | inputs=[query_answer], 58 | trigger=query_answer, 59 | model_client=speecht5_client, 60 | config=t2s_config, 61 | component_name="text_to_speech", 62 | ) 63 | 64 | # Setup a vision component for object detection 65 | image0 = Topic(name="image_raw", msg_type="Image") 66 | detections_topic = Topic(name="detections", msg_type="Detections") 67 | 68 | detection_config = VisionConfig(threshold=0.5) 69 | vision = Vision( 70 | inputs=[image0], 71 | outputs=[detections_topic], 72 | trigger=image0, 73 | config=detection_config, 74 | model_client=detection_client, 75 | component_name="object_detection", 76 | ) 77 | 78 | # Define a generic mllm component for vqa 79 | mllm_query = Topic(name="mllm_query", msg_type="String") 80 | 81 | mllm = MLLM( 82 | inputs=[mllm_query, image0, detections_topic], 83 | outputs=[query_answer], 84 | model_client=llava_client, 85 | trigger=mllm_query, 86 | component_name="visual_q_and_a", 87 | ) 88 | 89 | mllm.set_component_prompt( 90 | template="""Imagine you are a robot. 91 | This image has following items: {{ detections }}. 92 | Answer the following about this image: {{ text0 }}""" 93 | ) 94 | 95 | # Define a fixed input mllm component that does introspection 96 | introspection_query = FixedInput( 97 | name="introspection_query", 98 | msg_type="String", 99 | fixed="What kind of a room is this? Is it an office, a bedroom or a kitchen? 
Give a one word answer, out of the given choices", 100 | ) 101 | introspection_answer = Topic(name="introspection_answer", msg_type="String") 102 | 103 | introspector = MLLM( 104 | inputs=[introspection_query, image0], 105 | outputs=[introspection_answer], 106 | model_client=llava_client, 107 | trigger=15.0, 108 | component_name="introspector", 109 | ) 110 | 111 | 112 | def introspection_validation(output: str) -> Optional[str]: 113 | for option in ["office", "bedroom", "kitchen"]: 114 | if option in output.lower(): 115 | return option 116 | 117 | 118 | introspector.add_publisher_preprocessor(introspection_answer, introspection_validation) 119 | 120 | # Define a semantic map using MapEncoding component 121 | layer1 = MapLayer(subscribes_to=detections_topic, temporal_change=True) 122 | layer2 = MapLayer(subscribes_to=introspection_answer, resolution_multiple=3) 123 | 124 | position = Topic(name="odom", msg_type="Odometry") 125 | map_topic = Topic(name="map", msg_type="OccupancyGrid") 126 | 127 | map_conf = MapConfig(map_name="map") 128 | map = MapEncoding( 129 | layers=[layer1, layer2], 130 | position=position, 131 | map_topic=map_topic, 132 | config=map_conf, 133 | db_client=chroma_client, 134 | trigger=15.0, 135 | component_name="map_encoder", 136 | ) 137 | 138 | # Define a generic LLM component 139 | llm_query = Topic(name="llm_query", msg_type="String") 140 | 141 | llm = LLM( 142 | inputs=[llm_query], 143 | outputs=[query_answer], 144 | model_client=llama_client, 145 | trigger=[llm_query], 146 | component_name="general_q_and_a", 147 | ) 148 | 149 | # Define a Go-to-X component using LLM 150 | goto_query = Topic(name="goto_query", msg_type="String") 151 | goal_point = Topic(name="goal_point", msg_type="PoseStamped") 152 | 153 | goto_config = LLMConfig( 154 | enable_rag=True, 155 | collection_name="map", 156 | distance_func="l2", 157 | n_results=1, 158 | add_metadata=True, 159 | ) 160 | 161 | goto = LLM( 162 | inputs=[goto_query], 163 | outputs=[goal_point], 164 | model_client=llama_client, 165 | config=goto_config, 166 | db_client=chroma_client, 167 | trigger=goto_query, 168 | component_name="go_to_x", 169 | ) 170 | 171 | goto.set_component_prompt( 172 | template="""From the given metadata, extract coordinates and provide 173 | the coordinates in the following json format:\n {"position": coordinates}""" 174 | ) 175 | 176 | 177 | # pre-process the output before publishing to a topic of msg_type PoseStamped 178 | def llm_answer_to_goal_point(output: str) -> Optional[np.ndarray]: 179 | # extract the json part of the output string (including brackets) 180 | # one can use sophisticated regex parsing here but we'll keep it simple 181 | json_string = output[output.find("{") : output.rfind("}") + 1] 182 | # load the string as a json and extract position coordinates 183 | # if there is an error, return None, i.e. 
no output would be published to goal_point 184 | try: 185 | json_dict = json.loads(json_string) 186 | coordinates = np.fromstring(json_dict["position"], sep=",", dtype=np.float64) 187 | print("Coordinates Extracted:", coordinates) 188 | if coordinates.shape[0] < 2 or coordinates.shape[0] > 3: 189 | return 190 | elif ( 191 | coordinates.shape[0] == 2 192 | ): # sometimes LLMs avoid adding the zeros of z-dimension 193 | coordinates = np.append(coordinates, 0) 194 | return coordinates 195 | except Exception: 196 | return 197 | 198 | 199 | goto.add_publisher_preprocessor(goal_point, llm_answer_to_goal_point) 200 | 201 | # Define a semantic router between a generic LLM component, VQA MLLM component and Go-to-X component 202 | goto_route = Route( 203 | routes_to=goto_query, 204 | samples=[ 205 | "Go to the door", 206 | "Go to the kitchen", 207 | "Get me a glass", 208 | "Fetch a ball", 209 | "Go to hallway", 210 | ], 211 | ) 212 | 213 | llm_route = Route( 214 | routes_to=llm_query, 215 | samples=[ 216 | "What is the capital of France?", 217 | "Is there life on Mars?", 218 | "How many tablespoons in a cup?", 219 | "How are you today?", 220 | "Whats up?", 221 | ], 222 | ) 223 | 224 | mllm_route = Route( 225 | routes_to=mllm_query, 226 | samples=[ 227 | "Are we indoors or outdoors", 228 | "What do you see?", 229 | "Whats in front of you?", 230 | "Where are we", 231 | "Do you see any people?", 232 | "How many things are infront of you?", 233 | "Is this room occupied?", 234 | ], 235 | ) 236 | 237 | router_config = SemanticRouterConfig(router_name="go-to-router", distance_func="l2") 238 | # Initialize the router component 239 | router = SemanticRouter( 240 | inputs=[query_topic], 241 | routes=[llm_route, goto_route, mllm_route], 242 | default_route=llm_route, 243 | config=router_config, 244 | db_client=chroma_client, 245 | component_name="router", 246 | ) 247 | 248 | # Launch the components 249 | launcher = Launcher() 250 | launcher.add_pkg( 251 | components=[ 252 | mllm, 253 | llm, 254 | goto, 255 | introspector, 256 | map, 257 | router, 258 | speech_to_text, 259 | text_to_speech, 260 | vision, 261 | ] 262 | ) 263 | launcher.bringup() 264 | -------------------------------------------------------------------------------- /examples/complete_agent_multiprocessing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | from typing import Optional 4 | from agents.components import ( 5 | MLLM, 6 | SpeechToText, 7 | TextToSpeech, 8 | LLM, 9 | Vision, 10 | MapEncoding, 11 | SemanticRouter, 12 | ) 13 | from agents.config import TextToSpeechConfig 14 | from agents.clients.roboml import HTTPModelClient, RESPModelClient, HTTPDBClient 15 | from agents.clients.ollama import OllamaClient 16 | from agents.models import Whisper, SpeechT5, Llava, Llama3_1, VisionModel 17 | from agents.vectordbs import ChromaDB 18 | from agents.config import VisionConfig, LLMConfig, MapConfig, SemanticRouterConfig 19 | from agents.ros import Topic, Launcher, FixedInput, MapLayer, Route 20 | 21 | 22 | ### Setup our models and vectordb ### 23 | whisper = Whisper(name="whisper") 24 | whisper_client = HTTPModelClient(whisper) 25 | speecht5 = SpeechT5(name="speecht5") 26 | speecht5_client = HTTPModelClient(speecht5) 27 | object_detection_model = VisionModel( 28 | name="dino_4scale", checkpoint="dino-4scale_r50_8xb2-12e_coco" 29 | ) 30 | detection_client = RESPModelClient(object_detection_model) 31 | llava = Llava(name="llava") 32 | llava_client = OllamaClient(llava) 33 | 
llama = Llama3_1(name="llama") 34 | llama_client = OllamaClient(llama) 35 | chroma = ChromaDB(name="MainDB") 36 | chroma_client = HTTPDBClient(db=chroma) 37 | 38 | ### Setup our components ### 39 | # Setup a speech to text component 40 | audio_in = Topic(name="audio0", msg_type="Audio") 41 | query_topic = Topic(name="question", msg_type="String") 42 | 43 | speech_to_text = SpeechToText( 44 | inputs=[audio_in], 45 | outputs=[query_topic], 46 | model_client=whisper_client, 47 | trigger=audio_in, 48 | component_name="speech_to_text", 49 | ) 50 | 51 | # Setup a text to speech component 52 | query_answer = Topic(name="answer", msg_type="String") 53 | 54 | t2s_config = TextToSpeechConfig(play_on_device=True) 55 | 56 | text_to_speech = TextToSpeech( 57 | inputs=[query_answer], 58 | trigger=query_answer, 59 | model_client=speecht5_client, 60 | config=t2s_config, 61 | component_name="text_to_speech", 62 | ) 63 | 64 | # Setup a vision component for object detection 65 | image0 = Topic(name="image_raw", msg_type="Image") 66 | detections_topic = Topic(name="detections", msg_type="Detections") 67 | 68 | detection_config = VisionConfig(threshold=0.5) 69 | vision = Vision( 70 | inputs=[image0], 71 | outputs=[detections_topic], 72 | trigger=image0, 73 | config=detection_config, 74 | model_client=detection_client, 75 | component_name="object_detection", 76 | ) 77 | 78 | # Define a generic mllm component for vqa 79 | mllm_query = Topic(name="mllm_query", msg_type="String") 80 | 81 | mllm = MLLM( 82 | inputs=[mllm_query, image0, detections_topic], 83 | outputs=[query_answer], 84 | model_client=llava_client, 85 | trigger=mllm_query, 86 | component_name="visual_q_and_a", 87 | ) 88 | 89 | mllm.set_component_prompt( 90 | template="""Imagine you are a robot. 91 | This image has following items: {{ detections }}. 92 | Answer the following about this image: {{ text0 }}""" 93 | ) 94 | 95 | # Define a fixed input mllm component that does introspection 96 | introspection_query = FixedInput( 97 | name="introspection_query", 98 | msg_type="String", 99 | fixed="What kind of a room is this? Is it an office, a bedroom or a kitchen? Give a one word answer, out of the given choices", 100 | ) 101 | introspection_answer = Topic(name="introspection_answer", msg_type="String") 102 | 103 | introspector = MLLM( 104 | inputs=[introspection_query, image0], 105 | outputs=[introspection_answer], 106 | model_client=llava_client, 107 | trigger=15.0, 108 | component_name="introspector", 109 | ) 110 | 111 | 112 | def introspection_validation(output: str) -> Optional[str]: 113 | for option in ["office", "bedroom", "kitchen"]: 114 | if option in output.lower(): 115 | return option 116 | 117 | 118 | introspector.add_publisher_preprocessor(introspection_answer, introspection_validation) 119 | 120 | # Define a semantic map using MapEncoding component 121 | layer1 = MapLayer(subscribes_to=detections_topic, temporal_change=True) 122 | layer2 = MapLayer( 123 | subscribes_to=introspection_answer, 124 | resolution_multiple=3, 125 | pre_defined=[(np.array([1.1, 2.1, 3.2]), "The door is here. 
DOOR.")], 126 | ) 127 | 128 | position = Topic(name="odom", msg_type="Odometry") 129 | map_topic = Topic(name="map", msg_type="OccupancyGrid") 130 | 131 | map_conf = MapConfig(map_name="map") 132 | map = MapEncoding( 133 | layers=[layer1, layer2], 134 | position=position, 135 | map_topic=map_topic, 136 | config=map_conf, 137 | db_client=chroma_client, 138 | trigger=15.0, 139 | component_name="map_encoder", 140 | ) 141 | 142 | # Define a generic LLM component 143 | llm_query = Topic(name="llm_query", msg_type="String") 144 | 145 | llm = LLM( 146 | inputs=[llm_query], 147 | outputs=[query_answer], 148 | model_client=llama_client, 149 | trigger=[llm_query], 150 | component_name="general_q_and_a", 151 | ) 152 | 153 | # Define a Go-to-X component using LLM 154 | goto_query = Topic(name="goto_query", msg_type="String") 155 | goal_point = Topic(name="goal_point", msg_type="PoseStamped") 156 | 157 | goto_config = LLMConfig( 158 | enable_rag=True, 159 | collection_name="map", 160 | distance_func="l2", 161 | n_results=1, 162 | add_metadata=True, 163 | ) 164 | 165 | goto = LLM( 166 | inputs=[goto_query], 167 | outputs=[goal_point], 168 | model_client=llama_client, 169 | config=goto_config, 170 | db_client=chroma_client, 171 | trigger=goto_query, 172 | component_name="go_to_x", 173 | ) 174 | 175 | goto.set_component_prompt( 176 | template="""From the given metadata, extract coordinates and provide 177 | the coordinates in the following json format:\n {"position": coordinates}""" 178 | ) 179 | 180 | 181 | # pre-process the output before publishing to a topic of msg_type PoseStamped 182 | def llm_answer_to_goal_point(output: str) -> Optional[np.ndarray]: 183 | # extract the json part of the output string (including brackets) 184 | # one can use sophisticated regex parsing here but we'll keep it simple 185 | json_string = output[output.find("{") : output.rfind("}") + 1] 186 | # load the string as a json and extract position coordinates 187 | # if there is an error, return None, i.e. 
no output would be published to goal_point 188 | try: 189 | json_dict = json.loads(json_string) 190 | coordinates = np.fromstring(json_dict["position"], sep=",", dtype=np.float64) 191 | print("Coordinates Extracted:", coordinates) 192 | if coordinates.shape[0] < 2 or coordinates.shape[0] > 3: 193 | return 194 | elif ( 195 | coordinates.shape[0] == 2 196 | ): # sometimes LLMs avoid adding the zeros of z-dimension 197 | coordinates = np.append(coordinates, 0) 198 | return coordinates 199 | except Exception: 200 | return 201 | 202 | 203 | goto.add_publisher_preprocessor(goal_point, llm_answer_to_goal_point) 204 | 205 | # Define a semantic router between a generic LLM component, VQA MLLM component and Go-to-X component 206 | goto_route = Route( 207 | routes_to=goto_query, 208 | samples=[ 209 | "Go to the door", 210 | "Go to the kitchen", 211 | "Get me a glass", 212 | "Fetch a ball", 213 | "Go to hallway", 214 | ], 215 | ) 216 | 217 | llm_route = Route( 218 | routes_to=llm_query, 219 | samples=[ 220 | "What is the capital of France?", 221 | "Is there life on Mars?", 222 | "How many tablespoons in a cup?", 223 | "How are you today?", 224 | "Whats up?", 225 | ], 226 | ) 227 | 228 | mllm_route = Route( 229 | routes_to=mllm_query, 230 | samples=[ 231 | "Are we indoors or outdoors", 232 | "What do you see?", 233 | "Whats in front of you?", 234 | "Where are we", 235 | "Do you see any people?", 236 | "How many things are infront of you?", 237 | "Is this room occupied?", 238 | ], 239 | ) 240 | 241 | router_config = SemanticRouterConfig(router_name="go-to-router", distance_func="l2") 242 | # Initialize the router component 243 | router = SemanticRouter( 244 | inputs=[query_topic], 245 | routes=[llm_route, goto_route, mllm_route], 246 | default_route=llm_route, 247 | config=router_config, 248 | db_client=chroma_client, 249 | component_name="router", 250 | ) 251 | 252 | # Launch the components 253 | launcher = Launcher() 254 | launcher.add_pkg( 255 | components=[ 256 | mllm, 257 | llm, 258 | goto, 259 | introspector, 260 | map, 261 | router, 262 | speech_to_text, 263 | text_to_speech, 264 | vision, 265 | ], 266 | package_name="automatika_embodied_agents", 267 | multiprocessing=True, 268 | ) 269 | launcher.on_fail(action_name="restart") 270 | launcher.fallback_rate = 1 / 10 # 0.1 Hz or 10 seconds 271 | launcher.bringup() 272 | -------------------------------------------------------------------------------- /examples/conversational_agent_with_audio.py: -------------------------------------------------------------------------------- 1 | from agents.components import MLLM, SpeechToText, TextToSpeech 2 | from agents.config import SpeechToTextConfig, TextToSpeechConfig 3 | from agents.clients.roboml import HTTPModelClient 4 | from agents.clients.ollama import OllamaClient 5 | from agents.models import Whisper, SpeechT5, Llava 6 | from agents.ros import Topic, Launcher 7 | 8 | audio_in = Topic(name="audio0", msg_type="Audio") 9 | text_query = Topic(name="text0", msg_type="String") 10 | 11 | whisper = Whisper(name="whisper") # Custom model init params can be provided here 12 | roboml_whisper = HTTPModelClient(whisper) 13 | 14 | s2t_config = SpeechToTextConfig( 15 | enable_vad=True, # option to listen for speech through the microphone 16 | enable_wakeword=True, # option to invoke the component with a wakeword like 'hey jarvis' 17 | ) 18 | speech_to_text = SpeechToText( 19 | inputs=[audio_in], 20 | outputs=[text_query], 21 | model_client=roboml_whisper, 22 | trigger=audio_in, 23 | config=s2t_config, 24 | 
component_name="speech_to_text", 25 | ) 26 | 27 | image0 = Topic(name="image_raw", msg_type="Image") 28 | text_answer = Topic(name="text1", msg_type="String") 29 | 30 | llava = Llava(name="llava") 31 | llava_client = OllamaClient(llava) 32 | 33 | mllm = MLLM( 34 | inputs=[text_query, image0], 35 | outputs=[text_answer], 36 | model_client=llava_client, 37 | trigger=text_query, 38 | component_name="vqa", 39 | ) 40 | 41 | # config for playing audio on device 42 | t2s_config = TextToSpeechConfig(play_on_device=True) 43 | 44 | speecht5 = SpeechT5(name="speecht5") 45 | roboml_speecht5 = HTTPModelClient(speecht5) 46 | text_to_speech = TextToSpeech( 47 | inputs=[text_answer], 48 | trigger=text_answer, 49 | model_client=roboml_speecht5, 50 | config=t2s_config, 51 | component_name="text_to_speech", 52 | ) 53 | 54 | launcher = Launcher() 55 | launcher.add_pkg( 56 | components=[speech_to_text, mllm, text_to_speech], 57 | ) 58 | launcher.bringup() 59 | -------------------------------------------------------------------------------- /examples/go_to_x.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import json 3 | import numpy as np 4 | from agents.components import LLM 5 | from agents.models import Llama3_1 6 | from agents.vectordbs import ChromaDB 7 | from agents.config import LLMConfig 8 | from agents.clients.roboml import HTTPDBClient 9 | from agents.clients.ollama import OllamaClient 10 | from agents.ros import Launcher, Topic 11 | 12 | # Start a Llama3.1 based llm component using ollama client 13 | llama = Llama3_1(name="llama") 14 | llama_client = OllamaClient(llama) 15 | 16 | # Initialize a vector DB that will store our routes 17 | chroma = ChromaDB(name="MainDB") 18 | chroma_client = HTTPDBClient(db=chroma) 19 | 20 | # Define LLM input and output topics including goal_point topic of type PoseStamped 21 | goto_in = Topic(name="goto_in", msg_type="String") 22 | goal_point = Topic(name="goal_point", msg_type="PoseStamped") 23 | 24 | config = LLMConfig( 25 | enable_rag=True, 26 | collection_name="map", 27 | distance_func="l2", 28 | n_results=1, 29 | add_metadata=True, 30 | ) 31 | 32 | # initialize the component 33 | goto = LLM( 34 | inputs=[goto_in], 35 | outputs=[goal_point], 36 | model_client=llama_client, 37 | db_client=chroma_client, # check the previous example where we setup this database client 38 | trigger=goto_in, 39 | config=config, 40 | component_name="go_to_x", 41 | ) 42 | 43 | # set a component prompt 44 | goto.set_component_prompt( 45 | template="""From the given metadata, extract coordinates and provide 46 | the coordinates in the following json format:\n {"position": coordinates}""" 47 | ) 48 | 49 | 50 | # pre-process the output before publishing to a topic of msg_type PoseStamped 51 | def llm_answer_to_goal_point(output: str) -> Optional[np.ndarray]: 52 | # extract the json part of the output string (including brackets) 53 | # one can use sophisticated regex parsing here but we'll keep it simple 54 | json_string = output[output.find("{") : output.rfind("}") + 1] 55 | # load the string as a json and extract position coordinates 56 | # if there is an error, return None, i.e. 
no output would be published to goal_point 57 | try: 58 | json_dict = json.loads(json_string) 59 | coordinates = np.fromstring(json_dict["position"], sep=",", dtype=np.float64) 60 | print("Coordinates Extracted:", coordinates) 61 | if coordinates.shape[0] < 2 or coordinates.shape[0] > 3: 62 | return 63 | elif ( 64 | coordinates.shape[0] == 2 65 | ): # sometimes LLMs avoid adding the zeros of z-dimension 66 | coordinates = np.append(coordinates, 0) 67 | return coordinates 68 | except Exception: 69 | return 70 | 71 | 72 | # add the pre-processing function to the goal_point output topic 73 | goto.add_publisher_preprocessor(goal_point, llm_answer_to_goal_point) 74 | 75 | # Launch the component 76 | launcher = Launcher() 77 | launcher.add_pkg(components=[goto]) 78 | launcher.bringup() 79 | -------------------------------------------------------------------------------- /examples/prompt_engineering.py: -------------------------------------------------------------------------------- 1 | from agents.components import Vision, MLLM 2 | from agents.models import VisionModel, Idefics2 3 | from agents.clients.roboml import RESPModelClient, HTTPModelClient 4 | from agents.ros import Topic, Launcher 5 | from agents.config import VisionConfig 6 | 7 | image0 = Topic(name="image_raw", msg_type="Image") 8 | detections_topic = Topic(name="detections", msg_type="Detections") 9 | 10 | object_detection = VisionModel( 11 | name="object_detection", checkpoint="dino-4scale_r50_8xb2-12e_coco" 12 | ) 13 | roboml_detection = RESPModelClient(object_detection) 14 | 15 | detection_config = VisionConfig(threshold=0.5) 16 | vision = Vision( 17 | inputs=[image0], 18 | outputs=[detections_topic], 19 | trigger=image0, 20 | config=detection_config, 21 | model_client=roboml_detection, 22 | component_name="detection_component", 23 | ) 24 | 25 | text_query = Topic(name="text0", msg_type="String") 26 | text_answer = Topic(name="text1", msg_type="String") 27 | 28 | idefics = Idefics2(name="idefics_model") 29 | idefics_client = HTTPModelClient(idefics) 30 | 31 | mllm = MLLM( 32 | inputs=[text_query, image0, detections_topic], 33 | outputs=[text_answer], 34 | model_client=idefics_client, 35 | trigger=text_query, 36 | component_name="mllm_component", 37 | ) 38 | 39 | mllm.set_component_prompt( 40 | template="""Imagine you are a robot. 41 | This image has following items: {{ detections }}. 
42 | Answer the following about this image: {{ text0 }}""" 43 | ) 44 | launcher = Launcher() 45 | launcher.add_pkg(components=[vision, mllm]) 46 | launcher.bringup() 47 | -------------------------------------------------------------------------------- /examples/semantic_map.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from agents.components import MapEncoding, Vision, MLLM 3 | from agents.models import VisionModel, Llava 4 | from agents.clients.roboml import RESPModelClient, HTTPDBClient 5 | from agents.clients.ollama import OllamaClient 6 | from agents.ros import Topic, MapLayer, Launcher, FixedInput 7 | from agents.vectordbs import ChromaDB 8 | from agents.config import MapConfig, VisionConfig 9 | 10 | # Define the image input topic 11 | image0 = Topic(name="image_raw", msg_type="Image") 12 | # Create a detection topic 13 | detections_topic = Topic(name="detections", msg_type="Detections") 14 | 15 | # Add an object detection model 16 | object_detection = VisionModel( 17 | name="object_detection", checkpoint="dino-4scale_r50_8xb2-12e_coco" 18 | ) 19 | roboml_detection = RESPModelClient(object_detection) 20 | 21 | # Initialize the Vision component 22 | detection_config = VisionConfig(threshold=0.5) 23 | vision = Vision( 24 | inputs=[image0], 25 | outputs=[detections_topic], 26 | trigger=image0, 27 | config=detection_config, 28 | model_client=roboml_detection, 29 | component_name="detection_component", 30 | ) 31 | 32 | 33 | # Define a model client (working with Ollama in this case) 34 | llava = Llava(name="llava") 35 | llava_client = OllamaClient(llava) 36 | 37 | # Define a fixed input for the component 38 | introspection_query = FixedInput( 39 | name="introspection_query", 40 | msg_type="String", 41 | fixed="What kind of a room is this? Is it an office, a bedroom or a kitchen? Give a one word answer, out of the given choices", 42 | ) 43 | # Define output of the component 44 | introspection_answer = Topic(name="introspection_answer", msg_type="String") 45 | 46 | # Start a timed (periodic) component using the mllm model defined earlier 47 | # This component answers the same question after every 15 seconds 48 | introspector = MLLM( 49 | inputs=[introspection_query, image0], # we use the image0 topic defined earlier 50 | outputs=[introspection_answer], 51 | model_client=llava_client, 52 | trigger=15.0, # we provide the time interval as a float value to the trigger parameter 53 | component_name="introspector", 54 | ) 55 | 56 | 57 | # Define an arbitrary function to validate the output of the introspective component 58 | # before publication. 
59 | def introspection_validation(output: str) -> Optional[str]: 60 | for option in ["office", "bedroom", "kitchen"]: 61 | if option in output.lower(): 62 | return option 63 | 64 | 65 | introspector.add_publisher_preprocessor(introspection_answer, introspection_validation) 66 | 67 | # Object detection output from vision component 68 | layer1 = MapLayer(subscribes_to=detections_topic, temporal_change=True) 69 | # Introspection output from mllm component 70 | layer2 = MapLayer(subscribes_to=introspection_answer, resolution_multiple=3) 71 | 72 | # Initialize mandatory topics defining the robots localization in space 73 | position = Topic(name="odom", msg_type="Odometry") 74 | map_topic = Topic(name="map", msg_type="OccupancyGrid") 75 | 76 | # Initialize a vector DB that will store our semantic map 77 | chroma = ChromaDB(name="MainDB") 78 | chroma_client = HTTPDBClient(db=chroma) 79 | 80 | # Create the map component 81 | map_conf = MapConfig(map_name="map") # We give our map a name 82 | map = MapEncoding( 83 | layers=[layer1, layer2], 84 | position=position, 85 | map_topic=map_topic, 86 | config=map_conf, 87 | db_client=chroma_client, 88 | trigger=15.0, 89 | component_name="map_encoding", 90 | ) 91 | 92 | # Launch the components 93 | launcher = Launcher() 94 | launcher.add_pkg(components=[vision, introspector, map]) 95 | launcher.bringup() 96 | -------------------------------------------------------------------------------- /examples/semantic_router.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import json 3 | import numpy as np 4 | from agents.components import LLM, SemanticRouter 5 | from agents.models import Llama3_1 6 | from agents.vectordbs import ChromaDB 7 | from agents.config import LLMConfig, SemanticRouterConfig 8 | from agents.clients.roboml import HTTPDBClient 9 | from agents.clients.ollama import OllamaClient 10 | from agents.ros import Launcher, Topic, Route 11 | 12 | 13 | # Start a Llama3.1 based llm component using ollama client 14 | llama = Llama3_1(name="llama") 15 | llama_client = OllamaClient(llama) 16 | 17 | # Initialize a vector DB that will store our routes 18 | chroma = ChromaDB(name="MainDB") 19 | chroma_client = HTTPDBClient(db=chroma) 20 | 21 | 22 | # Make a generic LLM component using the Llama3_1 model 23 | llm_in = Topic(name="text_in_llm", msg_type="String") 24 | llm_out = Topic(name="text_out_llm", msg_type="String") 25 | 26 | llm = LLM(inputs=[llm_in], outputs=[llm_out], model_client=llama_client, trigger=llm_in) 27 | 28 | 29 | # Define LLM input and output topics including goal_point topic of type PoseStamped 30 | goto_in = Topic(name="goto_in", msg_type="String") 31 | goal_point = Topic(name="goal_point", msg_type="PoseStamped") 32 | 33 | config = LLMConfig( 34 | enable_rag=True, 35 | collection_name="map", 36 | distance_func="l2", 37 | n_results=1, 38 | add_metadata=True, 39 | ) 40 | 41 | # initialize the component 42 | goto = LLM( 43 | inputs=[goto_in], 44 | outputs=[goal_point], 45 | model_client=llama_client, 46 | db_client=chroma_client, # check the previous example where we setup this database client 47 | trigger=goto_in, 48 | config=config, 49 | component_name="go_to_x", 50 | ) 51 | 52 | # set a component prompt 53 | goto.set_component_prompt( 54 | template="""From the given metadata, extract coordinates and provide 55 | the coordinates in the following json format:\n {"position": coordinates}""" 56 | ) 57 | 58 | 59 | # pre-process the output before publishing to a topic of 
msg_type PoseStamped 60 | def llm_answer_to_goal_point(output: str) -> Optional[np.ndarray]: 61 | # extract the json part of the output string (including brackets) 62 | # one can use sophisticated regex parsing here but we'll keep it simple 63 | json_string = output[output.find("{") : output.find("}") + 1] 64 | 65 | # load the string as a json and extract position coordinates 66 | # if there is an error, return None, i.e. no output would be published to goal_point 67 | try: 68 | json_dict = json.loads(json_string) 69 | return np.array(json_dict["position"]) 70 | except Exception: 71 | return 72 | 73 | 74 | # add the pre-processing function to the goal_point output topic 75 | goto.add_publisher_preprocessor(goal_point, llm_answer_to_goal_point) 76 | 77 | # Create the input topic for the router 78 | query_topic = Topic(name="question", msg_type="String") 79 | 80 | # Define a route to a topic that processes go-to-x commands 81 | goto_route = Route( 82 | routes_to=goto_in, 83 | samples=[ 84 | "Go to the door", 85 | "Go to the kitchen", 86 | "Get me a glass", 87 | "Fetch a ball", 88 | "Go to hallway", 89 | ], 90 | ) 91 | 92 | # Define a route to a topic that is input to an LLM component 93 | llm_route = Route( 94 | routes_to=llm_in, 95 | samples=[ 96 | "What is the capital of France?", 97 | "Is there life on Mars?", 98 | "How many tablespoons in a cup?", 99 | "How are you today?", 100 | "Whats up?", 101 | ], 102 | ) 103 | 104 | router_config = SemanticRouterConfig(router_name="go-to-router", distance_func="l2") 105 | # Initialize the router component 106 | router = SemanticRouter( 107 | inputs=[query_topic], 108 | routes=[llm_route, goto_route], 109 | default_route=llm_route, # If none of the routes fall within a distance threshold 110 | config=router_config, 111 | db_client=chroma_client, # reusing the db_client from the previous example 112 | component_name="router", 113 | ) 114 | 115 | # Launch the components 116 | launcher = Launcher() 117 | launcher.add_pkg(components=[llm, goto, router]) 118 | launcher.bringup() 119 | -------------------------------------------------------------------------------- /examples/tool_calling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from agents.components import LLM 3 | from agents.models import Llama3_1 4 | from agents.vectordbs import ChromaDB 5 | from agents.config import LLMConfig 6 | from agents.clients.roboml import HTTPDBClient 7 | from agents.clients.ollama import OllamaClient 8 | from agents.ros import Launcher, Topic 9 | 10 | # Start a Llama3.1 based llm component using ollama client 11 | llama = Llama3_1(name="llama") 12 | llama_client = OllamaClient(llama) 13 | 14 | # Initialize a vector DB that will store our routes 15 | chroma = ChromaDB(name="MainDB") 16 | chroma_client = HTTPDBClient(db=chroma) 17 | 18 | # Define LLM input and output topics including goal_point topic of type PoseStamped 19 | goto_in = Topic(name="goto_in", msg_type="String") 20 | goal_point = Topic(name="goal_point", msg_type="PoseStamped") 21 | 22 | config = LLMConfig( 23 | enable_rag=True, 24 | collection_name="map", 25 | distance_func="l2", 26 | n_results=1, 27 | add_metadata=True, 28 | ) 29 | 30 | # initialize the component 31 | goto = LLM( 32 | inputs=[goto_in], 33 | outputs=[goal_point], 34 | model_client=llama_client, 35 | db_client=chroma_client, # check the previous example where we setup this database client 36 | trigger=goto_in, 37 | config=config, 38 | component_name="go_to_x", 39 | ) 40 | 41 | # 
set a component prompt 42 | goto.set_component_prompt( 43 | template="""What are the position coordinates in the given metadata?""" 44 | ) 45 | 46 | 47 | # pre-process the output before publishing to a topic of msg_type PoseStamped 48 | def get_coordinates(position: list[float]) -> np.ndarray: 49 | """Get position coordinates""" 50 | return np.array(position, dtype=float) 51 | 52 | 53 | function_description = { 54 | "type": "function", 55 | "function": { 56 | "name": "get_coordinates", 57 | "description": "Get position coordinates", 58 | "parameters": { 59 | "type": "object", 60 | "properties": { 61 | "position": { 62 | "type": "list[float]", 63 | "description": "The position coordinates in x, y and z", 64 | } 65 | }, 66 | }, 67 | "required": ["position"], 68 | }, 69 | } 70 | 71 | # add the pre-processing function to the goal_point output topic 72 | goto.register_tool( 73 | tool=get_coordinates, 74 | tool_description=function_description, 75 | send_tool_response_to_model=False, 76 | ) 77 | 78 | # Launch the component 79 | launcher = Launcher() 80 | launcher.add_pkg(components=[goto]) 81 | launcher.bringup() 82 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.pytest.ini_options] 2 | minversion = "6.0" 3 | addopts = "-ra -q" 4 | log_cli = true 5 | log_cli_level = "INFO" 6 | log_cli_format="[%(levelname)s] [%(asctime)s] [%(name)s] [%(process)d-%(thread)d] %(message)s" 7 | testpaths = [ 8 | "agents/tests" 9 | ] 10 | 11 | [tool.interrogate] 12 | ignore-init-method = true 13 | ignore-init-module = true 14 | ignore-magic = false 15 | ignore-semiprivate = false 16 | ignore-private = false 17 | ignore-property-decorators = false 18 | ignore-module = true 19 | ignore-nested-functions = false 20 | ignore-nested-classes = true 21 | ignore-setters = false 22 | exclude = ["setup.py", "docs", "build", "log", "install", "agents/tests", "examples"] 23 | ignore-regex = ["^get$", "^mock_.*", ".*BaseClass.*", "^main"] 24 | quiet = false 25 | whitelist-regex = [] 26 | color = true 27 | generate-badge = "." 28 | badge-format = "svg" 29 | 30 | [tool.ruff] 31 | extend-exclude = [".mypy_cache", ".tox", ".venv", "buck-out", "build", ".pytest_cache"] 32 | fix = true 33 | line-length = 88 34 | preview = true 35 | [tool.ruff.lint] 36 | ignore = ["E203", "E266", "E501", "F403", "F401"] 37 | select = ["B","C","E","F","W","B9"] 38 | [tool.ruff.lint.mccabe] 39 | max-complexity = 11 40 | 41 | [tool.bumpver] 42 | current_version = "0.3.1" 43 | version_pattern = "MAJOR.MINOR.PATCH" 44 | commit_message = "(chore) bump version {old_version} -> {new_version}" 45 | tag_message = "{new_version}" 46 | tag_scope = "default" 47 | pre_commit_hook = "" 48 | post_commit_hook = "" 49 | commit = true 50 | tag = true 51 | push = true 52 | 53 | [tool.bumpver.file_patterns] 54 | "agents/package.xml" = [ 55 | "{version}", 56 | ] 57 | --------------------------------------------------------------------------------