├── tabletop_handybot ├── resource │ └── tabletop_handybot ├── tabletop_handybot │ ├── __init__.py │ ├── audio_prompt_node.py │ ├── point_cloud_conversion.py │ ├── openai_assistant.py │ └── tabletop_handybot_node.py ├── setup.cfg ├── package.xml ├── setup.py └── launch │ └── run.launch.py ├── requirements.txt ├── assets ├── roll_dice_thumbnail.png ├── architecture_diagram.png └── pick_markers_thumbnail.png ├── .vscode └── settings.json ├── tabletop-handybot.repos ├── .gitignore └── README.md /tabletop_handybot/resource/tabletop_handybot: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tabletop_handybot/tabletop_handybot/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | open3d 2 | whisper-mic 3 | openai -------------------------------------------------------------------------------- /assets/roll_dice_thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycheng517/tabletop-handybot/HEAD/assets/roll_dice_thumbnail.png -------------------------------------------------------------------------------- /assets/architecture_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycheng517/tabletop-handybot/HEAD/assets/architecture_diagram.png -------------------------------------------------------------------------------- /assets/pick_markers_thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ycheng517/tabletop-handybot/HEAD/assets/pick_markers_thumbnail.png -------------------------------------------------------------------------------- /tabletop_handybot/setup.cfg: -------------------------------------------------------------------------------- 1 | [develop] 2 | script_dir=$base/lib/tabletop_handybot 3 | [install] 4 | install_scripts=$base/lib/tabletop_handybot 5 | 6 | [build_scripts] 7 | executable = /usr/bin/env python3 8 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.analysis.extraPaths": [ 3 | "/home/yifei/src/Grounded-Segment-Anything/GroundingDINO" 4 | ], 5 | "files.exclude": { 6 | "build/": true, 7 | "install/": true, 8 | "log/": true 9 | } 10 | } -------------------------------------------------------------------------------- /tabletop-handybot.repos: -------------------------------------------------------------------------------- 1 | repositories: 2 | pymoveit2: 3 | type: git 4 | url: https://github.com/ycheng517/pymoveit2.git 5 | version: devel 6 | ar4_ros_driver: 7 | type: git 8 | url: https://github.com/ycheng517/ar4_ros_driver.git 9 | Grounded-Segment-Anything/Grounded-Segment-Anything: 10 | type: git 11 | url: https://github.com/IDEA-Research/Grounded-Segment-Anything 12 | 13 | # dependencies of ar4_ros_driver 14 | easy_handeye2: 15 | type: git 16 | url: https://github.com/ycheng517/easy_handeye2.git 17 | ros2_aruco: 18 | type: git 19 | url: https://github.com/JMU-ROBOTICS-VIVA/ros2_aruco.git 20 | 
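The `tabletop-handybot.repos` file above is a standard vcstool manifest: each entry maps a checkout path in the workspace to a git URL and, optionally, a branch. The README's `vcs import . --input tabletop-handybot.repos` consumes it directly; purely as an illustration of the format (this script is not part of the package, and it assumes PyYAML is installed), the same information could be turned into plain `git clone` commands like this:

```python
# repos_to_clones.py -- illustration of the .repos format only; use `vcs import` in practice.
import yaml  # assumes PyYAML is available (pip install pyyaml)

with open("tabletop-handybot.repos", "r", encoding="utf-8") as f:
    repositories = yaml.safe_load(f)["repositories"]

for path, spec in repositories.items():
    # `version` is an optional branch/tag; entries without it use the default branch.
    command = f"git clone {spec['url']} {path}"
    if "version" in spec:
        command += f" --branch {spec['version']}"
    print(command)
```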
-------------------------------------------------------------------------------- /tabletop_handybot/tabletop_handybot/audio_prompt_node.py: -------------------------------------------------------------------------------- 1 | import rclpy 2 | import rclpy.node 3 | from std_msgs.msg import Empty, String 4 | from whisper_mic import WhisperMic 5 | 6 | 7 | class AudioPromptNode(rclpy.node.Node): 8 | """ROS2 Node that listens for a /listen message and publishes the 9 | result of the audio prompt to /prompt.""" 10 | 11 | def __init__(self): 12 | super().__init__("audio_prompt_node") 13 | 14 | self.whisper_mic = WhisperMic() 15 | 16 | self.prompt_pub = self.create_publisher(String, "/prompt", 10) 17 | self.listen_sub = self.create_subscription(Empty, "/listen", 18 | self.listen, 10) 19 | self._logger.info("Audio Prompt Node Initialized") 20 | 21 | def listen(self, _: Empty): 22 | result = self.whisper_mic.listen(timeout=10.0, phrase_time_limit=10.0) 23 | self._logger.info(f"Prompt: {result}") 24 | self.prompt_pub.publish(String(data=result)) 25 | 26 | 27 | def main(): 28 | rclpy.init() 29 | node = AudioPromptNode() 30 | try: 31 | rclpy.spin(node) 32 | except KeyboardInterrupt: 33 | pass 34 | 35 | rclpy.shutdown() 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /tabletop_handybot/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | tabletop_handybot 5 | 0.1.0 6 | A handy robot arm for performing tabletop tasks. 7 | 8 | Apache License 2.0 9 | Yifei Cheng 10 | Yifei Cheng 11 | 12 | ament_python 13 | 14 | ar_moveit_config 15 | cv_bridge 16 | easy_handeye2 17 | geometry_msgs 18 | rclpy 19 | realsense2_camera 20 | ros2_aruco 21 | tf2_ros 22 | sensor_msgs 23 | std_msgs 24 | 25 | 27 | ament_copyright 28 | ament_flake8 29 | ament_pep257 30 | python3-pytest 31 | 32 | 33 | ament_python 34 | 35 | 36 | -------------------------------------------------------------------------------- /tabletop_handybot/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from glob import glob 3 | from setuptools import setup 4 | 5 | package_name = 'tabletop_handybot' 6 | 7 | setup( 8 | name=package_name, 9 | version='0.1.0', 10 | packages=[package_name], 11 | data_files=[('share/ament_index/resource_index/packages', 12 | ['resource/' + package_name]), 13 | ('share/' + package_name, ['package.xml']), 14 | (os.path.join('share', package_name, 'launch'), 15 | glob(os.path.join('launch', '*launch.[pxy][yma]*')))], 16 | install_requires=['setuptools'], 17 | zip_safe=True, 18 | author='Yifei Cheng', 19 | author_email='ycheng517@gmail.com', 20 | maintainer='Yifei Cheng', 21 | maintainer_email='ycheng517@gmail.com', 22 | keywords=['ROS'], 23 | classifiers=[ 24 | 'Intended Audience :: Developers', 25 | 'License :: OSI Approved :: Apache Software License', 26 | 'Programming Language :: Python', 27 | 'Topic :: Software Development', 28 | ], 29 | description='A handy robot arm for performing tabletop tasks.', 30 | license='Apache License, Version 2.0', 31 | tests_require=['pytest'], 32 | entry_points={ 33 | 'console_scripts': [ 34 | 'tabletop_handybot_node = tabletop_handybot.tabletop_handybot_node:main', 35 | 'audio_prompt_node = tabletop_handybot.audio_prompt_node:main', 36 | ], 37 | }, 38 | ) 39 | -------------------------------------------------------------------------------- 
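The next file, `point_cloud_conversion.py`, packs an Nx3 NumPy array of XYZ points into a `sensor_msgs/PointCloud2` message. As a usage sketch (a hypothetical standalone node, not part of this package), it can be exercised like so:

```python
import numpy as np
import rclpy
from rclpy.node import Node
from sensor_msgs.msg import PointCloud2

from tabletop_handybot.point_cloud_conversion import point_cloud_to_msg


class CloudDemo(Node):
    """Publishes a small synthetic point cloud once per second."""

    def __init__(self):
        super().__init__("cloud_demo")
        self.pub = self.create_publisher(PointCloud2, "/point_cloud", 2)
        self.timer = self.create_timer(1.0, self.tick)

    def tick(self):
        # 100 random xyz points (in meters) expressed in the camera frame.
        points = np.random.uniform(-0.1, 0.1, size=(100, 3))
        self.pub.publish(point_cloud_to_msg(points, "camera_color_frame"))


def main():
    rclpy.init()
    rclpy.spin(CloudDemo())


if __name__ == "__main__":
    main()
```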
/tabletop_handybot/tabletop_handybot/point_cloud_conversion.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sensor_msgs.msg import PointCloud2, PointField 3 | from std_msgs.msg import Header 4 | 5 | 6 | def point_cloud_to_msg(points, parent_frame): 7 | """Creates a point cloud message. 8 | Args: 9 | points: Nx3 array of xyz positions. 10 | parent_frame: frame in which the point cloud is defined 11 | Returns: 12 | sensor_msgs/PointCloud2 message 13 | 14 | Code source: 15 | https://gist.github.com/pgorczak/5c717baa44479fa064eb8d33ea4587e0 16 | 17 | References: 18 | http://docs.ros.org/melodic/api/sensor_msgs/html/msg/PointCloud2.html 19 | http://docs.ros.org/melodic/api/sensor_msgs/html/msg/PointField.html 20 | http://docs.ros.org/melodic/api/std_msgs/html/msg/Header.html 21 | 22 | """ 23 | # In a PointCloud2 message, the point cloud is stored as a byte 24 | # array. In order to unpack it, we also include some parameters 25 | # which describe the size of each individual point. 26 | ros_dtype = PointField.FLOAT32 27 | dtype = np.float32 28 | itemsize = np.dtype(dtype).itemsize # A 32-bit float takes 4 bytes. 29 | 30 | data = points.astype(dtype).tobytes() 31 | 32 | # The fields specify what the bytes represent. The first 4 bytes 33 | # represent the x-coordinate, the next 4 the y-coordinate, etc. 34 | fields = [ 35 | PointField(name=n, offset=i * itemsize, datatype=ros_dtype, count=1) 36 | for i, n in enumerate("xyz") 37 | ] 38 | 39 | # The PointCloud2 message also has a header which specifies which 40 | # coordinate frame it is represented in. 41 | header = Header(frame_id=parent_frame) 42 | 43 | return PointCloud2( 44 | header=header, 45 | height=1, 46 | width=points.shape[0], 47 | is_dense=False, 48 | is_bigendian=False, 49 | fields=fields, 50 | point_step=(itemsize * 3), # Every point consists of three float32s.
51 | row_step=(itemsize * 3 * points.shape[0]), 52 | data=data, 53 | ) 54 | -------------------------------------------------------------------------------- /tabletop_handybot/launch/run.launch.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import yaml 4 | from ament_index_python.packages import get_package_share_directory 5 | from launch import LaunchDescription 6 | from launch.actions import IncludeLaunchDescription, TimerAction 7 | from launch.launch_description_sources import PythonLaunchDescriptionSource 8 | from launch_ros.actions import Node 9 | 10 | 11 | def load_yaml(package_name, file_name): 12 | package_path = get_package_share_directory(package_name) 13 | absolute_file_path = os.path.join(package_path, file_name) 14 | with open(absolute_file_path, "r", encoding="utf-8") as file: 15 | return yaml.safe_load(file) 16 | 17 | 18 | def generate_launch_description(): 19 | realsense_args = { 20 | "enable_rgbd": "true", 21 | "enable_sync": "true", 22 | "align_depth.enable": "true", 23 | "enable_color": "true", 24 | "enable_depth": "true", 25 | "depth_module.depth_profile": "640x480x30", 26 | "depth_module.infra_profile": "640x480x30", 27 | } 28 | realsense = IncludeLaunchDescription( 29 | PythonLaunchDescriptionSource([ 30 | os.path.join(get_package_share_directory("realsense2_camera"), 31 | "launch", "rs_launch.py") 32 | ]), 33 | launch_arguments=realsense_args.items()) 34 | 35 | calibration_tf_publisher = IncludeLaunchDescription( 36 | PythonLaunchDescriptionSource([ 37 | os.path.join(get_package_share_directory("easy_handeye2"), 38 | "launch", "publish.launch.py") 39 | ]), 40 | launch_arguments={"name": "ar4_calibration"}.items()) 41 | 42 | delay_calibration_tf_publisher = TimerAction( 43 | actions=[calibration_tf_publisher], 44 | period=2.0, 45 | ) 46 | 47 | ar_moveit_launch = PythonLaunchDescriptionSource([ 48 | os.path.join(get_package_share_directory("ar_moveit_config"), "launch", 49 | "ar_moveit.launch.py") 50 | ]) 51 | ar_moveit_args = { 52 | "include_gripper": "True", 53 | "rviz_config_file": "moveit_with_camera.rviz" 54 | }.items() 55 | ar_moveit = IncludeLaunchDescription(ar_moveit_launch, 56 | launch_arguments=ar_moveit_args) 57 | 58 | tabletop_handybot_node = Node( 59 | package="tabletop_handybot", 60 | executable="tabletop_handybot_node", 61 | name="tabletop_handybot_node", 62 | output="screen", 63 | ) 64 | 65 | audio_prompt_node = Node( 66 | package="tabletop_handybot", 67 | executable="audio_prompt_node", 68 | name="audio_prompt_node", 69 | output="screen", 70 | ) 71 | 72 | return LaunchDescription([ 73 | realsense, delay_calibration_tf_publisher, ar_moveit, 74 | audio_prompt_node, tabletop_handybot_node 75 | ]) 76 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ROS 2 | install/ 3 | log/ 4 | 5 | # imported repositories 6 | pymoveit2/ 7 | ar4_ros_driver/ 8 | Grounded-Segment-Anything/ 9 | easy_handeye2/ 10 | ros2_aruco/ 11 | 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | share/python-wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | 40 | # PyInstaller 41 | # Usually these 
files are written by a python script from a template 42 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 43 | *.manifest 44 | *.spec 45 | 46 | # Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .nox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | *.py,cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | cover/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | db.sqlite3 73 | db.sqlite3-journal 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | .pybuilder/ 87 | target/ 88 | 89 | # Jupyter Notebook 90 | .ipynb_checkpoints 91 | 92 | # IPython 93 | profile_default/ 94 | ipython_config.py 95 | 96 | # pyenv 97 | # For a library or package, you might want to ignore these files since the code is 98 | # intended to run in multiple environments; otherwise, check them in: 99 | # .python-version 100 | 101 | # pipenv 102 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 103 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 104 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 105 | # install all needed dependencies. 106 | #Pipfile.lock 107 | 108 | # poetry 109 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 110 | # This is especially recommended for binary packages to ensure reproducibility, and is more 111 | # commonly ignored for libraries. 112 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 113 | #poetry.lock 114 | 115 | # pdm 116 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 117 | #pdm.lock 118 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 119 | # in version control. 120 | # https://pdm.fming.dev/#use-with-ide 121 | .pdm.toml 122 | 123 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 124 | __pypackages__/ 125 | 126 | # Celery stuff 127 | celerybeat-schedule 128 | celerybeat.pid 129 | 130 | # SageMath parsed files 131 | *.sage.py 132 | 133 | # Environments 134 | .env 135 | .venv 136 | env/ 137 | venv/ 138 | ENV/ 139 | env.bak/ 140 | venv.bak/ 141 | 142 | # Spyder project settings 143 | .spyderproject 144 | .spyproject 145 | 146 | # Rope project settings 147 | .ropeproject 148 | 149 | # mkdocs documentation 150 | /site 151 | 152 | # mypy 153 | .mypy_cache/ 154 | .dmypy.json 155 | dmypy.json 156 | 157 | # Pyre type checker 158 | .pyre/ 159 | 160 | # pytype static type analyzer 161 | .pytype/ 162 | 163 | # Cython debug symbols 164 | cython_debug/ 165 | 166 | # PyCharm 167 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 168 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 169 | # and can be added to the global gitignore or merged into this file. For a more nuclear 170 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
171 | #.idea/ 172 | -------------------------------------------------------------------------------- /tabletop_handybot/tabletop_handybot/openai_assistant.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI # pylint: disable=import-self 2 | from openai.types.beta import Assistant 3 | 4 | # pylint: disable=line-too-long 5 | 6 | 7 | def get_or_create_assistant(client: OpenAI, 8 | assistant_id: str = "") -> Assistant: 9 | if assistant_id: 10 | return client.beta.assistants.retrieve(assistant_id) 11 | 12 | return client.beta.assistants.create( 13 | name="Tabletop Assistant", 14 | instructions=( 15 | "You are a robot arm mounted on a table. Write and run code to " 16 | "do tasks on the table. You can only pick up one object at a time." 17 | ), 18 | model="gpt-4o", 19 | temperature=0.01, 20 | tools=[{ 21 | "type": "function", 22 | "function": { 23 | "name": "detect_objects", 24 | "description": 25 | "Detect objects in the field of view of the camera", 26 | "parameters": { 27 | "type": "object", 28 | "properties": { 29 | "object_classes": { 30 | "type": 31 | "string", 32 | "description": 33 | ("Object classes to detect, comma separated" 34 | "For example: horses,rivers,plain"), 35 | } 36 | }, 37 | "required": ["object_classes"], 38 | }, 39 | }, 40 | }, { 41 | "type": "function", 42 | "function": { 43 | "name": "pick_object", 44 | "description": 45 | "Pick up an object from the output of get_objects.", 46 | "parameters": { 47 | "type": "object", 48 | "properties": { 49 | "object_index": { 50 | "type": 51 | "integer", 52 | "description": 53 | "index of target object in the detected objects list to execute the action for." 54 | } 55 | }, 56 | "required": ["object_index"] 57 | } 58 | } 59 | }, { 60 | "type": "function", 61 | "function": { 62 | "name": "move_above_object_and_release", 63 | "description": 64 | "move the end effector above the object and release the gripper. The object is from the output of get_objects", 65 | "parameters": { 66 | "type": "object", 67 | "properties": { 68 | "object_index": { 69 | "type": 70 | "integer", 71 | "description": 72 | "index of target object in the detected objects list to execute the action for." 73 | } 74 | }, 75 | "required": ["object_index"] 76 | } 77 | } 78 | }, { 79 | "type": "function", 80 | "function": { 81 | "name": "release_gripper", 82 | "description": "Open up the gripper", 83 | "parameters": { 84 | "type": "object", 85 | "properties": {}, 86 | "required": [] 87 | } 88 | } 89 | }, { 90 | "type": "function", 91 | "function": { 92 | "name": "flick_wrist_while_release", 93 | "description": 94 | "Flick the wrist while releasing the gripper, basically tossing the object.", 95 | "parameters": { 96 | "type": "object", 97 | "properties": {}, 98 | "required": [] 99 | } 100 | } 101 | }], 102 | ) 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tabletop HandyBot 2 | 3 | A low-cost AI powered robotic arm assistant that listens to your voice commands 4 | and can carry out a variety of tabletop tasks. Currently it can pick and place 5 | arbitrary objects presented in front of it. The total BOM is around $2300 USD. 6 | 7 | ## Video Demo 8 | 9 |
10 | 11 | | Rolling Dice | Picking Markers | 12 | | :--------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------: | 13 | | [![Roll Dice](./assets/roll_dice_thumbnail.png)](https://youtube.com/shorts/xAmPpvCZZU4) | [![Pick Markers](./assets/pick_markers_thumbnail.png)](https://youtube.com/shorts/fsCkS7HvgGo) | 14 | 15 |
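Both demos are driven purely by natural-language prompts. A prompt normally arrives through the microphone (the `/listen` topic) or via `ros2 topic pub` as shown in the Run section below, but it can also be sent from a short Python script; a minimal sketch using the same `/prompt` topic and `std_msgs/String` message used by the nodes in this repo:

```python
import rclpy
from rclpy.node import Node
from std_msgs.msg import String


def main():
    rclpy.init()
    node = Node("prompt_sender")
    pub = node.create_publisher(String, "/prompt", 10)
    # Spin briefly so the publisher can match with the subscriber before sending.
    rclpy.spin_once(node, timeout_sec=1.0)
    pub.publish(String(data="put the marker in the container"))
    rclpy.spin_once(node, timeout_sec=1.0)
    node.destroy_node()
    rclpy.shutdown()


if __name__ == "__main__":
    main()
```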
16 |
17 | ## System Architecture
18 |
19 | ![System Architecture](./assets/architecture_diagram.png)
20 |
21 | ### Technology Used
22 |
23 | - [AR4 Robot](https://www.anninrobotics.com/): a $2000 robot arm that can lift 5lb dumbbells
24 | - [ar4_ros_driver](https://github.com/ycheng517/ar4_ros_driver): ROS driver for the arm
25 | - [Realsense D435](https://store.intelrealsense.com/buy-intel-realsense-depth-camera-d435.html?_ga=2.156205052.639062113.1715864879-1357777019.1715864879): a reliable $300 RGBD camera
26 | - [realsense-ros](https://github.com/IntelRealSense/realsense-ros): ROS wrapper for the camera
27 | - [Whisper Mic](https://github.com/mallorbc/whisper_mic): a convenient library for using OpenAI Whisper with a microphone
28 | - [Grounding DINO](https://github.com/IDEA-Research/Grounded-Segment-Anything/tree/main/GroundingDINO): a SOTA zero-shot object detector that can detect any object
29 | - [Segment Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything/tree/main/segment_anything): a SOTA zero-shot object segmentation model that can segment any object
30 | - [OpenAI Assistants](https://platform.openai.com/docs/assistants/overview): an API that calls ChatGPT with specific instructions and function calls
31 |
32 | ## Setup
33 |
34 | ### Pre-requisites
35 |
36 | You should have [ROS 2 Iron](https://docs.ros.org/en/iron/index.html) installed on Ubuntu 22.04.
37 |
38 | The following hardware is used. Using other hardware will require adapting the codebase.
39 |
40 | - [AR4 robot arm](https://www.anninrobotics.com/) with [AR servo gripper](https://www.anninrobotics.com/product-page/servo-gripper-parts-kit)
41 | - [Intel RealSense D435 Depth Camera](https://www.intelrealsense.com/depth-camera-d435/)
42 |
43 | You should be able to run [ar4_ros_driver](https://github.com/ycheng517/ar4_ros_driver)
44 | with the gripper, and successfully perform hand-eye calibration.
45 |
46 | ### Install
47 |
48 | Import the dependent repositories:
49 |
50 | ```bash
51 | vcs import . --input tabletop-handybot.repos
52 | ```
53 |
54 | Create a virtual environment, e.g.:
55 |
56 | ```bash
57 | pyenv virtualenv 3.10.12 handybot
58 | ```
59 |
60 | Go to the `./Grounded-Segment-Anything/Grounded-Segment-Anything/` sub-directory
61 | and set up [Grounded-Segment-Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything)
62 | in the virtualenv. Ensure you can run the [grounded_sam.ipynb](https://github.com/IDEA-Research/Grounded-Segment-Anything/blob/main/grounded_sam.ipynb)
63 | notebook.
64 |
65 | Install the Python dependencies that are not in the ROS Index:
66 |
67 | ```bash
68 | pip install -r requirements.txt
69 | ```
70 |
71 | Build and source the project:
72 |
73 | ```bash
74 | colcon build
75 | source install/setup.bash
76 | ```
77 |
78 | ### Run
79 |
80 | Launch the AR4:
81 |
82 | ```bash
83 | ros2 launch ar_hardware_interface ar_hardware.launch.py calibrate:=True include_gripper:=True
84 | ```
85 |
86 | Launch all other programs:
87 |
88 | ```bash
89 | ros2 launch tabletop_handybot run.launch.py
90 | ```
91 |
92 | If everything looks good, you can try publishing a prompt to the
93 | `/prompt` topic for the robot to execute, e.g.:
94 |
95 | ```bash
96 | ros2 topic pub --once /prompt std_msgs/msg/String "data: 'put the marker in the container'"
97 | ```
98 |
99 | If you have a microphone attached to your computer, you can publish a message
100 | to the `/listen` topic and then say your prompt.
101 | 102 | ```bash 103 | ros2 topic pub --once /listen std_msgs/msg/Empty "{}" 104 | ``` 105 | -------------------------------------------------------------------------------- /tabletop_handybot/tabletop_handybot/tabletop_handybot_node.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import time 4 | from functools import cached_property 5 | from typing import List 6 | 7 | import cv2 8 | import numpy as np 9 | import open3d as o3d 10 | import openai 11 | import rclpy 12 | import supervision as sv 13 | import tf2_ros 14 | import torch 15 | import torchvision 16 | from cv_bridge import CvBridge 17 | from geometry_msgs.msg import Pose, PoseStamped 18 | from groundingdino.util.inference import Model 19 | from openai.types.beta import Assistant 20 | from openai.types.beta.threads import RequiredActionFunctionToolCall 21 | from rclpy.callback_groups import ReentrantCallbackGroup 22 | from rclpy.duration import Duration 23 | from rclpy.executors import MultiThreadedExecutor 24 | from rclpy.callback_groups import MutuallyExclusiveCallbackGroup 25 | from rclpy.node import Node 26 | from rclpy.time import Time 27 | from scipy.spatial.transform import Rotation 28 | from segment_anything import SamPredictor, sam_model_registry 29 | from sensor_msgs.msg import Image, PointCloud2 30 | from std_msgs.msg import Int64, String 31 | from sensor_msgs.msg import JointState 32 | 33 | from pymoveit2 import GripperInterface, MoveIt2 34 | 35 | from .openai_assistant import get_or_create_assistant 36 | from .point_cloud_conversion import point_cloud_to_msg 37 | 38 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 39 | 40 | # GroundingDINO config and checkpoint 41 | GSA_PATH = "./Grounded-Segment-Anything/Grounded-Segment-Anything" 42 | GROUNDING_DINO_CONFIG_PATH = os.path.join( 43 | GSA_PATH, "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py") 44 | GROUNDING_DINO_CHECKPOINT_PATH = os.path.join(GSA_PATH, 45 | "groundingdino_swint_ogc.pth") 46 | 47 | # Segment-Anything checkpoint 48 | SAM_ENCODER_VERSION = "vit_h" 49 | SAM_CHECKPOINT_PATH = os.path.join(GSA_PATH, "sam_vit_h_4b8939.pth") 50 | 51 | # Predict classes and hyper-param for GroundingDINO 52 | BOX_THRESHOLD = 0.4 53 | TEXT_THRESHOLD = 0.25 54 | NMS_THRESHOLD = 0.8 55 | 56 | 57 | # Prompting SAM with detected boxes 58 | def segment(sam_predictor: SamPredictor, image: np.ndarray, 59 | xyxy: np.ndarray) -> np.ndarray: 60 | sam_predictor.set_image(image) 61 | result_masks = [] 62 | for box in xyxy: 63 | masks, scores, _ = sam_predictor.predict(box=box, 64 | multimask_output=True) 65 | index = np.argmax(scores) 66 | result_masks.append(masks[index]) 67 | return np.array(result_masks) 68 | 69 | 70 | class TabletopHandyBotNode(Node): 71 | """Main ROS 2 node for Tabletop HandyBot.""" 72 | 73 | # TODO: make args rosparams 74 | def __init__( 75 | self, 76 | annotate: bool = False, 77 | publish_point_cloud: bool = False, 78 | assistant_id: str = "", 79 | # Adjust these offsets to your needs: 80 | offset_x: float = 0.015, 81 | offset_y: float = -0.015, 82 | offset_z: float = 0.08, # accounts for the height of the gripper 83 | ): 84 | super().__init__("tabletop_handybot_node") 85 | 86 | self.logger = self.get_logger() 87 | 88 | self.cv_bridge = CvBridge() 89 | self.gripper_joint_name = "gripper_joint" 90 | callback_group = ReentrantCallbackGroup() 91 | # Create MoveIt 2 interface 92 | self.arm_joint_names = [ 93 | "joint_1", "joint_2", "joint_3", "joint_4", "joint_5", 
"joint_6" 94 | ] 95 | self.moveit2 = MoveIt2( 96 | node=self, 97 | joint_names=self.arm_joint_names, 98 | base_link_name="base_link", 99 | end_effector_name="link_6", 100 | group_name="ar_manipulator", 101 | callback_group=callback_group, 102 | ) 103 | self.moveit2.planner_id = "RRTConnectkConfigDefault" 104 | self.gripper_interface = GripperInterface( 105 | node=self, 106 | gripper_joint_names=["gripper_jaw1_joint"], 107 | open_gripper_joint_positions=[-0.012], 108 | closed_gripper_joint_positions=[0.0], 109 | gripper_group_name="ar_gripper", 110 | callback_group=callback_group, 111 | gripper_command_action_name="/gripper_controller/gripper_cmd", 112 | ) 113 | self.tf_buffer = tf2_ros.Buffer() 114 | self.tf_listener = tf2_ros.TransformListener(self.tf_buffer, self) 115 | 116 | self.grounding_dino_model = Model( 117 | model_config_path=GROUNDING_DINO_CONFIG_PATH, 118 | model_checkpoint_path=GROUNDING_DINO_CHECKPOINT_PATH, 119 | ) 120 | self.sam = sam_model_registry[SAM_ENCODER_VERSION]( 121 | checkpoint=SAM_CHECKPOINT_PATH) 122 | self.sam.to(device=DEVICE) 123 | self.sam_predictor = SamPredictor(self.sam) 124 | self.openai = openai.OpenAI() 125 | self.assistant: Assistant = get_or_create_assistant( 126 | self.openai, assistant_id) 127 | self.logger.info(f"Loaded assistant with ID: {self.assistant.id}") 128 | 129 | self.annotate = annotate 130 | self.publish_point_cloud = publish_point_cloud 131 | self.n_frames_processed = 0 132 | self._last_depth_msg = None 133 | self._last_rgb_msg = None 134 | self.arm_joint_state: JointState | None = None 135 | self._last_detections: sv.Detections | None = None 136 | self._object_in_gripper: bool = False 137 | self.gripper_squeeze_factor = 0.5 138 | self.offset_x = offset_x 139 | self.offset_y = offset_y 140 | self.offset_z = offset_z 141 | 142 | self.image_sub = self.create_subscription(Image, 143 | "/camera/color/image_raw", 144 | self.image_callback, 10) 145 | self.depth_sub = self.create_subscription( 146 | Image, "/camera/aligned_depth_to_color/image_raw", 147 | self.depth_callback, 10) 148 | self.joint_states_sub = self.create_subscription( 149 | JointState, "/joint_states", self.joint_states_callback, 10) 150 | 151 | if self.publish_point_cloud: 152 | self.point_cloud_pub = self.create_publisher( 153 | PointCloud2, "/point_cloud", 2) 154 | self.prompt_sub = self.create_subscription( 155 | String, 156 | "/prompt", 157 | self.start, 158 | 10, 159 | callback_group=MutuallyExclusiveCallbackGroup()) 160 | self.save_images_sub = self.create_subscription( 161 | String, "/save_images", self.save_images, 10) 162 | self.detect_objects_sub = self.create_subscription( 163 | String, "/detect_objects", self.detect_objects_cb, 10) 164 | self.release_at_sub = self.create_subscription(Int64, "/release_above", 165 | self.release_above_cb, 166 | 10) 167 | self.pick_object_sub = self.create_subscription( 168 | Int64, "/pick_object", self.pick_object_cb, 10) 169 | 170 | self.logger.info("Tabletop HandyBot node initialized.") 171 | 172 | def execution_success(self, tool_call: RequiredActionFunctionToolCall, 173 | tool_outputs: List[dict]) -> None: 174 | output_dict = {"success": True} 175 | tool_outputs.append({ 176 | "tool_call_id": tool_call.id, 177 | "output": json.dumps(output_dict), 178 | }) 179 | 180 | def execution_failure(self, tool_call: RequiredActionFunctionToolCall, 181 | tool_outputs: List[dict], failure_reason: str): 182 | output_dict = { 183 | "success": False, 184 | "failure_reason": failure_reason, 185 | } 186 | tool_outputs.append({ 187 | 
"tool_call_id": tool_call.id, 188 | "output": json.dumps(output_dict), 189 | }) 190 | 191 | def handle_tool_call(self, tool_call: RequiredActionFunctionToolCall, 192 | rgb_image: np.ndarray, depth_image: np.ndarray, 193 | tool_outputs: List[dict]): 194 | if tool_call.function.name == "detect_objects": 195 | args = json.loads(tool_call.function.arguments) 196 | classes_str = args["object_classes"] 197 | classes = classes_str.split(",") 198 | self._last_detections = self.detect_objects(rgb_image, classes) 199 | detected_classes = [ 200 | classes[class_id] 201 | for class_id in self._last_detections.class_id 202 | ] 203 | self.logger.info(f"Detected {detected_classes}.") 204 | self.logger.info( 205 | f"detection confidence: {self._last_detections.confidence}") 206 | output_dict = { 207 | "object_classes": detected_classes, 208 | } 209 | tool_outputs.append({ 210 | "tool_call_id": tool_call.id, 211 | "output": json.dumps(output_dict), 212 | }) 213 | elif tool_call.function.name == "pick_object": 214 | args = json.loads(tool_call.function.arguments) 215 | if self._last_detections is None: 216 | self.execution_failure(tool_call, tool_outputs, 217 | "No detections available.") 218 | return 219 | 220 | if self._object_in_gripper: 221 | self.execution_failure( 222 | tool_call, tool_outputs, 223 | "Gripper is already holding an object.") 224 | return 225 | 226 | if args["object_index"] >= len(self._last_detections.mask): 227 | self.execution_failure( 228 | tool_call, tool_outputs, 229 | ("Invalid object index. Only " 230 | f"{len(self._last_detections.mask)} objects detected.")) 231 | return 232 | 233 | self.pick_object(args["object_index"], self._last_detections, 234 | depth_image) 235 | self.logger.info( 236 | f"done picking object. Joint states: {self.arm_joint_state.position}" 237 | ) 238 | self._object_in_gripper = True 239 | self.execution_success(tool_call, tool_outputs) 240 | elif tool_call.function.name == "move_above_object_and_release": 241 | args = json.loads(tool_call.function.arguments) 242 | if self._last_detections is None: 243 | self.execution_failure(tool_call, tool_outputs, 244 | "No detections available.") 245 | return 246 | 247 | if args["object_index"] >= len(self._last_detections.mask): 248 | self.execution_failure( 249 | tool_call, tool_outputs, 250 | ("Invalid object index. 
Only " 251 | f"{len(self._last_detections.mask)} objects detected.")) 252 | return 253 | 254 | self.release_above(args["object_index"], self._last_detections, 255 | depth_image) 256 | self._object_in_gripper = False 257 | self.execution_success(tool_call, tool_outputs) 258 | elif tool_call.function.name == "release_gripper": 259 | self.release_gripper() 260 | self.execution_success(tool_call, tool_outputs) 261 | self._object_in_gripper = False 262 | elif tool_call.function.name == "flick_wrist_while_release": 263 | self.flick_wrist_while_release() 264 | self.execution_success(tool_call, tool_outputs) 265 | self._object_in_gripper = False 266 | 267 | def start(self, msg: String): 268 | if not self._last_rgb_msg or not self._last_depth_msg: 269 | return 270 | 271 | rgb_image = self.cv_bridge.imgmsg_to_cv2(self._last_rgb_msg) 272 | depth_image = self.cv_bridge.imgmsg_to_cv2(self._last_depth_msg) 273 | self._last_detections = None 274 | 275 | self.logger.info(f"Processing: {msg.data}") 276 | self.logger.info( 277 | f"Initial Joint states: {self.arm_joint_state.position}") 278 | thread = self.openai.beta.threads.create() 279 | message = self.openai.beta.threads.messages.create( 280 | thread_id=thread.id, 281 | role="user", 282 | content=msg.data, 283 | ) 284 | self.logger.info(str(message)) 285 | 286 | run = self.openai.beta.threads.runs.create_and_poll( 287 | thread_id=thread.id, 288 | assistant_id=self.assistant.id, 289 | ) 290 | 291 | done = False 292 | while not done: 293 | if run.status == "completed": 294 | messages = self.openai.beta.threads.messages.list( 295 | thread_id=thread.id) 296 | self.logger.info(str(messages)) 297 | done = True 298 | break 299 | else: 300 | self.logger.info(run.status) 301 | 302 | tool_outputs = [] 303 | for tool_call in run.required_action.submit_tool_outputs.tool_calls: 304 | self.logger.info(f"tool_call: {tool_call}") 305 | if tool_call.type == "function": 306 | self.handle_tool_call(tool_call, rgb_image, depth_image, 307 | tool_outputs) 308 | 309 | self.logger.info(f"tool_outputs: {tool_outputs}") 310 | if tool_outputs: 311 | try: 312 | run = self.openai.beta.threads.runs.submit_tool_outputs_and_poll( 313 | thread_id=thread.id, 314 | run_id=run.id, 315 | tool_outputs=tool_outputs) 316 | self.logger.info("Tool outputs submitted successfully.") 317 | except Exception as e: # pylint: disable=broad-except 318 | self.logger.error(f"Failed to submit tool outputs: {e}") 319 | else: 320 | self.logger.info("No tool outputs to submit.") 321 | 322 | self.go_home() 323 | self.logger.info("Task completed.") 324 | 325 | def detect_objects(self, image: np.ndarray, object_classes: List[str]): 326 | self.logger.info(f"Detecting objects of classes: {object_classes}") 327 | detections: sv.Detections = self.grounding_dino_model.predict_with_classes( 328 | image=image, 329 | classes=object_classes, 330 | box_threshold=BOX_THRESHOLD, 331 | text_threshold=TEXT_THRESHOLD, 332 | ) 333 | 334 | # NMS post process 335 | nms_idx = (torchvision.ops.nms( 336 | torch.from_numpy(detections.xyxy), 337 | torch.from_numpy(detections.confidence), 338 | NMS_THRESHOLD, 339 | ).numpy().tolist()) 340 | 341 | detections.xyxy = detections.xyxy[nms_idx] 342 | detections.confidence = detections.confidence[nms_idx] 343 | detections.class_id = detections.class_id[nms_idx] 344 | 345 | detections.mask = segment( 346 | sam_predictor=self.sam_predictor, 347 | image=cv2.cvtColor(image, cv2.COLOR_BGR2RGB), 348 | xyxy=detections.xyxy, 349 | ) 350 | 351 | if self.annotate: 352 | box_annotator = 
sv.BoxAnnotator() 353 | mask_annotator = sv.MaskAnnotator() 354 | labels = [ 355 | f"{object_classes[class_id]} {confidence:0.2f}" 356 | for _, _, confidence, class_id, _, _ in detections 357 | ] 358 | annotated_frame = box_annotator.annotate(scene=image.copy(), 359 | detections=detections, 360 | labels=labels) 361 | cv2.imwrite( 362 | f"annotated_image_detections_{self.n_frames_processed}.jpg", 363 | annotated_frame) 364 | 365 | annotated_frame = mask_annotator.annotate(scene=image.copy(), 366 | detections=detections) 367 | cv2.imwrite(f"annotated_image_masks_{self.n_frames_processed}.jpg", 368 | annotated_frame) 369 | 370 | if self.publish_point_cloud: 371 | depth_image = self.cv_bridge.imgmsg_to_cv2(self._last_depth_msg) 372 | # mask out the depth image except for the detected objects 373 | masked_depth_image = np.zeros_like(depth_image, dtype=np.float32) 374 | for mask in detections.mask: 375 | masked_depth_image[mask] = depth_image[mask] 376 | masked_depth_image /= 1000.0 377 | 378 | # convert the masked depth image to a point cloud 379 | pcd = o3d.geometry.PointCloud.create_from_depth_image( 380 | o3d.geometry.Image(masked_depth_image), 381 | o3d.camera.PinholeCameraIntrinsic( 382 | o3d.camera.PinholeCameraIntrinsicParameters. 383 | PrimeSenseDefault), 384 | ) 385 | 386 | # convert it to a ROS PointCloud2 message 387 | points = np.asarray(pcd.points) 388 | pc_msg = point_cloud_to_msg(points, "/camera_color_frame") 389 | self.point_cloud_pub.publish(pc_msg) 390 | 391 | self.n_frames_processed += 1 392 | return detections 393 | 394 | def pick_object(self, object_index: int, detections: sv.Detections, 395 | depth_image: np.ndarray): 396 | """Perform a top-down grasp on the object.""" 397 | # mask out the depth image except for the detected objects 398 | masked_depth_image = np.zeros_like(depth_image, dtype=np.float32) 399 | mask = detections.mask[object_index] 400 | masked_depth_image[mask] = depth_image[mask] 401 | masked_depth_image /= 1000.0 402 | 403 | # convert the masked depth image to a point cloud 404 | pcd = o3d.geometry.PointCloud.create_from_depth_image( 405 | o3d.geometry.Image(masked_depth_image), 406 | o3d.camera.PinholeCameraIntrinsic( 407 | o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault), 408 | ) 409 | pcd.transform(self.cam_to_base_affine) 410 | points = np.asarray(pcd.points) 411 | grasp_z = points[:, 2].max() 412 | 413 | near_grasp_z_points = points[points[:, 2] > grasp_z - 0.008] 414 | xy_points = near_grasp_z_points[:, :2] 415 | xy_points = xy_points.astype(np.float32) 416 | center, dimensions, theta = cv2.minAreaRect(xy_points) 417 | 418 | gripper_rotation = theta 419 | if dimensions[0] > dimensions[1]: 420 | gripper_rotation -= 90 421 | if gripper_rotation < -90: 422 | gripper_rotation += 180 423 | elif gripper_rotation > 90: 424 | gripper_rotation -= 180 425 | 426 | gripper_opening = min(dimensions) 427 | grasp_pose = Pose() 428 | grasp_pose.position.x = center[0] + self.offset_x 429 | grasp_pose.position.y = center[1] + self.offset_y 430 | grasp_pose.position.z = grasp_z + self.offset_z 431 | top_down_rot = Rotation.from_quat([0, 1, 0, 0]) 432 | extra_rot = Rotation.from_euler("z", gripper_rotation, degrees=True) 433 | grasp_quat = (extra_rot * top_down_rot).as_quat() 434 | grasp_pose.orientation.x = grasp_quat[0] 435 | grasp_pose.orientation.y = grasp_quat[1] 436 | grasp_pose.orientation.z = grasp_quat[2] 437 | grasp_pose.orientation.w = grasp_quat[3] 438 | self.grasp_at(grasp_pose, gripper_opening) 439 | 440 | def grasp_at(self, msg: Pose, 
gripper_opening: float): 441 | self.logger.info(f"Grasp at: {msg} with opening: {gripper_opening}") 442 | 443 | self.gripper_interface.open() 444 | self.gripper_interface.wait_until_executed() 445 | 446 | # move 5cm above the item first 447 | msg.position.z += 0.05 448 | self.move_to(msg) 449 | time.sleep(0.05) 450 | 451 | # grasp the item 452 | msg.position.z -= 0.05 453 | self.move_to(msg) 454 | time.sleep(0.05) 455 | 456 | gripper_pos = -gripper_opening / 2. * self.gripper_squeeze_factor 457 | gripper_pos = min(gripper_pos, 0.0) 458 | self.gripper_interface.move_to_position(gripper_pos) 459 | self.gripper_interface.wait_until_executed() 460 | 461 | # lift the item 462 | msg.position.z += 0.12 463 | self.move_to(msg) 464 | time.sleep(0.05) 465 | 466 | def release_above(self, object_index: int, detections: sv.Detections, 467 | depth_image: np.ndarray): 468 | """Move the robot arm above the object and release the gripper.""" 469 | masked_depth_image = np.zeros_like(depth_image, dtype=np.float32) 470 | mask = detections.mask[object_index] 471 | masked_depth_image[mask] = depth_image[mask] 472 | masked_depth_image /= 1000.0 473 | 474 | # convert the masked depth image to a point cloud 475 | pcd = o3d.geometry.PointCloud.create_from_depth_image( 476 | o3d.geometry.Image(masked_depth_image), 477 | o3d.camera.PinholeCameraIntrinsic( 478 | o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault), 479 | ) 480 | pcd.transform(self.cam_to_base_affine) 481 | 482 | points = np.asarray(pcd.points).astype(np.float32) 483 | # release 5cm above the object 484 | drop_z = np.percentile(points[:, 2], 95) + 0.05 485 | median_z = np.median(points[:, 2]) 486 | 487 | xy_points = points[points[:, 2] > median_z, :2] 488 | xy_points = xy_points.astype(np.float32) 489 | center, _, _ = cv2.minAreaRect(xy_points) 490 | 491 | drop_pose = Pose() 492 | drop_pose.position.x = center[0] + self.offset_x 493 | drop_pose.position.y = center[1] + self.offset_y 494 | drop_pose.position.z = drop_z + self.offset_z 495 | # Straight down pose 496 | drop_pose.orientation.x = 0.0 497 | drop_pose.orientation.y = 1.0 498 | drop_pose.orientation.z = 0.0 499 | drop_pose.orientation.w = 0.0 500 | 501 | self.release_at(drop_pose) 502 | 503 | def release_gripper(self): 504 | self.gripper_interface.open() 505 | self.gripper_interface.wait_until_executed() 506 | 507 | def flick_wrist_while_release(self): 508 | joint_positions = self.arm_joint_state.position 509 | joint_positions[4] -= np.deg2rad(25) 510 | self.moveit2.move_to_configuration(joint_positions, 511 | self.arm_joint_names, 512 | tolerance=0.005) 513 | time.sleep(3) 514 | 515 | self.gripper_interface.open() 516 | self.gripper_interface.wait_until_executed() 517 | self.moveit2.wait_until_executed() 518 | 519 | def go_home(self): 520 | joint_positions = [0., 0., 0., 0., 0., 0.] 
521 | self.moveit2.move_to_configuration(joint_positions, 522 | self.arm_joint_names, 523 | tolerance=0.005) 524 | self.moveit2.wait_until_executed() 525 | 526 | @cached_property 527 | def cam_to_base_affine(self): 528 | cam_to_base_link_tf = self.tf_buffer.lookup_transform( 529 | target_frame="base_link", 530 | source_frame="camera_color_frame", 531 | time=Time(), 532 | timeout=Duration(seconds=5)) 533 | cam_to_base_rot = Rotation.from_quat([ 534 | cam_to_base_link_tf.transform.rotation.x, 535 | cam_to_base_link_tf.transform.rotation.y, 536 | cam_to_base_link_tf.transform.rotation.z, 537 | cam_to_base_link_tf.transform.rotation.w, 538 | ]) 539 | cam_to_base_pos = np.array([ 540 | cam_to_base_link_tf.transform.translation.x, 541 | cam_to_base_link_tf.transform.translation.y, 542 | cam_to_base_link_tf.transform.translation.z, 543 | ]) 544 | affine = np.eye(4) 545 | affine[:3, :3] = cam_to_base_rot.as_matrix() 546 | affine[:3, 3] = cam_to_base_pos 547 | return affine 548 | 549 | def move_to(self, msg: Pose): 550 | pose_goal = PoseStamped() 551 | pose_goal.header.frame_id = "base_link" 552 | pose_goal.pose = msg 553 | 554 | self.moveit2.move_to_pose(pose=pose_goal) 555 | self.moveit2.wait_until_executed() 556 | 557 | def release_at(self, msg: Pose): 558 | # NOTE: straight down is wxyz 0, 0, 1, 0 559 | # good pose is 0, -0.3, 0.35 560 | self.logger.info(f"Releasing at: {msg}") 561 | self.move_to(msg) 562 | 563 | self.gripper_interface.open() 564 | self.gripper_interface.wait_until_executed() 565 | 566 | def detect_objects_cb(self, msg: String): 567 | if self._last_rgb_msg is None: 568 | self.logger.warning("No RGB image available.") 569 | return 570 | 571 | class_names = msg.data.split(",") 572 | rgb_image = self.cv_bridge.imgmsg_to_cv2(self._last_rgb_msg) 573 | self._last_detections = self.detect_objects(rgb_image, class_names) 574 | detected_classes = [ 575 | class_names[class_id] 576 | for class_id in self._last_detections.class_id 577 | ] 578 | self.logger.info(f"Detected objects: {detected_classes}") 579 | 580 | def pick_object_cb(self, msg: Int64): 581 | if self._last_detections is None or self._last_depth_msg is None: 582 | self.logger.warning("No detections or depth image available.") 583 | return 584 | 585 | depth_image = self.cv_bridge.imgmsg_to_cv2(self._last_depth_msg) 586 | self.pick_object(msg.data, self._last_detections, depth_image) 587 | 588 | def release_above_cb(self, msg: Int64): 589 | if self._last_detections is None or self._last_depth_msg is None: 590 | self.logger.warning("No detections or depth image available.") 591 | return 592 | 593 | depth_image = self.cv_bridge.imgmsg_to_cv2(self._last_depth_msg) 594 | self.release_above(msg.data, self._last_detections, depth_image) 595 | 596 | def depth_callback(self, msg): 597 | self._last_depth_msg = msg 598 | 599 | def image_callback(self, msg): 600 | self._last_rgb_msg = msg 601 | 602 | def joint_states_callback(self, msg: JointState): 603 | joint_state = JointState() 604 | joint_state.header = msg.header 605 | for name in self.arm_joint_names: 606 | for i, joint_state_joint_name in enumerate(msg.name): 607 | if name == joint_state_joint_name: 608 | joint_state.name.append(name) 609 | joint_state.position.append(msg.position[i]) 610 | joint_state.velocity.append(msg.velocity[i]) 611 | joint_state.effort.append(msg.effort[i]) 612 | 613 | self.arm_joint_state = joint_state 614 | # self.logger.info(f"joint_state in: {msg}") 615 | # self.logger.info(f"joint_state out: {self.arm_joint_state}") 616 | 617 | def save_images(self, 
msg): 618 | if not self._last_rgb_msg or not self._last_depth_msg: 619 | return 620 | 621 | rgb_image = self.cv_bridge.imgmsg_to_cv2(self._last_rgb_msg) 622 | depth_image = self.cv_bridge.imgmsg_to_cv2(self._last_depth_msg) 623 | 624 | save_dir = msg.data 625 | cv2.imwrite( 626 | os.path.join(save_dir, f"rgb_image_{self.n_frames_processed}.png"), 627 | rgb_image) 628 | np.save( 629 | os.path.join(save_dir, f"depth_image_{self.n_frames_processed}"), 630 | depth_image) 631 | 632 | 633 | def main(): 634 | rclpy.init() 635 | node = TabletopHandyBotNode() 636 | executor = MultiThreadedExecutor(4) 637 | executor.add_node(node) 638 | try: 639 | executor.spin() 640 | except KeyboardInterrupt: 641 | pass 642 | 643 | rclpy.shutdown() 644 | 645 | 646 | if __name__ == "__main__": 647 | main() 648 | --------------------------------------------------------------------------------
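Beyond the assistant-driven `/prompt` flow, `tabletop_handybot_node` also exposes the individual steps on plain topics (`/detect_objects`, `/pick_object`, `/release_above`, `/save_images`, as subscribed above), which is useful for debugging without the LLM in the loop. A rough sketch of stepping through a pick-and-place by hand (topic names and message types are taken from the node's subscriptions; the sleep durations are arbitrary guesses, not tuned values):

```python
import time

import rclpy
from rclpy.node import Node
from std_msgs.msg import Int64, String


def main():
    rclpy.init()
    node = Node("handybot_debug_driver")
    detect_pub = node.create_publisher(String, "/detect_objects", 10)
    pick_pub = node.create_publisher(Int64, "/pick_object", 10)
    release_pub = node.create_publisher(Int64, "/release_above", 10)

    # Spin briefly so discovery can finish before publishing.
    rclpy.spin_once(node, timeout_sec=1.0)

    # 1. Detect markers and containers in the current camera frame.
    detect_pub.publish(String(data="marker,container"))
    time.sleep(10.0)  # crude wait for detection to finish (arbitrary)

    # 2. Pick detection index 0, then release above detection index 1.
    pick_pub.publish(Int64(data=0))
    time.sleep(30.0)  # crude wait for the grasp to complete (arbitrary)
    release_pub.publish(Int64(data=1))

    rclpy.spin_once(node, timeout_sec=1.0)
    node.destroy_node()
    rclpy.shutdown()


if __name__ == "__main__":
    main()
```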
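For reference, the grasp orientation in `pick_object()` comes from `cv2.minAreaRect` on the object's top-surface points: the wrist is rotated so the jaws close across the rectangle's short side, and the angle is folded into the [-90, 90] degree range. The same normalization, extracted into a standalone helper purely for illustration (the function name is hypothetical):

```python
def grasp_rotation_deg(rect_angle_deg: float, width: float, height: float) -> float:
    """Fold a cv2.minAreaRect angle into a wrist rotation in [-90, 90] degrees,
    shifting by 90 degrees when the rectangle's first dimension is the longer one,
    so the gripper jaws close across the object's narrow side."""
    rotation = rect_angle_deg
    if width > height:
        rotation -= 90
    if rotation < -90:
        rotation += 180
    elif rotation > 90:
        rotation -= 180
    return rotation


# Example: a marker whose top surface is roughly 0.12 m x 0.015 m, reported at
# 30 degrees, is grasped across its 0.015 m side with a -60 degree wrist rotation.
print(grasp_rotation_deg(30.0, 0.12, 0.015))  # -60.0
```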