├── tabletop_handybot
│   ├── resource
│   │   └── tabletop_handybot
│   ├── tabletop_handybot
│   │   ├── __init__.py
│   │   ├── audio_prompt_node.py
│   │   ├── point_cloud_conversion.py
│   │   ├── openai_assistant.py
│   │   └── tabletop_handybot_node.py
│   ├── setup.cfg
│   ├── package.xml
│   ├── setup.py
│   └── launch
│       └── run.launch.py
├── requirements.txt
├── assets
│   ├── roll_dice_thumbnail.png
│   ├── architecture_diagram.png
│   └── pick_markers_thumbnail.png
├── .vscode
│   └── settings.json
├── tabletop-handybot.repos
├── .gitignore
└── README.md
/tabletop_handybot/resource/tabletop_handybot:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tabletop_handybot/tabletop_handybot/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | open3d
2 | whisper-mic
3 | openai
--------------------------------------------------------------------------------
/assets/roll_dice_thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycheng517/tabletop-handybot/HEAD/assets/roll_dice_thumbnail.png
--------------------------------------------------------------------------------
/assets/architecture_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycheng517/tabletop-handybot/HEAD/assets/architecture_diagram.png
--------------------------------------------------------------------------------
/assets/pick_markers_thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ycheng517/tabletop-handybot/HEAD/assets/pick_markers_thumbnail.png
--------------------------------------------------------------------------------
/tabletop_handybot/setup.cfg:
--------------------------------------------------------------------------------
1 | [develop]
2 | script_dir=$base/lib/tabletop_handybot
3 | [install]
4 | install_scripts=$base/lib/tabletop_handybot
5 |
6 | [build_scripts]
7 | executable = /usr/bin/env python3
8 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.analysis.extraPaths": [
3 | "/home/yifei/src/Grounded-Segment-Anything/GroundingDINO"
4 | ],
5 | "files.exclude": {
6 | "build/": true,
7 | "install/": true,
8 | "log/": true
9 | }
10 | }
--------------------------------------------------------------------------------
/tabletop-handybot.repos:
--------------------------------------------------------------------------------
1 | repositories:
2 | pymoveit2:
3 | type: git
4 | url: https://github.com/ycheng517/pymoveit2.git
5 | version: devel
6 | ar4_ros_driver:
7 | type: git
8 | url: https://github.com/ycheng517/ar4_ros_driver.git
9 | Grounded-Segment-Anything/Grounded-Segment-Anything:
10 | type: git
11 | url: https://github.com/IDEA-Research/Grounded-Segment-Anything
12 |
13 | # dependencies of ar4_ros_driver
14 | easy_handeye2:
15 | type: git
16 | url: https://github.com/ycheng517/easy_handeye2.git
17 | ros2_aruco:
18 | type: git
19 | url: https://github.com/JMU-ROBOTICS-VIVA/ros2_aruco.git
20 |
--------------------------------------------------------------------------------
/tabletop_handybot/tabletop_handybot/audio_prompt_node.py:
--------------------------------------------------------------------------------
1 | import rclpy
2 | import rclpy.node
3 | from std_msgs.msg import Empty, String
4 | from whisper_mic import WhisperMic
5 |
6 |
7 | class AudioPromptNode(rclpy.node.Node):
8 | """ROS2 Node that listens for a /listen message and publishes the
9 | result of the audio prompt to /prompt."""
10 |
11 | def __init__(self):
12 | super().__init__("audio_prompt_node")
13 |
14 | self.whisper_mic = WhisperMic()
15 |
16 | self.prompt_pub = self.create_publisher(String, "/prompt", 10)
17 | self.listen_sub = self.create_subscription(Empty, "/listen",
18 | self.listen, 10)
19 | self._logger.info("Audio Prompt Node Initialized")
20 |
21 | def listen(self, _: Empty):
22 | result = self.whisper_mic.listen(timeout=10.0, phrase_time_limit=10.0)
23 | self._logger.info(f"Prompt: {result}")
24 | self.prompt_pub.publish(String(data=result))
25 |
26 |
27 | def main():
28 | rclpy.init()
29 | node = AudioPromptNode()
30 | try:
31 | rclpy.spin(node)
32 | except KeyboardInterrupt:
33 | pass
34 |
35 | rclpy.shutdown()
36 |
37 |
38 | if __name__ == "__main__":
39 | main()
40 |
--------------------------------------------------------------------------------
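As a quick manual check of the node above, its two topics (`/listen` in, `/prompt` out) can be exercised with the standard ROS 2 CLI. A minimal sketch, assuming the node is already running and a microphone is attached:

```bash
# Watch for transcribed prompts published by the node
ros2 topic echo /prompt

# In another terminal: trigger a listen cycle, then speak into the microphone
ros2 topic pub --once /listen std_msgs/msg/Empty "{}"
```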
/tabletop_handybot/package.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
3 | <package format="3">
4 |   <name>tabletop_handybot</name>
5 |   <version>0.1.0</version>
6 |   <description>A handy robot arm for performing tabletop tasks.</description>
7 | 
8 |   <license>Apache License 2.0</license>
9 |   <author email="ycheng517@gmail.com">Yifei Cheng</author>
10 |   <maintainer email="ycheng517@gmail.com">Yifei Cheng</maintainer>
11 | 
12 |   <buildtool_depend>ament_python</buildtool_depend>
13 | 
14 |   <exec_depend>ar_moveit_config</exec_depend>
15 |   <exec_depend>cv_bridge</exec_depend>
16 |   <exec_depend>easy_handeye2</exec_depend>
17 |   <exec_depend>geometry_msgs</exec_depend>
18 |   <exec_depend>rclpy</exec_depend>
19 |   <exec_depend>realsense2_camera</exec_depend>
20 |   <exec_depend>ros2_aruco</exec_depend>
21 |   <exec_depend>tf2_ros</exec_depend>
22 |   <exec_depend>sensor_msgs</exec_depend>
23 |   <exec_depend>std_msgs</exec_depend>
24 | 
25 |   <test_depend>ament_copyright</test_depend>
26 |   <test_depend>ament_flake8</test_depend>
27 |   <test_depend>ament_pep257</test_depend>
28 |   <test_depend>python3-pytest</test_depend>
29 | 
30 |   <export>
31 |     <build_type>ament_python</build_type>
32 |   </export>
33 | </package>
34 | 
--------------------------------------------------------------------------------
/tabletop_handybot/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | from glob import glob
3 | from setuptools import setup
4 |
5 | package_name = 'tabletop_handybot'
6 |
7 | setup(
8 | name=package_name,
9 | version='0.1.0',
10 | packages=[package_name],
11 | data_files=[('share/ament_index/resource_index/packages',
12 | ['resource/' + package_name]),
13 | ('share/' + package_name, ['package.xml']),
14 | (os.path.join('share', package_name, 'launch'),
15 | glob(os.path.join('launch', '*launch.[pxy][yma]*')))],
16 | install_requires=['setuptools'],
17 | zip_safe=True,
18 | author='Yifei Cheng',
19 | author_email='ycheng517@gmail.com',
20 | maintainer='Yifei Cheng',
21 | maintainer_email='ycheng517@gmail.com',
22 | keywords=['ROS'],
23 | classifiers=[
24 | 'Intended Audience :: Developers',
25 | 'License :: OSI Approved :: Apache Software License',
26 | 'Programming Language :: Python',
27 | 'Topic :: Software Development',
28 | ],
29 | description='A handy robot arm for performing tabletop tasks.',
30 | license='Apache License, Version 2.0',
31 | tests_require=['pytest'],
32 | entry_points={
33 | 'console_scripts': [
34 | 'tabletop_handybot_node = tabletop_handybot.tabletop_handybot_node:main',
35 | 'audio_prompt_node = tabletop_handybot.audio_prompt_node:main',
36 | ],
37 | },
38 | )
39 |
--------------------------------------------------------------------------------
/tabletop_handybot/tabletop_handybot/point_cloud_conversion.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sensor_msgs.msg import PointCloud2, PointField
3 | from std_msgs.msg import Header
4 |
5 |
6 | def point_cloud_to_msg(points, parent_frame):
7 | """Creates a point cloud message.
8 | Args:
9 | points: Nx3 array of xyz positions.
10 | parent_frame: frame in which the point cloud is defined
11 | Returns:
12 | sensor_msgs/PointCloud2 message
13 |
14 | Code source:
15 | https://gist.github.com/pgorczak/5c717baa44479fa064eb8d33ea4587e0
16 |
17 | References:
18 | http://docs.ros.org/melodic/api/sensor_msgs/html/msg/PointCloud2.html
19 | http://docs.ros.org/melodic/api/sensor_msgs/html/msg/PointField.html
20 | http://docs.ros.org/melodic/api/std_msgs/html/msg/Header.html
21 |
22 | """
23 |     # In a PointCloud2 message, the point cloud is stored as a byte
24 |     # array. In order to unpack it, we also include some parameters
25 |     # which describe the size of each individual point.
26 | ros_dtype = PointField.FLOAT32
27 | dtype = np.float32
28 | itemsize = np.dtype(dtype).itemsize # A 32-bit float takes 4 bytes.
29 |
30 | data = points.astype(dtype).tobytes()
31 |
32 |     # The fields specify what the bytes represent. The first 4 bytes
33 |     # represent the x-coordinate, the next 4 the y-coordinate, etc.
34 | fields = [
35 | PointField(name=n, offset=i * itemsize, datatype=ros_dtype, count=1)
36 | for i, n in enumerate("xyz")
37 | ]
38 |
39 | # The PointCloud2 message also has a header which specifies which
40 | # coordinate frame it is represented in.
41 | header = Header(frame_id=parent_frame)
42 |
43 | return PointCloud2(
44 | header=header,
45 | height=1,
46 | width=points.shape[0],
47 | is_dense=False,
48 | is_bigendian=False,
49 | fields=fields,
50 | point_step=(itemsize * 3), # Every point consists of three float32s.
51 | row_step=(itemsize * 3 * points.shape[0]),
52 | data=data,
53 | )
54 |
--------------------------------------------------------------------------------
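The helper above is only called from `tabletop_handybot_node.py`. For illustration, here is a minimal standalone sketch of using it from any rclpy node; the `CloudDemo` node name, topic, and timer period are hypothetical, not part of the package:

```python
import numpy as np
import rclpy
from rclpy.node import Node
from sensor_msgs.msg import PointCloud2

from tabletop_handybot.point_cloud_conversion import point_cloud_to_msg


class CloudDemo(Node):
    """Hypothetical node that publishes a random 100-point cloud once per second."""

    def __init__(self):
        super().__init__("cloud_demo")
        self.pub = self.create_publisher(PointCloud2, "/point_cloud", 2)
        self.timer = self.create_timer(1.0, self.tick)

    def tick(self):
        points = np.random.rand(100, 3).astype(np.float32)  # Nx3 xyz positions
        self.pub.publish(point_cloud_to_msg(points, "camera_color_frame"))


def main():
    rclpy.init()
    node = CloudDemo()
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    rclpy.shutdown()


if __name__ == "__main__":
    main()
```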
/tabletop_handybot/launch/run.launch.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import yaml
4 | from ament_index_python.packages import get_package_share_directory
5 | from launch import LaunchDescription
6 | from launch.actions import IncludeLaunchDescription, TimerAction
7 | from launch.launch_description_sources import PythonLaunchDescriptionSource
8 | from launch_ros.actions import Node
9 |
10 |
11 | def load_yaml(package_name, file_name):
12 | package_path = get_package_share_directory(package_name)
13 | absolute_file_path = os.path.join(package_path, file_name)
14 | with open(absolute_file_path, "r", encoding="utf-8") as file:
15 | return yaml.safe_load(file)
16 |
17 |
18 | def generate_launch_description():
19 | realsense_args = {
20 | "enable_rgbd": "true",
21 | "enable_sync": "true",
22 | "align_depth.enable": "true",
23 | "enable_color": "true",
24 | "enable_depth": "true",
25 | "depth_module.depth_profile": "640x480x30",
26 | "depth_module.infra_profile": "640x480x30",
27 | }
28 | realsense = IncludeLaunchDescription(
29 | PythonLaunchDescriptionSource([
30 | os.path.join(get_package_share_directory("realsense2_camera"),
31 | "launch", "rs_launch.py")
32 | ]),
33 | launch_arguments=realsense_args.items())
34 |
35 | calibration_tf_publisher = IncludeLaunchDescription(
36 | PythonLaunchDescriptionSource([
37 | os.path.join(get_package_share_directory("easy_handeye2"),
38 | "launch", "publish.launch.py")
39 | ]),
40 | launch_arguments={"name": "ar4_calibration"}.items())
41 |
42 | delay_calibration_tf_publisher = TimerAction(
43 | actions=[calibration_tf_publisher],
44 | period=2.0,
45 | )
46 |
47 | ar_moveit_launch = PythonLaunchDescriptionSource([
48 | os.path.join(get_package_share_directory("ar_moveit_config"), "launch",
49 | "ar_moveit.launch.py")
50 | ])
51 | ar_moveit_args = {
52 | "include_gripper": "True",
53 | "rviz_config_file": "moveit_with_camera.rviz"
54 | }.items()
55 | ar_moveit = IncludeLaunchDescription(ar_moveit_launch,
56 | launch_arguments=ar_moveit_args)
57 |
58 | tabletop_handybot_node = Node(
59 | package="tabletop_handybot",
60 | executable="tabletop_handybot_node",
61 | name="tabletop_handybot_node",
62 | output="screen",
63 | )
64 |
65 | audio_prompt_node = Node(
66 | package="tabletop_handybot",
67 | executable="audio_prompt_node",
68 | name="audio_prompt_node",
69 | output="screen",
70 | )
71 |
72 | return LaunchDescription([
73 | realsense, delay_calibration_tf_publisher, ar_moveit,
74 | audio_prompt_node, tabletop_handybot_node
75 | ])
76 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # ROS
2 | install/
3 | log/
4 |
5 | # imported repositories
6 | pymoveit2/
7 | ar4_ros_driver/
8 | Grounded-Segment-Anything/
9 | easy_handeye2/
10 | ros2_aruco/
11 |
12 | # Byte-compiled / optimized / DLL files
13 | __pycache__/
14 | *.py[cod]
15 | *$py.class
16 |
17 | # C extensions
18 | *.so
19 |
20 | # Distribution / packaging
21 | .Python
22 | build/
23 | develop-eggs/
24 | dist/
25 | downloads/
26 | eggs/
27 | .eggs/
28 | lib/
29 | lib64/
30 | parts/
31 | sdist/
32 | var/
33 | wheels/
34 | share/python-wheels/
35 | *.egg-info/
36 | .installed.cfg
37 | *.egg
38 | MANIFEST
39 |
40 | # PyInstaller
41 | # Usually these files are written by a python script from a template
42 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
43 | *.manifest
44 | *.spec
45 |
46 | # Installer logs
47 | pip-log.txt
48 | pip-delete-this-directory.txt
49 |
50 | # Unit test / coverage reports
51 | htmlcov/
52 | .tox/
53 | .nox/
54 | .coverage
55 | .coverage.*
56 | .cache
57 | nosetests.xml
58 | coverage.xml
59 | *.cover
60 | *.py,cover
61 | .hypothesis/
62 | .pytest_cache/
63 | cover/
64 |
65 | # Translations
66 | *.mo
67 | *.pot
68 |
69 | # Django stuff:
70 | *.log
71 | local_settings.py
72 | db.sqlite3
73 | db.sqlite3-journal
74 |
75 | # Flask stuff:
76 | instance/
77 | .webassets-cache
78 |
79 | # Scrapy stuff:
80 | .scrapy
81 |
82 | # Sphinx documentation
83 | docs/_build/
84 |
85 | # PyBuilder
86 | .pybuilder/
87 | target/
88 |
89 | # Jupyter Notebook
90 | .ipynb_checkpoints
91 |
92 | # IPython
93 | profile_default/
94 | ipython_config.py
95 |
96 | # pyenv
97 | # For a library or package, you might want to ignore these files since the code is
98 | # intended to run in multiple environments; otherwise, check them in:
99 | # .python-version
100 |
101 | # pipenv
102 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
103 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
104 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
105 | # install all needed dependencies.
106 | #Pipfile.lock
107 |
108 | # poetry
109 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
110 | # This is especially recommended for binary packages to ensure reproducibility, and is more
111 | # commonly ignored for libraries.
112 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
113 | #poetry.lock
114 |
115 | # pdm
116 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
117 | #pdm.lock
118 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
119 | # in version control.
120 | # https://pdm.fming.dev/#use-with-ide
121 | .pdm.toml
122 |
123 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
124 | __pypackages__/
125 |
126 | # Celery stuff
127 | celerybeat-schedule
128 | celerybeat.pid
129 |
130 | # SageMath parsed files
131 | *.sage.py
132 |
133 | # Environments
134 | .env
135 | .venv
136 | env/
137 | venv/
138 | ENV/
139 | env.bak/
140 | venv.bak/
141 |
142 | # Spyder project settings
143 | .spyderproject
144 | .spyproject
145 |
146 | # Rope project settings
147 | .ropeproject
148 |
149 | # mkdocs documentation
150 | /site
151 |
152 | # mypy
153 | .mypy_cache/
154 | .dmypy.json
155 | dmypy.json
156 |
157 | # Pyre type checker
158 | .pyre/
159 |
160 | # pytype static type analyzer
161 | .pytype/
162 |
163 | # Cython debug symbols
164 | cython_debug/
165 |
166 | # PyCharm
167 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
168 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
169 | # and can be added to the global gitignore or merged into this file. For a more nuclear
170 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
171 | #.idea/
172 |
--------------------------------------------------------------------------------
/tabletop_handybot/tabletop_handybot/openai_assistant.py:
--------------------------------------------------------------------------------
1 | from openai import OpenAI # pylint: disable=import-self
2 | from openai.types.beta import Assistant
3 |
4 | # pylint: disable=line-too-long
5 |
6 |
7 | def get_or_create_assistant(client: OpenAI,
8 | assistant_id: str = "") -> Assistant:
9 | if assistant_id:
10 | return client.beta.assistants.retrieve(assistant_id)
11 |
12 | return client.beta.assistants.create(
13 | name="Tabletop Assistant",
14 | instructions=(
15 | "You are a robot arm mounted on a table. Write and run code to "
16 | "do tasks on the table. You can only pick up one object at a time."
17 | ),
18 | model="gpt-4o",
19 | temperature=0.01,
20 | tools=[{
21 | "type": "function",
22 | "function": {
23 | "name": "detect_objects",
24 | "description":
25 | "Detect objects in the field of view of the camera",
26 | "parameters": {
27 | "type": "object",
28 | "properties": {
29 | "object_classes": {
30 | "type":
31 | "string",
32 | "description":
33 |                         ("Object classes to detect, comma separated. "
34 | "For example: horses,rivers,plain"),
35 | }
36 | },
37 | "required": ["object_classes"],
38 | },
39 | },
40 | }, {
41 | "type": "function",
42 | "function": {
43 | "name": "pick_object",
44 | "description":
45 | "Pick up an object from the output of get_objects.",
46 | "parameters": {
47 | "type": "object",
48 | "properties": {
49 | "object_index": {
50 | "type":
51 | "integer",
52 | "description":
53 | "index of target object in the detected objects list to execute the action for."
54 | }
55 | },
56 | "required": ["object_index"]
57 | }
58 | }
59 | }, {
60 | "type": "function",
61 | "function": {
62 | "name": "move_above_object_and_release",
63 | "description":
64 | "move the end effector above the object and release the gripper. The object is from the output of get_objects",
65 | "parameters": {
66 | "type": "object",
67 | "properties": {
68 | "object_index": {
69 | "type":
70 | "integer",
71 | "description":
72 | "index of target object in the detected objects list to execute the action for."
73 | }
74 | },
75 | "required": ["object_index"]
76 | }
77 | }
78 | }, {
79 | "type": "function",
80 | "function": {
81 | "name": "release_gripper",
82 | "description": "Open up the gripper",
83 | "parameters": {
84 | "type": "object",
85 | "properties": {},
86 | "required": []
87 | }
88 | }
89 | }, {
90 | "type": "function",
91 | "function": {
92 | "name": "flick_wrist_while_release",
93 | "description":
94 | "Flick the wrist while releasing the gripper, basically tossing the object.",
95 | "parameters": {
96 | "type": "object",
97 | "properties": {},
98 | "required": []
99 | }
100 | }
101 | }],
102 | )
103 |
--------------------------------------------------------------------------------
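A minimal sketch of reusing `get_or_create_assistant` on its own, mirroring how `tabletop_handybot_node.py` calls it. It assumes `OPENAI_API_KEY` is set in the environment; the `ASSISTANT_ID` variable below is hypothetical and only illustrates passing a previously created assistant ID so a new one is not created on every run:

```python
import os

from openai import OpenAI

from tabletop_handybot.openai_assistant import get_or_create_assistant

# The OpenAI client reads OPENAI_API_KEY from the environment by default.
client = OpenAI()

# Reuse an existing assistant if an ID is provided; otherwise create one.
assistant = get_or_create_assistant(client, os.environ.get("ASSISTANT_ID", ""))
print(assistant.id)
```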
/README.md:
--------------------------------------------------------------------------------
1 | # Tabletop HandyBot
2 |
3 | A low-cost, AI-powered robotic arm assistant that listens to your voice commands
4 | and can carry out a variety of tabletop tasks. Currently, it can pick and place
5 | arbitrary objects presented in front of it. The total BOM is around $2,300 USD.
6 |
7 | ## Video Demo
8 |
9 |
10 |
11 | | Rolling Dice | Picking Markers |
12 | | :--------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------: |
13 | | [](https://youtube.com/shorts/xAmPpvCZZU4) | [](https://youtube.com/shorts/fsCkS7HvgGo) |
14 |
15 |
16 |
17 | ## System Architecture
18 |
19 | 
20 |
21 | ### Technology Used
22 |
23 | - [AR4 Robot](https://www.anninrobotics.com/): a $2000 robot arm that can lift 5lb dumbbells
24 | - [ar4_ros_driver](https://github.com/ycheng517/ar4_ros_driver): ROS driver for the arm
25 | - [Realsense D435](https://store.intelrealsense.com/buy-intel-realsense-depth-camera-d435.html?_ga=2.156205052.639062113.1715864879-1357777019.1715864879): a reliable $300 RGBD camera
26 | - [realsense-ros](https://github.com/IntelRealSense/realsense-ros): ROS wrapper for the camera
27 | - [Whisper Mic](https://github.com/mallorbc/whisper_mic): Convenient library for using OpenAI Whisper with a microphone
28 | - [Grounding DINO](https://github.com/IDEA-Research/Grounded-Segment-Anything/tree/main/GroundingDINO): a SOTA zero-shot object detector that can detect any object
29 | - [Segment Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything/tree/main/segment_anything): a SOTA zero-shot object segmentation model that can segment any object
30 | - [OpenAI Assistants](https://platform.openai.com/docs/assistants/overview): an API that calls ChatGPT with specific instructions and function calls
31 |
32 | ## Setup
33 |
34 | ### Pre-requisites
35 |
36 | You should have [ROS 2 Iron](https://docs.ros.org/en/iron/index.html) installed on Ubuntu 22.04.
37 |
38 | The following hardware is used. Using other hardware will require adapting the codebase.
39 |
40 | - [AR4 robot arm](https://www.anninrobotics.com/) with [AR servo gripper](https://www.anninrobotics.com/product-page/servo-gripper-parts-kit)
41 | - [Intel RealSense D435 Depth Camera](https://www.intelrealsense.com/depth-camera-d435/)
42 |
43 | You should be able to run [ar4_ros_driver](https://github.com/ycheng517/ar4_ros_driver)
44 | with the gripper, and successfully perform hand-eye calibration.
45 |
46 | ### Install
47 |
48 | Import dependent repos
49 |
50 | ```bash
51 | vcs import . --input tabletop-handybot.repos
52 | ```
53 |
54 | Create a virtual environment, e.g.
55 |
56 | ```bash
57 | pyenv virtualenv 3.10.12 handybot
58 | ```
59 |
60 | Go to the `./Grounded-Segment-Anything/Grounded-Segment-Anything/` sub-directory
61 | and set up [Grounded-Segment-Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything)
62 | in the virtualenv. Ensure you can run the [grounded_sam.ipynb](https://github.com/IDEA-Research/Grounded-Segment-Anything/blob/main/grounded_sam.ipynb)
63 | notebook.
64 |
65 | Install Python dependencies not in the ROS Index
66 |
67 | ```bash
68 | pip install -r requirements.txt
69 | ```
70 |
71 | Build and source the project
72 |
73 | ```bash
74 | colcon build
75 | source install/setup.bash
76 | ```
77 |
78 | ### Run
79 |
80 | Launch the AR4
81 |
82 | ```bash
83 | ros2 launch ar_hardware_interface ar_hardware.launch.py calibrate:=True include_gripper:=True
84 | ```
85 |
86 | Launch all other programs
87 |
88 | ```bash
89 | ros2 launch tabletop_handybot run.launch.py
90 | ```
91 |
92 | If everything looks good, you can try publishing a prompt to the
93 | `/prompt` topic for the robot to execute, e.g.:
94 |
95 | ```bash
96 | ros2 topic pub --once /prompt std_msgs/msg/String "data: 'put the marker in the container'"
97 | ```
98 |
99 | If you have a microphone attached to the computer, you can publish a message
100 | to the `/listen` topic, and then say your prompt.
101 |
102 | ```bash
103 | ros2 topic pub --once /listen std_msgs/msg/Empty "{}"
104 | ```
105 |
--------------------------------------------------------------------------------
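One detail worth making explicit from the Setup section: `tabletop_handybot_node.py` (next file) loads its model weights from fixed paths under `./Grounded-Segment-Anything/Grounded-Segment-Anything/`, relative to the directory the node is launched from, and expects `groundingdino_swint_ogc.pth` and `sam_vit_h_4b8939.pth` to sit directly in that directory. A sketch of fetching them there, using the download links published by the upstream projects (verify against the Grounded-Segment-Anything README in case they have moved):

```bash
cd Grounded-Segment-Anything/Grounded-Segment-Anything

# GroundingDINO weights, expected at <GSA_PATH>/groundingdino_swint_ogc.pth
wget https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth

# Segment Anything ViT-H weights, expected at <GSA_PATH>/sam_vit_h_4b8939.pth
wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
```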
/tabletop_handybot/tabletop_handybot/tabletop_handybot_node.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import time
4 | from functools import cached_property
5 | from typing import List
6 |
7 | import cv2
8 | import numpy as np
9 | import open3d as o3d
10 | import openai
11 | import rclpy
12 | import supervision as sv
13 | import tf2_ros
14 | import torch
15 | import torchvision
16 | from cv_bridge import CvBridge
17 | from geometry_msgs.msg import Pose, PoseStamped
18 | from groundingdino.util.inference import Model
19 | from openai.types.beta import Assistant
20 | from openai.types.beta.threads import RequiredActionFunctionToolCall
21 | from rclpy.callback_groups import (MutuallyExclusiveCallbackGroup,
22 |                                    ReentrantCallbackGroup)
23 | from rclpy.duration import Duration
24 | from rclpy.executors import MultiThreadedExecutor
25 | from rclpy.node import Node
26 | from rclpy.time import Time
27 | from scipy.spatial.transform import Rotation
28 | from segment_anything import SamPredictor, sam_model_registry
29 | from sensor_msgs.msg import Image, JointState, PointCloud2
30 | from std_msgs.msg import Int64, String
31 | 
32 |
33 | from pymoveit2 import GripperInterface, MoveIt2
34 |
35 | from .openai_assistant import get_or_create_assistant
36 | from .point_cloud_conversion import point_cloud_to_msg
37 |
38 | DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
39 |
40 | # GroundingDINO config and checkpoint
41 | GSA_PATH = "./Grounded-Segment-Anything/Grounded-Segment-Anything"
42 | GROUNDING_DINO_CONFIG_PATH = os.path.join(
43 | GSA_PATH, "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py")
44 | GROUNDING_DINO_CHECKPOINT_PATH = os.path.join(GSA_PATH,
45 | "groundingdino_swint_ogc.pth")
46 |
47 | # Segment-Anything checkpoint
48 | SAM_ENCODER_VERSION = "vit_h"
49 | SAM_CHECKPOINT_PATH = os.path.join(GSA_PATH, "sam_vit_h_4b8939.pth")
50 |
51 | # Predict classes and hyper-param for GroundingDINO
52 | BOX_THRESHOLD = 0.4
53 | TEXT_THRESHOLD = 0.25
54 | NMS_THRESHOLD = 0.8
55 |
56 |
57 | # Prompting SAM with detected boxes
58 | def segment(sam_predictor: SamPredictor, image: np.ndarray,
59 | xyxy: np.ndarray) -> np.ndarray:
60 | sam_predictor.set_image(image)
61 | result_masks = []
62 | for box in xyxy:
63 | masks, scores, _ = sam_predictor.predict(box=box,
64 | multimask_output=True)
65 | index = np.argmax(scores)
66 | result_masks.append(masks[index])
67 | return np.array(result_masks)
68 |
69 |
70 | class TabletopHandyBotNode(Node):
71 | """Main ROS 2 node for Tabletop HandyBot."""
72 |
73 | # TODO: make args rosparams
74 | def __init__(
75 | self,
76 | annotate: bool = False,
77 | publish_point_cloud: bool = False,
78 | assistant_id: str = "",
79 | # Adjust these offsets to your needs:
80 | offset_x: float = 0.015,
81 | offset_y: float = -0.015,
82 | offset_z: float = 0.08, # accounts for the height of the gripper
83 | ):
84 | super().__init__("tabletop_handybot_node")
85 |
86 | self.logger = self.get_logger()
87 |
88 | self.cv_bridge = CvBridge()
89 | self.gripper_joint_name = "gripper_joint"
90 | callback_group = ReentrantCallbackGroup()
91 | # Create MoveIt 2 interface
92 | self.arm_joint_names = [
93 | "joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6"
94 | ]
95 | self.moveit2 = MoveIt2(
96 | node=self,
97 | joint_names=self.arm_joint_names,
98 | base_link_name="base_link",
99 | end_effector_name="link_6",
100 | group_name="ar_manipulator",
101 | callback_group=callback_group,
102 | )
103 | self.moveit2.planner_id = "RRTConnectkConfigDefault"
104 | self.gripper_interface = GripperInterface(
105 | node=self,
106 | gripper_joint_names=["gripper_jaw1_joint"],
107 | open_gripper_joint_positions=[-0.012],
108 | closed_gripper_joint_positions=[0.0],
109 | gripper_group_name="ar_gripper",
110 | callback_group=callback_group,
111 | gripper_command_action_name="/gripper_controller/gripper_cmd",
112 | )
113 | self.tf_buffer = tf2_ros.Buffer()
114 | self.tf_listener = tf2_ros.TransformListener(self.tf_buffer, self)
115 |
116 | self.grounding_dino_model = Model(
117 | model_config_path=GROUNDING_DINO_CONFIG_PATH,
118 | model_checkpoint_path=GROUNDING_DINO_CHECKPOINT_PATH,
119 | )
120 | self.sam = sam_model_registry[SAM_ENCODER_VERSION](
121 | checkpoint=SAM_CHECKPOINT_PATH)
122 | self.sam.to(device=DEVICE)
123 | self.sam_predictor = SamPredictor(self.sam)
124 | self.openai = openai.OpenAI()
125 | self.assistant: Assistant = get_or_create_assistant(
126 | self.openai, assistant_id)
127 | self.logger.info(f"Loaded assistant with ID: {self.assistant.id}")
128 |
129 | self.annotate = annotate
130 | self.publish_point_cloud = publish_point_cloud
131 | self.n_frames_processed = 0
132 | self._last_depth_msg = None
133 | self._last_rgb_msg = None
134 | self.arm_joint_state: JointState | None = None
135 | self._last_detections: sv.Detections | None = None
136 | self._object_in_gripper: bool = False
137 | self.gripper_squeeze_factor = 0.5
138 | self.offset_x = offset_x
139 | self.offset_y = offset_y
140 | self.offset_z = offset_z
141 |
142 | self.image_sub = self.create_subscription(Image,
143 | "/camera/color/image_raw",
144 | self.image_callback, 10)
145 | self.depth_sub = self.create_subscription(
146 | Image, "/camera/aligned_depth_to_color/image_raw",
147 | self.depth_callback, 10)
148 | self.joint_states_sub = self.create_subscription(
149 | JointState, "/joint_states", self.joint_states_callback, 10)
150 |
151 | if self.publish_point_cloud:
152 | self.point_cloud_pub = self.create_publisher(
153 | PointCloud2, "/point_cloud", 2)
154 | self.prompt_sub = self.create_subscription(
155 | String,
156 | "/prompt",
157 | self.start,
158 | 10,
159 | callback_group=MutuallyExclusiveCallbackGroup())
160 | self.save_images_sub = self.create_subscription(
161 | String, "/save_images", self.save_images, 10)
162 | self.detect_objects_sub = self.create_subscription(
163 | String, "/detect_objects", self.detect_objects_cb, 10)
164 | self.release_at_sub = self.create_subscription(Int64, "/release_above",
165 | self.release_above_cb,
166 | 10)
167 | self.pick_object_sub = self.create_subscription(
168 | Int64, "/pick_object", self.pick_object_cb, 10)
169 |
170 | self.logger.info("Tabletop HandyBot node initialized.")
171 |
172 | def execution_success(self, tool_call: RequiredActionFunctionToolCall,
173 | tool_outputs: List[dict]) -> None:
174 | output_dict = {"success": True}
175 | tool_outputs.append({
176 | "tool_call_id": tool_call.id,
177 | "output": json.dumps(output_dict),
178 | })
179 |
180 | def execution_failure(self, tool_call: RequiredActionFunctionToolCall,
181 | tool_outputs: List[dict], failure_reason: str):
182 | output_dict = {
183 | "success": False,
184 | "failure_reason": failure_reason,
185 | }
186 | tool_outputs.append({
187 | "tool_call_id": tool_call.id,
188 | "output": json.dumps(output_dict),
189 | })
190 |
191 | def handle_tool_call(self, tool_call: RequiredActionFunctionToolCall,
192 | rgb_image: np.ndarray, depth_image: np.ndarray,
193 | tool_outputs: List[dict]):
194 | if tool_call.function.name == "detect_objects":
195 | args = json.loads(tool_call.function.arguments)
196 | classes_str = args["object_classes"]
197 | classes = classes_str.split(",")
198 | self._last_detections = self.detect_objects(rgb_image, classes)
199 | detected_classes = [
200 | classes[class_id]
201 | for class_id in self._last_detections.class_id
202 | ]
203 | self.logger.info(f"Detected {detected_classes}.")
204 | self.logger.info(
205 | f"detection confidence: {self._last_detections.confidence}")
206 | output_dict = {
207 | "object_classes": detected_classes,
208 | }
209 | tool_outputs.append({
210 | "tool_call_id": tool_call.id,
211 | "output": json.dumps(output_dict),
212 | })
213 | elif tool_call.function.name == "pick_object":
214 | args = json.loads(tool_call.function.arguments)
215 | if self._last_detections is None:
216 | self.execution_failure(tool_call, tool_outputs,
217 | "No detections available.")
218 | return
219 |
220 | if self._object_in_gripper:
221 | self.execution_failure(
222 | tool_call, tool_outputs,
223 | "Gripper is already holding an object.")
224 | return
225 |
226 | if args["object_index"] >= len(self._last_detections.mask):
227 | self.execution_failure(
228 | tool_call, tool_outputs,
229 | ("Invalid object index. Only "
230 | f"{len(self._last_detections.mask)} objects detected."))
231 | return
232 |
233 | self.pick_object(args["object_index"], self._last_detections,
234 | depth_image)
235 | self.logger.info(
236 | f"done picking object. Joint states: {self.arm_joint_state.position}"
237 | )
238 | self._object_in_gripper = True
239 | self.execution_success(tool_call, tool_outputs)
240 | elif tool_call.function.name == "move_above_object_and_release":
241 | args = json.loads(tool_call.function.arguments)
242 | if self._last_detections is None:
243 | self.execution_failure(tool_call, tool_outputs,
244 | "No detections available.")
245 | return
246 |
247 | if args["object_index"] >= len(self._last_detections.mask):
248 | self.execution_failure(
249 | tool_call, tool_outputs,
250 | ("Invalid object index. Only "
251 | f"{len(self._last_detections.mask)} objects detected."))
252 | return
253 |
254 | self.release_above(args["object_index"], self._last_detections,
255 | depth_image)
256 | self._object_in_gripper = False
257 | self.execution_success(tool_call, tool_outputs)
258 | elif tool_call.function.name == "release_gripper":
259 | self.release_gripper()
260 | self.execution_success(tool_call, tool_outputs)
261 | self._object_in_gripper = False
262 | elif tool_call.function.name == "flick_wrist_while_release":
263 | self.flick_wrist_while_release()
264 | self.execution_success(tool_call, tool_outputs)
265 | self._object_in_gripper = False
266 |
267 | def start(self, msg: String):
268 | if not self._last_rgb_msg or not self._last_depth_msg:
269 | return
270 |
271 | rgb_image = self.cv_bridge.imgmsg_to_cv2(self._last_rgb_msg)
272 | depth_image = self.cv_bridge.imgmsg_to_cv2(self._last_depth_msg)
273 | self._last_detections = None
274 |
275 | self.logger.info(f"Processing: {msg.data}")
276 | self.logger.info(
277 | f"Initial Joint states: {self.arm_joint_state.position}")
278 | thread = self.openai.beta.threads.create()
279 | message = self.openai.beta.threads.messages.create(
280 | thread_id=thread.id,
281 | role="user",
282 | content=msg.data,
283 | )
284 | self.logger.info(str(message))
285 |
286 | run = self.openai.beta.threads.runs.create_and_poll(
287 | thread_id=thread.id,
288 | assistant_id=self.assistant.id,
289 | )
290 |
291 | done = False
292 | while not done:
293 | if run.status == "completed":
294 | messages = self.openai.beta.threads.messages.list(
295 | thread_id=thread.id)
296 | self.logger.info(str(messages))
297 | done = True
298 | break
299 | else:
300 | self.logger.info(run.status)
301 |
302 | tool_outputs = []
303 | for tool_call in run.required_action.submit_tool_outputs.tool_calls:
304 | self.logger.info(f"tool_call: {tool_call}")
305 | if tool_call.type == "function":
306 | self.handle_tool_call(tool_call, rgb_image, depth_image,
307 | tool_outputs)
308 |
309 | self.logger.info(f"tool_outputs: {tool_outputs}")
310 | if tool_outputs:
311 | try:
312 | run = self.openai.beta.threads.runs.submit_tool_outputs_and_poll(
313 | thread_id=thread.id,
314 | run_id=run.id,
315 | tool_outputs=tool_outputs)
316 | self.logger.info("Tool outputs submitted successfully.")
317 | except Exception as e: # pylint: disable=broad-except
318 | self.logger.error(f"Failed to submit tool outputs: {e}")
319 | else:
320 | self.logger.info("No tool outputs to submit.")
321 |
322 | self.go_home()
323 | self.logger.info("Task completed.")
324 |
325 | def detect_objects(self, image: np.ndarray, object_classes: List[str]):
326 | self.logger.info(f"Detecting objects of classes: {object_classes}")
327 | detections: sv.Detections = self.grounding_dino_model.predict_with_classes(
328 | image=image,
329 | classes=object_classes,
330 | box_threshold=BOX_THRESHOLD,
331 | text_threshold=TEXT_THRESHOLD,
332 | )
333 |
334 | # NMS post process
335 | nms_idx = (torchvision.ops.nms(
336 | torch.from_numpy(detections.xyxy),
337 | torch.from_numpy(detections.confidence),
338 | NMS_THRESHOLD,
339 | ).numpy().tolist())
340 |
341 | detections.xyxy = detections.xyxy[nms_idx]
342 | detections.confidence = detections.confidence[nms_idx]
343 | detections.class_id = detections.class_id[nms_idx]
344 |
345 | detections.mask = segment(
346 | sam_predictor=self.sam_predictor,
347 | image=cv2.cvtColor(image, cv2.COLOR_BGR2RGB),
348 | xyxy=detections.xyxy,
349 | )
350 |
351 | if self.annotate:
352 | box_annotator = sv.BoxAnnotator()
353 | mask_annotator = sv.MaskAnnotator()
354 | labels = [
355 | f"{object_classes[class_id]} {confidence:0.2f}"
356 | for _, _, confidence, class_id, _, _ in detections
357 | ]
358 | annotated_frame = box_annotator.annotate(scene=image.copy(),
359 | detections=detections,
360 | labels=labels)
361 | cv2.imwrite(
362 | f"annotated_image_detections_{self.n_frames_processed}.jpg",
363 | annotated_frame)
364 |
365 | annotated_frame = mask_annotator.annotate(scene=image.copy(),
366 | detections=detections)
367 | cv2.imwrite(f"annotated_image_masks_{self.n_frames_processed}.jpg",
368 | annotated_frame)
369 |
370 | if self.publish_point_cloud:
371 | depth_image = self.cv_bridge.imgmsg_to_cv2(self._last_depth_msg)
372 | # mask out the depth image except for the detected objects
373 | masked_depth_image = np.zeros_like(depth_image, dtype=np.float32)
374 | for mask in detections.mask:
375 | masked_depth_image[mask] = depth_image[mask]
376 | masked_depth_image /= 1000.0
377 |
378 | # convert the masked depth image to a point cloud
379 | pcd = o3d.geometry.PointCloud.create_from_depth_image(
380 | o3d.geometry.Image(masked_depth_image),
381 | o3d.camera.PinholeCameraIntrinsic(
382 | o3d.camera.PinholeCameraIntrinsicParameters.
383 | PrimeSenseDefault),
384 | )
385 |
386 | # convert it to a ROS PointCloud2 message
387 | points = np.asarray(pcd.points)
388 | pc_msg = point_cloud_to_msg(points, "/camera_color_frame")
389 | self.point_cloud_pub.publish(pc_msg)
390 |
391 | self.n_frames_processed += 1
392 | return detections
393 |
394 | def pick_object(self, object_index: int, detections: sv.Detections,
395 | depth_image: np.ndarray):
396 | """Perform a top-down grasp on the object."""
397 | # mask out the depth image except for the detected objects
398 | masked_depth_image = np.zeros_like(depth_image, dtype=np.float32)
399 | mask = detections.mask[object_index]
400 | masked_depth_image[mask] = depth_image[mask]
401 | masked_depth_image /= 1000.0
402 |
403 | # convert the masked depth image to a point cloud
404 | pcd = o3d.geometry.PointCloud.create_from_depth_image(
405 | o3d.geometry.Image(masked_depth_image),
406 | o3d.camera.PinholeCameraIntrinsic(
407 | o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault),
408 | )
409 | pcd.transform(self.cam_to_base_affine)
410 | points = np.asarray(pcd.points)
411 | grasp_z = points[:, 2].max()
412 |
413 | near_grasp_z_points = points[points[:, 2] > grasp_z - 0.008]
414 | xy_points = near_grasp_z_points[:, :2]
415 | xy_points = xy_points.astype(np.float32)
416 | center, dimensions, theta = cv2.minAreaRect(xy_points)
417 |
418 | gripper_rotation = theta
419 | if dimensions[0] > dimensions[1]:
420 | gripper_rotation -= 90
421 | if gripper_rotation < -90:
422 | gripper_rotation += 180
423 | elif gripper_rotation > 90:
424 | gripper_rotation -= 180
425 |
426 | gripper_opening = min(dimensions)
427 | grasp_pose = Pose()
428 | grasp_pose.position.x = center[0] + self.offset_x
429 | grasp_pose.position.y = center[1] + self.offset_y
430 | grasp_pose.position.z = grasp_z + self.offset_z
431 | top_down_rot = Rotation.from_quat([0, 1, 0, 0])
432 | extra_rot = Rotation.from_euler("z", gripper_rotation, degrees=True)
433 | grasp_quat = (extra_rot * top_down_rot).as_quat()
434 | grasp_pose.orientation.x = grasp_quat[0]
435 | grasp_pose.orientation.y = grasp_quat[1]
436 | grasp_pose.orientation.z = grasp_quat[2]
437 | grasp_pose.orientation.w = grasp_quat[3]
438 | self.grasp_at(grasp_pose, gripper_opening)
439 |
440 | def grasp_at(self, msg: Pose, gripper_opening: float):
441 | self.logger.info(f"Grasp at: {msg} with opening: {gripper_opening}")
442 |
443 | self.gripper_interface.open()
444 | self.gripper_interface.wait_until_executed()
445 |
446 | # move 5cm above the item first
447 | msg.position.z += 0.05
448 | self.move_to(msg)
449 | time.sleep(0.05)
450 |
451 | # grasp the item
452 | msg.position.z -= 0.05
453 | self.move_to(msg)
454 | time.sleep(0.05)
455 |
456 | gripper_pos = -gripper_opening / 2. * self.gripper_squeeze_factor
457 | gripper_pos = min(gripper_pos, 0.0)
458 | self.gripper_interface.move_to_position(gripper_pos)
459 | self.gripper_interface.wait_until_executed()
460 |
461 | # lift the item
462 | msg.position.z += 0.12
463 | self.move_to(msg)
464 | time.sleep(0.05)
465 |
466 | def release_above(self, object_index: int, detections: sv.Detections,
467 | depth_image: np.ndarray):
468 | """Move the robot arm above the object and release the gripper."""
469 | masked_depth_image = np.zeros_like(depth_image, dtype=np.float32)
470 | mask = detections.mask[object_index]
471 | masked_depth_image[mask] = depth_image[mask]
472 | masked_depth_image /= 1000.0
473 |
474 | # convert the masked depth image to a point cloud
475 | pcd = o3d.geometry.PointCloud.create_from_depth_image(
476 | o3d.geometry.Image(masked_depth_image),
477 | o3d.camera.PinholeCameraIntrinsic(
478 | o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault),
479 | )
480 | pcd.transform(self.cam_to_base_affine)
481 |
482 | points = np.asarray(pcd.points).astype(np.float32)
483 | # release 5cm above the object
484 | drop_z = np.percentile(points[:, 2], 95) + 0.05
485 | median_z = np.median(points[:, 2])
486 |
487 | xy_points = points[points[:, 2] > median_z, :2]
488 | xy_points = xy_points.astype(np.float32)
489 | center, _, _ = cv2.minAreaRect(xy_points)
490 |
491 | drop_pose = Pose()
492 | drop_pose.position.x = center[0] + self.offset_x
493 | drop_pose.position.y = center[1] + self.offset_y
494 | drop_pose.position.z = drop_z + self.offset_z
495 | # Straight down pose
496 | drop_pose.orientation.x = 0.0
497 | drop_pose.orientation.y = 1.0
498 | drop_pose.orientation.z = 0.0
499 | drop_pose.orientation.w = 0.0
500 |
501 | self.release_at(drop_pose)
502 |
503 | def release_gripper(self):
504 | self.gripper_interface.open()
505 | self.gripper_interface.wait_until_executed()
506 |
507 | def flick_wrist_while_release(self):
508 | joint_positions = self.arm_joint_state.position
509 | joint_positions[4] -= np.deg2rad(25)
510 | self.moveit2.move_to_configuration(joint_positions,
511 | self.arm_joint_names,
512 | tolerance=0.005)
513 | time.sleep(3)
514 |
515 | self.gripper_interface.open()
516 | self.gripper_interface.wait_until_executed()
517 | self.moveit2.wait_until_executed()
518 |
519 | def go_home(self):
520 | joint_positions = [0., 0., 0., 0., 0., 0.]
521 | self.moveit2.move_to_configuration(joint_positions,
522 | self.arm_joint_names,
523 | tolerance=0.005)
524 | self.moveit2.wait_until_executed()
525 |
526 | @cached_property
527 | def cam_to_base_affine(self):
528 | cam_to_base_link_tf = self.tf_buffer.lookup_transform(
529 | target_frame="base_link",
530 | source_frame="camera_color_frame",
531 | time=Time(),
532 | timeout=Duration(seconds=5))
533 | cam_to_base_rot = Rotation.from_quat([
534 | cam_to_base_link_tf.transform.rotation.x,
535 | cam_to_base_link_tf.transform.rotation.y,
536 | cam_to_base_link_tf.transform.rotation.z,
537 | cam_to_base_link_tf.transform.rotation.w,
538 | ])
539 | cam_to_base_pos = np.array([
540 | cam_to_base_link_tf.transform.translation.x,
541 | cam_to_base_link_tf.transform.translation.y,
542 | cam_to_base_link_tf.transform.translation.z,
543 | ])
544 | affine = np.eye(4)
545 | affine[:3, :3] = cam_to_base_rot.as_matrix()
546 | affine[:3, 3] = cam_to_base_pos
547 | return affine
548 |
549 | def move_to(self, msg: Pose):
550 | pose_goal = PoseStamped()
551 | pose_goal.header.frame_id = "base_link"
552 | pose_goal.pose = msg
553 |
554 | self.moveit2.move_to_pose(pose=pose_goal)
555 | self.moveit2.wait_until_executed()
556 |
557 | def release_at(self, msg: Pose):
558 | # NOTE: straight down is wxyz 0, 0, 1, 0
559 | # good pose is 0, -0.3, 0.35
560 | self.logger.info(f"Releasing at: {msg}")
561 | self.move_to(msg)
562 |
563 | self.gripper_interface.open()
564 | self.gripper_interface.wait_until_executed()
565 |
566 | def detect_objects_cb(self, msg: String):
567 | if self._last_rgb_msg is None:
568 | self.logger.warning("No RGB image available.")
569 | return
570 |
571 | class_names = msg.data.split(",")
572 | rgb_image = self.cv_bridge.imgmsg_to_cv2(self._last_rgb_msg)
573 | self._last_detections = self.detect_objects(rgb_image, class_names)
574 | detected_classes = [
575 | class_names[class_id]
576 | for class_id in self._last_detections.class_id
577 | ]
578 | self.logger.info(f"Detected objects: {detected_classes}")
579 |
580 | def pick_object_cb(self, msg: Int64):
581 | if self._last_detections is None or self._last_depth_msg is None:
582 | self.logger.warning("No detections or depth image available.")
583 | return
584 |
585 | depth_image = self.cv_bridge.imgmsg_to_cv2(self._last_depth_msg)
586 | self.pick_object(msg.data, self._last_detections, depth_image)
587 |
588 | def release_above_cb(self, msg: Int64):
589 | if self._last_detections is None or self._last_depth_msg is None:
590 | self.logger.warning("No detections or depth image available.")
591 | return
592 |
593 | depth_image = self.cv_bridge.imgmsg_to_cv2(self._last_depth_msg)
594 | self.release_above(msg.data, self._last_detections, depth_image)
595 |
596 | def depth_callback(self, msg):
597 | self._last_depth_msg = msg
598 |
599 | def image_callback(self, msg):
600 | self._last_rgb_msg = msg
601 |
602 | def joint_states_callback(self, msg: JointState):
603 | joint_state = JointState()
604 | joint_state.header = msg.header
605 | for name in self.arm_joint_names:
606 | for i, joint_state_joint_name in enumerate(msg.name):
607 | if name == joint_state_joint_name:
608 | joint_state.name.append(name)
609 | joint_state.position.append(msg.position[i])
610 | joint_state.velocity.append(msg.velocity[i])
611 | joint_state.effort.append(msg.effort[i])
612 |
613 | self.arm_joint_state = joint_state
614 | # self.logger.info(f"joint_state in: {msg}")
615 | # self.logger.info(f"joint_state out: {self.arm_joint_state}")
616 |
617 | def save_images(self, msg):
618 | if not self._last_rgb_msg or not self._last_depth_msg:
619 | return
620 |
621 | rgb_image = self.cv_bridge.imgmsg_to_cv2(self._last_rgb_msg)
622 | depth_image = self.cv_bridge.imgmsg_to_cv2(self._last_depth_msg)
623 |
624 | save_dir = msg.data
625 | cv2.imwrite(
626 | os.path.join(save_dir, f"rgb_image_{self.n_frames_processed}.png"),
627 | rgb_image)
628 | np.save(
629 | os.path.join(save_dir, f"depth_image_{self.n_frames_processed}"),
630 | depth_image)
631 |
632 |
633 | def main():
634 | rclpy.init()
635 | node = TabletopHandyBotNode()
636 | executor = MultiThreadedExecutor(4)
637 | executor.add_node(node)
638 | try:
639 | executor.spin()
640 | except KeyboardInterrupt:
641 | pass
642 |
643 | rclpy.shutdown()
644 |
645 |
646 | if __name__ == "__main__":
647 | main()
648 |
--------------------------------------------------------------------------------
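Besides the `/prompt` flow described in the README, the node above subscribes to a few topics that are handy for debugging individual steps without involving the OpenAI assistant. A sketch of driving them from the ROS 2 CLI; the class names, indices, and save directory below are only examples, and object indices refer to the most recent detection result:

```bash
# Detect objects by class name (comma separated, as parsed by detect_objects_cb)
ros2 topic pub --once /detect_objects std_msgs/msg/String "data: 'marker,container'"

# Pick the detected object at index 0
ros2 topic pub --once /pick_object std_msgs/msg/Int64 "data: 0"

# Move above the detected object at index 1 and release the gripper
ros2 topic pub --once /release_above std_msgs/msg/Int64 "data: 1"

# Save the current RGB image and depth array to a directory
ros2 topic pub --once /save_images std_msgs/msg/String "data: '/tmp'"
```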