├── .gitignore ├── Hardware └── 3D_Printing │ ├── Beam.STL │ ├── CameraMount.STL │ └── Elevating.STL ├── LICENSE ├── README.md ├── README_zh-CN.md ├── config.yaml ├── requirements.txt ├── resources ├── WeChat │ ├── WeChat03.png │ ├── WechatIMG2.jpg │ └── WechatIMG3.jpg ├── eight_skills.gif ├── joystick_mapping.png └── robomatrix-logo3.png ├── robomaster_driver ├── robomaster_msgs │ ├── CMakeLists.txt │ ├── msg │ │ ├── ChassisAttitute.msg │ │ ├── ChassisEsc.msg │ │ ├── ChassisIMU.msg │ │ ├── ChassisPosition.msg │ │ ├── ChassisSingleEsc.msg │ │ ├── GripperStatus.msg │ │ ├── RoboticArmPosition.msg │ │ ├── ServoSingleStatus.msg │ │ └── ServoStatus.msg │ └── package.xml └── robomaster_ros │ ├── launch │ └── collect_data.launch.py │ ├── package.xml │ ├── resource │ └── robomaster_ros │ ├── robomaster_ros │ ├── __init__.py │ ├── data_collection.py │ └── teleoperation.py │ ├── setup.cfg │ └── setup.py ├── robomatrix_client ├── package.xml ├── resource │ └── robomatrix_client ├── robomatrix_client │ ├── __init__.py │ ├── agent_propmts.py │ ├── agent_skills.py │ ├── audio2text.py │ └── robomatrix_client.py ├── setup.cfg └── setup.py ├── robomatrix_interface ├── CMakeLists.txt ├── package.xml └── srv │ ├── Audio.srv │ └── Vla.srv ├── robomatrix_server ├── package.xml ├── resource │ └── robomatrix_server ├── robomatrix_server │ ├── __init__.py │ ├── resource │ │ ├── __init__.py │ │ ├── funcs.py │ │ ├── inference_multistage.py │ │ └── utils.py │ └── vla_service.py ├── setup.cfg └── setup.py └── teleoperation ├── joystick_driver ├── joystick_driver │ ├── __init__.py │ └── xbox_controller.py ├── package.xml ├── resource │ └── joystick_driver ├── setup.cfg └── setup.py └── joystick_msgs ├── CMakeLists.txt ├── msg ├── Axis.msg ├── Button.msg ├── Hat.msg └── XBox360Controller.msg └── package.xml /.gitignore: -------------------------------------------------------------------------------- 1 | README_zh-CN_old.md 2 | README_old.md 3 | .vscode 4 | .DS_Store 5 | *.pt 6 | *.mp4 7 | *.wav 8 | __pycache__/ 9 | Grounding-DINO-1.5-API/ 10 | 11 | # ROS2 12 | build/ 13 | install/ 14 | log/ 15 | -------------------------------------------------------------------------------- /Hardware/3D_Printing/Beam.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/Hardware/3D_Printing/Beam.STL -------------------------------------------------------------------------------- /Hardware/3D_Printing/CameraMount.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/Hardware/3D_Printing/CameraMount.STL -------------------------------------------------------------------------------- /Hardware/3D_Printing/Elevating.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/Hardware/3D_Printing/Elevating.STL -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 ADG@PJLab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the 
rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | logo 3 |
4 | 5 |
6 | 7 | English | [简体中文](README_zh-CN.md) 8 | 9 |
10 | 11 | # RoboMatrix: A Skill-centric Hierarchical Framework for Scalable Robot Task Planning and Execution in Open-World 12 | 13 | ## 📝[Paper](https://arxiv.org/abs/2412.00171) | 🌍[Project Page](https://robo-matrix.github.io/) | 🛢️[Data](https://huggingface.co/datasets/WayneMao/RoboMatrix) 14 | 15 | 16 |
17 | demo 18 |
19 | 
20 | 
21 | 
22 | ## 📰 Release
23 | 
24 | - [2025/01/08] 🔥 We release the [data collection](https://github.com/WayneMao/RoboMatrix?tab=readme-ov-file#data-collection) pipeline.
25 | - [2024/12/04] 🔥 We release the RoboMatrix supervised fine-tuning (SFT) dataset containing 1,500 high-quality human-annotated demonstration videos.
26 | 
27 | ## Demos
28 | ### Dynamic Adversarial Interaction
31 | 
32 | https://private-user-images.githubusercontent.com/35285052/392642975-b78e28aa-45c2-4bb0-9e70-b6a08c678f85.mp4
33 | 
34 | 
35 | ## Hardware Preparation
36 | We use robots from DJI’s **RoboMaster** series as the hardware platform, including the Engineering Robot (EP) and
37 | the Warrior Robot (S1). The two robots share several common components: the mobile chassis, a monocular RGB camera, an audio module, and a controller. In addition, each robot carries a unique set of components for specific tasks, such as the S1's target-shooting capability and the EP's target-grasping capability.
38 | 
39 | We modified the EP robot by mounting the camera above the robot so that the camera's viewpoint does not change with the movement of the robotic arm. See [3D_Printing](Hardware/3D_Printing) for the parts of the designed camera mount.
40 | 
41 | We use the **BEITONG ASURA 2PRO+ GAMEPAD NEARLINK VERSION** as the controller for robot teleoperation.
42 | 
43 | ## Environment Configuration
44 | We developed RoboMatrix with the ROS2 framework on Ubuntu 20.04. You can follow the [official installation guidance](https://docs.ros.org/en/foxy/Installation/Ubuntu-Install-Debians.html) to install the Foxy distribution of ROS2 and the necessary tools. We have also verified RoboMatrix on Ubuntu 22.04 with [ROS2 Humble](https://docs.ros.org/en/humble/Installation/Ubuntu-Install-Debians.html), which may serve as a reference if you want to install RoboMatrix on a later version of Ubuntu.
45 | 
46 | We also provide a general ROS2 installation procedure below, which may be helpful. **If ROS2 is already installed on your system, please skip this step.**
47 | 
48 | 
49 | ROS2 Installation
50 | 
51 | ### Step 1: Set UTF-8
52 | Open a terminal and check whether your system supports UTF-8.
53 | ```bash
54 | locale
55 | ```
56 | If it is not supported (no output in the terminal), install it.
57 | ```bash
58 | sudo locale-gen en_US en_US.UTF-8
59 | sudo update-locale LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8
60 | export LANG=en_US.UTF-8
61 | ```
62 | 
63 | ### Step 2: Set Ubuntu Universe
64 | Open a terminal and check whether the Ubuntu Universe repository is enabled.
65 | ```bash
66 | apt-cache policy | grep universe
67 | ```
68 | If it is not enabled (no output in the terminal), enable it.
69 | ```bash
70 | sudo apt install software-properties-common
71 | sudo add-apt-repository universe
72 | ```
73 | 
74 | ### Step 3: Add ROS2 software source and key
75 | ```bash
76 | sudo apt update && sudo apt install curl gnupg2 lsb-release
77 | sudo curl -sSL https://raw.githubusercontent.com/ros/rosdistro/master/ros.key -o /usr/share/keyrings/ros-archive-keyring.gpg
78 | echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/ros-archive-keyring.gpg] http://packages.ros.org/ros2/ubuntu $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/ros2.list > /dev/null
79 | ```
80 | 
81 | ### Step 4: Get ROS2
82 | Install the specified version of ROS2, using **Foxy** as an example.
83 | ```bash
84 | sudo apt update
85 | sudo apt install ros-foxy-desktop
86 | ```
87 | 
88 | ### Step 5: Source bash
89 | ```bash
90 | echo "source /opt/ros/foxy/setup.bash" >> ~/.bashrc
91 | source ~/.bashrc
92 | ```
93 | 
94 | ### Step 6: Test demo
95 | Open a terminal and start the talker node.
96 | ```bash
97 | ros2 run demo_nodes_cpp talker
98 | ```
99 | Open a new terminal and start the listener node.
100 | ```bash
101 | ros2 run demo_nodes_cpp listener
102 | ```
103 | 
104 | ### Step 7: Install colcon
105 | ```bash
106 | sudo apt install python3-colcon-common-extensions
107 | ```
108 | 
109 | 
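Since the RoboMatrix nodes are written in Python, you can additionally confirm that the ROS2 Python client library works after sourcing the setup script. The snippet below is a minimal sketch; the file name `ros2_check.py` is our own and is not part of this repository.

```python
# ros2_check.py -- minimal rclpy smoke test (hypothetical helper, not shipped with RoboMatrix)
import rclpy
from rclpy.node import Node

rclpy.init()
node = Node('install_check')                # create a throwaway node
node.get_logger().info('rclpy import and node creation OK.')
node.destroy_node()
rclpy.shutdown()
```
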
110 | 
111 | ## 🛠️ Installation
112 | ### Build workspace
113 | ```bash
114 | git clone https://github.com/WayneMao/RoboMatrix.git ~/RoboMatrix
115 | cd ~/RoboMatrix && colcon build
116 | ```
117 | 
118 | ### RoboMaster SDK
119 | Install dependencies.
120 | ```bash
121 | sudo apt install libopus-dev python3-pip
122 | python3 -m pip install -U numpy numpy-quaternion pyyaml
123 | ```
124 | Install the SDK from source code.
125 | ```bash
126 | python3 -m pip install git+https://github.com/jeguzzi/RoboMaster-SDK.git
127 | python3 -m pip install git+https://github.com/jeguzzi/RoboMaster-SDK.git#"egg=libmedia_codec&subdirectory=lib/libmedia_codec"
128 | ```
129 | 
130 | ### Dependencies
131 | ```bash
132 | pip install -r requirements.txt
133 | pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
134 | ```
135 | 
136 | ### Grounding-DINO-1.5-API
137 | ```bash
138 | cd ~/RoboMatrix/robomatrix_client/robomatrix_client
139 | git clone https://github.com/IDEA-Research/Grounding-DINO-1.5-API.git
140 | cd Grounding-DINO-1.5-API
141 | pip install -v -e .
142 | ```
143 | 
144 | ## Data Collection
145 | 
146 | Download the official RoboMaster app, follow its instructions to connect the robot to WiFi (WiFi 5 only), and then connect the computer to the same WiFi network to complete the connection.
147 | 
148 | 
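Before launching any ROS2 nodes, you can verify the network link by querying the robot directly through the RoboMaster SDK. This is a minimal sketch based on the SDK calls used in `teleoperation.py` (`robot.Robot()`, `initialize(conn_type="sta")`, `close()`); `get_version()` is a standard RoboMaster SDK call, and the script name is our own.

```python
# sdk_check.py -- hypothetical helper, not shipped with RoboMatrix
from robomaster import robot

ep_robot = robot.Robot()
# "sta" (station) mode: the robot and this computer are on the same WiFi network
ep_robot.initialize(conn_type="sta")
print("Connected. Firmware version:", ep_robot.get_version())
ep_robot.close()
```

If this prints a firmware version, the connection is working and the launch file in Step 1 should be able to reach the robot.
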
149 | joystick_mapping 150 |
151 | 
152 | ### Step 1: Start launch file
153 | ```bash
154 | source ~/RoboMatrix/install/setup.bash
155 | ros2 launch robomaster_ros collect_data.launch.py name:=example idx:=1 dir:=~/RoboMatrixDatasets
156 | ```
157 | 
158 | 
159 | 160 | |Parameter |Definition |Example| 161 | |----- |----- |-----| 162 | |name |A custom task name |move_to_box| 163 | |idx |The sequence number of the current episode of the task |10| 164 | |dir |The folder where the data is saved | ~/MyDatasets| 165 | 166 |
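The `dir` folder is organized by `data_collection.py`: each episode is saved as `<dir>/<name>/videos/<name>_<idx>.mp4` plus a matching JSON file under `<dir>/<name>/annotations/`. For reference, one per-frame record in that JSON has the following shape; the structure follows `_save_annotation`, while the field values here are illustrative.

```python
frame_annotation = {
    "stream": "move_to_box_10.mp4",           # video file this frame belongs to
    "frame_inds": [0],                        # index of this frame in the video
    "observations": {
        "arm_position": [180, -70],           # end-effector x/y (illustrative values)
        "gripper_status": "opened",
        "gripper_action": ["release", 70],    # [command, power]
        "chassis_position": [0.0, 0.0],       # chassis x/y in millimeters
        "chassis_attitute": [0.0, 0.0, 0.0],  # roll/pitch/yaw in degrees ("attitute" follows the repo's spelling)
    },
    "conversations": [
        {"from": "human", "value": "prompt"}, # placeholder language instruction
        {"from": "gpt", "value": "tokens"},   # placeholder action tokens
    ],
}
```
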
167 | 
168 | ***Notes***
169 | 1. Make sure the robot is successfully connected to the specified WiFi network before starting the launch file.
170 | 2. Make sure the controller's button mode is **XBOX**, which you can check in the terminal output. On the **BEITONG** controller, long-press the `POWER` button to switch modes.
171 | 3. Ensure that robot initialization is complete before proceeding with the following operations.
172 | 
173 | ### Step 2: Start collecting
174 | 
175 | Press the `START` button to begin recording the robot's status; this also activates the other buttons on the gamepad, allowing you to control the robot's movement.
176 | 
177 | ### Step 3: Control the robot
178 | 
179 | The robot chassis is speed-controlled: the `RS` stick sets the translation speed of the chassis, and the `LT` and `RT` triggers set its rotation speed.
180 | 
181 | The robot arm is position-controlled: the `HAT` (D-pad) buttons move the end of the arm in the plane, and each press shifts it a fixed distance in the chosen direction.
182 | 
183 | The gripper control is binary: the `A` button fully opens the gripper, and the `B` button fully closes it.
184 | 
185 | ### Step 4: Save data
186 | 
187 | Press the `BACK` button to save the data, then press the `POWER` button to shut down the ROS2 nodes and wait for the video to finish saving.
188 | 
189 | ## Dataset Construction
190 | 
191 | Coming soon.
192 | 
193 | ## Task Execution
194 | 
195 | Coming soon.
196 | 
197 | ## TODOs
198 | - [ ] Package Docker
199 | - [x] 🤗 Release Supervised Fine-tuning dataset
200 | - [x] Optimize VLA ROS communication
201 | - [ ] Open source VLA Skill model code
202 | - [ ] Release VLA Skill model weights
203 | - [ ] Open source Shooting code
204 | 
205 | ## Citation
206 | 
207 | If you find our work helpful, please cite us:
208 | 
209 | ```bibtex
210 | @article{mao2024robomatrix,
211 |   title={Robomatrix: A skill-centric hierarchical framework for scalable robot task planning and execution in open-world},
212 |   author={Mao, Weixin and Zhong, Weiheng and Jiang, Zhou and Fang, Dong and Zhang, Zhongyue and Lan, Zihan and Li, Haosheng and Jia, Fan and Wang, Tiancai and Fan, Haoqiang and others},
213 |   journal={arXiv preprint arXiv:2412.00171},
214 |   year={2024}
215 | }
216 | ```
217 | 
218 | ## Hiring
219 | Research Intern: Embodied AI / Embodied Foundation Model Algorithms
220 | 
221 | Responsibilities
222 | 1. Develop the data cleaning and automatic annotation systems needed for multimodal understanding/generation models and VLA foundation models, ensuring the quality and diversity of data across types and modalities;
223 | 2. Explore efficient data augmentation and data synthesis methods, such as image/video editing;
224 | 3. Deploy and debug algorithms on the robot platform to improve the efficiency of robot policies;
225 | 4. Research and explore cutting-edge embodied algorithms, including but not limited to VLA, RDT, Pi0, etc.;
226 | 5. We provide strong research mentorship and support for publishing papers.
227 | Requirements
228 | 1. Internship of at least 6 months, with at least 4 days per week;
229 | 2. Currently enrolled in a master's program or above; computer science, automation, and related majors preferred;
230 | 3. Strong software engineering skills; proficient in Python and PyTorch; familiar with Linux;
231 | 4. Familiar with parallel programming, 3D coordinate transformations, and computer vision fundamentals; some knowledge of robot kinematics;
232 | 5. Able to read English technical literature and reproduce algorithms from papers;
233 | 6. Hands-on robot development experience and large-scale data generation/processing experience are a plus.
234 | 
235 | Location: Zhongguancun, Beijing (LimX Dynamics Beijing Laboratory)
236 | How to apply: Send your resume and an introduction to your related projects/research to waynemao@limxdynamics.com, with the resume named Internship_Name_University_Direction.pdf (original format: 实习_姓名_学校_方向.pdf).
237 | PS. We are also recruiting interns in physics simulation, video generation/world models, and motion control, as well as a small number of full-time positions.
238 | PPS. Please submit your resume by email first so it goes through the referral channel.
239 | 
240 | ## Acknowledgments
241 | - Implementation of the Vision-Language-Action (VLA) skill model is based on [LLaVA](https://github.com/haotian-liu/LLaVA/).
242 | - RoboMatrix-ROS is based on the official [RoboMaster-SDK](https://github.com/dji-sdk/RoboMaster-SDK), the modified [RoboMaster-SDK](https://github.com/jeguzzi/RoboMaster-SDK), and [ROS2](https://github.com/ros2).
243 | - Some additional libraries: [Grounding-DINO-1.5](https://github.com/IDEA-Research/Grounding-DINO-1.5-API), [YOLO-World](https://github.com/AILab-CVC/YOLO-World). 244 | 245 | 246 | --- 247 | ## WeChat 248 | 249 | 250 |
251 | wechat 252 |
253 | -------------------------------------------------------------------------------- /README_zh-CN.md: -------------------------------------------------------------------------------- 1 |
2 | logo 3 |
4 | 5 |
6 | 7 | [English](README.md) | 简体中文 8 | 9 |
10 | 11 | # RoboMatrix: A Skill-centric Hierarchical Framework for Scalable Robot Task Planning and Execution in Open-World 12 | 13 | ### 📝[论文]() | 🌍[项目主页](https://robo-matrix.github.io/) | 🛢️[数据](https://huggingface.co/datasets/WayneMao/RoboMatrix) 14 | 15 | ![eight_skills](resources/eight_skills.gif) 16 | 17 | 18 | 19 | ## 发布 20 | - [2024/12/04] 🤗 发布RoboMatrix有监督微调数据集,该SFT数据集包含1,500条高质量人类标注的演示视频。 [[数据](https://huggingface.co/datasets/WayneMao/RoboMatrix)] 21 | 22 | ## Demo 23 | ### 动态对抗交互 24 | 26 | 27 | https://private-user-images.githubusercontent.com/35285052/392642975-b78e28aa-45c2-4bb0-9e70-b6a08c678f85.mp4 28 | 29 | ## 🛠️ 安装 30 | ### 1. 安装 ROS2 31 | 32 | **注意:如果您的系统上已安装 ROS2,请跳过此步骤。** 33 | 34 | **适配您 Ubuntu 系统的 ROS2 版本** 35 | * Ubuntu 20.04 ---> ROS2 Foxy ---> [官方安装指南](https://docs.ros.org/en/foxy/Installation/Ubuntu-Install-Debians.html) 36 | * Ubuntu 22.04 ---> ROS2 Humble ---> [官方安装指南](https://docs.ros.org/en/humble/Installation/Ubuntu-Install-Debians.html) 37 | 38 | **安装 colcon** 39 | ```bash 40 | sudo apt install python3-colcon-common-extensions 41 | ``` 42 | 43 | ### 2. 构建工作空间 44 | ```bash 45 | mkdir -p ~/RoboMatrix/src && cd ~/RoboMatrix/src 46 | git clone https://github.com/WayneMao/RoboMatrix.git 47 | cd ~/RoboMatrix && colcon build 48 | ``` 49 | 50 | ### 3. 安装依赖 51 | ```bash 52 | sudo apt install libopus-dev python3-pip 53 | python3 -m pip install -U numpy numpy-quaternion pyyaml 54 | 55 | # 安装 RoboMaster-SDK 56 | python3 -m pip install git+https://github.com/jeguzzi/RoboMaster-SDK.git 57 | python3 -m pip install git+https://github.com/jeguzzi/RoboMaster-SDK.git#"egg=libmedia_codec&subdirectory=lib/libmedia_codec" 58 | 59 | # 安装依赖和 PyTorch 60 | pip install -r requirements.txt 61 | pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118 62 | ``` 63 | ### 4. 第三方依赖 64 | **Grounding-DINO-1.5-API** 65 | ```bash 66 | cd src/robomatrix_client/robomatrix_client 67 | git clone https://github.com/IDEA-Research/Grounding-DINO-1.5-API.git 68 | cd Grounding-DINO-1.5-API 69 | pip install -v -e . 70 | ``` 71 | 72 | ## 真机部署 73 | 74 | ## TODO 75 | - [ ] 打包Docker 76 | - [x] 🤗 发布有监督微调数据集 77 | - [ ] 优化VLA ROS通信 78 | - [ ] 开源VLA Skill model代码 79 | - [ ] 开放VLA Skill Model权重 80 | - [ ] 开源Shooting代码 81 | 82 | ## Citation 83 | 84 | If you find our work helpful, please cite us: 85 | 86 | ```bibtex 87 | @article{mao2024robomatrix, 88 | title={Robomatrix: A skill-centric hierarchical framework for scalable robot task planning and execution in open-world}, 89 | author={Mao, Weixin and Zhong, Weiheng and Jiang, Zhou and Fang, Dong and Zhang, Zhongyue and Lan, Zihan and Li, Haosheng and Jia, Fan and Wang, Tiancai and Fan, Haoqiang and others}, 90 | journal={arXiv preprint arXiv:2412.00171}, 91 | year={2024} 92 | } 93 | ``` 94 | 95 | ## 招聘 96 | 具身智能-具身大模型算法研究实习生 97 | 职位描述 98 | 1. 参与多模态理解与生成大模型、VLA大模型所需的数据清洗和自动标注系统开发,确保各类型/模态数据的质量与多样性; 99 | 2. 探索高效的数据增强和数据合成方法,例如图像/视频编辑; 100 | 3. 对机器人平台实现算法的部署和调试,提高机器人策略效率; 101 | 4. 对前沿具身算法进行研究探索,包括不限于VLA、RDT、Pi0等; 102 | 5. 我们提供有力的研究指导,进行论文发表; 103 | 职位要求 104 | 1、实习时间至少6个月,每周保证4天以上实习 105 | 2、硕士及以上学历在读,计算机、自动化等相关专业优先; 106 | 3、具备较强的软件工程能力,熟练使用Python、pytorch,熟悉Linux操作系统; 107 | 4、熟悉并行化编程,熟悉三维坐标变换、计算机视觉基础知识,了解机器人运动学; 108 | 5、有较好的英文科技文献阅读及算法复现的能力; 109 | 6、有实际的机器人开发经验优先,有大规模数据生成与处理经验优先; 110 | 111 | 工作地点:北京·中关村,逐际动力北京实验室 112 | 申请方式:请将你的简历以及相关项目/研究的介绍发送至 waynemao@limxdynamics.com ,简历格式:实习_姓名_学校_方向.pdf 113 | PS. 同时也招收**物理仿真实习生,视频生成/世界模型实习生,运动控制实习生**, 还有少量全职HC。 114 | PPS. 
请简历优先投递邮箱,走内推通道 115 | ## 致谢 116 | - 视觉-语言-动作 (VLA) 技能模型的实现基于 [LLaVA](https://github.com/haotian-liu/LLaVA/)。 117 | - RoboMatrix-ROS 基于 [RoboMaster-SDK](https://github.com/dji-sdk/RoboMaster-SDK) 和 [ROS2](https://github.com/ros2)。 118 | - 其他一些库包括:[Grounding-DINO-1.5](https://github.com/IDEA-Research/Grounding-DINO-1.5-API)、[YOLO-World](https://github.com/AILab-CVC/YOLO-World)。 119 | 120 | 121 | --- 122 | ## 微信群 123 | weichatweichat 124 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | OPENAI_API_TYPE: 'openai' #'azure' OR 'openai' 2 | 3 | # for 'openai' api key 4 | OPENAI_KEY: 'xxxx' 5 | OPENAI_API_BASE: 'xxxx' 6 | 7 | # for 'azure' 8 | AZURE_MODEL: 'XXXXX' # your deploment_model_name 9 | AZURE_API_BASE: https://xxxxxxxx.openai.azure.com/ # your deployment endpoint 10 | AZURE_API_KEY: 'xxxxxx' # your deployment key 11 | AZURE_API_VERSION: '2023-03-15-preview' 12 | 13 | # For STEP1 14 | STEP1_API_KEY: 'xxxx' 15 | STEP1_API_BASE: 'https://xxxx/v1/' 16 | 17 | # For Grounding-DINO-1.5 18 | Grounding_DINO_1.5_API: xxxx 19 | 20 | # Audio2text api 21 | AssemblyAI_API: xxxx -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # langchain==0.0.225 2 | # openai==0.27.7 3 | matplotlib==3.7.1 4 | numpy==1.24.3 5 | rich 6 | PyYAML==6.0.1 7 | openai==1.35.14 8 | langchain==0.2.8 9 | langchain-community==0.2.0 10 | langchain-core==0.2.20 11 | langchain-openai==0.1.16 12 | langchain-text-splitters==0.2.2 13 | langsmith==0.1.88 14 | # langserve==0.2.2 15 | # pip install "langserve[all]" 16 | 17 | # torch==2.0.1 18 | 19 | pyquaternion==0.9.9 20 | 21 | # Joystick Control dependencies 22 | pygame 23 | 24 | dds-cloudapi-sdk==0.2.1 25 | gradio==4.22.0 26 | scipy 27 | noisereduce 28 | assemblyai -------------------------------------------------------------------------------- /resources/WeChat/WeChat03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/resources/WeChat/WeChat03.png -------------------------------------------------------------------------------- /resources/WeChat/WechatIMG2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/resources/WeChat/WechatIMG2.jpg -------------------------------------------------------------------------------- /resources/WeChat/WechatIMG3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/resources/WeChat/WechatIMG3.jpg -------------------------------------------------------------------------------- /resources/eight_skills.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/resources/eight_skills.gif -------------------------------------------------------------------------------- /resources/joystick_mapping.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/resources/joystick_mapping.png -------------------------------------------------------------------------------- /resources/robomatrix-logo3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/resources/robomatrix-logo3.png -------------------------------------------------------------------------------- /robomaster_driver/robomaster_msgs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(robomaster_msgs) 3 | 4 | # Default to C99 5 | if(NOT CMAKE_C_STANDARD) 6 | set(CMAKE_C_STANDARD 99) 7 | endif() 8 | 9 | # Default to C++14 10 | if(NOT CMAKE_CXX_STANDARD) 11 | set(CMAKE_CXX_STANDARD 14) 12 | endif() 13 | 14 | if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") 15 | add_compile_options(-Wall -Wextra -Wpedantic) 16 | endif() 17 | 18 | # find dependencies 19 | find_package(ament_cmake REQUIRED) 20 | # uncomment the following section in order to fill in 21 | # further dependencies manually. 22 | # find_package( REQUIRED) 23 | 24 | if(BUILD_TESTING) 25 | find_package(ament_lint_auto REQUIRED) 26 | # the following line skips the linter which checks for copyrights 27 | # uncomment the line when a copyright and license is not present in all source files 28 | #set(ament_cmake_copyright_FOUND TRUE) 29 | # the following line skips cpplint (only works in a git repo) 30 | # uncomment the line when this package is not in a git repo 31 | #set(ament_cmake_cpplint_FOUND TRUE) 32 | ament_lint_auto_find_test_dependencies() 33 | endif() 34 | 35 | find_package(rosidl_default_generators REQUIRED) 36 | 37 | find_package(std_msgs REQUIRED) 38 | 39 | rosidl_generate_interfaces(${PROJECT_NAME} 40 | "msg/ChassisAttitute.msg" 41 | "msg/ChassisPosition.msg" 42 | "msg/ChassisSingleEsc.msg" 43 | "msg/ChassisEsc.msg" 44 | "msg/ChassisIMU.msg" 45 | "msg/GripperStatus.msg" 46 | "msg/ServoSingleStatus.msg" 47 | "msg/ServoStatus.msg" 48 | "msg/RoboticArmPosition.msg" 49 | DEPENDENCIES std_msgs 50 | ) 51 | 52 | ament_package() 53 | -------------------------------------------------------------------------------- /robomaster_driver/robomaster_msgs/msg/ChassisAttitute.msg: -------------------------------------------------------------------------------- 1 | # Chassis raw RPY 2 | 3 | std_msgs/Header header 4 | float32 roll 5 | float32 pitch 6 | float32 yaw 7 | -------------------------------------------------------------------------------- /robomaster_driver/robomaster_msgs/msg/ChassisEsc.msg: -------------------------------------------------------------------------------- 1 | # Chassis esc 2 | 3 | std_msgs/Header header 4 | 5 | robomaster_msgs/ChassisSingleEsc front_right 6 | robomaster_msgs/ChassisSingleEsc front_left 7 | robomaster_msgs/ChassisSingleEsc back_left 8 | robomaster_msgs/ChassisSingleEsc back_right 9 | -------------------------------------------------------------------------------- /robomaster_driver/robomaster_msgs/msg/ChassisIMU.msg: -------------------------------------------------------------------------------- 1 | # Chassis IMU 2 | 3 | std_msgs/Header header 4 | float32 acc_x 5 | float32 acc_y 6 | float32 acc_z 7 | float32 gyro_x 8 | float32 gyro_y 9 | float32 gyro_z 10 | -------------------------------------------------------------------------------- 
/robomaster_driver/robomaster_msgs/msg/ChassisPosition.msg: -------------------------------------------------------------------------------- 1 | # Chassis raw XYZ 2 | 3 | std_msgs/Header header 4 | float32 x 5 | float32 y 6 | float32 z 7 | -------------------------------------------------------------------------------- /robomaster_driver/robomaster_msgs/msg/ChassisSingleEsc.msg: -------------------------------------------------------------------------------- 1 | # Chassis single esc 2 | 3 | uint16 angle 4 | int16 speed 5 | uint32 time_stamp 6 | uint8 status 7 | -------------------------------------------------------------------------------- /robomaster_driver/robomaster_msgs/msg/GripperStatus.msg: -------------------------------------------------------------------------------- 1 | # gripper status 2 | 3 | std_msgs/Header header 4 | string status 5 | uint8 power 6 | string action 7 | -------------------------------------------------------------------------------- /robomaster_driver/robomaster_msgs/msg/RoboticArmPosition.msg: -------------------------------------------------------------------------------- 1 | # arm end-effector position 2 | 3 | std_msgs/Header header 4 | 5 | int64 x 6 | int64 y 7 | int64 z 8 | -------------------------------------------------------------------------------- /robomaster_driver/robomaster_msgs/msg/ServoSingleStatus.msg: -------------------------------------------------------------------------------- 1 | # servo single status 2 | 3 | bool valid 4 | int32 value 5 | int32 speed 6 | -------------------------------------------------------------------------------- /robomaster_driver/robomaster_msgs/msg/ServoStatus.msg: -------------------------------------------------------------------------------- 1 | # Chassis esc 2 | 3 | std_msgs/Header header 4 | 5 | bool[4] valid # whenever a servo in connected to a port 6 | int32[4] value # the angle of the servos (1024 corresponds to 180 degrees) 7 | int32[4] speed # the speed of the servos (1024 corresponds to about 36 degrees per seconds) 8 | 9 | robomaster_msgs/ServoSingleStatus servo_0 10 | robomaster_msgs/ServoSingleStatus servo_1 11 | robomaster_msgs/ServoSingleStatus servo_2 12 | robomaster_msgs/ServoSingleStatus servo_3 13 | -------------------------------------------------------------------------------- /robomaster_driver/robomaster_msgs/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | robomaster_msgs 5 | 0.2.0 6 | TODO: Package description 7 | weiheng_z 8 | TODO: License declaration 9 | 10 | ament_cmake 11 | 12 | ament_lint_auto 13 | ament_lint_common 14 | 15 | rosidl_default_generators 16 | 17 | rosidl_default_runtime 18 | 19 | rosidl_interface_packages 20 | 21 | 22 | ament_cmake 23 | 24 | 25 | -------------------------------------------------------------------------------- /robomaster_driver/robomaster_ros/launch/collect_data.launch.py: -------------------------------------------------------------------------------- 1 | from launch import LaunchDescription 2 | from launch_ros.actions import Node 3 | from launch.actions import ExecuteProcess, DeclareLaunchArgument 4 | from launch.substitutions import LaunchConfiguration 5 | 6 | import os 7 | home_path = os.path.expanduser("~") 8 | 9 | def generate_launch_description(): 10 | return LaunchDescription([ 11 | DeclareLaunchArgument('name', default_value='mobile_manipulation', description='task name'), 12 | DeclareLaunchArgument('idx', default_value='0', description='episode index'), 13 | 
        DeclareLaunchArgument('dir', default_value=home_path+'/RoboMatrixDatasets', description='video save directory'),
14 |         Node(
15 |             package='joystick_driver',
16 |             executable='xbox_driver',
17 |             name='xbox_driver'
18 |         ),
19 |         Node(
20 |             package='robomaster_ros',
21 |             executable='teleoperation',
22 |             name='teleoperation'
23 |         ),
24 |         Node(
25 |             package='robomaster_ros',
26 |             executable='data_collection',
27 |             name='data_collection',
28 |             output='screen',
29 |             parameters=[{
30 |                 'task_name' : LaunchConfiguration('name'),
31 |                 "episode_idx": LaunchConfiguration('idx'),
32 |                 "save_dir" : LaunchConfiguration('dir'),
33 |             }]
34 |         ),
35 |     ])
36 | 
--------------------------------------------------------------------------------
/robomaster_driver/robomaster_ros/package.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | robomaster_ros
5 | 0.2.0
6 | TODO: Package description
7 | weiheng_z
8 | TODO: License declaration
9 | 
10 | ament_copyright
11 | ament_flake8
12 | ament_pep257
13 | python3-pytest
14 | 
15 | rclpy
16 | robomaster_msgs
17 | sensor_msgs
18 | geometry_msgs
19 | std_msgs
20 | std_srvs
21 | cv_bridge
22 | nav_msgs
23 | 
24 | ament_python
25 | 
26 | 
27 | 
--------------------------------------------------------------------------------
/robomaster_driver/robomaster_ros/resource/robomaster_ros:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/robomaster_driver/robomaster_ros/resource/robomaster_ros
--------------------------------------------------------------------------------
/robomaster_driver/robomaster_ros/robomaster_ros/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/robomaster_driver/robomaster_ros/robomaster_ros/__init__.py
--------------------------------------------------------------------------------
/robomaster_driver/robomaster_ros/robomaster_ros/data_collection.py:
--------------------------------------------------------------------------------
1 | import rclpy
2 | from rclpy.node import Node
3 | from sensor_msgs.msg import Image
4 | from std_msgs.msg import Bool
5 | from cv_bridge import CvBridge
6 | 
7 | from robomaster_msgs.msg import ChassisAttitute, ChassisPosition, ChassisEsc, ChassisIMU
8 | from robomaster_msgs.msg import GripperStatus
9 | from robomaster_msgs.msg import ServoStatus
10 | from robomaster_msgs.msg import RoboticArmPosition
11 | 
12 | from collections import deque
13 | from queue import Queue
14 | 
15 | import os, time
16 | import json
17 | import cv2
18 | import threading
19 | 
20 | 
21 | FPS = 30
22 | TASK_PROMPT = "None."
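# NOTE: FPS is the frame rate used when encoding the collected frames into the
# saved video (see _save_video_v2). TASK_PROMPT is a default task-level language
# prompt; it is not referenced elsewhere in this file, where the per-frame
# "conversations" entries use placeholder values instead.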
23 | 
24 | 
25 | class DataCollectionNode(Node):
26 | 
27 |     def __init__(self) -> None:
28 |         super().__init__('rm_data_collection')
29 | 
30 |         # save path params
31 |         from datetime import datetime
32 |         now = datetime.now().date().strftime("%Y-%m-%d")
33 |         self.declare_parameter("task_name", now).value
34 | 
35 |         self.declare_parameter("episode_idx", 0).value
36 | 
37 |         home_path = os.path.expanduser("~")
38 |         self.declare_parameter("save_dir", home_path+'/RoboMatrixDatasets').value
39 | 
40 |         self._task_name: str = self.get_parameter("task_name").get_parameter_value().string_value
41 |         self._episode_idx: int = self.get_parameter("episode_idx").get_parameter_value().integer_value
42 |         self._save_dir: str = self.get_parameter("save_dir").get_parameter_value().string_value
43 |         self.get_logger().info(f'task_name: {self._task_name}')
44 |         self.get_logger().info(f'episode_idx: {self._episode_idx}')
45 |         self.get_logger().info(f'save_dir: {self._save_dir}')
46 | 
47 |         self._initialize_parameters()
48 | 
49 |         # visual
50 |         self._sub_image = self.create_subscription(Image, "/camera/raw/image", self._image_cb, 1000)
51 |         self._image_deque = deque()
52 |         self._image_queue = Queue(maxsize=2000)
53 | 
54 |         # subscribe robotic arm topic to get end effector position
55 |         self._sub_arm_position = self.create_subscription(RoboticArmPosition, "/arm/raw/position", self._arm_position_cb, 1000)
56 |         self._arm_position_deque = deque()
57 | 
58 |         # subscribe servo topic to get joint value
59 |         self._sub_servo_value = self.create_subscription(ServoStatus, "/servo/raw/value", self._servo_value_cb, 1000)
60 |         self._servo_value_deque = deque()
61 | 
62 |         # subscribe gripper topic to get status
63 |         self._sub_gripper_status = self.create_subscription(GripperStatus, "/gripper/raw/status", self._gripper_status_cb, 1000)
64 |         self._gripper_status_deque = deque()
65 | 
66 |         # subscribe chassis topic to get raw data
67 |         self._sub_chassis_esc = self.create_subscription(ChassisEsc, "/chassis/raw/esc", self._chassis_esc_cb, 1000)
68 |         self._chassis_esc_data_deque = deque()
69 |         self._sub_chassis_position = self.create_subscription(ChassisPosition, "/chassis/raw/position", self._chassis_position_cb, 1000)
70 |         self._chassis_position_deque = deque()
71 |         self._sub_chassis_attitute = self.create_subscription(ChassisAttitute, "/chassis/raw/attitute", self._chassis_attitute_cb, 1000)
72 |         self._chassis_attitute_deque = deque()
73 |         self._sub_chassis_imu = self.create_subscription(ChassisIMU, "/chassis/raw/imu", self._chassis_imu_cb, 1000)
74 |         self._chassis_imu_deque = deque()
75 | 
76 |         self._sub_action = self.create_subscription(Bool, '/data_collection_start', self._action_cb, 10)
77 | 
78 |         self._cv_bridge = CvBridge()
79 | 
80 |         self._rate = 30
81 |         self._max_frame_num = 10000
82 | 
83 |         self._can_action = False
84 | 
85 |         self.thread = threading.Thread(target=self._collect)
86 |         self.thread.start()
87 |         self._lock = threading.Lock()
88 | 
89 |         self.end = False
90 | 
91 |     def _initialize_parameters(self):
92 |         # self._file_name = f'episode_{self._episode_idx}'
93 |         self._file_name = f"{self._task_name}_{self._episode_idx}"
94 |         # dataset root folder
95 |         if not os.path.exists(self._save_dir): os.mkdir(self._save_dir)
96 |         # task folder
97 |         self._task_dir = os.path.join(self._save_dir, self._task_name)
98 |         if not os.path.exists(self._task_dir): os.mkdir(self._task_dir)
99 |         # video folder
100 |         self._video_dir = os.path.join(self._task_dir, "videos")
101 |         if not os.path.exists(self._video_dir): os.mkdir(self._video_dir)
102 |         # annotation folder
103 |         self._anno_dir = 
os.path.join(self._task_dir, "annotations") 104 | if not os.path.exists(self._anno_dir): os.mkdir(self._anno_dir) 105 | 106 | def _image_cb(self, msg: Image): 107 | try: 108 | if len(self._image_deque) >= 1: self._image_deque.popleft() 109 | except: pass 110 | if not self._can_action: return 111 | time_stamp = self.to_sec(msg.header.stamp) 112 | image = self._cv_bridge.imgmsg_to_cv2(msg, 'passthrough') 113 | data = (time_stamp, image) 114 | self._image_deque.append(data) 115 | 116 | def _arm_position_cb(self, msg: RoboticArmPosition): 117 | if len(self._arm_position_deque) >= 100: self._arm_position_deque.popleft() 118 | time_stamp = self.to_sec(msg.header.stamp) 119 | x, y = msg.x, msg.y 120 | data = (time_stamp, [x, y]) 121 | self._arm_position_deque.append(data) 122 | 123 | def _servo_value_cb(self, msg: ServoStatus): 124 | if len(self._servo_value_deque) >= 100: self._servo_value_deque.popleft() 125 | time_stamp = self.to_sec(msg.header.stamp) 126 | values = [msg.servo_0.value, msg.servo_1.value] 127 | speeds = [msg.servo_0.speed, msg.servo_1.speed] 128 | data = (time_stamp, values, speeds) 129 | self._servo_value_deque.append(data) 130 | 131 | def _gripper_status_cb(self, msg: GripperStatus): 132 | if len(self._gripper_status_deque) >= 100: self._gripper_status_deque.popleft() 133 | time_stamp = self.to_sec(msg.header.stamp) 134 | gripper_status = msg.status 135 | gripper_power = msg.power 136 | gripper_action = msg.action 137 | data = (time_stamp, gripper_status, gripper_power, gripper_action) 138 | self._gripper_status_deque.append(data) 139 | 140 | def _chassis_esc_cb(self, msg: ChassisEsc): 141 | if len(self._chassis_esc_data_deque) >= 100: self._chassis_esc_data_deque.popleft() 142 | time_stamp = self.to_sec(msg.header.stamp) 143 | angles = [msg.front_right.angle, msg.front_left.angle, msg.back_left.angle, msg.back_right.angle] 144 | speeds = [msg.front_right.speed, msg.front_left.speed, msg.back_left.speed, msg.back_right.speed] 145 | data = (time_stamp, angles, speeds) 146 | self._chassis_esc_data_deque.append(data) 147 | 148 | def _chassis_position_cb(self, msg: ChassisPosition): 149 | if len(self._chassis_position_deque) >= 100: self._chassis_position_deque.popleft() 150 | time_stamp = self.to_sec(msg.header.stamp) 151 | position = [round(msg.x * 1000, 2), round(msg.y * 1000, 2)] 152 | data = (time_stamp, position) 153 | self._chassis_position_deque.append(data) 154 | 155 | def _chassis_attitute_cb(self, msg: ChassisAttitute): 156 | if len(self._chassis_attitute_deque) >= 100: self._chassis_attitute_deque.popleft() 157 | time_stamp = self.to_sec(msg.header.stamp) 158 | attitute = [round(msg.roll, 2), round(msg.pitch, 2), round(msg.yaw, 2)] 159 | data = (time_stamp, attitute) 160 | self._chassis_attitute_deque.append(data) 161 | 162 | def _chassis_imu_cb(self, msg: ChassisIMU): 163 | if len(self._chassis_imu_deque) >= 100: self._chassis_imu_deque.popleft() 164 | time_stamp = self.to_sec(msg.header.stamp) 165 | linear_acceleration = [msg.acc_x, msg.acc_y, msg.acc_z] 166 | angular_velocity = [msg.gyro_x, msg.gyro_y, msg.gyro_z] 167 | data = (time_stamp, linear_acceleration, angular_velocity) 168 | self._chassis_imu_deque.append(data) 169 | 170 | def _action_cb(self, msg: Bool): 171 | self._can_action = msg.data 172 | 173 | def _get_frame(self) -> bool: 174 | if not self._image_deque: 175 | # self.get_logger().error("image deque is empty") 176 | return False 177 | 178 | tolerance = 0.05 179 | 180 | frame_time = self._image_deque[-1][0] - tolerance # 图像缓存中最新的数据作为标准 181 | 182 | # 
get image 183 | while self._image_deque: 184 | time_stamp, image = self._image_deque.popleft() 185 | if time_stamp >= frame_time: break 186 | else: pass 187 | 188 | # get chassis position 189 | while self._chassis_position_deque: 190 | time_stamp, chassis_position = self._chassis_position_deque.popleft() 191 | if time_stamp >= frame_time: break 192 | else: 193 | self.get_logger().error("get chassis position fail") 194 | return False 195 | 196 | # get chassis attitute 197 | while self._chassis_attitute_deque: 198 | time_stamp, chassis_attitute = self._chassis_attitute_deque.popleft() 199 | if time_stamp >= frame_time: break 200 | else: 201 | self.get_logger().error("get chassis attitute fail") 202 | return False 203 | 204 | # get arm position 205 | while self._arm_position_deque: 206 | time_stamp, end_position = self._arm_position_deque.popleft() 207 | if time_stamp >= frame_time: break 208 | else: 209 | self.get_logger().error("get arm position fail") 210 | return False 211 | 212 | # get gripper status 213 | while self._gripper_status_deque: 214 | time_stamp, gripper_status, gripper_power, gripper_command = self._gripper_status_deque.popleft() 215 | gripper_action = [gripper_command, gripper_power] 216 | if time_stamp >= frame_time: break 217 | else: 218 | self.get_logger().error("get gripper status fail") 219 | return False 220 | 221 | frame_dict = { 222 | "image": image, 223 | "arm_position": end_position, 224 | "gripper_status": gripper_status, 225 | "gripper_action": gripper_action, 226 | "chassis_position": chassis_position, 227 | "chassis_attitute": chassis_attitute, 228 | } 229 | 230 | return frame_dict 231 | 232 | def _collect(self): 233 | visual = [] 234 | observations = { 235 | 'arm_position': [], 236 | 'gripper_status': [], 237 | 'gripper_action': [], 238 | "chassis_position": [], 239 | "chassis_attitute": [], 240 | } 241 | frame_count, fail_count= 0, 0 242 | 243 | while rclpy.ok(): 244 | self.get_logger().info("Wait to start data collection") 245 | while rclpy.ok() and not self._can_action: pass 246 | self.get_logger().info("start collect") 247 | 248 | start_time = None 249 | while rclpy.ok() and frame_count <= self._max_frame_num and self._can_action: 250 | fps = int(1 / (time.time() - start_time)) if start_time else 0 251 | start_time = time.time() 252 | 253 | frame_dict = self._get_frame() 254 | if not frame_dict: 255 | fail_count += 1 256 | continue 257 | else: frame_count += 1 258 | 259 | if fps <= self._rate: 260 | self.get_logger().info(f"Get a frame {frame_count} / {self._max_frame_num} ({fps} FPS).") 261 | else: 262 | self.get_logger().warn(f"Get a frame {frame_count} / {self._max_frame_num} ({fps} FPS).") 263 | 264 | for name, data in frame_dict.items(): 265 | if name == "image": 266 | data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB) 267 | visual.append(data) 268 | elif name in observations.keys(): observations[name].append(data) 269 | 270 | if not rclpy.ok(): exit(-1) 271 | 272 | time.sleep(max(0, 1 / self._rate - (time.time() - start_time))) # 控制采样频率 273 | 274 | self.get_logger().warn(f"Fail count {fail_count}") 275 | # self._save_video(frame_count, visual, FPS) 276 | self._save_video_v2(visual, FPS) 277 | self._save_annotation(frame_count, observations) 278 | 279 | self._can_action = False 280 | break 281 | 282 | self.end = True 283 | 284 | def _save_video(self, frame_count, img_list, fps): 285 | if frame_count == 0: 286 | self.get_logger().error(f"No frame to save") 287 | return 288 | 289 | video_path = os.path.join(self._video_dir, f'{self._file_name}.mp4') 290 | 291 
| height, width, _ = img_list[0].shape 292 | out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height)) 293 | for t in range(frame_count): 294 | img = img_list[t] 295 | out.write(img) 296 | out.release() 297 | self.get_logger().info(f"Save video to: {video_path}") 298 | 299 | def _save_video_v2(self, images, fps): 300 | from moviepy.editor import ImageSequenceClip 301 | clip = ImageSequenceClip(images, fps=fps) 302 | video_path = os.path.join(self._video_dir, f'{self._file_name}.mp4') 303 | clip.write_videofile(video_path, codec='libx264') 304 | self.get_logger().info(f"Save video to: {video_path}") 305 | 306 | def _save_annotation(self, frame_count, observations): 307 | if frame_count == 0: 308 | self.get_logger().error(f"No annotation to save") 309 | return 310 | 311 | video_annotation = [] 312 | 313 | for index in range(frame_count): 314 | frame_annotation = {} 315 | 316 | # frame_annotation['stream'] = f"{self._task_name}/videos/{self._file_name}.mp4" # video name 317 | frame_annotation['stream'] = f"{self._file_name}.mp4" # video name 318 | frame_annotation['frame_inds'] = [index] # frame index 319 | 320 | frame_annotation["observations"] = {} 321 | for key, data in observations.items(): 322 | frame_annotation["observations"][key] = data[index] 323 | 324 | conversation = [] 325 | conv_human, conv_agent = {}, {} 326 | conv_human['from'] = 'human' 327 | conv_human['value'] = 'prompt' 328 | conv_agent['from'] = 'gpt' 329 | conv_agent['value'] = 'tokens' 330 | conversation.append(conv_human) 331 | conversation.append(conv_agent) 332 | frame_annotation['conversations'] = conversation 333 | 334 | video_annotation.append(frame_annotation) 335 | 336 | json_path = os.path.join(self._anno_dir, f'{self._file_name}.json') 337 | with open(json_path, 'w', encoding='utf-8') as f: 338 | json.dump(video_annotation, f, ensure_ascii=False, indent=4) 339 | self.get_logger().info(f"Save annotation to: {json_path}") 340 | 341 | def to_sec(self, frame_time): 342 | sec = frame_time.sec 343 | nanosec = frame_time.nanosec 344 | return sec + nanosec / 1e9 345 | 346 | def main(args=None): 347 | rclpy.init(args=args) 348 | 349 | node = DataCollectionNode() 350 | while rclpy.ok(): 351 | rclpy.spin_once(node, timeout_sec=1) 352 | if node.end: break 353 | 354 | node.destroy_node() 355 | rclpy.shutdown() 356 | -------------------------------------------------------------------------------- /robomaster_driver/robomaster_ros/robomaster_ros/teleoperation.py: -------------------------------------------------------------------------------- 1 | import rclpy 2 | from rclpy.node import Node 3 | 4 | from robomaster import robot 5 | 6 | from sensor_msgs.msg import Imu 7 | from pyquaternion import Quaternion 8 | from sensor_msgs.msg import Image 9 | from nav_msgs.msg import Odometry 10 | from geometry_msgs.msg import PointStamped, TransformStamped 11 | from std_msgs.msg import Bool 12 | import tf2_ros 13 | 14 | from joystick_msgs.msg import XBox360Controller 15 | from robomaster_msgs.msg import ChassisAttitute, ChassisPosition, ChassisStatus, ChassisEsc, ChassisIMU 16 | from robomaster_msgs.msg import GripperStatus 17 | from robomaster_msgs.msg import ServoStatus 18 | from robomaster_msgs.msg import RoboticArmPosition 19 | 20 | from cv_bridge import CvBridge 21 | 22 | from collections import deque 23 | from threading import Thread, Event 24 | import time 25 | 26 | 27 | INIT_POS = (180, -70) 28 | FINAL_POS = (100, 100) 29 | 30 | 31 | class TeleOperationNode(Node): 32 | 33 | def __init__(self) -> 
None: 34 | super().__init__('rm_teleoperation') 35 | self._log = False 36 | 37 | self._robot = robot.Robot() 38 | 39 | self._joint_angle = [None] * 2 40 | self._joint_speed = [None] * 2 41 | self._end_position = [None] * 2 + [0] 42 | 43 | # arm pub 44 | self._pub_arm_position = self.create_publisher(RoboticArmPosition, "/arm/raw/position", 3) # end position publisher 45 | 46 | # servo pub 47 | self._pub_servo_value = self.create_publisher(ServoStatus, "/servo/raw/value", 3) # joint value publisher 48 | 49 | # gripper pub 50 | self._pub_gripper_status = self.create_publisher(GripperStatus, "/gripper/raw/status", 3) # gripper status publisher 51 | 52 | # img pub 53 | self._pub_image = self.create_publisher(Image, "/camera/raw/image", 3) # image publisher 54 | 55 | # chassis pub 56 | self._pub_chassis_attitute = self.create_publisher(ChassisAttitute, "/chassis/raw/attitute", 3) # rpy publisher 57 | self._pub_chassis_position = self.create_publisher(ChassisPosition, "/chassis/raw/position", 3) # xyz publisher 58 | self._pub_chassis_status = self.create_publisher(ChassisStatus, "/chassis/raw/status", 3) # status publisher 59 | self._pub_chassis_esc = self.create_publisher(ChassisEsc, "/chassis/raw/esc", 3) # esc publisher 60 | self._pub_chassis_imu = self.create_publisher(ChassisIMU, "/chassis/raw/imu", 3) # imu publisher 61 | 62 | # action pub 63 | self._pub_action = self.create_publisher(Bool, '/data_collection_start', 3) # code flag publisher 64 | 65 | # joy sub 66 | self._sub_joy = self.create_subscription(XBox360Controller, '/joystick', self._joy_cb, 3) 67 | 68 | self._cv_bridge = CvBridge() 69 | 70 | self._axis_threshold = 0.1 71 | 72 | self._event = Event() 73 | 74 | # arm 75 | self._arm_deque = deque() 76 | self._arm_moving = False 77 | self._arm_thread = Thread(target=self._arm_action) 78 | self._arm_thread.start() 79 | self._arm_postion = [] 80 | 81 | # flags 82 | self._action_flag = True 83 | self._action_activate = False 84 | 85 | # main 86 | self._main_thread = Thread(target=self._start) 87 | self._main_thread.start() 88 | 89 | # chassis 90 | self._chassis_deque = deque() 91 | self._chassis_moving = False 92 | self._chassis_thread = Thread(target=self._chassis_action) 93 | self._chassis_thread.start() 94 | 95 | # gripper 96 | self._gripper_deque = deque() 97 | self._gripper_status = None 98 | self._gripper_moving = False 99 | self._gripper_command = "None" 100 | self._gripper_power = None 101 | self._gripper_thread = Thread(target=self._gripper_action) 102 | self._gripper_thread.start() 103 | 104 | self.shutdown = False 105 | self._gripper_action_end = False 106 | self._chassis_action_end = False 107 | self._arm_action_end = False 108 | 109 | def _init_sub(self, freq=50) -> bool: 110 | if freq not in (1, 5, 10, 20, 50): return False 111 | # 末端 112 | self._arm.sub_position(freq, self._arm_position_cb) 113 | # 关节 114 | self._servo.sub_servo_info(freq, self._servo_value_cb) 115 | # 夹爪 116 | self._gripper.sub_status(freq, self._gripper_status_cb) 117 | # 相机 118 | self._vision.start_video_stream(display=False) 119 | self._cam_thread = Thread(target=self._get_image) 120 | self._cam_thread.start() 121 | # 底盘姿态 122 | self._init_orientation, self._last_attitude = None, None 123 | self._chassis.sub_attitude(freq, self._chassis_attitude_cb) 124 | # 底盘位置 125 | self._chassis.sub_position(0, freq, self._odom_cb) 126 | # IMU 127 | self._chassis.sub_imu(freq, self._chassis_imu_cb) 128 | # 底盘状态 129 | self._chassis.sub_status(freq, self._chassis_status_cb) 130 | # 电调 131 | 
self._chassis.sub_esc(freq, self._chassis_esc_cb) 132 | return True 133 | 134 | def _unsub(self): 135 | self._arm.unsub_position() 136 | self._gripper.unsub_status() 137 | self._servo.unsub_servo_info() 138 | 139 | self._vision.stop_video_stream() 140 | 141 | self._chassis.unsub_attitude() 142 | self._chassis.unsub_position() 143 | self._chassis.unsub_imu() 144 | self._chassis.unsub_status() 145 | self._chassis.unsub_esc() 146 | 147 | self._robot.close() 148 | 149 | def arm_move(self, position): 150 | x, y = position 151 | self._arm.move(x, y).wait_for_completed(timeout=1) 152 | 153 | def arm_moveto(self, position): 154 | x, y = position 155 | self._arm.moveto(x, y).wait_for_completed(timeout=1) 156 | 157 | def arm_recenter(self): 158 | self._arm.moveto(5, 5).wait_for_completed() 159 | self.get_logger().info("Arm recenter") 160 | 161 | def gripper(self, action, power=50): 162 | if power not in [power for power in range(1,100)]: return False 163 | 164 | self._gripper_command = action 165 | self._gripper_power = power 166 | if action == "grab": self._gripper.close(power) 167 | elif action == "release": self._gripper.open(power) 168 | elif action == "pause": self._gripper.pause() 169 | else: return 170 | 171 | def _get_image(self): 172 | while rclpy.ok(): 173 | try: 174 | # tsn = time.time() 175 | img = self._vision.read_cv2_image(timeout=0.1, strategy='newest') 176 | # tsn2 = time.time() 177 | msg = self._cv_bridge.cv2_to_imgmsg(img, encoding="bgr8") 178 | # msg.header.stamp = tsn + (tsn2 - tsn) / 2.0 179 | msg.header.stamp = self.get_clock().now().to_msg() 180 | msg.header.frame_id = "camera_link_optical_frame" 181 | self._pub_image.publish(msg) 182 | except: 183 | pass 184 | 185 | def chassis_move(self, x_dis, y_dis, z_angle, xy_vel=0, z_w=0): 186 | self._chassis.move(x_dis, y_dis, z_angle, xy_vel, z_w).wait_for_completed(timeout=10) 187 | 188 | def chassis_wheels(self, wheel_speed, /, *, timeout=1): 189 | w1, w2, w3, w4 = wheel_speed 190 | self._chassis.drive_wheels(w1, w2, w3, w4, timeout) 191 | 192 | def chassis_speed(self, x_vel, y_vel, z_w, /, *, timeout=1): 193 | self._chassis.drive_speed(x_vel, y_vel, z_w, timeout) 194 | 195 | def _chassis_attitude_cb(self, attitude_data): 196 | yaw, pitch, roll = attitude_data 197 | if self._log: self.get_logger().info("chassis attitude: yaw:{0}".format(yaw)) 198 | 199 | msg = ChassisAttitute() 200 | msg.header.stamp = self.get_clock().now().to_msg() 201 | msg.header.frame_id = "chassis_attitude_raw" 202 | msg.yaw = yaw 203 | msg.pitch = pitch 204 | msg.roll = roll 205 | self._pub_chassis_attitute.publish(msg) 206 | 207 | self._last_attitude = Quaternion(axis=(0, 0, 1), degrees=-yaw) * Quaternion(axis=(0, 1, 0), degrees=-pitch) * Quaternion(axis=(1, 0, 0), degrees=roll) 208 | if self._init_orientation is None: self._init_orientation = self._last_attitude 209 | 210 | def _odom_cb(self, position_data): 211 | x, y, z = position_data 212 | if self._log: self.get_logger().info("chassis position: x:{0}, y:{1}".format(x, y)) # x 前正 y 右正 213 | 214 | msg = ChassisPosition() 215 | msg.header.stamp = self.get_clock().now().to_msg() 216 | msg.header.frame_id = "chassis_position_raw" 217 | msg.x = x 218 | msg.y = y 219 | msg.z = z 220 | self._pub_chassis_position.publish(msg) 221 | 222 | def _chassis_imu_cb(self, imu_data): 223 | acc_x, acc_y, acc_z, gyro_x, gyro_y, gyro_z = imu_data 224 | if self._log: print("chassis imu: acc_x:{0}, acc_y:{1}, acc_z:{2}, gyro_x:{3}, gyro_y:{4}, gyro_z:{5}".format(acc_x, acc_y, acc_z, gyro_x, gyro_y, gyro_z)) 225 | 226 | msg = 
ChassisIMU() 227 | msg.header.stamp = self.get_clock().now().to_msg() 228 | msg.header.frame_id = "chassis_imu_raw" 229 | msg.acc_x = acc_x 230 | msg.acc_y = acc_y 231 | msg.acc_z = acc_z 232 | msg.gyro_x = gyro_x 233 | msg.gyro_y = gyro_y 234 | msg.gyro_z = gyro_z 235 | self._pub_chassis_imu.publish(msg) 236 | 237 | def _chassis_status_cb(self, status_info): 238 | static_flag, up_hill, down_hill, on_slope, pick_up, slip_flag, impact_x, impact_y, impact_z, roll_over, hill_static = status_info 239 | if self._log: print("chassis status: static_flag:{0}, up_hill:{1}, down_hill:{2}, on_slope:{3}, " 240 | "pick_up:{4}, slip_flag:{5}, impact_x:{6}, impact_y:{7}, impact_z:{8}, roll_over:{9}, " 241 | "hill_static:{10}".format(static_flag, up_hill, down_hill, on_slope, pick_up, 242 | slip_flag, impact_x, impact_y, impact_z, roll_over, hill_static)) 243 | 244 | msg = ChassisStatus() 245 | msg.header.stamp = self.get_clock().now().to_msg() 246 | msg.header.frame_id = "chassis_status_raw" 247 | msg.is_static = bool(static_flag) 248 | msg.up_hill = bool(up_hill) 249 | msg.down_hill = bool(down_hill) 250 | msg.on_slope = bool(on_slope) 251 | msg.is_pick_up = bool(pick_up) 252 | msg.slip = bool(slip_flag) 253 | msg.impact_x = bool(impact_x) 254 | msg.impact_y = bool(impact_y) 255 | msg.impact_z = bool(impact_z) 256 | msg.roll_over = bool(roll_over) 257 | msg.hill = bool(hill_static) 258 | self._pub_chassis_status.publish(msg) 259 | 260 | def _chassis_esc_cb(self, esc_data): 261 | speed, angle, timestamp, state = esc_data 262 | if self._log: print("chassis esc: speed:{0}, angle:{1}, timestamp:{2}, state:{3}".format(speed, angle, timestamp, state)) 263 | 264 | msg = ChassisEsc() 265 | msg.header.stamp = self.get_clock().now().to_msg() 266 | msg.header.frame_id = "chassis_esc_raw" 267 | # 前左 268 | msg.front_right.angle = angle[0] 269 | msg.front_right.speed = speed[0] 270 | msg.front_right.time_stamp = timestamp[0] 271 | msg.front_right.status = state[0] 272 | # 前右 273 | msg.front_left.angle = angle[1] 274 | msg.front_left.speed = speed[1] 275 | msg.front_left.time_stamp = timestamp[1] 276 | msg.front_left.status = state[1] 277 | # 后左 278 | msg.back_left.angle = angle[2] 279 | msg.back_left.speed = speed[2] 280 | msg.back_left.time_stamp = timestamp[2] 281 | msg.back_left.status = state[2] 282 | # 后右 283 | msg.back_right.angle = angle[3] 284 | msg.back_right.speed = speed[3] 285 | msg.back_right.time_stamp = timestamp[3] 286 | msg.back_right.status = state[3] 287 | self._pub_chassis_esc.publish(msg) 288 | 289 | def _arm_position_cb(self, position_data): 290 | pos_x, pos_y = position_data 291 | self._arm_postion = [pos_x, pos_y] 292 | if self._log: self.get_logger().info("Arm: x = {0}, y = {1}".format(pos_x, pos_y)) 293 | 294 | msg = RoboticArmPosition() 295 | msg.header.stamp = self.get_clock().now().to_msg() 296 | msg.header.frame_id = "robotic_arm_position" 297 | msg.x = pos_x 298 | msg.y = pos_y 299 | msg.z = 0 300 | self._pub_arm_position.publish(msg) 301 | 302 | def _gripper_status_cb(self, status_info): 303 | self._gripper_status = status_info 304 | if self._log: print("Gripper: {}".format(self._gripper_status)) 305 | 306 | msg = GripperStatus() 307 | msg.header.stamp = self.get_clock().now().to_msg() 308 | msg.header.frame_id = "gripper_status_raw" 309 | msg.status = self._gripper_status 310 | msg.power = self._gripper_power 311 | msg.action = self._gripper_command 312 | self._pub_gripper_status.publish(msg) 313 | 314 | def _servo_value_cb(self, servo_data): 315 | status, speed, angle = servo_data 
316 | if self._log: print("Servo: status: {}, speed: {}, angle: {}".format(status, speed, angle)) 317 | 318 | msg = ServoStatus() 319 | msg.header.stamp = self.get_clock().now().to_msg() 320 | msg.servo_0.valid = bool(status[0]) 321 | msg.servo_0.value = angle[0] 322 | msg.servo_0.speed = speed[0] 323 | msg.servo_1.valid = bool(status[1]) 324 | msg.servo_1.value = angle[1] 325 | msg.servo_1.speed = speed[1] 326 | msg.servo_2.valid = bool(status[2]) 327 | msg.servo_2.value = angle[2] 328 | msg.servo_2.speed = speed[2] 329 | msg.servo_3.valid = bool(status[3]) 330 | msg.servo_3.value = angle[3] 331 | msg.servo_3.speed = speed[3] 332 | 333 | self._pub_servo_value.publish(msg) 334 | 335 | def _start(self): 336 | try: 337 | while not self._robot.initialize(conn_type="sta", sn="3JKCJC400301RW"): 338 | self.get_logger().error("Initialize fail") 339 | self._robot.set_robot_mode(mode=robot.FREE) 340 | self.get_logger().info("RM init") 341 | 342 | self._arm = self._robot.robotic_arm 343 | self._gripper = self._robot.gripper 344 | self._servo = self._robot.servo 345 | self._vision = self._robot.camera 346 | self._chassis = self._robot.chassis 347 | 348 | self._init_sub() 349 | 350 | self._event.set() 351 | 352 | self.get_logger().info("Press START button to start") 353 | while rclpy.ok(): 354 | if self._action_activate: break 355 | self.get_logger().info("Press BACK button to shut down") 356 | while rclpy.ok(): 357 | if not self._action_activate: break 358 | except KeyboardInterrupt: 359 | self.get_logger().error("Shut down") 360 | finally: 361 | while not self._gripper_action_end or not self._chassis_action_end or not self._arm_action_end: pass 362 | self._unsub() 363 | self.get_logger().info("Node End") 364 | self.shutdown = True 365 | 366 | # core code 367 | def _joy_cb(self, msg: XBox360Controller): 368 | current_time = time.time() 369 | 370 | # data collection 371 | if msg.button_start == 1 and self._action_flag: 372 | action = Bool() 373 | action.data = True 374 | self._pub_action.publish(action) 375 | self._action_activate = True 376 | self._action_flag = False 377 | if msg.button_back == 1 and not self._action_flag: 378 | action = Bool() 379 | action.data = False 380 | self._pub_action.publish(action) 381 | self._action_activate = False 382 | self._action_flag = True 383 | 384 | # arm 385 | if msg.hat_x != 0 or msg.hat_y != 0: 386 | # k = 10 387 | k = 20 388 | if len(self._arm_deque) > 20: self._arm_deque.popleft() 389 | self._arm_deque.append((current_time, self.arm_move, (k*msg.hat_y, k*msg.hat_x))) 390 | # if msg.button_rb == 1: self._arm_deque.append((current_time, self.arm_moveto, (75, 145))) 391 | # if msg.button_lb == 1: self._arm_deque.append((current_time, self.arm_moveto, (200, -50))) 392 | 393 | # gripper 394 | if msg.button_a == 1: self._gripper_deque.append((current_time, self.gripper, ("release", 70))) 395 | if msg.button_b == 1: self._gripper_deque.append((current_time, self.gripper, ("grab", 15))) 396 | # if msg.button_x == 1: self._gripper_deque.append((current_time, self.gripper, ("pause", 50))) 397 | 398 | # chassis 399 | if msg.button_rs == 1: k, k_w = 0.4, 40 400 | # else: k, k_w = 0.1, 15 401 | else: k, k_w = 0.1, 5 402 | x_vel = -msg.axis_rs_y if abs(msg.axis_rs_y) >= self._axis_threshold else 0 403 | y_vel = msg.axis_rs_x if abs(msg.axis_rs_x) >= self._axis_threshold else 0 404 | delta = msg.axis_lt - msg.axis_rt 405 | z_w = -delta if abs(delta) >= self._axis_threshold else 0 406 | # z_w = 0 407 | if x_vel == 0 and y_vel == 0 and z_w == 0 and not self._chassis_moving: 
return 408 | if x_vel == 0 and y_vel == 0 and z_w == 0: self._chassis_moving = False 409 | else: self._chassis_moving = True 410 | self._chassis_deque.append((current_time, self.chassis_speed, (x_vel*k, y_vel*k, z_w*k_w))) 411 | 412 | def _gripper_action(self): 413 | self._event.wait() 414 | self.gripper("release", 70) 415 | try: 416 | while rclpy.ok() and not self._action_activate: pass 417 | self.get_logger().info("Gripper action activate") 418 | while rclpy.ok(): 419 | if not self._action_activate: break 420 | if not self._gripper_deque: continue 421 | 422 | current_time = time.time() 423 | while self._gripper_deque: 424 | stamp, func, args = self._gripper_deque.popleft() 425 | if stamp > current_time: break 426 | func(*args) 427 | except KeyboardInterrupt: 428 | self.get_logger().error("Gripper action shut down") 429 | finally: 430 | self.gripper("release", 70) 431 | self.get_logger().info("Gripper action End") 432 | self._gripper_action_end = True 433 | 434 | def _chassis_action(self): 435 | self._event.wait() 436 | try: 437 | while rclpy.ok() and not self._action_activate: pass 438 | self.get_logger().info("Chassis action activate") 439 | while rclpy.ok(): 440 | if not self._action_activate: break 441 | if not self._chassis_deque: continue 442 | 443 | current_time = time.time() 444 | while self._chassis_deque: 445 | stamp, func, args = self._chassis_deque.popleft() 446 | if stamp > current_time: break 447 | func(*args) 448 | except KeyboardInterrupt: 449 | self.get_logger().error("Chassis action shut down") 450 | finally: 451 | self.get_logger().info("Chassis action End") 452 | self._chassis_action_end = True 453 | 454 | def _arm_action(self): 455 | self._event.wait() 456 | # self.arm_moveto((100, 100)) 457 | # time.sleep(0.5) 458 | self.arm_moveto(INIT_POS) 459 | time.sleep(0.5) 460 | try: 461 | while rclpy.ok() and not self._action_activate: pass 462 | self.get_logger().info("Arm action activate") 463 | while rclpy.ok(): 464 | if not self._action_activate: break 465 | if not self._arm_deque: continue 466 | 467 | current_time = time.time() 468 | while self._arm_deque: 469 | stamp, func, position = self._arm_deque.popleft() 470 | if stamp > current_time: break 471 | func(position) 472 | except KeyboardInterrupt: 473 | self.get_logger().error("Arm action shut down") 474 | finally: 475 | self.arm_moveto(FINAL_POS) 476 | self.get_logger().info("Arm action End") 477 | self._arm_action_end = True 478 | 479 | 480 | def main(args=None): 481 | rclpy.init(args=args) 482 | 483 | node = TeleOperationNode() 484 | try: 485 | while rclpy.ok(): 486 | rclpy.spin_once(node, timeout_sec=1) 487 | if node.shutdown: break 488 | except KeyboardInterrupt: pass 489 | finally: 490 | node.destroy_node() 491 | rclpy.shutdown() 492 | -------------------------------------------------------------------------------- /robomaster_driver/robomaster_ros/setup.cfg: -------------------------------------------------------------------------------- 1 | [develop] 2 | script-dir=$base/lib/robomaster_ros 3 | [install] 4 | install-scripts=$base/lib/robomaster_ros 5 | -------------------------------------------------------------------------------- /robomaster_driver/robomaster_ros/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import glob 3 | 4 | package_name = 'robomaster_ros' 5 | 6 | setup( 7 | name=package_name, 8 | version='0.0.0', 9 | packages=[package_name], 10 | data_files=[ 11 | ('share/ament_index/resource_index/packages', 12 | 
['resource/' + package_name]), 13 | ('share/' + package_name, ['package.xml']), 14 | ('share/' + package_name + '/launch', glob.glob('launch/*.launch')), 15 | ('share/' + package_name + '/launch', glob.glob('launch/*.launch.py')), 16 | ('share/' + package_name + '/config', glob.glob('config/*')) 17 | ], 18 | install_requires=['setuptools', 'numpy', 'numpy-quaternion', 'pyyaml', 'robomaster'], 19 | zip_safe=True, 20 | maintainer='weiheng zhong', 21 | maintainer_email='weiheng_z@bit.edu.cn', 22 | description='TODO: Package description', 23 | license='TODO: License declaration', 24 | tests_require=['pytest'], 25 | entry_points={ 26 | 'console_scripts': [ 27 | 'teleoperation = robomaster_ros.teleoperation:main', 28 | 'data_collection = robomaster_ros.data_collection:main', 29 | ], 30 | }, 31 | ) 32 | -------------------------------------------------------------------------------- /robomatrix_client/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | robomatrix_client 5 | 0.0.0 6 | RoboMatrix client for task planning and managing 7 | jzian 8 | TODO: License declaration 9 | 10 | rclpy 11 | 12 | ament_copyright 13 | ament_flake8 14 | ament_pep257 15 | python3-pytest 16 | 17 | 18 | ament_python 19 | 20 | 21 | -------------------------------------------------------------------------------- /robomatrix_client/resource/robomatrix_client: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/robomatrix_client/resource/robomatrix_client -------------------------------------------------------------------------------- /robomatrix_client/robomatrix_client/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/robomatrix_client/robomatrix_client/__init__.py -------------------------------------------------------------------------------- /robomatrix_client/robomatrix_client/agent_propmts.py: -------------------------------------------------------------------------------- 1 | RULE_PROMPT = 'In the skill library, represents using the Vision-Language Model, and represents using the shooting model. \ 2 | In the final output, `` must not be discarded.' 
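For orientation: robomatrix_client.py (further below in this dump) interpolates RULE_PROMPT together with the SKILLS list from agent_skills.py into the LLM planner's system prompt. A minimal sketch of that composition, with the template wording heavily abbreviated; `build_system_prompt` is a hypothetical helper, not part of the package:

# Sketch only; assumes robomatrix_client is importable.
from robomatrix_client.agent_propmts import RULE_PROMPT
from robomatrix_client.agent_skills import SKILLS

def build_system_prompt() -> str:
    # Mirrors the system_template assembled in robomatrix_client.py, abbreviated.
    return (
        f"You are an intelligent mobile robot. The skills you have are {SKILLS}. "
        f"{RULE_PROMPT} Break the task I provide into actionable steps."
    )

print(build_system_prompt())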
-------------------------------------------------------------------------------- /robomatrix_client/robomatrix_client/agent_skills.py: --------------------------------------------------------------------------------
1 | SKILLS = [': move to ',
2 | ': grasp ',
3 | ': position Over the ',
4 | ': release the ',
5 | ': Open the drawer',
6 | ': Close the drawer',
7 | ': shooting ',
8 | ]
-------------------------------------------------------------------------------- /robomatrix_client/robomatrix_client/audio2text.py: --------------------------------------------------------------------------------
1 | from pydub import AudioSegment
2 | import assemblyai as aai
3 | import noisereduce as nr
4 | import numpy as np
5 | from scipy.io import wavfile
6 | import os
7 |
8 |
9 | class AudioProcessor:
10 | def __init__(self, audio_file, api_key):
11 | self.audio_file = audio_file
12 | self.api_key = api_key
13 |
14 | # Get the directory of the input audio file
15 | self.audio_dir = os.path.dirname(audio_file)
16 |
17 | # Define paths for intermediate and processed files
18 | self.temp_file = os.path.join(self.audio_dir, "temp.wav")
19 | self.reduced_file = os.path.join(self.audio_dir, "reduced_noise.wav")
20 | self.enhanced_file = os.path.join(self.audio_dir, "enhanced_audio.wav")
21 |
22 | def process_audio(self):
23 | # Convert the input audio file to WAV format
24 | audio = AudioSegment.from_file(self.audio_file)
25 | audio.export(self.temp_file, format="wav")
26 |
27 | # Read the WAV file and apply noise reduction
28 | rate, data = wavfile.read(self.temp_file)
29 | reduced_noise = nr.reduce_noise(y=data, sr=rate)
30 |
31 | # Save the noise-reduced audio
32 | wavfile.write(self.reduced_file, rate, reduced_noise)
33 |
34 | # Increase the volume of the noise-reduced audio
35 | cleaned_audio = AudioSegment.from_file(self.reduced_file)
36 | louder_audio = cleaned_audio + 30 # Increase volume by 30dB
37 | louder_audio.export(self.enhanced_file, format="wav")
38 |
39 | print(f"Processed audio saved at: {self.enhanced_file}")
40 |
41 | def transcribe_audio(self):
42 | # Set AssemblyAI API key
43 | aai.settings.api_key = self.api_key
44 | transcriber = aai.Transcriber()
45 |
46 | # Transcribe the enhanced audio
47 | transcript = transcriber.transcribe(self.enhanced_file)
48 | return transcript.text
49 |
50 |
51 | # if __name__ == "__main__":
52 | # # Input audio file path (can be relative or absolute)
53 | # input_audio_file = "/home/jzian/ws/RM/test.wav" # Replace with your audio file path
54 | # api_key = "aed2e39f5b324388b68db696db3c9ad3"
55 |
56 | # processor = AudioProcessor(audio_file=input_audio_file, api_key=api_key)
57 | # processor.process_audio()
58 |
59 | # # Transcribe the audio and print the transcript
60 | # transcript_text = processor.transcribe_audio()
61 | # print(transcript_text)
62 |
-------------------------------------------------------------------------------- /robomatrix_client/robomatrix_client/robomatrix_client.py: --------------------------------------------------------------------------------
1 | import os
2 | import yaml
3 | import re
4 | import json
5 | import sys
6 | import threading
7 | import requests
8 | import ast
9 | import logging
10 | from rich.logging import RichHandler
11 | from time import time
12 | from typing import List, Dict, Any
13 |
14 | from PIL import Image as PILImage
15 |
16 | import openai
17 | from langchain_core.prompts import ChatPromptTemplate
18 | from langchain_core.output_parsers import StrOutputParser
19 | from langchain_openai import ChatOpenAI, OpenAI
20 | from langchain_core.messages import HumanMessage, SystemMessage
21 |
22 | from gdino import GroundingDINOAPIWrapper, visualize
23 |
24 | from .agent_propmts import RULE_PROMPT
25 | from .agent_skills import SKILLS, META_SKILLS, TASK
26 | from .audio2text import AudioProcessor
27 | import ipdb # noqa
28 |
29 | # import ros2
30 | import rclpy
31 | from rclpy.node import Node
32 | from sensor_msgs.msg import Image
33 | from cv_bridge import CvBridge, CvBridgeError
34 | from robomatrix_interface.srv import Vla, Audio
35 | import requests
36 | import subprocess
37 | import cv2
38 | timeout_duration = 10 * 60 # 10 minutes in seconds
39 |
40 | # # Basic logging configuration
41 | # logging.basicConfig(
42 | # level=logging.DEBUG, # log level
43 | # format="%(asctime)s - %(levelname)s - %(message)s", # log output format
44 | # datefmt="%Y-%m-%d %H:%M:%S"
45 | # )
46 |
47 | def check_network():
48 | """Check if the network is accessible by trying to access Google and Baidu"""
49 | try:
50 | response = requests.get("https://www.google.com", timeout=5)
51 | return response.status_code == 200
52 | except requests.ConnectionError:
53 | try:
54 | response = requests.get("https://www.baidu.com", timeout=5)
55 | return response.status_code == 200
56 | except requests.ConnectionError:
57 | return False
58 |
59 | def run_proxy_command():
60 | """Run the proxy command to set up the network proxy"""
61 | command = "eval $(curl -s http://deploy.i.brainpp.cn/httpproxy)"
62 | try:
63 | subprocess.run(command, shell=True, check=True)
64 | print("Proxy command executed successfully.")
65 | except subprocess.CalledProcessError as e:
66 | print(f"Failed to execute proxy command: {e}")
67 |
68 | # Configure the logger
69 | logging.basicConfig(
70 | level="INFO",
71 | format="%(message)s",
72 | datefmt="%Y-%m-%d %H:%M:%S",
73 | handlers=[RichHandler()]
74 | )
75 |
76 | logger = logging.getLogger("rich_logger")
77 |
78 | root_path = os.getcwd()
79 | cfg_path = os.path.join(root_path, "src", "config.yaml")
80 |
81 | with open(cfg_path, 'r') as f:
82 | OPENAI_CONFIG = yaml.load(f, Loader=yaml.FullLoader)
83 |
84 | Grounding_DINO_15_API = OPENAI_CONFIG['Grounding_DINO_1.5_API']
85 |
86 | class RoboMatrix(Node):
87 |
88 | def __init__(self, api_type='ChatOpenAI'):
89 | super().__init__('rmm')
90 | # get image from ros2 img_topic
91 | self._sub_image = self.create_subscription(Image, "/camera/raw/image", self._image_cb, 1000)
92 | self._cv_bridge = CvBridge()
93 | self.cur_image = None
94 |
95 | # create client
96 | self.vla_client = self.create_client(Vla, 'vla_service')
97 | self.audio_client = self.create_client(Audio, 'audio_service')
98 | while not self.vla_client.wait_for_service(timeout_sec=1.0):
99 | self.get_logger().info('VLA Service not available, waiting again...')
100 | while not self.audio_client.wait_for_service(timeout_sec=1.0):
101 | self.get_logger().info('Audio Service not available, waiting again...')
102 | self.vla_req = Vla.Request()
103 | self.audio_req = Audio.Request()
104 |
105 | self.audio_path = "src/robomatrix_client/robomatrix_client/audios/task.wav" # Replace with your audio file path
106 | api_key = OPENAI_CONFIG['AssemblyAI_API']
107 |
108 | self.processor = AudioProcessor(audio_file=self.audio_path, api_key=api_key)
109 |
110 | # Init config
111 | self.api_type = api_type
112 | if api_type == 'OpenAI':
113 | openai.api_key = os.environ["OPENAI_API_KEY"]
114 | openai.base_url = os.environ["OPENAI_API_BASE"]
115 | model = OpenAI(model_name="text-davinci-003", max_tokens=1024)
116 | elif api_type == 'ChatOpenAI':
117 | print('LLM: ChatOpenAI')
118 | os.environ["OPENAI_API_KEY"] = OPENAI_CONFIG['OPENAI_KEY'] 119 | os.environ["OPENAI_API_BASE"] = OPENAI_CONFIG['OPENAI_API_BASE'] 120 | model = ChatOpenAI( 121 | temperature=0, 122 | model_name='gpt-4o-mini', 123 | # model_name='gpt-4o', 124 | # model_name='o1-mini', 125 | max_tokens=1024, 126 | # request_timeout=30 127 | timeout=30 128 | ) 129 | elif api_type == 'STEP1': 130 | print('LLM: STEP1') 131 | os.environ["OPENAI_API_KEY"] = OPENAI_CONFIG['STEP1_API_KEY'] 132 | os.environ["OPENAI_API_BASE"] = OPENAI_CONFIG['STEP1_API_BASE'] 133 | model = ChatOpenAI(model_name="open_prod_model") 134 | 135 | # Task system template 136 | system_template = f"""You are an intelligent mobile robot, The skills you have are {SKILLS}. \ 137 | {RULE_PROMPT} \ 138 | The current environment is an office setting. \ 139 | Based on the tasks I provide, help me break down the tasks into multiple actionable steps. \ 140 | Let's think step by step. Here is an example. "Place the red cola can in the trash bin." Once you made a final result, output it in the following format:\n 141 | ``` 142 | "steps_1":": Move to the red cola can", 143 | "steps_2":": Grasp the red cola can", 144 | "steps_3":": Move to the trash bin", 145 | "steps_4":": Position the red cola can over the trash bin", 146 | "steps_5":": Release the red cola can" 147 | ```\n 148 | """ 149 | system_template_obj = "Extract the object in the sentence. Example: 'Grasp the red cola can', output the 'cola can' without any other word." 150 | 151 | print(system_template) 152 | prompt_template = ChatPromptTemplate.from_messages([ 153 | ('system', system_template), 154 | ('user', '{text}') 155 | ]) 156 | 157 | prompt_obj = ChatPromptTemplate.from_messages([ 158 | ('system', system_template_obj), 159 | ('user', '{text}') 160 | ]) 161 | # 3. Create parser 162 | parser = StrOutputParser() 163 | # 4. 
Create chain
164 | self.chain = prompt_template | model | parser # task planning
165 | self.chain_obj = prompt_obj | model | parser # split obj for gdino detection
166 |
167 |
168 | def _image_cb(self, msg: Image):
169 |
170 | self.cur_image = self._cv_bridge.imgmsg_to_cv2(msg, 'passthrough')
171 |
172 | def split_task(self, text):
173 | print("Start split task")
174 | action = self.chain.invoke({"text": text})
175 | print(action)
176 | return action
177 |
178 | def get_gdino(self, text=''):
179 | gdino = GroundingDINOAPIWrapper(Grounding_DINO_15_API)
180 | if self.cur_image is None:
181 | print('Can not get img!')
182 | return None
183 | prompts = dict(image=self.cur_image, prompt=text)
184 | results = gdino.inference(prompts)
185 |
186 | return results
187 |
188 | def vis_gdino(self, img_path, results, save_path=None):
189 | # now visualize the results
190 | image_pil = PILImage.open(img_path)
191 | image_pil = visualize(image_pil, results)
192 | # dump the image to the disk
193 | if save_path is None:
194 | save_path = img_path.replace('.jpg', '_output.jpg')
195 | image_pil.save(save_path)
196 | print(f'The output image has been saved to {save_path}')
197 |
198 |
199 |
200 | def format_output(self, result, type='json'):
201 | # ipdb.set_trace()
202 | if type == 'json':
203 | try:
204 | if "`" in result:
205 | # result = result.replace("`", "")
206 | result = re.findall(r'```(.*?)```', result, re.DOTALL)
207 | json_str = "{" + result[0] + "}"
208 | result = json.loads(json_str)
209 | # ipdb.set_trace()
210 | return result
211 | except ValueError:
212 | # ipdb.set_trace()
213 | logging.error("This is an incorrect json format.")
214 | return {}
215 |
216 | def core_loop(self):
217 |
218 | while True:
219 | # ---- call server ---- #
220 | self.audio_req.path = self.audio_path
221 | res = self.audio_client.call_async(self.audio_req)
222 | rclpy.spin_until_future_complete(self, res)
223 | # Transcribe the audio
224 | self.processor.process_audio()
225 | input_text = self.processor.transcribe_audio()
226 | print("Identify task: ", input_text)
227 |
228 | # print("User: ", end="")
229 | # input_text = input()
230 | # if input_text == '':
231 | # # input_text = "Place the red cola can into the white box."
232 | # input_text = "Put the red coke can in the open drawer and then close the drawer"
233 | # print(input_text)
234 |
235 | timer = threading.Timer(timeout_duration, exit_program)
236 | timer.start()
237 |
238 | task_steps = self.split_task(input_text)
239 | print("Task steps: \n", task_steps)
240 | task_steps_dict = self.format_output(task_steps)
241 | print("Task steps dict: \n", task_steps_dict)
242 |
243 | # import pdb;pdb.set_trace()
244 | _, obj_prompt = task_steps_dict['steps_1'].split(':')
245 | obj = self.chain_obj.invoke({"text": obj_prompt.strip()})
246 | print(obj)
247 | info = self.get_gdino(obj)
248 | confidence = any([score > 0.5 for score in info['scores']])
249 | if not confidence:
250 | print(f"Did not observe the {obj}! Task reset!")
Task reset!") 251 | continue 252 | 253 | for i,item in enumerate(task_steps_dict.values()): 254 | task, prompt = item.split(':') 255 | print(f"Step {i+1}: {item}") 256 | print(f"task: {task}", f"prompt: {prompt.strip()}") 257 | 258 | self.vla_req.task = TASK[task] 259 | self.vla_req.msg = prompt.strip() 260 | 261 | # ---- call sever ---- # 262 | self.future = self.vla_client.call_async(self.vla_req) 263 | rclpy.spin_until_future_complete(self, self.future) 264 | # ----- # 265 | print("Processed input and reset timer.") 266 | 267 | 268 | 269 | # if no input for 10 minutes, exit the program 270 | def exit_program(): 271 | print(f"No new input for {timeout_duration} seconds. Exiting...") 272 | sys.exit(0) 273 | # os._exit(0) 274 | 275 | def main(): 276 | rclpy.init(args=None) 277 | # Check if the network is accessible 278 | if check_network(): 279 | print("Network is working") 280 | else: 281 | print("Network connection failed") 282 | run_proxy_command() 283 | 284 | logging.info("Init RoboMatrix...") 285 | robot_matrix = RoboMatrix() 286 | 287 | # setting up the timer 288 | timer = threading.Timer(timeout_duration, exit_program) 289 | 290 | # start timer 291 | timer.start() 292 | loop = threading.Thread(target=robot_matrix.core_loop) 293 | loop.start() 294 | 295 | if __name__=="__main__": 296 | main() -------------------------------------------------------------------------------- /robomatrix_client/setup.cfg: -------------------------------------------------------------------------------- 1 | [develop] 2 | script-dir=$base/lib/robomatrix_client 3 | [install] 4 | install-scripts=$base/lib/robomatrix_client 5 | -------------------------------------------------------------------------------- /robomatrix_client/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | package_name = 'robomatrix_client' 4 | 5 | setup( 6 | name=package_name, 7 | version='0.0.0', 8 | packages=[package_name], 9 | data_files=[ 10 | ('share/ament_index/resource_index/packages', 11 | ['resource/' + package_name]), 12 | ('share/' + package_name, ['package.xml']), 13 | ], 14 | install_requires=['setuptools'], 15 | zip_safe=True, 16 | maintainer='jzian', 17 | maintainer_email='jiangzhou@megvii.com', 18 | description='TODO: Package description', 19 | license='TODO: License declaration', 20 | tests_require=['pytest'], 21 | entry_points={ 22 | 'console_scripts': [ 23 | "robomatrix_client = robomatrix_client.main:robomatrix_client" 24 | ], 25 | }, 26 | ) 27 | -------------------------------------------------------------------------------- /robomatrix_interface/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | project(robomatrix_interface) 3 | 4 | if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") 5 | add_compile_options(-Wall -Wextra -Wpedantic) 6 | endif() 7 | 8 | # cmake_policy(SET CMP0148 NEW) 9 | 10 | # find dependencies 11 | find_package(ament_cmake REQUIRED) 12 | # uncomment the following section in order to fill in 13 | # further dependencies manually. 
14 | # find_package( REQUIRED) 15 | 16 | if(BUILD_TESTING) 17 | find_package(ament_lint_auto REQUIRED) 18 | # the following line skips the linter which checks for copyrights 19 | # comment the line when a copyright and license is added to all source files 20 | set(ament_cmake_copyright_FOUND TRUE) 21 | # the following line skips cpplint (only works in a git repo) 22 | # comment the line when this package is in a git repo and when 23 | # a copyright and license is added to all source files 24 | set(ament_cmake_cpplint_FOUND TRUE) 25 | ament_lint_auto_find_test_dependencies() 26 | endif() 27 | 28 | find_package(rosidl_default_generators REQUIRED) 29 | 30 | ################################### rm_srv 31 | 32 | set(MODULE_NAME robomatrix_srv) 33 | 34 | file(GLOB ${MODULE_NAME}_FILES "${CMAKE_CURRENT_SOURCE_DIR}/srv/*.srv") 35 | 36 | set(${MODULE_NAME}_FILE_NAMES) 37 | 38 | foreach(file ${${MODULE_NAME}_FILES}) 39 | get_filename_component(file_name ${file} NAME) 40 | list(APPEND ${MODULE_NAME}_FILE_NAMES "srv/${file_name}") 41 | endforeach() 42 | 43 | ################################ generate the interface 44 | ament_export_dependencies(std_msgs) 45 | ament_export_dependencies(rosidl_default_generators) 46 | rosidl_generate_interfaces(${PROJECT_NAME} 47 | ${robomatrix_srv_FILE_NAMES} 48 | # DEPENDENCIES geometry_msgs 49 | # DEPENDENCIES std_msgs 50 | ) 51 | ament_package() 52 | -------------------------------------------------------------------------------- /robomatrix_interface/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | robomatrix_interface 5 | 1.0.0 6 | The Ros2 Interfaces for RM 7 | blackbird 8 | BBLAB 9 | 10 | ament_cmake 11 | ament_lint_auto 12 | ament_lint_common 13 | 14 | 15 | rosidl_default_generators 16 | action_msgs 17 | 18 | 19 | 20 | 21 | rosidl_default_runtime 22 | rosidl_interface_packages 23 | 24 | rosidl_default_generators 25 | 26 | 27 | 28 | ament_cmake 29 | 30 | 31 | -------------------------------------------------------------------------------- /robomatrix_interface/srv/Audio.srv: -------------------------------------------------------------------------------- 1 | string path 2 | --- 3 | bool success 4 | -------------------------------------------------------------------------------- /robomatrix_interface/srv/Vla.srv: -------------------------------------------------------------------------------- 1 | int32 task 2 | string msg 3 | --- 4 | bool success 5 | string msg -------------------------------------------------------------------------------- /robomatrix_server/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | robomatrix_server 5 | 0.0.0 6 | TODO: Package description 7 | jzian 8 | TODO: License declaration 9 | 10 | example_interfaces 11 | std_msgs 12 | rclpy 13 | robomatrix_interface 14 | pysoem 15 | py_trees 16 | 17 | rosidl_default_generators 18 | rosidl_default_runtime 19 | 20 | 21 | ament_copyright 22 | ament_flake8 23 | ament_pep257 24 | python3-pytest 25 | 26 | 27 | rosidl_interface_packages 28 | 29 | 30 | ament_python 31 | 32 | 33 | -------------------------------------------------------------------------------- /robomatrix_server/resource/robomatrix_server: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/robomatrix_server/resource/robomatrix_server 
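The two service definitions above (Audio.srv, Vla.srv) are the whole contract between client and server. A minimal stand-alone caller sketch, assuming the robomatrix_server node is running; the task id and prompt below are illustrative values, and `rmx_demo_client` is a hypothetical node name:

import rclpy
from rclpy.node import Node
from robomatrix_interface.srv import Vla

def main():
    rclpy.init()
    node = Node('rmx_demo_client')
    client = node.create_client(Vla, 'vla_service')  # same name the server advertises
    while not client.wait_for_service(timeout_sec=1.0):
        node.get_logger().info('vla_service not available, waiting...')
    request = Vla.Request()
    request.task = 0  # int32 skill id; its meaning is defined by the server-side TASK map
    request.msg = 'Move to the red cola can'
    future = client.call_async(request)
    rclpy.spin_until_future_complete(node, future)
    result = future.result()
    node.get_logger().info(f'success={result.success}, msg={result.msg}')
    node.destroy_node()
    rclpy.shutdown()

if __name__ == '__main__':
    main()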
-------------------------------------------------------------------------------- /robomatrix_server/robomatrix_server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/robomatrix_server/robomatrix_server/__init__.py -------------------------------------------------------------------------------- /robomatrix_server/robomatrix_server/resource/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/robomatrix_server/robomatrix_server/resource/__init__.py -------------------------------------------------------------------------------- /robomatrix_server/robomatrix_server/resource/funcs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import time 4 | import cv2 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from .utils import HOME_PATH, load_json, save_json, load_yaml, save_yaml, log_info, log_warn, log_error, extract_last_number, save_video_v2 8 | from moviepy.editor import VideoFileClip 9 | from concurrent.futures import ThreadPoolExecutor, as_completed 10 | import uuid 11 | from robomaster import robot 12 | from collections import deque 13 | from threading import Thread 14 | import requests 15 | from sensor_msgs.msg import Image 16 | from cv_bridge import CvBridge 17 | 18 | class BaseTokenizeDataset(): 19 | 20 | def __init__(self, dataset_directory) -> None: 21 | self._dataset_dir = dataset_directory 22 | 23 | def check_dataset(self): 24 | if not os.path.exists(self._dataset_dir): return False 25 | 26 | yaml_file = "dataset_information.yaml" 27 | if yaml_file not in os.listdir(self._dataset_dir): return False 28 | 29 | data = load_yaml(os.path.join(self._dataset_dir, yaml_file)) 30 | 31 | self._task_name = data["dataset_name"] 32 | self._episode_info = data["episode_info"] 33 | self._episode_names = sorted(list(data["episode_info"].keys()), key=lambda name: extract_last_number(name, "")) 34 | return True 35 | 36 | def check_distribution(self): 37 | if not os.path.exists(self._dataset_dir): return False 38 | 39 | json_file = "data_distribution.json" 40 | if json_file not in os.listdir(self._dataset_dir): return False 41 | 42 | data = load_json(os.path.join(self._dataset_dir, json_file)) 43 | 44 | self._data_distribution = data 45 | return True 46 | 47 | 48 | class Decoder(): 49 | 50 | def __init__(self, name, data_distribution, vocab_size, log=False) -> None: 51 | self._name = name 52 | self._data_distribution = data_distribution 53 | self._vocab_size = vocab_size 54 | 55 | self._min_value = data_distribution[0] 56 | self._max_value = data_distribution[1] 57 | 58 | self._log = log 59 | 60 | def decode(self, token: str): 61 | if not 0 <= int(token) <= 255: 62 | if self._log: print(f"[Decoder {self._name}] Wrong token: {token}") 63 | return None 64 | tile = np.linspace(self._min_value, self._max_value, self._vocab_size)[:, None] 65 | value = tile[int(token)][0] 66 | if self._log: print(f"[Decoder {self._name}] token {token} -> value {value}") 67 | return value 68 | 69 | 70 | class BaseRobotInference(): 71 | 72 | def __init__(self, 73 | node, 74 | # sn="159CK9W0060AHS", 75 | sn="3JKCJC400301RW", 76 | dataset_dir=os.path.join(HOME_PATH, "RoboMatrixDatasetsDIY"), 77 | task_name="", 78 | url="http://localhost:7893/process_frame", 79 | 
arm_init_pose=(180, -70),
80 | ) -> None:
81 |
82 | self.node = node
83 | # self.publisher_ = self.node.create_publisher(Image, 'image_topic', 10)
84 | self.publisher_ = self.node.create_publisher(Image, "/camera/raw/image", 3) # image publisher
85 | self.bridge = CvBridge()
86 |
87 | self._url = url
88 | log_info(f"[BaseRobotInference] URL: {self._url}")
89 |
90 | self._robot = robot.Robot()
91 | self._robot_sn = sn
92 | log_info(f"[BaseRobotInference] Robot SN: {self._robot_sn}")
93 | self._robot_ok = False
94 |
95 | self.arm_init_pose = arm_init_pose
96 | log_info(f"[BaseRobotInference] Arm init pose: {self.arm_init_pose}")
97 |
98 | distribution_file = os.path.join(dataset_dir, task_name, "data_distribution.json")
99 | self._distribution = load_json(distribution_file)
100 | log_info(f"[BaseRobotInference] Distribution file: {distribution_file}")
101 |
102 | self._range = {
103 | "arm":
104 | {
105 | "x": self._distribution["arm"][0],
106 | "y": self._distribution["arm"][1],
107 | },
108 | "chassis":
109 | {
110 | "d_x": self._distribution["chassis"][0],
111 | "d_y": self._distribution["chassis"][1],
112 | "d_yaw": self._distribution["chassis"][2],
113 | }
114 | }
115 | log_info(f"[BaseRobotInference] Distribution info:")
116 | for s in self._range["chassis"]:
117 | s_min = self._range["chassis"][s][0]
118 | s_max = self._range["chassis"][s][1]
119 | log_info(f"Chassis {s}: {s_min} ~ {s_max}")
120 | for s in self._range["arm"]:
121 | s_min = self._range["arm"][s][0]
122 | s_max = self._range["arm"][s][1]
123 | log_info(f"Arm {s}: {s_min} ~ {s_max}")
124 |
125 | self._arm_position_x_decoder = Decoder("arm_x", self._range["arm"]["x"], 256)
126 | self._arm_position_y_decoder = Decoder("arm_y", self._range["arm"]["y"], 256)
127 | self._chassis_position_x_decoder = Decoder("chassis_d_x", self._range["chassis"]["d_x"], 256)
128 | self._chassis_position_y_decoder = Decoder("chassis_d_y", self._range["chassis"]["d_y"], 256)
129 | self._chassis_position_yaw_decoder = Decoder("chassis_d_yaw", self._range["chassis"]["d_yaw"], 256)
130 |
131 | self._rate = 30
132 | self._log = False
133 |
134 | self._images = []
135 |
136 | self.init_robot()
137 |
138 | def init_robot(self):
139 | while not self._robot.initialize(conn_type="sta", sn=self._robot_sn):
140 | log_error("[BaseRobotInference] Initialize robot fail")
141 |
142 | freq = 50
143 |
144 | # Arm end effector
145 | self._arm = self._robot.robotic_arm
146 |
147 | self._arm.sub_position(freq, self._update_arm_position)
148 |
149 | self._arm_position_deque = deque()
150 |
151 | self.arm_moveto(self.arm_init_pose)
152 |
153 | # Servos
154 | self._servo = self._robot.servo
155 |
156 | self._servo.sub_servo_info(freq, self._update_servo)
157 |
158 | self._servo_value_deque = deque()
159 |
160 | # Gripper
161 | self._gripper = self._robot.gripper
162 |
163 | self._gripper.sub_status(freq, self._update_gripper)
164 |
165 | self._gripper_status_deque = deque()
166 |
167 | self._gripper_status = None
168 | self._gripper_command = "None"
169 | self._gripper_power = None
170 |
171 | self.gripper("release", 70)
172 |
173 | # Camera
174 | self._vision = self._robot.camera
175 |
176 | self._vision.start_video_stream(display=False)
177 |
178 | self._image_deque = deque()
179 | self._current_image_data = None
180 |
181 | self._cam_thread = Thread(target=self._update_image)
182 | self._cam_thread.start()
183 |
184 | # Chassis
185 | self._chassis = self._robot.chassis
186 | # Attitude
187 | self._chassis.sub_attitude(freq, self._update_chassis_attitude)
188 | self._chassis_attitute_deque = deque()
189 | # Position
190 | self._chassis.sub_position(0, freq, self._update_chassis_position)
191 | self._chassis_position_deque = deque()
192 | # IMU
193 | self._chassis.sub_imu(freq, self._update_chassis_imu)
194 | self._chassis_imu_deque = deque()
195 | # Status
196 | self._chassis.sub_status(freq, self._update_chassis_status)
197 | self._chassis_status_deque = deque()
198 | # ESC
199 | self._chassis.sub_esc(freq, self._update_chassis_esc)
200 | self._chassis_esc_data_deque = deque()
201 |
202 | self._robot_ok = True
203 | self._robot.led.set_led(comp="all", r=0, g=255, b=0, effect='breath', freq=1)
204 | log_info("[BaseRobotInference] Initialize robot success")
205 | return self._robot_ok
206 |
207 | def start_robot(self):
208 | if not self._robot_ok: return False
209 |
210 | self._core_thread = Thread(target=self._core_loop)
211 | self._core_thread.start()
212 |
213 | def close_robot(self):
214 | if not self._robot_ok: return
215 | self._robot_ok = False
216 |
217 | self._arm.unsub_position()
218 | self._gripper.unsub_status()
219 | self._servo.unsub_servo_info()
220 |
221 | self._vision.stop_video_stream()
222 |
223 | self._chassis.unsub_attitude()
224 | self._chassis.unsub_position()
225 | self._chassis.unsub_imu()
226 | self._chassis.unsub_status()
227 | self._chassis.unsub_esc()
228 |
229 | self._robot.close()
230 |
231 | def _update_arm_position(self, position_data):
232 | if len(self._arm_position_deque) >= 100: self._arm_position_deque.popleft()
233 |
234 | pos_x, pos_y = position_data
235 | time_stamp = time.time()
236 | data = (time_stamp, [pos_x, pos_y])
237 |
238 | self._arm_x, self._arm_y = pos_x, pos_y
239 |
240 | self._arm_position_deque.append(data)
241 |
242 | if self._log: print("End position: x: {0}, y: {1}".format(pos_x, pos_y))
243 |
244 | def _update_servo(self, servo_data):
245 | if len(self._servo_value_deque) >= 100: self._servo_value_deque.popleft()
246 |
247 | time_stamp = time.time()
248 | status, speeds, angles = servo_data
249 | data = (time_stamp, angles[:2], speeds[:2])
250 |
251 | self._servo_value_deque.append(data)
252 |
253 | if self._log: print("Servo: status: {}, speed: {}, angle: {}".format(status, speeds, angles))
254 |
255 | def _update_gripper(self, status_info):
256 | if len(self._gripper_status_deque) >= 100: self._gripper_status_deque.popleft()
257 |
258 | time_stamp = time.time()
259 | self._gripper_status = status_info
260 | data = (time_stamp, self._gripper_status, self._gripper_power, self._gripper_command)
261 |
262 | self._gripper_status_deque.append(data)
263 |
264 | if self._log: print("Gripper: {}".format(self._gripper_status))
265 |
266 | def _update_image(self):
267 | while True:
268 | try:
269 | if len(self._image_deque) >= 100: self._image_deque.popleft()
270 | time_stamp = time.time()
271 |
272 | image = self._vision.read_cv2_image(timeout=0.1, strategy='newest')
273 |
274 |
275 | data = (time_stamp, image)
276 | # self._images.append(image)
277 | self._images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
278 |
279 | self._image_deque.append(data)
280 | self._current_image_data = data
281 | except Exception:
282 | continue
283 | ros_image = self.bridge.cv2_to_imgmsg(image, encoding='bgr8')
284 | ros_image.header.stamp = self.node.get_clock().now().to_msg()
285 | # print(ros_image.header.stamp)
286 | ros_image.header.frame_id = "camera_link_optical_frame"
287 | self.publisher_.publish(ros_image)
288 |
289 | def _update_chassis_attitude(self, attitude_data):
290 | if len(self._chassis_attitute_deque) >= 100: self._chassis_attitute_deque.popleft()
291 |
292
| yaw, pitch, roll = attitude_data 293 | 294 | time_stamp = time.time() 295 | attitute = [yaw, pitch, roll] 296 | data = (time_stamp, attitute) 297 | 298 | self._chassis_attitute_deque.append(data) 299 | 300 | if self._log: print("chassis attitude: yaw:{0}, pitch:{1}, roll:{2} ".format(yaw, pitch, roll)) 301 | 302 | def _update_chassis_position(self, position_data): 303 | if len(self._chassis_position_deque) >= 100: self._chassis_position_deque.popleft() 304 | 305 | x, y, z = position_data 306 | time_stamp = time.time() 307 | position = [x, y] 308 | data = (time_stamp, position) 309 | 310 | self._chassis_position_deque.append(data) 311 | 312 | if self._log: print("chassis position: x:{0}, y:{1}, z:{2}".format(x, y, z)) 313 | 314 | def _update_chassis_imu(self, imu_data): 315 | if len(self._chassis_imu_deque) >= 100: self._chassis_imu_deque.popleft() 316 | 317 | acc_x, acc_y, acc_z, gyro_x, gyro_y, gyro_z = imu_data 318 | 319 | time_stamp = time.time() 320 | linear_acceleration = [acc_x, acc_y, acc_z] 321 | angular_velocity = [gyro_x, gyro_y, gyro_z] 322 | data = (time_stamp, linear_acceleration, angular_velocity) 323 | 324 | self._chassis_imu_deque.append(data) 325 | 326 | if self._log: print("chassis imu: acc_x:{0}, acc_y:{1}, acc_z:{2}, gyro_x:{3}, gyro_y:{4}, gyro_z:{5}".format(acc_x, acc_y, acc_z, gyro_x, gyro_y, gyro_z)) 327 | 328 | def _update_chassis_status(self, status_info): 329 | static_flag, up_hill, down_hill, on_slope, pick_up, slip_flag, impact_x, impact_y, impact_z, roll_over, hill_static = status_info 330 | 331 | if self._log: print("chassis status: static_flag:{0}, up_hill:{1}, down_hill:{2}, on_slope:{3}, " 332 | "pick_up:{4}, slip_flag:{5}, impact_x:{6}, impact_y:{7}, impact_z:{8}, roll_over:{9}, " 333 | "hill_static:{10}".format(static_flag, up_hill, down_hill, on_slope, pick_up, 334 | slip_flag, impact_x, impact_y, impact_z, roll_over, hill_static)) 335 | 336 | def _update_chassis_esc(self, esc_data): 337 | if len(self._chassis_esc_data_deque) >= 100: self._chassis_esc_data_deque.popleft() 338 | 339 | speed, angle, timestamp, state = esc_data 340 | 341 | time_stamp = time.time() 342 | angles = list(angle) 343 | speeds = list(speed) 344 | data = (time_stamp, angles, speeds) 345 | 346 | self._chassis_esc_data_deque.append(data) 347 | 348 | if self._log: print("chassis esc: speed:{0}, angle:{1}, timestamp:{2}, state:{3}".format(speed, angle, timestamp, state)) 349 | 350 | def arm_moveto(self, position): 351 | x, y = position 352 | self._arm.moveto(x, y).wait_for_completed(timeout=1) 353 | 354 | def gripper(self, action, power=50): 355 | if power not in [power for power in range(1,100)]: return False 356 | 357 | self._gripper_command = action 358 | self._gripper_power = power 359 | if action == "grab": self._gripper.close(power) 360 | elif action == "release": self._gripper.open(power) 361 | elif action == "pause": self._gripper.pause() 362 | else: return 363 | 364 | def chassis_move(self, x_dis, y_dis, z_angle, xy_vel=0.5, z_w=30): 365 | self._chassis.move(x_dis, y_dis, z_angle, xy_vel, z_w).wait_for_completed(timeout=None) 366 | 367 | def get_image(self): 368 | if not self._current_image_data: return False 369 | _, image = self._current_image_data 370 | return image 371 | 372 | def get_llava_output(self, image, prompt, temp=1.0): 373 | file_path = 'ep.jpg' 374 | cv2.imwrite(file_path, image) 375 | with open(file_path, 'rb') as image_file: 376 | files = {'image': (file_path, image_file, 'image/jpg')} 377 | 378 | text = prompt 379 | data={"text": text, 380 | "temperature": 
temp} 381 | 382 | response = requests.post(self._url, files=files, data=data) 383 | time.sleep(0.3) 384 | return response.json()['response'] 385 | 386 | def _core_loop(self): 387 | pass 388 | 389 | 390 | if __name__ == "__main__": 391 | DATASET_DIR = os.path.join(HOME_PATH, "RoboMatrixDatasets", "move_to_object") 392 | 393 | # transfer_video_v2(os.path.join(DATASET_DIR, "videos")) 394 | -------------------------------------------------------------------------------- /robomatrix_server/robomatrix_server/resource/inference_multistage.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | from .utils import HOME_PATH, log_info, log_warn, save_video_v2 5 | from .funcs import BaseRobotInference 6 | 7 | LED = { 8 | 0: (255, 0, 255), 9 | 1: (0, 255, 255), 10 | } 11 | 12 | 13 | class RobotInference(BaseRobotInference): 14 | 15 | def __init__(self, node, dataset_dir, task_name, url="http://localhost:7893/process_frame", debug=True) -> None: 16 | super().__init__(node, task_name=task_name, dataset_dir=dataset_dir, url=url) 17 | self.debug = debug 18 | 19 | def exec_vla_task(self, task, prompt): 20 | frame_count, fail_count= 0, 0 21 | try: 22 | log_warn(f"Press ENTER to start.") 23 | input() 24 | 25 | current_stage = 1 26 | total = 0 27 | temp_init = 1.0 28 | temp_high = 1.1 29 | self._robot.led.set_led(comp="all", r=LED[current_stage%2][0], g=LED[current_stage%2][1], b=LED[current_stage%2][2], effect='on', freq=1) 30 | while self._robot_ok: 31 | image = self.get_image() 32 | 33 | if not image.any(): 34 | fail_count += 1 35 | continue 36 | frame_count += 1 37 | print(f"Get a frame {frame_count}") 38 | 39 | print(f"Prompt: {prompt}") 40 | 41 | # temperature 42 | temp = temp_init if total < 5 else temp_high 43 | 44 | token = self.get_llava_output(image, prompt, temp=temp) 45 | print(f"LLaVa response: {token}") 46 | 47 | # decode 48 | tokens = token.split() 49 | flag = False 50 | if len(tokens) != 7: continue 51 | for token in tokens: 52 | if not token.isdigit(): 53 | flag = True 54 | break 55 | if flag: continue 56 | 57 | # stop 58 | stage_end = tokens[0] 59 | if stage_end == "1" \ 60 | or ("release" in prompt.lower() and total > 5) \ 61 | or (("grasp" in prompt.lower() or "position" in prompt.lower()) and total > 10) \ 62 | or ("move" in prompt.lower() and total > 15): 63 | 64 | log_info(f"Current stage {current_stage} is end.") 65 | current_stage += 1 66 | total = 0 67 | self._robot.led.set_led(comp="all", r=LED[current_stage%2][0], g=LED[current_stage%2][1], b=LED[current_stage%2][2], effect='on', freq=1) 68 | return True 69 | 70 | # chassis 71 | scale = 10 72 | 73 | d_x = self._chassis_position_x_decoder.decode(tokens[1]) 74 | if abs(d_x) < 1: d_x = 0 75 | elif abs(d_x) < 10: d_x = d_x * scale / 1000 76 | else: d_x /= 1000 77 | 78 | d_y = self._chassis_position_y_decoder.decode(tokens[2]) 79 | if abs(d_y) < 1: d_y = 0 80 | elif abs(d_y) < 10: d_y = d_y * scale / 1000 81 | else: d_y /= 1000 82 | 83 | d_yaw = self._chassis_position_yaw_decoder.decode(tokens[3]) 84 | if abs(d_yaw) < 1: d_yaw = 0 85 | d_yaw = - d_yaw 86 | 87 | # arm 88 | x = self._arm_position_x_decoder.decode(tokens[4]) 89 | y = self._arm_position_y_decoder.decode(tokens[5]) 90 | 91 | arm_d_x, arm_d_y = x - self._arm_x, y - self._arm_y 92 | if "grasp" in prompt.lower() or "position" in prompt.lower(): 93 | if 2 < arm_d_y < 5: y = self._arm_y + 5 94 | elif 5 <= arm_d_y < 8: y = self._arm_y + 8 95 | elif arm_d_y >= 8: y = self._arm_y + 10 96 | 97 | # gripper 98 | if 
tokens[6] == "0": g = "release" 99 | elif tokens[6] == "1": g = "grab" 100 | else: g = "pause" 101 | 102 | # info 103 | log_info(f"Chassis: {round(d_x,2)}, {round(d_y,2)}, {round(d_yaw,2)}") 104 | # print(f"Arm: {x}, {y}") 105 | log_info(f"Arm: {round(arm_d_x,2)}, {round(arm_d_y,2)}") 106 | log_info(f"Gripper: {g}") 107 | 108 | log_warn(f"Wait: {total}, temp: {temp}") 109 | 110 | # action 111 | self.chassis_move(0, 0, float(d_yaw)) 112 | self.chassis_move(float(d_x), float(d_y), 0) 113 | self.arm_moveto((int(x), int(y))) 114 | self.gripper(g, 20) 115 | 116 | if ("move" in prompt.lower() and d_x == 0 and d_y == 0 and d_yaw == 0) \ 117 | or ("release" in prompt.lower() and g == "release") \ 118 | or ("grasp" in prompt.lower() and abs(arm_d_x) < 2 and abs(arm_d_y) < 2): 119 | last = True 120 | else: 121 | last = False 122 | total = 0 123 | if last: 124 | if ("move" in prompt.lower() and d_x == 0 and d_y == 0 and d_yaw == 0) \ 125 | or ("release" in prompt.lower() and g == "release") \ 126 | or (("grasp" in prompt.lower() or "position" in prompt.lower()) and abs(arm_d_x) < 2 and abs(arm_d_y) < 2): 127 | total += 1 128 | except KeyboardInterrupt: pass 129 | 130 | def exec_get_audio(self, file_path): 131 | print("Recording... ") 132 | res = self._vision.record_audio(save_file=file_path, seconds=5, sample_rate=16000) 133 | print("audio file save in: ", file_path) 134 | return res 135 | 136 | -------------------------------------------------------------------------------- /robomatrix_server/robomatrix_server/resource/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import yaml 4 | import cv2 5 | import re 6 | from moviepy.editor import ImageSequenceClip 7 | import shutil 8 | 9 | 10 | HOME_PATH = os.path.expanduser("~") 11 | 12 | RED = '\033[31m' 13 | GREEN = '\033[32m' 14 | YELLOW = '\033[33m' 15 | RESET = '\033[0m' 16 | 17 | def log_info(log): 18 | print(f"{GREEN}{log}{RESET}") 19 | 20 | def log_warn(log): 21 | print(f"{YELLOW}{log}{RESET}") 22 | 23 | def log_error(log): 24 | print(f"{RED}{log}{RESET}") 25 | 26 | def load_json(file_path): 27 | with open(file_path, 'r') as file: 28 | data = json.load(file) 29 | return data 30 | 31 | def save_json(data, save_directory: str, file_name: str, log=True): 32 | if not os.path.exists(save_directory): 33 | os.mkdir(save_directory) 34 | if log: log_info(f"[Save Json] Create directory: {save_directory}") 35 | if not file_name.endswith('.json'): 36 | file_name += '.json' 37 | if log: log_warn(f"[Save Json] Change file name to: {file_name}") 38 | json_path = os.path.join(save_directory, file_name) 39 | with open(json_path, 'w', encoding='utf-8') as f: 40 | json.dump(data, f, ensure_ascii=False, indent=4) 41 | if log: log_info(f"[Save Json] Save json file to: {json_path}") 42 | 43 | def load_yaml(file_path): 44 | with open(file_path, 'r') as file: 45 | data = yaml.safe_load(file) 46 | return data 47 | 48 | def save_yaml(data, save_directory: str, file_name: str, log=True): 49 | if not os.path.exists(save_directory): 50 | os.mkdir(save_directory) 51 | if log: log_warn(f"[Save Yaml] Create directory: {save_directory}") 52 | if not file_name.endswith('.yaml'): 53 | file_name += '.yaml' 54 | if log: log_warn(f"[Save Yaml] Change file name to: {file_name}") 55 | yaml_path = os.path.join(save_directory, file_name) 56 | with open(yaml_path, 'w') as file: 57 | yaml.dump(data, file, default_flow_style=False, allow_unicode=True) 58 | if log: log_info(f"[Save Yaml] Save yaml file to: {yaml_path}") 
59 |
60 | # Deprecated
61 | def save_video(images: list, fps: int, save_path: str):
62 | frame_count = len(images)
63 | if frame_count == 0:
64 | log_error(f"[Save video] Image list is empty")
65 | return None
66 | log_warn(f"[Save video] Saving video to {save_path}, don't close.")
67 | height, width, _ = images[0].shape
68 | out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
69 | for t in range(frame_count):
70 | img = images[t]
71 | out.write(img)
72 | out.release()
73 | log_info(f"[Save video] Save video to {save_path}")
74 |
75 | # Working version
76 | def save_video_v2(images: list, fps: int, save_path: str):
77 | frame_count = len(images)
78 | if frame_count == 0:
79 | log_error(f"[Save video] Image list is empty")
80 | return None
81 | log_warn(f"[Save video] Saving video to {save_path}, don't close.")
82 | clip = ImageSequenceClip(images, fps=fps)
83 | clip.write_videofile(save_path, codec='libx264')
84 | log_info(f"[Save video] Save video to {save_path}")
85 |
86 | def extract_last_number(string, suffix):
87 | '''
88 | Key function for sorting.
89 | '''
90 | # Extract the trailing number with a regular expression
91 | match = re.search(r'_(\d+)' + re.escape(suffix) + r'$', string)
92 | if match: return int(match.group(1))
93 | return None
94 |
95 | def find_ranges(nums):
96 | '''
97 | Extract ranges from a sorted list of positive integers.
98 | '''
99 | if not nums: return []
100 |
101 | ranges_str, ranges_list = [], []
102 | start = nums[0]
103 |
104 | for i in range(1, len(nums)):
105 | # Check whether the gap to the previous number is 1
106 | if nums[i] != nums[i - 1] + 1:
107 | # Close the current range
108 | if start == nums[i - 1]:
109 | ranges_str.append(f"{start}")
110 | ranges_list.append([start])
111 | else:
112 | ranges_str.append(f"{start}~{nums[i - 1]}")
113 | ranges_list.append([start, nums[i - 1]])
114 | start = nums[i]
115 |
116 | # Append the last range
117 | if start == nums[-1]:
118 | ranges_str.append(f"{start}")
119 | ranges_list.append([start])
120 | else:
121 | ranges_str.append(f"{start}~{nums[-1]}")
122 | ranges_list.append([start, nums[-1]])
123 |
124 | return ranges_str, ranges_list
125 |
126 | def compare_intervals(intervals_A, intervals_B):
127 | '''
128 | Check whether every B interval is covered by some A interval.
129 | '''
130 | def normalize_intervals(intervals):
131 | normalized = []
132 | for interval in intervals:
133 | if len(interval) == 1: # Handle single-endpoint intervals
134 | normalized.append([interval[0], interval[0]])
135 | else:
136 | normalized.append(interval)
137 | return normalized
138 |
139 | # Normalize the B intervals first
140 | intervals_B = normalize_intervals(intervals_B)
141 |
142 | for interval_B in intervals_B:
143 | start_B, end_B = interval_B
144 | covered = False
145 |
146 | # Iterate over the A intervals
147 | for start_A, end_A in intervals_A:
148 | # Check whether the B interval is contained in this A interval
149 | if start_A <= start_B and end_B <= end_A:
150 | covered = True
151 | break # Found a covering A interval, stop checking
152 |
153 | if not covered:
154 | return False # Some B interval is not covered by any A interval
155 |
156 | return True # All B intervals are covered by A intervals
157 |
158 | def analyse_folder(folder_path, log=True):
159 | if not os.path.exists(folder_path):
160 | log_error(f"[Analyse Folder] Cannot find: {folder_path}")
161 | return None
162 |
163 | information = {}
164 |
165 | for file in os.listdir(folder_path):
166 | name, suffix = os.path.splitext(file) # split name and extension
167 | if suffix not in information: information[suffix] = {}
168 | match = re.match(r'^(.*)_(\d+)$', name) # split task description and index
169 | if match:
170 | task_description = match.group(1)
171 | episode_index = match.group(2)
172 | if task_description not in information[suffix]: information[suffix][task_description] = []
173 | information[suffix][task_description].append(int(episode_index))
174 | else: log_warn(f"[Analyse Folder] Cannot match: {file}") 175 | 176 | for suffix in sorted(information.keys()): 177 | if log: log_info(f"[Analyse Folder] File type: {suffix}") 178 | group = information[suffix] 179 | for task_description in sorted(group.keys()): 180 | # log_info(f"Task description: {task_description}") 181 | if log: log_info(f"{task_description}") 182 | group[task_description].sort() 183 | ranges_str, ranges_list = find_ranges(group[task_description]) 184 | # log_info(f"Episode index range: {''.join(ranges_str)}, count: {len(group[task_description])}") 185 | if log: log_info(f"Range: {' '.join(ranges_str)}, count: {len(group[task_description])}") 186 | group[task_description] = ranges_list 187 | 188 | return information 189 | 190 | def compare_information(videos_information, annotations_information): 191 | if ".mp4" not in videos_information: 192 | log_error(f"[Compare] No MP4 in videos resource.") 193 | return False 194 | if ".json" not in annotations_information: 195 | log_error(f"[Compare] No JSON in annotations resource.") 196 | return False 197 | status = True 198 | for task_description in sorted(videos_information[".mp4"]): 199 | if task_description not in annotations_information[".json"]: 200 | log_error(f"[Compare] Task not in annotations: {task_description}") 201 | videos_index_ranges = videos_information[".mp4"][task_description] 202 | annotations_index_ranges = annotations_information[".json"][task_description] 203 | if videos_index_ranges == annotations_index_ranges: 204 | log_info(f"[Compare] OK: {task_description}") 205 | else: 206 | status = False 207 | log_error(f"[Compare] Wrong: {task_description}") 208 | return status 209 | 210 | def check_dataset(videos_resource=os.path.join(HOME_PATH, "RoboMatrixDatasetsALL", "videos"), 211 | annotations_resource=os.path.join(HOME_PATH, "RoboMatrixDatasetsALL", "annotations")): 212 | videos_info = analyse_folder(videos_resource) 213 | save_json(videos_info, os.path.join(HOME_PATH, "RoboMatrixDatasetsALL"), "videos_information.json") 214 | annos_info = analyse_folder(annotations_resource) 215 | save_json(annos_info, os.path.join(HOME_PATH, "RoboMatrixDatasetsALL"), "annotations_information.json") 216 | return compare_information(videos_info, annos_info) 217 | 218 | def rename_dataset(videos_folder, annos_folder, *chosen_group): 219 | analyse_folder(videos_folder) 220 | analyse_folder(annos_folder) 221 | 222 | old_names, new_names = [], [] 223 | for old_name, new_name in chosen_group: 224 | old_names.append(old_name) 225 | new_names.append(new_name) 226 | log_info(f"[Rename] Your change: {old_name} -> {new_name}") 227 | 228 | group = set() 229 | file_names = {} 230 | for file_name in os.listdir(videos_folder): 231 | name, suffix = os.path.splitext(file_name) 232 | if suffix == ".mp4": 233 | match = re.match(r'^(.*)_(\d+)$', name) 234 | if match: 235 | task = match.group(1) 236 | index = match.group(2) 237 | if task not in group: 238 | file_names[task] = [] 239 | group.add(task) 240 | file_names[task].append(int(index)) 241 | else: log_warn(f"[Rename] {file_name} cannot match") 242 | else: log_warn(f"[Rename] Skip non-mp4 file: {file_name}") 243 | for key in file_names: 244 | try: file_names[key].sort() 245 | except: 246 | log_error(f"[Rename] Sort failed: {key}") 247 | if key in old_names: return 248 | range, _ = find_ranges(file_names[key]) 249 | log_info(f"[Rename] {key} count: {len(file_names[key])} range: {' '.join(range)}") 250 | 251 | for s in old_names: 252 | if s not in group: 253 | log_error(f"[Rename] Not 
in group: {s}") 254 | return 255 | 256 | log_warn(f"[Rename] Press ENTER to continue") 257 | input() 258 | 259 | for i, s in enumerate(old_names): 260 | indexs = file_names[s] 261 | names = [f"{s}_{num}" for num in indexs] 262 | task_name = new_names[i] 263 | for j, name in enumerate(names): 264 | new_name = f'{task_name}_{j + 1}' 265 | new_json_name = f'{new_name}.json' 266 | new_video_name = f'{new_name}.mp4' 267 | 268 | # 对json处理 269 | old_json_name = f"{name}.json" 270 | old_json_path = os.path.join(annos_folder, old_json_name) 271 | data = load_json(old_json_path) 272 | for frame in data: 273 | frame["stream"] = new_video_name # 改stream 274 | save_json(data, annos_folder, new_json_name) 275 | log_info(f"[Rename] New annotation: {annos_folder} -> {new_json_name}") 276 | if old_json_name != new_json_name: 277 | os.remove(old_json_path) 278 | 279 | # 对video处理 280 | old_video_name = f"{name}.mp4" 281 | old_video_file = os.path.join(videos_folder, old_video_name) 282 | new_video_file = os.path.join(videos_folder, new_video_name) 283 | os.rename(old_video_file, new_video_file) 284 | log_info(f"[Rename] New video: {new_video_file}") 285 | 286 | def create_dataset(dataset_folder, 287 | *groups, 288 | videos_resource=os.path.join(HOME_PATH, "RoboMatrixDatasetsALL", "videos"), 289 | annotations_resource=os.path.join(HOME_PATH, "RoboMatrixDatasetsALL", "annotations")): 290 | ''' 291 | *groups: (move_to_object, [[1, 10], [20], [30, 40]]) 292 | ''' 293 | # 检查是否已经存在 294 | if os.path.exists(dataset_folder): 295 | log_error(f"[Create Dataset] Already exist: {dataset_folder}") 296 | return 297 | 298 | # 检查原始数据集 299 | videos_info = analyse_folder(videos_resource) 300 | annos_info = analyse_folder(annotations_resource) 301 | if not compare_information(videos_info, annos_info): 302 | log_error(f"[Create Dataset] Dataset error") 303 | return 304 | 305 | # 检查任务描述和范围 306 | log_info(f"[Create Dataset] Your choice:") 307 | status = True 308 | for task_name, index_ranges in groups: 309 | log_info(f"Task: {task_name}") 310 | log_info(f"Range: {index_ranges}") 311 | 312 | if task_name not in videos_info[".mp4"] or task_name not in annos_info[".json"]: 313 | log_error(f"Invalid task description: {task_name}") 314 | status = False 315 | continue 316 | 317 | main_ranges = videos_info[".mp4"][task_name] 318 | if not compare_intervals(main_ranges, index_ranges): 319 | log_error(f"Invalid episode range: {index_ranges}") 320 | status = False 321 | if not status: return 322 | 323 | # 继续 324 | log_info(f"[Create Dataset] Press ENTER to continue") 325 | input() 326 | 327 | # 创建文件夹 328 | if not os.path.exists(dataset_folder): 329 | os.mkdir(dataset_folder) 330 | os.mkdir(os.path.join(dataset_folder, "videos")) 331 | os.mkdir(os.path.join(dataset_folder, "annotations")) 332 | log_info(f"[Create Dataset] Create dataset: {dataset_folder}") 333 | 334 | # 复制文件 335 | for task_name, index_ranges in groups: 336 | log_info(f"[Create Dataset] Copy: {task_name}") 337 | log_info(f"Range: {index_ranges}") 338 | indexs = [] 339 | for r in index_ranges: 340 | if len(r) == 1: indexs.append(r[0]) 341 | elif len(r) == 2: 342 | start, end = r 343 | for i in range(start, end+1): indexs.append(i) 344 | log_info(f"Count: {len(indexs)}") 345 | log_info(f"Press ENTER to continue") 346 | input() 347 | for index in indexs: 348 | name = f"{task_name}_{index}" 349 | resource_video = os.path.join(videos_resource, f"{name}.mp4") 350 | resource_anno = os.path.join(annotations_resource, f"{name}.json") 351 | destination_video = 
352 | destination_anno = os.path.join(dataset_folder, "annotations", f"{name}.json")
353 | shutil.copy(resource_video, destination_video)
354 | shutil.copy(resource_anno, destination_anno)
355 | log_info(f"OK: {name}")
356 |
357 |
-------------------------------------------------------------------------------- /robomatrix_server/robomatrix_server/vla_service.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import rclpy
4 | from rclpy.node import Node
5 | # from robomatrix_server.srv import Vla
6 | # import pdb; pdb.set_trace()
7 | from robomatrix_interface.srv import Vla, Audio
8 | from .resource.inference_multistage import *
9 | import time
10 |
11 | class VLAServiceServer(Node):
12 | def __init__(self, robot: RobotInference):
13 | super().__init__('vla_service_server')
14 | self.robot_vla = robot
15 |
16 | self.vla_server = self.create_service(Vla, 'vla_service', self.handle_vla_service)
17 | self.audio_server = self.create_service(Audio, 'audio_service', self.handle_audio_service)
18 |
19 | def handle_vla_service(self, request, response):
20 | self.get_logger().info(f'Received: {request.task}, {request.msg}')
21 | # vla task
22 | self.robot_vla.exec_vla_task(request.task, request.msg)
23 |
24 | response.msg = f'Task {request.msg} complete!'
25 | response.success = True
26 | self.get_logger().info(f'Task {request.msg} complete!')
27 |
28 | return response
29 |
30 | def handle_audio_service(self, request, response):
31 | self.get_logger().info(f'Received audio task')
32 | # audio task
33 | res = self.robot_vla.exec_get_audio(request.path)
34 |
35 | response.success = True
36 |
37 | self.get_logger().info(f'Audio recorded!')
38 |
39 | return response
40 |
41 | def main(args=None):
42 | rclpy.init(args=args)
43 | # robot = RobotInference()
44 | robot = RobotInference(
45 | node=VLAServiceServer('vla_service_server'),
46 | dataset_dir=os.path.join(HOME_PATH, "RoboMatrixDatasetsDIY"),
47 | task_name="skill_model_11_12",
48 | url="http://localhost:7893/process_frame",
49 | debug=False)
50 | service_server = VLAServiceServer(robot)
51 | # robot.init_robot()
52 | print("RM service waiting for calls!")
53 | rclpy.spin(service_server) # keep the node alive and wait for requests
54 | rclpy.shutdown()
55 |
56 | if __name__ == '__main__':
57 | main()
58 |
-------------------------------------------------------------------------------- /robomatrix_server/setup.cfg: --------------------------------------------------------------------------------
1 | [develop]
2 | script_dir=$base/lib/robomatrix_server
3 | [install]
4 | install_scripts=$base/lib/robomatrix_server
5 |
-------------------------------------------------------------------------------- /robomatrix_server/setup.py: --------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | from glob import glob
3 | import os
4 |
5 | package_name = "robomatrix_server"
6 | module_name = "robomatrix_server/resource"
7 |
8 | setup(
9 | name=package_name,
10 | version="0.0.0",
11 | packages=[package_name, module_name],
12 | # packages=[package_name
13 | # ],
14 | data_files=[
15 | ("share/ament_index/resource_index/packages", ["resource/" + package_name]),
16 | ("share/" + package_name, ["package.xml"]),
17 | # (
18 | # "share/" + package_name + "/resource",
19 | # ),
20 | ],
21 | install_requires=["setuptools"],
22 | zip_safe=True,
23 | maintainer="blackbird",
24 | maintainer_email="zwh@163.com",
25 |
--------------------------------------------------------------------------------
/robomatrix_server/setup.cfg:
--------------------------------------------------------------------------------
1 | [develop]
2 | script_dir=$base/lib/robomatrix_server
3 | [install]
4 | install_scripts=$base/lib/robomatrix_server
--------------------------------------------------------------------------------
/robomatrix_server/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | from glob import glob
3 | import os
4 | 
5 | package_name = "robomatrix_server"
6 | module_name = "robomatrix_server/resource"
7 | 
8 | setup(
9 |     name=package_name,
10 |     version="0.0.0",
11 |     packages=[package_name, module_name],
12 |     # packages=[package_name
13 |     # ],
14 |     data_files=[
15 |         ("share/ament_index/resource_index/packages", ["resource/" + package_name]),
16 |         ("share/" + package_name, ["package.xml"]),
17 |         # (
18 |         #     "share/" + package_name + "/resource",
19 |         # ),
20 |     ],
21 |     install_requires=["setuptools"],
22 |     zip_safe=True,
23 |     maintainer="blackbird",
24 |     maintainer_email="zwh@163.com",
25 |     description="TODO: Package description",
26 |     license="TODO: License declaration",
27 |     tests_require=["pytest"],
28 |     entry_points={
29 |         "console_scripts": [
30 |             "vla_service = robomatrix_server.vla_service:main",
31 |             # "inference_multistage = robomatrix_server.inference_multistage:main"
32 |         ],
33 |     },
34 | )
35 | 
--------------------------------------------------------------------------------
/teleoperation/joystick_driver/joystick_driver/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/teleoperation/joystick_driver/joystick_driver/__init__.py
--------------------------------------------------------------------------------
/teleoperation/joystick_driver/joystick_driver/xbox_controller.py:
--------------------------------------------------------------------------------
1 | import rclpy
2 | from rclpy.node import Node
3 | 
4 | import pygame
5 | 
6 | import time
7 | import threading
8 | 
9 | from joystick_msgs.msg import XBox360Controller
10 | 
11 | 
12 | class XBoxDriver(Node):
13 | 
14 |     BUTTON_NAME = ['A', 'B', 'X', "Y", 'LB', 'RB', 'BACK', 'START', 'POWER', 'LS', 'RS']
15 |     AXIS_NAME = ['LS_X', 'LS_Y', 'LT', 'RS_X', 'RS_Y', 'RT']
16 |     HAT_NAME = ['X', 'Y']
17 | 
18 |     def __init__(self) -> None:
19 |         super().__init__('xbox_360_controller')
20 | 
21 |         pygame.init()
22 |         pygame.joystick.init()
23 |         self._device = None
24 | 
25 |         self._rate = 30
26 | 
27 |         self._joystick_publisher = self.create_publisher(XBox360Controller, '/joystick', 3)
28 | 
29 |         self._is_activate = False
30 | 
31 |         self._joystick_thread = threading.Thread(target=self._joystick_loop)
32 |         self._joystick_thread.start()
33 | 
34 |         self.shutdown = False
35 | 
36 |     def _get_joystick(self):
37 |         if self._device: return self._device
38 | 
39 |         if pygame.joystick.get_count() == 0:
40 |             self.get_logger().error("No joystick detected")
41 |             return False
42 |         elif pygame.joystick.get_count() > 1:
43 |             self.get_logger().error("Multiple joysticks detected")
44 |             return False
45 |         else:
46 |             self._device = pygame.joystick.Joystick(0)
47 |             self._device.init()
48 | 
49 |             self._name = self._device.get_name()
50 |             self.get_logger().info(f"{self._name} is connected")
51 |             self._button_num = self._device.get_numbuttons()
52 |             self._axis_num = self._device.get_numaxes()
53 |             self._hat_num = self._device.get_numhats()
54 | 
55 |             return self._device
56 | 
57 |     def _joystick_loop(self):
58 |         try:
59 |             while not self._get_joystick(): time.sleep(1)
60 |             self._is_activate = True
61 | 
62 |             while rclpy.ok():
63 |                 if self.shutdown: break
64 | 
65 |                 events = pygame.event.get()  # pump pygame's event queue so input states refresh
66 | 
67 |                 if not self._is_activate:
68 |                     if self._device.get_button(self.BUTTON_NAME.index('POWER')) == 1:
69 |                         self._is_activate = True
70 |                         self.get_logger().info("Joystick activated, press POWER button to deactivate")
71 |                         time.sleep(0.2)
72 |                     else: self.get_logger().info("Joystick deactivated, press POWER button to activate")
73 | 
74 |                 while self._is_activate:
75 |                     events = pygame.event.get()
76 | 
77 |                     # if self._device.get_button(self.BUTTON_NAME.index('POWER')) == 1:
78 |                     #     self._is_activate = False
79 |                     #     self.get_logger().info("Joystick deactivated, press POWER button to activate")
80 |                     #     time.sleep(0.2)
81 |                     #     continue
82 |                     if self._device.get_button(self.BUTTON_NAME.index('POWER')) == 1:
83 |                         self._is_activate = False
84 |                         self.get_logger().info("Joystick shutdown")
85 |                         self.shutdown = True
86 |                         time.sleep(0.2)
87 |                         break
88 | 
89 |                     button_list, axis_list, hat_list = [], [], []
90 |                     for i in range(self._button_num):
91 |                         button_list.append(int(self._device.get_button(i)))
92 |                     for i in range(self._axis_num):
93 |                         axis_list.append(float(self._device.get_axis(i)))
94 |                     for i in range(self._hat_num):
95 |                         x, y = self._device.get_hat(i)
96 |                         hat_list.append(int(x))
97 |                         hat_list.append(int(y))
98 | 
99 |                     msg = XBox360Controller()
100 |                     msg.header.stamp = self.get_clock().now().to_msg()
101 |                     msg.header.frame_id = "joystick_data"
102 |                     msg.name = self._name
103 |                     msg.buttons = button_list
104 |                     msg.axes = axis_list
105 |                     msg.hats = hat_list
106 |                     try:
107 |                         msg.button_a = button_list[self.BUTTON_NAME.index('A')]
108 |                         msg.button_b = button_list[self.BUTTON_NAME.index('B')]
109 |                         msg.button_x = button_list[self.BUTTON_NAME.index('X')]
110 |                         msg.button_y = button_list[self.BUTTON_NAME.index('Y')]
111 |                         msg.button_lb = button_list[self.BUTTON_NAME.index('LB')]
112 |                         msg.button_rb = button_list[self.BUTTON_NAME.index('RB')]
113 |                         msg.button_back = button_list[self.BUTTON_NAME.index('BACK')]
114 |                         msg.button_start = button_list[self.BUTTON_NAME.index('START')]
115 |                         msg.button_power = button_list[self.BUTTON_NAME.index('POWER')]
116 |                         msg.button_ls = button_list[self.BUTTON_NAME.index('LS')]
117 |                         msg.button_rs = button_list[self.BUTTON_NAME.index('RS')]
118 |                     except (ValueError, IndexError): pass  # controller reports fewer buttons than expected
119 |                     try:
120 |                         msg.axis_ls_x = axis_list[self.AXIS_NAME.index('LS_X')]
121 |                         msg.axis_ls_y = axis_list[self.AXIS_NAME.index('LS_Y')]
122 |                         msg.axis_lt = axis_list[self.AXIS_NAME.index('LT')]
123 |                         msg.axis_rs_x = axis_list[self.AXIS_NAME.index('RS_X')]
124 |                         msg.axis_rs_y = axis_list[self.AXIS_NAME.index('RS_Y')]
125 |                         msg.axis_rt = axis_list[self.AXIS_NAME.index('RT')]
126 |                     except (ValueError, IndexError): pass
127 |                     try:
128 |                         msg.hat_x = hat_list[self.HAT_NAME.index('X')]
129 |                         msg.hat_y = hat_list[self.HAT_NAME.index('Y')]
130 |                     except (ValueError, IndexError): pass
131 | 
132 |                     self._joystick_publisher.publish(msg)
133 |                     time.sleep(1 / self._rate)
134 |         except KeyboardInterrupt:
135 |             self.get_logger().warn("Keyboard Interrupt")
136 |         finally:
137 |             self._device.quit()
138 |             pygame.joystick.quit()
139 |             pygame.quit()
140 |             self.get_logger().info("Node end")
141 | 
142 | 
143 | def main(args=None):
144 |     rclpy.init(args=args)
145 | 
146 |     node = XBoxDriver()
147 |     try:
148 |         while rclpy.ok():
149 |             rclpy.spin_once(node, timeout_sec=1)
150 |             if node.shutdown: break
151 | 
152 |     except KeyboardInterrupt: print("Keyboard Interrupt")
153 |     finally:
154 |         node.destroy_node()
155 |         rclpy.shutdown()
156 | 
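A minimal sketch of consuming the driver's output (topic, message type, and queue depth mirror the publisher above; the node name is hypothetical):

    import rclpy
    from rclpy.node import Node
    from joystick_msgs.msg import XBox360Controller

    class JoystickEcho(Node):
        def __init__(self):
            super().__init__('joystick_echo')  # hypothetical node name
            self.create_subscription(XBox360Controller, '/joystick', self.on_joystick, 3)

        def on_joystick(self, msg):
            # named fields are filled from the raw buttons/axes/hats lists by the driver
            self.get_logger().info(f'LS=({msg.axis_ls_x:+.2f}, {msg.axis_ls_y:+.2f}) A={msg.button_a}')

    def main(args=None):
        rclpy.init(args=args)
        rclpy.spin(JoystickEcho())
        rclpy.shutdown()

    if __name__ == '__main__':
        main()
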
--------------------------------------------------------------------------------
/teleoperation/joystick_driver/package.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
3 | <package format="3">
4 |   <name>joystick_driver</name>
5 |   <version>0.0.0</version>
6 |   <description>TODO: Package description</description>
7 |   <maintainer email="TODO">jyxc</maintainer>
8 |   <license>TODO: License declaration</license>
9 | 
10 |   <test_depend>ament_copyright</test_depend>
11 |   <test_depend>ament_flake8</test_depend>
12 |   <test_depend>ament_pep257</test_depend>
13 |   <test_depend>python3-pytest</test_depend>
14 | 
15 |   <depend>rclpy</depend>
16 |   <depend>joystick_msgs</depend>
17 | 
18 |   <export>
19 |     <build_type>ament_python</build_type>
20 |   </export>
21 | </package>
22 | 
--------------------------------------------------------------------------------
/teleoperation/joystick_driver/resource/joystick_driver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WayneMao/RoboMatrix/71b077ee5904078f1cbf9008e3fb1256643ba832/teleoperation/joystick_driver/resource/joystick_driver
--------------------------------------------------------------------------------
/teleoperation/joystick_driver/setup.cfg:
--------------------------------------------------------------------------------
1 | [develop]
2 | script-dir=$base/lib/joystick_driver
3 | [install]
4 | install-scripts=$base/lib/joystick_driver
--------------------------------------------------------------------------------
/teleoperation/joystick_driver/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 | 
3 | package_name = 'joystick_driver'
4 | 
5 | setup(
6 |     name=package_name,
7 |     version='0.1.0',
8 |     packages=find_packages(exclude=['test']),
9 |     data_files=[
10 |         ('share/ament_index/resource_index/packages',
11 |             ['resource/' + package_name]),
12 |         ('share/' + package_name, ['package.xml']),
13 |     ],
14 |     install_requires=['setuptools'],
15 |     zip_safe=True,
16 |     maintainer='weiheng zhong',
17 |     maintainer_email='weiheng_z@bit.edu.cn',
18 |     description='TODO: Package description',
19 |     license='TODO: License declaration',
20 |     tests_require=['pytest'],
21 |     entry_points={
22 |         'console_scripts': [
23 |             'xbox_driver = joystick_driver.xbox_controller:main',
24 |         ],
25 |     },
26 | )
27 | 
--------------------------------------------------------------------------------
/teleoperation/joystick_msgs/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.8)
2 | project(joystick_msgs)
3 | 
4 | if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
5 |   add_compile_options(-Wall -Wextra -Wpedantic)
6 | endif()
7 | 
8 | # find dependencies
9 | find_package(ament_cmake REQUIRED)
10 | find_package(rosidl_default_generators REQUIRED)
11 | find_package(std_msgs REQUIRED)
12 | # uncomment the following section in order to fill in
13 | # further dependencies manually.
14 | # find_package(<dependency> REQUIRED)
15 | 
16 | if(BUILD_TESTING)
17 |   find_package(ament_lint_auto REQUIRED)
18 |   # the following line skips the linter which checks for copyrights
19 |   # comment the line when a copyright and license is added to all source files
20 |   set(ament_cmake_copyright_FOUND TRUE)
21 |   # the following line skips cpplint (only works in a git repo)
22 |   # comment the line when this package is in a git repo and when
23 |   # a copyright and license is added to all source files
24 |   set(ament_cmake_cpplint_FOUND TRUE)
25 |   ament_lint_auto_find_test_dependencies()
26 | endif()
27 | 
28 | rosidl_generate_interfaces(${PROJECT_NAME}
29 |   "msg/XBox360Controller.msg"
30 |   "msg/Button.msg"
31 |   "msg/Hat.msg"
32 |   "msg/Axis.msg"
33 |   DEPENDENCIES std_msgs
34 | )
35 | 
36 | ament_export_dependencies(rosidl_default_runtime)
37 | 
38 | ament_package()
39 | 
--------------------------------------------------------------------------------
/teleoperation/joystick_msgs/msg/Axis.msg:
--------------------------------------------------------------------------------
1 | # XBOX 360 Controller
2 | 
3 | # AXIS_NAME = ['LS_X', 'LS_Y', 'LT', 'RS_X', 'RS_Y', 'RT']
4 | 
5 | float32 ls_x
6 | float32 ls_y
7 | float32 lt
8 | float32 rs_x
9 | float32 rs_y
10 | float32 rt
11 | 
--------------------------------------------------------------------------------
/teleoperation/joystick_msgs/msg/Button.msg:
--------------------------------------------------------------------------------
1 | # XBOX 360 Controller
2 | 
3 | # BUTTON_NAME = ['A', 'B', 'X', "Y", 'LB', 'RB', 'BACK', 'START', 'POWER', 'LS', 'RS']
4 | 
5 | uint8 a
6 | uint8 b
7 | uint8 x
8 | uint8 y
9 | uint8 lb
10 | uint8 rb
11 | uint8 back
12 | uint8 start
13 | uint8 power
14 | uint8 ls
15 | uint8 rs
16 | 
--------------------------------------------------------------------------------
/teleoperation/joystick_msgs/msg/Hat.msg:
--------------------------------------------------------------------------------
1 | # XBOX 360 Controller
2 | 
3 | # HAT_NAME = ['X', 'Y']
4 | 
5 | int8 x
6 | int8 y
7 | 
--------------------------------------------------------------------------------
/teleoperation/joystick_msgs/msg/XBox360Controller.msg:
--------------------------------------------------------------------------------
1 | # XBOX 360 Controller
2 | 
3 | std_msgs/Header header
4 | 
5 | uint8[] buttons
6 | float32[] axes
7 | int8[] hats
8 | 
9 | string name
10 | 
11 | # BUTTON_NAME = ['A', 'B', 'X', "Y", 'LB', 'RB', 'BACK', 'START', 'POWER', 'LS', 'RS']
12 | # AXIS_NAME = ['LS_X', 'LS_Y', 'LT', 'RS_X', 'RS_Y', 'RT']
13 | # HAT_NAME = ['X', 'Y']
14 | 
15 | uint8 button_a
16 | uint8 button_b
17 | uint8 button_x
18 | uint8 button_y
19 | uint8 button_lb
20 | uint8 button_rb
21 | uint8 button_back
22 | uint8 button_start
23 | uint8 button_power
24 | uint8 button_ls
25 | uint8 button_rs
26 | 
27 | float32 axis_ls_x
28 | float32 axis_ls_y
29 | float32 axis_lt
30 | float32 axis_rs_x
31 | float32 axis_rs_y
32 | float32 axis_rt
33 | 
34 | int8 hat_x
35 | int8 hat_y
36 | 
--------------------------------------------------------------------------------
/teleoperation/joystick_msgs/package.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
3 | <package format="3">
4 |   <name>joystick_msgs</name>
5 |   <version>0.0.0</version>
6 |   <description>TODO: Package description</description>
7 |   <maintainer email="TODO">jyxc</maintainer>
8 |   <license>TODO: License declaration</license>
9 | 
10 |   <buildtool_depend>ament_cmake</buildtool_depend>
11 | 
12 |   <test_depend>ament_lint_auto</test_depend>
13 |   <test_depend>ament_lint_common</test_depend>
14 | 
15 |   <buildtool_depend>rosidl_default_generators</buildtool_depend>
16 |   <build_depend>std_msgs</build_depend>
17 |   <exec_depend>rosidl_default_runtime</exec_depend>
18 |   <exec_depend>std_msgs</exec_depend>
19 | 
20 |   <member_of_group>rosidl_interface_packages</member_of_group>
21 | 
22 |   <export>
23 |     <build_type>ament_cmake</build_type>
24 |   </export>
25 | </package>
26 | 
--------------------------------------------------------------------------------
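Once joystick_msgs is built, rosidl generates matching Python classes for each interface; a small sketch of the resulting API (field values are illustrative):

    from joystick_msgs.msg import XBox360Controller, Axis

    # generated messages support keyword construction; unset fields default to zero/empty
    msg = XBox360Controller(name='Xbox 360 Controller', button_a=1, axis_lt=-1.0)
    axis = Axis(ls_x=0.5, ls_y=-0.5)
    print(msg.button_a, axis.ls_x)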