├── drone
├── vla_ws
│ └── src
│ │ ├── CMakeLists.txt
│ │ └── vlm_drone
│ │ ├── launch
│ │ ├── node.launch
│ │ └── apm.launch
│ │ ├── scripts
│ │ ├── vla_start.sh
│ │ └── drone.py
│ │ ├── package.xml
│ │ └── CMakeLists.txt
└── readme.md
├── server
└── server.py
└── README.md
/drone/vla_ws/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | /opt/ros/noetic/share/catkin/cmake/toplevel.cmake
--------------------------------------------------------------------------------
/drone/vla_ws/src/vlm_drone/launch/node.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/drone/readme.md:
--------------------------------------------------------------------------------
1 | ### Run camera
2 | roslaunch racing_vla rs_t265.launch
3 |
4 | ### Run FCU
5 | roslaunch racing_vla apm.launch
6 |
7 | ### Run VLA
8 | rosrun vlm_drone drone.py
9 |
10 | ### Recording experiments
11 | rosbag record /t265/fisheye1/image_raw /t265/fisheye1/camera_info /mavros/vision_pose/pose /mavros/setpoint_velocity/cmd_vel /vla_image /tf
12 |
13 | ### Recording experiments (only trajectory and actions)
14 | rosbag record /t265/fisheye1/image_raw /vicon/drone_vla/drone_vla /mavros/vision_pose/pose /mavros/setpoint_velocity/cmd_vel
15 |
16 | ### Aliases for ~/.bashrc
17 | alias vla_rs="roslaunch racing_vla rs_t265.launch"
18 | alias vla_fcu="roslaunch racing_vla apm.launch"
19 | alias vla_run="rosrun vlm_drone drone.py"
20 |
21 | ### All launch
22 | /vla_ws/src/vlm_drone/scripts/vla_start.sh
--------------------------------------------------------------------------------
/drone/vla_ws/src/vlm_drone/launch/apm.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
29 |
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/drone/vla_ws/src/vlm_drone/scripts/vla_start.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Bring up the full RaceVLA flight stack inside one tmux session
# ("flightsession"): ROS master, T265 camera, OpenVINS VIO, and the
# ArduPilot/MAVROS + VLA node windows, then attach to the session.

tmux new-session -d -s flightsession

# Window 0: ROS master.
tmux send-keys -t flightsession:0.0 "source /opt/ros/noetic/setup.bash && roscore" Enter

# Window 1: T265 camera launch (top pane) and a rosbag recorder (bottom pane).
# NOTE(review): the rosbag command below is typed into the pane but NOT
# followed by Enter — presumably so the operator starts recording manually
# once the flight begins; confirm this is intentional.
tmux new-window -n "Camera"
tmux split-window -v
tmux send-keys -t flightsession:1.0 "source /home/nuc/vlm_ws/devel/setup.bash && mon launch racing_vla rs_t265.launch" Enter
sleep 3
tmux send-keys -t flightsession:1.1 "rosbag record /t265/fisheye1/image_raw /vicon/drone_vla/drone_vla /mavros/vision_pose/pose /mavros/setpoint_velocity/cmd_vel /vicon/gate/gate"

# Window 2: OpenVINS VIO estimator plus three TF post-processing helpers.
tmux new-window -n "VIO"
tmux split-window -v
tmux send-keys -t flightsession:2.0 "source /home/nuc/openvins_ws/devel/setup.bash && mon launch /home/nuc/openvins_ws/src/open_vins/ov_msckf/launch/subscribe_with_transform.launch config:=rs_t265" Enter
tmux send-keys -t flightsession:2.1 "source /home/nuc/openvins_ws/devel/setup.bash && rosrun ov_msckf apply_tf_to_global.py" Enter
tmux split-window -v
tmux send-keys -t flightsession:2.2 "source /home/nuc/openvins_ws/devel/setup.bash && rosrun ov_msckf apply_ov_to_room.py" Enter
tmux split-window -h
tmux send-keys -t flightsession:2.3 "source /home/nuc/openvins_ws/devel/setup.bash && rosrun ov_msckf correct_tf_frame.py" Enter


# Window 3: MAVROS/ArduPilot bring-up.  The VLA node command is typed but
# not started (no trailing Enter) so the operator launches it when ready.
tmux new-window -n "APM Initialize"
tmux send-keys -t flightsession:3.0 "source /home/nuc/vlm_ws/devel/setup.bash && mon launch racing_vla apm.launch" Enter
tmux split-window -v
tmux send-keys -t flightsession:3.1 "source /home/nuc/vlm_ws/devel/setup.bash && mon launch vlm_drone node.launch"

# Prefix-less key bindings for pane/window navigation.
tmux bind-key -n S-Left select-pane -L
tmux bind-key -n S-Right select-pane -R
tmux bind-key -n S-Up select-pane -U
tmux bind-key -n S-Down select-pane -D
tmux bind-key -n M-S-Left previous-window
tmux bind-key -n M-S-Right next-window

# Session-wide quality-of-life settings.
tmux set-option -g mouse on
tmux set-option -g default-terminal "screen-256color"
tmux set-option -g terminal-overrides ',*:smcup@:rmcup@'
tmux set-option -g history-limit 10000

tmux attach-session -t flightsession
--------------------------------------------------------------------------------
/server/server.py:
--------------------------------------------------------------------------------
from flask import Flask, request, jsonify
from transformers import AutoModelForVision2Seq, AutoProcessor
from PIL import Image
import torch
import numpy as np
import io
import time

app = Flask(__name__)

# Processor (tokenizer + image transforms) for the fine-tuned OpenVLA model.
# The hard-coded path points at a local LoRA fine-tune; swap in the commented
# hub id to use the stock openvla-7b checkpoint instead.
processor = AutoProcessor.from_pretrained(
    "/home/isr-lab-4/openvla/models/drone3_cycle/runs/openvla-7b+drone_set3+b16+lr-0.0005+lora-r32+dropout-0.0/",
    # "openvla/openvla-7b",
    trust_remote_code=True
)
# Vision-language-action model, loaded in bfloat16 onto the first GPU.
vla = AutoModelForVision2Seq.from_pretrained(
    "/home/isr-lab-4/openvla/models/drone3_cycle/finetuned_model/openvla-7b+drone_set3+b16+lr-0.0005+lora-r32+dropout-0.0/",
    # "openvla/openvla-7b",
    attn_implementation="flash_attention_2",  # [Optional] Requires `flash_attn`
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True
).to("cuda:0")
24 |
@app.route('/predict_action', methods=['POST'])
def predict_action():
    """Flask endpoint: run the OpenVLA model on an uploaded camera frame.

    Expects multipart form data with an ``image`` file and an
    ``instruction`` text field.  Returns JSON of the form
    ``{"velocities": {"x", "y", "z"}, "delta_yaw"}`` on success, or
    ``{"error": ...}`` with HTTP 500 on any failure.
    """
    try:
        image_file = request.files['image']
        instruction = request.form['instruction']
        image = Image.open(io.BytesIO(image_file.read())).convert("RGB")

        prompt = f"In: What action should the robot take to {instruction}?\nOut:"
        inputs = processor(prompt, image, return_tensors="pt").to("cuda:0", dtype=torch.bfloat16)

        time0 = time.perf_counter()
        # change unnorm_key to (drone_set4) if u r using one gate (different
        # positions), change it to drone_set5_two_gates for two gates and arched
        action = vla.predict_action(**inputs, unnorm_key="drone_set3", do_sample=False)
        time1 = time.perf_counter()
        print(f"Prediction time = {time1 - time0} sec")

        # Normalize to a plain Python list; torch.Tensor and np.ndarray both
        # expose .tolist(), so a single isinstance check covers both.
        if isinstance(action, (torch.Tensor, np.ndarray)):
            action = action.tolist()

        # action[0:3] are linear velocities, action[5] is the yaw delta
        # (indices 3 and 4 are unused by the drone client).
        formatted_action = {
            "velocities": {
                "x": action[0],
                "y": action[1],
                "z": action[2]
            },
            "delta_yaw": action[5],
        }
        print(formatted_action)
        return jsonify(formatted_action)

    except Exception as e:
        print(f"Error: {str(e)}")
        return jsonify({"error": str(e)}), 500
58 |
# Run the Flask development server on all interfaces so the drone's
# companion computer can reach it over the LAN.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
61 |
--------------------------------------------------------------------------------
/drone/vla_ws/src/vlm_drone/package.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | vlm_drone
4 | 0.0.0
5 | The vlm_drone package
6 |
7 |
8 |
9 |
10 | nuc
11 |
12 |
13 |
14 |
15 |
16 | TODO
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 | catkin
52 | roscpp
53 | rospy
54 | std_msgs
55 | roscpp
56 | rospy
57 | std_msgs
58 | roscpp
59 | rospy
60 | std_msgs
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RaceVLA
2 |
3 | The repository is currently under preparation for publication **RaceVLA**.
4 |
5 | - Arxiv link: [http://arxiv.org/abs/2503.02572]
6 | - Pretrained models: [https://huggingface.co/SerValera/RaceVLA_models]
7 | - Dataset: [https://huggingface.co/datasets/SerValera/RaceVLA_dataset]
8 |
9 | This repository contains the codebase for launching a racing drone and the necessary server-side scripts to host and run the RaceVLA model.
10 | ## Repository Structure
11 |
12 | /drone/ — Scripts and configurations to initialize and launch the racing drone.
13 | /server/ — Scripts to set up a server environment where the RaceVLA model is deployed and running.
14 |
15 | ## Features
16 |
17 | - Drone initialization and takeoff procedures
18 | - Communication setup between drone and RaceVLA server
19 | - RaceVLA model inference handling on the server side
20 | - Data exchange scripts to process and send images from the onboard camera and receive control commands from the RaceVLA model
21 |
22 | ## System Overview
23 |
24 | The project integrates:
25 |
26 | - **Visual-Inertial Odometry (VIO)** for state estimation using [open_vins](https://github.com/rpng/open_vins)
27 | - **FishEye T265** with Intel RealSense [realsense-ros](https://github.com/IntelRealSense/realsense-ros)
28 | - **Flight control** using **ArduPilot firmware (v4.4.4)** running on a **SpeedyBee F4 V4 flight controller**.
29 | - **ROS 1** for data handling, visualization, and control logic.
30 | - **Flask** for transferring data between the drone and a remote server.
31 | ---
32 |
33 | ## Tested Environment
34 |
35 | - **Platform:** Intel NUC
36 | - **OS:** Ubuntu 22.04
37 | - **Flight Controller:** SpeedyBee STM32F405 ARM
38 | - **Firmware:** ArduPilot v4.4.4
39 | - **Middleware:** ROS 1 (Noetic)
40 | - **Communication Framework:** Flask (Python-based server)
41 |
42 | ---
43 |
44 | ## Project Components
45 |
46 | ### 1. Visual-Inertial Odometry
47 | - OpenVINS provides real-time visual-inertial state estimation.
48 | - Repository: [https://github.com/rpng/open_vins](https://github.com/rpng/open_vins)
49 |
50 | ### 2. FishEye T265
51 | - Intel RealSense T265 camera for VLA vision.
52 | - ROS integration via realsense-ros.
53 | - Repository: [https://github.com/IntelRealSense/realsense-ros](https://github.com/IntelRealSense/realsense-ros)
54 |
55 | ### 3. Flight Control
56 | - ArduPilot firmware for vehicle stabilization and high-level commands.
57 | - ROS integration via `ardupilot_ros` for bidirectional communication.
58 | - Documentation: [https://ardupilot.org/dev/docs/ros.html](https://ardupilot.org/dev/docs/ros.html)
59 | - Repository: [https://github.com/ArduPilot/ardupilot_ros](https://github.com/ArduPilot/ardupilot_ros)
60 |
61 | ### 4. Data Transfer Between Drone and Server
62 | - **Flask** is used to transfer telemetry, images, or processed data between the drone's companion computer and an external server.
63 | - Flask Documentation: [https://flask.palletsprojects.com/en/stable/](https://flask.palletsprojects.com/en/stable/)
64 |
65 | ---
66 |
67 | ## Hardware Setup
68 |
69 | ### Flight Controller
70 | - **SpeedyBee F405 (STM32-based)**
71 | - Firmware: **ArduPilot v4.4.4**
72 |
73 | ### Companion Computer
74 | - **Intel NUC**
75 | - Runs ROS and processes data from the RealSense and VIO system.
76 |
77 | ### Sensors
78 | - **Intel RealSense T265** (or compatible) for fisheye frame.
79 | - IMU and camera used by OpenVINS for state estimation.
80 |
81 | ---
82 |
--------------------------------------------------------------------------------
/drone/vla_ws/src/vlm_drone/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.0.2)
2 | project(vlm_drone)
3 |
4 | ## Compile as C++11, supported in ROS Kinetic and newer
5 | # add_compile_options(-std=c++11)
6 |
7 | ## Find catkin macros and libraries
8 | ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
9 | ## is used, also find other catkin packages
10 | find_package(catkin REQUIRED COMPONENTS
11 | roscpp
12 | rospy
13 | std_msgs
14 | )
15 |
16 | ## System dependencies are found with CMake's conventions
17 | # find_package(Boost REQUIRED COMPONENTS system)
18 |
19 |
20 | ## Uncomment this if the package has a setup.py. This macro ensures
21 | ## modules and global scripts declared therein get installed
22 | ## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
23 | # catkin_python_setup()
24 |
25 | ################################################
26 | ## Declare ROS messages, services and actions ##
27 | ################################################
28 |
29 | ## To declare and build messages, services or actions from within this
30 | ## package, follow these steps:
31 | ## * Let MSG_DEP_SET be the set of packages whose message types you use in
32 | ## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
33 | ## * In the file package.xml:
34 | ## * add a build_depend tag for "message_generation"
35 | ## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET
36 | ## * If MSG_DEP_SET isn't empty the following dependency has been pulled in
37 | ## but can be declared for certainty nonetheless:
38 | ## * add a exec_depend tag for "message_runtime"
39 | ## * In this file (CMakeLists.txt):
40 | ## * add "message_generation" and every package in MSG_DEP_SET to
41 | ## find_package(catkin REQUIRED COMPONENTS ...)
42 | ## * add "message_runtime" and every package in MSG_DEP_SET to
43 | ## catkin_package(CATKIN_DEPENDS ...)
44 | ## * uncomment the add_*_files sections below as needed
45 | ## and list every .msg/.srv/.action file to be processed
46 | ## * uncomment the generate_messages entry below
47 | ## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
48 |
49 | ## Generate messages in the 'msg' folder
50 | # add_message_files(
51 | # FILES
52 | # Message1.msg
53 | # Message2.msg
54 | # )
55 |
56 | ## Generate services in the 'srv' folder
57 | # add_service_files(
58 | # FILES
59 | # Service1.srv
60 | # Service2.srv
61 | # )
62 |
63 | ## Generate actions in the 'action' folder
64 | # add_action_files(
65 | # FILES
66 | # Action1.action
67 | # Action2.action
68 | # )
69 |
70 | ## Generate added messages and services with any dependencies listed here
71 | # generate_messages(
72 | # DEPENDENCIES
73 | # std_msgs
74 | # )
75 |
76 | ################################################
77 | ## Declare ROS dynamic reconfigure parameters ##
78 | ################################################
79 |
80 | ## To declare and build dynamic reconfigure parameters within this
81 | ## package, follow these steps:
82 | ## * In the file package.xml:
83 | ## * add a build_depend and a exec_depend tag for "dynamic_reconfigure"
84 | ## * In this file (CMakeLists.txt):
85 | ## * add "dynamic_reconfigure" to
86 | ## find_package(catkin REQUIRED COMPONENTS ...)
87 | ## * uncomment the "generate_dynamic_reconfigure_options" section below
88 | ## and list every .cfg file to be processed
89 |
90 | ## Generate dynamic reconfigure parameters in the 'cfg' folder
91 | # generate_dynamic_reconfigure_options(
92 | # cfg/DynReconf1.cfg
93 | # cfg/DynReconf2.cfg
94 | # )
95 |
96 | ###################################
97 | ## catkin specific configuration ##
98 | ###################################
99 | ## The catkin_package macro generates cmake config files for your package
100 | ## Declare things to be passed to dependent projects
101 | ## INCLUDE_DIRS: uncomment this if your package contains header files
102 | ## LIBRARIES: libraries you create in this project that dependent projects also need
103 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need
104 | ## DEPENDS: system dependencies of this project that dependent projects also need
105 | catkin_package(
106 | # INCLUDE_DIRS include
107 | # LIBRARIES vlm_drone
108 | # CATKIN_DEPENDS roscpp rospy std_msgs
109 | # DEPENDS system_lib
110 | )
111 |
112 | ###########
113 | ## Build ##
114 | ###########
115 |
116 | ## Specify additional locations of header files
117 | ## Your package locations should be listed before other locations
118 | include_directories(
119 | # include
120 | ${catkin_INCLUDE_DIRS}
121 | )
122 |
123 | ## Declare a C++ library
124 | # add_library(${PROJECT_NAME}
125 | # src/${PROJECT_NAME}/vlm_drone.cpp
126 | # )
127 |
128 | ## Add cmake target dependencies of the library
129 | ## as an example, code may need to be generated before libraries
130 | ## either from message generation or dynamic reconfigure
131 | # add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
132 |
133 | ## Declare a C++ executable
134 | ## With catkin_make all packages are built within a single CMake context
135 | ## The recommended prefix ensures that target names across packages don't collide
136 | # add_executable(${PROJECT_NAME}_node src/vlm_drone_node.cpp)
137 |
138 | ## Rename C++ executable without prefix
139 | ## The above recommended prefix causes long target names, the following renames the
140 | ## target back to the shorter version for ease of user use
141 | ## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
142 | # set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")
143 |
144 | ## Add cmake target dependencies of the executable
145 | ## same as for the library above
146 | # add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
147 |
148 | ## Specify libraries to link a library or executable target against
149 | # target_link_libraries(${PROJECT_NAME}_node
150 | # ${catkin_LIBRARIES}
151 | # )
152 |
153 | #############
154 | ## Install ##
155 | #############
156 |
157 | # all install targets should use catkin DESTINATION variables
158 | # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
159 |
160 | ## Mark executable scripts (Python etc.) for installation
161 | ## in contrast to setup.py, you can choose the destination
162 | # catkin_install_python(PROGRAMS
163 | # scripts/my_python_script
164 | # DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
165 | # )
166 |
167 | ## Mark executables for installation
168 | ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html
169 | # install(TARGETS ${PROJECT_NAME}_node
170 | # RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
171 | # )
172 |
173 | ## Mark libraries for installation
174 | ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html
175 | # install(TARGETS ${PROJECT_NAME}
176 | # ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
177 | # LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
178 | # RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION}
179 | # )
180 |
181 | ## Mark cpp header files for installation
182 | # install(DIRECTORY include/${PROJECT_NAME}/
183 | # DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
184 | # FILES_MATCHING PATTERN "*.h"
185 | # PATTERN ".svn" EXCLUDE
186 | # )
187 |
188 | ## Mark other files for installation (e.g. launch and bag files, etc.)
189 | # install(FILES
190 | # # myfile1
191 | # # myfile2
192 | # DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
193 | # )
194 |
195 | #############
196 | ## Testing ##
197 | #############
198 |
199 | ## Add gtest based cpp test target and link libraries
200 | # catkin_add_gtest(${PROJECT_NAME}-test test/test_vlm_drone.cpp)
201 | # if(TARGET ${PROJECT_NAME}-test)
202 | # target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
203 | # endif()
204 |
205 | ## Add folders to be run by python nosetests
206 | # catkin_add_nosetests(test)
207 |
--------------------------------------------------------------------------------
/drone/vla_ws/src/vlm_drone/scripts/drone.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import rospy
4 | from sensor_msgs.msg import Image
5 | from cv_bridge import CvBridge
6 | import cv2
7 |
8 | from mavros_msgs.srv import CommandBool, SetMode, CommandTOL
9 | from mavros_msgs.msg import State, PositionTarget
10 | from geometry_msgs.msg import PoseStamped, TwistStamped, TransformStamped
11 |
12 | import numpy as np
13 | import requests
14 | import time
15 | import math
16 |
17 | SERVER_URL = "http://192.168.50.2:5000/predict_action"
18 |
class ImageProcessor:
    """ROS node that streams camera frames to the RaceVLA server and turns
    the returned actions into MAVROS velocity commands.

    Every ``c_rate``-th frame from the T265 fisheye camera is forwarded to
    the inference server; the JSON reply ([vx, vy, vz, delta_yaw] in the
    body frame) is scaled, rotated into the world frame using the current
    yaw estimate, and published on /mavros/setpoint_velocity/cmd_vel.
    """

    def __init__(self):
        rospy.init_node('image_saver', anonymous=True)
        self.rate = rospy.Rate(30)

        # Set True once the takeoff sequence completes; velocity commands
        # are only published after that.
        self.is_initiate = False

        self.frame = None  # last camera frame as a BGR ndarray
        self.bridge = CvBridge()
        self.image_sub = rospy.Subscriber('/t265/fisheye1/image_raw', Image, self.image_callback)

        self.instruction = "Fly through the gates and avoid obstacles"
        self.drone_actions = [0, 0, 0, 0]  # vx, vy, vz, dyaw
        self.once = True

        # Query the server once every c_rate camera frames.
        self.counter = 0
        self.c_rate = 20

        # Gains applied to the raw VLA actions before publishing.
        self.vel_k_z = 0.5
        self.vel_l_xy = 1.875
        self.yaw_k = 1.5

        self.results = []
        self.yaw_drone = 0  # latest yaw estimate (radians)
        self.connected = False
        self.armed = False

        rospy.wait_for_service('/mavros/cmd/arming')
        rospy.wait_for_service('/mavros/set_mode')
        rospy.wait_for_service('/mavros/cmd/takeoff')
        rospy.wait_for_service('/mavros/cmd/land')

        # Yaw sources: Vicon mocap and VIO; whichever arrives last wins.
        rospy.Subscriber("/vicon/drone_vla/drone_vla", TransformStamped, self.orient_callback)
        rospy.Subscriber("/mavros/vision_pose/pose", PoseStamped, self.orient_callback_vio)
        rospy.Subscriber("/mavros/state", State, self.callback_state, queue_size=10)

        self.arming_client = rospy.ServiceProxy('/mavros/cmd/arming', CommandBool)
        self.set_mode_client = rospy.ServiceProxy('/mavros/set_mode', SetMode)
        self.takeoff_client = rospy.ServiceProxy('/mavros/cmd/takeoff', CommandTOL)
        self.land_cl = rospy.ServiceProxy('/mavros/cmd/land', CommandTOL)

        self.pose_pub = rospy.Publisher('/mavros/setpoint_position/local', PoseStamped, queue_size=1)
        self.vel_publisher = rospy.Publisher('/mavros/setpoint_velocity/cmd_vel', TwistStamped, queue_size=1)
        self.raw_pub = rospy.Publisher('/mavros/setpoint_raw/local', PositionTarget, queue_size=1)
        self.vla_image_pub = rospy.Publisher('/vla_image', Image, queue_size=10)

    @staticmethod
    def _euler_from_quaternion(x, y, z, w):
        """Convert a quaternion into euler angles.

        Returns:
            (roll_x, pitch_y, yaw_z): rotations around x, y, z in radians
            (counterclockwise).
        """
        t0 = +2.0 * (w * x + y * z)
        t1 = +1.0 - 2.0 * (x * x + y * y)
        roll_x = math.atan2(t0, t1)
        t2 = +2.0 * (w * y - z * x)
        t2 = max(-1.0, min(+1.0, t2))  # clamp into asin's domain
        pitch_y = math.asin(t2)
        t3 = +2.0 * (w * z + x * y)
        t4 = +1.0 - 2.0 * (y * y + z * z)
        yaw_z = math.atan2(t3, t4)
        return roll_x, pitch_y, yaw_z

    def prepare_vel(self, vel):
        """Scale raw [vx, vy, vz, dyaw] actions and rotate the horizontal
        velocity from the body frame into the world frame using the
        current yaw estimate.  Returns [vx, vy, vz, yaw_rate]."""
        vx_r = self.vel_l_xy * (vel[0] * math.cos(self.yaw_drone) - vel[1] * math.sin(self.yaw_drone))
        vy_r = self.vel_l_xy * (vel[0] * math.sin(self.yaw_drone) + vel[1] * math.cos(self.yaw_drone))
        vz_r = self.vel_k_z * vel[2]
        yaw_r = self.yaw_k * vel[3]
        return [vx_r, vy_r, vz_r, yaw_r]

    def callback_state(self, state):
        """Mirror FCU connection/armed flags from /mavros/state."""
        self.connected = state.connected
        self.armed = state.armed

    def orient_callback_vio(self, msg):
        """Update the yaw estimate from the VIO pose (PoseStamped)."""
        q = msg.pose.orientation
        _, _, yaw = self._euler_from_quaternion(q.x, q.y, q.z, q.w)
        self.yaw_drone = yaw

    def orient_callback(self, msg):
        """Update the yaw estimate from the Vicon transform (TransformStamped)."""
        q = msg.transform.rotation
        _, _, yaw = self._euler_from_quaternion(q.x, q.y, q.z, q.w)
        self.yaw_drone = yaw

    def take_off_init(self):
        """Switch to GUIDED, take off to 0.6 m, wait, then enable VLA control."""
        self.set_mode("GUIDED")
        self.takeoff(height=0.6)
        rospy.sleep(5)
        self.is_initiate = True

    def arm(self):
        """
        Sets the drone mode to "GUIDED" and arms the drone, enabling it to
        complete missions and tasks.  Both requests are retried until the
        FCU acknowledges them.
        """

        # Changes mode to "GUIDED"
        try:
            rospy.logwarn("sending mode")
            resp = self.set_mode_client(0, 'GUIDED')
            while not resp.mode_sent:
                resp = self.set_mode_client(0, 'GUIDED')
                self.rate.sleep()
            if resp.mode_sent:
                rospy.logwarn("Mode changed")
        except rospy.ServiceException as e:
            rospy.logerr("Service call failed: %s" % e)

        # Arms the drone
        try:
            rospy.logwarn("sending arm request")
            resp = self.arming_client(True)
            while not resp.success:
                resp = self.arming_client(True)
                self.rate.sleep()
            if resp.success:
                rospy.logwarn("armed")
        except rospy.ServiceException as e:
            rospy.logerr("Service call failed: %s" % e)

    def takeoff(self, height=0.5):
        """
        The takeoff strategy for the drone: arm if needed, then retry the
        MAVROS takeoff service until it reports success.
        """

        # Arm the drone
        if not self.armed:
            self.arm()

        # Takeoff
        try:
            rospy.logwarn("sending takeoff request")
            resp = self.takeoff_client(0, 0, 0, 0, height)
            while not resp.success:
                resp = self.takeoff_client(0, 0, 0, 0, height)
                self.rate.sleep()

            if resp.success:
                rospy.logwarn("takeoff successful")

        except rospy.ServiceException as e:
            rospy.logerr("Service call failed: %s" % e)
        rospy.logwarn("takeoff END")

    def set_mode(self, mode):
        """Request a flight-mode change once and log the outcome."""
        try:
            rospy.loginfo(f"Setting mode to {mode}...")
            response = self.set_mode_client(0, mode)
            if response.mode_sent:
                rospy.loginfo(f"Mode set to {mode} successfully")
            else:
                rospy.logerr(f"Failed to set mode to {mode}")
        except rospy.ServiceException as e:
            rospy.logerr("Service call failed: %s" % e)

    def image_callback(self, msg):
        """Camera callback: every c_rate-th frame, query the VLA server
        and (after takeoff) publish the resulting velocity command."""
        self.counter += 1

        if self.counter >= self.c_rate:
            self.counter = 0
            self.once = True

        if self.once:
            self.vla_image_pub.publish(msg)

            self.frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
            self.frame = np.asanyarray(self.frame)

            self.results = self.process_and_predict()
            self.once = False

            try:
                self.drone_actions = self.get_action_from_json(self.results)
                if self.is_initiate:
                    # BUGFIX: the yaw field previously printed
                    # drone_actions[2] (vz) under the misspelled label
                    # "d_ayw"; it now prints drone_actions[3] (delta yaw).
                    print("Result: vx: " + str(round(self.drone_actions[0], 3)) + " " +
                          "vy: " + str(round(self.drone_actions[1], 3)) + " " +
                          "vz: " + str(round(self.drone_actions[2], 3)) + " " +
                          "d_yaw " + str(round(self.drone_actions[3], 3)))
                    rospy.loginfo("-- VEL CMD SENT --")

                    self.set_vel(self.prepare_vel(self.drone_actions))

            except Exception as e:
                print(f"Error: {e}")

    def set_vel(self, vel):
        """Publish [vx, vy, vz, yaw_rate] as a stamped twist on
        /mavros/setpoint_velocity/cmd_vel."""
        vel_cmd = TwistStamped()
        vel_cmd.header.frame_id = "map"
        vel_cmd.header.stamp = rospy.Time.now()
        vel_cmd.twist.linear.x = vel[0]
        vel_cmd.twist.linear.y = vel[1]
        vel_cmd.twist.linear.z = vel[2]
        vel_cmd.twist.angular.z = vel[3]
        self.vel_publisher.publish(vel_cmd)

    def get_action_from_json(self, data):
        """Extract [vx, vy, vz, delta_yaw] from the first entry of the
        server-result list; missing fields come back as None.  Returns
        None for an empty list."""
        for entry in data:
            result = entry.get('result', {})
            delta_yaw = result.get('delta_yaw', None)
            velocities = result.get('velocities', {})
            vx = velocities.get('x', None)
            vy = velocities.get('y', None)
            vz = velocities.get('z', None)
            return [vx, vy, vz, delta_yaw]

    def send_image_to_server(self):
        """JPEG-encode the current frame, POST it with the instruction to
        the VLA server, and return the decoded JSON reply.

        Raises:
            RuntimeError: if the server answers with a non-200 status.
        """
        _, image_encoded = cv2.imencode('.jpg', self.frame)
        start_time = time.time()
        response = requests.post(
            SERVER_URL,
            files={"image": ("image.jpg", image_encoded.tobytes(), "image/jpeg")},
            data={"instruction": self.instruction}
        )
        end_time = time.time()
        total_time = end_time - start_time

        if response.status_code == 200:
            print(f"Server round-trip time: {total_time:.2f} seconds")
            return response.json()
        else:
            raise RuntimeError(f"Server error: {response.text}")

    def process_and_predict(self):
        """Send the current frame to the server; wrap the reply (or the
        error) in a one-element list of dicts."""
        results = []

        try:
            print("Processing...")
            result = self.send_image_to_server()
            results.append({"result": result})

        except Exception as e:
            print(f"Error processing: {e}")
            results.append({"error": str(e)})

        return results
285 |
286 |
# Entry point: construct the node, run the takeoff sequence, then spin so
# the subscriber callbacks drive the control loop.
if __name__ == "__main__":
    node = ImageProcessor()
    node.take_off_init()
    try:
        rospy.spin()
    except rospy.ROSInterruptException:
        pass
294 |
295 |
296 |
--------------------------------------------------------------------------------