├── drone ├── vla_ws │ └── src │ │ ├── CMakeLists.txt │ │ └── vlm_drone │ │ ├── launch │ │ ├── node.launch │ │ └── apm.launch │ │ ├── scripts │ │ ├── vla_start.sh │ │ └── drone.py │ │ ├── package.xml │ │ └── CMakeLists.txt └── readme.md ├── server └── server.py └── README.md /drone/vla_ws/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | /opt/ros/noetic/share/catkin/cmake/toplevel.cmake -------------------------------------------------------------------------------- /drone/vla_ws/src/vlm_drone/launch/node.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /drone/readme.md: -------------------------------------------------------------------------------- 1 | ### Run camera 2 | roslaunch racing_vla rs_t265.launch 3 | 4 | ### Run FCU 5 | roslaunch racing_vla apm.launch 6 | 7 | ### Run VLA 8 | rosrun vlm_drone drone.py 9 | 10 | ### Recording experements 11 | rosbag record /t265/fisheye1/image_raw /t265/fisheye1/camera_info /mavros/vision_pose/pose /mavros/setpoint_velocity/cmd_vel /vla_image /tf 12 | 13 | ### Recording experements (only traj and actions) 14 | rosbag record /t265/fisheye1/image_raw /vicon/drone_vla/drone_vla /mavros/vision_pose/pose /mavros/setpoint_velocity/cmd_vel 15 | 16 | for bashrc 17 | alias vla_rs="roslaunch racing_vla rs_t265.launch" 18 | alias vla_fcu="roslaunch racing_vla apm.launch" 19 | alias vla_run="rosrun vlm_drone drone.py" 20 | 21 | ### All launch 22 | /vla_ws/src/vlm_drone/scripts/vla_start.sh -------------------------------------------------------------------------------- /drone/vla_ws/src/vlm_drone/launch/apm.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 29 | 30 | 31 | 32 | 33 | 34 | 
-------------------------------------------------------------------------------- /drone/vla_ws/src/vlm_drone/scripts/vla_start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | tmux new-session -d -s flightsession 4 | 5 | # initial window 6 | tmux send-keys -t flightsession:0.0 "source /opt/ros/noetic/setup.bash && roscore" Enter 7 | 8 | # first window 9 | tmux new-window -n "Camera" 10 | tmux split-window -v 11 | tmux send-keys -t flightsession:1.0 "source /home/nuc/vlm_ws/devel/setup.bash && mon launch racing_vla rs_t265.launch" Enter 12 | sleep 3 13 | tmux send-keys -t flightsession:1.1 "rosbag record /t265/fisheye1/image_raw /vicon/drone_vla/drone_vla /mavros/vision_pose/pose /mavros/setpoint_velocity/cmd_vel /vicon/gate/gate" 14 | 15 | # second window 16 | tmux new-window -n "VIO" 17 | tmux split-window -v 18 | tmux send-keys -t flightsession:2.0 "source /home/nuc/openvins_ws/devel/setup.bash && mon launch /home/nuc/openvins_ws/src/open_vins/ov_msckf/launch/subscribe_with_transform.launch config:=rs_t265" Enter 19 | tmux send-keys -t flightsession:2.1 "source /home/nuc/openvins_ws/devel/setup.bash && rosrun ov_msckf apply_tf_to_global.py" Enter 20 | tmux split-window -v 21 | tmux send-keys -t flightsession:2.2 "source /home/nuc/openvins_ws/devel/setup.bash && rosrun ov_msckf apply_ov_to_room.py" Enter 22 | tmux split-window -h 23 | tmux send-keys -t flightsession:2.3 "source /home/nuc/openvins_ws/devel/setup.bash && rosrun ov_msckf correct_tf_frame.py" Enter 24 | 25 | 26 | # third window 27 | tmux new-window -n "APM Initialize" 28 | tmux send-keys -t flightsession:3.0 "source /home/nuc/vlm_ws/devel/setup.bash && mon launch racing_vla apm.launch" Enter 29 | tmux split-window -v 30 | tmux send-keys -t flightsession:3.1 "source /home/nuc/vlm_ws/devel/setup.bash && mon launch vlm_drone node.launch" 31 | 32 | tmux bind-key -n S-Left select-pane -L 33 | tmux bind-key -n S-Right select-pane -R 34 | tmux 
#!/usr/bin/env python3
"""Flask inference server for the fine-tuned OpenVLA (RaceVLA) model.

Accepts a POST with an image file and a text instruction, runs the VLA
model on GPU, and returns the predicted drone action (linear velocities
and a yaw delta) as JSON.
"""
from flask import Flask, request, jsonify
from transformers import AutoModelForVision2Seq, AutoProcessor
from PIL import Image
import torch
import numpy as np
import io
import time

app = Flask(__name__)

# Fine-tuned checkpoint directory (LoRA-merged OpenVLA-7B). Swap for
# "openvla/openvla-7b" to use the base model instead.
PROCESSOR_DIR = "/home/isr-lab-4/openvla/models/drone3_cycle/runs/openvla-7b+drone_set3+b16+lr-0.0005+lora-r32+dropout-0.0/"
MODEL_DIR = "/home/isr-lab-4/openvla/models/drone3_cycle/finetuned_model/openvla-7b+drone_set3+b16+lr-0.0005+lora-r32+dropout-0.0/"

processor = AutoProcessor.from_pretrained(
    PROCESSOR_DIR,
    trust_remote_code=True,
)
vla = AutoModelForVision2Seq.from_pretrained(
    MODEL_DIR,
    attn_implementation="flash_attention_2",  # [Optional] Requires `flash_attn`
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
).to("cuda:0")


@app.route('/predict_action', methods=['POST'])
def predict_action():
    """Run one VLA inference step.

    Expects multipart form data:
        image       -- JPEG/PNG bytes of the onboard camera frame
        instruction -- natural-language task string

    Returns:
        200 with JSON {"velocities": {"x","y","z"}, "delta_yaw"} on success,
        500 with JSON {"error": "..."} on any failure.
    """
    try:
        image_file = request.files['image']
        instruction = request.form['instruction']
        image = Image.open(io.BytesIO(image_file.read())).convert("RGB")

        prompt = f"In: What action should the robot take to {instruction}?\nOut:"
        inputs = processor(prompt, image, return_tensors="pt").to("cuda:0", dtype=torch.bfloat16)

        time0 = time.perf_counter()
        # change unnorm key to (drone_set4) if u r using one gate (different
        # positions), change it to drone_set5_two_gates for two gates and arched
        action = vla.predict_action(**inputs, unnorm_key="drone_set3", do_sample=False)
        time1 = time.perf_counter()
        print(f"Prediction time = {time1 - time0} sec")

        # Both tensor types expose .tolist(); collapse the duplicated branches.
        if isinstance(action, (torch.Tensor, np.ndarray)):
            action = action.tolist()

        # Action layout (7-DoF VLA output): [vx, vy, vz, roll, pitch, yaw, grip];
        # the drone consumes only linear velocity and the yaw component.
        formatted_action = {
            "velocities": {
                "x": action[0],
                "y": action[1],
                "z": action[2],
            },
            "delta_yaw": action[5],
        }
        print(formatted_action)
        return jsonify(formatted_action)

    except Exception as e:
        # Broad catch is deliberate: the drone client treats any non-200 as
        # "no action this frame" rather than crashing the control loop.
        print(f"Error: {str(e)}")
        return jsonify({"error": str(e)}), 500


if __name__ == '__main__':
    # Listen on all interfaces so the drone's companion computer can reach us.
    app.run(host='0.0.0.0', port=5000)
4 | 5 | - Arxiv link: [http://arxiv.org/abs/2503.02572] 6 | - Pretrained models: [https://huggingface.co/SerValera/RaceVLA_models] 7 | - Dataset: [https://huggingface.co/datasets/SerValera/RaceVLA_dataset] 8 | 9 | This repository contains the codebase for launching a racing drone and the necessary server-side scripts to host and run the RaceVLA model. 10 | Repository Structure 11 | 12 | /drone/ — Scripts and configurations to initialize and launch the racing drone. 13 | /server/ — Scripts to set up a server environment where the RaceVLA model is deployed and running. 14 | 15 | Features 16 | 17 | - Drone initialization and takeoff procedures 18 | - Communication setup between drone and RaceVLA server 19 | - RaceVLA model inference handling on the server side 20 | - Data exchange scripts to process and send image from onboard camera and receive control commands from the RaceVLA model 21 | 22 | ## System Overview 23 | 24 | The project integrates: 25 | 26 | - **Visual-Inertial Odometry (VIO)** for state estimation using [open_vins](https://github.com/rpng/open_vins) 27 | - **FishEye T265** with Intel RealSense [realsense-ros](https://github.com/IntelRealSense/realsense-ros) 28 | - **Flight control** using **ArduPilot firmware (v4.4.4)** running on a **SpeedyBee F4 V4 flight controller**. 29 | - **ROS 1** for data handling, visualization, and control logic. 30 | - **Flask** for transferring data between the drone and a remote server. 31 | --- 32 | 33 | ## Tested Environment 34 | 35 | - **Platform:** Intel NUC 36 | - **OS:** Ubuntu 22.04 37 | - **Flight Controller:** SpeedyBee STM32F405 ARM 38 | - **Firmware:** ArduPilot v4.4.4 39 | - **Middleware:** ROS 1 (Noetic) 40 | - **Communication Framework:** Flask (Python-based server) 41 | 42 | --- 43 | 44 | ## Project Components 45 | 46 | ### 1. Visual-Inertial Odometry 47 | - OpenVINS provides real-time visual-inertial state estimation. 
48 | - Repository: [https://github.com/rpng/open_vins](https://github.com/rpng/open_vins) 49 | 50 | ### 2. FishEye T265 51 | - Intel RealSense T265 camera for VLA vision. 52 | - ROS integration via realsense-ros. 53 | - Repository: [https://github.com/IntelRealSense/realsense-ros](https://github.com/IntelRealSense/realsense-ros) 54 | 55 | ### 3. Flight Control 56 | - ArduPilot firmware for vehicle stabilization and high-level commands. 57 | - ROS integration via `ardupilot_ros` for bidirectional communication. 58 | - Documentation: [https://ardupilot.org/dev/docs/ros.html](https://ardupilot.org/dev/docs/ros.html) 59 | - Repository: [https://github.com/ArduPilot/ardupilot_ros](https://github.com/ArduPilot/ardupilot_ros) 60 | 61 | ### 4. Data Transfer Between Drone and Server 62 | - **Flask** is used to transfer telemetry, images, or processed data between the drone's companion computer and an external server. 63 | - Flask Documentation: [https://flask.palletsprojects.com/en/stable/](https://flask.palletsprojects.com/en/stable/) 64 | 65 | --- 66 | 67 | ## Hardware Setup 68 | 69 | ### Flight Controller 70 | - **SpeedyBee F405 (STM32-based)** 71 | - Firmware: **ArduPilot v4.4.4** 72 | 73 | ### Companion Computer 74 | - **Intel NUC** 75 | - Runs ROS and processes data from the RealSense and VIO system. 76 | 77 | ### Sensors 78 | - **Intel RealSense T265** (or compatible) for fisheye frame. 79 | - IMU and camera used by OpenVINS for state estimation. 
80 | 81 | --- 82 | -------------------------------------------------------------------------------- /drone/vla_ws/src/vlm_drone/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0.2) 2 | project(vlm_drone) 3 | 4 | ## Compile as C++11, supported in ROS Kinetic and newer 5 | # add_compile_options(-std=c++11) 6 | 7 | ## Find catkin macros and libraries 8 | ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) 9 | ## is used, also find other catkin packages 10 | find_package(catkin REQUIRED COMPONENTS 11 | roscpp 12 | rospy 13 | std_msgs 14 | ) 15 | 16 | ## System dependencies are found with CMake's conventions 17 | # find_package(Boost REQUIRED COMPONENTS system) 18 | 19 | 20 | ## Uncomment this if the package has a setup.py. This macro ensures 21 | ## modules and global scripts declared therein get installed 22 | ## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html 23 | # catkin_python_setup() 24 | 25 | ################################################ 26 | ## Declare ROS messages, services and actions ## 27 | ################################################ 28 | 29 | ## To declare and build messages, services or actions from within this 30 | ## package, follow these steps: 31 | ## * Let MSG_DEP_SET be the set of packages whose message types you use in 32 | ## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). 33 | ## * In the file package.xml: 34 | ## * add a build_depend tag for "message_generation" 35 | ## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET 36 | ## * If MSG_DEP_SET isn't empty the following dependency has been pulled in 37 | ## but can be declared for certainty nonetheless: 38 | ## * add a exec_depend tag for "message_runtime" 39 | ## * In this file (CMakeLists.txt): 40 | ## * add "message_generation" and every package in MSG_DEP_SET to 41 | ## find_package(catkin REQUIRED COMPONENTS ...) 
42 | ## * add "message_runtime" and every package in MSG_DEP_SET to 43 | ## catkin_package(CATKIN_DEPENDS ...) 44 | ## * uncomment the add_*_files sections below as needed 45 | ## and list every .msg/.srv/.action file to be processed 46 | ## * uncomment the generate_messages entry below 47 | ## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) 48 | 49 | ## Generate messages in the 'msg' folder 50 | # add_message_files( 51 | # FILES 52 | # Message1.msg 53 | # Message2.msg 54 | # ) 55 | 56 | ## Generate services in the 'srv' folder 57 | # add_service_files( 58 | # FILES 59 | # Service1.srv 60 | # Service2.srv 61 | # ) 62 | 63 | ## Generate actions in the 'action' folder 64 | # add_action_files( 65 | # FILES 66 | # Action1.action 67 | # Action2.action 68 | # ) 69 | 70 | ## Generate added messages and services with any dependencies listed here 71 | # generate_messages( 72 | # DEPENDENCIES 73 | # std_msgs 74 | # ) 75 | 76 | ################################################ 77 | ## Declare ROS dynamic reconfigure parameters ## 78 | ################################################ 79 | 80 | ## To declare and build dynamic reconfigure parameters within this 81 | ## package, follow these steps: 82 | ## * In the file package.xml: 83 | ## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" 84 | ## * In this file (CMakeLists.txt): 85 | ## * add "dynamic_reconfigure" to 86 | ## find_package(catkin REQUIRED COMPONENTS ...) 
87 | ## * uncomment the "generate_dynamic_reconfigure_options" section below 88 | ## and list every .cfg file to be processed 89 | 90 | ## Generate dynamic reconfigure parameters in the 'cfg' folder 91 | # generate_dynamic_reconfigure_options( 92 | # cfg/DynReconf1.cfg 93 | # cfg/DynReconf2.cfg 94 | # ) 95 | 96 | ################################### 97 | ## catkin specific configuration ## 98 | ################################### 99 | ## The catkin_package macro generates cmake config files for your package 100 | ## Declare things to be passed to dependent projects 101 | ## INCLUDE_DIRS: uncomment this if your package contains header files 102 | ## LIBRARIES: libraries you create in this project that dependent projects also need 103 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need 104 | ## DEPENDS: system dependencies of this project that dependent projects also need 105 | catkin_package( 106 | # INCLUDE_DIRS include 107 | # LIBRARIES vlm_drone 108 | # CATKIN_DEPENDS roscpp rospy std_msgs 109 | # DEPENDS system_lib 110 | ) 111 | 112 | ########### 113 | ## Build ## 114 | ########### 115 | 116 | ## Specify additional locations of header files 117 | ## Your package locations should be listed before other locations 118 | include_directories( 119 | # include 120 | ${catkin_INCLUDE_DIRS} 121 | ) 122 | 123 | ## Declare a C++ library 124 | # add_library(${PROJECT_NAME} 125 | # src/${PROJECT_NAME}/vlm_drone.cpp 126 | # ) 127 | 128 | ## Add cmake target dependencies of the library 129 | ## as an example, code may need to be generated before libraries 130 | ## either from message generation or dynamic reconfigure 131 | # add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) 132 | 133 | ## Declare a C++ executable 134 | ## With catkin_make all packages are built within a single CMake context 135 | ## The recommended prefix ensures that target names across packages don't collide 136 | # 
add_executable(${PROJECT_NAME}_node src/vlm_drone_node.cpp) 137 | 138 | ## Rename C++ executable without prefix 139 | ## The above recommended prefix causes long target names, the following renames the 140 | ## target back to the shorter version for ease of user use 141 | ## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" 142 | # set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") 143 | 144 | ## Add cmake target dependencies of the executable 145 | ## same as for the library above 146 | # add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) 147 | 148 | ## Specify libraries to link a library or executable target against 149 | # target_link_libraries(${PROJECT_NAME}_node 150 | # ${catkin_LIBRARIES} 151 | # ) 152 | 153 | ############# 154 | ## Install ## 155 | ############# 156 | 157 | # all install targets should use catkin DESTINATION variables 158 | # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html 159 | 160 | ## Mark executable scripts (Python etc.) 
for installation 161 | ## in contrast to setup.py, you can choose the destination 162 | # catkin_install_python(PROGRAMS 163 | # scripts/my_python_script 164 | # DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 165 | # ) 166 | 167 | ## Mark executables for installation 168 | ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html 169 | # install(TARGETS ${PROJECT_NAME}_node 170 | # RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 171 | # ) 172 | 173 | ## Mark libraries for installation 174 | ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html 175 | # install(TARGETS ${PROJECT_NAME} 176 | # ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 177 | # LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 178 | # RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} 179 | # ) 180 | 181 | ## Mark cpp header files for installation 182 | # install(DIRECTORY include/${PROJECT_NAME}/ 183 | # DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} 184 | # FILES_MATCHING PATTERN "*.h" 185 | # PATTERN ".svn" EXCLUDE 186 | # ) 187 | 188 | ## Mark other files for installation (e.g. launch and bag files, etc.) 
#!/usr/bin/env python3
"""ROS node that streams camera frames to the RaceVLA server and turns
the returned actions into MAVROS velocity setpoints.

Pipeline: subscribe to the T265 fisheye image -> every `c_rate` frames
POST the frame + instruction to the inference server -> scale the
returned body-frame velocities into the world frame using the current
yaw -> publish a TwistStamped on /mavros/setpoint_velocity/cmd_vel.
"""

import rospy
from sensor_msgs.msg import Image
from cv_bridge import CvBridge
import cv2

from mavros_msgs.srv import CommandBool, SetMode, CommandTOL
from mavros_msgs.msg import State, PositionTarget
from geometry_msgs.msg import PoseStamped, TwistStamped, TransformStamped

import numpy as np
import requests
import time
import math

SERVER_URL = "http://192.168.50.2:5000/predict_action"


def _euler_from_quaternion(x, y, z, w):
    """Convert a quaternion into Euler angles (roll, pitch, yaw).

    Returns rotations around x, y, z in radians (counterclockwise).
    Shared by both pose callbacks; previously duplicated inline in each.
    """
    t0 = +2.0 * (w * x + y * z)
    t1 = +1.0 - 2.0 * (x * x + y * y)
    roll_x = math.atan2(t0, t1)

    t2 = +2.0 * (w * y - z * x)
    t2 = +1.0 if t2 > +1.0 else t2  # clamp: asin domain is [-1, 1]
    t2 = -1.0 if t2 < -1.0 else t2
    pitch_y = math.asin(t2)

    t3 = +2.0 * (w * z + x * y)
    t4 = +1.0 - 2.0 * (y * y + z * z)
    yaw_z = math.atan2(t3, t4)
    return roll_x, pitch_y, yaw_z


class ImageProcessor:
    """Bridges the VLA inference server and the MAVROS flight stack."""

    def __init__(self):
        rospy.init_node('image_saver', anonymous=True)
        self.rate = rospy.Rate(30)

        # Set True only after takeoff completes; gates velocity commands.
        self.is_initiate = False

        self.frame = None
        self.bridge = CvBridge()
        self.image_sub = rospy.Subscriber('/t265/fisheye1/image_raw', Image, self.image_callback)

        self.instruction = "Fly through the gates and avoid obstacles"
        self.drone_actions = [0, 0, 0, 0]  # vx, vy, vz, dyaw
        self.once = True

        # Send one frame to the server every c_rate camera frames.
        self.counter = 0
        self.c_rate = 20

        # Gains applied to the raw VLA action before publishing.
        self.vel_k_z = 0.5
        self.vel_l_xy = 1.875
        self.yaw_k = 1.5

        self.results = []
        self.yaw_drone = 0
        self.connected = False
        self.armed = False

        rospy.wait_for_service('/mavros/cmd/arming')
        rospy.wait_for_service('/mavros/set_mode')
        rospy.wait_for_service('/mavros/cmd/takeoff')
        rospy.wait_for_service('/mavros/cmd/land')

        rospy.Subscriber("/vicon/drone_vla/drone_vla", TransformStamped, self.orient_callback)
        rospy.Subscriber("/mavros/vision_pose/pose", PoseStamped, self.orient_callback_vio)
        rospy.Subscriber("/mavros/state", State, self.callback_state, queue_size=10)

        self.arming_client = rospy.ServiceProxy('/mavros/cmd/arming', CommandBool)
        self.set_mode_client = rospy.ServiceProxy('/mavros/set_mode', SetMode)
        self.takeoff_client = rospy.ServiceProxy('/mavros/cmd/takeoff', CommandTOL)
        self.land_cl = rospy.ServiceProxy('/mavros/cmd/land', CommandTOL)

        self.pose_pub = rospy.Publisher('/mavros/setpoint_position/local', PoseStamped, queue_size=1)
        self.vel_publisher = rospy.Publisher('/mavros/setpoint_velocity/cmd_vel', TwistStamped, queue_size=1)
        self.raw_pub = rospy.Publisher('/mavros/setpoint_raw/local', PositionTarget, queue_size=1)
        self.vla_image_pub = rospy.Publisher('/vla_image', Image, queue_size=10)

    def prepare_vel(self, vel):
        """Rotate body-frame [vx, vy, vz, dyaw] into the world frame and apply gains."""
        vx_r = self.vel_l_xy * (vel[0] * math.cos(self.yaw_drone) - vel[1] * math.sin(self.yaw_drone))
        vy_r = self.vel_l_xy * (vel[0] * math.sin(self.yaw_drone) + vel[1] * math.cos(self.yaw_drone))
        vz_r = self.vel_k_z * vel[2]
        yaw_r = self.yaw_k * vel[3]
        return [vx_r, vy_r, vz_r, yaw_r]

    def callback_state(self, state):
        """Track FCU connection/arming state from /mavros/state."""
        self.connected = state.connected
        self.armed = state.armed

    def orient_callback_vio(self, msg):
        """Update current yaw from the VIO pose (PoseStamped)."""
        o = msg.pose.orientation
        _, _, yaw = _euler_from_quaternion(o.x, o.y, o.z, o.w)
        self.yaw_drone = yaw

    def orient_callback(self, msg):
        """Update current yaw from the Vicon transform (TransformStamped)."""
        r = msg.transform.rotation
        _, _, yaw = _euler_from_quaternion(r.x, r.y, r.z, r.w)
        self.yaw_drone = yaw

    def take_off_init(self):
        """Switch to GUIDED, take off to 0.6 m, then enable the VLA control loop."""
        self.set_mode("GUIDED")
        self.takeoff(height=0.6)
        rospy.sleep(5)
        self.is_initiate = True

    def arm(self):
        """Set mode to GUIDED and arm the drone, retrying until accepted."""
        # Changes mode to "GUIDED"
        try:
            rospy.logwarn("sending mode")
            resp = self.set_mode_client(0, 'GUIDED')
            while not resp.mode_sent:
                resp = self.set_mode_client(0, 'GUIDED')
                self.rate.sleep()
            if resp.mode_sent:
                rospy.logwarn("Mode changed")
        except rospy.ServiceException as e:
            rospy.logerr("Service call failed: %s" % e)

        # Arms the drone
        try:
            rospy.logwarn("sending arm request")
            resp = self.arming_client(True)
            while not resp.success:
                resp = self.arming_client(True)
                self.rate.sleep()
            if resp.success:
                rospy.logwarn("armed")
        except rospy.ServiceException as e:
            rospy.logerr("Service call failed: %s" % e)

    def takeoff(self, height=0.5):
        """Arm if needed, then request takeoff to `height` metres, retrying until accepted."""
        if not self.armed:
            self.arm()

        try:
            rospy.logwarn("sending takeoff request")
            resp = self.takeoff_client(0, 0, 0, 0, height)
            while not resp.success:
                resp = self.takeoff_client(0, 0, 0, 0, height)
                self.rate.sleep()
            if resp.success:
                rospy.logwarn("takeoff successful")
        except rospy.ServiceException as e:
            rospy.logerr("Service call failed: %s" % e)
        rospy.logwarn("takeoff END")

    def set_mode(self, mode):
        """Request a single flight-mode change (no retry)."""
        try:
            rospy.loginfo(f"Setting mode to {mode}...")
            response = self.set_mode_client(0, mode)
            if response.mode_sent:
                rospy.loginfo(f"Mode set to {mode} successfully")
            else:
                rospy.logerr(f"Failed to set mode to {mode}")
        except rospy.ServiceException as e:
            rospy.logerr("Service call failed: %s" % e)

    def image_callback(self, msg):
        """Camera callback: query the server every c_rate frames, republish velocity each frame."""
        self.counter += 1
        if self.counter >= self.c_rate:
            self.counter = 0
            self.once = True

        if self.once:
            self.vla_image_pub.publish(msg)
            self.frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
            self.frame = np.asanyarray(self.frame)
            self.results = self.process_and_predict()
            self.once = False

        try:
            self.drone_actions = self.get_action_from_json(self.results)
            if self.is_initiate:
                # BUGFIX: the yaw field previously printed drone_actions[2]
                # (vz) and was labelled "d_ayw"; print the actual yaw delta.
                print("Result: vx: " + str(round(self.drone_actions[0], 3)) + " " +
                      "vy: " + str(round(self.drone_actions[1], 3)) + " " +
                      "vz: " + str(round(self.drone_actions[2], 3)) + " " +
                      "d_yaw " + str(round(self.drone_actions[3], 3)))
                rospy.loginfo("-- VEL CMD SENT --")
                self.set_vel(self.prepare_vel(self.drone_actions))
        except Exception as e:
            print(f"Error: {e}")

    def set_vel(self, vel):
        """Publish [vx, vy, vz, yaw_rate] as a stamped twist setpoint."""
        vel_cmd = TwistStamped()
        vel_cmd.header.frame_id = "map"
        vel_cmd.header.stamp = rospy.Time.now()
        vel_cmd.twist.linear.x = vel[0]
        vel_cmd.twist.linear.y = vel[1]
        vel_cmd.twist.linear.z = vel[2]
        vel_cmd.twist.angular.z = vel[3]
        self.vel_publisher.publish(vel_cmd)

    def get_action_from_json(self, data):
        """Extract [vx, vy, vz, delta_yaw] from the first server result entry.

        Returns None fields if keys are missing; returns None when `data`
        is empty (caller's try/except absorbs the resulting indexing error).
        """
        for entry in data:
            result = entry.get('result', {})
            delta_yaw = result.get('delta_yaw', None)
            velocities = result.get('velocities', {})
            vx = velocities.get('x', None)
            vy = velocities.get('y', None)
            vz = velocities.get('z', None)
            return [vx, vy, vz, delta_yaw]

    def send_image_to_server(self):
        """POST the current frame + instruction to the VLA server; return parsed JSON.

        Raises:
            RuntimeError: on any non-200 response.
        """
        _, image_encoded = cv2.imencode('.jpg', self.frame)
        start_time = time.time()
        response = requests.post(
            SERVER_URL,
            files={"image": ("image.jpg", image_encoded.tobytes(), "image/jpeg")},
            data={"instruction": self.instruction}
        )
        total_time = time.time() - start_time

        if response.status_code == 200:
            print(f"Server round-trip time: {total_time:.2f} seconds")
            return response.json()
        else:
            raise RuntimeError(f"Server error: {response.text}")

    def process_and_predict(self):
        """Wrap one server round-trip; network failures become {"error": ...} entries."""
        results = []
        try:
            print("Processing...")
            result = self.send_image_to_server()
            results.append({"result": result})
        except Exception as e:
            print(f"Error processing: {e}")
            results.append({"error": str(e)})
        return results


if __name__ == "__main__":
    node = ImageProcessor()
    node.take_off_init()
    try:
        rospy.spin()
    except rospy.ROSInterruptException:
        pass