├── CITATION.cff
├── LICENSE
├── README.md
├── assets
├── Covarince.png
└── Odometry.png
└── cuUKF
├── CMakeLists.txt
├── LICENSE
├── include
└── cuUKF
│ ├── coreNode.hpp
│ ├── cpuFilter.hpp
│ ├── cpuNode.hpp
│ └── gpuFilter.cuh
├── launch
└── gpu_filter.launch.py
├── nodes
├── coreNode.cpp
└── cpuNode.cpp
├── package.xml
└── src
├── cpuFilter.cpp
└── gpuFilter.cu
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If you use this software, please cite it as below."
3 | authors:
4 | - family-names: "Hari"
5 | given-names: "Jagennath"
6 | title: "CUDA-Accelerated-Visual-Inertial-Odometry-Fusion"
7 | version: 1.0.0
8 | doi: 10.5281/zenodo.1234
9 | date-released: 2024-02-28
10 | url: "https://github.com/jagennath-hari/CUDA-Accelerated-Visual-Inertial-Odometry-Fusion"
11 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2024, Jagennath Hari
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | 3. Neither the name of the copyright holder nor the names of its
16 | contributors may be used to endorse or promote products derived from
17 | this software without specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CUDA-Accelerated-Visual-Inertial-Odometry-Fusion
2 | Harness GPU acceleration for advanced visual odometry and IMU data fusion with our Unscented Kalman Filter (UKF) implementation. Developed with C++ and powered by CUDA, cuBLAS, and cuSOLVER, the system delivers unmatched real-time performance in state and covariance estimation for robotics applications. Integrated with ROS 2 for seamless sensor data management, it ensures high-efficiency and scalable solutions for complex robotic systems, making it ideal for a wide range of autonomous system applications.
3 |
4 | ## 🏁 Dependencies
5 | 1) NVIDIA Driver ([Official Download Link](https://www.nvidia.com/download/index.aspx))
6 | 2) CUDA Toolkit ([Official Link](https://developer.nvidia.com/cuda-downloads))
7 | 3) ROS 2 Humble ([Official Link](https://docs.ros.org/en/humble/Installation.html))
8 |
9 | ## ⚙️ Install
10 | 1) Clone `https://github.com/jagennath-hari/CUDA-Accelerated-Visual-Inertial-Odometry-Fusion.git`
11 | 2) Move `cuUKF` into `ROS2_WORKSPACE`
12 | 3) Modify the `CMakeLists.txt` file at `set(CMAKE_CUDA_ARCHITECTURES 89)` and change to your GPU architecture. If you don't know which one you can refer to [NVIDIA GPU Compute Capability](https://developer.nvidia.com/cuda-gpus#compute).
13 | 4) `cd ROS2_WORKSPACE` build workspace using `colcon build --symlink-install --cmake-args=-DCMAKE_BUILD_TYPE=Release --parallel-workers $(nproc)`
14 |
15 | ## 📈 Running cuUKF
16 | Launch the node using `ros2 launch cuUKF gpu_filter.launch.py odom_topic:=/odom imu_topic:=/imu`.
17 | The `/odom` should be replaced with your `nav_msgs/Odometry` and the `/imu` should be replaced with your `sensor_msgs/Imu`.
18 |
19 | ## 💬 ROS 2 Message
20 | The filter odometry gets published as `nav_msgs/Odometry` in the `/cuUKF/filtered_odom` topic.
21 |
22 | ## 🖼️ RVIZ2 GUI
23 | Launch RVIZ2 using `ros2 run rviz2 rviz2` and subcribe to the `/cuUKF/filered_odom` topic.
24 |
25 |
26 |

27 |
Odometry
28 |
29 |
30 | ### Visualize the covariance of the state
31 |
32 |
33 |

34 |
Covariance
35 |
36 |
37 | ## ⚠️ Note
38 | 1) The fusion does not consider the IMU's orientation only the visual odometry's orientation for the system dynamics and measurements, as raw IMU don't produce orientation without additional filters such as complementary filter and the Madgwick filter.
39 | 2) Feel free to change the alpha, beta and kappa values along with the covariance to improve state estimation.
40 | 3) The dynamics of the system use simple equations, for the best fusion you may need to change the dynamics.
41 | 4) Consider adding augmented sigma points to further increase the robustness.
42 | 5) There is also a CPU version of the UKF for quick development and testing.
43 |
--------------------------------------------------------------------------------
/assets/Covarince.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jagennath-hari/CUDA-Accelerated-Visual-Inertial-Odometry-Fusion/1dd9f36500c23322e66e96758657f788b2b99c0a/assets/Covarince.png
--------------------------------------------------------------------------------
/assets/Odometry.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jagennath-hari/CUDA-Accelerated-Visual-Inertial-Odometry-Fusion/1dd9f36500c23322e66e96758657f788b2b99c0a/assets/Odometry.png
--------------------------------------------------------------------------------
/cuUKF/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.8)
2 |
3 | get_filename_component(PACKAGE_NAME ${CMAKE_CURRENT_SOURCE_DIR} NAME)
4 | message("Creating Package: ${PACKAGE_NAME}")
5 | project(${PACKAGE_NAME} LANGUAGES CXX CUDA) ## this package name is the name of the directory this cmake file is in
6 |
7 | add_compile_definitions(CMAKE_PACKAGE_NAME=${PACKAGE_NAME})
8 |
9 | # Default to C++17 for humble
10 | if(NOT CMAKE_CXX_STANDARD)
11 | set(CMAKE_CXX_STANDARD 17)
12 | endif()
13 |
14 | if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
15 | add_compile_options(-Wall -Wextra -Wpedantic)
16 | endif()
17 |
18 | set(CMAKE_CUDA_ARCHITECTURES 89)
19 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=-Wno-unknown-pragmas")
20 |
21 |
22 | set(ROS_DEPENDS
23 | rclcpp
24 | std_msgs
25 | ament_cmake
26 | sensor_msgs
27 | nav_msgs
28 | message_filters
29 | CUDAToolkit
30 | )
31 |
32 | # find dependencies
33 | foreach(DEPEND ${ROS_DEPENDS})
34 | find_package(${DEPEND} REQUIRED)
35 | endforeach(DEPEND)
36 |
37 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include/cuUKF)
38 | include_directories(
39 | include
40 | ${OpenCV_INCLUDE_DIRS}
41 | ${Eigen_INCLUDE_DIRS}
42 | )
43 |
44 | message("buliding ${PROJECT_NAME} node: ")
45 |
46 | add_library(cpu_filter
47 | src/cpuFilter.cpp
48 | )
49 |
50 | add_executable(cpu_node nodes/cpuNode.cpp)
51 | ament_target_dependencies(cpu_node
52 | rclcpp
53 | sensor_msgs
54 | nav_msgs
55 | std_msgs
56 | Eigen3
57 | message_filters
58 | geometry_msgs
59 | )
60 |
61 | ament_target_dependencies(cpu_filter
62 | rclcpp
63 | sensor_msgs
64 | nav_msgs
65 | std_msgs
66 | Eigen3
67 | message_filters
68 | geometry_msgs
69 | )
70 |
71 | target_link_libraries(cpu_node
72 | cpu_filter
73 | )
74 |
75 | #############################################
76 | message(STATUS "CUDA Compiler: ${CMAKE_CUDA_COMPILER}")
77 |
78 | add_library(gpu_filter
79 | src/gpuFilter.cu
80 | )
81 |
82 | target_include_directories(gpu_filter PUBLIC
83 | ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
84 | )
85 |
86 | target_link_libraries(gpu_filter
87 | CUDA::cublas
88 | CUDA::cusolver
89 | CUDA::cuda_driver
90 | )
91 |
92 | set_target_properties(gpu_filter PROPERTIES
93 | CUDA_SEPARABLE_COMPILATION ON
94 | )
95 | target_compile_options(gpu_filter PRIVATE $<$:-Xcompiler=-Wno-unknown-pragmas>)
96 |
97 |
98 | add_executable(core_node nodes/coreNode.cpp)
99 | ament_target_dependencies(core_node
100 | rclcpp
101 | sensor_msgs
102 | nav_msgs
103 | std_msgs
104 | message_filters
105 | geometry_msgs
106 | )
107 |
108 | ament_target_dependencies(gpu_filter
109 | rclcpp
110 | sensor_msgs
111 | nav_msgs
112 | std_msgs
113 | message_filters
114 | geometry_msgs
115 | )
116 |
117 | target_link_libraries(core_node
118 | gpu_filter
119 | )
120 |
121 | ###############################################
122 | install(TARGETS
123 | cpu_node
124 | core_node
125 | DESTINATION lib/${PROJECT_NAME})
126 |
127 | install(DIRECTORY
128 | launch
129 | DESTINATION share/${PROJECT_NAME}/
130 | )
131 |
132 | if(BUILD_TESTING)
133 | find_package(ament_lint_auto REQUIRED)
134 | # the following line skips the linter which checks for copyrights
135 | # comment the line when a copyright and license is added to all source files
136 | set(ament_cmake_copyright_FOUND TRUE)
137 | # the following line skips cpplint (only works in a git repo)
138 | # comment the line when this package is in a git repo and when
139 | # a copyright and license is added to all source files
140 | set(ament_cmake_cpplint_FOUND TRUE)
141 | ament_lint_auto_find_test_dependencies()
142 | endif()
143 |
144 | ament_package()
145 |
--------------------------------------------------------------------------------
/cuUKF/LICENSE:
--------------------------------------------------------------------------------
1 | Redistribution and use in source and binary forms, with or without
2 | modification, are permitted provided that the following conditions are met:
3 |
4 | * Redistributions of source code must retain the above copyright
5 | notice, this list of conditions and the following disclaimer.
6 |
7 | * Redistributions in binary form must reproduce the above copyright
8 | notice, this list of conditions and the following disclaimer in the
9 | documentation and/or other materials provided with the distribution.
10 |
11 | * Neither the name of the copyright holder nor the names of its
12 | contributors may be used to endorse or promote products derived from
13 | this software without specific prior written permission.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 | POSSIBILITY OF SUCH DAMAGE.
26 |
--------------------------------------------------------------------------------
/cuUKF/include/cuUKF/coreNode.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CORENODE_HPP_
2 | #define CORENODE_HPP_
3 |
4 | // ROS includes
5 | #include
6 |
7 | // Message filters
8 | #include
9 | #include
10 | #include
11 |
12 | // Nav and Sensor Msgs includes
13 | #include
14 | #include
15 |
16 | // Unscneted Kalaman filter include
17 | #include "gpuFilter.cuh"
18 |
19 | class CoreNode : public rclcpp::Node
20 | {
21 | private:
22 | // Message filter subscribers
23 | message_filters::Subscriber odomDataSub_;
24 | message_filters::Subscriber imuDataSub_;
25 |
26 | // Synchronizer
27 | typedef message_filters::sync_policies::ApproximateTime MySyncPolicy;
28 | typedef message_filters::Synchronizer Sync;
29 | std::shared_ptr sync_;
30 |
31 | // Combined callback
32 | void callback_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu);
33 |
34 | // UKF Object
35 | std::shared_ptr kalmanFilter_;
36 |
37 | // Init for UKF
38 | bool isIntialized_;
39 |
40 | // Member to store the previous timestamp
41 | rclcpp::Time prevTimestamp_;
42 |
43 | // Generating State vector
44 | std::vector genState_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu);
45 |
46 | // Generating Measurement Vector
47 | std::vector genMeasurement_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu);
48 |
49 | // Generate Odom Msg
50 | nav_msgs::msg::Odometry createOdometryMsg_(const std::vector& state, const std::vector& covariance_matrix, const nav_msgs::msg::Odometry::ConstSharedPtr& original_odom);
51 |
52 | // Publisher
53 | rclcpp::Publisher::SharedPtr odomPublisher_;
54 |
55 | public:
56 | CoreNode();
57 | ~CoreNode();
58 | };
59 |
60 | #endif
61 |
--------------------------------------------------------------------------------
/cuUKF/include/cuUKF/cpuFilter.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CPUFILTER_HPP_
2 | #define CPUFILTER_HPP_
3 |
4 | // ROS includes
5 | #include
6 | #include
7 |
8 | // Eigen3 Includes
9 | #if defined __GNUC__ || defined __APPLE__
10 | #include
11 | #else
12 | #include
13 | #endif
14 |
15 | class UKF
16 | {
17 | private:
18 | // ROS Logger for outsdie node logging
19 | rclcpp::Logger rosLogger_;
20 |
21 | // 16x1 state vector
22 | Eigen::VectorXd state_;
23 |
24 | // 16x16 model covariance matrix
25 | Eigen::MatrixXd P_;
26 | void initializeCovarianceMatrix_();
27 |
28 | // 16x16 sensor covariance matrix
29 | Eigen::MatrixXd R_; // Measurement noise covariance matrix
30 |
31 | // Kalman Gain
32 | Eigen::MatrixXd K_;
33 |
34 | // Measurement Model Functions
35 | Eigen::Vector3d prevPosition_;
36 |
37 | // Sigma points
38 | static const int n = 16; // Dimension of the state vector
39 | Eigen::MatrixXd sigmaPoints_; // Matrix to hold sigma points, size n x (2n + 1)
40 | Eigen::MatrixXd measurementSigmaPoints_; // Matrix to hold measurment sigma points during tranformation to measuremnt space
41 | void generateSigmaPoints_(); // gen sigma points
42 |
43 | // UKF parameters
44 | double lambda_;
45 | double alpha_;
46 | double beta_;
47 | double kappa_;
48 |
49 | // Sigma weights, mean and covaraince
50 | Eigen::VectorXd weightsMean_;
51 | Eigen::VectorXd weightsCovariance_;
52 | Eigen::VectorXd predictedStateMean_; // Predicted state mean
53 | Eigen::VectorXd predictedMeasurementMean_;
54 | Eigen::MatrixXd predictedMeasurementCovariance_; // Predicted measurement covariance
55 | Eigen::MatrixXd Pxy_; // Cross-covariance matrix
56 | void initializeWeights_();
57 |
58 | // Sigma Process Model functions
59 | Eigen::Quaterniond updateOrientationSigmaPoint_(const Eigen::Quaterniond& current_orientation, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& gyro_bias, double dt);
60 | void updatePositionAndVelocitySigmaPoint_(Eigen::VectorXd& sigmaPoint, const Eigen::Vector3d& accel_meas, double dt);
61 | void updateBiasesSigmaPoint_(Eigen::VectorXd& sigmaPoint, const Eigen::Vector3d& gyro_error, const Eigen::Vector3d& accel_error);
62 |
63 | // Sigma Measurement Model
64 | Eigen::VectorXd applyMeasurementModelToSigmaPoint_(const Eigen::VectorXd& sigmaPoint, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas);
65 |
66 | public:
67 | UKF();
68 | ~UKF();
69 |
70 | // Initalize state vector
71 | void initializeState(const Eigen::VectorXd& initialState, const Eigen::MatrixXd& pose_covariance, const Eigen::MatrixXd& ang_vel_covariance, const Eigen::MatrixXd& accel_covariance);
72 |
73 | // Update prev position
74 | void updatePrevPose(const Eigen::Vector3d& prevPose);
75 |
76 | // Measurement Model
77 | void updateNoiseMatrix(const Eigen::MatrixXd& pose_covariance_6x6, const Eigen::MatrixXd& ang_vel_covariance_3x3,const Eigen::MatrixXd& accel_covariance_3x3);
78 | Eigen::VectorXd updateMeasurementModel(const Eigen::Vector3d& position, const Eigen::Quaterniond& orientation, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas, double dt, const Eigen::MatrixXd& pose_covariance_6x6, const Eigen::MatrixXd& ang_vel_covariance_3x3,const Eigen::MatrixXd& accel_covariance_3x3);
79 |
80 | // Prediction Step
81 | void predictStateMean_();
82 | void predictStateCovariance_();
83 | void propagateSigmaPointsThroughProcessModel_(double dt, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas, const Eigen::Vector3d& gyro_error, const Eigen::Vector3d& accel_error);
84 |
85 | // Update Step
86 | void transformSigmaPointsToMeasurementSpace_(const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas);
87 | void calculatePredictedMeasurementMean_();
88 | void calculatePredictedMeasurementCovariance_();
89 | void calculateCrossCovariance_();
90 | void computeKalmanGain_();
91 | void updateStateAndCovariance_(const Eigen::VectorXd& actualMeasurement);
92 | };
93 |
94 | #endif
--------------------------------------------------------------------------------
/cuUKF/include/cuUKF/cpuNode.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CPUNODE_HPP_
2 | #define CPUNODE_HPP_
3 |
4 | // ROS includes
5 | #include
6 |
7 | // Message filters
8 | #include
9 | #include
10 | #include
11 |
12 | // Nav and Sensor Msgs includes
13 | #include
14 | #include
15 |
16 | // Unscneted Kalaman filter include
17 | #include "cpuFilter.hpp"
18 |
19 | class CpuNode : public rclcpp::Node
20 | {
21 | private:
22 | // Message filter subscribers
23 | message_filters::Subscriber odomDataSub_;
24 | message_filters::Subscriber imuDataSub_;
25 |
26 | // Synchronizer
27 | typedef message_filters::sync_policies::ApproximateTime MySyncPolicy;
28 | typedef message_filters::Synchronizer Sync;
29 | std::shared_ptr sync_;
30 |
31 | // Combined callback
32 | void callback_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu);
33 |
34 | // UKF Object
35 | std::shared_ptr kalmanFilter_;
36 |
37 | // Init for UKF
38 | bool isIntialized_;
39 |
40 | // Member to store the previous timestamp
41 | rclcpp::Time prevTimestamp_;
42 |
43 | // ROS msg to Eigen conversions
44 | Eigen::MatrixXd extractPoseCovariance_(const nav_msgs::msg::Odometry& odom);
45 | Eigen::MatrixXd extractAngularVelocityCovariance_(const sensor_msgs::msg::Imu& imu);
46 | Eigen::MatrixXd extractLinearAccelerationCovariance_(const sensor_msgs::msg::Imu& imu);
47 |
48 | public:
49 | CpuNode();
50 | ~CpuNode();
51 | };
52 |
53 | #endif
54 |
--------------------------------------------------------------------------------
/cuUKF/include/cuUKF/gpuFilter.cuh:
--------------------------------------------------------------------------------
1 | #ifndef GPUFILTER_CUH_
2 | #define GPUFILTER_CUH_
3 |
4 | // Include
5 | #include
6 |
7 | // ROS includes
8 | #include
9 | #include
10 |
11 | // Nav and Sensor Msgs includes
12 | #include
13 | #include
14 |
15 | // CUDA includes
16 | #include
17 |
18 | // cuBLAS for linear algebra operations
19 | #include
20 |
21 | // cuSOLVER for Cholesky decomposition
22 | #include
23 |
24 | // Thrust for device based vector operations
25 | #include
26 |
27 | class UKF
28 | {
29 | private:
30 | // ROS Logger for outsdie node logging
31 | rclcpp::Logger rosLogger_;
32 |
33 | // CUDA handles
34 | cublasHandle_t cublasHandle;
35 | cusolverDnHandle_t cusolverHandle;
36 |
37 | // UKF parameters
38 | static const int n = 16; // Dimension of the state vector
39 | static const int sigmaPointCount = 2 * 16 + 1;
40 | float lambda_;
41 | float alpha_;
42 | float beta_;
43 | float kappa_;
44 |
45 | // Stores the previous position (x, y, z)
46 | thrust::device_vector prevPose_;
47 | // State vector using thrust
48 | thrust::device_vector state_;
49 | // State Covariance matrix, linearized using thrust
50 | thrust::device_vector P_;
51 | // Process Covariance matrix, linearized using thrust
52 | thrust::device_vector Q_;
53 | // Measurement Covariance matrix, linearized using thrust
54 | thrust::device_vector R_;
55 | // Sigma Points matrix
56 | thrust::device_vector sigmaPoints_;
57 | // Weights for sigma points
58 | thrust::device_vector weightsMean_;
59 | thrust::device_vector weightsCovariance_;
60 | // Predicted State Mean
61 | thrust::device_vector predictedStateMean_;
62 | // Predicted State Covarince
63 | thrust::device_vector predictedStateCovariance_;
64 | // Precited Measuement Sigma points
65 | thrust::device_vector predictedMeasurementSigmaPoints_;
66 | // Precited Measurement mean
67 | thrust::device_vector predictedMeasurementMean_;
68 | // Precited Measurement Covariance
69 | thrust::device_vector predictedMeasurementCovariance_;
70 | // Cross-Covariance matrix between state and measurement
71 | thrust::device_vector crossCovariance_;
72 | // Kalman Gain
73 | thrust::device_vector kalmanGain_;
74 | // Functions
75 | void updateAllSigmaPoints(const std::vector& measurement, const float dt);
76 | void predictStateMean();
77 | void predictStateCovariance();
78 | thrust::device_vector scaleMeasurementCovarianceMatrix();
79 | thrust::device_vector choleskyDecompositionMeasurment();
80 | void generateMeasurmentSigmaPoints(const std::vector& measurement, float dt);
81 | void predictMeasurementMean();
82 | void computeMeasurementCovariance();
83 | void computeCrossCovariance();
84 | thrust::device_vector prepareMeasurementVector(const std::vector& measurement, float dt);
85 | thrust::device_vector computeInverseMeasurementCovariance();
86 | void computeKalmanGain();
87 | void updateStateWithMeasurement(const std::vector& measurement, float dt);
88 | void updateStateCovariance();
89 | public:
90 | UKF(const float & alpha, const float & beta, const float & kappa);
91 | ~UKF();
92 | // Init state for thrust::vector
93 | void initState(const std::vector& state_vector);
94 | void initStateCovarianceMatrix(const float & positionVariance, const float & orientationVariance, const float & velocityVariance, const float & gyroBiasVariance, const float & accelBiasVariance);
95 | void initProcessCovarianceMatrix(const float & positionNoise, const float & orientationNoise, const float & velocityNoise, const float & gyroBiasNoise, const float & accelBiasNoise);
96 | void updateMeasurementCovarianceMatrix(const float positionVariance, const float orientationVariance, const float velocityVariance, const float angularVelocityVariance, const float linearAccelerationVariance);
97 | // sigma points generation functions
98 | void generateSigmaPoints();
99 | thrust::device_vector scaleCovarianceMatrix() ;
100 | thrust::device_vector choleskyDecomposition();
101 | void initializeWeights();
102 | void updatePrevPose(const std::vector& newPos);
103 | void printStateInfo();
104 | void updateFilter(const std::vector& measurement, const float dt, const float positionVariance, const float orientationVariance, const float velocityVariance, const float angularVelocityVariance, const float linearAccelerationVariance);
105 | std::vector getState();
106 | std::vector getStateCovarianceMatrix();
107 | };
108 |
109 | #endif
--------------------------------------------------------------------------------
/cuUKF/launch/gpu_filter.launch.py:
--------------------------------------------------------------------------------
1 | from launch import LaunchDescription
2 | from launch_ros.actions import Node
3 | from launch.actions import DeclareLaunchArgument
4 | from launch.substitutions import LaunchConfiguration
5 |
6 | def generate_launch_description():
7 | return LaunchDescription([
8 | DeclareLaunchArgument('odom_topic', default_value='/odom', description='odom topic'),
9 | DeclareLaunchArgument('imu_topic', default_value='/imu', description='imu topic'),
10 | Node(
11 | package = 'cuUKF',
12 | executable = 'core_node',
13 | name = 'gpu_filter_node',
14 | output = 'screen',
15 | parameters = [
16 | {"odom_topic": LaunchConfiguration('odom_topic')},
17 | {"imu_topic": LaunchConfiguration('imu_topic')}
18 | ]
19 | )
20 | ])
21 |
--------------------------------------------------------------------------------
/cuUKF/nodes/coreNode.cpp:
--------------------------------------------------------------------------------
1 | #include "coreNode.hpp"
2 |
3 | CoreNode::CoreNode() : Node("cuUKF")
4 | {
5 | // Topics
6 | this->declare_parameter("odom_topic", "/odom");
7 | this->declare_parameter("imu_topic", "/imu");
8 |
9 | // Init for UKF
10 | this->isIntialized_ = false;
11 |
12 | // Initalize filter
13 | this->kalmanFilter_ = std::make_shared(5e-2, 2.0, 0);
14 |
15 | // Subscribe to Msgs
16 | this->odomDataSub_.subscribe(this, this->get_parameter("odom_topic").as_string());
17 | this->imuDataSub_.subscribe(this, this->get_parameter("imu_topic").as_string());
18 |
19 | // Initialize the synchronizer
20 | sync_.reset(new Sync(MySyncPolicy(100), odomDataSub_, imuDataSub_));
21 | sync_->registerCallback(std::bind(&CoreNode::callback_, this, std::placeholders::_1, std::placeholders::_2));
22 |
23 | // Publisher
24 | this->odomPublisher_ = this->create_publisher("/cuUKF/filtered_odom", 10);
25 | }
26 |
27 | CoreNode::~CoreNode()
28 | {
29 | }
30 |
31 | void CoreNode::callback_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu)
32 | {
33 | if (!(this->isIntialized_))
34 | {
35 | // Set the prevoous time stamp
36 | this->prevTimestamp_ = odom->header.stamp;
37 |
38 | // Initialize state vector
39 | std::vector state = this->genState_(odom, imu);
40 |
41 | // Move to thrust vector for GPU in filter
42 | this->kalmanFilter_->initState(state);
43 | this->kalmanFilter_->initStateCovarianceMatrix(10.0f, 10.0f, 10.0f, 10.0f, 10.0f);
44 | this->kalmanFilter_->initProcessCovarianceMatrix(10.0f, 10.0f, 10.0f, 10.0f, 10.0f);
45 | this->kalmanFilter_->updateMeasurementCovarianceMatrix(0.001, 0.001, 0.01f, 0.001, 0.001);
46 | this->kalmanFilter_->generateSigmaPoints();
47 | this->kalmanFilter_->initializeWeights();
48 |
49 | // Update Prev Pose
50 | this->kalmanFilter_->updatePrevPose({static_cast(odom->pose.pose.position.x), static_cast(odom->pose.pose.position.y), static_cast(odom->pose.pose.position.z)});
51 |
52 | // Update the flag
53 | this->isIntialized_ = true;
54 | return;
55 | }
56 | // Get the current timestamp from the odometry message
57 | rclcpp::Time currentTimestamp = odom->header.stamp;
58 |
59 | // Calculate dt in seconds
60 | double dt = (currentTimestamp - this->prevTimestamp_).seconds();
61 | this->prevTimestamp_ = currentTimestamp; // Update the previous timestamp
62 |
63 | // Start filter
64 | this->kalmanFilter_->updateFilter(this->genMeasurement_(odom, imu), dt, 0.001, 0.001, 0.01f, 0.001, 0.001);
65 |
66 | // Update Prev Pose
67 | this->kalmanFilter_->updatePrevPose({static_cast(odom->pose.pose.position.x), static_cast(odom->pose.pose.position.y), static_cast(odom->pose.pose.position.z)});
68 |
69 | std::vector state = this->kalmanFilter_->getState();
70 | std::vector stateCovarince = this->kalmanFilter_->getStateCovarianceMatrix();
71 | // Publish Msg
72 | this->odomPublisher_->publish(this->createOdometryMsg_(state, stateCovarince, odom));
73 | }
74 |
75 | std::vector CoreNode::genState_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu)
76 | {
77 | // Initialize state vector
78 | std::vector state(16, 0.0f); // Initialize all elements to zero
79 |
80 | // Assign position and orientation to state vector
81 | state[0] = odom->pose.pose.position.x;
82 | state[1] = odom->pose.pose.position.y;
83 | state[2] = odom->pose.pose.position.z;
84 | state[3] = odom->pose.pose.orientation.w;
85 | state[4] = odom->pose.pose.orientation.x;
86 | state[5] = odom->pose.pose.orientation.y;
87 | state[6] = odom->pose.pose.orientation.z;
88 | state[10] = imu->angular_velocity.x;
89 | state[11] = imu->angular_velocity.y;
90 | state[12] = imu->angular_velocity.z;
91 | state[13] = imu->linear_acceleration.x;
92 | state[14] = imu->linear_acceleration.y;
93 | state[15] = imu->linear_acceleration.z;
94 |
95 | return state;
96 | }
97 |
98 | std::vector CoreNode::genMeasurement_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu)
99 | {
100 | std::vector measurementVector(16, 0.0f); // Assuming a 16-element state vector
101 |
102 | // Position
103 | measurementVector[0] = odom->pose.pose.position.x;
104 | measurementVector[1] = odom->pose.pose.position.y;
105 | measurementVector[2] = odom->pose.pose.position.z;
106 |
107 | // Orientation (quaternion)
108 | measurementVector[3] = odom->pose.pose.orientation.w;
109 | measurementVector[4] = odom->pose.pose.orientation.x;
110 | measurementVector[5] = odom->pose.pose.orientation.y;
111 | measurementVector[6] = odom->pose.pose.orientation.z;
112 |
113 | // Angular velocity
114 | measurementVector[10] = imu->angular_velocity.x;
115 | measurementVector[11] = imu->angular_velocity.y;
116 | measurementVector[12] = imu->angular_velocity.z;
117 |
118 | // Linear acceleration
119 | measurementVector[13] = imu->linear_acceleration.x;
120 | measurementVector[14] = imu->linear_acceleration.y;
121 | measurementVector[15] = imu->linear_acceleration.z;
122 |
123 | return measurementVector;
124 | }
125 |
126 | nav_msgs::msg::Odometry CoreNode::createOdometryMsg_(const std::vector& state, const std::vector& covariance_matrix, const nav_msgs::msg::Odometry::ConstSharedPtr& original_odom)
127 | {
128 | nav_msgs::msg::Odometry odom_msg;
129 |
130 | // Assuming the state vector layout is as follows:
131 | // [x, y, z, qw, qx, qy, qz, vx, vy, vz, ax, ay, az]
132 | odom_msg.pose.pose.position.x = state[0];
133 | odom_msg.pose.pose.position.y = state[1];
134 | odom_msg.pose.pose.position.z = state[2];
135 | odom_msg.pose.pose.orientation.w = state[3];
136 | odom_msg.pose.pose.orientation.x = state[4];
137 | odom_msg.pose.pose.orientation.y = state[5];
138 | odom_msg.pose.pose.orientation.z = state[6];
139 | odom_msg.twist.twist.linear.x = state[7];
140 | odom_msg.twist.twist.linear.y = state[8];
141 | odom_msg.twist.twist.linear.z = state[9];
142 | odom_msg.twist.twist.angular.x = state[10];
143 | odom_msg.twist.twist.angular.y = state[11];
144 | odom_msg.twist.twist.angular.z = state[12];
145 |
146 | // Copy position covariance (indices 0, 1, 2 for x, y, z)
147 | for (int i = 0; i < 3; ++i) for (int j = 0; j < 3; ++j) odom_msg.pose.covariance[i * 6 + j] = covariance_matrix[i * 16 + j];
148 |
149 | // Copy orientation covariance (assuming direct mapping from quaternion x, y, z to orientation)
150 | for (int i = 4, k = 0; i < 7; ++i, ++k) for (int j = 4, l = 0; j < 7; ++j, ++l) odom_msg.pose.covariance[(k + 3) * 6 + (l + 3)] = covariance_matrix[i * 16 + j];
151 |
152 | // Copy velocity covariance (linear velocity: vx, vy, vz)
153 | for (int i = 0; i < 3; ++i) for (int j = 0; j < 3; ++j) odom_msg.twist.covariance[i * 6 + j] = covariance_matrix[(i + 7) * 16 + (j + 7)];
154 |
155 | // Copy angular velocity covariance (angular velocity: wx, wy, wz)
156 | for (int i = 0; i < 3; ++i) for (int j = 0; j < 3; ++j) odom_msg.twist.covariance[(i + 3) * 6 + (j + 3)] = covariance_matrix[(i + 10) * 16 + (j + 10)];
157 |
158 | odom_msg.header = original_odom->header;
159 | odom_msg.child_frame_id = original_odom->child_frame_id;
160 | // Set the header stamp to the current time
161 | odom_msg.header.stamp = this->now();
162 |
163 | return odom_msg;
164 | }
165 |
166 | int main(int argc, char * argv[])
167 | {
168 | rclcpp::init(argc, argv);
169 | rclcpp::spin(std::make_shared());
170 | rclcpp::shutdown();
171 |
172 | return 0;
173 | }
--------------------------------------------------------------------------------
/cuUKF/nodes/cpuNode.cpp:
--------------------------------------------------------------------------------
1 | #include "cpuNode.hpp"
2 |
3 | CpuNode::CpuNode() : Node("cuUKF")
4 | {
5 | // Init for UKF
6 | this->isIntialized_ = false;
7 |
8 | // Initialize previous timestamp with current ROS time
9 | this->prevTimestamp_ = this->now();
10 |
11 | // Initalize filter
12 | this->kalmanFilter_ = std::make_shared();
13 |
14 | // Subscribe to Msgs
15 | this->odomDataSub_.subscribe(this, "/zed2i/zed_node/odom");
16 | this->imuDataSub_.subscribe(this, "/zed2i/zed_node/imu/data_raw");
17 |
18 | // Initialize the synchronizer
19 | sync_.reset(new Sync(MySyncPolicy(10), odomDataSub_, imuDataSub_));
20 | sync_->registerCallback(std::bind(&CpuNode::callback_, this, std::placeholders::_1, std::placeholders::_2));
21 | }
22 |
23 | CpuNode::~CpuNode()
24 | {
25 | }
26 |
27 | Eigen::MatrixXd CpuNode::extractPoseCovariance_(const nav_msgs::msg::Odometry& odom)
28 | {
29 | // The pose covariance matrix in Odometry is 6x6, stored as a flat array of 36 elements
30 | const int matrix_size = 6;
31 |
32 | // Use Eigen's Map to create a 6x6 matrix view of the 1D array without copying the data
33 | Eigen::Map> covariance_matrix(odom.pose.covariance.data());
34 |
35 | return covariance_matrix;
36 | }
37 |
38 | Eigen::MatrixXd CpuNode::extractAngularVelocityCovariance_(const sensor_msgs::msg::Imu& imu)
39 | {
40 | const int matrix_size = 3; // The angular velocity covariance matrix in Imu is 3x3
41 |
42 | Eigen::Map> covariance_matrix(imu.angular_velocity_covariance.data());
43 |
44 | return covariance_matrix;
45 | }
46 |
47 | Eigen::MatrixXd CpuNode::extractLinearAccelerationCovariance_(const sensor_msgs::msg::Imu& imu)
48 | {
49 | const int matrix_size = 3; // The linear acceleration covariance matrix in Imu is 3x3
50 |
51 | Eigen::Map> covariance_matrix(imu.linear_acceleration_covariance.data());
52 |
53 | return covariance_matrix;
54 | }
55 |
56 | void CpuNode::callback_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu)
57 | {
58 | // Get the current timestamp from the odometry message
59 | rclcpp::Time currentTimestamp = odom->header.stamp;
60 |
61 | // Calculate dt in seconds
62 | double dt = (currentTimestamp - this->prevTimestamp_).seconds();
63 | this->prevTimestamp_ = currentTimestamp; // Update the previous timestamp
64 |
65 | if (!(this->isIntialized_))
66 | {
67 | // Initialize the state vector with the first readings
68 | Eigen::Vector3d initial_position(odom->pose.pose.position.x, odom->pose.pose.position.y, odom->pose.pose.position.z);
69 | Eigen::Quaterniond initial_orientation(odom->pose.pose.orientation.w, odom->pose.pose.orientation.x, odom->pose.pose.orientation.y, odom->pose.pose.orientation.z);
70 | // Assuming zero initial velocity and biases
71 | Eigen::VectorXd initial_state(16);
72 | initial_state << initial_position, initial_orientation.coeffs(), Eigen::Vector3d::Zero(), Eigen::Vector3d::Zero(), Eigen::Vector3d::Zero();
73 |
74 | this->kalmanFilter_->initializeState(initial_state, this->extractPoseCovariance_(*odom), this->extractAngularVelocityCovariance_(*imu), this->extractLinearAccelerationCovariance_(*imu));
75 | this->kalmanFilter_->updatePrevPose(initial_position);
76 | this->isIntialized_ = true;
77 | return;
78 | }
79 |
80 | // Extract position and orientation from the odometry message
81 | Eigen::Vector3d position(odom->pose.pose.position.x, odom->pose.pose.position.y, odom->pose.pose.position.z);
82 | Eigen::Quaterniond orientation(odom->pose.pose.orientation.w, odom->pose.pose.orientation.x, odom->pose.pose.orientation.y, odom->pose.pose.orientation.z);
83 |
84 | // Extract angular velocity and linear acceleration from the IMU message
85 | Eigen::Vector3d angular_velocity(imu->angular_velocity.x, imu->angular_velocity.y, imu->angular_velocity.z);
86 | Eigen::Vector3d linear_acceleration(imu->linear_acceleration.x, imu->linear_acceleration.y, imu->linear_acceleration.z);
87 |
88 | Eigen::Vector3d gyro_error(0.001, 0.001, 0.001); // rad/s
89 | Eigen::Vector3d accel_error(0.01, 0.01, 0.01); // m/s^2
90 |
91 | // Propagate each sigma point through the process model
92 | this->kalmanFilter_->propagateSigmaPointsThroughProcessModel_(dt, angular_velocity, linear_acceleration, gyro_error, accel_error);
93 |
94 | // Perform the UKF Prediction Step
95 | this->kalmanFilter_->predictStateMean_();
96 | this->kalmanFilter_->predictStateCovariance_();
97 |
98 | // Transform the propagated sigma points into the measurement space
99 | this->kalmanFilter_->transformSigmaPointsToMeasurementSpace_(angular_velocity, linear_acceleration);
100 |
101 | // Calculate the predicted measurement mean
102 | this->kalmanFilter_->calculatePredictedMeasurementMean_();
103 |
104 | // Calculate the predicted measurement covariance
105 | this->kalmanFilter_->calculatePredictedMeasurementCovariance_();
106 |
107 | // Calculate the cross covariance between the state and the measurements
108 | this->kalmanFilter_->calculateCrossCovariance_();
109 |
110 | // Compute the Kalman Gain
111 | this->kalmanFilter_->computeKalmanGain_();
112 |
113 | // Prepare the actual measurement vector from the current odometry and IMU data
114 | Eigen::VectorXd actualMeasurement = this->kalmanFilter_->updateMeasurementModel(position, orientation, angular_velocity, linear_acceleration, dt, this->extractPoseCovariance_(*odom), this->extractAngularVelocityCovariance_(*imu), this->extractLinearAccelerationCovariance_(*imu));
115 |
116 | // Update the state and covariance of the UKF with the actual measurement
117 | this->kalmanFilter_->updateStateAndCovariance_(actualMeasurement);
118 |
119 | // Update the previous position for velocity calculation in the next cycle
120 | this->kalmanFilter_->updatePrevPose(position);
121 | }
122 |
123 | int main(int argc, char * argv[])
124 | {
125 | rclcpp::init(argc, argv);
126 | rclcpp::spin(std::make_shared());
127 | rclcpp::shutdown();
128 |
129 | return 0;
130 | }
--------------------------------------------------------------------------------
/cuUKF/package.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | cuUKF
5 | 0.0.0
6 | TODO: Package description
7 | Hari
8 | BSD-3-Clause
9 |
10 | ament_cmake
11 |
12 | rclcpp
13 | sensor_msgs
14 | std_msgs
15 | message_filters
16 | geometry_msgs
17 | nav_msgs
18 |
19 | ament_lint_auto
20 | ament_lint_common
21 |
22 |
23 | ament_cmake
24 |
25 |
26 |
--------------------------------------------------------------------------------
/cuUKF/src/cpuFilter.cpp:
--------------------------------------------------------------------------------
1 | #include "cpuFilter.hpp"
2 |
3 | UKF::UKF() : rosLogger_(rclcpp::get_logger("cuUKF"))
4 | {
5 | // Initialize the state vector to a size of 16
6 | this->state_ = Eigen::VectorXd(16);
7 | this->state_.setZero(); // Initialize all elements to zero
8 |
9 | // Initialize UKF parameters
10 | this->alpha_ = 1e-3;
11 | this->beta_ = 2.0;
12 | this->kappa_ = 0.0;
13 | this->lambda_ = this->alpha_ * this->alpha_ * (this->n + this->kappa_) - this->n;
14 | }
15 |
16 | void UKF::initializeState(const Eigen::VectorXd& initialState, const Eigen::MatrixXd& pose_covariance, const Eigen::MatrixXd& ang_vel_covariance, const Eigen::MatrixXd& accel_covariance)
17 | {
18 | // Check the dimension of initialState to ensure it matches your state vector size
19 | if (initialState.size() != state_.size()) throw std::runtime_error("Initial state size does not match the UKF state size.");
20 |
21 | // Set the initial state
22 | this->state_ = initialState;
23 |
24 | // Set the covaiance matrix
25 | this->initializeCovarianceMatrix_();
26 | this->updateNoiseMatrix(pose_covariance, ang_vel_covariance, accel_covariance);
27 |
28 | // Initalise sigma points and weights
29 | this->generateSigmaPoints_();
30 | this->initializeWeights_();
31 |
32 | // Log the initialization
33 | RCLCPP_INFO(this->rosLogger_, "UKF State initialized.");
34 | }
35 |
36 | void UKF::initializeCovarianceMatrix_()
37 | {
38 | // Initialize the full 16x16 covariance matrix to zero
39 | this->P_ = Eigen::MatrixXd::Zero(16, 16);
40 |
41 | // Define standard variances
42 | double positionVariance = 0.1; // Variance for position
43 | double orientationVariance = 0.01; // Variance for quaternion components
44 | double velocityVariance = 1.0; // Variance for velocity
45 | double angularVelocityVariance = 0.01; // Variance for angular velocity
46 | double accelerationVariance = 0.1; // Variance for linear acceleration
47 |
48 | // Set covariance for position (3x3 block)
49 | this->P_.block<3, 3>(0, 0) = Eigen::Matrix3d::Identity() * positionVariance;
50 |
51 | // Set covariance for orientation (4x4 block)
52 | this->P_.block<4, 4>(3, 3) = Eigen::Matrix4d::Identity() * orientationVariance;
53 |
54 | // Set covariance for velocity (3x3 block)
55 | this->P_.block<3, 3>(7, 7) = Eigen::Matrix3d::Identity() * velocityVariance;
56 |
57 | // Set covariance for angular velocity (3x3 block)
58 | this->P_.block<3, 3>(10, 10) = Eigen::Matrix3d::Identity() * angularVelocityVariance;
59 |
60 | // Set covariance for linear acceleration (3x3 block)
61 | this->P_.block<3, 3>(13, 13) = Eigen::Matrix3d::Identity() * accelerationVariance;
62 | }
63 |
64 | void UKF::updateNoiseMatrix(const Eigen::MatrixXd& pose_covariance_6x6, const Eigen::MatrixXd& ang_vel_covariance_3x3,const Eigen::MatrixXd& accel_covariance_3x3)
65 | {
66 | // Initialize the full 16x16 covariance matrix to zero
67 | this->R_ = Eigen::MatrixXd::Zero(16, 16);
68 |
69 | // Example variance values
70 | double orientationVariance = 0.01; // Orientation measurement noise variance
71 | double velocityVariance = 1.0; // Velocity measurement noise variance
72 |
73 | // Set variances for position (first 3 elements)
74 | this->R_.block<3, 3>(0, 0) = pose_covariance_6x6.block<3, 3>(0, 0);
75 |
76 | // Set variances for orientation (next 4 elements)
77 | this->R_.block<4, 4>(3, 3) = Eigen::Matrix4d::Identity() * orientationVariance;
78 |
79 | // Set variances for velocity (next 3 elements)
80 | this->R_.block<3, 3>(7, 7) = Eigen::Matrix3d::Identity() * velocityVariance;
81 |
82 | // Set variances for gyro bias (next 3 elements)
83 | this->R_.block<3, 3>(10, 10) = ang_vel_covariance_3x3;
84 |
85 | // Set variances for accelerometer bias (next 3 elements)
86 | this->R_.block<3, 3>(13, 13) = accel_covariance_3x3;
87 | }
88 |
89 | void UKF::generateSigmaPoints_()
90 | {
91 | this->sigmaPoints_.resize(this->n, 2 * this->n + 1);
92 |
93 | // Compute scaling factor
94 | Eigen::MatrixXd S = (this->P_ * (this->n + this->lambda_)).llt().matrixL(); // Cholesky decomposition
95 |
96 | // Set the first column to the current state
97 | this->sigmaPoints_.col(0) = state_;
98 |
99 | // Set remaining sigma points
100 | for (int i = 0; i < n; ++i)
101 | {
102 | this->sigmaPoints_.col(i + 1) = this->state_ + S.col(i);
103 | this->sigmaPoints_.col(i + 1 + n) = this->state_ - S.col(i);
104 | }
105 | }
106 |
107 | void UKF::initializeWeights_()
108 | {
109 | this->weightsMean_ = Eigen::VectorXd(2 * this->n + 1);
110 | this->weightsCovariance_ = Eigen::VectorXd(2 * this->n + 1);
111 |
112 | double weight_0 = this->lambda_ / (this->lambda_ + this->n);
113 | double weight_others = 1 / (2 * (this->lambda_ + this->n));
114 |
115 | this->weightsMean_(0) = weight_0;
116 | this->weightsCovariance_(0) = weight_0 + (1 - alpha_ * alpha_ + beta_);
117 |
118 | for (int i = 1; i < 2 * n + 1; ++i)
119 | {
120 | this->weightsMean_(i) = weight_others;
121 | this->weightsCovariance_(i) = weight_others;
122 | }
123 | }
124 |
125 | Eigen::Quaterniond UKF::updateOrientationSigmaPoint_(const Eigen::Quaterniond& current_orientation, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& gyro_bias, double dt)
126 | {
127 | Eigen::Vector3d correctedOmega = 0.5 * (angular_velocity - gyro_bias) * dt;
128 | double omega_norm = correctedOmega.norm();
129 |
130 | if (omega_norm <= std::numeric_limits::epsilon()) return current_orientation;
131 |
132 | double half_theta = 0.5 * omega_norm;
133 | Eigen::Quaterniond delta_q_quaternion(Eigen::AngleAxisd(half_theta, correctedOmega.normalized()));
134 |
135 | // Perform the quaternion multiplication and normalize
136 | Eigen::Quaterniond updated_orientation = (delta_q_quaternion * current_orientation).normalized();
137 |
138 | return updated_orientation;
139 | }
140 |
141 | void UKF::updatePositionAndVelocitySigmaPoint_(Eigen::VectorXd& sigmaPoint, const Eigen::Vector3d& accel_meas, double dt)
142 | {
143 | // Extract position, orientation, and velocity from the sigma point
144 | Eigen::Vector3d position = sigmaPoint.segment(0, 3);
145 | Eigen::Quaterniond orientation(sigmaPoint(3), sigmaPoint(4), sigmaPoint(5), sigmaPoint(6));
146 | Eigen::Vector3d velocity = sigmaPoint.segment(7, 3);
147 | Eigen::Vector3d accel_bias = sigmaPoint.segment(13, 3);
148 |
149 | // Compute the acceleration in the global frame
150 | Eigen::Vector3d accel_global = orientation * (accel_meas - accel_bias) - Eigen::Vector3d(0, 0, -9.81); // Assuming gravity along z
151 |
152 | // Update velocity
153 | Eigen::Vector3d velocity_updated = velocity + accel_global * dt;
154 |
155 | // Update position
156 | Eigen::Vector3d position_updated = position + (velocity * dt) + (0.5 * accel_global * dt * dt);
157 |
158 | // Write the updated position and velocity back into the sigma point
159 | sigmaPoint.segment(0, 3) = position_updated;
160 | sigmaPoint.segment(7, 3) = velocity_updated;
161 | }
162 |
163 | void UKF::updateBiasesSigmaPoint_(Eigen::VectorXd& sigmaPoint, const Eigen::Vector3d& gyro_error, const Eigen::Vector3d& accel_error)
164 | {
165 | // Extract gyro and accel biases from the sigma point
166 | Eigen::Vector3d gyro_bias = sigmaPoint.segment(10, 3);
167 | Eigen::Vector3d accel_bias = sigmaPoint.segment(13, 3);
168 |
169 | // Update biases based on the error estimates
170 | gyro_bias += gyro_error;
171 | accel_bias += accel_error;
172 |
173 | // Write the updated biases back into the sigma point
174 | sigmaPoint.segment(10, 3) = gyro_bias;
175 | sigmaPoint.segment(13, 3) = accel_bias;
176 | }
177 |
178 | void UKF::propagateSigmaPointsThroughProcessModel_(double dt, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas, const Eigen::Vector3d& gyro_error, const Eigen::Vector3d& accel_error)
179 | {
180 | for (int i = 0; i < this->sigmaPoints_.cols(); ++i)
181 | {
182 | // Create a copy of the i-th column of sigmaPoints_
183 | Eigen::VectorXd sigmaPoint = this->sigmaPoints_.col(i);
184 |
185 | // Extract current orientation from the sigma point
186 | Eigen::Quaterniond current_orientation(sigmaPoint(3), sigmaPoint(4), sigmaPoint(5), sigmaPoint(6));
187 |
188 | // Update orientation for this sigma point
189 | Eigen::Quaterniond updated_orientation = updateOrientationSigmaPoint_(current_orientation, angular_velocity, gyro_error, dt);
190 | updated_orientation.normalize();
191 |
192 | // Write back updated orientation into the sigma point
193 | sigmaPoint(3) = updated_orientation.w();
194 | sigmaPoint(4) = updated_orientation.x();
195 | sigmaPoint(5) = updated_orientation.y();
196 | sigmaPoint(6) = updated_orientation.z();
197 |
198 | // Update position and velocity for this sigma point
199 | this->updatePositionAndVelocitySigmaPoint_(sigmaPoint, accel_meas, dt);
200 |
201 | // Update biases for this sigma point using the error measurements
202 | this->updateBiasesSigmaPoint_(sigmaPoint, gyro_error, accel_error);
203 |
204 | // Write the updated sigma point back into the matrix
205 | this->sigmaPoints_.col(i) = sigmaPoint;
206 | }
207 | }
208 |
209 | void UKF::predictStateMean_()
210 | {
211 | // Initialize the predicted state mean vector
212 | Eigen::VectorXd tempPredictedStateMean = Eigen::VectorXd::Zero(this->state_.size());
213 |
214 | // Compute the weighted sum of the sigma points
215 | for (int i = 0; i < this->sigmaPoints_.cols(); ++i) tempPredictedStateMean += this->weightsMean_(i) * this->sigmaPoints_.col(i);
216 |
217 | // Store the predicted state mean
218 | this->predictedStateMean_ = tempPredictedStateMean;
219 | }
220 |
221 | void UKF::predictStateCovariance_()
222 | {
223 | // Initialize the predicted state covariance matrix
224 | Eigen::MatrixXd predictedStateCovariance = Eigen::MatrixXd::Zero(this->state_.size(), this->state_.size());
225 |
226 | // Compute the weighted sum of the outer products of the sigma point deviations
227 | for (int i = 0; i < this->sigmaPoints_.cols(); ++i)
228 | {
229 | Eigen::VectorXd deviation = this->sigmaPoints_.col(i) - this->state_; // Deviation of sigma point from predicted mean
230 | predictedStateCovariance += this->weightsCovariance_(i) * deviation * deviation.transpose();
231 | }
232 |
233 | // Update the state covariance matrix with the predicted state covariance
234 | this->P_ = predictedStateCovariance;
235 | }
236 |
237 | void UKF::updatePrevPose(const Eigen::Vector3d& prevPose)
238 | {
239 | this->prevPosition_ = prevPose;
240 | }
241 |
242 | Eigen::VectorXd UKF::updateMeasurementModel(const Eigen::Vector3d& position, const Eigen::Quaterniond& orientation, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas, double dt, const Eigen::MatrixXd& pose_covariance_6x6, const Eigen::MatrixXd& ang_vel_covariance_3x3,const Eigen::MatrixXd& accel_covariance_3x3)
243 | {
244 | // Update Noise Matrix R
245 | this->updateNoiseMatrix(pose_covariance_6x6, ang_vel_covariance_3x3, accel_covariance_3x3);
246 |
247 | Eigen::VectorXd measurement(16); // Adjust based on your measurement vector size
248 |
249 | // Position - directly from visual odometry
250 | measurement.head(3) = position;
251 |
252 | // Orientation - directly from visual odometry
253 | Eigen::Vector4d orientation_coeffs = Eigen::Vector4d(orientation.w(), orientation.x(), orientation.y(), orientation.z());
254 | measurement.segment(3, 4) = orientation_coeffs;
255 |
256 | // Derived Velocity
257 | Eigen::Vector3d derived_velocity = (position - this->prevPosition_) / dt;
258 | measurement.segment(7, 3) = derived_velocity;
259 |
260 | // Angular Velocity corrected for Gyro Bias
261 | Eigen::Vector3d gyro_bias = this->state_.segment(10, 3);
262 | measurement.segment(10, 3) = angular_velocity - gyro_bias;
263 |
264 | // Linear Acceleration corrected for Accelerometer Bias
265 | Eigen::Vector3d accel_bias = this->state_.segment(13, 3);
266 | measurement.segment(13, 3) = accel_meas - accel_bias;
267 |
268 | return measurement;
269 | }
270 |
271 | Eigen::VectorXd UKF::applyMeasurementModelToSigmaPoint_(const Eigen::VectorXd& sigmaPoint, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas)
272 | {
273 | Eigen::VectorXd measurement(16); // Measurement vector size
274 |
275 | // Extract components from the sigma point
276 | Eigen::Vector3d position = sigmaPoint.segment(0, 3);
277 | Eigen::Quaterniond orientation(sigmaPoint(3), sigmaPoint(4), sigmaPoint(5), sigmaPoint(6));
278 | Eigen::Vector3d velocity = sigmaPoint.segment(7, 3); // Directly use velocity
279 | Eigen::Vector3d gyro_bias = sigmaPoint.segment(10, 3);
280 | Eigen::Vector3d accel_bias = sigmaPoint.segment(13, 3);
281 |
282 | // Position
283 | measurement.head(3) = position;
284 |
285 | // Orientation
286 | measurement.segment(3, 4) = Eigen::Vector4d(orientation.w(), orientation.x(), orientation.y(), orientation.z());
287 |
288 | // Velocity
289 | measurement.segment(7, 3) = velocity;
290 |
291 | // Angular Velocity corrected for Gyro Bias
292 | measurement.segment(10, 3) = angular_velocity - gyro_bias;
293 |
294 | // Linear Acceleration corrected for Accelerometer Bias
295 | measurement.segment(13, 3) = accel_meas - accel_bias;
296 |
297 | return measurement;
298 | }
299 |
300 | void UKF::transformSigmaPointsToMeasurementSpace_(const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas)
301 | {
302 | const int measurementDimension = 16; /* size of your measurement vector, e.g., 16 */
303 | this->measurementSigmaPoints_.resize(measurementDimension, this->sigmaPoints_.cols());
304 |
305 | for (int i = 0; i < sigmaPoints_.cols(); ++i) this->measurementSigmaPoints_.col(i) = this->applyMeasurementModelToSigmaPoint_(this->sigmaPoints_.col(i), angular_velocity, accel_meas);
306 | }
307 |
308 | void UKF::calculatePredictedMeasurementMean_()
309 | {
310 | // Initialize the predicted measurement mean vector
311 | this->predictedMeasurementMean_ = Eigen::VectorXd::Zero(this->measurementSigmaPoints_.rows());
312 |
313 | // Compute the weighted sum of the measurement sigma points
314 | for (int i = 0; i < this->measurementSigmaPoints_.cols(); ++i) this->predictedMeasurementMean_ += this->weightsMean_(i) * this->measurementSigmaPoints_.col(i);
315 | }
316 |
317 | void UKF::calculatePredictedMeasurementCovariance_()
318 | {
319 | int measurementDimension = this->predictedMeasurementMean_.size();
320 | this->predictedMeasurementCovariance_ = Eigen::MatrixXd::Zero(measurementDimension, measurementDimension);
321 |
322 | for (int i = 0; i < this->measurementSigmaPoints_.cols(); ++i)
323 | {
324 | Eigen::VectorXd deviation = this->measurementSigmaPoints_.col(i) - this->predictedMeasurementMean_;
325 | this->predictedMeasurementCovariance_ += this->weightsCovariance_(i) * (deviation * deviation.transpose());
326 | }
327 |
328 | // Add the measurement noise covariance
329 | this->predictedMeasurementCovariance_ += this->R_;
330 | }
331 |
332 | void UKF::calculateCrossCovariance_()
333 | {
334 | int stateDimension = this->predictedStateMean_.size();
335 | int measurementDimension = this->predictedMeasurementMean_.size();
336 | this->Pxy_ = Eigen::MatrixXd::Zero(stateDimension, measurementDimension);
337 |
338 | for (int i = 0; i < this->sigmaPoints_.cols(); ++i)
339 | {
340 | Eigen::VectorXd stateDeviation = this->sigmaPoints_.col(i) - this->predictedStateMean_;
341 | Eigen::VectorXd measurementDeviation = this->measurementSigmaPoints_.col(i) - this->predictedMeasurementMean_;
342 | this->Pxy_ += weightsCovariance_(i) * (stateDeviation * measurementDeviation.transpose());
343 | }
344 | }
345 |
346 | void UKF::computeKalmanGain_()
347 | {
348 | this->K_ = this->Pxy_ * this->predictedMeasurementCovariance_.inverse();
349 | }
350 |
351 | void UKF::updateStateAndCovariance_(const Eigen::VectorXd& actualMeasurement)
352 | {
353 | // Calculate the measurement residual
354 | Eigen::VectorXd measurementResidual = actualMeasurement - this->predictedMeasurementMean_;
355 |
356 | // Update state estimate using the Kalman Gain
357 | this->state_ = this->predictedStateMean_ + this->K_ * measurementResidual;
358 |
359 | // Extract quaternion components (w, x, y, z) from the state
360 | Eigen::Quaterniond updated_orientation(this->state_(3), this->state_(4), this->state_(5), this->state_(6));
361 |
362 | // Normalize the quaternion
363 | updated_orientation.normalize();
364 |
365 | // Put the normalized quaternion back into the state vector
366 | this->state_(3) = updated_orientation.w();
367 | this->state_(4) = updated_orientation.x();
368 | this->state_(5) = updated_orientation.y();
369 | this->state_(6) = updated_orientation.z();
370 |
371 | std::cout << this->state_ << "\n" << std::endl;
372 |
373 | // Update state covariance matrix
374 | this->P_ = this->P_ - this->K_ * this->predictedMeasurementCovariance_ * this->K_.transpose();
375 | }
376 |
377 | UKF::~UKF()
378 | {
379 | }
--------------------------------------------------------------------------------
/cuUKF/src/gpuFilter.cu:
--------------------------------------------------------------------------------
1 | #include "gpuFilter.cuh"
2 |
3 |
4 | // --------------- GPU Kernels ---------------------- //
5 |
6 | __global__ void scaleCovarianceMatrixKernel(float* P, int n, float alpha, float kappa)
7 | {
8 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
9 | if (idx < n*n) P[idx] *= (alpha * (n + kappa));
10 | }
11 |
12 | __global__ void generateSigmaPointsKernel(float* sigmaPoints, const float* state, const float* cholP, int n, float lambda_plus_n)
13 | {
14 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
15 | if (idx >= 2 * n + 1) return;
16 |
17 | int state_idx = idx * n; // Index for the beginning of the current sigma point in the flattened array
18 |
19 | // For the 0th sigma point, copy the state directly
20 | if (idx == 0)
21 | {
22 | for (int i = 0; i < n; ++i) sigmaPoints[state_idx + i] = state[i];
23 | }
24 | else
25 | {
26 | int chol_idx = (idx - 1) % n; // Index for the current column of the Cholesky matrix
27 | float sign = idx <= n ? 1 : -1; // Determine whether to add or subtract
28 |
29 | for (int i = 0; i < n; ++i) sigmaPoints[state_idx + i] = state[i] + sign * sqrt(lambda_plus_n) * cholP[i * n + chol_idx];
30 | }
31 |
32 | // Normalize the quaternion if this sigma point contains one
33 | if (n > 6) // Assuming quaternion starts at index 3 and ends at index 6
34 | {
35 | float norm = sqrt(sigmaPoints[state_idx + 3] * sigmaPoints[state_idx + 3] + sigmaPoints[state_idx + 4] * sigmaPoints[state_idx + 4] + sigmaPoints[state_idx + 5] * sigmaPoints[state_idx + 5] + sigmaPoints[state_idx + 6] * sigmaPoints[state_idx + 6]);
36 | for (int i = 3; i <= 6; ++i) sigmaPoints[state_idx + i] /= norm;
37 | }
38 | }
39 |
40 | __global__ void initializeWeightsKernel(float* weightsMean, float* weightsCovariance, int n, float lambda, float alpha, float beta)
41 | {
42 | int idx = threadIdx.x + blockIdx.x * blockDim.x;
43 | if (idx >= 2 * n + 1) return;
44 |
45 | if (idx == 0)
46 | {
47 | weightsMean[idx] = lambda / (n + lambda);
48 | weightsCovariance[idx] = weightsMean[idx] + (1 - alpha * alpha + beta);
49 | }
50 | else
51 | {
52 | weightsMean[idx] = 1.0 / (2 * (n + lambda));
53 | weightsCovariance[idx] = 1.0 / (2 * (n + lambda));
54 | }
55 | }
56 |
57 | __global__ void updateSigmaPointsKernel(float* sigmaPoints, const float* measurement, int sigmaPointCount, float dt, int n)
58 | {
59 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
60 | if (idx >= sigmaPointCount) return;
61 |
62 | int baseIdx = idx * n;
63 |
64 | // Angular velocity components updated in the previous step
65 | float wx = sigmaPoints[baseIdx + 10];
66 | float wy = sigmaPoints[baseIdx + 11];
67 | float wz = sigmaPoints[baseIdx + 12];
68 |
69 | // Current quaternion components
70 | float qw = sigmaPoints[baseIdx + 3];
71 | float qx = sigmaPoints[baseIdx + 4];
72 | float qy = sigmaPoints[baseIdx + 5];
73 | float qz = sigmaPoints[baseIdx + 6];
74 |
75 | // Quaternion derivative due to angular velocity
76 | float dot_qw = 0.5 * (-qx * wx - qy * wy - qz * wz);
77 | float dot_qx = 0.5 * (qw * wx + qy * wz - qz * wy);
78 | float dot_qy = 0.5 * (qw * wy - qx * wz + qz * wx);
79 | float dot_qz = 0.5 * (qw * wz + qx * wy - qy * wx);
80 |
81 | // Update quaternion components with Euler integration
82 | sigmaPoints[baseIdx + 3] += dot_qw * dt;
83 | sigmaPoints[baseIdx + 4] += dot_qx * dt;
84 | sigmaPoints[baseIdx + 5] += dot_qy * dt;
85 | sigmaPoints[baseIdx + 6] += dot_qz * dt;
86 |
87 | // Normalize the updated quaternion
88 | float norm = sqrt(sigmaPoints[baseIdx + 3] * sigmaPoints[baseIdx + 3] +
89 | sigmaPoints[baseIdx + 4] * sigmaPoints[baseIdx + 4] +
90 | sigmaPoints[baseIdx + 5] * sigmaPoints[baseIdx + 5] +
91 | sigmaPoints[baseIdx + 6] * sigmaPoints[baseIdx + 6]);
92 |
93 | sigmaPoints[baseIdx + 3] /= norm;
94 | sigmaPoints[baseIdx + 4] /= norm;
95 | sigmaPoints[baseIdx + 5] /= norm;
96 | sigmaPoints[baseIdx + 6] /= norm;
97 |
98 | // Directly update the sigma point's angular velocity by adding the difference between measured and current angular velocity
99 | sigmaPoints[baseIdx + 10] += measurement[10] - sigmaPoints[baseIdx + 10];
100 | sigmaPoints[baseIdx + 11] += measurement[11] - sigmaPoints[baseIdx + 11];
101 | sigmaPoints[baseIdx + 12] += measurement[12] - sigmaPoints[baseIdx + 12];
102 |
103 | // Extract current position, velocity, and acceleration
104 | float x = sigmaPoints[baseIdx + 0];
105 | float y = sigmaPoints[baseIdx + 1];
106 | float z = sigmaPoints[baseIdx + 2];
107 | float vx = sigmaPoints[baseIdx + 7];
108 | float vy = sigmaPoints[baseIdx + 8];
109 | float vz = sigmaPoints[baseIdx + 9];
110 | float ax = sigmaPoints[baseIdx + 13];
111 | float ay = sigmaPoints[baseIdx + 14];
112 | float az = sigmaPoints[baseIdx + 15] - 9.81f; // Adjust for gravity
113 |
114 | // Update position using the equation of motion
115 | sigmaPoints[baseIdx + 0] = x + vx * dt + 0.5f * ax * dt * dt; // Update x position
116 | sigmaPoints[baseIdx + 1] = y + vy * dt + 0.5f * ay * dt * dt; // Update y position
117 | sigmaPoints[baseIdx + 2] = z + vz * dt + 0.5f * az * dt * dt; // Update z position
118 |
119 | // Update velocity using the equation of motion v = u + at
120 | sigmaPoints[baseIdx + 7] = vx + ax * dt; // Update vx
121 | sigmaPoints[baseIdx + 8] = vy + ay * dt; // Update vy
122 | sigmaPoints[baseIdx + 9] = vz + az * dt; // Update vz, considering gravity if az includes it
123 |
124 | // Directly update the sigma point's linear acceleration by adding the difference between measured and current linear acceleration
125 | sigmaPoints[baseIdx + 13] += measurement[13] - sigmaPoints[baseIdx + 13]; // ax
126 | sigmaPoints[baseIdx + 14] += measurement[14] - sigmaPoints[baseIdx + 14]; // ay
127 | sigmaPoints[baseIdx + 15] += measurement[15] - sigmaPoints[baseIdx + 15]; // az
128 | }
129 |
130 | __global__ void predictStateMeanKernel(const float* sigmaPoints, const float* weightsMean, float* predictedStateMean, int stateDimension, int sigmaPointCount)
131 | {
132 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
133 | if (idx < stateDimension)
134 | {
135 | float sum = 0.0f;
136 | for (int j = 0; j < sigmaPointCount; ++j) sum += sigmaPoints[j * stateDimension + idx] * weightsMean[j];
137 | predictedStateMean[idx] = sum;
138 | }
139 | }
140 |
141 | __global__ void predictStateCovarianceKernel(float* predictedSigmaPoints, float* predictedStateMean, float* weightsCovariance, float* processNoiseCovariance, float* predictedStateCovariance, int n, int sigmaPointCount) {
142 | int idx = threadIdx.x + blockIdx.x * blockDim.x;
143 | int jdx = threadIdx.y + blockIdx.y * blockDim.y;
144 |
145 | if (idx < n && jdx < n)
146 | {
147 | float covSum = 0.0f;
148 | for (int i = 0; i < sigmaPointCount; ++i)
149 | {
150 | float diff_i_j = (predictedSigmaPoints[i * n + idx] - predictedStateMean[idx]) * (predictedSigmaPoints[i * n + jdx] - predictedStateMean[jdx]);
151 | covSum += weightsCovariance[i] * diff_i_j;
152 | }
153 | if (idx == jdx) predictedStateCovariance[idx * n + jdx] = covSum + processNoiseCovariance[idx * n + jdx];
154 | else predictedStateCovariance[idx * n + jdx] = covSum;
155 | }
156 | }
157 |
158 | __global__ void predictMeasurementMeanKernel(const float* sigmaPoints, const float* weightsMean, float* predictedMeasurementMean, int measurementDimension, int sigmaPointCount) {
159 | int idx = threadIdx.x + blockIdx.x * blockDim.x;
160 | if (idx < measurementDimension)
161 | {
162 | float mean = 0.0f;
163 | for (int i = 0; i < sigmaPointCount; ++i) mean += sigmaPoints[i * measurementDimension + idx] * weightsMean[i];
164 | predictedMeasurementMean[idx] = mean;
165 | }
166 | }
167 |
168 | __global__ void computeMeasurementCovarianceKernel(const float* predictedMeasurementSigmaPoints, const float* predictedMeasurementMean, float* predictedMeasurementCovariance, const float* measurementNoiseCovariance, int n, int sigmaPointCount, const float* weightsCovariance)
169 | {
170 | int idx = threadIdx.x + blockIdx.x * blockDim.x;
171 | int jdx = threadIdx.y + blockIdx.y * blockDim.y;
172 |
173 | if (idx < n && jdx < n)
174 | {
175 | float covariance = 0.0f;
176 | for (int k = 0; k < sigmaPointCount; ++k)
177 | {
178 | float diff_i = predictedMeasurementSigmaPoints[k * n + idx] - predictedMeasurementMean[idx];
179 | float diff_j = predictedMeasurementSigmaPoints[k * n + jdx] - predictedMeasurementMean[jdx];
180 | covariance += weightsCovariance[k] * diff_i * diff_j;
181 | }
182 |
183 | if (idx == jdx) covariance += measurementNoiseCovariance[idx * n + jdx];
184 | predictedMeasurementCovariance[idx * n + jdx] = covariance;
185 | }
186 | }
187 |
188 | __global__ void computeCrossCovarianceKernel(const float* predictedStateSigmaPoints, const float* predictedMeasurementSigmaPoints, const float* predictedStateMean, const float* predictedMeasurementMean, float* crossCovariance, const float* weightsCovariance, int stateDimension, int measurementDimension, int sigmaPointCount)
189 | {
190 | int idx = threadIdx.x + blockIdx.x * blockDim.x;
191 | int jdx = threadIdx.y + blockIdx.y * blockDim.y;
192 |
193 | if (idx < stateDimension && jdx < measurementDimension)
194 | {
195 | float covSum = 0.0f;
196 | for (int i = 0; i < sigmaPointCount; ++i)
197 | {
198 | float diffState = predictedStateSigmaPoints[i * stateDimension + idx] - predictedStateMean[idx];
199 | float diffMeasurement = predictedMeasurementSigmaPoints[i * measurementDimension + jdx] - predictedMeasurementMean[jdx];
200 | covSum += weightsCovariance[i] * diffState * diffMeasurement;
201 | }
202 | crossCovariance[idx * measurementDimension + jdx] = covSum;
203 | }
204 | }
205 |
206 | __global__ void computeKalmanGainKernel(const float* crossCovariance, const float* predictedMeasurementCovarianceInv, float* kalmanGain, int stateDim, int measDim)
207 | {
208 | int idx = threadIdx.x + blockIdx.x * blockDim.x;
209 | int jdx = threadIdx.y + blockIdx.y * blockDim.y;
210 |
211 | if (idx < stateDim && jdx < measDim)
212 | {
213 | float sum = 0.0;
214 | for (int k = 0; k < measDim; ++k) sum += crossCovariance[idx * measDim + k] * predictedMeasurementCovarianceInv[k * measDim + jdx];
215 | kalmanGain[idx * measDim + jdx] = sum;
216 | }
217 | }
218 |
219 | __global__ void updateStateWithMeasurementKernel(float* stateMean, const float* kalmanGain, const float* measurementResidual, int stateDim, int measDim)
220 | {
221 | int idx = blockIdx.x * blockDim.x + threadIdx.x;
222 | if (idx < stateDim)
223 | {
224 | float updateValue = 0.0;
225 | for (int k = 0; k < measDim; ++k) updateValue += kalmanGain[idx * measDim + k] * measurementResidual[k];
226 | stateMean[idx] += updateValue; // Update the state with the computed value
227 | }
228 | }
229 |
230 | __global__ void normalizeQuaternionKernel(float* stateData)
231 | {
232 | // Since this operation is lightweight, we'll use a single thread.
233 | if (threadIdx.x == 0)
234 | {
235 | int quaternionStartIdx = 3; // Starting index of quaternion components in the state vector
236 | float norm = sqrtf(stateData[quaternionStartIdx] * stateData[quaternionStartIdx] + stateData[quaternionStartIdx + 1] * stateData[quaternionStartIdx + 1] + stateData[quaternionStartIdx + 2] * stateData[quaternionStartIdx + 2] + stateData[quaternionStartIdx + 3] * stateData[quaternionStartIdx + 3]);
237 |
238 | // Normalize each component of the quaternion
239 | for (int i = 0; i < 4; ++i) stateData[quaternionStartIdx + i] /= norm;
240 | }
241 | }
242 |
243 | // --------------- GPU Kernels ---------------------- //
244 |
245 |
246 | UKF::UKF(const float & alpha, const float & beta, const float & kappa) : rosLogger_(rclcpp::get_logger("cuUKF"))
247 | {
248 | // Initialize UKF parameters
249 | this->alpha_ = alpha;
250 | this->beta_ = beta;
251 | this->kappa_ = kappa;
252 | this->lambda_ = this->alpha_ * this->alpha_ * (this->n + this->kappa_) - this->n;
253 |
254 | // Initialize prevPose_ to store 3 floats
255 | this->prevPose_.resize(3, 0.0f);
256 |
257 | // cuBLAS handle initialization
258 | cublasStatus_t cublasStatus = cublasCreate(&this->cublasHandle);
259 | if (cublasStatus != CUBLAS_STATUS_SUCCESS) RCLCPP_ERROR(this->rosLogger_, "cuBLAS handle creation failed");
260 |
261 | // cuSOLVER handle initialization
262 | cusolverStatus_t cusolverStatus = cusolverDnCreate(&this->cusolverHandle);
263 | if (cusolverStatus != CUSOLVER_STATUS_SUCCESS) RCLCPP_ERROR(this->rosLogger_, "cuSOLVER handle creation failed");
264 |
265 | // Initialize State
266 | this->state_ = thrust::device_vector(this->n);
267 |
268 | // Initialize State Covaraince matrix
269 | this->P_ = thrust::device_vector(this->n * this->n);
270 |
271 | // Initialize Process Covairance matrix
272 | this->Q_ = thrust::device_vector(this->n * this->n);
273 |
274 | // Initalize Measuremnt Covariance matrix
275 | this->R_ = thrust::device_vector(this->n * this->n);
276 |
277 | RCLCPP_INFO(this->rosLogger_, "GPU UKF Initalized!!!");
278 | }
279 |
280 | UKF::~UKF()
281 | {
282 | cublasDestroy(this->cublasHandle);
283 | cusolverDnDestroy(this->cusolverHandle);
284 | RCLCPP_INFO(this->rosLogger_, "CUDA handles destroyed");
285 | }
286 |
287 | void UKF::initState(const std::vector& state_vector)
288 | {
289 | thrust::copy(state_vector.begin(), state_vector.end(), this->state_.begin());
290 | }
291 |
292 | void UKF::initStateCovarianceMatrix(const float & positionVariance, const float & orientationVariance, const float & linearVelocityVariance, const float & angularVelocityVariance, const float & linearAccelerationVariance)
293 | {
294 | // Initialize all elements to zero
295 | thrust::fill(this->P_.begin(), this->P_.end(), 0.0f);
296 |
297 | // Set variances for each part of the state
298 | // Position
299 | for (int i = 0; i < 3; ++i) this->P_[i * this->n + i] = positionVariance;
300 |
301 | // Orientation
302 | for (int i = 3; i < 7; ++i) this->P_[i * this->n + i] = orientationVariance;
303 |
304 | // Linear Velocity
305 | for (int i = 7; i < 10; ++i) this->P_[i * this->n + i] = linearVelocityVariance;
306 |
307 | // Angular Velocity
308 | for (int i = 10; i < 13; ++i) this->P_[i * this->n + i] = angularVelocityVariance;
309 |
310 | // Linear Acceleration
311 | for (int i = 13; i < 16; ++i) this->P_[i * this->n + i] = linearAccelerationVariance;
312 | }
313 |
314 | void UKF::initProcessCovarianceMatrix(const float & positionNoise, const float & orientationNoise, const float & linearVelocityNoise, const float & angularVelocityNoise, const float & linearAccelerationNoise)
315 | {
316 | // Assuming Q_ is a 16x16 matrix for a state vector with 16 elements
317 | thrust::fill(this->Q_.begin(), this->Q_.end(), 0.0f);
318 |
319 | // Position-related noise
320 | for (int i = 0; i < 3; ++i) this->Q_[i * this->n + i] = positionNoise;
321 |
322 | // Orientation-related noise (quaternion)
323 | for (int i = 3; i < 7; ++i) this->Q_[i * this->n + i] = orientationNoise;
324 |
325 | // Linear Velocity Noise
326 | for (int i = 7; i < 10; ++i) this->Q_[i * this->n + i] = linearVelocityNoise;
327 |
328 | // Angular Velocity Noise
329 | for (int i = 10; i < 13; ++i) this->Q_[i * this->n + i] = angularVelocityNoise;
330 |
331 | // Linear Acceleration Noise
332 | for (int i = 13; i < 16; ++i) this->Q_[i * this->n + i] = linearAccelerationNoise;
333 | }
334 |
335 | void UKF::updateMeasurementCovarianceMatrix(const float positionVariance, const float orientationVariance, const float velocityVariance, const float angularVelocityVariance, const float linearAccelerationVariance)
336 | {
337 | // Reset R_ to zeros, assuming R_ is for a 16x16 matrix
338 | thrust::fill(this->R_.begin(), this->R_.end(), 0.0f);
339 |
340 | // Update position covariance
341 | for (int i = 0; i < 3; ++i) this->R_[i * this->n + i] = positionVariance;
342 |
343 | // Update orientation covariance for quaternion components (w, x, y, z)
344 | for (int i = 3; i < 7; ++i) this->R_[i * this->n + i] = orientationVariance;
345 |
346 | // Update linear velocity covariance
347 | for (int i = 7; i < 10; ++i) this->R_[i * this->n + i] = velocityVariance;
348 |
349 | // Update angular velocity covariance
350 | for (int i = 10; i < 13; ++i) this->R_[i * this->n + i] = angularVelocityVariance;
351 |
352 | // Update linear acceleration covariance
353 | for (int i = 13; i < 16; ++i) this->R_[i * this->n + i] = linearAccelerationVariance;
354 | }
355 |
356 | thrust::device_vector UKF::scaleCovarianceMatrix()
357 | {
358 | // Compute the scale factor using class variables alpha_ and kappa_
359 | float scale = this->alpha_ * (this->n + this->kappa_);
360 |
361 | // Launch the CUDA kernel to scale the covariance matrix P_
362 | int numThreadsPerBlock = 256; // You can adjust this based on your GPU's capabilities
363 | int numBlocks = (this->n * this->n + numThreadsPerBlock - 1) / numThreadsPerBlock;
364 |
365 | thrust::device_vector P_copy = this->P_;
366 |
367 | // Call the CUDA kernel
368 | scaleCovarianceMatrixKernel<<>>(thrust::raw_pointer_cast(P_copy.data()), this->n, this->alpha_, this->kappa_);
369 |
370 | // Synchronize to ensure the kernel has finished execution
371 | cudaDeviceSynchronize();
372 |
373 | return P_copy;
374 | }
375 |
376 | thrust::device_vector UKF::choleskyDecomposition()
377 | {
378 | // Check if the cuSOLVER handle is valid
379 | if (this->cusolverHandle == nullptr) RCLCPP_ERROR(this->rosLogger_, "cuSOLVER handle is not valid. Make sure it was initialized correctly.");
380 |
381 | // Create a copy of the state covariance matrix P_ to perform the Cholesky decomposition
382 | thrust::device_vector P_scaled = this->scaleCovarianceMatrix();
383 |
384 | // Declare integer pointer for devInfo
385 | int* devInfo = nullptr;
386 |
387 | // Allocate memory for devInfo
388 | cudaMalloc((void**)&devInfo, sizeof(int));
389 |
390 | // Determine workspace size for Cholesky decomposition
391 | int lwork = 0;
392 | cusolverDnSpotrf_bufferSize(this->cusolverHandle, CUBLAS_FILL_MODE_LOWER, this->n, thrust::raw_pointer_cast(P_scaled.data()), this->n, &lwork);
393 |
394 | // Allocate workspace
395 | float* d_work = nullptr;
396 | cudaMalloc((void**)&d_work, sizeof(float) * lwork);
397 |
398 | // Perform Cholesky decomposition
399 | cusolverStatus_t status = cusolverDnSpotrf(this->cusolverHandle, CUBLAS_FILL_MODE_LOWER, this->n, thrust::raw_pointer_cast(P_scaled.data()), this->n, d_work, lwork, devInfo);
400 |
401 | // Check the value of devInfo for any errors
402 | int devInfo_h = 0;
403 | cudaMemcpy(&devInfo_h, devInfo, sizeof(int), cudaMemcpyDeviceToHost);
404 | if (status != CUSOLVER_STATUS_SUCCESS || devInfo_h != 0) RCLCPP_ERROR(this->rosLogger_, "Cholesky decomposition failed.");
405 |
406 | // Free the workspace memory and devInfo
407 | cudaFree(d_work);
408 | cudaFree(devInfo);
409 |
410 | cudaDeviceSynchronize();
411 |
412 | return P_scaled;
413 | }
414 |
415 | void UKF::generateSigmaPoints()
416 | {
417 | // Ensure sigmaPoints_ is correctly sized
418 | this->sigmaPoints_.resize((2 * this->n + 1) * this->n);
419 |
420 | // Calculate lambda_plus_n
421 | float lambda_plus_n = this->lambda_ + this->n;
422 |
423 | // Set up kernel execution parameters
424 | int threadsPerBlock = 256;
425 | int blocksPerGrid = ((2 * this->n + 1) + threadsPerBlock - 1) / threadsPerBlock;
426 |
427 | // Get scaled and decomposed State covariance matrix from Cholesky decomposition
428 | thrust::device_vector P_scaled = this->choleskyDecomposition();
429 |
430 | // Launch kernel to generate sigma points, now passing lambda_plus_n as an additional argument
431 | generateSigmaPointsKernel<<>>(thrust::raw_pointer_cast(this->sigmaPoints_.data()), thrust::raw_pointer_cast(this->state_.data()), thrust::raw_pointer_cast(P_scaled.data()), this->n, lambda_plus_n);
432 |
433 | // Wait for kernel to finish
434 | cudaDeviceSynchronize();
435 | }
436 |
437 | void UKF::initializeWeights()
438 | {
439 | // Ensure the device vectors have the correct size
440 | this->weightsMean_.resize(2 * this->n + 1);
441 | this->weightsCovariance_.resize(2 * this->n + 1);
442 |
443 | // Calculate grid and block sizes for the kernel launch
444 | int threadsPerBlock = 256; // You may adjust this based on your GPU's capabilities
445 | int blocksPerGrid = ((2 * this->n + 1) + threadsPerBlock - 1) / threadsPerBlock; // Ensure enough blocks to cover all elements
446 |
447 | // Launch the kernel to initialize the weights on the device
448 | initializeWeightsKernel<<>>(thrust::raw_pointer_cast(this->weightsMean_.data()), thrust::raw_pointer_cast(this->weightsCovariance_.data()), this->n, this->lambda_, this->alpha_, this->beta_);
449 |
450 | // Synchronize to ensure the kernel has finished execution
451 | cudaDeviceSynchronize();
452 | }
453 |
454 | void UKF::updatePrevPose(const std::vector& newPos)
455 | {
456 | thrust::copy(newPos.begin(), newPos.begin() + 3, this->prevPose_.begin());
457 | }
458 |
459 | void UKF::updateAllSigmaPoints(const std::vector& measurement, const float dt)
460 | {
461 | thrust::device_vector measurement_vector = this->prepareMeasurementVector(measurement, dt);
462 |
463 | int threadsPerBlock = 256;
464 | int blocksPerGrid = ((this->sigmaPointCount) + threadsPerBlock - 1) / threadsPerBlock;
465 |
466 | // Launch the unified update kernel
467 | updateSigmaPointsKernel<<>>(thrust::raw_pointer_cast(this->sigmaPoints_.data()), thrust::raw_pointer_cast(measurement_vector.data()), this->sigmaPointCount, dt, this->n);
468 | cudaDeviceSynchronize(); // Wait for the kernel to complete
469 | }
470 |
471 | void UKF::predictStateMean()
472 | {
473 | // Ensure the predicted state mean vector is correctly sized
474 | this->predictedStateMean_.resize(this->n); // Assuming 'n' is the state dimension
475 |
476 | int threadsPerBlock = 256;
477 | int blocksPerGrid = (this->n + threadsPerBlock - 1) / threadsPerBlock;
478 |
479 | // Launch the kernel
480 | predictStateMeanKernel<<>>(thrust::raw_pointer_cast(this->sigmaPoints_.data()), thrust::raw_pointer_cast(this->weightsMean_.data()), thrust::raw_pointer_cast(this->predictedStateMean_.data()), this->n, this->sigmaPointCount);
481 |
482 | // Synchronize to ensure the kernel execution completes
483 | cudaDeviceSynchronize();
484 | }
485 |
486 | void UKF::predictStateCovariance()
487 | {
488 | this->predictedStateCovariance_.resize(this->n * this->n);
489 | int threadsPerBlockDim = 16; // Using a 2D block for matrix operations
490 | dim3 threadsPerBlock(threadsPerBlockDim, threadsPerBlockDim);
491 | int blocksPerGridDim = (this->n + threadsPerBlockDim - 1) / threadsPerBlockDim;
492 | dim3 blocksPerGrid(blocksPerGridDim, blocksPerGridDim);
493 |
494 | // Launching the kernel
495 | predictStateCovarianceKernel<<>>(thrust::raw_pointer_cast(this->sigmaPoints_.data()), thrust::raw_pointer_cast(this->predictedStateMean_.data()), thrust::raw_pointer_cast(this->weightsCovariance_.data()), thrust::raw_pointer_cast(this->Q_.data()), thrust::raw_pointer_cast(this->predictedStateCovariance_.data()), this->n, this->sigmaPointCount);
496 | cudaDeviceSynchronize(); // Ensure completion
497 | cudaMemcpy(thrust::raw_pointer_cast(this->P_.data()), thrust::raw_pointer_cast(this->predictedStateCovariance_.data()), this->n * this->n * sizeof(float), cudaMemcpyDeviceToDevice);
498 | }
499 |
500 | thrust::device_vector UKF::scaleMeasurementCovarianceMatrix()
501 | {
502 | // Compute the scale factor using class variables alpha_ and kappa_
503 | float scale = this->alpha_ * (this->n + this->kappa_);
504 |
505 | // Launch the CUDA kernel to scale the covariance matrix P_
506 | int numThreadsPerBlock = 256; // You can adjust this based on your GPU's capabilities
507 | int numBlocks = (this->n * this->n + numThreadsPerBlock - 1) / numThreadsPerBlock;
508 |
509 | thrust::device_vector R_copy = this->R_;
510 |
511 | // Call the CUDA kernel
512 | scaleCovarianceMatrixKernel<<>>(thrust::raw_pointer_cast(R_copy.data()), this->n, this->alpha_, this->kappa_);
513 |
514 | // Synchronize to ensure the kernel has finished execution
515 | cudaDeviceSynchronize();
516 |
517 | return R_copy;
518 | }
519 |
520 | thrust::device_vector UKF::choleskyDecompositionMeasurment()
521 | {
522 | // Check if the cuSOLVER handle is valid
523 | if (this->cusolverHandle == nullptr) RCLCPP_ERROR(this->rosLogger_, "cuSOLVER handle is not valid. Make sure it was initialized correctly.");
524 |
525 | // Create a copy of the state covariance matrix P_ to perform the Cholesky decomposition
526 | thrust::device_vector R_scaled = this->scaleMeasurementCovarianceMatrix();
527 |
528 | // Declare integer pointer for devInfo
529 | int* devInfo = nullptr;
530 |
531 | // Allocate memory for devInfo
532 | cudaMalloc((void**)&devInfo, sizeof(int));
533 |
534 | // Determine workspace size for Cholesky decomposition
535 | int lwork = 0;
536 | cusolverDnSpotrf_bufferSize(this->cusolverHandle, CUBLAS_FILL_MODE_LOWER, this->n, thrust::raw_pointer_cast(R_scaled.data()), this->n, &lwork);
537 |
538 | // Allocate workspace
539 | float* d_work = nullptr;
540 | cudaMalloc((void**)&d_work, sizeof(float) * lwork);
541 |
542 | // Perform Cholesky decomposition
543 | cusolverStatus_t status = cusolverDnSpotrf(this->cusolverHandle, CUBLAS_FILL_MODE_LOWER, this->n, thrust::raw_pointer_cast(R_scaled.data()), this->n, d_work, lwork, devInfo);
544 |
545 | // Check the value of devInfo for any errors
546 | int devInfo_h = 0;
547 | cudaMemcpy(&devInfo_h, devInfo, sizeof(int), cudaMemcpyDeviceToHost);
548 | if (status != CUSOLVER_STATUS_SUCCESS || devInfo_h != 0) RCLCPP_ERROR(this->rosLogger_, "Cholesky decomposition failed.");
549 |
550 | // Free the workspace memory and devInfo
551 | cudaFree(d_work);
552 | cudaFree(devInfo);
553 |
554 | cudaDeviceSynchronize();
555 |
556 | return R_scaled;
557 | }
558 |
559 | void UKF::generateMeasurmentSigmaPoints(const std::vector& measurement, float dt)
560 | {
561 | // Measurement
562 | thrust::device_vector newMeasurement= this->prepareMeasurementVector(measurement, dt);
563 |
564 | // Ensure sigmaPoints_ is correctly sized
565 | this->predictedMeasurementSigmaPoints_.resize(this->sigmaPointCount * this->n);
566 |
567 | // Calculate lambda_plus_n
568 | float lambda_plus_n = this->lambda_ + this->n;
569 |
570 | // Set up kernel execution parameters
571 | int threadsPerBlock = 256;
572 | int blocksPerGrid = ((this->sigmaPointCount) + threadsPerBlock - 1) / threadsPerBlock;
573 |
574 | // Get scaled and decomposed State covariance matrix from Cholesky decomposition
575 | thrust::device_vector R_scaled = this->choleskyDecompositionMeasurment();
576 |
577 | // Launch kernel to generate sigma points, now passing lambda_plus_n as an additional argument
578 | generateSigmaPointsKernel<<>>(thrust::raw_pointer_cast(this->predictedMeasurementSigmaPoints_.data()), thrust::raw_pointer_cast(newMeasurement.data()), thrust::raw_pointer_cast(R_scaled.data()), this->n, lambda_plus_n);
579 |
580 | // Wait for kernel to finish
581 | cudaDeviceSynchronize();
582 | }
583 |
584 | void UKF::predictMeasurementMean()
585 | {
586 | // Assuming 'n' represents the dimension of your measurement vector
587 | this->predictedMeasurementMean_.resize(this->n, 0.0f); // Resize each time to ensure correct size
588 |
589 | // Setup kernel launch parameters
590 | int threadsPerBlock = 256;
591 | int blocksPerGrid = (this->n + threadsPerBlock - 1) / threadsPerBlock;
592 |
593 | // Call the kernel
594 | predictMeasurementMeanKernel<<>>(thrust::raw_pointer_cast(this->predictedMeasurementSigmaPoints_.data()), thrust::raw_pointer_cast(this->weightsMean_.data()), thrust::raw_pointer_cast(this->predictedMeasurementMean_.data()), this->n, this->sigmaPointCount);
595 |
596 | cudaDeviceSynchronize(); // Ensure completion
597 | }
598 |
599 |
600 | void UKF::computeMeasurementCovariance()
601 | {
602 | // Resize and initialize the predictedMeasurementCovariance_ vector
603 | this->predictedMeasurementCovariance_.resize(this->n * this->n, 0.0f);
604 |
605 | dim3 threadsPerBlock(16, 16); // 2D block for matrix operations
606 | dim3 blocksPerGrid((this->n + threadsPerBlock.x - 1) / threadsPerBlock.x, (this->n + threadsPerBlock.y - 1) / threadsPerBlock.y);
607 |
608 | computeMeasurementCovarianceKernel<<>>(thrust::raw_pointer_cast(this->predictedMeasurementSigmaPoints_.data()), thrust::raw_pointer_cast(this->predictedMeasurementMean_.data()), thrust::raw_pointer_cast(this->predictedMeasurementCovariance_.data()), thrust::raw_pointer_cast(this->R_.data()), this->n, this->sigmaPointCount, thrust::raw_pointer_cast(this->weightsCovariance_.data()));
609 |
610 | cudaDeviceSynchronize(); // Ensure the kernel execution completes
611 | }
612 |
613 | void UKF::computeCrossCovariance()
614 | {
615 | // Make sure the crossCovariance_ is resized appropriately
616 | this->crossCovariance_.resize(this->n * this->n, 0.0f); // Ensures it's correctly sized every time the function is called
617 |
618 | dim3 threadsPerBlock(16, 16); // Or other suitable configuration
619 | dim3 blocksPerGrid((this->n + threadsPerBlock.x - 1) / threadsPerBlock.x, (this->n + threadsPerBlock.y - 1) / threadsPerBlock.y);
620 |
621 | // Launch the kernel
622 | computeCrossCovarianceKernel<<>>(thrust::raw_pointer_cast(this->sigmaPoints_.data()), thrust::raw_pointer_cast(this->predictedMeasurementSigmaPoints_.data()), thrust::raw_pointer_cast(this->predictedStateMean_.data()), thrust::raw_pointer_cast(this->predictedMeasurementMean_.data()), thrust::raw_pointer_cast(this->crossCovariance_.data()), thrust::raw_pointer_cast(this->weightsCovariance_.data()), this->n, this->n, this->sigmaPointCount);
623 |
624 | cudaDeviceSynchronize(); // Wait for CUDA to finish
625 | }
626 |
627 | thrust::device_vector UKF::prepareMeasurementVector(const std::vector& measurement, float dt)
628 | {
629 | std::vector updatedMeasurement = measurement;
630 |
631 | // Compute derived velocity for each position dimension and update the measurement vector
632 | // Assuming the positions are stored at indices 0, 1, 2, and derived velocities should be placed at indices 7, 8, 9 for example
633 | for (size_t i = 0; i < 3; ++i)
634 | {
635 | float derivedVelocity = (measurement[i] - prevPose_[i]) / dt;
636 | updatedMeasurement[i + 7] = derivedVelocity; // Update with derived velocity
637 | }
638 |
639 | // Convert updatedMeasurement to device_vector and return
640 | return thrust::device_vector(updatedMeasurement.begin(), updatedMeasurement.end());
641 | }
642 |
643 |
644 | thrust::device_vector UKF::computeInverseMeasurementCovariance()
645 | {
646 | thrust::device_vector predictedMeasurementCovarianceInv(this->n * this->n); // Allocate space for the inverse matrix
647 |
648 | // Make a copy of the predictedMeasurementCovariance_ to preserve the original
649 | thrust::device_vector predictedMeasurementCovarianceCopy = this->predictedMeasurementCovariance_;
650 |
651 | int *d_ipiv; // Pivot indices for LU decomposition
652 | int *d_info; // Info about the execution
653 | float *d_work; // Workspace for getrf
654 | int lwork = 0; // Size of workspace
655 | cusolverDnHandle_t cusolverHandle = this->cusolverHandle;
656 |
657 | cudaMalloc(&d_ipiv, this->n * sizeof(int));
658 | cudaMalloc(&d_info, sizeof(int));
659 |
660 | // Step 1: Query working space of getrf
661 | cusolverDnSgetrf_bufferSize(cusolverHandle, this->n, this->n, thrust::raw_pointer_cast(predictedMeasurementCovarianceCopy.data()), this->n, &lwork);
662 | cudaMalloc(&d_work, lwork * sizeof(float));
663 |
664 | // Step 2: LU decomposition on the copy
665 | cusolverDnSgetrf(cusolverHandle, this->n, this->n, thrust::raw_pointer_cast(predictedMeasurementCovarianceCopy.data()), this->n, d_work, d_ipiv, d_info);
666 |
667 | // Prepare identity matrix as the right-hand side
668 | thrust::device_vector identity(this->n * this->n, 0);
669 | for (int i = 0; i < this->n; ++i) identity[i * this->n + i] = 1.0f;
670 |
671 | // Step 3: Solve linear equations for each column of the identity matrix to compute the inverse
672 | for (int i = 0; i < this->n; ++i)
673 | {
674 | float* column = thrust::raw_pointer_cast(&identity[i * this->n]);
675 | cusolverDnSgetrs(cusolverHandle, CUBLAS_OP_N, this->n, 1, thrust::raw_pointer_cast(predictedMeasurementCovarianceCopy.data()), this->n, d_ipiv, column, this->n, d_info);
676 | }
677 |
678 | // Copy the inverse from identity matrix to predictedMeasurementCovarianceInv
679 | thrust::copy(identity.begin(), identity.end(), predictedMeasurementCovarianceInv.begin());
680 |
681 | // Cleanup
682 | cudaFree(d_ipiv);
683 | cudaFree(d_info);
684 | cudaFree(d_work);
685 |
686 | return predictedMeasurementCovarianceInv;
687 | }
688 |
689 | void UKF::computeKalmanGain()
690 | {
691 | // Ensure kalmanGain_ is correctly sized
692 | this->kalmanGain_.resize(this->n * this->n, 0.0f);
693 |
694 | // Compute the inverse of the predicted measurement covariance matrix
695 | thrust::device_vector predictedMeasurementCovarianceInv = this->computeInverseMeasurementCovariance();
696 |
697 | // Setup kernel launch parameters
698 | dim3 threadsPerBlock(16, 16);
699 | dim3 blocksPerGrid((this->n + threadsPerBlock.x - 1) / threadsPerBlock.x, (this->n + threadsPerBlock.y - 1) / threadsPerBlock.y);
700 |
701 | // Launch the kernel
702 | computeKalmanGainKernel<<>>(thrust::raw_pointer_cast(this->crossCovariance_.data()), thrust::raw_pointer_cast(predictedMeasurementCovarianceInv.data()), thrust::raw_pointer_cast(this->kalmanGain_.data()), this->n, this->n);
703 | cudaDeviceSynchronize(); // Wait for CUDA to finish
704 | }
705 |
706 | void UKF::updateStateWithMeasurement(const std::vector& measurement, float dt)
707 | {
708 | thrust::device_vector actualMeasurement = this->prepareMeasurementVector(measurement, dt);
709 | thrust::device_vector measurementResidual(this->n); // Ensure measDim is defined correctly
710 | thrust::transform(actualMeasurement.begin(), actualMeasurement.end(), this->predictedMeasurementMean_.begin(), measurementResidual.begin(), thrust::minus());
711 |
712 | // Update the state with the measurement residual
713 | // blockSize and numBlocks need to be defined appropriately
714 | const int blockSize = 256; // A common choice for many CUDA-capable GPUs
715 | const int numBlocks = (this->n + blockSize - 1) / blockSize;
716 | updateStateWithMeasurementKernel<<>>(thrust::raw_pointer_cast(this->state_.data()), thrust::raw_pointer_cast(this->kalmanGain_.data()), thrust::raw_pointer_cast(measurementResidual.data()), this->n, this->n);
717 | cudaDeviceSynchronize(); // Ensure kernel execution completion
718 |
719 | normalizeQuaternionKernel<<<1, 1>>>(thrust::raw_pointer_cast(this->state_.data()));
720 | cudaDeviceSynchronize(); // Ensure the normalization completes before proceeding
721 | }
722 |
723 | void UKF::updateStateCovariance()
724 | {
725 | // Step 1: Calculate K * R
726 | thrust::device_vector KR(this->n * this->n); // Adjust dimensions if necessary
727 | float* d_KR = thrust::raw_pointer_cast(KR.data());
728 |
729 | cublasSgemm(this->cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N, this->n, this->n, this->n, &(this->alpha_), thrust::raw_pointer_cast(this->kalmanGain_.data()), this->n, thrust::raw_pointer_cast(this->predictedMeasurementCovariance_.data()), this->n, &(this->beta_), d_KR, this->n);
730 |
731 | // Step 2: Calculate (K * R) * K^T
732 | thrust::device_vector KRKT(this->n * this->n);
733 | float* d_KRKT = thrust::raw_pointer_cast(KRKT.data());
734 |
735 | cublasSgemm(this->cublasHandle, CUBLAS_OP_N, CUBLAS_OP_T, this->n, this->n, this->n, &(this->alpha_), d_KR, this->n, thrust::raw_pointer_cast(this->kalmanGain_.data()), this->n, &(this->beta_), d_KRKT, this->n);
736 |
737 | // Step 3: Update P = P - (K * R) * K^T
738 | // Negate KRKT for subtraction
739 | thrust::transform(KRKT.begin(), KRKT.end(), KRKT.begin(), thrust::negate());
740 |
741 | // Add the negated (K * R) * K^T to P
742 | cublasSaxpy(this->cublasHandle, this->n * this->n, &(this->alpha_), d_KRKT, 1, thrust::raw_pointer_cast(this->P_.data()), 1);
743 | }
744 |
745 | void UKF::updateFilter(const std::vector& measurement, const float dt, const float positionVariance, const float orientationVariance, const float velocityVariance, const float angularVelocityVariance, const float linearAccelerationVariance)
746 | {
747 | this->updateAllSigmaPoints(measurement, dt);
748 | this->predictStateMean();
749 | this->predictStateCovariance();
750 | this->generateMeasurmentSigmaPoints(measurement, dt);
751 | this->updateMeasurementCovarianceMatrix(positionVariance, orientationVariance, velocityVariance, angularVelocityVariance, linearAccelerationVariance);
752 | this->predictMeasurementMean();
753 | this->computeMeasurementCovariance();
754 | this->computeCrossCovariance();
755 | this->computeKalmanGain();
756 | this->updateStateWithMeasurement(measurement, dt);
757 | this->updateStateCovariance();
758 | }
759 |
760 | std::vector UKF::getState()
761 | {
762 | // Create a standard vector with the same size as the device vector
763 | std::vector stateHost(this->state_.size());
764 |
765 | // Copy data from device vector to host vector
766 | thrust::copy(this->state_.begin(), this->state_.end(), stateHost.begin());
767 |
768 | return stateHost;
769 | }
770 |
771 | std::vector UKF::getStateCovarianceMatrix()
772 | {
773 | // Allocate a std::vector with the appropriate size
774 | std::vector state_covariance_matrix(this->n * this->n);
775 |
776 | // Copy the data from the device_vector to the std::vector
777 | thrust::copy(this->P_.begin(), this->P_.end(), state_covariance_matrix.begin());
778 |
779 | return state_covariance_matrix;
780 | }
781 |
782 | void UKF::printStateInfo()
783 | {
784 | std::stringstream ss;
785 |
786 | // State vector
787 | ss << "State: ";
788 | for (size_t i = 0; i < this->state_.size(); ++i) {
789 | ss << this->state_[i] << " ";
790 | }
791 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
792 | ss.str("");
793 |
794 | // State Covariance matrix
795 | ss << "State Covariance matrix: ";
796 | for (size_t i = 0; i < this->P_.size(); ++i) {
797 | ss << this->P_[i] << " ";
798 | }
799 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
800 | ss.str("");
801 |
802 | // Process Covariance matrix
803 | ss << "Process Covariance matrix: ";
804 | for (size_t i = 0; i < this->Q_.size(); ++i) {
805 | ss << this->Q_[i] << " ";
806 | }
807 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
808 | ss.str("");
809 |
810 | // Measurement Covariance matrix
811 | ss << "Measurement Covariance matrix: ";
812 | for (size_t i = 0; i < this->R_.size(); ++i) {
813 | ss << this->R_[i] << " ";
814 | }
815 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
816 | ss.str("");
817 |
818 | // Sigma Points matrix
819 | ss << "Sigma Points matrix: ";
820 | for (size_t i = 0; i < this->sigmaPoints_.size(); ++i) {
821 | ss << this->sigmaPoints_[i] << " ";
822 | }
823 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
824 | ss.str("");
825 |
826 | // Weights for sigma points (Mean)
827 | ss << "Weights for sigma points (Mean): ";
828 | for (size_t i = 0; i < this->weightsMean_.size(); ++i) {
829 | ss << this->weightsMean_[i] << " ";
830 | }
831 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
832 | ss.str("");
833 |
834 | // Weights for sigma points (Covariance)
835 | ss << "Weights for sigma points (Covariance): ";
836 | for (size_t i = 0; i < this->weightsCovariance_.size(); ++i) {
837 | ss << this->weightsCovariance_[i] << " ";
838 | }
839 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
840 | ss.str("");
841 |
842 | // Predicted State Mean
843 | ss << "Predicted State Mean: ";
844 | for (size_t i = 0; i < this->predictedStateMean_.size(); ++i) {
845 | ss << this->predictedStateMean_[i] << " ";
846 | }
847 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
848 | ss.str("");
849 |
850 | // Predicted State Covariance
851 | ss << "Predicted State Covariance: ";
852 | for (size_t i = 0; i < this->predictedStateCovariance_.size(); ++i) {
853 | ss << this->predictedStateCovariance_[i] << " ";
854 | }
855 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
856 | ss.str("");
857 |
858 | // Predicted Measurement Sigma Points
859 | ss << "Predicted Measurement Sigma Points: ";
860 | for (size_t i = 0; i < this->predictedMeasurementSigmaPoints_.size(); ++i) {
861 | ss << this->predictedMeasurementSigmaPoints_[i] << " ";
862 | }
863 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
864 | ss.str("");
865 |
866 | // Predicted Measurement Mean
867 | ss << "Predicted Measurement Mean: ";
868 | for (size_t i = 0; i < this->predictedMeasurementMean_.size(); ++i) {
869 | ss << this->predictedMeasurementMean_[i] << " ";
870 | }
871 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
872 | ss.str("");
873 |
874 | // Predicted Measurement Covariance
875 | ss << "Predicted Measurement Covariance: ";
876 | for (size_t i = 0; i < this->predictedMeasurementCovariance_.size(); ++i) {
877 | ss << this->predictedMeasurementCovariance_[i] << " ";
878 | }
879 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
880 | ss.str("");
881 |
882 | // Cross-Covariance matrix between state and measurement
883 | ss << "Cross-Covariance matrix between state and measurement: ";
884 | for (size_t i = 0; i < this->crossCovariance_.size(); ++i) {
885 | ss << this->crossCovariance_[i] << " ";
886 | }
887 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
888 | ss.str("");
889 |
890 | // Kalman Gain
891 | ss << "Kalman Gain: ";
892 | for (size_t i = 0; i < this->kalmanGain_.size(); ++i) {
893 | ss << this->kalmanGain_[i] << " ";
894 | }
895 | RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
896 | }
--------------------------------------------------------------------------------