├── CITATION.cff
├── LICENSE
├── README.md
├── assets
    ├── Covarince.png
    └── Odometry.png
└── cuUKF
    ├── CMakeLists.txt
    ├── LICENSE
    ├── include
        └── cuUKF
        │   ├── coreNode.hpp
        │   ├── cpuFilter.hpp
        │   ├── cpuNode.hpp
        │   └── gpuFilter.cuh
    ├── launch
        └── gpu_filter.launch.py
    ├── nodes
        ├── coreNode.cpp
        └── cpuNode.cpp
    ├── package.xml
    └── src
        ├── cpuFilter.cpp
        └── gpuFilter.cu


/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | message: "If you use this software, please cite it as below."
 3 | authors:
 4 | - family-names: "Hari"
 5 |   given-names: "Jagennath"
 6 | title: "CUDA-Accelerated-Visual-Inertial-Odometry-Fusion"
 7 | version: 1.0.0
 8 | doi: 10.5281/zenodo.1234
 9 | date-released: 2024-02-28
10 | url: "https://github.com/jagennath-hari/CUDA-Accelerated-Visual-Inertial-Odometry-Fusion"
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2024, Jagennath Hari
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice, this
 9 |    list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 |    this list of conditions and the following disclaimer in the documentation
13 |    and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its
16 |    contributors may be used to endorse or promote products derived from
17 |    this software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CUDA-Accelerated-Visual-Inertial-Odometry-Fusion
 2 | Harness GPU acceleration for advanced visual odometry and IMU data fusion with our Unscented Kalman Filter (UKF) implementation. Developed with C++ and powered by CUDA, cuBLAS, and cuSOLVER, the system delivers unmatched real-time performance in state and covariance estimation for robotics applications. Integrated with ROS 2 for seamless sensor data management, it ensures high-efficiency and scalable solutions for complex robotic systems, making it ideal for a wide range of autonomous system applications.
 3 | 
 4 | ## 🏁 Dependencies
 5 | 1) NVIDIA Driver ([Official Download Link](https://www.nvidia.com/download/index.aspx))
 6 | 2) CUDA Toolkit ([Official Link](https://developer.nvidia.com/cuda-downloads))
 7 | 3) ROS 2 Humble ([Official Link](https://docs.ros.org/en/humble/Installation.html))
 8 | 
 9 | ## ⚙️ Install
10 | 1) Clone `https://github.com/jagennath-hari/CUDA-Accelerated-Visual-Inertial-Odometry-Fusion.git`
11 | 2) Move `cuUKF` into `ROS2_WORKSPACE`
12 | 3) Modify the `CMakeLists.txt` file at `set(CMAKE_CUDA_ARCHITECTURES 89)` and change to your GPU architecture. If you don't know which one you can refer to [NVIDIA GPU Compute Capability](https://developer.nvidia.com/cuda-gpus#compute).
13 | 4) `cd ROS2_WORKSPACE` build workspace using `colcon build --symlink-install --cmake-args=-DCMAKE_BUILD_TYPE=Release --parallel-workers $(nproc)`
14 | 
15 | ## 📈 Running cuUKF
16 | Launch the node using `ros2 launch cuUKF gpu_filter.launch.py odom_topic:=/odom imu_topic:=/imu`.
17 | The `/odom` should be replaced with your `nav_msgs/Odometry` and the `/imu` should be replaced with your `sensor_msgs/Imu`.
18 | 
19 | ## 💬 ROS 2 Message
20 | The filter odometry gets published as `nav_msgs/Odometry` in the `/cuUKF/filtered_odom` topic.
21 | 
22 | ## 🖼️ RVIZ2 GUI
23 | Launch RVIZ2 using `ros2 run rviz2 rviz2` and subcribe to the `/cuUKF/filered_odom` topic.
24 | 
25 | <div align="center">
26 |     <img src="assets/Odometry.png" alt="Odometry" width="800"/>
27 |     <p>Odometry</p>
28 | </div>
29 | 
30 | ### Visualize the covariance of the state
31 | 
32 | <div align="center">
33 |     <img src="assets/Covarince.png" alt="Covariance" width="800"/>
34 |     <p>Covariance</p>
35 | </div>
36 | 
37 | ## ⚠️ Note
38 | 1) The fusion does not consider the IMU's orientation only the visual odometry's orientation for the system dynamics and measurements, as raw IMU don't produce orientation without additional filters such as complementary filter and the Madgwick filter.
39 | 2) Feel free to change the alpha, beta and kappa values along with the covariance to improve state estimation.
40 | 3) The dynamics of the system use simple equations, for the best fusion you may need to change the dynamics.
41 | 4) Consider adding augmented sigma points to further increase the robustness.  
42 | 5) There is also a CPU version of the UKF for quick development and testing.
43 | 


--------------------------------------------------------------------------------
/assets/Covarince.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jagennath-hari/CUDA-Accelerated-Visual-Inertial-Odometry-Fusion/1dd9f36500c23322e66e96758657f788b2b99c0a/assets/Covarince.png


--------------------------------------------------------------------------------
/assets/Odometry.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jagennath-hari/CUDA-Accelerated-Visual-Inertial-Odometry-Fusion/1dd9f36500c23322e66e96758657f788b2b99c0a/assets/Odometry.png


--------------------------------------------------------------------------------
/cuUKF/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 3.8)
  2 | 
  3 | get_filename_component(PACKAGE_NAME ${CMAKE_CURRENT_SOURCE_DIR} NAME)
  4 | message("Creating Package: ${PACKAGE_NAME}")
  5 | project(${PACKAGE_NAME} LANGUAGES CXX CUDA)  ## this package name is the name of the directory this cmake file is in
  6 | 
  7 | add_compile_definitions(CMAKE_PACKAGE_NAME=${PACKAGE_NAME})
  8 | 
  9 | # Default to C++17 for humble
 10 | if(NOT CMAKE_CXX_STANDARD)
 11 |   set(CMAKE_CXX_STANDARD 17)
 12 | endif()
 13 | 
 14 | if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
 15 |   add_compile_options(-Wall -Wextra -Wpedantic)
 16 | endif()
 17 | 
 18 | set(CMAKE_CUDA_ARCHITECTURES 89)
 19 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=-Wno-unknown-pragmas")
 20 | 
 21 | 
 22 | set(ROS_DEPENDS
 23 |     rclcpp
 24 |     std_msgs
 25 |     ament_cmake
 26 |     sensor_msgs
 27 |     nav_msgs
 28 |     message_filters
 29 |     CUDAToolkit
 30 |   )
 31 | 
 32 | # find dependencies
 33 | foreach(DEPEND ${ROS_DEPENDS})
 34 |   find_package(${DEPEND} REQUIRED)
 35 | endforeach(DEPEND)
 36 | 
 37 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include/cuUKF)
 38 | include_directories(
 39 |   include
 40 |   ${OpenCV_INCLUDE_DIRS}
 41 |   ${Eigen_INCLUDE_DIRS}
 42 | )
 43 | 
 44 | message("buliding ${PROJECT_NAME} node: ")
 45 | 
 46 | add_library(cpu_filter
 47 |   src/cpuFilter.cpp
 48 | )
 49 | 
 50 | add_executable(cpu_node nodes/cpuNode.cpp)
 51 | ament_target_dependencies(cpu_node
 52 |   rclcpp
 53 |   sensor_msgs
 54 |   nav_msgs
 55 |   std_msgs
 56 |   Eigen3
 57 |   message_filters
 58 |   geometry_msgs
 59 | )
 60 | 
 61 | ament_target_dependencies(cpu_filter
 62 |   rclcpp
 63 |   sensor_msgs
 64 |   nav_msgs
 65 |   std_msgs
 66 |   Eigen3
 67 |   message_filters
 68 |   geometry_msgs
 69 | )
 70 | 
 71 | target_link_libraries(cpu_node
 72 |   cpu_filter
 73 | )
 74 | 
 75 | #############################################
 76 | message(STATUS "CUDA Compiler: ${CMAKE_CUDA_COMPILER}")
 77 | 
 78 | add_library(gpu_filter
 79 |   src/gpuFilter.cu
 80 | )
 81 | 
 82 | target_include_directories(gpu_filter PUBLIC
 83 |   ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
 84 | )
 85 | 
 86 | target_link_libraries(gpu_filter
 87 |   CUDA::cublas
 88 |   CUDA::cusolver
 89 |   CUDA::cuda_driver
 90 | )
 91 | 
 92 | set_target_properties(gpu_filter PROPERTIES
 93 |   CUDA_SEPARABLE_COMPILATION ON
 94 | )
 95 | target_compile_options(gpu_filter PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-unknown-pragmas>)
 96 | 
 97 | 
 98 | add_executable(core_node nodes/coreNode.cpp)
 99 | ament_target_dependencies(core_node
100 |   rclcpp
101 |   sensor_msgs
102 |   nav_msgs
103 |   std_msgs
104 |   message_filters
105 |   geometry_msgs
106 | )
107 | 
108 | ament_target_dependencies(gpu_filter
109 |   rclcpp
110 |   sensor_msgs
111 |   nav_msgs
112 |   std_msgs
113 |   message_filters
114 |   geometry_msgs
115 | )
116 | 
117 | target_link_libraries(core_node
118 |   gpu_filter
119 | )
120 | 
121 | ###############################################
122 | install(TARGETS
123 |   cpu_node
124 |   core_node
125 |   DESTINATION lib/${PROJECT_NAME})
126 | 
127 | install(DIRECTORY
128 |   launch
129 |   DESTINATION share/${PROJECT_NAME}/
130 | )
131 | 
132 | if(BUILD_TESTING)
133 |   find_package(ament_lint_auto REQUIRED)
134 |   # the following line skips the linter which checks for copyrights
135 |   # comment the line when a copyright and license is added to all source files
136 |   set(ament_cmake_copyright_FOUND TRUE)
137 |   # the following line skips cpplint (only works in a git repo)
138 |   # comment the line when this package is in a git repo and when
139 |   # a copyright and license is added to all source files
140 |   set(ament_cmake_cpplint_FOUND TRUE)
141 |   ament_lint_auto_find_test_dependencies()
142 | endif()
143 | 
144 | ament_package()
145 | 


--------------------------------------------------------------------------------
/cuUKF/LICENSE:
--------------------------------------------------------------------------------
 1 | Redistribution and use in source and binary forms, with or without
 2 | modification, are permitted provided that the following conditions are met:
 3 | 
 4 |    * Redistributions of source code must retain the above copyright
 5 |      notice, this list of conditions and the following disclaimer.
 6 | 
 7 |    * Redistributions in binary form must reproduce the above copyright
 8 |      notice, this list of conditions and the following disclaimer in the
 9 |      documentation and/or other materials provided with the distribution.
10 | 
11 |    * Neither the name of the copyright holder nor the names of its
12 |      contributors may be used to endorse or promote products derived from
13 |      this software without specific prior written permission.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 | POSSIBILITY OF SUCH DAMAGE.
26 | 


--------------------------------------------------------------------------------
/cuUKF/include/cuUKF/coreNode.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef CORENODE_HPP_
 2 | #define CORENODE_HPP_
 3 | 
 4 | // ROS includes
 5 | #include <rclcpp/rclcpp.hpp>
 6 | 
 7 | // Message filters
 8 | #include <message_filters/subscriber.h>
 9 | #include <message_filters/synchronizer.h>
10 | #include <message_filters/sync_policies/approximate_time.h>
11 | 
12 | // Nav and Sensor Msgs includes
13 | #include <nav_msgs/msg/odometry.hpp>
14 | #include <sensor_msgs/msg/imu.hpp>
15 | 
16 | // Unscneted Kalaman filter include
17 | #include "gpuFilter.cuh"
18 | 
19 | class CoreNode : public rclcpp::Node
20 | {
21 | private:
22 |     // Message filter subscribers
23 |     message_filters::Subscriber<nav_msgs::msg::Odometry> odomDataSub_;
24 |     message_filters::Subscriber<sensor_msgs::msg::Imu> imuDataSub_;
25 | 
26 |     // Synchronizer
27 |     typedef message_filters::sync_policies::ApproximateTime<nav_msgs::msg::Odometry, sensor_msgs::msg::Imu> MySyncPolicy;
28 |     typedef message_filters::Synchronizer<MySyncPolicy> Sync;
29 |     std::shared_ptr<Sync> sync_;
30 | 
31 |     // Combined callback
32 |     void callback_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu);
33 | 
34 |     // UKF Object
35 |     std::shared_ptr<UKF> kalmanFilter_;
36 | 
37 |     // Init for UKF
38 |     bool isIntialized_;
39 | 
40 |     // Member to store the previous timestamp
41 |     rclcpp::Time prevTimestamp_;
42 | 
43 |     // Generating State vector
44 |     std::vector<float> genState_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu);
45 | 
46 |     // Generating Measurement Vector
47 |     std::vector<float> genMeasurement_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu);
48 | 
49 |     // Generate Odom Msg
50 |     nav_msgs::msg::Odometry createOdometryMsg_(const std::vector<float>& state, const std::vector<float>& covariance_matrix, const nav_msgs::msg::Odometry::ConstSharedPtr& original_odom);
51 | 
52 |     // Publisher
53 |     rclcpp::Publisher<nav_msgs::msg::Odometry>::SharedPtr odomPublisher_;
54 | 
55 | public:
56 |     CoreNode();
57 |     ~CoreNode();
58 | };
59 | 
60 | #endif
61 | 


--------------------------------------------------------------------------------
/cuUKF/include/cuUKF/cpuFilter.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef CPUFILTER_HPP_
 2 | #define CPUFILTER_HPP_
 3 | 
 4 | // ROS includes
 5 | #include <rclcpp/rclcpp.hpp>
 6 | #include <rclcpp/logging.hpp>
 7 | 
 8 | // Eigen3 Includes
 9 | #if defined __GNUC__ || defined __APPLE__
10 | #include <Eigen/Dense>
11 | #else
12 | #include <eigen3/Eigen/Dense>
13 | #endif
14 | 
15 | class UKF
16 | {
17 | private:
18 |     // ROS Logger for outsdie node logging
19 |     rclcpp::Logger rosLogger_;
20 | 
21 |     // 16x1 state vector
22 |     Eigen::VectorXd state_; 
23 | 
24 |     // 16x16 model covariance matrix
25 |     Eigen::MatrixXd P_;
26 |     void initializeCovarianceMatrix_();
27 | 
28 |     // 16x16 sensor covariance matrix
29 |     Eigen::MatrixXd R_; // Measurement noise covariance matrix
30 | 
31 |     // Kalman Gain
32 |     Eigen::MatrixXd K_;
33 | 
34 |     // Measurement Model Functions
35 |     Eigen::Vector3d prevPosition_;
36 |     
37 |     // Sigma points
38 |     static const int n = 16; // Dimension of the state vector
39 |     Eigen::MatrixXd sigmaPoints_; // Matrix to hold sigma points, size n x (2n + 1)
40 |     Eigen::MatrixXd measurementSigmaPoints_; // Matrix to hold measurment sigma points during tranformation to measuremnt space
41 |     void generateSigmaPoints_(); // gen sigma points
42 | 
43 |     // UKF parameters
44 |     double lambda_;
45 |     double alpha_;
46 |     double beta_;
47 |     double kappa_;
48 | 
49 |     // Sigma weights, mean and covaraince
50 |     Eigen::VectorXd weightsMean_;
51 |     Eigen::VectorXd weightsCovariance_;
52 |     Eigen::VectorXd predictedStateMean_; // Predicted state mean
53 |     Eigen::VectorXd predictedMeasurementMean_;
54 |     Eigen::MatrixXd predictedMeasurementCovariance_; // Predicted measurement covariance
55 |     Eigen::MatrixXd Pxy_; // Cross-covariance matrix
56 |     void initializeWeights_();
57 | 
58 |     // Sigma Process Model functions
59 |     Eigen::Quaterniond updateOrientationSigmaPoint_(const Eigen::Quaterniond& current_orientation, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& gyro_bias, double dt);
60 |     void updatePositionAndVelocitySigmaPoint_(Eigen::VectorXd& sigmaPoint, const Eigen::Vector3d& accel_meas, double dt);
61 |     void updateBiasesSigmaPoint_(Eigen::VectorXd& sigmaPoint, const Eigen::Vector3d& gyro_error, const Eigen::Vector3d& accel_error);
62 | 
63 |     // Sigma Measurement Model
64 |     Eigen::VectorXd applyMeasurementModelToSigmaPoint_(const Eigen::VectorXd& sigmaPoint, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas);
65 |     
66 | public:
67 |     UKF();
68 |     ~UKF();
69 | 
70 |     // Initalize state vector
71 |     void initializeState(const Eigen::VectorXd& initialState, const Eigen::MatrixXd& pose_covariance, const Eigen::MatrixXd& ang_vel_covariance, const Eigen::MatrixXd& accel_covariance);
72 | 
73 |     // Update prev position
74 |     void updatePrevPose(const Eigen::Vector3d& prevPose);
75 | 
76 |     // Measurement Model
77 |     void updateNoiseMatrix(const Eigen::MatrixXd& pose_covariance_6x6, const Eigen::MatrixXd& ang_vel_covariance_3x3,const Eigen::MatrixXd& accel_covariance_3x3);
78 |     Eigen::VectorXd updateMeasurementModel(const Eigen::Vector3d& position, const Eigen::Quaterniond& orientation, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas, double dt, const Eigen::MatrixXd& pose_covariance_6x6, const Eigen::MatrixXd& ang_vel_covariance_3x3,const Eigen::MatrixXd& accel_covariance_3x3);
79 | 
80 |     // Prediction Step
81 |     void predictStateMean_();
82 |     void predictStateCovariance_();
83 |     void propagateSigmaPointsThroughProcessModel_(double dt, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas, const Eigen::Vector3d& gyro_error, const Eigen::Vector3d& accel_error);
84 | 
85 |     // Update Step
86 |     void transformSigmaPointsToMeasurementSpace_(const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas);
87 |     void calculatePredictedMeasurementMean_();
88 |     void calculatePredictedMeasurementCovariance_();
89 |     void calculateCrossCovariance_();
90 |     void computeKalmanGain_();
91 |     void updateStateAndCovariance_(const Eigen::VectorXd& actualMeasurement);
92 | };
93 | 
94 | #endif


--------------------------------------------------------------------------------
/cuUKF/include/cuUKF/cpuNode.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef CPUNODE_HPP_
 2 | #define CPUNODE_HPP_
 3 | 
 4 | // ROS includes
 5 | #include <rclcpp/rclcpp.hpp>
 6 | 
 7 | // Message filters
 8 | #include <message_filters/subscriber.h>
 9 | #include <message_filters/synchronizer.h>
10 | #include <message_filters/sync_policies/approximate_time.h>
11 | 
12 | // Nav and Sensor Msgs includes
13 | #include <nav_msgs/msg/odometry.hpp>
14 | #include <sensor_msgs/msg/imu.hpp>
15 | 
16 | // Unscneted Kalaman filter include
17 | #include "cpuFilter.hpp"
18 | 
19 | class CpuNode : public rclcpp::Node
20 | {
21 | private:
22 |     // Message filter subscribers
23 |     message_filters::Subscriber<nav_msgs::msg::Odometry> odomDataSub_;
24 |     message_filters::Subscriber<sensor_msgs::msg::Imu> imuDataSub_;
25 | 
26 |     // Synchronizer
27 |     typedef message_filters::sync_policies::ApproximateTime<nav_msgs::msg::Odometry, sensor_msgs::msg::Imu> MySyncPolicy;
28 |     typedef message_filters::Synchronizer<MySyncPolicy> Sync;
29 |     std::shared_ptr<Sync> sync_;
30 | 
31 |     // Combined callback
32 |     void callback_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu);
33 | 
34 |     // UKF Object
35 |     std::shared_ptr<UKF> kalmanFilter_;
36 | 
37 |     // Init for UKF
38 |     bool isIntialized_;
39 | 
40 |     // Member to store the previous timestamp
41 |     rclcpp::Time prevTimestamp_;
42 | 
43 |     // ROS msg to Eigen conversions
44 |     Eigen::MatrixXd extractPoseCovariance_(const nav_msgs::msg::Odometry& odom);
45 |     Eigen::MatrixXd extractAngularVelocityCovariance_(const sensor_msgs::msg::Imu& imu);
46 |     Eigen::MatrixXd extractLinearAccelerationCovariance_(const sensor_msgs::msg::Imu& imu);
47 | 
48 | public:
49 |     CpuNode();
50 |     ~CpuNode();
51 | };
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/cuUKF/include/cuUKF/gpuFilter.cuh:
--------------------------------------------------------------------------------
  1 | #ifndef GPUFILTER_CUH_
  2 | #define GPUFILTER_CUH_
  3 | 
  4 | // Include
  5 | #include <iostream>
  6 | 
  7 | // ROS includes
  8 | #include <rclcpp/rclcpp.hpp>
  9 | #include <rclcpp/logging.hpp>
 10 | 
 11 | // Nav and Sensor Msgs includes
 12 | #include <nav_msgs/msg/odometry.hpp>
 13 | #include <sensor_msgs/msg/imu.hpp>
 14 | 
 15 | // CUDA includes
 16 | #include <cuda_runtime.h>
 17 | 
 18 | // cuBLAS for linear algebra operations
 19 | #include <cublas_v2.h>
 20 | 
 21 | // cuSOLVER for Cholesky decomposition
 22 | #include <cusolverDn.h>
 23 | 
 24 | // Thrust for device based vector operations
 25 | #include <thrust/device_vector.h>
 26 | 
 27 | class UKF
 28 | {
 29 | private:
 30 |     // ROS Logger for outsdie node logging
 31 |     rclcpp::Logger rosLogger_;  
 32 | 
 33 |     // CUDA handles
 34 |     cublasHandle_t cublasHandle;
 35 |     cusolverDnHandle_t cusolverHandle;
 36 |     
 37 |     // UKF parameters
 38 |     static const int n = 16; // Dimension of the state vector
 39 |     static const int sigmaPointCount = 2 * 16 + 1;
 40 |     float lambda_;
 41 |     float alpha_;
 42 |     float beta_;
 43 |     float kappa_;  
 44 | 
 45 |     // Stores the previous position (x, y, z)
 46 |     thrust::device_vector<float> prevPose_;
 47 |     // State vector using thrust
 48 |     thrust::device_vector<float> state_;
 49 |     // State Covariance matrix, linearized using thrust
 50 |     thrust::device_vector<float> P_;
 51 |     // Process Covariance matrix, linearized using thrust
 52 |     thrust::device_vector<float> Q_;
 53 |     // Measurement Covariance matrix, linearized using thrust
 54 |     thrust::device_vector<float> R_;
 55 |     // Sigma Points matrix
 56 |     thrust::device_vector<float> sigmaPoints_;
 57 |     // Weights for sigma points
 58 |     thrust::device_vector<float> weightsMean_;
 59 |     thrust::device_vector<float> weightsCovariance_;
 60 |     // Predicted State Mean
 61 |     thrust::device_vector<float> predictedStateMean_;
 62 |     // Predicted State Covarince 
 63 |     thrust::device_vector<float> predictedStateCovariance_;
 64 |     // Precited Measuement Sigma points
 65 |     thrust::device_vector<float> predictedMeasurementSigmaPoints_;
 66 |     // Precited Measurement mean
 67 |     thrust::device_vector<float> predictedMeasurementMean_;
 68 |     // Precited Measurement Covariance
 69 |     thrust::device_vector<float> predictedMeasurementCovariance_;
 70 |     // Cross-Covariance matrix between state and measurement
 71 |     thrust::device_vector<float> crossCovariance_;
 72 |     // Kalman Gain
 73 |     thrust::device_vector<float> kalmanGain_;
 74 |     // Functions
 75 |     void updateAllSigmaPoints(const std::vector<float>& measurement, const float dt); 
 76 |     void predictStateMean();
 77 |     void predictStateCovariance(); 
 78 |     thrust::device_vector<float> scaleMeasurementCovarianceMatrix();
 79 |     thrust::device_vector<float> choleskyDecompositionMeasurment(); 
 80 |     void generateMeasurmentSigmaPoints(const std::vector<float>& measurement, float dt);
 81 |     void predictMeasurementMean();
 82 |     void computeMeasurementCovariance();
 83 |     void computeCrossCovariance();
 84 |     thrust::device_vector<float> prepareMeasurementVector(const std::vector<float>& measurement, float dt);
 85 |     thrust::device_vector<float> computeInverseMeasurementCovariance();
 86 |     void computeKalmanGain();
 87 |     void updateStateWithMeasurement(const std::vector<float>& measurement, float dt);
 88 |     void updateStateCovariance();
 89 | public:
 90 |     UKF(const float & alpha, const float & beta, const float & kappa);
 91 |     ~UKF();
 92 |     // Init state for thrust::vector
 93 |     void initState(const std::vector<float>& state_vector);
 94 |     void initStateCovarianceMatrix(const float & positionVariance, const float & orientationVariance, const float & velocityVariance, const float & gyroBiasVariance, const float & accelBiasVariance);
 95 |     void initProcessCovarianceMatrix(const float & positionNoise, const float & orientationNoise, const float & velocityNoise, const float & gyroBiasNoise, const float & accelBiasNoise);
 96 |     void updateMeasurementCovarianceMatrix(const float positionVariance, const float orientationVariance, const float velocityVariance, const float angularVelocityVariance, const float linearAccelerationVariance);
 97 |     // sigma points generation functions
 98 |     void generateSigmaPoints();
 99 |     thrust::device_vector<float> scaleCovarianceMatrix() ;
100 |     thrust::device_vector<float> choleskyDecomposition();
101 |     void initializeWeights();
102 |     void updatePrevPose(const std::vector<float>& newPos);
103 |     void printStateInfo();
104 |     void updateFilter(const std::vector<float>& measurement, const float dt, const float positionVariance, const float orientationVariance, const float velocityVariance, const float angularVelocityVariance, const float linearAccelerationVariance);
105 |     std::vector<float> getState();
106 |     std::vector<float> getStateCovarianceMatrix();
107 | };
108 | 
109 | #endif


--------------------------------------------------------------------------------
/cuUKF/launch/gpu_filter.launch.py:
--------------------------------------------------------------------------------
 1 | from launch import LaunchDescription
 2 | from launch_ros.actions import Node
 3 | from launch.actions import DeclareLaunchArgument
 4 | from launch.substitutions import LaunchConfiguration
 5 | 
 6 | def generate_launch_description():
 7 |     return LaunchDescription([
 8 |         DeclareLaunchArgument('odom_topic',           default_value='/odom',       description='odom topic'),
 9 |         DeclareLaunchArgument('imu_topic',           default_value='/imu',       description='imu topic'),
10 |         Node(
11 |             package = 'cuUKF',
12 |             executable = 'core_node',
13 |             name = 'gpu_filter_node',
14 |             output = 'screen', 
15 |             parameters = [
16 |                 {"odom_topic": LaunchConfiguration('odom_topic')},
17 |                 {"imu_topic": LaunchConfiguration('imu_topic')}
18 |             ]
19 |         )
20 |     ])
21 | 


--------------------------------------------------------------------------------
/cuUKF/nodes/coreNode.cpp:
--------------------------------------------------------------------------------
  1 | #include "coreNode.hpp"
  2 | 
  3 | CoreNode::CoreNode() : Node("cuUKF")
  4 | {
  5 |     // Topics
  6 |     this->declare_parameter("odom_topic", "/odom");
  7 |     this->declare_parameter("imu_topic", "/imu");
  8 | 
  9 |     // Init for UKF
 10 |     this->isIntialized_ = false;
 11 | 
 12 |     // Initalize filter
 13 |     this->kalmanFilter_ = std::make_shared<UKF>(5e-2, 2.0, 0);
 14 | 
 15 |     // Subscribe to Msgs
 16 |     this->odomDataSub_.subscribe(this, this->get_parameter("odom_topic").as_string());
 17 |     this->imuDataSub_.subscribe(this, this->get_parameter("imu_topic").as_string());
 18 | 
 19 |     // Initialize the synchronizer
 20 |     sync_.reset(new Sync(MySyncPolicy(100), odomDataSub_, imuDataSub_));
 21 |     sync_->registerCallback(std::bind(&CoreNode::callback_, this, std::placeholders::_1, std::placeholders::_2));
 22 | 
 23 |     // Publisher 
 24 |     this->odomPublisher_ = this->create_publisher<nav_msgs::msg::Odometry>("/cuUKF/filtered_odom", 10);
 25 | }
 26 | 
 27 | CoreNode::~CoreNode()
 28 | {
 29 | }
 30 | 
 31 | void CoreNode::callback_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu)
 32 | {
 33 |     if (!(this->isIntialized_))
 34 |     {
 35 |         // Set the prevoous time stamp
 36 |         this->prevTimestamp_ = odom->header.stamp;
 37 |         
 38 |         // Initialize state vector
 39 |         std::vector<float> state = this->genState_(odom, imu);
 40 | 
 41 |         // Move to thrust vector for GPU in filter
 42 |         this->kalmanFilter_->initState(state);
 43 |         this->kalmanFilter_->initStateCovarianceMatrix(10.0f, 10.0f, 10.0f, 10.0f, 10.0f);
 44 |         this->kalmanFilter_->initProcessCovarianceMatrix(10.0f, 10.0f, 10.0f, 10.0f, 10.0f);
 45 |         this->kalmanFilter_->updateMeasurementCovarianceMatrix(0.001, 0.001, 0.01f, 0.001, 0.001);
 46 |         this->kalmanFilter_->generateSigmaPoints();
 47 |         this->kalmanFilter_->initializeWeights();
 48 |         
 49 |         // Update Prev Pose
 50 |         this->kalmanFilter_->updatePrevPose({static_cast<float>(odom->pose.pose.position.x), static_cast<float>(odom->pose.pose.position.y), static_cast<float>(odom->pose.pose.position.z)});
 51 | 
 52 |         // Update the flag
 53 |         this->isIntialized_ = true;
 54 |         return;
 55 |     }
 56 |     // Get the current timestamp from the odometry message
 57 |     rclcpp::Time currentTimestamp = odom->header.stamp;
 58 | 
 59 |     // Calculate dt in seconds
 60 |     double dt = (currentTimestamp - this->prevTimestamp_).seconds();
 61 |     this->prevTimestamp_ = currentTimestamp;  // Update the previous timestamp
 62 | 
 63 |     // Start filter
 64 |     this->kalmanFilter_->updateFilter(this->genMeasurement_(odom, imu), dt, 0.001, 0.001, 0.01f, 0.001, 0.001);
 65 | 
 66 |     // Update Prev Pose
 67 |     this->kalmanFilter_->updatePrevPose({static_cast<float>(odom->pose.pose.position.x), static_cast<float>(odom->pose.pose.position.y), static_cast<float>(odom->pose.pose.position.z)});
 68 | 
 69 |     std::vector<float> state = this->kalmanFilter_->getState();
 70 |     std::vector<float> stateCovarince = this->kalmanFilter_->getStateCovarianceMatrix();
 71 |     // Publish Msg
 72 |     this->odomPublisher_->publish(this->createOdometryMsg_(state, stateCovarince, odom));
 73 | }
 74 | 
 75 | std::vector<float> CoreNode::genState_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu)
 76 | {
 77 |     // Initialize state vector
 78 |     std::vector<float> state(16, 0.0f); // Initialize all elements to zero
 79 | 
 80 |     // Assign position and orientation to state vector
 81 |     state[0] = odom->pose.pose.position.x;
 82 |     state[1] = odom->pose.pose.position.y;
 83 |     state[2] = odom->pose.pose.position.z;
 84 |     state[3] = odom->pose.pose.orientation.w;
 85 |     state[4] = odom->pose.pose.orientation.x;
 86 |     state[5] = odom->pose.pose.orientation.y;
 87 |     state[6] = odom->pose.pose.orientation.z;
 88 |     state[10] = imu->angular_velocity.x;
 89 |     state[11] = imu->angular_velocity.y;
 90 |     state[12] = imu->angular_velocity.z;
 91 |     state[13] = imu->linear_acceleration.x;
 92 |     state[14] = imu->linear_acceleration.y;
 93 |     state[15] = imu->linear_acceleration.z;
 94 | 
 95 |     return state;
 96 | }
 97 | 
 98 | std::vector<float> CoreNode::genMeasurement_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu)
 99 | {
100 |     std::vector<float> measurementVector(16, 0.0f); // Assuming a 16-element state vector
101 | 
102 |     // Position
103 |     measurementVector[0] = odom->pose.pose.position.x;
104 |     measurementVector[1] = odom->pose.pose.position.y;
105 |     measurementVector[2] = odom->pose.pose.position.z;
106 | 
107 |     // Orientation (quaternion)
108 |     measurementVector[3] = odom->pose.pose.orientation.w;
109 |     measurementVector[4] = odom->pose.pose.orientation.x;
110 |     measurementVector[5] = odom->pose.pose.orientation.y;
111 |     measurementVector[6] = odom->pose.pose.orientation.z;
112 | 
113 |     // Angular velocity
114 |     measurementVector[10] = imu->angular_velocity.x;
115 |     measurementVector[11] = imu->angular_velocity.y;
116 |     measurementVector[12] = imu->angular_velocity.z;
117 | 
118 |     // Linear acceleration
119 |     measurementVector[13] = imu->linear_acceleration.x;
120 |     measurementVector[14] = imu->linear_acceleration.y;
121 |     measurementVector[15] = imu->linear_acceleration.z;
122 | 
123 |     return measurementVector;
124 | }
125 | 
126 | nav_msgs::msg::Odometry CoreNode::createOdometryMsg_(const std::vector<float>& state, const std::vector<float>& covariance_matrix, const nav_msgs::msg::Odometry::ConstSharedPtr& original_odom)
127 | {
128 |     nav_msgs::msg::Odometry odom_msg;
129 | 
130 |     // Assuming the state vector layout is as follows:
131 |     // [x, y, z, qw, qx, qy, qz, vx, vy, vz, ax, ay, az]
132 |     odom_msg.pose.pose.position.x = state[0];
133 |     odom_msg.pose.pose.position.y = state[1];
134 |     odom_msg.pose.pose.position.z = state[2];
135 |     odom_msg.pose.pose.orientation.w = state[3];
136 |     odom_msg.pose.pose.orientation.x = state[4];
137 |     odom_msg.pose.pose.orientation.y = state[5];
138 |     odom_msg.pose.pose.orientation.z = state[6];
139 |     odom_msg.twist.twist.linear.x = state[7];
140 |     odom_msg.twist.twist.linear.y = state[8];
141 |     odom_msg.twist.twist.linear.z = state[9];
142 |     odom_msg.twist.twist.angular.x = state[10];
143 |     odom_msg.twist.twist.angular.y = state[11];
144 |     odom_msg.twist.twist.angular.z = state[12];
145 | 
146 |     // Copy position covariance (indices 0, 1, 2 for x, y, z)
147 |     for (int i = 0; i < 3; ++i) for (int j = 0; j < 3; ++j) odom_msg.pose.covariance[i * 6 + j] = covariance_matrix[i * 16 + j];
148 | 
149 |     // Copy orientation covariance (assuming direct mapping from quaternion x, y, z to orientation)
150 |     for (int i = 4, k = 0; i < 7; ++i, ++k) for (int j = 4, l = 0; j < 7; ++j, ++l) odom_msg.pose.covariance[(k + 3) * 6 + (l + 3)] = covariance_matrix[i * 16 + j];
151 | 
152 |     // Copy velocity covariance (linear velocity: vx, vy, vz)
153 |     for (int i = 0; i < 3; ++i) for (int j = 0; j < 3; ++j) odom_msg.twist.covariance[i * 6 + j] = covariance_matrix[(i + 7) * 16 + (j + 7)];
154 | 
155 |     // Copy angular velocity covariance (angular velocity: wx, wy, wz)
156 |     for (int i = 0; i < 3; ++i) for (int j = 0; j < 3; ++j) odom_msg.twist.covariance[(i + 3) * 6 + (j + 3)] = covariance_matrix[(i + 10) * 16 + (j + 10)];
157 | 
158 |     odom_msg.header = original_odom->header;
159 |     odom_msg.child_frame_id = original_odom->child_frame_id;
160 |     // Set the header stamp to the current time
161 |     odom_msg.header.stamp = this->now();
162 | 
163 |     return odom_msg;
164 | }
165 | 
166 | int main(int argc, char * argv[])
167 | {
168 |     rclcpp::init(argc, argv);
169 |     rclcpp::spin(std::make_shared<CoreNode>());
170 |     rclcpp::shutdown();
171 |     
172 |     return 0;
173 | }


--------------------------------------------------------------------------------
/cuUKF/nodes/cpuNode.cpp:
--------------------------------------------------------------------------------
  1 | #include "cpuNode.hpp"
  2 | 
  3 | CpuNode::CpuNode() : Node("cuUKF")
  4 | {
  5 |     // Init for UKF
  6 |     this->isIntialized_ = false;
  7 | 
  8 |     // Initialize previous timestamp with current ROS time
  9 |     this->prevTimestamp_ = this->now();
 10 | 
 11 |     // Initalize filter
 12 |     this->kalmanFilter_ = std::make_shared<UKF>();
 13 | 
 14 |     // Subscribe to Msgs
 15 |     this->odomDataSub_.subscribe(this, "/zed2i/zed_node/odom");
 16 |     this->imuDataSub_.subscribe(this, "/zed2i/zed_node/imu/data_raw");
 17 | 
 18 |     // Initialize the synchronizer
 19 |     sync_.reset(new Sync(MySyncPolicy(10), odomDataSub_, imuDataSub_));
 20 |     sync_->registerCallback(std::bind(&CpuNode::callback_, this, std::placeholders::_1, std::placeholders::_2));
 21 | }
 22 | 
 23 | CpuNode::~CpuNode()
 24 | {
 25 | }
 26 | 
 27 | Eigen::MatrixXd CpuNode::extractPoseCovariance_(const nav_msgs::msg::Odometry& odom)
 28 | {
 29 |     // The pose covariance matrix in Odometry is 6x6, stored as a flat array of 36 elements
 30 |     const int matrix_size = 6;
 31 | 
 32 |     // Use Eigen's Map to create a 6x6 matrix view of the 1D array without copying the data
 33 |     Eigen::Map<const Eigen::Matrix<double, matrix_size, matrix_size, Eigen::RowMajor>> covariance_matrix(odom.pose.covariance.data());
 34 | 
 35 |     return covariance_matrix;
 36 | }
 37 | 
 38 | Eigen::MatrixXd CpuNode::extractAngularVelocityCovariance_(const sensor_msgs::msg::Imu& imu)
 39 | {
 40 |     const int matrix_size = 3; // The angular velocity covariance matrix in Imu is 3x3
 41 | 
 42 |     Eigen::Map<const Eigen::Matrix<double, matrix_size, matrix_size, Eigen::RowMajor>> covariance_matrix(imu.angular_velocity_covariance.data());
 43 | 
 44 |     return covariance_matrix;
 45 | }
 46 | 
 47 | Eigen::MatrixXd CpuNode::extractLinearAccelerationCovariance_(const sensor_msgs::msg::Imu& imu)
 48 | {
 49 |     const int matrix_size = 3; // The linear acceleration covariance matrix in Imu is 3x3
 50 | 
 51 |     Eigen::Map<const Eigen::Matrix<double, matrix_size, matrix_size, Eigen::RowMajor>> covariance_matrix(imu.linear_acceleration_covariance.data());
 52 | 
 53 |     return covariance_matrix;
 54 | }
 55 | 
 56 | void CpuNode::callback_(const nav_msgs::msg::Odometry::ConstSharedPtr& odom, const sensor_msgs::msg::Imu::ConstSharedPtr& imu)
 57 | {
 58 |     // Get the current timestamp from the odometry message
 59 |     rclcpp::Time currentTimestamp = odom->header.stamp;
 60 | 
 61 |     // Calculate dt in seconds
 62 |     double dt = (currentTimestamp - this->prevTimestamp_).seconds();
 63 |     this->prevTimestamp_ = currentTimestamp;  // Update the previous timestamp
 64 | 
 65 |     if (!(this->isIntialized_)) 
 66 |     {
 67 |         // Initialize the state vector with the first readings
 68 |         Eigen::Vector3d initial_position(odom->pose.pose.position.x, odom->pose.pose.position.y, odom->pose.pose.position.z);
 69 |         Eigen::Quaterniond initial_orientation(odom->pose.pose.orientation.w, odom->pose.pose.orientation.x, odom->pose.pose.orientation.y, odom->pose.pose.orientation.z);
 70 |         // Assuming zero initial velocity and biases
 71 |         Eigen::VectorXd initial_state(16);
 72 |         initial_state << initial_position, initial_orientation.coeffs(), Eigen::Vector3d::Zero(), Eigen::Vector3d::Zero(), Eigen::Vector3d::Zero();
 73 | 
 74 |         this->kalmanFilter_->initializeState(initial_state, this->extractPoseCovariance_(*odom), this->extractAngularVelocityCovariance_(*imu), this->extractLinearAccelerationCovariance_(*imu));
 75 |         this->kalmanFilter_->updatePrevPose(initial_position);
 76 |         this->isIntialized_ = true;
 77 |         return;
 78 |     }
 79 | 
 80 |     // Extract position and orientation from the odometry message
 81 |     Eigen::Vector3d position(odom->pose.pose.position.x, odom->pose.pose.position.y, odom->pose.pose.position.z);
 82 |     Eigen::Quaterniond orientation(odom->pose.pose.orientation.w, odom->pose.pose.orientation.x, odom->pose.pose.orientation.y, odom->pose.pose.orientation.z);
 83 | 
 84 |     // Extract angular velocity and linear acceleration from the IMU message
 85 |     Eigen::Vector3d angular_velocity(imu->angular_velocity.x, imu->angular_velocity.y, imu->angular_velocity.z);
 86 |     Eigen::Vector3d linear_acceleration(imu->linear_acceleration.x, imu->linear_acceleration.y, imu->linear_acceleration.z);
 87 | 
 88 |     Eigen::Vector3d gyro_error(0.001, 0.001, 0.001); // rad/s
 89 |     Eigen::Vector3d accel_error(0.01, 0.01, 0.01); // m/s^2
 90 | 
 91 |     // Propagate each sigma point through the process model
 92 |     this->kalmanFilter_->propagateSigmaPointsThroughProcessModel_(dt, angular_velocity, linear_acceleration, gyro_error, accel_error);
 93 | 
 94 |     // Perform the UKF Prediction Step
 95 |     this->kalmanFilter_->predictStateMean_();
 96 |     this->kalmanFilter_->predictStateCovariance_();
 97 | 
 98 |     // Transform the propagated sigma points into the measurement space
 99 |     this->kalmanFilter_->transformSigmaPointsToMeasurementSpace_(angular_velocity, linear_acceleration);
100 |     
101 |     // Calculate the predicted measurement mean
102 |     this->kalmanFilter_->calculatePredictedMeasurementMean_();
103 |     
104 |     // Calculate the predicted measurement covariance
105 |     this->kalmanFilter_->calculatePredictedMeasurementCovariance_();
106 |     
107 |     // Calculate the cross covariance between the state and the measurements
108 |     this->kalmanFilter_->calculateCrossCovariance_();
109 |     
110 |     // Compute the Kalman Gain
111 |     this->kalmanFilter_->computeKalmanGain_();
112 |     
113 |     // Prepare the actual measurement vector from the current odometry and IMU data
114 |     Eigen::VectorXd actualMeasurement = this->kalmanFilter_->updateMeasurementModel(position, orientation, angular_velocity, linear_acceleration, dt, this->extractPoseCovariance_(*odom), this->extractAngularVelocityCovariance_(*imu), this->extractLinearAccelerationCovariance_(*imu));
115 |     
116 |     // Update the state and covariance of the UKF with the actual measurement
117 |     this->kalmanFilter_->updateStateAndCovariance_(actualMeasurement);
118 |     
119 |     // Update the previous position for velocity calculation in the next cycle
120 |     this->kalmanFilter_->updatePrevPose(position);
121 | }
122 | 
123 | int main(int argc, char * argv[])
124 | {
125 |     rclcpp::init(argc, argv);
126 |     rclcpp::spin(std::make_shared<CpuNode>());
127 |     rclcpp::shutdown();
128 |     
129 |     return 0;
130 | }


--------------------------------------------------------------------------------
/cuUKF/package.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 3 | <package format="3">
 4 |   <name>cuUKF</name>
 5 |   <version>0.0.0</version>
 6 |   <description>TODO: Package description</description>
 7 |   <maintainer email="jh7454@nyu.edu">Hari</maintainer>
 8 |   <license>BSD-3-Clause</license>
 9 | 
10 |   <buildtool_depend>ament_cmake</buildtool_depend>
11 | 
12 |   <depend>rclcpp</depend>
13 |   <depend>sensor_msgs</depend>
14 |   <depend>std_msgs</depend>
15 |   <depend>message_filters</depend>
16 |   <depend>geometry_msgs</depend>
17 |   <depend>nav_msgs</depend>
18 | 
19 |   <test_depend>ament_lint_auto</test_depend>
20 |   <test_depend>ament_lint_common</test_depend>
21 | 
22 |   <export>
23 |     <build_type>ament_cmake</build_type>
24 |   </export>
25 | </package>
26 | 


--------------------------------------------------------------------------------
/cuUKF/src/cpuFilter.cpp:
--------------------------------------------------------------------------------
  1 | #include "cpuFilter.hpp"
  2 | 
  3 | UKF::UKF() : rosLogger_(rclcpp::get_logger("cuUKF"))
  4 | {
  5 |     // Initialize the state vector to a size of 16
  6 |     this->state_ = Eigen::VectorXd(16);
  7 |     this->state_.setZero(); // Initialize all elements to zero
  8 | 
  9 |     // Initialize UKF parameters
 10 |     this->alpha_ = 1e-3;
 11 |     this->beta_ = 2.0;
 12 |     this->kappa_ = 0.0;
 13 |     this->lambda_ = this->alpha_ * this->alpha_ * (this->n + this->kappa_) - this->n;
 14 | }
 15 | 
 16 | void UKF::initializeState(const Eigen::VectorXd& initialState, const Eigen::MatrixXd& pose_covariance, const Eigen::MatrixXd& ang_vel_covariance, const Eigen::MatrixXd& accel_covariance)
 17 | {
 18 |     // Check the dimension of initialState to ensure it matches your state vector size
 19 |     if (initialState.size() != state_.size()) throw std::runtime_error("Initial state size does not match the UKF state size.");
 20 | 
 21 |     // Set the initial state
 22 |     this->state_ = initialState;
 23 | 
 24 |     // Set the covaiance matrix
 25 |     this->initializeCovarianceMatrix_();
 26 |     this->updateNoiseMatrix(pose_covariance, ang_vel_covariance, accel_covariance);
 27 | 
 28 |     // Initalise sigma points and weights
 29 |     this->generateSigmaPoints_();
 30 |     this->initializeWeights_();
 31 | 
 32 |     // Log the initialization
 33 |     RCLCPP_INFO(this->rosLogger_, "UKF State initialized.");
 34 | }
 35 | 
 36 | void UKF::initializeCovarianceMatrix_()
 37 | {
 38 |     // Initialize the full 16x16 covariance matrix to zero
 39 |     this->P_ = Eigen::MatrixXd::Zero(16, 16);
 40 | 
 41 |     // Define standard variances
 42 |     double positionVariance = 0.1;    // Variance for position
 43 |     double orientationVariance = 0.01; // Variance for quaternion components
 44 |     double velocityVariance = 1.0;     // Variance for velocity
 45 |     double angularVelocityVariance = 0.01; // Variance for angular velocity
 46 |     double accelerationVariance = 0.1; // Variance for linear acceleration
 47 | 
 48 |     // Set covariance for position (3x3 block)
 49 |     this->P_.block<3, 3>(0, 0) = Eigen::Matrix3d::Identity() * positionVariance;
 50 | 
 51 |     // Set covariance for orientation (4x4 block)
 52 |     this->P_.block<4, 4>(3, 3) = Eigen::Matrix4d::Identity() * orientationVariance;
 53 | 
 54 |     // Set covariance for velocity (3x3 block)
 55 |     this->P_.block<3, 3>(7, 7) = Eigen::Matrix3d::Identity() * velocityVariance;
 56 | 
 57 |     // Set covariance for angular velocity (3x3 block)
 58 |     this->P_.block<3, 3>(10, 10) = Eigen::Matrix3d::Identity() * angularVelocityVariance;
 59 | 
 60 |     // Set covariance for linear acceleration (3x3 block)
 61 |     this->P_.block<3, 3>(13, 13) = Eigen::Matrix3d::Identity() * accelerationVariance;
 62 | }
 63 | 
 64 | void UKF::updateNoiseMatrix(const Eigen::MatrixXd& pose_covariance_6x6, const Eigen::MatrixXd& ang_vel_covariance_3x3,const Eigen::MatrixXd& accel_covariance_3x3)
 65 | {
 66 |     // Initialize the full 16x16 covariance matrix to zero
 67 |     this->R_ = Eigen::MatrixXd::Zero(16, 16);
 68 | 
 69 |     // Example variance values
 70 |     double orientationVariance = 0.01; // Orientation measurement noise variance
 71 |     double velocityVariance = 1.0; // Velocity measurement noise variance
 72 | 
 73 |     // Set variances for position (first 3 elements)
 74 |     this->R_.block<3, 3>(0, 0) = pose_covariance_6x6.block<3, 3>(0, 0);
 75 | 
 76 |     // Set variances for orientation (next 4 elements)
 77 |     this->R_.block<4, 4>(3, 3) = Eigen::Matrix4d::Identity() * orientationVariance;
 78 | 
 79 |     // Set variances for velocity (next 3 elements)
 80 |     this->R_.block<3, 3>(7, 7) = Eigen::Matrix3d::Identity() * velocityVariance;
 81 | 
 82 |     // Set variances for gyro bias (next 3 elements)
 83 |     this->R_.block<3, 3>(10, 10) = ang_vel_covariance_3x3;
 84 | 
 85 |     // Set variances for accelerometer bias (next 3 elements)
 86 |     this->R_.block<3, 3>(13, 13) = accel_covariance_3x3;
 87 | }
 88 | 
 89 | void UKF::generateSigmaPoints_()
 90 | {
 91 |     this->sigmaPoints_.resize(this->n, 2 * this->n + 1);
 92 | 
 93 |     // Compute scaling factor
 94 |     Eigen::MatrixXd S = (this->P_ * (this->n + this->lambda_)).llt().matrixL(); // Cholesky decomposition
 95 | 
 96 |     // Set the first column to the current state
 97 |     this->sigmaPoints_.col(0) = state_;
 98 | 
 99 |     // Set remaining sigma points
100 |     for (int i = 0; i < n; ++i)
101 |     {
102 |         this->sigmaPoints_.col(i + 1)       = this->state_ + S.col(i);
103 |         this->sigmaPoints_.col(i + 1 + n)   = this->state_ - S.col(i);
104 |     }
105 | }
106 | 
107 | void UKF::initializeWeights_()
108 | {
109 |     this->weightsMean_ = Eigen::VectorXd(2 * this->n + 1);
110 |     this->weightsCovariance_ = Eigen::VectorXd(2 * this->n + 1);
111 | 
112 |     double weight_0 = this->lambda_ / (this->lambda_ + this->n);
113 |     double weight_others = 1 / (2 * (this->lambda_ + this->n));
114 | 
115 |     this->weightsMean_(0) = weight_0;
116 |     this->weightsCovariance_(0) = weight_0 + (1 - alpha_ * alpha_ + beta_);
117 | 
118 |     for (int i = 1; i < 2 * n + 1; ++i)
119 |     {
120 |         this->weightsMean_(i) = weight_others;
121 |         this->weightsCovariance_(i) = weight_others;
122 |     }
123 | }
124 | 
125 | Eigen::Quaterniond UKF::updateOrientationSigmaPoint_(const Eigen::Quaterniond& current_orientation, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& gyro_bias, double dt)
126 | {
127 |     Eigen::Vector3d correctedOmega = 0.5 * (angular_velocity - gyro_bias) * dt;
128 |     double omega_norm = correctedOmega.norm();
129 | 
130 |     if (omega_norm <= std::numeric_limits<double>::epsilon()) return current_orientation;
131 | 
132 |     double half_theta = 0.5 * omega_norm;
133 |     Eigen::Quaterniond delta_q_quaternion(Eigen::AngleAxisd(half_theta, correctedOmega.normalized()));
134 | 
135 |     // Perform the quaternion multiplication and normalize
136 |     Eigen::Quaterniond updated_orientation = (delta_q_quaternion * current_orientation).normalized();
137 | 
138 |     return updated_orientation;
139 | }
140 | 
141 | void UKF::updatePositionAndVelocitySigmaPoint_(Eigen::VectorXd& sigmaPoint, const Eigen::Vector3d& accel_meas, double dt)
142 | {
143 |     // Extract position, orientation, and velocity from the sigma point
144 |     Eigen::Vector3d position = sigmaPoint.segment(0, 3);
145 |     Eigen::Quaterniond orientation(sigmaPoint(3), sigmaPoint(4), sigmaPoint(5), sigmaPoint(6));
146 |     Eigen::Vector3d velocity = sigmaPoint.segment(7, 3);
147 |     Eigen::Vector3d accel_bias = sigmaPoint.segment(13, 3);
148 | 
149 |     // Compute the acceleration in the global frame
150 |     Eigen::Vector3d accel_global = orientation * (accel_meas - accel_bias) - Eigen::Vector3d(0, 0, -9.81);  // Assuming gravity along z
151 | 
152 |     // Update velocity
153 |     Eigen::Vector3d velocity_updated = velocity + accel_global * dt;
154 | 
155 |     // Update position
156 |     Eigen::Vector3d position_updated = position + (velocity * dt) + (0.5 * accel_global * dt * dt);
157 | 
158 |     // Write the updated position and velocity back into the sigma point
159 |     sigmaPoint.segment(0, 3) = position_updated;
160 |     sigmaPoint.segment(7, 3) = velocity_updated;
161 | }
162 | 
163 | void UKF::updateBiasesSigmaPoint_(Eigen::VectorXd& sigmaPoint, const Eigen::Vector3d& gyro_error, const Eigen::Vector3d& accel_error)
164 | {
165 |     // Extract gyro and accel biases from the sigma point
166 |     Eigen::Vector3d gyro_bias = sigmaPoint.segment(10, 3);
167 |     Eigen::Vector3d accel_bias = sigmaPoint.segment(13, 3);
168 | 
169 |     // Update biases based on the error estimates
170 |     gyro_bias += gyro_error;
171 |     accel_bias += accel_error;
172 | 
173 |     // Write the updated biases back into the sigma point
174 |     sigmaPoint.segment(10, 3) = gyro_bias;
175 |     sigmaPoint.segment(13, 3) = accel_bias;
176 | }
177 | 
178 | void UKF::propagateSigmaPointsThroughProcessModel_(double dt, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas, const Eigen::Vector3d& gyro_error, const Eigen::Vector3d& accel_error)
179 | {
180 |     for (int i = 0; i < this->sigmaPoints_.cols(); ++i) 
181 |     {
182 |         // Create a copy of the i-th column of sigmaPoints_
183 |         Eigen::VectorXd sigmaPoint = this->sigmaPoints_.col(i);
184 | 
185 |         // Extract current orientation from the sigma point
186 |         Eigen::Quaterniond current_orientation(sigmaPoint(3), sigmaPoint(4), sigmaPoint(5), sigmaPoint(6));
187 | 
188 |         // Update orientation for this sigma point
189 |         Eigen::Quaterniond updated_orientation = updateOrientationSigmaPoint_(current_orientation, angular_velocity, gyro_error, dt);
190 |         updated_orientation.normalize();
191 | 
192 |         // Write back updated orientation into the sigma point
193 |         sigmaPoint(3) = updated_orientation.w();
194 |         sigmaPoint(4) = updated_orientation.x();
195 |         sigmaPoint(5) = updated_orientation.y();
196 |         sigmaPoint(6) = updated_orientation.z();
197 | 
198 |         // Update position and velocity for this sigma point
199 |         this->updatePositionAndVelocitySigmaPoint_(sigmaPoint, accel_meas, dt);
200 | 
201 |         // Update biases for this sigma point using the error measurements
202 |         this->updateBiasesSigmaPoint_(sigmaPoint, gyro_error, accel_error);
203 | 
204 |         // Write the updated sigma point back into the matrix
205 |         this->sigmaPoints_.col(i) = sigmaPoint;
206 |     }
207 | }
208 | 
209 | void UKF::predictStateMean_()
210 | {
211 |     // Initialize the predicted state mean vector
212 |     Eigen::VectorXd tempPredictedStateMean = Eigen::VectorXd::Zero(this->state_.size());
213 | 
214 |     // Compute the weighted sum of the sigma points
215 |     for (int i = 0; i < this->sigmaPoints_.cols(); ++i) tempPredictedStateMean += this->weightsMean_(i) * this->sigmaPoints_.col(i);
216 | 
217 |     // Store the predicted state mean
218 |     this->predictedStateMean_ = tempPredictedStateMean;
219 | }
220 | 
221 | void UKF::predictStateCovariance_()
222 | {
223 |     // Initialize the predicted state covariance matrix
224 |     Eigen::MatrixXd predictedStateCovariance = Eigen::MatrixXd::Zero(this->state_.size(), this->state_.size());
225 | 
226 |     // Compute the weighted sum of the outer products of the sigma point deviations
227 |     for (int i = 0; i < this->sigmaPoints_.cols(); ++i)
228 |     {
229 |         Eigen::VectorXd deviation = this->sigmaPoints_.col(i) - this->state_; // Deviation of sigma point from predicted mean
230 |         predictedStateCovariance += this->weightsCovariance_(i) * deviation * deviation.transpose();
231 |     }
232 | 
233 |     // Update the state covariance matrix with the predicted state covariance
234 |     this->P_ = predictedStateCovariance;
235 | }
236 | 
237 | void UKF::updatePrevPose(const Eigen::Vector3d& prevPose)
238 | {
239 |     this->prevPosition_ = prevPose;
240 | }
241 | 
242 | Eigen::VectorXd UKF::updateMeasurementModel(const Eigen::Vector3d& position, const Eigen::Quaterniond& orientation, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas, double dt, const Eigen::MatrixXd& pose_covariance_6x6, const Eigen::MatrixXd& ang_vel_covariance_3x3,const Eigen::MatrixXd& accel_covariance_3x3)
243 | {
244 |     // Update Noise Matrix R
245 |     this->updateNoiseMatrix(pose_covariance_6x6, ang_vel_covariance_3x3, accel_covariance_3x3);
246 | 
247 |     Eigen::VectorXd measurement(16); // Adjust based on your measurement vector size
248 | 
249 |     // Position - directly from visual odometry
250 |     measurement.head(3) = position;
251 | 
252 |     // Orientation - directly from visual odometry
253 |     Eigen::Vector4d orientation_coeffs = Eigen::Vector4d(orientation.w(), orientation.x(), orientation.y(), orientation.z());
254 |     measurement.segment(3, 4) = orientation_coeffs;
255 | 
256 |     // Derived Velocity
257 |     Eigen::Vector3d derived_velocity = (position - this->prevPosition_) / dt;
258 |     measurement.segment(7, 3) = derived_velocity;
259 | 
260 |     // Angular Velocity corrected for Gyro Bias
261 |     Eigen::Vector3d gyro_bias = this->state_.segment(10, 3);
262 |     measurement.segment(10, 3) = angular_velocity - gyro_bias;
263 | 
264 |     // Linear Acceleration corrected for Accelerometer Bias
265 |     Eigen::Vector3d accel_bias = this->state_.segment(13, 3);
266 |     measurement.segment(13, 3) = accel_meas - accel_bias;
267 | 
268 |     return measurement;
269 | }
270 | 
271 | Eigen::VectorXd UKF::applyMeasurementModelToSigmaPoint_(const Eigen::VectorXd& sigmaPoint, const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas)
272 | {
273 |     Eigen::VectorXd measurement(16); // Measurement vector size
274 | 
275 |     // Extract components from the sigma point
276 |     Eigen::Vector3d position = sigmaPoint.segment(0, 3);
277 |     Eigen::Quaterniond orientation(sigmaPoint(3), sigmaPoint(4), sigmaPoint(5), sigmaPoint(6));
278 |     Eigen::Vector3d velocity = sigmaPoint.segment(7, 3); // Directly use velocity
279 |     Eigen::Vector3d gyro_bias = sigmaPoint.segment(10, 3);
280 |     Eigen::Vector3d accel_bias = sigmaPoint.segment(13, 3);
281 | 
282 |     // Position
283 |     measurement.head(3) = position;
284 | 
285 |     // Orientation
286 |     measurement.segment(3, 4) = Eigen::Vector4d(orientation.w(), orientation.x(), orientation.y(), orientation.z());
287 | 
288 |     // Velocity
289 |     measurement.segment(7, 3) = velocity;
290 | 
291 |     // Angular Velocity corrected for Gyro Bias
292 |     measurement.segment(10, 3) = angular_velocity - gyro_bias;
293 | 
294 |     // Linear Acceleration corrected for Accelerometer Bias
295 |     measurement.segment(13, 3) = accel_meas - accel_bias;
296 | 
297 |     return measurement;
298 | }
299 | 
300 | void UKF::transformSigmaPointsToMeasurementSpace_(const Eigen::Vector3d& angular_velocity, const Eigen::Vector3d& accel_meas)
301 | {
302 |     const int measurementDimension = 16; /* size of your measurement vector, e.g., 16 */
303 |     this->measurementSigmaPoints_.resize(measurementDimension, this->sigmaPoints_.cols());
304 | 
305 |     for (int i = 0; i < sigmaPoints_.cols(); ++i) this->measurementSigmaPoints_.col(i) = this->applyMeasurementModelToSigmaPoint_(this->sigmaPoints_.col(i), angular_velocity, accel_meas);
306 | }
307 | 
308 | void UKF::calculatePredictedMeasurementMean_()
309 | {
310 |     // Initialize the predicted measurement mean vector
311 |     this->predictedMeasurementMean_ = Eigen::VectorXd::Zero(this->measurementSigmaPoints_.rows());
312 | 
313 |     // Compute the weighted sum of the measurement sigma points
314 |     for (int i = 0; i < this->measurementSigmaPoints_.cols(); ++i) this->predictedMeasurementMean_ += this->weightsMean_(i) * this->measurementSigmaPoints_.col(i);
315 | }
316 | 
317 | void UKF::calculatePredictedMeasurementCovariance_()
318 | {
319 |     int measurementDimension = this->predictedMeasurementMean_.size();
320 |     this->predictedMeasurementCovariance_ = Eigen::MatrixXd::Zero(measurementDimension, measurementDimension);
321 | 
322 |     for (int i = 0; i < this->measurementSigmaPoints_.cols(); ++i) 
323 |     {
324 |         Eigen::VectorXd deviation = this->measurementSigmaPoints_.col(i) - this->predictedMeasurementMean_;
325 |         this->predictedMeasurementCovariance_ += this->weightsCovariance_(i) * (deviation * deviation.transpose());
326 |     }
327 | 
328 |     // Add the measurement noise covariance
329 |     this->predictedMeasurementCovariance_ += this->R_;
330 | }
331 | 
332 | void UKF::calculateCrossCovariance_()
333 | {
334 |     int stateDimension = this->predictedStateMean_.size();
335 |     int measurementDimension = this->predictedMeasurementMean_.size();
336 |     this->Pxy_ = Eigen::MatrixXd::Zero(stateDimension, measurementDimension);
337 | 
338 |     for (int i = 0; i < this->sigmaPoints_.cols(); ++i) 
339 |     {
340 |         Eigen::VectorXd stateDeviation = this->sigmaPoints_.col(i) - this->predictedStateMean_;
341 |         Eigen::VectorXd measurementDeviation = this->measurementSigmaPoints_.col(i) - this->predictedMeasurementMean_;
342 |         this->Pxy_ += weightsCovariance_(i) * (stateDeviation * measurementDeviation.transpose());
343 |     }
344 | }
345 | 
346 | void UKF::computeKalmanGain_()
347 | {
348 |     this->K_ = this->Pxy_ * this->predictedMeasurementCovariance_.inverse();
349 | }
350 | 
351 | void UKF::updateStateAndCovariance_(const Eigen::VectorXd& actualMeasurement)
352 | {
353 |     // Calculate the measurement residual
354 |     Eigen::VectorXd measurementResidual = actualMeasurement - this->predictedMeasurementMean_;
355 | 
356 |     // Update state estimate using the Kalman Gain
357 |     this->state_ = this->predictedStateMean_ + this->K_ * measurementResidual;
358 | 
359 |     // Extract quaternion components (w, x, y, z) from the state
360 |     Eigen::Quaterniond updated_orientation(this->state_(3), this->state_(4), this->state_(5), this->state_(6));
361 | 
362 |     // Normalize the quaternion
363 |     updated_orientation.normalize();
364 | 
365 |     // Put the normalized quaternion back into the state vector
366 |     this->state_(3) = updated_orientation.w();
367 |     this->state_(4) = updated_orientation.x();
368 |     this->state_(5) = updated_orientation.y();
369 |     this->state_(6) = updated_orientation.z();
370 | 
371 |     std::cout << this->state_ << "\n" << std::endl;
372 | 
373 |     // Update state covariance matrix
374 |     this->P_ = this->P_ - this->K_ * this->predictedMeasurementCovariance_ * this->K_.transpose();
375 | }
376 | 
377 | UKF::~UKF()
378 | {
379 | }


--------------------------------------------------------------------------------
/cuUKF/src/gpuFilter.cu:
--------------------------------------------------------------------------------
  1 | #include "gpuFilter.cuh"
  2 | 
  3 | 
  4 | // --------------- GPU Kernels ---------------------- //
  5 | 
  6 | __global__ void scaleCovarianceMatrixKernel(float* P, int n, float alpha, float kappa) 
  7 | {
  8 |     int idx = blockIdx.x * blockDim.x + threadIdx.x;
  9 |     if (idx < n*n) P[idx] *= (alpha * (n + kappa));
 10 | }
 11 | 
 12 | __global__ void generateSigmaPointsKernel(float* sigmaPoints, const float* state, const float* cholP, int n, float lambda_plus_n)
 13 | {
 14 |     int idx = blockIdx.x * blockDim.x + threadIdx.x;
 15 |     if (idx >= 2 * n + 1) return;
 16 | 
 17 |     int state_idx = idx * n; // Index for the beginning of the current sigma point in the flattened array
 18 | 
 19 |     // For the 0th sigma point, copy the state directly
 20 |     if (idx == 0) 
 21 |     {
 22 |         for (int i = 0; i < n; ++i) sigmaPoints[state_idx + i] = state[i];
 23 |     } 
 24 |     else 
 25 |     {
 26 |         int chol_idx = (idx - 1) % n; // Index for the current column of the Cholesky matrix
 27 |         float sign = idx <= n ? 1 : -1; // Determine whether to add or subtract
 28 | 
 29 |         for (int i = 0; i < n; ++i) sigmaPoints[state_idx + i] = state[i] + sign * sqrt(lambda_plus_n) * cholP[i * n + chol_idx];
 30 |     }
 31 | 
 32 |     // Normalize the quaternion if this sigma point contains one
 33 |     if (n > 6) // Assuming quaternion starts at index 3 and ends at index 6
 34 |     {
 35 |         float norm = sqrt(sigmaPoints[state_idx + 3] * sigmaPoints[state_idx + 3] + sigmaPoints[state_idx + 4] * sigmaPoints[state_idx + 4] + sigmaPoints[state_idx + 5] * sigmaPoints[state_idx + 5] + sigmaPoints[state_idx + 6] * sigmaPoints[state_idx + 6]);
 36 |         for (int i = 3; i <= 6; ++i) sigmaPoints[state_idx + i] /= norm;
 37 |     }
 38 | }
 39 | 
 40 | __global__ void initializeWeightsKernel(float* weightsMean, float* weightsCovariance, int n, float lambda, float alpha, float beta)
 41 | {
 42 |     int idx = threadIdx.x + blockIdx.x * blockDim.x;
 43 |     if (idx >= 2 * n + 1) return;
 44 | 
 45 |     if (idx == 0)
 46 |     {
 47 |         weightsMean[idx] = lambda / (n + lambda);
 48 |         weightsCovariance[idx] = weightsMean[idx] + (1 - alpha * alpha + beta);
 49 |     }
 50 |     else
 51 |     {
 52 |         weightsMean[idx] = 1.0 / (2 * (n + lambda));
 53 |         weightsCovariance[idx] = 1.0 / (2 * (n + lambda));
 54 |     }
 55 | }
 56 | 
 57 | __global__ void updateSigmaPointsKernel(float* sigmaPoints, const float* measurement, int sigmaPointCount, float dt, int n) 
 58 | {
 59 |     int idx = blockIdx.x * blockDim.x + threadIdx.x;
 60 |     if (idx >= sigmaPointCount) return;
 61 | 
 62 |     int baseIdx = idx * n;
 63 | 
 64 |     // Angular velocity components updated in the previous step
 65 |     float wx = sigmaPoints[baseIdx + 10];
 66 |     float wy = sigmaPoints[baseIdx + 11];
 67 |     float wz = sigmaPoints[baseIdx + 12];
 68 | 
 69 |     // Current quaternion components
 70 |     float qw = sigmaPoints[baseIdx + 3];
 71 |     float qx = sigmaPoints[baseIdx + 4];
 72 |     float qy = sigmaPoints[baseIdx + 5];
 73 |     float qz = sigmaPoints[baseIdx + 6];
 74 | 
 75 |     // Quaternion derivative due to angular velocity
 76 |     float dot_qw = 0.5 * (-qx * wx - qy * wy - qz * wz);
 77 |     float dot_qx = 0.5 * (qw * wx + qy * wz - qz * wy);
 78 |     float dot_qy = 0.5 * (qw * wy - qx * wz + qz * wx);
 79 |     float dot_qz = 0.5 * (qw * wz + qx * wy - qy * wx);
 80 | 
 81 |     // Update quaternion components with Euler integration
 82 |     sigmaPoints[baseIdx + 3] += dot_qw * dt;
 83 |     sigmaPoints[baseIdx + 4] += dot_qx * dt;
 84 |     sigmaPoints[baseIdx + 5] += dot_qy * dt;
 85 |     sigmaPoints[baseIdx + 6] += dot_qz * dt;
 86 | 
 87 |     // Normalize the updated quaternion
 88 |     float norm = sqrt(sigmaPoints[baseIdx + 3] * sigmaPoints[baseIdx + 3] +
 89 |                       sigmaPoints[baseIdx + 4] * sigmaPoints[baseIdx + 4] +
 90 |                       sigmaPoints[baseIdx + 5] * sigmaPoints[baseIdx + 5] +
 91 |                       sigmaPoints[baseIdx + 6] * sigmaPoints[baseIdx + 6]);
 92 | 
 93 |     sigmaPoints[baseIdx + 3] /= norm;
 94 |     sigmaPoints[baseIdx + 4] /= norm;
 95 |     sigmaPoints[baseIdx + 5] /= norm;
 96 |     sigmaPoints[baseIdx + 6] /= norm;
 97 | 
 98 |     // Directly update the sigma point's angular velocity by adding the difference between measured and current angular velocity
 99 |     sigmaPoints[baseIdx + 10] += measurement[10] - sigmaPoints[baseIdx + 10];
100 |     sigmaPoints[baseIdx + 11] += measurement[11] - sigmaPoints[baseIdx + 11];
101 |     sigmaPoints[baseIdx + 12] += measurement[12] - sigmaPoints[baseIdx + 12];
102 | 
103 |     // Extract current position, velocity, and acceleration
104 |     float x = sigmaPoints[baseIdx + 0];
105 |     float y = sigmaPoints[baseIdx + 1];
106 |     float z = sigmaPoints[baseIdx + 2];
107 |     float vx = sigmaPoints[baseIdx + 7];
108 |     float vy = sigmaPoints[baseIdx + 8];
109 |     float vz = sigmaPoints[baseIdx + 9];
110 |     float ax = sigmaPoints[baseIdx + 13];
111 |     float ay = sigmaPoints[baseIdx + 14];
112 |     float az = sigmaPoints[baseIdx + 15] - 9.81f; // Adjust for gravity
113 | 
114 |     // Update position using the equation of motion
115 |     sigmaPoints[baseIdx + 0] = x + vx * dt + 0.5f * ax * dt * dt; // Update x position
116 |     sigmaPoints[baseIdx + 1] = y + vy * dt + 0.5f * ay * dt * dt; // Update y position
117 |     sigmaPoints[baseIdx + 2] = z + vz * dt + 0.5f * az * dt * dt; // Update z position
118 | 
119 |     // Update velocity using the equation of motion v = u + at
120 |     sigmaPoints[baseIdx + 7] = vx + ax * dt; // Update vx
121 |     sigmaPoints[baseIdx + 8] = vy + ay * dt; // Update vy
122 |     sigmaPoints[baseIdx + 9] = vz + az * dt; // Update vz, considering gravity if az includes it
123 | 
124 |     // Directly update the sigma point's linear acceleration by adding the difference between measured and current linear acceleration
125 |     sigmaPoints[baseIdx + 13] += measurement[13] - sigmaPoints[baseIdx + 13]; // ax
126 |     sigmaPoints[baseIdx + 14] += measurement[14] - sigmaPoints[baseIdx + 14]; // ay
127 |     sigmaPoints[baseIdx + 15] += measurement[15] - sigmaPoints[baseIdx + 15]; // az
128 | }
129 | 
130 | __global__ void predictStateMeanKernel(const float* sigmaPoints, const float* weightsMean, float* predictedStateMean, int stateDimension, int sigmaPointCount)
131 | {
132 |     int idx = blockIdx.x * blockDim.x + threadIdx.x;
133 |     if (idx < stateDimension)
134 |     {
135 |         float sum = 0.0f;
136 |         for (int j = 0; j < sigmaPointCount; ++j) sum += sigmaPoints[j * stateDimension + idx] * weightsMean[j];
137 |         predictedStateMean[idx] = sum;
138 |     }
139 | }
140 | 
141 | __global__ void predictStateCovarianceKernel(float* predictedSigmaPoints, float* predictedStateMean, float* weightsCovariance, float* processNoiseCovariance, float* predictedStateCovariance, int n, int sigmaPointCount) {
142 |     int idx = threadIdx.x + blockIdx.x * blockDim.x;
143 |     int jdx = threadIdx.y + blockIdx.y * blockDim.y;
144 | 
145 |     if (idx < n && jdx < n)
146 |     {
147 |         float covSum = 0.0f;
148 |         for (int i = 0; i < sigmaPointCount; ++i)
149 |         {
150 |             float diff_i_j = (predictedSigmaPoints[i * n + idx] - predictedStateMean[idx]) * (predictedSigmaPoints[i * n + jdx] - predictedStateMean[jdx]);
151 |             covSum += weightsCovariance[i] * diff_i_j;
152 |         }
153 |         if (idx == jdx) predictedStateCovariance[idx * n + jdx] = covSum + processNoiseCovariance[idx * n + jdx];
154 |         else predictedStateCovariance[idx * n + jdx] = covSum;
155 |     }
156 | }
157 | 
158 | __global__ void predictMeasurementMeanKernel(const float* sigmaPoints, const float* weightsMean, float* predictedMeasurementMean, int measurementDimension, int sigmaPointCount) {
159 |     int idx = threadIdx.x + blockIdx.x * blockDim.x;
160 |     if (idx < measurementDimension) 
161 |     {
162 |         float mean = 0.0f;
163 |         for (int i = 0; i < sigmaPointCount; ++i) mean += sigmaPoints[i * measurementDimension + idx] * weightsMean[i];
164 |         predictedMeasurementMean[idx] = mean;
165 |     }
166 | }
167 | 
168 | __global__ void computeMeasurementCovarianceKernel(const float* predictedMeasurementSigmaPoints, const float* predictedMeasurementMean, float* predictedMeasurementCovariance, const float* measurementNoiseCovariance, int n, int sigmaPointCount, const float* weightsCovariance) 
169 | {
170 |     int idx = threadIdx.x + blockIdx.x * blockDim.x;
171 |     int jdx = threadIdx.y + blockIdx.y * blockDim.y;
172 | 
173 |     if (idx < n && jdx < n) 
174 |     {
175 |         float covariance = 0.0f;
176 |         for (int k = 0; k < sigmaPointCount; ++k)
177 |         {
178 |             float diff_i = predictedMeasurementSigmaPoints[k * n + idx] - predictedMeasurementMean[idx];
179 |             float diff_j = predictedMeasurementSigmaPoints[k * n + jdx] - predictedMeasurementMean[jdx];
180 |             covariance += weightsCovariance[k] * diff_i * diff_j;
181 |         }
182 | 
183 |         if (idx == jdx) covariance += measurementNoiseCovariance[idx * n + jdx];
184 |         predictedMeasurementCovariance[idx * n + jdx] = covariance;
185 |     }
186 | }
187 | 
188 | __global__ void computeCrossCovarianceKernel(const float* predictedStateSigmaPoints, const float* predictedMeasurementSigmaPoints, const float* predictedStateMean, const float* predictedMeasurementMean, float* crossCovariance, const float* weightsCovariance, int stateDimension, int measurementDimension, int sigmaPointCount)
189 | {
190 |     int idx = threadIdx.x + blockIdx.x * blockDim.x;
191 |     int jdx = threadIdx.y + blockIdx.y * blockDim.y;
192 | 
193 |     if (idx < stateDimension && jdx < measurementDimension)
194 |     {
195 |         float covSum = 0.0f;
196 |         for (int i = 0; i < sigmaPointCount; ++i)
197 |         {
198 |             float diffState = predictedStateSigmaPoints[i * stateDimension + idx] - predictedStateMean[idx];
199 |             float diffMeasurement = predictedMeasurementSigmaPoints[i * measurementDimension + jdx] - predictedMeasurementMean[jdx];
200 |             covSum += weightsCovariance[i] * diffState * diffMeasurement;
201 |         }
202 |         crossCovariance[idx * measurementDimension + jdx] = covSum;
203 |     }
204 | }
205 | 
206 | __global__ void computeKalmanGainKernel(const float* crossCovariance, const float* predictedMeasurementCovarianceInv, float* kalmanGain, int stateDim, int measDim)
207 | {
208 |     int idx = threadIdx.x + blockIdx.x * blockDim.x;
209 |     int jdx = threadIdx.y + blockIdx.y * blockDim.y;
210 | 
211 |     if (idx < stateDim && jdx < measDim) 
212 |     {
213 |         float sum = 0.0;
214 |         for (int k = 0; k < measDim; ++k) sum += crossCovariance[idx * measDim + k] * predictedMeasurementCovarianceInv[k * measDim + jdx];
215 |         kalmanGain[idx * measDim + jdx] = sum;
216 |     }
217 | }
218 | 
219 | __global__ void updateStateWithMeasurementKernel(float* stateMean, const float* kalmanGain, const float* measurementResidual, int stateDim, int measDim) 
220 | {
221 |     int idx = blockIdx.x * blockDim.x + threadIdx.x;
222 |     if (idx < stateDim) 
223 |     {
224 |         float updateValue = 0.0;
225 |         for (int k = 0; k < measDim; ++k) updateValue += kalmanGain[idx * measDim + k] * measurementResidual[k];
226 |         stateMean[idx] += updateValue; // Update the state with the computed value
227 |     }
228 | }
229 | 
230 | __global__ void normalizeQuaternionKernel(float* stateData) 
231 | {
232 |     // Since this operation is lightweight, we'll use a single thread.
233 |     if (threadIdx.x == 0) 
234 |     {
235 |         int quaternionStartIdx = 3; // Starting index of quaternion components in the state vector
236 |         float norm = sqrtf(stateData[quaternionStartIdx] * stateData[quaternionStartIdx] + stateData[quaternionStartIdx + 1] * stateData[quaternionStartIdx + 1] + stateData[quaternionStartIdx + 2] * stateData[quaternionStartIdx + 2] + stateData[quaternionStartIdx + 3] * stateData[quaternionStartIdx + 3]);
237 |         
238 |         // Normalize each component of the quaternion
239 |         for (int i = 0; i < 4; ++i) stateData[quaternionStartIdx + i] /= norm;
240 |     }
241 | }
242 | 
243 | // --------------- GPU Kernels ---------------------- //
244 | 
245 | 
246 | UKF::UKF(const float & alpha, const float & beta, const float & kappa) : rosLogger_(rclcpp::get_logger("cuUKF"))
247 | {
248 |     // Initialize UKF parameters
249 |     this->alpha_ = alpha;
250 |     this->beta_ = beta;
251 |     this->kappa_ = kappa;
252 |     this->lambda_ = this->alpha_ * this->alpha_ * (this->n + this->kappa_) - this->n;
253 | 
254 |     // Initialize prevPose_ to store 3 floats
255 |     this->prevPose_.resize(3, 0.0f);
256 | 
257 |     // cuBLAS handle initialization
258 |     cublasStatus_t cublasStatus = cublasCreate(&this->cublasHandle);
259 |     if (cublasStatus != CUBLAS_STATUS_SUCCESS) RCLCPP_ERROR(this->rosLogger_, "cuBLAS handle creation failed");
260 | 
261 |     // cuSOLVER handle initialization
262 |     cusolverStatus_t cusolverStatus = cusolverDnCreate(&this->cusolverHandle);
263 |     if (cusolverStatus != CUSOLVER_STATUS_SUCCESS) RCLCPP_ERROR(this->rosLogger_, "cuSOLVER handle creation failed");
264 | 
265 |     // Initialize State
266 |     this->state_ = thrust::device_vector<float>(this->n);
267 | 
268 |     // Initialize State Covaraince matrix
269 |     this->P_ = thrust::device_vector<float>(this->n * this->n);
270 | 
271 |     // Initialize Process Covairance matrix
272 |     this->Q_ = thrust::device_vector<float>(this->n * this->n);
273 | 
274 |     // Initalize Measuremnt Covariance matrix
275 |     this->R_ = thrust::device_vector<float>(this->n * this->n);
276 | 
277 |     RCLCPP_INFO(this->rosLogger_, "GPU UKF Initalized!!!");
278 | }
279 | 
280 | UKF::~UKF()
281 | {
282 |     cublasDestroy(this->cublasHandle);
283 |     cusolverDnDestroy(this->cusolverHandle);
284 |     RCLCPP_INFO(this->rosLogger_, "CUDA handles destroyed");
285 | }
286 | 
287 | void UKF::initState(const std::vector<float>& state_vector)
288 | {
289 |     thrust::copy(state_vector.begin(), state_vector.end(), this->state_.begin());
290 | }
291 | 
292 | void UKF::initStateCovarianceMatrix(const float & positionVariance, const float & orientationVariance, const float & linearVelocityVariance, const float & angularVelocityVariance, const float & linearAccelerationVariance)
293 | {
294 |     // Initialize all elements to zero
295 |     thrust::fill(this->P_.begin(), this->P_.end(), 0.0f);
296 | 
297 |     // Set variances for each part of the state
298 |     // Position
299 |     for (int i = 0; i < 3; ++i) this->P_[i * this->n + i] = positionVariance;
300 | 
301 |     // Orientation
302 |     for (int i = 3; i < 7; ++i) this->P_[i * this->n + i] = orientationVariance;
303 | 
304 |     // Linear Velocity
305 |     for (int i = 7; i < 10; ++i) this->P_[i * this->n + i] = linearVelocityVariance;
306 | 
307 |     // Angular Velocity
308 |     for (int i = 10; i < 13; ++i) this->P_[i * this->n + i] = angularVelocityVariance;
309 | 
310 |     // Linear Acceleration
311 |     for (int i = 13; i < 16; ++i) this->P_[i * this->n + i] = linearAccelerationVariance;
312 | }
313 | 
314 | void UKF::initProcessCovarianceMatrix(const float & positionNoise, const float & orientationNoise, const float & linearVelocityNoise, const float & angularVelocityNoise, const float & linearAccelerationNoise)
315 | {
316 |     // Assuming Q_ is a 16x16 matrix for a state vector with 16 elements
317 |     thrust::fill(this->Q_.begin(), this->Q_.end(), 0.0f);
318 | 
319 |     // Position-related noise
320 |     for (int i = 0; i < 3; ++i) this->Q_[i * this->n + i] = positionNoise;
321 | 
322 |     // Orientation-related noise (quaternion)
323 |     for (int i = 3; i < 7; ++i) this->Q_[i * this->n + i] = orientationNoise;
324 | 
325 |     // Linear Velocity Noise
326 |     for (int i = 7; i < 10; ++i) this->Q_[i * this->n + i] = linearVelocityNoise;
327 | 
328 |     // Angular Velocity Noise
329 |     for (int i = 10; i < 13; ++i) this->Q_[i * this->n + i] = angularVelocityNoise;
330 | 
331 |     // Linear Acceleration Noise
332 |     for (int i = 13; i < 16; ++i) this->Q_[i * this->n + i] = linearAccelerationNoise;
333 | }
334 | 
335 | void UKF::updateMeasurementCovarianceMatrix(const float positionVariance, const float orientationVariance, const float velocityVariance, const float angularVelocityVariance, const float linearAccelerationVariance)
336 | {
337 |     // Reset R_ to zeros, assuming R_ is for a 16x16 matrix
338 |     thrust::fill(this->R_.begin(), this->R_.end(), 0.0f);
339 | 
340 |     // Update position covariance
341 |     for (int i = 0; i < 3; ++i) this->R_[i * this->n + i] = positionVariance;
342 |     
343 |     // Update orientation covariance for quaternion components (w, x, y, z)
344 |     for (int i = 3; i < 7; ++i) this->R_[i * this->n + i] = orientationVariance;
345 | 
346 |     // Update linear velocity covariance
347 |     for (int i = 7; i < 10; ++i) this->R_[i * this->n + i] = velocityVariance;
348 | 
349 |     // Update angular velocity covariance
350 |     for (int i = 10; i < 13; ++i) this->R_[i * this->n + i] = angularVelocityVariance;
351 | 
352 |     // Update linear acceleration covariance
353 |     for (int i = 13; i < 16; ++i) this->R_[i * this->n + i] = linearAccelerationVariance;
354 | }
355 | 
356 | thrust::device_vector<float> UKF::scaleCovarianceMatrix() 
357 | {
358 |     // Compute the scale factor using class variables alpha_ and kappa_
359 |     float scale = this->alpha_ * (this->n + this->kappa_);
360 | 
361 |     // Launch the CUDA kernel to scale the covariance matrix P_
362 |     int numThreadsPerBlock = 256; // You can adjust this based on your GPU's capabilities
363 |     int numBlocks = (this->n * this->n + numThreadsPerBlock - 1) / numThreadsPerBlock;
364 | 
365 |     thrust::device_vector<float> P_copy = this->P_;
366 | 
367 |     // Call the CUDA kernel
368 |     scaleCovarianceMatrixKernel<<<numBlocks, numThreadsPerBlock>>>(thrust::raw_pointer_cast(P_copy.data()), this->n, this->alpha_, this->kappa_);
369 | 
370 |     // Synchronize to ensure the kernel has finished execution
371 |     cudaDeviceSynchronize();
372 | 
373 |     return P_copy;
374 | }
375 | 
376 | thrust::device_vector<float> UKF::choleskyDecomposition()
377 | {
378 |     // Check if the cuSOLVER handle is valid
379 |     if (this->cusolverHandle == nullptr) RCLCPP_ERROR(this->rosLogger_, "cuSOLVER handle is not valid. Make sure it was initialized correctly.");
380 | 
381 |     // Create a copy of the state covariance matrix P_ to perform the Cholesky decomposition
382 |     thrust::device_vector<float> P_scaled = this->scaleCovarianceMatrix();
383 | 
384 |     // Declare integer pointer for devInfo
385 |     int* devInfo = nullptr;
386 | 
387 |     // Allocate memory for devInfo
388 |     cudaMalloc((void**)&devInfo, sizeof(int));
389 | 
390 |     // Determine workspace size for Cholesky decomposition
391 |     int lwork = 0;
392 |     cusolverDnSpotrf_bufferSize(this->cusolverHandle, CUBLAS_FILL_MODE_LOWER, this->n, thrust::raw_pointer_cast(P_scaled.data()), this->n, &lwork);
393 | 
394 |     // Allocate workspace
395 |     float* d_work = nullptr;
396 |     cudaMalloc((void**)&d_work, sizeof(float) * lwork);
397 | 
398 |     // Perform Cholesky decomposition
399 |     cusolverStatus_t status = cusolverDnSpotrf(this->cusolverHandle, CUBLAS_FILL_MODE_LOWER, this->n, thrust::raw_pointer_cast(P_scaled.data()), this->n, d_work, lwork, devInfo);
400 | 
401 |     // Check the value of devInfo for any errors
402 |     int devInfo_h = 0;
403 |     cudaMemcpy(&devInfo_h, devInfo, sizeof(int), cudaMemcpyDeviceToHost);
404 |     if (status != CUSOLVER_STATUS_SUCCESS || devInfo_h != 0) RCLCPP_ERROR(this->rosLogger_, "Cholesky decomposition failed.");
405 | 
406 |     // Free the workspace memory and devInfo
407 |     cudaFree(d_work);
408 |     cudaFree(devInfo);
409 | 
410 |     cudaDeviceSynchronize();
411 |     
412 |     return P_scaled;
413 | }
414 | 
415 | void UKF::generateSigmaPoints()
416 | {
417 |     // Ensure sigmaPoints_ is correctly sized
418 |     this->sigmaPoints_.resize((2 * this->n + 1) * this->n);
419 | 
420 |     // Calculate lambda_plus_n
421 |     float lambda_plus_n = this->lambda_ + this->n;
422 | 
423 |     // Set up kernel execution parameters
424 |     int threadsPerBlock = 256;
425 |     int blocksPerGrid = ((2 * this->n + 1) + threadsPerBlock - 1) / threadsPerBlock;
426 | 
427 |     // Get scaled and decomposed State covariance matrix from Cholesky decomposition
428 |     thrust::device_vector<float> P_scaled = this->choleskyDecomposition();
429 | 
430 |     // Launch kernel to generate sigma points, now passing lambda_plus_n as an additional argument
431 |     generateSigmaPointsKernel<<<blocksPerGrid, threadsPerBlock>>>(thrust::raw_pointer_cast(this->sigmaPoints_.data()), thrust::raw_pointer_cast(this->state_.data()), thrust::raw_pointer_cast(P_scaled.data()), this->n, lambda_plus_n);
432 | 
433 |     // Wait for kernel to finish
434 |     cudaDeviceSynchronize();
435 | }
436 | 
437 | void UKF::initializeWeights()
438 | {
439 |     // Ensure the device vectors have the correct size
440 |     this->weightsMean_.resize(2 * this->n + 1);
441 |     this->weightsCovariance_.resize(2 * this->n + 1);
442 | 
443 |     // Calculate grid and block sizes for the kernel launch
444 |     int threadsPerBlock = 256; // You may adjust this based on your GPU's capabilities
445 |     int blocksPerGrid = ((2 * this->n + 1) + threadsPerBlock - 1) / threadsPerBlock; // Ensure enough blocks to cover all elements
446 | 
447 |     // Launch the kernel to initialize the weights on the device
448 |     initializeWeightsKernel<<<blocksPerGrid, threadsPerBlock>>>(thrust::raw_pointer_cast(this->weightsMean_.data()), thrust::raw_pointer_cast(this->weightsCovariance_.data()), this->n, this->lambda_, this->alpha_, this->beta_);
449 | 
450 |     // Synchronize to ensure the kernel has finished execution
451 |     cudaDeviceSynchronize();
452 | }
453 | 
454 | void UKF::updatePrevPose(const std::vector<float>& newPos)
455 | {
456 |     thrust::copy(newPos.begin(), newPos.begin() + 3, this->prevPose_.begin());
457 | }
458 | 
459 | void UKF::updateAllSigmaPoints(const std::vector<float>& measurement, const float dt) 
460 | {
461 |     thrust::device_vector<float> measurement_vector = this->prepareMeasurementVector(measurement, dt);
462 | 
463 |     int threadsPerBlock = 256;
464 |     int blocksPerGrid = ((this->sigmaPointCount) + threadsPerBlock - 1) / threadsPerBlock;
465 | 
466 |     // Launch the unified update kernel
467 |     updateSigmaPointsKernel<<<blocksPerGrid, threadsPerBlock>>>(thrust::raw_pointer_cast(this->sigmaPoints_.data()), thrust::raw_pointer_cast(measurement_vector.data()), this->sigmaPointCount, dt, this->n);
468 |     cudaDeviceSynchronize(); // Wait for the kernel to complete
469 | }
470 | 
471 | void UKF::predictStateMean()
472 | {
473 |     // Ensure the predicted state mean vector is correctly sized
474 |     this->predictedStateMean_.resize(this->n); // Assuming 'n' is the state dimension
475 | 
476 |     int threadsPerBlock = 256;
477 |     int blocksPerGrid = (this->n + threadsPerBlock - 1) / threadsPerBlock;
478 | 
479 |     // Launch the kernel
480 |     predictStateMeanKernel<<<blocksPerGrid, threadsPerBlock>>>(thrust::raw_pointer_cast(this->sigmaPoints_.data()), thrust::raw_pointer_cast(this->weightsMean_.data()), thrust::raw_pointer_cast(this->predictedStateMean_.data()), this->n, this->sigmaPointCount);
481 | 
482 |     // Synchronize to ensure the kernel execution completes
483 |     cudaDeviceSynchronize();
484 | } 
485 | 
486 | void UKF::predictStateCovariance()
487 | {
488 |     this->predictedStateCovariance_.resize(this->n * this->n);
489 |     int threadsPerBlockDim = 16;  // Using a 2D block for matrix operations
490 |     dim3 threadsPerBlock(threadsPerBlockDim, threadsPerBlockDim);
491 |     int blocksPerGridDim = (this->n + threadsPerBlockDim - 1) / threadsPerBlockDim;
492 |     dim3 blocksPerGrid(blocksPerGridDim, blocksPerGridDim);
493 | 
494 |     // Launching the kernel
495 |     predictStateCovarianceKernel<<<blocksPerGrid, threadsPerBlock>>>(thrust::raw_pointer_cast(this->sigmaPoints_.data()), thrust::raw_pointer_cast(this->predictedStateMean_.data()), thrust::raw_pointer_cast(this->weightsCovariance_.data()), thrust::raw_pointer_cast(this->Q_.data()), thrust::raw_pointer_cast(this->predictedStateCovariance_.data()), this->n, this->sigmaPointCount);
496 |     cudaDeviceSynchronize();  // Ensure completion
497 |     cudaMemcpy(thrust::raw_pointer_cast(this->P_.data()), thrust::raw_pointer_cast(this->predictedStateCovariance_.data()), this->n * this->n * sizeof(float), cudaMemcpyDeviceToDevice);
498 | } 
499 | 
500 | thrust::device_vector<float> UKF::scaleMeasurementCovarianceMatrix() 
501 | {
502 |     // Compute the scale factor using class variables alpha_ and kappa_
503 |     float scale = this->alpha_ * (this->n + this->kappa_);
504 | 
505 |     // Launch the CUDA kernel to scale the covariance matrix P_
506 |     int numThreadsPerBlock = 256; // You can adjust this based on your GPU's capabilities
507 |     int numBlocks = (this->n * this->n + numThreadsPerBlock - 1) / numThreadsPerBlock;
508 | 
509 |     thrust::device_vector<float> R_copy = this->R_;
510 | 
511 |     // Call the CUDA kernel
512 |     scaleCovarianceMatrixKernel<<<numBlocks, numThreadsPerBlock>>>(thrust::raw_pointer_cast(R_copy.data()), this->n, this->alpha_, this->kappa_);
513 | 
514 |     // Synchronize to ensure the kernel has finished execution
515 |     cudaDeviceSynchronize();
516 | 
517 |     return R_copy;
518 | }
519 | 
520 | thrust::device_vector<float> UKF::choleskyDecompositionMeasurment()
521 | {
522 |     // Check if the cuSOLVER handle is valid
523 |     if (this->cusolverHandle == nullptr) RCLCPP_ERROR(this->rosLogger_, "cuSOLVER handle is not valid. Make sure it was initialized correctly.");
524 | 
525 |     // Create a copy of the state covariance matrix P_ to perform the Cholesky decomposition
526 |     thrust::device_vector<float> R_scaled = this->scaleMeasurementCovarianceMatrix();
527 | 
528 |     // Declare integer pointer for devInfo
529 |     int* devInfo = nullptr;
530 | 
531 |     // Allocate memory for devInfo
532 |     cudaMalloc((void**)&devInfo, sizeof(int));
533 | 
534 |     // Determine workspace size for Cholesky decomposition
535 |     int lwork = 0;
536 |     cusolverDnSpotrf_bufferSize(this->cusolverHandle, CUBLAS_FILL_MODE_LOWER, this->n, thrust::raw_pointer_cast(R_scaled.data()), this->n, &lwork);
537 | 
538 |     // Allocate workspace
539 |     float* d_work = nullptr;
540 |     cudaMalloc((void**)&d_work, sizeof(float) * lwork);
541 | 
542 |     // Perform Cholesky decomposition
543 |     cusolverStatus_t status = cusolverDnSpotrf(this->cusolverHandle, CUBLAS_FILL_MODE_LOWER, this->n, thrust::raw_pointer_cast(R_scaled.data()), this->n, d_work, lwork, devInfo);
544 | 
545 |     // Check the value of devInfo for any errors
546 |     int devInfo_h = 0;
547 |     cudaMemcpy(&devInfo_h, devInfo, sizeof(int), cudaMemcpyDeviceToHost);
548 |     if (status != CUSOLVER_STATUS_SUCCESS || devInfo_h != 0) RCLCPP_ERROR(this->rosLogger_, "Cholesky decomposition failed.");
549 | 
550 |     // Free the workspace memory and devInfo
551 |     cudaFree(d_work);
552 |     cudaFree(devInfo);
553 | 
554 |     cudaDeviceSynchronize();
555 |     
556 |     return R_scaled;
557 | }
558 | 
559 | void UKF::generateMeasurmentSigmaPoints(const std::vector<float>& measurement, float dt)
560 | {
561 |     // Measurement
562 |     thrust::device_vector<float> newMeasurement= this->prepareMeasurementVector(measurement, dt);
563 | 
564 |     // Ensure sigmaPoints_ is correctly sized
565 |     this->predictedMeasurementSigmaPoints_.resize(this->sigmaPointCount * this->n);
566 | 
567 |     // Calculate lambda_plus_n
568 |     float lambda_plus_n = this->lambda_ + this->n;
569 | 
570 |     // Set up kernel execution parameters
571 |     int threadsPerBlock = 256;
572 |     int blocksPerGrid = ((this->sigmaPointCount) + threadsPerBlock - 1) / threadsPerBlock;
573 | 
574 |     // Get scaled and decomposed State covariance matrix from Cholesky decomposition
575 |     thrust::device_vector<float> R_scaled = this->choleskyDecompositionMeasurment();
576 | 
577 |     // Launch kernel to generate sigma points, now passing lambda_plus_n as an additional argument
578 |     generateSigmaPointsKernel<<<blocksPerGrid, threadsPerBlock>>>(thrust::raw_pointer_cast(this->predictedMeasurementSigmaPoints_.data()), thrust::raw_pointer_cast(newMeasurement.data()), thrust::raw_pointer_cast(R_scaled.data()), this->n, lambda_plus_n);
579 | 
580 |     // Wait for kernel to finish
581 |     cudaDeviceSynchronize();
582 | }
583 | 
584 | void UKF::predictMeasurementMean() 
585 | {
586 |     // Assuming 'n' represents the dimension of your measurement vector
587 |     this->predictedMeasurementMean_.resize(this->n, 0.0f);  // Resize each time to ensure correct size
588 | 
589 |     // Setup kernel launch parameters
590 |     int threadsPerBlock = 256;
591 |     int blocksPerGrid = (this->n + threadsPerBlock - 1) / threadsPerBlock;
592 | 
593 |     // Call the kernel
594 |     predictMeasurementMeanKernel<<<blocksPerGrid, threadsPerBlock>>>(thrust::raw_pointer_cast(this->predictedMeasurementSigmaPoints_.data()), thrust::raw_pointer_cast(this->weightsMean_.data()), thrust::raw_pointer_cast(this->predictedMeasurementMean_.data()), this->n, this->sigmaPointCount);
595 | 
596 |     cudaDeviceSynchronize();  // Ensure completion
597 | }
598 | 
599 | 
600 | void UKF::computeMeasurementCovariance() 
601 | {
602 |     // Resize and initialize the predictedMeasurementCovariance_ vector
603 |     this->predictedMeasurementCovariance_.resize(this->n * this->n, 0.0f);
604 | 
605 |     dim3 threadsPerBlock(16, 16); // 2D block for matrix operations
606 |     dim3 blocksPerGrid((this->n + threadsPerBlock.x - 1) / threadsPerBlock.x, (this->n + threadsPerBlock.y - 1) / threadsPerBlock.y);
607 | 
608 |     computeMeasurementCovarianceKernel<<<blocksPerGrid, threadsPerBlock>>>(thrust::raw_pointer_cast(this->predictedMeasurementSigmaPoints_.data()), thrust::raw_pointer_cast(this->predictedMeasurementMean_.data()), thrust::raw_pointer_cast(this->predictedMeasurementCovariance_.data()), thrust::raw_pointer_cast(this->R_.data()), this->n, this->sigmaPointCount, thrust::raw_pointer_cast(this->weightsCovariance_.data()));
609 | 
610 |     cudaDeviceSynchronize(); // Ensure the kernel execution completes
611 | }
612 | 
613 | void UKF::computeCrossCovariance()
614 | {
615 |     // Make sure the crossCovariance_ is resized appropriately
616 |     this->crossCovariance_.resize(this->n * this->n, 0.0f); // Ensures it's correctly sized every time the function is called
617 | 
618 |     dim3 threadsPerBlock(16, 16); // Or other suitable configuration
619 |     dim3 blocksPerGrid((this->n + threadsPerBlock.x - 1) / threadsPerBlock.x, (this->n + threadsPerBlock.y - 1) / threadsPerBlock.y);
620 | 
621 |     // Launch the kernel
622 |     computeCrossCovarianceKernel<<<blocksPerGrid, threadsPerBlock>>>(thrust::raw_pointer_cast(this->sigmaPoints_.data()), thrust::raw_pointer_cast(this->predictedMeasurementSigmaPoints_.data()), thrust::raw_pointer_cast(this->predictedStateMean_.data()), thrust::raw_pointer_cast(this->predictedMeasurementMean_.data()), thrust::raw_pointer_cast(this->crossCovariance_.data()), thrust::raw_pointer_cast(this->weightsCovariance_.data()), this->n, this->n, this->sigmaPointCount);
623 | 
624 |     cudaDeviceSynchronize(); // Wait for CUDA to finish
625 | }
626 | 
627 | thrust::device_vector<float> UKF::prepareMeasurementVector(const std::vector<float>& measurement, float dt)
628 | {
629 |     std::vector<float> updatedMeasurement = measurement;
630 | 
631 |     // Compute derived velocity for each position dimension and update the measurement vector
632 |     // Assuming the positions are stored at indices 0, 1, 2, and derived velocities should be placed at indices 7, 8, 9 for example
633 |     for (size_t i = 0; i < 3; ++i)
634 |     {
635 |         float derivedVelocity = (measurement[i] - prevPose_[i]) / dt;
636 |         updatedMeasurement[i + 7] = derivedVelocity; // Update with derived velocity
637 |     }
638 | 
639 |     // Convert updatedMeasurement to device_vector and return
640 |     return thrust::device_vector<float>(updatedMeasurement.begin(), updatedMeasurement.end());
641 | }
642 | 
643 | 
644 | thrust::device_vector<float> UKF::computeInverseMeasurementCovariance()
645 | {
646 |     thrust::device_vector<float> predictedMeasurementCovarianceInv(this->n * this->n); // Allocate space for the inverse matrix
647 | 
648 |     // Make a copy of the predictedMeasurementCovariance_ to preserve the original
649 |     thrust::device_vector<float> predictedMeasurementCovarianceCopy = this->predictedMeasurementCovariance_;
650 | 
651 |     int *d_ipiv; // Pivot indices for LU decomposition
652 |     int *d_info; // Info about the execution
653 |     float *d_work; // Workspace for getrf
654 |     int lwork = 0; // Size of workspace
655 |     cusolverDnHandle_t cusolverHandle = this->cusolverHandle;
656 | 
657 |     cudaMalloc(&d_ipiv, this->n * sizeof(int));
658 |     cudaMalloc(&d_info, sizeof(int));
659 | 
660 |     // Step 1: Query working space of getrf
661 |     cusolverDnSgetrf_bufferSize(cusolverHandle, this->n, this->n, thrust::raw_pointer_cast(predictedMeasurementCovarianceCopy.data()), this->n, &lwork);
662 |     cudaMalloc(&d_work, lwork * sizeof(float));
663 | 
664 |     // Step 2: LU decomposition on the copy
665 |     cusolverDnSgetrf(cusolverHandle, this->n, this->n, thrust::raw_pointer_cast(predictedMeasurementCovarianceCopy.data()), this->n, d_work, d_ipiv, d_info);
666 | 
667 |     // Prepare identity matrix as the right-hand side
668 |     thrust::device_vector<float> identity(this->n * this->n, 0);
669 |     for (int i = 0; i < this->n; ++i) identity[i * this->n + i] = 1.0f;
670 | 
671 |     // Step 3: Solve linear equations for each column of the identity matrix to compute the inverse
672 |     for (int i = 0; i < this->n; ++i) 
673 |     {
674 |         float* column = thrust::raw_pointer_cast(&identity[i * this->n]);
675 |         cusolverDnSgetrs(cusolverHandle, CUBLAS_OP_N, this->n, 1, thrust::raw_pointer_cast(predictedMeasurementCovarianceCopy.data()), this->n, d_ipiv, column, this->n, d_info);
676 |     }
677 | 
678 |     // Copy the inverse from identity matrix to predictedMeasurementCovarianceInv
679 |     thrust::copy(identity.begin(), identity.end(), predictedMeasurementCovarianceInv.begin());
680 | 
681 |     // Cleanup
682 |     cudaFree(d_ipiv);
683 |     cudaFree(d_info);
684 |     cudaFree(d_work);
685 | 
686 |     return predictedMeasurementCovarianceInv;
687 | }
688 | 
689 | void UKF::computeKalmanGain() 
690 | {
691 |     // Ensure kalmanGain_ is correctly sized
692 |     this->kalmanGain_.resize(this->n * this->n, 0.0f);
693 | 
694 |     // Compute the inverse of the predicted measurement covariance matrix
695 |     thrust::device_vector<float> predictedMeasurementCovarianceInv = this->computeInverseMeasurementCovariance();
696 | 
697 |     // Setup kernel launch parameters
698 |     dim3 threadsPerBlock(16, 16);
699 |     dim3 blocksPerGrid((this->n + threadsPerBlock.x - 1) / threadsPerBlock.x, (this->n + threadsPerBlock.y - 1) / threadsPerBlock.y);
700 | 
701 |     // Launch the kernel
702 |     computeKalmanGainKernel<<<blocksPerGrid, threadsPerBlock>>>(thrust::raw_pointer_cast(this->crossCovariance_.data()), thrust::raw_pointer_cast(predictedMeasurementCovarianceInv.data()), thrust::raw_pointer_cast(this->kalmanGain_.data()), this->n, this->n);
703 |     cudaDeviceSynchronize(); // Wait for CUDA to finish
704 | }
705 | 
706 | void UKF::updateStateWithMeasurement(const std::vector<float>& measurement, float dt) 
707 | {
708 |     thrust::device_vector<float> actualMeasurement = this->prepareMeasurementVector(measurement, dt);
709 |     thrust::device_vector<float> measurementResidual(this->n); // Ensure measDim is defined correctly
710 |     thrust::transform(actualMeasurement.begin(), actualMeasurement.end(), this->predictedMeasurementMean_.begin(), measurementResidual.begin(), thrust::minus<float>());
711 | 
712 |     // Update the state with the measurement residual
713 |     // blockSize and numBlocks need to be defined appropriately
714 |     const int blockSize = 256; // A common choice for many CUDA-capable GPUs
715 |     const int numBlocks = (this->n + blockSize - 1) / blockSize;
716 |     updateStateWithMeasurementKernel<<<numBlocks, blockSize>>>(thrust::raw_pointer_cast(this->state_.data()), thrust::raw_pointer_cast(this->kalmanGain_.data()), thrust::raw_pointer_cast(measurementResidual.data()), this->n, this->n);
717 |     cudaDeviceSynchronize(); // Ensure kernel execution completion
718 | 
719 |     normalizeQuaternionKernel<<<1, 1>>>(thrust::raw_pointer_cast(this->state_.data()));
720 |     cudaDeviceSynchronize(); // Ensure the normalization completes before proceeding
721 | }
722 | 
723 | void UKF::updateStateCovariance()
724 | {
725 |     // Step 1: Calculate K * R
726 |     thrust::device_vector<float> KR(this->n * this->n); // Adjust dimensions if necessary
727 |     float* d_KR = thrust::raw_pointer_cast(KR.data());
728 | 
729 |     cublasSgemm(this->cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N, this->n, this->n, this->n, &(this->alpha_), thrust::raw_pointer_cast(this->kalmanGain_.data()), this->n, thrust::raw_pointer_cast(this->predictedMeasurementCovariance_.data()), this->n, &(this->beta_), d_KR, this->n);
730 | 
731 |     // Step 2: Calculate (K * R) * K^T
732 |     thrust::device_vector<float> KRKT(this->n * this->n);
733 |     float* d_KRKT = thrust::raw_pointer_cast(KRKT.data());
734 | 
735 |     cublasSgemm(this->cublasHandle, CUBLAS_OP_N, CUBLAS_OP_T, this->n, this->n, this->n, &(this->alpha_), d_KR, this->n, thrust::raw_pointer_cast(this->kalmanGain_.data()), this->n, &(this->beta_), d_KRKT, this->n);
736 | 
737 |     // Step 3: Update P = P - (K * R) * K^T
738 |     // Negate KRKT for subtraction
739 |     thrust::transform(KRKT.begin(), KRKT.end(), KRKT.begin(), thrust::negate<float>());
740 | 
741 |     // Add the negated (K * R) * K^T to P
742 |     cublasSaxpy(this->cublasHandle, this->n * this->n, &(this->alpha_), d_KRKT, 1, thrust::raw_pointer_cast(this->P_.data()), 1);
743 | }
744 | 
745 | void UKF::updateFilter(const std::vector<float>& measurement, const float dt, const float positionVariance, const float orientationVariance, const float velocityVariance, const float angularVelocityVariance, const float linearAccelerationVariance)
746 | {
747 |     this->updateAllSigmaPoints(measurement, dt);
748 |     this->predictStateMean();
749 |     this->predictStateCovariance();
750 |     this->generateMeasurmentSigmaPoints(measurement, dt);
751 |     this->updateMeasurementCovarianceMatrix(positionVariance, orientationVariance, velocityVariance, angularVelocityVariance, linearAccelerationVariance);
752 |     this->predictMeasurementMean();
753 |     this->computeMeasurementCovariance();
754 |     this->computeCrossCovariance();
755 |     this->computeKalmanGain();
756 |     this->updateStateWithMeasurement(measurement, dt);
757 |     this->updateStateCovariance();
758 | }
759 | 
760 | std::vector<float> UKF::getState()
761 | {
762 |     // Create a standard vector with the same size as the device vector
763 |     std::vector<float> stateHost(this->state_.size());
764 | 
765 |     // Copy data from device vector to host vector
766 |     thrust::copy(this->state_.begin(), this->state_.end(), stateHost.begin());
767 | 
768 |     return stateHost;
769 | }
770 | 
771 | std::vector<float> UKF::getStateCovarianceMatrix()
772 | {
773 |     // Allocate a std::vector<float> with the appropriate size
774 |     std::vector<float> state_covariance_matrix(this->n * this->n);
775 | 
776 |     // Copy the data from the device_vector to the std::vector
777 |     thrust::copy(this->P_.begin(), this->P_.end(), state_covariance_matrix.begin());
778 | 
779 |     return state_covariance_matrix;
780 | }
781 | 
782 | void UKF::printStateInfo() 
783 | {
784 |     std::stringstream ss;
785 | 
786 |     // State vector
787 |     ss << "State: ";
788 |     for (size_t i = 0; i < this->state_.size(); ++i) {
789 |         ss << this->state_[i] << " ";
790 |     }
791 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
792 |     ss.str("");
793 |     
794 |     // State Covariance matrix
795 |     ss << "State Covariance matrix: ";
796 |     for (size_t i = 0; i < this->P_.size(); ++i) {
797 |         ss << this->P_[i] << " ";
798 |     }
799 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
800 |     ss.str("");
801 | 
802 |     // Process Covariance matrix
803 |     ss << "Process Covariance matrix: ";
804 |     for (size_t i = 0; i < this->Q_.size(); ++i) {
805 |         ss << this->Q_[i] << " ";
806 |     }
807 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
808 |     ss.str("");
809 | 
810 |     // Measurement Covariance matrix
811 |     ss << "Measurement Covariance matrix: ";
812 |     for (size_t i = 0; i < this->R_.size(); ++i) {
813 |         ss << this->R_[i] << " ";
814 |     }
815 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
816 |     ss.str("");
817 | 
818 |     // Sigma Points matrix
819 |     ss << "Sigma Points matrix: ";
820 |     for (size_t i = 0; i < this->sigmaPoints_.size(); ++i) {
821 |         ss << this->sigmaPoints_[i] << " ";
822 |     }
823 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
824 |     ss.str("");
825 | 
826 |     // Weights for sigma points (Mean)
827 |     ss << "Weights for sigma points (Mean): ";
828 |     for (size_t i = 0; i < this->weightsMean_.size(); ++i) {
829 |         ss << this->weightsMean_[i] << " ";
830 |     }
831 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
832 |     ss.str("");
833 | 
834 |     // Weights for sigma points (Covariance)
835 |     ss << "Weights for sigma points (Covariance): ";
836 |     for (size_t i = 0; i < this->weightsCovariance_.size(); ++i) {
837 |         ss << this->weightsCovariance_[i] << " ";
838 |     }
839 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
840 |     ss.str("");
841 |     
842 |     // Predicted State Mean
843 |     ss << "Predicted State Mean: ";
844 |     for (size_t i = 0; i < this->predictedStateMean_.size(); ++i) {
845 |         ss << this->predictedStateMean_[i] << " ";
846 |     }
847 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
848 |     ss.str("");
849 | 
850 |     // Predicted State Covariance
851 |     ss << "Predicted State Covariance: ";
852 |     for (size_t i = 0; i < this->predictedStateCovariance_.size(); ++i) {
853 |         ss << this->predictedStateCovariance_[i] << " ";
854 |     }
855 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
856 |     ss.str("");
857 |     
858 |     // Predicted Measurement Sigma Points
859 |     ss << "Predicted Measurement Sigma Points: ";
860 |     for (size_t i = 0; i < this->predictedMeasurementSigmaPoints_.size(); ++i) {
861 |         ss << this->predictedMeasurementSigmaPoints_[i] << " ";
862 |     }
863 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
864 |     ss.str("");
865 |     
866 |     // Predicted Measurement Mean
867 |     ss << "Predicted Measurement Mean: ";
868 |     for (size_t i = 0; i < this->predictedMeasurementMean_.size(); ++i) {
869 |         ss << this->predictedMeasurementMean_[i] << " ";
870 |     }
871 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
872 |     ss.str("");
873 |     
874 |     // Predicted Measurement Covariance
875 |     ss << "Predicted Measurement Covariance: ";
876 |     for (size_t i = 0; i < this->predictedMeasurementCovariance_.size(); ++i) {
877 |         ss << this->predictedMeasurementCovariance_[i] << " ";
878 |     }
879 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
880 |     ss.str("");
881 |     
882 |     // Cross-Covariance matrix between state and measurement
883 |     ss << "Cross-Covariance matrix between state and measurement: ";
884 |     for (size_t i = 0; i < this->crossCovariance_.size(); ++i) {
885 |         ss << this->crossCovariance_[i] << " ";
886 |     }
887 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
888 |     ss.str("");
889 |     
890 |     // Kalman Gain
891 |     ss << "Kalman Gain: ";
892 |     for (size_t i = 0; i < this->kalmanGain_.size(); ++i) {
893 |         ss << this->kalmanGain_[i] << " ";
894 |     }
895 |     RCLCPP_INFO(this->rosLogger_, ss.str().c_str());
896 | }


--------------------------------------------------------------------------------