├── .clang-format ├── .github └── workflows │ ├── build.yml │ └── issue_response.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── data ├── 251370668.pcd ├── 251371071.pcd ├── proctime.png └── relative.txt ├── docker ├── focal │ └── Dockerfile ├── focal_cuda │ └── Dockerfile ├── kinetic │ └── Dockerfile ├── melodic │ └── Dockerfile ├── melodic_llvm │ └── Dockerfile ├── noetic │ └── Dockerfile └── noetic_llvm │ └── Dockerfile ├── include └── fast_gicp │ ├── cuda │ ├── brute_force_knn.cuh │ ├── compute_derivatives.cuh │ ├── compute_mahalanobis.cuh │ ├── covariance_estimation.cuh │ ├── covariance_regularization.cuh │ ├── fast_vgicp_cuda.cuh │ ├── find_voxel_correspondences.cuh │ ├── gaussian_voxelmap.cuh │ ├── ndt_compute_derivatives.cuh │ ├── ndt_cuda.cuh │ └── vector3_hash.cuh │ ├── gicp │ ├── experimental │ │ ├── fast_gicp_mp.hpp │ │ └── fast_gicp_mp_impl.hpp │ ├── fast_gicp.hpp │ ├── fast_gicp_st.hpp │ ├── fast_vgicp.hpp │ ├── fast_vgicp_cuda.hpp │ ├── fast_vgicp_voxel.hpp │ ├── gicp_settings.hpp │ ├── impl │ │ ├── fast_gicp_impl.hpp │ │ ├── fast_gicp_st_impl.hpp │ │ ├── fast_vgicp_cuda_impl.hpp │ │ ├── fast_vgicp_impl.hpp │ │ └── lsq_registration_impl.hpp │ └── lsq_registration.hpp │ ├── ndt │ ├── impl │ │ └── ndt_cuda_impl.hpp │ ├── ndt_cuda.hpp │ └── ndt_settings.hpp │ └── so3 │ └── so3.hpp ├── package.xml ├── scripts └── runtest_cuda.sh ├── setup.py └── src ├── align.cpp ├── fast_gicp ├── cuda │ ├── brute_force_knn.cu │ ├── compute_derivatives.cu │ ├── compute_mahalanobis.cu │ ├── covariance_estimation.cu │ ├── covariance_estimation_rbf.cu │ ├── covariance_regularization.cu │ ├── fast_vgicp_cuda.cu │ ├── find_voxel_correspondences.cu │ ├── gaussian_voxelmap.cu │ ├── ndt_compute_derivatives.cu │ └── ndt_cuda.cu ├── gicp │ ├── experimental │ │ └── fast_gicp_mp.cpp │ ├── fast_gicp.cpp │ ├── fast_gicp_st.cpp │ ├── fast_vgicp.cpp │ ├── fast_vgicp_cuda.cpp │ └── lsq_registration.cpp └── ndt │ └── ndt_cuda.cpp ├── kitti.cpp ├── kitti.py ├── python └── main.cpp └── test └── gicp_test.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Google 4 | AccessModifierOffset: -2 5 | AlignAfterOpenBracket: AlwaysBreak 6 | AlignConsecutiveMacros: false 7 | AlignConsecutiveAssignments: false 8 | AlignConsecutiveDeclarations: false 9 | AlignEscapedNewlines: Left 10 | AlignOperands: true 11 | AlignTrailingComments: true 12 | AllowAllArgumentsOnNextLine: false 13 | AllowAllConstructorInitializersOnNextLine: false 14 | AllowAllParametersOfDeclarationOnNextLine: false 15 | AllowShortBlocksOnASingleLine: Never 16 | AllowShortCaseLabelsOnASingleLine: false 17 | AllowShortFunctionsOnASingleLine: Inline 18 | AllowShortLambdasOnASingleLine: All 19 | AllowShortIfStatementsOnASingleLine: WithoutElse 20 | AllowShortLoopsOnASingleLine: true 21 | AlwaysBreakAfterDefinitionReturnType: None 22 | AlwaysBreakAfterReturnType: None 23 | AlwaysBreakBeforeMultilineStrings: true 24 | AlwaysBreakTemplateDeclarations: Yes 25 | BinPackArguments: false 26 | BinPackParameters: false 27 | BraceWrapping: 28 | AfterCaseLabel: false 29 | AfterClass: false 30 | AfterControlStatement: false 31 | AfterEnum: false 32 | AfterFunction: false 33 | AfterNamespace: false 34 | AfterObjCDeclaration: false 35 | AfterStruct: false 36 | AfterUnion: false 37 | AfterExternBlock: false 38 | BeforeCatch: false 39 | BeforeElse: false 40 | IndentBraces: false 41 | SplitEmptyFunction: true 42 | SplitEmptyRecord: true 43 | SplitEmptyNamespace: true 44 | BreakBeforeBinaryOperators: None 45 | BreakBeforeBraces: Attach 46 | BreakBeforeInheritanceComma: false 47 | BreakInheritanceList: BeforeColon 48 | BreakBeforeTernaryOperators: true 49 | BreakConstructorInitializersBeforeComma: false 50 | BreakConstructorInitializers: BeforeColon 51 | BreakAfterJavaFieldAnnotations: false 52 | BreakStringLiterals: true 53 | ColumnLimit: 180 54 | CommentPragmas: '^ IWYU pragma:' 55 | CompactNamespaces: false 56 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 57 | ConstructorInitializerIndentWidth: 0 58 | ContinuationIndentWidth: 2 59 | Cpp11BracedListStyle: true 60 | DeriveLineEnding: true 61 | DerivePointerAlignment: false 62 | DisableFormat: false 63 | ExperimentalAutoDetectBinPacking: false 64 | FixNamespaceComments: true 65 | ForEachMacros: 66 | - foreach 67 | - Q_FOREACH 68 | - BOOST_FOREACH 69 | IncludeBlocks: Regroup 70 | IncludeCategories: 71 | - Regex: '^' 72 | Priority: 2 73 | SortPriority: 0 74 | - Regex: '^<.*\.h>' 75 | Priority: 1 76 | SortPriority: 0 77 | - Regex: '^<.*' 78 | Priority: 2 79 | SortPriority: 0 80 | - Regex: '.*' 81 | Priority: 3 82 | SortPriority: 0 83 | IncludeIsMainRegex: '([-_](test|unittest))?$' 84 | IncludeIsMainSourceRegex: '' 85 | IndentCaseLabels: true 86 | IndentGotoLabels: true 87 | IndentPPDirectives: None 88 | IndentWidth: 2 89 | IndentWrappedFunctionNames: false 90 | JavaScriptQuotes: Leave 91 | JavaScriptWrapImports: true 92 | KeepEmptyLinesAtTheStartOfBlocks: false 93 | MacroBlockBegin: '' 94 | MacroBlockEnd: '' 95 | MaxEmptyLinesToKeep: 1 96 | NamespaceIndentation: None 97 | ObjCBinPackProtocolList: Never 98 | ObjCBlockIndentWidth: 2 99 | ObjCSpaceAfterProperty: false 100 | ObjCSpaceBeforeProtocolList: true 101 | PenaltyBreakAssignment: 2 102 | PenaltyBreakBeforeFirstCallParameter: 1 103 | PenaltyBreakComment: 300 104 | PenaltyBreakFirstLessLess: 120 105 | PenaltyBreakString: 1000 106 | PenaltyBreakTemplateDeclaration: 10 107 | PenaltyExcessCharacter: 1000000 108 | PenaltyReturnTypeOnItsOwnLine: 200 109 | PointerAlignment: Left 110 | RawStringFormats: 111 | - Language: Cpp 112 | Delimiters: 113 | - cc 114 | - CC 115 | - cpp 116 | - Cpp 117 | - CPP 118 | - 'c++' 119 | - 'C++' 120 | CanonicalDelimiter: '' 121 | BasedOnStyle: google 122 | - Language: TextProto 123 | Delimiters: 124 | - pb 125 | - PB 126 | - proto 127 | - PROTO 128 | EnclosingFunctions: 129 | - EqualsProto 130 | - EquivToProto 131 | - PARSE_PARTIAL_TEXT_PROTO 132 | - PARSE_TEST_PROTO 133 | - PARSE_TEXT_PROTO 134 | - ParseTextOrDie 135 | - ParseTextProtoOrDie 136 | CanonicalDelimiter: '' 137 | BasedOnStyle: google 138 | ReflowComments: true 139 | SortIncludes: false 140 | SortUsingDeclarations: true 141 | SpaceAfterCStyleCast: false 142 | SpaceAfterLogicalNot: false 143 | SpaceAfterTemplateKeyword: true 144 | SpaceBeforeAssignmentOperators: true 145 | SpaceBeforeCpp11BracedList: false 146 | SpaceBeforeCtorInitializerColon: true 147 | SpaceBeforeInheritanceColon: true 148 | SpaceBeforeParens: ControlStatements 149 | SpaceBeforeRangeBasedForLoopColon: true 150 | SpaceInEmptyBlock: false 151 | SpaceInEmptyParentheses: false 152 | SpacesBeforeTrailingComments: 2 153 | SpacesInAngles: false 154 | SpacesInConditionalStatement: false 155 | SpacesInContainerLiterals: true 156 | SpacesInCStyleCastParentheses: false 157 | SpacesInParentheses: false 158 | SpacesInSquareBrackets: false 159 | SpaceBeforeSquareBrackets: false 160 | Standard: Auto 161 | StatementMacros: 162 | - Q_UNUSED 163 | - QT_REQUIRE_VERSION 164 | TabWidth: 8 165 | UseCRLF: false 166 | UseTab: Never 167 | ... 168 | 169 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | ROS_DISTRO: [focal, focal_cuda, melodic, melodic_llvm, noetic, noetic_llvm] 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | 22 | - name: Docker login 23 | continue-on-error: true 24 | uses: docker/login-action@v1 25 | with: 26 | username: ${{ secrets.DOCKER_USERNAME }} 27 | password: ${{ secrets.DOCKER_TOKEN }} 28 | 29 | - name: Docker build 30 | uses: docker/build-push-action@v2 31 | with: 32 | file: ${{github.workspace}}/docker/${{ matrix.ROS_DISTRO }}/Dockerfile 33 | context: . 34 | push: false 35 | -------------------------------------------------------------------------------- /.github/workflows/issue_response.yml: -------------------------------------------------------------------------------- 1 | on: 2 | fork: 3 | issues: 4 | types: [opened] 5 | 6 | jobs: 7 | welcome: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Leave Comment 11 | run: gh issue comment "$NUMBER" --body "$BODY" 12 | env: 13 | GH_TOKEN: ${{ github.token }} 14 | GH_REPO: ${{ github.repository }} 15 | NUMBER: ${{ github.event.issue.number }} 16 | BODY: > 17 | Sorry!! We are reducing the support for fast_gicp. 18 | Please consider using [small_gicp](https://github.com/koide3/small_gicp), a new faster library with better portability. 19 | Meanwhile, feel free to open a PR if you have any improvements that can be merged into fast_gicp. 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/* 2 | build/* 3 | dist/* 4 | 5 | *.so 6 | *.py[cod] 7 | *.egg-info 8 | README.html 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "thidparty/Sophus"] 2 | path = thirdparty/Sophus 3 | url = https://github.com/strasdat/Sophus.git 4 | [submodule "thirdparty/Eigen"] 5 | path = thirdparty/Eigen 6 | url = https://gitlab.com/libeigen/eigen.git 7 | [submodule "thirdparty/nvbio"] 8 | path = thirdparty/nvbio 9 | url = https://github.com/NVlabs/nvbio.git 10 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10.0) 2 | project(fast_gicp) 3 | 4 | option(BUILD_VGICP_CUDA "Build GPU-powered VGICP" OFF) 5 | option(BUILD_apps "Build application programs" ON) 6 | option(BUILD_test "Build test programs" OFF) 7 | option(BUILD_PYTHON_BINDINGS "Build python bindings" OFF) 8 | 9 | set(CMAKE_BUILD_TYPE "Release") 10 | 11 | find_package(PCL REQUIRED) 12 | find_package(Eigen3 REQUIRED) 13 | add_definitions(${PCL_DEFINITIONS}) 14 | 15 | if(DEFINED ENV{ROS_VERSION}) 16 | set(ROS_VERSION $ENV{ROS_VERSION}) 17 | endif() 18 | 19 | if(NOT BUILD_PYTHON_BINDINGS) 20 | if(${ROS_VERSION}) 21 | if(${ROS_VERSION} EQUAL 1) 22 | find_package(catkin) 23 | elseif (${ROS_VERSION} EQUAL 2) 24 | find_package(ament_cmake) 25 | endif() 26 | endif() 27 | endif() 28 | 29 | find_package(OpenMP) 30 | if (OPENMP_FOUND) 31 | set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") 32 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 33 | endif() 34 | 35 | if(BUILD_VGICP_CUDA) 36 | find_package(CUDA REQUIRED) 37 | include_directories(${CUDA_INCLUDE_DIRS}) 38 | link_directories(${CUDA_LIBRARY_DIRS}) 39 | endif() 40 | 41 | ################################### 42 | ## catkin specific configuration ## 43 | ################################### 44 | if(catkin_FOUND) 45 | catkin_package( 46 | INCLUDE_DIRS include 47 | LIBRARIES fast_gicp 48 | ) 49 | endif() 50 | 51 | ########### 52 | ## Build ## 53 | ########### 54 | 55 | add_library(fast_gicp SHARED 56 | src/fast_gicp/gicp/lsq_registration.cpp 57 | src/fast_gicp/gicp/fast_gicp.cpp 58 | src/fast_gicp/gicp/fast_gicp_st.cpp 59 | src/fast_gicp/gicp/fast_vgicp.cpp 60 | ) 61 | target_link_libraries(fast_gicp 62 | ${PCL_LIBRARIES} 63 | ) 64 | if (OPENMP_FOUND) 65 | if (TARGET OpenMP::OpenMP_CXX) 66 | target_link_libraries(fast_gicp OpenMP::OpenMP_CXX) 67 | endif () 68 | endif () 69 | target_include_directories(fast_gicp PUBLIC 70 | include 71 | ${PCL_INCLUDE_DIRS} 72 | ${EIGEN3_INCLUDE_DIR} 73 | ) 74 | 75 | ### APPS ### 76 | if(BUILD_apps) 77 | add_executable(gicp_align src/align.cpp) 78 | add_dependencies(gicp_align fast_gicp) 79 | target_link_libraries(gicp_align 80 | ${PCL_LIBRARIES} 81 | fast_gicp 82 | ) 83 | 84 | add_executable(gicp_kitti src/kitti.cpp) 85 | add_dependencies(gicp_kitti fast_gicp) 86 | target_link_libraries(gicp_kitti 87 | ${PCL_LIBRARIES} 88 | fast_gicp 89 | ) 90 | endif() 91 | 92 | ### Python bindings ### 93 | if(BUILD_PYTHON_BINDINGS) 94 | find_package(pybind11 CONFIG) 95 | pybind11_add_module(pygicp 96 | src/python/main.cpp 97 | ) 98 | target_include_directories(pygicp PUBLIC 99 | include 100 | ${PCL_INCLUDE_DIRS} 101 | ${EIGEN3_INCLUDE_DIR} 102 | ) 103 | target_link_libraries(pygicp PRIVATE 104 | fast_gicp 105 | ) 106 | set_target_properties(pygicp PROPERTIES 107 | INSTALL_RPATH "$ORIGIN/" 108 | BUILD_WITH_INSTALL_RPATH TRUE 109 | ) 110 | endif() 111 | 112 | ### CUDA ### 113 | if(BUILD_VGICP_CUDA) 114 | set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr") 115 | add_definitions(-DUSE_VGICP_CUDA) 116 | 117 | cuda_add_library(fast_vgicp_cuda SHARED 118 | src/fast_gicp/cuda/fast_vgicp_cuda.cu 119 | src/fast_gicp/cuda/brute_force_knn.cu 120 | src/fast_gicp/cuda/covariance_estimation.cu 121 | src/fast_gicp/cuda/covariance_estimation_rbf.cu 122 | src/fast_gicp/cuda/covariance_regularization.cu 123 | src/fast_gicp/cuda/gaussian_voxelmap.cu 124 | src/fast_gicp/cuda/find_voxel_correspondences.cu 125 | src/fast_gicp/cuda/compute_derivatives.cu 126 | src/fast_gicp/cuda/compute_mahalanobis.cu 127 | src/fast_gicp/cuda/ndt_cuda.cu 128 | src/fast_gicp/cuda/ndt_compute_derivatives.cu 129 | ) 130 | target_include_directories(fast_vgicp_cuda PRIVATE 131 | include 132 | thirdparty/Eigen 133 | thirdparty/nvbio 134 | ${catkin_INCLUDE_DIRS} 135 | ) 136 | target_link_libraries(fast_vgicp_cuda 137 | ${catkin_LIBRARIES} 138 | ) 139 | cuda_add_cublas_to_target(fast_vgicp_cuda) 140 | 141 | # add vgicp_cuda to libfast_gicp 142 | target_sources(fast_gicp PRIVATE 143 | src/fast_gicp/gicp/fast_vgicp_cuda.cpp 144 | src/fast_gicp/ndt/ndt_cuda.cpp 145 | ) 146 | target_link_libraries(fast_gicp 147 | fast_vgicp_cuda 148 | ) 149 | add_dependencies(fast_gicp fast_vgicp_cuda) 150 | if(catkin_FOUND) 151 | install(TARGETS fast_vgicp_cuda 152 | LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}) 153 | elseif (ament_cmake_FOUND) 154 | install(TARGETS fast_vgicp_cuda 155 | LIBRARY DESTINATION lib) 156 | endif() 157 | endif() 158 | 159 | ### TEST ### 160 | if(BUILD_test) 161 | find_package(GTest REQUIRED) 162 | 163 | add_executable(gicp_test src/test/gicp_test.cpp) 164 | add_dependencies(gicp_test fast_gicp) 165 | target_link_libraries(gicp_test ${GTEST_LIBRARIES} ${PCL_LIBRARIES} fast_gicp) 166 | gtest_add_tests(TARGET gicp_test WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} EXTRA_ARGS "${CMAKE_SOURCE_DIR}/data") 167 | endif() 168 | 169 | if(catkin_FOUND) 170 | ################################### 171 | ## catkin specific configuration ## 172 | ################################### 173 | install(TARGETS ${PROJECT_NAME} 174 | LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}) 175 | 176 | install(DIRECTORY include/ 177 | DESTINATION ${CATKIN_GLOBAL_INCLUDE_DESTINATION} 178 | FILES_MATCHING PATTERN "*.hpp") 179 | elseif (ament_cmake_FOUND) 180 | ################################## 181 | ## ament specific configuration ## 182 | ################################## 183 | ament_export_include_directories(include) 184 | ament_export_libraries(fast_gicp) 185 | ament_package() 186 | 187 | install(TARGETS ${PROJECT_NAME} 188 | LIBRARY DESTINATION lib) 189 | 190 | install( 191 | DIRECTORY "include/" 192 | DESTINATION include 193 | ) 194 | endif() 195 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, SMRT-AIST 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Note: New faster library is released 3 | 4 | We released [small_gicp](https://github.com/koide3/small_gicp) that is twice as fast as fast_gicp and with minimum dependencies and clean interfaces. 5 | 6 | # fast_gicp 7 | 8 | This package is a collection of GICP-based fast point cloud registration algorithms. It constains a multi-threaded GICP as well as multi-thread and GPU implementations of our voxelized GICP (VGICP) algorithm. All the implemented algorithms have the PCL registration interface so that they can be used as an inplace replacement for GICP in PCL. 9 | 10 | - FastGICP: multi-threaded GICP algorithm (**\~40FPS**) 11 | - FastGICPSingleThread: GICP algorithm optimized for single-threading (**\~15FPS**) 12 | - FastVGICP: multi-threaded and voxelized GICP algorithm (**\~70FPS**) 13 | - FastVGICPCuda: CUDA-accelerated voxelized GICP algorithm (**\~120FPS**) 14 | - NDTCuda: CUDA-accelerated D2D NDT algorithm (**\~500FPS**) 15 | ![proctime](data/proctime.png) 16 | 17 | [![Build](https://github.com/SMRT-AIST/fast_gicp/actions/workflows/build.yml/badge.svg)](https://github.com/SMRT-AIST/fast_gicp/actions/workflows/build.yml) on melodic & noetic 18 | 19 | ## Installation 20 | 21 | ### Dependencies 22 | - PCL 23 | - Eigen 24 | - OpenMP 25 | - CUDA (optional) 26 | - [Sophus](https://github.com/strasdat/Sophus) 27 | - [nvbio](https://github.com/NVlabs/nvbio) 28 | 29 | We have tested this package on Ubuntu 18.04/20.04 and CUDA 11.1. 30 | 31 | On macOS when using `brew`, you might have to set up your depenencies like this 32 | 33 | ``` 34 | cmake .. "-DCMAKE_PREFIX_PATH=$(brew --prefix libomp)[;other-custom-prefixes]" -DQt5_DIR=$(brew --prefix qt@5)lib/cmake/Qt5 35 | ``` 36 | 37 | ### CUDA 38 | 39 | To enable the CUDA-powered implementations, set ```BUILD_VGICP_CUDA``` cmake option to ```ON```. 40 | 41 | ### ROS 42 | ```bash 43 | cd ~/catkin_ws/src 44 | git clone https://github.com/SMRT-AIST/fast_gicp --recursive 45 | cd .. && catkin_make -DCMAKE_BUILD_TYPE=Release 46 | # enable cuda-based implementations 47 | # cd .. && catkin_make -DCMAKE_BUILD_TYPE=Release -DBUILD_VGICP_CUDA=ON 48 | ``` 49 | 50 | ### Non-ROS 51 | ```bash 52 | git clone https://github.com/SMRT-AIST/fast_gicp --recursive 53 | mkdir fast_gicp/build && cd fast_gicp/build 54 | cmake .. -DCMAKE_BUILD_TYPE=Release 55 | # enable cuda-based implementations 56 | # cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_VGICP_CUDA=ON 57 | make -j8 58 | ``` 59 | 60 | ### Python bindings 61 | ```bash 62 | cd fast_gicp 63 | python3 setup.py install --user 64 | ``` 65 | Note: If you are on a catkin-enabled environment and the installation doesn't work well, comment out ```find_package(catkin)``` in CMakeLists.txt and run the above installation command again. 66 | 67 | 68 | ```python 69 | import pygicp 70 | 71 | target = # Nx3 numpy array 72 | source = # Mx3 numpy array 73 | 74 | # 1. function interface 75 | matrix = pygicp.align_points(target, source) 76 | 77 | # optional arguments 78 | # initial_guess : Initial guess of the relative pose (4x4 matrix) 79 | # method : GICP, VGICP, VGICP_CUDA, or NDT_CUDA 80 | # downsample_resolution : Downsampling resolution (used only if positive) 81 | # k_correspondences : Number of points used for covariance estimation 82 | # max_correspondence_distance : Maximum distance for corresponding point search 83 | # voxel_resolution : Resolution of voxel-based algorithms 84 | # neighbor_search_method : DIRECT1, DIRECT7, DIRECT27, or DIRECT_RADIUS 85 | # neighbor_search_radius : Neighbor voxel search radius (for GPU-based methods) 86 | # num_threads : Number of threads 87 | 88 | 89 | # 2. class interface 90 | # you may want to downsample the input clouds before registration 91 | target = pygicp.downsample(target, 0.25) 92 | source = pygicp.downsample(source, 0.25) 93 | 94 | # pygicp.FastGICP has more or less the same interfaces as the C++ version 95 | gicp = pygicp.FastGICP() 96 | gicp.set_input_target(target) 97 | gicp.set_input_source(source) 98 | matrix = gicp.align() 99 | 100 | # optional 101 | gicp.set_num_threads(4) 102 | gicp.set_max_correspondence_distance(1.0) 103 | gicp.get_final_transformation() 104 | gicp.get_final_hessian() 105 | ``` 106 | 107 | ## Benchmark 108 | CPU:Core i9-9900K GPU:GeForce RTX2080Ti 109 | 110 | ```bash 111 | roscd fast_gicp/data 112 | rosrun fast_gicp gicp_align 251370668.pcd 251371071.pcd 113 | ``` 114 | 115 | ``` 116 | target:17249[pts] source:17518[pts] 117 | --- pcl_gicp --- 118 | single:127.508[msec] 100times:12549.4[msec] fitness_score:0.204892 119 | --- pcl_ndt --- 120 | single:53.5904[msec] 100times:5467.16[msec] fitness_score:0.229616 121 | --- fgicp_st --- 122 | single:111.324[msec] 100times:10662.7[msec] 100times_reuse:6794.59[msec] fitness_score:0.204379 123 | --- fgicp_mt --- 124 | single:20.1602[msec] 100times:1585[msec] 100times_reuse:1017.74[msec] fitness_score:0.204412 125 | --- vgicp_st --- 126 | single:112.001[msec] 100times:7959.9[msec] 100times_reuse:4408.22[msec] fitness_score:0.204067 127 | --- vgicp_mt --- 128 | single:18.1106[msec] 100times:1381[msec] 100times_reuse:806.53[msec] fitness_score:0.204067 129 | --- vgicp_cuda (parallel_kdtree) --- 130 | single:15.9587[msec] 100times:1451.85[msec] 100times_reuse:695.48[msec] fitness_score:0.204061 131 | --- vgicp_cuda (gpu_bruteforce) --- 132 | single:53.9113[msec] 100times:3463.5[msec] 100times_reuse:1703.41[msec] fitness_score:0.204049 133 | --- vgicp_cuda (gpu_rbf_kernel) --- 134 | single:5.91508[msec] 100times:590.725[msec] 100times_reuse:226.787[msec] fitness_score:0.20557 135 | ``` 136 | 137 | See [src/align.cpp](https://github.com/SMRT-AIST/fast_gicp/blob/master/src/align.cpp) for the detailed usage. 138 | 139 | ## Test on KITTI 140 | 141 | ### C++ 142 | 143 | ```bash 144 | # Perform frame-by-frame registration 145 | rosrun fast_gicp gicp_kitti /your/kitti/path/sequences/00/velodyne 146 | ``` 147 | 148 | ![kitti00](https://user-images.githubusercontent.com/31344317/86207074-b98ac280-bba8-11ea-9687-e65f03aaf25b.png) 149 | 150 | ### Python 151 | 152 | ```bash 153 | cd fast_gicp/src 154 | python3 kitti.py /your/kitti/path/sequences/00/velodyne 155 | ``` 156 | 157 | ## Note 158 | 159 | In some environments, setting a fewer number of threads rather than the (default) maximum number of threads may result in faster processing (see https://github.com/SMRT-AIST/fast_gicp/issues/145#issuecomment-1890885373). 160 | 161 | ## Related packages 162 | - [ndt_omp](https://github.com/koide3/ndt_omp) 163 | - [fast_gicp](https://github.com/SMRT-AIST/fast_gicp) 164 | 165 | 166 | ## Papers 167 | - Kenji Koide, Masashi Yokozuka, Shuji Oishi, and Atsuhiko Banno, Voxelized GICP for fast and accurate 3D point cloud registration, ICRA2021 [[link]](https://easychair.org/publications/preprint/ftvV) 168 | 169 | ## Contact 170 | Kenji Koide, k.koide@aist.go.jp 171 | 172 | Human-Centered Mobility Research Center, National Institute of Advanced Industrial Science and Technology, Japan [\[URL\]](https://unit.aist.go.jp/rirc/en/team/smart_mobility.html) 173 | -------------------------------------------------------------------------------- /data/251370668.pcd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koide3/fast_gicp/0e7ec1441c99f7be453db2ea216d5de029387417/data/251370668.pcd -------------------------------------------------------------------------------- /data/251371071.pcd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koide3/fast_gicp/0e7ec1441c99f7be453db2ea216d5de029387417/data/251371071.pcd -------------------------------------------------------------------------------- /data/proctime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koide3/fast_gicp/0e7ec1441c99f7be453db2ea216d5de029387417/data/proctime.png -------------------------------------------------------------------------------- /data/relative.txt: -------------------------------------------------------------------------------- 1 | 0.999941 0.0108432 -0.000635437 0.485657 2 | -0.0108468 0.999924 -0.00587782 0.10642 3 | 0.000571654 0.00588436 0.999983 -0.0131581 4 | 0 0 0 1 5 | -------------------------------------------------------------------------------- /docker/focal/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:focal 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | RUN apt-get update && apt-get install --no-install-recommends -y \ 6 | && apt-get install --no-install-recommends -y wget nano cmake build-essential git \ 7 | ca-certificates libeigen3-dev libboost-all-dev libflann-dev libvtk7-dev libomp-dev libgtest-dev \ 8 | && apt-get clean \ 9 | && rm -rf /var/lib/apt/lists/* 10 | 11 | WORKDIR /root 12 | RUN git clone https://github.com/PointCloudLibrary/pcl 13 | 14 | WORKDIR /root/pcl/build 15 | RUN cmake .. -DCMAKE_BUILD_TYPE=Release \ 16 | -DBUILD_geometry=OFF -DBUILD_keypoints=OFF -DBUILD_ml=OFF -DBUILD_outofcore=OFF -DBUILD_people=OFF \ 17 | -DBUILD_recognition=OFF -DBUILD_segmentation=OFF -DBUILD_stereo=OFF -DBUILD_surface=OFF -DBUILD_tools=OFF -DBUILD_tracking=OFF 18 | RUN make -j$(nproc) && make install 19 | 20 | COPY . /root/fast_gicp 21 | WORKDIR /root/fast_gicp 22 | RUN git submodule init && git submodule update 23 | 24 | RUN rm -rf /root/fast_gicp/build 25 | WORKDIR /root/fast_gicp/build 26 | RUN cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_PYTHON_BINDINGS=ON -DBUILD_apps=OFF -DBUILD_test=ON 27 | RUN make -j$(nproc) 28 | 29 | RUN /root/fast_gicp/build/gicp_test /root/fast_gicp/data 30 | 31 | WORKDIR /root 32 | 33 | CMD ["bash"] 34 | -------------------------------------------------------------------------------- /docker/focal_cuda/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.6.2-devel-ubuntu20.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | RUN apt-get update && apt-get install --no-install-recommends -y \ 6 | && apt-get install --no-install-recommends -y wget nano cmake build-essential git \ 7 | ca-certificates libeigen3-dev libboost-all-dev libflann-dev libvtk7-dev libomp-dev libgtest-dev \ 8 | && apt-get clean \ 9 | && rm -rf /var/lib/apt/lists/* 10 | 11 | WORKDIR /root 12 | RUN git clone https://github.com/PointCloudLibrary/pcl 13 | 14 | WORKDIR /root/pcl/build 15 | RUN cmake .. -DCMAKE_BUILD_TYPE=Release \ 16 | -DBUILD_geometry=OFF -DBUILD_keypoints=OFF -DBUILD_ml=OFF -DBUILD_outofcore=OFF -DBUILD_people=OFF \ 17 | -DBUILD_recognition=OFF -DBUILD_segmentation=OFF -DBUILD_stereo=OFF -DBUILD_surface=OFF -DBUILD_tools=OFF -DBUILD_tracking=OFF 18 | RUN make -j$(nproc) && make install 19 | 20 | COPY . /root/fast_gicp 21 | WORKDIR /root/fast_gicp 22 | RUN git submodule init && git submodule update 23 | 24 | RUN rm -rf /root/fast_gicp/build 25 | WORKDIR /root/fast_gicp/build 26 | RUN cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_VGICP_CUDA=ON -DBUILD_PYTHON_BINDINGS=ON -DBUILD_apps=OFF -DBUILD_test=ON 27 | RUN make -j$(nproc) 28 | 29 | WORKDIR /root 30 | 31 | CMD ["bash"] 32 | -------------------------------------------------------------------------------- /docker/kinetic/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ros:kinetic 2 | 3 | RUN apt-get update && apt-get install --no-install-recommends -y \ 4 | && apt-get install --no-install-recommends -y wget nano build-essential \ 5 | ros-kinetic-pcl-ros \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | # install cmake 3.19 10 | WORKDIR /root 11 | RUN wget https://github.com/Kitware/CMake/releases/download/v3.19.0-rc1/cmake-3.19.0-rc1.tar.gz 12 | RUN tar xzvf cmake-3.19.0-rc1.tar.gz 13 | 14 | WORKDIR /root/cmake-3.19.0-rc1 15 | RUN ./bootstrap 16 | RUN make -j$(nproc) && make install 17 | 18 | RUN mkdir -p /root/catkin_ws/src 19 | WORKDIR /root/catkin_ws/src 20 | RUN /bin/bash -c '. /opt/ros/kinetic/setup.bash; catkin_init_workspace' 21 | 22 | COPY . /root/catkin_ws/src/fast_gicp/ 23 | WORKDIR /root/catkin_ws/src/fast_gicp 24 | RUN git submodule init && git submodule update 25 | 26 | WORKDIR /root/catkin_ws 27 | RUN /bin/bash -c '. /opt/ros/kinetic/setup.bash; catkin_make' 28 | RUN sed -i "6i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh 29 | 30 | WORKDIR / 31 | 32 | ENTRYPOINT ["/ros_entrypoint.sh"] 33 | CMD ["bash"] 34 | -------------------------------------------------------------------------------- /docker/melodic/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ros:melodic 2 | 3 | RUN apt-get update && apt-get install --no-install-recommends -y \ 4 | && apt-get install --no-install-recommends -y wget nano build-essential \ 5 | libgtest-dev libomp-dev ros-melodic-pcl-ros \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | 10 | RUN mkdir -p /root/catkin_ws/src 11 | WORKDIR /root/catkin_ws/src 12 | RUN /bin/bash -c '. /opt/ros/melodic/setup.bash; catkin_init_workspace' 13 | 14 | COPY . /root/catkin_ws/src/fast_gicp/ 15 | WORKDIR /root/catkin_ws/src/fast_gicp 16 | RUN git submodule init && git submodule update 17 | 18 | WORKDIR /root/catkin_ws 19 | RUN /bin/bash -c '. /opt/ros/melodic/setup.bash; catkin_make' 20 | RUN sed -i "6i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh 21 | 22 | WORKDIR / 23 | 24 | ENTRYPOINT ["/ros_entrypoint.sh"] 25 | CMD ["bash"] 26 | -------------------------------------------------------------------------------- /docker/melodic_llvm/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ros:melodic 2 | 3 | RUN apt-get update && apt-get install --no-install-recommends -y \ 4 | && apt-get install --no-install-recommends -y wget nano build-essential \ 5 | libgtest-dev libomp-dev clang lld ros-melodic-pcl-ros \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | RUN update-alternatives --install /usr/bin/ld ld /usr/bin/ld.lld 50 10 | 11 | RUN mkdir -p /root/catkin_ws/src 12 | WORKDIR /root/catkin_ws/src 13 | RUN /bin/bash -c '. /opt/ros/melodic/setup.bash; catkin_init_workspace' 14 | 15 | COPY . /root/catkin_ws/src/fast_gicp/ 16 | WORKDIR /root/catkin_ws/src/fast_gicp 17 | RUN git submodule init && git submodule update 18 | 19 | WORKDIR /root/catkin_ws 20 | RUN /bin/bash -c '. /opt/ros/melodic/setup.bash; CC=clang CXX=clang++ catkin_make' 21 | RUN sed -i "6i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh 22 | 23 | WORKDIR / 24 | 25 | ENTRYPOINT ["/ros_entrypoint.sh"] 26 | CMD ["bash"] 27 | -------------------------------------------------------------------------------- /docker/noetic/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ros:noetic 2 | 3 | RUN apt-get update && apt-get install --no-install-recommends -y \ 4 | && apt-get install --no-install-recommends -y wget nano build-essential \ 5 | git libgtest-dev ros-noetic-pcl-ros \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | RUN mkdir -p /root/catkin_ws/src 10 | WORKDIR /root/catkin_ws/src 11 | RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; catkin_init_workspace' 12 | 13 | COPY . /root/catkin_ws/src/fast_gicp/ 14 | WORKDIR /root/catkin_ws/src/fast_gicp 15 | RUN git submodule init && git submodule update 16 | 17 | WORKDIR /root/catkin_ws 18 | RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; catkin_make -DBUILD_test=ON' 19 | RUN sed -i "6i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh 20 | 21 | RUN /root/catkin_ws/devel/lib/fast_gicp/gicp_test /root/catkin_ws/src/fast_gicp/data 22 | 23 | WORKDIR / 24 | 25 | ENTRYPOINT ["/ros_entrypoint.sh"] 26 | CMD ["bash"] 27 | -------------------------------------------------------------------------------- /docker/noetic_llvm/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ros:noetic 2 | 3 | RUN apt-get update && apt-get install --no-install-recommends -y \ 4 | && apt-get install --no-install-recommends -y wget nano build-essential \ 5 | git clang lld libomp-dev libgtest-dev ros-noetic-pcl-ros \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | RUN update-alternatives --install /usr/bin/ld ld /usr/bin/ld.lld 50 10 | 11 | RUN mkdir -p /root/catkin_ws/src 12 | WORKDIR /root/catkin_ws/src 13 | RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; catkin_init_workspace' 14 | 15 | COPY . /root/catkin_ws/src/fast_gicp/ 16 | WORKDIR /root/catkin_ws/src/fast_gicp 17 | RUN git submodule init && git submodule update 18 | 19 | WORKDIR /root/catkin_ws 20 | RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; CC=clang CXX=clang++ catkin_make -DBUILD_test=ON' 21 | RUN sed -i "6i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh 22 | 23 | RUN /root/catkin_ws/devel/lib/fast_gicp/gicp_test /root/catkin_ws/src/fast_gicp/data 24 | 25 | WORKDIR / 26 | 27 | ENTRYPOINT ["/ros_entrypoint.sh"] 28 | CMD ["bash"] 29 | -------------------------------------------------------------------------------- /include/fast_gicp/cuda/brute_force_knn.cuh: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_CUDA_BRUTE_FORCE_KNN_CUH 2 | #define FAST_GICP_CUDA_BRUTE_FORCE_KNN_CUH 3 | 4 | #include 5 | 6 | #include 7 | 8 | namespace fast_gicp { 9 | namespace cuda { 10 | 11 | void brute_force_knn_search(const thrust::device_vector& source, const thrust::device_vector& target, int k, thrust::device_vector>& k_neighbors, bool do_sort=false); 12 | 13 | } 14 | } // namespace fast_gicp 15 | 16 | 17 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/cuda/compute_derivatives.cuh: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_CUDA_COMPUTE_DERIVATIVES_CUH 2 | #define FAST_GICP_CUDA_COMPUTE_DERIVATIVES_CUH 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | namespace fast_gicp { 10 | namespace cuda { 11 | 12 | double compute_derivatives( 13 | const thrust::device_vector& src_points, 14 | const thrust::device_vector& src_covs, 15 | const GaussianVoxelMap& voxelmap, 16 | const thrust::device_vector>& voxel_correspondences, 17 | const thrust::device_ptr& linearized_x_ptr, 18 | const thrust::device_ptr& x_ptr, 19 | Eigen::Matrix* H, 20 | Eigen::Matrix* b); 21 | } 22 | } // namespace fast_gicp 23 | 24 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/cuda/compute_mahalanobis.cuh: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_CUDA_COMPUTE_MAHALANOBIS_CUH 2 | #define FAST_GICP_CUDA_COMPUTE_MAHALANOBIS_CUH 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | namespace fast_gicp { 11 | namespace cuda { 12 | 13 | void compute_mahalanobis( 14 | const thrust::device_vector& src_points, 15 | const thrust::device_vector& src_covs, 16 | const GaussianVoxelMap& voxelmap, 17 | const thrust::device_vector& voxel_correspondences, 18 | const Eigen::Isometry3f& linearized_x, 19 | thrust::device_vector& mahalanobis 20 | ); 21 | 22 | } 23 | } // namespace fast_gicp 24 | 25 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/cuda/covariance_estimation.cuh: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_CUDA_COVARIANCE_ESTIMATION_CUH 2 | #define FAST_GICP_CUDA_COVARIANCE_ESTIMATION_CUH 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace fast_gicp { 9 | namespace cuda { 10 | 11 | void covariance_estimation(const thrust::device_vector& points, int k, const thrust::device_vector& k_neighbors, thrust::device_vector& covariances); 12 | 13 | void covariance_estimation_rbf(const thrust::device_vector& points, double kernel_width, double max_dist, thrust::device_vector& covariances); 14 | } 15 | } // namespace fast_gicp 16 | 17 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/cuda/covariance_regularization.cuh: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_CUDA_COVARIANCE_REGULARIZATION_CUH 2 | #define FAST_GICP_CUDA_COVARIANCE_REGULARIZATION_CUH 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace fast_gicp { 9 | namespace cuda { 10 | 11 | void covariance_regularization(thrust::device_vector& means, thrust::device_vector& covs, RegularizationMethod method); 12 | 13 | } // namespace cuda 14 | } // namespace fast_gicp 15 | 16 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/cuda/fast_vgicp_cuda.cuh: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_FAST_VGICP_CUDA_CORE_CUH 2 | #define FAST_GICP_FAST_VGICP_CUDA_CORE_CUH 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | namespace thrust { 12 | 13 | template 14 | class pair; 15 | 16 | template 17 | class device_allocator; 18 | 19 | template 20 | class device_vector; 21 | } // namespace thrust 22 | 23 | namespace fast_gicp { 24 | namespace cuda { 25 | 26 | class GaussianVoxelMap; 27 | 28 | class FastVGICPCudaCore { 29 | public: 30 | using Points = thrust::device_vector>; 31 | using Indices = thrust::device_vector>; 32 | using Matrices = thrust::device_vector>; 33 | using Correspondences = thrust::device_vector, thrust::device_allocator>>; 34 | using VoxelCoordinates = thrust::device_vector>; 35 | 36 | EIGEN_MAKE_ALIGNED_OPERATOR_NEW 37 | FastVGICPCudaCore(); 38 | ~FastVGICPCudaCore(); 39 | 40 | void set_resolution(double resolution); 41 | void set_kernel_params(double kernel_width, double kernel_max_dist); 42 | void set_neighbor_search_method(fast_gicp::NeighborSearchMethod method, double radius); 43 | 44 | void swap_source_and_target(); 45 | void set_source_cloud(const std::vector>& cloud); 46 | void set_target_cloud(const std::vector>& cloud); 47 | 48 | void set_source_neighbors(int k, const std::vector& neighbors); 49 | void set_target_neighbors(int k, const std::vector& neighbors); 50 | void find_source_neighbors(int k); 51 | void find_target_neighbors(int k); 52 | 53 | void calculate_source_covariances(RegularizationMethod method); 54 | void calculate_target_covariances(RegularizationMethod method); 55 | 56 | void calculate_source_covariances_rbf(RegularizationMethod method); 57 | void calculate_target_covariances_rbf(RegularizationMethod method); 58 | 59 | void get_source_covariances(std::vector>& covs) const; 60 | void get_target_covariances(std::vector>& covs) const; 61 | 62 | void get_voxel_num_points(std::vector& num_points) const; 63 | void get_voxel_means(std::vector>& means) const; 64 | void get_voxel_covs(std::vector>& covs) const; 65 | void get_voxel_correspondences(std::vector>& correspondences) const; 66 | 67 | void create_target_voxelmap(); 68 | 69 | void update_correspondences(const Eigen::Isometry3d& trans); 70 | 71 | double compute_error(const Eigen::Isometry3d& trans, Eigen::Matrix* H, Eigen::Matrix* b) const; 72 | 73 | public: 74 | double resolution; 75 | double kernel_width; 76 | double kernel_max_dist; 77 | std::unique_ptr offsets; 78 | 79 | std::unique_ptr source_points; 80 | std::unique_ptr target_points; 81 | 82 | std::unique_ptr source_neighbors; 83 | std::unique_ptr target_neighbors; 84 | 85 | std::unique_ptr source_covariances; 86 | std::unique_ptr target_covariances; 87 | 88 | std::unique_ptr voxelmap; 89 | 90 | Eigen::Isometry3f linearized_x; 91 | std::unique_ptr voxel_correspondences; 92 | }; 93 | 94 | } // namespace cuda 95 | } // namespace fast_gicp 96 | 97 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/cuda/find_voxel_correspondences.cuh: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_CUDA_FIND_VOXEL_CORRESPONDENCES_CUH 2 | #define FAST_GICP_CUDA_FIND_VOXEL_CORRESPONDENCES_CUH 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace fast_gicp { 12 | namespace cuda { 13 | 14 | void find_voxel_correspondences( 15 | const thrust::device_vector& src_points, 16 | const GaussianVoxelMap& voxelmap, 17 | const thrust::device_ptr& x_ptr, 18 | const thrust::device_vector& offsets, 19 | thrust::device_vector>& correspondences) ; 20 | 21 | } // namespace cuda 22 | } // namespace fast_gicp 23 | 24 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/cuda/gaussian_voxelmap.cuh: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_CUDA_GAUSSIAN_VOXELMAP_CUH 2 | #define FAST_GICP_CUDA_GAUSSIAN_VOXELMAP_CUH 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace fast_gicp { 9 | namespace cuda { 10 | 11 | struct VoxelMapInfo { 12 | int num_voxels; 13 | int num_buckets; 14 | int max_bucket_scan_count; 15 | float voxel_resolution; 16 | }; 17 | 18 | class GaussianVoxelMap { 19 | public: 20 | GaussianVoxelMap(float resolution, int init_num_buckets = 8192, int max_bucket_scan_count = 10); 21 | 22 | void create_voxelmap(const thrust::device_vector& points); 23 | void create_voxelmap(const thrust::device_vector& points, const thrust::device_vector& covariances); 24 | 25 | private: 26 | void create_bucket_table(cudaStream_t stream, const thrust::device_vector& points); 27 | 28 | public: 29 | const int init_num_buckets; 30 | VoxelMapInfo voxelmap_info; 31 | thrust::device_vector voxelmap_info_ptr; 32 | 33 | thrust::device_vector> buckets; 34 | 35 | // voxel data 36 | thrust::device_vector num_points; 37 | thrust::device_vector voxel_means; 38 | thrust::device_vector voxel_covs; 39 | }; 40 | 41 | } // namespace cuda 42 | } // namespace fast_gicp 43 | 44 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/cuda/ndt_compute_derivatives.cuh: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_CUDA_NDT_COMPUTE_DERIVATIVES_CUH 2 | #define FAST_GICP_CUDA_NDT_COMPUTE_DERIVATIVES_CUH 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | namespace fast_gicp { 11 | namespace cuda { 12 | 13 | double p2d_ndt_compute_derivatives( 14 | const GaussianVoxelMap& target_voxelmap, 15 | const thrust::device_vector& source_points, 16 | const thrust::device_vector>& correspondences, 17 | const thrust::device_ptr& linearized_x_ptr, 18 | const thrust::device_ptr& x_ptr, 19 | Eigen::Matrix* H, 20 | Eigen::Matrix* b); 21 | 22 | double d2d_ndt_compute_derivatives( 23 | const GaussianVoxelMap& target_voxelmap, 24 | const GaussianVoxelMap& source_voxelmap, 25 | const thrust::device_vector>& correspondences, 26 | const thrust::device_ptr& linearized_x_ptr, 27 | const thrust::device_ptr& x_ptr, 28 | Eigen::Matrix* H, 29 | Eigen::Matrix* b); 30 | 31 | } // namespace cuda 32 | } // namespace fast_gicp 33 | 34 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/cuda/ndt_cuda.cuh: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_NDT_CUDA_CUH 2 | #define FAST_GICP_NDT_CUDA_CUH 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | namespace thrust { 13 | template 14 | class pair; 15 | 16 | template 17 | class device_allocator; 18 | 19 | template 20 | class device_vector; 21 | } // namespace thrust 22 | 23 | namespace fast_gicp { 24 | namespace cuda { 25 | 26 | class GaussianVoxelMap; 27 | 28 | class NDTCudaCore { 29 | public: 30 | using Points = thrust::device_vector>; 31 | using Indices = thrust::device_vector>; 32 | using Matrices = thrust::device_vector>; 33 | using Correspondences = thrust::device_vector, thrust::device_allocator>>; 34 | using VoxelCoordinates = thrust::device_vector>; 35 | 36 | EIGEN_MAKE_ALIGNED_OPERATOR_NEW 37 | NDTCudaCore(); 38 | ~NDTCudaCore(); 39 | 40 | void set_distance_mode(fast_gicp::NDTDistanceMode mode); 41 | void set_resolution(double resolution); 42 | void set_neighbor_search_method(fast_gicp::NeighborSearchMethod method, double radius); 43 | 44 | void swap_source_and_target(); 45 | void set_source_cloud(const std::vector>& cloud); 46 | void set_target_cloud(const std::vector>& cloud); 47 | 48 | void create_voxelmaps(); 49 | void create_target_voxelmap(); 50 | void create_source_voxelmap(); 51 | 52 | void update_correspondences(const Eigen::Isometry3d& trans); 53 | double compute_error(const Eigen::Isometry3d& trans, Eigen::Matrix* H, Eigen::Matrix* b) const; 54 | 55 | public: 56 | fast_gicp::NDTDistanceMode distance_mode; 57 | double resolution; 58 | std::unique_ptr offsets; 59 | 60 | std::unique_ptr source_points; 61 | std::unique_ptr target_points; 62 | 63 | std::unique_ptr source_voxelmap; 64 | std::unique_ptr target_voxelmap; 65 | 66 | Eigen::Isometry3f linearized_x; 67 | std::unique_ptr correspondences; 68 | }; 69 | 70 | } // namespace cuda 71 | } // namespace fast_gicp 72 | 73 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/cuda/vector3_hash.cuh: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_CUDA_VECTOR3_HASH_CUH 2 | #define FAST_GICP_CUDA_VECTOR3_HASH_CUH 3 | 4 | namespace fast_gicp { 5 | namespace cuda { 6 | 7 | // taken from boost/hash.hpp 8 | __host__ __device__ inline void hash_combine(uint64_t& h, uint64_t k) { 9 | const uint64_t m = UINT64_C(0xc6a4a7935bd1e995); 10 | const int r = 47; 11 | 12 | k *= m; 13 | k ^= k >> r; 14 | k *= m; 15 | 16 | h ^= k; 17 | h *= m; 18 | 19 | h += 0xe6546b64; 20 | } 21 | 22 | inline __host__ __device__ bool equal(const Eigen::Vector3i& lhs, const Eigen::Vector3i& rhs) { 23 | return lhs[0] == rhs[0] && lhs[1] == rhs[1] && lhs[2] == rhs[2]; 24 | } 25 | 26 | // compute vector3i hash 27 | __host__ __device__ inline uint64_t vector3i_hash(const Eigen::Vector3i& x) { 28 | uint64_t seed = 0; 29 | hash_combine(seed, x[0]); 30 | hash_combine(seed, x[1]); 31 | hash_combine(seed, x[2]); 32 | return seed; 33 | } 34 | 35 | __host__ __device__ inline Eigen::Vector3i calc_voxel_coord(const Eigen::Vector3f& x, float resolution) { 36 | Eigen::Vector3i coord = (x.array() / resolution - 0.5).floor().cast(); 37 | return coord; 38 | } 39 | 40 | } 41 | } // namespace fast_gicp 42 | 43 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/gicp/experimental/fast_gicp_mp.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_FAST_GICP_MP_HPP 2 | #define FAST_GICP_FAST_GICP_MP_HPP 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace fast_gicp { 14 | 15 | template 16 | class FastGICPMultiPoints : public pcl::Registration { 17 | public: 18 | using Scalar = float; 19 | using Matrix4 = typename pcl::Registration::Matrix4; 20 | 21 | using PointCloudSource = typename pcl::Registration::PointCloudSource; 22 | using PointCloudSourcePtr = typename PointCloudSource::Ptr; 23 | using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr; 24 | 25 | using PointCloudTarget = typename pcl::Registration::PointCloudTarget; 26 | using PointCloudTargetPtr = typename PointCloudTarget::Ptr; 27 | using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr; 28 | 29 | using pcl::Registration::reg_name_; 30 | using pcl::Registration::input_; 31 | using pcl::Registration::target_; 32 | 33 | using pcl::Registration::nr_iterations_; 34 | using pcl::Registration::max_iterations_; 35 | using pcl::Registration::final_transformation_; 36 | using pcl::Registration::transformation_epsilon_; 37 | using pcl::Registration::converged_; 38 | using pcl::Registration::corr_dist_threshold_; 39 | 40 | FastGICPMultiPoints(); 41 | virtual ~FastGICPMultiPoints() override; 42 | 43 | void setNumThreads(int n); 44 | 45 | void setRotationEpsilon(double eps); 46 | 47 | void setCorrespondenceRandomness(int k); 48 | 49 | void setRegularizationMethod(RegularizationMethod method); 50 | 51 | virtual void setInputSource(const PointCloudSourceConstPtr& cloud) override; 52 | 53 | virtual void setInputTarget(const PointCloudTargetConstPtr& cloud) override; 54 | 55 | protected: 56 | virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override; 57 | 58 | private: 59 | bool is_converged(const Eigen::Matrix& delta) const; 60 | 61 | void update_correspondences(const Eigen::Matrix& x); 62 | 63 | Eigen::VectorXf loss_ls(const Eigen::Matrix& x, Eigen::MatrixXf* J) const; 64 | 65 | template 66 | bool calculate_covariances(const pcl::shared_ptr>& cloud, pcl::search::KdTree& kdtree, std::vector>& covariances); 67 | 68 | private: 69 | int num_threads_; 70 | int k_correspondences_; 71 | double rotation_epsilon_; 72 | double neighbor_search_radius_; 73 | 74 | RegularizationMethod regularization_method_; 75 | 76 | pcl::search::KdTree source_kdtree; 77 | pcl::search::KdTree target_kdtree; 78 | 79 | std::vector> source_covs; 80 | std::vector> target_covs; 81 | 82 | std::vector> correspondences; 83 | std::vector> sq_distances; 84 | }; 85 | } // namespace fast_gicp 86 | 87 | #endif 88 | -------------------------------------------------------------------------------- /include/fast_gicp/gicp/experimental/fast_gicp_mp_impl.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_FAST_GICP_MP_IMPL_HPP 2 | #define FAST_GICP_FAST_GICP_MP_IMPL_HPP 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | namespace fast_gicp { 18 | 19 | template 20 | FastGICPMultiPoints::FastGICPMultiPoints() { 21 | #ifdef _OPENMP 22 | num_threads_ = omp_get_max_threads(); 23 | #else 24 | num_threads_ = 1; 25 | #endif 26 | 27 | k_correspondences_ = 20; 28 | reg_name_ = "FastGICPMultiPoints"; 29 | max_iterations_ = 64; 30 | rotation_epsilon_ = 1e-5; 31 | transformation_epsilon_ = 1e-5; 32 | // corr_dist_threshold_ = 1.0; 33 | regularization_method_ = RegularizationMethod::PLANE; 34 | corr_dist_threshold_ = std::numeric_limits::max(); 35 | neighbor_search_radius_ = 0.5; 36 | } 37 | 38 | template 39 | FastGICPMultiPoints::~FastGICPMultiPoints() {} 40 | 41 | template 42 | void FastGICPMultiPoints::setRotationEpsilon(double eps) { 43 | rotation_epsilon_ = eps; 44 | } 45 | 46 | template 47 | void FastGICPMultiPoints::setNumThreads(int n) { 48 | num_threads_ = n; 49 | 50 | #ifdef _OPENMP 51 | if(n == 0) { 52 | num_threads_ = omp_get_max_threads(); 53 | } 54 | #endif 55 | } 56 | 57 | template 58 | void FastGICPMultiPoints::setCorrespondenceRandomness(int k) { 59 | k_correspondences_ = k; 60 | } 61 | 62 | template 63 | void FastGICPMultiPoints::setRegularizationMethod(RegularizationMethod method) { 64 | regularization_method_ = method; 65 | } 66 | 67 | template 68 | void FastGICPMultiPoints::setInputSource(const PointCloudSourceConstPtr& cloud) { 69 | pcl::Registration::setInputSource(cloud); 70 | calculate_covariances(*cloud, source_kdtree, source_covs); 71 | } 72 | 73 | template 74 | void FastGICPMultiPoints::setInputTarget(const PointCloudTargetConstPtr& cloud) { 75 | pcl::Registration::setInputTarget(cloud); 76 | calculate_covariances(cloud, target_kdtree, target_covs); 77 | } 78 | 79 | template 80 | void FastGICPMultiPoints::computeTransformation(PointCloudSource& output, const Matrix4& guess) { 81 | Eigen::Matrix x0; 82 | x0.head<3>() = Sophus::SO3f(guess.template block<3, 3>(0, 0)).log(); 83 | x0.tail<3>() = guess.template block<3, 1>(0, 3); 84 | 85 | // if(x0.head<3>().norm() < 1e-2) { 86 | // x0.head<3>() = (Eigen::Vector3f::Random()).normalized() * 1e-2; 87 | // } 88 | 89 | converged_ = false; 90 | GaussNewton solver; 91 | 92 | for(int i = 0; i < max_iterations_; i++) { 93 | nr_iterations_ = i; 94 | 95 | update_correspondences(x0); 96 | Eigen::MatrixXf J; 97 | Eigen::VectorXf loss = loss_ls(x0, &J); 98 | 99 | Eigen::Matrix delta = solver.delta(loss.cast(), J.cast()).cast(); 100 | 101 | x0.head<3>() = (Sophus::SO3f::exp(-delta.head<3>()) * Sophus::SO3f::exp(x0.head<3>())).log(); 102 | x0.tail<3>() -= delta.tail<3>(); 103 | 104 | if(is_converged(delta)) { 105 | converged_ = true; 106 | break; 107 | } 108 | } 109 | 110 | final_transformation_.setIdentity(); 111 | final_transformation_.template block<3, 3>(0, 0) = Sophus::SO3f::exp(x0.head<3>()).matrix(); 112 | final_transformation_.template block<3, 1>(0, 3) = x0.tail<3>(); 113 | 114 | pcl::transformPointCloud(*input_, output, final_transformation_); 115 | } 116 | 117 | template 118 | bool FastGICPMultiPoints::is_converged(const Eigen::Matrix& delta) const { 119 | double accum = 0.0; 120 | Eigen::Matrix3f R = Sophus::SO3f::exp(delta.head<3>()).matrix() - Eigen::Matrix3f::Identity(); 121 | Eigen::Vector3f t = delta.tail<3>(); 122 | 123 | Eigen::Matrix3f r_delta = 1.0 / rotation_epsilon_ * R.array().abs(); 124 | Eigen::Vector3f t_delta = 1.0 / transformation_epsilon_ * t.array().abs(); 125 | 126 | return std::max(r_delta.maxCoeff(), t_delta.maxCoeff()) < 1; 127 | } 128 | 129 | template 130 | void FastGICPMultiPoints::update_correspondences(const Eigen::Matrix& x) { 131 | Eigen::Matrix4f trans = Eigen::Matrix4f::Identity(); 132 | trans.block<3, 3>(0, 0) = Sophus::SO3f::exp(x.head<3>()).matrix(); 133 | trans.block<3, 1>(0, 3) = x.tail<3>(); 134 | 135 | correspondences.resize(input_->size()); 136 | sq_distances.resize(input_->size()); 137 | 138 | #pragma omp parallel for num_threads(num_threads_) 139 | for(int i = 0; i < input_->size(); i++) { 140 | PointTarget pt; 141 | pt.getVector4fMap() = trans * input_->at(i).getVector4fMap(); 142 | 143 | std::vector k_indices; 144 | std::vector k_sq_dists; 145 | target_kdtree.radiusSearch(pt, neighbor_search_radius_, k_indices, k_sq_dists); 146 | 147 | if(k_indices.empty()) { 148 | // target_kdtree.nearestKSearch(pt, 1, k_indices, k_sq_dists); 149 | } 150 | 151 | correspondences[i] = k_indices; 152 | sq_distances[i] = k_sq_dists; 153 | } 154 | } 155 | 156 | template 157 | Eigen::VectorXf FastGICPMultiPoints::loss_ls(const Eigen::Matrix& x, Eigen::MatrixXf* J) const { 158 | Eigen::Matrix4f trans = Eigen::Matrix4f::Identity(); 159 | trans.block<3, 3>(0, 0) = Sophus::SO3f::exp(x.head<3>()).matrix(); 160 | trans.block<3, 1>(0, 3) = x.tail<3>(); 161 | 162 | std::vector> losses(input_->size()); 163 | // use row-major arrangement for ease of repacking 164 | std::vector, Eigen::aligned_allocator>> Js(input_->size()); 165 | 166 | std::atomic_int count(0); 167 | 168 | #pragma omp parallel for num_threads(num_threads_) 169 | for(int i = 0; i < correspondences.size(); i++) { 170 | int source_index = i; 171 | const auto& mean_A = input_->at(source_index).getVector4fMap(); 172 | const auto& cov_A = source_covs[source_index]; 173 | 174 | const auto& k_indices = correspondences[i]; 175 | const auto& k_sq_dists = sq_distances[i]; 176 | if(k_indices.empty()) { 177 | continue; 178 | } 179 | 180 | double sum_w = 0.0; 181 | Eigen::Vector4d sum_mean_B = Eigen::Vector4d::Zero(); 182 | Eigen::Matrix4d sum_cov_B = Eigen::Matrix4d::Zero(); 183 | for(int j = 0; j < k_indices.size(); j++) { 184 | double w = 1 - std::sqrt(k_sq_dists[j]) / neighbor_search_radius_; 185 | w = std::max(1e-3, std::min(1.0, w)); 186 | sum_w += w; 187 | 188 | int target_index = k_indices[j]; 189 | 190 | sum_mean_B += w * target_->at(target_index).getVector4fMap().template cast(); 191 | sum_cov_B += w * target_covs[target_index].template cast(); 192 | } 193 | 194 | Eigen::Vector4f mean_B = (sum_mean_B / sum_w).template cast(); 195 | Eigen::Matrix4f cov_B = (sum_cov_B / sum_w).template cast(); 196 | 197 | Eigen::Vector4f transed_mean_A = trans * mean_A; 198 | Eigen::Vector4f d = mean_B - transed_mean_A; 199 | Eigen::Matrix4f RCR = cov_B + trans * cov_A * trans.transpose(); 200 | RCR(3, 3) = 1; 201 | 202 | Eigen::Matrix4f RCR_inv = RCR.inverse(); 203 | Eigen::Vector4f RCRd = RCR_inv * d; 204 | 205 | Eigen::Matrix dtdx0 = Eigen::Matrix::Zero(); 206 | dtdx0.block<3, 3>(0, 0) = skew(transed_mean_A.head<3>()); 207 | dtdx0.block<3, 3>(0, 3) = -Eigen::Matrix3f::Identity(); 208 | 209 | Eigen::Matrix jlossexp = RCR_inv * dtdx0; 210 | 211 | int n = count++; 212 | losses[n] = RCRd.head<3>(); 213 | Js[n] = jlossexp.block<3, 6>(0, 0); 214 | } 215 | 216 | int final_size = count; 217 | Eigen::VectorXf loss = Eigen::Map(losses.front().data(), final_size * 3); 218 | *J = Eigen::Map(Js.front().data(), 6, final_size * 3).transpose(); 219 | 220 | return loss; 221 | } 222 | 223 | template 224 | template 225 | bool FastGICPMultiPoints::calculate_covariances(const pcl::shared_ptr>& cloud, pcl::search::KdTree& kdtree, std::vector>& covariances) { 226 | kdtree.setInputCloud(cloud); 227 | covariances.resize(cloud->size()); 228 | 229 | #pragma omp parallel for num_threads(num_threads_) 230 | for(int i = 0; i < cloud->size(); i++) { 231 | std::vector k_indices; 232 | std::vector k_sq_distances; 233 | kdtree.nearestKSearch(cloud->at(i), k_correspondences_, k_indices, k_sq_distances); 234 | 235 | Eigen::Matrix data(4, k_correspondences_); 236 | 237 | for(int j = 0; j < k_indices.size(); j++) { 238 | data.col(j) = cloud->at(k_indices[j]).getVector4fMap(); 239 | } 240 | 241 | data.colwise() -= data.rowwise().mean().eval(); 242 | Eigen::Matrix4f cov = data * data.transpose(); 243 | 244 | if(regularization_method_ == RegularizationMethod::FROBENIUS) { 245 | double lambda = 1e-3; 246 | Eigen::Matrix3d C = cov.block<3, 3>(0, 0).cast() + lambda * Eigen::Matrix3d::Identity(); 247 | Eigen::Matrix3d C_inv = C.inverse(); 248 | covariances[i].setZero(); 249 | covariances[i].template block<3, 3>(0, 0) = (C_inv / C_inv.norm()).inverse().cast(); 250 | } else { 251 | Eigen::JacobiSVD svd(cov.block<3, 3>(0, 0), Eigen::ComputeFullU | Eigen::ComputeFullV); 252 | Eigen::Vector3f values; 253 | 254 | switch(regularization_method_) { 255 | default: 256 | std::cerr << "here must not be reached" << std::endl; 257 | abort(); 258 | case RegularizationMethod::PLANE: 259 | values = Eigen::Vector3f(1, 1, 1e-3); 260 | break; 261 | case RegularizationMethod::MIN_EIG: 262 | values = svd.singularValues().array().max(1e-3); 263 | break; 264 | case RegularizationMethod::NORMALIZED_MIN_EIG: 265 | values = svd.singularValues() / svd.singularValues().maxCoeff(); 266 | values = values.array().max(1e-3); 267 | break; 268 | } 269 | 270 | covariances[i].setZero(); 271 | covariances[i].template block<3, 3>(0, 0) = svd.matrixU() * values.asDiagonal() * svd.matrixV().transpose(); 272 | } 273 | } 274 | 275 | return true; 276 | } 277 | 278 | } // namespace fast_gicp 279 | 280 | #endif 281 | -------------------------------------------------------------------------------- /include/fast_gicp/gicp/fast_gicp.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_FAST_GICP_HPP 2 | #define FAST_GICP_FAST_GICP_HPP 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace fast_gicp { 15 | 16 | /** 17 | * @brief Fast GICP algorithm optimized for multi threading with OpenMP 18 | */ 19 | template, typename SearchMethodTarget = pcl::search::KdTree> 20 | class FastGICP : public LsqRegistration { 21 | public: 22 | using Scalar = float; 23 | using Matrix4 = typename pcl::Registration::Matrix4; 24 | 25 | using PointCloudSource = typename pcl::Registration::PointCloudSource; 26 | using PointCloudSourcePtr = typename PointCloudSource::Ptr; 27 | using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr; 28 | 29 | using PointCloudTarget = typename pcl::Registration::PointCloudTarget; 30 | using PointCloudTargetPtr = typename PointCloudTarget::Ptr; 31 | using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr; 32 | 33 | #if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0) 34 | using Ptr = pcl::shared_ptr>; 35 | using ConstPtr = pcl::shared_ptr>; 36 | #else 37 | using Ptr = boost::shared_ptr>; 38 | using ConstPtr = boost::shared_ptr>; 39 | #endif 40 | 41 | protected: 42 | using pcl::Registration::reg_name_; 43 | using pcl::Registration::input_; 44 | using pcl::Registration::target_; 45 | using pcl::Registration::corr_dist_threshold_; 46 | 47 | public: 48 | FastGICP(); 49 | virtual ~FastGICP() override; 50 | 51 | void setNumThreads(int n); 52 | void setCorrespondenceRandomness(int k); 53 | void setRegularizationMethod(RegularizationMethod method); 54 | 55 | virtual void swapSourceAndTarget() override; 56 | virtual void clearSource() override; 57 | virtual void clearTarget() override; 58 | 59 | virtual void setInputSource(const PointCloudSourceConstPtr& cloud) override; 60 | virtual void setSourceCovariances(const std::vector>& covs); 61 | virtual void setInputTarget(const PointCloudTargetConstPtr& cloud) override; 62 | virtual void setTargetCovariances(const std::vector>& covs); 63 | 64 | const std::vector>& getSourceCovariances() const { 65 | return source_covs_; 66 | } 67 | 68 | const std::vector>& getTargetCovariances() const { 69 | return target_covs_; 70 | } 71 | 72 | protected: 73 | virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override; 74 | 75 | virtual void update_correspondences(const Eigen::Isometry3d& trans); 76 | 77 | virtual double linearize(const Eigen::Isometry3d& trans, Eigen::Matrix* H, Eigen::Matrix* b) override; 78 | 79 | virtual double compute_error(const Eigen::Isometry3d& trans) override; 80 | 81 | template 82 | bool calculate_covariances(const typename pcl::PointCloud::ConstPtr& cloud, pcl::search::Search& kdtree, std::vector>& covariances); 83 | 84 | protected: 85 | int num_threads_; 86 | int k_correspondences_; 87 | 88 | RegularizationMethod regularization_method_; 89 | 90 | std::shared_ptr search_source_; 91 | std::shared_ptr search_target_; 92 | 93 | std::vector> source_covs_; 94 | std::vector> target_covs_; 95 | 96 | std::vector> mahalanobis_; 97 | 98 | std::vector correspondences_; 99 | std::vector sq_distances_; 100 | }; 101 | } // namespace fast_gicp 102 | 103 | #endif 104 | -------------------------------------------------------------------------------- /include/fast_gicp/gicp/fast_gicp_st.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_FAST_GICP_ST_HPP 2 | #define FAST_GICP_FAST_GICP_ST_HPP 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace fast_gicp { 15 | 16 | /** 17 | * @brief Fast GICP algorithm optimized for single threading 18 | */ 19 | template 20 | class FastGICPSingleThread : public FastGICP { 21 | public: 22 | using Scalar = float; 23 | using Matrix4 = typename pcl::Registration::Matrix4; 24 | 25 | using PointCloudSource = typename pcl::Registration::PointCloudSource; 26 | using PointCloudSourcePtr = typename PointCloudSource::Ptr; 27 | using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr; 28 | 29 | #if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0) 30 | using Ptr = pcl::shared_ptr>; 31 | using ConstPtr = pcl::shared_ptr>; 32 | #else 33 | using Ptr = boost::shared_ptr>; 34 | using ConstPtr = boost::shared_ptr>; 35 | #endif 36 | 37 | protected: 38 | using pcl::Registration::input_; 39 | using pcl::Registration::target_; 40 | 41 | using FastGICP::search_target_; 42 | using FastGICP::correspondences_; 43 | using FastGICP::sq_distances_; 44 | using FastGICP::source_covs_; 45 | using FastGICP::target_covs_; 46 | using FastGICP::mahalanobis_; 47 | 48 | public: 49 | FastGICPSingleThread(); 50 | virtual ~FastGICPSingleThread() override; 51 | 52 | protected: 53 | virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override; 54 | 55 | private: 56 | virtual void update_correspondences(const Eigen::Isometry3d& trans) override; 57 | 58 | virtual double linearize(const Eigen::Isometry3d& trans, Eigen::Matrix* H = nullptr, Eigen::Matrix* b = nullptr) override; 59 | 60 | virtual double compute_error(const Eigen::Isometry3d& trans) override; 61 | 62 | private: 63 | std::vector second_sq_distances_; 64 | std::vector> anchors_; 65 | }; 66 | } // namespace fast_gicp 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /include/fast_gicp/gicp/fast_vgicp.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_FAST_VGICP_HPP 2 | #define FAST_GICP_FAST_VGICP_HPP 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | namespace fast_gicp { 19 | 20 | /** 21 | * @brief Fast Voxelized GICP algorithm boosted with OpenMP 22 | */ 23 | template 24 | class FastVGICP : public FastGICP { 25 | public: 26 | using Scalar = float; 27 | using Matrix4 = typename pcl::Registration::Matrix4; 28 | 29 | using PointCloudSource = typename pcl::Registration::PointCloudSource; 30 | using PointCloudSourcePtr = typename PointCloudSource::Ptr; 31 | using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr; 32 | 33 | using PointCloudTarget = typename pcl::Registration::PointCloudTarget; 34 | using PointCloudTargetPtr = typename PointCloudTarget::Ptr; 35 | using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr; 36 | 37 | #if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0) 38 | using Ptr = pcl::shared_ptr>; 39 | using ConstPtr = pcl::shared_ptr>; 40 | #else 41 | using Ptr = boost::shared_ptr>; 42 | using ConstPtr = boost::shared_ptr>; 43 | #endif 44 | 45 | protected: 46 | using pcl::Registration::input_; 47 | using pcl::Registration::target_; 48 | 49 | using FastGICP::num_threads_; 50 | using FastGICP::search_source_; 51 | using FastGICP::search_target_; 52 | using FastGICP::source_covs_; 53 | using FastGICP::target_covs_; 54 | 55 | public: 56 | FastVGICP(); 57 | virtual ~FastVGICP() override; 58 | 59 | void setResolution(double resolution); 60 | void setVoxelAccumulationMode(VoxelAccumulationMode mode); 61 | void setNeighborSearchMethod(NeighborSearchMethod method); 62 | 63 | virtual void swapSourceAndTarget() override; 64 | virtual void setInputTarget(const PointCloudTargetConstPtr& cloud) override; 65 | 66 | protected: 67 | virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override; 68 | virtual void update_correspondences(const Eigen::Isometry3d& trans) override; 69 | virtual double linearize(const Eigen::Isometry3d& trans, Eigen::Matrix* H = nullptr, Eigen::Matrix* b = nullptr) override; 70 | virtual double compute_error(const Eigen::Isometry3d& trans) override; 71 | 72 | protected: 73 | double voxel_resolution_; 74 | NeighborSearchMethod search_method_; 75 | VoxelAccumulationMode voxel_mode_; 76 | 77 | std::unique_ptr> voxelmap_; 78 | 79 | std::vector> voxel_correspondences_; 80 | std::vector> voxel_mahalanobis_; 81 | }; 82 | } // namespace fast_gicp 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /include/fast_gicp/gicp/fast_vgicp_cuda.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_FAST_VGICP_CUDA_HPP 2 | #define FAST_GICP_FAST_VGICP_CUDA_HPP 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | namespace fast_gicp { 16 | 17 | namespace cuda { 18 | class FastVGICPCudaCore; 19 | } 20 | 21 | enum class NearestNeighborMethod { CPU_PARALLEL_KDTREE, GPU_BRUTEFORCE, GPU_RBF_KERNEL }; 22 | 23 | /** 24 | * @brief Fast Voxelized GICP algorithm boosted with CUDA 25 | */ 26 | template 27 | class FastVGICPCuda : public LsqRegistration { 28 | public: 29 | using Scalar = float; 30 | using Matrix4 = typename pcl::Registration::Matrix4; 31 | 32 | using PointCloudSource = typename pcl::Registration::PointCloudSource; 33 | using PointCloudSourcePtr = typename PointCloudSource::Ptr; 34 | using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr; 35 | 36 | using PointCloudTarget = typename pcl::Registration::PointCloudTarget; 37 | using PointCloudTargetPtr = typename PointCloudTarget::Ptr; 38 | using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr; 39 | 40 | #if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0) 41 | using Ptr = pcl::shared_ptr>; 42 | using ConstPtr = pcl::shared_ptr>; 43 | #else 44 | using Ptr = boost::shared_ptr>; 45 | using ConstPtr = boost::shared_ptr>; 46 | #endif 47 | 48 | protected: 49 | using pcl::Registration::input_; 50 | using pcl::Registration::target_; 51 | 52 | public: 53 | FastVGICPCuda(); 54 | virtual ~FastVGICPCuda() override; 55 | 56 | void setCorrespondenceRandomness(int k); 57 | void setResolution(double resolution); 58 | void setKernelWidth(double kernel_width, double max_dist = -1.0); 59 | void setRegularizationMethod(RegularizationMethod method); 60 | void setNeighborSearchMethod(NeighborSearchMethod method, double radius = -1.0); 61 | void setNearestNeighborSearchMethod(NearestNeighborMethod method); 62 | 63 | virtual void swapSourceAndTarget() override; 64 | virtual void clearSource() override; 65 | virtual void clearTarget() override; 66 | 67 | virtual void setInputSource(const PointCloudSourceConstPtr& cloud) override; 68 | virtual void setInputTarget(const PointCloudTargetConstPtr& cloud) override; 69 | 70 | protected: 71 | virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override; 72 | virtual double linearize(const Eigen::Isometry3d& trans, Eigen::Matrix* H = nullptr, Eigen::Matrix* b = nullptr) override; 73 | virtual double compute_error(const Eigen::Isometry3d& trans) override; 74 | 75 | template 76 | std::vector find_neighbors_parallel_kdtree(int k, typename pcl::PointCloud::ConstPtr cloud) const; 77 | 78 | private: 79 | int k_correspondences_; 80 | double voxel_resolution_; 81 | RegularizationMethod regularization_method_; 82 | NearestNeighborMethod neighbor_search_method_; 83 | 84 | std::unique_ptr vgicp_cuda_; 85 | }; 86 | 87 | } // namespace fast_gicp 88 | 89 | #endif 90 | -------------------------------------------------------------------------------- /include/fast_gicp/gicp/fast_vgicp_voxel.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_FAST_VGICP_VOXEL_HPP 2 | #define FAST_GICP_FAST_VGICP_VOXEL_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace fast_gicp { 9 | 10 | static std::vector> neighbor_offsets(NeighborSearchMethod search_method) { 11 | switch(search_method) { 12 | // clang-format off 13 | default: 14 | std::cerr << "unsupported neighbor search method" << std::endl; 15 | abort(); 16 | case NeighborSearchMethod::DIRECT1: 17 | return std::vector>{ 18 | Eigen::Vector3i(0, 0, 0) 19 | }; 20 | case NeighborSearchMethod::DIRECT7: 21 | return std::vector>{ 22 | Eigen::Vector3i(0, 0, 0), 23 | Eigen::Vector3i(1, 0, 0), 24 | Eigen::Vector3i(-1, 0, 0), 25 | Eigen::Vector3i(0, 1, 0), 26 | Eigen::Vector3i(0, -1, 0), 27 | Eigen::Vector3i(0, 0, 1), 28 | Eigen::Vector3i(0, 0, -1) 29 | }; 30 | case NeighborSearchMethod::DIRECT27: 31 | break; 32 | // clang-format on 33 | } 34 | 35 | std::vector> offsets27; 36 | for(int i = 0; i < 3; i++) { 37 | for(int j = 0; j < 3; j++) { 38 | for(int k = 0; k < 3; k++) { 39 | offsets27.push_back(Eigen::Vector3i(i - 1, j - 1, k - 1)); 40 | } 41 | } 42 | } 43 | return offsets27; 44 | } 45 | 46 | class Vector3iHash { 47 | public: 48 | size_t operator()(const Eigen::Vector3i& x) const { 49 | size_t seed = 0; 50 | boost::hash_combine(seed, x[0]); 51 | boost::hash_combine(seed, x[1]); 52 | boost::hash_combine(seed, x[2]); 53 | return seed; 54 | } 55 | }; 56 | 57 | struct GaussianVoxel { 58 | public: 59 | EIGEN_MAKE_ALIGNED_OPERATOR_NEW 60 | using Ptr = std::shared_ptr; 61 | 62 | GaussianVoxel() { 63 | num_points = 0; 64 | mean.setZero(); 65 | cov.setZero(); 66 | } 67 | virtual ~GaussianVoxel() {} 68 | 69 | virtual void append(const Eigen::Vector4d& mean_, const Eigen::Matrix4d& cov_) = 0; 70 | 71 | virtual void finalize() = 0; 72 | 73 | public: 74 | int num_points; 75 | Eigen::Vector4d mean; 76 | Eigen::Matrix4d cov; 77 | }; 78 | 79 | struct MultiplicativeGaussianVoxel : GaussianVoxel { 80 | public: 81 | EIGEN_MAKE_ALIGNED_OPERATOR_NEW 82 | 83 | MultiplicativeGaussianVoxel() : GaussianVoxel() {} 84 | virtual ~MultiplicativeGaussianVoxel() {} 85 | 86 | virtual void append(const Eigen::Vector4d& mean_, const Eigen::Matrix4d& cov_) override { 87 | num_points++; 88 | Eigen::Matrix4d cov_inv = cov_; 89 | cov_inv(3, 3) = 1; 90 | cov_inv = cov_inv.inverse().eval(); 91 | 92 | cov += cov_inv; 93 | mean += cov_inv * mean_; 94 | } 95 | 96 | virtual void finalize() override { 97 | cov(3, 3) = 1; 98 | mean[3] = 1; 99 | 100 | cov = cov.inverse().eval(); 101 | mean = (cov * mean).eval(); 102 | } 103 | }; 104 | 105 | struct AdditiveGaussianVoxel : GaussianVoxel { 106 | public: 107 | EIGEN_MAKE_ALIGNED_OPERATOR_NEW 108 | 109 | AdditiveGaussianVoxel() : GaussianVoxel() {} 110 | virtual ~AdditiveGaussianVoxel() {} 111 | 112 | virtual void append(const Eigen::Vector4d& mean_, const Eigen::Matrix4d& cov_) override { 113 | num_points++; 114 | mean += mean_; 115 | cov += cov_; 116 | } 117 | 118 | virtual void finalize() override { 119 | mean /= num_points; 120 | cov /= num_points; 121 | } 122 | }; 123 | 124 | template 125 | class GaussianVoxelMap { 126 | public: 127 | GaussianVoxelMap(double resolution, VoxelAccumulationMode mode) : voxel_resolution_(resolution), voxel_mode_(mode) {} 128 | 129 | void create_voxelmap(const pcl::PointCloud& cloud, const std::vector>& covs) { 130 | voxels_.clear(); 131 | for(int i = 0; i < cloud.size(); i++) { 132 | Eigen::Vector3i coord = voxel_coord(cloud.at(i).getVector4fMap().template cast()); 133 | 134 | auto found = voxels_.find(coord); 135 | if(found == voxels_.end()) { 136 | GaussianVoxel::Ptr voxel; 137 | switch(voxel_mode_) { 138 | case VoxelAccumulationMode::ADDITIVE: 139 | case VoxelAccumulationMode::ADDITIVE_WEIGHTED: 140 | voxel = std::shared_ptr(new AdditiveGaussianVoxel); 141 | break; 142 | case VoxelAccumulationMode::MULTIPLICATIVE: 143 | voxel = std::shared_ptr(new MultiplicativeGaussianVoxel); 144 | break; 145 | } 146 | found = voxels_.insert(found, std::make_pair(coord, voxel)); 147 | } 148 | 149 | auto& voxel = found->second; 150 | voxel->append(cloud.at(i).getVector4fMap().template cast(), covs[i]); 151 | } 152 | 153 | for(auto& voxel : voxels_) { 154 | voxel.second->finalize(); 155 | } 156 | } 157 | 158 | Eigen::Vector3i voxel_coord(const Eigen::Vector4d& x) const { 159 | return (x.array() / voxel_resolution_ - 0.5).floor().template cast().template head<3>(); 160 | } 161 | 162 | Eigen::Vector4d voxel_origin(const Eigen::Vector3i& coord) const { 163 | Eigen::Vector3d origin = (coord.template cast().array() + 0.5) * voxel_resolution_; 164 | return Eigen::Vector4d(origin[0], origin[1], origin[2], 1.0f); 165 | } 166 | 167 | GaussianVoxel::Ptr lookup_voxel(const Eigen::Vector3i& coord) const { 168 | auto found = voxels_.find(coord); 169 | if(found == voxels_.end()) { 170 | return nullptr; 171 | } 172 | 173 | return found->second; 174 | } 175 | 176 | private: 177 | double voxel_resolution_; 178 | VoxelAccumulationMode voxel_mode_; 179 | 180 | using VoxelMap = std::unordered_map, Eigen::aligned_allocator>>; 181 | VoxelMap voxels_; 182 | }; 183 | 184 | } // namespace fast_gicp 185 | 186 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/gicp/gicp_settings.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_GICP_SETTINGS_HPP 2 | #define FAST_GICP_GICP_SETTINGS_HPP 3 | 4 | namespace fast_gicp { 5 | 6 | enum class RegularizationMethod { NONE, MIN_EIG, NORMALIZED_MIN_EIG, PLANE, FROBENIUS }; 7 | 8 | enum class NeighborSearchMethod { DIRECT27, DIRECT7, DIRECT1, /* supported on only VGICP_CUDA */ DIRECT_RADIUS }; 9 | 10 | enum class VoxelAccumulationMode { ADDITIVE, ADDITIVE_WEIGHTED, MULTIPLICATIVE }; 11 | } 12 | 13 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/gicp/impl/fast_gicp_st_impl.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_FAST_GICP_ST_IMPL_HPP 2 | #define FAST_GICP_FAST_GICP_ST_IMPL_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace fast_gicp { 8 | 9 | template 10 | FastGICPSingleThread::FastGICPSingleThread() : FastGICP() { 11 | this->reg_name_ = "FastGICPSingleThread"; 12 | this->num_threads_ = 1; 13 | } 14 | 15 | template 16 | FastGICPSingleThread::~FastGICPSingleThread() {} 17 | 18 | template 19 | void FastGICPSingleThread::computeTransformation(PointCloudSource& output, const Matrix4& guess) { 20 | anchors_.clear(); 21 | FastGICP::computeTransformation(output, guess); 22 | } 23 | 24 | template 25 | void FastGICPSingleThread::update_correspondences(const Eigen::Isometry3d& x) { 26 | assert(source_covs_.size() == input_->size()); 27 | assert(target_covs_.size() == target_->size()); 28 | 29 | Eigen::Isometry3f trans = x.template cast(); 30 | 31 | bool is_first = anchors_.empty(); 32 | 33 | correspondences_.resize(input_->size()); 34 | sq_distances_.resize(input_->size()); 35 | second_sq_distances_.resize(input_->size()); 36 | anchors_.resize(input_->size()); 37 | mahalanobis_.resize(input_->size()); 38 | 39 | std::vector k_indices; 40 | std::vector k_sq_dists; 41 | 42 | for (int i = 0; i < input_->size(); i++) { 43 | PointTarget pt; 44 | pt.getVector4fMap() = trans * input_->at(i).getVector4fMap(); 45 | 46 | if (!is_first) { 47 | double d = (pt.getVector4fMap() - anchors_[i]).norm(); 48 | double max_first = std::sqrt(sq_distances_[i]) + d; 49 | double min_second = std::sqrt(second_sq_distances_[i]) - d; 50 | 51 | if (max_first < min_second) { 52 | continue; 53 | } 54 | } 55 | 56 | search_target_->nearestKSearch(pt, 2, k_indices, k_sq_dists); 57 | 58 | correspondences_[i] = k_sq_dists[0] < this->corr_dist_threshold_ * this->corr_dist_threshold_ ? k_indices[0] : -1; 59 | sq_distances_[i] = k_sq_dists[0]; 60 | second_sq_distances_[i] = k_sq_dists[1]; 61 | anchors_[i] = pt.getVector4fMap(); 62 | 63 | if (correspondences_[i] < 0) { 64 | continue; 65 | } 66 | 67 | const int target_index = correspondences_[i]; 68 | const auto& cov_A = source_covs_[i]; 69 | const auto& cov_B = target_covs_[target_index]; 70 | 71 | Eigen::Matrix4d RCR = cov_B + x.matrix() * cov_A * x.matrix().transpose(); 72 | RCR(3, 3) = 1.0; 73 | 74 | mahalanobis_[i] = RCR.inverse(); 75 | mahalanobis_[i](3, 3) = 0.0; 76 | } 77 | } 78 | 79 | template 80 | double FastGICPSingleThread::linearize(const Eigen::Isometry3d& trans, Eigen::Matrix* H, Eigen::Matrix* b) { 81 | if (H && b) { 82 | update_correspondences(trans); 83 | H->setZero(); 84 | b->setZero(); 85 | } 86 | 87 | double sum_errors = 0.0; 88 | for (int i = 0; i < input_->size(); i++) { 89 | int target_index = correspondences_[i]; 90 | if (target_index < 0) { 91 | continue; 92 | } 93 | 94 | const Eigen::Vector4d mean_A = input_->at(i).getVector4fMap().template cast(); 95 | const auto& cov_A = source_covs_[i]; 96 | 97 | const Eigen::Vector4d mean_B = target_->at(target_index).getVector4fMap().template cast(); 98 | const auto& cov_B = target_covs_[target_index]; 99 | 100 | const Eigen::Vector4d transed_mean_A = trans * mean_A; 101 | const Eigen::Vector4d error = mean_B - transed_mean_A; 102 | 103 | sum_errors += error.transpose() * mahalanobis_[i] * error; 104 | 105 | if (H == nullptr || b == nullptr) { 106 | continue; 107 | } 108 | 109 | Eigen::Matrix dtdx0 = Eigen::Matrix::Zero(); 110 | dtdx0.block<3, 3>(0, 0) = skewd(transed_mean_A.head<3>()); 111 | dtdx0.block<3, 3>(0, 3) = -Eigen::Matrix3d::Identity(); 112 | 113 | Eigen::Matrix jlossexp = dtdx0; 114 | 115 | (*H) += jlossexp.transpose() * mahalanobis_[i] * jlossexp; 116 | (*b) += jlossexp.transpose() * mahalanobis_[i] * error; 117 | } 118 | 119 | return sum_errors; 120 | } 121 | 122 | template 123 | double FastGICPSingleThread::compute_error(const Eigen::Isometry3d& trans) { 124 | return linearize(trans, nullptr, nullptr); 125 | } 126 | 127 | } // namespace fast_gicp 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /include/fast_gicp/gicp/impl/fast_vgicp_cuda_impl.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_FAST_VGICP_CUDA_IMPL_HPP 2 | #define FAST_GICP_FAST_VGICP_CUDA_IMPL_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | namespace fast_gicp { 20 | 21 | template 22 | FastVGICPCuda::FastVGICPCuda() : LsqRegistration() { 23 | this->reg_name_ = "FastVGICPCuda"; 24 | k_correspondences_ = 20; 25 | voxel_resolution_ = 1.0; 26 | regularization_method_ = RegularizationMethod::PLANE; 27 | neighbor_search_method_ = NearestNeighborMethod::CPU_PARALLEL_KDTREE; 28 | 29 | vgicp_cuda_.reset(new cuda::FastVGICPCudaCore()); 30 | vgicp_cuda_->set_resolution(voxel_resolution_); 31 | vgicp_cuda_->set_kernel_params(0.5, 3.0); 32 | } 33 | 34 | template 35 | FastVGICPCuda::~FastVGICPCuda() {} 36 | 37 | template 38 | void FastVGICPCuda::setCorrespondenceRandomness(int k) {} 39 | 40 | template 41 | void FastVGICPCuda::setResolution(double resolution) { 42 | vgicp_cuda_->set_resolution(resolution); 43 | } 44 | 45 | template 46 | void FastVGICPCuda::setKernelWidth(double kernel_width, double max_dist) { 47 | if (max_dist <= 0.0) { 48 | max_dist = kernel_width * 5.0; 49 | } 50 | vgicp_cuda_->set_kernel_params(kernel_width, max_dist); 51 | } 52 | 53 | template 54 | void FastVGICPCuda::setRegularizationMethod(RegularizationMethod method) { 55 | regularization_method_ = method; 56 | } 57 | 58 | template 59 | void FastVGICPCuda::setNeighborSearchMethod(NeighborSearchMethod method, double radius) { 60 | vgicp_cuda_->set_neighbor_search_method(method, radius); 61 | } 62 | 63 | template 64 | void FastVGICPCuda::setNearestNeighborSearchMethod(NearestNeighborMethod method) { 65 | neighbor_search_method_ = method; 66 | } 67 | 68 | template 69 | void FastVGICPCuda::swapSourceAndTarget() { 70 | vgicp_cuda_->swap_source_and_target(); 71 | input_.swap(target_); 72 | } 73 | 74 | template 75 | void FastVGICPCuda::clearSource() { 76 | input_.reset(); 77 | } 78 | 79 | template 80 | void FastVGICPCuda::clearTarget() { 81 | target_.reset(); 82 | } 83 | 84 | template 85 | void FastVGICPCuda::setInputSource(const PointCloudSourceConstPtr& cloud) { 86 | // the input cloud is the same as the previous one 87 | if(cloud == input_) { 88 | return; 89 | } 90 | 91 | pcl::Registration::setInputSource(cloud); 92 | 93 | std::vector> points(cloud->size()); 94 | std::transform(cloud->begin(), cloud->end(), points.begin(), [=](const PointSource& pt) { return pt.getVector3fMap(); }); 95 | 96 | vgicp_cuda_->set_source_cloud(points); 97 | switch(neighbor_search_method_) { 98 | case NearestNeighborMethod::CPU_PARALLEL_KDTREE: { 99 | std::vector neighbors = find_neighbors_parallel_kdtree(k_correspondences_, cloud); 100 | vgicp_cuda_->set_source_neighbors(k_correspondences_, neighbors); 101 | vgicp_cuda_->calculate_source_covariances(regularization_method_); 102 | } break; 103 | case NearestNeighborMethod::GPU_BRUTEFORCE: 104 | vgicp_cuda_->find_source_neighbors(k_correspondences_); 105 | vgicp_cuda_->calculate_source_covariances(regularization_method_); 106 | break; 107 | case NearestNeighborMethod::GPU_RBF_KERNEL: 108 | vgicp_cuda_->calculate_source_covariances_rbf(regularization_method_); 109 | break; 110 | } 111 | } 112 | 113 | template 114 | void FastVGICPCuda::setInputTarget(const PointCloudTargetConstPtr& cloud) { 115 | // the input cloud is the same as the previous one 116 | if(cloud == target_) { 117 | return; 118 | } 119 | 120 | pcl::Registration::setInputTarget(cloud); 121 | 122 | std::vector> points(cloud->size()); 123 | std::transform(cloud->begin(), cloud->end(), points.begin(), [=](const PointTarget& pt) { return pt.getVector3fMap(); }); 124 | 125 | vgicp_cuda_->set_target_cloud(points); 126 | switch(neighbor_search_method_) { 127 | case NearestNeighborMethod::CPU_PARALLEL_KDTREE: { 128 | std::vector neighbors = find_neighbors_parallel_kdtree(k_correspondences_, cloud); 129 | vgicp_cuda_->set_target_neighbors(k_correspondences_, neighbors); 130 | vgicp_cuda_->calculate_target_covariances(regularization_method_); 131 | } break; 132 | case NearestNeighborMethod::GPU_BRUTEFORCE: 133 | vgicp_cuda_->find_target_neighbors(k_correspondences_); 134 | vgicp_cuda_->calculate_target_covariances(regularization_method_); 135 | break; 136 | case NearestNeighborMethod::GPU_RBF_KERNEL: 137 | vgicp_cuda_->calculate_target_covariances_rbf(regularization_method_); 138 | break; 139 | } 140 | vgicp_cuda_->create_target_voxelmap(); 141 | } 142 | 143 | template 144 | void FastVGICPCuda::computeTransformation(PointCloudSource& output, const Matrix4& guess) { 145 | vgicp_cuda_->set_resolution(this->voxel_resolution_); 146 | 147 | LsqRegistration::computeTransformation(output, guess); 148 | } 149 | 150 | template 151 | template 152 | std::vector FastVGICPCuda::find_neighbors_parallel_kdtree(int k, typename pcl::PointCloud::ConstPtr cloud) const { 153 | pcl::search::KdTree kdtree; 154 | kdtree.setInputCloud(cloud); 155 | std::vector neighbors(cloud->size() * k); 156 | 157 | #pragma omp parallel for schedule(guided, 8) 158 | for(int i = 0; i < cloud->size(); i++) { 159 | std::vector k_indices; 160 | std::vector k_sq_distances; 161 | kdtree.nearestKSearch(cloud->at(i), k, k_indices, k_sq_distances); 162 | 163 | std::copy(k_indices.begin(), k_indices.end(), neighbors.begin() + i * k); 164 | } 165 | 166 | return neighbors; 167 | } 168 | 169 | template 170 | double FastVGICPCuda::linearize(const Eigen::Isometry3d& trans, Eigen::Matrix* H, Eigen::Matrix* b) { 171 | vgicp_cuda_->update_correspondences(trans); 172 | return vgicp_cuda_->compute_error(trans, H, b); 173 | } 174 | 175 | template 176 | double FastVGICPCuda::compute_error(const Eigen::Isometry3d& trans) { 177 | return vgicp_cuda_->compute_error(trans, nullptr, nullptr); 178 | } 179 | 180 | } // namespace fast_gicp 181 | 182 | #endif 183 | -------------------------------------------------------------------------------- /include/fast_gicp/gicp/impl/fast_vgicp_impl.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_FAST_VGICP_IMPL_HPP 2 | #define FAST_GICP_FAST_VGICP_IMPL_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | namespace fast_gicp { 17 | 18 | template 19 | FastVGICP::FastVGICP() : FastGICP() { 20 | this->reg_name_ = "FastVGICP"; 21 | 22 | voxel_resolution_ = 1.0; 23 | search_method_ = NeighborSearchMethod::DIRECT1; 24 | voxel_mode_ = VoxelAccumulationMode::ADDITIVE; 25 | } 26 | 27 | template 28 | FastVGICP::~FastVGICP() {} 29 | 30 | template 31 | void FastVGICP::setResolution(double resolution) { 32 | voxel_resolution_ = resolution; 33 | } 34 | 35 | template 36 | void FastVGICP::setNeighborSearchMethod(NeighborSearchMethod method) { 37 | search_method_ = method; 38 | } 39 | 40 | template 41 | void FastVGICP::setVoxelAccumulationMode(VoxelAccumulationMode mode) { 42 | voxel_mode_ = mode; 43 | } 44 | 45 | template 46 | void FastVGICP::swapSourceAndTarget() { 47 | input_.swap(target_); 48 | search_source_.swap(search_target_); 49 | source_covs_.swap(target_covs_); 50 | voxelmap_.reset(); 51 | voxel_correspondences_.clear(); 52 | voxel_mahalanobis_.clear(); 53 | } 54 | 55 | template 56 | void FastVGICP::setInputTarget(const PointCloudTargetConstPtr& cloud) { 57 | if (target_ == cloud) { 58 | return; 59 | } 60 | 61 | FastGICP::setInputTarget(cloud); 62 | voxelmap_.reset(); 63 | } 64 | 65 | template 66 | void FastVGICP::computeTransformation(PointCloudSource& output, const Matrix4& guess) { 67 | voxelmap_.reset(); 68 | 69 | FastGICP::computeTransformation(output, guess); 70 | } 71 | 72 | template 73 | void FastVGICP::update_correspondences(const Eigen::Isometry3d& trans) { 74 | voxel_correspondences_.clear(); 75 | auto offsets = neighbor_offsets(search_method_); 76 | 77 | std::vector>> corrs(num_threads_); 78 | for (auto& c : corrs) { 79 | c.reserve((input_->size() * offsets.size()) / num_threads_); 80 | } 81 | 82 | #pragma omp parallel for num_threads(num_threads_) schedule(guided, 8) 83 | for (int i = 0; i < input_->size(); i++) { 84 | const Eigen::Vector4d mean_A = input_->at(i).getVector4fMap().template cast(); 85 | Eigen::Vector4d transed_mean_A = trans * mean_A; 86 | Eigen::Vector3i coord = voxelmap_->voxel_coord(transed_mean_A); 87 | 88 | for (const auto& offset : offsets) { 89 | auto voxel = voxelmap_->lookup_voxel(coord + offset); 90 | if (voxel != nullptr) { 91 | corrs[omp_get_thread_num()].push_back(std::make_pair(i, voxel)); 92 | } 93 | } 94 | } 95 | 96 | voxel_correspondences_.reserve(input_->size() * offsets.size()); 97 | for (const auto& c : corrs) { 98 | voxel_correspondences_.insert(voxel_correspondences_.end(), c.begin(), c.end()); 99 | } 100 | 101 | // precompute combined covariances 102 | voxel_mahalanobis_.resize(voxel_correspondences_.size()); 103 | 104 | #pragma omp parallel for num_threads(num_threads_) schedule(guided, 8) 105 | for (int i = 0; i < voxel_correspondences_.size(); i++) { 106 | const auto& corr = voxel_correspondences_[i]; 107 | const auto& cov_A = source_covs_[corr.first]; 108 | const auto& cov_B = corr.second->cov; 109 | 110 | Eigen::Matrix4d RCR = cov_B + trans.matrix() * cov_A * trans.matrix().transpose(); 111 | RCR(3, 3) = 1.0; 112 | 113 | voxel_mahalanobis_[i] = RCR.inverse(); 114 | voxel_mahalanobis_[i](3, 3) = 0.0; 115 | } 116 | } 117 | 118 | template 119 | double FastVGICP::linearize(const Eigen::Isometry3d& trans, Eigen::Matrix* H, Eigen::Matrix* b) { 120 | if (voxelmap_ == nullptr) { 121 | voxelmap_.reset(new GaussianVoxelMap(voxel_resolution_, voxel_mode_)); 122 | voxelmap_->create_voxelmap(*target_, target_covs_); 123 | } 124 | 125 | update_correspondences(trans); 126 | 127 | double sum_errors = 0.0; 128 | std::vector, Eigen::aligned_allocator>> Hs(num_threads_); 129 | std::vector, Eigen::aligned_allocator>> bs(num_threads_); 130 | for (int i = 0; i < num_threads_; i++) { 131 | Hs[i].setZero(); 132 | bs[i].setZero(); 133 | } 134 | 135 | #pragma omp parallel for num_threads(num_threads_) reduction(+ : sum_errors) schedule(guided, 8) 136 | for (int i = 0; i < voxel_correspondences_.size(); i++) { 137 | const auto& corr = voxel_correspondences_[i]; 138 | auto target_voxel = corr.second; 139 | 140 | const Eigen::Vector4d mean_A = input_->at(corr.first).getVector4fMap().template cast(); 141 | const auto& cov_A = source_covs_[corr.first]; 142 | 143 | const Eigen::Vector4d mean_B = corr.second->mean; 144 | const auto& cov_B = corr.second->cov; 145 | 146 | const Eigen::Vector4d transed_mean_A = trans * mean_A; 147 | const Eigen::Vector4d error = mean_B - transed_mean_A; 148 | 149 | double w = std::sqrt(target_voxel->num_points); 150 | sum_errors += w * error.transpose() * voxel_mahalanobis_[i] * error; 151 | 152 | if (H == nullptr || b == nullptr) { 153 | continue; 154 | } 155 | 156 | Eigen::Matrix dtdx0 = Eigen::Matrix::Zero(); 157 | dtdx0.block<3, 3>(0, 0) = skewd(transed_mean_A.head<3>()); 158 | dtdx0.block<3, 3>(0, 3) = -Eigen::Matrix3d::Identity(); 159 | 160 | Eigen::Matrix jlossexp = dtdx0; 161 | 162 | Eigen::Matrix Hi = w * jlossexp.transpose() * voxel_mahalanobis_[i] * jlossexp; 163 | Eigen::Matrix bi = w * jlossexp.transpose() * voxel_mahalanobis_[i] * error; 164 | 165 | int thread_num = omp_get_thread_num(); 166 | Hs[thread_num] += Hi; 167 | bs[thread_num] += bi; 168 | } 169 | 170 | if (H && b) { 171 | H->setZero(); 172 | b->setZero(); 173 | for (int i = 0; i < num_threads_; i++) { 174 | (*H) += Hs[i]; 175 | (*b) += bs[i]; 176 | } 177 | } 178 | 179 | return sum_errors; 180 | } 181 | 182 | template 183 | double FastVGICP::compute_error(const Eigen::Isometry3d& trans) { 184 | double sum_errors = 0.0; 185 | #pragma omp parallel for num_threads(num_threads_) reduction(+ : sum_errors) 186 | for (int i = 0; i < voxel_correspondences_.size(); i++) { 187 | const auto& corr = voxel_correspondences_[i]; 188 | auto target_voxel = corr.second; 189 | 190 | const Eigen::Vector4d mean_A = input_->at(corr.first).getVector4fMap().template cast(); 191 | const auto& cov_A = source_covs_[corr.first]; 192 | 193 | const Eigen::Vector4d mean_B = corr.second->mean; 194 | const auto& cov_B = corr.second->cov; 195 | 196 | const Eigen::Vector4d transed_mean_A = trans * mean_A; 197 | const Eigen::Vector4d error = mean_B - transed_mean_A; 198 | 199 | double w = std::sqrt(target_voxel->num_points); 200 | sum_errors += w * error.transpose() * voxel_mahalanobis_[i] * error; 201 | } 202 | 203 | return sum_errors; 204 | } 205 | 206 | } // namespace fast_gicp 207 | 208 | #endif 209 | -------------------------------------------------------------------------------- /include/fast_gicp/gicp/impl/lsq_registration_impl.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | namespace fast_gicp { 7 | 8 | template 9 | LsqRegistration::LsqRegistration() { 10 | this->reg_name_ = "LsqRegistration"; 11 | max_iterations_ = 64; 12 | rotation_epsilon_ = 2e-3; 13 | transformation_epsilon_ = 5e-4; 14 | 15 | lsq_optimizer_type_ = LSQ_OPTIMIZER_TYPE::LevenbergMarquardt; 16 | lm_debug_print_ = false; 17 | lm_max_iterations_ = 10; 18 | lm_init_lambda_factor_ = 1e-9; 19 | lm_lambda_ = -1.0; 20 | 21 | final_hessian_.setIdentity(); 22 | } 23 | 24 | template 25 | LsqRegistration::~LsqRegistration() {} 26 | 27 | template 28 | void LsqRegistration::setRotationEpsilon(double eps) { 29 | rotation_epsilon_ = eps; 30 | } 31 | 32 | template 33 | void LsqRegistration::setInitialLambdaFactor(double init_lambda_factor) { 34 | lm_init_lambda_factor_ = init_lambda_factor; 35 | } 36 | 37 | template 38 | void LsqRegistration::setDebugPrint(bool lm_debug_print) { 39 | lm_debug_print_ = lm_debug_print; 40 | } 41 | 42 | template 43 | const Eigen::Matrix& LsqRegistration::getFinalHessian() const { 44 | return final_hessian_; 45 | } 46 | 47 | template 48 | double LsqRegistration::evaluateCost(const Eigen::Matrix4f& relative_pose, Eigen::Matrix* H, Eigen::Matrix* b) { 49 | return this->linearize(Eigen::Isometry3f(relative_pose).cast(), H, b); 50 | } 51 | 52 | template 53 | void LsqRegistration::computeTransformation(PointCloudSource& output, const Matrix4& guess) { 54 | Eigen::Isometry3d x0 = Eigen::Isometry3d(guess.template cast()); 55 | 56 | lm_lambda_ = -1.0; 57 | converged_ = false; 58 | 59 | if (lm_debug_print_) { 60 | std::cout << "********************************************" << std::endl; 61 | std::cout << "***************** optimize *****************" << std::endl; 62 | std::cout << "********************************************" << std::endl; 63 | } 64 | 65 | for (int i = 0; i < max_iterations_ && !converged_; i++) { 66 | nr_iterations_ = i; 67 | 68 | Eigen::Isometry3d delta; 69 | if (!step_optimize(x0, delta)) { 70 | std::cerr << "lm not converged!!" << std::endl; 71 | break; 72 | } 73 | 74 | converged_ = is_converged(delta); 75 | } 76 | 77 | final_transformation_ = x0.cast().matrix(); 78 | pcl::transformPointCloud(*input_, output, final_transformation_); 79 | } 80 | 81 | template 82 | bool LsqRegistration::is_converged(const Eigen::Isometry3d& delta) const { 83 | double accum = 0.0; 84 | Eigen::Matrix3d R = delta.linear() - Eigen::Matrix3d::Identity(); 85 | Eigen::Vector3d t = delta.translation(); 86 | 87 | Eigen::Matrix3d r_delta = 1.0 / rotation_epsilon_ * R.array().abs(); 88 | Eigen::Vector3d t_delta = 1.0 / transformation_epsilon_ * t.array().abs(); 89 | 90 | return std::max(r_delta.maxCoeff(), t_delta.maxCoeff()) < 1; 91 | } 92 | 93 | template 94 | bool LsqRegistration::step_optimize(Eigen::Isometry3d& x0, Eigen::Isometry3d& delta) { 95 | switch (lsq_optimizer_type_) { 96 | case LSQ_OPTIMIZER_TYPE::LevenbergMarquardt: 97 | return step_lm(x0, delta); 98 | case LSQ_OPTIMIZER_TYPE::GaussNewton: 99 | return step_gn(x0, delta); 100 | } 101 | 102 | return step_lm(x0, delta); 103 | } 104 | 105 | template 106 | bool LsqRegistration::step_gn(Eigen::Isometry3d& x0, Eigen::Isometry3d& delta) { 107 | Eigen::Matrix H; 108 | Eigen::Matrix b; 109 | double y0 = linearize(x0, &H, &b); 110 | 111 | Eigen::LDLT> solver(H); 112 | Eigen::Matrix d = solver.solve(-b); 113 | 114 | delta = se3_exp(d); 115 | 116 | x0 = delta * x0; 117 | final_hessian_ = H; 118 | 119 | return true; 120 | } 121 | 122 | template 123 | bool LsqRegistration::step_lm(Eigen::Isometry3d& x0, Eigen::Isometry3d& delta) { 124 | Eigen::Matrix H; 125 | Eigen::Matrix b; 126 | double y0 = linearize(x0, &H, &b); 127 | 128 | if (lm_lambda_ < 0.0) { 129 | lm_lambda_ = lm_init_lambda_factor_ * H.diagonal().array().abs().maxCoeff(); 130 | } 131 | 132 | double nu = 2.0; 133 | for (int i = 0; i < lm_max_iterations_; i++) { 134 | Eigen::LDLT> solver(H + lm_lambda_ * Eigen::Matrix::Identity()); 135 | Eigen::Matrix d = solver.solve(-b); 136 | 137 | delta = se3_exp(d); 138 | 139 | Eigen::Isometry3d xi = delta * x0; 140 | double yi = compute_error(xi); 141 | double rho = (y0 - yi) / (d.dot(lm_lambda_ * d - b)); 142 | 143 | if (lm_debug_print_) { 144 | if (i == 0) { 145 | std::cout << boost::format("--- LM optimization ---\n%5s %15s %15s %15s %15s %15s %5s\n") % "i" % "y0" % "yi" % "rho" % "lambda" % "|delta|" % "dec"; 146 | } 147 | char dec = rho > 0.0 ? 'x' : ' '; 148 | std::cout << boost::format("%5d %15g %15g %15g %15g %15g %5c") % i % y0 % yi % rho % lm_lambda_ % d.norm() % dec << std::endl; 149 | } 150 | 151 | if (rho < 0) { 152 | if (is_converged(delta)) { 153 | return true; 154 | } 155 | 156 | lm_lambda_ = nu * lm_lambda_; 157 | nu = 2 * nu; 158 | continue; 159 | } 160 | 161 | x0 = xi; 162 | lm_lambda_ = lm_lambda_ * std::max(1.0 / 3.0, 1 - std::pow(2 * rho - 1, 3)); 163 | final_hessian_ = H; 164 | return true; 165 | } 166 | 167 | return false; 168 | } 169 | 170 | } // namespace fast_gicp -------------------------------------------------------------------------------- /include/fast_gicp/gicp/lsq_registration.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_LSQ_REGISTRATION_HPP 2 | #define FAST_GICP_LSQ_REGISTRATION_HPP 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace fast_gicp { 12 | 13 | enum class LSQ_OPTIMIZER_TYPE { GaussNewton, LevenbergMarquardt }; 14 | 15 | template 16 | class LsqRegistration : public pcl::Registration { 17 | public: 18 | using Scalar = float; 19 | using Matrix4 = typename pcl::Registration::Matrix4; 20 | 21 | using PointCloudSource = typename pcl::Registration::PointCloudSource; 22 | using PointCloudSourcePtr = typename PointCloudSource::Ptr; 23 | using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr; 24 | 25 | using PointCloudTarget = typename pcl::Registration::PointCloudTarget; 26 | using PointCloudTargetPtr = typename PointCloudTarget::Ptr; 27 | using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr; 28 | 29 | #if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0) 30 | using Ptr = pcl::shared_ptr>; 31 | using ConstPtr = pcl::shared_ptr>; 32 | #else 33 | using Ptr = boost::shared_ptr>; 34 | using ConstPtr = boost::shared_ptr>; 35 | #endif 36 | 37 | protected: 38 | using pcl::Registration::input_; 39 | using pcl::Registration::nr_iterations_; 40 | using pcl::Registration::max_iterations_; 41 | using pcl::Registration::final_transformation_; 42 | using pcl::Registration::transformation_epsilon_; 43 | using pcl::Registration::converged_; 44 | 45 | public: 46 | EIGEN_MAKE_ALIGNED_OPERATOR_NEW 47 | 48 | LsqRegistration(); 49 | virtual ~LsqRegistration(); 50 | 51 | void setRotationEpsilon(double eps); 52 | void setInitialLambdaFactor(double init_lambda_factor); 53 | void setDebugPrint(bool lm_debug_print); 54 | 55 | const Eigen::Matrix& getFinalHessian() const; 56 | 57 | double evaluateCost(const Eigen::Matrix4f& relative_pose, Eigen::Matrix* H = nullptr, Eigen::Matrix* b = nullptr); 58 | 59 | virtual void swapSourceAndTarget() {} 60 | virtual void clearSource() {} 61 | virtual void clearTarget() {} 62 | 63 | protected: 64 | virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override; 65 | 66 | bool is_converged(const Eigen::Isometry3d& delta) const; 67 | 68 | virtual double linearize(const Eigen::Isometry3d& trans, Eigen::Matrix* H = nullptr, Eigen::Matrix* b = nullptr) = 0; 69 | virtual double compute_error(const Eigen::Isometry3d& trans) = 0; 70 | 71 | bool step_optimize(Eigen::Isometry3d& x0, Eigen::Isometry3d& delta); 72 | bool step_gn(Eigen::Isometry3d& x0, Eigen::Isometry3d& delta); 73 | bool step_lm(Eigen::Isometry3d& x0, Eigen::Isometry3d& delta); 74 | 75 | protected: 76 | double rotation_epsilon_; 77 | 78 | LSQ_OPTIMIZER_TYPE lsq_optimizer_type_; 79 | int lm_max_iterations_; 80 | double lm_init_lambda_factor_; 81 | double lm_lambda_; 82 | bool lm_debug_print_; 83 | 84 | Eigen::Matrix final_hessian_; 85 | }; 86 | } // namespace fast_gicp 87 | 88 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/ndt/impl/ndt_cuda_impl.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_NDT_CUDA_IMPL_HPP 2 | #define FAST_GICP_NDT_CUDA_IMPL_HPP 3 | 4 | #include 5 | 6 | #include 7 | 8 | namespace fast_gicp { 9 | 10 | template 11 | NDTCuda::NDTCuda() : LsqRegistration() { 12 | this->reg_name_ = "NDTCuda"; 13 | ndt_cuda_.reset(new cuda::NDTCudaCore()); 14 | } 15 | 16 | template 17 | NDTCuda::~NDTCuda() {} 18 | 19 | template 20 | void NDTCuda::setDistanceMode(NDTDistanceMode mode) { 21 | ndt_cuda_->set_distance_mode(mode); 22 | } 23 | 24 | template 25 | void NDTCuda::setResolution(double resolution) { 26 | ndt_cuda_->set_resolution(resolution); 27 | } 28 | 29 | template 30 | void NDTCuda::setNeighborSearchMethod(NeighborSearchMethod method, double radius) { 31 | ndt_cuda_->set_neighbor_search_method(method, radius); 32 | } 33 | 34 | template 35 | void NDTCuda::swapSourceAndTarget() { 36 | ndt_cuda_->swap_source_and_target(); 37 | input_.swap(target_); 38 | } 39 | 40 | template 41 | void NDTCuda::clearSource() { 42 | input_.reset(); 43 | } 44 | 45 | template 46 | void NDTCuda::clearTarget() { 47 | target_.reset(); 48 | } 49 | 50 | template 51 | void NDTCuda::setInputSource(const PointCloudSourceConstPtr& cloud) { 52 | if (cloud == input_) { 53 | return; 54 | } 55 | pcl::Registration::setInputSource(cloud); 56 | 57 | std::vector> points(cloud->size()); 58 | std::transform(cloud->begin(), cloud->end(), points.begin(), [=](const PointSource& pt) { return pt.getVector3fMap(); }); 59 | ndt_cuda_->set_source_cloud(points); 60 | } 61 | 62 | template 63 | void NDTCuda::setInputTarget(const PointCloudTargetConstPtr& cloud) { 64 | if (cloud == target_) { 65 | return; 66 | } 67 | 68 | pcl::Registration::setInputTarget(cloud); 69 | 70 | std::vector> points(cloud->size()); 71 | std::transform(cloud->begin(), cloud->end(), points.begin(), [=](const PointTarget& pt) { return pt.getVector3fMap(); }); 72 | ndt_cuda_->set_target_cloud(points); 73 | } 74 | 75 | template 76 | void NDTCuda::computeTransformation(PointCloudSource& output, const Matrix4& guess) { 77 | ndt_cuda_->create_voxelmaps(); 78 | LsqRegistration::computeTransformation(output, guess); 79 | } 80 | 81 | template 82 | double NDTCuda::linearize(const Eigen::Isometry3d& trans, Eigen::Matrix* H, Eigen::Matrix* b) { 83 | ndt_cuda_->update_correspondences(trans); 84 | return ndt_cuda_->compute_error(trans, H, b); 85 | } 86 | 87 | template 88 | double NDTCuda::compute_error(const Eigen::Isometry3d& trans) { 89 | return ndt_cuda_->compute_error(trans, nullptr, nullptr); 90 | } 91 | 92 | } // namespace fast_gicp 93 | 94 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/ndt/ndt_cuda.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_NDT_CUDA_HPP 2 | #define FAST_GICP_NDT_CUDA_HPP 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace fast_gicp { 16 | 17 | namespace cuda { 18 | class NDTCudaCore; 19 | } 20 | 21 | template 22 | class NDTCuda : public LsqRegistration { 23 | public: 24 | using Scalar = float; 25 | using Matrix4 = typename pcl::Registration::Matrix4; 26 | 27 | using PointCloudSource = typename pcl::Registration::PointCloudSource; 28 | using PointCloudSourcePtr = typename PointCloudSource::Ptr; 29 | using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr; 30 | 31 | using PointCloudTarget = typename pcl::Registration::PointCloudTarget; 32 | using PointCloudTargetPtr = typename PointCloudTarget::Ptr; 33 | using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr; 34 | 35 | #if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0) 36 | using Ptr = pcl::shared_ptr>; 37 | using ConstPtr = pcl::shared_ptr>; 38 | #else 39 | using Ptr = boost::shared_ptr>; 40 | using ConstPtr = boost::shared_ptr>; 41 | #endif 42 | 43 | protected: 44 | using pcl::Registration::reg_name_; 45 | using pcl::Registration::input_; 46 | using pcl::Registration::target_; 47 | using pcl::Registration::corr_dist_threshold_; 48 | 49 | public: 50 | NDTCuda(); 51 | virtual ~NDTCuda() override; 52 | 53 | void setDistanceMode(NDTDistanceMode mode); 54 | void setResolution(double resolution); 55 | void setNeighborSearchMethod(NeighborSearchMethod method, double radius = -1.0); 56 | 57 | virtual void swapSourceAndTarget() override; 58 | virtual void clearSource() override; 59 | virtual void clearTarget() override; 60 | 61 | virtual void setInputSource(const PointCloudSourceConstPtr& cloud) override; 62 | virtual void setInputTarget(const PointCloudTargetConstPtr& cloud) override; 63 | 64 | protected: 65 | virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override; 66 | virtual double linearize(const Eigen::Isometry3d& trans, Eigen::Matrix* H, Eigen::Matrix* b) override; 67 | virtual double compute_error(const Eigen::Isometry3d& trans) override; 68 | 69 | protected: 70 | std::unique_ptr ndt_cuda_; 71 | }; 72 | } // namespace fast_gicp 73 | 74 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/ndt/ndt_settings.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_NDT_SETTINGS_HPP 2 | #define FAST_GICP_NDT_SETTINGS_HPP 3 | 4 | namespace fast_gicp { 5 | 6 | enum class NDTDistanceMode { P2D, D2D }; 7 | 8 | } // namespace fast_gicp 9 | 10 | #endif -------------------------------------------------------------------------------- /include/fast_gicp/so3/so3.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FAST_GICP_SO3_HPP 2 | #define FAST_GICP_SO3_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace fast_gicp { 8 | 9 | inline Eigen::Matrix3f skew(const Eigen::Vector3f& x) { 10 | Eigen::Matrix3f skew = Eigen::Matrix3f::Zero(); 11 | skew(0, 1) = -x[2]; 12 | skew(0, 2) = x[1]; 13 | skew(1, 0) = x[2]; 14 | skew(1, 2) = -x[0]; 15 | skew(2, 0) = -x[1]; 16 | skew(2, 1) = x[0]; 17 | 18 | return skew; 19 | } 20 | 21 | inline Eigen::Matrix3d skewd(const Eigen::Vector3d& x) { 22 | Eigen::Matrix3d skew = Eigen::Matrix3d::Zero(); 23 | skew(0, 1) = -x[2]; 24 | skew(0, 2) = x[1]; 25 | skew(1, 0) = x[2]; 26 | skew(1, 2) = -x[0]; 27 | skew(2, 0) = -x[1]; 28 | skew(2, 1) = x[0]; 29 | 30 | return skew; 31 | } 32 | 33 | /* 34 | * SO3 expmap code taken from Sophus 35 | * https://github.com/strasdat/Sophus/blob/593db47500ea1a2de5f0e6579c86147991509c59/sophus/so3.hpp#L585 36 | * 37 | * Copyright 2011-2017 Hauke Strasdat 38 | * 2012-2017 Steven Lovegrove 39 | * 40 | * Permission is hereby granted, free of charge, to any person obtaining a copy 41 | * of this software and associated documentation files (the "Software"), to 42 | * deal in the Software without restriction, including without limitation the 43 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 44 | * sell copies of the Software, and to permit persons to whom the Software is 45 | * furnished to do so, subject to the following conditions: 46 | * 47 | * The above copyright notice and this permission notice shall be included in 48 | * all copies or substantial portions of the Software. 49 | * 50 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 51 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 52 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 53 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 54 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 55 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 56 | * IN THE SOFTWARE. 57 | */ 58 | inline Eigen::Quaterniond so3_exp(const Eigen::Vector3d& omega) { 59 | double theta_sq = omega.dot(omega); 60 | 61 | double theta; 62 | double imag_factor; 63 | double real_factor; 64 | if (theta_sq < 1e-10) { 65 | theta = 0; 66 | double theta_quad = theta_sq * theta_sq; 67 | imag_factor = 0.5 - 1.0 / 48.0 * theta_sq + 1.0 / 3840.0 * theta_quad; 68 | real_factor = 1.0 - 1.0 / 8.0 * theta_sq + 1.0 / 384.0 * theta_quad; 69 | } else { 70 | theta = std::sqrt(theta_sq); 71 | double half_theta = 0.5 * theta; 72 | imag_factor = std::sin(half_theta) / theta; 73 | real_factor = std::cos(half_theta); 74 | } 75 | 76 | return Eigen::Quaterniond(real_factor, imag_factor * omega.x(), imag_factor * omega.y(), imag_factor * omega.z()); 77 | } 78 | 79 | // Rotation-first 80 | inline Eigen::Isometry3d se3_exp(const Eigen::Matrix& a) { 81 | using std::cos; 82 | using std::sin; 83 | const Eigen::Vector3d omega = a.head<3>(); 84 | 85 | double theta = std::sqrt(omega.dot(omega)); 86 | const Eigen::Quaterniond so3 = so3_exp(omega); 87 | const Eigen::Matrix3d Omega = skewd(omega); 88 | const Eigen::Matrix3d Omega_sq = Omega * Omega; 89 | Eigen::Matrix3d V; 90 | 91 | if (theta < 1e-10) { 92 | V = so3.matrix(); 93 | /// Note: That is an accurate expansion! 94 | } else { 95 | const double theta_sq = theta * theta; 96 | V = (Eigen::Matrix3d::Identity() + (1.0 - cos(theta)) / (theta_sq)*Omega + (theta - sin(theta)) / (theta_sq * theta) * Omega_sq); 97 | } 98 | 99 | Eigen::Isometry3d se3 = Eigen::Isometry3d::Identity(); 100 | se3.linear() = so3.toRotationMatrix(); 101 | se3.translation() = V * a.tail<3>(); 102 | 103 | return se3; 104 | } 105 | 106 | } // namespace fast_gicp 107 | 108 | #endif -------------------------------------------------------------------------------- /package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | fast_gicp 5 | 0.0.0 6 | A collection of fast point cloud registration implementations 7 | 8 | k.koide 9 | 10 | BSD 11 | https://github.com/SMRT-AIST/fast_gicp 12 | https://github.com/SMRT-AIST/fast_gicp 13 | https://github.com/SMRT-AIST/fast_gicp/issues 14 | 15 | catkin 16 | ament_cmake 17 | 18 | ros_environment 19 | 20 | libpcl-all-dev 21 | eigen 22 | 23 | 24 | catkin 25 | ament_cmake 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/runtest_cuda.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker build --tag focal_cuda -f docker/focal_cuda/Dockerfile . 3 | docker run --gpus all -it --rm focal_cuda /root/fast_gicp/build/gicp_test /root/fast_gicp/data 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | import glob 5 | import subprocess 6 | 7 | from setuptools import setup, Extension 8 | from setuptools.command.build_ext import build_ext 9 | 10 | # Convert distutils Windows platform specifiers to CMake -A arguments 11 | PLAT_TO_CMAKE = { 12 | "win32": "Win32", 13 | "win-amd64": "x64", 14 | "win-arm32": "ARM", 15 | "win-arm64": "ARM64", 16 | } 17 | 18 | 19 | # A CMakeExtension needs a sourcedir instead of a file list. 20 | # The name must be the _single_ output extension from the CMake build. 21 | # If you need multiple extensions, see scikit-build. 22 | class CMakeExtension(Extension): 23 | def __init__(self, name, sourcedir=""): 24 | Extension.__init__(self, name, sources=[]) 25 | self.sourcedir = os.path.abspath(sourcedir) 26 | 27 | 28 | class CMakeBuild(build_ext): 29 | def build_extension(self, ext): 30 | extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) 31 | 32 | # required for auto-detection of auxiliary "native" libs 33 | if not extdir.endswith(os.path.sep): 34 | extdir += os.path.sep 35 | 36 | # cfg = "Debug" if self.debug else "Release" 37 | cfg = "Release" 38 | 39 | # CMake lets you override the generator - we need to check this. 40 | # Can be set with Conda-Build, for example. 41 | cmake_generator = os.environ.get("CMAKE_GENERATOR", "") 42 | 43 | # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON 44 | # EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code 45 | # from Python. 46 | cmake_args = [ 47 | "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}".format(extdir), 48 | "-DPYTHON_EXECUTABLE={}".format(sys.executable), 49 | "-DEXAMPLE_VERSION_INFO={}".format(self.distribution.get_version()), 50 | "-DCMAKE_BUILD_TYPE={}".format(cfg), # not used on MSVC, but no harm, 51 | # "-DBUILD_VGICP_CUDA=ON", 52 | "-DBUILD_PYTHON_BINDINGS=ON", 53 | "-DBUILD_apps=OFF", 54 | ] 55 | build_args = [] 56 | 57 | if self.compiler.compiler_type != "msvc": 58 | # Using Ninja-build since it a) is available as a wheel and b) 59 | # multithreads automatically. MSVC would require all variables be 60 | # exported for Ninja to pick it up, which is a little tricky to do. 61 | # Users can override the generator with CMAKE_GENERATOR in CMake 62 | # 3.15+. 63 | if not cmake_generator: 64 | pass 65 | # cmake_args += ["-GNinja"] 66 | 67 | else: 68 | # Single config generators are handled "normally" 69 | single_config = any(x in cmake_generator for x in {"NMake", "Ninja"}) 70 | 71 | # CMake allows an arch-in-generator style for backward compatibility 72 | contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"}) 73 | 74 | # Specify the arch if using MSVC generator, but only if it doesn't 75 | # contain a backward-compatibility arch spec already in the 76 | # generator name. 77 | if not single_config and not contains_arch: 78 | cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]] 79 | 80 | # Multi-config generators have a different way to specify configs 81 | if not single_config: 82 | cmake_args += [ 83 | "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir) 84 | ] 85 | build_args += ["--config", cfg] 86 | 87 | # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level 88 | # across all generators. 89 | if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ: 90 | # self.parallel is a Python 3 only way to set parallel jobs by hand 91 | # using -j in the build_ext call, not supported by pip or PyPA-build. 92 | if hasattr(self, "parallel") and self.parallel: 93 | # CMake 3.12+ only. 94 | build_args += ["-j{}".format(self.parallel)] 95 | 96 | if not os.path.exists(self.build_temp): 97 | os.makedirs(self.build_temp) 98 | 99 | subprocess.check_call( 100 | ["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp 101 | ) 102 | subprocess.check_call( 103 | ["cmake", "--build", "."] + build_args, cwd=self.build_temp 104 | ) 105 | 106 | # The information here can also be placed in setup.cfg - better separation of 107 | # logic and declaration, and simpler if you include description/version in a file. 108 | setup( 109 | name="pygicp", 110 | version="0.0.1", 111 | author="k.koide", 112 | author_email="k.koide@aist.go.jp", 113 | description="A collection of GICP-based point cloud registration algorithms", 114 | long_description="", 115 | ext_modules=[CMakeExtension("pygicp")], 116 | cmdclass={"build_ext": CMakeBuild}, 117 | zip_safe=False, 118 | ) 119 | -------------------------------------------------------------------------------- /src/align.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #ifdef USE_VGICP_CUDA 17 | #include 18 | #include 19 | #endif 20 | 21 | // benchmark for PCL's registration methods 22 | template 23 | void test_pcl(Registration& reg, const pcl::PointCloud::ConstPtr& target, const pcl::PointCloud::ConstPtr& source) { 24 | pcl::PointCloud::Ptr aligned(new pcl::PointCloud); 25 | 26 | double fitness_score = 0.0; 27 | 28 | // single run 29 | auto t1 = std::chrono::high_resolution_clock::now(); 30 | reg.setInputTarget(target); 31 | reg.setInputSource(source); 32 | reg.align(*aligned); 33 | auto t2 = std::chrono::high_resolution_clock::now(); 34 | fitness_score = reg.getFitnessScore(); 35 | double single = std::chrono::duration_cast(t2 - t1).count() / 1e6; 36 | std::cout << "single:" << single << "[msec] " << std::flush; 37 | 38 | // 100 times 39 | t1 = std::chrono::high_resolution_clock::now(); 40 | for (int i = 0; i < 100; i++) { 41 | reg.setInputTarget(target); 42 | reg.setInputSource(source); 43 | reg.align(*aligned); 44 | } 45 | t2 = std::chrono::high_resolution_clock::now(); 46 | double multi = std::chrono::duration_cast(t2 - t1).count() / 1e6; 47 | std::cout << "100times:" << multi << "[msec] fitness_score:" << fitness_score << std::endl; 48 | } 49 | 50 | // benchmark for fast_gicp registration methods 51 | template 52 | void test(Registration& reg, const pcl::PointCloud::ConstPtr& target, const pcl::PointCloud::ConstPtr& source) { 53 | pcl::PointCloud::Ptr aligned(new pcl::PointCloud); 54 | 55 | double fitness_score = 0.0; 56 | 57 | // single run 58 | auto t1 = std::chrono::high_resolution_clock::now(); 59 | // fast_gicp reuses calculated covariances if an input cloud is the same as the previous one 60 | // to prevent this for benchmarking, force clear source and target clouds 61 | reg.clearTarget(); 62 | reg.clearSource(); 63 | reg.setInputTarget(target); 64 | reg.setInputSource(source); 65 | reg.align(*aligned); 66 | auto t2 = std::chrono::high_resolution_clock::now(); 67 | fitness_score = reg.getFitnessScore(); 68 | double single = std::chrono::duration_cast(t2 - t1).count() / 1e6; 69 | 70 | std::cout << "single:" << single << "[msec] " << std::flush; 71 | 72 | // 100 times 73 | t1 = std::chrono::high_resolution_clock::now(); 74 | for (int i = 0; i < 100; i++) { 75 | reg.clearTarget(); 76 | reg.clearSource(); 77 | reg.setInputTarget(target); 78 | reg.setInputSource(source); 79 | reg.align(*aligned); 80 | } 81 | t2 = std::chrono::high_resolution_clock::now(); 82 | double multi = std::chrono::duration_cast(t2 - t1).count() / 1e6; 83 | std::cout << "100times:" << multi << "[msec] " << std::flush; 84 | 85 | // for some tasks like odometry calculation, 86 | // you can reuse the covariances of a source point cloud in the next registration 87 | t1 = std::chrono::high_resolution_clock::now(); 88 | pcl::PointCloud::ConstPtr target_ = target; 89 | pcl::PointCloud::ConstPtr source_ = source; 90 | for (int i = 0; i < 100; i++) { 91 | reg.swapSourceAndTarget(); 92 | reg.clearSource(); 93 | 94 | reg.setInputTarget(target_); 95 | reg.setInputSource(source_); 96 | reg.align(*aligned); 97 | 98 | target_.swap(source_); 99 | } 100 | t2 = std::chrono::high_resolution_clock::now(); 101 | double reuse = std::chrono::duration_cast(t2 - t1).count() / 1e6; 102 | 103 | std::cout << "100times_reuse:" << reuse << "[msec] fitness_score:" << fitness_score << std::endl; 104 | } 105 | 106 | /** 107 | * @brief main 108 | */ 109 | int main(int argc, char** argv) { 110 | if (argc < 3) { 111 | std::cout << "usage: gicp_align target_pcd source_pcd" << std::endl; 112 | return 0; 113 | } 114 | 115 | pcl::PointCloud::Ptr target_cloud(new pcl::PointCloud()); 116 | pcl::PointCloud::Ptr source_cloud(new pcl::PointCloud()); 117 | 118 | if (pcl::io::loadPCDFile(argv[1], *target_cloud)) { 119 | std::cerr << "failed to open " << argv[1] << std::endl; 120 | return 1; 121 | } 122 | if (pcl::io::loadPCDFile(argv[2], *source_cloud)) { 123 | std::cerr << "failed to open " << argv[2] << std::endl; 124 | return 1; 125 | } 126 | 127 | // remove invalid points around origin 128 | source_cloud->erase( 129 | std::remove_if(source_cloud->begin(), source_cloud->end(), [=](const pcl::PointXYZ& pt) { return pt.getVector3fMap().squaredNorm() < 1e-3; }), 130 | source_cloud->end()); 131 | target_cloud->erase( 132 | std::remove_if(target_cloud->begin(), target_cloud->end(), [=](const pcl::PointXYZ& pt) { return pt.getVector3fMap().squaredNorm() < 1e-3; }), 133 | target_cloud->end()); 134 | 135 | // downsampling 136 | pcl::ApproximateVoxelGrid voxelgrid; 137 | voxelgrid.setLeafSize(0.1f, 0.1f, 0.1f); 138 | 139 | pcl::PointCloud::Ptr filtered(new pcl::PointCloud()); 140 | voxelgrid.setInputCloud(target_cloud); 141 | voxelgrid.filter(*filtered); 142 | target_cloud = filtered; 143 | 144 | filtered.reset(new pcl::PointCloud()); 145 | voxelgrid.setInputCloud(source_cloud); 146 | voxelgrid.filter(*filtered); 147 | source_cloud = filtered; 148 | 149 | std::cout << "target:" << target_cloud->size() << "[pts] source:" << source_cloud->size() << "[pts]" << std::endl; 150 | 151 | std::cout << "--- pcl_gicp ---" << std::endl; 152 | pcl::GeneralizedIterativeClosestPoint pcl_gicp; 153 | test_pcl(pcl_gicp, target_cloud, source_cloud); 154 | 155 | std::cout << "--- pcl_ndt ---" << std::endl; 156 | pcl::NormalDistributionsTransform pcl_ndt; 157 | pcl_ndt.setResolution(1.0); 158 | test_pcl(pcl_ndt, target_cloud, source_cloud); 159 | 160 | std::cout << "--- fgicp_st ---" << std::endl; 161 | fast_gicp::FastGICPSingleThread fgicp_st; 162 | test(fgicp_st, target_cloud, source_cloud); 163 | 164 | std::cout << "--- fgicp_mt ---" << std::endl; 165 | fast_gicp::FastGICP fgicp_mt; 166 | // fast_gicp uses all the CPU cores by default 167 | // fgicp_mt.setNumThreads(8); 168 | test(fgicp_mt, target_cloud, source_cloud); 169 | 170 | std::cout << "--- vgicp_st ---" << std::endl; 171 | fast_gicp::FastVGICP vgicp; 172 | vgicp.setResolution(1.0); 173 | vgicp.setNumThreads(1); 174 | test(vgicp, target_cloud, source_cloud); 175 | 176 | std::cout << "--- vgicp_mt ---" << std::endl; 177 | vgicp.setNumThreads(omp_get_max_threads()); 178 | test(vgicp, target_cloud, source_cloud); 179 | 180 | #ifdef USE_VGICP_CUDA 181 | std::cout << "--- ndt_cuda (P2D) ---" << std::endl; 182 | fast_gicp::NDTCuda ndt_cuda; 183 | ndt_cuda.setResolution(1.0); 184 | ndt_cuda.setDistanceMode(fast_gicp::NDTDistanceMode::P2D); 185 | test(ndt_cuda, target_cloud, source_cloud); 186 | 187 | std::cout << "--- ndt_cuda (D2D) ---" << std::endl; 188 | ndt_cuda.setDistanceMode(fast_gicp::NDTDistanceMode::D2D); 189 | test(ndt_cuda, target_cloud, source_cloud); 190 | 191 | std::cout << "--- vgicp_cuda (parallel_kdtree) ---" << std::endl; 192 | fast_gicp::FastVGICPCuda vgicp_cuda; 193 | vgicp_cuda.setResolution(1.0); 194 | // vgicp_cuda uses CPU-based parallel KDTree in covariance estimation by default 195 | // on a modern CPU, it is faster than GPU_BRUTEFORCE 196 | // vgicp_cuda.setNearestNeighborSearchMethod(fast_gicp::NearestNeighborMethod::CPU_PARALLEL_KDTREE); 197 | test(vgicp_cuda, target_cloud, source_cloud); 198 | 199 | std::cout << "--- vgicp_cuda (gpu_bruteforce) ---" << std::endl; 200 | // use GPU-based bruteforce nearest neighbor search for covariance estimation 201 | // this would be a good choice if your PC has a weak CPU and a strong GPU (e.g., NVIDIA Jetson) 202 | vgicp_cuda.setNearestNeighborSearchMethod(fast_gicp::NearestNeighborMethod::GPU_BRUTEFORCE); 203 | test(vgicp_cuda, target_cloud, source_cloud); 204 | 205 | std::cout << "--- vgicp_cuda (gpu_rbf_kernel) ---" << std::endl; 206 | // use RBF-kernel-based covariance estimation 207 | // extremely fast but maybe a bit inaccurate 208 | vgicp_cuda.setNearestNeighborSearchMethod(fast_gicp::NearestNeighborMethod::GPU_RBF_KERNEL); 209 | // kernel width (and distance threshold) need to be tuned 210 | vgicp_cuda.setKernelWidth(0.5); 211 | test(vgicp_cuda, target_cloud, source_cloud); 212 | #endif 213 | 214 | return 0; 215 | } 216 | -------------------------------------------------------------------------------- /src/fast_gicp/cuda/brute_force_knn.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | namespace fast_gicp { 13 | namespace cuda { 14 | 15 | namespace { 16 | struct neighborsearch_kernel { 17 | neighborsearch_kernel(int k, const thrust::device_vector& target, thrust::device_vector>& k_neighbors) 18 | : k(k), num_target_points(target.size()), target_points_ptr(target.data()), k_neighbors_ptr(k_neighbors.data()) {} 19 | 20 | template 21 | __host__ __device__ void operator()(const Tuple& idx_x) const { 22 | // threadIdx doesn't work because thrust split for_each in two loops 23 | int idx = thrust::get<0>(idx_x); 24 | const Eigen::Vector3f& x = thrust::get<1>(idx_x); 25 | 26 | // target points buffer & nn output buffer 27 | const Eigen::Vector3f* pts = thrust::raw_pointer_cast(target_points_ptr); 28 | thrust::pair* k_neighbors = thrust::raw_pointer_cast(k_neighbors_ptr) + idx * k; 29 | 30 | // priority queue 31 | struct compare_type { 32 | bool operator()(const thrust::pair& lhs, const thrust::pair& rhs) { 33 | return lhs.first < rhs.first; 34 | } 35 | }; 36 | 37 | typedef nvbio::vector_view*> vector_type; 38 | typedef nvbio::priority_queue, vector_type, compare_type> queue_type; 39 | queue_type queue(vector_type(0, k_neighbors - 1)); 40 | 41 | for(int i = 0; i < k; i++) { 42 | float sq_dist = (pts[i] - x).squaredNorm(); 43 | queue.push(thrust::make_pair(sq_dist, i)); 44 | } 45 | 46 | for(int i = k; i < num_target_points; i++) { 47 | float sq_dist = (pts[i] - x).squaredNorm(); 48 | if(sq_dist < queue.top().first) { 49 | queue.pop(); 50 | queue.push(thrust::make_pair(sq_dist, i)); 51 | } 52 | } 53 | } 54 | 55 | const int k; 56 | const int num_target_points; 57 | thrust::device_ptr target_points_ptr; 58 | 59 | thrust::device_ptr> k_neighbors_ptr; 60 | }; 61 | 62 | struct sorting_kernel { 63 | sorting_kernel(int k, thrust::device_vector>& k_neighbors) : k(k), k_neighbors_ptr(k_neighbors.data()) {} 64 | 65 | __host__ __device__ void operator()(int idx) const { 66 | // target points buffer & nn output buffer 67 | thrust::pair* k_neighbors = thrust::raw_pointer_cast(k_neighbors_ptr) + idx * k; 68 | 69 | // priority queue 70 | struct compare_type { 71 | bool operator()(const thrust::pair& lhs, const thrust::pair& rhs) { 72 | return lhs.first < rhs.first; 73 | } 74 | }; 75 | 76 | typedef nvbio::vector_view*> vector_type; 77 | typedef nvbio::priority_queue, vector_type, compare_type> queue_type; 78 | queue_type queue(vector_type(k, k_neighbors - 1)); 79 | queue.m_size = k; 80 | 81 | for(int i = 0; i < k; i++) { 82 | thrust::pair poped = queue.top(); 83 | queue.pop(); 84 | 85 | k_neighbors[k - i - 1] = poped; 86 | } 87 | } 88 | 89 | const int k; 90 | thrust::device_ptr> k_neighbors_ptr; 91 | }; 92 | } 93 | 94 | void brute_force_knn_search(const thrust::device_vector& source, const thrust::device_vector& target, int k, thrust::device_vector>& k_neighbors, bool do_sort=false) { 95 | thrust::device_vector d_indices(source.size()); 96 | thrust::sequence(d_indices.begin(), d_indices.end()); 97 | 98 | auto first = thrust::make_zip_iterator(thrust::make_tuple(d_indices.begin(), source.begin())); 99 | auto last = thrust::make_zip_iterator(thrust::make_tuple(d_indices.end(), source.end())); 100 | 101 | // nvbio::priority_queue requires (k + 1) working space 102 | k_neighbors.resize(source.size() * k, thrust::make_pair(-1.0f, -1)); 103 | thrust::for_each(first, last, neighborsearch_kernel(k, target, k_neighbors)); 104 | 105 | if(do_sort) { 106 | thrust::for_each(d_indices.begin(), d_indices.end(), sorting_kernel(k, k_neighbors)); 107 | } 108 | } 109 | 110 | } 111 | } // namespace fast_gicp 112 | -------------------------------------------------------------------------------- /src/fast_gicp/cuda/compute_derivatives.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | namespace fast_gicp { 14 | namespace cuda { 15 | 16 | namespace { 17 | 18 | template 19 | struct compute_derivatives_kernel { 20 | EIGEN_MAKE_ALIGNED_OPERATOR_NEW 21 | 22 | compute_derivatives_kernel( 23 | const GaussianVoxelMap& voxelmap, 24 | const thrust::device_vector& src_means, 25 | const thrust::device_vector& src_covs, 26 | const thrust::device_ptr& x_eval_ptr, 27 | const thrust::device_ptr& x_ptr) 28 | : trans_eval_ptr(x_eval_ptr), 29 | trans_ptr(x_ptr), 30 | src_means_ptr(src_means.data()), 31 | src_covs_ptr(src_covs.data()), 32 | voxel_num_points_ptr(voxelmap.num_points.data()), 33 | voxel_means_ptr(voxelmap.voxel_means.data()), 34 | voxel_covs_ptr(voxelmap.voxel_covs.data()) {} 35 | 36 | // skew symmetric matrix 37 | __host__ __device__ Eigen::Matrix3f skew_symmetric(const Eigen::Vector3f& x) const { 38 | Eigen::Matrix3f skew = Eigen::Matrix3f::Zero(); 39 | skew(0, 1) = -x[2]; 40 | skew(0, 2) = x[1]; 41 | skew(1, 0) = x[2]; 42 | skew(1, 2) = -x[0]; 43 | skew(2, 0) = -x[1]; 44 | skew(2, 1) = x[0]; 45 | 46 | return skew; 47 | } 48 | 49 | // calculate derivatives 50 | __host__ __device__ T operator()(const thrust::pair& correspondence) const { 51 | const Eigen::Vector3f& mean_A = thrust::raw_pointer_cast(src_means_ptr)[correspondence.first]; 52 | const Eigen::Matrix3f& cov_A = thrust::raw_pointer_cast(src_covs_ptr)[correspondence.first]; 53 | 54 | if (correspondence.second < 0) { 55 | return thrust::make_tuple(0.0f, Eigen::Matrix::Zero().eval(), Eigen::Matrix::Zero().eval()); 56 | } 57 | 58 | int num_points = thrust::raw_pointer_cast(voxel_num_points_ptr)[correspondence.second]; 59 | const Eigen::Vector3f& mean_B = thrust::raw_pointer_cast(voxel_means_ptr)[correspondence.second]; 60 | const Eigen::Matrix3f& cov_B = thrust::raw_pointer_cast(voxel_covs_ptr)[correspondence.second]; 61 | 62 | if (num_points <= 0) { 63 | return thrust::make_tuple(0.0f, Eigen::Matrix::Zero().eval(), Eigen::Matrix::Zero().eval()); 64 | } 65 | 66 | const auto& trans_eval = *thrust::raw_pointer_cast(trans_eval_ptr); 67 | const auto& trans = *thrust::raw_pointer_cast(trans_ptr); 68 | 69 | Eigen::Matrix3f R_eval = trans_eval.linear(); 70 | Eigen::Matrix3f R = trans.linear(); 71 | Eigen::Vector3f t = trans.translation(); 72 | 73 | const Eigen::Vector3f transed_mean_A = R * mean_A + t; 74 | 75 | Eigen::Matrix3f RCR = R_eval * cov_A * R_eval.transpose(); 76 | Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse(); 77 | 78 | float w = sqrtf(num_points); 79 | Eigen::Vector3f error = mean_B - transed_mean_A; 80 | 81 | Eigen::Matrix dtdx0; 82 | dtdx0.block<3, 3>(0, 0) = skew_symmetric(transed_mean_A); 83 | dtdx0.block<3, 3>(0, 3) = -Eigen::Matrix3f::Identity(); 84 | 85 | Eigen::Matrix J = dtdx0; 86 | 87 | Eigen::Matrix H = w * J.transpose() * RCR_inv * J; 88 | Eigen::Matrix b = w * J.transpose() * RCR_inv * error; 89 | 90 | float err = w * error.transpose() * RCR_inv * error; 91 | return thrust::make_tuple(err, H, b); 92 | } 93 | 94 | thrust::device_ptr trans_eval_ptr; 95 | thrust::device_ptr trans_ptr; 96 | 97 | thrust::device_ptr src_means_ptr; 98 | thrust::device_ptr src_covs_ptr; 99 | 100 | thrust::device_ptr voxel_num_points_ptr; 101 | thrust::device_ptr voxel_means_ptr; 102 | thrust::device_ptr voxel_covs_ptr; 103 | }; 104 | 105 | template <> 106 | __host__ __device__ float compute_derivatives_kernel::operator()(const thrust::pair& correspondence) const { 107 | const Eigen::Vector3f& mean_A = thrust::raw_pointer_cast(src_means_ptr)[correspondence.first]; 108 | const Eigen::Matrix3f& cov_A = thrust::raw_pointer_cast(src_covs_ptr)[correspondence.first]; 109 | const int voxel_index = correspondence.second; 110 | 111 | if (voxel_index < 0) { 112 | return 0.0f; 113 | } 114 | 115 | int num_points = thrust::raw_pointer_cast(voxel_num_points_ptr)[voxel_index]; 116 | const Eigen::Vector3f& mean_B = thrust::raw_pointer_cast(voxel_means_ptr)[voxel_index]; 117 | const Eigen::Matrix3f& cov_B = thrust::raw_pointer_cast(voxel_covs_ptr)[voxel_index]; 118 | 119 | const auto& trans_eval = *thrust::raw_pointer_cast(trans_eval_ptr); 120 | const auto& trans = *thrust::raw_pointer_cast(trans_ptr); 121 | 122 | Eigen::Matrix3f R_eval = trans_eval.linear(); 123 | Eigen::Matrix3f R = trans.linear(); 124 | Eigen::Vector3f t = trans.translation(); 125 | 126 | const Eigen::Vector3f transed_mean_A = R * mean_A + t; 127 | 128 | Eigen::Matrix3f RCR = R_eval * cov_A * R_eval.transpose(); 129 | Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse(); 130 | 131 | float w = sqrtf(num_points); 132 | Eigen::Vector3f error = mean_B - transed_mean_A; 133 | 134 | return w * error.transpose() * RCR_inv * error; 135 | } 136 | 137 | struct sum_errors_kernel { 138 | template 139 | __host__ __device__ Tuple operator()(const Tuple& lhs, const Tuple& rhs) { 140 | return thrust::make_tuple(thrust::get<0>(lhs) + thrust::get<0>(rhs), thrust::get<1>(lhs) + thrust::get<1>(rhs), thrust::get<2>(lhs) + thrust::get<2>(rhs)); 141 | } 142 | }; 143 | 144 | template <> 145 | __host__ __device__ float sum_errors_kernel::operator()(const float& lhs, const float& rhs) { 146 | return lhs + rhs; 147 | } 148 | 149 | } // namespace 150 | 151 | double compute_derivatives( 152 | const thrust::device_vector& src_points, 153 | const thrust::device_vector& src_covs, 154 | const GaussianVoxelMap& voxelmap, 155 | const thrust::device_vector>& voxel_correspondences, 156 | const thrust::device_ptr& linearized_x_ptr, 157 | const thrust::device_ptr& x_ptr, 158 | Eigen::Matrix* H, 159 | Eigen::Matrix* b) { 160 | if (H == nullptr || b == nullptr) { 161 | float sum_errors = thrust::transform_reduce( 162 | voxel_correspondences.begin(), 163 | voxel_correspondences.end(), 164 | compute_derivatives_kernel(voxelmap, src_points, src_covs, linearized_x_ptr, x_ptr), 165 | 0.0f, 166 | sum_errors_kernel()); 167 | 168 | return sum_errors; 169 | } 170 | 171 | auto sum_errors = thrust::transform_reduce( 172 | voxel_correspondences.begin(), 173 | voxel_correspondences.end(), 174 | compute_derivatives_kernel, Eigen::Matrix>>(voxelmap, src_points, src_covs, linearized_x_ptr, x_ptr), 175 | thrust::make_tuple(0.0f, Eigen::Matrix::Zero().eval(), Eigen::Matrix::Zero().eval()), 176 | sum_errors_kernel()); 177 | 178 | if (H && b) { 179 | *H = thrust::get<1>(sum_errors).cast(); 180 | *b = thrust::get<2>(sum_errors).cast(); 181 | } 182 | 183 | return thrust::get<0>(sum_errors); 184 | } 185 | } // namespace cuda 186 | } // namespace fast_gicp 187 | -------------------------------------------------------------------------------- /src/fast_gicp/cuda/compute_mahalanobis.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace fast_gicp { 6 | namespace cuda { 7 | 8 | namespace { 9 | 10 | struct compute_mahalanobis_kernel { 11 | compute_mahalanobis_kernel(const GaussianVoxelMap& voxelmap, const Eigen::Isometry3f& x) 12 | : R(x.linear()), 13 | t(x.translation()), 14 | voxel_num_points_ptr(voxelmap.num_points.data()), 15 | voxel_means_ptr(voxelmap.voxel_means.data()), 16 | voxel_covs_ptr(voxelmap.voxel_covs.data()) 17 | {} 18 | 19 | __host__ __device__ 20 | Eigen::Matrix3f operator() (const thrust::tuple& input) const { 21 | const Eigen::Vector3f& mean_A = thrust::get<0>(input); 22 | const Eigen::Matrix3f& cov_A = thrust::get<1>(input); 23 | int voxel_index = thrust::get<2>(input); 24 | 25 | if(voxel_index < 0) { 26 | return Eigen::Matrix3f::Identity(); 27 | } 28 | 29 | const Eigen::Matrix3f& cov_B = thrust::raw_pointer_cast(voxel_covs_ptr)[voxel_index]; 30 | Eigen::Matrix3f RCR = cov_B + R * cov_A* R.transpose(); 31 | return RCR.inverse(); 32 | } 33 | 34 | const Eigen::Matrix3f R; 35 | const Eigen::Vector3f t; 36 | 37 | thrust::device_ptr voxel_num_points_ptr; 38 | thrust::device_ptr voxel_means_ptr; 39 | thrust::device_ptr voxel_covs_ptr; 40 | }; 41 | } 42 | 43 | 44 | void compute_mahalanobis( 45 | const thrust::device_vector& src_points, 46 | const thrust::device_vector& src_covs, 47 | const GaussianVoxelMap& voxelmap, 48 | const thrust::device_vector& voxel_correspondences, 49 | const Eigen::Isometry3f& linearized_x, 50 | thrust::device_vector& mahalanobis 51 | ) { 52 | mahalanobis.resize(src_points.size()); 53 | thrust::transform( 54 | thrust::make_zip_iterator(thrust::make_tuple(src_points.begin(), src_covs.begin(), voxel_correspondences.begin())), 55 | thrust::make_zip_iterator(thrust::make_tuple(src_points.end(), src_covs.end(), voxel_correspondences.end())), 56 | mahalanobis.begin(), 57 | compute_mahalanobis_kernel(voxelmap, linearized_x) 58 | ); 59 | 60 | thrust::host_vector corrs = voxel_correspondences; 61 | thrust::host_vector> h_maha = mahalanobis; 62 | std::ofstream ofs("/tmp/vgicp_cuda_mahalanobis.txt"); 63 | for(int i=0; i 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | namespace fast_gicp { 13 | namespace cuda { 14 | 15 | namespace { 16 | struct covariance_estimation_kernel { 17 | covariance_estimation_kernel(const thrust::device_vector& points, int k, const thrust::device_vector& k_neighbors, thrust::device_vector& covariances) 18 | : k(k), points_ptr(points.data()), k_neighbors_ptr(k_neighbors.data()), covariances_ptr(covariances.data()) {} 19 | 20 | __host__ __device__ void operator()(int idx) const { 21 | // target points buffer & nn output buffer 22 | const Eigen::Vector3f* points = thrust::raw_pointer_cast(points_ptr); 23 | const int* k_neighbors = thrust::raw_pointer_cast(k_neighbors_ptr) + idx * k; 24 | Eigen::Matrix3f* cov = thrust::raw_pointer_cast(covariances_ptr) + idx; 25 | 26 | Eigen::Vector3f mean(0.0f, 0.0f, 0.0f); 27 | cov->setZero(); 28 | for(int i = 0; i < k; i++) { 29 | const auto& pt = points[k_neighbors[i]]; 30 | mean += pt; 31 | (*cov) += pt * pt.transpose(); 32 | } 33 | mean /= k; 34 | (*cov) = (*cov) / k - mean * mean.transpose(); 35 | } 36 | 37 | const int k; 38 | thrust::device_ptr points_ptr; 39 | thrust::device_ptr k_neighbors_ptr; 40 | 41 | thrust::device_ptr covariances_ptr; 42 | }; 43 | } // namespace 44 | 45 | void covariance_estimation(const thrust::device_vector& points, int k, const thrust::device_vector& k_neighbors, thrust::device_vector& covariances) { 46 | thrust::device_vector d_indices(points.size()); 47 | thrust::sequence(d_indices.begin(), d_indices.end()); 48 | 49 | covariances.resize(points.size()); 50 | thrust::for_each(d_indices.begin(), d_indices.end(), covariance_estimation_kernel(points, k, k_neighbors, covariances)); 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/fast_gicp/cuda/covariance_estimation_rbf.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | namespace fast_gicp { 9 | namespace cuda { 10 | 11 | struct NormalDistribution { 12 | public: 13 | EIGEN_MAKE_ALIGNED_OPERATOR_NEW 14 | 15 | __host__ __device__ NormalDistribution() {} 16 | 17 | static __host__ __device__ NormalDistribution zero() { 18 | NormalDistribution dist; 19 | dist.sum_weights = 0.0f; 20 | dist.mean.setZero(); 21 | dist.cov.setZero(); 22 | return dist; 23 | } 24 | 25 | __host__ __device__ NormalDistribution operator+(const NormalDistribution& rhs) const { 26 | NormalDistribution sum; 27 | sum.sum_weights = sum_weights + rhs.sum_weights; 28 | sum.mean = mean + rhs.mean; 29 | sum.cov = cov + rhs.cov; 30 | return sum; 31 | } 32 | 33 | __host__ __device__ NormalDistribution& operator+=(const NormalDistribution& rhs) { 34 | sum_weights += rhs.sum_weights; 35 | mean += rhs.mean; 36 | cov += rhs.cov; 37 | return *this; 38 | } 39 | 40 | __host__ __device__ void accumulate(const float w, const Eigen::Vector3f& x) { 41 | sum_weights += w; 42 | mean += w * x; 43 | cov += w * x * x.transpose(); 44 | } 45 | 46 | __host__ __device__ NormalDistribution& finalize() { 47 | Eigen::Vector3f sum_pt = mean; 48 | mean /= sum_weights; 49 | cov = (cov - mean * sum_pt.transpose()) / sum_weights; 50 | 51 | return *this; 52 | } 53 | 54 | float sum_weights; 55 | Eigen::Vector3f mean; 56 | Eigen::Matrix3f cov; 57 | }; 58 | 59 | struct covariance_estimation_kernel { 60 | static const int BLOCK_SIZE = 512; 61 | 62 | covariance_estimation_kernel(thrust::device_ptr exp_factor_ptr, thrust::device_ptr max_dist_ptr, thrust::device_ptr points_ptr) 63 | : exp_factor_ptr(exp_factor_ptr), 64 | max_dist_ptr(max_dist_ptr), 65 | points_ptr(points_ptr) {} 66 | 67 | __host__ __device__ NormalDistribution operator()(const Eigen::Vector3f& x) const { 68 | const float exp_factor = *thrust::raw_pointer_cast(exp_factor_ptr); 69 | const float max_dist = *thrust::raw_pointer_cast(max_dist_ptr); 70 | const float max_dist_sq = max_dist * max_dist; 71 | const Eigen::Vector3f* points = thrust::raw_pointer_cast(points_ptr); 72 | 73 | NormalDistribution dist = NormalDistribution::zero(); 74 | for (int i = 0; i < BLOCK_SIZE; i++) { 75 | float sq_d = (x - points[i]).squaredNorm(); 76 | if (sq_d > max_dist_sq) { 77 | continue; 78 | } 79 | 80 | float w = expf(-exp_factor * sq_d); 81 | dist.accumulate(w, points[i]); 82 | } 83 | 84 | return dist; 85 | } 86 | 87 | thrust::device_ptr exp_factor_ptr; 88 | thrust::device_ptr max_dist_ptr; 89 | thrust::device_ptr points_ptr; 90 | }; 91 | 92 | struct finalization_kernel { 93 | finalization_kernel(const int stride, const thrust::device_vector& accumulated_dists) 94 | : stride(stride), 95 | accumulated_dists_first(accumulated_dists.data()), 96 | accumulated_dists_last(accumulated_dists.data() + accumulated_dists.size()) {} 97 | 98 | __host__ __device__ Eigen::Matrix3f operator()(int index) const { 99 | const NormalDistribution* dists = thrust::raw_pointer_cast(accumulated_dists_first); 100 | const NormalDistribution* dists_last = thrust::raw_pointer_cast(accumulated_dists_last); 101 | const int num_dists = dists_last - dists; 102 | 103 | NormalDistribution sum = dists[index]; 104 | for (int dist_index = index + stride; dist_index < num_dists; dist_index += stride) { 105 | sum += dists[dist_index]; 106 | } 107 | 108 | return sum.finalize().cov; 109 | } 110 | 111 | const int stride; 112 | thrust::device_ptr accumulated_dists_first; 113 | thrust::device_ptr accumulated_dists_last; 114 | }; 115 | 116 | void covariance_estimation_rbf(const thrust::device_vector& points, double kernel_width, double max_dist, thrust::device_vector& covariances) { 117 | covariances.resize(points.size()); 118 | 119 | thrust::device_vector constants(2); 120 | constants[0] = kernel_width; 121 | constants[1] = max_dist; 122 | thrust::device_ptr exp_factor_ptr = constants.data(); 123 | thrust::device_ptr max_dist_ptr = constants.data() + 1; 124 | 125 | int num_blocks = (points.size() + (covariance_estimation_kernel::BLOCK_SIZE - 1)) / covariance_estimation_kernel::BLOCK_SIZE; 126 | // padding 127 | thrust::device_vector ext_points(num_blocks * covariance_estimation_kernel::BLOCK_SIZE); 128 | thrust::copy(points.begin(), points.end(), ext_points.begin()); 129 | thrust::fill(ext_points.begin() + points.size(), ext_points.end(), Eigen::Vector3f(0.0f, 0.0f, 0.0f)); 130 | 131 | thrust::device_vector accumulated_dists(points.size() * num_blocks); 132 | 133 | thrust::system::cuda::detail::unique_stream stream; 134 | std::vector events(num_blocks); 135 | 136 | // accumulate kerneled point distributions 137 | for (int i = 0; i < num_blocks; i++) { 138 | covariance_estimation_kernel kernel(exp_factor_ptr, max_dist_ptr, ext_points.data() + covariance_estimation_kernel::BLOCK_SIZE * i); 139 | auto event = thrust::async::transform(points.begin(), points.end(), accumulated_dists.begin() + points.size() * i, kernel); 140 | events[i] = std::move(event); 141 | thrust::system::cuda::detail::create_dependency(stream, events[i]); 142 | } 143 | 144 | // finalize distributions 145 | thrust::transform( 146 | thrust::cuda::par.on(stream.native_handle()), 147 | thrust::counting_iterator(0), 148 | thrust::counting_iterator(points.size()), 149 | covariances.begin(), 150 | finalization_kernel(points.size(), accumulated_dists)); 151 | } 152 | 153 | } // namespace cuda 154 | } // namespace fast_gicp -------------------------------------------------------------------------------- /src/fast_gicp/cuda/covariance_regularization.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace fast_gicp { 11 | namespace cuda { 12 | 13 | namespace { 14 | 15 | struct svd_kernel { 16 | __host__ __device__ thrust::tuple operator()(const thrust::tuple& input) const { 17 | const auto& mean = thrust::get<0>(input); 18 | const auto& cov = thrust::get<1>(input); 19 | 20 | Eigen::SelfAdjointEigenSolver eig; 21 | eig.computeDirect(cov); 22 | 23 | return thrust::make_tuple(mean, eig.eigenvalues(), eig.eigenvectors(), thrust::get<2>(input)); 24 | } 25 | }; 26 | 27 | struct eigenvalue_filter_kernel { 28 | __host__ __device__ bool operator()(const thrust::tuple& input) const { 29 | const auto& values = thrust::get<1>(input); 30 | return values[1] > values[2] * 0.1; 31 | } 32 | }; 33 | 34 | struct svd_reconstruction_kernel { 35 | svd_reconstruction_kernel( 36 | const thrust::device_ptr& values_diag, 37 | thrust::device_vector& covariances) 38 | : values_diag_ptr(values_diag), covariances_ptr(covariances.data()) {} 39 | __host__ __device__ void operator()(const thrust::tuple& input) const { 40 | const auto& mean = thrust::get<0>(input); 41 | const auto& values = thrust::get<1>(input); 42 | const auto& vecs = thrust::get<2>(input); 43 | const auto idx = thrust::get<3>(input); 44 | 45 | Eigen::Matrix3f vecs_inv = vecs.inverse(); 46 | const auto& values_diag = *thrust::raw_pointer_cast(values_diag_ptr); 47 | Eigen::Matrix3f* cov = thrust::raw_pointer_cast(covariances_ptr) + idx; 48 | *cov = (vecs * values_diag * vecs_inv).eval(); 49 | } 50 | const thrust::device_ptr values_diag_ptr; 51 | thrust::device_ptr covariances_ptr; 52 | }; 53 | 54 | 55 | 56 | struct covariance_regularization_svd { 57 | __host__ __device__ void operator()(Eigen::Matrix3f& cov) const { 58 | Eigen::SelfAdjointEigenSolver eig; 59 | eig.computeDirect(cov); 60 | 61 | // why this doen't work...??? 62 | // cov = eig.eigenvectors() * values.asDiagonal() * eig.eigenvectors().inverse(); 63 | Eigen::Matrix3f values = Eigen::Vector3f(1e-3, 1, 1).asDiagonal(); 64 | Eigen::Matrix3f v_inv = eig.eigenvectors().inverse(); 65 | cov = eig.eigenvectors() * values * v_inv; 66 | 67 | // JacobiSVD is not supported on CUDA 68 | // Eigen::JacobiSVD(cov, Eigen::ComputeFullU | Eigen::ComputeFullV); 69 | // Eigen::Vector3f values(1, 1, 1e-3); 70 | // cov = svd.matrixU() * values.asDiagonal() * svd.matrixV().transpose(); 71 | } 72 | }; 73 | 74 | struct covariance_regularization_frobenius { 75 | __host__ __device__ void operator()(Eigen::Matrix3f& cov) const { 76 | float lambda = 1e-3; 77 | Eigen::Matrix3f C = cov + lambda * Eigen::Matrix3f::Identity(); 78 | Eigen::Matrix3f C_inv = C.inverse(); 79 | Eigen::Matrix3f C_norm = (C_inv / C_inv.norm()).inverse(); 80 | cov = C_norm; 81 | } 82 | }; 83 | 84 | struct covariance_regularization_mineig { 85 | __host__ __device__ void operator()(Eigen::Matrix3f& cov) const { 86 | Eigen::SelfAdjointEigenSolver eig; 87 | eig.computeDirect(cov); 88 | 89 | Eigen::Vector3f values = eig.eigenvalues(); 90 | for (int i = 0; i < 3; i++) { 91 | values[i] = fmaxf(1e-3f, values[i]); 92 | } 93 | 94 | Eigen::Matrix3f v_diag = Eigen::Matrix3f::Zero(); 95 | v_diag(0,0) = values.x(); 96 | v_diag(1,1) = values.y(); 97 | v_diag(2,2) = values.z(); 98 | Eigen::Matrix3f v_inv = eig.eigenvectors().inverse(); 99 | cov = eig.eigenvectors() * v_diag * v_inv; 100 | } 101 | }; 102 | 103 | } // namespace 104 | 105 | void covariance_regularization(thrust::device_vector& means, thrust::device_vector& covs, RegularizationMethod method) { 106 | if (method == RegularizationMethod::PLANE) { 107 | thrust::device_vector d_indices(covs.size()); 108 | thrust::sequence(d_indices.begin(), d_indices.end()); 109 | auto first = thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(means.begin(), covs.begin(), d_indices.begin())), svd_kernel()); 110 | auto last = thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(means.end(), covs.end(), d_indices.end())), svd_kernel()); 111 | 112 | Eigen::Matrix3f diag_matrix = Eigen::Vector3f(1e-3f, 1.0f, 1.0f).asDiagonal(); 113 | thrust::device_vector val(1); 114 | val[0] = diag_matrix; 115 | thrust::device_ptr diag_matrix_ptr = val.data(); 116 | thrust::for_each(first, last, svd_reconstruction_kernel(diag_matrix_ptr, covs)); 117 | 118 | } else if (method == RegularizationMethod::FROBENIUS) { 119 | thrust::for_each(covs.begin(), covs.end(), covariance_regularization_frobenius()); 120 | } else if (method == RegularizationMethod::MIN_EIG) { 121 | thrust::for_each(covs.begin(), covs.end(), covariance_regularization_mineig()); 122 | } else { 123 | std::cerr << "unimplemented covariance regularization method was selected!!" << std::endl; 124 | } 125 | } 126 | 127 | } // namespace cuda 128 | } // namespace fast_gicp -------------------------------------------------------------------------------- /src/fast_gicp/cuda/find_voxel_correspondences.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace fast_gicp { 12 | namespace cuda { 13 | 14 | namespace { 15 | 16 | struct find_voxel_correspondences_kernel { 17 | find_voxel_correspondences_kernel( 18 | const GaussianVoxelMap& voxelmap, 19 | const thrust::device_vector& src_points, 20 | const thrust::device_ptr& trans_ptr, 21 | thrust::device_ptr offset_ptr) 22 | : trans_ptr(trans_ptr), 23 | offset_ptr(offset_ptr), 24 | src_points_ptr(src_points.data()), 25 | voxelmap_info_ptr(voxelmap.voxelmap_info_ptr.data()), 26 | buckets_ptr(voxelmap.buckets.data()), 27 | voxel_num_points_ptr(voxelmap.num_points.data()), 28 | voxel_means_ptr(voxelmap.voxel_means.data()), 29 | voxel_covs_ptr(voxelmap.voxel_covs.data()) {} 30 | 31 | // lookup voxel 32 | __host__ __device__ int lookup_voxel(const Eigen::Vector3f& x) const { 33 | const VoxelMapInfo& voxelmap_info = *thrust::raw_pointer_cast(voxelmap_info_ptr); 34 | const auto& offset = *thrust::raw_pointer_cast(offset_ptr); 35 | 36 | Eigen::Vector3i coord = calc_voxel_coord(x, voxelmap_info.voxel_resolution) + offset; 37 | uint64_t hash = vector3i_hash(coord); 38 | 39 | for(int i = 0; i < voxelmap_info.max_bucket_scan_count; i++) { 40 | uint64_t bucket_index = (hash + i) % voxelmap_info.num_buckets; 41 | const thrust::pair& bucket = thrust::raw_pointer_cast(buckets_ptr)[bucket_index]; 42 | 43 | if(bucket.second < 0) { 44 | return -1; 45 | } 46 | 47 | if(bucket.first == coord) { 48 | return bucket.second; 49 | } 50 | } 51 | 52 | return -1; 53 | } 54 | 55 | __host__ __device__ thrust::pair operator()(int src_index) const { 56 | const auto& trans = *thrust::raw_pointer_cast(trans_ptr); 57 | 58 | const auto& pt = thrust::raw_pointer_cast(src_points_ptr)[src_index]; 59 | return thrust::make_pair(src_index, lookup_voxel(trans.linear() * pt + trans.translation())); 60 | } 61 | 62 | const thrust::device_ptr trans_ptr; 63 | const thrust::device_ptr offset_ptr; 64 | 65 | const thrust::device_ptr src_points_ptr; 66 | 67 | thrust::device_ptr voxelmap_info_ptr; 68 | thrust::device_ptr> buckets_ptr; 69 | 70 | thrust::device_ptr voxel_num_points_ptr; 71 | thrust::device_ptr voxel_means_ptr; 72 | thrust::device_ptr voxel_covs_ptr; 73 | }; 74 | 75 | struct invalid_correspondence_kernel { 76 | __host__ __device__ bool operator() (const thrust::pair& corr) const { 77 | return corr.first < 0 || corr.second < 0; 78 | } 79 | }; 80 | 81 | } // namespace 82 | 83 | void find_voxel_correspondences( 84 | const thrust::device_vector& src_points, 85 | const GaussianVoxelMap& voxelmap, 86 | const thrust::device_ptr& x_ptr, 87 | const thrust::device_vector& offsets, 88 | thrust::device_vector>& correspondences) { 89 | std::vector events(offsets.size()); 90 | 91 | // find correspondences 92 | correspondences.resize(src_points.size() * offsets.size()); 93 | for(int i=0; i(0), 96 | thrust::counting_iterator(src_points.size()), 97 | correspondences.begin() + src_points.size() * i, 98 | find_voxel_correspondences_kernel(voxelmap, src_points, x_ptr, offsets.data() + i)); 99 | 100 | events[i] = std::move(event); 101 | } 102 | 103 | // synchronize 104 | for(auto& event: events) { 105 | event.wait(); 106 | } 107 | 108 | // remove invlid correspondences 109 | auto remove_loc = thrust::remove_if(correspondences.begin(), correspondences.end(), invalid_correspondence_kernel()); 110 | correspondences.erase(remove_loc, correspondences.end()); 111 | } 112 | 113 | } // namespace cuda 114 | } // namespace fast_gicp 115 | -------------------------------------------------------------------------------- /src/fast_gicp/cuda/ndt_compute_derivatives.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace fast_gicp { 6 | namespace cuda { 7 | 8 | namespace { 9 | 10 | __host__ __device__ float huber(float k, float x) { 11 | float abs_x = fabsf(x); 12 | return abs_x <= k ? 1.0 : k / abs_x; 13 | } 14 | 15 | __host__ __device__ float cauchy(float k, float x) { 16 | float k_sq = k * k; 17 | return k_sq / (k_sq + x * x); 18 | } 19 | 20 | // skew symmetric matrix 21 | __host__ __device__ Eigen::Matrix3f skew_symmetric(const Eigen::Vector3f& x) { 22 | Eigen::Matrix3f skew = Eigen::Matrix3f::Zero(); 23 | skew(0, 1) = -x[2]; 24 | skew(0, 2) = x[1]; 25 | skew(1, 0) = x[2]; 26 | skew(1, 2) = -x[0]; 27 | skew(2, 0) = -x[1]; 28 | skew(2, 1) = x[0]; 29 | 30 | return skew; 31 | } 32 | 33 | struct p2d_ndt_compute_derivatives_kernel { 34 | p2d_ndt_compute_derivatives_kernel( 35 | const GaussianVoxelMap& target_voxelmap, 36 | const thrust::device_vector& source_points, 37 | const thrust::device_ptr& x_eval_ptr, 38 | const thrust::device_ptr& x_ptr) 39 | : trans_eval_ptr(x_eval_ptr), 40 | trans_ptr(x_ptr), 41 | src_means_ptr(source_points.data()), 42 | voxelmap_info_ptr(target_voxelmap.voxelmap_info_ptr.data()), 43 | voxel_num_points_ptr(target_voxelmap.num_points.data()), 44 | voxel_means_ptr(target_voxelmap.voxel_means.data()), 45 | voxel_covs_ptr(target_voxelmap.voxel_covs.data()) {} 46 | 47 | // Here, we use simple point-to-distribution MLE distance equivalent to [Biber, IROS2003] 48 | // Because this formulation can be more sensitive to outliers compared to another formulation based on 49 | // Gaussian + uniform distribution [Magnusson, 2009], we use some robust kernels to filter out outliers 50 | __host__ __device__ thrust::tuple, Eigen::Matrix> operator()(const thrust::pair& correspondence) const { 51 | const Eigen::Vector3f& mean_A = thrust::raw_pointer_cast(src_means_ptr)[correspondence.first]; 52 | 53 | if (correspondence.second < 0) { 54 | return thrust::make_tuple(0.0f, Eigen::Matrix::Zero().eval(), Eigen::Matrix::Zero().eval()); 55 | } 56 | 57 | int num_points = thrust::raw_pointer_cast(voxel_num_points_ptr)[correspondence.second]; 58 | const Eigen::Vector3f& mean_B = thrust::raw_pointer_cast(voxel_means_ptr)[correspondence.second]; 59 | const Eigen::Matrix3f& cov_B = thrust::raw_pointer_cast(voxel_covs_ptr)[correspondence.second]; 60 | 61 | if (num_points <= 6) { 62 | return thrust::make_tuple(0.0f, Eigen::Matrix::Zero().eval(), Eigen::Matrix::Zero().eval()); 63 | } 64 | 65 | const auto& trans_eval = *thrust::raw_pointer_cast(trans_eval_ptr); 66 | const auto& trans = *thrust::raw_pointer_cast(trans_ptr); 67 | 68 | Eigen::Matrix3f R_eval = trans_eval.linear(); 69 | Eigen::Matrix3f R = trans.linear(); 70 | Eigen::Vector3f t = trans.translation(); 71 | 72 | const Eigen::Vector3f transed_mean_A = R * mean_A + t; 73 | 74 | Eigen::Matrix3f RCR_inv = cov_B.inverse(); 75 | 76 | Eigen::Vector3f error = mean_B - transed_mean_A; 77 | 78 | float w = cauchy(thrust::raw_pointer_cast(voxelmap_info_ptr)->voxel_resolution, error.norm()); 79 | float err = w * error.transpose() * RCR_inv * error; 80 | 81 | Eigen::Matrix dtdx0; 82 | dtdx0.block<3, 3>(0, 0) = skew_symmetric(transed_mean_A); 83 | dtdx0.block<3, 3>(0, 3) = -Eigen::Matrix3f::Identity(); 84 | 85 | Eigen::Matrix J = dtdx0; 86 | 87 | Eigen::Matrix H = w * J.transpose() * RCR_inv * J; 88 | Eigen::Matrix b = w * J.transpose() * RCR_inv * error; 89 | 90 | return thrust::make_tuple(err, H, b); 91 | } 92 | 93 | thrust::device_ptr trans_eval_ptr; 94 | thrust::device_ptr trans_ptr; 95 | 96 | thrust::device_ptr src_means_ptr; 97 | 98 | thrust::device_ptr voxelmap_info_ptr; 99 | thrust::device_ptr voxel_num_points_ptr; 100 | thrust::device_ptr voxel_means_ptr; 101 | thrust::device_ptr voxel_covs_ptr; 102 | }; 103 | 104 | struct d2d_ndt_compute_derivatives_kernel { 105 | d2d_ndt_compute_derivatives_kernel( 106 | const GaussianVoxelMap& target_voxelmap, 107 | const GaussianVoxelMap& source_voxelmap, 108 | const thrust::device_ptr& x_eval_ptr, 109 | const thrust::device_ptr& x_ptr) 110 | : trans_eval_ptr(x_eval_ptr), 111 | trans_ptr(x_ptr), 112 | src_means_ptr(source_voxelmap.voxel_means.data()), 113 | src_covs_ptr(source_voxelmap.voxel_covs.data()), 114 | voxelmap_info_ptr(target_voxelmap.voxelmap_info_ptr.data()), 115 | voxel_num_points_ptr(target_voxelmap.num_points.data()), 116 | voxel_means_ptr(target_voxelmap.voxel_means.data()), 117 | voxel_covs_ptr(target_voxelmap.voxel_covs.data()) {} 118 | 119 | // calculate derivatives 120 | __host__ __device__ thrust::tuple, Eigen::Matrix> operator()(const thrust::pair& correspondence) const { 121 | const Eigen::Vector3f& mean_A = thrust::raw_pointer_cast(src_means_ptr)[correspondence.first]; 122 | const Eigen::Matrix3f& cov_A = thrust::raw_pointer_cast(src_covs_ptr)[correspondence.first]; 123 | 124 | if (correspondence.second < 0) { 125 | return thrust::make_tuple(0.0f, Eigen::Matrix::Zero().eval(), Eigen::Matrix::Zero().eval()); 126 | } 127 | 128 | int num_points = thrust::raw_pointer_cast(voxel_num_points_ptr)[correspondence.second]; 129 | const Eigen::Vector3f& mean_B = thrust::raw_pointer_cast(voxel_means_ptr)[correspondence.second]; 130 | const Eigen::Matrix3f& cov_B = thrust::raw_pointer_cast(voxel_covs_ptr)[correspondence.second]; 131 | 132 | if (num_points <= 6) { 133 | return thrust::make_tuple(0.0f, Eigen::Matrix::Zero().eval(), Eigen::Matrix::Zero().eval()); 134 | } 135 | 136 | const auto& trans_eval = *thrust::raw_pointer_cast(trans_eval_ptr); 137 | const auto& trans = *thrust::raw_pointer_cast(trans_ptr); 138 | 139 | Eigen::Matrix3f R_eval = trans_eval.linear(); 140 | Eigen::Matrix3f R = trans.linear(); 141 | Eigen::Vector3f t = trans.translation(); 142 | 143 | const Eigen::Vector3f transed_mean_A = R * mean_A + t; 144 | 145 | Eigen::Matrix3f RCR = R_eval * cov_A * R_eval.transpose(); 146 | Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse(); 147 | 148 | Eigen::Vector3f error = mean_B - transed_mean_A; 149 | 150 | float w = cauchy(thrust::raw_pointer_cast(voxelmap_info_ptr)->voxel_resolution, error.norm()); 151 | float err = w * error.transpose() * RCR_inv * error; 152 | 153 | Eigen::Matrix dtdx0; 154 | dtdx0.block<3, 3>(0, 0) = skew_symmetric(transed_mean_A); 155 | dtdx0.block<3, 3>(0, 3) = -Eigen::Matrix3f::Identity(); 156 | 157 | Eigen::Matrix J = dtdx0; 158 | 159 | Eigen::Matrix H = w * J.transpose() * RCR_inv * J; 160 | Eigen::Matrix b = w * J.transpose() * RCR_inv * error; 161 | 162 | return thrust::make_tuple(err, H, b); 163 | } 164 | 165 | thrust::device_ptr trans_eval_ptr; 166 | thrust::device_ptr trans_ptr; 167 | 168 | thrust::device_ptr src_means_ptr; 169 | thrust::device_ptr src_covs_ptr; 170 | 171 | thrust::device_ptr voxelmap_info_ptr; 172 | thrust::device_ptr voxel_num_points_ptr; 173 | thrust::device_ptr voxel_means_ptr; 174 | thrust::device_ptr voxel_covs_ptr; 175 | }; 176 | 177 | struct sum_errors_kernel { 178 | using Tuple = thrust::tuple, Eigen::Matrix>; 179 | 180 | __host__ __device__ Tuple operator()(const Tuple& lhs, const Tuple& rhs) { 181 | return thrust::make_tuple(thrust::get<0>(lhs) + thrust::get<0>(rhs), thrust::get<1>(lhs) + thrust::get<1>(rhs), thrust::get<2>(lhs) + thrust::get<2>(rhs)); 182 | } 183 | }; 184 | 185 | } // namespace 186 | 187 | double p2d_ndt_compute_derivatives( 188 | const GaussianVoxelMap& target_voxelmap, 189 | const thrust::device_vector& source_points, 190 | const thrust::device_vector>& correspondences, 191 | const thrust::device_ptr& linearized_x_ptr, 192 | const thrust::device_ptr& x_ptr, 193 | Eigen::Matrix* H, 194 | Eigen::Matrix* b) { 195 | auto sum_errors = thrust::transform_reduce( 196 | correspondences.begin(), 197 | correspondences.end(), 198 | p2d_ndt_compute_derivatives_kernel(target_voxelmap, source_points, linearized_x_ptr, x_ptr), 199 | thrust::make_tuple(0.0f, Eigen::Matrix::Zero().eval(), Eigen::Matrix::Zero().eval()), 200 | sum_errors_kernel()); 201 | 202 | if (H && b) { 203 | *H = thrust::get<1>(sum_errors).cast(); 204 | *b = thrust::get<2>(sum_errors).cast(); 205 | } 206 | 207 | return thrust::get<0>(sum_errors); 208 | } 209 | 210 | double d2d_ndt_compute_derivatives( 211 | const GaussianVoxelMap& target_voxelmap, 212 | const GaussianVoxelMap& source_voxelmap, 213 | const thrust::device_vector>& correspondences, 214 | const thrust::device_ptr& linearized_x_ptr, 215 | const thrust::device_ptr& x_ptr, 216 | Eigen::Matrix* H, 217 | Eigen::Matrix* b) { 218 | auto sum_errors = thrust::transform_reduce( 219 | correspondences.begin(), 220 | correspondences.end(), 221 | d2d_ndt_compute_derivatives_kernel(target_voxelmap, source_voxelmap, linearized_x_ptr, x_ptr), 222 | thrust::make_tuple(0.0f, Eigen::Matrix::Zero().eval(), Eigen::Matrix::Zero().eval()), 223 | sum_errors_kernel()); 224 | 225 | if (H && b) { 226 | *H = thrust::get<1>(sum_errors).cast(); 227 | *b = thrust::get<2>(sum_errors).cast(); 228 | } 229 | 230 | return thrust::get<0>(sum_errors); 231 | } 232 | 233 | } // namespace cuda 234 | } // namespace fast_gicp 235 | -------------------------------------------------------------------------------- /src/fast_gicp/cuda/ndt_cuda.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace fast_gicp { 11 | namespace cuda { 12 | 13 | NDTCudaCore::NDTCudaCore() { 14 | cudaDeviceSynchronize(); 15 | resolution = 1.0; 16 | linearized_x.setIdentity(); 17 | 18 | offsets.reset(new thrust::device_vector(1)); 19 | (*offsets)[0] = Eigen::Vector3i::Zero().eval(); 20 | 21 | distance_mode = fast_gicp::NDTDistanceMode::D2D; 22 | set_neighbor_search_method(fast_gicp::NeighborSearchMethod::DIRECT7, 0.0); 23 | } 24 | 25 | NDTCudaCore::~NDTCudaCore() {} 26 | 27 | void NDTCudaCore::set_distance_mode(fast_gicp::NDTDistanceMode mode) { 28 | this->distance_mode = mode; 29 | } 30 | 31 | void NDTCudaCore::set_resolution(double resolution) { 32 | this->resolution = resolution; 33 | } 34 | 35 | void NDTCudaCore::set_neighbor_search_method(fast_gicp::NeighborSearchMethod method, double radius) { 36 | thrust::host_vector> h_offsets; 37 | 38 | switch (method) { 39 | default: 40 | std::cerr << "here must not be reached" << std::endl; 41 | abort(); 42 | 43 | case fast_gicp::NeighborSearchMethod::DIRECT1: 44 | h_offsets.resize(1); 45 | h_offsets[0] = Eigen::Vector3i::Zero(); 46 | break; 47 | 48 | case fast_gicp::NeighborSearchMethod::DIRECT7: 49 | h_offsets.resize(7); 50 | h_offsets[0] = Eigen::Vector3i(0, 0, 0); 51 | h_offsets[1] = Eigen::Vector3i(1, 0, 0); 52 | h_offsets[2] = Eigen::Vector3i(-1, 0, 0); 53 | h_offsets[3] = Eigen::Vector3i(0, 1, 0); 54 | h_offsets[4] = Eigen::Vector3i(0, -1, 0); 55 | h_offsets[5] = Eigen::Vector3i(0, 0, 1); 56 | h_offsets[6] = Eigen::Vector3i(0, 0, -1); 57 | break; 58 | 59 | case fast_gicp::NeighborSearchMethod::DIRECT27: 60 | h_offsets.reserve(27); 61 | for (int i = 0; i < 3; i++) { 62 | for (int j = 0; j < 3; j++) { 63 | for (int k = 0; k < 3; k++) { 64 | h_offsets.push_back(Eigen::Vector3i(i - 1, j - 1, k - 1)); 65 | } 66 | } 67 | } 68 | break; 69 | 70 | case fast_gicp::NeighborSearchMethod::DIRECT_RADIUS: 71 | h_offsets.reserve(50); 72 | int range = std::ceil(radius); 73 | for (int i = -range; i <= range; i++) { 74 | for (int j = -range; j <= range; j++) { 75 | for (int k = -range; k <= range; k++) { 76 | Eigen::Vector3i offset(i, j, k); 77 | if (offset.cast().norm() <= radius + 1e-3) { 78 | h_offsets.push_back(offset); 79 | } 80 | } 81 | } 82 | } 83 | 84 | break; 85 | } 86 | 87 | *offsets = h_offsets; 88 | } 89 | 90 | void NDTCudaCore::swap_source_and_target() { 91 | source_points.swap(target_points); 92 | source_voxelmap.swap(target_voxelmap); 93 | } 94 | 95 | void NDTCudaCore::set_source_cloud(const std::vector>& cloud) { 96 | thrust::host_vector> points(cloud.begin(), cloud.end()); 97 | if (!source_points) { 98 | source_points.reset(new Points()); 99 | } 100 | 101 | *source_points = points; 102 | source_voxelmap.reset(); 103 | } 104 | 105 | void NDTCudaCore::set_target_cloud(const std::vector>& cloud) { 106 | thrust::host_vector> points(cloud.begin(), cloud.end()); 107 | if (!target_points) { 108 | target_points.reset(new Points()); 109 | } 110 | 111 | *target_points = points; 112 | target_voxelmap.reset(); 113 | } 114 | 115 | void NDTCudaCore::create_voxelmaps() { 116 | create_source_voxelmap(); 117 | create_target_voxelmap(); 118 | } 119 | 120 | void NDTCudaCore::create_source_voxelmap() { 121 | assert(source_points); 122 | if (source_voxelmap || distance_mode == fast_gicp::NDTDistanceMode::P2D) { 123 | return; 124 | } 125 | 126 | source_voxelmap.reset(new GaussianVoxelMap(resolution)); 127 | source_voxelmap->create_voxelmap(*source_points); 128 | covariance_regularization(source_voxelmap->voxel_means, source_voxelmap->voxel_covs, fast_gicp::RegularizationMethod::MIN_EIG); 129 | } 130 | 131 | void NDTCudaCore::create_target_voxelmap() { 132 | assert(target_points); 133 | if (target_voxelmap) { 134 | return; 135 | } 136 | 137 | target_voxelmap.reset(new GaussianVoxelMap(resolution)); 138 | target_voxelmap->create_voxelmap(*target_points); 139 | covariance_regularization(target_voxelmap->voxel_means, target_voxelmap->voxel_covs, fast_gicp::RegularizationMethod::MIN_EIG); 140 | } 141 | 142 | void NDTCudaCore::update_correspondences(const Eigen::Isometry3d& trans) { 143 | thrust::device_vector trans_ptr(1); 144 | trans_ptr[0] = trans.cast(); 145 | 146 | if (correspondences == nullptr) { 147 | correspondences.reset(new Correspondences()); 148 | } 149 | linearized_x = trans.cast(); 150 | 151 | switch (distance_mode) { 152 | case fast_gicp::NDTDistanceMode::P2D: 153 | find_voxel_correspondences(*source_points, *target_voxelmap, trans_ptr.data(), *offsets, *correspondences); 154 | break; 155 | 156 | case fast_gicp::NDTDistanceMode::D2D: 157 | find_voxel_correspondences(source_voxelmap->voxel_means, *target_voxelmap, trans_ptr.data(), *offsets, *correspondences); 158 | break; 159 | } 160 | } 161 | 162 | double NDTCudaCore::compute_error(const Eigen::Isometry3d& trans, Eigen::Matrix* H, Eigen::Matrix* b) const { 163 | thrust::host_vector> trans_(2); 164 | trans_[0] = linearized_x; 165 | trans_[1] = trans.cast(); 166 | 167 | thrust::device_vector trans_ptr = trans_; 168 | 169 | switch (distance_mode) { 170 | default: 171 | case fast_gicp::NDTDistanceMode::P2D: 172 | return p2d_ndt_compute_derivatives(*target_voxelmap, *source_points, *correspondences, trans_ptr.data(), trans_ptr.data() + 1, H, b); 173 | 174 | case fast_gicp::NDTDistanceMode::D2D: 175 | return d2d_ndt_compute_derivatives(*target_voxelmap, *source_voxelmap, *correspondences, trans_ptr.data(), trans_ptr.data() + 1, H, b); 176 | } 177 | } 178 | 179 | } // namespace cuda 180 | 181 | } // namespace fast_gicp -------------------------------------------------------------------------------- /src/fast_gicp/gicp/experimental/fast_gicp_mp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // template class fast_gicp::FastGICPMultiPoints; 5 | template class fast_gicp::FastGICPMultiPoints; 6 | -------------------------------------------------------------------------------- /src/fast_gicp/gicp/fast_gicp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | template class fast_gicp::FastGICP; 5 | template class fast_gicp::FastGICP; 6 | template class fast_gicp::FastGICP; 7 | template class fast_gicp::FastGICP; -------------------------------------------------------------------------------- /src/fast_gicp/gicp/fast_gicp_st.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | template class fast_gicp::FastGICPSingleThread; 5 | template class fast_gicp::FastGICPSingleThread; 6 | template class fast_gicp::FastGICPSingleThread; 7 | -------------------------------------------------------------------------------- /src/fast_gicp/gicp/fast_vgicp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | template class fast_gicp::FastVGICP; 5 | template class fast_gicp::FastVGICP; 6 | template class fast_gicp::FastVGICP; 7 | template class fast_gicp::FastVGICP; -------------------------------------------------------------------------------- /src/fast_gicp/gicp/fast_vgicp_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | template class fast_gicp::FastVGICPCuda; 5 | template class fast_gicp::FastVGICPCuda; 6 | template class fast_gicp::FastVGICPCuda; 7 | template class fast_gicp::FastVGICPCuda; -------------------------------------------------------------------------------- /src/fast_gicp/gicp/lsq_registration.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | template class fast_gicp::LsqRegistration; 5 | template class fast_gicp::LsqRegistration; 6 | template class fast_gicp::LsqRegistration; 7 | template class fast_gicp::LsqRegistration; -------------------------------------------------------------------------------- /src/fast_gicp/ndt/ndt_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | template class fast_gicp::NDTCuda; 5 | template class fast_gicp::NDTCuda; 6 | template class fast_gicp::NDTCuda; 7 | -------------------------------------------------------------------------------- /src/kitti.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | #ifdef USE_VGICP_CUDA 18 | #include 19 | #include 20 | #endif 21 | 22 | class KittiLoader { 23 | public: 24 | KittiLoader(const std::string& dataset_path) : dataset_path(dataset_path) { 25 | for (num_frames = 0;; num_frames++) { 26 | std::string filename = (boost::format("%s/%06d.bin") % dataset_path % num_frames).str(); 27 | if (!boost::filesystem::exists(filename)) { 28 | break; 29 | } 30 | } 31 | 32 | if (num_frames == 0) { 33 | std::cerr << "error: no files in " << dataset_path << std::endl; 34 | } 35 | } 36 | ~KittiLoader() {} 37 | 38 | size_t size() const { return num_frames; } 39 | 40 | pcl::PointCloud::ConstPtr cloud(size_t i) const { 41 | std::string filename = (boost::format("%s/%06d.bin") % dataset_path % i).str(); 42 | FILE* file = fopen(filename.c_str(), "rb"); 43 | if (!file) { 44 | std::cerr << "error: failed to load " << filename << std::endl; 45 | return nullptr; 46 | } 47 | 48 | std::vector buffer(1000000); 49 | size_t num_points = fread(reinterpret_cast(buffer.data()), sizeof(float), buffer.size(), file) / 4; 50 | fclose(file); 51 | 52 | pcl::PointCloud::Ptr cloud(new pcl::PointCloud()); 53 | cloud->resize(num_points); 54 | 55 | for (int i = 0; i < num_points; i++) { 56 | auto& pt = cloud->at(i); 57 | pt.x = buffer[i * 4]; 58 | pt.y = buffer[i * 4 + 1]; 59 | pt.z = buffer[i * 4 + 2]; 60 | // pt.intensity = buffer[i * 4 + 3]; 61 | } 62 | 63 | return cloud; 64 | } 65 | 66 | private: 67 | int num_frames; 68 | std::string dataset_path; 69 | }; 70 | 71 | int main(int argc, char** argv) { 72 | if (argc < 2) { 73 | std::cout << "usage: gicp_kitti /your/kitti/path/sequences/00/velodyne" << std::endl; 74 | return 0; 75 | } 76 | 77 | KittiLoader kitti(argv[1]); 78 | 79 | // use downsample_resolution=1.0 for fast registration 80 | double downsample_resolution = 0.25; 81 | pcl::ApproximateVoxelGrid voxelgrid; 82 | voxelgrid.setLeafSize(downsample_resolution, downsample_resolution, downsample_resolution); 83 | 84 | // registration method 85 | // you should fine-tune hyper-parameters (e.g., voxel resolution, max correspondence distance) for the best result 86 | fast_gicp::FastGICP gicp; 87 | // fast_gicp::FastVGICP gicp; 88 | // fast_gicp::FastVGICPCuda gicp; 89 | // fast_gicp::NDTCuda gicp; 90 | // gicp.setResolution(1.0); 91 | // gicp.setNearestNeighborSearchMethod(fast_gicp::NearestNeighborMethod::GPU_RBF_KERNEL); 92 | gicp.setMaxCorrespondenceDistance(1.0); 93 | 94 | // set initial frame as target 95 | voxelgrid.setInputCloud(kitti.cloud(0)); 96 | pcl::PointCloud::Ptr target(new pcl::PointCloud); 97 | voxelgrid.filter(*target); 98 | gicp.setInputTarget(target); 99 | 100 | // sensor pose sequence 101 | std::vector> poses(kitti.size()); 102 | poses[0].setIdentity(); 103 | 104 | // trajectory for visualization 105 | pcl::PointCloud::Ptr trajectory(new pcl::PointCloud); 106 | trajectory->push_back(pcl::PointXYZ(0.0f, 0.0f, 0.0f)); 107 | 108 | pcl::visualization::PCLVisualizer vis; 109 | vis.addPointCloud(trajectory, "trajectory"); 110 | 111 | // for calculating FPS 112 | boost::circular_buffer stamps(30); 113 | stamps.push_back(std::chrono::high_resolution_clock::now()); 114 | 115 | for (int i = 1; i < kitti.size(); i++) { 116 | // set the current frame as source 117 | voxelgrid.setInputCloud(kitti.cloud(i)); 118 | pcl::PointCloud::Ptr source(new pcl::PointCloud); 119 | voxelgrid.filter(*source); 120 | gicp.setInputSource(source); 121 | 122 | // align and swap source and target cloud for next registration 123 | pcl::PointCloud::Ptr aligned(new pcl::PointCloud); 124 | gicp.align(*aligned); 125 | gicp.swapSourceAndTarget(); 126 | 127 | // accumulate pose 128 | poses[i] = poses[i - 1] * gicp.getFinalTransformation().cast(); 129 | 130 | // FPS display 131 | stamps.push_back(std::chrono::high_resolution_clock::now()); 132 | std::cout << stamps.size() / (std::chrono::duration_cast(stamps.back() - stamps.front()).count() / 1e9) << "fps" << std::endl; 133 | 134 | // visualization 135 | trajectory->push_back(pcl::PointXYZ(poses[i](0, 3), poses[i](1, 3), poses[i](2, 3))); 136 | vis.updatePointCloud(trajectory, pcl::visualization::PointCloudColorHandlerCustom(trajectory, 255.0, 0.0, 0.0), "trajectory"); 137 | vis.spinOnce(); 138 | } 139 | 140 | // save the estimated poses 141 | std::ofstream ofs("/tmp/traj.txt"); 142 | for (const auto& pose : poses) { 143 | for (int i = 0; i < 3; i++) { 144 | for (int j = 0; j < 4; j++) { 145 | if (i || j) { 146 | ofs << " "; 147 | } 148 | 149 | ofs << pose(i, j); 150 | } 151 | } 152 | ofs << std::endl; 153 | } 154 | 155 | return 0; 156 | } -------------------------------------------------------------------------------- /src/kitti.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import os 3 | import sys 4 | import time 5 | import numpy 6 | import pygicp 7 | from matplotlib import pyplot 8 | 9 | def main(): 10 | if len(sys.argv) < 2: 11 | print('usage: kitti.py /path/to/kitti/sequences/00/velodyne') 12 | return 13 | 14 | # List input files 15 | seq_path = sys.argv[1] 16 | filenames = sorted([seq_path + '/' + x for x in os.listdir(seq_path) if x.endswith('.bin')]) 17 | 18 | # You can choose any of FastGICP, FastVGICP, FastVGICPCuda, or NDTCuda 19 | reg = pygicp.FastGICP() 20 | 21 | # pygicp classes have the same interface as the C++ version 22 | # reg.set_num_threads(8) 23 | # reg.set_max_correspondence_distance(2.0) 24 | 25 | stamps = [time.time()] # for FPS calculation 26 | poses = [numpy.identity(4)] # sensor trajectory 27 | 28 | for i, filename in enumerate(filenames): 29 | # Read and downsample input cloud 30 | points = numpy.fromfile(filename, dtype=numpy.float32).reshape(-1, 4)[:, :3] 31 | points = pygicp.downsample(points, 0.25) 32 | 33 | if i == 0: 34 | reg.set_input_target(points) 35 | delta = numpy.identity(4) 36 | else: 37 | reg.set_input_source(points) 38 | delta = reg.align() 39 | reg.swap_source_and_target() 40 | 41 | # Accumulate the delta to compute the full sensor trajectory 42 | poses.append(poses[-1].dot(delta)) 43 | 44 | # FPS calculation for the last ten frames 45 | stamps = stamps[-9:] + [time.time()] 46 | print('fps:%.3f' % (len(stamps) / (stamps[-1] - stamps[0]))) 47 | 48 | # Plot the estimated trajectory 49 | traj = numpy.array([x[:3, 3] for x in poses]) 50 | 51 | if i % 30 == 0: 52 | pyplot.clf() 53 | pyplot.plot(traj[:, 0], traj[:, 1]) 54 | pyplot.axis('equal') 55 | pyplot.pause(0.01) 56 | 57 | 58 | if __name__ == '__main__': 59 | main() 60 | -------------------------------------------------------------------------------- /src/python/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #ifdef USE_VGICP_CUDA 11 | #include 12 | #include 13 | #endif 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | namespace py = pybind11; 20 | 21 | fast_gicp::NeighborSearchMethod search_method(const std::string& neighbor_search_method) { 22 | if(neighbor_search_method == "DIRECT1") { 23 | return fast_gicp::NeighborSearchMethod::DIRECT1; 24 | } else if (neighbor_search_method == "DIRECT7") { 25 | return fast_gicp::NeighborSearchMethod::DIRECT7; 26 | } else if (neighbor_search_method == "DIRECT27") { 27 | return fast_gicp::NeighborSearchMethod::DIRECT27; 28 | } else if (neighbor_search_method == "DIRECT_RADIUS") { 29 | return fast_gicp::NeighborSearchMethod::DIRECT_RADIUS; 30 | } 31 | 32 | std::cerr << "error: unknown neighbor search method " << neighbor_search_method << std::endl; 33 | return fast_gicp::NeighborSearchMethod::DIRECT1; 34 | } 35 | 36 | pcl::PointCloud::Ptr eigen2pcl(const Eigen::Matrix& points) { 37 | pcl::PointCloud::Ptr cloud(new pcl::PointCloud); 38 | cloud->resize(points.rows()); 39 | 40 | for(int i=0; iat(i).getVector3fMap() = points.row(i).cast(); 42 | } 43 | return cloud; 44 | } 45 | 46 | Eigen::Matrix downsample(const Eigen::Matrix& points, double downsample_resolution) { 47 | auto cloud = eigen2pcl(points); 48 | 49 | pcl::ApproximateVoxelGrid voxelgrid; 50 | voxelgrid.setLeafSize(downsample_resolution, downsample_resolution, downsample_resolution); 51 | voxelgrid.setInputCloud(cloud); 52 | 53 | pcl::PointCloud::Ptr filtered(new pcl::PointCloud); 54 | voxelgrid.filter(*filtered); 55 | 56 | Eigen::Matrix filtered_points(filtered->size(), 3); 57 | for(int i=0; isize(); i++) { 58 | filtered_points.row(i) = filtered->at(i).getVector3fMap(); 59 | } 60 | 61 | return filtered_points.cast(); 62 | } 63 | 64 | Eigen::Matrix4d align_points( 65 | const Eigen::Matrix& target, 66 | const Eigen::Matrix& source, 67 | const std::string& method, 68 | double downsample_resolution, 69 | int k_correspondences, 70 | double max_correspondence_distance, 71 | double voxel_resolution, 72 | int num_threads, 73 | const std::string& neighbor_search_method, 74 | double neighbor_search_radius, 75 | const Eigen::Matrix4f& initial_guess 76 | ) { 77 | pcl::PointCloud::Ptr target_cloud = eigen2pcl(target); 78 | pcl::PointCloud::Ptr source_cloud = eigen2pcl(source); 79 | 80 | if(downsample_resolution > 0.0) { 81 | pcl::ApproximateVoxelGrid voxelgrid; 82 | voxelgrid.setLeafSize(downsample_resolution, downsample_resolution, downsample_resolution); 83 | 84 | pcl::PointCloud::Ptr filtered(new pcl::PointCloud); 85 | voxelgrid.setInputCloud(target_cloud); 86 | voxelgrid.filter(*filtered); 87 | target_cloud.swap(filtered); 88 | 89 | voxelgrid.setInputCloud(source_cloud); 90 | voxelgrid.filter(*filtered); 91 | source_cloud.swap(filtered); 92 | } 93 | 94 | std::shared_ptr> reg; 95 | 96 | if(method == "GICP") { 97 | std::shared_ptr> gicp(new fast_gicp::FastGICP); 98 | gicp->setMaxCorrespondenceDistance(max_correspondence_distance); 99 | gicp->setCorrespondenceRandomness(k_correspondences); 100 | gicp->setNumThreads(num_threads); 101 | reg = gicp; 102 | } else if (method == "VGICP") { 103 | std::shared_ptr> vgicp(new fast_gicp::FastVGICP); 104 | vgicp->setCorrespondenceRandomness(k_correspondences); 105 | vgicp->setResolution(voxel_resolution); 106 | vgicp->setNeighborSearchMethod(search_method(neighbor_search_method)); 107 | vgicp->setNumThreads(num_threads); 108 | reg = vgicp; 109 | } else if (method == "VGICP_CUDA") { 110 | #ifdef USE_VGICP_CUDA 111 | std::shared_ptr> vgicp(new fast_gicp::FastVGICPCuda); 112 | vgicp->setCorrespondenceRandomness(k_correspondences); 113 | vgicp->setNeighborSearchMethod(search_method(neighbor_search_method), neighbor_search_radius); 114 | vgicp->setResolution(voxel_resolution); 115 | reg = vgicp; 116 | #else 117 | std::cerr << "error: you need to build fast_gicp with BUILD_VGICP_CUDA=ON" << std::endl; 118 | return Eigen::Matrix4d::Identity(); 119 | #endif 120 | } else if (method == "NDT_CUDA") { 121 | #ifdef USE_VGICP_CUDA 122 | std::shared_ptr> ndt(new fast_gicp::NDTCuda); 123 | ndt->setResolution(voxel_resolution); 124 | ndt->setNeighborSearchMethod(search_method(neighbor_search_method), neighbor_search_radius); 125 | reg = ndt; 126 | #else 127 | std::cerr << "error: you need to build fast_gicp with BUILD_VGICP_CUDA=ON" << std::endl; 128 | return Eigen::Matrix4d::Identity(); 129 | #endif 130 | } else { 131 | std::cerr << "error: unknown registration method " << method << std::endl; 132 | return Eigen::Matrix4d::Identity(); 133 | } 134 | 135 | reg->setInputTarget(target_cloud); 136 | reg->setInputSource(source_cloud); 137 | 138 | pcl::PointCloud::Ptr aligned(new pcl::PointCloud); 139 | reg->align(*aligned, initial_guess); 140 | 141 | return reg->getFinalTransformation().cast(); 142 | } 143 | 144 | using LsqRegistration = fast_gicp::LsqRegistration; 145 | using FastGICP = fast_gicp::FastGICP; 146 | using FastVGICP = fast_gicp::FastVGICP; 147 | #ifdef USE_VGICP_CUDA 148 | using FastVGICPCuda = fast_gicp::FastVGICPCuda; 149 | using NDTCuda = fast_gicp::NDTCuda; 150 | #endif 151 | 152 | PYBIND11_MODULE(pygicp, m) { 153 | m.def("downsample", &downsample, "downsample points"); 154 | 155 | m.def("align_points", &align_points, "align two point sets", 156 | py::arg("target"), 157 | py::arg("source"), 158 | py::arg("method") = "GICP", 159 | py::arg("downsample_resolution") = -1.0, 160 | py::arg("k_correspondences") = 15, 161 | py::arg("max_correspondence_distance") = std::numeric_limits::max(), 162 | py::arg("voxel_resolution") = 1.0, 163 | py::arg("num_threads") = 0, 164 | py::arg("neighbor_search_method") = "DIRECT1", 165 | py::arg("neighbor_search_radius") = 1.5, 166 | py::arg("initial_guess") = Eigen::Matrix4f::Identity() 167 | ); 168 | 169 | py::class_>(m, "LsqRegistration") 170 | .def("set_input_target", [] (LsqRegistration& reg, const Eigen::Matrix& points) { reg.setInputTarget(eigen2pcl(points)); }) 171 | .def("set_input_source", [] (LsqRegistration& reg, const Eigen::Matrix& points) { reg.setInputSource(eigen2pcl(points)); }) 172 | .def("swap_source_and_target", &LsqRegistration::swapSourceAndTarget) 173 | .def("get_final_hessian", &LsqRegistration::getFinalHessian) 174 | .def("get_final_transformation", &LsqRegistration::getFinalTransformation) 175 | .def("get_fitness_score", [] (LsqRegistration& reg, const double max_range) { return reg.getFitnessScore(max_range); }) 176 | .def("align", 177 | [] (LsqRegistration& reg, const Eigen::Matrix4f& initial_guess) { 178 | pcl::PointCloud aligned; 179 | reg.align(aligned, initial_guess); 180 | return reg.getFinalTransformation(); 181 | }, py::arg("initial_guess") = Eigen::Matrix4f::Identity() 182 | ) 183 | ; 184 | 185 | py::class_>(m, "FastGICP") 186 | .def(py::init()) 187 | .def("set_num_threads", &FastGICP::setNumThreads) 188 | .def("set_correspondence_randomness", &FastGICP::setCorrespondenceRandomness) 189 | .def("set_max_correspondence_distance", &FastGICP::setMaxCorrespondenceDistance) 190 | ; 191 | 192 | py::class_>(m, "FastVGICP") 193 | .def(py::init()) 194 | .def("set_resolution", &FastVGICP::setResolution) 195 | .def("set_neighbor_search_method", [](FastVGICP& vgicp, const std::string& method) { vgicp.setNeighborSearchMethod(search_method(method)); }) 196 | ; 197 | 198 | #ifdef USE_VGICP_CUDA 199 | py::class_>(m, "FastVGICPCuda") 200 | .def(py::init()) 201 | .def("set_resolution", &FastVGICPCuda::setResolution) 202 | .def("set_neighbor_search_method", 203 | [](FastVGICPCuda& vgicp, const std::string& method, double radius) { vgicp.setNeighborSearchMethod(search_method(method), radius); } 204 | , py::arg("method") = "DIRECT1", py::arg("radius") = 1.5 205 | ) 206 | .def("set_correspondence_randomness", &FastVGICPCuda::setCorrespondenceRandomness) 207 | ; 208 | 209 | py::class_>(m, "NDTCuda") 210 | .def(py::init()) 211 | .def("set_neighbor_search_method", 212 | [](NDTCuda& ndt, const std::string& method, double radius) { ndt.setNeighborSearchMethod(search_method(method), radius); } 213 | , py::arg("method") = "DIRECT1", py::arg("radius") = 1.5 214 | ) 215 | .def("set_resolution", &NDTCuda::setResolution) 216 | ; 217 | #endif 218 | 219 | #ifdef VERSION_INFO 220 | m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); 221 | #else 222 | m.attr("__version__") = "dev"; 223 | #endif 224 | } -------------------------------------------------------------------------------- /src/test/gicp_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #ifdef USE_VGICP_CUDA 16 | #include 17 | #include 18 | #endif 19 | 20 | struct GICPTestBase : public testing::Test { 21 | public: 22 | EIGEN_MAKE_ALIGNED_OPERATOR_NEW 23 | using PointCloudConstPtr = pcl::PointCloud::ConstPtr; 24 | 25 | GICPTestBase() {} 26 | 27 | virtual void SetUp() { 28 | if (!load(data_directory)) { 29 | exit(1); 30 | } 31 | } 32 | 33 | bool load(const std::string& data_directory) { 34 | relative_pose.setIdentity(); 35 | 36 | std::ifstream ifs(data_directory + "/relative.txt"); 37 | if (!ifs) { 38 | return false; 39 | } 40 | 41 | for (int i = 0; i < 4; i++) { 42 | for (int j = 0; j < 4; j++) { 43 | ifs >> relative_pose(i, j); 44 | } 45 | } 46 | 47 | auto target = pcl::make_shared>(); 48 | auto source = pcl::make_shared>(); 49 | pcl::io::loadPCDFile(data_directory + "/251370668.pcd", *target); 50 | pcl::io::loadPCDFile(data_directory + "/251371071.pcd", *source); 51 | if (target->empty() || source->empty()) { 52 | return true; 53 | } 54 | 55 | pcl::VoxelGrid voxelgrid; 56 | voxelgrid.setLeafSize(0.2, 0.2, 0.2); 57 | 58 | auto filtered = pcl::make_shared>(); 59 | voxelgrid.setInputCloud(target); 60 | voxelgrid.filter(*filtered); 61 | filtered.swap(target); 62 | 63 | voxelgrid.setInputCloud(source); 64 | voxelgrid.filter(*filtered); 65 | filtered.swap(source); 66 | 67 | this->target = target; 68 | this->source = source; 69 | 70 | return true; 71 | } 72 | 73 | Eigen::Vector2f pose_error(const Eigen::Matrix4f estimated) const { 74 | Eigen::Matrix4f delta = relative_pose.inverse() * estimated; 75 | double t_error = delta.block<3, 1>(0, 3).norm(); 76 | double r_error = Eigen::AngleAxisf(delta.block<3, 3>(0, 0)).angle(); 77 | return Eigen::Vector2f(t_error, r_error); 78 | } 79 | 80 | static std::string data_directory; 81 | 82 | PointCloudConstPtr target; 83 | PointCloudConstPtr source; 84 | Eigen::Matrix4f relative_pose; 85 | }; 86 | 87 | std::string GICPTestBase::data_directory; 88 | 89 | TEST_F(GICPTestBase, LoadCheck) { 90 | EXPECT_NE(target, nullptr); 91 | EXPECT_NE(source, nullptr); 92 | EXPECT_FALSE(target->empty()); 93 | EXPECT_FALSE(source->empty()); 94 | } 95 | 96 | using Parameters = std::tuple; 97 | class AlignmentTest : public GICPTestBase, public testing::WithParamInterface { 98 | public: 99 | pcl::Registration::Ptr create_reg() { 100 | std::string method = std::get<0>(GetParam()); 101 | int num_threads = std::get<1>(GetParam()) ? 4 : 1; 102 | 103 | if (method == "GICP") { 104 | auto gicp = pcl::make_shared>(); 105 | gicp->setNumThreads(num_threads); 106 | gicp->swapSourceAndTarget(); 107 | return gicp; 108 | } else if (method == "VGICP") { 109 | auto vgicp = pcl::make_shared>(); 110 | vgicp->setNumThreads(num_threads); 111 | return vgicp; 112 | } else if (method == "VGICP_CUDA") { 113 | #ifdef USE_VGICP_CUDA 114 | auto vgicp = pcl::make_shared>(); 115 | return vgicp; 116 | #endif 117 | return nullptr; 118 | } else if (method == "NDT_CUDA") { 119 | #ifdef USE_VGICP_CUDA 120 | auto ndt = pcl::make_shared>(); 121 | return ndt; 122 | #endif 123 | return nullptr; 124 | } 125 | 126 | std::cerr << "unknown registration method:" << method << std::endl; 127 | return nullptr; 128 | } 129 | 130 | void swap_source_and_target(pcl::Registration::Ptr reg) { 131 | fast_gicp::LsqRegistration* lsq_reg = dynamic_cast*>(reg.get()); 132 | if (lsq_reg != nullptr) { 133 | lsq_reg->swapSourceAndTarget(); 134 | return; 135 | } 136 | 137 | std::cerr << "failed to swap source and target" << std::endl; 138 | } 139 | }; 140 | 141 | INSTANTIATE_TEST_SUITE_P(AlignmentTest2, AlignmentTest, testing::Combine(testing::Values("GICP", "VGICP", "VGICP_CUDA", "NDT_CUDA"), testing::Bool()), [](const auto& info) { 142 | std::stringstream sst; 143 | sst << std::get<0>(info.param) << (std::get<1>(info.param) ? "_MT" : "_ST"); 144 | return sst.str(); 145 | }); 146 | 147 | TEST_P(AlignmentTest, test) { 148 | const double t_tol = 0.05; 149 | const double r_tol = 1.0 * M_PI / 180.0; 150 | 151 | pcl::Registration::Ptr reg = create_reg(); 152 | if (reg == nullptr) { 153 | std::cout << "[ ] SKIP TEST" << std::endl; 154 | return; 155 | } 156 | 157 | // forward test 158 | auto aligned = pcl::make_shared>(); 159 | reg->setInputTarget(target); 160 | reg->setInputSource(source); 161 | reg->align(*aligned); 162 | 163 | Eigen::Vector2f errors = pose_error(reg->getFinalTransformation()); 164 | EXPECT_LT(errors[0], t_tol) << "FORWARD TEST"; 165 | EXPECT_LT(errors[1], r_tol) << "FORWARD TEST"; 166 | EXPECT_TRUE(reg->hasConverged()) << "FORWARD TEST"; 167 | 168 | // backward test 169 | reg->setInputTarget(source); 170 | reg->setInputSource(target); 171 | reg->align(*aligned); 172 | 173 | errors = pose_error(reg->getFinalTransformation().inverse()); 174 | EXPECT_LT(errors[0], t_tol) << "BACKWARD TEST"; 175 | EXPECT_LT(errors[1], r_tol) << "BACKWARD TEST"; 176 | EXPECT_TRUE(reg->hasConverged()) << "BACKWARD TEST"; 177 | 178 | // swap and set source 179 | reg = create_reg(); 180 | reg->setInputSource(target); 181 | swap_source_and_target(reg); 182 | reg->setInputSource(source); 183 | reg->align(*aligned); 184 | 185 | errors = pose_error(reg->getFinalTransformation()); 186 | EXPECT_LT(errors[0], t_tol) << "SWAP AND SET SOURCE TEST"; 187 | EXPECT_LT(errors[1], r_tol) << "SWAP AND SET SOURCE TEST"; 188 | EXPECT_TRUE(reg->hasConverged()) << "SWAP AND SET SOURCE TEST"; 189 | 190 | // swap and set target 191 | reg = create_reg(); 192 | reg->setInputTarget(source); 193 | swap_source_and_target(reg); // source:target, target:source 194 | reg->setInputTarget(target); 195 | reg->align(*aligned); 196 | 197 | errors = pose_error(reg->getFinalTransformation()); 198 | EXPECT_LT(errors[0], t_tol) << "SWAP AND SET TARGET TEST"; 199 | EXPECT_LT(errors[1], r_tol) << "SWAP AND SET TARGET TEST"; 200 | EXPECT_TRUE(reg->hasConverged()) << "SWAP AND SET TARGET TEST"; 201 | } 202 | 203 | int main(int argc, char** argv) { 204 | GICPTestBase::data_directory = argv[1]; 205 | testing::InitGoogleTest(&argc, argv); 206 | return RUN_ALL_TESTS(); 207 | } 208 | --------------------------------------------------------------------------------