├── .clang-format
├── .github
    └── workflows
    │   ├── build.yml
    │   └── issue_response.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── README.md
├── data
    ├── 251370668.pcd
    ├── 251371071.pcd
    ├── proctime.png
    └── relative.txt
├── docker
    ├── focal
    │   └── Dockerfile
    ├── focal_cuda
    │   └── Dockerfile
    ├── kinetic
    │   └── Dockerfile
    ├── melodic
    │   └── Dockerfile
    ├── melodic_llvm
    │   └── Dockerfile
    ├── noetic
    │   └── Dockerfile
    └── noetic_llvm
    │   └── Dockerfile
├── include
    └── fast_gicp
    │   ├── cuda
    │       ├── brute_force_knn.cuh
    │       ├── compute_derivatives.cuh
    │       ├── compute_mahalanobis.cuh
    │       ├── covariance_estimation.cuh
    │       ├── covariance_regularization.cuh
    │       ├── fast_vgicp_cuda.cuh
    │       ├── find_voxel_correspondences.cuh
    │       ├── gaussian_voxelmap.cuh
    │       ├── ndt_compute_derivatives.cuh
    │       ├── ndt_cuda.cuh
    │       └── vector3_hash.cuh
    │   ├── gicp
    │       ├── experimental
    │       │   ├── fast_gicp_mp.hpp
    │       │   └── fast_gicp_mp_impl.hpp
    │       ├── fast_gicp.hpp
    │       ├── fast_gicp_st.hpp
    │       ├── fast_vgicp.hpp
    │       ├── fast_vgicp_cuda.hpp
    │       ├── fast_vgicp_voxel.hpp
    │       ├── gicp_settings.hpp
    │       ├── impl
    │       │   ├── fast_gicp_impl.hpp
    │       │   ├── fast_gicp_st_impl.hpp
    │       │   ├── fast_vgicp_cuda_impl.hpp
    │       │   ├── fast_vgicp_impl.hpp
    │       │   └── lsq_registration_impl.hpp
    │       └── lsq_registration.hpp
    │   ├── ndt
    │       ├── impl
    │       │   └── ndt_cuda_impl.hpp
    │       ├── ndt_cuda.hpp
    │       └── ndt_settings.hpp
    │   └── so3
    │       └── so3.hpp
├── package.xml
├── scripts
    └── runtest_cuda.sh
├── setup.py
└── src
    ├── align.cpp
    ├── fast_gicp
        ├── cuda
        │   ├── brute_force_knn.cu
        │   ├── compute_derivatives.cu
        │   ├── compute_mahalanobis.cu
        │   ├── covariance_estimation.cu
        │   ├── covariance_estimation_rbf.cu
        │   ├── covariance_regularization.cu
        │   ├── fast_vgicp_cuda.cu
        │   ├── find_voxel_correspondences.cu
        │   ├── gaussian_voxelmap.cu
        │   ├── ndt_compute_derivatives.cu
        │   └── ndt_cuda.cu
        ├── gicp
        │   ├── experimental
        │   │   └── fast_gicp_mp.cpp
        │   ├── fast_gicp.cpp
        │   ├── fast_gicp_st.cpp
        │   ├── fast_vgicp.cpp
        │   ├── fast_vgicp_cuda.cpp
        │   └── lsq_registration.cpp
        └── ndt
        │   └── ndt_cuda.cpp
    ├── kitti.cpp
    ├── kitti.py
    ├── python
        └── main.cpp
    └── test
        └── gicp_test.cpp


/.clang-format:
--------------------------------------------------------------------------------
  1 | ---
  2 | Language:        Cpp
  3 | # BasedOnStyle:  Google
  4 | AccessModifierOffset: -2
  5 | AlignAfterOpenBracket: AlwaysBreak
  6 | AlignConsecutiveMacros: false
  7 | AlignConsecutiveAssignments: false
  8 | AlignConsecutiveDeclarations: false
  9 | AlignEscapedNewlines: Left
 10 | AlignOperands:   true
 11 | AlignTrailingComments: true
 12 | AllowAllArgumentsOnNextLine: false
 13 | AllowAllConstructorInitializersOnNextLine: false
 14 | AllowAllParametersOfDeclarationOnNextLine: false
 15 | AllowShortBlocksOnASingleLine: Never
 16 | AllowShortCaseLabelsOnASingleLine: false
 17 | AllowShortFunctionsOnASingleLine: Inline
 18 | AllowShortLambdasOnASingleLine: All
 19 | AllowShortIfStatementsOnASingleLine: WithoutElse
 20 | AllowShortLoopsOnASingleLine: true
 21 | AlwaysBreakAfterDefinitionReturnType: None
 22 | AlwaysBreakAfterReturnType: None
 23 | AlwaysBreakBeforeMultilineStrings: true
 24 | AlwaysBreakTemplateDeclarations: Yes
 25 | BinPackArguments: false
 26 | BinPackParameters: false
 27 | BraceWrapping:
 28 |   AfterCaseLabel:  false
 29 |   AfterClass:      false
 30 |   AfterControlStatement: false
 31 |   AfterEnum:       false
 32 |   AfterFunction:   false
 33 |   AfterNamespace:  false
 34 |   AfterObjCDeclaration: false
 35 |   AfterStruct:     false
 36 |   AfterUnion:      false
 37 |   AfterExternBlock: false
 38 |   BeforeCatch:     false
 39 |   BeforeElse:      false
 40 |   IndentBraces:    false
 41 |   SplitEmptyFunction: true
 42 |   SplitEmptyRecord: true
 43 |   SplitEmptyNamespace: true
 44 | BreakBeforeBinaryOperators: None
 45 | BreakBeforeBraces: Attach
 46 | BreakBeforeInheritanceComma: false
 47 | BreakInheritanceList: BeforeColon
 48 | BreakBeforeTernaryOperators: true
 49 | BreakConstructorInitializersBeforeComma: false
 50 | BreakConstructorInitializers: BeforeColon
 51 | BreakAfterJavaFieldAnnotations: false
 52 | BreakStringLiterals: true
 53 | ColumnLimit:     180
 54 | CommentPragmas:  '^ IWYU pragma:'
 55 | CompactNamespaces: false
 56 | ConstructorInitializerAllOnOneLineOrOnePerLine: true
 57 | ConstructorInitializerIndentWidth: 0
 58 | ContinuationIndentWidth: 2
 59 | Cpp11BracedListStyle: true
 60 | DeriveLineEnding: true
 61 | DerivePointerAlignment: false
 62 | DisableFormat:   false
 63 | ExperimentalAutoDetectBinPacking: false
 64 | FixNamespaceComments: true
 65 | ForEachMacros:
 66 |   - foreach
 67 |   - Q_FOREACH
 68 |   - BOOST_FOREACH
 69 | IncludeBlocks:   Regroup
 70 | IncludeCategories:
 71 |   - Regex:           '^<ext/.*\.h>'
 72 |     Priority:        2
 73 |     SortPriority:    0
 74 |   - Regex:           '^<.*\.h>'
 75 |     Priority:        1
 76 |     SortPriority:    0
 77 |   - Regex:           '^<.*'
 78 |     Priority:        2
 79 |     SortPriority:    0
 80 |   - Regex:           '.*'
 81 |     Priority:        3
 82 |     SortPriority:    0
 83 | IncludeIsMainRegex: '([-_](test|unittest))?$'
 84 | IncludeIsMainSourceRegex: ''
 85 | IndentCaseLabels: true
 86 | IndentGotoLabels: true
 87 | IndentPPDirectives: None
 88 | IndentWidth:     2
 89 | IndentWrappedFunctionNames: false
 90 | JavaScriptQuotes: Leave
 91 | JavaScriptWrapImports: true
 92 | KeepEmptyLinesAtTheStartOfBlocks: false
 93 | MacroBlockBegin: ''
 94 | MacroBlockEnd:   ''
 95 | MaxEmptyLinesToKeep: 1
 96 | NamespaceIndentation: None
 97 | ObjCBinPackProtocolList: Never
 98 | ObjCBlockIndentWidth: 2
 99 | ObjCSpaceAfterProperty: false
100 | ObjCSpaceBeforeProtocolList: true
101 | PenaltyBreakAssignment: 2
102 | PenaltyBreakBeforeFirstCallParameter: 1
103 | PenaltyBreakComment: 300
104 | PenaltyBreakFirstLessLess: 120
105 | PenaltyBreakString: 1000
106 | PenaltyBreakTemplateDeclaration: 10
107 | PenaltyExcessCharacter: 1000000
108 | PenaltyReturnTypeOnItsOwnLine: 200
109 | PointerAlignment: Left
110 | RawStringFormats:
111 |   - Language:        Cpp
112 |     Delimiters:
113 |       - cc
114 |       - CC
115 |       - cpp
116 |       - Cpp
117 |       - CPP
118 |       - 'c++'
119 |       - 'C++'
120 |     CanonicalDelimiter: ''
121 |     BasedOnStyle:    google
122 |   - Language:        TextProto
123 |     Delimiters:
124 |       - pb
125 |       - PB
126 |       - proto
127 |       - PROTO
128 |     EnclosingFunctions:
129 |       - EqualsProto
130 |       - EquivToProto
131 |       - PARSE_PARTIAL_TEXT_PROTO
132 |       - PARSE_TEST_PROTO
133 |       - PARSE_TEXT_PROTO
134 |       - ParseTextOrDie
135 |       - ParseTextProtoOrDie
136 |     CanonicalDelimiter: ''
137 |     BasedOnStyle:    google
138 | ReflowComments:  true
139 | SortIncludes:    false
140 | SortUsingDeclarations: true
141 | SpaceAfterCStyleCast: false
142 | SpaceAfterLogicalNot: false
143 | SpaceAfterTemplateKeyword: true
144 | SpaceBeforeAssignmentOperators: true
145 | SpaceBeforeCpp11BracedList: false
146 | SpaceBeforeCtorInitializerColon: true
147 | SpaceBeforeInheritanceColon: true
148 | SpaceBeforeParens: ControlStatements
149 | SpaceBeforeRangeBasedForLoopColon: true
150 | SpaceInEmptyBlock: false
151 | SpaceInEmptyParentheses: false
152 | SpacesBeforeTrailingComments: 2
153 | SpacesInAngles:  false
154 | SpacesInConditionalStatement: false
155 | SpacesInContainerLiterals: true
156 | SpacesInCStyleCastParentheses: false
157 | SpacesInParentheses: false
158 | SpacesInSquareBrackets: false
159 | SpaceBeforeSquareBrackets: false
160 | Standard:        Auto
161 | StatementMacros:
162 |   - Q_UNUSED
163 |   - QT_REQUIRE_VERSION
164 | TabWidth:        8
165 | UseCRLF:         false
166 | UseTab:          Never
167 | ...
168 | 
169 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: Build
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 |   # Allows you to run this workflow manually from the Actions tab
10 |   workflow_dispatch:
11 | 
12 | jobs:
13 |   build:
14 |     runs-on: ubuntu-latest
15 |     strategy:
16 |       matrix:
17 |         ROS_DISTRO: [focal, focal_cuda, melodic, melodic_llvm, noetic, noetic_llvm]
18 | 
19 |     steps:
20 |       - uses: actions/checkout@v2
21 | 
22 |       - name: Docker login
23 |         continue-on-error: true
24 |         uses: docker/login-action@v1
25 |         with:
26 |           username: ${{ secrets.DOCKER_USERNAME }}
27 |           password: ${{ secrets.DOCKER_TOKEN }}
28 | 
29 |       - name: Docker build
30 |         uses: docker/build-push-action@v2
31 |         with:
32 |           file: ${{github.workspace}}/docker/${{ matrix.ROS_DISTRO }}/Dockerfile
33 |           context: .
34 |           push: false
35 | 


--------------------------------------------------------------------------------
/.github/workflows/issue_response.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   fork:
 3 |   issues:
 4 |     types: [opened]
 5 | 
 6 | jobs:
 7 |   welcome:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - name: Leave Comment
11 |         run: gh issue comment "$NUMBER" --body "$BODY"
12 |         env:
13 |           GH_TOKEN: ${{ github.token }}
14 |           GH_REPO: ${{ github.repository }}
15 |           NUMBER: ${{ github.event.issue.number }}
16 |           BODY: >
17 |             Sorry!! We are reducing the support for fast_gicp.
18 |             Please consider using [small_gicp](https://github.com/koide3/small_gicp), a new faster library with better portability.
19 |             Meanwhile, feel free to open a PR if you have any improvements that can be merged into fast_gicp.
20 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode/*
2 | build/*
3 | dist/*
4 | 
5 | *.so
6 | *.py[cod]
7 | *.egg-info
8 | README.html
9 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "thidparty/Sophus"]
 2 | 	path = thirdparty/Sophus
 3 | 	url = https://github.com/strasdat/Sophus.git
 4 | [submodule "thirdparty/Eigen"]
 5 | 	path = thirdparty/Eigen
 6 | 	url = https://gitlab.com/libeigen/eigen.git
 7 | [submodule "thirdparty/nvbio"]
 8 | 	path = thirdparty/nvbio
 9 | 	url = https://github.com/NVlabs/nvbio.git
10 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 3.10.0)
  2 | project(fast_gicp)
  3 | 
  4 | option(BUILD_VGICP_CUDA "Build GPU-powered VGICP" OFF)
  5 | option(BUILD_apps "Build application programs" ON)
  6 | option(BUILD_test "Build test programs" OFF)
  7 | option(BUILD_PYTHON_BINDINGS "Build python bindings" OFF)
  8 | 
  9 | set(CMAKE_BUILD_TYPE "Release")
 10 | 
 11 | find_package(PCL REQUIRED)
 12 | find_package(Eigen3 REQUIRED)
 13 | add_definitions(${PCL_DEFINITIONS})
 14 | 
 15 | if(DEFINED ENV{ROS_VERSION})
 16 |   set(ROS_VERSION $ENV{ROS_VERSION})
 17 | endif()
 18 | 
 19 | if(NOT BUILD_PYTHON_BINDINGS)
 20 |   if(${ROS_VERSION})
 21 |     if(${ROS_VERSION} EQUAL 1)
 22 |       find_package(catkin)
 23 |     elseif (${ROS_VERSION} EQUAL 2)
 24 |       find_package(ament_cmake)
 25 |     endif()
 26 |   endif()
 27 | endif()
 28 | 
 29 | find_package(OpenMP)
 30 | if (OPENMP_FOUND)
 31 |   set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
 32 |   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
 33 | endif()
 34 | 
 35 | if(BUILD_VGICP_CUDA)
 36 |   find_package(CUDA REQUIRED)
 37 |   include_directories(${CUDA_INCLUDE_DIRS})
 38 |   link_directories(${CUDA_LIBRARY_DIRS})
 39 | endif()
 40 | 
 41 | ###################################
 42 | ## catkin specific configuration ##
 43 | ###################################
 44 | if(catkin_FOUND)
 45 |   catkin_package(
 46 |     INCLUDE_DIRS include
 47 |     LIBRARIES fast_gicp
 48 |   )
 49 | endif()
 50 | 
 51 | ###########
 52 | ## Build ##
 53 | ###########
 54 | 
 55 | add_library(fast_gicp SHARED
 56 |   src/fast_gicp/gicp/lsq_registration.cpp
 57 |   src/fast_gicp/gicp/fast_gicp.cpp
 58 |   src/fast_gicp/gicp/fast_gicp_st.cpp
 59 |   src/fast_gicp/gicp/fast_vgicp.cpp
 60 | )
 61 | target_link_libraries(fast_gicp
 62 |   ${PCL_LIBRARIES}
 63 | )
 64 | if (OPENMP_FOUND)
 65 |     if (TARGET OpenMP::OpenMP_CXX)
 66 |         target_link_libraries(fast_gicp OpenMP::OpenMP_CXX)
 67 |     endif ()
 68 | endif ()
 69 | target_include_directories(fast_gicp PUBLIC
 70 |   include
 71 |   ${PCL_INCLUDE_DIRS}
 72 |   ${EIGEN3_INCLUDE_DIR}
 73 | )
 74 | 
 75 | ### APPS ###
 76 | if(BUILD_apps)
 77 |   add_executable(gicp_align src/align.cpp)
 78 |   add_dependencies(gicp_align fast_gicp)
 79 |   target_link_libraries(gicp_align
 80 |     ${PCL_LIBRARIES}
 81 |     fast_gicp
 82 |   )
 83 | 
 84 |   add_executable(gicp_kitti src/kitti.cpp)
 85 |   add_dependencies(gicp_kitti fast_gicp)
 86 |   target_link_libraries(gicp_kitti
 87 |     ${PCL_LIBRARIES}
 88 |     fast_gicp
 89 |   )
 90 | endif()
 91 | 
 92 | ### Python bindings ###
 93 | if(BUILD_PYTHON_BINDINGS)
 94 |   find_package(pybind11 CONFIG)
 95 |   pybind11_add_module(pygicp
 96 |     src/python/main.cpp
 97 |   )
 98 |   target_include_directories(pygicp PUBLIC
 99 |     include
100 |     ${PCL_INCLUDE_DIRS}
101 |     ${EIGEN3_INCLUDE_DIR}
102 |   )
103 |   target_link_libraries(pygicp PRIVATE
104 |     fast_gicp
105 |   )
106 |   set_target_properties(pygicp PROPERTIES
107 |     INSTALL_RPATH "$ORIGIN/"
108 |     BUILD_WITH_INSTALL_RPATH TRUE
109 |   )
110 | endif()
111 | 
112 | ### CUDA ###
113 | if(BUILD_VGICP_CUDA)
114 |   set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")
115 |   add_definitions(-DUSE_VGICP_CUDA)
116 | 
117 |   cuda_add_library(fast_vgicp_cuda SHARED
118 |     src/fast_gicp/cuda/fast_vgicp_cuda.cu
119 |     src/fast_gicp/cuda/brute_force_knn.cu
120 |     src/fast_gicp/cuda/covariance_estimation.cu
121 |     src/fast_gicp/cuda/covariance_estimation_rbf.cu
122 |     src/fast_gicp/cuda/covariance_regularization.cu
123 |     src/fast_gicp/cuda/gaussian_voxelmap.cu
124 |     src/fast_gicp/cuda/find_voxel_correspondences.cu
125 |     src/fast_gicp/cuda/compute_derivatives.cu
126 |     src/fast_gicp/cuda/compute_mahalanobis.cu
127 |     src/fast_gicp/cuda/ndt_cuda.cu
128 |     src/fast_gicp/cuda/ndt_compute_derivatives.cu
129 |   )
130 |   target_include_directories(fast_vgicp_cuda PRIVATE
131 |     include
132 |     thirdparty/Eigen
133 |     thirdparty/nvbio
134 |     ${catkin_INCLUDE_DIRS}
135 |   )
136 |   target_link_libraries(fast_vgicp_cuda
137 |     ${catkin_LIBRARIES}
138 |   )
139 |   cuda_add_cublas_to_target(fast_vgicp_cuda)
140 | 
141 |   # add vgicp_cuda to libfast_gicp
142 |   target_sources(fast_gicp PRIVATE
143 |     src/fast_gicp/gicp/fast_vgicp_cuda.cpp
144 |     src/fast_gicp/ndt/ndt_cuda.cpp
145 |   )
146 |   target_link_libraries(fast_gicp
147 |     fast_vgicp_cuda
148 |   )
149 |   add_dependencies(fast_gicp fast_vgicp_cuda)
150 |   if(catkin_FOUND)
151 |     install(TARGETS fast_vgicp_cuda
152 |       LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION})
153 |   elseif (ament_cmake_FOUND)
154 |     install(TARGETS fast_vgicp_cuda
155 |       LIBRARY DESTINATION lib)
156 |   endif()
157 | endif()
158 | 
159 | ### TEST ###
160 | if(BUILD_test)
161 |   find_package(GTest REQUIRED)
162 | 
163 |   add_executable(gicp_test src/test/gicp_test.cpp)
164 |   add_dependencies(gicp_test fast_gicp)
165 |   target_link_libraries(gicp_test ${GTEST_LIBRARIES} ${PCL_LIBRARIES} fast_gicp)
166 |   gtest_add_tests(TARGET gicp_test WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} EXTRA_ARGS "${CMAKE_SOURCE_DIR}/data")
167 | endif()
168 | 
169 | if(catkin_FOUND)
170 |   ###################################
171 |   ## catkin specific configuration ##
172 |   ###################################
173 |   install(TARGETS ${PROJECT_NAME}
174 |     LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION})
175 | 
176 |   install(DIRECTORY include/
177 |     DESTINATION ${CATKIN_GLOBAL_INCLUDE_DESTINATION}
178 |     FILES_MATCHING PATTERN "*.hpp")
179 | elseif (ament_cmake_FOUND)
180 |   ##################################
181 |   ## ament specific configuration ##
182 |   ##################################
183 |   ament_export_include_directories(include)
184 |   ament_export_libraries(fast_gicp)
185 |   ament_package()
186 | 
187 |   install(TARGETS ${PROJECT_NAME}
188 |     LIBRARY DESTINATION lib)
189 | 
190 |   install(
191 |       DIRECTORY "include/"
192 |       DESTINATION include
193 |     )
194 | endif()
195 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2020, SMRT-AIST
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Note: New faster library is released
  3 | 
  4 | We released [small_gicp](https://github.com/koide3/small_gicp) that is twice as fast as fast_gicp and with minimum dependencies and clean interfaces.
  5 | 
  6 | # fast_gicp
  7 | 
  8 | This package is a collection of GICP-based fast point cloud registration algorithms. It constains a multi-threaded GICP as well as multi-thread and GPU implementations of our voxelized GICP (VGICP) algorithm. All the implemented algorithms have the PCL registration interface so that they can be used as an inplace replacement for GICP in PCL.
  9 | 
 10 | - FastGICP: multi-threaded GICP algorithm (**\~40FPS**)
 11 | - FastGICPSingleThread: GICP algorithm optimized for single-threading (**\~15FPS**)
 12 | - FastVGICP: multi-threaded and voxelized GICP algorithm (**\~70FPS**)
 13 | - FastVGICPCuda: CUDA-accelerated voxelized GICP algorithm (**\~120FPS**)
 14 | - NDTCuda: CUDA-accelerated D2D NDT algorithm (**\~500FPS**)
 15 | ![proctime](data/proctime.png)
 16 | 
 17 | [![Build](https://github.com/SMRT-AIST/fast_gicp/actions/workflows/build.yml/badge.svg)](https://github.com/SMRT-AIST/fast_gicp/actions/workflows/build.yml) on melodic & noetic
 18 | 
 19 | ## Installation
 20 | 
 21 | ### Dependencies
 22 | - PCL
 23 | - Eigen
 24 | - OpenMP
 25 | - CUDA (optional)
 26 | - [Sophus](https://github.com/strasdat/Sophus)
 27 | - [nvbio](https://github.com/NVlabs/nvbio)
 28 | 
 29 | We have tested this package on Ubuntu 18.04/20.04 and CUDA 11.1.
 30 | 
 31 | On macOS when using `brew`, you might have to set up your depenencies like this
 32 | 
 33 | ```
 34 | cmake .. "-DCMAKE_PREFIX_PATH=$(brew --prefix libomp)[;other-custom-prefixes]" -DQt5_DIR=$(brew --prefix qt@5)lib/cmake/Qt5
 35 | ```
 36 | 
 37 | ### CUDA
 38 | 
 39 | To enable the CUDA-powered implementations, set ```BUILD_VGICP_CUDA``` cmake option to ```ON```.
 40 | 
 41 | ### ROS
 42 | ```bash
 43 | cd ~/catkin_ws/src
 44 | git clone https://github.com/SMRT-AIST/fast_gicp --recursive
 45 | cd .. && catkin_make -DCMAKE_BUILD_TYPE=Release
 46 | # enable cuda-based implementations
 47 | # cd .. && catkin_make -DCMAKE_BUILD_TYPE=Release -DBUILD_VGICP_CUDA=ON
 48 | ```
 49 | 
 50 | ### Non-ROS
 51 | ```bash
 52 | git clone https://github.com/SMRT-AIST/fast_gicp --recursive
 53 | mkdir fast_gicp/build && cd fast_gicp/build
 54 | cmake .. -DCMAKE_BUILD_TYPE=Release
 55 | # enable cuda-based implementations
 56 | # cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_VGICP_CUDA=ON
 57 | make -j8
 58 | ```
 59 | 
 60 | ### Python bindings
 61 | ```bash
 62 | cd fast_gicp
 63 | python3 setup.py install --user
 64 | ```
 65 | Note: If you are on a catkin-enabled environment and the installation doesn't work well, comment out ```find_package(catkin)``` in CMakeLists.txt and run the above installation command again.
 66 | 
 67 | 
 68 | ```python
 69 | import pygicp
 70 | 
 71 | target = # Nx3 numpy array
 72 | source = # Mx3 numpy array
 73 | 
 74 | # 1. function interface
 75 | matrix = pygicp.align_points(target, source)
 76 | 
 77 | # optional arguments
 78 | # initial_guess               : Initial guess of the relative pose (4x4 matrix)
 79 | # method                      : GICP, VGICP, VGICP_CUDA, or NDT_CUDA
 80 | # downsample_resolution       : Downsampling resolution (used only if positive)
 81 | # k_correspondences           : Number of points used for covariance estimation
 82 | # max_correspondence_distance : Maximum distance for corresponding point search
 83 | # voxel_resolution            : Resolution of voxel-based algorithms
 84 | # neighbor_search_method      : DIRECT1, DIRECT7, DIRECT27, or DIRECT_RADIUS
 85 | # neighbor_search_radius      : Neighbor voxel search radius (for GPU-based methods)
 86 | # num_threads                 : Number of threads
 87 | 
 88 | 
 89 | # 2. class interface
 90 | # you may want to downsample the input clouds before registration
 91 | target = pygicp.downsample(target, 0.25)
 92 | source = pygicp.downsample(source, 0.25)
 93 | 
 94 | # pygicp.FastGICP has more or less the same interfaces as the C++ version
 95 | gicp = pygicp.FastGICP()
 96 | gicp.set_input_target(target)
 97 | gicp.set_input_source(source)
 98 | matrix = gicp.align()
 99 | 
100 | # optional
101 | gicp.set_num_threads(4)
102 | gicp.set_max_correspondence_distance(1.0)
103 | gicp.get_final_transformation()
104 | gicp.get_final_hessian()
105 | ```
106 | 
107 | ## Benchmark
108 | CPU:Core i9-9900K GPU:GeForce RTX2080Ti
109 | 
110 | ```bash
111 | roscd fast_gicp/data
112 | rosrun fast_gicp gicp_align 251370668.pcd 251371071.pcd
113 | ```
114 | 
115 | ```
116 | target:17249[pts] source:17518[pts]
117 | --- pcl_gicp ---
118 | single:127.508[msec] 100times:12549.4[msec] fitness_score:0.204892
119 | --- pcl_ndt ---
120 | single:53.5904[msec] 100times:5467.16[msec] fitness_score:0.229616
121 | --- fgicp_st ---
122 | single:111.324[msec] 100times:10662.7[msec] 100times_reuse:6794.59[msec] fitness_score:0.204379
123 | --- fgicp_mt ---
124 | single:20.1602[msec] 100times:1585[msec] 100times_reuse:1017.74[msec] fitness_score:0.204412
125 | --- vgicp_st ---
126 | single:112.001[msec] 100times:7959.9[msec] 100times_reuse:4408.22[msec] fitness_score:0.204067
127 | --- vgicp_mt ---
128 | single:18.1106[msec] 100times:1381[msec] 100times_reuse:806.53[msec] fitness_score:0.204067
129 | --- vgicp_cuda (parallel_kdtree) ---
130 | single:15.9587[msec] 100times:1451.85[msec] 100times_reuse:695.48[msec] fitness_score:0.204061
131 | --- vgicp_cuda (gpu_bruteforce) ---
132 | single:53.9113[msec] 100times:3463.5[msec] 100times_reuse:1703.41[msec] fitness_score:0.204049
133 | --- vgicp_cuda (gpu_rbf_kernel) ---
134 | single:5.91508[msec] 100times:590.725[msec] 100times_reuse:226.787[msec] fitness_score:0.20557
135 | ```
136 | 
137 | See [src/align.cpp](https://github.com/SMRT-AIST/fast_gicp/blob/master/src/align.cpp) for the detailed usage.
138 | 
139 | ## Test on KITTI
140 | 
141 | ### C++
142 | 
143 | ```bash
144 | # Perform frame-by-frame registration
145 | rosrun fast_gicp gicp_kitti /your/kitti/path/sequences/00/velodyne
146 | ```
147 | 
148 | ![kitti00](https://user-images.githubusercontent.com/31344317/86207074-b98ac280-bba8-11ea-9687-e65f03aaf25b.png)
149 | 
150 | ### Python
151 | 
152 | ```bash
153 | cd fast_gicp/src
154 | python3 kitti.py /your/kitti/path/sequences/00/velodyne
155 | ```
156 | 
157 | ## Note
158 | 
159 | In some environments, setting a fewer number of threads rather than the (default) maximum number of threads may result in faster processing (see https://github.com/SMRT-AIST/fast_gicp/issues/145#issuecomment-1890885373).
160 | 
161 | ## Related packages
162 | - [ndt_omp](https://github.com/koide3/ndt_omp)
163 | - [fast_gicp](https://github.com/SMRT-AIST/fast_gicp)
164 | 
165 | 
166 | ## Papers
167 | - Kenji Koide, Masashi Yokozuka, Shuji Oishi, and Atsuhiko Banno, Voxelized GICP for fast and accurate 3D point cloud registration, ICRA2021 [[link]](https://easychair.org/publications/preprint/ftvV)
168 | 
169 | ## Contact
170 | Kenji Koide, k.koide@aist.go.jp
171 | 
172 | Human-Centered Mobility Research Center, National Institute of Advanced Industrial Science and Technology, Japan  [\[URL\]](https://unit.aist.go.jp/rirc/en/team/smart_mobility.html)
173 | 


--------------------------------------------------------------------------------
/data/251370668.pcd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koide3/fast_gicp/0e7ec1441c99f7be453db2ea216d5de029387417/data/251370668.pcd


--------------------------------------------------------------------------------
/data/251371071.pcd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koide3/fast_gicp/0e7ec1441c99f7be453db2ea216d5de029387417/data/251371071.pcd


--------------------------------------------------------------------------------
/data/proctime.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koide3/fast_gicp/0e7ec1441c99f7be453db2ea216d5de029387417/data/proctime.png


--------------------------------------------------------------------------------
/data/relative.txt:
--------------------------------------------------------------------------------
1 |     0.999941    0.0108432 -0.000635437     0.485657
2 |   -0.0108468     0.999924  -0.00587782      0.10642
3 |  0.000571654   0.00588436     0.999983   -0.0131581
4 |            0            0            0            1
5 | 


--------------------------------------------------------------------------------
/docker/focal/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:focal
 2 | 
 3 | ENV DEBIAN_FRONTEND=noninteractive
 4 | 
 5 | RUN apt-get update && apt-get install --no-install-recommends -y \
 6 |     && apt-get install --no-install-recommends -y wget nano cmake build-essential git \
 7 |     ca-certificates libeigen3-dev libboost-all-dev libflann-dev libvtk7-dev libomp-dev libgtest-dev \
 8 |     && apt-get clean \
 9 |     && rm -rf /var/lib/apt/lists/*
10 | 
11 | WORKDIR /root
12 | RUN git clone https://github.com/PointCloudLibrary/pcl
13 | 
14 | WORKDIR /root/pcl/build
15 | RUN cmake .. -DCMAKE_BUILD_TYPE=Release \
16 |     -DBUILD_geometry=OFF -DBUILD_keypoints=OFF -DBUILD_ml=OFF -DBUILD_outofcore=OFF -DBUILD_people=OFF \
17 |     -DBUILD_recognition=OFF -DBUILD_segmentation=OFF -DBUILD_stereo=OFF -DBUILD_surface=OFF -DBUILD_tools=OFF -DBUILD_tracking=OFF
18 | RUN make -j$(nproc) && make install
19 | 
20 | COPY . /root/fast_gicp
21 | WORKDIR /root/fast_gicp
22 | RUN git submodule init && git submodule update
23 | 
24 | RUN rm -rf /root/fast_gicp/build
25 | WORKDIR /root/fast_gicp/build
26 | RUN cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_PYTHON_BINDINGS=ON -DBUILD_apps=OFF -DBUILD_test=ON
27 | RUN make -j$(nproc)
28 | 
29 | RUN /root/fast_gicp/build/gicp_test /root/fast_gicp/data
30 | 
31 | WORKDIR /root
32 | 
33 | CMD ["bash"]
34 | 


--------------------------------------------------------------------------------
/docker/focal_cuda/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:11.6.2-devel-ubuntu20.04
 2 | 
 3 | ENV DEBIAN_FRONTEND=noninteractive
 4 | 
 5 | RUN apt-get update && apt-get install --no-install-recommends -y \
 6 |     && apt-get install --no-install-recommends -y wget nano cmake build-essential git \
 7 |     ca-certificates libeigen3-dev libboost-all-dev libflann-dev libvtk7-dev libomp-dev libgtest-dev \
 8 |     && apt-get clean \
 9 |     && rm -rf /var/lib/apt/lists/*
10 | 
11 | WORKDIR /root
12 | RUN git clone https://github.com/PointCloudLibrary/pcl
13 | 
14 | WORKDIR /root/pcl/build
15 | RUN cmake .. -DCMAKE_BUILD_TYPE=Release \
16 |     -DBUILD_geometry=OFF -DBUILD_keypoints=OFF -DBUILD_ml=OFF -DBUILD_outofcore=OFF -DBUILD_people=OFF \
17 |     -DBUILD_recognition=OFF -DBUILD_segmentation=OFF -DBUILD_stereo=OFF -DBUILD_surface=OFF -DBUILD_tools=OFF -DBUILD_tracking=OFF
18 | RUN make -j$(nproc) && make install
19 | 
20 | COPY . /root/fast_gicp
21 | WORKDIR /root/fast_gicp
22 | RUN git submodule init && git submodule update
23 | 
24 | RUN rm -rf /root/fast_gicp/build
25 | WORKDIR /root/fast_gicp/build
26 | RUN cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_VGICP_CUDA=ON -DBUILD_PYTHON_BINDINGS=ON -DBUILD_apps=OFF -DBUILD_test=ON
27 | RUN make -j$(nproc)
28 | 
29 | WORKDIR /root
30 | 
31 | CMD ["bash"]
32 | 


--------------------------------------------------------------------------------
/docker/kinetic/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ros:kinetic
 2 | 
 3 | RUN apt-get update && apt-get install --no-install-recommends -y \
 4 |     && apt-get install --no-install-recommends -y wget nano build-essential \
 5 |                                                   ros-kinetic-pcl-ros \
 6 |  && apt-get clean \
 7 |  && rm -rf /var/lib/apt/lists/*
 8 | 
 9 | # install cmake 3.19
10 | WORKDIR /root
11 | RUN wget https://github.com/Kitware/CMake/releases/download/v3.19.0-rc1/cmake-3.19.0-rc1.tar.gz
12 | RUN tar xzvf cmake-3.19.0-rc1.tar.gz
13 | 
14 | WORKDIR /root/cmake-3.19.0-rc1
15 | RUN ./bootstrap
16 | RUN make -j$(nproc) && make install
17 | 
18 | RUN mkdir -p /root/catkin_ws/src
19 | WORKDIR /root/catkin_ws/src
20 | RUN /bin/bash -c '. /opt/ros/kinetic/setup.bash; catkin_init_workspace'
21 | 
22 | COPY . /root/catkin_ws/src/fast_gicp/
23 | WORKDIR /root/catkin_ws/src/fast_gicp
24 | RUN git submodule init && git submodule update
25 | 
26 | WORKDIR /root/catkin_ws
27 | RUN /bin/bash -c '. /opt/ros/kinetic/setup.bash; catkin_make'
28 | RUN sed -i "6i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh
29 | 
30 | WORKDIR /
31 | 
32 | ENTRYPOINT ["/ros_entrypoint.sh"]
33 | CMD ["bash"]
34 | 


--------------------------------------------------------------------------------
/docker/melodic/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ros:melodic
 2 | 
 3 | RUN apt-get update && apt-get install --no-install-recommends -y \
 4 |     && apt-get install --no-install-recommends -y wget nano build-essential \
 5 |     libgtest-dev libomp-dev ros-melodic-pcl-ros \
 6 |     && apt-get clean \
 7 |     && rm -rf /var/lib/apt/lists/*
 8 | 
 9 | 
10 | RUN mkdir -p /root/catkin_ws/src
11 | WORKDIR /root/catkin_ws/src
12 | RUN /bin/bash -c '. /opt/ros/melodic/setup.bash; catkin_init_workspace'
13 | 
14 | COPY . /root/catkin_ws/src/fast_gicp/
15 | WORKDIR /root/catkin_ws/src/fast_gicp
16 | RUN git submodule init && git submodule update
17 | 
18 | WORKDIR /root/catkin_ws
19 | RUN /bin/bash -c '. /opt/ros/melodic/setup.bash; catkin_make'
20 | RUN sed -i "6i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh
21 | 
22 | WORKDIR /
23 | 
24 | ENTRYPOINT ["/ros_entrypoint.sh"]
25 | CMD ["bash"]
26 | 


--------------------------------------------------------------------------------
/docker/melodic_llvm/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ros:melodic
 2 | 
 3 | RUN apt-get update && apt-get install --no-install-recommends -y \
 4 |     && apt-get install --no-install-recommends -y wget nano build-essential \
 5 |     libgtest-dev libomp-dev clang lld ros-melodic-pcl-ros \
 6 |     && apt-get clean \
 7 |     && rm -rf /var/lib/apt/lists/*
 8 | 
 9 | RUN update-alternatives --install /usr/bin/ld ld /usr/bin/ld.lld 50
10 | 
11 | RUN mkdir -p /root/catkin_ws/src
12 | WORKDIR /root/catkin_ws/src
13 | RUN /bin/bash -c '. /opt/ros/melodic/setup.bash; catkin_init_workspace'
14 | 
15 | COPY . /root/catkin_ws/src/fast_gicp/
16 | WORKDIR /root/catkin_ws/src/fast_gicp
17 | RUN git submodule init && git submodule update
18 | 
19 | WORKDIR /root/catkin_ws
20 | RUN /bin/bash -c '. /opt/ros/melodic/setup.bash; CC=clang CXX=clang++ catkin_make'
21 | RUN sed -i "6i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh
22 | 
23 | WORKDIR /
24 | 
25 | ENTRYPOINT ["/ros_entrypoint.sh"]
26 | CMD ["bash"]
27 | 


--------------------------------------------------------------------------------
/docker/noetic/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ros:noetic
 2 | 
 3 | RUN apt-get update && apt-get install --no-install-recommends -y \
 4 |     && apt-get install --no-install-recommends -y wget nano build-essential \
 5 |     git libgtest-dev ros-noetic-pcl-ros \
 6 |     && apt-get clean \
 7 |     && rm -rf /var/lib/apt/lists/*
 8 | 
 9 | RUN mkdir -p /root/catkin_ws/src
10 | WORKDIR /root/catkin_ws/src
11 | RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; catkin_init_workspace'
12 | 
13 | COPY . /root/catkin_ws/src/fast_gicp/
14 | WORKDIR /root/catkin_ws/src/fast_gicp
15 | RUN git submodule init && git submodule update
16 | 
17 | WORKDIR /root/catkin_ws
18 | RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; catkin_make -DBUILD_test=ON'
19 | RUN sed -i "6i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh
20 | 
21 | RUN /root/catkin_ws/devel/lib/fast_gicp/gicp_test /root/catkin_ws/src/fast_gicp/data
22 | 
23 | WORKDIR /
24 | 
25 | ENTRYPOINT ["/ros_entrypoint.sh"]
26 | CMD ["bash"]
27 | 


--------------------------------------------------------------------------------
/docker/noetic_llvm/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ros:noetic
 2 | 
 3 | RUN apt-get update && apt-get install --no-install-recommends -y \
 4 |     && apt-get install --no-install-recommends -y wget nano build-essential \
 5 |     git clang lld libomp-dev libgtest-dev ros-noetic-pcl-ros \
 6 |     && apt-get clean \
 7 |     && rm -rf /var/lib/apt/lists/*
 8 | 
 9 | RUN update-alternatives --install /usr/bin/ld ld /usr/bin/ld.lld 50
10 | 
11 | RUN mkdir -p /root/catkin_ws/src
12 | WORKDIR /root/catkin_ws/src
13 | RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; catkin_init_workspace'
14 | 
15 | COPY . /root/catkin_ws/src/fast_gicp/
16 | WORKDIR /root/catkin_ws/src/fast_gicp
17 | RUN git submodule init && git submodule update
18 | 
19 | WORKDIR /root/catkin_ws
20 | RUN /bin/bash -c '. /opt/ros/noetic/setup.bash; CC=clang CXX=clang++ catkin_make -DBUILD_test=ON'
21 | RUN sed -i "6i source \"/root/catkin_ws/devel/setup.bash\"" /ros_entrypoint.sh
22 | 
23 | RUN /root/catkin_ws/devel/lib/fast_gicp/gicp_test /root/catkin_ws/src/fast_gicp/data
24 | 
25 | WORKDIR /
26 | 
27 | ENTRYPOINT ["/ros_entrypoint.sh"]
28 | CMD ["bash"]
29 | 


--------------------------------------------------------------------------------
/include/fast_gicp/cuda/brute_force_knn.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_CUDA_BRUTE_FORCE_KNN_CUH
 2 | #define FAST_GICP_CUDA_BRUTE_FORCE_KNN_CUH
 3 | 
 4 | #include <Eigen/Core>
 5 | 
 6 | #include <thrust/device_vector.h>
 7 | 
 8 | namespace fast_gicp {
 9 |   namespace cuda {
10 | 
11 | void brute_force_knn_search(const thrust::device_vector<Eigen::Vector3f>& source, const thrust::device_vector<Eigen::Vector3f>& target, int k, thrust::device_vector<thrust::pair<float, int>>& k_neighbors, bool do_sort=false);
12 | 
13 |   }
14 | } // namespace fast_gicp
15 | 
16 | 
17 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/cuda/compute_derivatives.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_CUDA_COMPUTE_DERIVATIVES_CUH
 2 | #define FAST_GICP_CUDA_COMPUTE_DERIVATIVES_CUH
 3 | 
 4 | #include <Eigen/Core>
 5 | #include <thrust/device_vector.h>
 6 | 
 7 | #include <fast_gicp/cuda/gaussian_voxelmap.cuh>
 8 | 
 9 | namespace fast_gicp {
10 |   namespace cuda {
11 | 
12 |   double compute_derivatives(
13 |     const thrust::device_vector<Eigen::Vector3f>& src_points,
14 |     const thrust::device_vector<Eigen::Matrix3f>& src_covs,
15 |     const GaussianVoxelMap& voxelmap,
16 |     const thrust::device_vector<thrust::pair<int, int>>& voxel_correspondences,
17 |     const thrust::device_ptr<const Eigen::Isometry3f>& linearized_x_ptr,
18 |     const thrust::device_ptr<const Eigen::Isometry3f>& x_ptr,
19 |     Eigen::Matrix<double, 6, 6>* H,
20 |     Eigen::Matrix<double, 6, 1>* b);
21 |   }
22 | }  // namespace fast_gicp
23 | 
24 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/cuda/compute_mahalanobis.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_CUDA_COMPUTE_MAHALANOBIS_CUH
 2 | #define FAST_GICP_CUDA_COMPUTE_MAHALANOBIS_CUH
 3 | 
 4 | #include <Eigen/Core>
 5 | #include <Eigen/Geometry>
 6 | #include <thrust/device_vector.h>
 7 | 
 8 | #include <fast_gicp/cuda/gaussian_voxelmap.cuh>
 9 | 
10 | namespace fast_gicp {
11 |   namespace cuda {
12 | 
13 | void compute_mahalanobis(
14 |   const thrust::device_vector<Eigen::Vector3f>& src_points,
15 |   const thrust::device_vector<Eigen::Matrix3f>& src_covs,
16 |   const GaussianVoxelMap& voxelmap,
17 |   const thrust::device_vector<int>& voxel_correspondences,
18 |   const Eigen::Isometry3f& linearized_x,
19 |   thrust::device_vector<Eigen::Matrix3f>& mahalanobis
20 |   );
21 | 
22 |   }
23 | }  // namespace fast_gicp
24 | 
25 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/cuda/covariance_estimation.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_CUDA_COVARIANCE_ESTIMATION_CUH
 2 | #define FAST_GICP_CUDA_COVARIANCE_ESTIMATION_CUH
 3 | 
 4 | #include <Eigen/Core>
 5 | #include <thrust/device_vector.h>
 6 | #include <fast_gicp/gicp/gicp_settings.hpp>
 7 | 
 8 | namespace fast_gicp {
 9 | namespace cuda {
10 | 
11 | void covariance_estimation(const thrust::device_vector<Eigen::Vector3f>& points, int k, const thrust::device_vector<int>& k_neighbors, thrust::device_vector<Eigen::Matrix3f>& covariances);
12 | 
13 | void covariance_estimation_rbf(const thrust::device_vector<Eigen::Vector3f>& points, double kernel_width, double max_dist, thrust::device_vector<Eigen::Matrix3f>& covariances);
14 | }
15 |   }  // namespace fast_gicp
16 | 
17 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/cuda/covariance_regularization.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_CUDA_COVARIANCE_REGULARIZATION_CUH
 2 | #define FAST_GICP_CUDA_COVARIANCE_REGULARIZATION_CUH
 3 | 
 4 | #include <Eigen/Core>
 5 | #include <thrust/device_vector.h>
 6 | #include <fast_gicp/gicp/gicp_settings.hpp>
 7 | 
 8 | namespace fast_gicp {
 9 | namespace cuda {
10 | 
11 | void covariance_regularization(thrust::device_vector<Eigen::Vector3f>& means, thrust::device_vector<Eigen::Matrix3f>& covs, RegularizationMethod method);
12 | 
13 | }  // namespace cuda
14 | }  // namespace fast_gicp
15 | 
16 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/cuda/fast_vgicp_cuda.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_FAST_VGICP_CUDA_CORE_CUH
 2 | #define FAST_GICP_FAST_VGICP_CUDA_CORE_CUH
 3 | 
 4 | #include <memory>
 5 | #include <vector>
 6 | #include <Eigen/Core>
 7 | #include <Eigen/Geometry>
 8 | 
 9 | #include <fast_gicp/gicp/gicp_settings.hpp>
10 | 
11 | namespace thrust {
12 | 
13 | template <typename T1, typename T2>
14 | class pair;
15 | 
16 | template <typename T>
17 | class device_allocator;
18 | 
19 | template <typename T, typename Alloc>
20 | class device_vector;
21 | }  // namespace thrust
22 | 
23 | namespace fast_gicp {
24 | namespace cuda {
25 | 
26 | class GaussianVoxelMap;
27 | 
28 | class FastVGICPCudaCore {
29 | public:
30 |   using Points = thrust::device_vector<Eigen::Vector3f, thrust::device_allocator<Eigen::Vector3f>>;
31 |   using Indices = thrust::device_vector<int, thrust::device_allocator<int>>;
32 |   using Matrices = thrust::device_vector<Eigen::Matrix3f, thrust::device_allocator<Eigen::Matrix3f>>;
33 |   using Correspondences = thrust::device_vector<thrust::pair<int, int>, thrust::device_allocator<thrust::pair<int, int>>>;
34 |   using VoxelCoordinates = thrust::device_vector<Eigen::Vector3i, thrust::device_allocator<Eigen::Vector3i>>;
35 | 
36 |   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
37 |   FastVGICPCudaCore();
38 |   ~FastVGICPCudaCore();
39 | 
40 |   void set_resolution(double resolution);
41 |   void set_kernel_params(double kernel_width, double kernel_max_dist);
42 |   void set_neighbor_search_method(fast_gicp::NeighborSearchMethod method, double radius);
43 | 
44 |   void swap_source_and_target();
45 |   void set_source_cloud(const std::vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>>& cloud);
46 |   void set_target_cloud(const std::vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>>& cloud);
47 | 
48 |   void set_source_neighbors(int k, const std::vector<int>& neighbors);
49 |   void set_target_neighbors(int k, const std::vector<int>& neighbors);
50 |   void find_source_neighbors(int k);
51 |   void find_target_neighbors(int k);
52 | 
53 |   void calculate_source_covariances(RegularizationMethod method);
54 |   void calculate_target_covariances(RegularizationMethod method);
55 | 
56 |   void calculate_source_covariances_rbf(RegularizationMethod method);
57 |   void calculate_target_covariances_rbf(RegularizationMethod method);
58 | 
59 |   void get_source_covariances(std::vector<Eigen::Matrix3f, Eigen::aligned_allocator<Eigen::Matrix3f>>& covs) const;
60 |   void get_target_covariances(std::vector<Eigen::Matrix3f, Eigen::aligned_allocator<Eigen::Matrix3f>>& covs) const;
61 | 
62 |   void get_voxel_num_points(std::vector<int>& num_points) const;
63 |   void get_voxel_means(std::vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>>& means) const;
64 |   void get_voxel_covs(std::vector<Eigen::Matrix3f, Eigen::aligned_allocator<Eigen::Matrix3f>>& covs) const;
65 |   void get_voxel_correspondences(std::vector<std::pair<int, int>>& correspondences) const;
66 | 
67 |   void create_target_voxelmap();
68 | 
69 |   void update_correspondences(const Eigen::Isometry3d& trans);
70 | 
71 |   double compute_error(const Eigen::Isometry3d& trans, Eigen::Matrix<double, 6, 6>* H, Eigen::Matrix<double, 6, 1>* b) const;
72 | 
73 | public:
74 |   double resolution;
75 |   double kernel_width;
76 |   double kernel_max_dist;
77 |   std::unique_ptr<VoxelCoordinates> offsets;
78 | 
79 |   std::unique_ptr<Points> source_points;
80 |   std::unique_ptr<Points> target_points;
81 | 
82 |   std::unique_ptr<Indices> source_neighbors;
83 |   std::unique_ptr<Indices> target_neighbors;
84 | 
85 |   std::unique_ptr<Matrices> source_covariances;
86 |   std::unique_ptr<Matrices> target_covariances;
87 | 
88 |   std::unique_ptr<GaussianVoxelMap> voxelmap;
89 | 
90 |   Eigen::Isometry3f linearized_x;
91 |   std::unique_ptr<Correspondences> voxel_correspondences;
92 | };
93 | 
94 | }  // namespace cuda
95 | }  // namespace fast_gicp
96 | 
97 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/cuda/find_voxel_correspondences.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_CUDA_FIND_VOXEL_CORRESPONDENCES_CUH
 2 | #define FAST_GICP_CUDA_FIND_VOXEL_CORRESPONDENCES_CUH
 3 | 
 4 | #include <Eigen/Core>
 5 | #include <Eigen/Geometry>
 6 | 
 7 | #include <thrust/pair.h>
 8 | #include <thrust/device_vector.h>
 9 | #include <fast_gicp/cuda/gaussian_voxelmap.cuh>
10 | 
11 | namespace fast_gicp {
12 | namespace cuda {
13 | 
14 | void find_voxel_correspondences(
15 |   const thrust::device_vector<Eigen::Vector3f>& src_points,
16 |   const GaussianVoxelMap& voxelmap,
17 |   const thrust::device_ptr<const Eigen::Isometry3f>& x_ptr,
18 |   const thrust::device_vector<Eigen::Vector3i>& offsets,
19 |   thrust::device_vector<thrust::pair<int, int>>& correspondences) ;
20 | 
21 | }  // namespace cuda
22 | }  // namespace fast_gicp
23 | 
24 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/cuda/gaussian_voxelmap.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_CUDA_GAUSSIAN_VOXELMAP_CUH
 2 | #define FAST_GICP_CUDA_GAUSSIAN_VOXELMAP_CUH
 3 | 
 4 | #include <Eigen/Core>
 5 | #include <thrust/host_vector.h>
 6 | #include <thrust/device_vector.h>
 7 | 
 8 | namespace fast_gicp {
 9 | namespace cuda {
10 | 
11 | struct VoxelMapInfo {
12 |   int num_voxels;
13 |   int num_buckets;
14 |   int max_bucket_scan_count;
15 |   float voxel_resolution;
16 | };
17 | 
18 | class GaussianVoxelMap {
19 | public:
20 |   GaussianVoxelMap(float resolution, int init_num_buckets = 8192, int max_bucket_scan_count = 10);
21 | 
22 |   void create_voxelmap(const thrust::device_vector<Eigen::Vector3f>& points);
23 |   void create_voxelmap(const thrust::device_vector<Eigen::Vector3f>& points, const thrust::device_vector<Eigen::Matrix3f>& covariances);
24 | 
25 | private:
26 |   void create_bucket_table(cudaStream_t stream, const thrust::device_vector<Eigen::Vector3f>& points);
27 | 
28 | public:
29 |   const int init_num_buckets;
30 |   VoxelMapInfo voxelmap_info;
31 |   thrust::device_vector<VoxelMapInfo> voxelmap_info_ptr;
32 | 
33 |   thrust::device_vector<thrust::pair<Eigen::Vector3i, int>> buckets;
34 | 
35 |   // voxel data
36 |   thrust::device_vector<int> num_points;
37 |   thrust::device_vector<Eigen::Vector3f> voxel_means;
38 |   thrust::device_vector<Eigen::Matrix3f> voxel_covs;
39 | };
40 | 
41 | }  // namespace cuda
42 | }  // namespace fast_gicp
43 | 
44 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/cuda/ndt_compute_derivatives.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_CUDA_NDT_COMPUTE_DERIVATIVES_CUH
 2 | #define FAST_GICP_CUDA_NDT_COMPUTE_DERIVATIVES_CUH
 3 | 
 4 | #include <Eigen/Core>
 5 | #include <Eigen/Geometry>
 6 | #include <thrust/device_vector.h>
 7 | 
 8 | #include <fast_gicp/cuda/gaussian_voxelmap.cuh>
 9 | 
10 | namespace fast_gicp {
11 | namespace cuda {
12 | 
13 | double p2d_ndt_compute_derivatives(
14 |   const GaussianVoxelMap& target_voxelmap,
15 |   const thrust::device_vector<Eigen::Vector3f>& source_points,
16 |   const thrust::device_vector<thrust::pair<int, int>>& correspondences,
17 |   const thrust::device_ptr<const Eigen::Isometry3f>& linearized_x_ptr,
18 |   const thrust::device_ptr<const Eigen::Isometry3f>& x_ptr,
19 |   Eigen::Matrix<double, 6, 6>* H,
20 |   Eigen::Matrix<double, 6, 1>* b);
21 | 
22 | double d2d_ndt_compute_derivatives(
23 |   const GaussianVoxelMap& target_voxelmap,
24 |   const GaussianVoxelMap& source_voxelmap,
25 |   const thrust::device_vector<thrust::pair<int, int>>& correspondences,
26 |   const thrust::device_ptr<const Eigen::Isometry3f>& linearized_x_ptr,
27 |   const thrust::device_ptr<const Eigen::Isometry3f>& x_ptr,
28 |   Eigen::Matrix<double, 6, 6>* H,
29 |   Eigen::Matrix<double, 6, 1>* b);
30 | 
31 | }  // namespace cuda
32 | }  // namespace fast_gicp
33 | 
34 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/cuda/ndt_cuda.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_NDT_CUDA_CUH
 2 | #define FAST_GICP_NDT_CUDA_CUH
 3 | 
 4 | #include <memory>
 5 | #include <vector>
 6 | #include <Eigen/Core>
 7 | #include <Eigen/Geometry>
 8 | 
 9 | #include <fast_gicp/ndt/ndt_settings.hpp>
10 | #include <fast_gicp/gicp/gicp_settings.hpp>
11 | 
12 | namespace thrust {
13 | template <typename T1, typename T2>
14 | class pair;
15 | 
16 | template <typename T>
17 | class device_allocator;
18 | 
19 | template <typename T, typename Alloc>
20 | class device_vector;
21 | }  // namespace thrust
22 | 
23 | namespace fast_gicp {
24 | namespace cuda {
25 | 
26 | class GaussianVoxelMap;
27 | 
28 | class NDTCudaCore {
29 | public:
30 |   using Points = thrust::device_vector<Eigen::Vector3f, thrust::device_allocator<Eigen::Vector3f>>;
31 |   using Indices = thrust::device_vector<int, thrust::device_allocator<int>>;
32 |   using Matrices = thrust::device_vector<Eigen::Matrix3f, thrust::device_allocator<Eigen::Matrix3f>>;
33 |   using Correspondences = thrust::device_vector<thrust::pair<int, int>, thrust::device_allocator<thrust::pair<int, int>>>;
34 |   using VoxelCoordinates = thrust::device_vector<Eigen::Vector3i, thrust::device_allocator<Eigen::Vector3i>>;
35 | 
36 |   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
37 |   NDTCudaCore();
38 |   ~NDTCudaCore();
39 | 
40 |   void set_distance_mode(fast_gicp::NDTDistanceMode mode);
41 |   void set_resolution(double resolution);
42 |   void set_neighbor_search_method(fast_gicp::NeighborSearchMethod method, double radius);
43 | 
44 |   void swap_source_and_target();
45 |   void set_source_cloud(const std::vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>>& cloud);
46 |   void set_target_cloud(const std::vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>>& cloud);
47 | 
48 |   void create_voxelmaps();
49 |   void create_target_voxelmap();
50 |   void create_source_voxelmap();
51 | 
52 |   void update_correspondences(const Eigen::Isometry3d& trans);
53 |   double compute_error(const Eigen::Isometry3d& trans, Eigen::Matrix<double, 6, 6>* H, Eigen::Matrix<double, 6, 1>* b) const;
54 | 
55 | public:
56 |   fast_gicp::NDTDistanceMode distance_mode;
57 |   double resolution;
58 |   std::unique_ptr<VoxelCoordinates> offsets;
59 | 
60 |   std::unique_ptr<Points> source_points;
61 |   std::unique_ptr<Points> target_points;
62 | 
63 |   std::unique_ptr<GaussianVoxelMap> source_voxelmap;
64 |   std::unique_ptr<GaussianVoxelMap> target_voxelmap;
65 | 
66 |   Eigen::Isometry3f linearized_x;
67 |   std::unique_ptr<Correspondences> correspondences;
68 | };
69 | 
70 | }  // namespace cuda
71 | }  // namespace fast_gicp
72 | 
73 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/cuda/vector3_hash.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_CUDA_VECTOR3_HASH_CUH
 2 | #define FAST_GICP_CUDA_VECTOR3_HASH_CUH
 3 | 
 4 | namespace fast_gicp {
 5 |   namespace cuda {
 6 | 
 7 | // taken from boost/hash.hpp
 8 | __host__ __device__  inline void hash_combine(uint64_t& h, uint64_t k) {
 9 |   const uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
10 |   const int r = 47;
11 | 
12 |   k *= m;
13 |   k ^= k >> r;
14 |   k *= m;
15 | 
16 |   h ^= k;
17 |   h *= m;
18 | 
19 |   h += 0xe6546b64;
20 | }
21 | 
22 | inline __host__ __device__ bool equal(const Eigen::Vector3i& lhs, const Eigen::Vector3i& rhs) {
23 |   return lhs[0] == rhs[0] && lhs[1] == rhs[1] && lhs[2] == rhs[2];
24 | }
25 | 
26 | // compute vector3i hash
27 | __host__ __device__ inline uint64_t vector3i_hash(const Eigen::Vector3i& x) {
28 |   uint64_t seed = 0;
29 |   hash_combine(seed, x[0]);
30 |   hash_combine(seed, x[1]);
31 |   hash_combine(seed, x[2]);
32 |   return seed;
33 | }
34 | 
35 | __host__ __device__ inline Eigen::Vector3i calc_voxel_coord(const Eigen::Vector3f& x, float resolution) {
36 |   Eigen::Vector3i coord = (x.array() / resolution - 0.5).floor().cast<int>();
37 |   return coord;
38 | }
39 | 
40 |   }
41 | }  // namespace fast_gicp
42 | 
43 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/gicp/experimental/fast_gicp_mp.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_FAST_GICP_MP_HPP
 2 | #define FAST_GICP_FAST_GICP_MP_HPP
 3 | 
 4 | #include <Eigen/Core>
 5 | #include <Eigen/Geometry>
 6 | 
 7 | #include <pcl/point_types.h>
 8 | #include <pcl/point_cloud.h>
 9 | #include <pcl/search/kdtree.h>
10 | #include <pcl/registration/registration.h>
11 | #include <fast_gicp/gicp/gicp_settings.hpp>
12 | 
13 | namespace fast_gicp {
14 | 
15 | template<typename PointSource, typename PointTarget>
16 | class FastGICPMultiPoints : public pcl::Registration<PointSource, PointTarget, float> {
17 | public:
18 |   using Scalar = float;
19 |   using Matrix4 = typename pcl::Registration<PointSource, PointTarget, Scalar>::Matrix4;
20 | 
21 |   using PointCloudSource = typename pcl::Registration<PointSource, PointTarget, Scalar>::PointCloudSource;
22 |   using PointCloudSourcePtr = typename PointCloudSource::Ptr;
23 |   using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr;
24 | 
25 |   using PointCloudTarget = typename pcl::Registration<PointSource, PointTarget, Scalar>::PointCloudTarget;
26 |   using PointCloudTargetPtr = typename PointCloudTarget::Ptr;
27 |   using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr;
28 | 
29 |   using pcl::Registration<PointSource, PointTarget, Scalar>::reg_name_;
30 |   using pcl::Registration<PointSource, PointTarget, Scalar>::input_;
31 |   using pcl::Registration<PointSource, PointTarget, Scalar>::target_;
32 | 
33 |   using pcl::Registration<PointSource, PointTarget, Scalar>::nr_iterations_;
34 |   using pcl::Registration<PointSource, PointTarget, Scalar>::max_iterations_;
35 |   using pcl::Registration<PointSource, PointTarget, Scalar>::final_transformation_;
36 |   using pcl::Registration<PointSource, PointTarget, Scalar>::transformation_epsilon_;
37 |   using pcl::Registration<PointSource, PointTarget, Scalar>::converged_;
38 |   using pcl::Registration<PointSource, PointTarget, Scalar>::corr_dist_threshold_;
39 | 
40 |   FastGICPMultiPoints();
41 |   virtual ~FastGICPMultiPoints() override;
42 | 
43 |   void setNumThreads(int n);
44 | 
45 |   void setRotationEpsilon(double eps);
46 | 
47 |   void setCorrespondenceRandomness(int k);
48 | 
49 |   void setRegularizationMethod(RegularizationMethod method);
50 | 
51 |   virtual void setInputSource(const PointCloudSourceConstPtr& cloud) override;
52 | 
53 |   virtual void setInputTarget(const PointCloudTargetConstPtr& cloud) override;
54 | 
55 | protected:
56 |   virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override;
57 | 
58 | private:
59 |   bool is_converged(const Eigen::Matrix<float, 6, 1>& delta) const;
60 | 
61 |   void update_correspondences(const Eigen::Matrix<float, 6, 1>& x);
62 | 
63 |   Eigen::VectorXf loss_ls(const Eigen::Matrix<float, 6, 1>& x, Eigen::MatrixXf* J) const;
64 | 
65 |   template<typename PointT>
66 |   bool calculate_covariances(const pcl::shared_ptr<const pcl::PointCloud<PointT>>& cloud, pcl::search::KdTree<PointT>& kdtree, std::vector<Matrix4, Eigen::aligned_allocator<Matrix4>>& covariances);
67 | 
68 | private:
69 |   int num_threads_;
70 |   int k_correspondences_;
71 |   double rotation_epsilon_;
72 |   double neighbor_search_radius_;
73 | 
74 |   RegularizationMethod regularization_method_;
75 | 
76 |   pcl::search::KdTree<PointSource> source_kdtree;
77 |   pcl::search::KdTree<PointTarget> target_kdtree;
78 | 
79 |   std::vector<Matrix4, Eigen::aligned_allocator<Matrix4>> source_covs;
80 |   std::vector<Matrix4, Eigen::aligned_allocator<Matrix4>> target_covs;
81 | 
82 |   std::vector<std::vector<int>> correspondences;
83 |   std::vector<std::vector<float>> sq_distances;
84 | };
85 | }  // namespace fast_gicp
86 | 
87 | #endif
88 | 


--------------------------------------------------------------------------------
/include/fast_gicp/gicp/experimental/fast_gicp_mp_impl.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef FAST_GICP_FAST_GICP_MP_IMPL_HPP
  2 | #define FAST_GICP_FAST_GICP_MP_IMPL_HPP
  3 | 
  4 | #include <Eigen/Core>
  5 | #include <Eigen/Geometry>
  6 | 
  7 | #include <pcl/point_types.h>
  8 | #include <pcl/point_cloud.h>
  9 | #include <pcl/search/kdtree.h>
 10 | #include <pcl/registration/registration.h>
 11 | 
 12 | #include <sophus/so3.hpp>
 13 | #include <fast_gicp/so3/so3.hpp>
 14 | #include <fast_gicp/opt/gauss_newton.hpp>
 15 | #include <fast_gicp/gicp/experimental/fast_gicp_mp.hpp>
 16 | 
 17 | namespace fast_gicp {
 18 | 
 19 | template<typename PointSource, typename PointTarget>
 20 | FastGICPMultiPoints<PointSource, PointTarget>::FastGICPMultiPoints() {
 21 | #ifdef _OPENMP
 22 |   num_threads_ = omp_get_max_threads();
 23 | #else
 24 |   num_threads_ = 1;
 25 | #endif
 26 | 
 27 |   k_correspondences_ = 20;
 28 |   reg_name_ = "FastGICPMultiPoints";
 29 |   max_iterations_ = 64;
 30 |   rotation_epsilon_ = 1e-5;
 31 |   transformation_epsilon_ = 1e-5;
 32 |   // corr_dist_threshold_ = 1.0;
 33 |   regularization_method_ = RegularizationMethod::PLANE;
 34 |   corr_dist_threshold_ = std::numeric_limits<float>::max();
 35 |   neighbor_search_radius_ = 0.5;
 36 | }
 37 | 
 38 | template<typename PointSource, typename PointTarget>
 39 | FastGICPMultiPoints<PointSource, PointTarget>::~FastGICPMultiPoints() {}
 40 | 
 41 | template<typename PointSource, typename PointTarget>
 42 | void FastGICPMultiPoints<PointSource, PointTarget>::setRotationEpsilon(double eps) {
 43 |   rotation_epsilon_ = eps;
 44 | }
 45 | 
 46 | template<typename PointSource, typename PointTarget>
 47 | void FastGICPMultiPoints<PointSource, PointTarget>::setNumThreads(int n) {
 48 |   num_threads_ = n;
 49 | 
 50 | #ifdef _OPENMP
 51 |   if(n == 0) {
 52 |     num_threads_ = omp_get_max_threads();
 53 |   }
 54 | #endif
 55 | }
 56 | 
 57 | template<typename PointSource, typename PointTarget>
 58 | void FastGICPMultiPoints<PointSource, PointTarget>::setCorrespondenceRandomness(int k) {
 59 |   k_correspondences_ = k;
 60 | }
 61 | 
 62 | template<typename PointSource, typename PointTarget>
 63 | void FastGICPMultiPoints<PointSource, PointTarget>::setRegularizationMethod(RegularizationMethod method) {
 64 |   regularization_method_ = method;
 65 | }
 66 | 
 67 | template<typename PointSource, typename PointTarget>
 68 | void FastGICPMultiPoints<PointSource, PointTarget>::setInputSource(const PointCloudSourceConstPtr& cloud) {
 69 |   pcl::Registration<PointSource, PointTarget, Scalar>::setInputSource(cloud);
 70 |   calculate_covariances(*cloud, source_kdtree, source_covs);
 71 | }
 72 | 
 73 | template<typename PointSource, typename PointTarget>
 74 | void FastGICPMultiPoints<PointSource, PointTarget>::setInputTarget(const PointCloudTargetConstPtr& cloud) {
 75 |   pcl::Registration<PointSource, PointTarget, Scalar>::setInputTarget(cloud);
 76 |   calculate_covariances(cloud, target_kdtree, target_covs);
 77 | }
 78 | 
 79 | template<typename PointSource, typename PointTarget>
 80 | void FastGICPMultiPoints<PointSource, PointTarget>::computeTransformation(PointCloudSource& output, const Matrix4& guess) {
 81 |   Eigen::Matrix<float, 6, 1> x0;
 82 |   x0.head<3>() = Sophus::SO3f(guess.template block<3, 3>(0, 0)).log();
 83 |   x0.tail<3>() = guess.template block<3, 1>(0, 3);
 84 | 
 85 |   // if(x0.head<3>().norm() < 1e-2) {
 86 |   //   x0.head<3>() = (Eigen::Vector3f::Random()).normalized() * 1e-2;
 87 |   // }
 88 | 
 89 |   converged_ = false;
 90 |   GaussNewton<double, 6> solver;
 91 | 
 92 |   for(int i = 0; i < max_iterations_; i++) {
 93 |     nr_iterations_ = i;
 94 | 
 95 |     update_correspondences(x0);
 96 |     Eigen::MatrixXf J;
 97 |     Eigen::VectorXf loss = loss_ls(x0, &J);
 98 | 
 99 |     Eigen::Matrix<float, 6, 1> delta = solver.delta(loss.cast<double>(), J.cast<double>()).cast<float>();
100 | 
101 |     x0.head<3>() = (Sophus::SO3f::exp(-delta.head<3>()) * Sophus::SO3f::exp(x0.head<3>())).log();
102 |     x0.tail<3>() -= delta.tail<3>();
103 | 
104 |     if(is_converged(delta)) {
105 |       converged_ = true;
106 |       break;
107 |     }
108 |   }
109 | 
110 |   final_transformation_.setIdentity();
111 |   final_transformation_.template block<3, 3>(0, 0) = Sophus::SO3f::exp(x0.head<3>()).matrix();
112 |   final_transformation_.template block<3, 1>(0, 3) = x0.tail<3>();
113 | 
114 |   pcl::transformPointCloud(*input_, output, final_transformation_);
115 | }
116 | 
117 | template<typename PointSource, typename PointTarget>
118 | bool FastGICPMultiPoints<PointSource, PointTarget>::is_converged(const Eigen::Matrix<float, 6, 1>& delta) const {
119 |   double accum = 0.0;
120 |   Eigen::Matrix3f R = Sophus::SO3f::exp(delta.head<3>()).matrix() - Eigen::Matrix3f::Identity();
121 |   Eigen::Vector3f t = delta.tail<3>();
122 | 
123 |   Eigen::Matrix3f r_delta = 1.0 / rotation_epsilon_ * R.array().abs();
124 |   Eigen::Vector3f t_delta = 1.0 / transformation_epsilon_ * t.array().abs();
125 | 
126 |   return std::max(r_delta.maxCoeff(), t_delta.maxCoeff()) < 1;
127 | }
128 | 
129 | template<typename PointSource, typename PointTarget>
130 | void FastGICPMultiPoints<PointSource, PointTarget>::update_correspondences(const Eigen::Matrix<float, 6, 1>& x) {
131 |   Eigen::Matrix4f trans = Eigen::Matrix4f::Identity();
132 |   trans.block<3, 3>(0, 0) = Sophus::SO3f::exp(x.head<3>()).matrix();
133 |   trans.block<3, 1>(0, 3) = x.tail<3>();
134 | 
135 |   correspondences.resize(input_->size());
136 |   sq_distances.resize(input_->size());
137 | 
138 | #pragma omp parallel for num_threads(num_threads_)
139 |   for(int i = 0; i < input_->size(); i++) {
140 |     PointTarget pt;
141 |     pt.getVector4fMap() = trans * input_->at(i).getVector4fMap();
142 | 
143 |     std::vector<int> k_indices;
144 |     std::vector<float> k_sq_dists;
145 |     target_kdtree.radiusSearch(pt, neighbor_search_radius_, k_indices, k_sq_dists);
146 | 
147 |     if(k_indices.empty()) {
148 |       //  target_kdtree.nearestKSearch(pt, 1, k_indices, k_sq_dists);
149 |     }
150 | 
151 |     correspondences[i] = k_indices;
152 |     sq_distances[i] = k_sq_dists;
153 |   }
154 | }
155 | 
156 | template<typename PointSource, typename PointTarget>
157 | Eigen::VectorXf FastGICPMultiPoints<PointSource, PointTarget>::loss_ls(const Eigen::Matrix<float, 6, 1>& x, Eigen::MatrixXf* J) const {
158 |   Eigen::Matrix4f trans = Eigen::Matrix4f::Identity();
159 |   trans.block<3, 3>(0, 0) = Sophus::SO3f::exp(x.head<3>()).matrix();
160 |   trans.block<3, 1>(0, 3) = x.tail<3>();
161 | 
162 |   std::vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>> losses(input_->size());
163 |   // use row-major arrangement for ease of repacking
164 |   std::vector<Eigen::Matrix<float, 3, 6, Eigen::RowMajor>, Eigen::aligned_allocator<Eigen::Matrix<float, 3, 6, Eigen::RowMajor>>> Js(input_->size());
165 | 
166 |   std::atomic_int count(0);
167 | 
168 | #pragma omp parallel for num_threads(num_threads_)
169 |   for(int i = 0; i < correspondences.size(); i++) {
170 |     int source_index = i;
171 |     const auto& mean_A = input_->at(source_index).getVector4fMap();
172 |     const auto& cov_A = source_covs[source_index];
173 | 
174 |     const auto& k_indices = correspondences[i];
175 |     const auto& k_sq_dists = sq_distances[i];
176 |     if(k_indices.empty()) {
177 |       continue;
178 |     }
179 | 
180 |     double sum_w = 0.0;
181 |     Eigen::Vector4d sum_mean_B = Eigen::Vector4d::Zero();
182 |     Eigen::Matrix4d sum_cov_B = Eigen::Matrix4d::Zero();
183 |     for(int j = 0; j < k_indices.size(); j++) {
184 |       double w = 1 - std::sqrt(k_sq_dists[j]) / neighbor_search_radius_;
185 |       w = std::max(1e-3, std::min(1.0, w));
186 |       sum_w += w;
187 | 
188 |       int target_index = k_indices[j];
189 | 
190 |       sum_mean_B += w * target_->at(target_index).getVector4fMap().template cast<double>();
191 |       sum_cov_B += w * target_covs[target_index].template cast<double>();
192 |     }
193 | 
194 |     Eigen::Vector4f mean_B = (sum_mean_B / sum_w).template cast<float>();
195 |     Eigen::Matrix4f cov_B = (sum_cov_B / sum_w).template cast<float>();
196 | 
197 |     Eigen::Vector4f transed_mean_A = trans * mean_A;
198 |     Eigen::Vector4f d = mean_B - transed_mean_A;
199 |     Eigen::Matrix4f RCR = cov_B + trans * cov_A * trans.transpose();
200 |     RCR(3, 3) = 1;
201 | 
202 |     Eigen::Matrix4f RCR_inv = RCR.inverse();
203 |     Eigen::Vector4f RCRd = RCR_inv * d;
204 | 
205 |     Eigen::Matrix<float, 4, 6> dtdx0 = Eigen::Matrix<float, 4, 6>::Zero();
206 |     dtdx0.block<3, 3>(0, 0) = skew(transed_mean_A.head<3>());
207 |     dtdx0.block<3, 3>(0, 3) = -Eigen::Matrix3f::Identity();
208 | 
209 |     Eigen::Matrix<float, 4, 6> jlossexp = RCR_inv * dtdx0;
210 | 
211 |     int n = count++;
212 |     losses[n] = RCRd.head<3>();
213 |     Js[n] = jlossexp.block<3, 6>(0, 0);
214 |   }
215 | 
216 |   int final_size = count;
217 |   Eigen::VectorXf loss = Eigen::Map<Eigen::VectorXf>(losses.front().data(), final_size * 3);
218 |   *J = Eigen::Map<Eigen::MatrixXf>(Js.front().data(), 6, final_size * 3).transpose();
219 | 
220 |   return loss;
221 | }
222 | 
223 | template<typename PointSource, typename PointTarget>
224 | template<typename PointT>
225 | bool FastGICPMultiPoints<PointSource, PointTarget>::calculate_covariances(const pcl::shared_ptr<const pcl::PointCloud<PointT>>& cloud, pcl::search::KdTree<PointT>& kdtree, std::vector<Matrix4, Eigen::aligned_allocator<Matrix4>>& covariances) {
226 |   kdtree.setInputCloud(cloud);
227 |   covariances.resize(cloud->size());
228 | 
229 | #pragma omp parallel for num_threads(num_threads_)
230 |   for(int i = 0; i < cloud->size(); i++) {
231 |     std::vector<int> k_indices;
232 |     std::vector<float> k_sq_distances;
233 |     kdtree.nearestKSearch(cloud->at(i), k_correspondences_, k_indices, k_sq_distances);
234 | 
235 |     Eigen::Matrix<float, 4, -1> data(4, k_correspondences_);
236 | 
237 |     for(int j = 0; j < k_indices.size(); j++) {
238 |       data.col(j) = cloud->at(k_indices[j]).getVector4fMap();
239 |     }
240 | 
241 |     data.colwise() -= data.rowwise().mean().eval();
242 |     Eigen::Matrix4f cov = data * data.transpose();
243 | 
244 |     if(regularization_method_ == RegularizationMethod::FROBENIUS) {
245 |       double lambda = 1e-3;
246 |       Eigen::Matrix3d C = cov.block<3, 3>(0, 0).cast<double>() + lambda * Eigen::Matrix3d::Identity();
247 |       Eigen::Matrix3d C_inv = C.inverse();
248 |       covariances[i].setZero();
249 |       covariances[i].template block<3, 3>(0, 0) = (C_inv / C_inv.norm()).inverse().cast<float>();
250 |     } else {
251 |       Eigen::JacobiSVD<Eigen::Matrix3f> svd(cov.block<3, 3>(0, 0), Eigen::ComputeFullU | Eigen::ComputeFullV);
252 |       Eigen::Vector3f values;
253 | 
254 |       switch(regularization_method_) {
255 |         default:
256 |           std::cerr << "here must not be reached" << std::endl;
257 |           abort();
258 |         case RegularizationMethod::PLANE:
259 |           values = Eigen::Vector3f(1, 1, 1e-3);
260 |           break;
261 |         case RegularizationMethod::MIN_EIG:
262 |           values = svd.singularValues().array().max(1e-3);
263 |           break;
264 |         case RegularizationMethod::NORMALIZED_MIN_EIG:
265 |           values = svd.singularValues() / svd.singularValues().maxCoeff();
266 |           values = values.array().max(1e-3);
267 |           break;
268 |       }
269 | 
270 |       covariances[i].setZero();
271 |       covariances[i].template block<3, 3>(0, 0) = svd.matrixU() * values.asDiagonal() * svd.matrixV().transpose();
272 |     }
273 |   }
274 | 
275 |   return true;
276 | }
277 | 
278 | }  // namespace fast_gicp
279 | 
280 | #endif
281 | 


--------------------------------------------------------------------------------
/include/fast_gicp/gicp/fast_gicp.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef FAST_GICP_FAST_GICP_HPP
  2 | #define FAST_GICP_FAST_GICP_HPP
  3 | 
  4 | #include <Eigen/Core>
  5 | #include <Eigen/Geometry>
  6 | 
  7 | #include <pcl/point_types.h>
  8 | #include <pcl/point_cloud.h>
  9 | #include <pcl/search/search.h>
 10 | #include <pcl/registration/registration.h>
 11 | #include <fast_gicp/gicp/lsq_registration.hpp>
 12 | #include <fast_gicp/gicp/gicp_settings.hpp>
 13 | 
 14 | namespace fast_gicp {
 15 | 
 16 | /**
 17 |  * @brief Fast GICP algorithm optimized for multi threading with OpenMP
 18 |  */
 19 | template<typename PointSource, typename PointTarget, typename SearchMethodSource = pcl::search::KdTree<PointSource>, typename SearchMethodTarget = pcl::search::KdTree<PointTarget>>
 20 | class FastGICP : public LsqRegistration<PointSource, PointTarget> {
 21 | public:
 22 |   using Scalar = float;
 23 |   using Matrix4 = typename pcl::Registration<PointSource, PointTarget, Scalar>::Matrix4;
 24 | 
 25 |   using PointCloudSource = typename pcl::Registration<PointSource, PointTarget, Scalar>::PointCloudSource;
 26 |   using PointCloudSourcePtr = typename PointCloudSource::Ptr;
 27 |   using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr;
 28 | 
 29 |   using PointCloudTarget = typename pcl::Registration<PointSource, PointTarget, Scalar>::PointCloudTarget;
 30 |   using PointCloudTargetPtr = typename PointCloudTarget::Ptr;
 31 |   using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr;
 32 | 
 33 | #if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0)
 34 |   using Ptr = pcl::shared_ptr<FastGICP<PointSource, PointTarget>>;
 35 |   using ConstPtr = pcl::shared_ptr<const FastGICP<PointSource, PointTarget>>;
 36 | #else
 37 |   using Ptr = boost::shared_ptr<FastGICP<PointSource, PointTarget>>;
 38 |   using ConstPtr = boost::shared_ptr<const FastGICP<PointSource, PointTarget>>;
 39 | #endif
 40 | 
 41 | protected:
 42 |   using pcl::Registration<PointSource, PointTarget, Scalar>::reg_name_;
 43 |   using pcl::Registration<PointSource, PointTarget, Scalar>::input_;
 44 |   using pcl::Registration<PointSource, PointTarget, Scalar>::target_;
 45 |   using pcl::Registration<PointSource, PointTarget, Scalar>::corr_dist_threshold_;
 46 | 
 47 | public:
 48 |   FastGICP();
 49 |   virtual ~FastGICP() override;
 50 | 
 51 |   void setNumThreads(int n);
 52 |   void setCorrespondenceRandomness(int k);
 53 |   void setRegularizationMethod(RegularizationMethod method);
 54 | 
 55 |   virtual void swapSourceAndTarget() override;
 56 |   virtual void clearSource() override;
 57 |   virtual void clearTarget() override;
 58 | 
 59 |   virtual void setInputSource(const PointCloudSourceConstPtr& cloud) override;
 60 |   virtual void setSourceCovariances(const std::vector<Eigen::Matrix4d, Eigen::aligned_allocator<Eigen::Matrix4d>>& covs);
 61 |   virtual void setInputTarget(const PointCloudTargetConstPtr& cloud) override;
 62 |   virtual void setTargetCovariances(const std::vector<Eigen::Matrix4d, Eigen::aligned_allocator<Eigen::Matrix4d>>& covs);
 63 | 
 64 |   const std::vector<Eigen::Matrix4d, Eigen::aligned_allocator<Eigen::Matrix4d>>& getSourceCovariances() const {
 65 |     return source_covs_;
 66 |   }
 67 | 
 68 |   const std::vector<Eigen::Matrix4d, Eigen::aligned_allocator<Eigen::Matrix4d>>& getTargetCovariances() const {
 69 |     return target_covs_;
 70 |   }
 71 | 
 72 | protected:
 73 |   virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override;
 74 | 
 75 |   virtual void update_correspondences(const Eigen::Isometry3d& trans);
 76 | 
 77 |   virtual double linearize(const Eigen::Isometry3d& trans, Eigen::Matrix<double, 6, 6>* H, Eigen::Matrix<double, 6, 1>* b) override;
 78 | 
 79 |   virtual double compute_error(const Eigen::Isometry3d& trans) override;
 80 | 
 81 |   template<typename PointT>
 82 |   bool calculate_covariances(const typename pcl::PointCloud<PointT>::ConstPtr& cloud, pcl::search::Search<PointT>& kdtree, std::vector<Eigen::Matrix4d, Eigen::aligned_allocator<Eigen::Matrix4d>>& covariances);
 83 | 
 84 | protected:
 85 |   int num_threads_;
 86 |   int k_correspondences_;
 87 | 
 88 |   RegularizationMethod regularization_method_;
 89 | 
 90 |   std::shared_ptr<SearchMethodSource> search_source_;
 91 |   std::shared_ptr<SearchMethodTarget> search_target_;
 92 | 
 93 |   std::vector<Eigen::Matrix4d, Eigen::aligned_allocator<Eigen::Matrix4d>> source_covs_;
 94 |   std::vector<Eigen::Matrix4d, Eigen::aligned_allocator<Eigen::Matrix4d>> target_covs_;
 95 | 
 96 |   std::vector<Eigen::Matrix4d, Eigen::aligned_allocator<Eigen::Matrix4d>> mahalanobis_;
 97 | 
 98 |   std::vector<int> correspondences_;
 99 |   std::vector<float> sq_distances_;
100 | };
101 | }  // namespace fast_gicp
102 | 
103 | #endif
104 | 


--------------------------------------------------------------------------------
/include/fast_gicp/gicp/fast_gicp_st.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_FAST_GICP_ST_HPP
 2 | #define FAST_GICP_FAST_GICP_ST_HPP
 3 | 
 4 | #include <Eigen/Core>
 5 | #include <Eigen/Geometry>
 6 | 
 7 | #include <pcl/point_types.h>
 8 | #include <pcl/point_cloud.h>
 9 | #include <pcl/search/kdtree.h>
10 | #include <pcl/registration/registration.h>
11 | #include <fast_gicp/gicp/fast_gicp.hpp>
12 | #include <fast_gicp/gicp/gicp_settings.hpp>
13 | 
14 | namespace fast_gicp {
15 | 
16 | /**
17 |  * @brief Fast GICP algorithm optimized for single threading
18 |  */
19 | template<typename PointSource, typename PointTarget>
20 | class FastGICPSingleThread : public FastGICP<PointSource, PointTarget> {
21 | public:
22 |   using Scalar = float;
23 |   using Matrix4 = typename pcl::Registration<PointSource, PointTarget, Scalar>::Matrix4;
24 | 
25 |   using PointCloudSource = typename pcl::Registration<PointSource, PointTarget, Scalar>::PointCloudSource;
26 |   using PointCloudSourcePtr = typename PointCloudSource::Ptr;
27 |   using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr;
28 | 
29 | #if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0)
30 |   using Ptr = pcl::shared_ptr<FastGICPSingleThread<PointSource, PointTarget>>;
31 |   using ConstPtr = pcl::shared_ptr<const FastGICPSingleThread<PointSource, PointTarget>>;
32 | #else
33 |   using Ptr = boost::shared_ptr<FastGICPSingleThread<PointSource, PointTarget>>;
34 |   using ConstPtr = boost::shared_ptr<const FastGICPSingleThread<PointSource, PointTarget>>;
35 | #endif
36 | 
37 | protected:
38 |   using pcl::Registration<PointSource, PointTarget, Scalar>::input_;
39 |   using pcl::Registration<PointSource, PointTarget, Scalar>::target_;
40 | 
41 |   using FastGICP<PointSource, PointTarget>::search_target_;
42 |   using FastGICP<PointSource, PointTarget>::correspondences_;
43 |   using FastGICP<PointSource, PointTarget>::sq_distances_;
44 |   using FastGICP<PointSource, PointTarget>::source_covs_;
45 |   using FastGICP<PointSource, PointTarget>::target_covs_;
46 |   using FastGICP<PointSource, PointTarget>::mahalanobis_;
47 | 
48 | public:
49 |   FastGICPSingleThread();
50 |   virtual ~FastGICPSingleThread() override;
51 | 
52 | protected:
53 |   virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override;
54 | 
55 | private:
56 |   virtual void update_correspondences(const Eigen::Isometry3d& trans) override;
57 | 
58 |   virtual double linearize(const Eigen::Isometry3d& trans, Eigen::Matrix<double, 6, 6>* H = nullptr, Eigen::Matrix<double, 6, 1>* b = nullptr) override;
59 | 
60 |   virtual double compute_error(const Eigen::Isometry3d& trans) override;
61 | 
62 | private:
63 |   std::vector<float> second_sq_distances_;
64 |   std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f>> anchors_;
65 | };
66 | }  // namespace fast_gicp
67 | 
68 | #endif
69 | 


--------------------------------------------------------------------------------
/include/fast_gicp/gicp/fast_vgicp.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_FAST_VGICP_HPP
 2 | #define FAST_GICP_FAST_VGICP_HPP
 3 | 
 4 | #include <unordered_map>
 5 | 
 6 | #include <Eigen/Core>
 7 | #include <Eigen/Geometry>
 8 | 
 9 | #include <pcl/point_types.h>
10 | #include <pcl/point_cloud.h>
11 | #include <pcl/search/kdtree.h>
12 | #include <pcl/registration/registration.h>
13 | 
14 | #include <fast_gicp/gicp/gicp_settings.hpp>
15 | #include <fast_gicp/gicp/fast_gicp.hpp>
16 | #include <fast_gicp/gicp/fast_vgicp_voxel.hpp>
17 | 
18 | namespace fast_gicp {
19 | 
20 | /**
21 |  * @brief Fast Voxelized GICP algorithm boosted with OpenMP
22 |  */
23 | template<typename PointSource, typename PointTarget>
24 | class FastVGICP : public FastGICP<PointSource, PointTarget> {
25 | public:
26 |   using Scalar = float;
27 |   using Matrix4 = typename pcl::Registration<PointSource, PointTarget, Scalar>::Matrix4;
28 | 
29 |   using PointCloudSource = typename pcl::Registration<PointSource, PointTarget, Scalar>::PointCloudSource;
30 |   using PointCloudSourcePtr = typename PointCloudSource::Ptr;
31 |   using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr;
32 | 
33 |   using PointCloudTarget = typename pcl::Registration<PointSource, PointTarget, Scalar>::PointCloudTarget;
34 |   using PointCloudTargetPtr = typename PointCloudTarget::Ptr;
35 |   using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr;
36 | 
37 | #if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0)
38 |   using Ptr = pcl::shared_ptr<FastVGICP<PointSource, PointTarget>>;
39 |   using ConstPtr = pcl::shared_ptr<const FastVGICP<PointSource, PointTarget>>;
40 | #else
41 |   using Ptr = boost::shared_ptr<FastVGICP<PointSource, PointTarget>>;
42 |   using ConstPtr = boost::shared_ptr<const FastVGICP<PointSource, PointTarget>>;
43 | #endif
44 | 
45 | protected:
46 |   using pcl::Registration<PointSource, PointTarget, Scalar>::input_;
47 |   using pcl::Registration<PointSource, PointTarget, Scalar>::target_;
48 | 
49 |   using FastGICP<PointSource, PointTarget>::num_threads_;
50 |   using FastGICP<PointSource, PointTarget>::search_source_;
51 |   using FastGICP<PointSource, PointTarget>::search_target_;
52 |   using FastGICP<PointSource, PointTarget>::source_covs_;
53 |   using FastGICP<PointSource, PointTarget>::target_covs_;
54 | 
55 | public:
56 |   FastVGICP();
57 |   virtual ~FastVGICP() override;
58 | 
59 |   void setResolution(double resolution);
60 |   void setVoxelAccumulationMode(VoxelAccumulationMode mode);
61 |   void setNeighborSearchMethod(NeighborSearchMethod method);
62 | 
63 |   virtual void swapSourceAndTarget() override;
64 |   virtual void setInputTarget(const PointCloudTargetConstPtr& cloud) override;
65 | 
66 | protected:
67 |   virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override;
68 |   virtual void update_correspondences(const Eigen::Isometry3d& trans) override;
69 |   virtual double linearize(const Eigen::Isometry3d& trans, Eigen::Matrix<double, 6, 6>* H = nullptr, Eigen::Matrix<double, 6, 1>* b = nullptr) override;
70 |   virtual double compute_error(const Eigen::Isometry3d& trans) override;
71 | 
72 | protected:
73 |   double voxel_resolution_;
74 |   NeighborSearchMethod search_method_;
75 |   VoxelAccumulationMode voxel_mode_;
76 | 
77 |   std::unique_ptr<GaussianVoxelMap<PointTarget>> voxelmap_;
78 | 
79 |   std::vector<std::pair<int, GaussianVoxel::Ptr>> voxel_correspondences_;
80 |   std::vector<Eigen::Matrix4d, Eigen::aligned_allocator<Eigen::Matrix4d>> voxel_mahalanobis_;
81 | };
82 | }  // namespace fast_gicp
83 | 
84 | #endif
85 | 


--------------------------------------------------------------------------------
/include/fast_gicp/gicp/fast_vgicp_cuda.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_FAST_VGICP_CUDA_HPP
 2 | #define FAST_GICP_FAST_VGICP_CUDA_HPP
 3 | 
 4 | #include <Eigen/Core>
 5 | #include <Eigen/Geometry>
 6 | 
 7 | #include <pcl/point_types.h>
 8 | #include <pcl/point_cloud.h>
 9 | #include <pcl/search/kdtree.h>
10 | #include <pcl/registration/registration.h>
11 | 
12 | #include <fast_gicp/gicp/gicp_settings.hpp>
13 | #include <fast_gicp/gicp/lsq_registration.hpp>
14 | 
15 | namespace fast_gicp {
16 | 
17 | namespace cuda {
18 | class FastVGICPCudaCore;
19 | }
20 | 
21 | enum class NearestNeighborMethod { CPU_PARALLEL_KDTREE, GPU_BRUTEFORCE, GPU_RBF_KERNEL };
22 | 
23 | /**
24 |  * @brief Fast Voxelized GICP algorithm boosted with CUDA
25 |  */
26 | template<typename PointSource, typename PointTarget>
27 | class FastVGICPCuda : public LsqRegistration<PointSource, PointTarget> {
28 | public:
29 |   using Scalar = float;
30 |   using Matrix4 = typename pcl::Registration<PointSource, PointTarget, Scalar>::Matrix4;
31 | 
32 |   using PointCloudSource = typename pcl::Registration<PointSource, PointTarget, Scalar>::PointCloudSource;
33 |   using PointCloudSourcePtr = typename PointCloudSource::Ptr;
34 |   using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr;
35 | 
36 |   using PointCloudTarget = typename pcl::Registration<PointSource, PointTarget, Scalar>::PointCloudTarget;
37 |   using PointCloudTargetPtr = typename PointCloudTarget::Ptr;
38 |   using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr;
39 | 
40 | #if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0)
41 |   using Ptr = pcl::shared_ptr<FastVGICPCuda<PointSource, PointTarget>>;
42 |   using ConstPtr = pcl::shared_ptr<const FastVGICPCuda<PointSource, PointTarget>>;
43 | #else
44 |   using Ptr = boost::shared_ptr<FastVGICPCuda<PointSource, PointTarget>>;
45 |   using ConstPtr = boost::shared_ptr<const FastVGICPCuda<PointSource, PointTarget>>;
46 | #endif
47 | 
48 | protected:
49 |   using pcl::Registration<PointSource, PointTarget, Scalar>::input_;
50 |   using pcl::Registration<PointSource, PointTarget, Scalar>::target_;
51 | 
52 | public:
53 |   FastVGICPCuda();
54 |   virtual ~FastVGICPCuda() override;
55 | 
56 |   void setCorrespondenceRandomness(int k);
57 |   void setResolution(double resolution);
58 |   void setKernelWidth(double kernel_width, double max_dist = -1.0);
59 |   void setRegularizationMethod(RegularizationMethod method);
60 |   void setNeighborSearchMethod(NeighborSearchMethod method, double radius = -1.0);
61 |   void setNearestNeighborSearchMethod(NearestNeighborMethod method);
62 | 
63 |   virtual void swapSourceAndTarget() override;
64 |   virtual void clearSource() override;
65 |   virtual void clearTarget() override;
66 | 
67 |   virtual void setInputSource(const PointCloudSourceConstPtr& cloud) override;
68 |   virtual void setInputTarget(const PointCloudTargetConstPtr& cloud) override;
69 | 
70 | protected:
71 |   virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override;
72 |   virtual double linearize(const Eigen::Isometry3d& trans, Eigen::Matrix<double, 6, 6>* H = nullptr, Eigen::Matrix<double, 6, 1>* b = nullptr) override;
73 |   virtual double compute_error(const Eigen::Isometry3d& trans) override;
74 | 
75 |   template<typename PointT>
76 |   std::vector<int> find_neighbors_parallel_kdtree(int k, typename pcl::PointCloud<PointT>::ConstPtr cloud) const;
77 | 
78 | private:
79 |   int k_correspondences_;
80 |   double voxel_resolution_;
81 |   RegularizationMethod regularization_method_;
82 |   NearestNeighborMethod neighbor_search_method_;
83 | 
84 |   std::unique_ptr<cuda::FastVGICPCudaCore> vgicp_cuda_;
85 | };
86 | 
87 | }  // namespace fast_gicp
88 | 
89 | #endif
90 | 


--------------------------------------------------------------------------------
/include/fast_gicp/gicp/fast_vgicp_voxel.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef FAST_GICP_FAST_VGICP_VOXEL_HPP
  2 | #define FAST_GICP_FAST_VGICP_VOXEL_HPP
  3 | 
  4 | #include <unordered_map>
  5 | #include <boost/functional/hash.hpp>
  6 | #include <fast_gicp/gicp/gicp_settings.hpp>
  7 | 
  8 | namespace fast_gicp {
  9 | 
 10 | static std::vector<Eigen::Vector3i, Eigen::aligned_allocator<Eigen::Vector3i>> neighbor_offsets(NeighborSearchMethod search_method) {
 11 |   switch(search_method) {
 12 |       // clang-format off
 13 |     default:
 14 |       std::cerr << "unsupported neighbor search method" << std::endl;
 15 |       abort();
 16 |     case NeighborSearchMethod::DIRECT1:
 17 |       return std::vector<Eigen::Vector3i, Eigen::aligned_allocator<Eigen::Vector3i>>{
 18 |         Eigen::Vector3i(0, 0, 0)
 19 |       };
 20 |     case NeighborSearchMethod::DIRECT7:
 21 |       return std::vector<Eigen::Vector3i, Eigen::aligned_allocator<Eigen::Vector3i>>{
 22 |         Eigen::Vector3i(0, 0, 0),
 23 |         Eigen::Vector3i(1, 0, 0),
 24 |         Eigen::Vector3i(-1, 0, 0),
 25 |         Eigen::Vector3i(0, 1, 0),
 26 |         Eigen::Vector3i(0, -1, 0),
 27 |         Eigen::Vector3i(0, 0, 1),
 28 |         Eigen::Vector3i(0, 0, -1)
 29 |       };
 30 |     case NeighborSearchMethod::DIRECT27:
 31 |       break;
 32 |       // clang-format on
 33 |   }
 34 | 
 35 |   std::vector<Eigen::Vector3i, Eigen::aligned_allocator<Eigen::Vector3i>> offsets27;
 36 |   for(int i = 0; i < 3; i++) {
 37 |     for(int j = 0; j < 3; j++) {
 38 |       for(int k = 0; k < 3; k++) {
 39 |         offsets27.push_back(Eigen::Vector3i(i - 1, j - 1, k - 1));
 40 |       }
 41 |     }
 42 |   }
 43 |   return offsets27;
 44 | }
 45 | 
 46 | class Vector3iHash {
 47 | public:
 48 |   size_t operator()(const Eigen::Vector3i& x) const {
 49 |     size_t seed = 0;
 50 |     boost::hash_combine(seed, x[0]);
 51 |     boost::hash_combine(seed, x[1]);
 52 |     boost::hash_combine(seed, x[2]);
 53 |     return seed;
 54 |   }
 55 | };
 56 | 
 57 | struct GaussianVoxel {
 58 | public:
 59 |   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
 60 |   using Ptr = std::shared_ptr<GaussianVoxel>;
 61 | 
 62 |   GaussianVoxel() {
 63 |     num_points = 0;
 64 |     mean.setZero();
 65 |     cov.setZero();
 66 |   }
 67 |   virtual ~GaussianVoxel() {}
 68 | 
 69 |   virtual void append(const Eigen::Vector4d& mean_, const Eigen::Matrix4d& cov_) = 0;
 70 | 
 71 |   virtual void finalize() = 0;
 72 | 
 73 | public:
 74 |   int num_points;
 75 |   Eigen::Vector4d mean;
 76 |   Eigen::Matrix4d cov;
 77 | };
 78 | 
 79 | struct MultiplicativeGaussianVoxel : GaussianVoxel {
 80 | public:
 81 |   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
 82 | 
 83 |   MultiplicativeGaussianVoxel() : GaussianVoxel() {}
 84 |   virtual ~MultiplicativeGaussianVoxel() {}
 85 | 
 86 |   virtual void append(const Eigen::Vector4d& mean_, const Eigen::Matrix4d& cov_) override {
 87 |     num_points++;
 88 |     Eigen::Matrix4d cov_inv = cov_;
 89 |     cov_inv(3, 3) = 1;
 90 |     cov_inv = cov_inv.inverse().eval();
 91 | 
 92 |     cov += cov_inv;
 93 |     mean += cov_inv * mean_;
 94 |   }
 95 | 
 96 |   virtual void finalize() override {
 97 |     cov(3, 3) = 1;
 98 |     mean[3] = 1;
 99 | 
100 |     cov = cov.inverse().eval();
101 |     mean = (cov * mean).eval();
102 |   }
103 | };
104 | 
105 | struct AdditiveGaussianVoxel : GaussianVoxel {
106 | public:
107 |   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
108 | 
109 |   AdditiveGaussianVoxel() : GaussianVoxel() {}
110 |   virtual ~AdditiveGaussianVoxel() {}
111 | 
112 |   virtual void append(const Eigen::Vector4d& mean_, const Eigen::Matrix4d& cov_) override {
113 |     num_points++;
114 |     mean += mean_;
115 |     cov += cov_;
116 |   }
117 | 
118 |   virtual void finalize() override {
119 |     mean /= num_points;
120 |     cov /= num_points;
121 |   }
122 | };
123 | 
124 | template<typename PointT>
125 | class GaussianVoxelMap {
126 | public:
127 |   GaussianVoxelMap(double resolution, VoxelAccumulationMode mode) : voxel_resolution_(resolution), voxel_mode_(mode) {}
128 | 
129 |   void create_voxelmap(const pcl::PointCloud<PointT>& cloud, const std::vector<Eigen::Matrix4d, Eigen::aligned_allocator<Eigen::Matrix4d>>& covs) {
130 |     voxels_.clear();
131 |     for(int i = 0; i < cloud.size(); i++) {
132 |       Eigen::Vector3i coord = voxel_coord(cloud.at(i).getVector4fMap().template cast<double>());
133 | 
134 |       auto found = voxels_.find(coord);
135 |       if(found == voxels_.end()) {
136 |         GaussianVoxel::Ptr voxel;
137 |         switch(voxel_mode_) {
138 |           case VoxelAccumulationMode::ADDITIVE:
139 |           case VoxelAccumulationMode::ADDITIVE_WEIGHTED:
140 |             voxel = std::shared_ptr<AdditiveGaussianVoxel>(new AdditiveGaussianVoxel);
141 |             break;
142 |           case VoxelAccumulationMode::MULTIPLICATIVE:
143 |             voxel = std::shared_ptr<MultiplicativeGaussianVoxel>(new MultiplicativeGaussianVoxel);
144 |             break;
145 |         }
146 |         found = voxels_.insert(found, std::make_pair(coord, voxel));
147 |       }
148 | 
149 |       auto& voxel = found->second;
150 |       voxel->append(cloud.at(i).getVector4fMap().template cast<double>(), covs[i]);
151 |     }
152 | 
153 |     for(auto& voxel : voxels_) {
154 |       voxel.second->finalize();
155 |     }
156 |   }
157 | 
158 |   Eigen::Vector3i voxel_coord(const Eigen::Vector4d& x) const {
159 |     return (x.array() / voxel_resolution_ - 0.5).floor().template cast<int>().template head<3>();
160 |   }
161 | 
162 |   Eigen::Vector4d voxel_origin(const Eigen::Vector3i& coord) const {
163 |     Eigen::Vector3d origin = (coord.template cast<double>().array() + 0.5) * voxel_resolution_;
164 |     return Eigen::Vector4d(origin[0], origin[1], origin[2], 1.0f);
165 |   }
166 | 
167 |   GaussianVoxel::Ptr lookup_voxel(const Eigen::Vector3i& coord) const {
168 |     auto found = voxels_.find(coord);
169 |     if(found == voxels_.end()) {
170 |       return nullptr;
171 |     }
172 | 
173 |     return found->second;
174 |   }
175 | 
176 | private:
177 |   double voxel_resolution_;
178 |   VoxelAccumulationMode voxel_mode_;
179 | 
180 |   using VoxelMap = std::unordered_map<Eigen::Vector3i, GaussianVoxel::Ptr, Vector3iHash, std::equal_to<Eigen::Vector3i>, Eigen::aligned_allocator<std::pair<const Eigen::Vector3i, GaussianVoxel::Ptr>>>;
181 |   VoxelMap voxels_;
182 | };
183 | 
184 | }  // namespace fast_gicp
185 | 
186 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/gicp/gicp_settings.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_GICP_SETTINGS_HPP
 2 | #define FAST_GICP_GICP_SETTINGS_HPP
 3 | 
 4 | namespace fast_gicp {
 5 | 
 6 | enum class RegularizationMethod { NONE, MIN_EIG, NORMALIZED_MIN_EIG, PLANE, FROBENIUS };
 7 | 
 8 | enum class NeighborSearchMethod { DIRECT27, DIRECT7, DIRECT1, /* supported on only VGICP_CUDA */ DIRECT_RADIUS };
 9 | 
10 | enum class VoxelAccumulationMode { ADDITIVE, ADDITIVE_WEIGHTED, MULTIPLICATIVE };
11 | }
12 | 
13 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/gicp/impl/fast_gicp_st_impl.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef FAST_GICP_FAST_GICP_ST_IMPL_HPP
  2 | #define FAST_GICP_FAST_GICP_ST_IMPL_HPP
  3 | 
  4 | #include <fast_gicp/so3/so3.hpp>
  5 | #include <fast_gicp/gicp/fast_gicp_st.hpp>
  6 | 
  7 | namespace fast_gicp {
  8 | 
  9 | template <typename PointSource, typename PointTarget>
 10 | FastGICPSingleThread<PointSource, PointTarget>::FastGICPSingleThread() : FastGICP<PointSource, PointTarget>() {
 11 |   this->reg_name_ = "FastGICPSingleThread";
 12 |   this->num_threads_ = 1;
 13 | }
 14 | 
 15 | template <typename PointSource, typename PointTarget>
 16 | FastGICPSingleThread<PointSource, PointTarget>::~FastGICPSingleThread() {}
 17 | 
 18 | template <typename PointSource, typename PointTarget>
 19 | void FastGICPSingleThread<PointSource, PointTarget>::computeTransformation(PointCloudSource& output, const Matrix4& guess) {
 20 |   anchors_.clear();
 21 |   FastGICP<PointSource, PointTarget>::computeTransformation(output, guess);
 22 | }
 23 | 
 24 | template <typename PointSource, typename PointTarget>
 25 | void FastGICPSingleThread<PointSource, PointTarget>::update_correspondences(const Eigen::Isometry3d& x) {
 26 |   assert(source_covs_.size() == input_->size());
 27 |   assert(target_covs_.size() == target_->size());
 28 | 
 29 |   Eigen::Isometry3f trans = x.template cast<float>();
 30 | 
 31 |   bool is_first = anchors_.empty();
 32 | 
 33 |   correspondences_.resize(input_->size());
 34 |   sq_distances_.resize(input_->size());
 35 |   second_sq_distances_.resize(input_->size());
 36 |   anchors_.resize(input_->size());
 37 |   mahalanobis_.resize(input_->size());
 38 | 
 39 |   std::vector<int> k_indices;
 40 |   std::vector<float> k_sq_dists;
 41 | 
 42 |   for (int i = 0; i < input_->size(); i++) {
 43 |     PointTarget pt;
 44 |     pt.getVector4fMap() = trans * input_->at(i).getVector4fMap();
 45 | 
 46 |     if (!is_first) {
 47 |       double d = (pt.getVector4fMap() - anchors_[i]).norm();
 48 |       double max_first = std::sqrt(sq_distances_[i]) + d;
 49 |       double min_second = std::sqrt(second_sq_distances_[i]) - d;
 50 | 
 51 |       if (max_first < min_second) {
 52 |         continue;
 53 |       }
 54 |     }
 55 | 
 56 |     search_target_->nearestKSearch(pt, 2, k_indices, k_sq_dists);
 57 | 
 58 |     correspondences_[i] = k_sq_dists[0] < this->corr_dist_threshold_ * this->corr_dist_threshold_ ? k_indices[0] : -1;
 59 |     sq_distances_[i] = k_sq_dists[0];
 60 |     second_sq_distances_[i] = k_sq_dists[1];
 61 |     anchors_[i] = pt.getVector4fMap();
 62 | 
 63 |     if (correspondences_[i] < 0) {
 64 |       continue;
 65 |     }
 66 | 
 67 |     const int target_index = correspondences_[i];
 68 |     const auto& cov_A = source_covs_[i];
 69 |     const auto& cov_B = target_covs_[target_index];
 70 | 
 71 |     Eigen::Matrix4d RCR = cov_B + x.matrix() * cov_A * x.matrix().transpose();
 72 |     RCR(3, 3) = 1.0;
 73 | 
 74 |     mahalanobis_[i] = RCR.inverse();
 75 |     mahalanobis_[i](3, 3) = 0.0;
 76 |   }
 77 | }
 78 | 
 79 | template <typename PointSource, typename PointTarget>
 80 | double FastGICPSingleThread<PointSource, PointTarget>::linearize(const Eigen::Isometry3d& trans, Eigen::Matrix<double, 6, 6>* H, Eigen::Matrix<double, 6, 1>* b) {
 81 |   if (H && b) {
 82 |     update_correspondences(trans);
 83 |     H->setZero();
 84 |     b->setZero();
 85 |   }
 86 | 
 87 |   double sum_errors = 0.0;
 88 |   for (int i = 0; i < input_->size(); i++) {
 89 |     int target_index = correspondences_[i];
 90 |     if (target_index < 0) {
 91 |       continue;
 92 |     }
 93 | 
 94 |     const Eigen::Vector4d mean_A = input_->at(i).getVector4fMap().template cast<double>();
 95 |     const auto& cov_A = source_covs_[i];
 96 | 
 97 |     const Eigen::Vector4d mean_B = target_->at(target_index).getVector4fMap().template cast<double>();
 98 |     const auto& cov_B = target_covs_[target_index];
 99 | 
100 |     const Eigen::Vector4d transed_mean_A = trans * mean_A;
101 |     const Eigen::Vector4d error = mean_B - transed_mean_A;
102 | 
103 |     sum_errors += error.transpose() * mahalanobis_[i] * error;
104 | 
105 |     if (H == nullptr || b == nullptr) {
106 |       continue;
107 |     }
108 | 
109 |     Eigen::Matrix<double, 4, 6> dtdx0 = Eigen::Matrix<double, 4, 6>::Zero();
110 |     dtdx0.block<3, 3>(0, 0) = skewd(transed_mean_A.head<3>());
111 |     dtdx0.block<3, 3>(0, 3) = -Eigen::Matrix3d::Identity();
112 | 
113 |     Eigen::Matrix<double, 4, 6> jlossexp = dtdx0;
114 | 
115 |     (*H) += jlossexp.transpose() * mahalanobis_[i] * jlossexp;
116 |     (*b) += jlossexp.transpose() * mahalanobis_[i] * error;
117 |   }
118 | 
119 |   return sum_errors;
120 | }
121 | 
122 | template <typename PointSource, typename PointTarget>
123 | double FastGICPSingleThread<PointSource, PointTarget>::compute_error(const Eigen::Isometry3d& trans) {
124 |   return linearize(trans, nullptr, nullptr);
125 | }
126 | 
127 | }  // namespace fast_gicp
128 | 
129 | #endif
130 | 


--------------------------------------------------------------------------------
/include/fast_gicp/gicp/impl/fast_vgicp_cuda_impl.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef FAST_GICP_FAST_VGICP_CUDA_IMPL_HPP
  2 | #define FAST_GICP_FAST_VGICP_CUDA_IMPL_HPP
  3 | 
  4 | #include <atomic>
  5 | #include <Eigen/Core>
  6 | #include <Eigen/Geometry>
  7 | 
  8 | #include <pcl/point_types.h>
  9 | #include <pcl/point_cloud.h>
 10 | #include <pcl/search/kdtree.h>
 11 | #include <pcl/registration/registration.h>
 12 | 
 13 | #include <fast_gicp/gicp/fast_vgicp_cuda.hpp>
 14 | #include <fast_gicp/cuda/fast_vgicp_cuda.cuh>
 15 | 
 16 | #include <thrust/host_vector.h>
 17 | #include <thrust/device_vector.h>
 18 | 
 19 | namespace fast_gicp {
 20 | 
 21 | template<typename PointSource, typename PointTarget>
 22 | FastVGICPCuda<PointSource, PointTarget>::FastVGICPCuda() : LsqRegistration<PointSource, PointTarget>() {
 23 |   this->reg_name_ = "FastVGICPCuda";
 24 |   k_correspondences_ = 20;
 25 |   voxel_resolution_ = 1.0;
 26 |   regularization_method_ = RegularizationMethod::PLANE;
 27 |   neighbor_search_method_ = NearestNeighborMethod::CPU_PARALLEL_KDTREE;
 28 | 
 29 |   vgicp_cuda_.reset(new cuda::FastVGICPCudaCore());
 30 |   vgicp_cuda_->set_resolution(voxel_resolution_);
 31 |   vgicp_cuda_->set_kernel_params(0.5, 3.0);
 32 | }
 33 | 
 34 | template<typename PointSource, typename PointTarget>
 35 | FastVGICPCuda<PointSource, PointTarget>::~FastVGICPCuda() {}
 36 | 
 37 | template<typename PointSource, typename PointTarget>
 38 | void FastVGICPCuda<PointSource, PointTarget>::setCorrespondenceRandomness(int k) {}
 39 | 
 40 | template<typename PointSource, typename PointTarget>
 41 | void FastVGICPCuda<PointSource, PointTarget>::setResolution(double resolution) {
 42 |   vgicp_cuda_->set_resolution(resolution);
 43 | }
 44 | 
 45 | template <typename PointSource, typename PointTarget>
 46 | void FastVGICPCuda<PointSource, PointTarget>::setKernelWidth(double kernel_width, double max_dist) {
 47 |   if (max_dist <= 0.0) {
 48 |     max_dist = kernel_width * 5.0;
 49 |   }
 50 |   vgicp_cuda_->set_kernel_params(kernel_width, max_dist);
 51 | }
 52 | 
 53 | template<typename PointSource, typename PointTarget>
 54 | void FastVGICPCuda<PointSource, PointTarget>::setRegularizationMethod(RegularizationMethod method) {
 55 |   regularization_method_ = method;
 56 | }
 57 | 
 58 | template <typename PointSource, typename PointTarget>
 59 | void FastVGICPCuda<PointSource, PointTarget>::setNeighborSearchMethod(NeighborSearchMethod method, double radius) {
 60 |   vgicp_cuda_->set_neighbor_search_method(method, radius);
 61 | }
 62 | 
 63 | template <typename PointSource, typename PointTarget>
 64 | void FastVGICPCuda<PointSource, PointTarget>::setNearestNeighborSearchMethod(NearestNeighborMethod method) {
 65 |   neighbor_search_method_ = method;
 66 | }
 67 | 
 68 | template<typename PointSource, typename PointTarget>
 69 | void FastVGICPCuda<PointSource, PointTarget>::swapSourceAndTarget() {
 70 |   vgicp_cuda_->swap_source_and_target();
 71 |   input_.swap(target_);
 72 | }
 73 | 
 74 | template<typename PointSource, typename PointTarget>
 75 | void FastVGICPCuda<PointSource, PointTarget>::clearSource() {
 76 |   input_.reset();
 77 | }
 78 | 
 79 | template<typename PointSource, typename PointTarget>
 80 | void FastVGICPCuda<PointSource, PointTarget>::clearTarget() {
 81 |   target_.reset();
 82 | }
 83 | 
 84 | template<typename PointSource, typename PointTarget>
 85 | void FastVGICPCuda<PointSource, PointTarget>::setInputSource(const PointCloudSourceConstPtr& cloud) {
 86 |   // the input cloud is the same as the previous one
 87 |   if(cloud == input_) {
 88 |     return;
 89 |   }
 90 | 
 91 |   pcl::Registration<PointSource, PointTarget, Scalar>::setInputSource(cloud);
 92 | 
 93 |   std::vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>> points(cloud->size());
 94 |   std::transform(cloud->begin(), cloud->end(), points.begin(), [=](const PointSource& pt) { return pt.getVector3fMap(); });
 95 | 
 96 |   vgicp_cuda_->set_source_cloud(points);
 97 |   switch(neighbor_search_method_) {
 98 |     case NearestNeighborMethod::CPU_PARALLEL_KDTREE: {
 99 |       std::vector<int> neighbors = find_neighbors_parallel_kdtree<PointSource>(k_correspondences_, cloud);
100 |       vgicp_cuda_->set_source_neighbors(k_correspondences_, neighbors);
101 |       vgicp_cuda_->calculate_source_covariances(regularization_method_);
102 |     } break;
103 |     case NearestNeighborMethod::GPU_BRUTEFORCE:
104 |       vgicp_cuda_->find_source_neighbors(k_correspondences_);
105 |       vgicp_cuda_->calculate_source_covariances(regularization_method_);
106 |       break;
107 |     case NearestNeighborMethod::GPU_RBF_KERNEL:
108 |       vgicp_cuda_->calculate_source_covariances_rbf(regularization_method_);
109 |       break;
110 |   }
111 | }
112 | 
113 | template<typename PointSource, typename PointTarget>
114 | void FastVGICPCuda<PointSource, PointTarget>::setInputTarget(const PointCloudTargetConstPtr& cloud) {
115 |   // the input cloud is the same as the previous one
116 |   if(cloud == target_) {
117 |     return;
118 |   }
119 | 
120 |   pcl::Registration<PointSource, PointTarget, Scalar>::setInputTarget(cloud);
121 | 
122 |   std::vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>> points(cloud->size());
123 |   std::transform(cloud->begin(), cloud->end(), points.begin(), [=](const PointTarget& pt) { return pt.getVector3fMap(); });
124 | 
125 |   vgicp_cuda_->set_target_cloud(points);
126 |   switch(neighbor_search_method_) {
127 |     case NearestNeighborMethod::CPU_PARALLEL_KDTREE: {
128 |       std::vector<int> neighbors = find_neighbors_parallel_kdtree<PointTarget>(k_correspondences_, cloud);
129 |       vgicp_cuda_->set_target_neighbors(k_correspondences_, neighbors);
130 |       vgicp_cuda_->calculate_target_covariances(regularization_method_);
131 |     } break;
132 |     case NearestNeighborMethod::GPU_BRUTEFORCE:
133 |       vgicp_cuda_->find_target_neighbors(k_correspondences_);
134 |       vgicp_cuda_->calculate_target_covariances(regularization_method_);
135 |       break;
136 |     case NearestNeighborMethod::GPU_RBF_KERNEL:
137 |       vgicp_cuda_->calculate_target_covariances_rbf(regularization_method_);
138 |       break;
139 |   }
140 |   vgicp_cuda_->create_target_voxelmap();
141 | }
142 | 
143 | template<typename PointSource, typename PointTarget>
144 | void FastVGICPCuda<PointSource, PointTarget>::computeTransformation(PointCloudSource& output, const Matrix4& guess) {
145 |   vgicp_cuda_->set_resolution(this->voxel_resolution_);
146 | 
147 |   LsqRegistration<PointSource, PointTarget>::computeTransformation(output, guess);
148 | }
149 | 
150 | template<typename PointSource, typename PointTarget>
151 | template<typename PointT>
152 | std::vector<int> FastVGICPCuda<PointSource, PointTarget>::find_neighbors_parallel_kdtree(int k, typename pcl::PointCloud<PointT>::ConstPtr cloud) const {
153 |   pcl::search::KdTree<PointT> kdtree;
154 |   kdtree.setInputCloud(cloud);
155 |   std::vector<int> neighbors(cloud->size() * k);
156 | 
157 | #pragma omp parallel for schedule(guided, 8)
158 |   for(int i = 0; i < cloud->size(); i++) {
159 |     std::vector<int> k_indices;
160 |     std::vector<float> k_sq_distances;
161 |     kdtree.nearestKSearch(cloud->at(i), k, k_indices, k_sq_distances);
162 | 
163 |     std::copy(k_indices.begin(), k_indices.end(), neighbors.begin() + i * k);
164 |   }
165 | 
166 |   return neighbors;
167 | }
168 | 
169 | template<typename PointSource, typename PointTarget>
170 | double FastVGICPCuda<PointSource, PointTarget>::linearize(const Eigen::Isometry3d& trans, Eigen::Matrix<double, 6, 6>* H, Eigen::Matrix<double, 6, 1>* b) {
171 |   vgicp_cuda_->update_correspondences(trans);
172 |   return vgicp_cuda_->compute_error(trans, H, b);
173 | }
174 | 
175 | template<typename PointSource, typename PointTarget>
176 | double FastVGICPCuda<PointSource, PointTarget>::compute_error(const Eigen::Isometry3d& trans) {
177 |   return vgicp_cuda_->compute_error(trans, nullptr, nullptr);
178 | }
179 | 
180 | }  // namespace fast_gicp
181 | 
182 | #endif
183 | 


--------------------------------------------------------------------------------
/include/fast_gicp/gicp/impl/fast_vgicp_impl.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef FAST_GICP_FAST_VGICP_IMPL_HPP
  2 | #define FAST_GICP_FAST_VGICP_IMPL_HPP
  3 | 
  4 | #include <atomic>
  5 | #include <Eigen/Core>
  6 | #include <Eigen/Geometry>
  7 | 
  8 | #include <pcl/point_types.h>
  9 | #include <pcl/point_cloud.h>
 10 | #include <pcl/search/kdtree.h>
 11 | #include <pcl/registration/registration.h>
 12 | 
 13 | #include <fast_gicp/so3/so3.hpp>
 14 | #include <fast_gicp/gicp/fast_vgicp.hpp>
 15 | 
 16 | namespace fast_gicp {
 17 | 
 18 | template <typename PointSource, typename PointTarget>
 19 | FastVGICP<PointSource, PointTarget>::FastVGICP() : FastGICP<PointSource, PointTarget>() {
 20 |   this->reg_name_ = "FastVGICP";
 21 | 
 22 |   voxel_resolution_ = 1.0;
 23 |   search_method_ = NeighborSearchMethod::DIRECT1;
 24 |   voxel_mode_ = VoxelAccumulationMode::ADDITIVE;
 25 | }
 26 | 
 27 | template <typename PointSource, typename PointTarget>
 28 | FastVGICP<PointSource, PointTarget>::~FastVGICP() {}
 29 | 
 30 | template <typename PointSource, typename PointTarget>
 31 | void FastVGICP<PointSource, PointTarget>::setResolution(double resolution) {
 32 |   voxel_resolution_ = resolution;
 33 | }
 34 | 
 35 | template <typename PointSource, typename PointTarget>
 36 | void FastVGICP<PointSource, PointTarget>::setNeighborSearchMethod(NeighborSearchMethod method) {
 37 |   search_method_ = method;
 38 | }
 39 | 
 40 | template <typename PointSource, typename PointTarget>
 41 | void FastVGICP<PointSource, PointTarget>::setVoxelAccumulationMode(VoxelAccumulationMode mode) {
 42 |   voxel_mode_ = mode;
 43 | }
 44 | 
 45 | template <typename PointSource, typename PointTarget>
 46 | void FastVGICP<PointSource, PointTarget>::swapSourceAndTarget() {
 47 |   input_.swap(target_);
 48 |   search_source_.swap(search_target_);
 49 |   source_covs_.swap(target_covs_);
 50 |   voxelmap_.reset();
 51 |   voxel_correspondences_.clear();
 52 |   voxel_mahalanobis_.clear();
 53 | }
 54 | 
 55 | template <typename PointSource, typename PointTarget>
 56 | void FastVGICP<PointSource, PointTarget>::setInputTarget(const PointCloudTargetConstPtr& cloud) {
 57 |   if (target_ == cloud) {
 58 |     return;
 59 |   }
 60 | 
 61 |   FastGICP<PointSource, PointTarget>::setInputTarget(cloud);
 62 |   voxelmap_.reset();
 63 | }
 64 | 
 65 | template <typename PointSource, typename PointTarget>
 66 | void FastVGICP<PointSource, PointTarget>::computeTransformation(PointCloudSource& output, const Matrix4& guess) {
 67 |   voxelmap_.reset();
 68 | 
 69 |   FastGICP<PointSource, PointTarget>::computeTransformation(output, guess);
 70 | }
 71 | 
 72 | template <typename PointSource, typename PointTarget>
 73 | void FastVGICP<PointSource, PointTarget>::update_correspondences(const Eigen::Isometry3d& trans) {
 74 |   voxel_correspondences_.clear();
 75 |   auto offsets = neighbor_offsets(search_method_);
 76 | 
 77 |   std::vector<std::vector<std::pair<int, GaussianVoxel::Ptr>>> corrs(num_threads_);
 78 |   for (auto& c : corrs) {
 79 |     c.reserve((input_->size() * offsets.size()) / num_threads_);
 80 |   }
 81 | 
 82 | #pragma omp parallel for num_threads(num_threads_) schedule(guided, 8)
 83 |   for (int i = 0; i < input_->size(); i++) {
 84 |     const Eigen::Vector4d mean_A = input_->at(i).getVector4fMap().template cast<double>();
 85 |     Eigen::Vector4d transed_mean_A = trans * mean_A;
 86 |     Eigen::Vector3i coord = voxelmap_->voxel_coord(transed_mean_A);
 87 | 
 88 |     for (const auto& offset : offsets) {
 89 |       auto voxel = voxelmap_->lookup_voxel(coord + offset);
 90 |       if (voxel != nullptr) {
 91 |         corrs[omp_get_thread_num()].push_back(std::make_pair(i, voxel));
 92 |       }
 93 |     }
 94 |   }
 95 | 
 96 |   voxel_correspondences_.reserve(input_->size() * offsets.size());
 97 |   for (const auto& c : corrs) {
 98 |     voxel_correspondences_.insert(voxel_correspondences_.end(), c.begin(), c.end());
 99 |   }
100 | 
101 |   // precompute combined covariances
102 |   voxel_mahalanobis_.resize(voxel_correspondences_.size());
103 | 
104 | #pragma omp parallel for num_threads(num_threads_) schedule(guided, 8)
105 |   for (int i = 0; i < voxel_correspondences_.size(); i++) {
106 |     const auto& corr = voxel_correspondences_[i];
107 |     const auto& cov_A = source_covs_[corr.first];
108 |     const auto& cov_B = corr.second->cov;
109 | 
110 |     Eigen::Matrix4d RCR = cov_B + trans.matrix() * cov_A * trans.matrix().transpose();
111 |     RCR(3, 3) = 1.0;
112 | 
113 |     voxel_mahalanobis_[i] = RCR.inverse();
114 |     voxel_mahalanobis_[i](3, 3) = 0.0;
115 |   }
116 | }
117 | 
118 | template <typename PointSource, typename PointTarget>
119 | double FastVGICP<PointSource, PointTarget>::linearize(const Eigen::Isometry3d& trans, Eigen::Matrix<double, 6, 6>* H, Eigen::Matrix<double, 6, 1>* b) {
120 |   if (voxelmap_ == nullptr) {
121 |     voxelmap_.reset(new GaussianVoxelMap<PointTarget>(voxel_resolution_, voxel_mode_));
122 |     voxelmap_->create_voxelmap(*target_, target_covs_);
123 |   }
124 | 
125 |   update_correspondences(trans);
126 | 
127 |   double sum_errors = 0.0;
128 |   std::vector<Eigen::Matrix<double, 6, 6>, Eigen::aligned_allocator<Eigen::Matrix<double, 6, 6>>> Hs(num_threads_);
129 |   std::vector<Eigen::Matrix<double, 6, 1>, Eigen::aligned_allocator<Eigen::Matrix<double, 6, 1>>> bs(num_threads_);
130 |   for (int i = 0; i < num_threads_; i++) {
131 |     Hs[i].setZero();
132 |     bs[i].setZero();
133 |   }
134 | 
135 | #pragma omp parallel for num_threads(num_threads_) reduction(+ : sum_errors) schedule(guided, 8)
136 |   for (int i = 0; i < voxel_correspondences_.size(); i++) {
137 |     const auto& corr = voxel_correspondences_[i];
138 |     auto target_voxel = corr.second;
139 | 
140 |     const Eigen::Vector4d mean_A = input_->at(corr.first).getVector4fMap().template cast<double>();
141 |     const auto& cov_A = source_covs_[corr.first];
142 | 
143 |     const Eigen::Vector4d mean_B = corr.second->mean;
144 |     const auto& cov_B = corr.second->cov;
145 | 
146 |     const Eigen::Vector4d transed_mean_A = trans * mean_A;
147 |     const Eigen::Vector4d error = mean_B - transed_mean_A;
148 | 
149 |     double w = std::sqrt(target_voxel->num_points);
150 |     sum_errors += w * error.transpose() * voxel_mahalanobis_[i] * error;
151 | 
152 |     if (H == nullptr || b == nullptr) {
153 |       continue;
154 |     }
155 | 
156 |     Eigen::Matrix<double, 4, 6> dtdx0 = Eigen::Matrix<double, 4, 6>::Zero();
157 |     dtdx0.block<3, 3>(0, 0) = skewd(transed_mean_A.head<3>());
158 |     dtdx0.block<3, 3>(0, 3) = -Eigen::Matrix3d::Identity();
159 | 
160 |     Eigen::Matrix<double, 4, 6> jlossexp = dtdx0;
161 | 
162 |     Eigen::Matrix<double, 6, 6> Hi = w * jlossexp.transpose() * voxel_mahalanobis_[i] * jlossexp;
163 |     Eigen::Matrix<double, 6, 1> bi = w * jlossexp.transpose() * voxel_mahalanobis_[i] * error;
164 | 
165 |     int thread_num = omp_get_thread_num();
166 |     Hs[thread_num] += Hi;
167 |     bs[thread_num] += bi;
168 |   }
169 | 
170 |   if (H && b) {
171 |     H->setZero();
172 |     b->setZero();
173 |     for (int i = 0; i < num_threads_; i++) {
174 |       (*H) += Hs[i];
175 |       (*b) += bs[i];
176 |     }
177 |   }
178 | 
179 |   return sum_errors;
180 | }
181 | 
182 | template <typename PointSource, typename PointTarget>
183 | double FastVGICP<PointSource, PointTarget>::compute_error(const Eigen::Isometry3d& trans) {
184 |   double sum_errors = 0.0;
185 | #pragma omp parallel for num_threads(num_threads_) reduction(+ : sum_errors)
186 |   for (int i = 0; i < voxel_correspondences_.size(); i++) {
187 |     const auto& corr = voxel_correspondences_[i];
188 |     auto target_voxel = corr.second;
189 | 
190 |     const Eigen::Vector4d mean_A = input_->at(corr.first).getVector4fMap().template cast<double>();
191 |     const auto& cov_A = source_covs_[corr.first];
192 | 
193 |     const Eigen::Vector4d mean_B = corr.second->mean;
194 |     const auto& cov_B = corr.second->cov;
195 | 
196 |     const Eigen::Vector4d transed_mean_A = trans * mean_A;
197 |     const Eigen::Vector4d error = mean_B - transed_mean_A;
198 | 
199 |     double w = std::sqrt(target_voxel->num_points);
200 |     sum_errors += w * error.transpose() * voxel_mahalanobis_[i] * error;
201 |   }
202 | 
203 |   return sum_errors;
204 | }
205 | 
206 | }  // namespace fast_gicp
207 | 
208 | #endif
209 | 


--------------------------------------------------------------------------------
/include/fast_gicp/gicp/impl/lsq_registration_impl.hpp:
--------------------------------------------------------------------------------
  1 | #include <fast_gicp/gicp/lsq_registration.hpp>
  2 | 
  3 | #include <boost/format.hpp>
  4 | #include <fast_gicp/so3/so3.hpp>
  5 | 
  6 | namespace fast_gicp {
  7 | 
  8 | template <typename PointTarget, typename PointSource>
  9 | LsqRegistration<PointTarget, PointSource>::LsqRegistration() {
 10 |   this->reg_name_ = "LsqRegistration";
 11 |   max_iterations_ = 64;
 12 |   rotation_epsilon_ = 2e-3;
 13 |   transformation_epsilon_ = 5e-4;
 14 | 
 15 |   lsq_optimizer_type_ = LSQ_OPTIMIZER_TYPE::LevenbergMarquardt;
 16 |   lm_debug_print_ = false;
 17 |   lm_max_iterations_ = 10;
 18 |   lm_init_lambda_factor_ = 1e-9;
 19 |   lm_lambda_ = -1.0;
 20 | 
 21 |   final_hessian_.setIdentity();
 22 | }
 23 | 
 24 | template <typename PointTarget, typename PointSource>
 25 | LsqRegistration<PointTarget, PointSource>::~LsqRegistration() {}
 26 | 
 27 | template <typename PointTarget, typename PointSource>
 28 | void LsqRegistration<PointTarget, PointSource>::setRotationEpsilon(double eps) {
 29 |   rotation_epsilon_ = eps;
 30 | }
 31 | 
 32 | template <typename PointTarget, typename PointSource>
 33 | void LsqRegistration<PointTarget, PointSource>::setInitialLambdaFactor(double init_lambda_factor) {
 34 |   lm_init_lambda_factor_ = init_lambda_factor;
 35 | }
 36 | 
 37 | template <typename PointTarget, typename PointSource>
 38 | void LsqRegistration<PointTarget, PointSource>::setDebugPrint(bool lm_debug_print) {
 39 |   lm_debug_print_ = lm_debug_print;
 40 | }
 41 | 
 42 | template <typename PointTarget, typename PointSource>
 43 | const Eigen::Matrix<double, 6, 6>& LsqRegistration<PointTarget, PointSource>::getFinalHessian() const {
 44 |   return final_hessian_;
 45 | }
 46 | 
 47 | template <typename PointTarget, typename PointSource>
 48 | double LsqRegistration<PointTarget, PointSource>::evaluateCost(const Eigen::Matrix4f& relative_pose, Eigen::Matrix<double, 6, 6>* H, Eigen::Matrix<double, 6, 1>* b) {
 49 |   return this->linearize(Eigen::Isometry3f(relative_pose).cast<double>(), H, b);
 50 | }
 51 | 
 52 | template <typename PointTarget, typename PointSource>
 53 | void LsqRegistration<PointTarget, PointSource>::computeTransformation(PointCloudSource& output, const Matrix4& guess) {
 54 |   Eigen::Isometry3d x0 = Eigen::Isometry3d(guess.template cast<double>());
 55 | 
 56 |   lm_lambda_ = -1.0;
 57 |   converged_ = false;
 58 | 
 59 |   if (lm_debug_print_) {
 60 |     std::cout << "********************************************" << std::endl;
 61 |     std::cout << "***************** optimize *****************" << std::endl;
 62 |     std::cout << "********************************************" << std::endl;
 63 |   }
 64 | 
 65 |   for (int i = 0; i < max_iterations_ && !converged_; i++) {
 66 |     nr_iterations_ = i;
 67 | 
 68 |     Eigen::Isometry3d delta;
 69 |     if (!step_optimize(x0, delta)) {
 70 |       std::cerr << "lm not converged!!" << std::endl;
 71 |       break;
 72 |     }
 73 | 
 74 |     converged_ = is_converged(delta);
 75 |   }
 76 | 
 77 |   final_transformation_ = x0.cast<float>().matrix();
 78 |   pcl::transformPointCloud(*input_, output, final_transformation_);
 79 | }
 80 | 
 81 | template <typename PointTarget, typename PointSource>
 82 | bool LsqRegistration<PointTarget, PointSource>::is_converged(const Eigen::Isometry3d& delta) const {
 83 |   double accum = 0.0;
 84 |   Eigen::Matrix3d R = delta.linear() - Eigen::Matrix3d::Identity();
 85 |   Eigen::Vector3d t = delta.translation();
 86 | 
 87 |   Eigen::Matrix3d r_delta = 1.0 / rotation_epsilon_ * R.array().abs();
 88 |   Eigen::Vector3d t_delta = 1.0 / transformation_epsilon_ * t.array().abs();
 89 | 
 90 |   return std::max(r_delta.maxCoeff(), t_delta.maxCoeff()) < 1;
 91 | }
 92 | 
 93 | template <typename PointTarget, typename PointSource>
 94 | bool LsqRegistration<PointTarget, PointSource>::step_optimize(Eigen::Isometry3d& x0, Eigen::Isometry3d& delta) {
 95 |   switch (lsq_optimizer_type_) {
 96 |     case LSQ_OPTIMIZER_TYPE::LevenbergMarquardt:
 97 |       return step_lm(x0, delta);
 98 |     case LSQ_OPTIMIZER_TYPE::GaussNewton:
 99 |       return step_gn(x0, delta);
100 |   }
101 | 
102 |   return step_lm(x0, delta);
103 | }
104 | 
105 | template <typename PointTarget, typename PointSource>
106 | bool LsqRegistration<PointTarget, PointSource>::step_gn(Eigen::Isometry3d& x0, Eigen::Isometry3d& delta) {
107 |   Eigen::Matrix<double, 6, 6> H;
108 |   Eigen::Matrix<double, 6, 1> b;
109 |   double y0 = linearize(x0, &H, &b);
110 | 
111 |   Eigen::LDLT<Eigen::Matrix<double, 6, 6>> solver(H);
112 |   Eigen::Matrix<double, 6, 1> d = solver.solve(-b);
113 | 
114 |   delta = se3_exp(d);
115 | 
116 |   x0 = delta * x0;
117 |   final_hessian_ = H;
118 | 
119 |   return true;
120 | }
121 | 
122 | template <typename PointTarget, typename PointSource>
123 | bool LsqRegistration<PointTarget, PointSource>::step_lm(Eigen::Isometry3d& x0, Eigen::Isometry3d& delta) {
124 |   Eigen::Matrix<double, 6, 6> H;
125 |   Eigen::Matrix<double, 6, 1> b;
126 |   double y0 = linearize(x0, &H, &b);
127 | 
128 |   if (lm_lambda_ < 0.0) {
129 |     lm_lambda_ = lm_init_lambda_factor_ * H.diagonal().array().abs().maxCoeff();
130 |   }
131 | 
132 |   double nu = 2.0;
133 |   for (int i = 0; i < lm_max_iterations_; i++) {
134 |     Eigen::LDLT<Eigen::Matrix<double, 6, 6>> solver(H + lm_lambda_ * Eigen::Matrix<double, 6, 6>::Identity());
135 |     Eigen::Matrix<double, 6, 1> d = solver.solve(-b);
136 | 
137 |     delta = se3_exp(d);
138 | 
139 |     Eigen::Isometry3d xi = delta * x0;
140 |     double yi = compute_error(xi);
141 |     double rho = (y0 - yi) / (d.dot(lm_lambda_ * d - b));
142 | 
143 |     if (lm_debug_print_) {
144 |       if (i == 0) {
145 |         std::cout << boost::format("--- LM optimization ---\n%5s %15s %15s %15s %15s %15s %5s\n") % "i" % "y0" % "yi" % "rho" % "lambda" % "|delta|" % "dec";
146 |       }
147 |       char dec = rho > 0.0 ? 'x' : ' ';
148 |       std::cout << boost::format("%5d %15g %15g %15g %15g %15g %5c") % i % y0 % yi % rho % lm_lambda_ % d.norm() % dec << std::endl;
149 |     }
150 | 
151 |     if (rho < 0) {
152 |       if (is_converged(delta)) {
153 |         return true;
154 |       }
155 | 
156 |       lm_lambda_ = nu * lm_lambda_;
157 |       nu = 2 * nu;
158 |       continue;
159 |     }
160 | 
161 |     x0 = xi;
162 |     lm_lambda_ = lm_lambda_ * std::max(1.0 / 3.0, 1 - std::pow(2 * rho - 1, 3));
163 |     final_hessian_ = H;
164 |     return true;
165 |   }
166 | 
167 |   return false;
168 | }
169 | 
170 | }  // namespace fast_gicp


--------------------------------------------------------------------------------
/include/fast_gicp/gicp/lsq_registration.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_LSQ_REGISTRATION_HPP
 2 | #define FAST_GICP_LSQ_REGISTRATION_HPP
 3 | 
 4 | #include <Eigen/Core>
 5 | #include <Eigen/Geometry>
 6 | 
 7 | #include <pcl/point_types.h>
 8 | #include <pcl/point_cloud.h>
 9 | #include <pcl/registration/registration.h>
10 | 
11 | namespace fast_gicp {
12 | 
13 | enum class LSQ_OPTIMIZER_TYPE { GaussNewton, LevenbergMarquardt };
14 | 
15 | template<typename PointSource, typename PointTarget>
16 | class LsqRegistration : public pcl::Registration<PointSource, PointTarget, float> {
17 | public:
18 |   using Scalar = float;
19 |   using Matrix4 = typename pcl::Registration<PointSource, PointTarget, Scalar>::Matrix4;
20 | 
21 |   using PointCloudSource = typename pcl::Registration<PointSource, PointTarget, Scalar>::PointCloudSource;
22 |   using PointCloudSourcePtr = typename PointCloudSource::Ptr;
23 |   using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr;
24 | 
25 |   using PointCloudTarget = typename pcl::Registration<PointSource, PointTarget, Scalar>::PointCloudTarget;
26 |   using PointCloudTargetPtr = typename PointCloudTarget::Ptr;
27 |   using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr;
28 | 
29 | #if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0)
30 |   using Ptr = pcl::shared_ptr<LsqRegistration<PointSource, PointTarget>>;
31 |   using ConstPtr = pcl::shared_ptr<const LsqRegistration<PointSource, PointTarget>>;
32 | #else
33 |   using Ptr = boost::shared_ptr<LsqRegistration<PointSource, PointTarget>>;
34 |   using ConstPtr = boost::shared_ptr<const LsqRegistration<PointSource, PointTarget>>;
35 | #endif
36 | 
37 | protected:
38 |   using pcl::Registration<PointSource, PointTarget, Scalar>::input_;
39 |   using pcl::Registration<PointSource, PointTarget, Scalar>::nr_iterations_;
40 |   using pcl::Registration<PointSource, PointTarget, Scalar>::max_iterations_;
41 |   using pcl::Registration<PointSource, PointTarget, Scalar>::final_transformation_;
42 |   using pcl::Registration<PointSource, PointTarget, Scalar>::transformation_epsilon_;
43 |   using pcl::Registration<PointSource, PointTarget, Scalar>::converged_;
44 | 
45 | public:
46 |   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
47 | 
48 |   LsqRegistration();
49 |   virtual ~LsqRegistration();
50 | 
51 |   void setRotationEpsilon(double eps);
52 |   void setInitialLambdaFactor(double init_lambda_factor);
53 |   void setDebugPrint(bool lm_debug_print);
54 | 
55 |   const Eigen::Matrix<double, 6, 6>& getFinalHessian() const;
56 | 
57 |   double evaluateCost(const Eigen::Matrix4f& relative_pose, Eigen::Matrix<double, 6, 6>* H = nullptr, Eigen::Matrix<double, 6, 1>* b = nullptr);
58 | 
59 |   virtual void swapSourceAndTarget() {}
60 |   virtual void clearSource() {}
61 |   virtual void clearTarget() {}
62 | 
63 | protected:
64 |   virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override;
65 | 
66 |   bool is_converged(const Eigen::Isometry3d& delta) const;
67 | 
68 |   virtual double linearize(const Eigen::Isometry3d& trans, Eigen::Matrix<double, 6, 6>* H = nullptr, Eigen::Matrix<double, 6, 1>* b = nullptr) = 0;
69 |   virtual double compute_error(const Eigen::Isometry3d& trans) = 0;
70 | 
71 |   bool step_optimize(Eigen::Isometry3d& x0, Eigen::Isometry3d& delta);
72 |   bool step_gn(Eigen::Isometry3d& x0, Eigen::Isometry3d& delta);
73 |   bool step_lm(Eigen::Isometry3d& x0, Eigen::Isometry3d& delta);
74 | 
75 | protected:
76 |   double rotation_epsilon_;
77 | 
78 |   LSQ_OPTIMIZER_TYPE lsq_optimizer_type_;
79 |   int lm_max_iterations_;
80 |   double lm_init_lambda_factor_;
81 |   double lm_lambda_;
82 |   bool lm_debug_print_;
83 | 
84 |   Eigen::Matrix<double, 6, 6> final_hessian_;
85 | };
86 | }  // namespace fast_gicp
87 | 
88 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/ndt/impl/ndt_cuda_impl.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_NDT_CUDA_IMPL_HPP
 2 | #define FAST_GICP_NDT_CUDA_IMPL_HPP
 3 | 
 4 | #include <fast_gicp/ndt/ndt_cuda.hpp>
 5 | 
 6 | #include <fast_gicp/cuda/ndt_cuda.cuh>
 7 | 
 8 | namespace fast_gicp {
 9 | 
10 | template <typename PointSource, typename PointTarget>
11 | NDTCuda<PointSource, PointTarget>::NDTCuda() : LsqRegistration<PointSource, PointTarget>() {
12 |   this->reg_name_ = "NDTCuda";
13 |   ndt_cuda_.reset(new cuda::NDTCudaCore());
14 | }
15 | 
16 | template <typename PointSource, typename PointTarget>
17 | NDTCuda<PointSource, PointTarget>::~NDTCuda() {}
18 | 
19 | template <typename PointSource, typename PointTarget>
20 | void NDTCuda<PointSource, PointTarget>::setDistanceMode(NDTDistanceMode mode) {
21 |   ndt_cuda_->set_distance_mode(mode);
22 | }
23 | 
24 | template <typename PointSource, typename PointTarget>
25 | void NDTCuda<PointSource, PointTarget>::setResolution(double resolution) {
26 |   ndt_cuda_->set_resolution(resolution);
27 | }
28 | 
29 | template <typename PointSource, typename PointTarget>
30 | void NDTCuda<PointSource, PointTarget>::setNeighborSearchMethod(NeighborSearchMethod method, double radius) {
31 |   ndt_cuda_->set_neighbor_search_method(method, radius);
32 | }
33 | 
34 | template <typename PointSource, typename PointTarget>
35 | void NDTCuda<PointSource, PointTarget>::swapSourceAndTarget() {
36 |   ndt_cuda_->swap_source_and_target();
37 |   input_.swap(target_);
38 | }
39 | 
40 | template <typename PointSource, typename PointTarget>
41 | void NDTCuda<PointSource, PointTarget>::clearSource() {
42 |   input_.reset();
43 | }
44 | 
45 | template <typename PointSource, typename PointTarget>
46 | void NDTCuda<PointSource, PointTarget>::clearTarget() {
47 |   target_.reset();
48 | }
49 | 
50 | template <typename PointSource, typename PointTarget>
51 | void NDTCuda<PointSource, PointTarget>::setInputSource(const PointCloudSourceConstPtr& cloud) {
52 |   if (cloud == input_) {
53 |     return;
54 |   }
55 |   pcl::Registration<PointSource, PointTarget, Scalar>::setInputSource(cloud);
56 | 
57 |   std::vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>> points(cloud->size());
58 |   std::transform(cloud->begin(), cloud->end(), points.begin(), [=](const PointSource& pt) { return pt.getVector3fMap(); });
59 |   ndt_cuda_->set_source_cloud(points);
60 | }
61 | 
62 | template <typename PointSource, typename PointTarget>
63 | void NDTCuda<PointSource, PointTarget>::setInputTarget(const PointCloudTargetConstPtr& cloud) {
64 |   if (cloud == target_) {
65 |     return;
66 |   }
67 | 
68 |   pcl::Registration<PointSource, PointTarget, Scalar>::setInputTarget(cloud);
69 | 
70 |   std::vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>> points(cloud->size());
71 |   std::transform(cloud->begin(), cloud->end(), points.begin(), [=](const PointTarget& pt) { return pt.getVector3fMap(); });
72 |   ndt_cuda_->set_target_cloud(points);
73 | }
74 | 
75 | template <typename PointSource, typename PointTarget>
76 | void NDTCuda<PointSource, PointTarget>::computeTransformation(PointCloudSource& output, const Matrix4& guess) {
77 |   ndt_cuda_->create_voxelmaps();
78 |   LsqRegistration<PointSource, PointTarget>::computeTransformation(output, guess);
79 | }
80 | 
81 | template <typename PointSource, typename PointTarget>
82 | double NDTCuda<PointSource, PointTarget>::linearize(const Eigen::Isometry3d& trans, Eigen::Matrix<double, 6, 6>* H, Eigen::Matrix<double, 6, 1>* b) {
83 |   ndt_cuda_->update_correspondences(trans);
84 |   return ndt_cuda_->compute_error(trans, H, b);
85 | }
86 | 
87 | template <typename PointSource, typename PointTarget>
88 | double NDTCuda<PointSource, PointTarget>::compute_error(const Eigen::Isometry3d& trans) {
89 |   return ndt_cuda_->compute_error(trans, nullptr, nullptr);
90 | }
91 | 
92 | }  // namespace fast_gicp
93 | 
94 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/ndt/ndt_cuda.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_NDT_CUDA_HPP
 2 | #define FAST_GICP_NDT_CUDA_HPP
 3 | 
 4 | #include <Eigen/Core>
 5 | #include <Eigen/Geometry>
 6 | 
 7 | #include <pcl/point_types.h>
 8 | #include <pcl/point_cloud.h>
 9 | #include <pcl/search/kdtree.h>
10 | #include <pcl/registration/registration.h>
11 | #include <fast_gicp/gicp/lsq_registration.hpp>
12 | #include <fast_gicp/gicp/gicp_settings.hpp>
13 | #include <fast_gicp/ndt/ndt_settings.hpp>
14 | 
15 | namespace fast_gicp {
16 | 
17 | namespace cuda {
18 | class NDTCudaCore;
19 | }
20 | 
21 | template <typename PointSource, typename PointTarget>
22 | class NDTCuda : public LsqRegistration<PointSource, PointTarget> {
23 | public:
24 |   using Scalar = float;
25 |   using Matrix4 = typename pcl::Registration<PointSource, PointTarget, Scalar>::Matrix4;
26 | 
27 |   using PointCloudSource = typename pcl::Registration<PointSource, PointTarget, Scalar>::PointCloudSource;
28 |   using PointCloudSourcePtr = typename PointCloudSource::Ptr;
29 |   using PointCloudSourceConstPtr = typename PointCloudSource::ConstPtr;
30 | 
31 |   using PointCloudTarget = typename pcl::Registration<PointSource, PointTarget, Scalar>::PointCloudTarget;
32 |   using PointCloudTargetPtr = typename PointCloudTarget::Ptr;
33 |   using PointCloudTargetConstPtr = typename PointCloudTarget::ConstPtr;
34 | 
35 | #if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0)
36 |   using Ptr = pcl::shared_ptr<NDTCuda<PointSource, PointTarget>>;
37 |   using ConstPtr = pcl::shared_ptr<const NDTCuda<PointSource, PointTarget>>;
38 | #else
39 |   using Ptr = boost::shared_ptr<NDTCuda<PointSource, PointTarget>>;
40 |   using ConstPtr = boost::shared_ptr<const NDTCuda<PointSource, PointTarget>>;
41 | #endif
42 | 
43 | protected:
44 |   using pcl::Registration<PointSource, PointTarget, Scalar>::reg_name_;
45 |   using pcl::Registration<PointSource, PointTarget, Scalar>::input_;
46 |   using pcl::Registration<PointSource, PointTarget, Scalar>::target_;
47 |   using pcl::Registration<PointSource, PointTarget, Scalar>::corr_dist_threshold_;
48 | 
49 | public:
50 |   NDTCuda();
51 |   virtual ~NDTCuda() override;
52 | 
53 |   void setDistanceMode(NDTDistanceMode mode);
54 |   void setResolution(double resolution);
55 |   void setNeighborSearchMethod(NeighborSearchMethod method, double radius = -1.0);
56 | 
57 |   virtual void swapSourceAndTarget() override;
58 |   virtual void clearSource() override;
59 |   virtual void clearTarget() override;
60 | 
61 |   virtual void setInputSource(const PointCloudSourceConstPtr& cloud) override;
62 |   virtual void setInputTarget(const PointCloudTargetConstPtr& cloud) override;
63 | 
64 | protected:
65 |   virtual void computeTransformation(PointCloudSource& output, const Matrix4& guess) override;
66 |   virtual double linearize(const Eigen::Isometry3d& trans, Eigen::Matrix<double, 6, 6>* H, Eigen::Matrix<double, 6, 1>* b) override;
67 |   virtual double compute_error(const Eigen::Isometry3d& trans) override;
68 | 
69 | protected:
70 |   std::unique_ptr<cuda::NDTCudaCore> ndt_cuda_;
71 | };
72 | }  // namespace fast_gicp
73 | 
74 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/ndt/ndt_settings.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef FAST_GICP_NDT_SETTINGS_HPP
 2 | #define FAST_GICP_NDT_SETTINGS_HPP
 3 | 
 4 | namespace fast_gicp {
 5 | 
 6 | enum class NDTDistanceMode { P2D, D2D };
 7 | 
 8 | }  // namespace fast_gicp
 9 | 
10 | #endif


--------------------------------------------------------------------------------
/include/fast_gicp/so3/so3.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef FAST_GICP_SO3_HPP
  2 | #define FAST_GICP_SO3_HPP
  3 | 
  4 | #include <Eigen/Core>
  5 | #include <Eigen/Geometry>
  6 | 
  7 | namespace fast_gicp {
  8 | 
  9 | inline Eigen::Matrix3f skew(const Eigen::Vector3f& x) {
 10 |   Eigen::Matrix3f skew = Eigen::Matrix3f::Zero();
 11 |   skew(0, 1) = -x[2];
 12 |   skew(0, 2) = x[1];
 13 |   skew(1, 0) = x[2];
 14 |   skew(1, 2) = -x[0];
 15 |   skew(2, 0) = -x[1];
 16 |   skew(2, 1) = x[0];
 17 | 
 18 |   return skew;
 19 | }
 20 | 
 21 | inline Eigen::Matrix3d skewd(const Eigen::Vector3d& x) {
 22 |   Eigen::Matrix3d skew = Eigen::Matrix3d::Zero();
 23 |   skew(0, 1) = -x[2];
 24 |   skew(0, 2) = x[1];
 25 |   skew(1, 0) = x[2];
 26 |   skew(1, 2) = -x[0];
 27 |   skew(2, 0) = -x[1];
 28 |   skew(2, 1) = x[0];
 29 | 
 30 |   return skew;
 31 | }
 32 | 
 33 | /*
 34 |  * SO3 expmap code taken from Sophus
 35 |  * https://github.com/strasdat/Sophus/blob/593db47500ea1a2de5f0e6579c86147991509c59/sophus/so3.hpp#L585
 36 |  *
 37 |  * Copyright 2011-2017 Hauke Strasdat
 38 |  *           2012-2017 Steven Lovegrove
 39 |  *
 40 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 41 |  * of this software and associated documentation files (the "Software"), to
 42 |  * deal in the Software without restriction, including without limitation the
 43 |  * rights  to use, copy, modify, merge, publish, distribute, sublicense, and/or
 44 |  * sell copies of the Software, and to permit persons to whom the Software is
 45 |  * furnished to do so, subject to the following conditions:
 46 |  *
 47 |  * The above copyright notice and this permission notice shall be included in
 48 |  * all copies or substantial portions of the Software.
 49 |  *
 50 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 51 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 52 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 53 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 54 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 55 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 56 |  * IN THE SOFTWARE.
 57 |  */
 58 | inline Eigen::Quaterniond so3_exp(const Eigen::Vector3d& omega) {
 59 |   double theta_sq = omega.dot(omega);
 60 | 
 61 |   double theta;
 62 |   double imag_factor;
 63 |   double real_factor;
 64 |   if (theta_sq < 1e-10) {
 65 |     theta = 0;
 66 |     double theta_quad = theta_sq * theta_sq;
 67 |     imag_factor = 0.5 - 1.0 / 48.0 * theta_sq + 1.0 / 3840.0 * theta_quad;
 68 |     real_factor = 1.0 - 1.0 / 8.0 * theta_sq + 1.0 / 384.0 * theta_quad;
 69 |   } else {
 70 |     theta = std::sqrt(theta_sq);
 71 |     double half_theta = 0.5 * theta;
 72 |     imag_factor = std::sin(half_theta) / theta;
 73 |     real_factor = std::cos(half_theta);
 74 |   }
 75 | 
 76 |   return Eigen::Quaterniond(real_factor, imag_factor * omega.x(), imag_factor * omega.y(), imag_factor * omega.z());
 77 | }
 78 | 
 79 | // Rotation-first
 80 | inline Eigen::Isometry3d se3_exp(const Eigen::Matrix<double, 6, 1>& a) {
 81 |   using std::cos;
 82 |   using std::sin;
 83 |   const Eigen::Vector3d omega = a.head<3>();
 84 | 
 85 |   double theta = std::sqrt(omega.dot(omega));
 86 |   const Eigen::Quaterniond so3 = so3_exp(omega);
 87 |   const Eigen::Matrix3d Omega = skewd(omega);
 88 |   const Eigen::Matrix3d Omega_sq = Omega * Omega;
 89 |   Eigen::Matrix3d V;
 90 | 
 91 |   if (theta < 1e-10) {
 92 |     V = so3.matrix();
 93 |     /// Note: That is an accurate expansion!
 94 |   } else {
 95 |     const double theta_sq = theta * theta;
 96 |     V = (Eigen::Matrix3d::Identity() + (1.0 - cos(theta)) / (theta_sq)*Omega + (theta - sin(theta)) / (theta_sq * theta) * Omega_sq);
 97 |   }
 98 | 
 99 |   Eigen::Isometry3d se3 = Eigen::Isometry3d::Identity();
100 |   se3.linear() = so3.toRotationMatrix();
101 |   se3.translation() = V * a.tail<3>();
102 | 
103 |   return se3;
104 | }
105 | 
106 | }  // namespace fast_gicp
107 | 
108 | #endif


--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 3 | <package format="3">
 4 |   <name>fast_gicp</name>
 5 |   <version>0.0.0</version>
 6 |   <description>A collection of fast point cloud registration implementations</description>
 7 | 
 8 |   <maintainer email="k.koide@aist.go.jp">k.koide</maintainer>
 9 | 
10 |   <license>BSD</license>
11 |   <url type="website">https://github.com/SMRT-AIST/fast_gicp</url>
12 |   <url type="repository">https://github.com/SMRT-AIST/fast_gicp</url>
13 |   <url type="bugtracker">https://github.com/SMRT-AIST/fast_gicp/issues</url>
14 | 
15 |   <buildtool_depend condition="$ROS_VERSION == 1">catkin</buildtool_depend>
16 |   <buildtool_depend condition="$ROS_VERSION == 2">ament_cmake</buildtool_depend>
17 | 
18 |   <build_depend>ros_environment</build_depend>
19 | 
20 |   <depend>libpcl-all-dev</depend>
21 |   <depend>eigen</depend>
22 | 
23 |   <export>
24 |     <build_type condition="$ROS_VERSION == 1">catkin</build_type>
25 |     <build_type condition="$ROS_VERSION == 2">ament_cmake</build_type>
26 |   </export>
27 | </package>
28 | 


--------------------------------------------------------------------------------
/scripts/runtest_cuda.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | docker build --tag focal_cuda -f docker/focal_cuda/Dockerfile .
3 | docker run --gpus all -it --rm focal_cuda /root/fast_gicp/build/gicp_test /root/fast_gicp/data
4 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import os
  3 | import sys
  4 | import glob
  5 | import subprocess
  6 | 
  7 | from setuptools import setup, Extension
  8 | from setuptools.command.build_ext import build_ext
  9 | 
 10 | # Convert distutils Windows platform specifiers to CMake -A arguments
 11 | PLAT_TO_CMAKE = {
 12 |     "win32": "Win32",
 13 |     "win-amd64": "x64",
 14 |     "win-arm32": "ARM",
 15 |     "win-arm64": "ARM64",
 16 | }
 17 | 
 18 | 
 19 | # A CMakeExtension needs a sourcedir instead of a file list.
 20 | # The name must be the _single_ output extension from the CMake build.
 21 | # If you need multiple extensions, see scikit-build.
 22 | class CMakeExtension(Extension):
 23 |     def __init__(self, name, sourcedir=""):
 24 |         Extension.__init__(self, name, sources=[])
 25 |         self.sourcedir = os.path.abspath(sourcedir)
 26 | 
 27 | 
 28 | class CMakeBuild(build_ext):
 29 |     def build_extension(self, ext):
 30 |         extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
 31 | 
 32 |         # required for auto-detection of auxiliary "native" libs
 33 |         if not extdir.endswith(os.path.sep):
 34 |             extdir += os.path.sep
 35 | 
 36 |         # cfg = "Debug" if self.debug else "Release"
 37 |         cfg = "Release"
 38 | 
 39 |         # CMake lets you override the generator - we need to check this.
 40 |         # Can be set with Conda-Build, for example.
 41 |         cmake_generator = os.environ.get("CMAKE_GENERATOR", "")
 42 | 
 43 |         # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON
 44 |         # EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code
 45 |         # from Python.
 46 |         cmake_args = [
 47 |             "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}".format(extdir),
 48 |             "-DPYTHON_EXECUTABLE={}".format(sys.executable),
 49 |             "-DEXAMPLE_VERSION_INFO={}".format(self.distribution.get_version()),
 50 |             "-DCMAKE_BUILD_TYPE={}".format(cfg),  # not used on MSVC, but no harm,
 51 |             # "-DBUILD_VGICP_CUDA=ON",
 52 |             "-DBUILD_PYTHON_BINDINGS=ON",
 53 |             "-DBUILD_apps=OFF",
 54 |         ]
 55 |         build_args = []
 56 | 
 57 |         if self.compiler.compiler_type != "msvc":
 58 |             # Using Ninja-build since it a) is available as a wheel and b)
 59 |             # multithreads automatically. MSVC would require all variables be
 60 |             # exported for Ninja to pick it up, which is a little tricky to do.
 61 |             # Users can override the generator with CMAKE_GENERATOR in CMake
 62 |             # 3.15+.
 63 |             if not cmake_generator:
 64 |                 pass
 65 |                 # cmake_args += ["-GNinja"]
 66 | 
 67 |         else:
 68 |             # Single config generators are handled "normally"
 69 |             single_config = any(x in cmake_generator for x in {"NMake", "Ninja"})
 70 | 
 71 |             # CMake allows an arch-in-generator style for backward compatibility
 72 |             contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"})
 73 | 
 74 |             # Specify the arch if using MSVC generator, but only if it doesn't
 75 |             # contain a backward-compatibility arch spec already in the
 76 |             # generator name.
 77 |             if not single_config and not contains_arch:
 78 |                 cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]]
 79 | 
 80 |             # Multi-config generators have a different way to specify configs
 81 |             if not single_config:
 82 |                 cmake_args += [
 83 |                     "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir)
 84 |                 ]
 85 |                 build_args += ["--config", cfg]
 86 | 
 87 |         # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level
 88 |         # across all generators.
 89 |         if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
 90 |             # self.parallel is a Python 3 only way to set parallel jobs by hand
 91 |             # using -j in the build_ext call, not supported by pip or PyPA-build.
 92 |             if hasattr(self, "parallel") and self.parallel:
 93 |                 # CMake 3.12+ only.
 94 |                 build_args += ["-j{}".format(self.parallel)]
 95 | 
 96 |         if not os.path.exists(self.build_temp):
 97 |             os.makedirs(self.build_temp)
 98 | 
 99 |         subprocess.check_call(
100 |             ["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp
101 |         )
102 |         subprocess.check_call(
103 |             ["cmake", "--build", "."] + build_args, cwd=self.build_temp
104 |         )
105 | 
106 | # The information here can also be placed in setup.cfg - better separation of
107 | # logic and declaration, and simpler if you include description/version in a file.
108 | setup(
109 |     name="pygicp",
110 |     version="0.0.1",
111 |     author="k.koide",
112 |     author_email="k.koide@aist.go.jp",
113 |     description="A collection of GICP-based point cloud registration algorithms",
114 |     long_description="",
115 |     ext_modules=[CMakeExtension("pygicp")],
116 |     cmdclass={"build_ext": CMakeBuild},
117 |     zip_safe=False,
118 | )
119 | 


--------------------------------------------------------------------------------
/src/align.cpp:
--------------------------------------------------------------------------------
  1 | #include <chrono>
  2 | #include <iostream>
  3 | 
  4 | #include <pcl/io/pcd_io.h>
  5 | #include <pcl/point_types.h>
  6 | #include <pcl/point_cloud.h>
  7 | #include <pcl/common/transforms.h>
  8 | #include <pcl/filters/approximate_voxel_grid.h>
  9 | 
 10 | #include <pcl/registration/ndt.h>
 11 | #include <pcl/registration/gicp.h>
 12 | #include <fast_gicp/gicp/fast_gicp.hpp>
 13 | #include <fast_gicp/gicp/fast_gicp_st.hpp>
 14 | #include <fast_gicp/gicp/fast_vgicp.hpp>
 15 | 
 16 | #ifdef USE_VGICP_CUDA
 17 | #include <fast_gicp/ndt/ndt_cuda.hpp>
 18 | #include <fast_gicp/gicp/fast_vgicp_cuda.hpp>
 19 | #endif
 20 | 
 21 | // benchmark for PCL's registration methods
 22 | template <typename Registration>
 23 | void test_pcl(Registration& reg, const pcl::PointCloud<pcl::PointXYZ>::ConstPtr& target, const pcl::PointCloud<pcl::PointXYZ>::ConstPtr& source) {
 24 |   pcl::PointCloud<pcl::PointXYZ>::Ptr aligned(new pcl::PointCloud<pcl::PointXYZ>);
 25 | 
 26 |   double fitness_score = 0.0;
 27 | 
 28 |   // single run
 29 |   auto t1 = std::chrono::high_resolution_clock::now();
 30 |   reg.setInputTarget(target);
 31 |   reg.setInputSource(source);
 32 |   reg.align(*aligned);
 33 |   auto t2 = std::chrono::high_resolution_clock::now();
 34 |   fitness_score = reg.getFitnessScore();
 35 |   double single = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count() / 1e6;
 36 |   std::cout << "single:" << single << "[msec] " << std::flush;
 37 | 
 38 |   // 100 times
 39 |   t1 = std::chrono::high_resolution_clock::now();
 40 |   for (int i = 0; i < 100; i++) {
 41 |     reg.setInputTarget(target);
 42 |     reg.setInputSource(source);
 43 |     reg.align(*aligned);
 44 |   }
 45 |   t2 = std::chrono::high_resolution_clock::now();
 46 |   double multi = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count() / 1e6;
 47 |   std::cout << "100times:" << multi << "[msec] fitness_score:" << fitness_score << std::endl;
 48 | }
 49 | 
 50 | // benchmark for fast_gicp registration methods
 51 | template <typename Registration>
 52 | void test(Registration& reg, const pcl::PointCloud<pcl::PointXYZ>::ConstPtr& target, const pcl::PointCloud<pcl::PointXYZ>::ConstPtr& source) {
 53 |   pcl::PointCloud<pcl::PointXYZ>::Ptr aligned(new pcl::PointCloud<pcl::PointXYZ>);
 54 | 
 55 |   double fitness_score = 0.0;
 56 | 
 57 |   // single run
 58 |   auto t1 = std::chrono::high_resolution_clock::now();
 59 |   // fast_gicp reuses calculated covariances if an input cloud is the same as the previous one
 60 |   // to prevent this for benchmarking, force clear source and target clouds
 61 |   reg.clearTarget();
 62 |   reg.clearSource();
 63 |   reg.setInputTarget(target);
 64 |   reg.setInputSource(source);
 65 |   reg.align(*aligned);
 66 |   auto t2 = std::chrono::high_resolution_clock::now();
 67 |   fitness_score = reg.getFitnessScore();
 68 |   double single = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count() / 1e6;
 69 | 
 70 |   std::cout << "single:" << single << "[msec] " << std::flush;
 71 | 
 72 |   // 100 times
 73 |   t1 = std::chrono::high_resolution_clock::now();
 74 |   for (int i = 0; i < 100; i++) {
 75 |     reg.clearTarget();
 76 |     reg.clearSource();
 77 |     reg.setInputTarget(target);
 78 |     reg.setInputSource(source);
 79 |     reg.align(*aligned);
 80 |   }
 81 |   t2 = std::chrono::high_resolution_clock::now();
 82 |   double multi = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count() / 1e6;
 83 |   std::cout << "100times:" << multi << "[msec] " << std::flush;
 84 | 
 85 |   // for some tasks like odometry calculation,
 86 |   // you can reuse the covariances of a source point cloud in the next registration
 87 |   t1 = std::chrono::high_resolution_clock::now();
 88 |   pcl::PointCloud<pcl::PointXYZ>::ConstPtr target_ = target;
 89 |   pcl::PointCloud<pcl::PointXYZ>::ConstPtr source_ = source;
 90 |   for (int i = 0; i < 100; i++) {
 91 |     reg.swapSourceAndTarget();
 92 |     reg.clearSource();
 93 | 
 94 |     reg.setInputTarget(target_);
 95 |     reg.setInputSource(source_);
 96 |     reg.align(*aligned);
 97 | 
 98 |     target_.swap(source_);
 99 |   }
100 |   t2 = std::chrono::high_resolution_clock::now();
101 |   double reuse = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count() / 1e6;
102 | 
103 |   std::cout << "100times_reuse:" << reuse << "[msec] fitness_score:" << fitness_score << std::endl;
104 | }
105 | 
106 | /**
107 |  * @brief main
108 |  */
109 | int main(int argc, char** argv) {
110 |   if (argc < 3) {
111 |     std::cout << "usage: gicp_align target_pcd source_pcd" << std::endl;
112 |     return 0;
113 |   }
114 | 
115 |   pcl::PointCloud<pcl::PointXYZ>::Ptr target_cloud(new pcl::PointCloud<pcl::PointXYZ>());
116 |   pcl::PointCloud<pcl::PointXYZ>::Ptr source_cloud(new pcl::PointCloud<pcl::PointXYZ>());
117 | 
118 |   if (pcl::io::loadPCDFile(argv[1], *target_cloud)) {
119 |     std::cerr << "failed to open " << argv[1] << std::endl;
120 |     return 1;
121 |   }
122 |   if (pcl::io::loadPCDFile(argv[2], *source_cloud)) {
123 |     std::cerr << "failed to open " << argv[2] << std::endl;
124 |     return 1;
125 |   }
126 | 
127 |   // remove invalid points around origin
128 |   source_cloud->erase(
129 |     std::remove_if(source_cloud->begin(), source_cloud->end(), [=](const pcl::PointXYZ& pt) { return pt.getVector3fMap().squaredNorm() < 1e-3; }),
130 |     source_cloud->end());
131 |   target_cloud->erase(
132 |     std::remove_if(target_cloud->begin(), target_cloud->end(), [=](const pcl::PointXYZ& pt) { return pt.getVector3fMap().squaredNorm() < 1e-3; }),
133 |     target_cloud->end());
134 | 
135 |   // downsampling
136 |   pcl::ApproximateVoxelGrid<pcl::PointXYZ> voxelgrid;
137 |   voxelgrid.setLeafSize(0.1f, 0.1f, 0.1f);
138 | 
139 |   pcl::PointCloud<pcl::PointXYZ>::Ptr filtered(new pcl::PointCloud<pcl::PointXYZ>());
140 |   voxelgrid.setInputCloud(target_cloud);
141 |   voxelgrid.filter(*filtered);
142 |   target_cloud = filtered;
143 | 
144 |   filtered.reset(new pcl::PointCloud<pcl::PointXYZ>());
145 |   voxelgrid.setInputCloud(source_cloud);
146 |   voxelgrid.filter(*filtered);
147 |   source_cloud = filtered;
148 | 
149 |   std::cout << "target:" << target_cloud->size() << "[pts] source:" << source_cloud->size() << "[pts]" << std::endl;
150 | 
151 |   std::cout << "--- pcl_gicp ---" << std::endl;
152 |   pcl::GeneralizedIterativeClosestPoint<pcl::PointXYZ, pcl::PointXYZ> pcl_gicp;
153 |   test_pcl(pcl_gicp, target_cloud, source_cloud);
154 | 
155 |   std::cout << "--- pcl_ndt ---" << std::endl;
156 |   pcl::NormalDistributionsTransform<pcl::PointXYZ, pcl::PointXYZ> pcl_ndt;
157 |   pcl_ndt.setResolution(1.0);
158 |   test_pcl(pcl_ndt, target_cloud, source_cloud);
159 | 
160 |   std::cout << "--- fgicp_st ---" << std::endl;
161 |   fast_gicp::FastGICPSingleThread<pcl::PointXYZ, pcl::PointXYZ> fgicp_st;
162 |   test(fgicp_st, target_cloud, source_cloud);
163 | 
164 |   std::cout << "--- fgicp_mt ---" << std::endl;
165 |   fast_gicp::FastGICP<pcl::PointXYZ, pcl::PointXYZ> fgicp_mt;
166 |   // fast_gicp uses all the CPU cores by default
167 |   // fgicp_mt.setNumThreads(8);
168 |   test(fgicp_mt, target_cloud, source_cloud);
169 | 
170 |   std::cout << "--- vgicp_st ---" << std::endl;
171 |   fast_gicp::FastVGICP<pcl::PointXYZ, pcl::PointXYZ> vgicp;
172 |   vgicp.setResolution(1.0);
173 |   vgicp.setNumThreads(1);
174 |   test(vgicp, target_cloud, source_cloud);
175 | 
176 |   std::cout << "--- vgicp_mt ---" << std::endl;
177 |   vgicp.setNumThreads(omp_get_max_threads());
178 |   test(vgicp, target_cloud, source_cloud);
179 | 
180 | #ifdef USE_VGICP_CUDA
181 |   std::cout << "--- ndt_cuda (P2D) ---" << std::endl;
182 |   fast_gicp::NDTCuda<pcl::PointXYZ, pcl::PointXYZ> ndt_cuda;
183 |   ndt_cuda.setResolution(1.0);
184 |   ndt_cuda.setDistanceMode(fast_gicp::NDTDistanceMode::P2D);
185 |   test(ndt_cuda, target_cloud, source_cloud);
186 | 
187 |   std::cout << "--- ndt_cuda (D2D) ---" << std::endl;
188 |   ndt_cuda.setDistanceMode(fast_gicp::NDTDistanceMode::D2D);
189 |   test(ndt_cuda, target_cloud, source_cloud);
190 | 
191 |   std::cout << "--- vgicp_cuda (parallel_kdtree) ---" << std::endl;
192 |   fast_gicp::FastVGICPCuda<pcl::PointXYZ, pcl::PointXYZ> vgicp_cuda;
193 |   vgicp_cuda.setResolution(1.0);
194 |   // vgicp_cuda uses CPU-based parallel KDTree in covariance estimation by default
195 |   // on a modern CPU, it is faster than GPU_BRUTEFORCE
196 |   // vgicp_cuda.setNearestNeighborSearchMethod(fast_gicp::NearestNeighborMethod::CPU_PARALLEL_KDTREE);
197 |   test(vgicp_cuda, target_cloud, source_cloud);
198 | 
199 |   std::cout << "--- vgicp_cuda (gpu_bruteforce) ---" << std::endl;
200 |   // use GPU-based bruteforce nearest neighbor search for covariance estimation
201 |   // this would be a good choice if your PC has a weak CPU and a strong GPU (e.g., NVIDIA Jetson)
202 |   vgicp_cuda.setNearestNeighborSearchMethod(fast_gicp::NearestNeighborMethod::GPU_BRUTEFORCE);
203 |   test(vgicp_cuda, target_cloud, source_cloud);
204 | 
205 |   std::cout << "--- vgicp_cuda (gpu_rbf_kernel) ---" << std::endl;
206 |   // use RBF-kernel-based covariance estimation
207 |   // extremely fast but maybe a bit inaccurate
208 |   vgicp_cuda.setNearestNeighborSearchMethod(fast_gicp::NearestNeighborMethod::GPU_RBF_KERNEL);
209 |   // kernel width (and distance threshold) need to be tuned
210 |   vgicp_cuda.setKernelWidth(0.5);
211 |   test(vgicp_cuda, target_cloud, source_cloud);
212 | #endif
213 | 
214 |   return 0;
215 | }
216 | 


--------------------------------------------------------------------------------
/src/fast_gicp/cuda/brute_force_knn.cu:
--------------------------------------------------------------------------------
  1 | #include <Eigen/Core>
  2 | 
  3 | #include <thrust/sequence.h>
  4 | #include <thrust/functional.h>
  5 | #include <thrust/host_vector.h>
  6 | #include <thrust/device_vector.h>
  7 | #include <thrust/iterator/zip_iterator.h>
  8 | 
  9 | #include <nvbio/basic/vector_view.h>
 10 | #include <nvbio/basic/priority_queue.h>
 11 | 
 12 | namespace fast_gicp {
 13 |   namespace cuda {
 14 | 
 15 | namespace {
 16 |   struct neighborsearch_kernel {
 17 |     neighborsearch_kernel(int k, const thrust::device_vector<Eigen::Vector3f>& target, thrust::device_vector<thrust::pair<float, int>>& k_neighbors)
 18 |         : k(k), num_target_points(target.size()), target_points_ptr(target.data()), k_neighbors_ptr(k_neighbors.data()) {}
 19 | 
 20 |     template<typename Tuple>
 21 |     __host__ __device__ void operator()(const Tuple& idx_x) const {
 22 |       // threadIdx doesn't work because thrust split for_each in two loops
 23 |       int idx = thrust::get<0>(idx_x);
 24 |       const Eigen::Vector3f& x = thrust::get<1>(idx_x);
 25 | 
 26 |       // target points buffer & nn output buffer
 27 |       const Eigen::Vector3f* pts = thrust::raw_pointer_cast(target_points_ptr);
 28 |       thrust::pair<float, int>* k_neighbors = thrust::raw_pointer_cast(k_neighbors_ptr) + idx * k;
 29 | 
 30 |       // priority queue
 31 |       struct compare_type {
 32 |         bool operator()(const thrust::pair<float, int>& lhs, const thrust::pair<float, int>& rhs) {
 33 |           return lhs.first < rhs.first;
 34 |         }
 35 |       };
 36 | 
 37 |       typedef nvbio::vector_view<thrust::pair<float, int>*> vector_type;
 38 |       typedef nvbio::priority_queue<thrust::pair<float, int>, vector_type, compare_type> queue_type;
 39 |       queue_type queue(vector_type(0, k_neighbors - 1));
 40 | 
 41 |       for(int i = 0; i < k; i++) {
 42 |         float sq_dist = (pts[i] - x).squaredNorm();
 43 |         queue.push(thrust::make_pair(sq_dist, i));
 44 |       }
 45 | 
 46 |       for(int i = k; i < num_target_points; i++) {
 47 |         float sq_dist = (pts[i] - x).squaredNorm();
 48 |         if(sq_dist < queue.top().first) {
 49 |           queue.pop();
 50 |           queue.push(thrust::make_pair(sq_dist, i));
 51 |         }
 52 |       }
 53 |     }
 54 | 
 55 |     const int k;
 56 |     const int num_target_points;
 57 |     thrust::device_ptr<const Eigen::Vector3f> target_points_ptr;
 58 | 
 59 |     thrust::device_ptr<thrust::pair<float, int>> k_neighbors_ptr;
 60 |   };
 61 | 
 62 |   struct sorting_kernel {
 63 |     sorting_kernel(int k, thrust::device_vector<thrust::pair<float, int>>& k_neighbors) : k(k), k_neighbors_ptr(k_neighbors.data()) {}
 64 | 
 65 |     __host__ __device__ void operator()(int idx) const {
 66 |       // target points buffer & nn output buffer
 67 |       thrust::pair<float, int>* k_neighbors = thrust::raw_pointer_cast(k_neighbors_ptr) + idx * k;
 68 | 
 69 |       // priority queue
 70 |       struct compare_type {
 71 |         bool operator()(const thrust::pair<float, int>& lhs, const thrust::pair<float, int>& rhs) {
 72 |           return lhs.first < rhs.first;
 73 |         }
 74 |       };
 75 | 
 76 |       typedef nvbio::vector_view<thrust::pair<float, int>*> vector_type;
 77 |       typedef nvbio::priority_queue<thrust::pair<float, int>, vector_type, compare_type> queue_type;
 78 |       queue_type queue(vector_type(k, k_neighbors - 1));
 79 |       queue.m_size = k;
 80 | 
 81 |       for(int i = 0; i < k; i++) {
 82 |         thrust::pair<float, int> poped = queue.top();
 83 |         queue.pop();
 84 | 
 85 |         k_neighbors[k - i - 1] = poped;
 86 |       }
 87 |     }
 88 | 
 89 |     const int k;
 90 |     thrust::device_ptr<thrust::pair<float, int>> k_neighbors_ptr;
 91 |   };
 92 | }
 93 | 
 94 | void brute_force_knn_search(const thrust::device_vector<Eigen::Vector3f>& source, const thrust::device_vector<Eigen::Vector3f>& target, int k, thrust::device_vector<thrust::pair<float, int>>& k_neighbors, bool do_sort=false) {
 95 |   thrust::device_vector<int> d_indices(source.size());
 96 |   thrust::sequence(d_indices.begin(), d_indices.end());
 97 | 
 98 |   auto first = thrust::make_zip_iterator(thrust::make_tuple(d_indices.begin(), source.begin()));
 99 |   auto last = thrust::make_zip_iterator(thrust::make_tuple(d_indices.end(), source.end()));
100 | 
101 |   // nvbio::priority_queue requires (k + 1) working space
102 |   k_neighbors.resize(source.size() * k, thrust::make_pair(-1.0f, -1));
103 |   thrust::for_each(first, last, neighborsearch_kernel(k, target, k_neighbors));
104 | 
105 |   if(do_sort) {
106 |     thrust::for_each(d_indices.begin(), d_indices.end(), sorting_kernel(k, k_neighbors));
107 |   }
108 | }
109 | 
110 |   }
111 | } // namespace fast_gicp
112 | 


--------------------------------------------------------------------------------
/src/fast_gicp/cuda/compute_derivatives.cu:
--------------------------------------------------------------------------------
  1 | #include <fstream>
  2 | 
  3 | #include <Eigen/Core>
  4 | #include <Eigen/Geometry>
  5 | 
  6 | #include <thrust/remove.h>
  7 | #include <thrust/host_vector.h>
  8 | #include <thrust/device_vector.h>
  9 | #include <thrust/transform_reduce.h>
 10 | 
 11 | #include <fast_gicp/cuda/gaussian_voxelmap.cuh>
 12 | 
 13 | namespace fast_gicp {
 14 | namespace cuda {
 15 | 
 16 | namespace {
 17 | 
 18 | template <typename T>
 19 | struct compute_derivatives_kernel {
 20 |   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
 21 | 
 22 |   compute_derivatives_kernel(
 23 |     const GaussianVoxelMap& voxelmap,
 24 |     const thrust::device_vector<Eigen::Vector3f>& src_means,
 25 |     const thrust::device_vector<Eigen::Matrix3f>& src_covs,
 26 |     const thrust::device_ptr<const Eigen::Isometry3f>& x_eval_ptr,
 27 |     const thrust::device_ptr<const Eigen::Isometry3f>& x_ptr)
 28 |   : trans_eval_ptr(x_eval_ptr),
 29 |     trans_ptr(x_ptr),
 30 |     src_means_ptr(src_means.data()),
 31 |     src_covs_ptr(src_covs.data()),
 32 |     voxel_num_points_ptr(voxelmap.num_points.data()),
 33 |     voxel_means_ptr(voxelmap.voxel_means.data()),
 34 |     voxel_covs_ptr(voxelmap.voxel_covs.data()) {}
 35 | 
 36 |   // skew symmetric matrix
 37 |   __host__ __device__ Eigen::Matrix3f skew_symmetric(const Eigen::Vector3f& x) const {
 38 |     Eigen::Matrix3f skew = Eigen::Matrix3f::Zero();
 39 |     skew(0, 1) = -x[2];
 40 |     skew(0, 2) = x[1];
 41 |     skew(1, 0) = x[2];
 42 |     skew(1, 2) = -x[0];
 43 |     skew(2, 0) = -x[1];
 44 |     skew(2, 1) = x[0];
 45 | 
 46 |     return skew;
 47 |   }
 48 | 
 49 |   // calculate derivatives
 50 |   __host__ __device__ T operator()(const thrust::pair<int, int>& correspondence) const {
 51 |     const Eigen::Vector3f& mean_A = thrust::raw_pointer_cast(src_means_ptr)[correspondence.first];
 52 |     const Eigen::Matrix3f& cov_A = thrust::raw_pointer_cast(src_covs_ptr)[correspondence.first];
 53 | 
 54 |     if (correspondence.second < 0) {
 55 |       return thrust::make_tuple(0.0f, Eigen::Matrix<float, 6, 6>::Zero().eval(), Eigen::Matrix<float, 6, 1>::Zero().eval());
 56 |     }
 57 | 
 58 |     int num_points = thrust::raw_pointer_cast(voxel_num_points_ptr)[correspondence.second];
 59 |     const Eigen::Vector3f& mean_B = thrust::raw_pointer_cast(voxel_means_ptr)[correspondence.second];
 60 |     const Eigen::Matrix3f& cov_B = thrust::raw_pointer_cast(voxel_covs_ptr)[correspondence.second];
 61 | 
 62 |     if (num_points <= 0) {
 63 |       return thrust::make_tuple(0.0f, Eigen::Matrix<float, 6, 6>::Zero().eval(), Eigen::Matrix<float, 6, 1>::Zero().eval());
 64 |     }
 65 | 
 66 |     const auto& trans_eval = *thrust::raw_pointer_cast(trans_eval_ptr);
 67 |     const auto& trans = *thrust::raw_pointer_cast(trans_ptr);
 68 | 
 69 |     Eigen::Matrix3f R_eval = trans_eval.linear();
 70 |     Eigen::Matrix3f R = trans.linear();
 71 |     Eigen::Vector3f t = trans.translation();
 72 | 
 73 |     const Eigen::Vector3f transed_mean_A = R * mean_A + t;
 74 | 
 75 |     Eigen::Matrix3f RCR = R_eval * cov_A * R_eval.transpose();
 76 |     Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse();
 77 | 
 78 |     float w = sqrtf(num_points);
 79 |     Eigen::Vector3f error = mean_B - transed_mean_A;
 80 | 
 81 |     Eigen::Matrix<float, 3, 6> dtdx0;
 82 |     dtdx0.block<3, 3>(0, 0) = skew_symmetric(transed_mean_A);
 83 |     dtdx0.block<3, 3>(0, 3) = -Eigen::Matrix3f::Identity();
 84 | 
 85 |     Eigen::Matrix<float, 3, 6> J = dtdx0;
 86 | 
 87 |     Eigen::Matrix<float, 6, 6> H = w * J.transpose() * RCR_inv * J;
 88 |     Eigen::Matrix<float, 6, 1> b = w * J.transpose() * RCR_inv * error;
 89 | 
 90 |     float err = w * error.transpose() * RCR_inv * error;
 91 |     return thrust::make_tuple(err, H, b);
 92 |   }
 93 | 
 94 |   thrust::device_ptr<const Eigen::Isometry3f> trans_eval_ptr;
 95 |   thrust::device_ptr<const Eigen::Isometry3f> trans_ptr;
 96 | 
 97 |   thrust::device_ptr<const Eigen::Vector3f> src_means_ptr;
 98 |   thrust::device_ptr<const Eigen::Matrix3f> src_covs_ptr;
 99 | 
100 |   thrust::device_ptr<const int> voxel_num_points_ptr;
101 |   thrust::device_ptr<const Eigen::Vector3f> voxel_means_ptr;
102 |   thrust::device_ptr<const Eigen::Matrix3f> voxel_covs_ptr;
103 | };
104 | 
105 | template <>
106 | __host__ __device__ float compute_derivatives_kernel<float>::operator()(const thrust::pair<int, int>& correspondence) const {
107 |   const Eigen::Vector3f& mean_A = thrust::raw_pointer_cast(src_means_ptr)[correspondence.first];
108 |   const Eigen::Matrix3f& cov_A = thrust::raw_pointer_cast(src_covs_ptr)[correspondence.first];
109 |   const int voxel_index = correspondence.second;
110 | 
111 |   if (voxel_index < 0) {
112 |     return 0.0f;
113 |   }
114 | 
115 |   int num_points = thrust::raw_pointer_cast(voxel_num_points_ptr)[voxel_index];
116 |   const Eigen::Vector3f& mean_B = thrust::raw_pointer_cast(voxel_means_ptr)[voxel_index];
117 |   const Eigen::Matrix3f& cov_B = thrust::raw_pointer_cast(voxel_covs_ptr)[voxel_index];
118 | 
119 |   const auto& trans_eval = *thrust::raw_pointer_cast(trans_eval_ptr);
120 |   const auto& trans = *thrust::raw_pointer_cast(trans_ptr);
121 | 
122 |   Eigen::Matrix3f R_eval = trans_eval.linear();
123 |   Eigen::Matrix3f R = trans.linear();
124 |   Eigen::Vector3f t = trans.translation();
125 | 
126 |   const Eigen::Vector3f transed_mean_A = R * mean_A + t;
127 | 
128 |   Eigen::Matrix3f RCR = R_eval * cov_A * R_eval.transpose();
129 |   Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse();
130 | 
131 |   float w = sqrtf(num_points);
132 |   Eigen::Vector3f error = mean_B - transed_mean_A;
133 | 
134 |   return w * error.transpose() * RCR_inv * error;
135 | }
136 | 
137 | struct sum_errors_kernel {
138 |   template <typename Tuple>
139 |   __host__ __device__ Tuple operator()(const Tuple& lhs, const Tuple& rhs) {
140 |     return thrust::make_tuple(thrust::get<0>(lhs) + thrust::get<0>(rhs), thrust::get<1>(lhs) + thrust::get<1>(rhs), thrust::get<2>(lhs) + thrust::get<2>(rhs));
141 |   }
142 | };
143 | 
144 | template <>
145 | __host__ __device__ float sum_errors_kernel::operator()(const float& lhs, const float& rhs) {
146 |   return lhs + rhs;
147 | }
148 | 
149 | }  // namespace
150 | 
151 | double compute_derivatives(
152 |   const thrust::device_vector<Eigen::Vector3f>& src_points,
153 |   const thrust::device_vector<Eigen::Matrix3f>& src_covs,
154 |   const GaussianVoxelMap& voxelmap,
155 |   const thrust::device_vector<thrust::pair<int, int>>& voxel_correspondences,
156 |   const thrust::device_ptr<const Eigen::Isometry3f>& linearized_x_ptr,
157 |   const thrust::device_ptr<const Eigen::Isometry3f>& x_ptr,
158 |   Eigen::Matrix<double, 6, 6>* H,
159 |   Eigen::Matrix<double, 6, 1>* b) {
160 |   if (H == nullptr || b == nullptr) {
161 |     float sum_errors = thrust::transform_reduce(
162 |       voxel_correspondences.begin(),
163 |       voxel_correspondences.end(),
164 |       compute_derivatives_kernel<float>(voxelmap, src_points, src_covs, linearized_x_ptr, x_ptr),
165 |       0.0f,
166 |       sum_errors_kernel());
167 | 
168 |     return sum_errors;
169 |   }
170 | 
171 |   auto sum_errors = thrust::transform_reduce(
172 |     voxel_correspondences.begin(),
173 |     voxel_correspondences.end(),
174 |     compute_derivatives_kernel<thrust::tuple<float, Eigen::Matrix<float, 6, 6>, Eigen::Matrix<float, 6, 1>>>(voxelmap, src_points, src_covs, linearized_x_ptr, x_ptr),
175 |     thrust::make_tuple(0.0f, Eigen::Matrix<float, 6, 6>::Zero().eval(), Eigen::Matrix<float, 6, 1>::Zero().eval()),
176 |     sum_errors_kernel());
177 | 
178 |   if (H && b) {
179 |     *H = thrust::get<1>(sum_errors).cast<double>();
180 |     *b = thrust::get<2>(sum_errors).cast<double>();
181 |   }
182 | 
183 |   return thrust::get<0>(sum_errors);
184 | }
185 | }  // namespace cuda
186 | }  // namespace fast_gicp
187 | 


--------------------------------------------------------------------------------
/src/fast_gicp/cuda/compute_mahalanobis.cu:
--------------------------------------------------------------------------------
 1 | #include <fast_gicp/cuda/compute_mahalanobis.cuh>
 2 | 
 3 | #include <fstream>
 4 | 
 5 | namespace fast_gicp {
 6 |   namespace cuda {
 7 | 
 8 | namespace {
 9 | 
10 | struct compute_mahalanobis_kernel {
11 |   compute_mahalanobis_kernel(const GaussianVoxelMap& voxelmap, const Eigen::Isometry3f& x)
12 |   : R(x.linear()),
13 |     t(x.translation()),
14 |     voxel_num_points_ptr(voxelmap.num_points.data()),
15 |     voxel_means_ptr(voxelmap.voxel_means.data()),
16 |     voxel_covs_ptr(voxelmap.voxel_covs.data())
17 |   {}
18 | 
19 |   __host__ __device__
20 |   Eigen::Matrix3f operator() (const thrust::tuple<Eigen::Vector3f, Eigen::Matrix3f, int>& input) const {
21 |     const Eigen::Vector3f& mean_A = thrust::get<0>(input);
22 |     const Eigen::Matrix3f& cov_A = thrust::get<1>(input);
23 |     int voxel_index = thrust::get<2>(input);
24 | 
25 |     if(voxel_index < 0) {
26 |       return Eigen::Matrix3f::Identity();
27 |     }
28 | 
29 |     const Eigen::Matrix3f& cov_B = thrust::raw_pointer_cast(voxel_covs_ptr)[voxel_index];
30 |     Eigen::Matrix3f RCR = cov_B + R * cov_A* R.transpose();
31 |     return RCR.inverse();
32 |   }
33 | 
34 |   const Eigen::Matrix3f R;
35 |   const Eigen::Vector3f t;
36 | 
37 |   thrust::device_ptr<const int> voxel_num_points_ptr;
38 |   thrust::device_ptr<const Eigen::Vector3f> voxel_means_ptr;
39 |   thrust::device_ptr<const Eigen::Matrix3f> voxel_covs_ptr;
40 | };
41 | }
42 | 
43 | 
44 | void compute_mahalanobis(
45 |   const thrust::device_vector<Eigen::Vector3f>& src_points,
46 |   const thrust::device_vector<Eigen::Matrix3f>& src_covs,
47 |   const GaussianVoxelMap& voxelmap,
48 |   const thrust::device_vector<int>& voxel_correspondences,
49 |   const Eigen::Isometry3f& linearized_x,
50 |   thrust::device_vector<Eigen::Matrix3f>& mahalanobis
51 | ) {
52 |   mahalanobis.resize(src_points.size());
53 |   thrust::transform(
54 |     thrust::make_zip_iterator(thrust::make_tuple(src_points.begin(), src_covs.begin(), voxel_correspondences.begin())),
55 |     thrust::make_zip_iterator(thrust::make_tuple(src_points.end(), src_covs.end(), voxel_correspondences.end())),
56 |     mahalanobis.begin(),
57 |     compute_mahalanobis_kernel(voxelmap, linearized_x)
58 |   );
59 | 
60 |   thrust::host_vector<int> corrs = voxel_correspondences;
61 |   thrust::host_vector<Eigen::Matrix3f, Eigen::aligned_allocator<Eigen::Matrix3f>> h_maha = mahalanobis;
62 |   std::ofstream ofs("/tmp/vgicp_cuda_mahalanobis.txt");
63 |   for(int i=0; i<corrs.size(); i++) {
64 |     if(corrs[i] < 0) {
65 |       continue;
66 |     }
67 | 
68 |     ofs << i << std::endl << h_maha[i] << std::endl;
69 |   }
70 | 
71 |   // exit(0);
72 | }
73 | 
74 |   }
75 | }


--------------------------------------------------------------------------------
/src/fast_gicp/cuda/covariance_estimation.cu:
--------------------------------------------------------------------------------
 1 | #include <Eigen/Core>
 2 | #include <Eigen/Geometry>
 3 | #include <Eigen/Eigenvalues>
 4 | 
 5 | #include <thrust/sequence.h>
 6 | #include <thrust/host_vector.h>
 7 | #include <thrust/device_vector.h>
 8 | 
 9 | #include <fast_gicp/gicp/gicp_settings.hpp>
10 | #include <fast_gicp/cuda/covariance_estimation.cuh>
11 | 
12 | namespace fast_gicp {
13 |   namespace cuda {
14 | 
15 | namespace {
16 |   struct covariance_estimation_kernel {
17 |     covariance_estimation_kernel(const thrust::device_vector<Eigen::Vector3f>& points, int k, const thrust::device_vector<int>& k_neighbors, thrust::device_vector<Eigen::Matrix3f>& covariances)
18 |         : k(k), points_ptr(points.data()), k_neighbors_ptr(k_neighbors.data()), covariances_ptr(covariances.data()) {}
19 | 
20 |     __host__ __device__ void operator()(int idx) const {
21 |       // target points buffer & nn output buffer
22 |       const Eigen::Vector3f* points = thrust::raw_pointer_cast(points_ptr);
23 |       const int* k_neighbors = thrust::raw_pointer_cast(k_neighbors_ptr) + idx * k;
24 |       Eigen::Matrix3f* cov = thrust::raw_pointer_cast(covariances_ptr) + idx;
25 | 
26 |       Eigen::Vector3f mean(0.0f, 0.0f, 0.0f);
27 |       cov->setZero();
28 |       for(int i = 0; i < k; i++) {
29 |         const auto& pt = points[k_neighbors[i]];
30 |         mean += pt;
31 |         (*cov) += pt * pt.transpose();
32 |       }
33 |       mean /= k;
34 |       (*cov) = (*cov) / k - mean * mean.transpose();
35 |     }
36 | 
37 |     const int k;
38 |     thrust::device_ptr<const Eigen::Vector3f> points_ptr;
39 |     thrust::device_ptr<const int> k_neighbors_ptr;
40 | 
41 |     thrust::device_ptr<Eigen::Matrix3f> covariances_ptr;
42 |   };
43 |   }  // namespace
44 | 
45 |   void covariance_estimation(const thrust::device_vector<Eigen::Vector3f>& points, int k, const thrust::device_vector<int>& k_neighbors, thrust::device_vector<Eigen::Matrix3f>& covariances) {
46 |     thrust::device_vector<int> d_indices(points.size());
47 |     thrust::sequence(d_indices.begin(), d_indices.end());
48 | 
49 |     covariances.resize(points.size());
50 |     thrust::for_each(d_indices.begin(), d_indices.end(), covariance_estimation_kernel(points, k, k_neighbors, covariances));
51 |   }
52 |   }
53 | }
54 | 


--------------------------------------------------------------------------------
/src/fast_gicp/cuda/covariance_estimation_rbf.cu:
--------------------------------------------------------------------------------
  1 | #include <fast_gicp/cuda/covariance_estimation.cuh>
  2 | 
  3 | #include <thrust/device_vector.h>
  4 | 
  5 | #include <thrust/async/for_each.h>
  6 | #include <thrust/async/transform.h>
  7 | 
  8 | namespace fast_gicp {
  9 | namespace cuda {
 10 | 
 11 | struct NormalDistribution {
 12 | public:
 13 |   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
 14 | 
 15 |   __host__ __device__ NormalDistribution() {}
 16 | 
 17 |   static __host__ __device__ NormalDistribution zero() {
 18 |     NormalDistribution dist;
 19 |     dist.sum_weights = 0.0f;
 20 |     dist.mean.setZero();
 21 |     dist.cov.setZero();
 22 |     return dist;
 23 |   }
 24 | 
 25 |   __host__ __device__ NormalDistribution operator+(const NormalDistribution& rhs) const {
 26 |     NormalDistribution sum;
 27 |     sum.sum_weights = sum_weights + rhs.sum_weights;
 28 |     sum.mean = mean + rhs.mean;
 29 |     sum.cov = cov + rhs.cov;
 30 |     return sum;
 31 |   }
 32 | 
 33 |   __host__ __device__ NormalDistribution& operator+=(const NormalDistribution& rhs) {
 34 |     sum_weights += rhs.sum_weights;
 35 |     mean += rhs.mean;
 36 |     cov += rhs.cov;
 37 |     return *this;
 38 |   }
 39 | 
 40 |   __host__ __device__ void accumulate(const float w, const Eigen::Vector3f& x) {
 41 |     sum_weights += w;
 42 |     mean += w * x;
 43 |     cov += w * x * x.transpose();
 44 |   }
 45 | 
 46 |   __host__ __device__ NormalDistribution& finalize() {
 47 |     Eigen::Vector3f sum_pt = mean;
 48 |     mean /= sum_weights;
 49 |     cov = (cov - mean * sum_pt.transpose()) / sum_weights;
 50 | 
 51 |     return *this;
 52 |   }
 53 | 
 54 |   float sum_weights;
 55 |   Eigen::Vector3f mean;
 56 |   Eigen::Matrix3f cov;
 57 | };
 58 | 
 59 | struct covariance_estimation_kernel {
 60 |   static const int BLOCK_SIZE = 512;
 61 | 
 62 |   covariance_estimation_kernel(thrust::device_ptr<const float> exp_factor_ptr, thrust::device_ptr<const float> max_dist_ptr, thrust::device_ptr<const Eigen::Vector3f> points_ptr)
 63 |   : exp_factor_ptr(exp_factor_ptr),
 64 |     max_dist_ptr(max_dist_ptr),
 65 |     points_ptr(points_ptr) {}
 66 | 
 67 |   __host__ __device__ NormalDistribution operator()(const Eigen::Vector3f& x) const {
 68 |     const float exp_factor = *thrust::raw_pointer_cast(exp_factor_ptr);
 69 |     const float max_dist = *thrust::raw_pointer_cast(max_dist_ptr);
 70 |     const float max_dist_sq = max_dist * max_dist;
 71 |     const Eigen::Vector3f* points = thrust::raw_pointer_cast(points_ptr);
 72 | 
 73 |     NormalDistribution dist = NormalDistribution::zero();
 74 |     for (int i = 0; i < BLOCK_SIZE; i++) {
 75 |       float sq_d = (x - points[i]).squaredNorm();
 76 |       if (sq_d > max_dist_sq) {
 77 |         continue;
 78 |       }
 79 | 
 80 |       float w = expf(-exp_factor * sq_d);
 81 |       dist.accumulate(w, points[i]);
 82 |     }
 83 | 
 84 |     return dist;
 85 |   }
 86 | 
 87 |   thrust::device_ptr<const float> exp_factor_ptr;
 88 |   thrust::device_ptr<const float> max_dist_ptr;
 89 |   thrust::device_ptr<const Eigen::Vector3f> points_ptr;
 90 | };
 91 | 
 92 | struct finalization_kernel {
 93 |   finalization_kernel(const int stride, const thrust::device_vector<NormalDistribution>& accumulated_dists)
 94 |   : stride(stride),
 95 |     accumulated_dists_first(accumulated_dists.data()),
 96 |     accumulated_dists_last(accumulated_dists.data() + accumulated_dists.size()) {}
 97 | 
 98 |   __host__ __device__ Eigen::Matrix3f operator()(int index) const {
 99 |     const NormalDistribution* dists = thrust::raw_pointer_cast(accumulated_dists_first);
100 |     const NormalDistribution* dists_last = thrust::raw_pointer_cast(accumulated_dists_last);
101 |     const int num_dists = dists_last - dists;
102 | 
103 |     NormalDistribution sum = dists[index];
104 |     for (int dist_index = index + stride; dist_index < num_dists; dist_index += stride) {
105 |       sum += dists[dist_index];
106 |     }
107 | 
108 |     return sum.finalize().cov;
109 |   }
110 | 
111 |   const int stride;
112 |   thrust::device_ptr<const NormalDistribution> accumulated_dists_first;
113 |   thrust::device_ptr<const NormalDistribution> accumulated_dists_last;
114 | };
115 | 
116 | void covariance_estimation_rbf(const thrust::device_vector<Eigen::Vector3f>& points, double kernel_width, double max_dist, thrust::device_vector<Eigen::Matrix3f>& covariances) {
117 |   covariances.resize(points.size());
118 | 
119 |   thrust::device_vector<float> constants(2);
120 |   constants[0] = kernel_width;
121 |   constants[1] = max_dist;
122 |   thrust::device_ptr<const float> exp_factor_ptr = constants.data();
123 |   thrust::device_ptr<const float> max_dist_ptr = constants.data() + 1;
124 | 
125 |   int num_blocks = (points.size() + (covariance_estimation_kernel::BLOCK_SIZE - 1)) / covariance_estimation_kernel::BLOCK_SIZE;
126 |   // padding
127 |   thrust::device_vector<Eigen::Vector3f> ext_points(num_blocks * covariance_estimation_kernel::BLOCK_SIZE);
128 |   thrust::copy(points.begin(), points.end(), ext_points.begin());
129 |   thrust::fill(ext_points.begin() + points.size(), ext_points.end(), Eigen::Vector3f(0.0f, 0.0f, 0.0f));
130 | 
131 |   thrust::device_vector<NormalDistribution> accumulated_dists(points.size() * num_blocks);
132 | 
133 |   thrust::system::cuda::detail::unique_stream stream;
134 |   std::vector<thrust::system::cuda::unique_eager_event> events(num_blocks);
135 | 
136 |   // accumulate kerneled point distributions
137 |   for (int i = 0; i < num_blocks; i++) {
138 |     covariance_estimation_kernel kernel(exp_factor_ptr, max_dist_ptr, ext_points.data() + covariance_estimation_kernel::BLOCK_SIZE * i);
139 |     auto event = thrust::async::transform(points.begin(), points.end(), accumulated_dists.begin() + points.size() * i, kernel);
140 |     events[i] = std::move(event);
141 |     thrust::system::cuda::detail::create_dependency(stream, events[i]);
142 |   }
143 | 
144 |   // finalize distributions
145 |   thrust::transform(
146 |     thrust::cuda::par.on(stream.native_handle()),
147 |     thrust::counting_iterator<int>(0),
148 |     thrust::counting_iterator<int>(points.size()),
149 |     covariances.begin(),
150 |     finalization_kernel(points.size(), accumulated_dists));
151 | }
152 | 
153 | }  // namespace cuda
154 | }  // namespace fast_gicp


--------------------------------------------------------------------------------
/src/fast_gicp/cuda/covariance_regularization.cu:
--------------------------------------------------------------------------------
  1 | #include <fast_gicp/cuda/covariance_regularization.cuh>
  2 | 
  3 | #include <Eigen/Core>
  4 | #include <Eigen/Eigenvalues>
  5 | 
  6 | #include <thrust/copy.h>
  7 | #include <thrust/sequence.h>
  8 | #include <thrust/iterator/transform_output_iterator.h>
  9 | 
 10 | namespace fast_gicp {
 11 | namespace cuda {
 12 | 
 13 | namespace {
 14 | 
 15 | struct svd_kernel {
 16 |   __host__ __device__ thrust::tuple<Eigen::Vector3f, Eigen::Vector3f, Eigen::Matrix3f, int> operator()(const thrust::tuple<Eigen::Vector3f, Eigen::Matrix3f, int>& input) const {
 17 |     const auto& mean = thrust::get<0>(input);
 18 |     const auto& cov = thrust::get<1>(input);
 19 | 
 20 |     Eigen::SelfAdjointEigenSolver<Eigen::Matrix3f> eig;
 21 |     eig.computeDirect(cov);
 22 | 
 23 |     return thrust::make_tuple(mean, eig.eigenvalues(), eig.eigenvectors(), thrust::get<2>(input));
 24 |   }
 25 | };
 26 | 
 27 | struct eigenvalue_filter_kernel {
 28 |   __host__ __device__ bool operator()(const thrust::tuple<Eigen::Vector3f, Eigen::Vector3f, Eigen::Matrix3f>& input) const {
 29 |     const auto& values = thrust::get<1>(input);
 30 |     return values[1] > values[2] * 0.1;
 31 |   }
 32 | };
 33 | 
 34 | struct svd_reconstruction_kernel {
 35 |   svd_reconstruction_kernel(
 36 |     const thrust::device_ptr<const Eigen::Matrix3f>& values_diag,
 37 |     thrust::device_vector<Eigen::Matrix3f>& covariances)
 38 |   : values_diag_ptr(values_diag), covariances_ptr(covariances.data()) {}
 39 |   __host__ __device__ void operator()(const thrust::tuple<Eigen::Vector3f, Eigen::Vector3f, Eigen::Matrix3f, int>& input) const {
 40 |     const auto& mean = thrust::get<0>(input);
 41 |     const auto& values = thrust::get<1>(input);
 42 |     const auto& vecs = thrust::get<2>(input);
 43 |     const auto idx = thrust::get<3>(input);
 44 | 
 45 |     Eigen::Matrix3f vecs_inv = vecs.inverse();
 46 |     const auto& values_diag = *thrust::raw_pointer_cast(values_diag_ptr);
 47 |     Eigen::Matrix3f* cov = thrust::raw_pointer_cast(covariances_ptr) + idx;
 48 |     *cov = (vecs * values_diag * vecs_inv).eval();
 49 |   }
 50 |   const thrust::device_ptr<const Eigen::Matrix3f> values_diag_ptr;
 51 |   thrust::device_ptr<Eigen::Matrix3f> covariances_ptr;
 52 | };
 53 | 
 54 | 
 55 | 
 56 | struct covariance_regularization_svd {
 57 |   __host__ __device__ void operator()(Eigen::Matrix3f& cov) const {
 58 |     Eigen::SelfAdjointEigenSolver<Eigen::Matrix3f> eig;
 59 |     eig.computeDirect(cov);
 60 | 
 61 |     // why this doen't work...???
 62 |     // cov = eig.eigenvectors() * values.asDiagonal() * eig.eigenvectors().inverse();
 63 |     Eigen::Matrix3f values = Eigen::Vector3f(1e-3, 1, 1).asDiagonal();
 64 |     Eigen::Matrix3f v_inv = eig.eigenvectors().inverse();
 65 |     cov = eig.eigenvectors() * values * v_inv;
 66 | 
 67 |     // JacobiSVD is not supported on CUDA
 68 |     // Eigen::JacobiSVD(cov, Eigen::ComputeFullU | Eigen::ComputeFullV);
 69 |     // Eigen::Vector3f values(1, 1, 1e-3);
 70 |     // cov = svd.matrixU() * values.asDiagonal() * svd.matrixV().transpose();
 71 |   }
 72 | };
 73 | 
 74 | struct covariance_regularization_frobenius {
 75 |   __host__ __device__ void operator()(Eigen::Matrix3f& cov) const {
 76 |     float lambda = 1e-3;
 77 |     Eigen::Matrix3f C = cov + lambda * Eigen::Matrix3f::Identity();
 78 |     Eigen::Matrix3f C_inv = C.inverse();
 79 |     Eigen::Matrix3f C_norm = (C_inv / C_inv.norm()).inverse();
 80 |     cov = C_norm;
 81 |   }
 82 | };
 83 | 
 84 | struct covariance_regularization_mineig {
 85 |   __host__ __device__ void operator()(Eigen::Matrix3f& cov) const {
 86 |     Eigen::SelfAdjointEigenSolver<Eigen::Matrix3f> eig;
 87 |     eig.computeDirect(cov);
 88 | 
 89 |     Eigen::Vector3f values = eig.eigenvalues();
 90 |     for (int i = 0; i < 3; i++) {
 91 |       values[i] = fmaxf(1e-3f, values[i]);
 92 |     }
 93 | 
 94 |     Eigen::Matrix3f v_diag = Eigen::Matrix3f::Zero();
 95 |     v_diag(0,0) = values.x();
 96 |     v_diag(1,1) = values.y();
 97 |     v_diag(2,2) = values.z();
 98 |     Eigen::Matrix3f v_inv = eig.eigenvectors().inverse();
 99 |     cov = eig.eigenvectors() * v_diag * v_inv;
100 |   }
101 | };
102 | 
103 | }  // namespace
104 | 
105 | void covariance_regularization(thrust::device_vector<Eigen::Vector3f>& means, thrust::device_vector<Eigen::Matrix3f>& covs, RegularizationMethod method) {
106 |   if (method == RegularizationMethod::PLANE) {
107 |     thrust::device_vector<int> d_indices(covs.size());
108 |     thrust::sequence(d_indices.begin(), d_indices.end());
109 |     auto first = thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(means.begin(), covs.begin(), d_indices.begin())), svd_kernel());
110 |     auto last = thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(means.end(), covs.end(), d_indices.end())), svd_kernel());
111 | 
112 |     Eigen::Matrix3f diag_matrix = Eigen::Vector3f(1e-3f, 1.0f, 1.0f).asDiagonal();
113 |     thrust::device_vector<Eigen::Matrix3f> val(1);
114 |     val[0] = diag_matrix;
115 |     thrust::device_ptr<Eigen::Matrix3f> diag_matrix_ptr = val.data();
116 |     thrust::for_each(first, last, svd_reconstruction_kernel(diag_matrix_ptr, covs));
117 | 
118 |   } else if (method == RegularizationMethod::FROBENIUS) {
119 |     thrust::for_each(covs.begin(), covs.end(), covariance_regularization_frobenius());
120 |   } else if (method == RegularizationMethod::MIN_EIG) {
121 |     thrust::for_each(covs.begin(), covs.end(), covariance_regularization_mineig());
122 |   } else {
123 |     std::cerr << "unimplemented covariance regularization method was selected!!" << std::endl;
124 |   }
125 | }
126 | 
127 | }  // namespace cuda
128 | }  // namespace fast_gicp


--------------------------------------------------------------------------------
/src/fast_gicp/cuda/find_voxel_correspondences.cu:
--------------------------------------------------------------------------------
  1 | #include <Eigen/Core>
  2 | #include <Eigen/Geometry>
  3 | 
  4 | #include <thrust/remove.h>
  5 | #include <thrust/device_vector.h>
  6 | #include <thrust/async/transform.h>
  7 | #include <fast_gicp/cuda/vector3_hash.cuh>
  8 | #include <fast_gicp/cuda/gaussian_voxelmap.cuh>
  9 | #include <fast_gicp/cuda/find_voxel_correspondences.cuh>
 10 | 
 11 | namespace fast_gicp {
 12 | namespace cuda {
 13 | 
 14 | namespace {
 15 | 
 16 | struct find_voxel_correspondences_kernel {
 17 |   find_voxel_correspondences_kernel(
 18 |     const GaussianVoxelMap& voxelmap,
 19 |     const thrust::device_vector<Eigen::Vector3f>& src_points,
 20 |     const thrust::device_ptr<const Eigen::Isometry3f>& trans_ptr,
 21 |     thrust::device_ptr<const Eigen::Vector3i> offset_ptr)
 22 |   : trans_ptr(trans_ptr),
 23 |     offset_ptr(offset_ptr),
 24 |     src_points_ptr(src_points.data()),
 25 |     voxelmap_info_ptr(voxelmap.voxelmap_info_ptr.data()),
 26 |     buckets_ptr(voxelmap.buckets.data()),
 27 |     voxel_num_points_ptr(voxelmap.num_points.data()),
 28 |     voxel_means_ptr(voxelmap.voxel_means.data()),
 29 |     voxel_covs_ptr(voxelmap.voxel_covs.data()) {}
 30 | 
 31 |   // lookup voxel
 32 |   __host__ __device__ int lookup_voxel(const Eigen::Vector3f& x) const {
 33 |     const VoxelMapInfo& voxelmap_info = *thrust::raw_pointer_cast(voxelmap_info_ptr);
 34 |     const auto& offset = *thrust::raw_pointer_cast(offset_ptr);
 35 | 
 36 |     Eigen::Vector3i coord = calc_voxel_coord(x, voxelmap_info.voxel_resolution) + offset;
 37 |     uint64_t hash = vector3i_hash(coord);
 38 | 
 39 |     for(int i = 0; i < voxelmap_info.max_bucket_scan_count; i++) {
 40 |       uint64_t bucket_index = (hash + i) % voxelmap_info.num_buckets;
 41 |       const thrust::pair<Eigen::Vector3i, int>& bucket = thrust::raw_pointer_cast(buckets_ptr)[bucket_index];
 42 | 
 43 |       if(bucket.second < 0) {
 44 |         return -1;
 45 |       }
 46 | 
 47 |       if(bucket.first == coord) {
 48 |         return bucket.second;
 49 |       }
 50 |     }
 51 | 
 52 |     return -1;
 53 |   }
 54 | 
 55 |   __host__ __device__ thrust::pair<int, int> operator()(int src_index) const {
 56 |     const auto& trans = *thrust::raw_pointer_cast(trans_ptr);
 57 | 
 58 |     const auto& pt = thrust::raw_pointer_cast(src_points_ptr)[src_index];
 59 |     return thrust::make_pair(src_index, lookup_voxel(trans.linear() * pt + trans.translation()));
 60 |   }
 61 | 
 62 |   const thrust::device_ptr<const Eigen::Isometry3f> trans_ptr;
 63 |   const thrust::device_ptr<const Eigen::Vector3i> offset_ptr;
 64 | 
 65 |   const thrust::device_ptr<const Eigen::Vector3f> src_points_ptr;
 66 | 
 67 |   thrust::device_ptr<const VoxelMapInfo> voxelmap_info_ptr;
 68 |   thrust::device_ptr<const thrust::pair<Eigen::Vector3i, int>> buckets_ptr;
 69 | 
 70 |   thrust::device_ptr<const int> voxel_num_points_ptr;
 71 |   thrust::device_ptr<const Eigen::Vector3f> voxel_means_ptr;
 72 |   thrust::device_ptr<const Eigen::Matrix3f> voxel_covs_ptr;
 73 | };
 74 | 
 75 | struct invalid_correspondence_kernel {
 76 |   __host__ __device__ bool operator() (const thrust::pair<int, int>& corr) const {
 77 |     return corr.first < 0 || corr.second < 0;
 78 |   }
 79 | };
 80 | 
 81 | }  // namespace
 82 | 
 83 | void find_voxel_correspondences(
 84 |   const thrust::device_vector<Eigen::Vector3f>& src_points,
 85 |   const GaussianVoxelMap& voxelmap,
 86 |   const thrust::device_ptr<const Eigen::Isometry3f>& x_ptr,
 87 |   const thrust::device_vector<Eigen::Vector3i>& offsets,
 88 |   thrust::device_vector<thrust::pair<int, int>>& correspondences) {
 89 |   std::vector<thrust::system::cuda::unique_eager_event> events(offsets.size());
 90 | 
 91 |   // find correspondences
 92 |   correspondences.resize(src_points.size() * offsets.size());
 93 |   for(int i=0; i<offsets.size(); i++) {
 94 |     auto event = thrust::async::transform(
 95 |       thrust::counting_iterator<int>(0),
 96 |       thrust::counting_iterator<int>(src_points.size()),
 97 |       correspondences.begin() + src_points.size() * i,
 98 |       find_voxel_correspondences_kernel(voxelmap, src_points, x_ptr, offsets.data() + i));
 99 | 
100 |     events[i] = std::move(event);
101 |   }
102 | 
103 |   // synchronize
104 |   for(auto& event: events) {
105 |     event.wait();
106 |   }
107 | 
108 |   // remove invlid correspondences
109 |   auto remove_loc = thrust::remove_if(correspondences.begin(), correspondences.end(), invalid_correspondence_kernel());
110 |   correspondences.erase(remove_loc, correspondences.end());
111 | }
112 | 
113 | }  // namespace cuda
114 | }  // namespace fast_gicp
115 | 


--------------------------------------------------------------------------------
/src/fast_gicp/cuda/ndt_compute_derivatives.cu:
--------------------------------------------------------------------------------
  1 | #include <fast_gicp/cuda/ndt_compute_derivatives.cuh>
  2 | 
  3 | #include <thrust/transform_reduce.h>
  4 | 
  5 | namespace fast_gicp {
  6 | namespace cuda {
  7 | 
  8 | namespace {
  9 | 
 10 | __host__ __device__ float huber(float k, float x) {
 11 |   float abs_x = fabsf(x);
 12 |   return abs_x <= k ? 1.0 : k / abs_x;
 13 | }
 14 | 
 15 | __host__ __device__ float cauchy(float k, float x) {
 16 |   float k_sq = k * k;
 17 |   return k_sq / (k_sq + x * x);
 18 | }
 19 | 
 20 | // skew symmetric matrix
 21 | __host__ __device__ Eigen::Matrix3f skew_symmetric(const Eigen::Vector3f& x) {
 22 |   Eigen::Matrix3f skew = Eigen::Matrix3f::Zero();
 23 |   skew(0, 1) = -x[2];
 24 |   skew(0, 2) = x[1];
 25 |   skew(1, 0) = x[2];
 26 |   skew(1, 2) = -x[0];
 27 |   skew(2, 0) = -x[1];
 28 |   skew(2, 1) = x[0];
 29 | 
 30 |   return skew;
 31 | }
 32 | 
 33 | struct p2d_ndt_compute_derivatives_kernel {
 34 |   p2d_ndt_compute_derivatives_kernel(
 35 |     const GaussianVoxelMap& target_voxelmap,
 36 |     const thrust::device_vector<Eigen::Vector3f>& source_points,
 37 |     const thrust::device_ptr<const Eigen::Isometry3f>& x_eval_ptr,
 38 |     const thrust::device_ptr<const Eigen::Isometry3f>& x_ptr)
 39 |   : trans_eval_ptr(x_eval_ptr),
 40 |     trans_ptr(x_ptr),
 41 |     src_means_ptr(source_points.data()),
 42 |     voxelmap_info_ptr(target_voxelmap.voxelmap_info_ptr.data()),
 43 |     voxel_num_points_ptr(target_voxelmap.num_points.data()),
 44 |     voxel_means_ptr(target_voxelmap.voxel_means.data()),
 45 |     voxel_covs_ptr(target_voxelmap.voxel_covs.data()) {}
 46 | 
 47 |   // Here, we use simple point-to-distribution MLE distance equivalent to [Biber, IROS2003]
 48 |   // Because this formulation can be more sensitive to outliers compared to another formulation based on
 49 |   // Gaussian + uniform distribution [Magnusson, 2009], we use some robust kernels to filter out outliers
 50 |   __host__ __device__ thrust::tuple<float, Eigen::Matrix<float, 6, 6>, Eigen::Matrix<float, 6, 1>> operator()(const thrust::pair<int, int>& correspondence) const {
 51 |     const Eigen::Vector3f& mean_A = thrust::raw_pointer_cast(src_means_ptr)[correspondence.first];
 52 | 
 53 |     if (correspondence.second < 0) {
 54 |       return thrust::make_tuple(0.0f, Eigen::Matrix<float, 6, 6>::Zero().eval(), Eigen::Matrix<float, 6, 1>::Zero().eval());
 55 |     }
 56 | 
 57 |     int num_points = thrust::raw_pointer_cast(voxel_num_points_ptr)[correspondence.second];
 58 |     const Eigen::Vector3f& mean_B = thrust::raw_pointer_cast(voxel_means_ptr)[correspondence.second];
 59 |     const Eigen::Matrix3f& cov_B = thrust::raw_pointer_cast(voxel_covs_ptr)[correspondence.second];
 60 | 
 61 |     if (num_points <= 6) {
 62 |       return thrust::make_tuple(0.0f, Eigen::Matrix<float, 6, 6>::Zero().eval(), Eigen::Matrix<float, 6, 1>::Zero().eval());
 63 |     }
 64 | 
 65 |     const auto& trans_eval = *thrust::raw_pointer_cast(trans_eval_ptr);
 66 |     const auto& trans = *thrust::raw_pointer_cast(trans_ptr);
 67 | 
 68 |     Eigen::Matrix3f R_eval = trans_eval.linear();
 69 |     Eigen::Matrix3f R = trans.linear();
 70 |     Eigen::Vector3f t = trans.translation();
 71 | 
 72 |     const Eigen::Vector3f transed_mean_A = R * mean_A + t;
 73 | 
 74 |     Eigen::Matrix3f RCR_inv = cov_B.inverse();
 75 | 
 76 |     Eigen::Vector3f error = mean_B - transed_mean_A;
 77 | 
 78 |     float w = cauchy(thrust::raw_pointer_cast(voxelmap_info_ptr)->voxel_resolution, error.norm());
 79 |     float err = w * error.transpose() * RCR_inv * error;
 80 | 
 81 |     Eigen::Matrix<float, 3, 6> dtdx0;
 82 |     dtdx0.block<3, 3>(0, 0) = skew_symmetric(transed_mean_A);
 83 |     dtdx0.block<3, 3>(0, 3) = -Eigen::Matrix3f::Identity();
 84 | 
 85 |     Eigen::Matrix<float, 3, 6> J = dtdx0;
 86 | 
 87 |     Eigen::Matrix<float, 6, 6> H = w * J.transpose() * RCR_inv * J;
 88 |     Eigen::Matrix<float, 6, 1> b = w * J.transpose() * RCR_inv * error;
 89 | 
 90 |     return thrust::make_tuple(err, H, b);
 91 |   }
 92 | 
 93 |   thrust::device_ptr<const Eigen::Isometry3f> trans_eval_ptr;
 94 |   thrust::device_ptr<const Eigen::Isometry3f> trans_ptr;
 95 | 
 96 |   thrust::device_ptr<const Eigen::Vector3f> src_means_ptr;
 97 | 
 98 |   thrust::device_ptr<const VoxelMapInfo> voxelmap_info_ptr;
 99 |   thrust::device_ptr<const int> voxel_num_points_ptr;
100 |   thrust::device_ptr<const Eigen::Vector3f> voxel_means_ptr;
101 |   thrust::device_ptr<const Eigen::Matrix3f> voxel_covs_ptr;
102 | };
103 | 
104 | struct d2d_ndt_compute_derivatives_kernel {
105 |   d2d_ndt_compute_derivatives_kernel(
106 |     const GaussianVoxelMap& target_voxelmap,
107 |     const GaussianVoxelMap& source_voxelmap,
108 |     const thrust::device_ptr<const Eigen::Isometry3f>& x_eval_ptr,
109 |     const thrust::device_ptr<const Eigen::Isometry3f>& x_ptr)
110 |   : trans_eval_ptr(x_eval_ptr),
111 |     trans_ptr(x_ptr),
112 |     src_means_ptr(source_voxelmap.voxel_means.data()),
113 |     src_covs_ptr(source_voxelmap.voxel_covs.data()),
114 |     voxelmap_info_ptr(target_voxelmap.voxelmap_info_ptr.data()),
115 |     voxel_num_points_ptr(target_voxelmap.num_points.data()),
116 |     voxel_means_ptr(target_voxelmap.voxel_means.data()),
117 |     voxel_covs_ptr(target_voxelmap.voxel_covs.data()) {}
118 | 
119 |   // calculate derivatives
120 |   __host__ __device__ thrust::tuple<float, Eigen::Matrix<float, 6, 6>, Eigen::Matrix<float, 6, 1>> operator()(const thrust::pair<int, int>& correspondence) const {
121 |     const Eigen::Vector3f& mean_A = thrust::raw_pointer_cast(src_means_ptr)[correspondence.first];
122 |     const Eigen::Matrix3f& cov_A = thrust::raw_pointer_cast(src_covs_ptr)[correspondence.first];
123 | 
124 |     if (correspondence.second < 0) {
125 |       return thrust::make_tuple(0.0f, Eigen::Matrix<float, 6, 6>::Zero().eval(), Eigen::Matrix<float, 6, 1>::Zero().eval());
126 |     }
127 | 
128 |     int num_points = thrust::raw_pointer_cast(voxel_num_points_ptr)[correspondence.second];
129 |     const Eigen::Vector3f& mean_B = thrust::raw_pointer_cast(voxel_means_ptr)[correspondence.second];
130 |     const Eigen::Matrix3f& cov_B = thrust::raw_pointer_cast(voxel_covs_ptr)[correspondence.second];
131 | 
132 |     if (num_points <= 6) {
133 |       return thrust::make_tuple(0.0f, Eigen::Matrix<float, 6, 6>::Zero().eval(), Eigen::Matrix<float, 6, 1>::Zero().eval());
134 |     }
135 | 
136 |     const auto& trans_eval = *thrust::raw_pointer_cast(trans_eval_ptr);
137 |     const auto& trans = *thrust::raw_pointer_cast(trans_ptr);
138 | 
139 |     Eigen::Matrix3f R_eval = trans_eval.linear();
140 |     Eigen::Matrix3f R = trans.linear();
141 |     Eigen::Vector3f t = trans.translation();
142 | 
143 |     const Eigen::Vector3f transed_mean_A = R * mean_A + t;
144 | 
145 |     Eigen::Matrix3f RCR = R_eval * cov_A * R_eval.transpose();
146 |     Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse();
147 | 
148 |     Eigen::Vector3f error = mean_B - transed_mean_A;
149 | 
150 |     float w = cauchy(thrust::raw_pointer_cast(voxelmap_info_ptr)->voxel_resolution, error.norm());
151 |     float err = w * error.transpose() * RCR_inv * error;
152 | 
153 |     Eigen::Matrix<float, 3, 6> dtdx0;
154 |     dtdx0.block<3, 3>(0, 0) = skew_symmetric(transed_mean_A);
155 |     dtdx0.block<3, 3>(0, 3) = -Eigen::Matrix3f::Identity();
156 | 
157 |     Eigen::Matrix<float, 3, 6> J = dtdx0;
158 | 
159 |     Eigen::Matrix<float, 6, 6> H = w * J.transpose() * RCR_inv * J;
160 |     Eigen::Matrix<float, 6, 1> b = w * J.transpose() * RCR_inv * error;
161 | 
162 |     return thrust::make_tuple(err, H, b);
163 |   }
164 | 
165 |   thrust::device_ptr<const Eigen::Isometry3f> trans_eval_ptr;
166 |   thrust::device_ptr<const Eigen::Isometry3f> trans_ptr;
167 | 
168 |   thrust::device_ptr<const Eigen::Vector3f> src_means_ptr;
169 |   thrust::device_ptr<const Eigen::Matrix3f> src_covs_ptr;
170 | 
171 |   thrust::device_ptr<const VoxelMapInfo> voxelmap_info_ptr;
172 |   thrust::device_ptr<const int> voxel_num_points_ptr;
173 |   thrust::device_ptr<const Eigen::Vector3f> voxel_means_ptr;
174 |   thrust::device_ptr<const Eigen::Matrix3f> voxel_covs_ptr;
175 | };
176 | 
177 | struct sum_errors_kernel {
178 |   using Tuple = thrust::tuple<float, Eigen::Matrix<float, 6, 6>, Eigen::Matrix<float, 6, 1>>;
179 | 
180 |   __host__ __device__ Tuple operator()(const Tuple& lhs, const Tuple& rhs) {
181 |     return thrust::make_tuple(thrust::get<0>(lhs) + thrust::get<0>(rhs), thrust::get<1>(lhs) + thrust::get<1>(rhs), thrust::get<2>(lhs) + thrust::get<2>(rhs));
182 |   }
183 | };
184 | 
185 | }  // namespace
186 | 
187 | double p2d_ndt_compute_derivatives(
188 |   const GaussianVoxelMap& target_voxelmap,
189 |   const thrust::device_vector<Eigen::Vector3f>& source_points,
190 |   const thrust::device_vector<thrust::pair<int, int>>& correspondences,
191 |   const thrust::device_ptr<const Eigen::Isometry3f>& linearized_x_ptr,
192 |   const thrust::device_ptr<const Eigen::Isometry3f>& x_ptr,
193 |   Eigen::Matrix<double, 6, 6>* H,
194 |   Eigen::Matrix<double, 6, 1>* b) {
195 |   auto sum_errors = thrust::transform_reduce(
196 |     correspondences.begin(),
197 |     correspondences.end(),
198 |     p2d_ndt_compute_derivatives_kernel(target_voxelmap, source_points, linearized_x_ptr, x_ptr),
199 |     thrust::make_tuple(0.0f, Eigen::Matrix<float, 6, 6>::Zero().eval(), Eigen::Matrix<float, 6, 1>::Zero().eval()),
200 |     sum_errors_kernel());
201 | 
202 |   if (H && b) {
203 |     *H = thrust::get<1>(sum_errors).cast<double>();
204 |     *b = thrust::get<2>(sum_errors).cast<double>();
205 |   }
206 | 
207 |   return thrust::get<0>(sum_errors);
208 | }
209 | 
210 | double d2d_ndt_compute_derivatives(
211 |   const GaussianVoxelMap& target_voxelmap,
212 |   const GaussianVoxelMap& source_voxelmap,
213 |   const thrust::device_vector<thrust::pair<int, int>>& correspondences,
214 |   const thrust::device_ptr<const Eigen::Isometry3f>& linearized_x_ptr,
215 |   const thrust::device_ptr<const Eigen::Isometry3f>& x_ptr,
216 |   Eigen::Matrix<double, 6, 6>* H,
217 |   Eigen::Matrix<double, 6, 1>* b) {
218 |   auto sum_errors = thrust::transform_reduce(
219 |     correspondences.begin(),
220 |     correspondences.end(),
221 |     d2d_ndt_compute_derivatives_kernel(target_voxelmap, source_voxelmap, linearized_x_ptr, x_ptr),
222 |     thrust::make_tuple(0.0f, Eigen::Matrix<float, 6, 6>::Zero().eval(), Eigen::Matrix<float, 6, 1>::Zero().eval()),
223 |     sum_errors_kernel());
224 | 
225 |   if (H && b) {
226 |     *H = thrust::get<1>(sum_errors).cast<double>();
227 |     *b = thrust::get<2>(sum_errors).cast<double>();
228 |   }
229 | 
230 |   return thrust::get<0>(sum_errors);
231 | }
232 | 
233 | }  // namespace cuda
234 | }  // namespace fast_gicp
235 | 


--------------------------------------------------------------------------------
/src/fast_gicp/cuda/ndt_cuda.cu:
--------------------------------------------------------------------------------
  1 | #include <fast_gicp/cuda/ndt_cuda.cuh>
  2 | 
  3 | #include <thrust/device_vector.h>
  4 | 
  5 | #include <fast_gicp/cuda/gaussian_voxelmap.cuh>
  6 | #include <fast_gicp/cuda/covariance_regularization.cuh>
  7 | #include <fast_gicp/cuda/find_voxel_correspondences.cuh>
  8 | #include <fast_gicp/cuda/ndt_compute_derivatives.cuh>
  9 | 
 10 | namespace fast_gicp {
 11 | namespace cuda {
 12 | 
 13 | NDTCudaCore::NDTCudaCore() {
 14 |   cudaDeviceSynchronize();
 15 |   resolution = 1.0;
 16 |   linearized_x.setIdentity();
 17 | 
 18 |   offsets.reset(new thrust::device_vector<Eigen::Vector3i>(1));
 19 |   (*offsets)[0] = Eigen::Vector3i::Zero().eval();
 20 | 
 21 |   distance_mode = fast_gicp::NDTDistanceMode::D2D;
 22 |   set_neighbor_search_method(fast_gicp::NeighborSearchMethod::DIRECT7, 0.0);
 23 | }
 24 | 
 25 | NDTCudaCore::~NDTCudaCore() {}
 26 | 
 27 | void NDTCudaCore::set_distance_mode(fast_gicp::NDTDistanceMode mode) {
 28 |   this->distance_mode = mode;
 29 | }
 30 | 
 31 | void NDTCudaCore::set_resolution(double resolution) {
 32 |   this->resolution = resolution;
 33 | }
 34 | 
 35 | void NDTCudaCore::set_neighbor_search_method(fast_gicp::NeighborSearchMethod method, double radius) {
 36 |   thrust::host_vector<Eigen::Vector3i, Eigen::aligned_allocator<Eigen::Vector3i>> h_offsets;
 37 | 
 38 |   switch (method) {
 39 |     default:
 40 |       std::cerr << "here must not be reached" << std::endl;
 41 |       abort();
 42 | 
 43 |     case fast_gicp::NeighborSearchMethod::DIRECT1:
 44 |       h_offsets.resize(1);
 45 |       h_offsets[0] = Eigen::Vector3i::Zero();
 46 |       break;
 47 | 
 48 |     case fast_gicp::NeighborSearchMethod::DIRECT7:
 49 |       h_offsets.resize(7);
 50 |       h_offsets[0] = Eigen::Vector3i(0, 0, 0);
 51 |       h_offsets[1] = Eigen::Vector3i(1, 0, 0);
 52 |       h_offsets[2] = Eigen::Vector3i(-1, 0, 0);
 53 |       h_offsets[3] = Eigen::Vector3i(0, 1, 0);
 54 |       h_offsets[4] = Eigen::Vector3i(0, -1, 0);
 55 |       h_offsets[5] = Eigen::Vector3i(0, 0, 1);
 56 |       h_offsets[6] = Eigen::Vector3i(0, 0, -1);
 57 |       break;
 58 | 
 59 |     case fast_gicp::NeighborSearchMethod::DIRECT27:
 60 |       h_offsets.reserve(27);
 61 |       for (int i = 0; i < 3; i++) {
 62 |         for (int j = 0; j < 3; j++) {
 63 |           for (int k = 0; k < 3; k++) {
 64 |             h_offsets.push_back(Eigen::Vector3i(i - 1, j - 1, k - 1));
 65 |           }
 66 |         }
 67 |       }
 68 |       break;
 69 | 
 70 |     case fast_gicp::NeighborSearchMethod::DIRECT_RADIUS:
 71 |       h_offsets.reserve(50);
 72 |       int range = std::ceil(radius);
 73 |       for (int i = -range; i <= range; i++) {
 74 |         for (int j = -range; j <= range; j++) {
 75 |           for (int k = -range; k <= range; k++) {
 76 |             Eigen::Vector3i offset(i, j, k);
 77 |             if (offset.cast<double>().norm() <= radius + 1e-3) {
 78 |               h_offsets.push_back(offset);
 79 |             }
 80 |           }
 81 |         }
 82 |       }
 83 | 
 84 |       break;
 85 |   }
 86 | 
 87 |   *offsets = h_offsets;
 88 | }
 89 | 
 90 | void NDTCudaCore::swap_source_and_target() {
 91 |   source_points.swap(target_points);
 92 |   source_voxelmap.swap(target_voxelmap);
 93 | }
 94 | 
 95 | void NDTCudaCore::set_source_cloud(const std::vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>>& cloud) {
 96 |   thrust::host_vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>> points(cloud.begin(), cloud.end());
 97 |   if (!source_points) {
 98 |     source_points.reset(new Points());
 99 |   }
100 | 
101 |   *source_points = points;
102 |   source_voxelmap.reset();
103 | }
104 | 
105 | void NDTCudaCore::set_target_cloud(const std::vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>>& cloud) {
106 |   thrust::host_vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f>> points(cloud.begin(), cloud.end());
107 |   if (!target_points) {
108 |     target_points.reset(new Points());
109 |   }
110 | 
111 |   *target_points = points;
112 |   target_voxelmap.reset();
113 | }
114 | 
115 | void NDTCudaCore::create_voxelmaps() {
116 |   create_source_voxelmap();
117 |   create_target_voxelmap();
118 | }
119 | 
120 | void NDTCudaCore::create_source_voxelmap() {
121 |   assert(source_points);
122 |   if (source_voxelmap || distance_mode == fast_gicp::NDTDistanceMode::P2D) {
123 |     return;
124 |   }
125 | 
126 |   source_voxelmap.reset(new GaussianVoxelMap(resolution));
127 |   source_voxelmap->create_voxelmap(*source_points);
128 |   covariance_regularization(source_voxelmap->voxel_means, source_voxelmap->voxel_covs, fast_gicp::RegularizationMethod::MIN_EIG);
129 | }
130 | 
131 | void NDTCudaCore::create_target_voxelmap() {
132 |   assert(target_points);
133 |   if (target_voxelmap) {
134 |     return;
135 |   }
136 | 
137 |   target_voxelmap.reset(new GaussianVoxelMap(resolution));
138 |   target_voxelmap->create_voxelmap(*target_points);
139 |   covariance_regularization(target_voxelmap->voxel_means, target_voxelmap->voxel_covs, fast_gicp::RegularizationMethod::MIN_EIG);
140 | }
141 | 
142 | void NDTCudaCore::update_correspondences(const Eigen::Isometry3d& trans) {
143 |   thrust::device_vector<Eigen::Isometry3f> trans_ptr(1);
144 |   trans_ptr[0] = trans.cast<float>();
145 | 
146 |   if (correspondences == nullptr) {
147 |     correspondences.reset(new Correspondences());
148 |   }
149 |   linearized_x = trans.cast<float>();
150 | 
151 |   switch (distance_mode) {
152 |     case fast_gicp::NDTDistanceMode::P2D:
153 |       find_voxel_correspondences(*source_points, *target_voxelmap, trans_ptr.data(), *offsets, *correspondences);
154 |       break;
155 | 
156 |     case fast_gicp::NDTDistanceMode::D2D:
157 |       find_voxel_correspondences(source_voxelmap->voxel_means, *target_voxelmap, trans_ptr.data(), *offsets, *correspondences);
158 |       break;
159 |   }
160 | }
161 | 
162 | double NDTCudaCore::compute_error(const Eigen::Isometry3d& trans, Eigen::Matrix<double, 6, 6>* H, Eigen::Matrix<double, 6, 1>* b) const {
163 |   thrust::host_vector<Eigen::Isometry3f, Eigen::aligned_allocator<Eigen::Isometry3f>> trans_(2);
164 |   trans_[0] = linearized_x;
165 |   trans_[1] = trans.cast<float>();
166 | 
167 |   thrust::device_vector<Eigen::Isometry3f> trans_ptr = trans_;
168 | 
169 |   switch (distance_mode) {
170 |     default:
171 |     case fast_gicp::NDTDistanceMode::P2D:
172 |       return p2d_ndt_compute_derivatives(*target_voxelmap, *source_points, *correspondences, trans_ptr.data(), trans_ptr.data() + 1, H, b);
173 | 
174 |     case fast_gicp::NDTDistanceMode::D2D:
175 |       return d2d_ndt_compute_derivatives(*target_voxelmap, *source_voxelmap, *correspondences, trans_ptr.data(), trans_ptr.data() + 1, H, b);
176 |   }
177 | }
178 | 
179 | }  // namespace cuda
180 | 
181 | }  // namespace fast_gicp


--------------------------------------------------------------------------------
/src/fast_gicp/gicp/experimental/fast_gicp_mp.cpp:
--------------------------------------------------------------------------------
1 | #include <fast_gicp/gicp/experimental/fast_gicp_mp.hpp>
2 | #include <fast_gicp/gicp/experimental/fast_gicp_mp_impl.hpp>
3 | 
4 | // template class fast_gicp::FastGICPMultiPoints<pcl::PointXYZ, pcl::PointXYZ>;
5 | template class fast_gicp::FastGICPMultiPoints<pcl::PointXYZI, pcl::PointXYZI>;
6 | 


--------------------------------------------------------------------------------
/src/fast_gicp/gicp/fast_gicp.cpp:
--------------------------------------------------------------------------------
1 | #include <fast_gicp/gicp/fast_gicp.hpp>
2 | #include <fast_gicp/gicp/impl/fast_gicp_impl.hpp>
3 | 
4 | template class fast_gicp::FastGICP<pcl::PointXYZ, pcl::PointXYZ>;
5 | template class fast_gicp::FastGICP<pcl::PointXYZI, pcl::PointXYZI>;
6 | template class fast_gicp::FastGICP<pcl::PointNormal, pcl::PointNormal>;
7 | template class fast_gicp::FastGICP<pcl::PointXYZRGB, pcl::PointXYZRGB>;


--------------------------------------------------------------------------------
/src/fast_gicp/gicp/fast_gicp_st.cpp:
--------------------------------------------------------------------------------
1 | #include <fast_gicp/gicp/fast_gicp_st.hpp>
2 | #include <fast_gicp/gicp/impl/fast_gicp_st_impl.hpp>
3 | 
4 | template class fast_gicp::FastGICPSingleThread<pcl::PointXYZ, pcl::PointXYZ>;
5 | template class fast_gicp::FastGICPSingleThread<pcl::PointXYZI, pcl::PointXYZI>;
6 | template class fast_gicp::FastGICPSingleThread<pcl::PointNormal, pcl::PointNormal>;
7 | 


--------------------------------------------------------------------------------
/src/fast_gicp/gicp/fast_vgicp.cpp:
--------------------------------------------------------------------------------
1 | #include <fast_gicp/gicp/fast_vgicp.hpp>
2 | #include <fast_gicp/gicp/impl/fast_vgicp_impl.hpp>
3 | 
4 | template class fast_gicp::FastVGICP<pcl::PointXYZ, pcl::PointXYZ>;
5 | template class fast_gicp::FastVGICP<pcl::PointXYZI, pcl::PointXYZI>;
6 | template class fast_gicp::FastVGICP<pcl::PointNormal, pcl::PointNormal>;
7 | template class fast_gicp::FastVGICP<pcl::PointXYZRGB, pcl::PointXYZRGB>;


--------------------------------------------------------------------------------
/src/fast_gicp/gicp/fast_vgicp_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <fast_gicp/gicp/fast_vgicp_cuda.hpp>
2 | #include <fast_gicp/gicp/impl/fast_vgicp_cuda_impl.hpp>
3 | 
4 | template class fast_gicp::FastVGICPCuda<pcl::PointXYZ, pcl::PointXYZ>;
5 | template class fast_gicp::FastVGICPCuda<pcl::PointXYZI, pcl::PointXYZI>;
6 | template class fast_gicp::FastVGICPCuda<pcl::PointNormal, pcl::PointNormal>;
7 | template class fast_gicp::FastVGICPCuda<pcl::PointXYZRGB, pcl::PointXYZRGB>;


--------------------------------------------------------------------------------
/src/fast_gicp/gicp/lsq_registration.cpp:
--------------------------------------------------------------------------------
1 | #include <fast_gicp/gicp/lsq_registration.hpp>
2 | #include <fast_gicp/gicp/impl/lsq_registration_impl.hpp>
3 | 
4 | template class fast_gicp::LsqRegistration<pcl::PointXYZ, pcl::PointXYZ>;
5 | template class fast_gicp::LsqRegistration<pcl::PointXYZI, pcl::PointXYZI>;
6 | template class fast_gicp::LsqRegistration<pcl::PointNormal, pcl::PointNormal>;
7 | template class fast_gicp::LsqRegistration<pcl::PointXYZRGB, pcl::PointXYZRGB>;


--------------------------------------------------------------------------------
/src/fast_gicp/ndt/ndt_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <fast_gicp/ndt/ndt_cuda.hpp>
2 | #include <fast_gicp/ndt/impl/ndt_cuda_impl.hpp>
3 | 
4 | template class fast_gicp::NDTCuda<pcl::PointXYZ, pcl::PointXYZ>;
5 | template class fast_gicp::NDTCuda<pcl::PointXYZI, pcl::PointXYZI>;
6 | template class fast_gicp::NDTCuda<pcl::PointNormal, pcl::PointNormal>;
7 | 


--------------------------------------------------------------------------------
/src/kitti.cpp:
--------------------------------------------------------------------------------
  1 | #include <chrono>
  2 | #include <iostream>
  3 | #include <boost/format.hpp>
  4 | #include <boost/filesystem.hpp>
  5 | #include <boost/circular_buffer.hpp>
  6 | 
  7 | #include <pcl/point_types.h>
  8 | #include <pcl/point_cloud.h>
  9 | #include <pcl/filters/approximate_voxel_grid.h>
 10 | 
 11 | #include <pcl/visualization/pcl_visualizer.h>
 12 | #include <pcl/visualization/point_cloud_color_handlers.h>
 13 | 
 14 | #include <fast_gicp/gicp/fast_gicp.hpp>
 15 | #include <fast_gicp/gicp/fast_vgicp.hpp>
 16 | 
 17 | #ifdef USE_VGICP_CUDA
 18 | #include <fast_gicp/ndt/ndt_cuda.hpp>
 19 | #include <fast_gicp/gicp/fast_vgicp_cuda.hpp>
 20 | #endif
 21 | 
 22 | class KittiLoader {
 23 | public:
 24 |   KittiLoader(const std::string& dataset_path) : dataset_path(dataset_path) {
 25 |     for (num_frames = 0;; num_frames++) {
 26 |       std::string filename = (boost::format("%s/%06d.bin") % dataset_path % num_frames).str();
 27 |       if (!boost::filesystem::exists(filename)) {
 28 |         break;
 29 |       }
 30 |     }
 31 | 
 32 |     if (num_frames == 0) {
 33 |       std::cerr << "error: no files in " << dataset_path << std::endl;
 34 |     }
 35 |   }
 36 |   ~KittiLoader() {}
 37 | 
 38 |   size_t size() const { return num_frames; }
 39 | 
 40 |   pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud(size_t i) const {
 41 |     std::string filename = (boost::format("%s/%06d.bin") % dataset_path % i).str();
 42 |     FILE* file = fopen(filename.c_str(), "rb");
 43 |     if (!file) {
 44 |       std::cerr << "error: failed to load " << filename << std::endl;
 45 |       return nullptr;
 46 |     }
 47 | 
 48 |     std::vector<float> buffer(1000000);
 49 |     size_t num_points = fread(reinterpret_cast<char*>(buffer.data()), sizeof(float), buffer.size(), file) / 4;
 50 |     fclose(file);
 51 | 
 52 |     pcl::PointCloud<pcl::PointXYZ>::Ptr cloud(new pcl::PointCloud<pcl::PointXYZ>());
 53 |     cloud->resize(num_points);
 54 | 
 55 |     for (int i = 0; i < num_points; i++) {
 56 |       auto& pt = cloud->at(i);
 57 |       pt.x = buffer[i * 4];
 58 |       pt.y = buffer[i * 4 + 1];
 59 |       pt.z = buffer[i * 4 + 2];
 60 |       // pt.intensity = buffer[i * 4 + 3];
 61 |     }
 62 | 
 63 |     return cloud;
 64 |   }
 65 | 
 66 | private:
 67 |   int num_frames;
 68 |   std::string dataset_path;
 69 | };
 70 | 
 71 | int main(int argc, char** argv) {
 72 |   if (argc < 2) {
 73 |     std::cout << "usage: gicp_kitti /your/kitti/path/sequences/00/velodyne" << std::endl;
 74 |     return 0;
 75 |   }
 76 | 
 77 |   KittiLoader kitti(argv[1]);
 78 | 
 79 |   // use downsample_resolution=1.0 for fast registration
 80 |   double downsample_resolution = 0.25;
 81 |   pcl::ApproximateVoxelGrid<pcl::PointXYZ> voxelgrid;
 82 |   voxelgrid.setLeafSize(downsample_resolution, downsample_resolution, downsample_resolution);
 83 | 
 84 |   // registration method
 85 |   // you should fine-tune hyper-parameters (e.g., voxel resolution, max correspondence distance) for the best result
 86 |   fast_gicp::FastGICP<pcl::PointXYZ, pcl::PointXYZ> gicp;
 87 |   // fast_gicp::FastVGICP<pcl::PointXYZ, pcl::PointXYZ> gicp;
 88 |   // fast_gicp::FastVGICPCuda<pcl::PointXYZ, pcl::PointXYZ> gicp;
 89 |   // fast_gicp::NDTCuda<pcl::PointXYZ, pcl::PointXYZ> gicp;
 90 |   // gicp.setResolution(1.0);
 91 |   // gicp.setNearestNeighborSearchMethod(fast_gicp::NearestNeighborMethod::GPU_RBF_KERNEL);
 92 |   gicp.setMaxCorrespondenceDistance(1.0);
 93 | 
 94 |   // set initial frame as target
 95 |   voxelgrid.setInputCloud(kitti.cloud(0));
 96 |   pcl::PointCloud<pcl::PointXYZ>::Ptr target(new pcl::PointCloud<pcl::PointXYZ>);
 97 |   voxelgrid.filter(*target);
 98 |   gicp.setInputTarget(target);
 99 | 
100 |   // sensor pose sequence
101 |   std::vector<Eigen::Isometry3d, Eigen::aligned_allocator<Eigen::Isometry3d>> poses(kitti.size());
102 |   poses[0].setIdentity();
103 | 
104 |   // trajectory for visualization
105 |   pcl::PointCloud<pcl::PointXYZ>::Ptr trajectory(new pcl::PointCloud<pcl::PointXYZ>);
106 |   trajectory->push_back(pcl::PointXYZ(0.0f, 0.0f, 0.0f));
107 | 
108 |   pcl::visualization::PCLVisualizer vis;
109 |   vis.addPointCloud<pcl::PointXYZ>(trajectory, "trajectory");
110 | 
111 |   // for calculating FPS
112 |   boost::circular_buffer<std::chrono::high_resolution_clock::time_point> stamps(30);
113 |   stamps.push_back(std::chrono::high_resolution_clock::now());
114 | 
115 |   for (int i = 1; i < kitti.size(); i++) {
116 |     // set the current frame as source
117 |     voxelgrid.setInputCloud(kitti.cloud(i));
118 |     pcl::PointCloud<pcl::PointXYZ>::Ptr source(new pcl::PointCloud<pcl::PointXYZ>);
119 |     voxelgrid.filter(*source);
120 |     gicp.setInputSource(source);
121 | 
122 |     // align and swap source and target cloud for next registration
123 |     pcl::PointCloud<pcl::PointXYZ>::Ptr aligned(new pcl::PointCloud<pcl::PointXYZ>);
124 |     gicp.align(*aligned);
125 |     gicp.swapSourceAndTarget();
126 | 
127 |     // accumulate pose
128 |     poses[i] = poses[i - 1] * gicp.getFinalTransformation().cast<double>();
129 | 
130 |     // FPS display
131 |     stamps.push_back(std::chrono::high_resolution_clock::now());
132 |     std::cout << stamps.size() / (std::chrono::duration_cast<std::chrono::nanoseconds>(stamps.back() - stamps.front()).count() / 1e9) << "fps" << std::endl;
133 | 
134 |     // visualization
135 |     trajectory->push_back(pcl::PointXYZ(poses[i](0, 3), poses[i](1, 3), poses[i](2, 3)));
136 |     vis.updatePointCloud<pcl::PointXYZ>(trajectory, pcl::visualization::PointCloudColorHandlerCustom<pcl::PointXYZ>(trajectory, 255.0, 0.0, 0.0), "trajectory");
137 |     vis.spinOnce();
138 |   }
139 | 
140 |   // save the estimated poses
141 |   std::ofstream ofs("/tmp/traj.txt");
142 |   for (const auto& pose : poses) {
143 |     for (int i = 0; i < 3; i++) {
144 |       for (int j = 0; j < 4; j++) {
145 |         if (i || j) {
146 |           ofs << " ";
147 |         }
148 | 
149 |         ofs << pose(i, j);
150 |       }
151 |     }
152 |     ofs << std::endl;
153 |   }
154 | 
155 |   return 0;
156 | }


--------------------------------------------------------------------------------
/src/kitti.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | import os
 3 | import sys
 4 | import time
 5 | import numpy
 6 | import pygicp
 7 | from matplotlib import pyplot
 8 | 
 9 | def main():
10 | 	if len(sys.argv) < 2:
11 | 		print('usage: kitti.py /path/to/kitti/sequences/00/velodyne')
12 | 		return
13 | 
14 | 	# List input files
15 | 	seq_path = sys.argv[1]
16 | 	filenames = sorted([seq_path + '/' + x for x in os.listdir(seq_path) if x.endswith('.bin')])
17 | 
18 | 	# You can choose any of FastGICP, FastVGICP, FastVGICPCuda, or NDTCuda
19 | 	reg = pygicp.FastGICP()
20 | 
21 | 	# pygicp classes have the same interface as the C++ version
22 | 	# reg.set_num_threads(8)
23 | 	# reg.set_max_correspondence_distance(2.0)
24 | 
25 | 	stamps = [time.time()]		# for FPS calculation
26 | 	poses = [numpy.identity(4)]	# sensor trajectory
27 | 
28 | 	for i, filename in enumerate(filenames):
29 | 		# Read and downsample input cloud
30 | 		points = numpy.fromfile(filename, dtype=numpy.float32).reshape(-1, 4)[:, :3]
31 | 		points = pygicp.downsample(points, 0.25)
32 | 
33 | 		if i == 0:
34 | 			reg.set_input_target(points)
35 | 			delta = numpy.identity(4)
36 | 		else:
37 | 			reg.set_input_source(points)
38 | 			delta = reg.align()
39 | 			reg.swap_source_and_target()
40 | 
41 | 		# Accumulate the delta to compute the full sensor trajectory
42 | 		poses.append(poses[-1].dot(delta))
43 | 
44 | 		# FPS calculation for the last ten frames
45 | 		stamps = stamps[-9:] + [time.time()]
46 | 		print('fps:%.3f' % (len(stamps) / (stamps[-1] - stamps[0])))
47 | 
48 | 		# Plot the estimated trajectory
49 | 		traj = numpy.array([x[:3, 3] for x in poses])
50 | 
51 | 		if i % 30 == 0:
52 | 			pyplot.clf()
53 | 			pyplot.plot(traj[:, 0], traj[:, 1])
54 | 			pyplot.axis('equal')
55 | 			pyplot.pause(0.01)
56 | 
57 | 
58 | if __name__ == '__main__':
59 | 	main()
60 | 


--------------------------------------------------------------------------------
/src/python/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <pybind11/pybind11.h>
  2 | #include <pybind11/numpy.h>
  3 | #include <pybind11/eigen.h>
  4 | #include <pybind11/functional.h>
  5 | 
  6 | #include <boost/filesystem.hpp>
  7 | #include <fast_gicp/gicp/fast_gicp.hpp>
  8 | #include <fast_gicp/gicp/fast_vgicp.hpp>
  9 | 
 10 | #ifdef USE_VGICP_CUDA
 11 | #include <fast_gicp/ndt/ndt_cuda.hpp>
 12 | #include <fast_gicp/gicp/fast_vgicp_cuda.hpp>
 13 | #endif
 14 | 
 15 | #include <pcl/point_types.h>
 16 | #include <pcl/point_cloud.h>
 17 | #include <pcl/filters/approximate_voxel_grid.h>
 18 | 
 19 | namespace py = pybind11;
 20 | 
 21 | fast_gicp::NeighborSearchMethod search_method(const std::string& neighbor_search_method) {
 22 |   if(neighbor_search_method == "DIRECT1") {
 23 |     return fast_gicp::NeighborSearchMethod::DIRECT1;
 24 |   } else if (neighbor_search_method == "DIRECT7") {
 25 |     return fast_gicp::NeighborSearchMethod::DIRECT7;
 26 |   } else if (neighbor_search_method == "DIRECT27") {
 27 |     return fast_gicp::NeighborSearchMethod::DIRECT27;
 28 |   } else if (neighbor_search_method == "DIRECT_RADIUS") {
 29 |     return fast_gicp::NeighborSearchMethod::DIRECT_RADIUS;
 30 |   }
 31 | 
 32 |   std::cerr << "error: unknown neighbor search method " << neighbor_search_method << std::endl;
 33 |   return fast_gicp::NeighborSearchMethod::DIRECT1;
 34 | }
 35 | 
 36 | pcl::PointCloud<pcl::PointXYZ>::Ptr eigen2pcl(const Eigen::Matrix<double, -1, 3>& points) {
 37 |   pcl::PointCloud<pcl::PointXYZ>::Ptr cloud(new pcl::PointCloud<pcl::PointXYZ>);
 38 |   cloud->resize(points.rows());
 39 | 
 40 |   for(int i=0; i<points.rows(); i++) {
 41 |     cloud->at(i).getVector3fMap() = points.row(i).cast<float>();
 42 |   }
 43 |   return cloud;
 44 | }
 45 | 
 46 | Eigen::Matrix<double, -1, 3> downsample(const Eigen::Matrix<double, -1, 3>& points, double downsample_resolution) {
 47 |   auto cloud = eigen2pcl(points);
 48 | 
 49 |   pcl::ApproximateVoxelGrid<pcl::PointXYZ> voxelgrid;
 50 |   voxelgrid.setLeafSize(downsample_resolution, downsample_resolution, downsample_resolution);
 51 |   voxelgrid.setInputCloud(cloud);
 52 | 
 53 |   pcl::PointCloud<pcl::PointXYZ>::Ptr filtered(new pcl::PointCloud<pcl::PointXYZ>);
 54 |   voxelgrid.filter(*filtered);
 55 | 
 56 |   Eigen::Matrix<float, -1, 3> filtered_points(filtered->size(), 3);
 57 |   for(int i=0; i<filtered->size(); i++) {
 58 |     filtered_points.row(i) = filtered->at(i).getVector3fMap();
 59 |   }
 60 | 
 61 |   return filtered_points.cast<double>();
 62 | }
 63 | 
 64 | Eigen::Matrix4d align_points(
 65 |   const Eigen::Matrix<double, -1, 3>& target,
 66 |   const Eigen::Matrix<double, -1, 3>& source,
 67 |   const std::string& method,
 68 |   double downsample_resolution,
 69 |   int k_correspondences,
 70 |   double max_correspondence_distance,
 71 |   double voxel_resolution,
 72 |   int num_threads,
 73 |   const std::string& neighbor_search_method,
 74 |   double neighbor_search_radius,
 75 |   const Eigen::Matrix4f& initial_guess
 76 | ) {
 77 |   pcl::PointCloud<pcl::PointXYZ>::Ptr target_cloud = eigen2pcl(target);
 78 |   pcl::PointCloud<pcl::PointXYZ>::Ptr source_cloud = eigen2pcl(source);
 79 | 
 80 |   if(downsample_resolution > 0.0) {
 81 |     pcl::ApproximateVoxelGrid<pcl::PointXYZ> voxelgrid;
 82 |     voxelgrid.setLeafSize(downsample_resolution, downsample_resolution, downsample_resolution);
 83 | 
 84 |     pcl::PointCloud<pcl::PointXYZ>::Ptr filtered(new pcl::PointCloud<pcl::PointXYZ>);
 85 |     voxelgrid.setInputCloud(target_cloud);
 86 |     voxelgrid.filter(*filtered);
 87 |     target_cloud.swap(filtered);
 88 | 
 89 |     voxelgrid.setInputCloud(source_cloud);
 90 |     voxelgrid.filter(*filtered);
 91 |     source_cloud.swap(filtered);
 92 |   }
 93 | 
 94 |   std::shared_ptr<fast_gicp::LsqRegistration<pcl::PointXYZ, pcl::PointXYZ>> reg;
 95 | 
 96 |   if(method == "GICP") {
 97 |     std::shared_ptr<fast_gicp::FastGICP<pcl::PointXYZ, pcl::PointXYZ>> gicp(new fast_gicp::FastGICP<pcl::PointXYZ, pcl::PointXYZ>);
 98 |     gicp->setMaxCorrespondenceDistance(max_correspondence_distance);
 99 |     gicp->setCorrespondenceRandomness(k_correspondences);
100 |     gicp->setNumThreads(num_threads);
101 |     reg = gicp;
102 |   } else if (method == "VGICP") {
103 |     std::shared_ptr<fast_gicp::FastVGICP<pcl::PointXYZ, pcl::PointXYZ>> vgicp(new fast_gicp::FastVGICP<pcl::PointXYZ, pcl::PointXYZ>);
104 |     vgicp->setCorrespondenceRandomness(k_correspondences);
105 |     vgicp->setResolution(voxel_resolution);
106 |     vgicp->setNeighborSearchMethod(search_method(neighbor_search_method));
107 |     vgicp->setNumThreads(num_threads);
108 |     reg = vgicp;
109 |   } else if (method == "VGICP_CUDA") {
110 | #ifdef USE_VGICP_CUDA
111 |     std::shared_ptr<fast_gicp::FastVGICPCuda<pcl::PointXYZ, pcl::PointXYZ>> vgicp(new fast_gicp::FastVGICPCuda<pcl::PointXYZ, pcl::PointXYZ>);
112 |     vgicp->setCorrespondenceRandomness(k_correspondences);
113 |     vgicp->setNeighborSearchMethod(search_method(neighbor_search_method), neighbor_search_radius);
114 |     vgicp->setResolution(voxel_resolution);
115 |     reg = vgicp;
116 | #else
117 |     std::cerr << "error: you need to build fast_gicp with BUILD_VGICP_CUDA=ON" << std::endl;
118 |     return Eigen::Matrix4d::Identity();
119 | #endif
120 |   } else if (method == "NDT_CUDA") {
121 | #ifdef USE_VGICP_CUDA
122 |     std::shared_ptr<fast_gicp::NDTCuda<pcl::PointXYZ, pcl::PointXYZ>> ndt(new fast_gicp::NDTCuda<pcl::PointXYZ, pcl::PointXYZ>);
123 |     ndt->setResolution(voxel_resolution);
124 |     ndt->setNeighborSearchMethod(search_method(neighbor_search_method), neighbor_search_radius);
125 |     reg = ndt;
126 | #else
127 |     std::cerr << "error: you need to build fast_gicp with BUILD_VGICP_CUDA=ON" << std::endl;
128 |     return Eigen::Matrix4d::Identity();
129 | #endif
130 |   } else {
131 |     std::cerr << "error: unknown registration method " << method << std::endl;
132 |     return Eigen::Matrix4d::Identity();
133 |   }
134 | 
135 |   reg->setInputTarget(target_cloud);
136 |   reg->setInputSource(source_cloud);
137 | 
138 |   pcl::PointCloud<pcl::PointXYZ>::Ptr aligned(new pcl::PointCloud<pcl::PointXYZ>);
139 |   reg->align(*aligned, initial_guess);
140 | 
141 |   return reg->getFinalTransformation().cast<double>();
142 | }
143 | 
144 | using LsqRegistration = fast_gicp::LsqRegistration<pcl::PointXYZ, pcl::PointXYZ>;
145 | using FastGICP = fast_gicp::FastGICP<pcl::PointXYZ, pcl::PointXYZ>;
146 | using FastVGICP = fast_gicp::FastVGICP<pcl::PointXYZ, pcl::PointXYZ>;
147 | #ifdef USE_VGICP_CUDA
148 | using FastVGICPCuda = fast_gicp::FastVGICPCuda<pcl::PointXYZ, pcl::PointXYZ>;
149 | using NDTCuda = fast_gicp::NDTCuda<pcl::PointXYZ, pcl::PointXYZ>;
150 | #endif
151 | 
152 | PYBIND11_MODULE(pygicp, m) {
153 |   m.def("downsample", &downsample, "downsample points");
154 | 
155 |   m.def("align_points", &align_points, "align two point sets",
156 |     py::arg("target"),
157 |     py::arg("source"),
158 |     py::arg("method") = "GICP",
159 |     py::arg("downsample_resolution") = -1.0,
160 |     py::arg("k_correspondences") = 15,
161 |     py::arg("max_correspondence_distance") = std::numeric_limits<double>::max(),
162 |     py::arg("voxel_resolution") = 1.0,
163 |     py::arg("num_threads") = 0,
164 |     py::arg("neighbor_search_method") = "DIRECT1",
165 |     py::arg("neighbor_search_radius") = 1.5,
166 |     py::arg("initial_guess") = Eigen::Matrix4f::Identity()
167 |   );
168 | 
169 |   py::class_<LsqRegistration, std::shared_ptr<LsqRegistration>>(m, "LsqRegistration")
170 |     .def("set_input_target", [] (LsqRegistration& reg, const Eigen::Matrix<double, -1, 3>& points) { reg.setInputTarget(eigen2pcl(points)); })
171 |     .def("set_input_source", [] (LsqRegistration& reg, const Eigen::Matrix<double, -1, 3>& points) { reg.setInputSource(eigen2pcl(points)); })
172 |     .def("swap_source_and_target", &LsqRegistration::swapSourceAndTarget)
173 |     .def("get_final_hessian", &LsqRegistration::getFinalHessian)
174 |     .def("get_final_transformation", &LsqRegistration::getFinalTransformation)
175 |     .def("get_fitness_score", [] (LsqRegistration& reg, const double max_range) { return reg.getFitnessScore(max_range); })
176 |     .def("align",
177 |       [] (LsqRegistration& reg, const Eigen::Matrix4f& initial_guess) { 
178 |         pcl::PointCloud<pcl::PointXYZ> aligned;
179 |         reg.align(aligned, initial_guess);
180 |         return reg.getFinalTransformation();
181 |       }, py::arg("initial_guess") = Eigen::Matrix4f::Identity()
182 |     )
183 |   ;
184 | 
185 |   py::class_<FastGICP, LsqRegistration, std::shared_ptr<FastGICP>>(m, "FastGICP")
186 |     .def(py::init())
187 |     .def("set_num_threads", &FastGICP::setNumThreads)
188 |     .def("set_correspondence_randomness", &FastGICP::setCorrespondenceRandomness)
189 |     .def("set_max_correspondence_distance", &FastGICP::setMaxCorrespondenceDistance)
190 |   ;
191 | 
192 |   py::class_<FastVGICP, FastGICP, std::shared_ptr<FastVGICP>>(m, "FastVGICP")
193 |     .def(py::init())
194 |     .def("set_resolution", &FastVGICP::setResolution)
195 |     .def("set_neighbor_search_method", [](FastVGICP& vgicp, const std::string& method) { vgicp.setNeighborSearchMethod(search_method(method)); })
196 |   ;
197 | 
198 | #ifdef USE_VGICP_CUDA
199 |   py::class_<FastVGICPCuda, LsqRegistration, std::shared_ptr<FastVGICPCuda>>(m, "FastVGICPCuda")
200 |     .def(py::init())
201 |     .def("set_resolution", &FastVGICPCuda::setResolution)
202 |     .def("set_neighbor_search_method",
203 |       [](FastVGICPCuda& vgicp, const std::string& method, double radius) { vgicp.setNeighborSearchMethod(search_method(method), radius); }
204 |       , py::arg("method") = "DIRECT1", py::arg("radius") = 1.5
205 |     )
206 |     .def("set_correspondence_randomness", &FastVGICPCuda::setCorrespondenceRandomness)
207 |   ;
208 | 
209 |   py::class_<NDTCuda, LsqRegistration, std::shared_ptr<NDTCuda>>(m, "NDTCuda")
210 |     .def(py::init())
211 |     .def("set_neighbor_search_method",
212 |       [](NDTCuda& ndt, const std::string& method, double radius) { ndt.setNeighborSearchMethod(search_method(method), radius); }
213 |       , py::arg("method") = "DIRECT1", py::arg("radius") = 1.5
214 |     )
215 |     .def("set_resolution", &NDTCuda::setResolution)
216 |   ;
217 | #endif
218 | 
219 | #ifdef VERSION_INFO
220 |   m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO);
221 | #else
222 |   m.attr("__version__") = "dev";
223 | #endif
224 | }


--------------------------------------------------------------------------------
/src/test/gicp_test.cpp:
--------------------------------------------------------------------------------
  1 | #include <vector>
  2 | #include <sstream>
  3 | #include <iostream>
  4 | #include <gtest/gtest.h>
  5 | 
  6 | #include <pcl/io/pcd_io.h>
  7 | #include <pcl/point_types.h>
  8 | #include <pcl/point_cloud.h>
  9 | #include <pcl/registration/gicp.h>
 10 | #include <pcl/filters/voxel_grid.h>
 11 | 
 12 | #include <fast_gicp/gicp/fast_gicp.hpp>
 13 | #include <fast_gicp/gicp/fast_gicp_st.hpp>
 14 | #include <fast_gicp/gicp/fast_vgicp.hpp>
 15 | #ifdef USE_VGICP_CUDA
 16 | #include <fast_gicp/ndt/ndt_cuda.hpp>
 17 | #include <fast_gicp/gicp/fast_vgicp_cuda.hpp>
 18 | #endif
 19 | 
 20 | struct GICPTestBase : public testing::Test {
 21 | public:
 22 |   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
 23 |   using PointCloudConstPtr = pcl::PointCloud<pcl::PointXYZ>::ConstPtr;
 24 | 
 25 |   GICPTestBase() {}
 26 | 
 27 |   virtual void SetUp() {
 28 |     if (!load(data_directory)) {
 29 |       exit(1);
 30 |     }
 31 |   }
 32 | 
 33 |   bool load(const std::string& data_directory) {
 34 |     relative_pose.setIdentity();
 35 | 
 36 |     std::ifstream ifs(data_directory + "/relative.txt");
 37 |     if (!ifs) {
 38 |       return false;
 39 |     }
 40 | 
 41 |     for (int i = 0; i < 4; i++) {
 42 |       for (int j = 0; j < 4; j++) {
 43 |         ifs >> relative_pose(i, j);
 44 |       }
 45 |     }
 46 | 
 47 |     auto target = pcl::make_shared<pcl::PointCloud<pcl::PointXYZ>>();
 48 |     auto source = pcl::make_shared<pcl::PointCloud<pcl::PointXYZ>>();
 49 |     pcl::io::loadPCDFile(data_directory + "/251370668.pcd", *target);
 50 |     pcl::io::loadPCDFile(data_directory + "/251371071.pcd", *source);
 51 |     if (target->empty() || source->empty()) {
 52 |       return true;
 53 |     }
 54 | 
 55 |     pcl::VoxelGrid<pcl::PointXYZ> voxelgrid;
 56 |     voxelgrid.setLeafSize(0.2, 0.2, 0.2);
 57 | 
 58 |     auto filtered = pcl::make_shared<pcl::PointCloud<pcl::PointXYZ>>();
 59 |     voxelgrid.setInputCloud(target);
 60 |     voxelgrid.filter(*filtered);
 61 |     filtered.swap(target);
 62 | 
 63 |     voxelgrid.setInputCloud(source);
 64 |     voxelgrid.filter(*filtered);
 65 |     filtered.swap(source);
 66 | 
 67 |     this->target = target;
 68 |     this->source = source;
 69 | 
 70 |     return true;
 71 |   }
 72 | 
 73 |   Eigen::Vector2f pose_error(const Eigen::Matrix4f estimated) const {
 74 |     Eigen::Matrix4f delta = relative_pose.inverse() * estimated;
 75 |     double t_error = delta.block<3, 1>(0, 3).norm();
 76 |     double r_error = Eigen::AngleAxisf(delta.block<3, 3>(0, 0)).angle();
 77 |     return Eigen::Vector2f(t_error, r_error);
 78 |   }
 79 | 
 80 |   static std::string data_directory;
 81 | 
 82 |   PointCloudConstPtr target;
 83 |   PointCloudConstPtr source;
 84 |   Eigen::Matrix4f relative_pose;
 85 | };
 86 | 
 87 | std::string GICPTestBase::data_directory;
 88 | 
 89 | TEST_F(GICPTestBase, LoadCheck) {
 90 |   EXPECT_NE(target, nullptr);
 91 |   EXPECT_NE(source, nullptr);
 92 |   EXPECT_FALSE(target->empty());
 93 |   EXPECT_FALSE(source->empty());
 94 | }
 95 | 
 96 | using Parameters = std::tuple<const char*, bool>;
 97 | class AlignmentTest : public GICPTestBase, public testing::WithParamInterface<Parameters> {
 98 | public:
 99 |   pcl::Registration<pcl::PointXYZ, pcl::PointXYZ>::Ptr create_reg() {
100 |     std::string method = std::get<0>(GetParam());
101 |     int num_threads = std::get<1>(GetParam()) ? 4 : 1;
102 | 
103 |     if (method == "GICP") {
104 |       auto gicp = pcl::make_shared<fast_gicp::FastGICP<pcl::PointXYZ, pcl::PointXYZ>>();
105 |       gicp->setNumThreads(num_threads);
106 |       gicp->swapSourceAndTarget();
107 |       return gicp;
108 |     } else if (method == "VGICP") {
109 |       auto vgicp = pcl::make_shared<fast_gicp::FastVGICP<pcl::PointXYZ, pcl::PointXYZ>>();
110 |       vgicp->setNumThreads(num_threads);
111 |       return vgicp;
112 |     } else if (method == "VGICP_CUDA") {
113 | #ifdef USE_VGICP_CUDA
114 |       auto vgicp = pcl::make_shared<fast_gicp::FastVGICPCuda<pcl::PointXYZ, pcl::PointXYZ>>();
115 |       return vgicp;
116 | #endif
117 |       return nullptr;
118 |     } else if (method == "NDT_CUDA") {
119 | #ifdef USE_VGICP_CUDA
120 |       auto ndt = pcl::make_shared<fast_gicp::NDTCuda<pcl::PointXYZ, pcl::PointXYZ>>();
121 |       return ndt;
122 | #endif
123 |       return nullptr;
124 |     }
125 | 
126 |     std::cerr << "unknown registration method:" << method << std::endl;
127 |     return nullptr;
128 |   }
129 | 
130 |   void swap_source_and_target(pcl::Registration<pcl::PointXYZ, pcl::PointXYZ>::Ptr reg) {
131 |     fast_gicp::LsqRegistration<pcl::PointXYZ, pcl::PointXYZ>* lsq_reg = dynamic_cast<fast_gicp::LsqRegistration<pcl::PointXYZ, pcl::PointXYZ>*>(reg.get());
132 |     if (lsq_reg != nullptr) {
133 |       lsq_reg->swapSourceAndTarget();
134 |       return;
135 |     }
136 | 
137 |     std::cerr << "failed to swap source and target" << std::endl;
138 |   }
139 | };
140 | 
141 | INSTANTIATE_TEST_SUITE_P(AlignmentTest2, AlignmentTest, testing::Combine(testing::Values("GICP", "VGICP", "VGICP_CUDA", "NDT_CUDA"), testing::Bool()), [](const auto& info) {
142 |   std::stringstream sst;
143 |   sst << std::get<0>(info.param) << (std::get<1>(info.param) ? "_MT" : "_ST");
144 |   return sst.str();
145 | });
146 | 
147 | TEST_P(AlignmentTest, test) {
148 |   const double t_tol = 0.05;
149 |   const double r_tol = 1.0 * M_PI / 180.0;
150 | 
151 |   pcl::Registration<pcl::PointXYZ, pcl::PointXYZ>::Ptr reg = create_reg();
152 |   if (reg == nullptr) {
153 |     std::cout << "[          ] SKIP TEST" << std::endl;
154 |     return;
155 |   }
156 | 
157 |   // forward test
158 |   auto aligned = pcl::make_shared<pcl::PointCloud<pcl::PointXYZ>>();
159 |   reg->setInputTarget(target);
160 |   reg->setInputSource(source);
161 |   reg->align(*aligned);
162 | 
163 |   Eigen::Vector2f errors = pose_error(reg->getFinalTransformation());
164 |   EXPECT_LT(errors[0], t_tol) << "FORWARD TEST";
165 |   EXPECT_LT(errors[1], r_tol) << "FORWARD TEST";
166 |   EXPECT_TRUE(reg->hasConverged()) << "FORWARD TEST";
167 | 
168 |   // backward test
169 |   reg->setInputTarget(source);
170 |   reg->setInputSource(target);
171 |   reg->align(*aligned);
172 | 
173 |   errors = pose_error(reg->getFinalTransformation().inverse());
174 |   EXPECT_LT(errors[0], t_tol) << "BACKWARD TEST";
175 |   EXPECT_LT(errors[1], r_tol) << "BACKWARD TEST";
176 |   EXPECT_TRUE(reg->hasConverged()) << "BACKWARD TEST";
177 | 
178 |   // swap and set source
179 |   reg = create_reg();
180 |   reg->setInputSource(target);
181 |   swap_source_and_target(reg);
182 |   reg->setInputSource(source);
183 |   reg->align(*aligned);
184 | 
185 |   errors = pose_error(reg->getFinalTransformation());
186 |   EXPECT_LT(errors[0], t_tol) << "SWAP AND SET SOURCE TEST";
187 |   EXPECT_LT(errors[1], r_tol) << "SWAP AND SET SOURCE TEST";
188 |   EXPECT_TRUE(reg->hasConverged()) << "SWAP AND SET SOURCE TEST";
189 | 
190 |   // swap and set target
191 |   reg = create_reg();
192 |   reg->setInputTarget(source);
193 |   swap_source_and_target(reg);  // source:target, target:source
194 |   reg->setInputTarget(target);
195 |   reg->align(*aligned);
196 | 
197 |   errors = pose_error(reg->getFinalTransformation());
198 |   EXPECT_LT(errors[0], t_tol) << "SWAP AND SET TARGET TEST";
199 |   EXPECT_LT(errors[1], r_tol) << "SWAP AND SET TARGET TEST";
200 |   EXPECT_TRUE(reg->hasConverged()) << "SWAP AND SET TARGET TEST";
201 | }
202 | 
203 | int main(int argc, char** argv) {
204 |   GICPTestBase::data_directory = argv[1];
205 |   testing::InitGoogleTest(&argc, argv);
206 |   return RUN_ALL_TESTS();
207 | }
208 | 


--------------------------------------------------------------------------------