├── CMakeLists.txt ├── README.md ├── main.cpp ├── tic_toc.h ├── voxel_grid_omp.cpp └── voxel_grid_omp.h /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.20) 2 | project(pcl_test) 3 | 4 | set(CMAKE_BUILD_TYPE "Release") 5 | set(CMAKE_CXX_FLAGS_RELEASE "-O3 -Wall -g -pthread") 6 | 7 | set(CMAKE_CXX_STANDARD 11) 8 | 9 | find_package(PCL REQUIRED) 10 | find_package(OpenMP REQUIRED) 11 | 12 | include_directories(${PCL_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS}) 13 | link_directories(${PCL_LIBRARY_DIRS}) 14 | add_definitions(${PCL_DEFINITIONS}) 15 | 16 | 17 | add_executable(pcl_test main.cpp voxel_grid_omp.cpp) 18 | target_compile_options(pcl_test PRIVATE ${OpenMP_CXX_FLAGS}) 19 | target_link_libraries(pcl_test ${PCL_LIBRARIES} ${OpenMP_CXX_FLAGS}) 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VoxelGridOMP 2 | 3 | Voxel grid in parallel, using the OpenMP standard, based on PCL. 4 | Tested on PCL-1.7, 1.8, 1.10. 5 | 6 | In our experiment, 2,500,000 points cost 68 ms with 10 threads compared to 217 ms for PCL voxelgrid. 7 | 8 | It is recommended to use VoxelGripOMP when the size of points exceeds 100000 to compensate for the cost of multithreading. 9 | 10 | use: 11 | 12 | Modify the line 24 and 28 in voxel_grid_omp.h, setting "PointXYZINormal" to the point TYPE you need. 13 | 14 | ... 15 | 16 | class VoxelGridOMP : public VoxelGrid 17 | { 18 | public: 19 | typedef typename pcl::PointXYZINormal PointT; 20 | typedef typename Filter::PointCloud PointCloud; 21 | ... 22 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "tic_toc.h" 9 | #include "voxel_grid_omp.h" 10 | 11 | std::string pcd_file = "/home/hk/CLionProjects/pcl-test/map.pcd"; 12 | 13 | int 14 | main (int argc, char** argv) 15 | { 16 | pcl::PointCloud::Ptr cloud(new pcl::PointCloud()); 17 | 18 | // Fill in the cloud data 19 | pcl::PCDReader reader; 20 | // Replace the path below with the path where you saved your file 21 | reader.read (pcd_file, *cloud); // Remember to download the file first! 22 | std::cerr << "PointCloud before filtering: " << cloud->size() << std::endl; 23 | 24 | pcl::PointCloud::Ptr cloud_PCL_filtered(new pcl::PointCloud()); 25 | // Create the filtering object 26 | pcl::VoxelGrid sor; 27 | sor.setInputCloud (cloud); 28 | sor.setLeafSize (1.0, 1.0, 1.0); 29 | 30 | TicToc t_vg; 31 | sor.filter (*cloud_PCL_filtered); 32 | printf("voxel grid PCL cost: %fms\n", t_vg.toc()); 33 | std::cerr << "PointCloud after filtering: " << cloud_PCL_filtered->size() << std::endl; 34 | 35 | 36 | pcl::VoxelGridOMP vg_omp; 37 | vg_omp.setInputCloud (cloud); 38 | vg_omp.setNumberOfThreads(6); 39 | vg_omp.setLeafSize (1.0, 1.0, 1.0); 40 | vg_omp.setSaveLeafLayout(false); 41 | 42 | pcl::PointCloud::Ptr cloud_OPM_filtered(new pcl::PointCloud()); 43 | TicToc t_vg_omp; 44 | // vg_omp.downSample(*cloud_OPM_filtered); 45 | vg_omp.setFinalFilter(true); 46 | vg_omp.filter(*cloud_OPM_filtered); 47 | printf("voxel grid OMP cost: %fms\n", t_vg_omp.toc()); 48 | std::cerr << "PointCloud after filtering: " << cloud_OPM_filtered->size() << std::endl; 49 | 50 | // for (int i = 0; i < (int)cloud_PCL_filtered->size(); ++i) { 51 | // const pcl::PointXYZINormal& p1 = cloud_PCL_filtered->points[i]; 52 | // const pcl::PointXYZINormal& p2 = cloud_OPM_filtered->points[i]; 53 | // if (p1.getVector3fMap() != p2.getVector3fMap() ) 54 | // { 55 | // printf("i = %d xyz:\n", i); 56 | // std::cout << p1.getVector3fMap().transpose() < 4 | #include 5 | #include 6 | 7 | class TicToc 8 | { 9 | public: 10 | TicToc() 11 | { 12 | tic(); 13 | } 14 | 15 | void tic() 16 | { 17 | start = std::chrono::system_clock::now(); 18 | } 19 | 20 | double toc() 21 | { 22 | end = std::chrono::system_clock::now(); 23 | std::chrono::duration elapsed_seconds = end - start; 24 | return elapsed_seconds.count() * 1000; 25 | } 26 | 27 | private: 28 | std::chrono::time_point start, end; 29 | }; 30 | -------------------------------------------------------------------------------- /voxel_grid_omp.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by hk on 3/26/23. 3 | // 4 | #include "voxel_grid_omp.h" 5 | #include "Eigen/src/Core/Matrix.h" 6 | #include "pcl/impl/point_types.hpp" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace{ 16 | /* 17 | 等同于cloud_point_index_idx 18 | cloud_point_index_idx没有默认构造函数,会导致boost的integer_sort编译不通过 19 | */ 20 | struct VoxelIndex{ 21 | unsigned int idx; 22 | unsigned int cloud_point_index; 23 | 24 | VoxelIndex() = default; 25 | VoxelIndex (unsigned int idx_, unsigned int cloud_point_index_) : idx (idx_), cloud_point_index (cloud_point_index_) {} 26 | bool operator < (const VoxelIndex &p) const { return (idx < p.idx); } 27 | }; 28 | const int kSampleStep = 100; 29 | /* 30 | 为每个线程分配它所处理的voxel index的上下界. 31 | 输入: 32 | - index_all_thread.size()个有序vector,表示之前各个线程产生的voxel索引 33 | - 需要为thread_num个线程分配任务 34 | 输出: 每个线程的任务 35 | */ 36 | size_t AssignTask(const std::vector>>& index_all_thread, int thread_num, std::vector>>* tasks){ 37 | // 采样 38 | std::vector sampled_data; 39 | size_t total = 0; 40 | for(auto& index: index_all_thread){ 41 | total += index->size(); 42 | } 43 | sampled_data.reserve(total/kSampleStep+index_all_thread.size()*2); 44 | for(auto& index: index_all_thread){ 45 | size_t i=0; 46 | for(; isize(); i+=kSampleStep){ 47 | sampled_data.push_back((*index)[i]); 48 | } 49 | if(index->size()>0 && (index->size()-1)%kSampleStep>kSampleStep/2){ 50 | sampled_data.push_back(index->back()); 51 | } 52 | } 53 | // 确定每个进程分配voxel索引的上下界 54 | std::sort(sampled_data.begin(), sampled_data.end()); 55 | tasks->resize(thread_num); 56 | int pad_size = sampled_data.size()%thread_num; 57 | int pivot_step = sampled_data.size()/thread_num; 58 | for(int i=0; ipad_size?pad_size:i); 61 | int pivot_start = sampled_data[start]; 62 | int pivot_end; 63 | if(i+1pad_size?pad_size:i+1); 66 | pivot_end = sampled_data[end]; 67 | } 68 | auto& cur_task = (*tasks)[i]; 69 | cur_task.resize(index_all_thread.size()); 70 | for(size_t j=0; jbegin(), index_all_thread[j]->end(),pivot_start); 72 | cur_task[j].first = int(start_it - index_all_thread[j]->begin()); 73 | if(i+1>=thread_num){ 74 | cur_task[j].second = index_all_thread[j]->size(); 75 | }else{ 76 | auto end_it = std::lower_bound(index_all_thread[j]->begin(), index_all_thread[j]->end(),pivot_end); 77 | cur_task[j].second = int(end_it-index_all_thread[j]->begin()); 78 | } 79 | } 80 | } 81 | return total; 82 | } 83 | } 84 | 85 | void 86 | pcl::VoxelGridOMP::setNumberOfThreads (unsigned int nr_threads) 87 | { 88 | if (nr_threads == 0) 89 | #ifdef _OPENMP 90 | threads_ = omp_get_num_procs(); 91 | #else 92 | threads_ = 1; 93 | #endif 94 | else 95 | threads_ = nr_threads; 96 | printf("set number of threads: %d.\n", threads_); 97 | } 98 | 99 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 100 | void 101 | pcl::VoxelGridOMP::applyFilter(PointCloud &output) 102 | { 103 | // TicToc t_bbox; 104 | // Has the input dataset been set already? 105 | if (!input_) 106 | { 107 | PCL_WARN ("[pcl::%s::applyFilter] No input dataset given!\n", getClassName ().c_str ()); 108 | output.width = output.height = 0; 109 | output.points.clear (); 110 | return; 111 | } 112 | 113 | // Copy the header (and thus the frame_id) + allocate enough space for points 114 | output.height = 1; // downsampling breaks the organized structure 115 | output.is_dense = true; // we filter out invalid points 116 | 117 | Eigen::Vector4f min_p, max_p; 118 | // Get the minimum and maximum dimensions 119 | if (!filter_field_name_.empty ()) // If we don't want to process the entire cloud... 120 | getMinMax3D (input_, *indices_, filter_field_name_, static_cast (filter_limit_min_), static_cast (filter_limit_max_), min_p, max_p, filter_limit_negative_); 121 | else { 122 | // getMinMax3D(*input_, *indices_, min_p, max_p); 123 | getMinMax3DOMP(*input_, *indices_, min_p, max_p); 124 | // printf("min max OMP cost: %fms\n", t_bbox.toc()); 125 | } 126 | 127 | // Check that the leaf size is not too small, given the size of the data 128 | int64_t dx = static_cast((max_p[0] - min_p[0]) * inverse_leaf_size_[0])+1; 129 | int64_t dy = static_cast((max_p[1] - min_p[1]) * inverse_leaf_size_[1])+1; 130 | int64_t dz = static_cast((max_p[2] - min_p[2]) * inverse_leaf_size_[2])+1; 131 | 132 | if ((dx*dy*dz) > static_cast(std::numeric_limits::max())) 133 | { 134 | PCL_WARN("[pcl::%s::applyFilter] Leaf size is too small for the input dataset. Integer indices would overflow.", getClassName().c_str()); 135 | output = *input_; 136 | return; 137 | } 138 | 139 | // Compute the minimum and maximum bounding box values 140 | min_b_[0] = static_cast (floor (min_p[0] * inverse_leaf_size_[0])); 141 | max_b_[0] = static_cast (floor (max_p[0] * inverse_leaf_size_[0])); 142 | min_b_[1] = static_cast (floor (min_p[1] * inverse_leaf_size_[1])); 143 | max_b_[1] = static_cast (floor (max_p[1] * inverse_leaf_size_[1])); 144 | min_b_[2] = static_cast (floor (min_p[2] * inverse_leaf_size_[2])); 145 | max_b_[2] = static_cast (floor (max_p[2] * inverse_leaf_size_[2])); 146 | 147 | // Compute the number of divisions needed along all axis 148 | div_b_ = max_b_ - min_b_ + Eigen::Vector4i::Ones (); // num of voxels in 3d space 149 | div_b_[3] = 0; 150 | 151 | // Set up the division multiplier 152 | divb_mul_ = Eigen::Vector4i (1, div_b_[0], div_b_[0] * div_b_[1], 0); 153 | // printf("bbox cost: %fms\n", t_bbox.toc()); 154 | 155 | int centroid_size = 4; // centroid dimension 156 | if (downsample_all_data_) { 157 | centroid_size = boost::mpl::size::value; // dimension of all fields centroid 158 | // printf("downsample_all_data_\n"); 159 | } 160 | 161 | // ---[ RGB special case 162 | std::vector fields; 163 | int rgba_index = -1; 164 | rgba_index = pcl::getFieldIndex (*input_, "rgb", fields); 165 | if (rgba_index == -1) 166 | rgba_index = pcl::getFieldIndex (*input_, "rgba", fields); 167 | if (rgba_index >= 0) 168 | { 169 | rgba_index = fields[rgba_index].offset; 170 | centroid_size += 3; // centroid + 3 (rgb) 171 | } 172 | 173 | TicToc t_p2v; 174 | //global variables 175 | std::vector cloud_all_threads(threads_); 176 | for (size_t i = 0; i < threads_; ++i) { 177 | cloud_all_threads[i].reset(new PointCloud ()); 178 | } 179 | std::vector>> index_all_threads(threads_); 180 | std::vector>> weight_all_threads(threads_); 181 | std::vector>>> centroid_all_threads(threads_); 182 | #pragma omp parallel num_threads(threads_) 183 | { 184 | int thread_id = omp_get_thread_num(); 185 | PointCloudPtr& cloud_thread = cloud_all_threads[thread_id]; 186 | // PointCloud cloud_thread; 187 | std::vector index_vector; // voxel index, pointer of point 188 | index_vector.reserve(indices_->size() / threads_); 189 | 190 | int num_points = 0; 191 | // If we don't want to process the entire cloud, but rather filter points far away from the viewpoint first... 192 | if (!filter_field_name_.empty()) { 193 | // Get the distance field index 194 | std::vector fields; 195 | int distance_idx = pcl::getFieldIndex(*input_, filter_field_name_, fields); 196 | if (distance_idx == -1) 197 | PCL_WARN ("[pcl::%s::applyFilter] Invalid filter field name. Index is %d.\n", getClassName().c_str(), 198 | distance_idx); 199 | 200 | // First pass: go over all points and insert them into the index_vector vector 201 | // with calculated idx. Points with the same idx value will contribute to the 202 | // same point of resulting CloudPoint 203 | 204 | int thread_id = omp_get_thread_num(); 205 | 206 | for (std::vector::const_iterator it = indices_->begin(); it != indices_->end(); it += thread_id) { 207 | if (!input_->is_dense) 208 | // Check if the point is invalid 209 | if (!pcl_isfinite (input_->points[*it].x) || 210 | !pcl_isfinite (input_->points[*it].y) || 211 | !pcl_isfinite (input_->points[*it].z)) 212 | continue; 213 | 214 | // Get the distance value 215 | const uint8_t *pt_data = reinterpret_cast (&input_->points[*it]); 216 | float distance_value = 0; 217 | memcpy(&distance_value, pt_data + fields[distance_idx].offset, sizeof(float)); 218 | 219 | if (filter_limit_negative_) { 220 | // Use a threshold for cutting out points which inside the interval 221 | if ((distance_value < filter_limit_max_) && (distance_value > filter_limit_min_)) 222 | continue; 223 | } else { 224 | // Use a threshold for cutting out points which are too close/far away 225 | if ((distance_value > filter_limit_max_) || (distance_value < filter_limit_min_)) 226 | continue; 227 | } 228 | 229 | int ijk0 = static_cast (floor(input_->points[*it].x * inverse_leaf_size_[0]) - 230 | static_cast (min_b_[0])); 231 | int ijk1 = static_cast (floor(input_->points[*it].y * inverse_leaf_size_[1]) - 232 | static_cast (min_b_[1])); 233 | int ijk2 = static_cast (floor(input_->points[*it].z * inverse_leaf_size_[2]) - 234 | static_cast (min_b_[2])); 235 | 236 | // Compute the centroid leaf index 237 | int idx = ijk0 * divb_mul_[0] + ijk1 * divb_mul_[1] + ijk2 * divb_mul_[2]; 238 | index_vector.push_back(VoxelIndex(static_cast (idx), *it)); 239 | } 240 | } 241 | // No distance filtering, process all data 242 | else { 243 | // First pass: go over all points and insert them into the index_vector vector 244 | // with calculated idx. Points with the same idx value will contribute to the 245 | // same point of resulting CloudPoint 246 | 247 | #pragma omp for schedule(dynamic,1024) 248 | for (size_t i = 0; i < indices_->size(); ++i) { 249 | const int &point_id = indices_->at(i); 250 | const PointT &point = input_->points[point_id]; 251 | if (!input_->is_dense) 252 | // Check if the point is invalid 253 | if (!pcl_isfinite (point.x) || 254 | !pcl_isfinite (point.y) || 255 | !pcl_isfinite (point.z)) 256 | continue; 257 | 258 | int ijk0 = static_cast (floor(point.x * inverse_leaf_size_[0]) - 259 | static_cast (min_b_[0])); // index of dimension 1 260 | int ijk1 = static_cast (floor(point.y * inverse_leaf_size_[1]) - 261 | static_cast (min_b_[1])); // index of dimension 2 262 | int ijk2 = static_cast (floor(point.z * inverse_leaf_size_[2]) - 263 | static_cast (min_b_[2])); // index of dimension 3 264 | 265 | // Compute the centroid leaf index 266 | int idx = ijk0 * divb_mul_[0] + ijk1 * divb_mul_[1] + ijk2 * divb_mul_[2]; // voxel id the point located 267 | /// !!! so slow !!! 268 | index_vector.emplace_back( 269 | VoxelIndex(static_cast (idx), point_id)); 270 | } 271 | } 272 | // printf("thread %d points to voxel cost: %fms\n", thread_id, t_p2v.toc()); 273 | 274 | /// !!! too slow!!! 275 | // TicToc t_sort; 276 | // Second pass: sort the index_vector vector using value representing target cell as index 277 | // in effect all points belonging to the same output cell will be next to each other 278 | auto rightshift_func = [](const VoxelIndex &x, const unsigned offset) { return x.idx >> offset; }; 279 | boost::sort::spreadsort::integer_sort(index_vector.begin(), index_vector.end(), rightshift_func); 280 | // std::sort(index_vector.begin(), index_vector.end(), std::less()); 281 | // printf("thread %d sort cost: %fms\n", thread_id, t_sort.toc()); 282 | 283 | // TicToc t_valid_voxel; 284 | // Third pass: count output cells 285 | // we need to skip all the same, adjacenent idx values 286 | unsigned int total = 0; 287 | unsigned int index = 0; 288 | // first_and_last_indices_vector[i] represents the index in index_vector of the first point in 289 | // index_vector belonging to the voxel which corresponds to the i-th output point, 290 | // and of the first point not belonging to. 291 | std::vector > first_and_last_indices_vector; 292 | // Worst case size 293 | first_and_last_indices_vector.reserve(index_vector.size()); 294 | 295 | while (index < index_vector.size()) { 296 | unsigned int i = index + 1; 297 | while (i < index_vector.size() && index_vector[i].idx == index_vector[index].idx) 298 | ++i; 299 | //todo 300 | if (i - index >= min_points_per_voxel_) 301 | { 302 | ++total; 303 | first_and_last_indices_vector.push_back(std::pair(index, i)); 304 | } 305 | index = i; 306 | } 307 | // printf("thread %d compute valid voxel cost: %fms\n", thread_id, t_valid_voxel.toc()); 308 | 309 | // TicToc t_layout; 310 | // Fourth pass: compute centroids, insert them into their final position 311 | // cloud_thread.points.resize(total); 312 | size_t cur_size = first_and_last_indices_vector.size(); 313 | auto indexes_one_thread = std::make_shared>(cur_size); 314 | auto weight_one_thread = std::make_shared>(cur_size); 315 | auto centroid_one_thread = std::make_shared>>(cur_size); 316 | for (unsigned int cp = 0; cp < first_and_last_indices_vector.size(); ++cp) 317 | { 318 | Eigen::VectorXf centroid = Eigen::VectorXf::Zero (centroid_size); 319 | Eigen::VectorXf temporary = Eigen::VectorXf::Zero (centroid_size); 320 | // calculate centroid - sum values from all input points, that have the same idx value in index_vector array 321 | unsigned int first_index = first_and_last_indices_vector[cp].first; 322 | unsigned int last_index = first_and_last_indices_vector[cp].second; 323 | int voxel_id = index_vector[first_index].idx; 324 | if (!downsample_all_data_) { 325 | centroid[0] = input_->points[index_vector[first_index].cloud_point_index].x; 326 | centroid[1] = input_->points[index_vector[first_index].cloud_point_index].y; 327 | centroid[2] = input_->points[index_vector[first_index].cloud_point_index].z; 328 | } else { 329 | // ---[ RGB special case 330 | if (rgba_index >= 0) { 331 | // Fill r/g/b data, assuming that the order is BGRA 332 | pcl::RGB rgb; 333 | // copy memory from input to rgb 334 | memcpy(&rgb, 335 | reinterpret_cast (&input_->points[index_vector[first_index].cloud_point_index]) + 336 | rgba_index, sizeof(RGB)); 337 | centroid[centroid_size - 3] = rgb.r; 338 | centroid[centroid_size - 2] = rgb.g; 339 | centroid[centroid_size - 1] = rgb.b; 340 | } 341 | pcl::for_each_type( 342 | NdCopyPointEigenFunctor(input_->points[index_vector[first_index].cloud_point_index], 343 | centroid)); 344 | } 345 | 346 | for (unsigned int i = first_index + 1; i < last_index; ++i) 347 | { 348 | if (!downsample_all_data_) { 349 | centroid[0] += input_->points[index_vector[i].cloud_point_index].x; 350 | centroid[1] += input_->points[index_vector[i].cloud_point_index].y; 351 | centroid[2] += input_->points[index_vector[i].cloud_point_index].z; 352 | } else { 353 | // ---[ RGB special case 354 | if (rgba_index >= 0) { 355 | // Fill r/g/b data, assuming that the order is BGRA 356 | pcl::RGB rgb; 357 | memcpy(&rgb, 358 | reinterpret_cast (&input_->points[index_vector[i].cloud_point_index]) + 359 | rgba_index, sizeof(RGB)); 360 | temporary[centroid_size - 3] = rgb.r; 361 | temporary[centroid_size - 2] = rgb.g; 362 | temporary[centroid_size - 1] = rgb.b; 363 | } 364 | // copying data between an Eigen type and a PointT (input point, output eigen) 365 | pcl::for_each_type( 366 | NdCopyPointEigenFunctor(input_->points[index_vector[i].cloud_point_index], 367 | temporary)); 368 | centroid += temporary; // accumulate centroid 369 | } 370 | } 371 | 372 | // index is centroid final position in resulting PointCloud 373 | if (save_leaf_layout_) 374 | leaf_layout_[index_vector[first_index].idx] = cp; 375 | 376 | (*centroid_one_thread)[cp] = centroid; 377 | (*weight_one_thread)[cp] = last_index-first_index; 378 | (*indexes_one_thread)[cp] = index_vector[first_index].idx; 379 | } 380 | #pragma omp critical 381 | { 382 | centroid_all_threads[thread_id] = centroid_one_thread; 383 | weight_all_threads[thread_id] = weight_one_thread; 384 | index_all_threads[thread_id] = indexes_one_thread; 385 | } 386 | // printf("thread %d compute %d centroids cost: %fms\n", thread_id, (int)cloud_thread->points.size(), t_centriod.toc()); 387 | } 388 | 389 | //merge cloud from all threads 390 | std::vector>> tasks; 391 | size_t total = AssignTask(index_all_threads, threads_, &tasks); 392 | std::vector final_clouds(threads_); 393 | #pragma omp parallel num_threads(threads_) 394 | { 395 | int thread_id = omp_get_thread_num(); 396 | auto& task = tasks[thread_id]; 397 | std::vector cur(task.size()); 398 | for(size_t i=0; ireserve(total/threads_); 403 | while(true){ 404 | bool finished = true; 405 | int min_voxel_index = std::numeric_limits::max(); 406 | for(size_t i=0; ipush_back(PointT()); 424 | if (!downsample_all_data_){ 425 | cloud->back().x = centroid[0]; 426 | cloud->back().y = centroid[1]; 427 | cloud->back().z = centroid[2]; 428 | }else{ 429 | // NdCopyEigenPointFunctor( p1 the input Eigen type, p2 the output Point type) 430 | pcl::for_each_type(pcl::NdCopyEigenPointFunctor(centroid, cloud->back())); 431 | // ---[ RGB special case 432 | if (rgba_index >= 0) { 433 | // pack r/g/b into rgb 434 | float r = centroid[centroid_size - 3], g = centroid[centroid_size - 2], b = centroid[centroid_size - 435 | 1]; 436 | int rgb = (static_cast (r) << 16) | (static_cast (g) << 8) | static_cast (b); 437 | // memcpy (reinterpret_cast (&output.points[index]) + rgba_index, &rgb, sizeof (float)); 438 | memcpy(reinterpret_cast (&(cloud->back())) + rgba_index, &rgb, sizeof(float)); 439 | } 440 | } 441 | } 442 | #pragma omp critical 443 | { 444 | final_clouds[thread_id] = cloud; 445 | } 446 | } 447 | size_t final_num = 0; 448 | for(auto& cloud: final_clouds){ 449 | final_num += cloud->size(); 450 | } 451 | output.resize(final_num); 452 | char* dst= reinterpret_cast(&output[0]); 453 | size_t offset = 0; 454 | for(auto& cloud: final_clouds){ 455 | size_t copy_size = cloud->size()*sizeof(PointT); 456 | char* src = reinterpret_cast(&((cloud->points)[0])); 457 | std::memcpy(dst+offset, src, copy_size); 458 | offset+=copy_size; 459 | } 460 | } 461 | 462 | void pcl::VoxelGridOMP::getMinMax3DOMP(const pcl::PointCloud &cloud, const std::vector &indices, 463 | Eigen::Vector4f &min_pt, Eigen::Vector4f &max_pt) 464 | { 465 | // TicToc t_para; 466 | // prepare momery for all threads 467 | std::vector voxel_mins(threads_); 468 | std::vector voxel_maxs(threads_); 469 | 470 | // If the data is dense, we don't need to check for NaN 471 | if (cloud.is_dense) 472 | { 473 | #pragma omp parallel num_threads(threads_) 474 | { 475 | // thread private 476 | int thread_id = omp_get_thread_num(); 477 | Eigen::Vector4f voxel_min_t, voxel_max_t; 478 | voxel_min_t.setConstant(FLT_MAX); 479 | voxel_max_t.setConstant(-FLT_MAX); 480 | 481 | // #pragma omp for 482 | #pragma omp for schedule(dynamic,1024) 483 | for (size_t i = 0; i < indices.size(); i++) { 484 | pcl::Array4fMapConst pt = cloud.points[indices[i]].getArray4fMap(); 485 | voxel_min_t = voxel_min_t.array().min(pt); 486 | voxel_max_t = voxel_max_t.array().max(pt); 487 | } 488 | voxel_mins[thread_id] = voxel_min_t; 489 | voxel_maxs[thread_id] = voxel_max_t; 490 | } 491 | } 492 | // NaN or Inf values could exist => check for them 493 | else 494 | { 495 | // not be tested yet 496 | #pragma omp parallel num_threads(threads_) 497 | { 498 | int thread_id = omp_get_thread_num(); 499 | Eigen::Vector4f voxel_min_t, voxel_max_t; 500 | voxel_min_t.setConstant(FLT_MAX); // thread private 501 | voxel_max_t.setConstant(-FLT_MAX); 502 | // #pragma omp single 503 | // #pragma omp for 504 | #pragma omp for schedule(dynamic,1024) 505 | for (size_t i = 0; i < indices.size(); i++) { 506 | // Check if the point is invalid 507 | if (!pcl_isfinite (cloud.points[indices[i]].x) || 508 | !pcl_isfinite (cloud.points[indices[i]].y) || 509 | !pcl_isfinite (cloud.points[indices[i]].z)) 510 | continue; 511 | pcl::Array4fMapConst pt = cloud.points[indices[i]].getArray4fMap(); 512 | voxel_min_t = voxel_min_t.array().min(pt); 513 | voxel_max_t = voxel_max_t.array().max(pt); 514 | } 515 | 516 | voxel_mins[thread_id] = voxel_min_t; 517 | voxel_maxs[thread_id] = voxel_max_t; 518 | } 519 | } 520 | 521 | min_pt = voxel_mins[0]; 522 | max_pt = voxel_maxs[0]; 523 | for (size_t i = 1; i < threads_; ++i) 524 | { 525 | min_pt = min_pt.array().min(voxel_mins[i].array()); 526 | max_pt = max_pt.array().max(voxel_maxs[i].array()); 527 | } 528 | // printf("multi thread 4f cost: %fms\n", t_para.toc()); 529 | // printf("min max:\n"); 530 | // std::cout << min_pt.transpose() << std::endl; 531 | // std::cout << max_pt.transpose() << std::endl; 532 | } -------------------------------------------------------------------------------- /voxel_grid_omp.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by hk on 3/26/23. 3 | // 4 | 5 | #pragma once 6 | 7 | #ifndef PCL_FILTERS_VOXEL_GRID_OMP_H 8 | #define PCL_FILTERS_VOXEL_GRID_OMP_H 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "tic_toc.h" 16 | 17 | namespace pcl 18 | { 19 | /** \brief VoxelGridOMP assembles a local 3D grid over a given PointCloud, and downsamples + filters the data., 20 | * in parallel, using the OpenMP standard. 21 | * \author Kai, Huang 22 | */ 23 | // template 24 | class VoxelGridOMP : public VoxelGrid 25 | { 26 | 27 | public: 28 | typedef typename pcl::PointXYZINormal PointT; 29 | typedef typename Filter::PointCloud PointCloud; 30 | typedef typename pcl::PointCloud::Ptr PointCloudPtr; 31 | 32 | public: 33 | /** \brief Initialize the scheduler and set the number of threads to use. 34 | * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) 35 | */ 36 | VoxelGridOMP (unsigned int nr_threads = 0) : 37 | console_print(false), 38 | threads_(nr_threads), 39 | final_filter(true) 40 | {} 41 | 42 | /** \brief Initialize the scheduler and set the number of threads to use. 43 | * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) 44 | */ 45 | void 46 | setNumberOfThreads (unsigned int nr_threads = 0); 47 | 48 | /** \brief Performing the final voxel grid filtering(PCL VoxelGrid). 49 | * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) 50 | */ 51 | void 52 | setFinalFilter (bool flag) {final_filter = flag;} 53 | 54 | bool console_print; 55 | protected: 56 | /** \brief The number of threads the scheduler should use. */ 57 | unsigned int threads_; 58 | 59 | /** \brief Whether to perform the final voxel grid filtering(PCL VoxelGrid) for a strict result. */ 60 | bool final_filter; 61 | private: 62 | /** \brief Get the minimum and maximum values on each of the 3 (x-y-z) dimensions in a given pointcloud 63 | * \param cloud the point cloud data message 64 | * \param indices the vector of point indices to use from \a cloud 65 | * \param min_pt the resultant minimum bounds 66 | * \param max_pt the resultant maximum bounds 67 | * \ingroup common 68 | */ 69 | void 70 | getMinMax3DOMP (const pcl::PointCloud &cloud, const std::vector &indices, 71 | Eigen::Vector4f &min_pt, Eigen::Vector4f &max_pt); 72 | 73 | /** \brief Downsample a Point Cloud using a voxelized grid approach 74 | * \param[out] output the resultant point cloud message 75 | */ 76 | void 77 | applyFilter (PointCloud &output) override; 78 | 79 | }; 80 | } 81 | 82 | 83 | // 84 | //#define PCL_INSTANTIATE_VoxelGrid(T) template class PCL_EXPORTS pcl::VoxelGrid; 85 | //#define PCL_INSTANTIATE_getMinMax3D(T) template PCL_EXPORTS void pcl::getMinMax3D (const pcl::PointCloud::ConstPtr &, const std::string &, float, float, Eigen::Vector4f &, Eigen::Vector4f &, bool); 86 | 87 | 88 | #endif //PCL_FILTERS_VOXEL_GRID_OMP_H 89 | --------------------------------------------------------------------------------