├── APD.cpp ├── APD.cu ├── APD.h ├── CMakeLists.txt ├── LICENSE ├── README.md ├── colmap2mvsnet.py ├── main.cpp └── main.h /APD.cpp: -------------------------------------------------------------------------------- 1 | #include "APD.h" 2 | 3 | bool ReadBinMat(const path &mat_path, cv::Mat &mat) 4 | { 5 | ifstream in(mat_path, std::ios_base::binary); 6 | if (in.bad()) { 7 | std::cerr << "Error opening file: " << mat_path << std::endl; 8 | return false; 9 | } 10 | 11 | int version, rows, cols, type; 12 | in.read((char *)(&version), sizeof(int)); 13 | in.read((char *)(&rows), sizeof(int)); 14 | in.read((char *)(&cols), sizeof(int)); 15 | in.read((char *)(&type), sizeof(int)); 16 | 17 | if (version != 1) { 18 | in.close(); 19 | std::cerr << "Version error: " << mat_path << std::endl; 20 | return false; 21 | } 22 | 23 | mat = cv::Mat(rows, cols, type); 24 | in.read((char *)mat.data, sizeof(char) * mat.step * mat.rows); 25 | in.close(); 26 | return true; 27 | 28 | } 29 | 30 | bool WriteBinMat(const path &mat_path, const cv::Mat &mat) { 31 | 32 | ofstream out(mat_path, std::ios_base::binary); 33 | if (out.bad()) { 34 | std::cout << "Error opening file: " << mat_path << std::endl; 35 | return false; 36 | } 37 | int version = 1; 38 | int rows = mat.rows; 39 | int cols = mat.cols; 40 | int type = mat.type(); 41 | 42 | out.write((char *)&version, sizeof(int)); 43 | out.write((char *)&rows, sizeof(int)); 44 | out.write((char *)&cols, sizeof(int)); 45 | out.write((char *)&type, sizeof(int)); 46 | out.write((char *)mat.data, sizeof(char) * mat.step * mat.rows); 47 | out.close(); 48 | return true; 49 | } 50 | 51 | bool ReadCamera(const path &cam_path, Camera &cam) 52 | { 53 | ifstream in(cam_path); 54 | if (in.bad()) { 55 | return false; 56 | } 57 | 58 | std::string line; 59 | in >> line; 60 | 61 | for (int i = 0; i < 3; ++i) { 62 | in >> cam.R[3 * i + 0] >> cam.R[3 * i + 1] >> cam.R[3 * i + 2] >> cam.t[i]; 63 | } 64 | 65 | float tmp[4]; 66 | in >> tmp[0] >> tmp[1] >> tmp[2] >> 
tmp[3]; 67 | in >> line; 68 | 69 | for (int i = 0; i < 3; ++i) { 70 | in >> cam.K[3 * i + 0] >> cam.K[3 * i + 1] >> cam.K[3 * i + 2]; 71 | } 72 | // compute camera center in world coord 73 | const auto &R = cam.R; 74 | const auto &t = cam.t; 75 | for (int j = 0; j < 3; ++j) { 76 | cam.c[j] = -float(double(R[0 + j])*double(t[0]) + double(R[3 + j])*double(t[1]) + double(R[6 + j])*double(t[2])); 77 | } 78 | // ==================================================================== 79 | // TAT & ETH version read 80 | float depth_num; 81 | float interval; 82 | in >> cam.depth_min >> interval >> depth_num >> cam.depth_max; 83 | // ==================================================================== 84 | ////DTU version read 85 | //float depth_num = 192; 86 | //float interval; 87 | //in >> cam.depth_min >> interval; 88 | //cam.depth_max = interval * depth_num + cam.depth_min; 89 | ////==================================================================== 90 | in.close(); 91 | return true;; 92 | } 93 | 94 | bool ShowDepthMap(const path &depth_path, const cv::Mat& depth, float depth_min, float depth_max) 95 | { 96 | const float deltaDepth = depth_max - depth_min; 97 | // save image 98 | cv::Mat result_img(depth.size(), CV_8UC3, cv::Scalar(0, 0, 0)); 99 | for (int i = 0; i < depth.cols; i++) { 100 | for (int j = 0; j < depth.rows; j++) { 101 | if (depth.at(j, i) < depth_min || depth.at(j, i) > depth_max || isnan(depth.at(j, i))) { 102 | continue; 103 | } 104 | float pixel_val = (depth_max - depth.at(j, i)) / deltaDepth; 105 | if (pixel_val > 1) { 106 | pixel_val = 1; 107 | } 108 | if (pixel_val < 0) { 109 | pixel_val = 0; 110 | } 111 | pixel_val = pixel_val * 255; 112 | if (pixel_val>255) { 113 | pixel_val = 255; 114 | } 115 | else if (pixel_val<0) { 116 | pixel_val = 0; 117 | } 118 | auto &pixel = result_img.at(j, i); 119 | if (pixel_val <= 51) 120 | { 121 | pixel[0] = 255; 122 | pixel[1] = pixel_val * 5; 123 | pixel[2] = 0; 124 | } 125 | else if (pixel_val <= 102) 126 | { 127 | 
pixel_val -= 51; 128 | pixel[0] = 255 - pixel_val * 5; 129 | pixel[1] = 255; 130 | pixel[2] = 0; 131 | } 132 | else if (pixel_val <= 153) 133 | { 134 | pixel_val -= 102; 135 | pixel[0] = 0; 136 | pixel[1] = 255; 137 | pixel[2] = pixel_val * 5; 138 | } 139 | else if (pixel_val <= 204) 140 | { 141 | pixel_val -= 153; 142 | pixel[0] = 0; 143 | pixel[1] = 255 - static_cast(pixel_val * 128.0 / 51 + 0.5); 144 | pixel[2] = 255; 145 | } 146 | else if (pixel_val <= 255) 147 | { 148 | pixel_val -= 204; 149 | pixel[0] = 0; 150 | pixel[1] = 127 - static_cast(pixel_val * 127.0 / 51 + 0.5); 151 | pixel[2] = 255; 152 | } 153 | 154 | } 155 | } 156 | cv::imwrite(depth_path.string(), result_img); 157 | return true; 158 | } 159 | 160 | bool ShowNormalMap(const path &normal_path, const cv::Mat &normal) 161 | { 162 | if (normal.empty()) { 163 | return false; 164 | } 165 | cv::Mat normalized_normal = normal.clone(); 166 | for (int i = 0; i < normalized_normal.rows; i++) { 167 | for (int j = 0; j < normalized_normal.cols; j++) { 168 | cv::Vec3f normal_val = normalized_normal.at(i, j); 169 | float norm = sqrt(pow(normal_val[0], 2) + pow(normal_val[1], 2) + pow(normal_val[2], 2)); 170 | if (norm == 0) { 171 | normalized_normal.at(i, j) = cv::Vec3f(0, 0, 0); 172 | } 173 | else { 174 | normalized_normal.at(i, j) = normal_val / norm; 175 | } 176 | } 177 | } 178 | 179 | cv::Mat img(normalized_normal.size(), CV_8UC3, cv::Scalar(0.f, 0.f, 0.f)); 180 | normalized_normal.convertTo(img, img.type(), 255.f / 2.f, 255.f / 2.f); 181 | cv::imwrite(normal_path.string(), img); 182 | return true; 183 | } 184 | 185 | bool ShowWeakImage(const path &weak_path, const cv::Mat &weak) { 186 | // show image 187 | if (weak.empty()) { 188 | return false; 189 | } 190 | const int width = weak.cols; 191 | const int height = weak.rows; 192 | cv::Mat weak_info_image(height, width, CV_8UC3); 193 | for (int r = 0; r < height; ++r) { 194 | for (int c = 0; c < width; ++c) { 195 | switch (weak.at(r, c)) 196 | { 197 | case 
WEAK: 198 | weak_info_image.at(r, c) = cv::Vec3b(255, 255, 255); 199 | break; 200 | case STRONG: 201 | weak_info_image.at(r, c) = cv::Vec3b(0, 255, 0); 202 | break; 203 | case UNKNOWN: 204 | weak_info_image.at(r, c) = cv::Vec3b(0, 0, 255); 205 | break; 206 | } 207 | } 208 | } 209 | // save 210 | cv::imwrite(weak_path.string(), weak_info_image); 211 | return true; 212 | } 213 | 214 | bool ExportPointCloud(const path& point_cloud_path, std::vector& pointcloud) 215 | { 216 | ofstream out(point_cloud_path, std::ios::binary); 217 | if (out.bad()) { 218 | return false; 219 | } 220 | 221 | out << "ply\n"; 222 | out << "format binary_little_endian 1.0\n"; 223 | out << "element vertex " << int(pointcloud.size()) << "\n"; 224 | out << "property float x\n"; 225 | out << "property float y\n"; 226 | out << "property float z\n"; 227 | out << "property uchar diffuse_blue\n"; 228 | out << "property uchar diffuse_green\n"; 229 | out << "property uchar diffuse_red\n"; 230 | out << "end_header\n"; 231 | 232 | for (size_t idx = 0; idx < pointcloud.size(); idx++) 233 | { 234 | float px = pointcloud[idx].coord.x; 235 | float py = pointcloud[idx].coord.y; 236 | float pz = pointcloud[idx].coord.z; 237 | 238 | 239 | cv::Vec3b pixel; 240 | pixel[0] = static_cast(pointcloud[idx].color.x); 241 | pixel[1] = static_cast(pointcloud[idx].color.y); 242 | pixel[2] = static_cast(pointcloud[idx].color.z); 243 | 244 | out.write((char *)&px, sizeof(float)); 245 | out.write((char *)&py, sizeof(float)); 246 | out.write((char *)&pz, sizeof(float)); 247 | 248 | out.write((char *)&pixel[0], sizeof(uchar)); 249 | out.write((char *)&pixel[1], sizeof(uchar)); 250 | out.write((char *)&pixel[2], sizeof(uchar)); 251 | } 252 | out.close(); 253 | return true; 254 | } 255 | 256 | void StringAppendV(std::string* dst, const char* format, va_list ap) { 257 | // First try with a small fixed size buffer. 
258 | static const int kFixedBufferSize = 1024; 259 | char fixed_buffer[kFixedBufferSize]; 260 | 261 | // It is possible for methods that use a va_list to invalidate 262 | // the data in it upon use. The fix is to make a copy 263 | // of the structure before using it and use that copy instead. 264 | va_list backup_ap; 265 | va_copy(backup_ap, ap); 266 | int result = vsnprintf(fixed_buffer, kFixedBufferSize, format, backup_ap); 267 | va_end(backup_ap); 268 | 269 | if (result < kFixedBufferSize) { 270 | if (result >= 0) { 271 | // Normal case - everything fits. 272 | dst->append(fixed_buffer, result); 273 | return; 274 | } 275 | 276 | #ifdef _MSC_VER 277 | // Error or MSVC running out of space. MSVC 8.0 and higher 278 | // can be asked about space needed with the special idiom below: 279 | va_copy(backup_ap, ap); 280 | result = vsnprintf(nullptr, 0, format, backup_ap); 281 | va_end(backup_ap); 282 | #endif 283 | 284 | if (result < 0) { 285 | // Just an error. 286 | return; 287 | } 288 | } 289 | 290 | // Increase the buffer size to the size requested by vsnprintf, 291 | // plus one for the closing \0. 292 | const int variable_buffer_size = result + 1; 293 | std::unique_ptr variable_buffer(new char[variable_buffer_size]); 294 | 295 | // Restore the va_list before we use it again. 296 | va_copy(backup_ap, ap); 297 | result = 298 | vsnprintf(variable_buffer.get(), variable_buffer_size, format, backup_ap); 299 | va_end(backup_ap); 300 | 301 | if (result >= 0 && result < variable_buffer_size) { 302 | dst->append(variable_buffer.get(), result); 303 | } 304 | } 305 | 306 | std::string StringPrintf(const char* format, ...) 
{ 307 | va_list ap; 308 | va_start(ap, format); 309 | std::string result; 310 | StringAppendV(&result, format, ap); 311 | va_end(ap); 312 | return result; 313 | } 314 | 315 | void CudaSafeCall(const cudaError_t error, const std::string& file, 316 | const int line) { 317 | if (error != cudaSuccess) { 318 | std::cerr << StringPrintf("%s in %s at line %i", cudaGetErrorString(error), 319 | file.c_str(), line) 320 | << std::endl; 321 | exit(EXIT_FAILURE); 322 | } 323 | } 324 | 325 | void CudaCheckError(const char* file, const int line) { 326 | cudaError error = cudaGetLastError(); 327 | if (error != cudaSuccess) { 328 | std::cerr << StringPrintf("cudaCheckError() failed at %s:%i : %s", file, 329 | line, cudaGetErrorString(error)) 330 | << std::endl; 331 | exit(EXIT_FAILURE); 332 | } 333 | 334 | // More careful checking. However, this will affect performance. 335 | // Comment away if needed. 336 | error = cudaDeviceSynchronize(); 337 | if (cudaSuccess != error) { 338 | std::cerr << StringPrintf("cudaCheckError() with sync failed at %s:%i : %s", 339 | file, line, cudaGetErrorString(error)) 340 | << std::endl; 341 | std::cerr 342 | << "This error is likely caused by the graphics card timeout " 343 | "detection mechanism of your operating system. Please refer to " 344 | "the FAQ in the documentation on how to solve this problem." 
345 | << std::endl; 346 | exit(EXIT_FAILURE); 347 | } 348 | } 349 | 350 | std::string ToFormatIndex(int index) { 351 | std::stringstream ss; 352 | ss << std::setw(8) << std::setfill('0') << index; 353 | return ss.str(); 354 | } 355 | 356 | APD::APD(const Problem &problem) { 357 | params_host = problem.params; 358 | this->problem = problem; 359 | } 360 | 361 | APD::~APD() { 362 | delete[] plane_hypotheses_host; 363 | // free images 364 | { 365 | for (int i = 0; i < num_images; ++i) { 366 | cudaDestroyTextureObject(texture_objects_host.images[i]); 367 | cudaFreeArray(cuArray[i]); 368 | } 369 | cudaFree(texture_objects_cuda); 370 | } 371 | // may free depths 372 | if (params_host.geom_consistency) { 373 | for (int i = 0; i < num_images; ++i) { 374 | cudaDestroyTextureObject(texture_depths_host.images[i]); 375 | cudaFreeArray(cuDepthArray[i]); 376 | } 377 | cudaFree(texture_depths_cuda); 378 | } 379 | cudaFree(cameras_cuda); 380 | cudaFree(plane_hypotheses_cuda); 381 | cudaFree(fit_plane_hypotheses_cuda); 382 | cudaFree(costs_cuda); 383 | cudaFree(rand_states_cuda); 384 | cudaFree(selected_views_cuda); 385 | cudaFree(params_cuda); 386 | cudaFree(helper_cuda); 387 | cudaFree(neighbours_cuda); 388 | cudaFree(neigbours_map_cuda); 389 | cudaFree(weak_info_cuda); 390 | cudaFree(weak_reliable_cuda); 391 | cudaFree(view_weight_cuda); 392 | cudaFree(weak_nearest_strong); 393 | #ifdef DEBUG_COST_LINE 394 | cudaFree(weak_ncc_cost_cuda); 395 | #endif // DEBUG_COST_LINE 396 | 397 | } 398 | 399 | void APD::InuputInitialization() { 400 | images.clear(); 401 | cameras.clear(); 402 | // get folder 403 | path image_folder = problem.dense_folder / path("images"); 404 | path cam_folder = problem.dense_folder / path("cams"); 405 | //path weak_folder = problem.dense_folder / path("weaks"); 406 | // ================================================= 407 | // read ref image and src images 408 | // ref 409 | { 410 | path ref_image_path = image_folder / path(ToFormatIndex(problem.ref_image_id) 
+ ".jpg"); 411 | cv::Mat_ image_uint = cv::imread(ref_image_path.string(), cv::IMREAD_GRAYSCALE); 412 | cv::Mat image_float; 413 | image_uint.convertTo(image_float, CV_32FC1); 414 | images.push_back(image_float); 415 | width = image_float.cols; 416 | height = image_float.rows; 417 | } 418 | // src 419 | for (const auto &src_idx : problem.src_image_ids) { 420 | path src_image_path = image_folder / path(ToFormatIndex(src_idx) + ".jpg"); 421 | cv::Mat_ image_uint = cv::imread(src_image_path.string(), cv::IMREAD_GRAYSCALE); 422 | cv::Mat image_float; 423 | image_uint.convertTo(image_float, CV_32FC1); 424 | images.push_back(image_float); 425 | // assert: images_float.cols == width; 426 | // assert: images_float.rows == height; 427 | } 428 | if (images.size() > MAX_IMAGES) { 429 | std::cerr << "Can't process so much images: " << images.size() << std::endl; 430 | exit(EXIT_FAILURE); 431 | } 432 | // ================================================= 433 | // read ref camera and src camera 434 | // ref 435 | { 436 | path ref_cam_path = cam_folder / path(ToFormatIndex(problem.ref_image_id) + "_cam.txt"); 437 | Camera cam; 438 | ReadCamera(ref_cam_path, cam); 439 | cam.width = width; 440 | cam.height = height; 441 | cameras.push_back(cam); 442 | } 443 | // src 444 | for (const auto &src_idx : problem.src_image_ids) { 445 | path src_cam_path = cam_folder / path(ToFormatIndex(src_idx) + "_cam.txt"); 446 | Camera cam; 447 | ReadCamera(src_cam_path, cam); 448 | cam.width = width; 449 | cam.height = height; 450 | cameras.push_back(cam); 451 | } 452 | // ================================================= 453 | // set some params 454 | params_host.depth_min = cameras[0].depth_min * 0.6f; 455 | params_host.depth_max = cameras[0].depth_max * 1.2f; 456 | params_host.num_images = (int)images.size(); 457 | num_images = (int)images.size(); 458 | // ================================================= 459 | std::cout << "Read images and camera done\n"; 460 | std::cout << "Depth range: " << 
params_host.depth_min << " " << params_host.depth_max << std::endl; 461 | std::cout << "Num images: " << params_host.num_images << std::endl; 462 | // ================================================= 463 | // scale images 464 | if (problem.scale_size != 1) { 465 | for (int i = 0; i < num_images; ++i) { 466 | const float factor = 1.0f / (float)(problem.scale_size); 467 | const int new_cols = std::round(images[i].cols * factor); 468 | const int new_rows = std::round(images[i].rows * factor); 469 | 470 | const float scale_x = new_cols / static_cast(images[i].cols); 471 | const float scale_y = new_rows / static_cast(images[i].rows); 472 | 473 | cv::Mat_ scaled_image_float; 474 | cv::resize(images[i], scaled_image_float, cv::Size(new_cols, new_rows), 0, 0, cv::INTER_LINEAR); 475 | images[i] = scaled_image_float.clone(); 476 | 477 | width = scaled_image_float.cols; 478 | height = scaled_image_float.rows; 479 | 480 | cameras[i].K[0] *= scale_x; 481 | cameras[i].K[2] *= scale_x; 482 | cameras[i].K[4] *= scale_y; 483 | cameras[i].K[5] *= scale_y; 484 | cameras[i].width = width; 485 | cameras[i].height = height; 486 | } 487 | std::cout << "Scale images and cameras done\n"; 488 | } 489 | std::cout << "Image size: " << width << " * " << height << std::endl; 490 | // ================================================= 491 | // read depth form geom consistency 492 | if (params_host.geom_consistency) { 493 | depths.clear(); 494 | path ref_depth_path = problem.result_folder / path("depths.dmb"); 495 | cv::Mat ref_depth; 496 | ReadBinMat(ref_depth_path, ref_depth); 497 | depths.push_back(ref_depth); 498 | for (const auto &src_idx : problem.src_image_ids) { 499 | path src_depth_path = problem.dense_folder / path("APD") / path(ToFormatIndex(src_idx)) / path("depths.dmb"); 500 | cv::Mat src_depth; 501 | ReadBinMat(src_depth_path, src_depth); 502 | depths.push_back(src_depth); 503 | } 504 | for (auto &depth : depths) { 505 | if (depth.cols != width || depth.rows != height) { 506 | 
RescaleMatToTargetSize(depth, depth, cv::Size(width, height)); 507 | } 508 | } 509 | 510 | } 511 | // ================================================= 512 | // read weak info 513 | if (params_host.use_APD) { 514 | path weak_info_path = problem.result_folder / path("weak.bin"); 515 | if (!exists(weak_info_path)) { 516 | std::cerr << "Can't find weak info file: " << weak_info_path.string() << std::endl; 517 | exit(EXIT_FAILURE); 518 | } 519 | ReadBinMat(weak_info_path, weak_info_host); 520 | if (weak_info_host.cols != width || weak_info_host.rows != height) { 521 | std::cerr << "Weak info doesn't match the images' size!\n"; 522 | RescaleMatToTargetSize(weak_info_host, weak_info_host, cv::Size(width, height)); 523 | std::cout << "Scale done\n"; 524 | } 525 | 526 | neighbours_map_host = cv::Mat::zeros(weak_info_host.size(), CV_32SC1); 527 | weak_count = 0; 528 | for (int r = 0; r < weak_info_host.rows; ++r) { 529 | for (int c = 0; c < weak_info_host.cols; ++c) { 530 | int val = weak_info_host.at(r, c); 531 | // point is weak 532 | if (val == WEAK) { 533 | neighbours_map_host.at(r, c) = weak_count; 534 | weak_count++; 535 | } 536 | } 537 | } 538 | std::cout << "Weak count: " << weak_count << " / " << weak_info_host.cols * weak_info_host.rows << " = " << (float)weak_count / (float)(weak_info_host.cols * weak_info_host.rows) * 100 << "%" << std::endl; 539 | } 540 | else { 541 | weak_info_host = cv::Mat::zeros(height, width, CV_8UC1); 542 | weak_count = 0; 543 | for (int r = 0; r < weak_info_host.rows; ++r) { 544 | for (int c = 0; c < weak_info_host.cols; ++c) { 545 | weak_info_host.at(r, c) = STRONG; 546 | } 547 | } 548 | } 549 | // ================================================= 550 | plane_hypotheses_host = new float4[cameras[0].height * cameras[0].width]; 551 | selected_views_host = cv::Mat::zeros(height, width, CV_32SC1); 552 | if (params_host.state != FIRST_INIT) { 553 | // input plane hypotheses from existed result 554 | path depth_path = problem.result_folder / 
path("depths.dmb"); 555 | path normal_path = problem.result_folder / path("normals.dmb"); 556 | cv::Mat depth, normal; 557 | ReadBinMat(depth_path, depth); 558 | ReadBinMat(normal_path, normal); 559 | if (depth.cols != width || depth.rows != height || normal.cols != width || normal.rows != height) { 560 | std::cerr << "Depth and Normal doesn't match the images' size!\n"; 561 | RescaleMatToTargetSize(depth, depth, cv::Size2i(width, height)); 562 | RescaleMatToTargetSize(normal, normal, cv::Size2i(width, height)); 563 | } 564 | for (int col = 0; col < width; ++col) { 565 | for (int row = 0; row < height; ++row) { 566 | int center = row * width + col; 567 | plane_hypotheses_host[center].w = depth.at(row, col); 568 | plane_hypotheses_host[center].x = normal.at(row, col)[0]; 569 | plane_hypotheses_host[center].y = normal.at(row, col)[1]; 570 | plane_hypotheses_host[center].z = normal.at(row, col)[2]; 571 | } 572 | } 573 | { 574 | path selected_view_path = problem.result_folder / path("selected_views.bin"); 575 | ReadBinMat(selected_view_path, selected_views_host); 576 | if (selected_views_host.cols != width || selected_views_host.rows != height) { 577 | std::cerr << "Select view doesn't match the images' size!\n"; 578 | RescaleMatToTargetSize(selected_views_host, selected_views_host, cv::Size2i(width, height)); 579 | } 580 | } 581 | } 582 | // ================================================= 583 | } 584 | 585 | void APD::CudaSpaceInitialization() { 586 | // ================================================= 587 | // move images to gpu 588 | for (int i = 0; i < num_images; ++i) { 589 | cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); 590 | cudaMallocArray(&cuArray[i], &channelDesc, width, height); 591 | cudaMemcpy2DToArray(cuArray[i], 0, 0, images[i].ptr(), images[i].step[0], width * sizeof(float), height, cudaMemcpyHostToDevice); 592 | struct cudaResourceDesc resDesc; 593 | memset(&resDesc, 0, sizeof(cudaResourceDesc)); 
594 | resDesc.resType = cudaResourceTypeArray; 595 | resDesc.res.array.array = cuArray[i]; 596 | struct cudaTextureDesc texDesc; 597 | memset(&texDesc, 0, sizeof(cudaTextureDesc)); 598 | texDesc.addressMode[0] = cudaAddressModeWrap; 599 | texDesc.addressMode[1] = cudaAddressModeWrap; 600 | texDesc.filterMode = cudaFilterModeLinear; 601 | texDesc.readMode = cudaReadModeElementType; 602 | texDesc.normalizedCoords = 0; 603 | cudaCreateTextureObject(&(texture_objects_host.images[i]), &resDesc, &texDesc, NULL); 604 | } 605 | cudaMalloc((void**)&texture_objects_cuda, sizeof(cudaTextureObjects)); 606 | cudaMemcpy(texture_objects_cuda, &texture_objects_host, sizeof(cudaTextureObjects), cudaMemcpyHostToDevice); 607 | // may move depths to gpu 608 | if (params_host.geom_consistency) { 609 | for (int i = 0; i < num_images; ++i) { 610 | int height = depths[i].rows; 611 | int width = depths[i].cols; 612 | cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); 613 | cudaMallocArray(&cuDepthArray[i], &channelDesc, width, height); 614 | cudaMemcpy2DToArray(cuDepthArray[i], 0, 0, depths[i].ptr(), depths[i].step[0], width * sizeof(float), height, cudaMemcpyHostToDevice); 615 | struct cudaResourceDesc resDesc; 616 | memset(&resDesc, 0, sizeof(cudaResourceDesc)); 617 | resDesc.resType = cudaResourceTypeArray; 618 | resDesc.res.array.array = cuDepthArray[i]; 619 | struct cudaTextureDesc texDesc; 620 | memset(&texDesc, 0, sizeof(cudaTextureDesc)); 621 | texDesc.addressMode[0] = cudaAddressModeWrap; 622 | texDesc.addressMode[1] = cudaAddressModeWrap; 623 | texDesc.filterMode = cudaFilterModeLinear; 624 | texDesc.readMode = cudaReadModeElementType; 625 | texDesc.normalizedCoords = 0; 626 | cudaCreateTextureObject(&(texture_depths_host.images[i]), &resDesc, &texDesc, NULL); 627 | } 628 | cudaMalloc((void**)&texture_depths_cuda, sizeof(cudaTextureObjects)); 629 | cudaMemcpy(texture_depths_cuda, &texture_depths_host, sizeof(cudaTextureObjects), 
cudaMemcpyHostToDevice); 630 | } 631 | // ================================================= 632 | // move camera to gpu 633 | cudaMalloc((void**)&cameras_cuda, sizeof(Camera) * (num_images)); 634 | cudaMemcpy(cameras_cuda, &cameras[0], sizeof(Camera) * (num_images), cudaMemcpyHostToDevice); 635 | // malloc memory for important data structure 636 | const int length = width * height; 637 | // define cost 638 | cudaMalloc((void**)&costs_cuda, sizeof(float) * length); 639 | // malloc memory for rand states 640 | cudaMalloc((void**)&rand_states_cuda, sizeof(curandState) * length); 641 | // malloc for selected_views 642 | cudaMalloc((void**)&selected_views_cuda, sizeof(unsigned int) * length); 643 | cudaMemcpy(selected_views_cuda, selected_views_host.ptr(0), sizeof(unsigned int) * length, cudaMemcpyHostToDevice); 644 | // view weight 645 | cudaMalloc((void**)&view_weight_cuda, sizeof(uchar) * length * MAX_IMAGES); 646 | // move plane hypotheses to gpu 647 | cudaMalloc((void**)&plane_hypotheses_cuda, sizeof(float4) * length); 648 | cudaMemcpy(plane_hypotheses_cuda, plane_hypotheses_host, sizeof(float4) * length, cudaMemcpyHostToDevice); 649 | // malloc memory for fit plane 650 | cudaMalloc((void**)&fit_plane_hypotheses_cuda, sizeof(float4) * length); 651 | cudaMemset(fit_plane_hypotheses_cuda, 0, sizeof(float4) * length); 652 | // malloc memory for weak info 653 | cudaMalloc((void **)(&weak_info_cuda), length * sizeof(uchar)); 654 | cudaMemcpy(weak_info_cuda, weak_info_host.ptr(0), length * sizeof(uchar), cudaMemcpyHostToDevice); 655 | // malloc memory for weak reliable info 656 | cudaMalloc((void **)(&weak_reliable_cuda), length * sizeof(uchar)); 657 | // malloc memory for nearest strong points 658 | cudaMalloc((void**)(&weak_nearest_strong), length * sizeof(short2)); 659 | // move neighbour map to gpu 660 | cudaMalloc((void**)(&neigbours_map_cuda), length * sizeof(int)); 661 | cudaMemcpy(neigbours_map_cuda, neighbours_map_host.ptr(0), length * sizeof(int), 
cudaMemcpyHostToDevice); 662 | // malloc memory for deformable ncc 663 | cudaMalloc((void **)(&neighbours_cuda), weak_count * NEIGHBOUR_NUM * sizeof(short2)); 664 | // move param to gpu 665 | cudaMalloc((void**)(¶ms_cuda), sizeof(PatchMatchParams)); 666 | cudaMemcpy(params_cuda, ¶ms_host, sizeof(PatchMatchParams), cudaMemcpyHostToDevice); 667 | // ================================================= 668 | #ifdef DEBUG_COST_LINE 669 | cudaMalloc((void**)(&weak_ncc_cost_cuda), sizeof(float) * width * height * 61); 670 | #endif // DEBUG_COST_LINE 671 | } 672 | 673 | void APD::SetDataPassHelperInCuda() { 674 | helper_host.width = this->width; 675 | helper_host.height = this->height; 676 | helper_host.ref_index = this->problem.ref_image_id; 677 | helper_host.texture_depths_cuda = this->texture_depths_cuda; 678 | helper_host.texture_objects_cuda = this->texture_objects_cuda; 679 | helper_host.cameras_cuda = this->cameras_cuda; 680 | helper_host.costs_cuda = this->costs_cuda; 681 | helper_host.neighbours_cuda = this->neighbours_cuda; 682 | helper_host.neighbours_map_cuda = this->neigbours_map_cuda; 683 | helper_host.plane_hypotheses_cuda = this->plane_hypotheses_cuda; 684 | helper_host.rand_states_cuda = this->rand_states_cuda; 685 | helper_host.selected_views_cuda = this->selected_views_cuda; 686 | helper_host.weak_info_cuda = this->weak_info_cuda; 687 | helper_host.params = params_cuda; 688 | helper_host.debug_point = make_int2(DEBUG_POINT_X, DEBUG_POINT_Y); 689 | helper_host.show_ncc_info = false; 690 | helper_host.fit_plane_hypotheses_cuda = fit_plane_hypotheses_cuda; 691 | helper_host.weak_reliable_cuda = weak_reliable_cuda; 692 | helper_host.view_weight_cuda = view_weight_cuda; 693 | helper_host.weak_nearest_strong = weak_nearest_strong; 694 | #ifdef DEBUG_COST_LINE 695 | helper_host.weak_ncc_cost_cuda = weak_ncc_cost_cuda; 696 | #endif // DEBUG_COST_LINE 697 | cudaMalloc((void**)(&helper_cuda), sizeof(DataPassHelper)); 698 | cudaMemcpy(helper_cuda, &helper_host, 
sizeof(DataPassHelper), cudaMemcpyHostToDevice); 699 | } 700 | 701 | float4 APD::GetPlaneHypothesis(int r, int c) { 702 | return plane_hypotheses_host[c + r * width]; 703 | } 704 | 705 | cv::Mat APD::GetPixelStates() { 706 | return weak_info_host; 707 | } 708 | 709 | cv::Mat APD::GetSelectedViews() { 710 | return selected_views_host; 711 | } 712 | 713 | int APD::GetWidth() { 714 | return width; 715 | } 716 | 717 | int APD::GetHeight() { 718 | return height; 719 | } 720 | 721 | float APD::GetDepthMin() { 722 | return params_host.depth_min; 723 | } 724 | 725 | float APD::GetDepthMax() { 726 | return params_host.depth_max; 727 | } 728 | 729 | void RescaleImageAndCamera(cv::Mat &src, cv::Mat &dst, cv::Mat &depth, Camera &camera) 730 | { 731 | const int cols = depth.cols; 732 | const int rows = depth.rows; 733 | 734 | if (cols == src.cols && rows == src.rows) { 735 | dst = src.clone(); 736 | return; 737 | } 738 | 739 | const float scale_x = cols / static_cast(src.cols); 740 | const float scale_y = rows / static_cast(src.rows); 741 | 742 | cv::resize(src, dst, cv::Size(cols, rows), 0, 0, cv::INTER_LINEAR); 743 | 744 | camera.K[0] *= scale_x; 745 | camera.K[2] *= scale_x; 746 | camera.K[4] *= scale_y; 747 | camera.K[5] *= scale_y; 748 | camera.width = cols; 749 | camera.height = rows; 750 | } 751 | 752 | template 753 | void RescaleMatToTargetSize(const cv::Mat &src, cv::Mat &dst, const cv::Size2i &target_size) { 754 | if (src.cols == target_size.width && src.rows == target_size.height) { 755 | return; 756 | } 757 | const float scale_x = target_size.width / static_cast(src.cols); 758 | const float scale_y = target_size.height / static_cast(src.rows); 759 | 760 | int type = src.type(); 761 | cv::Mat src_clone = src.clone(); 762 | dst = cv::Mat(target_size.height, target_size.width, type); 763 | 764 | for (int r = 0; r < target_size.height; ++r) { 765 | for (int c = 0; c < target_size.width; ++c) { 766 | int o_r = static_cast(r / scale_x); 767 | int o_c = static_cast(c / 
scale_y); 768 | if (o_r < 0 || o_c < 0 || o_r >= src_clone.rows || o_c >= src_clone.cols) { 769 | continue; 770 | } 771 | dst.at(r, c) = src_clone.at(o_r, o_c); 772 | } 773 | } 774 | } 775 | 776 | float3 Get3DPointonWorld(const int x, const int y, const float depth, const Camera camera) 777 | { 778 | float3 pointX; 779 | float3 tmpX; 780 | // Reprojection 781 | pointX.x = depth * (x - camera.K[2]) / camera.K[0]; 782 | pointX.y = depth * (y - camera.K[5]) / camera.K[4]; 783 | pointX.z = depth; 784 | 785 | // Rotation 786 | tmpX.x = camera.R[0] * pointX.x + camera.R[3] * pointX.y + camera.R[6] * pointX.z; 787 | tmpX.y = camera.R[1] * pointX.x + camera.R[4] * pointX.y + camera.R[7] * pointX.z; 788 | tmpX.z = camera.R[2] * pointX.x + camera.R[5] * pointX.y + camera.R[8] * pointX.z; 789 | 790 | // Transformation 791 | float3 C; 792 | C.x = -(camera.R[0] * camera.t[0] + camera.R[3] * camera.t[1] + camera.R[6] * camera.t[2]); 793 | C.y = -(camera.R[1] * camera.t[0] + camera.R[4] * camera.t[1] + camera.R[7] * camera.t[2]); 794 | C.z = -(camera.R[2] * camera.t[0] + camera.R[5] * camera.t[1] + camera.R[8] * camera.t[2]); 795 | pointX.x = tmpX.x + C.x; 796 | pointX.y = tmpX.y + C.y; 797 | pointX.z = tmpX.z + C.z; 798 | 799 | return pointX; 800 | } 801 | 802 | void ProjectCamera(const float3 PointX, const Camera camera, float2 &point, float &depth) 803 | { 804 | float3 tmp; 805 | tmp.x = camera.R[0] * PointX.x + camera.R[1] * PointX.y + camera.R[2] * PointX.z + camera.t[0]; 806 | tmp.y = camera.R[3] * PointX.x + camera.R[4] * PointX.y + camera.R[5] * PointX.z + camera.t[1]; 807 | tmp.z = camera.R[6] * PointX.x + camera.R[7] * PointX.y + camera.R[8] * PointX.z + camera.t[2]; 808 | 809 | depth = camera.K[6] * tmp.x + camera.K[7] * tmp.y + camera.K[8] * tmp.z; 810 | point.x = (camera.K[0] * tmp.x + camera.K[1] * tmp.y + camera.K[2] * tmp.z) / depth; 811 | point.y = (camera.K[3] * tmp.x + camera.K[4] * tmp.y + camera.K[5] * tmp.z) / depth; 812 | } 813 | 814 | float GetAngle(const 
cv::Vec3f &v1, const cv::Vec3f &v2) 815 | { 816 | float dot_product = v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2]; 817 | float angle = acosf(dot_product); 818 | //if angle is not a number the dot product was 1 and thus the two vectors should be identical --> return 0 819 | if (angle != angle) 820 | return 0.0f; 821 | 822 | return angle; 823 | } 824 | 825 | // ETH version 826 | void RunFusion(const path &dense_folder, const std::vector &problems) 827 | { 828 | int num_images = problems.size(); 829 | path image_folder = dense_folder / path("images"); 830 | path cam_folder = dense_folder / path("cams"); 831 | 832 | std::vector images; 833 | std::vector cameras; 834 | std::vector depths; 835 | std::vector normals; 836 | std::vector masks; 837 | std::vector blocks; 838 | std::vector weaks; 839 | images.clear(); 840 | cameras.clear(); 841 | depths.clear(); 842 | normals.clear(); 843 | masks.clear(); 844 | blocks.clear(); 845 | weaks.clear(); 846 | std::unordered_map imageIdToindexMap; 847 | 848 | path block_folder = dense_folder / path("blocks"); 849 | bool use_block = false; 850 | if (exists(block_folder)) { 851 | use_block = true; 852 | } 853 | 854 | for (int i = 0; i < num_images; ++i) { 855 | const auto &problem = problems[i]; 856 | std::cout << "Reading image " << std::setw(8) << std::setfill('0') << i << "..." 
<< std::endl; 857 | path image_path = image_folder / path(ToFormatIndex(problem.ref_image_id) + ".jpg"); 858 | imageIdToindexMap.emplace(problem.ref_image_id, i); 859 | cv::Mat image = cv::imread(image_path.string(), cv::IMREAD_COLOR); 860 | path cam_path = cam_folder / path(ToFormatIndex(problem.ref_image_id) + "_cam.txt"); 861 | Camera camera; 862 | ReadCamera(cam_path, camera); 863 | 864 | path depth_path = problem.result_folder / path("depths.dmb"); 865 | path normal_path = problem.result_folder / path("normals.dmb"); 866 | path weak_path = problem.result_folder / path("weak.bin"); 867 | cv::Mat depth, normal, weak; 868 | ReadBinMat(depth_path, depth); 869 | ReadBinMat(normal_path, normal); 870 | ReadBinMat(weak_path, weak); 871 | 872 | if (use_block) { 873 | path block_path = block_folder / path("mask_" + std::to_string(problem.ref_image_id) + ".jpg"); 874 | cv::Mat block_jpg = cv::imread(block_path.string(), cv::IMREAD_GRAYSCALE); 875 | blocks.emplace_back(block_jpg); 876 | } 877 | 878 | cv::Mat scaled_image; 879 | RescaleImageAndCamera(image, scaled_image, depth, camera); 880 | images.emplace_back(scaled_image); 881 | cameras.emplace_back(camera); 882 | depths.emplace_back(depth); 883 | normals.emplace_back(normal); 884 | cv::Mat mask = cv::Mat::zeros(depth.rows, depth.cols, CV_8UC1); 885 | masks.emplace_back(mask); 886 | RescaleMatToTargetSize(weak, weak, cv::Size2i(depth.cols, depth.rows)); 887 | weaks.emplace_back(weak); 888 | } 889 | std::vector PointCloud; 890 | PointCloud.clear(); 891 | 892 | for (int i = 0; i < num_images; ++i) { 893 | std::cout << "Fusing image " << std::setw(8) << std::setfill('0') << i << "..." 
<< std::endl; 894 | const auto &problem = problems[i]; 895 | int ref_index = imageIdToindexMap[problem.ref_image_id]; 896 | const int cols = depths[ref_index].cols; 897 | const int rows = depths[ref_index].rows; 898 | int num_ngb = problem.src_image_ids.size(); 899 | for (int r = 0; r < rows; ++r) { 900 | for (int c = 0; c < cols; ++c) { 901 | if (use_block && blocks[ref_index].at(r, c) < 128) { 902 | continue; 903 | } 904 | 905 | if (masks[ref_index].at(r, c) == 1) { 906 | continue; 907 | } 908 | 909 | float ref_depth = depths[ref_index].at(r, c); 910 | if (ref_depth <= 0.0) 911 | continue; 912 | const cv::Vec3f ref_normal = normals[ref_index].at(r, c); 913 | float3 PointX = Get3DPointonWorld(c, r, ref_depth, cameras[ref_index]); 914 | float3 consistent_Point = PointX; 915 | int num_consistent = 0; 916 | float dynamic_consistency = 0.0f; 917 | std::vector used_list(num_ngb, make_int2(-1, -1)); 918 | for (int j = 0; j < num_ngb; ++j) { 919 | int src_index = imageIdToindexMap[problem.src_image_ids[j]]; 920 | const int src_cols = depths[src_index].cols; 921 | const int src_rows = depths[src_index].rows; 922 | float2 point; 923 | float proj_depth; 924 | ProjectCamera(PointX, cameras[src_index], point, proj_depth); 925 | int src_r = int(point.y + 0.5f); 926 | int src_c = int(point.x + 0.5f); 927 | if (src_c >= 0 && src_c < src_cols && src_r >= 0 && src_r < src_rows) { 928 | if (masks[src_index].at(src_r, src_c) == 1) 929 | continue; 930 | float src_depth = depths[src_index].at(src_r, src_c); 931 | if (src_depth <= 0.0) 932 | continue; 933 | const cv::Vec3f src_normal = normals[src_index].at(src_r, src_c); 934 | float3 tmp_X = Get3DPointonWorld(src_c, src_r, src_depth, cameras[src_index]); 935 | float2 tmp_pt; 936 | ProjectCamera(tmp_X, cameras[ref_index], tmp_pt, proj_depth); 937 | float reproj_error = sqrt(pow(c - tmp_pt.x, 2) + pow(r - tmp_pt.y, 2)); 938 | float relative_depth_diff = fabs(proj_depth - ref_depth) / ref_depth; 939 | float angle = GetAngle(ref_normal, 
src_normal); 940 | 941 | if (reproj_error < 2.0f && relative_depth_diff < 0.01f && angle < 0.174533f) { 942 | used_list[j].x = src_c; 943 | used_list[j].y = src_r; 944 | float tmp_index = reproj_error + 200 * relative_depth_diff + angle * 10; 945 | dynamic_consistency += exp(-tmp_index); 946 | num_consistent++; 947 | } 948 | } 949 | } 950 | float factor = (weaks[ref_index].at(r, c) == WEAK ? 0.45f : 0.3f); 951 | if (num_consistent >= 1 && (dynamic_consistency > factor * num_consistent)) { 952 | PointList point3D; 953 | point3D.coord = consistent_Point; 954 | float consistent_Color[3] = { (float)images[ref_index].at(r, c)[0], (float)images[ref_index].at(r, c)[1], (float)images[ref_index].at(r, c)[2] }; 955 | for (int j = 0; j < num_ngb; ++j) { 956 | if (used_list[j].x == -1) 957 | continue; 958 | int src_index = imageIdToindexMap[problem.src_image_ids[j]]; 959 | masks[src_index].at(used_list[j].y, used_list[j].x) = 1; 960 | const auto &color = images[src_index].at(used_list[j].y, used_list[j].x); 961 | consistent_Color[0] += color[0]; 962 | consistent_Color[1] += color[1]; 963 | consistent_Color[2] += color[2]; 964 | } 965 | consistent_Color[0] /= (num_consistent + 1); 966 | consistent_Color[1] /= (num_consistent + 1); 967 | consistent_Color[2] /= (num_consistent + 1); 968 | point3D.color = make_float3(consistent_Color[0], consistent_Color[1], consistent_Color[2]); 969 | PointCloud.emplace_back(point3D); 970 | } 971 | 972 | } 973 | } 974 | } 975 | path ply_path = dense_folder / path("APD") / path("APD.ply"); 976 | ExportPointCloud(ply_path, PointCloud); 977 | } 978 | 979 | void RunFusion_TAT_Intermediate(const path &dense_folder, const std::vector &problems) 980 | { 981 | int num_images = problems.size(); 982 | path image_folder = dense_folder / path("images"); 983 | path cam_folder = dense_folder / path("cams"); 984 | const float dist_base = 0.25f; 985 | const float depth_base = 1.0f / 3500.0f; 986 | 987 | const float angle_base = 0.06981317007977318f; // 4 degree 
988 | const float angle_grad = 0.05235987755982988f; // 3 degree 989 | 990 | std::vector images; 991 | std::vector cameras; 992 | std::vector depths; 993 | std::vector normals; 994 | std::vector masks; 995 | std::vector blocks; 996 | images.clear(); 997 | cameras.clear(); 998 | depths.clear(); 999 | normals.clear(); 1000 | masks.clear(); 1001 | blocks.clear(); 1002 | std::unordered_map imageIdToindexMap; 1003 | 1004 | path block_folder = dense_folder / path("blocks"); 1005 | bool use_block = false; 1006 | if (exists(block_folder)) { 1007 | use_block = true; 1008 | } 1009 | 1010 | for (int i = 0; i < num_images; ++i) { 1011 | const auto &problem = problems[i]; 1012 | std::cout << "Reading image " << std::setw(8) << std::setfill('0') << i << "..." << std::endl; 1013 | path image_path = image_folder / path(ToFormatIndex(problem.ref_image_id) + ".jpg"); 1014 | imageIdToindexMap.emplace(problem.ref_image_id, i); 1015 | cv::Mat image = cv::imread(image_path.string(), cv::IMREAD_COLOR); 1016 | path cam_path = cam_folder / path(ToFormatIndex(problem.ref_image_id) + "_cam.txt"); 1017 | Camera camera; 1018 | ReadCamera(cam_path, camera); 1019 | 1020 | path depth_path = problem.result_folder / path("depths.dmb"); 1021 | path normal_path = problem.result_folder / path("normals.dmb"); 1022 | cv::Mat depth, normal; 1023 | ReadBinMat(depth_path, depth); 1024 | ReadBinMat(normal_path, normal); 1025 | 1026 | if (use_block) { 1027 | path block_path = block_folder / path("mask_" + std::to_string(problem.ref_image_id) + ".jpg"); 1028 | cv::Mat block_jpg = cv::imread(block_path.string(), cv::IMREAD_GRAYSCALE); 1029 | blocks.emplace_back(block_jpg); 1030 | } 1031 | 1032 | cv::Mat scaled_image; 1033 | RescaleImageAndCamera(image, scaled_image, depth, camera); 1034 | images.emplace_back(scaled_image); 1035 | cameras.emplace_back(camera); 1036 | depths.emplace_back(depth); 1037 | normals.emplace_back(normal); 1038 | cv::Mat mask = cv::Mat::zeros(depth.rows, depth.cols, CV_8UC1); 1039 | 
masks.emplace_back(mask); 1040 | } 1041 | 1042 | std::vector PointCloud; 1043 | PointCloud.clear(); 1044 | 1045 | struct CostData 1046 | { 1047 | float dist; 1048 | float depth; 1049 | float angle; 1050 | int src_r; 1051 | int src_c; 1052 | bool use; 1053 | 1054 | CostData () { 1055 | dist = FLT_MAX; 1056 | depth = FLT_MAX; 1057 | angle = FLT_MAX; 1058 | } 1059 | }; 1060 | 1061 | 1062 | for (int i = 0; i < num_images; ++i) { 1063 | std::cout << "Fusing image " << std::setw(8) << std::setfill('0') << i << "..." << std::endl; 1064 | const auto &problem = problems[i]; 1065 | int ref_index = imageIdToindexMap[problem.ref_image_id]; 1066 | const int cols = depths[ref_index].cols; 1067 | const int rows = depths[ref_index].rows; 1068 | int num_ngb = problem.src_image_ids.size(); 1069 | std::vector diff(num_ngb, CostData()); 1070 | for (int r = 0; r < rows; ++r) { 1071 | for (int c = 0; c < cols; ++c) { 1072 | if (use_block && blocks[ref_index].at(r, c) < 128) { 1073 | continue; 1074 | } 1075 | 1076 | float ref_depth = depths[ref_index].at(r, c); 1077 | if (ref_depth <= 0.0) 1078 | continue; 1079 | const cv::Vec3f ref_normal = normals[ref_index].at(r, c); 1080 | float3 PointX = Get3DPointonWorld(c, r, ref_depth, cameras[ref_index]); 1081 | float3 consistent_Point = PointX; 1082 | for (int j = 0; j < num_ngb; ++j) { 1083 | int src_index = imageIdToindexMap[problem.src_image_ids[j]]; 1084 | const int src_cols = depths[src_index].cols; 1085 | const int src_rows = depths[src_index].rows; 1086 | float2 point; 1087 | float proj_depth; 1088 | ProjectCamera(PointX, cameras[src_index], point, proj_depth); 1089 | int src_r = int(point.y + 0.5f); 1090 | int src_c = int(point.x + 0.5f); 1091 | if (src_c >= 0 && src_c < src_cols && src_r >= 0 && src_r < src_rows) { 1092 | if (masks[src_index].at(src_r, src_c) == 1) 1093 | continue; 1094 | float src_depth = depths[src_index].at(src_r, src_c); 1095 | if (src_depth <= 0.0) 1096 | continue; 1097 | const cv::Vec3f src_normal = 
normals[src_index].at(src_r, src_c); 1098 | float3 tmp_X = Get3DPointonWorld(src_c, src_r, src_depth, cameras[src_index]); 1099 | float2 tmp_pt; 1100 | ProjectCamera(tmp_X, cameras[ref_index], tmp_pt, proj_depth); 1101 | float reproj_error = sqrt(pow(c - tmp_pt.x, 2) + pow(r - tmp_pt.y, 2)); 1102 | float relative_depth_diff = fabs(proj_depth - ref_depth) / ref_depth; 1103 | float angle = GetAngle(ref_normal, src_normal); 1104 | diff[j].dist = reproj_error; 1105 | diff[j].depth = relative_depth_diff; 1106 | diff[j].angle = angle; 1107 | diff[j].src_r = src_r; 1108 | diff[j].src_c = src_c; 1109 | } 1110 | } 1111 | for (int k = 2; k <= num_ngb; ++k) { 1112 | int count = 0; 1113 | for (int j = 0; j < num_ngb; ++j) { 1114 | diff[j].use = false; 1115 | if (diff[j].dist < k * dist_base && diff[j].depth < k * depth_base && diff[j].angle < (k * angle_grad + angle_base)) { 1116 | count++; 1117 | diff[j].use = true; 1118 | } 1119 | } 1120 | if (count >= k) { 1121 | PointList point3D; 1122 | float consistent_Color[3] = { (float)images[ref_index].at(r, c)[0], (float)images[ref_index].at(r, c)[1], (float)images[ref_index].at(r, c)[2] }; 1123 | for (int j = 0; j < num_ngb; ++j) { 1124 | if (diff[j].use) { 1125 | int src_index = imageIdToindexMap[problem.src_image_ids[j]]; 1126 | consistent_Color[0] += (float)images[src_index].at(diff[j].src_r, diff[j].src_c)[0]; 1127 | consistent_Color[1] += (float)images[src_index].at(diff[j].src_r, diff[j].src_c)[1]; 1128 | consistent_Color[2] += (float)images[src_index].at(diff[j].src_r, diff[j].src_c)[2]; 1129 | } 1130 | } 1131 | consistent_Color[0] /= (count + 1.0f); 1132 | consistent_Color[1] /= (count + 1.0f); 1133 | consistent_Color[2] /= (count + 1.0f); 1134 | 1135 | point3D.coord = consistent_Point; 1136 | point3D.color = make_float3(consistent_Color[0], consistent_Color[1], consistent_Color[2]); 1137 | PointCloud.emplace_back(point3D); 1138 | masks[ref_index].at(r, c) = 1; 1139 | break; 1140 | } 1141 | } 1142 | } 1143 | } 1144 | } 1145 
| path ply_path = dense_folder / path("APD") / path("APD.ply"); 1146 | ExportPointCloud(ply_path, PointCloud); 1147 | } 1148 | 1149 | void RunFusion_TAT_advanced(const path &dense_folder, const std::vector &problems) 1150 | { 1151 | int num_images = problems.size(); 1152 | path image_folder = dense_folder / path("images"); 1153 | path cam_folder = dense_folder / path("cams"); 1154 | const float dist_base = 0.25f; 1155 | const float depth_base = 1.0f / 3000.0f; 1156 | 1157 | std::vector images; 1158 | std::vector cameras; 1159 | std::vector depths; 1160 | std::vector normals; 1161 | std::vector masks; 1162 | std::vector blocks; 1163 | images.clear(); 1164 | cameras.clear(); 1165 | depths.clear(); 1166 | normals.clear(); 1167 | masks.clear(); 1168 | blocks.clear(); 1169 | std::unordered_map imageIdToindexMap; 1170 | 1171 | path block_folder = dense_folder / path("blocks"); 1172 | bool use_block = false; 1173 | if (exists(block_folder)) { 1174 | use_block = true; 1175 | } 1176 | 1177 | for (int i = 0; i < num_images; ++i) { 1178 | const auto &problem = problems[i]; 1179 | std::cout << "Reading image " << std::setw(8) << std::setfill('0') << i << "..." 
<< std::endl; 1180 | path image_path = image_folder / path(ToFormatIndex(problem.ref_image_id) + ".jpg"); 1181 | imageIdToindexMap.emplace(problem.ref_image_id, i); 1182 | cv::Mat image = cv::imread(image_path.string(), cv::IMREAD_COLOR); 1183 | path cam_path = cam_folder / path(ToFormatIndex(problem.ref_image_id) + "_cam.txt"); 1184 | Camera camera; 1185 | ReadCamera(cam_path, camera); 1186 | 1187 | path depth_path = problem.result_folder / path("depths.dmb"); 1188 | path normal_path = problem.result_folder / path("normals.dmb"); 1189 | cv::Mat depth, normal; 1190 | ReadBinMat(depth_path, depth); 1191 | ReadBinMat(normal_path, normal); 1192 | 1193 | if (use_block) { 1194 | path block_path = block_folder / path("mask_" + std::to_string(problem.ref_image_id) + ".jpg"); 1195 | cv::Mat block_jpg = cv::imread(block_path.string(), cv::IMREAD_GRAYSCALE); 1196 | blocks.emplace_back(block_jpg); 1197 | } 1198 | 1199 | cv::Mat scaled_image; 1200 | RescaleImageAndCamera(image, scaled_image, depth, camera); 1201 | images.emplace_back(scaled_image); 1202 | cameras.emplace_back(camera); 1203 | depths.emplace_back(depth); 1204 | normals.emplace_back(normal); 1205 | cv::Mat mask = cv::Mat::zeros(depth.rows, depth.cols, CV_8UC1); 1206 | masks.emplace_back(mask); 1207 | } 1208 | 1209 | std::vector PointCloud; 1210 | PointCloud.clear(); 1211 | 1212 | struct CostData 1213 | { 1214 | float dist; 1215 | float depth; 1216 | float angle; 1217 | 1218 | CostData () { 1219 | dist = FLT_MAX; 1220 | depth = FLT_MAX; 1221 | angle = FLT_MAX; 1222 | } 1223 | }; 1224 | 1225 | 1226 | for (int i = 0; i < num_images; ++i) { 1227 | std::cout << "Fusing image " << std::setw(8) << std::setfill('0') << i << "..." 
<< std::endl; 1228 | const auto &problem = problems[i]; 1229 | int ref_index = imageIdToindexMap[problem.ref_image_id]; 1230 | const int cols = depths[ref_index].cols; 1231 | const int rows = depths[ref_index].rows; 1232 | int num_ngb = problem.src_image_ids.size(); 1233 | std::vector diff(num_ngb, CostData()); 1234 | for (int r = 0; r < rows; ++r) { 1235 | for (int c = 0; c < cols; ++c) { 1236 | if (use_block && blocks[ref_index].at(r, c) < 128) { 1237 | continue; 1238 | } 1239 | 1240 | float ref_depth = depths[ref_index].at(r, c); 1241 | if (ref_depth <= 0.0) 1242 | continue; 1243 | const cv::Vec3f ref_normal = normals[ref_index].at(r, c); 1244 | float3 PointX = Get3DPointonWorld(c, r, ref_depth, cameras[ref_index]); 1245 | float3 consistent_Point = PointX; 1246 | float consistent_Color[3] = { (float)images[ref_index].at(r, c)[0], (float)images[ref_index].at(r, c)[1], (float)images[ref_index].at(r, c)[2] }; 1247 | 1248 | for (int j = 0; j < num_ngb; ++j) { 1249 | int src_index = imageIdToindexMap[problem.src_image_ids[j]]; 1250 | const int src_cols = depths[src_index].cols; 1251 | const int src_rows = depths[src_index].rows; 1252 | float2 point; 1253 | float proj_depth; 1254 | ProjectCamera(PointX, cameras[src_index], point, proj_depth); 1255 | int src_r = int(point.y + 0.5f); 1256 | int src_c = int(point.x + 0.5f); 1257 | if (src_c >= 0 && src_c < src_cols && src_r >= 0 && src_r < src_rows) { 1258 | if (masks[src_index].at(src_r, src_c) == 1) 1259 | continue; 1260 | float src_depth = depths[src_index].at(src_r, src_c); 1261 | if (src_depth <= 0.0) 1262 | continue; 1263 | const cv::Vec3f src_normal = normals[src_index].at(src_r, src_c); 1264 | float3 tmp_X = Get3DPointonWorld(src_c, src_r, src_depth, cameras[src_index]); 1265 | float2 tmp_pt; 1266 | ProjectCamera(tmp_X, cameras[ref_index], tmp_pt, proj_depth); 1267 | float reproj_error = sqrt(pow(c - tmp_pt.x, 2) + pow(r - tmp_pt.y, 2)); 1268 | float relative_depth_diff = fabs(proj_depth - ref_depth) / ref_depth; 
1269 | float angle = GetAngle(ref_normal, src_normal); 1270 | diff[j].dist = reproj_error; 1271 | diff[j].depth = relative_depth_diff; 1272 | diff[j].angle = angle; 1273 | } 1274 | } 1275 | for (int k = 2; k <= num_ngb; ++k) { 1276 | int count = 0; 1277 | for (int j = 0; j < num_ngb; ++j) { 1278 | if (diff[j].dist < k * dist_base && diff[j].depth < k * depth_base) { 1279 | count++; 1280 | } 1281 | } 1282 | if (count >= k) { 1283 | PointList point3D; 1284 | point3D.coord = consistent_Point; 1285 | point3D.color = make_float3(consistent_Color[0], consistent_Color[1], consistent_Color[2]); 1286 | PointCloud.emplace_back(point3D); 1287 | masks[ref_index].at(r, c) = 1; 1288 | break; 1289 | } 1290 | } 1291 | } 1292 | } 1293 | } 1294 | path ply_path = dense_folder / path("APD") / path("APD.ply"); 1295 | ExportPointCloud(ply_path, PointCloud); 1296 | } 1297 | -------------------------------------------------------------------------------- /APD.cu: -------------------------------------------------------------------------------- 1 | #include "APD.h" 2 | 3 | __device__ void sort_small(float *d, const int n) 4 | { 5 | int j; 6 | for (int i = 1; i < n; i++) { 7 | float tmp = d[i]; 8 | for (j = i; j >= 1 && tmp < d[j - 1]; j--) 9 | d[j] = d[j - 1]; 10 | d[j] = tmp; 11 | } 12 | } 13 | 14 | __device__ void sort_small_weighted(short2 *points, float *w, int n) 15 | { 16 | int j; 17 | for (int i = 1; i < n; i++) { 18 | short2 tmp = points[i]; 19 | float tmp_w = w[i]; 20 | for (j = i; j >= 1 && tmp_w < w[j - 1]; j--) { 21 | points[j] = points[j - 1]; 22 | w[j] = w[j - 1]; 23 | } 24 | points[j] = tmp; 25 | w[j] = tmp_w; 26 | } 27 | } 28 | 29 | __device__ int FindMinCostIndex(const float *costs, const int n) 30 | { 31 | float min_cost = costs[0]; 32 | int min_cost_idx = 0; 33 | for (int idx = 1; idx < n; ++idx) { 34 | if (costs[idx] <= min_cost) { 35 | min_cost = costs[idx]; 36 | min_cost_idx = idx; 37 | } 38 | } 39 | return min_cost_idx; 40 | } 41 | 42 | __device__ void setBit(unsigned 
int *input, const unsigned int n)
{
    // Sets bit n of *input.
    (*input) |= (unsigned int)(1 << n);
}

// Clears bit n of *input.
// FIX: the original used (*input) &= (0xFFFFFFFE << n), which clears every
// bit from 0 through n — destroying all lower bits whenever n > 0.  The
// correct single-bit clear is &= ~(1 << n).
__device__ void unSetBit(unsigned int *input, const unsigned int n)
{
    (*input) &= ~((unsigned int)(1 << n));
}

// Returns 1 if bit n of input is set, 0 otherwise.
__device__ int isSet(unsigned int input, const unsigned int n)
{
    return (input >> n) & 1;
}

// result.xyz = mat (3x3, row-major) * vec.xyz; result.w is left untouched.
__device__ void Mat33DotVec3(const float mat[9], const float4 vec, float4 *result)
{
    result->x = mat[0] * vec.x + mat[1] * vec.y + mat[2] * vec.z;
    result->y = mat[3] * vec.x + mat[4] * vec.y + mat[5] * vec.z;
    result->z = mat[6] * vec.x + mat[7] * vec.y + mat[8] * vec.z;
}

// 3D dot product of the xyz components (w ignored).
__device__ float Vec3DotVec3(const float4 vec1, const float4 vec2)
{
    return vec1.x * vec2.x + vec1.y * vec2.y + vec1.z * vec2.z;
}

__device__ float Vec3DotVec3(const float3 vec1, const float3 vec2)
{
    return vec1.x * vec2.x + vec1.y * vec2.y + vec1.z * vec2.z;
}

// 3D cross product vec1 x vec2.
__device__ float3 Vec3CrossVec3(const float3 vec1, const float3 vec2)
{
    float3 cross_vec;
    cross_vec.x = vec1.y * vec2.z - vec2.y * vec1.z;
    cross_vec.y = -(vec1.x * vec2.z - vec2.x * vec1.z);
    cross_vec.z = vec1.x * vec2.y - vec2.x * vec1.y;
    return cross_vec;
}

__device__ float Vec2DotVec2(float2 a, float2 b) {
    return a.x * b.x + a.y * b.y;
}

// 2D cross product (z component of the 3D cross).
__device__ float Vec2CrossVec2(float2 a, float2 b) {
    return a.x * b.y - a.y * b.x;
}

// Tests whether P lies inside triangle ABC.  Degenerate or near-degenerate
// triangles (any side <= 2 px, or violating the triangle inequality) are
// rejected.  Inclusion is decided by the cross products PAxPB, PBxPC, PCxPA
// sharing a sign.
__device__ bool PointinTriangle(short2 A, short2 B, short2 C, int2 P)
{
    float2 AB = make_float2(B.x - A.x, B.y - A.y);
    float2 BC = make_float2(C.x - B.x, C.y - B.y);
    float2 CA = make_float2(A.x - C.x, A.y - C.y);
    float AB_ = sqrt(AB.x * AB.x + AB.y * AB.y);
    float BC_ = sqrt(BC.x * BC.x + BC.y * BC.y);
    float CA_ = sqrt(CA.x * CA.x + CA.y * CA.y);
    if (AB_ <= 2 || BC_ <= 2 || CA_ <= 2) {
        return false;
    }
    if (!(AB_ + BC_ > CA_ && BC_ + CA_ > AB_ && AB_ + CA_ > BC_)) {
        return false;
    }
    float2 PA = make_float2(A.x - P.x, A.y - P.y);
    float2 PB = make_float2(B.x - P.x, B.y - P.y);
    float2 PC = make_float2(C.x - P.x, C.y - P.y);
    float t1 = Vec2CrossVec2(PA, PB);
    float t2 = Vec2CrossVec2(PB, PC);
    float t3 = Vec2CrossVec2(PC, PA);
    return t1 * t2 >= 0 && t1 * t3 >= 0;
}

// Area of triangle ABC via Heron's formula.
__device__ float TriangleArea(float3 A, float3 B, float3 C)
{
    float3 AB = make_float3(B.x - A.x, B.y - A.y, B.z - A.z);
    float3 BC = make_float3(C.x - B.x, C.y - B.y, C.z - B.z);
    float3 CA = make_float3(A.x - C.x, A.y - C.y, A.z - C.z);
    float AB_ = sqrt(AB.x * AB.x + AB.y * AB.y + AB.z * AB.z);
    float BC_ = sqrt(BC.x * BC.x + BC.y * BC.y + BC.z * BC.z);
    float CA_ = sqrt(CA.x * CA.x + CA.y * CA.y + CA.z * CA.z);
    float P = (AB_ + BC_ + CA_) / 2.0f;
    return sqrt(P * (P - AB_) * (P - BC_) * (P - CA_));
}

// Normalizes the xyz components of vec in place (w untouched).
__device__ void NormalizeVec3(float4 *vec)
{
    const float normSquared = vec->x * vec->x + vec->y * vec->y + vec->z * vec->z;
    const float inverse_sqrt = rsqrtf(normSquared);
    vec->x *= inverse_sqrt;
    vec->y *= inverse_sqrt;
    vec->z *= inverse_sqrt;
}

__device__ void NormalizeVec2(float2 *vec)
{
    const float normSquared = vec->x * vec->x + vec->y * vec->y;
    const float inverse_sqrt = rsqrtf(normSquared);
    vec->x *= inverse_sqrt;
    vec->y *= inverse_sqrt;
}

// Converts an unnormalized PDF into a CDF in place: normalizes by the total
// mass, then replaces each entry with the running cumulative sum (last
// entry becomes ~1).
__device__ void TransformPDFToCDF(float* probs, const int num_probs)
{
    float prob_sum = 0.0f;
    for (int i = 0; i < num_probs; ++i) {
        prob_sum += probs[i];
    }
    const float inv_prob_sum = 1.0f / prob_sum;

    float cum_prob = 0.0f;
    for (int i = 0; i < num_probs; ++i) {
        const float prob = probs[i] * inv_prob_sum;
        cum_prob += prob;
        probs[i] = cum_prob;
    }
}

__device__ void Get3DPoint(const
Camera camera, const int2 p, const float depth, float *X)
{
    // Back-projects pixel p at the given depth into the camera frame
    // (pinhole model; K row-major).
    X[0] = depth * (p.x - camera.K[2]) / camera.K[0];
    X[1] = depth * (p.y - camera.K[5]) / camera.K[4];
    X[2] = depth;
}

// short2 overload of the back-projection above.
__device__ void Get3DPoint(const Camera camera, const short2 p, const float depth, float *X)
{
    X[0] = depth * (p.x - camera.K[2]) / camera.K[0];
    X[1] = depth * (p.y - camera.K[5]) / camera.K[4];
    X[2] = depth;
}

// Unit viewing ray through pixel p (camera frame); w is set to 0.
__device__ float4 GetViewDirection(const Camera camera, const int2 p, const float depth)
{
    float X[3];
    Get3DPoint(camera, p, depth, X);
    float norm = sqrt(X[0] * X[0] + X[1] * X[1] + X[2] * X[2]);

    float4 view_direction;
    view_direction.x = X[0] / norm;
    view_direction.y = X[1] / norm;
    view_direction.z = X[2] / norm;
    view_direction.w = 0;
    return view_direction;
}

// Signed distance from the camera origin to the plane through the 3D point
// of pixel p (at `depth`) with the given normal: d = -n . X.
__device__ float GetDistance2Origin(const Camera camera, const int2 p, const float depth, const float4 normal)
{
    float X[3];
    Get3DPoint(camera, p, depth, X);
    return -(normal.x * X[0] + normal.y * X[1] + normal.z * X[2]);
}

// Gaussian weight on the squared distance between (x1,y1) and (x2,y2).
// NOTE(review): evaluated in double precision (pow/exp, 1.0 literals) —
// possibly unintended on GPU; preserved as-is.
__device__ float SpatialGauss(float x1, float y1, float x2, float y2, float sigma, float mu = 0.0)
{
    float dis = pow(x1 - x2, 2) + pow(y1 - y2, 2) - mu;
    return exp(-1.0 * dis / (2 * sigma * sigma));
}

// 1D Gaussian weight of x around mean mu.
__device__ float RangeGauss(float x, float sigma, float mu = 0.0)
{
    float x_p = x - mu;
    return exp(-1.0 * (x_p * x_p) / (2 * sigma * sigma));
}

// Depth at pixel p induced by the plane hypothesis (normal.xyz, dist w),
// i.e. the intersection of the viewing ray with the plane.
__device__ float ComputeDepthfromPlaneHypothesis(const Camera camera, const float4 plane_hypothesis, const int2 p)
{
    return -plane_hypothesis.w * camera.K[0] / ((p.x - camera.K[2]) * plane_hypothesis.x + (camera.K[0] / camera.K[4]) * (p.y - camera.K[5]) * plane_hypothesis.y + camera.K[0] * plane_hypothesis.z);
}

// Uniform random unit normal (Marsaglia sphere sampling), flipped if needed
// so it faces the camera (negative dot with the viewing ray of p).
__device__ float4 GenerateRandomNormal(const Camera camera, const int2 p, curandState *rand_state, const float depth)
{
    float4 normal;
    float q1 = 1.0f;
    float q2 = 1.0f;
    float s = 2.0f;
    // Rejection-sample (q1, q2) inside the unit disk.
    while (s >= 1.0f) {
        q1 = 2.0f * curand_uniform(rand_state) - 1.0f;
        q2 = 2.0f * curand_uniform(rand_state) - 1.0f;
        s = q1 * q1 + q2 * q2;
    }
    const float sq = sqrt(1.0f - s);
    normal.x = 2.0f * q1 * sq;
    normal.y = 2.0f * q2 * sq;
    normal.z = 1.0f - 2.0f * s;
    normal.w = 0;

    float4 view_direction = GetViewDirection(camera, p, depth);
    float dot_product = normal.x * view_direction.x + normal.y * view_direction.y + normal.z * view_direction.z;
    if (dot_product > 0.0f) {
        normal.x = -normal.x;
        normal.y = -normal.y;
        normal.z = -normal.z;
    }
    NormalizeVec3(&normal);
    return normal;
}

// Perturbs a normal by small random Euler-angle rotations (each within
// +/- perturbation/2 rad); rejects perturbations that flip the normal away
// from the camera.
__device__ float4 GeneratePerturbedNormal(const Camera camera, const int2 p, const float4 normal, curandState *rand_state, const float perturbation)
{
    float4 view_direction = GetViewDirection(camera, p, 1.0f);

    const float a1 = (curand_uniform(rand_state) - 0.5f) * perturbation;
    const float a2 = (curand_uniform(rand_state) - 0.5f) * perturbation;
    const float a3 = (curand_uniform(rand_state) - 0.5f) * perturbation;

    const float sin_a1 = sin(a1);
    const float sin_a2 = sin(a2);
    const float sin_a3 = sin(a3);
    const float cos_a1 = cos(a1);
    const float cos_a2 = cos(a2);
    const float cos_a3 = cos(a3);

    // Rotation matrix R = Rz(a3) * Ry(a2) * Rx(a1), row-major.
    float R[9];
    R[0] = cos_a2 * cos_a3;
    R[1] = cos_a3 * sin_a1 * sin_a2 - cos_a1 * sin_a3;
    R[2] = sin_a1 * sin_a3 + cos_a1 * cos_a3 * sin_a2;
    R[3] = cos_a2 * sin_a3;
    R[4] = cos_a1 * cos_a3 + sin_a1 * sin_a2 * sin_a3;
    R[5] = cos_a1 * sin_a2 * sin_a3 - cos_a3 * sin_a1;
    R[6] = -sin_a2;
    R[7] = cos_a2 * sin_a1;
    R[8] = cos_a1 * cos_a2;

    float4 normal_perturbed;
Mat33DotVec3(R, normal, &normal_perturbed); 267 | 268 | if (Vec3DotVec3(normal_perturbed, view_direction) >= 0.0f) { 269 | normal_perturbed = normal; 270 | } 271 | 272 | NormalizeVec3(&normal_perturbed); 273 | return normal_perturbed; 274 | } 275 | 276 | __device__ float4 GenerateRandomPlaneHypothesis(const Camera camera, const int2 p, curandState *rand_state, const float depth_min, const float depth_max) 277 | { 278 | float depth = curand_uniform(rand_state) * (depth_max - depth_min) + depth_min; 279 | float4 plane_hypothesis = GenerateRandomNormal(camera, p, rand_state, depth); 280 | plane_hypothesis.w = GetDistance2Origin(camera, p, depth, plane_hypothesis); 281 | return plane_hypothesis; 282 | } 283 | 284 | __device__ float4 GeneratePertubedPlaneHypothesis(const Camera camera, const int2 p, curandState *rand_state, const float perturbation, const float4 plane_hypothesis_now, const float depth_now, const float depth_min, const float depth_max) 285 | { 286 | float depth_perturbed = depth_now; 287 | 288 | float dist_perturbed = plane_hypothesis_now.w; 289 | const float dist_min_perturbed = (1 - perturbation) * dist_perturbed; 290 | const float dist_max_perturbed = (1 + perturbation) * dist_perturbed; 291 | float4 plane_hypothesis_temp = plane_hypothesis_now; 292 | do { 293 | dist_perturbed = curand_uniform(rand_state) * (dist_max_perturbed - dist_min_perturbed) + dist_min_perturbed; 294 | plane_hypothesis_temp.w = dist_perturbed; 295 | depth_perturbed = ComputeDepthfromPlaneHypothesis(camera, plane_hypothesis_temp, p); 296 | } while (depth_perturbed < depth_min && depth_perturbed > depth_max); 297 | 298 | float4 plane_hypothesis = GeneratePerturbedNormal(camera, p, plane_hypothesis_now, rand_state, perturbation * M_PI); 299 | plane_hypothesis.w = dist_perturbed; 300 | return plane_hypothesis; 301 | } 302 | 303 | __device__ void ComputeHomography(const Camera ref_camera, const Camera src_camera, const float4 plane_hypothesis, float *H) 304 | { 305 | float ref_C[3]; 
306 | float src_C[3]; 307 | ref_C[0] = -(ref_camera.R[0] * ref_camera.t[0] + ref_camera.R[3] * ref_camera.t[1] + ref_camera.R[6] * ref_camera.t[2]); 308 | ref_C[1] = -(ref_camera.R[1] * ref_camera.t[0] + ref_camera.R[4] * ref_camera.t[1] + ref_camera.R[7] * ref_camera.t[2]); 309 | ref_C[2] = -(ref_camera.R[2] * ref_camera.t[0] + ref_camera.R[5] * ref_camera.t[1] + ref_camera.R[8] * ref_camera.t[2]); 310 | src_C[0] = -(src_camera.R[0] * src_camera.t[0] + src_camera.R[3] * src_camera.t[1] + src_camera.R[6] * src_camera.t[2]); 311 | src_C[1] = -(src_camera.R[1] * src_camera.t[0] + src_camera.R[4] * src_camera.t[1] + src_camera.R[7] * src_camera.t[2]); 312 | src_C[2] = -(src_camera.R[2] * src_camera.t[0] + src_camera.R[5] * src_camera.t[1] + src_camera.R[8] * src_camera.t[2]); 313 | 314 | float R_relative[9]; 315 | float C_relative[3]; 316 | float t_relative[3]; 317 | R_relative[0] = src_camera.R[0] * ref_camera.R[0] + src_camera.R[1] * ref_camera.R[1] + src_camera.R[2] * ref_camera.R[2]; 318 | R_relative[1] = src_camera.R[0] * ref_camera.R[3] + src_camera.R[1] * ref_camera.R[4] + src_camera.R[2] * ref_camera.R[5]; 319 | R_relative[2] = src_camera.R[0] * ref_camera.R[6] + src_camera.R[1] * ref_camera.R[7] + src_camera.R[2] * ref_camera.R[8]; 320 | R_relative[3] = src_camera.R[3] * ref_camera.R[0] + src_camera.R[4] * ref_camera.R[1] + src_camera.R[5] * ref_camera.R[2]; 321 | R_relative[4] = src_camera.R[3] * ref_camera.R[3] + src_camera.R[4] * ref_camera.R[4] + src_camera.R[5] * ref_camera.R[5]; 322 | R_relative[5] = src_camera.R[3] * ref_camera.R[6] + src_camera.R[4] * ref_camera.R[7] + src_camera.R[5] * ref_camera.R[8]; 323 | R_relative[6] = src_camera.R[6] * ref_camera.R[0] + src_camera.R[7] * ref_camera.R[1] + src_camera.R[8] * ref_camera.R[2]; 324 | R_relative[7] = src_camera.R[6] * ref_camera.R[3] + src_camera.R[7] * ref_camera.R[4] + src_camera.R[8] * ref_camera.R[5]; 325 | R_relative[8] = src_camera.R[6] * ref_camera.R[6] + src_camera.R[7] * ref_camera.R[7] + 
src_camera.R[8] * ref_camera.R[8]; 326 | C_relative[0] = (ref_C[0] - src_C[0]); 327 | C_relative[1] = (ref_C[1] - src_C[1]); 328 | C_relative[2] = (ref_C[2] - src_C[2]); 329 | t_relative[0] = src_camera.R[0] * C_relative[0] + src_camera.R[1] * C_relative[1] + src_camera.R[2] * C_relative[2]; 330 | t_relative[1] = src_camera.R[3] * C_relative[0] + src_camera.R[4] * C_relative[1] + src_camera.R[5] * C_relative[2]; 331 | t_relative[2] = src_camera.R[6] * C_relative[0] + src_camera.R[7] * C_relative[1] + src_camera.R[8] * C_relative[2]; 332 | 333 | H[0] = R_relative[0] - t_relative[0] * plane_hypothesis.x / plane_hypothesis.w; 334 | H[1] = R_relative[1] - t_relative[0] * plane_hypothesis.y / plane_hypothesis.w; 335 | H[2] = R_relative[2] - t_relative[0] * plane_hypothesis.z / plane_hypothesis.w; 336 | H[3] = R_relative[3] - t_relative[1] * plane_hypothesis.x / plane_hypothesis.w; 337 | H[4] = R_relative[4] - t_relative[1] * plane_hypothesis.y / plane_hypothesis.w; 338 | H[5] = R_relative[5] - t_relative[1] * plane_hypothesis.z / plane_hypothesis.w; 339 | H[6] = R_relative[6] - t_relative[2] * plane_hypothesis.x / plane_hypothesis.w; 340 | H[7] = R_relative[7] - t_relative[2] * plane_hypothesis.y / plane_hypothesis.w; 341 | H[8] = R_relative[8] - t_relative[2] * plane_hypothesis.z / plane_hypothesis.w; 342 | 343 | float tmp[9]; 344 | tmp[0] = H[0] / ref_camera.K[0]; 345 | tmp[1] = H[1] / ref_camera.K[4]; 346 | tmp[2] = -H[0] * ref_camera.K[2] / ref_camera.K[0] - H[1] * ref_camera.K[5] / ref_camera.K[4] + H[2]; 347 | tmp[3] = H[3] / ref_camera.K[0]; 348 | tmp[4] = H[4] / ref_camera.K[4]; 349 | tmp[5] = -H[3] * ref_camera.K[2] / ref_camera.K[0] - H[4] * ref_camera.K[5] / ref_camera.K[4] + H[5]; 350 | tmp[6] = H[6] / ref_camera.K[0]; 351 | tmp[7] = H[7] / ref_camera.K[4]; 352 | tmp[8] = -H[6] * ref_camera.K[2] / ref_camera.K[0] - H[7] * ref_camera.K[5] / ref_camera.K[4] + H[8]; 353 | 354 | H[0] = src_camera.K[0] * tmp[0] + src_camera.K[2] * tmp[6]; 355 | H[1] = 
src_camera.K[0] * tmp[1] + src_camera.K[2] * tmp[7];
    H[2] = src_camera.K[0] * tmp[2] + src_camera.K[2] * tmp[8];
    H[3] = src_camera.K[4] * tmp[3] + src_camera.K[5] * tmp[6];
    H[4] = src_camera.K[4] * tmp[4] + src_camera.K[5] * tmp[7];
    H[5] = src_camera.K[4] * tmp[5] + src_camera.K[5] * tmp[8];
    H[6] = src_camera.K[8] * tmp[6];
    H[7] = src_camera.K[8] * tmp[7];
    H[8] = src_camera.K[8] * tmp[8];
}

// Maps reference-image pixel p through the 3x3 row-major homography H and
// returns the dehomogenized source-image coordinates.
__device__ float2 ComputeCorrespondingPoint(const float *H, const int2 p)
{
    float3 pt;
    pt.x = H[0] * p.x + H[1] * p.y + H[2];
    pt.y = H[3] * p.x + H[4] * p.y + H[5];
    pt.z = H[6] * p.x + H[7] * p.y + H[8];
    return make_float2(pt.x / pt.z, pt.y / pt.z);
}

// Rotates the normal part (x, y, z) of a plane hypothesis by R^T.
// Elsewhere in this file R maps world -> camera (see ProjectonCamera_cu), so
// this presumably takes a camera-frame normal to the world frame; the w
// component (plane parameter) is passed through unchanged.
__device__ float4 TransformNormal(const Camera camera, float4 plane_hypothesis)
{
    float4 transformed_normal;
    transformed_normal.x = camera.R[0] * plane_hypothesis.x + camera.R[3] * plane_hypothesis.y + camera.R[6] * plane_hypothesis.z;
    transformed_normal.y = camera.R[1] * plane_hypothesis.x + camera.R[4] * plane_hypothesis.y + camera.R[7] * plane_hypothesis.z;
    transformed_normal.z = camera.R[2] * plane_hypothesis.x + camera.R[5] * plane_hypothesis.y + camera.R[8] * plane_hypothesis.z;
    transformed_normal.w = plane_hypothesis.w;
    return transformed_normal;
}

// Inverse of TransformNormal: rotates the normal part by R itself
// (world -> reference-camera frame); w is passed through unchanged.
__device__ float4 TransformNormal2RefCam(const Camera camera, float4 plane_hypothesis)
{
    float4 transformed_normal;
    transformed_normal.x = camera.R[0] * plane_hypothesis.x + camera.R[1] * plane_hypothesis.y + camera.R[2] * plane_hypothesis.z;
    transformed_normal.y = camera.R[3] * plane_hypothesis.x + camera.R[4] * plane_hypothesis.y + camera.R[5] * plane_hypothesis.z;
    transformed_normal.z = camera.R[6] * plane_hypothesis.x + camera.R[7] * plane_hypothesis.y + camera.R[8] * plane_hypothesis.z;
    transformed_normal.w = plane_hypothesis.w;
    return transformed_normal;
}
// Returns the index-th precomputed neighbour (deformable-patch sample) of
// pixel p. A neighbour of (-1, -1) marks an invalid / missing sample.
__device__ short2 GetNeighbourPoint(const int2 p, const int index, const DataPassHelper *helper) {
    // neighbours_map_cuda stores, per pixel, the row of the neighbour table to use.
    const unsigned offset = helper->neighbours_map_cuda[p.x + p.y * helper->width] * NEIGHBOUR_NUM;
    short2 neighbour_pt = helper->neighbours_cuda[offset + index];
    return neighbour_pt;
}

// Photometric cost of plane_hypothesis at a WEAK-textured pixel p against
// source view src_idx, using a deformable NCC: one patch at p itself (k == 0,
// larger "strong" window) plus NCC patches centred on the precomputed strong
// neighbours of p (smaller "weak" window). Returns a cost in [0, cost_max];
// cost_max (2.0f) signals an invalid match.
__device__ float ComputeBilateralNCCNew(
    const int2 p,
    const int src_idx,
    const float4 plane_hypothesis,
    const DataPassHelper *helper
) {
    const cudaTextureObject_t ref_image = helper->texture_objects_cuda[0].images[0];
    const Camera ref_camera = helper->cameras_cuda[0];
    const cudaTextureObject_t src_image = helper->texture_objects_cuda[0].images[src_idx];
    const Camera src_camera = helper->cameras_cuda[src_idx];
    const PatchMatchParams *params = helper->params;
    const uchar *weak_info = helper->weak_info_cuda;
    const int width = helper->width;
    const int height = helper->height;
    const int center = p.x + p.y * width;

    const float cost_max = 2.0f;

    float H[9];
    ComputeHomography(ref_camera, src_camera, plane_hypothesis, H);
    float2 pt = ComputeCorrespondingPoint(H, p);
    if (pt.x >= src_camera.width || pt.x < 0.0f || pt.y >= src_camera.height || pt.y < 0.0f) {
        return cost_max;
    }

    float cost = 0.0f;
    if (weak_info[center] == WEAK) {
        // Weak texture: aggregate the centre patch with patches at strong neighbours.
        float center_cost = 0.0f;
        float strong_cost = 0.0f;
        int strong_count = 0;
        for (int k = 0; k < NEIGHBOUR_NUM; ++k) {
            const short2 neighbour_pt = GetNeighbourPoint(p, k, helper);
            if (neighbour_pt.x == -1 || neighbour_pt.y == -1) {
                continue;
            }
            float2 neighbour_src_pt = ComputeCorrespondingPoint(H, make_int2(neighbour_pt.x, neighbour_pt.y));
            // NOTE(review): this bounds test uses the *reference* image size
            // (helper->width/height) although the projected point lives in the
            // source image; the centre-pixel test above uses src_camera sizes.
            // Harmless when all views share one resolution -- confirm otherwise.
            if (neighbour_src_pt.x < 0 || neighbour_src_pt.y < 0 || neighbour_src_pt.x >= width || neighbour_src_pt.y >= height) {
                if (k != 0) {
                    // Out-of-view strong neighbour: if that neighbour selected
                    // this source view itself, penalise with the maximum cost.
                    unsigned int view_info = helper->selected_views_cuda[neighbour_pt.x + neighbour_pt.y * width];
                    if (isSet(view_info, src_idx - 1)) {
                        strong_cost += cost_max;
                        strong_count++;
                    }
                    continue;
                } else {
                    // The centre patch itself projects outside the source view.
                    return cost_max;
                }
            }
            // Plain NCC over a square window centred on this neighbour.
            float sum_ref = 0.0f;
            float sum_ref_ref = 0.0f;
            float sum_src = 0.0f;
            float sum_src_src = 0.0f;
            float sum_ref_src = 0.0f;
            float bilateral_weight_sum = 0.0f;
            // k == 0 is the centre patch and uses the larger "strong" window.
            int radius = (k == 0 ? params->strong_radius : params->weak_radius);
            int increment = (k == 0 ? params->strong_increment : params->weak_increment);
            for (int i = -radius; i <= radius; i += increment) {
                float sum_ref_row = 0.0f;
                float sum_src_row = 0.0f;
                float sum_ref_ref_row = 0.0f;
                float sum_src_src_row = 0.0f;
                float sum_ref_src_row = 0.0f;
                float bilateral_weight_sum_row = 0.0f;
                for (int j = -radius; j <= radius; j += increment) {
                    const int2 ref_pt = make_int2(neighbour_pt.x + i, neighbour_pt.y + j);
                    const float ref_pix = tex2D(ref_image, ref_pt.x + 0.5f, ref_pt.y + 0.5f);
                    float2 src_pt = ComputeCorrespondingPoint(H, ref_pt);
                    const float src_pix = tex2D(src_image, src_pt.x + 0.5f, src_pt.y + 0.5f);
                    // Uniform weights; the bilateral machinery is kept in place.
                    float weight = 1.0f;
                    sum_ref_row += weight * ref_pix;
                    sum_ref_ref_row += weight * ref_pix * ref_pix;
                    sum_src_row += weight * src_pix;
                    sum_src_src_row += weight * src_pix * src_pix;
                    sum_ref_src_row += weight * ref_pix * src_pix;
                    bilateral_weight_sum_row += weight;
                }
                sum_ref += sum_ref_row;
                sum_ref_ref += sum_ref_ref_row;
                sum_src += sum_src_row;
                sum_src_src += sum_src_src_row;
                sum_ref_src += sum_ref_src_row;
                bilateral_weight_sum += bilateral_weight_sum_row;
            }
            const float inv_bilateral_weight_sum = 1.0f / bilateral_weight_sum;
            sum_ref *= inv_bilateral_weight_sum;
            sum_ref_ref *= inv_bilateral_weight_sum;
            sum_src *= inv_bilateral_weight_sum;
            sum_src_src *= inv_bilateral_weight_sum;
            sum_ref_src *= inv_bilateral_weight_sum;
            const float var_ref = sum_ref_ref - sum_ref * sum_ref;
            const float var_src = sum_src_src - sum_src * sum_src;
            const float kMinVar = 1e-5f;
            float temp_cost = 0.0f;
            if (var_ref < kMinVar || var_src < kMinVar) {
                // Texture-less patch in either view: treat as unmatched.
                temp_cost = cost_max;
            }
            else {
                const float covar_src_ref = sum_ref_src - sum_ref * sum_src;
                const float var_ref_src = sqrt(var_ref * var_src);
                temp_cost = max(0.0f, min(cost_max, 1.0f - covar_src_ref / var_ref_src));
            }
            if (k == 0) {
                center_cost = temp_cost;
            }
            else {
                strong_cost += temp_cost;
                strong_count++;
            }
        }
        if (strong_count == 0) {
            cost = center_cost;
        }
        else {
            strong_cost /= strong_count;
            strong_cost = MIN(strong_cost, cost_max);
            // Blend: the neighbour patches dominate in weak-texture areas.
            cost = 0.25f * center_cost + 0.75f * strong_cost;
        }
    }
    else {
        // This path must only ever be called for WEAK pixels.
        printf("error\n");
    }

    return cost;
}

// Standard fixed-window NCC (non-deformable) between the reference patch at p
// and its homography-warped counterpart in source view src_idx.
__device__ float ComputeBilateralNCCOld(
    const int2 p,
    const int src_idx,
    const float4 plane_hypothesis,
    const DataPassHelper *helper
) {
    const cudaTextureObject_t ref_image = helper->texture_objects_cuda[0].images[0];
    const Camera ref_camera = helper->cameras_cuda[0];
    const cudaTextureObject_t src_image = helper->texture_objects_cuda[0].images[src_idx];
    const Camera src_camera = helper->cameras_cuda[src_idx];

    const float cost_max = 2.0f;

    float H[9];
    ComputeHomography(ref_camera, src_camera, plane_hypothesis, H);
    float2 pt = ComputeCorrespondingPoint(H, p);
    if (pt.x >= src_camera.width || pt.x <
0.0f || pt.y >= src_camera.height || pt.y < 0.0f) {
        return cost_max;
    }
    const int radius = helper->params->strong_radius;
    const int increment = helper->params->strong_increment;
    float cost = 0.0f;
    {
        float sum_ref = 0.0f;
        float sum_ref_ref = 0.0f;
        float sum_src = 0.0f;
        float sum_src_src = 0.0f;
        float sum_ref_src = 0.0f;
        float bilateral_weight_sum = 0.0f;

        for (int i = -radius; i <= radius; i += increment) {
            float sum_ref_row = 0.0f;
            float sum_src_row = 0.0f;
            float sum_ref_ref_row = 0.0f;
            float sum_src_src_row = 0.0f;
            float sum_ref_src_row = 0.0f;
            float bilateral_weight_sum_row = 0.0f;

            for (int j = -radius; j <= radius; j += increment) {
                const int2 ref_pt = make_int2(p.x + i, p.y + j);
                const float ref_pix = tex2D(ref_image, ref_pt.x + 0.5f, ref_pt.y + 0.5f);
                float2 src_pt = ComputeCorrespondingPoint(H, ref_pt);
                const float src_pix = tex2D(src_image, src_pt.x + 0.5f, src_pt.y + 0.5f);

                // Uniform weights; the bilateral machinery is kept in place.
                float weight = 1.0f;

                sum_ref_row += weight * ref_pix;
                sum_ref_ref_row += weight * ref_pix * ref_pix;
                sum_src_row += weight * src_pix;
                sum_src_src_row += weight * src_pix * src_pix;
                sum_ref_src_row += weight * ref_pix * src_pix;
                bilateral_weight_sum_row += weight;
            }

            sum_ref += sum_ref_row;
            sum_ref_ref += sum_ref_ref_row;
            sum_src += sum_src_row;
            sum_src_src += sum_src_src_row;
            sum_ref_src += sum_ref_src_row;
            bilateral_weight_sum += bilateral_weight_sum_row;
        }
        const float inv_bilateral_weight_sum = 1.0f / bilateral_weight_sum;
        sum_ref *= inv_bilateral_weight_sum;
        sum_ref_ref *= inv_bilateral_weight_sum;
        sum_src *= inv_bilateral_weight_sum;
        sum_src_src *= inv_bilateral_weight_sum;
        sum_ref_src *= inv_bilateral_weight_sum;

        const float var_ref = sum_ref_ref - sum_ref * sum_ref;
        const float var_src = sum_src_src - sum_src * sum_src;

        const float kMinVar = 1e-5f;
        if (var_ref < kMinVar || var_src < kMinVar) {
            // Texture-less patch in either view: treat as unmatched.
            cost = cost_max;
        }
        else {
            const float covar_src_ref = sum_ref_src - sum_ref * sum_src;
            const float var_ref_src = sqrt(var_ref * var_src);
            cost = max(0.0f, min(cost_max, 1.0f - covar_src_ref / var_ref_src));
        }
    }

    return cost;
}

// Initial multi-view cost at pixel p: scores the current plane hypothesis
// against every source view with fixed-window NCC, records the best top_k
// views in the selected_views bitmask, and returns their mean cost
// (cost_max when no view matches at all).
__device__ float ComputeMultiViewInitialCostandSelectedViews(
    const int2 p,
    DataPassHelper *helper
) {
    PatchMatchParams *params = helper->params;
    unsigned int *selected_views = helper->selected_views_cuda;
    int center = p.x + p.y * helper->width;
    float4 plane_hypothesis = helper->plane_hypotheses_cuda[center];

    float cost_max = 2.0f;
    // "= { 2.0f }" initialises element 0 only (the rest become 0.0f); every
    // entry that is read below is overwritten first, so this is harmless.
    float cost_vector[32] = { 2.0f };
    float cost_vector_copy[32] = { 2.0f };
    int cost_count = 0;
    int num_valid_views = 0;

    for (int i = 1; i < params->num_images; ++i) {
        const float c = ComputeBilateralNCCOld(p, i, plane_hypothesis, helper);
        cost_vector[i - 1] = c;
        cost_vector_copy[i - 1] = c;
        cost_count++;
        if (c < cost_max) {
            num_valid_views++;
        }
    }

    sort_small(cost_vector, cost_count);
    selected_views[center] = 0;

    int top_k = min(num_valid_views, params->top_k);
    if (top_k > 0) {
        float cost = 0.0f;
        for (int i = 0; i < top_k; ++i) {
            cost += cost_vector[i];
        }
        // Select every view whose cost does not exceed the k-th best.
        float cost_threshold = cost_vector[top_k - 1];
        for (int i = 0; i < params->num_images - 1; ++i) {
            if (cost_vector_copy[i] <= cost_threshold) {
                setBit(&(selected_views[center]), i);
            }
        }
        return cost / top_k;
    }
    else {
        return cost_max;
    }
}

__device__ float ComputeMultiViewInitialCost(
const int2 p,
    DataPassHelper *helper
) {
    // Re-evaluates the multi-view cost of the current hypothesis over the
    // views already selected for p, dropping (unSetBit) any view that no
    // longer matches; returns the mean cost over the surviving views.
    PatchMatchParams *params = helper->params;
    unsigned int *selected_views = helper->selected_views_cuda;
    int center = p.x + p.y * helper->width;
    float4 plane_hypothesis = helper->plane_hypotheses_cuda[center];

    const float cost_max = 2.0f;
    int cost_count = 0;
    float cost = 0.0f;

    for (int i = 1; i < params->num_images; ++i) {
        if (isSet(selected_views[center], i - 1)) {
            float c = ComputeBilateralNCCOld(p, i, plane_hypothesis, helper);
            if (c < cost_max) {
                cost_count++;
                cost += c;
            } else {
                // View no longer consistent with the hypothesis: deselect it.
                unSetBit(&(selected_views[center]), i - 1);
            }
        }
    }
    if (cost_count == 0) {
        return cost_max;
    } else {
        return cost / cost_count;
    }
}


// Fills cost_vector[i-1] with the deformable-NCC cost against each source view.
__device__ void ComputeMultiViewCostVectorNew(
    const int2 p,
    float4 plane_hypothesis,
    float *cost_vector,
    DataPassHelper *helper
) {
    for (int i = 1; i < helper->params->num_images; ++i) {
        cost_vector[i - 1] = ComputeBilateralNCCNew(p, i, plane_hypothesis, helper);
    }
}

// Fills cost_vector[i-1] with the fixed-window NCC cost against each source view.
__device__ void ComputeMultiViewCostVectorOld(
    const int2 p,
    float4 plane_hypothesis,
    float *cost_vector,
    DataPassHelper *helper
) {
    for (int i = 1; i < helper->params->num_images; ++i) {
        cost_vector[i - 1] = ComputeBilateralNCCOld(p, i, plane_hypothesis, helper);
    }
}

// Back-projects pixel (x, y) at the given depth into world coordinates:
// X = R^T * K^{-1} * (x, y, 1) * depth + c, with c the camera centre.
__device__ float3 Get3DPointonWorld_cu(const float x, const float y, const float depth, const Camera camera)
{
    float3 pointX;
    float3 tmpX;
    // Reprojection: pixel -> normalized camera ray scaled by depth.
    pointX.x = depth * (x - camera.K[2]) / camera.K[0];
    pointX.y = depth * (y - camera.K[5]) / camera.K[4];
    pointX.z = depth;

    // Rotation: multiply by R^T (camera -> world).
    tmpX.x = camera.R[0] * pointX.x + camera.R[3] * pointX.y + camera.R[6] * pointX.z;
    tmpX.y = camera.R[1] * pointX.x + camera.R[4] * pointX.y + camera.R[7] * pointX.z;
    tmpX.z = camera.R[2] * pointX.x + camera.R[5] * pointX.y + camera.R[8] * pointX.z;

    // Transformation: translate by the camera centre.
    pointX.x = tmpX.x + camera.c[0];
    pointX.y = tmpX.y + camera.c[1];
    pointX.z = tmpX.z + camera.c[2];

    return pointX;
}

// Projects world point PointX into the camera: point = K * (R * X + t),
// returning the pixel position and the depth along the optical axis.
__device__ void ProjectonCamera_cu(const float3 PointX, const Camera camera, float2 &point, float &depth)
{
    float3 tmp;
    tmp.x = camera.R[0] * PointX.x + camera.R[1] * PointX.y + camera.R[2] * PointX.z + camera.t[0];
    tmp.y = camera.R[3] * PointX.x + camera.R[4] * PointX.y + camera.R[5] * PointX.z + camera.t[1];
    tmp.z = camera.R[6] * PointX.x + camera.R[7] * PointX.y + camera.R[8] * PointX.z + camera.t[2];

    depth = camera.K[6] * tmp.x + camera.K[7] * tmp.y + camera.K[8] * tmp.z;
    point.x = (camera.K[0] * tmp.x + camera.K[1] * tmp.y + camera.K[2] * tmp.z) / depth;
    point.y = (camera.K[3] * tmp.x + camera.K[4] * tmp.y + camera.K[5] * tmp.z) / depth;
}

// Forward-backward reprojection error of the hypothesis at p through source
// view src_idx's depth map, clamped to max_cost (3.0f). max_cost is also
// returned when the source has no depth estimate at the projected pixel.
__device__ float ComputeGeomConsistencyCost(
    const int2 p,
    const int src_idx,
    const float4 plane_hypothesis,
    DataPassHelper *helper
) {
    const Camera ref_camera = helper->cameras_cuda[0];
    const Camera src_camera = helper->cameras_cuda[src_idx];
    const cudaTextureObject_t depth_image = helper->texture_depths_cuda[0].images[src_idx];

    const float max_cost = 3.0f;

    float center_cost = 0.0f;
    {
        float depth = ComputeDepthfromPlaneHypothesis(ref_camera, plane_hypothesis, p);
        float3 forward_point = Get3DPointonWorld_cu(p.x, p.y, depth, ref_camera);

        float2 src_pt;
        float src_d;
        ProjectonCamera_cu(forward_point, src_camera, src_pt, src_d);
        // (int) truncation + 0.5f samples the nearest texel centre (no filtering).
        const float src_depth = tex2D(depth_image, (int)src_pt.x + 0.5f, (int)src_pt.y + 0.5f);

        if (src_depth == 0.0f) {
            // No depth estimate in the source view: maximal penalty.
            return max_cost;
        }

        float3 src_3D_pt = Get3DPointonWorld_cu(src_pt.x, src_pt.y, src_depth, src_camera);

        float2 backward_point;
        float ref_d;
        ProjectonCamera_cu(src_3D_pt, ref_camera, backward_point, ref_d);

        const float diff_col = p.x - backward_point.x;
        const float diff_row = p.y - backward_point.y;
        center_cost = sqrt(diff_col * diff_col + diff_row * diff_row);
    }
    return min(max_cost, center_cost);
}

// Kernel: one curand state per pixel. Seeded from clock64(), so results are
// NOT reproducible across runs -- NOTE(review): use a fixed seed if
// deterministic output is required.
__global__ void InitRandomStates(
    DataPassHelper *helper
) {
    const int width = helper->width;
    const int height = helper->height;
    curandState *rand_states = helper->rand_states_cuda;

    const int2 p = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
    if (p.x >= width || p.y >= height) {
        return;
    }
    const int center = p.y * width + p.x;
    curand_init(clock64(), p.y, p.x, &rand_states[center]);
}

// Kernel: initialises per-pixel plane hypotheses and costs. In FIRST_INIT a
// random hypothesis is drawn and views are selected; otherwise the stored
// hypothesis is converted into the reference-camera parameterisation
// (normal rotated into the camera frame, w replaced by the plane's distance
// to the origin) and re-scored over the already-selected views.
__global__ void RandomInitialization(
    DataPassHelper *helper
) {
    int width = helper->width;
    int height = helper->height;
    const int2 p = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
    if (p.x >= width || p.y >= height) {
        return;
    }
    const int center = p.y * width + p.x;
    Camera *cameras = helper->cameras_cuda;
    float4 *plane_hypotheses = helper->plane_hypotheses_cuda;
    float *costs = helper->costs_cuda;
    curandState *rand_states = helper->rand_states_cuda;
    PatchMatchParams *params = helper->params;

    if (params->state == FIRST_INIT) {
        plane_hypotheses[center] = GenerateRandomPlaneHypothesis(cameras[0], p, &rand_states[center], params->depth_min, params->depth_max);
        costs[center] = ComputeMultiViewInitialCostandSelectedViews(p, helper);
    }
    else {
        float4 plane_hypothesis;
        plane_hypothesis = plane_hypotheses[center];
        plane_hypothesis =
TransformNormal2RefCam(cameras[0], plane_hypothesis);
        // w held the depth; replace it with the plane's distance to the origin.
        float depth = plane_hypothesis.w;
        plane_hypothesis.w = GetDistance2Origin(cameras[0], p, depth, plane_hypothesis);
        plane_hypotheses[center] = plane_hypothesis;
        costs[center] = ComputeMultiViewInitialCost(p, helper);
    }
}

// Plane refinement for strong-texture pixels: builds 5 candidate planes from
// {current, random, depth-perturbed} depths and normals, scores each with the
// fixed-window NCC weighted by the per-view weights, and keeps the cheapest
// candidate whose depth lies in [depth_min, depth_max] (updating
// plane_hypothesis / depth / cost in place).
__device__ void PlaneHypothesisRefinementStrong(
    float4 *plane_hypothesis,
    float *depth,
    float *cost,
    curandState *rand_state,
    const uchar *view_weights,
    const float weight_norm,
    const int2 p,
    DataPassHelper *helper

) {
    float depth_perturbation = 0.02f;
    float normal_perturbation = 0.02f;
    const Camera *cameras = helper->cameras_cuda;
    const PatchMatchParams *params = helper->params;
    float depth_min = params->depth_min;
    float depth_max = params->depth_max;

    float depth_rand = curand_uniform(rand_state) * (depth_max - depth_min) + depth_min;
    float4 plane_hypothesis_rand = GenerateRandomNormal(cameras[0], p, rand_state, *depth);
    // Draw a depth within +/- 2% of the current one.
    float depth_perturbed = *depth;
    const float depth_min_perturbed = (1 - depth_perturbation) * depth_perturbed;
    const float depth_max_perturbed = (1 + depth_perturbation) * depth_perturbed;
    do {
        depth_perturbed = curand_uniform(rand_state) * (depth_max_perturbed - depth_min_perturbed) + depth_min_perturbed;
    } while (depth_perturbed < depth_min && depth_perturbed > depth_max);
    // NOTE(review): the "&&" above can never be true, so the do/while runs
    // exactly once and out-of-range samples are not rejected here; "||" was
    // probably intended. The range check at the bottom of the candidate loop
    // still filters such samples out, so this is benign but worth confirming.
    float4 plane_hypothesis_perturbed = GeneratePerturbedNormal(cameras[0], p, *plane_hypothesis, rand_state, normal_perturbation * M_PI);

    const int num_planes = 5;
    float depths[num_planes] = { depth_rand, *depth, depth_rand, *depth, depth_perturbed };
    float4 normals[num_planes] = { *plane_hypothesis, plane_hypothesis_rand, plane_hypothesis_rand, plane_hypothesis_perturbed, *plane_hypothesis };

    for (int i = 0; i < num_planes; ++i) {
        // "= { 2.0f }" sets element 0 only; all entries read below are
        // overwritten first by ComputeMultiViewCostVectorOld.
        float cost_vector[32] = { 2.0f };
        float4 temp_plane_hypothesis = normals[i];
        temp_plane_hypothesis.w = GetDistance2Origin(cameras[0], p, depths[i], temp_plane_hypothesis);
        ComputeMultiViewCostVectorOld(p, temp_plane_hypothesis, cost_vector, helper);

        float temp_cost = 0.0f;
        for (int j = 0; j < params->num_images - 1; ++j) {
            if (view_weights[j] > 0) {
                temp_cost += view_weights[j] * cost_vector[j];
            }
        }
        temp_cost /= weight_norm;

        float depth_before = ComputeDepthfromPlaneHypothesis(cameras[0], temp_plane_hypothesis, p);
        if (depth_before >= depth_min && depth_before <= depth_max && temp_cost < *cost) {
            *depth = depth_before;
            *plane_hypothesis = temp_plane_hypothesis;
            *cost = temp_cost;
        }
    }
}

// Plane refinement for weak-texture pixels: first tries the locally fitted
// plane (fit_plane_hypotheses_cuda), then the same 5 random/perturbed
// candidates as the strong variant, scoring with the deformable NCC plus
// (optionally) the geometric-consistency term. Updates in place.
__device__ void PlaneHypothesisRefinementWeak(
    float4 *plane_hypothesis,
    float *depth,
    float *cost,
    curandState *rand_state,
    const uchar *view_weights,
    const float weight_norm,
    const int2 p,
    DataPassHelper *helper

) {
    float depth_perturbation = 0.02f;
    float normal_perturbation = 0.02f;
    const Camera *cameras = helper->cameras_cuda;
    const PatchMatchParams *params = helper->params;
    float depth_min = params->depth_min;
    float depth_max = params->depth_max;
    const int center = p.x + p.y * helper->width;
    { // test the fit plane
        float4 fit_plane_hypothesis = helper->fit_plane_hypotheses_cuda[center];
        // An all-zero normal marks "no fitted plane available".
        if (fit_plane_hypothesis.x == 0 && fit_plane_hypothesis.y == 0 && fit_plane_hypothesis.z == 0) {
            return;
        }
        float cost_vector[32] = { 2.0f };
        ComputeMultiViewCostVectorNew(p, fit_plane_hypothesis, cost_vector, helper);
        float temp_cost = 0.0f;
        for (int j = 0; j < params->num_images - 1; ++j) {
            if (view_weights[j] > 0) {
                if (params->geom_consistency) {
                    temp_cost += view_weights[j] * (cost_vector[j] + params->geom_factor * ComputeGeomConsistencyCost(p, j + 1, fit_plane_hypothesis, helper));
                }
                else {
                    temp_cost += view_weights[j] * cost_vector[j];
                }
            }
        }
        temp_cost /= weight_norm;

        float depth_before = ComputeDepthfromPlaneHypothesis(cameras[0], fit_plane_hypothesis, p);
        if (depth_before >= depth_min && depth_before <= depth_max && temp_cost < *cost) {
            *depth = depth_before;
            *plane_hypothesis = fit_plane_hypothesis;
            *cost = temp_cost;
        }
    }
    // random refine
    {
        float depth_rand = curand_uniform(rand_state) * (depth_max - depth_min) + depth_min;
        float4 plane_hypothesis_rand = GenerateRandomNormal(cameras[0], p, rand_state, *depth);
        float depth_perturbed = *depth;
        const float depth_min_perturbed = (1 - depth_perturbation) * depth_perturbed;
        const float depth_max_perturbed = (1 + depth_perturbation) * depth_perturbed;
        do {
            depth_perturbed = curand_uniform(rand_state) * (depth_max_perturbed - depth_min_perturbed) + depth_min_perturbed;
        } while (depth_perturbed < depth_min && depth_perturbed > depth_max);
        // NOTE(review): same always-false "&&" as in the strong variant --
        // the do/while never repeats; "||" was probably intended.
        float4 plane_hypothesis_perturbed = GeneratePerturbedNormal(cameras[0], p, *plane_hypothesis, rand_state, normal_perturbation * M_PI);

        const int num_planes = 5;
        float depths[num_planes] = { depth_rand, *depth, depth_rand, *depth, depth_perturbed };
        float4 normals[num_planes] = { *plane_hypothesis, plane_hypothesis_rand, plane_hypothesis_rand, plane_hypothesis_perturbed, *plane_hypothesis };

        for (int i = 0; i < num_planes; ++i) {
            float cost_vector[32] = { 2.0f };
            float4 temp_plane_hypothesis = normals[i];
            temp_plane_hypothesis.w = GetDistance2Origin(cameras[0], p, depths[i], temp_plane_hypothesis);
            ComputeMultiViewCostVectorNew(p, temp_plane_hypothesis, cost_vector, helper);

            float temp_cost = 0.0f;
            for (int j = 0; j < params->num_images - 1; ++j) {
                if (view_weights[j] > 0) {
                    if (params->geom_consistency) {
                        temp_cost += view_weights[j] * (cost_vector[j] + params->geom_factor * ComputeGeomConsistencyCost(p, j + 1, temp_plane_hypothesis, helper));
                    }
                    else {
                        temp_cost += view_weights[j] * cost_vector[j];
                    }
                }
            }
            temp_cost /= weight_norm;

            float depth_before = ComputeDepthfromPlaneHypothesis(cameras[0], temp_plane_hypothesis, p);
            if (depth_before >= depth_min && depth_before <= depth_max && temp_cost < *cost) {
                *depth = depth_before;
                *plane_hypothesis = temp_plane_hypothesis;
                *cost = temp_cost;
            }
        }
    }
}

// Checkerboard propagation + joint view selection + refinement for one
// strong-texture pixel: samples 8 checkerboard neighbourhoods (near/far in 4
// directions), selects views by sampling from per-view probabilities, adopts
// the cheapest neighbour plane if it improves, then refines.
__device__ void CheckerboardPropagationStrong(
    const int2 p,
    const int iter,
    DataPassHelper *helper
) {
    const int width = helper->width;
    const int height = helper->height;
    float4 *plane_hypotheses = helper->plane_hypotheses_cuda;
    float *costs = helper->costs_cuda;
    curandState *rand_states = helper->rand_states_cuda;
    unsigned int *selected_views = helper->selected_views_cuda;
    PatchMatchParams *params = helper->params;
    const Camera *cameras = helper->cameras_cuda;
    int num_images = params->num_images;

    if (p.x >= width || p.y >= height) {
        return;
    }

    const int center = p.y * width + p.x;

    // Adaptive Checkerboard Sampling
    // "= { 2.0f }" / "= { false }" initialise element [0] only; the remaining
    // entries start at zero. Directions never visited keep cost 0 but are
    // excluded later via flag[].
    float cost_array[8][32] = { 2.0f };
    bool flag[8] = { false };
    int num_valid_pixels = 0;

    float costMin;
    int costMinPoint;


    int left_near = center - 1;
    int left_far = center - 3;
    int right_near = center + 1;
    int right_far = center + 3;
    int up_near = center - width;
    int up_far = center - 3 * width;
    int down_near = center + width;
    int down_far = center + 3 * width;
    // 0 -- up_near, 1 -- up_far, 2 -- down_near, 3 -- down_far, 4 -- left_near, 5 --
left_far, 6 -- right_near, 7 -- right_far 1021 | // up_far 1022 | if (p.y > 2) { 1023 | flag[1] = true; 1024 | num_valid_pixels++; 1025 | costMin = costs[up_far]; 1026 | costMinPoint = up_far; 1027 | for (int i = 1; i < 11; ++i) { 1028 | if (p.y > 2 + 2 * i) { 1029 | int pointTemp = up_far - 2 * i * width; 1030 | if (costs[pointTemp] < costMin) { 1031 | costMin = costs[pointTemp]; 1032 | costMinPoint = pointTemp; 1033 | } 1034 | } 1035 | } 1036 | up_far = costMinPoint; 1037 | ComputeMultiViewCostVectorOld(p, plane_hypotheses[up_far], cost_array[1], helper); 1038 | } 1039 | 1040 | // dwon_far 1041 | if (p.y < height - 3) { 1042 | flag[3] = true; 1043 | num_valid_pixels++; 1044 | costMin = costs[down_far]; 1045 | costMinPoint = down_far; 1046 | for (int i = 1; i < 11; ++i) { 1047 | if (p.y < height - 3 - 2 * i) { 1048 | int pointTemp = down_far + 2 * i * width; 1049 | if (costs[pointTemp] < costMin) { 1050 | costMin = costs[pointTemp]; 1051 | costMinPoint = pointTemp; 1052 | } 1053 | } 1054 | } 1055 | down_far = costMinPoint; 1056 | ComputeMultiViewCostVectorOld(p, plane_hypotheses[down_far], cost_array[3], helper); 1057 | } 1058 | 1059 | // left_far 1060 | if (p.x > 2) { 1061 | flag[5] = true; 1062 | num_valid_pixels++; 1063 | costMin = costs[left_far]; 1064 | costMinPoint = left_far; 1065 | for (int i = 1; i < 11; ++i) { 1066 | if (p.x > 2 + 2 * i) { 1067 | int pointTemp = left_far - 2 * i; 1068 | if (costs[pointTemp] < costMin) { 1069 | costMin = costs[pointTemp]; 1070 | costMinPoint = pointTemp; 1071 | } 1072 | } 1073 | } 1074 | left_far = costMinPoint; 1075 | ComputeMultiViewCostVectorOld(p, plane_hypotheses[left_far], cost_array[5], helper); 1076 | } 1077 | 1078 | // right_far 1079 | if (p.x < width - 3) { 1080 | flag[7] = true; 1081 | num_valid_pixels++; 1082 | costMin = costs[right_far]; 1083 | costMinPoint = right_far; 1084 | for (int i = 1; i < 11; ++i) { 1085 | if (p.x < width - 3 - 2 * i) { 1086 | int pointTemp = right_far + 2 * i; 1087 | if 
(costs[pointTemp] < costMin) { 1088 | costMin = costs[pointTemp]; 1089 | costMinPoint = pointTemp; 1090 | } 1091 | } 1092 | } 1093 | right_far = costMinPoint; 1094 | ComputeMultiViewCostVectorOld(p, plane_hypotheses[right_far], cost_array[7], helper); 1095 | } 1096 | 1097 | // up_near 1098 | if (p.y > 0) { 1099 | flag[0] = true; 1100 | num_valid_pixels++; 1101 | costMin = costs[up_near]; 1102 | costMinPoint = up_near; 1103 | for (int i = 0; i < 3; ++i) { 1104 | if (p.y > 1 + i && p.x > i) { 1105 | int pointTemp = up_near - (1 + i) * width - (1 + i); 1106 | if (costs[pointTemp] < costMin) { 1107 | costMin = costs[pointTemp]; 1108 | costMinPoint = pointTemp; 1109 | } 1110 | } 1111 | if (p.y > 1 + i && p.x < width - 1 - i) { 1112 | int pointTemp = up_near - (1 + i) * width + (1 + i); 1113 | if (costs[pointTemp] < costMin) { 1114 | costMin = costs[pointTemp]; 1115 | costMinPoint = pointTemp; 1116 | } 1117 | } 1118 | } 1119 | up_near = costMinPoint; 1120 | ComputeMultiViewCostVectorOld(p, plane_hypotheses[up_near], cost_array[0], helper); 1121 | } 1122 | 1123 | // down_near 1124 | if (p.y < height - 1) { 1125 | flag[2] = true; 1126 | num_valid_pixels++; 1127 | costMin = costs[down_near]; 1128 | costMinPoint = down_near; 1129 | for (int i = 0; i < 3; ++i) { 1130 | if (p.y < height - 2 - i && p.x > i) { 1131 | int pointTemp = down_near + (1 + i) * width - (1 + i); 1132 | if (costs[pointTemp] < costMin) { 1133 | costMin = costs[pointTemp]; 1134 | costMinPoint = pointTemp; 1135 | } 1136 | } 1137 | if (p.y < height - 2 - i && p.x < width - 1 - i) { 1138 | int pointTemp = down_near + (1 + i) * width + (1 + i); 1139 | if (costs[pointTemp] < costMin) { 1140 | costMin = costs[pointTemp]; 1141 | costMinPoint = pointTemp; 1142 | } 1143 | } 1144 | } 1145 | down_near = costMinPoint; 1146 | ComputeMultiViewCostVectorOld(p, plane_hypotheses[down_near], cost_array[2], helper); 1147 | } 1148 | 1149 | // left_near 1150 | if (p.x > 0) { 1151 | flag[4] = true; 1152 | num_valid_pixels++; 
1153 | costMin = costs[left_near]; 1154 | costMinPoint = left_near; 1155 | for (int i = 0; i < 3; ++i) { 1156 | if (p.x > 1 + i && p.y > i) { 1157 | int pointTemp = left_near - (1 + i) - (1 + i) * width; 1158 | if (costs[pointTemp] < costMin) { 1159 | costMin = costs[pointTemp]; 1160 | costMinPoint = pointTemp; 1161 | } 1162 | } 1163 | if (p.x > 1 + i && p.y < height - 1 - i) { 1164 | int pointTemp = left_near - (1 + i) + (1 + i) * width; 1165 | if (costs[pointTemp] < costMin) { 1166 | costMin = costs[pointTemp]; 1167 | costMinPoint = pointTemp; 1168 | } 1169 | } 1170 | } 1171 | left_near = costMinPoint; 1172 | ComputeMultiViewCostVectorOld(p, plane_hypotheses[left_near], cost_array[4], helper); 1173 | } 1174 | 1175 | // right_near 1176 | if (p.x < width - 1) { 1177 | flag[6] = true; 1178 | num_valid_pixels++; 1179 | costMin = costs[right_near]; 1180 | costMinPoint = right_near; 1181 | for (int i = 0; i < 3; ++i) { 1182 | if (p.x < width - 2 - i && p.y > i) { 1183 | int pointTemp = right_near + (1 + i) - (1 + i) * width; 1184 | if (costs[pointTemp] < costMin) { 1185 | costMin = costs[pointTemp]; 1186 | costMinPoint = pointTemp; 1187 | } 1188 | } 1189 | if (p.x < width - 2 - i && p.y < height - 1 - i) { 1190 | int pointTemp = right_near + (1 + i) + (1 + i) * width; 1191 | if (costs[pointTemp] < costMin) { 1192 | costMin = costs[pointTemp]; 1193 | costMinPoint = pointTemp; 1194 | } 1195 | } 1196 | } 1197 | right_near = costMinPoint; 1198 | ComputeMultiViewCostVectorOld(p, plane_hypotheses[right_near], cost_array[6], helper); 1199 | } 1200 | 1201 | const int positions[8] = {up_near, up_far, down_near, down_far, left_near, left_far, right_near, right_far}; 1202 | 1203 | // Multi-hypothesis Joint View Selection 1204 | uchar *view_weights = &(helper->view_weight_cuda[center * MAX_IMAGES]); 1205 | for (int i = 0; i < MAX_IMAGES; ++i) { 1206 | view_weights[i] = 0; 1207 | } 1208 | float view_selection_priors[32] = { 0.0f }; 1209 | 1210 | int neighbor_positions[4] = { center 
- width, center + width, center - 1, center + 1 }; 1211 | for (int i = 0; i < 4; ++i) { 1212 | if (flag[2 * i]) { 1213 | for (int j = 0; j < num_images - 1; ++j) { 1214 | if (isSet(selected_views[neighbor_positions[i]], j) == 1) { 1215 | view_selection_priors[j] += 0.9f; 1216 | } 1217 | else { 1218 | view_selection_priors[j] += 0.1f; 1219 | } 1220 | } 1221 | } 1222 | } 1223 | 1224 | float sampling_probs[32] = { 0.0f }; 1225 | float cost_threshold = 0.8 * expf((iter) * (iter) / (-90.0f)); 1226 | for (int i = 0; i < num_images - 1; i++) { 1227 | float count = 0; 1228 | int count_false = 0; 1229 | float tmpw = 0; 1230 | for (int j = 0; j < 8; j++) { 1231 | if (cost_array[j][i] < cost_threshold) { 1232 | tmpw += expf(cost_array[j][i] * cost_array[j][i] / (-0.18f)); 1233 | count++; 1234 | } 1235 | if (cost_array[j][i] > 1.2f) { 1236 | count_false++; 1237 | } 1238 | } 1239 | if (count > 2 && count_false < 3) { 1240 | sampling_probs[i] = tmpw / count; 1241 | } 1242 | else if (count_false < 3) { 1243 | sampling_probs[i] = expf(cost_threshold * cost_threshold / (-0.32f)); 1244 | } 1245 | sampling_probs[i] = sampling_probs[i] * view_selection_priors[i]; 1246 | } 1247 | 1248 | TransformPDFToCDF(sampling_probs, num_images - 1); 1249 | for (int sample = 0; sample < 15; ++sample) { 1250 | const float rand_prob = curand_uniform(&rand_states[center]) - FLT_EPSILON; 1251 | 1252 | for (int image_id = 0; image_id < num_images - 1; ++image_id) { 1253 | const float prob = sampling_probs[image_id]; 1254 | if (prob > rand_prob) { 1255 | view_weights[image_id] += 1; 1256 | break; 1257 | } 1258 | } 1259 | } 1260 | 1261 | unsigned int temp_selected_views = 0; 1262 | int num_selected_view = 0; 1263 | float weight_norm = 0; 1264 | 1265 | for (int i = 0; i < num_images - 1; ++i) { 1266 | if (view_weights[i] > 0) { 1267 | setBit(&temp_selected_views, i); 1268 | weight_norm += view_weights[i]; 1269 | num_selected_view++; 1270 | } 1271 | } 1272 | 1273 | float final_costs[8] = { 0.0f }; 1274 | 
// ---- tail of CheckerboardPropagationStrong (function begins earlier in the file) ----
    // Aggregate each of the 8 candidate-hypothesis cost vectors over the
    // sampled views, normalised by the total view weight.
    for (int i = 0; i < 8; ++i) {
        for (int j = 0; j < num_images - 1; ++j) {
            if (view_weights[j] > 0) {
                final_costs[i] += view_weights[j] * cost_array[i][j];
            }
        }
        final_costs[i] /= weight_norm;
    }

    const int min_cost_idx = FindMinCostIndex(final_costs, 8);

    // Cost of keeping the pixel's current plane hypothesis.
    float cost_vector_now[32] = { 2.0f };
    ComputeMultiViewCostVectorOld(p, plane_hypotheses[center], cost_vector_now, helper);
    float cost_now = 0.0f;
    for (int i = 0; i < num_images - 1; ++i) {
        cost_now += view_weights[i] * cost_vector_now[i];
    }
    cost_now /= weight_norm;
    costs[center] = cost_now;
    float depth_now = ComputeDepthfromPlaneHypothesis(cameras[0], plane_hypotheses[center], p);
    float4 plane_hypotheses_now = plane_hypotheses[center];

    // Adopt the cheapest neighbouring hypothesis when its induced depth lies
    // inside the valid range and it beats the current cost.
    if (flag[min_cost_idx]) {
        float depth_before = ComputeDepthfromPlaneHypothesis(cameras[0], plane_hypotheses[positions[min_cost_idx]], p);
        if (depth_before >= params->depth_min && depth_before <= params->depth_max && final_costs[min_cost_idx] < cost_now) {
            depth_now = depth_before;
            plane_hypotheses_now = plane_hypotheses[positions[min_cost_idx]];
            cost_now = final_costs[min_cost_idx];
            selected_views[center] = temp_selected_views;
        }
    }
    PlaneHypothesisRefinementStrong(&plane_hypotheses_now, &depth_now, &cost_now, &rand_states[center], view_weights, weight_norm, p, helper);

    if (params->state == REFINE_INIT) {
        // During refinement initialisation only accept clear improvements.
        if (cost_now < costs[center] - 0.1f) {
            costs[center] = cost_now;
            plane_hypotheses[center] = plane_hypotheses_now;
        }
    }
    else {
        costs[center] = cost_now;
        plane_hypotheses[center] = plane_hypotheses_now;
    }
}

// Checkerboard propagation for WEAK-textured pixels.  Candidate hypotheses are
// taken from the deformed neighbourhood (GetNeighbourPoint) restricted to
// STRONG pixels and scored with the "new" (deformed-patch) matching cost; the
// stored cost is re-evaluated at the end with the "old" cost so strong and
// weak pixels remain comparable.
__device__ void CheckerboardPropagationWeak(
    const int2 p,
    const int iter,
    DataPassHelper *helper
) {
    const int width = helper->width;
    const int height = helper->height;
    float4 *plane_hypotheses = helper->plane_hypotheses_cuda;
    float *costs = helper->costs_cuda;
    curandState *rand_states = helper->rand_states_cuda;
    unsigned int *selected_views = helper->selected_views_cuda;
    PatchMatchParams *params = helper->params;
    const Camera *cameras = helper->cameras_cuda;
    int num_images = params->num_images;

    if (p.x >= width || p.y >= height) {
        return;
    }

    const int center = p.y * width + p.x;

    // Adaptive Checkerboard Sampling.
    // NOTE(review): "= { 2.0f }" only sets element [0][0]; the remaining
    // entries are zero-initialised.  Kept as in the original code.
    float cost_array[8][32] = { 2.0f };
    bool flag[8] = { false };
    int num_valid_pixels = 0;

    int positions[8] = { 0 };
    float4 new_plane_hypothesis[8];

    for (int i = 0; i < 8; ++i) {
        const auto neighbour_pt = GetNeighbourPoint(p, i + 1, helper);
        if (neighbour_pt.x == -1 || neighbour_pt.y == -1 || helper->weak_info_cuda[neighbour_pt.x + neighbour_pt.y * width] != STRONG) {
            flag[i] = false;
            continue;
        }
        positions[i] = neighbour_pt.x + neighbour_pt.y * width;
        flag[i] = true;
        num_valid_pixels++;
        ComputeMultiViewCostVectorNew(p, plane_hypotheses[positions[i]], cost_array[i], helper);
        new_plane_hypothesis[i] = plane_hypotheses[positions[i]];
    }

    // Multi-hypothesis Joint View Selection.
    uchar *view_weights = &(helper->view_weight_cuda[center * MAX_IMAGES]);
    for (int i = 0; i < MAX_IMAGES; ++i) {
        view_weights[i] = 0;
    }
    float view_selection_priors[32] = { 0.0f };
    for (int i = 0; i < 8; ++i) {
        const auto neighbour_pt = GetNeighbourPoint(p, i + 1, helper);
        if (neighbour_pt.x == -1 || neighbour_pt.y == -1) {
            continue;
        }
        for (int j = 0; j < num_images - 1; ++j) {
            if (isSet(selected_views[neighbour_pt.x + neighbour_pt.y * width], j) == 1) {
                view_selection_priors[j] += 0.9f;
            }
            else {
                view_selection_priors[j] += 0.1f;
            }
        }
    }

    float sampling_probs[32] = { 0.0f };
    // Threshold shrinks with the iteration count, tightening view selection.
    float cost_threshold = 0.8f * expf((iter) * (iter) / (-90.0f));
    for (int i = 0; i < num_images - 1; i++) {
        float count = 0;
        int count_false = 0;
        float tmpw = 0;
        for (int j = 0; j < 8; j++) {
            if (cost_array[j][i] < cost_threshold) {
                tmpw += expf(cost_array[j][i] * cost_array[j][i] / (-0.18f));
                count++;
            }
            if (cost_array[j][i] > 1.2f) {
                count_false++;
            }
        }
        if (count > 2 && count_false < 3) {
            sampling_probs[i] = tmpw / count;
        }
        else if (count_false < 3) {
            sampling_probs[i] = expf(cost_threshold * cost_threshold / (-0.32f));
        }
        sampling_probs[i] = sampling_probs[i] * view_selection_priors[i];
    }

    // Monte-Carlo view sampling: 15 draws from the per-view CDF.
    TransformPDFToCDF(sampling_probs, num_images - 1);
    for (int sample = 0; sample < 15; ++sample) {
        const float rand_prob = curand_uniform(&rand_states[center]) - FLT_EPSILON;
        for (int image_id = 0; image_id < num_images - 1; ++image_id) {
            const float prob = sampling_probs[image_id];
            if (prob > rand_prob) {
                view_weights[image_id] += 1;
                break;
            }
        }
    }

    unsigned int temp_selected_views = 0;
    int num_selected_view = 0;
    float weight_norm = 0;
    for (int i = 0; i < num_images - 1; ++i) {
        if (view_weights[i] > 0) {
            setBit(&temp_selected_views, i);
            weight_norm += view_weights[i];
            num_selected_view++;
        }
    }

    float final_costs[8] = { 0.0f };
    for (int i = 0; i < 8; ++i) {
        for (int j = 0; j < num_images - 1; ++j) {
            if (view_weights[j] > 0) {
                if (params->geom_consistency) {
                    if (flag[i]) {
                        final_costs[i] += view_weights[j] * (cost_array[i][j] + params->geom_factor * ComputeGeomConsistencyCost(p, j + 1, plane_hypotheses[positions[i]], helper));
                    }
                    else {
                        // Invalid neighbour: penalise with the maximum
                        // geometric-consistency cost.
                        final_costs[i] += view_weights[j] * (cost_array[i][j] + params->geom_factor * 3.0f);
                    }
                }
                else {
                    final_costs[i] += view_weights[j] * cost_array[i][j];
                }
            }
        }
        final_costs[i] /= weight_norm;
    }

    const int min_cost_idx = FindMinCostIndex(final_costs, 8);

    // Cost of keeping the current hypothesis (new cost + optional geometric
    // consistency term).
    float cost_vector_now[32] = { 2.0f };
    ComputeMultiViewCostVectorNew(p, plane_hypotheses[center], cost_vector_now, helper);
    float cost_now = 0.0f;
    for (int i = 0; i < num_images - 1; ++i) {
        if (params->geom_consistency) {
            cost_now += view_weights[i] * (cost_vector_now[i] + params->geom_factor * ComputeGeomConsistencyCost(p, i + 1, plane_hypotheses[center], helper));
        }
        else {
            cost_now += view_weights[i] * cost_vector_now[i];
        }
    }
    cost_now /= weight_norm;
    costs[center] = cost_now;
    float depth_now = ComputeDepthfromPlaneHypothesis(cameras[0], plane_hypotheses[center], p);
    float4 plane_hypotheses_now = plane_hypotheses[center];

    if (flag[min_cost_idx]) {
        float depth_before = ComputeDepthfromPlaneHypothesis(cameras[0], new_plane_hypothesis[min_cost_idx], p);
        if (depth_before >= params->depth_min && depth_before <= params->depth_max && final_costs[min_cost_idx] < cost_now) {
            depth_now = depth_before;
            plane_hypotheses_now = new_plane_hypothesis[min_cost_idx];
            cost_now = final_costs[min_cost_idx];
            selected_views[center] = temp_selected_views;
        }
    }
    PlaneHypothesisRefinementWeak(&plane_hypotheses_now, &depth_now, &cost_now, &rand_states[center], view_weights, weight_norm, p, helper);

    if (params->state == REFINE_INIT) {
        if (cost_now < costs[center] - 0.1f) {
            costs[center] = cost_now;
            plane_hypotheses[center] = plane_hypotheses_now;
        }
    }
    else {
        costs[center] = cost_now;
        plane_hypotheses[center] = plane_hypotheses_now;
    }

    { // Re-evaluate the stored cost with the old method so it is comparable
      // with the costs of STRONG pixels.
        cost_now = 0.0f;
        ComputeMultiViewCostVectorOld(p, plane_hypotheses[center], cost_vector_now, helper);
        for (int i = 0; i < num_images - 1; ++i) {
            cost_now += view_weights[i] * cost_vector_now[i];
        }
        cost_now /= weight_norm;
        costs[center] = cost_now;
    }
}

// Updates WEAK pixels on the "black" squares of the checkerboard.  Launched on
// a half-height grid: each thread maps to row 2*y or 2*y + 1 depending on the
// parity of threadIdx.x, producing the checkerboard pattern.
__global__ void BlackPixelUpdateWeak(const int iter, DataPassHelper *helper)
{
    int2 p = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);

    if (threadIdx.x % 2 == 0) {
        p.y = p.y * 2;
    }
    else {
        p.y = p.y * 2 + 1;
    }
    if (p.x >= helper->width || p.y >= helper->height) {
        return;
    }
    if (helper->weak_info_cuda[p.x + p.y * helper->width] == WEAK) {
        CheckerboardPropagationWeak(p, iter, helper);
    }
}

// Same as BlackPixelUpdateWeak but with the row parity inverted ("red"
// squares of the checkerboard).
__global__ void RedPixelUpdateWeak(const int iter, DataPassHelper *helper)
{
    int2 p = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);

    if (threadIdx.x % 2 == 0) {
        p.y = p.y * 2 + 1;
    }
    else {
        p.y = p.y * 2;
    }
    if (p.x >= helper->width || p.y >= helper->height) {
        return;
    }
    if (helper->weak_info_cuda[p.x + p.y * helper->width] == WEAK) {
        CheckerboardPropagationWeak(p, iter, helper);
    }
}

// Updates non-WEAK pixels on the "black" checkerboard squares.
__global__ void BlackPixelUpdateStrong(const int iter, DataPassHelper *helper)
{
    int2 p = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);

    if (threadIdx.x % 2 == 0) {
        p.y = p.y * 2;
    }
    else {
        p.y =
p.y * 2 + 1; // (continuation of BlackPixelUpdateStrong from the previous chunk)
    }
    if (p.x >= helper->width || p.y >= helper->height) {
        return;
    }
    if (helper->weak_info_cuda[p.x + p.y * helper->width] == WEAK) {
        return;
    }

    CheckerboardPropagationStrong(p, iter, helper);
}

// Same as BlackPixelUpdateStrong but with the row parity inverted ("red"
// squares of the checkerboard).
__global__ void RedPixelUpdateStrong(const int iter, DataPassHelper *helper)
{
    int2 p = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);

    if (threadIdx.x % 2 == 0) {
        p.y = p.y * 2 + 1;
    }
    else {
        p.y = p.y * 2;
    }
    if (p.x >= helper->width || p.y >= helper->height) {
        return;
    }
    if (helper->weak_info_cuda[p.x + p.y * helper->width] == WEAK) {
        return;
    }

    CheckerboardPropagationStrong(p, iter, helper);
}

// Converts each plane hypothesis in place: .w becomes the depth at the pixel
// and the normal is transformed into world coordinates.  One thread per pixel.
__global__ void GetDepthandNormal(
    DataPassHelper *helper
) {
    const int2 p = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
    Camera *cameras = helper->cameras_cuda;
    float4 *plane_hypotheses = helper->plane_hypotheses_cuda;
    const int width = helper->width;  // fixed stray ';;' in the original
    const int height = helper->height;

    if (p.x >= width || p.y >= height) {
        return;
    }
    const int center = p.y * width + p.x;
    plane_hypotheses[center].w = ComputeDepthfromPlaneHypothesis(cameras[0], plane_hypotheses[center], p);
    plane_hypotheses[center] = TransformNormal(cameras[0], plane_hypotheses[center]);
}

// Median filter on the depth component (.w) over a sparse checkerboard-shaped
// neighbourhood, restricted to STRONG pixels: up to 21 samples (centre + 20
// neighbours) are collected and the median written back.
__device__ void CheckerboardFilterStrong(
    const int2 p,
    DataPassHelper *helper
) {
    int width = helper->width;
    int height = helper->height;
    if (p.x >= width || p.y >= height) {
        return;
    }
    float4 *plane_hypotheses = helper->plane_hypotheses_cuda;
    float *costs = helper->costs_cuda;
    const int center = p.y * width + p.x;

    float filter[21];
    int index = 0;

    filter[index++] = plane_hypotheses[center].w;

    // Left
    const int left = center - 1;
    const int leftleft = center - 3;

    // Up
    const int up = center - width;
    const int upup = center - 3 * width;

    // Down
    const int down = center + width;
    const int downdown = center + 3 * width;

    // Right
    const int right = center + 1;
    const int rightright = center + 3;

    // Pixels that already reached a near-zero cost are left untouched.
    if (costs[center] < 0.001f) {
        return;
    }

    // NOTE(review): the bound checks below were reconstructed — the scraped
    // source had the "< limit && helper->" fragments stripped by an HTML
    // sanitiser.  They follow the symmetric pattern of the surviving checks
    // (and the identical ACMM checkerboard filter); verify against upstream.
    if (p.y > 0 && helper->weak_info_cuda[up] == STRONG) {
        filter[index++] = plane_hypotheses[up].w;
    }
    if (p.y > 2 && helper->weak_info_cuda[upup] == STRONG) {
        filter[index++] = plane_hypotheses[upup].w;
    }
    if (p.y > 4 && helper->weak_info_cuda[upup - width * 2] == STRONG) {
        filter[index++] = plane_hypotheses[upup - width * 2].w;
    }
    if (p.y < height - 1 && helper->weak_info_cuda[down] == STRONG) {
        filter[index++] = plane_hypotheses[down].w;
    }
    if (p.y < height - 3 && helper->weak_info_cuda[downdown] == STRONG) {
        filter[index++] = plane_hypotheses[downdown].w;
    }
    if (p.y < height - 5 && helper->weak_info_cuda[downdown + width * 2] == STRONG) {
        filter[index++] = plane_hypotheses[downdown + width * 2].w;
    }
    if (p.x > 0 && helper->weak_info_cuda[left] == STRONG) {
        filter[index++] = plane_hypotheses[left].w;
    }
    if (p.x > 2 && helper->weak_info_cuda[leftleft] == STRONG) {
        filter[index++] = plane_hypotheses[leftleft].w;
    }
    if (p.x > 4 && helper->weak_info_cuda[leftleft - 2] == STRONG) {
        filter[index++] = plane_hypotheses[leftleft - 2].w;
    }
    if (p.x < width - 1 && helper->weak_info_cuda[right] == STRONG) {
        filter[index++] = plane_hypotheses[right].w;
    }
    if (p.x < width - 3 && helper->weak_info_cuda[rightright] == STRONG) {
        filter[index++] = plane_hypotheses[rightright].w;
    }
    if (p.x < width - 5 && helper->weak_info_cuda[rightright + 2] == STRONG) {
        filter[index++] = plane_hypotheses[rightright + 2].w;
    }
    if (p.y > 0 && p.x < width - 2 && helper->weak_info_cuda[up + 2] == STRONG) {
        filter[index++] = plane_hypotheses[up + 2].w;
    }
    if (p.y < height - 1 && p.x < width - 2 && helper->weak_info_cuda[down + 2] == STRONG) {
        filter[index++] = plane_hypotheses[down + 2].w;
    }
    if (p.y > 0 && p.x > 1 && helper->weak_info_cuda[up - 2] == STRONG) {
        filter[index++] = plane_hypotheses[up - 2].w;
    }
    if (p.y < height - 1 && p.x > 1 && helper->weak_info_cuda[down - 2] == STRONG) {
        filter[index++] = plane_hypotheses[down - 2].w;
    }
    if (p.x > 0 && p.y > 2 && helper->weak_info_cuda[left - width * 2] == STRONG) {
        filter[index++] = plane_hypotheses[left - width * 2].w;
    }
    if (p.x < width - 1 && p.y > 2 && helper->weak_info_cuda[right - width * 2] == STRONG) {
        filter[index++] = plane_hypotheses[right - width * 2].w;
    }
    if (p.x > 0 && p.y < height - 2 && helper->weak_info_cuda[left + width * 2] == STRONG) {
        filter[index++] = plane_hypotheses[left + width * 2].w;
    }
    if (p.x < width - 1 && p.y < height - 2 && helper->weak_info_cuda[right + width * 2] == STRONG) {
        filter[index++] = plane_hypotheses[right + width * 2].w;
    }

    sort_small(filter, index);
    int median_index = index / 2;
    if (index % 2 == 0) {
        plane_hypotheses[center].w = (filter[median_index - 1] + filter[median_index]) / 2;
    }
    else {
        plane_hypotheses[center].w = filter[median_index];
    }
}

// Median-filters non-WEAK pixels on the "black" checkerboard squares.
__global__ void BlackPixelFilterStrong(DataPassHelper *helper)
{
    int2 p = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
    if (threadIdx.x % 2 == 0) {
        p.y = p.y * 2;
    }
    else {
        p.y = p.y * 2 + 1;
    }
    if (p.x >= helper->width || p.y >= helper->height) {
        return;
    }
    if (helper->weak_info_cuda[p.x + p.y * helper->width] != WEAK) {
        CheckerboardFilterStrong(p,
helper); // (continuation of BlackPixelFilterStrong from the previous chunk)
    }
}

// Median-filters non-WEAK pixels on the "red" checkerboard squares
// (row parity inverted w.r.t. BlackPixelFilterStrong).
__global__ void RedPixelFilterStrong(DataPassHelper *helper)
{
    int2 p = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
    if (threadIdx.x % 2 == 0) {
        p.y = p.y * 2 + 1;
    }
    else {
        p.y = p.y * 2;
    }
    if (p.x >= helper->width || p.y >= helper->height) {
        return;
    }
    if (helper->weak_info_cuda[p.x + p.y * helper->width] != WEAK) {
        CheckerboardFilterStrong(p, helper);
    }
}

// For every WEAK pixel: search 8 base directions (each rotated rotate_time
// times) for nearby STRONG pixels, RANSAC-fit a plane through their 3D points,
// and store the best-supported STRONG neighbours in helper->neighbours_cuda.
// Pixels with no reliable support are flagged through weak_reliable_cuda so
// NeigbourUpdate can demote them to UNKNOWN.
__global__ void GenNeighbours(
    DataPassHelper *helper
) {
    int width = helper->width;
    int height = helper->height;
    const int2 point = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
    if (point.x >= width || point.y >= height) {
        return;
    }
    const unsigned center = point.x + point.y * width;
    const uchar *weak_info = helper->weak_info_cuda;
    if (weak_info[center] != WEAK) {
        // not weak point return
        return;
    }
    const int min_margin = 6;
    const float depth_diff = helper->params->depth_max - helper->params->depth_min;
    const int *neighbours_map = helper->neighbours_map_cuda;
    const PatchMatchParams *params = helper->params;
    const short2 *weak_nearest_strong = helper->weak_nearest_strong;
    const Camera camera = helper->cameras_cuda[0];
    const unsigned offset = neighbours_map[center] * NEIGHBOUR_NUM;
    const float4 *plane_hypotheses = helper->plane_hypotheses_cuda;
    curandState *rand_state = &(helper->rand_states_cuda[center]);
    short2 *neighbours = &(helper->neighbours_cuda[offset]);
    uchar *weak_reliable = &(helper->weak_reliable_cuda[center]);

    // Invalidate all slots, then store the centre itself in slot 0.
    for (int i = 0; i < NEIGHBOUR_NUM; ++i) {
        neighbours[i].x = -1;
        neighbours[i].y = -1;
    }
    neighbours[0] = make_short2(point.x, point.y); // the first point is the center point

    short2 strong_points[8 * 4];
    bool dir_valid[8 * 4];
    for (int i = 0; i < 32; ++i) {
        strong_points[i] = make_short2(-1, -1);
        dir_valid[i] = false;
    }
    int origin_direction_index = -1;
    int strong_point_size = 0;
    const int rotate_time = params->rotate_time; // max is 4 from [1, 2, 4]
    const float angle = 45.0f / rotate_time;
    const float cos_angle = cos(angle * M_PI / 180.f);
    const float sin_angle = sin(angle * M_PI / 180.f);
    const float threshhold = cos((angle / 2.0f) * M_PI / 180.0f);
    const int shift_range = MAX((int)(tan((angle / 2.0f) * M_PI / 180.0f) * 20), 1);
    const float ransac_threshold = params->ransac_threshold;

    // Scan the 8 compass directions; each direction is rotated rotate_time
    // times by `angle` degrees, and per rotation the search radius grows until
    // a STRONG pixel within the angular tolerance is found.
    for (int origin_direction_x = -1; origin_direction_x <= 1; ++origin_direction_x) {
        for (int origin_direction_y = -1; origin_direction_y <= 1; ++origin_direction_y) {
            if (origin_direction_x == 0 && origin_direction_y == 0) {
                continue;
            }
            float2 origin_direction = make_float2(origin_direction_x, origin_direction_y);
            NormalizeVec2(&origin_direction);
            origin_direction_index++;
            for (int rotate_iter = 0; rotate_iter < rotate_time; ++rotate_iter) {
                int dir_index = origin_direction_index * 4 + rotate_iter;
                for (int radius = 2; radius <= MAX_SEARCH_RADIUS; radius = MIN(radius * 2, radius + 25)) {
                    float2 test_pt = make_float2(point.x + origin_direction.x * radius, point.y + origin_direction.y * radius);
                    if (test_pt.x < 0 || test_pt.y < 0 || test_pt.x >= width || test_pt.y >= height) {
                        break;
                    }
                    for (int radius_iter = 0; radius_iter < 4; ++radius_iter) {
                        // Random angular jitter within the direction's cone.
                        int rand_x_shift = (curand(rand_state) % 2 == 0 ? 1 : -1) * curand(rand_state) % shift_range;
                        int rand_y_shift = (curand(rand_state) % 2 == 0 ? 1 : -1) * curand(rand_state) % shift_range;
                        float2 direction = make_float2(origin_direction.x * 20 + rand_x_shift, origin_direction.y * 20 + rand_y_shift);
                        NormalizeVec2(&direction);
                        short2 neighbour_pt = make_short2(point.x + direction.x * radius, point.y + direction.y * radius);
                        if (neighbour_pt.x < min_margin || neighbour_pt.y < min_margin || neighbour_pt.x >= width - min_margin || neighbour_pt.y >= height - min_margin) {
                            continue;
                        }
                        int neighbour_pt_center = neighbour_pt.x + neighbour_pt.y * width;
                        if (weak_info[neighbour_pt_center] != STRONG) {
                            // Redirect to the precomputed nearest STRONG pixel.
                            neighbour_pt = weak_nearest_strong[neighbour_pt_center];
                            if (neighbour_pt.x == -1 || neighbour_pt.y == -1) {
                                continue;
                            }
                            neighbour_pt_center = neighbour_pt.x + neighbour_pt.y * width;
                        }
                        float2 test_direction = make_float2(neighbour_pt.x - point.x, neighbour_pt.y - point.y);
                        NormalizeVec2(&test_direction);
                        // Local cos_angle intentionally shadows the outer
                        // rotation constant; the rotation block below still
                        // sees the outer one (different scope).
                        float cos_angle = Vec2DotVec2(test_direction, origin_direction);
                        if (cos_angle > threshhold) {
                            strong_points[dir_index] = neighbour_pt;
                            dir_valid[dir_index] = true;
                            strong_point_size++;
                            break;
                        }
                    }
                    if (dir_valid[dir_index]) {
                        break;
                    }
                }
                // rotate the search direction by `angle` degrees
                {
                    float2 rotated_direction;
                    rotated_direction.x = origin_direction.x * cos_angle - origin_direction.y * sin_angle;
                    rotated_direction.y = origin_direction.x * sin_angle + origin_direction.y * cos_angle;
                    NormalizeVec2(&rotated_direction);
                    origin_direction = rotated_direction;
                }
            }
        }
    }

    if (strong_point_size <= 3) {
        *weak_reliable = 0;
        return;
    }
    float4 best_plane;
    int use_a_index = -1, use_b_index = -1, use_c_index = -1;
    bool has_valid_plane = false;
    short2 strong_points_valid[8 * 4];
    float3 strong_points_valid_3d[8 * 4];
    int valid_count = 0;
    float X[3];
    Get3DPoint(camera, point, plane_hypotheses[center].w, X);
    float3 center_point_world = make_float3(X[0], X[1], X[2]);
    // Compact the valid strong points and lift them to 3D.
    for (int i = 0; i < 32; ++i) {
        strong_points_valid[i] = make_short2(-1, -1);
        if (dir_valid[i]) {
            const auto &strong_point = strong_points[i];
            int strong_point_center = strong_point.x + strong_point.y * width;
            strong_points_valid[valid_count] = strong_points[i];
            Get3DPoint(camera, strong_point, plane_hypotheses[strong_point_center].w, X);
            strong_points_valid_3d[valid_count] = make_float3(X[0], X[1], X[2]);
            valid_count++;
        }
    }
    { // RANSAC to find a good plane
        int iteration = 50;
        float min_cost = FLT_MAX;
        int max_count = 3;
        while (iteration--) {
            int a_index = curand(rand_state) % valid_count;
            int b_index = curand(rand_state) % valid_count;
            int c_index = curand(rand_state) % valid_count;
            if (a_index == b_index || b_index == c_index || a_index == c_index) {
                continue;
            }
            // the sampled triangle must contain the centre pixel
            if (!PointinTriangle(strong_points_valid[a_index], strong_points_valid[b_index], strong_points_valid[c_index], point)) {
                continue;
            }
            const float3 &A = strong_points_valid_3d[a_index];
            const float3 &B = strong_points_valid_3d[b_index];
            const float3 &C = strong_points_valid_3d[c_index];
            float3 A_C = make_float3(A.x - C.x, A.y - C.y, A.z - C.z);
            float3 B_C = make_float3(B.x - C.x, B.y - C.y, B.z - C.z);
            // Plane normal = (A - C) x (B - C); skip degenerate triangles.
            float4 cross_vec;
            cross_vec.x = A_C.y * B_C.z - B_C.y * A_C.z;
            cross_vec.y = -(A_C.x * B_C.z - B_C.x * A_C.z);
            cross_vec.z = A_C.x * B_C.y - B_C.x * A_C.y;
            if ((cross_vec.x == 0 && cross_vec.y == 0 && cross_vec.z == 0) || isnan(cross_vec.x) || isnan(cross_vec.y) || isnan(cross_vec.z)) {
                continue;
            }
            NormalizeVec3(&cross_vec);
            cross_vec.w = -(cross_vec.x * A.x + cross_vec.y * A.y + cross_vec.z * A.z);
            // Count inliers among all valid strong points.
            int temp_count = 0;
            float strong_dist = 0.0f;
            for (int strong_index = 0; strong_index < valid_count; ++strong_index) {
                const float3 &temp_point = strong_points_valid_3d[strong_index];
                float distance = fabs(cross_vec.x * temp_point.x + cross_vec.y * temp_point.y + cross_vec.z * temp_point.z + cross_vec.w);
                if (distance / depth_diff < ransac_threshold) {
                    temp_count++;
                    strong_dist += distance;
                }
            }
            if (temp_count < 6) {
                continue;
            }
            if (temp_count > max_count) {
                max_count = temp_count;
                const float center_distance = fabs(cross_vec.x * center_point_world.x + cross_vec.y * center_point_world.y + cross_vec.z * center_point_world.z + cross_vec.w);
                strong_dist /= temp_count;
                min_cost = center_distance;
                best_plane = cross_vec;
                has_valid_plane = true;
                use_a_index = a_index;
                use_b_index = b_index;
                use_c_index = c_index;
            }
            else if (temp_count == max_count) {
                // Tie-break on the plane's distance to the centre point.
                const float center_distance = fabs(cross_vec.x * center_point_world.x + cross_vec.y * center_point_world.y + cross_vec.z * center_point_world.z + cross_vec.w);
                if (center_distance < min_cost) {
                    max_count = temp_count;
                    strong_dist /= temp_count;
                    min_cost = center_distance;
                    best_plane = cross_vec;
                    use_a_index = a_index;
                    use_b_index = b_index;
                    use_c_index = c_index;
                }
            }
        }
    }
    if (!has_valid_plane) {
        *weak_reliable = 0;
        return;
    }
    // Rank the supporting points by distance to the best plane (outliers get
    // FLT_MAX and are invalidated; the three defining points get a bonus).
    float weight[32];
    for (int i = 0; i < valid_count; ++i) {
        const float3 &temp_point = strong_points_valid_3d[i];
        float distance = fabs(best_plane.x * temp_point.x + best_plane.y * temp_point.y + best_plane.z * temp_point.z + best_plane.w);
        if (distance / depth_diff >= ransac_threshold) {
            strong_points_valid[i] = make_short2(-1, -1);
            weight[i] = FLT_MAX;
            continue;
        }
        if (i == use_a_index || i == use_b_index || i == use_c_index) {
            distance -= 1;
        }
        weight[i] = distance;
    }
    sort_small_weighted(strong_points_valid, weight, valid_count);
    for (int i = 1; i < NEIGHBOUR_NUM; ++i) {
        neighbours[i] = strong_points_valid[i - 1];
    }
    *weak_reliable = 1;
}

// Demotes WEAK pixels whose neighbourhood search failed (weak_reliable != 1)
// to UNKNOWN.  (Name typo "Neigbour" kept — callers use this identifier.)
__global__ void NeigbourUpdate(
    DataPassHelper *helper
) {
    const int2 point = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
    const int width = helper->width;
    const int height = helper->height;
    if (point.x >= width || point.y >= height) {
        return;
    }
    const int center = point.x + point.y * width;
    if (helper->weak_info_cuda[center] != WEAK) {
        return;
    }
    if (helper->weak_reliable_cuda[center] != 1) {
        helper->weak_info_cuda[center] = UNKNOWN;
    }
}

// Re-classifies pixels as WEAK/UNKNOWN from the current depth map.
// (Function continues past this chunk.)
__global__ void DepthToWeak(DataPassHelper *helper) {
    const int2 point = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
    const int width = helper->width;
    const int height = helper->height;
    if (point.x >= width || point.y >= height) {
        return;
    }

    const int min_margin = 6;
    const int center = point.x + point.y * width;

    // Border pixels cannot be matched reliably.
    if (point.x < min_margin || point.y < min_margin || point.x >= width - min_margin || point.y >= height - min_margin) {
        helper->weak_info_cuda[center] = UNKNOWN;
        return;
    }
    const auto &image = helper->texture_objects_cuda[0].images[0];
    // NOTE(review): the template argument was stripped by the HTML sanitiser
    // in the scraped source; restored as tex2D<float> to match the float
    // result the surrounding code expects.
    const float center_pix = tex2D<float>(image, point.x + 0.5f, point.y + 0.5f);

    const Camera *cameras = helper->cameras_cuda;
    const unsigned *selected_views = helper->selected_views_cuda;
    const int num_images = helper->params->num_images;
2011 | const uchar *view_weight = &(helper->view_weight_cuda[MAX_IMAGES * center]); 2012 | float4 origin_plane_hypothesis; 2013 | origin_plane_hypothesis = helper->plane_hypotheses_cuda[center]; 2014 | origin_plane_hypothesis = TransformNormal2RefCam(cameras[0], origin_plane_hypothesis); 2015 | float origin_depth = origin_plane_hypothesis.w; 2016 | if (origin_depth == 0) { 2017 | helper->weak_info_cuda[center] = UNKNOWN; 2018 | return; 2019 | } 2020 | // compute cost now and baseline 2021 | 2022 | float cost_now = 0.0f; 2023 | float base_line = 0; 2024 | int valid_neighbour = 0; 2025 | float weight_normal = 0.0f; 2026 | for (int src_index = 1; src_index < num_images; ++src_index) { 2027 | int view_index = src_index - 1; 2028 | if (isSet(selected_views[center], view_index)) { 2029 | float4 temp_plane_hypothesis = origin_plane_hypothesis; 2030 | temp_plane_hypothesis.w = GetDistance2Origin(cameras[0], point, origin_depth, temp_plane_hypothesis); 2031 | float temp_cost = ComputeBilateralNCCOld(point, src_index, temp_plane_hypothesis, helper); 2032 | if (helper->params->geom_consistency) { 2033 | temp_cost += helper->params->geom_factor * ComputeGeomConsistencyCost(point, src_index, temp_plane_hypothesis, helper); 2034 | } 2035 | cost_now += (temp_cost * view_weight[view_index]); 2036 | weight_normal += view_weight[view_index]; 2037 | float c_dist[3]; 2038 | c_dist[0] = cameras[0].c[0] - cameras[src_index].c[0]; 2039 | c_dist[1] = cameras[0].c[1] - cameras[src_index].c[1]; 2040 | c_dist[2] = cameras[0].c[2] - cameras[src_index].c[2]; 2041 | double temp_val = c_dist[0] * c_dist[0] + c_dist[1] * c_dist[1] + c_dist[2] * c_dist[2]; 2042 | base_line += sqrtf(temp_val); 2043 | valid_neighbour++; 2044 | } 2045 | } 2046 | if (valid_neighbour == 0) { 2047 | helper->weak_info_cuda[center] = UNKNOWN; 2048 | return; 2049 | } 2050 | 2051 | cost_now /= weight_normal; 2052 | base_line /= valid_neighbour; 2053 | 2054 | float disp = cameras[0].K[0] * base_line / origin_depth; 2055 | 
const int radius = 30; 2056 | const int p_costs_size = 2 * radius + 1; 2057 | float p_costs[p_costs_size]; 2058 | int increment = 1; 2059 | 2060 | for (int p_disp = -radius * increment; p_disp <= radius * increment; p_disp += increment) { 2061 | 2062 | float p_depth = cameras[0].K[0] * base_line / (disp + p_disp); 2063 | if (p_depth < helper->params->depth_min || p_depth > helper->params->depth_max) { 2064 | p_costs[p_disp + radius] = 2.0f; 2065 | continue; 2066 | } 2067 | float4 temp_plane_hypothesis = origin_plane_hypothesis; 2068 | temp_plane_hypothesis.w = GetDistance2Origin(cameras[0], point, p_depth, temp_plane_hypothesis); 2069 | float p_cost = 0.0f; 2070 | for (int src_index = 1; src_index < num_images; ++src_index) { 2071 | int view_index = src_index - 1; 2072 | float temp_cost = 0.0f; 2073 | if (isSet(selected_views[center], view_index)) { 2074 | temp_cost += ComputeBilateralNCCOld(point, src_index, temp_plane_hypothesis, helper); 2075 | if (helper->params->geom_consistency) { 2076 | temp_cost += helper->params->geom_factor * ComputeGeomConsistencyCost(point, src_index, temp_plane_hypothesis, helper); 2077 | } 2078 | p_cost += (temp_cost * view_weight[view_index]); 2079 | } 2080 | } 2081 | p_cost /= weight_normal; 2082 | p_costs[p_disp + radius] = MIN(2.0f, p_cost); 2083 | } 2084 | #ifdef DEBUG_COST_LINE 2085 | { 2086 | float *weak_ncc_cost = &(helper->weak_ncc_cost_cuda[(size_t)center * 61]); 2087 | for (int i = 0; i < p_costs_size; ++i) { 2088 | weak_ncc_cost[i] = p_costs[i]; 2089 | } 2090 | } 2091 | #endif // DEBUG_COST_LINE 2092 | // find peaks 2093 | bool is_peak[p_costs_size]; 2094 | for (int i = 0; i < p_costs_size; ++i) { 2095 | is_peak[i] = false; 2096 | } 2097 | 2098 | int peak_count = 0; 2099 | int min_peak = 0; 2100 | float min_cost = 2.0f; 2101 | for (int i = 2; i < p_costs_size - 2; ++i) { 2102 | if (p_costs[i - 1] > p_costs[i] && p_costs[i + 1] > p_costs[i]) { 2103 | is_peak[i] = true; 2104 | peak_count++; 2105 | if (p_costs[i] < min_cost) 
{ 2106 | min_peak = i; 2107 | min_cost = p_costs[i]; 2108 | } 2109 | } 2110 | } 2111 | 2112 | if (abs(min_peak - radius) > helper->params->weak_peak_radius || p_costs[min_peak] > 0.5f) { 2113 | helper->weak_info_cuda[center] = WEAK; 2114 | return; 2115 | } 2116 | 2117 | if (peak_count == 1) { 2118 | if (p_costs[min_peak] <= 0.15f) { 2119 | helper->weak_info_cuda[center] = STRONG; 2120 | } 2121 | else { 2122 | helper->weak_info_cuda[center] = WEAK; 2123 | } 2124 | return; 2125 | } 2126 | 2127 | float var = 0.0f; 2128 | for (int i = 2; i < p_costs_size - 2; ++i) { 2129 | if (is_peak[i] && i != min_peak) { 2130 | float dist = p_costs[i] - min_cost; 2131 | var += dist * dist; 2132 | 2133 | } 2134 | } 2135 | var = sqrtf(var); 2136 | var /= (peak_count - 1); 2137 | 2138 | if (var > 0.2f) { 2139 | helper->weak_info_cuda[center] = STRONG; 2140 | } else { 2141 | helper->weak_info_cuda[center] = WEAK; 2142 | } 2143 | 2144 | } 2145 | 2146 | __global__ void LocalRefine(DataPassHelper *helper) { 2147 | const int2 point = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y); 2148 | const int width = helper->width; 2149 | const int height = helper->height; 2150 | if (point.x >= width || point.y >= height) { 2151 | return; 2152 | } 2153 | 2154 | const int center = point.x + point.y * width; 2155 | 2156 | const Camera *cameras = helper->cameras_cuda; 2157 | const unsigned *selected_views = helper->selected_views_cuda; 2158 | const int num_images = helper->params->num_images; 2159 | const uchar *view_weight = &(helper->view_weight_cuda[MAX_IMAGES * center]); 2160 | float4 origin_plane_hypothesis; 2161 | origin_plane_hypothesis = helper->plane_hypotheses_cuda[center]; 2162 | origin_plane_hypothesis = TransformNormal2RefCam(cameras[0], origin_plane_hypothesis); 2163 | float origin_depth = origin_plane_hypothesis.w; 2164 | if (origin_depth == 0) { 2165 | return; 2166 | } 2167 | 2168 | // compute cost now and baseline 2169 | float cost_now = 0.0f; 2170 
| float base_line = 0; 2171 | int valid_neighbour = 0; 2172 | float weight_normal = 0.0f; 2173 | for (int src_index = 1; src_index < num_images; ++src_index) { 2174 | int view_index = src_index - 1; 2175 | if (isSet(selected_views[center], view_index)) { 2176 | float4 temp_plane_hypothesis = origin_plane_hypothesis; 2177 | temp_plane_hypothesis.w = GetDistance2Origin(cameras[0], point, origin_depth, temp_plane_hypothesis); 2178 | float temp_cost = ComputeBilateralNCCOld(point, src_index, temp_plane_hypothesis, helper); 2179 | if (helper->params->geom_consistency) { 2180 | temp_cost += helper->params->geom_factor * ComputeGeomConsistencyCost(point, src_index, temp_plane_hypothesis, helper); 2181 | } 2182 | cost_now += (temp_cost * view_weight[view_index]); 2183 | weight_normal += view_weight[view_index]; 2184 | float c_dist[3]; 2185 | c_dist[0] = cameras[0].c[0] - cameras[src_index].c[0]; 2186 | c_dist[1] = cameras[0].c[1] - cameras[src_index].c[1]; 2187 | c_dist[2] = cameras[0].c[2] - cameras[src_index].c[2]; 2188 | double temp_val = c_dist[0] * c_dist[0] + c_dist[1] * c_dist[1] + c_dist[2] * c_dist[2]; 2189 | base_line += sqrtf(temp_val); 2190 | valid_neighbour++; 2191 | } 2192 | } 2193 | 2194 | if (weight_normal == 0 || valid_neighbour == 0) { 2195 | return; 2196 | } 2197 | 2198 | cost_now /= weight_normal; 2199 | base_line /= valid_neighbour; 2200 | 2201 | float disp = cameras[0].K[0] * base_line / origin_depth; 2202 | const int radius = 5; 2203 | 2204 | float min_cost = 2.0f; 2205 | float best_depth = origin_depth; 2206 | for (int p_disp = -radius ; p_disp <= radius; ++p_disp) { 2207 | float p_depth = cameras[0].K[0] * base_line / (disp + p_disp); 2208 | if (p_depth < helper->params->depth_min || p_depth > helper->params->depth_max) { 2209 | continue; 2210 | } 2211 | float4 temp_plane_hypothesis = origin_plane_hypothesis; 2212 | temp_plane_hypothesis.w = GetDistance2Origin(cameras[0], point, p_depth, temp_plane_hypothesis); 2213 | float temp_cost = 0.0f; 2214 | 
for (int src_index = 1; src_index < num_images; ++src_index) { 2215 | int view_index = src_index - 1; 2216 | if (isSet(selected_views[center], view_index)) { 2217 | temp_cost += (ComputeBilateralNCCOld(point, src_index, temp_plane_hypothesis, helper) * view_weight[view_index]); 2218 | if (helper->params->geom_consistency) { 2219 | temp_cost += (helper->params->geom_factor * ComputeGeomConsistencyCost(point, src_index, temp_plane_hypothesis, helper) * view_weight[view_index]); 2220 | } 2221 | } 2222 | } 2223 | temp_cost /= weight_normal; 2224 | if (temp_cost < min_cost) { 2225 | min_cost = temp_cost; 2226 | best_depth = p_depth; 2227 | } 2228 | } 2229 | if (cost_now - min_cost > 0.1) { 2230 | helper->plane_hypotheses_cuda[center].w = best_depth; 2231 | } 2232 | } 2233 | 2234 | __global__ void FindNearestStrongPoint(DataPassHelper *helper) { 2235 | const int2 point = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y); 2236 | const int width = helper->width; 2237 | const int height = helper->height; 2238 | if (point.x >= width || point.y >= height) { 2239 | return; 2240 | } 2241 | const uchar *weak_info = helper->weak_info_cuda; 2242 | short2 *weak_nearest_strong = helper->weak_nearest_strong; 2243 | const int center = point.x + point.y * width; 2244 | weak_nearest_strong[center].x = -1; 2245 | weak_nearest_strong[center].y = -1; 2246 | if (weak_info[center] != WEAK) { 2247 | return; 2248 | } 2249 | 2250 | const int radius = 100; // ETH 100 2251 | float min_dist = 255.0f; 2252 | for (int x = -radius; x <= radius; ++x) { 2253 | for (int y = -radius; y <= radius; ++y) { 2254 | const int2 neighbour_pt = make_int2(point.x + x, point.y + y); 2255 | if (neighbour_pt.x < 0 || neighbour_pt.y < 0 || neighbour_pt.x >= width || neighbour_pt.y >= height) { 2256 | continue; 2257 | } 2258 | const int neighbour_center = neighbour_pt.x + neighbour_pt.y * width; 2259 | if (weak_info[neighbour_center] == STRONG) { 2260 | float dist = x * x + y * y; 
2261 | dist = sqrtf(dist); 2262 | if (dist < min_dist) { 2263 | min_dist = dist; 2264 | weak_nearest_strong[center].x = neighbour_pt.x; 2265 | weak_nearest_strong[center].y = neighbour_pt.y; 2266 | } 2267 | } 2268 | } 2269 | } 2270 | } 2271 | 2272 | __global__ void RANSACToGetFitPlane(DataPassHelper *helper) { 2273 | const int2 point = make_int2(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y); 2274 | const int width = helper->width; 2275 | const int height = helper->height; 2276 | if (point.x >= width || point.y >= height) { 2277 | return; 2278 | } 2279 | const uchar *weak_info = helper->weak_info_cuda; 2280 | const int center = point.x + point.y * width; 2281 | float4 *plane_hypotheses = helper->plane_hypotheses_cuda; 2282 | float4 *fit_plane_hypothese = helper->fit_plane_hypotheses_cuda; 2283 | if (weak_info[center] != WEAK) { 2284 | fit_plane_hypothese[center] = plane_hypotheses[center]; 2285 | return; 2286 | } 2287 | // make sure that the plane is in the ref camera coord 2288 | curandState *rand_state = &(helper->rand_states_cuda[center]); 2289 | const auto &camera = helper->cameras_cuda[0]; 2290 | 2291 | short2 strong_points[NEIGHBOUR_NUM - 1]; 2292 | float3 strong_points_3d[NEIGHBOUR_NUM - 1]; 2293 | int strong_count = 0; 2294 | float X[3]; 2295 | for (int i = 1; i < NEIGHBOUR_NUM; ++i) { 2296 | short2 temp_point = GetNeighbourPoint(point, i, helper); 2297 | if (temp_point.x == -1 || temp_point.y == -1) { 2298 | continue; 2299 | } 2300 | strong_points[strong_count].x = temp_point.x; 2301 | strong_points[strong_count].y = temp_point.y; 2302 | // get 3d point in ref camera coord 2303 | const int temp_center = temp_point.x + temp_point.y * width; 2304 | float depth = ComputeDepthfromPlaneHypothesis(camera, plane_hypotheses[temp_center], make_int2(temp_point.x, temp_point.y)); 2305 | Get3DPoint(camera, strong_points[strong_count], depth, X); 2306 | strong_points_3d[strong_count].x = X[0]; 2307 | strong_points_3d[strong_count].y = 
X[1]; 2308 | strong_points_3d[strong_count].z = X[2]; 2309 | strong_count++; 2310 | } 2311 | if (strong_count < 3) { 2312 | fit_plane_hypothese[center] = plane_hypotheses[center]; 2313 | return; 2314 | } 2315 | 2316 | int iteration = 50; 2317 | float min_cost = FLT_MAX; 2318 | float4 best_plane; 2319 | bool has_best_plane = false; 2320 | while (iteration--) 2321 | { 2322 | int a_index = curand(rand_state) % strong_count; 2323 | int b_index = curand(rand_state) % strong_count; 2324 | int c_index = curand(rand_state) % strong_count; 2325 | 2326 | if (a_index == b_index || b_index == c_index || a_index == c_index) { 2327 | continue; 2328 | } 2329 | 2330 | if (!PointinTriangle(strong_points[a_index], strong_points[b_index], strong_points[c_index], point)) { 2331 | continue; 2332 | } 2333 | 2334 | const float3 &A = strong_points_3d[a_index]; 2335 | const float3 &B = strong_points_3d[b_index]; 2336 | const float3 &C = strong_points_3d[c_index]; 2337 | 2338 | float3 A_C = make_float3(A.x - C.x, A.y - C.y, A.z - C.z); 2339 | float3 B_C = make_float3(B.x - C.x, B.y - C.y, B.z - C.z); 2340 | 2341 | float4 cross_vec; 2342 | cross_vec.x = A_C.y * B_C.z - B_C.y * A_C.z; 2343 | cross_vec.y = -(A_C.x * B_C.z - B_C.x * A_C.z); 2344 | cross_vec.z = A_C.x * B_C.y - B_C.x * A_C.y; 2345 | if ((cross_vec.x == 0 && cross_vec.y == 0 && cross_vec.z == 0) || isnan(cross_vec.x) || isnan(cross_vec.y) || isnan(cross_vec.z)) { 2346 | continue; 2347 | } 2348 | NormalizeVec3(&cross_vec); 2349 | cross_vec.w = -(cross_vec.x * A.x + cross_vec.y * A.y + cross_vec.z * A.z); 2350 | float temp_cost = 0.0f; 2351 | for (int strong_index = 0; strong_index < strong_count; ++strong_index) { 2352 | if (strong_index == a_index || strong_index == b_index || strong_index == c_index) { 2353 | continue; 2354 | } 2355 | const float3 &temp_point = strong_points_3d[strong_index]; 2356 | float distance = fabs(cross_vec.x * temp_point.x + cross_vec.y * temp_point.y + cross_vec.z * temp_point.z + cross_vec.w); 2357 | 
temp_cost += distance; 2358 | } 2359 | if (temp_cost < min_cost) { 2360 | min_cost = temp_cost; 2361 | best_plane = cross_vec; 2362 | has_best_plane = true; 2363 | } 2364 | if (min_cost == 0) { 2365 | break; 2366 | } 2367 | } 2368 | if (has_best_plane) { 2369 | float depth = ComputeDepthfromPlaneHypothesis(camera, plane_hypotheses[center], point); 2370 | float4 view_direction = GetViewDirection(camera, point, depth); 2371 | float dot_product = best_plane.x * view_direction.x + best_plane.y * view_direction.y + best_plane.z * view_direction.z; 2372 | if (dot_product > 0) { 2373 | best_plane.x = -best_plane.x; 2374 | best_plane.y = -best_plane.y; 2375 | best_plane.z = -best_plane.z; 2376 | best_plane.w = -best_plane.w; 2377 | } 2378 | 2379 | fit_plane_hypothese[center] = best_plane; 2380 | } 2381 | else { 2382 | fit_plane_hypothese[center] = make_float4(0, 0, 0, 0); 2383 | } 2384 | } 2385 | 2386 | void APD::RunPatchMatch() { 2387 | 2388 | int BLOCK_W = 32; 2389 | int BLOCK_H = (BLOCK_W / 2); 2390 | 2391 | dim3 grid_size_full; 2392 | grid_size_full.x = (width + 16 - 1) / 16; 2393 | grid_size_full.y = (height + 16 - 1) / 16; 2394 | grid_size_full.z = 1; 2395 | dim3 block_size_full; 2396 | block_size_full.x = 16; 2397 | block_size_full.y = 16; 2398 | block_size_full.z = 1; 2399 | 2400 | dim3 grid_size_half; 2401 | grid_size_half.x = (width + BLOCK_W - 1) / BLOCK_W; 2402 | grid_size_half.y = ((height / 2) + BLOCK_H - 1) / BLOCK_H; 2403 | grid_size_half.z = 1; 2404 | dim3 block_size_half; 2405 | block_size_half.x = BLOCK_W; 2406 | block_size_half.y = BLOCK_H; 2407 | block_size_half.z = 1; 2408 | 2409 | InitRandomStates << > >(helper_cuda); 2410 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2411 | 2412 | FindNearestStrongPoint << > >(helper_cuda); 2413 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2414 | 2415 | GenNeighbours << > > (helper_cuda); 2416 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2417 | 2418 | NeigbourUpdate << > > (helper_cuda); 2419 | 
CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2420 | 2421 | if (problem.show_medium_result) { // write neighbour for visualization 2422 | #ifdef DEBUG_NEIGHBOUR 2423 | path neighbour_map_path = problem.result_folder / path("neighbour_map.bin"); 2424 | path neighbour_path = problem.result_folder / path("neighbour.bin"); 2425 | WriteBinMat(neighbour_map_path, neighbours_map_host); 2426 | short2 *neighbours_host = new short2[weak_count * NEIGHBOUR_NUM]; 2427 | cudaMemcpy(neighbours_host, neighbours_cuda, sizeof(short2) * weak_count * NEIGHBOUR_NUM, cudaMemcpyDeviceToHost); 2428 | { 2429 | ofstream out(neighbour_path, std::ios_base::binary); 2430 | int neighbour_sample_num = NEIGHBOUR_NUM; 2431 | out.write((char *)&weak_count, sizeof(int)); 2432 | out.write((char *)&neighbour_sample_num, sizeof(int)); 2433 | out.write((char *)neighbours_host, sizeof(short2) * weak_count * NEIGHBOUR_NUM); 2434 | out.close(); 2435 | } 2436 | delete[] neighbours_host; 2437 | #endif // DEBUG_NEIGHBOUR 2438 | } 2439 | std::cout << "Generate neighbours done\n"; 2440 | RandomInitialization << > > (helper_cuda); 2441 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2442 | 2443 | for (int i = 0; i < params_host.max_iterations; ++i) { 2444 | BlackPixelUpdateStrong << > > (i, helper_cuda); 2445 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2446 | RedPixelUpdateStrong << > > (i, helper_cuda); 2447 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2448 | std::cout << "Iteration " << i << " strong done\n"; 2449 | RANSACToGetFitPlane << > > (helper_cuda); 2450 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2451 | std::cout << "Compute normal done\n"; 2452 | BlackPixelUpdateWeak << > > (i, helper_cuda); 2453 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2454 | RedPixelUpdateWeak << > > (i, helper_cuda); 2455 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2456 | std::cout << "Iteration " << i << " -weak- done\n"; 2457 | } 2458 | 2459 | GetDepthandNormal << > > (helper_cuda); 2460 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2461 | 2462 
| BlackPixelFilterStrong << > > (helper_cuda); 2463 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2464 | RedPixelFilterStrong << > > (helper_cuda); 2465 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2466 | 2467 | DepthToWeak << > > (helper_cuda); 2468 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2469 | 2470 | LocalRefine << > > (helper_cuda); 2471 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2472 | #ifdef DEBUG_COST_LINE 2473 | { 2474 | // export for test 2475 | float *weak_ncc_cost_host = new float[width * height * 61]; 2476 | cudaMemcpy(weak_ncc_cost_host, weak_ncc_cost_cuda, width * height * sizeof(float) * 61, cudaMemcpyDeviceToHost); 2477 | path weak_ncc_cost_path = problem.result_folder / path("weak_ncc_cost.bin"); 2478 | { 2479 | ofstream out(weak_ncc_cost_path, std::ios_base::binary); 2480 | int p_cost_count = 61; 2481 | out.write((char *)&width, sizeof(int)); 2482 | out.write((char *)&height, sizeof(int)); 2483 | out.write((char *)&p_cost_count, sizeof(int)); 2484 | out.write((char *)weak_ncc_cost_host, sizeof(float) * width * height * p_cost_count); 2485 | out.close(); 2486 | } 2487 | delete[] weak_ncc_cost_host; 2488 | } 2489 | #endif // DEBUG_COST_LINE 2490 | cudaMemcpy(plane_hypotheses_host, plane_hypotheses_cuda, sizeof(float4) * width * height, cudaMemcpyDeviceToHost); 2491 | cudaMemcpy(weak_info_host.ptr(0), weak_info_cuda, width * height * sizeof(uchar), cudaMemcpyDeviceToHost); 2492 | cudaMemcpy(selected_views_host.ptr(0), selected_views_cuda, width * height * sizeof(unsigned int), cudaMemcpyDeviceToHost); 2493 | 2494 | CUDA_SAFE_CALL(cudaDeviceSynchronize()); 2495 | } -------------------------------------------------------------------------------- /APD.h: -------------------------------------------------------------------------------- 1 | #ifndef _APD_H_ 2 | #define _APD_H_ 3 | #include "main.h" 4 | 5 | #define CUDA_SAFE_CALL(error) CudaSafeCall(error, __FILE__, __LINE__) 6 | #define CUDA_CHECK_ERROR() CudaCheckError(__FILE__, __LINE__) 7 | #define M_PI 
3.14159265358979323846 8 | 9 | using namespace boost::filesystem; 10 | 11 | void CudaSafeCall(const cudaError_t error, const std::string& file, const int line); 12 | 13 | void CudaCheckError(const char* file, const int line); 14 | 15 | bool ReadBinMat(const path &mat_path, cv::Mat &mat); 16 | 17 | bool WriteBinMat(const path &mat_path, const cv::Mat &mat); 18 | 19 | bool ReadCamera(const path &cam_path, Camera &cam); 20 | 21 | bool ShowDepthMap(const path &depth_path, const cv::Mat& depth, float depth_min, float depth_max); 22 | 23 | bool ShowNormalMap(const path &normal_path, const cv::Mat &normal); 24 | 25 | bool ShowWeakImage(const path &weak_path, const cv::Mat &weak); 26 | 27 | bool ExportPointCloud(const path& point_cloud_path, std::vector& pointcloud); 28 | 29 | std::string ToFormatIndex(int index); 30 | 31 | template 32 | void RescaleMatToTargetSize(const cv::Mat &src, cv::Mat &dst, const cv::Size2i &target_size); 33 | 34 | void RunFusion(const path &dense_folder, const std::vector &problems); 35 | 36 | struct cudaTextureObjects { 37 | cudaTextureObject_t images[MAX_IMAGES]; 38 | }; 39 | 40 | struct DataPassHelper { 41 | int width; 42 | int height; 43 | int ref_index; 44 | cudaTextureObjects *texture_objects_cuda; 45 | cudaTextureObjects *texture_depths_cuda; 46 | Camera *cameras_cuda; 47 | float4 *plane_hypotheses_cuda; 48 | curandState *rand_states_cuda; 49 | unsigned int *selected_views_cuda; 50 | short2 *neighbours_cuda; 51 | int *neighbours_map_cuda; 52 | uchar *weak_info_cuda; 53 | float *costs_cuda; 54 | PatchMatchParams *params; 55 | int2 debug_point; 56 | bool show_ncc_info; 57 | float4* fit_plane_hypotheses_cuda; 58 | uchar* weak_reliable_cuda; 59 | uchar *view_weight_cuda; 60 | short2 *weak_nearest_strong; 61 | #ifdef DEBUG_COST_LINE 62 | float *weak_ncc_cost_cuda; 63 | #endif // DEBUG_COST_LINE 64 | 65 | }; 66 | 67 | class APD { 68 | public: 69 | APD(const Problem &problem); 70 | ~APD(); 71 | 72 | void InuputInitialization(); 73 | void 
CudaSpaceInitialization(); 74 | void SetDataPassHelperInCuda(); 75 | void RunPatchMatch(); 76 | float4 GetPlaneHypothesis(int r, int c); 77 | cv::Mat GetPixelStates(); 78 | cv::Mat GetSelectedViews(); 79 | int GetWidth(); 80 | int GetHeight(); 81 | float GetDepthMin(); 82 | float GetDepthMax(); 83 | private: 84 | void GenerateWeakFromImage(); 85 | 86 | int num_images; 87 | int width; 88 | int height; 89 | Problem problem; 90 | // ========================= 91 | // image host and cuda 92 | std::vector images; 93 | cudaTextureObjects texture_objects_host; 94 | cudaArray *cuArray[MAX_IMAGES]; 95 | cudaTextureObjects *texture_objects_cuda; 96 | // ========================= 97 | // depth host and cuda 98 | std::vector depths; 99 | cudaTextureObjects texture_depths_host; 100 | cudaArray *cuDepthArray[MAX_IMAGES]; 101 | cudaTextureObjects *texture_depths_cuda; 102 | // ========================= 103 | // camera host and cuda 104 | std::vector cameras; 105 | Camera *cameras_cuda; 106 | // ========================= 107 | // weak info host and cuda 108 | int weak_count; 109 | cv::Mat weak_info_host; 110 | uchar *weak_info_cuda; 111 | uchar *weak_reliable_cuda; 112 | short2 *weak_nearest_strong; 113 | // ========================= 114 | // neighbour host and cuda 115 | short2 *neighbours_cuda; 116 | cv::Mat neighbours_map_host; 117 | int *neigbours_map_cuda; 118 | // ========================= 119 | // plane hypotheses host and cuda 120 | float4 *plane_hypotheses_host; 121 | float4 *plane_hypotheses_cuda; 122 | float4 *fit_plane_hypotheses_cuda; 123 | // ========================= 124 | // cost cuda 125 | float *costs_cuda; 126 | // ========================= 127 | // other var 128 | // params 129 | PatchMatchParams params_host; 130 | PatchMatchParams *params_cuda; 131 | // random states 132 | curandState *rand_states_cuda; 133 | // vis info 134 | cv::Mat selected_views_host; 135 | unsigned int *selected_views_cuda; 136 | // for easy data pass 137 | DataPassHelper helper_host; 138 
| DataPassHelper *helper_cuda; 139 | // save view weigth 140 | uchar *view_weight_cuda; 141 | //export for test 142 | #ifdef DEBUG_COST_LINE 143 | float *weak_ncc_cost_cuda; 144 | #endif // DEBUG_COST_LINE 145 | }; 146 | #endif // !_APD_H_ -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 2.8) 2 | project (APD) 3 | 4 | set(Boost_USE_STATIC_LIBS ON) 5 | set(Boost_USE_MULTITHREADED ON) 6 | set(Boost_USE_STATIC_RUNTIME OFF) 7 | 8 | # recommand >=3.3.0 9 | find_package(OpenCV REQUIRED) 10 | # recommand >=1.62.0 11 | find_package(Boost REQUIRED COMPONENTS filesystem system) 12 | # recommand >=10.2. 13 | find_package(CUDA REQUIRED ) 14 | 15 | include_directories( 16 | ${OpenCV_INCLUDE_DIRS} 17 | ${Boost_INCLUDE_DIRS}) 18 | include_directories(.) 19 | 20 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-O3 --use_fast_math --maxrregcount=128 --ptxas-options=-v -std=c++11 --compiler-options -Wall -gencode arch=compute_75,code=sm_75) 21 | 22 | if(CMAKE_COMPILER_IS_GNUCXX) 23 | add_definitions(-std=c++11) 24 | add_definitions(-pthread) 25 | add_definitions(-Wall) 26 | add_definitions(-Wextra) 27 | add_definitions(-pedantic) 28 | add_definitions(-Wno-unused-function) 29 | add_definitions(-Wno-switch) 30 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -ffast-math -march=native") 31 | endif() 32 | 33 | cuda_add_executable( 34 | APD 35 | main.h 36 | APD.h 37 | APD.cpp 38 | APD.cu 39 | main.cpp 40 | ) 41 | 42 | target_link_libraries(APD 43 | "${Boost_LIBRARIES}" 44 | "${OpenCV_LIBS}" 45 | ) 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Zhaojie Zeng 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this 
software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # APD-MVS 2 | ## About 3 | 4 | APD-MVS is an MVS method that adopts adaptive patch deformation and an NCC-based matching metric. 5 | 6 | Our paper was accepted by CVPR 2023! 
7 | 8 | If you find this project useful for your research, please cite: 9 | 10 | ``` 11 | @InProceedings{Wang_2023_CVPR, 12 | author = {Wang, Yuesong and Zeng, Zhaojie and Guan, Tao and Yang, Wei and Chen, Zhuo and Liu, Wenkai and Xu, Luoyuan and Luo, Yawei}, 13 | title = {Adaptive Patch Deformation for Textureless-Resilient Multi-View Stereo}, 14 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 15 | month = {June}, 16 | year = {2023}, 17 | pages = {1621-1630} 18 | } 19 | ``` 20 | ## Dependencies 21 | 22 | The code has been tested on Ubuntu 18.04 with Nvidia Titan RTX, and you can modify the CMakeList.txt to compile on Windows. 23 | * [Cuda](https://developer.nvidia.cn/zh-cn/cuda-toolkit) >= 10.2 24 | * [OpenCV](https://opencv.org/) >= 3.3.0 25 | * [Boost](https://www.boost.org/) >= 1.62.0 26 | * [cmake](https://cmake.org/) >= 2.8 27 | 28 | **Besides make sure that your [GPU Compute Capability](https://en.wikipedia.org/wiki/CUDA) matches the CMakeList.txt!!!** Otherwise you won't get the depth results! For example, according to [GPU Compute Capability](https://en.wikipedia.org/wiki/CUDA), RTX3080's Compute Capability is 8.6. So you should set the 29 | cuda compilation parameter 'arch=compute_86,code=sm_86' or add a '-gencode arch=compute_86,code=sm_86'. 30 | 31 | ## Usage 32 | ### Compile APD-MVS 33 | 34 | ``` sh 35 | git clone https://github.com/whoiszzj/APD-MVS.git 36 | cd APD-MVS 37 | mkdir build & cd build 38 | cmake .. 39 | make 40 | ``` 41 | ### Prepare Datasets 42 | 43 | #### ETH Dataset 44 | 45 | You may download [train](https://www.eth3d.net/data/multi_view_training_dslr_undistorted.7z) and [test](https://www.eth3d.net/data/multi_view_test_dslr_undistorted.7z) dataset from ETH3D, and use the script [*colmap2mvsnet.py*](./colmap2mvsnet.py) to convert the dataset format(you may refer to [MVSNet](https://github.com/YoYo000/MVSNet#file-formats)). 
You can use the "scale" option in the script to generate any resolution you need. 46 | 47 | ```python 48 | python colmap2mvsnet.py --dense_folder --save_folder --scale_factor 2 # half resolution 49 | ``` 50 | 51 | #### Tanks & Temples Dataset 52 | 53 | We use the version provided by MVSNet. The dataset can be downloaded from [here](https://drive.google.com/file/d/1YArOJaX9WVLJh4757uE8AEREYkgszrCo/view), and the format is exactly what we need. 54 | 55 | #### Other Dataset 56 | 57 | Such as DTU and BlenderMVS, you may explore them yourself. !!! But remember to modify the [ReadCamera](https://github.com/whoiszzj/APD-MVS/blob/d9f9731235f4db05712024213e32346b6a01f5d6/APD.cpp#L84) function when you test on DTU !!! 58 | 59 | ### Run 60 | 61 | After you prepare the dataset, and you want to run the test for ETH3D/office, you can follow this command line. 62 | 63 | ```bash 64 | ./APD /office 65 | ``` 66 | 67 | The result will be saved in the folder office/APD, and the point cloud is saved as "APD.ply" 68 | 69 | It is very easy to use, and you can modify our code as you need. 70 | 71 | ## Acknowledgements 72 | 73 | This code largely benefits from the following repositories: [ACMM](https://github.com/GhiXu/ACMM.git), [Colmap](https://github.com/colmap/colmap.git). Thanks to their authors for opening the source of their excellent works. 74 | 75 | If you have any question or find some bugs, please leave it in [Issues](https://github.com/whoiszzj/APD-MVS/issues)! Thank you! 76 | -------------------------------------------------------------------------------- /colmap2mvsnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Copyright 2019, Jingyang Zhang and Yao Yao, HKUST. Model reading is provided by COLMAP. 4 | Preprocess script. 
5 | View selection is modified according to COLMAP's strategy, Qingshan Xu 6 | """ 7 | 8 | from __future__ import print_function 9 | import collections 10 | import struct 11 | import numpy as np 12 | import multiprocessing as mp 13 | from functools import partial 14 | import os 15 | import argparse 16 | import shutil 17 | import cv2 18 | 19 | #============================ read_model.py ============================# 20 | CameraModel = collections.namedtuple( 21 | "CameraModel", ["model_id", "model_name", "num_params"]) 22 | Camera = collections.namedtuple( 23 | "Camera", ["id", "model", "width", "height", "params"]) 24 | BaseImage = collections.namedtuple( 25 | "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]) 26 | Point3D = collections.namedtuple( 27 | "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]) 28 | 29 | class Image(BaseImage): 30 | def qvec2rotmat(self): 31 | return qvec2rotmat(self.qvec) 32 | 33 | CAMERA_MODELS = { 34 | CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3), 35 | CameraModel(model_id=1, model_name="PINHOLE", num_params=4), 36 | CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4), 37 | CameraModel(model_id=3, model_name="RADIAL", num_params=5), 38 | CameraModel(model_id=4, model_name="OPENCV", num_params=8), 39 | CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8), 40 | CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12), 41 | CameraModel(model_id=7, model_name="FOV", num_params=5), 42 | CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4), 43 | CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5), 44 | CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12) 45 | } 46 | CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model) \ 47 | for camera_model in CAMERA_MODELS]) 48 | 49 | 50 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): 51 | """Read and 
unpack the next bytes from a binary file. 52 | :param fid: 53 | :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. 54 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 55 | :param endian_character: Any of {@, =, <, >, !} 56 | :return: Tuple of read and unpacked values. 57 | """ 58 | data = fid.read(num_bytes) 59 | return struct.unpack(endian_character + format_char_sequence, data) 60 | 61 | 62 | def read_cameras_text(path): 63 | """ 64 | see: src/base/reconstruction.cc 65 | void Reconstruction::WriteCamerasText(const std::string& path) 66 | void Reconstruction::ReadCamerasText(const std::string& path) 67 | """ 68 | cameras = {} 69 | with open(path, "r") as fid: 70 | while True: 71 | line = fid.readline() 72 | if not line: 73 | break 74 | line = line.strip() 75 | if len(line) > 0 and line[0] != "#": 76 | elems = line.split() 77 | camera_id = int(elems[0]) 78 | model = elems[1] 79 | width = int(elems[2]) 80 | height = int(elems[3]) 81 | params = np.array(tuple(map(float, elems[4:]))) 82 | cameras[camera_id] = Camera(id=camera_id, model=model, 83 | width=width, height=height, 84 | params=params) 85 | return cameras 86 | 87 | 88 | def read_cameras_binary(path_to_model_file): 89 | """ 90 | see: src/base/reconstruction.cc 91 | void Reconstruction::WriteCamerasBinary(const std::string& path) 92 | void Reconstruction::ReadCamerasBinary(const std::string& path) 93 | """ 94 | cameras = {} 95 | with open(path_to_model_file, "rb") as fid: 96 | num_cameras = read_next_bytes(fid, 8, "Q")[0] 97 | for camera_line_index in range(num_cameras): 98 | camera_properties = read_next_bytes( 99 | fid, num_bytes=24, format_char_sequence="iiQQ") 100 | camera_id = camera_properties[0] 101 | model_id = camera_properties[1] 102 | model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name 103 | width = camera_properties[2] 104 | height = camera_properties[3] 105 | num_params = CAMERA_MODEL_IDS[model_id].num_params 106 | params = 
def read_cameras_binary(path_to_model_file):
    """Parse a COLMAP cameras.bin file into {camera_id: Camera}.

    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasBinary(const std::string& path)
        void Reconstruction::ReadCamerasBinary(const std::string& path)
    """
    cameras = {}
    with open(path_to_model_file, "rb") as fid:
        num_cameras = read_next_bytes(fid, 8, "Q")[0]
        for _ in range(num_cameras):
            cam_id, model_id, width, height = read_next_bytes(
                fid, num_bytes=24, format_char_sequence="iiQQ")
            model = CAMERA_MODEL_IDS[model_id]
            # the parameter count is fixed per camera model (e.g. PINHOLE: 4)
            params = read_next_bytes(fid, num_bytes=8 * model.num_params,
                                     format_char_sequence="d" * model.num_params)
            cameras[cam_id] = Camera(id=cam_id,
                                     model=model.model_name,
                                     width=width,
                                     height=height,
                                     params=np.array(params))
        assert len(cameras) == num_cameras
    return cameras


def read_images_text(path):
    """Parse a COLMAP images.txt file into {image_id: Image}.

    Each image occupies two lines: the pose line and the 2D-keypoint line.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesText(const std::string& path)
        void Reconstruction::WriteImagesText(const std::string& path)
    """
    images = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) == 0 or line[0] == "#":
                continue
            elems = line.split()
            image_id = int(elems[0])
            qvec = np.array(tuple(map(float, elems[1:5])))
            tvec = np.array(tuple(map(float, elems[5:8])))
            camera_id = int(elems[8])
            image_name = elems[9]
            # second line: (x, y, point3D_id) triplets for every keypoint
            elems = fid.readline().split()
            xys = np.column_stack([tuple(map(float, elems[0::3])),
                                   tuple(map(float, elems[1::3]))])
            point3D_ids = np.array(tuple(map(int, elems[2::3])))
            images[image_id] = Image(
                id=image_id, qvec=qvec, tvec=tvec,
                camera_id=camera_id, name=image_name,
                xys=xys, point3D_ids=point3D_ids)
    return images
def read_images_binary(path_to_model_file):
    """Parse a COLMAP images.bin file into {image_id: Image}.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesBinary(const std::string& path)
        void Reconstruction::WriteImagesBinary(const std::string& path)
    """
    images = {}
    with open(path_to_model_file, "rb") as fid:
        num_reg_images = read_next_bytes(fid, 8, "Q")[0]
        for _ in range(num_reg_images):
            props = read_next_bytes(fid, num_bytes=64,
                                    format_char_sequence="idddddddi")
            image_id = props[0]
            qvec = np.array(props[1:5])
            tvec = np.array(props[5:8])
            camera_id = props[8]
            # the image name is stored as a NUL-terminated byte string
            name_chars = []
            current_char = read_next_bytes(fid, 1, "c")[0]
            while current_char != b"\x00":
                name_chars.append(current_char.decode("utf-8"))
                current_char = read_next_bytes(fid, 1, "c")[0]
            image_name = "".join(name_chars)
            num_points2D = read_next_bytes(fid, num_bytes=8,
                                           format_char_sequence="Q")[0]
            x_y_id_s = read_next_bytes(fid, num_bytes=24 * num_points2D,
                                       format_char_sequence="ddq" * num_points2D)
            xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])),
                                   tuple(map(float, x_y_id_s[1::3]))])
            point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
            images[image_id] = Image(
                id=image_id, qvec=qvec, tvec=tvec,
                camera_id=camera_id, name=image_name,
                xys=xys, point3D_ids=point3D_ids)
    return images


def read_points3D_text(path):
    """Parse a COLMAP points3D.txt file into {point3D_id: Point3D}.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DText(const std::string& path)
        void Reconstruction::WritePoints3DText(const std::string& path)
    """
    points3D = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) == 0 or line[0] == "#":
                continue
            elems = line.split()
            point3D_id = int(elems[0])
            xyz = np.array(tuple(map(float, elems[1:4])))
            rgb = np.array(tuple(map(int, elems[4:7])))
            error = float(elems[7])
            # track: alternating (image_id, point2D_idx) pairs
            image_ids = np.array(tuple(map(int, elems[8::2])))
            point2D_idxs = np.array(tuple(map(int, elems[9::2])))
            points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb,
                                           error=error, image_ids=image_ids,
                                           point2D_idxs=point2D_idxs)
    return points3D
def read_points3d_binary(path_to_model_file):
    """Parse a COLMAP points3D.bin file into {point3D_id: Point3D}.

    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DBinary(const std::string& path)
        void Reconstruction::WritePoints3DBinary(const std::string& path)
    """
    points3D = {}
    with open(path_to_model_file, "rb") as fid:
        num_points = read_next_bytes(fid, 8, "Q")[0]
        for _ in range(num_points):
            props = read_next_bytes(fid, num_bytes=43,
                                    format_char_sequence="QdddBBBd")
            point3D_id = props[0]
            xyz = np.array(props[1:4])
            rgb = np.array(props[4:7])
            error = np.array(props[7])
            track_length = read_next_bytes(fid, num_bytes=8,
                                           format_char_sequence="Q")[0]
            track_elems = read_next_bytes(fid, num_bytes=8 * track_length,
                                          format_char_sequence="ii" * track_length)
            image_ids = np.array(tuple(map(int, track_elems[0::2])))
            point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
            points3D[point3D_id] = Point3D(
                id=point3D_id, xyz=xyz, rgb=rgb,
                error=error, image_ids=image_ids,
                point2D_idxs=point2D_idxs)
    return points3D


def read_model(path, ext):
    """Load a COLMAP sparse model (text or binary) from `path`.

    :param path: model directory containing cameras/images/points3D files.
    :param ext: model file extension, ".txt" or ".bin".
    :return: (cameras, images, points3D) dictionaries keyed by id.
    """
    if ext == ".txt":
        cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
        images = read_images_text(os.path.join(path, "images" + ext))
        points3D = read_points3D_text(os.path.join(path, "points3D" + ext))
    else:
        cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
        images = read_images_binary(os.path.join(path, "images" + ext))
        points3D = read_points3d_binary(os.path.join(path, "points3D" + ext))
    return cameras, images, points3D
def qvec2rotmat(qvec):
    """Convert a (w, x, y, z) quaternion into a 3x3 rotation matrix."""
    return np.array([
        [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2,
         2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
         2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]],
        [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
         1 - 2 * qvec[1]**2 - 2 * qvec[3]**2,
         2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]],
        [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
         2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
         1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]])


def rotmat2qvec(R):
    """Convert a 3x3 rotation matrix into a (w, x, y, z) quaternion.

    Builds the symmetric 4x4 K matrix and takes the eigenvector of its
    largest eigenvalue; the sign is normalized so the scalar part is >= 0.
    """
    Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
    K = np.array([
        [Rxx - Ryy - Rzz, 0, 0, 0],
        [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
        [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
        [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0
    eigvals, eigvecs = np.linalg.eigh(K)
    qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
    if qvec[0] < 0:
        qvec *= -1
    return qvec


def calc_score(inputs, images, points3d, extrinsic, args):
    """Compute the view-selection score between images i and j (0-based).

    The score is the number of valid shared 3D points; it is zeroed when the
    75th-percentile triangulation angle is below 1 degree (near-degenerate
    baseline), following COLMAP's view-selection strategy.

    :param inputs: (i, j) pair of 0-based image indices.
    :param images: {1-based index: Image} mapping.
    :param points3d: {point3D_id: Point3D} mapping.
    :param extrinsic: {1-based index: 4x4 world-to-camera matrix}.
    :param args: parsed CLI arguments (kept for interface compatibility).
    :return: (i, j, score) tuple.
    """
    i, j = inputs
    id_i = images[i+1].point3D_ids
    id_j = images[j+1].point3D_ids
    # set-based membership turns the original O(n*m) scan into O(n + m)
    # while preserving the order (and duplicates) of id_i
    id_j_set = set(id_j)
    id_intersect = [it for it in id_i if it in id_j_set]
    cam_center_i = -np.matmul(extrinsic[i+1][:3, :3].transpose(), extrinsic[i+1][:3, 3:4])[:, 0]
    cam_center_j = -np.matmul(extrinsic[j+1][:3, :3].transpose(), extrinsic[j+1][:3, 3:4])[:, 0]
    score = 0
    angles = []
    for pid in id_intersect:
        if pid == -1:
            continue  # -1 marks keypoints without a triangulated 3D point
        p = points3d[pid].xyz
        # triangulation angle (degrees) at p between the two camera centers;
        # clip the cosine into [-1, 1] so rounding noise cannot produce NaN
        cos_angle = np.dot(cam_center_i - p, cam_center_j - p) \
            / np.linalg.norm(cam_center_i - p) / np.linalg.norm(cam_center_j - p)
        theta = (180 / np.pi) * np.arccos(np.clip(cos_angle, -1.0, 1.0))
        # score += np.exp(-(theta - args.theta0) * (theta - args.theta0) / (2 * (args.sigma1 if theta <= args.theta0 else args.sigma2) ** 2))
        angles.append(theta)
        score += 1
    if len(angles) > 0:
        angles_sorted = sorted(angles)
        triangulationangle = angles_sorted[int(len(angles_sorted) * 0.75)]
        if triangulationangle < 1:
            score = 0.0
    return i, j, score
'cams') 309 | image_converted_dir = os.path.join(args.save_folder, 'images') 310 | 311 | if os.path.exists(image_converted_dir): 312 | print("remove:{}".format(image_converted_dir)) 313 | shutil.rmtree(image_converted_dir) 314 | os.makedirs(image_converted_dir) 315 | if os.path.exists(cam_dir): 316 | print("remove:{}".format(cam_dir)) 317 | shutil.rmtree(cam_dir) 318 | 319 | cameras, images, points3d = read_model(model_dir, args.model_ext) 320 | num_images = len(list(images.items())) 321 | 322 | param_type = { 323 | 'SIMPLE_PINHOLE': ['f', 'cx', 'cy'], 324 | 'PINHOLE': ['fx', 'fy', 'cx', 'cy'], 325 | 'SIMPLE_RADIAL': ['f', 'cx', 'cy', 'k'], 326 | 'SIMPLE_RADIAL_FISHEYE': ['f', 'cx', 'cy', 'k'], 327 | 'RADIAL': ['f', 'cx', 'cy', 'k1', 'k2'], 328 | 'RADIAL_FISHEYE': ['f', 'cx', 'cy', 'k1', 'k2'], 329 | 'OPENCV': ['fx', 'fy', 'cx', 'cy', 'k1', 'k2', 'p1', 'p2'], 330 | 'OPENCV_FISHEYE': ['fx', 'fy', 'cx', 'cy', 'k1', 'k2', 'k3', 'k4'], 331 | 'FULL_OPENCV': ['fx', 'fy', 'cx', 'cy', 'k1', 'k2', 'p1', 'p2', 'k3', 'k4', 'k5', 'k6'], 332 | 'FOV': ['fx', 'fy', 'cx', 'cy', 'omega'], 333 | 'THIN_PRISM_FISHEYE': ['fx', 'fy', 'cx', 'cy', 'k1', 'k2', 'p1', 'p2', 'k3', 'k4', 'sx1', 'sy1'] 334 | } 335 | 336 | scale_factor = args.scale_factor 337 | 338 | # intrinsic 339 | intrinsic = {} 340 | for camera_id, cam in cameras.items(): 341 | params_dict = {key: value for key, value in zip(param_type[cam.model], cam.params)} 342 | if 'f' in param_type[cam.model]: 343 | params_dict['fx'] = params_dict['f'] 344 | params_dict['fy'] = params_dict['f'] 345 | i = np.array([ 346 | [params_dict['fx'] / scale_factor, 0, params_dict['cx'] / scale_factor], 347 | [0, params_dict['fy'] / scale_factor, params_dict['cy'] / scale_factor], 348 | [0, 0, 1] 349 | ]) 350 | intrinsic[camera_id] = i 351 | print('intrinsic\n', intrinsic, end='\n\n') 352 | 353 | new_images = {} 354 | for i, image_id in enumerate(sorted(images.keys())): 355 | new_images[i+1] = images[image_id] 356 | images = new_images 357 | 358 
| # extrinsic 359 | extrinsic = {} 360 | for image_id, image in images.items(): 361 | e = np.zeros((4, 4)) 362 | e[:3, :3] = qvec2rotmat(image.qvec) 363 | e[:3, 3] = image.tvec 364 | e[3, 3] = 1 365 | extrinsic[image_id] = e 366 | print('extrinsic[1]\n', extrinsic[1], end='\n\n') 367 | 368 | # depth range and interval 369 | depth_ranges = {} 370 | for i in range(num_images): 371 | zs = [] 372 | for p3d_id in images[i+1].point3D_ids: 373 | if p3d_id == -1: 374 | continue 375 | transformed = np.matmul(extrinsic[i+1], [points3d[p3d_id].xyz[0], points3d[p3d_id].xyz[1], points3d[p3d_id].xyz[2], 1]) 376 | zs.append(np.asscalar(transformed[2])) 377 | depth_min = 0 378 | depth_max = 0 379 | if len(zs) != 0: 380 | zs_sorted = sorted(zs) 381 | # relaxed depth range 382 | depth_min = zs_sorted[int(len(zs) * .01)] * 0.75 383 | depth_max = zs_sorted[int(len(zs) * .99)] * 1.25 384 | 385 | # determine depth number by inverse depth setting, see supplementary material 386 | if args.max_d == 0: 387 | image_int = intrinsic[images[i+1].camera_id] 388 | image_ext = extrinsic[i+1] 389 | image_r = image_ext[0:3, 0:3] 390 | image_t = image_ext[0:3, 3] 391 | p1 = [image_int[0, 2], image_int[1, 2], 1] 392 | p2 = [image_int[0, 2] + 1, image_int[1, 2], 1] 393 | P1 = np.matmul(np.linalg.inv(image_int), p1) * depth_min 394 | P1 = np.matmul(np.linalg.inv(image_r), (P1 - image_t)) 395 | P2 = np.matmul(np.linalg.inv(image_int), p2) * depth_min 396 | P2 = np.matmul(np.linalg.inv(image_r), (P2 - image_t)) 397 | depth_num = (1 / depth_min - 1 / depth_max) / (1 / depth_min - 1 / (depth_min + np.linalg.norm(P2 - P1))) 398 | else: 399 | depth_num = args.max_d 400 | depth_interval = (depth_max - depth_min) / (depth_num - 1) / args.interval_scale 401 | depth_ranges[i+1] = (depth_min, depth_interval, depth_num, depth_max) 402 | print('depth_ranges[1]\n', depth_ranges[1], end='\n\n') 403 | 404 | # view selection 405 | score = np.zeros((len(images), len(images))) 406 | queue = [] 407 | for i in 
range(len(images)): 408 | for j in range(i + 1, len(images)): 409 | queue.append((i, j)) 410 | 411 | p = mp.Pool(processes=mp.cpu_count()) 412 | func = partial(calc_score, images=images, points3d=points3d, args=args, extrinsic=extrinsic) 413 | result = p.map(func, queue) 414 | for i, j, s in result: 415 | score[i, j] = s 416 | score[j, i] = s 417 | view_sel = [] 418 | num_view = min(20, len(images) - 1) 419 | for i in range(len(images)): 420 | sorted_score = np.argsort(score[i])[::-1] 421 | view_sel.append([(k, score[i, k]) for k in sorted_score[:num_view]]) 422 | print('view_sel[0]\n', view_sel[0], end='\n\n') 423 | 424 | # write 425 | try: 426 | os.makedirs(cam_dir) 427 | except os.error: 428 | print(cam_dir + ' already exist.') 429 | for i in range(num_images): 430 | with open(os.path.join(cam_dir, '%08d_cam.txt' % i), 'w') as f: 431 | f.write('extrinsic\n') 432 | for j in range(4): 433 | for k in range(4): 434 | f.write(str(extrinsic[i+1][j, k]) + ' ') 435 | f.write('\n') 436 | f.write('\nintrinsic\n') 437 | for j in range(3): 438 | for k in range(3): 439 | f.write(str(intrinsic[images[i+1].camera_id][j, k]) + ' ') 440 | f.write('\n') 441 | f.write('\n%f %f %f %f\n' % (depth_ranges[i+1][0], depth_ranges[i+1][1], depth_ranges[i+1][2], depth_ranges[i+1][3])) 442 | with open(os.path.join(args.save_folder, 'pair.txt'), 'w') as f: 443 | f.write('%d\n' % len(images)) 444 | for i, sorted_score in enumerate(view_sel): 445 | f.write('%d\n%d ' % (i, len(sorted_score))) 446 | for image_id, s in sorted_score: 447 | f.write('%d %d ' % (image_id, s)) 448 | f.write('\n') 449 | 450 | max_width = 0 451 | max_height = 0 452 | for i in range(num_images): 453 | img_path = os.path.join(image_dir, images[i + 1].name) 454 | img = cv2.imread(img_path) 455 | if max_height < img.shape[0]: 456 | max_height = img.shape[0] 457 | if max_width < img.shape[1]: 458 | max_width = img.shape[1] 459 | 460 | 461 | #convert to jpg 462 | for i in range(num_images): 463 | img_path = 
os.path.join(image_dir, images[i + 1].name) 464 | img = cv2.imread(img_path) 465 | pad_width = max_width - img.shape[1] 466 | pad_height = max_height - img.shape[0] 467 | img_pad = np.pad(img, ((0, pad_height), (0, pad_width), (0, 0)), 'constant') 468 | img_pad = cv2.resize(img_pad, (int(img_pad.shape[1] / scale_factor), int(img_pad.shape[0] / scale_factor)), interpolation=cv2.INTER_NEAREST) 469 | cv2.imwrite(os.path.join(image_converted_dir, '%08d.jpg' % i), img_pad) 470 | # if not img_path.endswith(".jpg"): 471 | # 472 | # else: 473 | # shutil.copyfile(os.path.join(image_dir, images[i+1].name), os.path.join(image_converted_dir, '%08d.jpg' % i)) 474 | 475 | 476 | if __name__ == '__main__': 477 | parser = argparse.ArgumentParser(description='Convert colmap camera') 478 | 479 | parser.add_argument('--dense_folder', required=True, type=str, help='dense_folder.') 480 | parser.add_argument('--save_folder', required=True, type=str, help='save_folder.') 481 | 482 | parser.add_argument('--max_d', type=int, default=192) 483 | parser.add_argument('--interval_scale', type=float, default=1) 484 | parser.add_argument('--scale_factor', type=float, default=1) 485 | 486 | parser.add_argument('--theta0', type=float, default=5) 487 | parser.add_argument('--sigma1', type=float, default=1) 488 | parser.add_argument('--sigma2', type=float, default=10) 489 | parser.add_argument('--model_ext', type=str, default=".txt", choices=[".txt", ".bin"], help='sparse model ext') 490 | 491 | args = parser.parse_args() 492 | 493 | os.makedirs(os.path.join(args.save_folder), exist_ok=True) 494 | processing_single_scene(args) 495 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include "main.h" 2 | #include "APD.h" 3 | 4 | using namespace boost::filesystem; 5 | 6 | void GenerateSampleList(const path &dense_folder, std::vector &problems) 7 | { 8 | path cluster_list_path = 
dense_folder / path("pair.txt"); 9 | problems.clear(); 10 | ifstream file(cluster_list_path); 11 | std::stringstream iss; 12 | std::string line; 13 | 14 | int num_images; 15 | iss.clear(); 16 | std::getline(file, line); 17 | iss.str(line); 18 | iss >> num_images; 19 | 20 | for (int i = 0; i < num_images; ++i) { 21 | Problem problem; 22 | problem.index = i; 23 | problem.src_image_ids.clear(); 24 | iss.clear(); 25 | std::getline(file, line); 26 | iss.str(line); 27 | iss >> problem.ref_image_id; 28 | 29 | problem.dense_folder = dense_folder; 30 | problem.result_folder = dense_folder / path("APD") / path(ToFormatIndex(problem.ref_image_id)); 31 | create_directory(problem.result_folder); 32 | 33 | int num_src_images; 34 | iss.clear(); 35 | std::getline(file, line); 36 | iss.str(line); 37 | iss >> num_src_images; 38 | for (int j = 0; j < num_src_images; ++j) { 39 | int id; 40 | float score; 41 | iss >> id >> score; 42 | if (score <= 0.0f) { 43 | continue; 44 | } 45 | problem.src_image_ids.push_back(id); 46 | } 47 | problems.push_back(problem); 48 | } 49 | } 50 | 51 | bool CheckImages(const std::vector &problems) { 52 | if (problems.size() == 0) { 53 | return false; 54 | } 55 | path image_path = problems[0].dense_folder / path("images") / path(ToFormatIndex(problems[0].ref_image_id) + ".jpg"); 56 | cv::Mat image = cv::imread(image_path.string()); 57 | if (image.empty()) { 58 | return false; 59 | } 60 | const int width = image.cols; 61 | const int height = image.rows; 62 | for (size_t i = 1; i < problems.size(); ++i) { 63 | image_path = problems[i].dense_folder / path("images") / path(ToFormatIndex(problems[i].ref_image_id) + ".jpg"); 64 | image = cv::imread(image_path.string()); 65 | if (image.cols != width || image.rows != height) { 66 | return false; 67 | } 68 | } 69 | return true; 70 | } 71 | 72 | int ComputeRoundNum(const std::vector &problems) { 73 | if (problems.size() == 0) { 74 | return 0; 75 | } 76 | path image_path = problems[0].dense_folder / path("images") / 
path(ToFormatIndex(problems[0].ref_image_id) + ".jpg"); 77 | cv::Mat image = cv::imread(image_path.string()); 78 | if (image.empty()) { 79 | return 0; 80 | } 81 | int max_size = MAX(image.cols, image.rows); 82 | int round_num = 1; 83 | while (max_size > 1000) { 84 | max_size /= 2; 85 | round_num++; 86 | } 87 | return round_num; 88 | } 89 | 90 | 91 | void ProcessProblem(const Problem &problem) { 92 | std::cout << "Processing image: " << std::setw(8) << std::setfill('0') << problem.ref_image_id << "..." << std::endl; 93 | std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now(); 94 | 95 | APD APD(problem); 96 | APD.InuputInitialization(); 97 | APD.CudaSpaceInitialization(); 98 | APD.SetDataPassHelperInCuda(); 99 | APD.RunPatchMatch(); 100 | 101 | int width = APD.GetWidth(), height = APD.GetHeight(); 102 | cv::Mat depth = cv::Mat(height, width, CV_32FC1); 103 | cv::Mat normal = cv::Mat(height, width, CV_32FC3); 104 | cv::Mat pixel_states = APD.GetPixelStates(); 105 | for (int r = 0; r < height; ++r) { 106 | for (int c = 0; c < width; ++c) { 107 | float4 plane_hypothesis = APD.GetPlaneHypothesis(r, c); 108 | depth.at(r, c) = plane_hypothesis.w; 109 | if (depth.at(r, c) < APD.GetDepthMin() || depth.at(r, c) > APD.GetDepthMax()) { 110 | depth.at(r, c) = 0; 111 | pixel_states.at(r, c) = UNKNOWN; 112 | } 113 | normal.at(r, c) = cv::Vec3f(plane_hypothesis.x, plane_hypothesis.y, plane_hypothesis.z); 114 | } 115 | } 116 | 117 | path depth_path = problem.result_folder / path("depths.dmb"); 118 | WriteBinMat(depth_path, depth); 119 | path normal_path = problem.result_folder / path("normals.dmb"); 120 | WriteBinMat(normal_path, normal); 121 | path weak_path = problem.result_folder / path("weak.bin"); 122 | WriteBinMat(weak_path, pixel_states); 123 | path selected_view_path = problem.result_folder / path("selected_views.bin"); 124 | WriteBinMat(selected_view_path, APD.GetSelectedViews()); 125 | 126 | 127 | if (problem.show_medium_result) { 128 | path 
depth_img_path = problem.result_folder / path("depth_" + std::to_string(problem.iteration) + ".jpg"); 129 | path normal_img_path = problem.result_folder / path("normal_" + std::to_string(problem.iteration) + ".jpg"); 130 | path weak_img_path = problem.result_folder / path("weak_" + std::to_string(problem.iteration) + ".jpg"); 131 | ShowDepthMap(depth_img_path, depth, APD.GetDepthMin(), APD.GetDepthMax()); 132 | ShowNormalMap(normal_img_path, normal); 133 | ShowWeakImage(weak_img_path, pixel_states); 134 | } 135 | std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); 136 | std::cout << "Processing image: " << std::setw(8) << std::setfill('0') << problem.ref_image_id << " done!" << std::endl; 137 | std::cout << "Cost time: " << std::chrono::duration_cast(end - start).count() << " ms" << std::endl; 138 | } 139 | 140 | int main(int argc, char **argv) { 141 | if (argc < 2) { 142 | std::cerr << "USAGE: APD dense_folder\n"; 143 | return EXIT_FAILURE; 144 | } 145 | path dense_folder(argv[1]); 146 | path output_folder = dense_folder / path("APD"); 147 | create_directory(output_folder); 148 | // set cuda device for multi-gpu machine 149 | int gpu_index = 0; 150 | if (argc == 3) { 151 | gpu_index = std::atoi(argv[2]); 152 | } 153 | cudaSetDevice(gpu_index); 154 | // generate problems 155 | std::vector problems; 156 | GenerateSampleList(dense_folder, problems); 157 | if (!CheckImages(problems)) { 158 | std::cerr << "Images may error, check it!\n"; 159 | return EXIT_FAILURE; 160 | } 161 | int num_images = problems.size(); 162 | std::cout << "There are " << num_images << " problems needed to be processed!" 
<< std::endl; 163 | 164 | int round_num = ComputeRoundNum(problems); 165 | 166 | std::cout << "Round nums: " << round_num << std::endl; 167 | int iteration_index = 0; 168 | for (int i = 0; i < round_num; ++i) { 169 | for (auto &problem : problems) { 170 | { 171 | auto ¶ms = problem.params; 172 | if (i == 0) { 173 | params.state = FIRST_INIT; 174 | params.use_APD = false; 175 | } 176 | else { 177 | params.state = REFINE_INIT; 178 | params.use_APD = true; 179 | params.ransac_threshold = 0.01 - i * 0.00125; 180 | params.rotate_time = MIN(static_cast(std::pow(2, i)), 4); 181 | } 182 | params.geom_consistency = false; 183 | params.max_iterations = 3; 184 | params.weak_peak_radius = 6; 185 | } 186 | problem.iteration = iteration_index; 187 | problem.show_medium_result = true; 188 | problem.scale_size = static_cast(std::pow(2, round_num - 1 - i)); // scale 189 | ProcessProblem(problem); 190 | } 191 | iteration_index++; 192 | for (int j = 0; j < 3; ++j) { 193 | for (auto &problem : problems) { 194 | { 195 | auto ¶ms = problem.params; 196 | params.state = REFINE_ITER; 197 | if (i == 0) { 198 | params.use_APD = false; 199 | } 200 | else { 201 | params.use_APD = true; 202 | params.ransac_threshold = 0.01 - i * 0.00125; 203 | params.rotate_time = MIN(static_cast(std::pow(2, i)), 4); 204 | } 205 | params.geom_consistency = true; 206 | params.max_iterations = 3; 207 | params.weak_peak_radius = MAX(4 - 2 * j, 2); 208 | } 209 | problem.iteration = iteration_index; 210 | problem.show_medium_result = true; 211 | problem.scale_size = static_cast(std::pow(2, round_num - 1 - i)); // scale 212 | ProcessProblem(problem); 213 | } 214 | iteration_index++; 215 | } 216 | std::cout << "Round: " << i << " done\n"; 217 | } 218 | 219 | RunFusion(dense_folder, problems); 220 | {// delete files 221 | for (size_t i = 0; i < problems.size(); ++i) { 222 | const auto &problem = problems[i]; 223 | remove(problem.result_folder / path("weak.bin")); 224 | remove(problem.result_folder / 
path("depths.dmb")); 225 | remove(problem.result_folder / path("normals.dmb")); 226 | remove(problem.result_folder / path("selected_views.bin")); 227 | //remove(problem.result_folder / path("neighbour.bin")); 228 | //remove(problem.result_folder / path("neighbour_map.bin")); 229 | } 230 | } 231 | std::cout << "All done\n"; 232 | return EXIT_SUCCESS; 233 | } -------------------------------------------------------------------------------- /main.h: -------------------------------------------------------------------------------- 1 | #ifndef _MAIN_H_ 2 | #define _MAIN_H_ 3 | // Includes Opencv 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | // Includes CUDA 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | // Includes STD libs 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | // Includes Boost filesystem 33 | #include 34 | #include 35 | 36 | // Define some const var 37 | #define MAX_IMAGES 32 38 | #define NEIGHBOUR_NUM 9 39 | #define MAX_SEARCH_RADIUS 4096 40 | #define DEBUG_POINT_X 753 41 | #define DEBUG_POINT_Y 259 42 | //#define DEBUG_COST_LINE 43 | //#define DEBUG_NEIGHBOUR 44 | 45 | using namespace boost::filesystem; 46 | 47 | struct Camera { 48 | float K[9]; 49 | float R[9]; 50 | float t[3]; 51 | float c[3]; 52 | int height; 53 | int width; 54 | float depth_min; 55 | float depth_max; 56 | }; 57 | 58 | struct PointList { 59 | float3 coord; 60 | float3 color; 61 | }; 62 | 63 | enum RunState { 64 | FIRST_INIT, 65 | REFINE_INIT, 66 | REFINE_ITER, 67 | }; 68 | 69 | enum PixelState { 70 | WEAK, 71 | STRONG, 72 | UNKNOWN 73 | }; 74 | 75 | struct PatchMatchParams { 76 | int max_iterations = 3; 77 | int num_images = 5; 78 | float sigma_spatial = 5.0f; 79 | float sigma_color = 3.0f; 80 | int top_k = 4; 81 | float depth_min = 0.0f; 82 | float depth_max = 
1.0f; 83 | bool geom_consistency = false; 84 | int strong_radius = 5; 85 | int strong_increment = 2; 86 | int weak_radius = 5; 87 | int weak_increment = 5; 88 | bool use_APD = true; 89 | int weak_peak_radius = 2; 90 | int rotate_time = 4; 91 | float ransac_threshold = 0.005; 92 | float geom_factor = 0.2f; 93 | RunState state; 94 | }; 95 | 96 | struct Problem { 97 | int index; 98 | int ref_image_id; 99 | std::vector src_image_ids; 100 | path dense_folder; 101 | path result_folder; 102 | int scale_size = 1; 103 | PatchMatchParams params; 104 | bool show_medium_result = false; 105 | int iteration; 106 | }; 107 | 108 | #endif // !_MAIN_H_ 109 | --------------------------------------------------------------------------------