├── README.md └── guided_path_amisps.cpp /README.md: -------------------------------------------------------------------------------- 1 | # multiple-importance-reweight 2 | Code for SIGGRAPH 2025 (ToG) paper "Multiple Importance Reweighting for Path Guiding" 3 | -------------------------------------------------------------------------------- /guided_path_amisps.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | This file is part of Mitsuba, a physically based rendering system. 3 | 4 | Copyright (c) 2007-2014 by Wenzel Jakob 5 | Copyright (c) 2017 by ETH Zurich, Thomas Mueller. 6 | 7 | Mitsuba is free software; you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License Version 3 9 | as published by the Free Software Foundation. 10 | 11 | Mitsuba is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with this program. If not, see . 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | MTS_NAMESPACE_BEGIN 39 | 40 | double statsSdtreeBuild = 0.0; 41 | double statsSdtreeReset = 0.0; 42 | double statsAMISSdtreeExtra = 0.0; 43 | double statsAMISTimeArchive = 0.0; 44 | double statsAMISTimeSplat = 0.0; 45 | double statsPhaseTimeRendering = 0.0; 46 | double statsPhaseTimeRenderPass = 0.0; 47 | double statsPhaseTimeTotal = 0.0; 48 | double statsPhaseTimeSampleMat = 0.0; 49 | double statsPhaseTimeCommit = 0.0; 50 | double statsPhaseTimeRenderBlockSum = 0.0; 51 | double statsPhaseTimeRenderPostproc = 0.0; 52 | double statsPhaseTimeRenderRecording1 = 0.0; 53 | double statsPhaseTimeRenderRecording = 0.0; 54 | int64_t statsSuperfuseDFSCall = 0; 55 | int64_t statsSuperfusePushdownCall = 0; 56 | int64_t statsResetBFSCall = 0; 57 | int64_t statsCommitCall = 0; 58 | int64_t statsCommitRequestTotal = 0; 59 | int64_t statsImageSamples = 0; 60 | int64_t statsImageSamplesNonzero = 0; 61 | int64_t statsImageSamplesAMIS = 0; 62 | int64_t statsRecordedVertices = 0; 63 | 64 | int g_sampleCount = 0; 65 | int g_passesThisIteration = 0; 66 | float g_selectiveActivateThreshold = 0; 67 | float g_tempParam = 0; // currently not use. You can use it for any experimental purpose. 
68 | ref g_sensor; 69 | Point3f g_first_vertex; 70 | 71 | void printMystats() 72 | { 73 | printf("Guided path tracer: Sdtree Build = %.6f\n", statsSdtreeBuild); 74 | printf("Guided path tracer: Sdtree Reset = %.6f\n", statsSdtreeReset); 75 | printf("Guided path tracer: AMIS Sdtree Extra = %.6f\n", statsAMISSdtreeExtra); 76 | printf("Guided path tracer: Sdtree All = %.6f\n", statsSdtreeBuild + statsSdtreeReset + statsAMISSdtreeExtra); 77 | puts(""); 78 | printf("Guided path tracer: AMIS Time Archive = %.6f\n", statsAMISTimeArchive); 79 | printf("Guided path tracer: AMIS Time Splat = %.6f\n", statsAMISTimeSplat); 80 | puts(""); 81 | printf("Guided path tracer: Phase Time Rendering = %.6f\n", statsPhaseTimeRendering); 82 | printf("Guided path tracer: Phase Time RenderPass = %.6f\n", statsPhaseTimeRenderPass); 83 | printf("Guided path tracer: Phase Time SampleMat = %.6f\n", statsPhaseTimeSampleMat); 84 | printf("Guided path tracer: Phase Time Commit = %.6f\n", statsPhaseTimeCommit); 85 | printf("Guided path tracer: Phase Time statsPhaseTimeRenderBlockSum = %.6f\n", statsPhaseTimeRenderBlockSum); 86 | printf("Guided path tracer: Phase Time statsPhaseTimeRenderRecording1 = %.6f\n", statsPhaseTimeRenderRecording1); 87 | printf("Guided path tracer: Phase Time statsPhaseTimeRenderRecording = %.6f\n", statsPhaseTimeRenderRecording); 88 | printf("Guided path tracer: Phase Time RenderPostproc= %.6f\n", statsPhaseTimeRenderPostproc); 89 | puts(""); 90 | printf("Guided path tracer: statsSuperfuseDFSCall = %lld\n", statsSuperfuseDFSCall); 91 | printf("Guided path tracer: statsSuperfusePushdownCall = %lld\n", statsSuperfusePushdownCall); 92 | printf("Guided path tracer: statsResetBFSCall = %lld\n", statsResetBFSCall); 93 | puts(""); 94 | printf("Guided path tracer: statsCommitCall = %lld\n", statsCommitCall); 95 | printf("Guided path tracer: statsCommitRequestTotal = %lld\n", statsCommitRequestTotal); 96 | printf("Guided path tracer: accept rate = %.4f \%\n", statsCommitCall * 
100.0f / statsCommitRequestTotal); 97 | puts(""); 98 | printf("Guided path tracer: statsImageSamples = %lld\n", statsImageSamples); 99 | printf("Guided path tracer: statsImageSamplesNonzero = %lld\n", statsImageSamplesNonzero); 100 | printf("Guided path tracer: statsImageSamplesAMIS = %lld\n", statsImageSamplesAMIS); 101 | printf("Guided path tracer: nonzero rate = %.4f \%\n", statsImageSamplesNonzero * 100.0f / statsImageSamples); 102 | printf("Guided path tracer: amis rate = %.4f \%\n", statsImageSamplesAMIS * 100.0f / statsImageSamples); 103 | printf("Guided path tracer: amis rate nonzero = %.4f \%\n", statsImageSamplesAMIS * 100.0f / statsImageSamplesNonzero); 104 | printf("Guided path tracer: statsRecordedVertices = %lld\n", statsRecordedVertices); 105 | printf("Guided path tracer: statsRecordedVertices mem = %.3f\n", statsRecordedVertices * 16.0 / 1048576); 106 | } 107 | 108 | class HDTimer 109 | { 110 | public: 111 | using Unit = std::chrono::nanoseconds; 112 | 113 | HDTimer() 114 | { 115 | start = std::chrono::system_clock::now(); 116 | } 117 | 118 | double value() const 119 | { 120 | auto now = std::chrono::system_clock::now(); 121 | auto duration = std::chrono::duration_cast(now - start); 122 | return (double)duration.count() * 1e-9; 123 | } 124 | 125 | double reset() 126 | { 127 | auto now = std::chrono::system_clock::now(); 128 | auto duration = std::chrono::duration_cast(now - start); 129 | start = now; 130 | return (double)duration.count() * 1e-9; 131 | } 132 | 133 | static std::chrono::system_clock::time_point staticValue() 134 | { 135 | return std::chrono::system_clock::now(); 136 | } 137 | 138 | static double staticDelta(std::chrono::system_clock::time_point t) 139 | { 140 | return std::chrono::duration_cast(std::chrono::system_clock::now() - t).count() * 1e-9; 141 | } 142 | 143 | private: 144 | std::chrono::system_clock::time_point start; 145 | }; 146 | 147 | float computeElapsedSeconds(std::chrono::steady_clock::time_point start) 148 | { 149 | 
auto current = std::chrono::steady_clock::now(); 150 | auto ms = std::chrono::duration_cast(current - start); 151 | return (float)ms.count() / 1000; 152 | } 153 | 154 | struct RawImageSample 155 | { 156 | std::vector path; 157 | Point2f last_dir; // 2*max_quadtree_depth bits at most actually 158 | Spectrum value; 159 | int iter; // uint8_t actually, neglectable 160 | float original_radiance; // not really needed 161 | 162 | bool operator<(const RawImageSample &rhs) const 163 | { 164 | return original_radiance > rhs.original_radiance; 165 | } 166 | 167 | std::string toString() const 168 | { 169 | } 170 | }; 171 | 172 | class BlobWriter 173 | { 174 | public: 175 | BlobWriter(const std::string &filename) 176 | : f(filename, std::ios::out | std::ios::binary) 177 | { 178 | } 179 | 180 | template 181 | typename std::enable_if::value, BlobWriter &>::type 182 | operator<<(Type Element) 183 | { 184 | Write(&Element, 1); 185 | return *this; 186 | } 187 | 188 | // CAUTION: This function may break down on big-endian architectures. 189 | // The ordering of bytes has to be reverted then. 
190 | template 191 | void Write(T *Src, size_t Size) 192 | { 193 | f.write(reinterpret_cast(Src), Size * sizeof(T)); 194 | } 195 | 196 | private: 197 | std::ofstream f; 198 | }; 199 | 200 | static void addToAtomicFloat(std::atomic &var, float val) 201 | { 202 | auto current = var.load(); 203 | while (!var.compare_exchange_weak(current, current + val)) 204 | ; 205 | } 206 | 207 | inline float logistic(float x) 208 | { 209 | return 1 / (1 + std::exp(-x)); 210 | } 211 | 212 | // Implements the stochastic-gradient-based Adam optimizer [Kingma and Ba 2014] 213 | class AdamOptimizer 214 | { 215 | public: 216 | AdamOptimizer(float learningRate, int batchSize = 1, float epsilon = 1e-08f, float beta1 = 0.9f, float beta2 = 0.999f) 217 | { 218 | m_hparams = {learningRate, batchSize, epsilon, beta1, beta2}; 219 | } 220 | 221 | AdamOptimizer &operator=(const AdamOptimizer &arg) 222 | { 223 | m_state = arg.m_state; 224 | m_hparams = arg.m_hparams; 225 | return *this; 226 | } 227 | 228 | AdamOptimizer(const AdamOptimizer &arg) 229 | { 230 | *this = arg; 231 | } 232 | 233 | void append(float gradient, float statisticalWeight) 234 | { 235 | m_state.batchGradient += gradient * statisticalWeight; 236 | m_state.batchAccumulation += statisticalWeight; 237 | 238 | if (m_state.batchAccumulation > m_hparams.batchSize) 239 | { 240 | step(m_state.batchGradient / m_state.batchAccumulation); 241 | 242 | m_state.batchGradient = 0; 243 | m_state.batchAccumulation = 0; 244 | } 245 | } 246 | 247 | void step(float gradient) 248 | { 249 | ++m_state.iter; 250 | 251 | float actualLearningRate = m_hparams.learningRate * std::sqrt(1 - std::pow(m_hparams.beta2, m_state.iter)) / (1 - std::pow(m_hparams.beta1, m_state.iter)); 252 | m_state.firstMoment = m_hparams.beta1 * m_state.firstMoment + (1 - m_hparams.beta1) * gradient; 253 | m_state.secondMoment = m_hparams.beta2 * m_state.secondMoment + (1 - m_hparams.beta2) * gradient * gradient; 254 | m_state.variable -= actualLearningRate * m_state.firstMoment 
/ (std::sqrt(m_state.secondMoment) + m_hparams.epsilon); 255 | 256 | // Clamp the variable to the range [-20, 20] as a safeguard to avoid numerical instability: 257 | // since the sigmoid involves the exponential of the variable, value of -20 or 20 already yield 258 | // in *extremely* small and large results that are pretty much never necessary in practice. 259 | m_state.variable = std::min(std::max(m_state.variable, -20.0f), 20.0f); 260 | } 261 | 262 | float variable() const 263 | { 264 | return m_state.variable; 265 | } 266 | 267 | private: 268 | struct State 269 | { 270 | int iter = 0; 271 | float firstMoment = 0; 272 | float secondMoment = 0; 273 | float variable = 0; 274 | 275 | float batchAccumulation = 0; 276 | float batchGradient = 0; 277 | } m_state; 278 | 279 | struct Hyperparameters 280 | { 281 | float learningRate; 282 | int batchSize; 283 | float epsilon; 284 | float beta1; 285 | float beta2; 286 | } m_hparams; 287 | }; 288 | 289 | enum class ESampleCombination 290 | { 291 | EAMIS, 292 | }; 293 | 294 | enum class EBsdfSamplingFractionLoss 295 | { 296 | ENone, 297 | EKL, 298 | EVariance, 299 | }; 300 | 301 | enum class ESpatialFilter 302 | { 303 | ENearest, 304 | EStochasticBox, 305 | EBox, 306 | }; 307 | 308 | enum class EDirectionalFilter 309 | { 310 | ENearest, 311 | EBox, 312 | }; 313 | 314 | enum class ESampleAllocSeq 315 | { 316 | EDouble, 317 | EUniform, 318 | EHalfdouble, 319 | }; 320 | 321 | class QuadTreeNode 322 | { 323 | public: 324 | QuadTreeNode() 325 | { 326 | m_children = {}; 327 | for (size_t i = 0; i < m_sum.size(); ++i) 328 | { 329 | m_sum[i].store(0, std::memory_order_relaxed); 330 | } 331 | } 332 | 333 | void setSum(int index, float val) 334 | { 335 | m_sum[index].store(val, std::memory_order_relaxed); 336 | } 337 | 338 | float sum(int index) const 339 | { 340 | return m_sum[index].load(std::memory_order_relaxed); 341 | } 342 | 343 | void copyFrom(const QuadTreeNode &arg) 344 | { 345 | for (int i = 0; i < 4; ++i) 346 | { 347 | 
setSum(i, arg.sum(i)); 348 | m_children[i] = arg.m_children[i]; 349 | } 350 | } 351 | 352 | QuadTreeNode(const QuadTreeNode &arg) 353 | { 354 | copyFrom(arg); 355 | } 356 | 357 | QuadTreeNode &operator=(const QuadTreeNode &arg) 358 | { 359 | copyFrom(arg); 360 | return *this; 361 | } 362 | 363 | void setChild(int idx, uint16_t val) 364 | { 365 | m_children[idx] = val; 366 | } 367 | 368 | uint16_t child(int idx) const 369 | { 370 | return m_children[idx]; 371 | } 372 | 373 | void setSum(float val) 374 | { 375 | for (int i = 0; i < 4; ++i) 376 | { 377 | setSum(i, val); 378 | } 379 | } 380 | 381 | int childIndex(Point2 &p) const 382 | { 383 | int res = 0; 384 | for (int i = 0; i < Point2::dim; ++i) 385 | { 386 | if (p[i] < 0.5f) 387 | { 388 | p[i] *= 2; 389 | } 390 | else 391 | { 392 | p[i] = (p[i] - 0.5f) * 2; 393 | res |= 1 << i; 394 | } 395 | } 396 | 397 | return res; 398 | } 399 | 400 | // Evaluates the directional irradiance *sum density* (i.e. sum / area) at a given location p. 401 | // To obtain radiance, the sum density (result of this function) must be divided 402 | // by the total statistical weight of the estimates that were summed up. 
403 | float eval(Point2 &p, const std::vector &nodes) const 404 | { 405 | SAssert(p.x >= 0 && p.x <= 1 && p.y >= 0 && p.y <= 1); 406 | const int index = childIndex(p); 407 | if (isLeaf(index)) 408 | { 409 | return 4 * sum(index); 410 | } 411 | else 412 | { 413 | return 4 * nodes[child(index)].eval(p, nodes); 414 | } 415 | } 416 | 417 | float pdf(Point2 &p, const std::vector &nodes) const 418 | { 419 | SAssert(p.x >= 0 && p.x <= 1 && p.y >= 0 && p.y <= 1); 420 | const int index = childIndex(p); 421 | if (!(sum(index) > 0)) 422 | { 423 | return 0; 424 | } 425 | 426 | const float factor = 4 * sum(index) / (sum(0) + sum(1) + sum(2) + sum(3)); 427 | if (isLeaf(index)) 428 | { 429 | return factor; 430 | } 431 | else 432 | { 433 | return factor * nodes[child(index)].pdf(p, nodes); 434 | } 435 | } 436 | 437 | int depthAt(Point2 &p, const std::vector &nodes) const 438 | { 439 | SAssert(p.x >= 0 && p.x <= 1 && p.y >= 0 && p.y <= 1); 440 | const int index = childIndex(p); 441 | if (isLeaf(index)) 442 | { 443 | return 1; 444 | } 445 | else 446 | { 447 | return 1 + nodes[child(index)].depthAt(p, nodes); 448 | } 449 | } 450 | 451 | Point2 sample(Sampler *sampler, const std::vector &nodes) const 452 | { 453 | int index = 0; 454 | 455 | float topLeft = sum(0); 456 | float topRight = sum(1); 457 | float partial = topLeft + sum(2); 458 | float total = partial + topRight + sum(3); 459 | 460 | // Should only happen when there are numerical instabilities. 
461 | if (!(total > 0.0f)) 462 | { 463 | return sampler->next2D(); 464 | } 465 | 466 | float boundary = partial / total; 467 | Point2 origin = Point2{0.0f, 0.0f}; 468 | 469 | float sample = sampler->next1D(); 470 | 471 | if (sample < boundary) 472 | { 473 | SAssert(partial > 0); 474 | sample /= boundary; 475 | boundary = topLeft / partial; 476 | } 477 | else 478 | { 479 | partial = total - partial; 480 | SAssert(partial > 0); 481 | origin.x = 0.5f; 482 | sample = (sample - boundary) / (1.0f - boundary); 483 | boundary = topRight / partial; 484 | index |= 1 << 0; 485 | } 486 | 487 | if (sample < boundary) 488 | { 489 | sample /= boundary; 490 | } 491 | else 492 | { 493 | origin.y = 0.5f; 494 | sample = (sample - boundary) / (1.0f - boundary); 495 | index |= 1 << 1; 496 | } 497 | 498 | if (isLeaf(index)) 499 | { 500 | return origin + 0.5f * sampler->next2D(); 501 | } 502 | else 503 | { 504 | return origin + 0.5f * nodes[child(index)].sample(sampler, nodes); 505 | } 506 | } 507 | 508 | void record(Point2 &p, float irradiance, std::vector &nodes) 509 | { 510 | SAssert(p.x >= 0 && p.x <= 1 && p.y >= 0 && p.y <= 1); 511 | int index = childIndex(p); 512 | 513 | if (isLeaf(index)) 514 | { 515 | addToAtomicFloat(m_sum[index], irradiance); 516 | } 517 | else 518 | { 519 | nodes[child(index)].record(p, irradiance, nodes); 520 | } 521 | } 522 | 523 | float computeOverlappingArea(const Point2 &min1, const Point2 &max1, const Point2 &min2, const Point2 &max2) 524 | { 525 | float lengths[2]; 526 | for (int i = 0; i < 2; ++i) 527 | { 528 | lengths[i] = std::max(std::min(max1[i], max2[i]) - std::max(min1[i], min2[i]), 0.0f); 529 | } 530 | return lengths[0] * lengths[1]; 531 | } 532 | 533 | void record(const Point2 &origin, float size, Point2 nodeOrigin, float nodeSize, float value, std::vector &nodes) 534 | { 535 | float childSize = nodeSize / 2; 536 | for (int i = 0; i < 4; ++i) 537 | { 538 | Point2 childOrigin = nodeOrigin; 539 | if (i & 1) 540 | { 541 | childOrigin[0] += 
childSize; 542 | } 543 | if (i & 2) 544 | { 545 | childOrigin[1] += childSize; 546 | } 547 | 548 | float w = computeOverlappingArea(origin, origin + Point2(size), childOrigin, childOrigin + Point2(childSize)); 549 | if (w > 0.0f) 550 | { 551 | if (isLeaf(i)) 552 | { 553 | addToAtomicFloat(m_sum[i], value * w); 554 | } 555 | else 556 | { 557 | nodes[child(i)].record(origin, size, childOrigin, childSize, value, nodes); 558 | } 559 | } 560 | } 561 | } 562 | 563 | bool isLeaf(int index) const 564 | { 565 | return child(index) == 0; 566 | } 567 | 568 | // Ensure that each quadtree node's sum of irradiance estimates 569 | // equals that of all its children. 570 | void build(std::vector &nodes) 571 | { 572 | for (int i = 0; i < 4; ++i) 573 | { 574 | // During sampling, all irradiance estimates are accumulated in 575 | // the leaves, so the leaves are built by definition. 576 | if (isLeaf(i)) 577 | { 578 | continue; 579 | } 580 | 581 | QuadTreeNode &c = nodes[child(i)]; 582 | 583 | // Recursively build each child such that their sum becomes valid... 584 | c.build(nodes); 585 | 586 | // ...then sum up the children's sums. 
587 | float sum = 0; 588 | for (int j = 0; j < 4; ++j) 589 | { 590 | sum += c.sum(j); 591 | } 592 | setSum(i, sum); 593 | } 594 | } 595 | 596 | private: 597 | std::array, 4> m_sum; 598 | std::array m_children; 599 | }; 600 | 601 | class DTree 602 | { 603 | public: 604 | DTree() 605 | { 606 | m_atomic.sum.store(0, std::memory_order_relaxed); 607 | m_maxDepth = 0; 608 | m_nodes.emplace_back(); 609 | m_nodes.front().setSum(0.0f); 610 | } 611 | 612 | const QuadTreeNode &node(size_t i) const 613 | { 614 | return m_nodes[i]; 615 | } 616 | 617 | float mean() const 618 | { 619 | if (m_atomic.statisticalWeight == 0) 620 | { 621 | return 0; 622 | } 623 | const float factor = 1 / (M_PI * 4 * m_atomic.statisticalWeight); 624 | return factor * m_atomic.sum; 625 | } 626 | 627 | void recordIrradiance(Point2 p, float irradiance, float statisticalWeight, EDirectionalFilter directionalFilter) 628 | { 629 | if (std::isfinite(statisticalWeight) && statisticalWeight > 0) 630 | { 631 | addToAtomicFloat(m_atomic.statisticalWeight, statisticalWeight); 632 | 633 | if (std::isfinite(irradiance) && irradiance > 0) 634 | { 635 | if (directionalFilter == EDirectionalFilter::ENearest) 636 | { 637 | m_nodes[0].record(p, irradiance * statisticalWeight, m_nodes); 638 | } 639 | else 640 | { 641 | int depth = depthAt(p); 642 | float size = std::pow(0.5f, depth); 643 | 644 | Point2 origin = p; 645 | origin.x -= size / 2; 646 | origin.y -= size / 2; 647 | m_nodes[0].record(origin, size, Point2(0.0f), 1.0f, irradiance * statisticalWeight / (size * size), m_nodes); 648 | } 649 | } 650 | } 651 | } 652 | 653 | float pdf(Point2 p) const 654 | { 655 | if (!(mean() > 0)) 656 | { 657 | return 1 / (4 * M_PI); 658 | } 659 | 660 | return m_nodes[0].pdf(p, m_nodes) / (4 * M_PI); 661 | } 662 | 663 | int depthAt(Point2 p) const 664 | { 665 | return m_nodes[0].depthAt(p, m_nodes); 666 | } 667 | 668 | int depth() const 669 | { 670 | return m_maxDepth; 671 | } 672 | 673 | Point2 sample(Sampler *sampler) const 674 | { 
675 | if (!(mean() > 0)) 676 | { 677 | return sampler->next2D(); 678 | } 679 | 680 | Point2 res = m_nodes[0].sample(sampler, m_nodes); 681 | 682 | res.x = math::clamp(res.x, 0.0f, 1.0f); 683 | res.y = math::clamp(res.y, 0.0f, 1.0f); 684 | 685 | return res; 686 | } 687 | 688 | size_t numNodes() const 689 | { 690 | return m_nodes.size(); 691 | } 692 | 693 | float statisticalWeight() const 694 | { 695 | return m_atomic.statisticalWeight; 696 | } 697 | 698 | void setStatisticalWeight(float statisticalWeight) 699 | { 700 | m_atomic.statisticalWeight = statisticalWeight; 701 | } 702 | 703 | void reset(const DTree &previousDTree, int newMaxDepth, float subdivisionThreshold) 704 | { 705 | m_atomic = Atomic{}; 706 | m_maxDepth = 0; 707 | m_nodes.clear(); 708 | m_nodes.emplace_back(); 709 | 710 | struct StackNode 711 | { 712 | size_t nodeIndex; 713 | size_t otherNodeIndex; 714 | const DTree *otherDTree; 715 | int depth; 716 | }; 717 | 718 | std::stack nodeIndices; 719 | nodeIndices.push({0, 0, &previousDTree, 1}); 720 | 721 | const float total = previousDTree.m_atomic.sum; 722 | 723 | // Create the topology of the new DTree to be the refined version 724 | // of the previous DTree. Subdivision is recursive if enough energy is there. 725 | while (!nodeIndices.empty()) 726 | { 727 | StackNode sNode = nodeIndices.top(); 728 | nodeIndices.pop(); 729 | 730 | m_maxDepth = std::max(m_maxDepth, sNode.depth); 731 | 732 | for (int i = 0; i < 4; ++i) 733 | { 734 | const QuadTreeNode &otherNode = sNode.otherDTree->m_nodes[sNode.otherNodeIndex]; 735 | const float fraction = total > 0 ? 
(otherNode.sum(i) / total) : std::pow(0.25f, sNode.depth); 736 | SAssert(fraction <= 1.0f + Epsilon); 737 | 738 | if (sNode.depth < newMaxDepth && fraction > subdivisionThreshold) 739 | { 740 | if (!otherNode.isLeaf(i)) 741 | { 742 | SAssert(sNode.otherDTree == &previousDTree); 743 | nodeIndices.push({m_nodes.size(), otherNode.child(i), &previousDTree, sNode.depth + 1}); 744 | } 745 | else 746 | { 747 | nodeIndices.push({m_nodes.size(), m_nodes.size(), this, sNode.depth + 1}); 748 | } 749 | 750 | m_nodes[sNode.nodeIndex].setChild(i, static_cast(m_nodes.size())); 751 | m_nodes.emplace_back(); 752 | m_nodes.back().setSum(otherNode.sum(i) / 4); 753 | 754 | if (m_nodes.size() > std::numeric_limits::max()) 755 | { 756 | SLog(EWarn, "DTreeWrapper hit maximum children count."); 757 | nodeIndices = std::stack(); 758 | break; 759 | } 760 | } 761 | } 762 | } 763 | 764 | // Uncomment once memory becomes an issue. 765 | // m_nodes.shrink_to_fit(); 766 | 767 | for (auto &node : m_nodes) 768 | { 769 | node.setSum(0); 770 | } 771 | } 772 | 773 | size_t approxMemoryFootprint() const 774 | { 775 | return m_nodes.capacity() * sizeof(QuadTreeNode) + sizeof(*this); 776 | } 777 | 778 | void build() 779 | { 780 | auto &root = m_nodes[0]; 781 | 782 | // Build the quadtree recursively, starting from its root. 783 | root.build(m_nodes); 784 | 785 | // Ensure that the overall sum of irradiance estimates equals 786 | // the sum of irradiance estimates found in the quadtree. 
787 | float sum = 0; 788 | for (int i = 0; i < 4; ++i) 789 | { 790 | sum += root.sum(i); 791 | } 792 | m_atomic.sum.store(sum); 793 | } 794 | 795 | private: 796 | std::vector m_nodes; 797 | 798 | struct Atomic 799 | { 800 | Atomic() 801 | { 802 | sum.store(0, std::memory_order_relaxed); 803 | statisticalWeight.store(0, std::memory_order_relaxed); 804 | } 805 | 806 | Atomic(const Atomic &arg) 807 | { 808 | *this = arg; 809 | } 810 | 811 | Atomic &operator=(const Atomic &arg) 812 | { 813 | sum.store(arg.sum.load(std::memory_order_relaxed), std::memory_order_relaxed); 814 | statisticalWeight.store(arg.statisticalWeight.load(std::memory_order_relaxed), std::memory_order_relaxed); 815 | return *this; 816 | } 817 | 818 | std::atomic sum; 819 | std::atomic statisticalWeight; 820 | 821 | } m_atomic; 822 | 823 | int m_maxDepth; 824 | }; 825 | 826 | struct DTreeRecord 827 | { 828 | Vector d; 829 | float radiance, product; 830 | float woPdf, bsdfPdf, dTreePdf; 831 | float statisticalWeight; 832 | bool isDelta; 833 | }; 834 | 835 | Vector canonicalToDir(Point2 p) 836 | { 837 | const float cosTheta = 2 * p.x - 1; 838 | const float phi = 2 * M_PI * p.y; 839 | 840 | const float sinTheta = sqrt(1 - cosTheta * cosTheta); 841 | float sinPhi, cosPhi; 842 | math::sincos(phi, &sinPhi, &cosPhi); 843 | 844 | return {sinTheta * cosPhi, sinTheta * sinPhi, cosTheta}; 845 | } 846 | 847 | Point2 dirToCanonical(const Vector &d) 848 | { 849 | if (!std::isfinite(d.x) || !std::isfinite(d.y) || !std::isfinite(d.z)) 850 | { 851 | return {0, 0}; 852 | } 853 | 854 | const float cosTheta = std::min(std::max(d.z, -1.0f), 1.0f); 855 | float phi = std::atan2(d.y, d.x); 856 | while (phi < 0) 857 | phi += 2.0 * M_PI; 858 | 859 | return {(cosTheta + 1) / 2, phi / (2 * M_PI)}; 860 | } 861 | 862 | struct DTreeWrapper 863 | { 864 | public: 865 | DTreeWrapper() 866 | { 867 | } 868 | 869 | void record(const DTreeRecord &rec, EDirectionalFilter directionalFilter, EBsdfSamplingFractionLoss 
bsdfSamplingFractionLoss) 870 | { 871 | if (!rec.isDelta) 872 | { 873 | float irradiance = rec.radiance / rec.woPdf; 874 | building.recordIrradiance(dirToCanonical(rec.d), irradiance, rec.statisticalWeight, directionalFilter); 875 | } 876 | 877 | if (bsdfSamplingFractionLoss != EBsdfSamplingFractionLoss::ENone && rec.product > 0) 878 | { 879 | optimizeBsdfSamplingFraction(rec, bsdfSamplingFractionLoss == EBsdfSamplingFractionLoss::EKL ? 1.0f : 2.0f); 880 | } 881 | } 882 | 883 | void build() 884 | { 885 | building.build(); 886 | sampling = building; 887 | } 888 | 889 | void reset(int maxDepth, float subdivisionThreshold) 890 | { 891 | building.reset(sampling, maxDepth, subdivisionThreshold); 892 | } 893 | 894 | Vector sample(Sampler *sampler) const 895 | { 896 | return canonicalToDir(sampling.sample(sampler)); 897 | } 898 | 899 | float pdf(const Vector &dir) const 900 | { 901 | return sampling.pdf(dirToCanonical(dir)); 902 | } 903 | 904 | float pdfHistory(const Vector &dir, int version) const 905 | { 906 | return history[version].pdf(dirToCanonical(dir)); 907 | } 908 | 909 | void archive() 910 | { 911 | history.push_back(sampling); 912 | } 913 | 914 | float diff(const DTreeWrapper &other) const 915 | { 916 | return 0.0f; 917 | } 918 | 919 | int depth() const 920 | { 921 | return sampling.depth(); 922 | } 923 | 924 | size_t numNodes() const 925 | { 926 | return sampling.numNodes(); 927 | } 928 | 929 | float meanRadiance() const 930 | { 931 | return sampling.mean(); 932 | } 933 | 934 | float statisticalWeight() const 935 | { 936 | return sampling.statisticalWeight(); 937 | } 938 | 939 | float statisticalWeightBuilding() const 940 | { 941 | return building.statisticalWeight(); 942 | } 943 | 944 | void setStatisticalWeightBuilding(float statisticalWeight) 945 | { 946 | building.setStatisticalWeight(statisticalWeight); 947 | } 948 | 949 | size_t approxMemoryFootprint() const 950 | { 951 | // our amis only requires one dtree per snode 952 | // return 
sampling.approxMemoryFootprint(); 953 | size_t ans = 0; 954 | for (auto i: history) 955 | { 956 | ans += i.approxMemoryFootprint(); 957 | } 958 | return ans; 959 | } 960 | 961 | inline float bsdfSamplingFraction(float variable) const 962 | { 963 | return logistic(variable); 964 | } 965 | 966 | inline float dBsdfSamplingFraction_dVariable(float variable) const 967 | { 968 | float fraction = bsdfSamplingFraction(variable); 969 | return fraction * (1 - fraction); 970 | } 971 | 972 | inline float bsdfSamplingFraction() const 973 | { 974 | return bsdfSamplingFraction(bsdfSamplingFractionOptimizer.variable()); 975 | } 976 | 977 | void optimizeBsdfSamplingFraction(const DTreeRecord &rec, float ratioPower) 978 | { 979 | m_lock.lock(); 980 | 981 | // GRADIENT COMPUTATION 982 | float variable = bsdfSamplingFractionOptimizer.variable(); 983 | float samplingFraction = bsdfSamplingFraction(variable); 984 | 985 | // Loss gradient w.r.t. sampling fraction 986 | float mixPdf = samplingFraction * rec.bsdfPdf + (1 - samplingFraction) * rec.dTreePdf; 987 | float ratio = std::pow(rec.product / mixPdf, ratioPower); 988 | float dLoss_dSamplingFraction = -ratio / rec.woPdf * (rec.bsdfPdf - rec.dTreePdf); 989 | 990 | // Chain rule to get loss gradient w.r.t. trainable variable 991 | float dLoss_dVariable = dLoss_dSamplingFraction * dBsdfSamplingFraction_dVariable(variable); 992 | 993 | // We want some regularization such that our parameter does not become too big. 994 | // We use l2 regularization, resulting in the following linear gradient. 
995 | float l2RegGradient = 0.01f * variable; 996 | 997 | float lossGradient = l2RegGradient + dLoss_dVariable; 998 | 999 | // ADAM GRADIENT DESCENT 1000 | bsdfSamplingFractionOptimizer.append(lossGradient, rec.statisticalWeight); 1001 | 1002 | m_lock.unlock(); 1003 | } 1004 | 1005 | void dump(BlobWriter &blob, const Point &p, const Vector &size) const 1006 | { 1007 | blob 1008 | << (float)p.x << (float)p.y << (float)p.z 1009 | << (float)size.x << (float)size.y << (float)size.z 1010 | << (float)sampling.mean() << (uint64_t)sampling.statisticalWeight() << (uint64_t)sampling.numNodes(); 1011 | 1012 | for (size_t i = 0; i < sampling.numNodes(); ++i) 1013 | { 1014 | const auto &node = sampling.node(i); 1015 | for (int j = 0; j < 4; ++j) 1016 | { 1017 | blob << (float)node.sum(j) << (uint16_t)node.child(j); 1018 | } 1019 | } 1020 | } 1021 | 1022 | private: 1023 | DTree building; 1024 | DTree sampling; 1025 | std::vector history; 1026 | 1027 | AdamOptimizer bsdfSamplingFractionOptimizer{0.01f}; 1028 | 1029 | class SpinLock 1030 | { 1031 | public: 1032 | SpinLock() 1033 | { 1034 | m_mutex.clear(std::memory_order_release); 1035 | } 1036 | 1037 | SpinLock(const SpinLock &other) { m_mutex.clear(std::memory_order_release); } 1038 | SpinLock &operator=(const SpinLock &other) { return *this; } 1039 | 1040 | void lock() 1041 | { 1042 | while (m_mutex.test_and_set(std::memory_order_acquire)) 1043 | { 1044 | } 1045 | } 1046 | 1047 | void unlock() 1048 | { 1049 | m_mutex.clear(std::memory_order_release); 1050 | } 1051 | 1052 | private: 1053 | std::atomic_flag m_mutex; 1054 | } m_lock; 1055 | }; 1056 | 1057 | struct STreeNode 1058 | { 1059 | STreeNode() 1060 | { 1061 | children = {}; 1062 | isLeaf = true; 1063 | axis = 0; 1064 | } 1065 | 1066 | int childIndex(Point &p) const 1067 | { 1068 | if (p[axis] < 0.5f) 1069 | { 1070 | p[axis] *= 2; 1071 | return 0; 1072 | } 1073 | else 1074 | { 1075 | p[axis] = (p[axis] - 0.5f) * 2; 1076 | return 1; 1077 | } 1078 | } 1079 | 1080 | int 
nodeIndex(Point &p) const 1081 | { 1082 | return children[childIndex(p)]; 1083 | } 1084 | 1085 | DTreeWrapper *dTreeWrapper(Point &p, Vector &size, std::vector &nodes) 1086 | { 1087 | SAssert(p[axis] >= 0 && p[axis] <= 1); 1088 | if (isLeaf) 1089 | { 1090 | return &dTree; 1091 | } 1092 | else 1093 | { 1094 | size[axis] /= 2; 1095 | return nodes[nodeIndex(p)].dTreeWrapper(p, size, nodes); 1096 | } 1097 | } 1098 | 1099 | const DTreeWrapper *dTreeWrapper() const 1100 | { 1101 | return &dTree; 1102 | } 1103 | 1104 | int depth(Point &p, const std::vector &nodes) const 1105 | { 1106 | SAssert(p[axis] >= 0 && p[axis] <= 1); 1107 | if (isLeaf) 1108 | { 1109 | return 1; 1110 | } 1111 | else 1112 | { 1113 | return 1 + nodes[nodeIndex(p)].depth(p, nodes); 1114 | } 1115 | } 1116 | 1117 | int depth(const std::vector &nodes) const 1118 | { 1119 | int result = 1; 1120 | 1121 | if (!isLeaf) 1122 | { 1123 | for (auto c : children) 1124 | { 1125 | result = std::max(result, 1 + nodes[c].depth(nodes)); 1126 | } 1127 | } 1128 | 1129 | return result; 1130 | } 1131 | 1132 | void forEachLeaf( 1133 | std::function func, 1134 | Point p, Vector size, const std::vector &nodes) const 1135 | { 1136 | 1137 | if (isLeaf) 1138 | { 1139 | func(&dTree, p, size); 1140 | } 1141 | else 1142 | { 1143 | size[axis] /= 2; 1144 | for (int i = 0; i < 2; ++i) 1145 | { 1146 | Point childP = p; 1147 | if (i == 1) 1148 | { 1149 | childP[axis] += size[axis]; 1150 | } 1151 | 1152 | nodes[children[i]].forEachLeaf(func, childP, size, nodes); 1153 | } 1154 | } 1155 | } 1156 | 1157 | float computeOverlappingVolume(const Point &min1, const Point &max1, const Point &min2, const Point &max2) 1158 | { 1159 | float lengths[3]; 1160 | for (int i = 0; i < 3; ++i) 1161 | { 1162 | lengths[i] = std::max(std::min(max1[i], max2[i]) - std::max(min1[i], min2[i]), 0.0f); 1163 | } 1164 | return lengths[0] * lengths[1] * lengths[2]; 1165 | } 1166 | 1167 | void record(const Point &min1, const Point &max1, Point min2, Vector size2, 
const DTreeRecord &rec, EDirectionalFilter directionalFilter, EBsdfSamplingFractionLoss bsdfSamplingFractionLoss, std::vector &nodes) 1168 | { 1169 | float w = computeOverlappingVolume(min1, max1, min2, min2 + size2); 1170 | if (w > 0) 1171 | { 1172 | if (isLeaf) 1173 | { 1174 | dTree.record({rec.d, rec.radiance, rec.product, rec.woPdf, rec.bsdfPdf, rec.dTreePdf, rec.statisticalWeight * w, rec.isDelta}, directionalFilter, bsdfSamplingFractionLoss); 1175 | } 1176 | else 1177 | { 1178 | size2[axis] /= 2; 1179 | for (int i = 0; i < 2; ++i) 1180 | { 1181 | if (i & 1) 1182 | { 1183 | min2[axis] += size2[axis]; 1184 | } 1185 | 1186 | nodes[children[i]].record(min1, max1, min2, size2, rec, directionalFilter, bsdfSamplingFractionLoss, nodes); 1187 | } 1188 | } 1189 | } 1190 | } 1191 | 1192 | bool isLeaf; 1193 | DTreeWrapper dTree; 1194 | int axis; 1195 | std::array children; 1196 | }; 1197 | 1198 | class STree 1199 | { 1200 | public: 1201 | STree(const AABB &aabb) 1202 | { 1203 | clear(); 1204 | 1205 | m_aabb = aabb; 1206 | 1207 | // Enlarge AABB to turn it into a cube. This has the effect 1208 | // of nicer hierarchical subdivisions. 
1209 | Vector size = m_aabb.max - m_aabb.min; 1210 | float maxSize = std::max(std::max(size.x, size.y), size.z); 1211 | m_aabb.max = m_aabb.min + Vector(maxSize); 1212 | } 1213 | 1214 | size_t mem() const 1215 | { 1216 | size_t approxMemoryFootprint = 0; 1217 | for (const auto &node : m_nodes) 1218 | { 1219 | approxMemoryFootprint += node.dTreeWrapper()->approxMemoryFootprint(); 1220 | } 1221 | return approxMemoryFootprint; 1222 | } 1223 | 1224 | void clear() 1225 | { 1226 | m_nodes.clear(); 1227 | m_nodes.emplace_back(); 1228 | } 1229 | 1230 | void subdivideAll() 1231 | { 1232 | int nNodes = (int)m_nodes.size(); 1233 | for (int i = 0; i < nNodes; ++i) 1234 | { 1235 | if (m_nodes[i].isLeaf) 1236 | { 1237 | subdivide(i, m_nodes); 1238 | } 1239 | } 1240 | } 1241 | 1242 | void subdivide(int nodeIdx, std::vector &nodes) 1243 | { 1244 | // Add 2 child nodes 1245 | nodes.resize(nodes.size() + 2); 1246 | 1247 | if (nodes.size() > std::numeric_limits::max()) 1248 | { 1249 | SLog(EWarn, "DTreeWrapper hit maximum children count."); 1250 | return; 1251 | } 1252 | 1253 | STreeNode &cur = nodes[nodeIdx]; 1254 | for (int i = 0; i < 2; ++i) 1255 | { 1256 | uint32_t idx = (uint32_t)nodes.size() - 2 + i; 1257 | cur.children[i] = idx; 1258 | nodes[idx].axis = (cur.axis + 1) % 3; 1259 | nodes[idx].dTree = cur.dTree; 1260 | nodes[idx].dTree.setStatisticalWeightBuilding(nodes[idx].dTree.statisticalWeightBuilding() / 2); 1261 | } 1262 | cur.isLeaf = false; 1263 | cur.dTree = {}; // Reset to an empty dtree to save memory. 
1264 | } 1265 | 1266 | DTreeWrapper *dTreeWrapper(Point p, Vector &size) 1267 | { 1268 | size = m_aabb.getExtents(); 1269 | p = Point(p - m_aabb.min); 1270 | p.x /= size.x; 1271 | p.y /= size.y; 1272 | p.z /= size.z; 1273 | 1274 | return m_nodes[0].dTreeWrapper(p, size, m_nodes); 1275 | } 1276 | 1277 | DTreeWrapper *dTreeWrapper(Point p) 1278 | { 1279 | Vector size; 1280 | return dTreeWrapper(p, size); 1281 | } 1282 | 1283 | void forEachDTreeWrapperConst(std::function func) const 1284 | { 1285 | for (auto &node : m_nodes) 1286 | { 1287 | if (node.isLeaf) 1288 | { 1289 | func(&node.dTree); 1290 | } 1291 | } 1292 | } 1293 | 1294 | void forEachDTreeWrapperConstP(std::function func) const 1295 | { 1296 | m_nodes[0].forEachLeaf(func, m_aabb.min, m_aabb.max - m_aabb.min, m_nodes); 1297 | } 1298 | 1299 | void forEachDTreeWrapperParallel(std::function func) 1300 | { 1301 | int nDTreeWrappers = static_cast(m_nodes.size()); 1302 | 1303 | #pragma omp parallel for 1304 | for (int i = 0; i < nDTreeWrappers; ++i) 1305 | { 1306 | if (m_nodes[i].isLeaf) 1307 | { 1308 | func(&m_nodes[i].dTree); 1309 | } 1310 | } 1311 | } 1312 | 1313 | void record(const Point &p, const Vector &dTreeVoxelSize, DTreeRecord rec, EDirectionalFilter directionalFilter, EBsdfSamplingFractionLoss bsdfSamplingFractionLoss) 1314 | { 1315 | float volume = 1; 1316 | for (int i = 0; i < 3; ++i) 1317 | { 1318 | volume *= dTreeVoxelSize[i]; 1319 | } 1320 | 1321 | rec.statisticalWeight /= volume; 1322 | m_nodes[0].record(p - dTreeVoxelSize * 0.5f, p + dTreeVoxelSize * 0.5f, m_aabb.min, m_aabb.getExtents(), rec, directionalFilter, bsdfSamplingFractionLoss, m_nodes); 1323 | } 1324 | 1325 | void dump(BlobWriter &blob) const 1326 | { 1327 | forEachDTreeWrapperConstP([&blob](const DTreeWrapper *dTree, const Point &p, const Vector &size) 1328 | { 1329 | if (dTree->statisticalWeight() > 0) { 1330 | dTree->dump(blob, p, size); 1331 | } }); 1332 | } 1333 | 1334 | bool shallSplit(const STreeNode &node, int depth, size_t 
samplesRequired) 1335 | { 1336 | return m_nodes.size() < std::numeric_limits::max() - 1 && node.dTree.statisticalWeightBuilding() > samplesRequired; 1337 | } 1338 | 1339 | void refine(size_t sTreeThreshold, int maxMB) 1340 | { 1341 | if (maxMB >= 0) 1342 | { 1343 | size_t approxMemoryFootprint = 0; 1344 | for (const auto &node : m_nodes) 1345 | { 1346 | approxMemoryFootprint += node.dTreeWrapper()->approxMemoryFootprint(); 1347 | } 1348 | 1349 | if (approxMemoryFootprint / 1000000 >= (size_t)maxMB) 1350 | { 1351 | return; 1352 | } 1353 | } 1354 | 1355 | struct StackNode 1356 | { 1357 | size_t index; 1358 | int depth; 1359 | }; 1360 | 1361 | std::stack nodeIndices; 1362 | nodeIndices.push({0, 1}); 1363 | while (!nodeIndices.empty()) 1364 | { 1365 | StackNode sNode = nodeIndices.top(); 1366 | nodeIndices.pop(); 1367 | 1368 | // Subdivide if needed and leaf 1369 | if (m_nodes[sNode.index].isLeaf) 1370 | { 1371 | if (shallSplit(m_nodes[sNode.index], sNode.depth, sTreeThreshold)) 1372 | { 1373 | subdivide((int)sNode.index, m_nodes); 1374 | } 1375 | } 1376 | 1377 | // Add children to stack if we're not 1378 | if (!m_nodes[sNode.index].isLeaf) 1379 | { 1380 | const STreeNode &node = m_nodes[sNode.index]; 1381 | for (int i = 0; i < 2; ++i) 1382 | { 1383 | nodeIndices.push({node.children[i], sNode.depth + 1}); 1384 | } 1385 | } 1386 | } 1387 | 1388 | // Uncomment once memory becomes an issue. 
1389 | // m_nodes.shrink_to_fit(); 1390 | } 1391 | 1392 | const AABB &aabb() const 1393 | { 1394 | return m_aabb; 1395 | } 1396 | 1397 | private: 1398 | std::vector m_nodes; 1399 | AABB m_aabb; 1400 | }; 1401 | 1402 | static StatsCounter avgPathLength("Guided path tracer", "Average path length", EAverage); 1403 | 1404 | class GuidedPathTracerAMISPathspace : public MonteCarloIntegrator 1405 | { 1406 | public: 1407 | GuidedPathTracerAMISPathspace(const Properties &props) : MonteCarloIntegrator(props) 1408 | { 1409 | m_neeStr = props.getString("nee", "never"); 1410 | if (m_neeStr == "never") 1411 | { 1412 | m_nee = ENever; 1413 | } 1414 | else if (m_neeStr == "kickstart") 1415 | { 1416 | m_nee = EKickstart; 1417 | } 1418 | else if (m_neeStr == "always") 1419 | { 1420 | m_nee = EAlways; 1421 | } 1422 | else 1423 | { 1424 | Assert(false); 1425 | } 1426 | 1427 | m_sampleCombinationStr = props.getString("sampleCombination", "automatic"); 1428 | if (m_sampleCombinationStr == "amis") 1429 | { 1430 | m_sampleCombination = ESampleCombination::EAMIS; 1431 | } 1432 | else 1433 | { 1434 | Assert(false); 1435 | } 1436 | 1437 | m_spatialFilterStr = props.getString("spatialFilter", "nearest"); 1438 | if (m_spatialFilterStr == "nearest") 1439 | { 1440 | m_spatialFilter = ESpatialFilter::ENearest; 1441 | } 1442 | else if (m_spatialFilterStr == "stochastic") 1443 | { 1444 | m_spatialFilter = ESpatialFilter::EStochasticBox; 1445 | } 1446 | else if (m_spatialFilterStr == "box") 1447 | { 1448 | m_spatialFilter = ESpatialFilter::EBox; 1449 | } 1450 | else 1451 | { 1452 | Assert(false); 1453 | } 1454 | 1455 | m_directionalFilterStr = props.getString("directionalFilter", "nearest"); 1456 | if (m_directionalFilterStr == "nearest") 1457 | { 1458 | m_directionalFilter = EDirectionalFilter::ENearest; 1459 | } 1460 | else if (m_directionalFilterStr == "box") 1461 | { 1462 | m_directionalFilter = EDirectionalFilter::EBox; 1463 | } 1464 | else 1465 | { 1466 | Assert(false); 1467 | } 1468 | 1469 | 
m_bsdfSamplingFractionLossStr = props.getString("bsdfSamplingFractionLoss", "none"); 1470 | if (m_bsdfSamplingFractionLossStr == "none") 1471 | { 1472 | m_bsdfSamplingFractionLoss = EBsdfSamplingFractionLoss::ENone; 1473 | } 1474 | else if (m_bsdfSamplingFractionLossStr == "kl") 1475 | { 1476 | m_bsdfSamplingFractionLoss = EBsdfSamplingFractionLoss::EKL; 1477 | } 1478 | else if (m_bsdfSamplingFractionLossStr == "var") 1479 | { 1480 | m_bsdfSamplingFractionLoss = EBsdfSamplingFractionLoss::EVariance; 1481 | } 1482 | else 1483 | { 1484 | Assert(false); 1485 | } 1486 | 1487 | m_sdTreeMaxMemory = props.getInteger("sdTreeMaxMemory", -1); 1488 | m_sTreeThreshold = props.getInteger("sTreeThreshold", 4000); 1489 | m_dTreeThreshold = props.getFloat("dTreeThreshold", 0.01f); 1490 | m_bsdfSamplingFraction = props.getFloat("bsdfSamplingFraction", 0.5f); 1491 | m_sppPerPass = props.getInteger("sppPerPass", 4); 1492 | 1493 | m_budgetStr = props.getString("budgetType", "seconds"); 1494 | if (m_budgetStr == "spp") 1495 | { 1496 | m_budgetType = ESpp; 1497 | } 1498 | else if (m_budgetStr == "seconds") 1499 | { 1500 | m_budgetType = ESeconds; 1501 | } 1502 | else 1503 | { 1504 | Assert(false); 1505 | } 1506 | 1507 | m_sampleAllocSeqStr = props.getString("sampleAllocSeq", "double"); 1508 | if (m_sampleAllocSeqStr == "double") 1509 | { 1510 | m_sampleAllocSeq = ESampleAllocSeq::EDouble; 1511 | } 1512 | else if (m_sampleAllocSeqStr == "halfdouble") 1513 | { 1514 | m_sampleAllocSeq = ESampleAllocSeq::EHalfdouble; 1515 | } 1516 | else if (m_sampleAllocSeqStr == "uniform") 1517 | { 1518 | m_sampleAllocSeq = ESampleAllocSeq::EUniform; 1519 | } 1520 | else 1521 | { 1522 | Assert(false); 1523 | } 1524 | m_budget = props.getFloat("budget", 300.0f); 1525 | 1526 | m_dumpSDTree = props.getBoolean("dumpSDTree", false); 1527 | m_tempParam = props.getFloat("tempParam", 0); 1528 | g_tempParam = m_tempParam; 1529 | } 1530 | 1531 | ref renderPass(Scene *scene, 1532 | RenderQueue *queue, const 
RenderJob *job, 1533 | int sceneResID, int sensorResID, int samplerResID, int integratorResID) 1534 | { 1535 | 1536 | /* This is a sampling-based integrator - parallelize */ 1537 | ref proc = new BlockedRenderProcess(job, 1538 | queue, scene->getBlockSize()); 1539 | 1540 | proc->disableProgress(); 1541 | 1542 | proc->bindResource("integrator", integratorResID); 1543 | proc->bindResource("scene", sceneResID); 1544 | proc->bindResource("sensor", sensorResID); 1545 | proc->bindResource("sampler", samplerResID); 1546 | 1547 | scene->bindUsedResources(proc); 1548 | bindUsedResources(proc); 1549 | 1550 | return proc; 1551 | } 1552 | 1553 | void resetSDTree() 1554 | { 1555 | Log(EInfo, "Resetting distributions for sampling."); 1556 | 1557 | int iter = m_iter; 1558 | int t_iter = m_iter; 1559 | if (m_sampleAllocSeq == ESampleAllocSeq::EHalfdouble) 1560 | { 1561 | iter = std::max(0, iter - 4); 1562 | } 1563 | HDTimer t1; 1564 | if (t_iter > 0) m_sdTree->forEachDTreeWrapperParallel([this, t_iter](DTreeWrapper *dTree) 1565 | { dTree->archive(); }); 1566 | statsAMISTimeArchive += t1.value(); 1567 | m_sdTree->refine((size_t)(std::sqrt(std::pow(2, iter) * m_sppPerPass / 4) * m_sTreeThreshold), m_sdTreeMaxMemory); 1568 | m_sdTree->forEachDTreeWrapperParallel([this](DTreeWrapper *dTree) 1569 | { dTree->reset(20, m_dTreeThreshold); }); 1570 | } 1571 | 1572 | void buildSDTree() 1573 | { 1574 | Log(EInfo, "Building distributions for sampling."); 1575 | 1576 | // Build distributions 1577 | m_sdTree->forEachDTreeWrapperParallel([](DTreeWrapper *dTree) 1578 | { dTree->build(); }); 1579 | 1580 | // Gather statistics 1581 | int maxDepth = 0; 1582 | int minDepth = std::numeric_limits::max(); 1583 | float avgDepth = 0; 1584 | float maxAvgRadiance = 0; 1585 | float minAvgRadiance = std::numeric_limits::max(); 1586 | float avgAvgRadiance = 0; 1587 | size_t maxNodes = 0; 1588 | size_t minNodes = std::numeric_limits::max(); 1589 | float avgNodes = 0; 1590 | float maxStatisticalWeight = 0; 1591 
| float minStatisticalWeight = std::numeric_limits::max(); 1592 | float avgStatisticalWeight = 0; 1593 | 1594 | int nPoints = 0; 1595 | int nPointsNodes = 0; 1596 | 1597 | m_sdTree->forEachDTreeWrapperConst([&](const DTreeWrapper *dTree) 1598 | { 1599 | const int depth = dTree->depth(); 1600 | maxDepth = std::max(maxDepth, depth); 1601 | minDepth = std::min(minDepth, depth); 1602 | avgDepth += depth; 1603 | 1604 | const float avgRadiance = dTree->meanRadiance(); 1605 | maxAvgRadiance = std::max(maxAvgRadiance, avgRadiance); 1606 | minAvgRadiance = std::min(minAvgRadiance, avgRadiance); 1607 | avgAvgRadiance += avgRadiance; 1608 | 1609 | if (dTree->numNodes() > 1) { 1610 | const size_t nodes = dTree->numNodes(); 1611 | maxNodes = std::max(maxNodes, nodes); 1612 | minNodes = std::min(minNodes, nodes); 1613 | avgNodes += nodes; 1614 | ++nPointsNodes; 1615 | } 1616 | 1617 | const float statisticalWeight = dTree->statisticalWeight(); 1618 | maxStatisticalWeight = std::max(maxStatisticalWeight, statisticalWeight); 1619 | minStatisticalWeight = std::min(minStatisticalWeight, statisticalWeight); 1620 | avgStatisticalWeight += statisticalWeight; 1621 | 1622 | ++nPoints; }); 1623 | 1624 | if (nPoints > 0) 1625 | { 1626 | avgDepth /= nPoints; 1627 | avgAvgRadiance /= nPoints; 1628 | 1629 | if (nPointsNodes > 0) 1630 | { 1631 | avgNodes /= nPointsNodes; 1632 | } 1633 | 1634 | avgStatisticalWeight /= nPoints; 1635 | } 1636 | 1637 | Log(EInfo, 1638 | "Distribution statistics:\n" 1639 | " Depth = [%d, %f, %d]\n" 1640 | " Mean radiance = [%f, %f, %f]\n" 1641 | " Node count = [" SIZE_T_FMT ", %f, " SIZE_T_FMT "]\n" 1642 | " Stat. 
weight = [%f, %f, %f]\n", 1643 | minDepth, avgDepth, maxDepth, 1644 | minAvgRadiance, avgAvgRadiance, maxAvgRadiance, 1645 | minNodes, avgNodes, maxNodes, 1646 | minStatisticalWeight, avgStatisticalWeight, maxStatisticalWeight); 1647 | 1648 | m_isBuilt = true; 1649 | } 1650 | 1651 | void dumpSDTree(Scene *scene, ref sensor) 1652 | { 1653 | std::ostringstream extension; 1654 | extension << "-" << std::setfill('0') << std::setw(2) << m_iter << ".sdt"; 1655 | fs::path path = scene->getDestinationFile(); 1656 | auto cameraMatrix = sensor->getWorldTransform()->eval(0).getMatrix(); 1657 | 1658 | BlobWriter blob(path.string()); 1659 | 1660 | for (int i = 0; i < 4; ++i) 1661 | { 1662 | for (int j = 0; j < 4; ++j) 1663 | { 1664 | blob << (float)cameraMatrix(i, j); 1665 | } 1666 | } 1667 | 1668 | m_sdTree->dump(blob); 1669 | } 1670 | 1671 | bool performRenderPasses(float &variance, int numPasses, Scene *scene, RenderQueue *queue, const RenderJob *job, 1672 | int sceneResID, int sensorResID, int samplerResID, int integratorResID) 1673 | { 1674 | 1675 | ref sched = Scheduler::getInstance(); 1676 | ref sensor = static_cast(sched->getResource(sensorResID)); 1677 | g_sensor = sensor; 1678 | ref film = sensor->getFilm(); 1679 | 1680 | // m_image->clear(); // ! we do not clear to accumulate ? is this necessary now? 
1681 | m_squaredImage->clear(); 1682 | 1683 | size_t totalBlocks = 0; 1684 | 1685 | Log(EInfo, "Rendering %d render passes.", numPasses); 1686 | 1687 | int N = numPasses * m_sppPerPass; 1688 | m_sampleCounts.push_back(N); 1689 | 1690 | auto start = std::chrono::steady_clock::now(); 1691 | 1692 | HDTimer timer_phase_renderpass; 1693 | for (int i = 0; i < numPasses; ++i) 1694 | { 1695 | ref process = renderPass(scene, queue, job, sceneResID, sensorResID, samplerResID, integratorResID); 1696 | m_renderProcesses.push_back(process); 1697 | totalBlocks += process->totalBlocks(); 1698 | } 1699 | 1700 | bool result = true; 1701 | int passesRenderedLocal = 0; 1702 | 1703 | static const size_t processBatchSize = 128; 1704 | 1705 | for (size_t i = 0; i < m_renderProcesses.size(); i += processBatchSize) 1706 | { 1707 | const size_t start = i; 1708 | const size_t end = std::min(i + processBatchSize, m_renderProcesses.size()); 1709 | for (size_t j = start; j < end; ++j) 1710 | { 1711 | sched->schedule(m_renderProcesses[j]); 1712 | } 1713 | 1714 | for (size_t j = start; j < end; ++j) 1715 | { 1716 | auto &process = m_renderProcesses[j]; 1717 | sched->wait(process); 1718 | 1719 | ++m_passesRendered; 1720 | ++m_passesRenderedThisIter; 1721 | ++passesRenderedLocal; 1722 | 1723 | int progress = 0; 1724 | bool shouldAbort; 1725 | switch (m_budgetType) 1726 | { 1727 | case ESpp: 1728 | progress = m_passesRendered; 1729 | shouldAbort = false; 1730 | break; 1731 | case ESeconds: 1732 | progress = (int)computeElapsedSeconds(m_startTime); 1733 | shouldAbort = progress > m_budget; 1734 | break; 1735 | default: 1736 | Assert(false); 1737 | break; 1738 | } 1739 | 1740 | m_progress->update(progress); 1741 | 1742 | if (process->getReturnStatus() != ParallelProcess::ESuccess) 1743 | { 1744 | result = false; 1745 | shouldAbort = true; 1746 | } 1747 | 1748 | if (shouldAbort) 1749 | { 1750 | goto l_abort; 1751 | } 1752 | } 1753 | } 1754 | l_abort: 1755 | 1756 | for (auto &process : 
m_renderProcesses) 1757 | { 1758 | sched->cancel(process); 1759 | } 1760 | 1761 | std::cout << "all mem " << m_sdTree->mem() * 1.0 / 1048576 << "MB" << std::endl; 1762 | m_renderProcesses.clear(); 1763 | 1764 | variance = 0; 1765 | 1766 | float seconds = computeElapsedSeconds(start); 1767 | 1768 | Log(EInfo, "%.2f seconds, Total passes: %d", 1769 | seconds, m_passesRendered); 1770 | 1771 | return result; 1772 | } 1773 | 1774 | bool doNeeWithSpp(int spp) 1775 | { 1776 | switch (m_nee) 1777 | { 1778 | case ENever: 1779 | return false; 1780 | case EKickstart: 1781 | return spp < 128; 1782 | default: 1783 | return true; 1784 | } 1785 | } 1786 | 1787 | bool renderSPP(Scene *scene, RenderQueue *queue, const RenderJob *job, 1788 | int sceneResID, int sensorResID, int samplerResID, int integratorResID) 1789 | { 1790 | memset(len_counts, 0, sizeof(len_counts)); 1791 | 1792 | ref sched = Scheduler::getInstance(); 1793 | 1794 | size_t sampleCount = (size_t)m_budget; 1795 | 1796 | ref sensor = static_cast(sched->getResource(sensorResID)); 1797 | ref film = sensor->getFilm(); 1798 | 1799 | int nPasses = (int)std::ceil(sampleCount / (float)m_sppPerPass); 1800 | sampleCount = m_sppPerPass * nPasses; 1801 | 1802 | g_sampleCount = sampleCount; 1803 | 1804 | bool result = true; 1805 | float currentVarAtEnd = std::numeric_limits::infinity(); 1806 | 1807 | m_progress = std::unique_ptr(new ProgressReporter("Rendering", nPasses, job)); 1808 | 1809 | while (result && m_passesRendered < nPasses) 1810 | { 1811 | HDTimer timer_phase_total; 1812 | const int sppRendered = m_passesRendered * m_sppPerPass; 1813 | m_doNee = doNeeWithSpp(sppRendered); 1814 | 1815 | int remainingPasses = nPasses - m_passesRendered; 1816 | int passesThisIteration = std::min(remainingPasses, 1 << m_iter); // ! 
this line is modified from the original code release 1817 | 1818 | // If the next iteration does not manage to double the number of passes once more 1819 | // then it would be unwise to throw away the current iteration. Instead, extend 1820 | // the current iteration to the end. 1821 | // This condition can also be interpreted as: the last iteration must always use 1822 | // at _least_ half the total sample budget. 1823 | if (remainingPasses - passesThisIteration < 2 * passesThisIteration) 1824 | { 1825 | passesThisIteration = remainingPasses; 1826 | } 1827 | if (m_sampleAllocSeq == ESampleAllocSeq::EHalfdouble) 1828 | { 1829 | passesThisIteration = 1 << std::max(0, m_iter - 4); 1830 | } 1831 | 1832 | if (m_sampleAllocSeq == ESampleAllocSeq::EUniform) 1833 | { 1834 | passesThisIteration = 1; 1835 | } 1836 | if (remainingPasses - passesThisIteration < 0) 1837 | { 1838 | passesThisIteration = remainingPasses; 1839 | } 1840 | 1841 | Log(EInfo, "ITERATION %d, %d passes", m_iter, passesThisIteration); 1842 | 1843 | g_passesThisIteration = passesThisIteration; 1844 | 1845 | m_isFinalIter = passesThisIteration >= remainingPasses; 1846 | 1847 | film->clear(); 1848 | 1849 | // if ((m_sampleAllocSeq == ESampleAllocSeq::EUniform && (m_iter + 1) == std::pow(2, int(std::log2(m_iter + 1)))) 1850 | // || (m_sampleAllocSeq == ESampleAllocSeq::EHalfdouble && (m_iter == 0 || m_iter % 2 == 1)) 1851 | // || m_sampleAllocSeq == ESampleAllocSeq::EDouble) 1852 | resetSDTree(); 1853 | 1854 | float variance; 1855 | if (!performRenderPasses(variance, passesThisIteration, scene, queue, job, sceneResID, sensorResID, samplerResID, integratorResID)) 1856 | { 1857 | result = false; 1858 | break; 1859 | } 1860 | 1861 | const float lastVarAtEnd = currentVarAtEnd; 1862 | currentVarAtEnd = passesThisIteration * variance / remainingPasses; 1863 | 1864 | Log(EInfo, 1865 | "Extrapolated var:\n" 1866 | " Last: %f\n" 1867 | " Current: %f\n", 1868 | lastVarAtEnd, currentVarAtEnd); 1869 | 1870 | 
remainingPasses -= passesThisIteration; 1871 | buildSDTree(); 1872 | 1873 | ++m_iter; 1874 | m_passesRenderedThisIter = 0; 1875 | } 1876 | 1877 | return result; 1878 | } 1879 | 1880 | bool renderTime(Scene *scene, RenderQueue *queue, const RenderJob *job, 1881 | int sceneResID, int sensorResID, int samplerResID, int integratorResID) 1882 | { 1883 | std::cout << "not supported" << std::endl; 1884 | std::cerr << "not supported" << std::endl; 1885 | exit(1); 1886 | return false; 1887 | } 1888 | 1889 | bool render(Scene *scene, RenderQueue *queue, const RenderJob *job, 1890 | int sceneResID, int sensorResID, int samplerResID) 1891 | { 1892 | 1893 | m_sdTree = std::shared_ptr(new STree(scene->getAABB())); 1894 | m_iter = 0; 1895 | m_isFinalIter = false; 1896 | 1897 | ref sched = Scheduler::getInstance(); 1898 | 1899 | size_t nCores = sched->getCoreCount(); 1900 | ref sensor = static_cast(sched->getResource(sensorResID)); 1901 | ref film = sensor->getFilm(); 1902 | 1903 | m_film = film; 1904 | 1905 | auto properties = Properties("hdrfilm"); 1906 | properties.setInteger("width", film->getSize().x); 1907 | properties.setInteger("height", film->getSize().y); 1908 | m_varianceBuffer = static_cast(PluginManager::getInstance()->createObject(MTS_CLASS(Film), properties)); 1909 | m_varianceBuffer->setDestinationFile(scene->getDestinationFile(), 0); 1910 | 1911 | m_squaredImage = new ImageBlock(Bitmap::ESpectrumAlphaWeight, film->getCropSize()); 1912 | m_image = new ImageBlock(Bitmap::ESpectrumAlphaWeight, film->getCropSize()); 1913 | 1914 | m_images.clear(); 1915 | m_variances.clear(); 1916 | m_sampleCounts.clear(); 1917 | 1918 | m_amisImage = new ImageBlock(Bitmap::ESpectrumAlphaWeight, film->getCropSize(), film->getReconstructionFilter()); 1919 | m_amisImage->clear(); 1920 | 1921 | Log(EInfo, "Starting render job (%ix%i, " SIZE_T_FMT " %s, " SSE_STR ") ..", film->getCropSize().x, film->getCropSize().y, nCores, nCores == 1 ? 
"core" : "cores"); 1922 | 1923 | Thread::initializeOpenMP(nCores); 1924 | 1925 | int integratorResID = sched->registerResource(this); 1926 | bool result = true; 1927 | 1928 | m_startTime = std::chrono::steady_clock::now(); 1929 | 1930 | m_passesRendered = 0; 1931 | switch (m_budgetType) 1932 | { 1933 | case ESpp: 1934 | result = renderSPP(scene, queue, job, sceneResID, sensorResID, samplerResID, integratorResID); 1935 | break; 1936 | case ESeconds: 1937 | result = renderTime(scene, queue, job, sceneResID, sensorResID, samplerResID, integratorResID); 1938 | break; 1939 | default: 1940 | Assert(false); 1941 | break; 1942 | } 1943 | 1944 | sched->unregisterResource(integratorResID); 1945 | 1946 | m_progress = nullptr; 1947 | 1948 | fuseImageSamples(film, m_sppPerPass); 1949 | printMystats(); 1950 | 1951 | return result; 1952 | } 1953 | 1954 | void fuseImageSamples(ref film, int sppPerPass) 1955 | { 1956 | Log(EInfo, "fuseImageSamples begin"); 1957 | HDTimer timer; 1958 | amisSplatSamples(); 1959 | amisSplatPostproc(); 1960 | m_sppPerPass = sppPerPass; 1961 | Log(EInfo, "fuseImageSamples end, use %.3f sec", timer.value()); 1962 | } 1963 | 1964 | void renderBlock(const Scene *scene, const Sensor *sensor, 1965 | Sampler *sampler, ImageBlock *block, const bool &stop, 1966 | const std::vector> &points) const 1967 | { 1968 | 1969 | HDTimer timer; 1970 | 1971 | float diffScaleFactor = 1.0f / 1972 | std::sqrt((float)m_sppPerPass); 1973 | 1974 | bool needsApertureSample = sensor->needsApertureSample(); 1975 | bool needsTimeSample = sensor->needsTimeSample(); 1976 | 1977 | RadianceQueryRecord rRec(scene, sampler); 1978 | Point2 apertureSample(0.5f); 1979 | float timeSample = 0.5f; 1980 | RayDifferential sensorRay; 1981 | 1982 | block->clear(); 1983 | 1984 | uint32_t queryType = RadianceQueryRecord::ESensorRay; 1985 | 1986 | if (!sensor->getFilm()->hasAlpha()) // Don't compute an alpha channel if we don't have to 1987 | queryType &= ~RadianceQueryRecord::EOpacity; 1988 | 1989 | 
for (size_t i = 0; i < points.size(); ++i) 1990 | { 1991 | Point2i offset = Point2i(points[i]) + Vector2i(block->getOffset()); 1992 | if (stop) 1993 | break; 1994 | 1995 | for (int j = 0; j < m_sppPerPass; j++) 1996 | { 1997 | rRec.newQuery(queryType, sensor->getMedium()); 1998 | Point2 samplePos(Point2(offset) + Vector2(rRec.nextSample2D())); 1999 | 2000 | if (needsApertureSample) 2001 | apertureSample = rRec.nextSample2D(); 2002 | if (needsTimeSample) 2003 | timeSample = rRec.nextSample1D(); 2004 | 2005 | Spectrum spec = sensor->sampleRayDifferential( 2006 | sensorRay, samplePos, apertureSample, timeSample); 2007 | 2008 | if (i + j == 0) g_first_vertex = sensorRay.o; 2009 | 2010 | sensorRay.scaleDifferential(diffScaleFactor); 2011 | 2012 | rRec.samplePos = samplePos; 2013 | 2014 | auto L = Li(sensorRay, rRec); 2015 | sampler->advance(); 2016 | } 2017 | } 2018 | 2019 | m_image->put(block); 2020 | } 2021 | 2022 | void cancel() 2023 | { 2024 | const auto &scheduler = Scheduler::getInstance(); 2025 | for (size_t i = 0; i < m_renderProcesses.size(); ++i) 2026 | { 2027 | scheduler->cancel(m_renderProcesses[i]); 2028 | } 2029 | } 2030 | 2031 | Spectrum sampleMat(const BSDF *bsdf, BSDFSamplingRecord &bRec, float &woPdf, float &bsdfPdf, float &dTreePdf, float bsdfSamplingFraction, RadianceQueryRecord &rRec, const DTreeWrapper *dTree) const 2032 | { 2033 | Point2 sample = rRec.nextSample2D(); 2034 | 2035 | auto type = bsdf->getType(); 2036 | if (!m_isBuilt || !dTree || (type & BSDF::EDelta) == (type & BSDF::EAll)) 2037 | { 2038 | auto result = bsdf->sample(bRec, bsdfPdf, sample); 2039 | woPdf = bsdfPdf; 2040 | dTreePdf = 0; 2041 | return result; 2042 | } 2043 | 2044 | Spectrum result; 2045 | if (sample.x < bsdfSamplingFraction) 2046 | { 2047 | sample.x /= bsdfSamplingFraction; 2048 | result = bsdf->sample(bRec, bsdfPdf, sample); 2049 | if (result.isZero()) 2050 | { 2051 | woPdf = bsdfPdf = dTreePdf = 0; 2052 | return Spectrum{0.0f}; 2053 | } 2054 | 2055 | // If we sampled 
a delta component, then we have a 0 probability 2056 | // of sampling that direction via guiding, thus we can return early. 2057 | if (bRec.sampledType & BSDF::EDelta) 2058 | { 2059 | dTreePdf = 0; 2060 | woPdf = bsdfPdf * bsdfSamplingFraction; 2061 | return result / bsdfSamplingFraction; 2062 | } 2063 | 2064 | result *= bsdfPdf; 2065 | } 2066 | else 2067 | { 2068 | sample.x = (sample.x - bsdfSamplingFraction) / (1 - bsdfSamplingFraction); 2069 | bRec.wo = bRec.its.toLocal(dTree->sample(rRec.sampler)); 2070 | result = bsdf->eval(bRec); 2071 | } 2072 | 2073 | pdfMat(woPdf, bsdfPdf, dTreePdf, bsdfSamplingFraction, bsdf, bRec, dTree); 2074 | if (woPdf == 0) 2075 | { 2076 | return Spectrum{0.0f}; 2077 | } 2078 | 2079 | return result / woPdf; 2080 | } 2081 | 2082 | void pdfMat(float &woPdf, float &bsdfPdf, float &dTreePdf, float bsdfSamplingFraction, const BSDF *bsdf, const BSDFSamplingRecord &bRec, const DTreeWrapper *dTree) const 2083 | { 2084 | dTreePdf = 0; 2085 | 2086 | auto type = bsdf->getType(); 2087 | if (!m_isBuilt || !dTree || (type & BSDF::EDelta) == (type & BSDF::EAll)) 2088 | { 2089 | woPdf = bsdfPdf = bsdf->pdf(bRec); 2090 | return; 2091 | } 2092 | 2093 | bsdfPdf = bsdf->pdf(bRec); 2094 | if (!std::isfinite(bsdfPdf)) 2095 | { 2096 | woPdf = 0; 2097 | return; 2098 | } 2099 | 2100 | dTreePdf = dTree->pdf(bRec.its.toWorld(bRec.wo)); 2101 | woPdf = bsdfSamplingFraction * bsdfPdf + (1 - bsdfSamplingFraction) * dTreePdf; 2102 | } 2103 | 2104 | struct Vertex 2105 | { 2106 | DTreeWrapper *dTree; 2107 | Vector dTreeVoxelSize; 2108 | Ray ray; 2109 | 2110 | Spectrum throughput; 2111 | Spectrum bsdfVal; 2112 | 2113 | Spectrum radiance; 2114 | 2115 | float woPdf, bsdfPdf, dTreePdf; 2116 | bool isDelta; 2117 | 2118 | void record(const Spectrum &r) 2119 | { 2120 | radiance += r; 2121 | } 2122 | 2123 | void commit(STree &sdTree, float statisticalWeight, ESpatialFilter spatialFilter, EDirectionalFilter directionalFilter, EBsdfSamplingFractionLoss bsdfSamplingFractionLoss, 
Sampler *sampler) 2124 | { 2125 | if (!(woPdf > 0) || !radiance.isValid() || !bsdfVal.isValid()) 2126 | { 2127 | return; 2128 | } 2129 | 2130 | Spectrum localRadiance = Spectrum{0.0f}; 2131 | if (throughput[0] * woPdf > Epsilon) 2132 | localRadiance[0] = radiance[0] / throughput[0]; 2133 | if (throughput[1] * woPdf > Epsilon) 2134 | localRadiance[1] = radiance[1] / throughput[1]; 2135 | if (throughput[2] * woPdf > Epsilon) 2136 | localRadiance[2] = radiance[2] / throughput[2]; 2137 | Spectrum product = localRadiance * bsdfVal; 2138 | 2139 | DTreeRecord rec{ray.d, localRadiance.average(), product.average(), woPdf, bsdfPdf, dTreePdf, statisticalWeight, isDelta}; 2140 | switch (spatialFilter) 2141 | { 2142 | case ESpatialFilter::ENearest: 2143 | dTree->record(rec, directionalFilter, bsdfSamplingFractionLoss); 2144 | break; 2145 | case ESpatialFilter::EStochasticBox: 2146 | { 2147 | DTreeWrapper *splatDTree = dTree; 2148 | 2149 | // Jitter the actual position within the 2150 | // filter box to perform stochastic filtering. 
2151 | Vector offset = dTreeVoxelSize; 2152 | offset.x *= sampler->next1D() - 0.5f; 2153 | offset.y *= sampler->next1D() - 0.5f; 2154 | offset.z *= sampler->next1D() - 0.5f; 2155 | 2156 | Point origin = sdTree.aabb().clip2(ray.o + offset); 2157 | splatDTree = sdTree.dTreeWrapper(origin); 2158 | if (splatDTree) 2159 | { 2160 | splatDTree->record(rec, directionalFilter, bsdfSamplingFractionLoss); 2161 | } 2162 | break; 2163 | } 2164 | case ESpatialFilter::EBox: 2165 | sdTree.record(ray.o, dTreeVoxelSize, rec, directionalFilter, bsdfSamplingFractionLoss); 2166 | break; 2167 | } 2168 | } 2169 | }; 2170 | 2171 | Spectrum Li(const RayDifferential &r, RadianceQueryRecord &rRec) const 2172 | { 2173 | auto samplePos = rRec.samplePos; 2174 | Vector3f last_wo(0.0f); 2175 | 2176 | static const int MAX_NUM_VERTICES = 32; 2177 | std::array vertices; 2178 | 2179 | /* Some aliases and local variables */ 2180 | const Scene *scene = rRec.scene; 2181 | Intersection &its = rRec.its; 2182 | MediumSamplingRecord mRec; 2183 | RayDifferential ray(r); 2184 | Spectrum Li(0.0f); 2185 | float eta = 1.0f; 2186 | auto p0 = r.o + r.d; 2187 | 2188 | /* Perform the first ray intersection (or ignore if the 2189 | intersection has already been provided). 
*/ 2190 | rRec.rayIntersect(ray); 2191 | 2192 | Spectrum throughput(1.0f); 2193 | bool scattered = false; 2194 | 2195 | float woPdf_product = 1.0f; 2196 | float bsdfPdf_product = 1.0f; 2197 | int nVertices = 0; 2198 | 2199 | std::vector path; 2200 | 2201 | auto recordRadiance = [&](Spectrum radiance) 2202 | { 2203 | Li += radiance; 2204 | for (int i = 0; i < nVertices; ++i) 2205 | { 2206 | vertices[i].record(radiance); 2207 | } 2208 | }; 2209 | 2210 | float emitterRadiance = 0; 2211 | bool pass_through_diffuse = false; 2212 | 2213 | while (rRec.depth <= m_maxDepth || m_maxDepth < 0) 2214 | { 2215 | 2216 | /* ==================================================================== */ 2217 | /* Radiative Transfer Equation sampling */ 2218 | /* ==================================================================== */ 2219 | if (rRec.medium && rRec.medium->sampleDistance(Ray(ray, 0, its.t), mRec, rRec.sampler)) 2220 | { 2221 | 2222 | } 2223 | else 2224 | { 2225 | /* Sample 2226 | tau(x, y) (Surface integral). This happens with probability mRec.pdfFailure 2227 | Account for this and multiply by the proper per-color-channel transmittance. 
2228 | */ 2229 | if (rRec.medium) 2230 | throughput *= mRec.transmittance / mRec.pdfFailure; 2231 | 2232 | if (!its.isValid()) 2233 | { 2234 | /* If no intersection could be found, possibly return 2235 | attenuated radiance from a background luminaire */ 2236 | if ((rRec.type & RadianceQueryRecord::EEmittedRadiance) && (!m_hideEmitters || scattered)) 2237 | { 2238 | Spectrum value = throughput * scene->evalEnvironment(ray); 2239 | if (rRec.medium) 2240 | value *= rRec.medium->evalTransmittance(ray, rRec.sampler); 2241 | recordRadiance(value); 2242 | } 2243 | 2244 | break; 2245 | } 2246 | 2247 | /* Possibly include emitted radiance if requested */ 2248 | if (its.isEmitter() && (rRec.type & RadianceQueryRecord::EEmittedRadiance) && (!m_hideEmitters || scattered)) 2249 | recordRadiance(throughput * its.Le(-ray.d)); 2250 | 2251 | /* Include radiance from a subsurface integrator if requested */ 2252 | if (its.hasSubsurface() && (rRec.type & RadianceQueryRecord::ESubsurfaceRadiance)) 2253 | recordRadiance(throughput * its.LoSub(scene, rRec.sampler, -ray.d, rRec.depth)); 2254 | 2255 | if (rRec.depth >= m_maxDepth && m_maxDepth != -1) 2256 | break; 2257 | 2258 | /* Prevent light leaks due to the use of shading normals */ 2259 | float wiDotGeoN = -dot(its.geoFrame.n, ray.d), 2260 | wiDotShN = Frame::cosTheta(its.wi); 2261 | if (wiDotGeoN * wiDotShN < 0 && m_strictNormals) 2262 | break; 2263 | 2264 | const BSDF *bsdf = its.getBSDF(); 2265 | 2266 | Vector dTreeVoxelSize; 2267 | DTreeWrapper *dTree = nullptr; 2268 | 2269 | // We only guide smooth BRDFs for now. Analytic product sampling 2270 | // would be conceivable for discrete decisions such as refraction vs 2271 | // reflection. 
2272 | if (bsdf->getType() & BSDF::ESmooth) 2273 | { 2274 | if (!its.isEmitter()) pass_through_diffuse = true; 2275 | dTree = m_sdTree->dTreeWrapper(its.p, dTreeVoxelSize); 2276 | } 2277 | 2278 | float bsdfSamplingFraction = m_bsdfSamplingFraction; 2279 | if (dTree && m_bsdfSamplingFractionLoss != EBsdfSamplingFractionLoss::ENone) 2280 | { 2281 | bsdfSamplingFraction = dTree->bsdfSamplingFraction(); 2282 | } 2283 | 2284 | /* ==================================================================== */ 2285 | /* BSDF sampling */ 2286 | /* ==================================================================== */ 2287 | 2288 | /* Sample BSDF * cos(theta) */ 2289 | BSDFSamplingRecord bRec(its, rRec.sampler, ERadiance); 2290 | float woPdf, bsdfPdf, dTreePdf; 2291 | Spectrum bsdfWeight = sampleMat(bsdf, bRec, woPdf, bsdfPdf, dTreePdf, bsdfSamplingFraction, rRec, dTree); 2292 | 2293 | /* ==================================================================== */ 2294 | /* Luminaire sampling */ 2295 | /* ==================================================================== */ 2296 | 2297 | DirectSamplingRecord dRec(its); 2298 | 2299 | /* Estimate the direct illumination if this is requested */ 2300 | if (m_doNee && 2301 | (rRec.type & RadianceQueryRecord::EDirectSurfaceRadiance) && 2302 | (bsdf->getType() & BSDF::ESmooth)) 2303 | { 2304 | int interactions = m_maxDepth - rRec.depth - 1; 2305 | 2306 | Spectrum value = scene->sampleAttenuatedEmitterDirect( 2307 | dRec, its, rRec.medium, interactions, 2308 | rRec.nextSample2D(), rRec.sampler); 2309 | 2310 | if (!value.isZero()) 2311 | { 2312 | BSDFSamplingRecord bRec(its, its.toLocal(dRec.d)); 2313 | 2314 | float woDotGeoN = dot(its.geoFrame.n, dRec.d); 2315 | 2316 | /* Prevent light leaks due to the use of shading normals */ 2317 | if (!m_strictNormals || woDotGeoN * Frame::cosTheta(bRec.wo) > 0) 2318 | { 2319 | /* Evaluate BSDF * cos(theta) */ 2320 | const Spectrum bsdfVal = bsdf->eval(bRec); 2321 | 2322 | /* Calculate prob. 
of having generated that direction using BSDF sampling */ 2323 | const Emitter *emitter = static_cast(dRec.object); 2324 | float woPdf = 0, bsdfPdf = 0, dTreePdf = 0; 2325 | if (emitter->isOnSurface() && dRec.measure == ESolidAngle) 2326 | { 2327 | pdfMat(woPdf, bsdfPdf, dTreePdf, bsdfSamplingFraction, bsdf, bRec, dTree); 2328 | } 2329 | 2330 | /* Weight using the power heuristic */ 2331 | const float weight = miWeight(dRec.pdf, woPdf); 2332 | 2333 | value *= bsdfVal; 2334 | Spectrum L = throughput * value * weight; 2335 | 2336 | if (!m_isFinalIter && m_nee != EAlways) 2337 | { 2338 | if (dTree) 2339 | { 2340 | Vertex v = Vertex{ 2341 | dTree, 2342 | dTreeVoxelSize, 2343 | Ray(its.p, dRec.d, 0), 2344 | throughput * bsdfVal / dRec.pdf, 2345 | bsdfVal, 2346 | L, 2347 | dRec.pdf, 2348 | bsdfPdf, 2349 | dTreePdf, 2350 | false, 2351 | }; 2352 | 2353 | v.commit(*m_sdTree, 0.5f, m_spatialFilter, m_directionalFilter, m_isBuilt ? m_bsdfSamplingFractionLoss : EBsdfSamplingFractionLoss::ENone, rRec.sampler); 2354 | } 2355 | } 2356 | 2357 | recordRadiance(L); 2358 | } 2359 | } 2360 | } 2361 | 2362 | // BSDF handling 2363 | if (bsdfWeight.isZero()) 2364 | break; 2365 | 2366 | /* Prevent light leaks due to the use of shading normals */ 2367 | const Vector wo = its.toWorld(bRec.wo); 2368 | float woDotGeoN = dot(its.geoFrame.n, wo); 2369 | 2370 | if (woDotGeoN * Frame::cosTheta(bRec.wo) <= 0 && m_strictNormals) 2371 | break; 2372 | 2373 | /* Trace a ray in this direction */ 2374 | ray = Ray(its.p, wo, ray.time); 2375 | 2376 | /* Keep track of the throughput, medium, and relative 2377 | refractive index along the path */ 2378 | throughput *= bsdfWeight; 2379 | woPdf_product *= woPdf; 2380 | last_wo = its.toWorld(bRec.wo); 2381 | auto type = bsdf->getType(); 2382 | path.push_back({its.p[0], its.p[1], its.p[2], (type & BSDF::EDelta) != (type & BSDF::EAll) ? 
bsdfPdf : -1}); 2383 | 2384 | eta *= bRec.eta; 2385 | if (its.isMediumTransition()) 2386 | rRec.medium = its.getTargetMedium(ray.d); 2387 | 2388 | /* Handle index-matched medium transitions specially */ 2389 | if (bRec.sampledType == BSDF::ENull) 2390 | { 2391 | if (!(rRec.type & RadianceQueryRecord::EIndirectSurfaceRadiance)) 2392 | break; 2393 | 2394 | // There exist materials that are smooth/null hybrids (e.g. the mask BSDF), which means that 2395 | // for optimal-sampling-fraction optimization we need to record null transitions for such BSDFs. 2396 | if (m_bsdfSamplingFractionLoss != EBsdfSamplingFractionLoss::ENone && dTree && nVertices < MAX_NUM_VERTICES && !m_isFinalIter) 2397 | { 2398 | if (1 / woPdf > 0) 2399 | { 2400 | vertices[nVertices] = Vertex{ 2401 | dTree, 2402 | dTreeVoxelSize, 2403 | ray, 2404 | throughput, 2405 | bsdfWeight * woPdf, 2406 | Spectrum{0.0f}, 2407 | woPdf, 2408 | bsdfPdf, 2409 | dTreePdf, 2410 | true, 2411 | }; 2412 | 2413 | ++nVertices; 2414 | } 2415 | } 2416 | 2417 | rRec.type = scattered ? RadianceQueryRecord::ERadianceNoEmission 2418 | : RadianceQueryRecord::ERadiance; 2419 | scene->rayIntersect(ray, its); 2420 | rRec.depth++; 2421 | continue; 2422 | } 2423 | 2424 | Spectrum value(0.0f); 2425 | rayIntersectAndLookForEmitter(scene, rRec.sampler, rRec.medium, 2426 | m_maxDepth - rRec.depth - 1, ray, its, dRec, value); 2427 | 2428 | /* If a luminaire was hit, estimate the local illumination and 2429 | weight using the power heuristic */ 2430 | if (rRec.type & RadianceQueryRecord::EDirectSurfaceRadiance) 2431 | { 2432 | bool isDelta = bRec.sampledType & BSDF::EDelta; 2433 | const float emitterPdf = (m_doNee && !isDelta && !value.isZero()) ? 
scene->pdfEmitterDirect(dRec) : 0; 2434 | 2435 | const float weight = miWeight(woPdf, emitterPdf); 2436 | Spectrum L = throughput * value * weight; 2437 | if (!L.isZero()) 2438 | { 2439 | recordRadiance(L); 2440 | } 2441 | 2442 | if ((!isDelta || m_bsdfSamplingFractionLoss != EBsdfSamplingFractionLoss::ENone) && dTree && nVertices < MAX_NUM_VERTICES && !m_isFinalIter) 2443 | { 2444 | if (1 / woPdf > 0) 2445 | { 2446 | vertices[nVertices] = Vertex{ 2447 | dTree, 2448 | dTreeVoxelSize, 2449 | ray, 2450 | throughput, 2451 | bsdfWeight * woPdf, 2452 | (m_nee == EAlways) ? Spectrum{0.0f} : L, 2453 | woPdf, 2454 | bsdfPdf, 2455 | dTreePdf, 2456 | isDelta, 2457 | }; 2458 | 2459 | ++nVertices; 2460 | } 2461 | } 2462 | } 2463 | 2464 | /* ==================================================================== */ 2465 | /* Indirect illumination */ 2466 | /* ==================================================================== */ 2467 | 2468 | /* Stop if indirect illumination was not requested */ 2469 | if (!(rRec.type & RadianceQueryRecord::EIndirectSurfaceRadiance)) 2470 | break; 2471 | 2472 | rRec.type = RadianceQueryRecord::ERadianceNoEmission; 2473 | 2474 | // Russian roulette 2475 | if (rRec.depth++ >= m_rrDepth) 2476 | { 2477 | float successProb = 1.0f; 2478 | if (dTree && !(bRec.sampledType & BSDF::EDelta)) 2479 | { 2480 | if (!m_isBuilt) 2481 | { 2482 | successProb = throughput.max() * eta * eta; 2483 | } 2484 | else 2485 | { 2486 | // The adjoint russian roulette implementation of Mueller et al. [2017] 2487 | // was broken, effectively turning off russian roulette entirely. 2488 | // For reproducibility's sake, we therefore removed adjoint russian roulette 2489 | // from this codebase rather than fixing it. 
2490 | } 2491 | 2492 | successProb = std::max(0.1f, std::min(successProb, 0.99f)); 2493 | } 2494 | 2495 | if (rRec.nextSample1D() >= successProb) 2496 | break; 2497 | throughput /= successProb; 2498 | } 2499 | } 2500 | 2501 | scattered = true; 2502 | } 2503 | avgPathLength.incrementBase(); 2504 | avgPathLength += rRec.depth; 2505 | 2506 | // int splatToDistr = g_tempParam > 0 ? 1 - splatToFilm : 1; 2507 | 2508 | int splatToFilm = 1; 2509 | int splatToDistr = 1; 2510 | 2511 | if (splatToDistr && nVertices > 0 && !m_isFinalIter) 2512 | { 2513 | for (int i = 0; i < nVertices; ++i) 2514 | { 2515 | vertices[i].commit(*m_sdTree, m_nee == EKickstart && m_doNee ? 0.5f : 1.0f, m_spatialFilter, m_directionalFilter, m_isBuilt ? m_bsdfSamplingFractionLoss : EBsdfSamplingFractionLoss::ENone, rRec.sampler); 2516 | } 2517 | } 2518 | 2519 | if (splatToFilm) 2520 | { 2521 | int thread_id = Thread::getThread()->getID(); 2522 | if (s_amisBufferThreadlocal[thread_id].size() == 0) 2523 | { 2524 | } 2525 | float energy = Li[0] + Li[1] + Li[2]; 2526 | if (isnan(energy) || isinf(energy)) 2527 | energy = 0; 2528 | if (!m_isFinalIter) 2529 | statsImageSamples++; 2530 | bool contributed = energy > 0; 2531 | if (contributed && !m_isFinalIter) 2532 | { 2533 | statsImageSamplesNonzero++; 2534 | } 2535 | if (contributed && path.size() > 0) 2536 | { 2537 | RawImageSample sample = {path, dirToCanonical(last_wo), Li, m_iter, energy}; 2538 | bool flag = true; 2539 | if (!s_amisBufferThreadlocal[thread_id].empty() && statsRecordedVertices * 16.0 / 1048576 > std::abs(g_tempParam)) 2540 | { 2541 | if (sample.original_radiance < s_amisBufferThreadlocal[thread_id].top().original_radiance) 2542 | { 2543 | flag = false; 2544 | } 2545 | } 2546 | if (!m_isFinalIter && path.size() > 0 && pass_through_diffuse) { 2547 | if (flag) { 2548 | s_amisBufferThreadlocal[thread_id].push(sample); 2549 | statsImageSamplesAMIS++; 2550 | statsRecordedVertices += path.size() + 1; // the final iter is not counted since it can 
be optimized 2551 | while (statsRecordedVertices * 16.0 / 1048576 > std::abs(g_tempParam) && !s_amisBufferThreadlocal[thread_id].empty()) // number of cores 2552 | { 2553 | auto sample = s_amisBufferThreadlocal[thread_id].top(); 2554 | s_amisBufferThreadlocal[thread_id].pop(); 2555 | amisSplatOneSample(sample, g_tempParam < 0); 2556 | statsRecordedVertices -= sample.path.size() + 1; // the final iter is not counted since it can be optimized 2557 | } 2558 | } 2559 | else { 2560 | amisSplatOneSample(sample, g_tempParam < 0); 2561 | } 2562 | } 2563 | else { 2564 | if (!pass_through_diffuse || path.size() == 0 ) { 2565 | amisSplatOneSample(sample, false); 2566 | } 2567 | else 2568 | { 2569 | amisSplatOneSample(sample, g_tempParam < 0 ? true : flag); 2570 | } 2571 | } 2572 | } 2573 | else 2574 | { 2575 | m_amisImage->put(samplePos, Li, rRec.alpha); 2576 | } 2577 | } 2578 | 2579 | return Li; 2580 | } 2581 | 2582 | /** 2583 | * This function is called by the recursive ray tracing above after 2584 | * having sampled a direction from a BSDF/phase function. Due to the 2585 | * way in which this integrator deals with index-matched boundaries, 2586 | * it is necessarily a bit complicated (though the improved performance 2587 | * easily pays for the extra effort). 2588 | * 2589 | * This function 2590 | * 2591 | * 1. Intersects 'ray' against the scene geometry and returns the 2592 | * *first* intersection via the '_its' argument. 2593 | * 2594 | * 2. It checks whether the intersected shape was an emitter, or if 2595 | * the ray intersects nothing and there is an environment emitter. 2596 | * In this case, it returns the attenuated emittance, as well as 2597 | * a DirectSamplingRecord that can be used to query the hypothetical 2598 | * sampling density at the emitter. 2599 | * 2600 | * 3. 
If current shape is an index-matched medium transition, the 2601 | * integrator keeps on looking on whether a light source eventually 2602 | * follows after a potential chain of index-matched medium transitions, 2603 | * while respecting the specified 'maxDepth' limits. It then returns 2604 | * the attenuated emittance of this light source, while accounting for 2605 | * all attenuation that occurs on the wya. 2606 | */ 2607 | void rayIntersectAndLookForEmitter(const Scene *scene, Sampler *sampler, 2608 | const Medium *medium, int maxInteractions, Ray ray, Intersection &_its, 2609 | DirectSamplingRecord &dRec, Spectrum &value) const 2610 | { 2611 | Intersection its2, *its = &_its; 2612 | Spectrum transmittance(1.0f); 2613 | bool surface = false; 2614 | int interactions = 0; 2615 | 2616 | while (true) 2617 | { 2618 | surface = scene->rayIntersect(ray, *its); 2619 | 2620 | if (medium) 2621 | transmittance *= medium->evalTransmittance(Ray(ray, 0, its->t), sampler); 2622 | 2623 | if (surface && (interactions == maxInteractions || 2624 | !(its->getBSDF()->getType() & BSDF::ENull) || 2625 | its->isEmitter())) 2626 | { 2627 | /* Encountered an occluder / light source */ 2628 | break; 2629 | } 2630 | 2631 | if (!surface) 2632 | break; 2633 | 2634 | if (transmittance.isZero()) 2635 | return; 2636 | 2637 | if (its->isMediumTransition()) 2638 | medium = its->getTargetMedium(ray.d); 2639 | 2640 | Vector wo = its->shFrame.toLocal(ray.d); 2641 | BSDFSamplingRecord bRec(*its, -wo, wo, ERadiance); 2642 | bRec.typeMask = BSDF::ENull; 2643 | transmittance *= its->getBSDF()->eval(bRec, EDiscrete); 2644 | 2645 | ray.o = ray(its->t); 2646 | ray.mint = Epsilon; 2647 | its = &its2; 2648 | 2649 | if (++interactions > 100) 2650 | { /// Just a precaution.. 
2651 | Log(EWarn, "rayIntersectAndLookForEmitter(): round-off error issues?"); 2652 | return; 2653 | } 2654 | } 2655 | 2656 | if (surface) 2657 | { 2658 | /* Intersected something - check if it was a luminaire */ 2659 | if (its->isEmitter()) 2660 | { 2661 | dRec.setQuery(ray, *its); 2662 | value = transmittance * its->Le(-ray.d); 2663 | } 2664 | } 2665 | else 2666 | { 2667 | /* Intersected nothing -- perhaps there is an environment map? */ 2668 | const Emitter *env = scene->getEnvironmentEmitter(); 2669 | 2670 | if (env && env->fillDirectSamplingRecord(dRec, ray)) 2671 | { 2672 | value = transmittance * env->evalEnvironment(RayDifferential(ray)); 2673 | dRec.dist = std::numeric_limits::infinity(); 2674 | its->t = std::numeric_limits::infinity(); 2675 | } 2676 | } 2677 | } 2678 | 2679 | float miWeight(float pdfA, float pdfB) const 2680 | { 2681 | // pdfA *= pdfA; 2682 | // pdfB *= pdfB; 2683 | return pdfA / (pdfA + pdfB); 2684 | } 2685 | 2686 | std::string toString() const 2687 | { 2688 | std::ostringstream oss; 2689 | oss << "GuidedPathTracerAMISPathspace[" << endl 2690 | << " maxDepth = " << m_maxDepth << "," << endl 2691 | << " rrDepth = " << m_rrDepth << "," << endl 2692 | << " strictNormals = " << m_strictNormals << endl 2693 | << "]"; 2694 | return oss.str(); 2695 | } 2696 | 2697 | double getAMISPdf(double bsdfPdf, const Point3f &pos, const Vector3f &dir, int iter, DTreeWrapper* dtw) const 2698 | { 2699 | double woPdf_film = 0.0f; 2700 | if (iter > 0 && bsdfPdf > 0) 2701 | { 2702 | woPdf_film += bsdfPdf * 0.5; 2703 | float dtpdf = dtw->pdfHistory(dir, iter - 1); 2704 | if (isnan(dtpdf) || isinf(dtpdf)) 2705 | { 2706 | dtpdf = 0; 2707 | } 2708 | woPdf_film += dtpdf * 0.5; 2709 | } 2710 | else 2711 | { 2712 | woPdf_film = bsdfPdf; 2713 | } 2714 | return woPdf_film; 2715 | } 2716 | 2717 | 2718 | mutable int len_counts[64]; 2719 | 2720 | void amisSplatOneSample(const RawImageSample& sample, bool reweight = true) const 2721 | { 2722 | auto f3 = [](const Point4f& 
point) -> Point3f { 2723 | return {point[0], point[1], point[2]}; 2724 | }; 2725 | 2726 | int filmWidth = m_film->getSize().x, filmHeight = m_film->getSize().y; 2727 | auto path = sample.path; 2728 | Vector3f tt = normalize(f3(sample.path[0]) - g_first_vertex); 2729 | Intersection its; 2730 | PositionSamplingRecord psr(its); 2731 | psr.time = 0; 2732 | DirectionSamplingRecord dsr({tt.x, tt.y, tt.z}); 2733 | Point2 ps; 2734 | bool fl = g_sensor->getSamplePosition(psr, dsr, ps); 2735 | int x = ps[0], y = ps[1]; 2736 | int iter = sample.iter; 2737 | x = std::max(0, x); 2738 | x = std::min(x, filmWidth - 1); 2739 | y = std::max(0, y); 2740 | y = std::min(y, filmHeight - 1); 2741 | 2742 | Spectrum ans = sample.value; 2743 | int n_iter = m_sampleCounts.size(); 2744 | if (reweight){ 2745 | float factor = 1.f; 2746 | std::vector pdfForEachIters(m_sampleCounts.size(), 1.0f); 2747 | float pdfMixture = 0.f, denom = 0.f; 2748 | len_counts[path.size()]++; 2749 | for (int j = 0; j < path.size(); j++) 2750 | { 2751 | auto amisRec = path[j]; 2752 | auto dtw = m_sdTree->dTreeWrapper(f3(amisRec)); 2753 | int iter_start = 0, iter_end = m_sampleCounts.size(); 2754 | for (int iter = iter_start; iter < iter_end; iter++) { 2755 | Point3f dir_begin = f3(path[j]); 2756 | Point3f dir_end = j + 1 == path.size() ? dir_begin : f3(path[j + 1]); 2757 | Vector3f dir = j + 1 == path.size() ? 
canonicalToDir(sample.last_dir) : normalize(dir_end - dir_begin); 2758 | auto p = getAMISPdf(amisRec[3], f3(amisRec), dir, iter, dtw); 2759 | pdfForEachIters[iter] *= p; 2760 | if (iter == sample.iter) factor *= p; 2761 | } 2762 | } 2763 | for (int iter = 0; iter < m_sampleCounts.size(); iter++) 2764 | { 2765 | float var_fac = 1; 2766 | pdfMixture += pdfForEachIters[iter] * m_sampleCounts[iter] * var_fac; 2767 | denom += m_sampleCounts[iter]; 2768 | } 2769 | pdfMixture /= denom; 2770 | ans *= factor / pdfMixture; 2771 | } 2772 | m_amisImage->put(Point2f(x + 0.5f, y + 0.5f), ans, 1.0f); 2773 | } 2774 | 2775 | void amisSplatSamples() 2776 | { 2777 | auto *amisBufferPtr = &s_amisBuffer; 2778 | auto &amisBuffer = *amisBufferPtr; 2779 | 2780 | HDTimer timer_splat; 2781 | 2782 | // merge buffers 2783 | std::vector *> amisBufferPtrList; 2784 | for (auto &[x, y] : s_amisBufferThreadlocal) 2785 | { 2786 | amisBufferPtrList.push_back(&y); 2787 | } 2788 | 2789 | // splat 2790 | std::cout << "begin splat " << std::endl; 2791 | #pragma omp parallel for 2792 | for (int k = 0; k < amisBufferPtrList.size(); k++) 2793 | { 2794 | auto amisBufPtr = amisBufferPtrList[k]; 2795 | while(!amisBufPtr->empty()) 2796 | { 2797 | auto sample = amisBufPtr->top(); 2798 | amisBufPtr->pop(); 2799 | amisSplatOneSample(sample); 2800 | } 2801 | } 2802 | 2803 | for(int i = 0; i < 64; i++) 2804 | { 2805 | if(len_counts[i] > 0) 2806 | { 2807 | std::cout << "len: " << i << " count: " << len_counts[i] << std::endl; 2808 | } 2809 | } 2810 | } 2811 | 2812 | void amisSplatPostproc() 2813 | { 2814 | auto film = m_film; 2815 | film->clear(); 2816 | ref imgBlockAMISImage = new ImageBlock(Bitmap::ESpectrum, film->getCropSize()); 2817 | ref imageBlockResidualImage = new ImageBlock(Bitmap::ESpectrum, film->getCropSize()); 2818 | int r = film->getReconstructionFilter()->getBorderSize(); 2819 | float coef = 1.0f; 2820 | m_amisImage->getBitmap()->crop(Point2i(r, r), 
imgBlockAMISImage->getSize())->convert(imgBlockAMISImage->getBitmap(), coef);
        film->addBitmap(imgBlockAMISImage->getBitmap());
    }

private:
    // NOTE(review): several declarations below lost their template arguments
    // (e.g. "std::shared_ptr m_sdTree;") when this copy was archived through
    // HTML escaping; the original angle-bracketed arguments must be restored
    // before this file can compile. The exact argument types cannot be
    // recovered from this chunk alone -- confirm against the upstream file.

    /// The datastructure for guiding paths.
    std::shared_ptr m_sdTree;

    /// The squared values of our currently rendered image. Used to estimate variance.
    mutable ref m_squaredImage;
    /// The currently rendered image. Used to estimate variance.
    mutable ref m_image;

    /// Per-iteration images kept for sample combination.
    std::vector> m_images;
    /// Per-iteration mean pixel variances (used by the "inversevar" combination).
    std::vector m_variances;
    /// Number of samples rendered in each iteration; feeds the AMIS mixture weights.
    std::vector m_sampleCounts;
    /// Experimental knob (mirrors g_tempParam).
    mutable float m_tempParam;
    /// This contains the currently estimated variance.
    mutable ref m_varianceBuffer;

    /// The modes of NEE which are supported.
    enum ENee
    {
        ENever,
        EKickstart,
        EAlways,
    };

    /**
        How to perform next event estimation (NEE). The following values are valid:
        - "never": Never performs NEE.
        - "kickstart": Performs NEE for the first few iterations to initialize
          the SDTree with good direct illumination estimates.
        - "always": Always performs NEE.
        Default = "never"
    */
    std::string m_neeStr;
    ENee m_nee;

    /// Whether Li should currently perform NEE (automatically set during rendering based on m_nee).
    bool m_doNee;

    enum EBudget
    {
        ESpp,
        ESeconds,
    };

    /**
        What type of budget to use. The following values are valid:
        - "spp": Budget is the number of samples per pixel.
        - "seconds": Budget is a time in seconds.
        Default = "seconds"
    */
    std::string m_budgetStr;
    EBudget m_budgetType;
    float m_budget;

    /// Set once the first SD-tree build has happened.
    bool m_isBuilt = false;
    /// Index of the current training/rendering iteration.
    int m_iter;
    /// True while rendering the final iteration (samples no longer train the SD-tree).
    bool m_isFinalIter = false;

    int m_sppPerPass;

    int m_passesRendered;
    int m_passesRenderedThisIter;
    mutable std::unique_ptr m_progress;

    std::vector> m_renderProcesses;

    /**
        How to combine the samples from all path-guiding iterations:
        - "discard": Discard all but the last iteration.
        - "automatic": Discard all but the last iteration, but automatically assign an appropriately
          larger budget to the last [Mueller et al. 2018].
        - "inversevar": Combine samples of the last 4 iterations based on their
          mean pixel variance [Mueller et al. 2018].
        Default = "automatic" (for reproducibility)
        Recommended = "inversevar"
    */
    std::string m_sampleCombinationStr;
    ESampleCombination m_sampleCombination;

    std::string m_sampleAllocSeqStr;
    ESampleAllocSeq m_sampleAllocSeq;

    /// Maximum memory footprint of the SDTree in MB. Stops subdividing once reached. -1 to disable.
    int m_sdTreeMaxMemory;

    /**
        The spatial filter to use when splatting radiance samples into the SDTree.
        The following values are valid:
        - "nearest": No filtering [Mueller et al. 2017].
        - "stochastic": Stochastic box filter; improves upon Mueller et al. [2017]
          at nearly no computational cost.
        - "box": Box filter; improves the quality further at significant
          additional computational cost.
        Default = "nearest" (for reproducibility)
        Recommended = "stochastic"
    */
    std::string m_spatialFilterStr;
    ESpatialFilter m_spatialFilter;

    /**
        The directional filter to use when splatting radiance samples into the SDTree.
        The following values are valid:
        - "nearest": No filtering [Mueller et al. 2017].
        - "box": Box filter; improves upon Mueller et al. [2017]
          at nearly no computational cost.
        Default = "nearest" (for reproducibility)
        Recommended = "box"
    */
    std::string m_directionalFilterStr;
    EDirectionalFilter m_directionalFilter;

    /**
        Leaf nodes of the spatial binary tree are subdivided if the number of samples
        they received in the last iteration exceeds c * sqrt(2^k) where c is this value
        and k is the iteration index. The first iteration has k==0.
        Default = 12000 (for reproducibility)
        Recommended = 4000
    */
    int m_sTreeThreshold;

    /**
        Leaf nodes of the directional quadtree are subdivided if the fraction
        of energy they carry exceeds this value.
        Default = 0.01 (1%)
    */
    float m_dTreeThreshold;

    /**
        When guiding, we perform MIS with the balance heuristic between the guiding
        distribution and the BSDF, combined with probabilistically choosing one of the
        two sampling methods. This factor controls how often the BSDF is sampled
        vs. how often the guiding distribution is sampled.
        Default = 0.5 (50%)
    */
    float m_bsdfSamplingFraction;

    /**
        The loss function to use when learning the bsdfSamplingFraction using gradient
        descent, following the theory of Neural Importance Sampling [Mueller et al. 2018].
        The following values are valid:
        - "none": No learning (uses the fixed `m_bsdfSamplingFraction`).
        - "kl": Optimizes bsdfSamplingFraction w.r.t. the KL divergence.
        - "var": Optimizes bsdfSamplingFraction w.r.t. variance.
        Default = "none" (for reproducibility)
        Recommended = "kl"
    */
    std::string m_bsdfSamplingFractionLossStr;
    EBsdfSamplingFractionLoss m_bsdfSamplingFractionLoss;

    /**
        Whether to dump a binary representation of the SD-Tree to disk after every
        iteration. The dumped SD-Tree can be visualized with the accompanying
        visualizer tool.
        Default = false
    */
    bool m_dumpSDTree;

    /// The time at which rendering started.
    std::chrono::steady_clock::time_point m_startTime;
    /// The output film (provides resolution and reconstruction-filter border size).
    ref m_film;

public:
    MTS_DECLARE_CLASS()

    /// Global (merged) archive of AMIS path samples.
    static std::vector s_amisBuffer;
    /// Per-thread archives of AMIS path samples, keyed by thread id.
    static std::map> s_amisBufferThreadlocal;
    /// Accumulation image that AMIS-reweighted samples are splatted into.
    mutable ref m_amisImage;
};

// Definitions of the static archive members declared above.
std::vector GuidedPathTracerAMISPathspace::s_amisBuffer;
std::map> GuidedPathTracerAMISPathspace::s_amisBufferThreadlocal;

MTS_IMPLEMENT_CLASS(GuidedPathTracerAMISPathspace, false, MonteCarloIntegrator)
MTS_EXPORT_PLUGIN(GuidedPathTracerAMISPathspace, "Guided path tracer AMIS Experimental");
MTS_NAMESPACE_END