├── README.md ├── bunny_glass_corn.png ├── cutil_math.h └── tutorial2_cuda_pathtracer.cu /README.md: -------------------------------------------------------------------------------- 1 | # GPU-path-tracing-tutorial-2 2 | Basic CUDA path tracer with triangle mesh support (based on CUDA raytracer from http://cg.alexandra.dk/?p=278) 3 | Sam Lapere, 2015 4 | 5 | More details at https://raytracey.blogspot.com/2015/12/gpu-path-tracing-tutorial-2-interactive.html 6 | 7 | In order to keep the code to a minimum, there are lots of hardcoded values at the moment. The comments should clarify most of what's happening but let me know if something isn't clear. 8 | 9 | The code probably contains some bugs as I haven't had much time to do many testing. It will probably be revised for the next tutorial. 10 | 11 | The executable needs glew32.dll and glut32.dll to run and the triangle meshes (bunny and teapot) should be stored in folder named "data" that resides in the project folder. 12 | 13 | Screenshot (path tracing a simple triangle mesh): 14 | 15 | ![Image description](https://github.com/straaljager/GPU-path-tracing-tutorial-2/blob/master/bunny_glass_corn.png) 16 | 17 | For more screenshots produced with this code, see http://raytracey.blogspot.co.nz 18 | 19 | Stanford Bunny mesh from https://graphics.stanford.edu/~mdfisher/Data/Meshes/bunny.obj 20 | Berkeley teapot mesh from http://inst.eecs.berkeley.edu/~cs184/sp09/assignments/teapot.obj 21 | 22 | -------------------------------------------------------------------------------- /bunny_glass_corn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/straaljager/GPU-path-tracing-with-CUDA-tutorial-2/5c3f611bbf1f9d0bae3602343278214b2de9acf3/bunny_glass_corn.png -------------------------------------------------------------------------------- /cutil_math.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2009 NVIDIA Corporation. All rights reserved. 3 | * 4 | * NVIDIA Corporation and its licensors retain all intellectual property and 5 | * proprietary rights in and to this software and related documentation and 6 | * any modifications thereto. Any use, reproduction, disclosure, or distribution 7 | * of this software and related documentation without an express license 8 | * agreement from NVIDIA Corporation is strictly prohibited. 9 | * 10 | */ 11 | 12 | /* 13 | This file implements common mathematical operations on vector types 14 | (float3, float4 etc.) since these are not provided as standard by CUDA. 15 | 16 | The syntax is modelled on the Cg standard library. 17 | */ 18 | 19 | #ifndef CUTIL_MATH_H 20 | #define CUTIL_MATH_H 21 | 22 | #include "cuda_runtime.h" 23 | 24 | //////////////////////////////////////////////////////////////////////////////// 25 | typedef unsigned int uint; 26 | typedef unsigned short ushort; 27 | 28 | #ifndef __CUDACC__ 29 | #include 30 | 31 | inline float fminf(float a, float b) 32 | { 33 | return a < b ? a : b; 34 | } 35 | 36 | inline float fmaxf(float a, float b) 37 | { 38 | return a > b ? a : b; 39 | } 40 | 41 | inline int max(int a, int b) 42 | { 43 | return a > b ? a : b; 44 | } 45 | 46 | inline int min(int a, int b) 47 | { 48 | return a < b ? a : b; 49 | } 50 | 51 | inline float rsqrtf(float x) 52 | { 53 | return 1.0f / sqrtf(x); 54 | } 55 | #endif 56 | 57 | // float functions 58 | //////////////////////////////////////////////////////////////////////////////// 59 | 60 | // lerp 61 | inline __device__ __host__ float lerp(float a, float b, float t) 62 | { 63 | return a + t*(b-a); 64 | } 65 | 66 | // clamp 67 | inline __device__ __host__ float clamp(float f, float a, float b) 68 | { 69 | return fmaxf(a, fminf(f, b)); 70 | } 71 | 72 | // int2 functions 73 | //////////////////////////////////////////////////////////////////////////////// 74 | 75 | // negate 76 | inline __host__ __device__ int2 operator-(int2 &a) 77 | { 78 | return make_int2(-a.x, -a.y); 79 | } 80 | 81 | // addition 82 | inline __host__ __device__ int2 operator+(int2 a, int2 b) 83 | { 84 | return make_int2(a.x + b.x, a.y + b.y); 85 | } 86 | inline __host__ __device__ void operator+=(int2 &a, int2 b) 87 | { 88 | a.x += b.x; a.y += b.y; 89 | } 90 | 91 | // subtract 92 | inline __host__ __device__ int2 operator-(int2 a, int2 b) 93 | { 94 | return make_int2(a.x - b.x, a.y - b.y); 95 | } 96 | inline __host__ __device__ void operator-=(int2 &a, int2 b) 97 | { 98 | a.x -= b.x; a.y -= b.y; 99 | } 100 | 101 | // multiply 102 | inline __host__ __device__ int2 operator*(int2 a, int2 b) 103 | { 104 | return make_int2(a.x * b.x, a.y * b.y); 105 | } 106 | inline __host__ __device__ int2 operator*(int2 a, int s) 107 | { 108 | return make_int2(a.x * s, a.y * s); 109 | } 110 | inline __host__ __device__ int2 operator*(int s, int2 a) 111 | { 112 | return make_int2(a.x * s, a.y * s); 113 | } 114 | inline __host__ __device__ void operator*=(int2 &a, int s) 115 | { 116 | a.x *= s; a.y *= s; 117 | } 118 | 119 | // float2 functions 120 | //////////////////////////////////////////////////////////////////////////////// 121 | 122 | // additional constructors 123 | inline __host__ __device__ float2 make_float2(float s) 124 | { 125 | return make_float2(s, s); 126 | } 127 | inline __host__ __device__ float2 make_float2(int2 a) 128 | { 129 | return make_float2(float(a.x), float(a.y)); 130 | } 131 | 132 | // negate 133 | inline __host__ __device__ float2 operator-(float2 &a) 134 | { 135 | return make_float2(-a.x, -a.y); 136 | } 137 | 138 | // addition 139 | inline __host__ __device__ float2 operator+(float2 a, float2 b) 140 | { 141 | return make_float2(a.x + b.x, a.y + b.y); 142 | } 143 | inline __host__ __device__ void operator+=(float2 &a, float2 b) 144 | { 145 | a.x += b.x; a.y += b.y; 146 | } 147 | 148 | // subtract 149 | inline __host__ __device__ float2 operator-(float2 a, float2 b) 150 | { 151 | return make_float2(a.x - b.x, a.y - b.y); 152 | } 153 | inline __host__ __device__ void operator-=(float2 &a, float2 b) 154 | { 155 | a.x -= b.x; a.y -= b.y; 156 | } 157 | 158 | // multiply 159 | inline __host__ __device__ float2 operator*(float2 a, float2 b) 160 | { 161 | return make_float2(a.x * b.x, a.y * b.y); 162 | } 163 | inline __host__ __device__ float2 operator*(float2 a, float s) 164 | { 165 | return make_float2(a.x * s, a.y * s); 166 | } 167 | inline __host__ __device__ float2 operator*(float s, float2 a) 168 | { 169 | return make_float2(a.x * s, a.y * s); 170 | } 171 | inline __host__ __device__ void operator*=(float2 &a, float s) 172 | { 173 | a.x *= s; a.y *= s; 174 | } 175 | 176 | // divide 177 | inline __host__ __device__ float2 operator/(float2 a, float2 b) 178 | { 179 | return make_float2(a.x / b.x, a.y / b.y); 180 | } 181 | inline __host__ __device__ float2 operator/(float2 a, float s) 182 | { 183 | float inv = 1.0f / s; 184 | return a * inv; 185 | } 186 | inline __host__ __device__ float2 operator/(float s, float2 a) 187 | { 188 | float inv = 1.0f / s; 189 | return a * inv; 190 | } 191 | inline __host__ __device__ void operator/=(float2 &a, float s) 192 | { 193 | float inv = 1.0f / s; 194 | a *= inv; 195 | } 196 | 197 | // lerp 198 | inline __device__ __host__ float2 lerp(float2 a, float2 b, float t) 199 | { 200 | return a + t*(b-a); 201 | } 202 | 203 | // clamp 204 | inline __device__ __host__ float2 clamp(float2 v, float a, float b) 205 | { 206 | return make_float2(clamp(v.x, a, b), clamp(v.y, a, b)); 207 | } 208 | 209 | inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b) 210 | { 211 | return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); 212 | } 213 | 214 | // dot product 215 | inline __host__ __device__ float dot(float2 a, float2 b) 216 | { 217 | return a.x * b.x + a.y * b.y; 218 | } 219 | 220 | // length 221 | inline __host__ __device__ float length(float2 v) 222 | { 223 | return sqrtf(dot(v, v)); 224 | } 225 | 226 | // normalize 227 | inline __host__ __device__ float2 normalize(float2 v) 228 | { 229 | float invLen = rsqrtf(dot(v, v)); 230 | return v * invLen; 231 | } 232 | 233 | // floor 234 | inline __host__ __device__ float2 floor(const float2 v) 235 | { 236 | return make_float2(floor(v.x), floor(v.y)); 237 | } 238 | 239 | // reflect 240 | inline __host__ __device__ float2 reflect(float2 i, float2 n) 241 | { 242 | return i - 2.0f * n * dot(n,i); 243 | } 244 | 245 | // absolute value 246 | inline __host__ __device__ float2 fabs(float2 v) 247 | { 248 | return make_float2(fabs(v.x), fabs(v.y)); 249 | } 250 | 251 | // float3 functions 252 | //////////////////////////////////////////////////////////////////////////////// 253 | 254 | // additional constructors 255 | inline __host__ __device__ float3 make_float3(float s) 256 | { 257 | return make_float3(s, s, s); 258 | } 259 | inline __host__ __device__ float3 make_float3(float2 a) 260 | { 261 | return make_float3(a.x, a.y, 0.0f); 262 | } 263 | inline __host__ __device__ float3 make_float3(float2 a, float s) 264 | { 265 | return make_float3(a.x, a.y, s); 266 | } 267 | inline __host__ __device__ float3 make_float3(float4 a) 268 | { 269 | return make_float3(a.x, a.y, a.z); // discards w 270 | } 271 | inline __host__ __device__ float3 make_float3(int3 a) 272 | { 273 | return make_float3(float(a.x), float(a.y), float(a.z)); 274 | } 275 | 276 | // negate 277 | inline __host__ __device__ float3 operator-(float3 &a) 278 | { 279 | return make_float3(-a.x, -a.y, -a.z); 280 | } 281 | 282 | // min 283 | static __inline__ __host__ __device__ float3 fminf(float3 a, float3 b) 284 | { 285 | return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z)); 286 | } 287 | 288 | // max 289 | static __inline__ __host__ __device__ float3 fmaxf(float3 a, float3 b) 290 | { 291 | return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z)); 292 | } 293 | 294 | // addition 295 | inline __host__ __device__ float3 operator+(float3 a, float3 b) 296 | { 297 | return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); 298 | } 299 | inline __host__ __device__ float3 operator+(float3 a, float b) 300 | { 301 | return make_float3(a.x + b, a.y + b, a.z + b); 302 | } 303 | inline __host__ __device__ void operator+=(float3 &a, float3 b) 304 | { 305 | a.x += b.x; a.y += b.y; a.z += b.z; 306 | } 307 | 308 | // subtract 309 | inline __host__ __device__ float3 operator-(float3 a, float3 b) 310 | { 311 | return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); 312 | } 313 | inline __host__ __device__ float3 operator-(float3 a, float b) 314 | { 315 | return make_float3(a.x - b, a.y - b, a.z - b); 316 | } 317 | inline __host__ __device__ void operator-=(float3 &a, float3 b) 318 | { 319 | a.x -= b.x; a.y -= b.y; a.z -= b.z; 320 | } 321 | 322 | // multiply 323 | inline __host__ __device__ float3 operator*(float3 a, float3 b) 324 | { 325 | return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); 326 | } 327 | inline __host__ __device__ float3 operator*(float3 a, float s) 328 | { 329 | return make_float3(a.x * s, a.y * s, a.z * s); 330 | } 331 | inline __host__ __device__ float3 operator*(float s, float3 a) 332 | { 333 | return make_float3(a.x * s, a.y * s, a.z * s); 334 | } 335 | inline __host__ __device__ void operator*=(float3 &a, float s) 336 | { 337 | a.x *= s; a.y *= s; a.z *= s; 338 | } 339 | inline __host__ __device__ void operator*=(float3 &a, float3 b) 340 | { 341 | a.x *= b.x; a.y *= b.y; a.z *= b.z;; 342 | } 343 | 344 | // divide 345 | inline __host__ __device__ float3 operator/(float3 a, float3 b) 346 | { 347 | return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); 348 | } 349 | inline __host__ __device__ float3 operator/(float3 a, float s) 350 | { 351 | float inv = 1.0f / s; 352 | return a * inv; 353 | } 354 | inline __host__ __device__ float3 operator/(float s, float3 a) 355 | { 356 | float inv = 1.0f / s; 357 | return a * inv; 358 | } 359 | inline __host__ __device__ void operator/=(float3 &a, float s) 360 | { 361 | float inv = 1.0f / s; 362 | a *= inv; 363 | } 364 | 365 | // lerp 366 | inline __device__ __host__ float3 lerp(float3 a, float3 b, float t) 367 | { 368 | return a + t*(b-a); 369 | } 370 | 371 | // clamp 372 | inline __device__ __host__ float3 clamp(float3 v, float a, float b) 373 | { 374 | return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); 375 | } 376 | 377 | inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b) 378 | { 379 | return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); 380 | } 381 | 382 | // dot product 383 | inline __host__ __device__ float dot(float3 a, float3 b) 384 | { 385 | return a.x * b.x + a.y * b.y + a.z * b.z; 386 | } 387 | 388 | // cross product 389 | inline __host__ __device__ float3 cross(float3 a, float3 b) 390 | { 391 | return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); 392 | } 393 | 394 | // length 395 | inline __host__ __device__ float length(float3 v) 396 | { 397 | return sqrtf(dot(v, v)); 398 | } 399 | 400 | // normalize 401 | inline __host__ __device__ float3 normalize(float3 v) 402 | { 403 | float invLen = rsqrtf(dot(v, v)); 404 | return v * invLen; 405 | } 406 | 407 | // floor 408 | inline __host__ __device__ float3 floor(const float3 v) 409 | { 410 | return make_float3(floor(v.x), floor(v.y), floor(v.z)); 411 | } 412 | 413 | // reflect 414 | inline __host__ __device__ float3 reflect(float3 i, float3 n) 415 | { 416 | return i - 2.0f * n * dot(n,i); 417 | } 418 | 419 | // absolute value 420 | inline __host__ __device__ float3 fabs(float3 v) 421 | { 422 | return make_float3(fabs(v.x), fabs(v.y), fabs(v.z)); 423 | } 424 | 425 | // float4 functions 426 | //////////////////////////////////////////////////////////////////////////////// 427 | 428 | // additional constructors 429 | inline __host__ __device__ float4 make_float4(float s) 430 | { 431 | return make_float4(s, s, s, s); 432 | } 433 | inline __host__ __device__ float4 make_float4(float3 a) 434 | { 435 | return make_float4(a.x, a.y, a.z, 0.0f); 436 | } 437 | inline __host__ __device__ float4 make_float4(float3 a, float w) 438 | { 439 | return make_float4(a.x, a.y, a.z, w); 440 | } 441 | inline __host__ __device__ float4 make_float4(int4 a) 442 | { 443 | return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); 444 | } 445 | 446 | // negate 447 | inline __host__ __device__ float4 operator-(float4 &a) 448 | { 449 | return make_float4(-a.x, -a.y, -a.z, -a.w); 450 | } 451 | 452 | // min 453 | static __inline__ __host__ __device__ float4 fminf(float4 a, float4 b) 454 | { 455 | return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w)); 456 | } 457 | 458 | // max 459 | static __inline__ __host__ __device__ float4 fmaxf(float4 a, float4 b) 460 | { 461 | return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w)); 462 | } 463 | 464 | // addition 465 | inline __host__ __device__ float4 operator+(float4 a, float4 b) 466 | { 467 | return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); 468 | } 469 | inline __host__ __device__ void operator+=(float4 &a, float4 b) 470 | { 471 | a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; 472 | } 473 | 474 | // subtract 475 | inline __host__ __device__ float4 operator-(float4 a, float4 b) 476 | { 477 | return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); 478 | } 479 | inline __host__ __device__ void operator-=(float4 &a, float4 b) 480 | { 481 | a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; 482 | } 483 | 484 | // multiply 485 | inline __host__ __device__ float4 operator*(float4 a, float s) 486 | { 487 | return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); 488 | } 489 | inline __host__ __device__ float4 operator*(float s, float4 a) 490 | { 491 | return make_float4(a.x * s, a.y * s, a.z * s, a.w * s); 492 | } 493 | inline __host__ __device__ void operator*=(float4 &a, float s) 494 | { 495 | a.x *= s; a.y *= s; a.z *= s; a.w *= s; 496 | } 497 | 498 | // divide 499 | inline __host__ __device__ float4 operator/(float4 a, float4 b) 500 | { 501 | return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); 502 | } 503 | inline __host__ __device__ float4 operator/(float4 a, float s) 504 | { 505 | float inv = 1.0f / s; 506 | return a * inv; 507 | } 508 | inline __host__ __device__ float4 operator/(float s, float4 a) 509 | { 510 | float inv = 1.0f / s; 511 | return a * inv; 512 | } 513 | inline __host__ __device__ void operator/=(float4 &a, float s) 514 | { 515 | float inv = 1.0f / s; 516 | a *= inv; 517 | } 518 | 519 | // lerp 520 | inline __device__ __host__ float4 lerp(float4 a, float4 b, float t) 521 | { 522 | return a + t*(b-a); 523 | } 524 | 525 | // clamp 526 | inline __device__ __host__ float4 clamp(float4 v, float a, float b) 527 | { 528 | return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); 529 | } 530 | 531 | inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b) 532 | { 533 | return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); 534 | } 535 | 536 | // dot product 537 | inline __host__ __device__ float dot(float4 a, float4 b) 538 | { 539 | return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; 540 | } 541 | 542 | // length 543 | inline __host__ __device__ float length(float4 r) 544 | { 545 | return sqrtf(dot(r, r)); 546 | } 547 | 548 | // normalize 549 | inline __host__ __device__ float4 normalize(float4 v) 550 | { 551 | float invLen = rsqrtf(dot(v, v)); 552 | return v * invLen; 553 | } 554 | 555 | // floor 556 | inline __host__ __device__ float4 floor(const float4 v) 557 | { 558 | return make_float4(floor(v.x), floor(v.y), floor(v.z), floor(v.w)); 559 | } 560 | 561 | // absolute value 562 | inline __host__ __device__ float4 fabs(float4 v) 563 | { 564 | return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w)); 565 | } 566 | 567 | // int3 functions 568 | //////////////////////////////////////////////////////////////////////////////// 569 | 570 | // additional constructors 571 | inline __host__ __device__ int3 make_int3(int s) 572 | { 573 | return make_int3(s, s, s); 574 | } 575 | inline __host__ __device__ int3 make_int3(float3 a) 576 | { 577 | return make_int3(int(a.x), int(a.y), int(a.z)); 578 | } 579 | 580 | // negate 581 | inline __host__ __device__ int3 operator-(int3 &a) 582 | { 583 | return make_int3(-a.x, -a.y, -a.z); 584 | } 585 | 586 | // min 587 | inline __host__ __device__ int3 min(int3 a, int3 b) 588 | { 589 | return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z)); 590 | } 591 | 592 | // max 593 | inline __host__ __device__ int3 max(int3 a, int3 b) 594 | { 595 | return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z)); 596 | } 597 | 598 | // addition 599 | inline __host__ __device__ int3 operator+(int3 a, int3 b) 600 | { 601 | return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); 602 | } 603 | inline __host__ __device__ void operator+=(int3 &a, int3 b) 604 | { 605 | a.x += b.x; a.y += b.y; a.z += b.z; 606 | } 607 | 608 | // subtract 609 | inline __host__ __device__ int3 operator-(int3 a, int3 b) 610 | { 611 | return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); 612 | } 613 | 614 | inline __host__ __device__ void operator-=(int3 &a, int3 b) 615 | { 616 | a.x -= b.x; a.y -= b.y; a.z -= b.z; 617 | } 618 | 619 | // multiply 620 | inline __host__ __device__ int3 operator*(int3 a, int3 b) 621 | { 622 | return make_int3(a.x * b.x, a.y * b.y, a.z * b.z); 623 | } 624 | inline __host__ __device__ int3 operator*(int3 a, int s) 625 | { 626 | return make_int3(a.x * s, a.y * s, a.z * s); 627 | } 628 | inline __host__ __device__ int3 operator*(int s, int3 a) 629 | { 630 | return make_int3(a.x * s, a.y * s, a.z * s); 631 | } 632 | inline __host__ __device__ void operator*=(int3 &a, int s) 633 | { 634 | a.x *= s; a.y *= s; a.z *= s; 635 | } 636 | 637 | // divide 638 | inline __host__ __device__ int3 operator/(int3 a, int3 b) 639 | { 640 | return make_int3(a.x / b.x, a.y / b.y, a.z / b.z); 641 | } 642 | inline __host__ __device__ int3 operator/(int3 a, int s) 643 | { 644 | return make_int3(a.x / s, a.y / s, a.z / s); 645 | } 646 | inline __host__ __device__ int3 operator/(int s, int3 a) 647 | { 648 | return make_int3(a.x / s, a.y / s, a.z / s); 649 | } 650 | inline __host__ __device__ void operator/=(int3 &a, int s) 651 | { 652 | a.x /= s; a.y /= s; a.z /= s; 653 | } 654 | 655 | // clamp 656 | inline __device__ __host__ int clamp(int f, int a, int b) 657 | { 658 | return max(a, min(f, b)); 659 | } 660 | 661 | inline __device__ __host__ int3 clamp(int3 v, int a, int b) 662 | { 663 | return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); 664 | } 665 | 666 | inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b) 667 | { 668 | return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); 669 | } 670 | 671 | 672 | // uint3 functions 673 | //////////////////////////////////////////////////////////////////////////////// 674 | 675 | // additional constructors 676 | inline __host__ __device__ uint3 make_uint3(uint s) 677 | { 678 | return make_uint3(s, s, s); 679 | } 680 | inline __host__ __device__ uint3 make_uint3(float3 a) 681 | { 682 | return make_uint3(uint(a.x), uint(a.y), uint(a.z)); 683 | } 684 | 685 | // min 686 | inline __host__ __device__ uint3 min(uint3 a, uint3 b) 687 | { 688 | return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z)); 689 | } 690 | 691 | // max 692 | inline __host__ __device__ uint3 max(uint3 a, uint3 b) 693 | { 694 | return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z)); 695 | } 696 | 697 | // addition 698 | inline __host__ __device__ uint3 operator+(uint3 a, uint3 b) 699 | { 700 | return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z); 701 | } 702 | inline __host__ __device__ void operator+=(uint3 &a, uint3 b) 703 | { 704 | a.x += b.x; a.y += b.y; a.z += b.z; 705 | } 706 | 707 | // subtract 708 | inline __host__ __device__ uint3 operator-(uint3 a, uint3 b) 709 | { 710 | return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z); 711 | } 712 | 713 | inline __host__ __device__ void operator-=(uint3 &a, uint3 b) 714 | { 715 | a.x -= b.x; a.y -= b.y; a.z -= b.z; 716 | } 717 | 718 | // multiply 719 | inline __host__ __device__ uint3 operator*(uint3 a, uint3 b) 720 | { 721 | return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z); 722 | } 723 | inline __host__ __device__ uint3 operator*(uint3 a, uint s) 724 | { 725 | return make_uint3(a.x * s, a.y * s, a.z * s); 726 | } 727 | inline __host__ __device__ uint3 operator*(uint s, uint3 a) 728 | { 729 | return make_uint3(a.x * s, a.y * s, a.z * s); 730 | } 731 | inline __host__ __device__ void operator*=(uint3 &a, uint s) 732 | { 733 | a.x *= s; a.y *= s; a.z *= s; 734 | } 735 | 736 | // divide 737 | inline __host__ __device__ uint3 operator/(uint3 a, uint3 b) 738 | { 739 | return make_uint3(a.x / b.x, a.y / b.y, a.z / b.z); 740 | } 741 | inline __host__ __device__ uint3 operator/(uint3 a, uint s) 742 | { 743 | return make_uint3(a.x / s, a.y / s, a.z / s); 744 | } 745 | inline __host__ __device__ uint3 operator/(uint s, uint3 a) 746 | { 747 | return make_uint3(a.x / s, a.y / s, a.z / s); 748 | } 749 | inline __host__ __device__ void operator/=(uint3 &a, uint s) 750 | { 751 | a.x /= s; a.y /= s; a.z /= s; 752 | } 753 | 754 | // clamp 755 | inline __device__ __host__ uint clamp(uint f, uint a, uint b) 756 | { 757 | return max(a, min(f, b)); 758 | } 759 | 760 | inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b) 761 | { 762 | return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); 763 | } 764 | 765 | inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b) 766 | { 767 | return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); 768 | } 769 | 770 | #endif 771 | -------------------------------------------------------------------------------- /tutorial2_cuda_pathtracer.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Basic CUDA based triangle mesh path tracer. 3 | * For background info, see http://raytracey.blogspot.co.nz/2015/12/gpu-path-tracing-tutorial-2-interactive.html 4 | * Based on CUDA ray tracing code from http://cg.alexandra.dk/?p=278 5 | * Copyright (C) 2015 Sam Lapere 6 | * 7 | * This program is free software; you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation; either version 2 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | */ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\cuda.h" 23 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\math_functions.h" 24 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\vector_types.h" 25 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\vector_functions.h" 26 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\device_launch_parameters.h" 27 | #include "cutil_math.h" // required for float3 vector math 28 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\extras\CUPTI\include\GL\glew.h" 29 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\extras\CUPTI\include\GL\glut.h" 30 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\cuda_runtime.h" 31 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\cuda_gl_interop.h" 32 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\curand.h" 33 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\curand_kernel.h" 34 | 35 | #define M_PI 3.14159265359f 36 | #define width 1024 // screenwidth 37 | #define height 576 // screenheight 38 | #define samps 1 // samples per pixel per pass 39 | 40 | int total_number_of_triangles = 0; 41 | int frames = 0; 42 | 43 | // scene bounding box 44 | float3 scene_aabbox_min; 45 | float3 scene_aabbox_max; 46 | 47 | // the scene triangles are stored in a 1D CUDA texture of float4 for memory alignment 48 | // store two edges instead of vertices 49 | // each triangle is stored as three float4s: (float4 first_vertex, float4 edge1, float4 edge2) 50 | texture triangle_texture; 51 | 52 | // hardcoded camera position 53 | __device__ float3 firstcamorig = { 50, 52, 295.6 }; 54 | 55 | // OpenGL vertex buffer object for real-time viewport 56 | GLuint vbo; 57 | void *d_vbo_buffer = NULL; 58 | 59 | struct Ray { 60 | float3 orig; // ray origin 61 | float3 dir; // ray direction 62 | __device__ Ray(float3 o_, float3 d_) : orig(o_), dir(d_) {} 63 | }; 64 | 65 | enum Refl_t { DIFF, SPEC, REFR }; // material types, used in radiance(), only DIFF used here 66 | 67 | // SPHERES 68 | 69 | struct Sphere { 70 | 71 | float rad; // radius 72 | float3 pos, emi, col; // position, emission, color 73 | Refl_t refl; // reflection type (DIFFuse, SPECular, REFRactive) 74 | 75 | __device__ float intersect(const Ray &r) const { // returns distance, 0 if nohit 76 | 77 | // Ray/sphere intersection 78 | // Quadratic formula required to solve ax^2 + bx + c = 0 79 | // Solution x = (-b +- sqrt(b*b - 4ac)) / 2a 80 | // Solve t^2*d.d + 2*t*(o-p).d + (o-p).(o-p)-R^2 = 0 81 | 82 | float3 op = pos - r.orig; // 83 | float t, epsilon = 0.01f; 84 | float b = dot(op, r.dir); 85 | float disc = b*b - dot(op, op) + rad*rad; // discriminant 86 | if (disc<0) return 0; else disc = sqrtf(disc); 87 | return (t = b - disc)>epsilon ? t : ((t = b + disc)>epsilon ? t : 0); 88 | } 89 | }; 90 | 91 | // TRIANGLES 92 | 93 | // the classic ray triangle intersection: http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf 94 | // for an explanation see http://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/moller-trumbore-ray-triangle-intersection 95 | 96 | __device__ float RayTriangleIntersection(const Ray &r, 97 | const float3 &v0, 98 | const float3 &edge1, 99 | const float3 &edge2) 100 | { 101 | 102 | float3 tvec = r.orig - v0; 103 | float3 pvec = cross(r.dir, edge2); 104 | float det = dot(edge1, pvec); 105 | 106 | det = __fdividef(1.0f, det); // CUDA intrinsic function 107 | 108 | float u = dot(tvec, pvec) * det; 109 | 110 | if (u < 0.0f || u > 1.0f) 111 | return -1.0f; 112 | 113 | float3 qvec = cross(tvec, edge1); 114 | 115 | float v = dot(r.dir, qvec) * det; 116 | 117 | if (v < 0.0f || (u + v) > 1.0f) 118 | return -1.0f; 119 | 120 | return dot(edge2, qvec) * det; 121 | } 122 | 123 | __device__ float3 getTriangleNormal(const int triangleIndex){ 124 | 125 | float4 edge1 = tex1Dfetch(triangle_texture, triangleIndex * 3 + 1); 126 | float4 edge2 = tex1Dfetch(triangle_texture, triangleIndex * 3 + 2); 127 | 128 | // cross product of two triangle edges yields a vector orthogonal to triangle plane 129 | float3 trinormal = cross(make_float3(edge1.x, edge1.y, edge1.z), make_float3(edge2.x, edge2.y, edge2.z)); 130 | trinormal = normalize(trinormal); 131 | 132 | return trinormal; 133 | } 134 | 135 | __device__ void intersectAllTriangles(const Ray& r, float& t_scene, int& triangle_id, const int number_of_triangles, int& geomtype){ 136 | 137 | for (int i = 0; i < number_of_triangles; i++) 138 | { 139 | // the triangles are packed into the 1D texture using three consecutive float4 structs for each triangle, 140 | // first float4 contains the first vertex, second float4 contains the first precomputed edge, third float4 contains second precomputed edge like this: 141 | // (float4(vertex.x,vertex.y,vertex.z, 0), float4 (egde1.x,egde1.y,egde1.z,0),float4 (egde2.x,egde2.y,egde2.z,0)) 142 | 143 | // i is triangle index, each triangle represented by 3 float4s in triangle_texture 144 | float4 v0 = tex1Dfetch(triangle_texture, i * 3); 145 | float4 edge1 = tex1Dfetch(triangle_texture, i * 3 + 1); 146 | float4 edge2 = tex1Dfetch(triangle_texture, i * 3 + 2); 147 | 148 | // intersect ray with reconstructed triangle 149 | float t = RayTriangleIntersection(r, 150 | make_float3(v0.x, v0.y, v0.z), 151 | make_float3(edge1.x, edge1.y, edge1.z), 152 | make_float3(edge2.x, edge2.y, edge2.z)); 153 | 154 | // keep track of closest distance and closest triangle 155 | // if ray/tri intersection finds an intersection point that is closer than closest intersection found so far 156 | if (t < t_scene && t > 0.001) 157 | { 158 | t_scene = t; 159 | triangle_id = i; 160 | geomtype = 3; 161 | } 162 | } 163 | } 164 | 165 | 166 | // AXIS ALIGNED BOXES 167 | 168 | // helper functions 169 | inline __device__ float3 minf3(float3 a, float3 b){ return make_float3(a.x < b.x ? a.x : b.x, a.y < b.y ? a.y : b.y, a.z < b.z ? a.z : b.z); } 170 | inline __device__ float3 maxf3(float3 a, float3 b){ return make_float3(a.x > b.x ? a.x : b.x, a.y > b.y ? a.y : b.y, a.z > b.z ? a.z : b.z); } 171 | inline __device__ float minf1(float a, float b){ return a < b ? a : b; } 172 | inline __device__ float maxf1(float a, float b){ return a > b ? a : b; } 173 | 174 | struct Box { 175 | 176 | float3 min; // minimum bounds 177 | float3 max; // maximum bounds 178 | float3 emi; // emission 179 | float3 col; // colour 180 | Refl_t refl; // material type 181 | 182 | // ray/box intersection 183 | // for theoretical background of the algorithm see 184 | // http://www.scratchapixel.com/lessons/3d-basic-rendering/minimal-ray-tracer-rendering-simple-shapes/ray-box-intersection 185 | // optimised code from http://www.gamedev.net/topic/495636-raybox-collision-intersection-point/ 186 | __device__ float intersect(const Ray &r) const { 187 | 188 | float epsilon = 0.001f; // required to prevent self intersection 189 | 190 | float3 tmin = (min - r.orig) / r.dir; 191 | float3 tmax = (max - r.orig) / r.dir; 192 | 193 | float3 real_min = minf3(tmin, tmax); 194 | float3 real_max = maxf3(tmin, tmax); 195 | 196 | float minmax = minf1(minf1(real_max.x, real_max.y), real_max.z); 197 | float maxmin = maxf1(maxf1(real_min.x, real_min.y), real_min.z); 198 | 199 | if (minmax >= maxmin) { return maxmin > epsilon ? maxmin : 0; } 200 | else return 0; 201 | } 202 | 203 | // calculate normal for point on axis aligned box 204 | __device__ float3 Box::normalAt(float3 &point) { 205 | 206 | float3 normal = make_float3(0.f, 0.f, 0.f); 207 | float min_distance = 1e8; 208 | float distance; 209 | float epsilon = 0.001f; 210 | 211 | if (fabs(min.x - point.x) < epsilon) normal = make_float3(-1, 0, 0); 212 | else if (fabs(max.x - point.x) < epsilon) normal = make_float3(1, 0, 0); 213 | else if (fabs(min.y - point.y) < epsilon) normal = make_float3(0, -1, 0); 214 | else if (fabs(max.y - point.y) < epsilon) normal = make_float3(0, 1, 0); 215 | else if (fabs(min.z - point.z) < epsilon) normal = make_float3(0, 0, -1); 216 | else normal = make_float3(0, 0, 1); 217 | 218 | return normal; 219 | } 220 | }; 221 | 222 | // scene: 9 spheres forming a Cornell box 223 | // small enough to fit in constant GPU memory 224 | __constant__ Sphere spheres[] = { 225 | // FORMAT: { float radius, float3 position, float3 emission, float3 colour, Refl_t material } 226 | // cornell box 227 | //{ 1e5f, { 1e5f + 1.0f, 40.8f, 81.6f }, { 0.0f, 0.0f, 0.0f }, { 0.75f, 0.25f, 0.25f }, DIFF }, //Left 1e5f 228 | //{ 1e5f, { -1e5f + 99.0f, 40.8f, 81.6f }, { 0.0f, 0.0f, 0.0f }, { .25f, .25f, .75f }, DIFF }, //Right 229 | //{ 1e5f, { 50.0f, 40.8f, 1e5f }, { 0.0f, 0.0f, 0.0f }, { .75f, .75f, .75f }, DIFF }, //Back 230 | //{ 1e5f, { 50.0f, 40.8f, -1e5f + 600.0f }, { 0.0f, 0.0f, 0.0f }, { 0.00f, 0.00f, 0.00f }, DIFF }, //Front 231 | //{ 1e5f, { 50.0f, -1e5f, 81.6f }, { 0.0f, 0.0f, 0.0f }, { .75f, .75f, .75f }, DIFF }, //Bottom 232 | //{ 1e5f, { 50.0f, -1e5f + 81.6f, 81.6f }, { 0.0f, 0.0f, 0.0f }, { .75f, .75f, .75f }, DIFF }, //Top 233 | //{ 16.5f, { 27.0f, 16.5f, 47.0f }, { 0.0f, 0.0f, 0.0f }, { 0.99f, 0.99f, 0.99f }, SPEC }, // small sphere 1 234 | //{ 16.5f, { 73.0f, 16.5f, 78.0f }, { 0.0f, 0.f, .0f }, { 0.09f, 0.49f, 0.3f }, REFR }, // small sphere 2 235 | //{ 600.0f, { 50.0f, 681.6f - .5f, 81.6f }, { 3.0f, 2.5f, 2.0f }, { 0.0f, 0.0f, 0.0f }, DIFF } // Light 12, 10 ,8 236 | 237 | //outdoor scene: radius, position, emission, color, material 238 | 239 | //{ 1600, { 3000.0f, 10, 6000 }, { 37, 34, 30 }, { 0.f, 0.f, 0.f }, DIFF }, // 37, 34, 30 // sun 240 | //{ 1560, { 3500.0f, 0, 7000 }, { 50, 25, 2.5 }, { 0.f, 0.f, 0.f }, DIFF }, // 150, 75, 7.5 // sun 2 241 | { 10000, { 50.0f, 40.8f, -1060 }, { 0.0003, 0.01, 0.15 }, { 0.175f, 0.175f, 0.25f }, DIFF }, // sky 242 | { 100000, { 50.0f, -100000, 0 }, { 0.0, 0.0, 0 }, { 0.8f, 0.2f, 0.f }, DIFF }, // ground 243 | { 110000, { 50.0f, -110048.5, 0 }, { 3.6, 2.0, 0.2 }, { 0.f, 0.f, 0.f }, DIFF }, // horizon brightener 244 | { 4e4, { 50.0f, -4e4 - 30, -3000 }, { 0, 0, 0 }, { 0.2f, 0.2f, 0.2f }, DIFF }, // mountains 245 | { 82.5, { 30.0f, 180.5, 42 }, { 16, 12, 6 }, { .6f, .6f, 0.6f }, DIFF }, // small sphere 1 246 | { 12, { 115.0f, 10, 105 }, { 0.0, 0.0, 0.0 }, { 0.9f, 0.9f, 0.9f }, REFR }, // small sphere 2 247 | { 22, { 65.0f, 22, 24 }, { 0, 0, 0 }, { 0.9f, 0.9f, 0.9f }, SPEC }, // small sphere 3 248 | }; 249 | 250 | __constant__ Box boxes[] = { 251 | // FORMAT: { float3 minbounds, float3 maxbounds, float3 emission, float3 colour, Refl_t } 252 | { { 5.0f, 0.0f, 70.0f }, { 45.0f, 11.0f, 115.0f }, { .0f, .0f, 0.0f }, { 0.5f, 0.5f, 0.5f }, DIFF }, 253 | { { 85.0f, 0.0f, 95.0f }, { 95.0f, 20.0f, 105.0f }, { .0f, .0f, 0.0f }, { 0.5f, 0.5f, 0.5f }, DIFF }, 254 | { { 75.0f, 20.0f, 85.0f }, { 105.0f, 22.0f, 115.0f }, { .0f, .0f, 0.0f }, { 0.5f, 0.5f, 0.5f }, DIFF }, 255 | }; 256 | 257 | 258 | __device__ inline bool intersect_scene(const Ray &r, float &t, int &sphere_id, int &box_id, int& triangle_id, const int number_of_triangles, int &geomtype, const float3& bbmin, const float3& bbmax){ 259 | 260 | float tmin = 1e20; 261 | float tmax = -1e20; 262 | float d = 1e21; 263 | float k = 1e21; 264 | float q = 1e21; 265 | float inf = t = 1e20; 266 | 267 | // SPHERES 268 | // intersect all spheres in the scene 269 | float numspheres = sizeof(spheres) / sizeof(Sphere); 270 | for (int i = int(numspheres); i--;) // for all spheres in scene 271 | // keep track of distance from origin to closest intersection point 272 | if ((d = spheres[i].intersect(r)) && d < t){ t = d; sphere_id = i; geomtype = 1; } 273 | 274 | // BOXES 275 | // intersect all boxes in the scene 276 | float numboxes = sizeof(boxes) / sizeof(Box); 277 | for (int i = int(numboxes); i--;) // for all boxes in scene 278 | if ((k = boxes[i].intersect(r)) && k < t){ t = k; box_id = i; geomtype = 2; } 279 | 280 | // TRIANGLES 281 | Box scene_bbox; // bounding box around triangle meshes 282 | scene_bbox.min = bbmin; 283 | scene_bbox.max = bbmax; 284 | 285 | // if ray hits bounding box of triangle meshes, intersect ray with all triangles 286 | if (scene_bbox.intersect(r)){ 287 | intersectAllTriangles(r, t, triangle_id, number_of_triangles, geomtype); 288 | } 289 | 290 | // t is distance to closest intersection of ray with all primitives in the scene (spheres, boxes and triangles) 291 | return t> 16); 299 | a = a + (a << 3); 300 | a = a ^ (a >> 4); 301 | a = a * 0x27d4eb2d; 302 | a = a ^ (a >> 15); 303 | return a; 304 | } 305 | 306 | // radiance function 307 | // compute path bounces in scene and accumulate returned color from each path sgment 308 | __device__ float3 radiance(Ray &r, curandState *randstate, const int totaltris, const float3& scene_aabb_min, const float3& scene_aabb_max){ // returns ray color 309 | 310 | // colour mask 311 | float3 mask = make_float3(1.0f, 1.0f, 1.0f); 312 | // accumulated colour 313 | float3 accucolor = make_float3(0.0f, 0.0f, 0.0f); 314 | 315 | for (int bounces = 0; bounces < 5; bounces++){ // iteration up to 4 bounces (instead of recursion in CPU code) 316 | 317 | // reset scene intersection function parameters 318 | float t = 100000; // distance to intersection 319 | int sphere_id = -1; 320 | int box_id = -1; // index of intersected sphere 321 | int triangle_id = -1; 322 | int geomtype = -1; 323 | float3 f; // primitive colour 324 | float3 emit; // primitive emission colour 325 | float3 x; // intersection point 326 | float3 n; // normal 327 | float3 nl; // oriented normal 328 | float3 d; // ray direction of next path segment 329 | Refl_t refltype; 330 | 331 | // intersect ray with scene 332 | // intersect_scene keeps track of closest intersected primitive and distance to closest intersection point 333 | if (!intersect_scene(r, t, sphere_id, box_id, triangle_id, totaltris, geomtype, scene_aabb_min, scene_aabb_max)) 334 | return make_float3(0.0f, 0.0f, 0.0f); // if miss, return black 335 | 336 | // else: we've got a hit with a scene primitive 337 | // determine geometry type of primitive: sphere/box/triangle 338 | 339 | // if sphere: 340 | if (geomtype == 1){ 341 | Sphere &sphere = spheres[sphere_id]; // hit object with closest intersection 342 | x = r.orig + r.dir*t; // intersection point on object 343 | n = normalize(x - sphere.pos); // normal 344 | nl = dot(n, r.dir) < 0 ? n : n * -1; // correctly oriented normal 345 | f = sphere.col; // object colour 346 | refltype = sphere.refl; 347 | emit = sphere.emi; // object emission 348 | accucolor += (mask * emit); 349 | } 350 | 351 | // if box: 352 | if (geomtype == 2){ 353 | Box &box = boxes[box_id]; 354 | x = r.orig + r.dir*t; // intersection point on object 355 | n = normalize(box.normalAt(x)); // normal 356 | nl = dot(n, r.dir) < 0 ? n : n * -1; // correctly oriented normal 357 | f = box.col; // box colour 358 | refltype = box.refl; 359 | emit = box.emi; // box emission 360 | accucolor += (mask * emit); 361 | } 362 | 363 | // if triangle: 364 | if (geomtype == 3){ 365 | int tri_index = triangle_id; 366 | x = r.orig + r.dir*t; // intersection point 367 | n = normalize(getTriangleNormal(tri_index)); // normal 368 | nl = dot(n, r.dir) < 0 ? n : n * -1; // correctly oriented normal 369 | 370 | // colour, refltype and emit value are hardcoded and apply to all triangles 371 | // no per triangle material support yet 372 | f = make_float3(0.9f, 0.4f, 0.1f); // triangle colour 373 | refltype = REFR; 374 | emit = make_float3(0.0f, 0.0f, 0.0f); 375 | accucolor += (mask * emit); 376 | } 377 | 378 | // SHADING: diffuse, specular or refractive 379 | 380 | // ideal diffuse reflection (see "Realistic Ray Tracing", P. Shirley) 381 | if (refltype == DIFF){ 382 | 383 | // create 2 random numbers 384 | float r1 = 2 * M_PI * curand_uniform(randstate); 385 | float r2 = curand_uniform(randstate); 386 | float r2s = sqrtf(r2); 387 | 388 | // compute orthonormal coordinate frame uvw with hitpoint as origin 389 | float3 w = nl; 390 | float3 u = normalize(cross((fabs(w.x) > .1 ? make_float3(0, 1, 0) : make_float3(1, 0, 0)), w)); 391 | float3 v = cross(w, u); 392 | 393 | // compute cosine weighted random ray direction on hemisphere 394 | d = normalize(u*cos(r1)*r2s + v*sin(r1)*r2s + w*sqrtf(1 - r2)); 395 | 396 | // offset origin next path segment to prevent self intersection 397 | x += nl * 0.03; 398 | 399 | // multiply mask with colour of object 400 | mask *= f; 401 | } 402 | 403 | // ideal specular reflection (mirror) 404 | if (refltype == SPEC){ 405 | 406 | // compute relfected ray direction according to Snell's law 407 | d = r.dir - 2.0f * n * dot(n, r.dir); 408 | 409 | // offset origin next path segment to prevent self intersection 410 | x += nl * 0.01f; 411 | 412 | // multiply mask with colour of object 413 | mask *= f; 414 | } 415 | 416 | // ideal refraction (based on smallpt code by Kevin Beason) 417 | if (refltype == REFR){ 418 | 419 | bool into = dot(n, nl) > 0; // is ray entering or leaving refractive material? 420 | float nc = 1.0f; // Index of Refraction air 421 | float nt = 1.5f; // Index of Refraction glass/water 422 | float nnt = into ? nc / nt : nt / nc; // IOR ratio of refractive materials 423 | float ddn = dot(r.dir, nl); 424 | float cos2t = 1.0f - nnt*nnt * (1.f - ddn*ddn); 425 | 426 | if (cos2t < 0.0f) // total internal reflection 427 | { 428 | d = reflect(r.dir, n); //d = r.dir - 2.0f * n * dot(n, r.dir); 429 | x += nl * 0.01f; 430 | } 431 | else // cos2t > 0 432 | { 433 | // compute direction of transmission ray 434 | float3 tdir = normalize(r.dir * nnt - n * ((into ? 1 : -1) * (ddn*nnt + sqrtf(cos2t)))); 435 | 436 | float R0 = (nt - nc)*(nt - nc) / (nt + nc)*(nt + nc); 437 | float c = 1.f - (into ? -ddn : dot(tdir, n)); 438 | float Re = R0 + (1.f - R0) * c * c * c * c * c; 439 | float Tr = 1 - Re; // Transmission 440 | float P = .25f + .5f * Re; 441 | float RP = Re / P; 442 | float TP = Tr / (1.f - P); 443 | 444 | // randomly choose reflection or transmission ray 445 | if (curand_uniform(randstate) < 0.25) // reflection ray 446 | { 447 | mask *= RP; 448 | d = reflect(r.dir, n); 449 | x += nl * 0.02f; 450 | } 451 | else // transmission ray 452 | { 453 | mask *= TP; 454 | d = tdir; //r = Ray(x, tdir); 455 | x += nl * 0.0005f; // epsilon must be small to avoid artefacts 456 | } 457 | } 458 | } 459 | 460 | // set up origin and direction of next path segment 461 | r.orig = x; 462 | r.dir = d; 463 | } 464 | 465 | // add radiance up to a certain ray depth 466 | // return accumulated ray colour after all bounces are computed 467 | return accucolor; 468 | } 469 | 470 | // required to convert colour to a format that OpenGL can display 471 | union Colour // 4 bytes = 4 chars = 1 float 472 | { 473 | float c; 474 | uchar4 components; 475 | }; 476 | 477 | __global__ void render_kernel(float3 *output, float3* accumbuffer, const int numtriangles, int framenumber, uint hashedframenumber, float3 scene_bbmin, float3 scene_bbmax){ // float3 *gputexdata1, int *texoffsets 478 | 479 | // assign a CUDA thread to every pixel by using the threadIndex 480 | unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; 481 | unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; 482 | 483 | // global threadId, see richiesams blogspot 484 | int threadId = (blockIdx.x + blockIdx.y * gridDim.x) * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x; 485 | 486 | // create random number generator, see RichieSams blogspot 487 | curandState randState; // state of the random number generator, to prevent repetition 488 | curand_init(hashedframenumber + threadId, 0, 0, &randState); 489 | 490 | Ray cam(firstcamorig, normalize(make_float3(0, -0.042612, -1))); 491 | float3 cx = make_float3(width * .5135 / height, 0.0f, 0.0f); // ray direction offset along X-axis 492 | float3 cy = normalize(cross(cx, cam.dir)) * .5135; // ray dir offset along Y-axis, .5135 is FOV angle 493 | float3 pixelcol; // final pixel color 494 | 495 | int i = (height - y - 1)*width + x; // pixel index 496 | 497 | pixelcol = make_float3(0.0f, 0.0f, 0.0f); // reset to zero for every pixel 498 | 499 | for (int s = 0; s < samps; s++){ 500 | 501 | // compute primary ray direction 502 | float3 d = cx*((.25 + x) / width - .5) + cy*((.25 + y) / height - .5) + cam.dir; 503 | // normalize primary ray direction 504 | d = normalize(d); 505 | // add accumulated colour from path bounces 506 | pixelcol += radiance(Ray(cam.orig + d * 40, d), &randState, numtriangles, scene_bbmin, scene_bbmax)*(1. / samps); 507 | } // Camera rays are pushed ^^^^^ forward to start in interior 508 | 509 | // add pixel colour to accumulation buffer (accumulates all samples) 510 | accumbuffer[i] += pixelcol; 511 | // averaged colour: divide colour by the number of calculated frames so far 512 | float3 tempcol = accumbuffer[i] / framenumber; 513 | 514 | Colour fcolour; 515 | float3 colour = make_float3(clamp(tempcol.x, 0.0f, 1.0f), clamp(tempcol.y, 0.0f, 1.0f), clamp(tempcol.z, 0.0f, 1.0f)); 516 | // convert from 96-bit to 24-bit colour + perform gamma correction 517 | fcolour.components = make_uchar4((unsigned char)(powf(colour.x, 1 / 2.2f) * 255), (unsigned char)(powf(colour.y, 1 / 2.2f) * 255), (unsigned char)(powf(colour.z, 1 / 2.2f) * 255), 1); 518 | // store pixel coordinates and pixelcolour in OpenGL readable outputbuffer 519 | output[i] = make_float3(x, y, fcolour.c); 520 | } 521 | 522 | void Timer(int obsolete) { 523 | 524 | glutPostRedisplay(); 525 | glutTimerFunc(30, Timer, 0); 526 | } 527 | 528 | __device__ float timer = 0.0f; 529 | 530 | inline float clamp(float x){ return x<0 ? 0 : x>1 ? 1 : x; } 531 | 532 | //inline int toInt(float x){ return int(pow(clamp(x), 1 / 2.2) * 255 + .5); } // RGB float in range [0,1] to int in range [0, 255] 533 | 534 | // buffer for accumulating samples over several frames 535 | float3* accumulatebuffer; 536 | // output buffer 537 | float3 *dptr; 538 | 539 | void disp(void) 540 | { 541 | frames++; 542 | cudaThreadSynchronize(); 543 | 544 | // map vertex buffer object for acces by CUDA 545 | cudaGLMapBufferObject((void**)&dptr, vbo); 546 | 547 | //clear all pixels: 548 | glClear(GL_COLOR_BUFFER_BIT); 549 | 550 | // RAY TRACING: 551 | // dim3 grid(WINDOW / block.x, WINDOW / block.y, 1); 552 | // dim3 CUDA specific syntax, block and grid are required to schedule CUDA threads over streaming multiprocessors 553 | dim3 block(16, 16, 1); 554 | dim3 grid(width / block.x, height / block.y, 1); 555 | 556 | // launch CUDA path tracing kernel, pass in a hashed seed based on number of frames 557 | render_kernel << < grid, block >> >(dptr, accumulatebuffer, total_number_of_triangles, frames, WangHash(frames), scene_aabbox_max, scene_aabbox_min); // launches CUDA render kernel from the host 558 | 559 | cudaThreadSynchronize(); 560 | 561 | // unmap buffer 562 | cudaGLUnmapBufferObject(vbo); 563 | //glFlush(); 564 | glBindBuffer(GL_ARRAY_BUFFER, vbo); 565 | glVertexPointer(2, GL_FLOAT, 12, 0); 566 | glColorPointer(4, GL_UNSIGNED_BYTE, 12, (GLvoid*)8); 567 | 568 | glEnableClientState(GL_VERTEX_ARRAY); 569 | glEnableClientState(GL_COLOR_ARRAY); 570 | glDrawArrays(GL_POINTS, 0, width * height); 571 | glDisableClientState(GL_VERTEX_ARRAY); 572 | 573 | glutSwapBuffers(); 574 | //glutPostRedisplay(); 575 | } 576 | 577 | // load triangle data in a CUDA texture 578 | extern "C" 579 | { 580 | void bindTriangles(float *dev_triangle_p, unsigned int number_of_triangles) 581 | { 582 | triangle_texture.normalized = false; // access with normalized texture coordinates 583 | triangle_texture.filterMode = cudaFilterModePoint; // Point mode, so no 584 | triangle_texture.addressMode[0] = cudaAddressModeWrap; // wrap texture coordinates 585 | 586 | size_t size = sizeof(float4)*number_of_triangles * 3; 587 | cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); 588 | cudaBindTexture(0, triangle_texture, dev_triangle_p, channelDesc, size); 589 | } 590 | } 591 | 592 | // helpers to load triangle data 593 | struct TriangleFace 594 | { 595 | int v[3]; // vertex indices 596 | }; 597 | 598 | struct TriangleMesh 599 | { 600 | std::vector verts; 601 | std::vector faces; 602 | float3 bounding_box[2]; 603 | }; 604 | 605 | TriangleMesh mesh1; 606 | TriangleMesh mesh2; 607 | 608 | float *dev_triangle_p; // the cuda device pointer that points to the uploaded triangles 609 | 610 | void loadObj(const std::string filename, TriangleMesh &mesh); // forward declaration 611 | 612 | // 1. load triangle mesh data from obj files 613 | // 2. copy data to CPU memory (into vector triangles) 614 | // 3. copy to CUDA global memory (allocated with dev_triangle_p pointer) 615 | // 4. copy to CUDA texture memory with bindtriangles() 616 | void initCUDAmemoryTriMesh() 617 | { 618 | loadObj("data/bunny.obj", mesh1); 619 | loadObj("data/bunny.obj", mesh2); 620 | 621 | // scalefactor and offset to position/scale triangle meshes 622 | float scalefactor1 = 200; 623 | float scalefactor2 = 300; // 300 624 | float3 offset1 = make_float3(90, 22, 100);// (30, -2, 80); 625 | float3 offset2 = make_float3(30, -2, 80); 626 | 627 | std::vector triangles; 628 | 629 | for (unsigned int i = 0; i < mesh1.faces.size(); i++) 630 | { 631 | // make a local copy of the triangle vertices 632 | float3 v0 = mesh1.verts[mesh1.faces[i].v[0] - 1]; 633 | float3 v1 = mesh1.verts[mesh1.faces[i].v[1] - 1]; 634 | float3 v2 = mesh1.verts[mesh1.faces[i].v[2] - 1]; 635 | 636 | // scale 637 | v0 *= scalefactor1; 638 | v1 *= scalefactor1; 639 | v2 *= scalefactor1; 640 | 641 | // translate 642 | v0 += offset1; 643 | v1 += offset1; 644 | v2 += offset1; 645 | 646 | // store triangle data as float4 647 | // store two edges per triangle instead of vertices, to save some calculations in the 648 | // ray triangle intersection test 649 | triangles.push_back(make_float4(v0.x, v0.y, v0.z, 0)); 650 | triangles.push_back(make_float4(v1.x - v0.x, v1.y - v0.y, v1.z - v0.z, 0)); 651 | triangles.push_back(make_float4(v2.x - v0.x, v2.y - v0.y, v2.z - v0.z, 0)); 652 | } 653 | 654 | // compute bounding box of this mesh 655 | mesh1.bounding_box[0] *= scalefactor1; mesh1.bounding_box[0] += offset1; 656 | mesh1.bounding_box[1] *= scalefactor1; mesh1.bounding_box[1] += offset1; 657 | 658 | for (unsigned int i = 0; i < mesh2.faces.size(); i++) 659 | { 660 | float3 v0 = mesh2.verts[mesh2.faces[i].v[0] - 1]; 661 | float3 v1 = mesh2.verts[mesh2.faces[i].v[1] - 1]; 662 | float3 v2 = mesh2.verts[mesh2.faces[i].v[2] - 1]; 663 | 664 | v0 *= scalefactor2; 665 | v1 *= scalefactor2; 666 | v2 *= scalefactor2; 667 | 668 | v0 += offset2; 669 | v1 += offset2; 670 | v2 += offset2; 671 | 672 | triangles.push_back(make_float4(v0.x, v0.y, v0.z, 0)); 673 | triangles.push_back(make_float4(v1.x - v0.x, v1.y - v0.y, v1.z - v0.z, 1)); 674 | triangles.push_back(make_float4(v2.x - v0.x, v2.y - v0.y, v2.z - v0.z, 0)); 675 | } 676 | 677 | mesh2.bounding_box[0] *= scalefactor2; mesh2.bounding_box[0] += offset2; 678 | mesh2.bounding_box[1] *= scalefactor2; mesh2.bounding_box[1] += offset2; 679 | 680 | std::cout << "total number of triangles check:" << mesh1.faces.size() + mesh2.faces.size() << " == " << triangles.size() / 3 << std::endl; 681 | 682 | // calculate total number of triangles in the scene 683 | size_t triangle_size = triangles.size() * sizeof(float4); 684 | int total_num_triangles = triangles.size() / 3; 685 | total_number_of_triangles = total_num_triangles; 686 | 687 | if (triangle_size > 0) 688 | { 689 | // allocate memory for the triangle meshes on the GPU 690 | cudaMalloc((void **)&dev_triangle_p, triangle_size); 691 | 692 | // copy triangle data to GPU 693 | cudaMemcpy(dev_triangle_p, &triangles[0], triangle_size, cudaMemcpyHostToDevice); 694 | 695 | // load triangle data into a CUDA texture 696 | bindTriangles(dev_triangle_p, total_num_triangles); 697 | } 698 | 699 | // compute scene bounding box by merging bounding boxes of individual meshes 700 | scene_aabbox_min = mesh2.bounding_box[0]; 701 | scene_aabbox_max = mesh2.bounding_box[1]; 702 | scene_aabbox_min = fminf(scene_aabbox_min, mesh1.bounding_box[0]); 703 | scene_aabbox_max = fmaxf(scene_aabbox_max, mesh1.bounding_box[1]); 704 | 705 | } 706 | 707 | // read triangle data from obj file 708 | void loadObj(const std::string filename, TriangleMesh &mesh) 709 | { 710 | std::ifstream in(filename.c_str()); 711 | 712 | if (!in.good()) 713 | { 714 | std::cout << "ERROR: loading obj:(" << filename << ") file not found or not good" << "\n"; 715 | system("PAUSE"); 716 | exit(0); 717 | } 718 | 719 | char buffer[256], str[255]; 720 | float f1, f2, f3; 721 | 722 | while (!in.getline(buffer, 255).eof()) 723 | { 724 | buffer[255] = '\0'; 725 | sscanf_s(buffer, "%s", str, 255); 726 | 727 | // reading a vertex 728 | if (buffer[0] == 'v' && (buffer[1] == ' ' || buffer[1] == 32)){ 729 | if (sscanf(buffer, "v %f %f %f", &f1, &f2, &f3) == 3){ 730 | mesh.verts.push_back(make_float3(f1, f2, f3)); 731 | } 732 | else{ 733 | std::cout << "ERROR: vertex not in wanted format in OBJLoader" << "\n"; 734 | exit(-1); 735 | } 736 | } 737 | 738 | // reading faceMtls 739 | else if (buffer[0] == 'f' && (buffer[1] == ' ' || buffer[1] == 32)) 740 | { 741 | TriangleFace f; 742 | int nt = sscanf(buffer, "f %d %d %d", &f.v[0], &f.v[1], &f.v[2]); 743 | if (nt != 3){ 744 | std::cout << "ERROR: I don't know the format of that FaceMtl" << "\n"; 745 | exit(-1); 746 | } 747 | 748 | mesh.faces.push_back(f); 749 | } 750 | } 751 | 752 | // calculate the bounding box of the mesh 753 | mesh.bounding_box[0] = make_float3(1000000, 1000000, 1000000); 754 | mesh.bounding_box[1] = make_float3(-1000000, -1000000, -1000000); 755 | for (unsigned int i = 0; i < mesh.verts.size(); i++) 756 | { 757 | //update min and max value 758 | mesh.bounding_box[0] = fminf(mesh.verts[i], mesh.bounding_box[0]); 759 | mesh.bounding_box[1] = fmaxf(mesh.verts[i], mesh.bounding_box[1]); 760 | } 761 | 762 | std::cout << "obj file loaded: number of faces:" << mesh.faces.size() << " number of vertices:" << mesh.verts.size() << std::endl; 763 | std::cout << "obj bounding box: min:(" << mesh.bounding_box[0].x << "," << mesh.bounding_box[0].y << "," << mesh.bounding_box[0].z << ") max:" 764 | << mesh.bounding_box[1].x << "," << mesh.bounding_box[1].y << "," << mesh.bounding_box[1].z << ")" << std::endl; 765 | } 766 | 767 | void createVBO(GLuint* vbo) 768 | { 769 | //create vertex buffer object 770 | glGenBuffers(1, vbo); 771 | glBindBuffer(GL_ARRAY_BUFFER, *vbo); 772 | 773 | //initialize VBO 774 | unsigned int size = width * height * sizeof(float3); // 3 floats 775 | glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW); 776 | glBindBuffer(GL_ARRAY_BUFFER, 0); 777 | 778 | //register VBO with CUDA 779 | cudaGLRegisterBufferObject(*vbo); 780 | } 781 | 782 | int main(int argc, char** argv){ 783 | 784 | // allocate memmory for the accumulation buffer on the GPU 785 | cudaMalloc(&accumulatebuffer, width * height * sizeof(float3)); 786 | // load triangle meshes in CUDA memory 787 | initCUDAmemoryTriMesh(); 788 | // init glut for OpenGL viewport 789 | glutInit(&argc, argv); 790 | // specify the display mode to be RGB and single buffering 791 | glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB); 792 | // specify the initial window position 793 | glutInitWindowPosition(100, 100); 794 | // specify the initial window size 795 | glutInitWindowSize(width, height); 796 | // create the window and set title 797 | glutCreateWindow("Basic triangle mesh path tracer in CUDA"); 798 | // init OpenGL 799 | glClearColor(0.0, 0.0, 0.0, 0.0); 800 | glMatrixMode(GL_PROJECTION); 801 | gluOrtho2D(0.0, width, 0.0, height); 802 | fprintf(stderr, "OpenGL initialized \n"); 803 | // register callback function to display graphics: 804 | glutDisplayFunc(disp); 805 | glewInit(); 806 | if (!glewIsSupported("GL_VERSION_2_0 ")) { 807 | fprintf(stderr, "ERROR: Support for necessary OpenGL extensions missing."); 808 | fflush(stderr); 809 | exit(0); 810 | } 811 | fprintf(stderr, "glew initialized \n"); 812 | // call Timer(): 813 | Timer(0); 814 | //create VBO (vertex buffer object) 815 | createVBO(&vbo); 816 | fprintf(stderr, "VBO created \n"); 817 | // enter the main loop and process events 818 | fprintf(stderr, "Entering glutMainLoop... \n"); 819 | glutMainLoop(); 820 | 821 | // free CUDA memory on exit 822 | cudaFree(accumulatebuffer); 823 | cudaFree(dev_triangle_p); 824 | cudaFree(dptr); 825 | } 826 | --------------------------------------------------------------------------------