├── README.md
├── bunny_glass_corn.png
├── cutil_math.h
└── tutorial2_cuda_pathtracer.cu


/README.md:
--------------------------------------------------------------------------------
 1 | # GPU-path-tracing-tutorial-2
 2 | Basic CUDA path tracer with triangle mesh support (based on CUDA raytracer from http://cg.alexandra.dk/?p=278)
 3 | Sam Lapere, 2015
 4 | 
 5 | More details at https://raytracey.blogspot.com/2015/12/gpu-path-tracing-tutorial-2-interactive.html
 6 | 
 7 | In order to keep the code to a minimum, there are lots of hardcoded values at the moment. The comments should clarify most of what's happening but let me know if something isn't clear. 
 8 | 
 9 | The code probably contains some bugs, as I haven't had time to do much testing. It will probably be revised for the next tutorial.
10 | 
11 | The executable needs glew32.dll and glut32.dll to run, and the triangle meshes (bunny and teapot) should be stored in a folder named "data" that resides in the project folder.
12 | 
13 | Screenshot (path tracing a simple triangle mesh):
14 | 
15 | ![Image description](https://github.com/straaljager/GPU-path-tracing-tutorial-2/blob/master/bunny_glass_corn.png)
16 | 
17 | For more screenshots produced with this code, see http://raytracey.blogspot.co.nz
18 | 
19 | Stanford Bunny mesh from https://graphics.stanford.edu/~mdfisher/Data/Meshes/bunny.obj
20 | Berkeley teapot mesh from http://inst.eecs.berkeley.edu/~cs184/sp09/assignments/teapot.obj
21 | 
22 | 


--------------------------------------------------------------------------------
/bunny_glass_corn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/straaljager/GPU-path-tracing-with-CUDA-tutorial-2/5c3f611bbf1f9d0bae3602343278214b2de9acf3/bunny_glass_corn.png


--------------------------------------------------------------------------------
/cutil_math.h:
--------------------------------------------------------------------------------
  1 |    /*
  2 |  * Copyright 1993-2009 NVIDIA Corporation.  All rights reserved.
  3 |  *
  4 |  * NVIDIA Corporation and its licensors retain all intellectual property and 
  5 |  * proprietary rights in and to this software and related documentation and 
  6 |  * any modifications thereto.  Any use, reproduction, disclosure, or distribution 
  7 |  * of this software and related documentation without an express license 
  8 |  * agreement from NVIDIA Corporation is strictly prohibited.
  9 |  * 
 10 |  */
 11 | 
 12 | /*
 13 |     This file implements common mathematical operations on vector types
 14 |     (float3, float4 etc.) since these are not provided as standard by CUDA.
 15 | 
 16 |     The syntax is modelled on the Cg standard library.
 17 | */
 18 | 
 19 | #ifndef CUTIL_MATH_H
 20 | #define CUTIL_MATH_H
 21 | 
 22 | #include "cuda_runtime.h"
 23 | 
 24 | ////////////////////////////////////////////////////////////////////////////////
 25 | typedef unsigned int uint;
 26 | typedef unsigned short ushort;
 27 | 
 28 | #ifndef __CUDACC__
 29 | #include <math.h>
 30 | 
 31 | inline float fminf(float a, float b)
 32 | {
 33 |   return a < b ? a : b;
 34 | }
 35 | 
 36 | inline float fmaxf(float a, float b)
 37 | {
 38 |   return a > b ? a : b;
 39 | }
 40 | 
 41 | inline int max(int a, int b)
 42 | {
 43 |   return a > b ? a : b;
 44 | }
 45 | 
 46 | inline int min(int a, int b)
 47 | {
 48 |   return a < b ? a : b;
 49 | }
 50 | 
 51 | inline float rsqrtf(float x)
 52 | {
 53 |     return 1.0f / sqrtf(x);
 54 | }
 55 | #endif
 56 | 
 57 | // float functions
 58 | ////////////////////////////////////////////////////////////////////////////////
 59 | 
 60 | // lerp
 61 | inline __device__ __host__ float lerp(float a, float b, float t)
 62 | {
 63 |     return a + t*(b-a);
 64 | }
 65 | 
 66 | // clamp
 67 | inline __device__ __host__ float clamp(float f, float a, float b)
 68 | {
 69 |     return fmaxf(a, fminf(f, b));
 70 | }
 71 | 
 72 | // int2 functions
 73 | ////////////////////////////////////////////////////////////////////////////////
 74 | 
 75 | // negate
 76 | inline __host__ __device__ int2 operator-(int2 &a)
 77 | {
 78 |     return make_int2(-a.x, -a.y);
 79 | }
 80 | 
 81 | // addition
 82 | inline __host__ __device__ int2 operator+(int2 a, int2 b)
 83 | {
 84 |     return make_int2(a.x + b.x, a.y + b.y);
 85 | }
 86 | inline __host__ __device__ void operator+=(int2 &a, int2 b)
 87 | {
 88 |     a.x += b.x; a.y += b.y;
 89 | }
 90 | 
 91 | // subtract
 92 | inline __host__ __device__ int2 operator-(int2 a, int2 b)
 93 | {
 94 |     return make_int2(a.x - b.x, a.y - b.y);
 95 | }
 96 | inline __host__ __device__ void operator-=(int2 &a, int2 b)
 97 | {
 98 |     a.x -= b.x; a.y -= b.y;
 99 | }
100 | 
101 | // multiply
102 | inline __host__ __device__ int2 operator*(int2 a, int2 b)
103 | {
104 |     return make_int2(a.x * b.x, a.y * b.y);
105 | }
106 | inline __host__ __device__ int2 operator*(int2 a, int s)
107 | {
108 |     return make_int2(a.x * s, a.y * s);
109 | }
110 | inline __host__ __device__ int2 operator*(int s, int2 a)
111 | {
112 |     return make_int2(a.x * s, a.y * s);
113 | }
114 | inline __host__ __device__ void operator*=(int2 &a, int s)
115 | {
116 |     a.x *= s; a.y *= s;
117 | }
118 | 
119 | // float2 functions
120 | ////////////////////////////////////////////////////////////////////////////////
121 | 
122 | // additional constructors
123 | inline __host__ __device__ float2 make_float2(float s)
124 | {
125 |     return make_float2(s, s);
126 | }
127 | inline __host__ __device__ float2 make_float2(int2 a)
128 | {
129 |     return make_float2(float(a.x), float(a.y));
130 | }
131 | 
132 | // negate
133 | inline __host__ __device__ float2 operator-(float2 &a)
134 | {
135 |     return make_float2(-a.x, -a.y);
136 | }
137 | 
138 | // addition
139 | inline __host__ __device__ float2 operator+(float2 a, float2 b)
140 | {
141 |     return make_float2(a.x + b.x, a.y + b.y);
142 | }
143 | inline __host__ __device__ void operator+=(float2 &a, float2 b)
144 | {
145 |     a.x += b.x; a.y += b.y;
146 | }
147 | 
148 | // subtract
149 | inline __host__ __device__ float2 operator-(float2 a, float2 b)
150 | {
151 |     return make_float2(a.x - b.x, a.y - b.y);
152 | }
153 | inline __host__ __device__ void operator-=(float2 &a, float2 b)
154 | {
155 |     a.x -= b.x; a.y -= b.y;
156 | }
157 | 
158 | // multiply
159 | inline __host__ __device__ float2 operator*(float2 a, float2 b)
160 | {
161 |     return make_float2(a.x * b.x, a.y * b.y);
162 | }
163 | inline __host__ __device__ float2 operator*(float2 a, float s)
164 | {
165 |     return make_float2(a.x * s, a.y * s);
166 | }
167 | inline __host__ __device__ float2 operator*(float s, float2 a)
168 | {
169 |     return make_float2(a.x * s, a.y * s);
170 | }
171 | inline __host__ __device__ void operator*=(float2 &a, float s)
172 | {
173 |     a.x *= s; a.y *= s;
174 | }
175 | 
176 | // divide
177 | inline __host__ __device__ float2 operator/(float2 a, float2 b)
178 | {
179 |     return make_float2(a.x / b.x, a.y / b.y);
180 | }
181 | inline __host__ __device__ float2 operator/(float2 a, float s)
182 | {
183 |     float inv = 1.0f / s;
184 |     return a * inv;
185 | }
186 | inline __host__ __device__ float2 operator/(float s, float2 a)
187 | {
188 |     float inv = 1.0f / s;
189 |     return a * inv;
190 | }
191 | inline __host__ __device__ void operator/=(float2 &a, float s)
192 | {
193 |     float inv = 1.0f / s;
194 |     a *= inv;
195 | }
196 | 
197 | // lerp
198 | inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
199 | {
200 |     return a + t*(b-a);
201 | }
202 | 
203 | // clamp
204 | inline __device__ __host__ float2 clamp(float2 v, float a, float b)
205 | {
206 |     return make_float2(clamp(v.x, a, b), clamp(v.y, a, b));
207 | }
208 | 
209 | inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
210 | {
211 |     return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
212 | }
213 | 
214 | // dot product
215 | inline __host__ __device__ float dot(float2 a, float2 b)
216 | { 
217 |     return a.x * b.x + a.y * b.y;
218 | }
219 | 
220 | // length
221 | inline __host__ __device__ float length(float2 v)
222 | {
223 |     return sqrtf(dot(v, v));
224 | }
225 | 
226 | // normalize
227 | inline __host__ __device__ float2 normalize(float2 v)
228 | {
229 |     float invLen = rsqrtf(dot(v, v));
230 |     return v * invLen;
231 | }
232 | 
233 | // floor
234 | inline __host__ __device__ float2 floor(const float2 v)
235 | {
236 |     return make_float2(floor(v.x), floor(v.y));
237 | }
238 | 
239 | // reflect
240 | inline __host__ __device__ float2 reflect(float2 i, float2 n)
241 | {
242 | 	return i - 2.0f * n * dot(n,i);
243 | }
244 | 
245 | // absolute value
246 | inline __host__ __device__ float2 fabs(float2 v)
247 | {
248 | 	return make_float2(fabs(v.x), fabs(v.y));
249 | }
250 | 
251 | // float3 functions
252 | ////////////////////////////////////////////////////////////////////////////////
253 | 
254 | // additional constructors
255 | inline __host__ __device__ float3 make_float3(float s)
256 | {
257 |     return make_float3(s, s, s);
258 | }
259 | inline __host__ __device__ float3 make_float3(float2 a)
260 | {
261 |     return make_float3(a.x, a.y, 0.0f);
262 | }
263 | inline __host__ __device__ float3 make_float3(float2 a, float s)
264 | {
265 |     return make_float3(a.x, a.y, s);
266 | }
267 | inline __host__ __device__ float3 make_float3(float4 a)
268 | {
269 |     return make_float3(a.x, a.y, a.z);  // discards w
270 | }
271 | inline __host__ __device__ float3 make_float3(int3 a)
272 | {
273 |     return make_float3(float(a.x), float(a.y), float(a.z));
274 | }
275 | 
276 | // negate
277 | inline __host__ __device__ float3 operator-(float3 &a)
278 | {
279 |     return make_float3(-a.x, -a.y, -a.z);
280 | }
281 | 
282 | // min
283 | static __inline__ __host__ __device__ float3 fminf(float3 a, float3 b)
284 | {
285 | 	return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
286 | }
287 | 
288 | // max
289 | static __inline__ __host__ __device__ float3 fmaxf(float3 a, float3 b)
290 | {
291 | 	return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
292 | }
293 | 
294 | // addition
295 | inline __host__ __device__ float3 operator+(float3 a, float3 b)
296 | {
297 |     return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
298 | }
299 | inline __host__ __device__ float3 operator+(float3 a, float b)
300 | {
301 |     return make_float3(a.x + b, a.y + b, a.z + b);
302 | }
303 | inline __host__ __device__ void operator+=(float3 &a, float3 b)
304 | {
305 |     a.x += b.x; a.y += b.y; a.z += b.z;
306 | }
307 | 
308 | // subtract
309 | inline __host__ __device__ float3 operator-(float3 a, float3 b)
310 | {
311 |     return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
312 | }
313 | inline __host__ __device__ float3 operator-(float3 a, float b)
314 | {
315 |     return make_float3(a.x - b, a.y - b, a.z - b);
316 | }
317 | inline __host__ __device__ void operator-=(float3 &a, float3 b)
318 | {
319 |     a.x -= b.x; a.y -= b.y; a.z -= b.z;
320 | }
321 | 
322 | // multiply
323 | inline __host__ __device__ float3 operator*(float3 a, float3 b)
324 | {
325 |     return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
326 | }
327 | inline __host__ __device__ float3 operator*(float3 a, float s)
328 | {
329 |     return make_float3(a.x * s, a.y * s, a.z * s);
330 | }
331 | inline __host__ __device__ float3 operator*(float s, float3 a)
332 | {
333 |     return make_float3(a.x * s, a.y * s, a.z * s);
334 | }
335 | inline __host__ __device__ void operator*=(float3 &a, float s)
336 | {
337 |     a.x *= s; a.y *= s; a.z *= s;
338 | }
339 | inline __host__ __device__ void operator*=(float3 &a, float3 b)
340 | {
341 | 	a.x *= b.x; a.y *= b.y; a.z *= b.z;;
342 | }
343 | 
344 | // divide
345 | inline __host__ __device__ float3 operator/(float3 a, float3 b)
346 | {
347 |     return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
348 | }
349 | inline __host__ __device__ float3 operator/(float3 a, float s)
350 | {
351 |     float inv = 1.0f / s;
352 |     return a * inv;
353 | }
354 | inline __host__ __device__ float3 operator/(float s, float3 a)
355 | {
356 |     float inv = 1.0f / s;
357 |     return a * inv;
358 | }
359 | inline __host__ __device__ void operator/=(float3 &a, float s)
360 | {
361 |     float inv = 1.0f / s;
362 |     a *= inv;
363 | }
364 | 
365 | // lerp
366 | inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
367 | {
368 |     return a + t*(b-a);
369 | }
370 | 
371 | // clamp
372 | inline __device__ __host__ float3 clamp(float3 v, float a, float b)
373 | {
374 |     return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
375 | }
376 | 
377 | inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
378 | {
379 |     return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
380 | }
381 | 
382 | // dot product
383 | inline __host__ __device__ float dot(float3 a, float3 b)
384 | { 
385 |     return a.x * b.x + a.y * b.y + a.z * b.z;
386 | }
387 | 
388 | // cross product
389 | inline __host__ __device__ float3 cross(float3 a, float3 b)
390 | { 
391 |     return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); 
392 | }
393 | 
394 | // length
395 | inline __host__ __device__ float length(float3 v)
396 | {
397 |     return sqrtf(dot(v, v));
398 | }
399 | 
400 | // normalize
401 | inline __host__ __device__ float3 normalize(float3 v)
402 | {
403 |     float invLen = rsqrtf(dot(v, v));
404 |     return v * invLen;
405 | }
406 | 
407 | // floor
408 | inline __host__ __device__ float3 floor(const float3 v)
409 | {
410 |     return make_float3(floor(v.x), floor(v.y), floor(v.z));
411 | }
412 | 
413 | // reflect
414 | inline __host__ __device__ float3 reflect(float3 i, float3 n)
415 | {
416 | 	return i - 2.0f * n * dot(n,i);
417 | }
418 | 
419 | // absolute value
420 | inline __host__ __device__ float3 fabs(float3 v)
421 | {
422 | 	return make_float3(fabs(v.x), fabs(v.y), fabs(v.z));
423 | }
424 | 
425 | // float4 functions
426 | ////////////////////////////////////////////////////////////////////////////////
427 | 
428 | // additional constructors
429 | inline __host__ __device__ float4 make_float4(float s)
430 | {
431 |     return make_float4(s, s, s, s);
432 | }
433 | inline __host__ __device__ float4 make_float4(float3 a)
434 | {
435 |     return make_float4(a.x, a.y, a.z, 0.0f);
436 | }
437 | inline __host__ __device__ float4 make_float4(float3 a, float w)
438 | {
439 |     return make_float4(a.x, a.y, a.z, w);
440 | }
441 | inline __host__ __device__ float4 make_float4(int4 a)
442 | {
443 |     return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
444 | }
445 | 
446 | // negate
447 | inline __host__ __device__ float4 operator-(float4 &a)
448 | {
449 |     return make_float4(-a.x, -a.y, -a.z, -a.w);
450 | }
451 | 
452 | // min
453 | static __inline__ __host__ __device__ float4 fminf(float4 a, float4 b)
454 | {
455 | 	return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
456 | }
457 | 
458 | // max
459 | static __inline__ __host__ __device__ float4 fmaxf(float4 a, float4 b)
460 | {
461 | 	return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
462 | }
463 | 
464 | // addition
465 | inline __host__ __device__ float4 operator+(float4 a, float4 b)
466 | {
467 |     return make_float4(a.x + b.x, a.y + b.y, a.z + b.z,  a.w + b.w);
468 | }
469 | inline __host__ __device__ void operator+=(float4 &a, float4 b)
470 | {
471 |     a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
472 | }
473 | 
474 | // subtract
475 | inline __host__ __device__ float4 operator-(float4 a, float4 b)
476 | {
477 |     return make_float4(a.x - b.x, a.y - b.y, a.z - b.z,  a.w - b.w);
478 | }
479 | inline __host__ __device__ void operator-=(float4 &a, float4 b)
480 | {
481 |     a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
482 | }
483 | 
484 | // multiply
485 | inline __host__ __device__ float4 operator*(float4 a, float s)
486 | {
487 |     return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
488 | }
489 | inline __host__ __device__ float4 operator*(float s, float4 a)
490 | {
491 |     return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
492 | }
493 | inline __host__ __device__ void operator*=(float4 &a, float s)
494 | {
495 |     a.x *= s; a.y *= s; a.z *= s; a.w *= s;
496 | }
497 | 
498 | // divide
499 | inline __host__ __device__ float4 operator/(float4 a, float4 b)
500 | {
501 |     return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
502 | }
503 | inline __host__ __device__ float4 operator/(float4 a, float s)
504 | {
505 |     float inv = 1.0f / s;
506 |     return a * inv;
507 | }
508 | inline __host__ __device__ float4 operator/(float s, float4 a)
509 | {
510 |     float inv = 1.0f / s;
511 |     return a * inv;
512 | }
513 | inline __host__ __device__ void operator/=(float4 &a, float s)
514 | {
515 |     float inv = 1.0f / s;
516 |     a *= inv;
517 | }
518 | 
519 | // lerp
520 | inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
521 | {
522 |     return a + t*(b-a);
523 | }
524 | 
525 | // clamp
526 | inline __device__ __host__ float4 clamp(float4 v, float a, float b)
527 | {
528 |     return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
529 | }
530 | 
531 | inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
532 | {
533 |     return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
534 | }
535 | 
536 | // dot product
537 | inline __host__ __device__ float dot(float4 a, float4 b)
538 | { 
539 |     return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
540 | }
541 | 
542 | // length
543 | inline __host__ __device__ float length(float4 r)
544 | {
545 |     return sqrtf(dot(r, r));
546 | }
547 | 
548 | // normalize
549 | inline __host__ __device__ float4 normalize(float4 v)
550 | {
551 |     float invLen = rsqrtf(dot(v, v));
552 |     return v * invLen;
553 | }
554 | 
555 | // floor
556 | inline __host__ __device__ float4 floor(const float4 v)
557 | {
558 |     return make_float4(floor(v.x), floor(v.y), floor(v.z), floor(v.w));
559 | }
560 | 
561 | // absolute value
562 | inline __host__ __device__ float4 fabs(float4 v)
563 | {
564 | 	return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w));
565 | }
566 | 
567 | // int3 functions
568 | ////////////////////////////////////////////////////////////////////////////////
569 | 
570 | // additional constructors
571 | inline __host__ __device__ int3 make_int3(int s)
572 | {
573 |     return make_int3(s, s, s);
574 | }
575 | inline __host__ __device__ int3 make_int3(float3 a)
576 | {
577 |     return make_int3(int(a.x), int(a.y), int(a.z));
578 | }
579 | 
580 | // negate
581 | inline __host__ __device__ int3 operator-(int3 &a)
582 | {
583 |     return make_int3(-a.x, -a.y, -a.z);
584 | }
585 | 
586 | // min
587 | inline __host__ __device__ int3 min(int3 a, int3 b)
588 | {
589 |     return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
590 | }
591 | 
592 | // max
593 | inline __host__ __device__ int3 max(int3 a, int3 b)
594 | {
595 |     return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
596 | }
597 | 
598 | // addition
599 | inline __host__ __device__ int3 operator+(int3 a, int3 b)
600 | {
601 |     return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
602 | }
603 | inline __host__ __device__ void operator+=(int3 &a, int3 b)
604 | {
605 |     a.x += b.x; a.y += b.y; a.z += b.z;
606 | }
607 | 
608 | // subtract
609 | inline __host__ __device__ int3 operator-(int3 a, int3 b)
610 | {
611 |     return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
612 | }
613 | 
614 | inline __host__ __device__ void operator-=(int3 &a, int3 b)
615 | {
616 |     a.x -= b.x; a.y -= b.y; a.z -= b.z;
617 | }
618 | 
619 | // multiply
620 | inline __host__ __device__ int3 operator*(int3 a, int3 b)
621 | {
622 |     return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
623 | }
624 | inline __host__ __device__ int3 operator*(int3 a, int s)
625 | {
626 |     return make_int3(a.x * s, a.y * s, a.z * s);
627 | }
628 | inline __host__ __device__ int3 operator*(int s, int3 a)
629 | {
630 |     return make_int3(a.x * s, a.y * s, a.z * s);
631 | }
632 | inline __host__ __device__ void operator*=(int3 &a, int s)
633 | {
634 |     a.x *= s; a.y *= s; a.z *= s;
635 | }
636 | 
637 | // divide
638 | inline __host__ __device__ int3 operator/(int3 a, int3 b)
639 | {
640 |     return make_int3(a.x / b.x, a.y / b.y, a.z / b.z);
641 | }
642 | inline __host__ __device__ int3 operator/(int3 a, int s)
643 | {
644 |     return make_int3(a.x / s, a.y / s, a.z / s);
645 | }
646 | inline __host__ __device__ int3 operator/(int s, int3 a)
647 | {
648 |     return make_int3(a.x / s, a.y / s, a.z / s);
649 | }
650 | inline __host__ __device__ void operator/=(int3 &a, int s)
651 | {
652 |     a.x /= s; a.y /= s; a.z /= s;
653 | }
654 | 
655 | // clamp
656 | inline __device__ __host__ int clamp(int f, int a, int b)
657 | {
658 |     return max(a, min(f, b));
659 | }
660 | 
661 | inline __device__ __host__ int3 clamp(int3 v, int a, int b)
662 | {
663 |     return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
664 | }
665 | 
666 | inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
667 | {
668 |     return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
669 | }
670 | 
671 | 
672 | // uint3 functions
673 | ////////////////////////////////////////////////////////////////////////////////
674 | 
675 | // additional constructors
676 | inline __host__ __device__ uint3 make_uint3(uint s)
677 | {
678 |     return make_uint3(s, s, s);
679 | }
680 | inline __host__ __device__ uint3 make_uint3(float3 a)
681 | {
682 |     return make_uint3(uint(a.x), uint(a.y), uint(a.z));
683 | }
684 | 
685 | // min
686 | inline __host__ __device__ uint3 min(uint3 a, uint3 b)
687 | {
688 |     return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
689 | }
690 | 
691 | // max
692 | inline __host__ __device__ uint3 max(uint3 a, uint3 b)
693 | {
694 |     return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
695 | }
696 | 
697 | // addition
698 | inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
699 | {
700 |     return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
701 | }
702 | inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
703 | {
704 |     a.x += b.x; a.y += b.y; a.z += b.z;
705 | }
706 | 
707 | // subtract
708 | inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
709 | {
710 |     return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
711 | }
712 | 
713 | inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
714 | {
715 |     a.x -= b.x; a.y -= b.y; a.z -= b.z;
716 | }
717 | 
718 | // multiply
719 | inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
720 | {
721 |     return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
722 | }
723 | inline __host__ __device__ uint3 operator*(uint3 a, uint s)
724 | {
725 |     return make_uint3(a.x * s, a.y * s, a.z * s);
726 | }
727 | inline __host__ __device__ uint3 operator*(uint s, uint3 a)
728 | {
729 |     return make_uint3(a.x * s, a.y * s, a.z * s);
730 | }
731 | inline __host__ __device__ void operator*=(uint3 &a, uint s)
732 | {
733 |     a.x *= s; a.y *= s; a.z *= s;
734 | }
735 | 
736 | // divide
737 | inline __host__ __device__ uint3 operator/(uint3 a, uint3 b)
738 | {
739 |     return make_uint3(a.x / b.x, a.y / b.y, a.z / b.z);
740 | }
741 | inline __host__ __device__ uint3 operator/(uint3 a, uint s)
742 | {
743 |     return make_uint3(a.x / s, a.y / s, a.z / s);
744 | }
745 | inline __host__ __device__ uint3 operator/(uint s, uint3 a)
746 | {
747 |     return make_uint3(a.x / s, a.y / s, a.z / s);
748 | }
749 | inline __host__ __device__ void operator/=(uint3 &a, uint s)
750 | {
751 |     a.x /= s; a.y /= s; a.z /= s;
752 | }
753 | 
754 | // clamp
755 | inline __device__ __host__ uint clamp(uint f, uint a, uint b)
756 | {
757 |     return max(a, min(f, b));
758 | }
759 | 
760 | inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
761 | {
762 |     return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
763 | }
764 | 
765 | inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
766 | {
767 |     return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
768 | }
769 | 
770 | #endif
771 | 


--------------------------------------------------------------------------------
/tutorial2_cuda_pathtracer.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 | *  Basic CUDA based triangle mesh path tracer.
  3 | *  For background info, see http://raytracey.blogspot.co.nz/2015/12/gpu-path-tracing-tutorial-2-interactive.html
  4 | *  Based on CUDA ray tracing code from http://cg.alexandra.dk/?p=278
  5 | *  Copyright (C) 2015  Sam Lapere
  6 | *
  7 | *  This program is free software; you can redistribute it and/or modify
  8 | *  it under the terms of the GNU General Public License as published by
  9 | *  the Free Software Foundation; either version 2 of the License, or
 10 | *  (at your option) any later version.
 11 | *
 12 | *  This program is distributed in the hope that it will be useful,
 13 | *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 | *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 | *  GNU General Public License for more details.
 16 | */
 17 | 
 18 | #include <iostream>
 19 | #include <fstream>
 20 | #include <string>
 21 | #include <vector>
 22 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\cuda.h"
 23 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\math_functions.h"
 24 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\vector_types.h"
 25 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\vector_functions.h"
 26 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\device_launch_parameters.h"
 27 | #include "cutil_math.h"  // required for float3 vector math
 28 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\extras\CUPTI\include\GL\glew.h"
 29 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\extras\CUPTI\include\GL\glut.h"
 30 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\cuda_runtime.h"
 31 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\cuda_gl_interop.h"
 32 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\curand.h"
 33 | #include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\include\curand_kernel.h"
 34 | 
 35 | #define M_PI 3.14159265359f
 36 | #define width 1024	// screenwidth
 37 | #define height 576	// screenheight
 38 | #define samps  1	// samples per pixel per pass
 39 | 
 40 | int total_number_of_triangles = 0;
 41 | int frames = 0;
 42 | 
 43 | // scene bounding box
 44 | float3 scene_aabbox_min;
 45 | float3 scene_aabbox_max;
 46 | 
 47 | // the scene triangles are stored in a 1D CUDA texture of float4 for memory alignment
 48 | // store two edges instead of vertices
 49 | // each triangle is stored as three float4s: (float4 first_vertex, float4 edge1, float4 edge2)
 50 | texture<float4, 1, cudaReadModeElementType> triangle_texture;
 51 | 
 52 | // hardcoded camera position
 53 | __device__ float3 firstcamorig = { 50, 52, 295.6 };
 54 | 
 55 | // OpenGL vertex buffer object for real-time viewport
 56 | GLuint vbo;
 57 | void *d_vbo_buffer = NULL;
 58 | 
 59 | struct Ray {
 60 | 	float3 orig;	// ray origin
 61 | 	float3 dir;		// ray direction	
 62 | 	__device__ Ray(float3 o_, float3 d_) : orig(o_), dir(d_) {}
 63 | };
 64 | 
 65 | enum Refl_t { DIFF, SPEC, REFR };  // material types, used in radiance(), only DIFF used here
 66 | 
 67 | // SPHERES
 68 | 
 69 | struct Sphere {
 70 | 
 71 | 	float rad;				// radius 
 72 | 	float3 pos, emi, col;	// position, emission, color 
 73 | 	Refl_t refl;			// reflection type (DIFFuse, SPECular, REFRactive)
 74 | 
 75 | 	__device__ float intersect(const Ray &r) const { // returns distance, 0 if nohit 
 76 | 
 77 | 		// Ray/sphere intersection
 78 | 		// Quadratic formula required to solve ax^2 + bx + c = 0 
 79 | 		// Solution x = (-b +- sqrt(b*b - 4ac)) / 2a
 80 | 		// Solve t^2*d.d + 2*t*(o-p).d + (o-p).(o-p)-R^2 = 0 
 81 | 
 82 | 		float3 op = pos - r.orig;  // 
 83 | 		float t, epsilon = 0.01f;
 84 | 		float b = dot(op, r.dir);
 85 | 		float disc = b*b - dot(op, op) + rad*rad; // discriminant
 86 | 		if (disc<0) return 0; else disc = sqrtf(disc);
 87 | 		return (t = b - disc)>epsilon ? t : ((t = b + disc)>epsilon ? t : 0);
 88 | 	}
 89 | };
 90 | 
 91 | // TRIANGLES
 92 | 
 93 | // the classic ray triangle intersection: http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf
 94 | // for an explanation see http://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/moller-trumbore-ray-triangle-intersection
 95 | 
 96 | __device__ float RayTriangleIntersection(const Ray &r,
 97 | 	const float3 &v0,
 98 | 	const float3 &edge1,
 99 | 	const float3 &edge2)
100 | {
101 | 
102 | 	float3 tvec = r.orig - v0;
103 | 	float3 pvec = cross(r.dir, edge2);
104 | 	float  det = dot(edge1, pvec);
105 | 
106 | 	det = __fdividef(1.0f, det);  // CUDA intrinsic function 
107 | 
108 | 	float u = dot(tvec, pvec) * det;
109 | 
110 | 	if (u < 0.0f || u > 1.0f)
111 | 		return -1.0f;
112 | 
113 | 	float3 qvec = cross(tvec, edge1);
114 | 
115 | 	float v = dot(r.dir, qvec) * det;
116 | 
117 | 	if (v < 0.0f || (u + v) > 1.0f)
118 | 		return -1.0f;
119 | 
120 | 	return dot(edge2, qvec) * det;
121 | }
122 | 
123 | __device__ float3 getTriangleNormal(const int triangleIndex){
124 | 
125 | 	float4 edge1 = tex1Dfetch(triangle_texture, triangleIndex * 3 + 1);
126 | 	float4 edge2 = tex1Dfetch(triangle_texture, triangleIndex * 3 + 2);
127 | 
128 | 	// cross product of two triangle edges yields a vector orthogonal to triangle plane
129 | 	float3 trinormal = cross(make_float3(edge1.x, edge1.y, edge1.z), make_float3(edge2.x, edge2.y, edge2.z));
130 | 	trinormal = normalize(trinormal);
131 | 
132 | 	return trinormal;
133 | }
134 | 
135 | __device__ void intersectAllTriangles(const Ray& r, float& t_scene, int& triangle_id, const int number_of_triangles, int& geomtype){
136 | 
137 | 	for (int i = 0; i < number_of_triangles; i++)
138 | 	{
139 | 		// the triangles are packed into the 1D texture using three consecutive float4 structs for each triangle, 
140 | 		// first float4 contains the first vertex, second float4 contains the first precomputed edge, third float4 contains second precomputed edge like this: 
141 | 		// (float4(vertex.x,vertex.y,vertex.z, 0), float4 (egde1.x,egde1.y,egde1.z,0),float4 (egde2.x,egde2.y,egde2.z,0)) 
142 | 
143 | 		// i is triangle index, each triangle represented by 3 float4s in triangle_texture
144 | 		float4 v0 = tex1Dfetch(triangle_texture, i * 3);
145 | 		float4 edge1 = tex1Dfetch(triangle_texture, i * 3 + 1);
146 | 		float4 edge2 = tex1Dfetch(triangle_texture, i * 3 + 2);
147 | 
148 | 		// intersect ray with reconstructed triangle	
149 | 		float t = RayTriangleIntersection(r,
150 | 			make_float3(v0.x, v0.y, v0.z),
151 | 			make_float3(edge1.x, edge1.y, edge1.z),
152 | 			make_float3(edge2.x, edge2.y, edge2.z));
153 | 
154 | 		// keep track of closest distance and closest triangle
155 | 		// if ray/tri intersection finds an intersection point that is closer than closest intersection found so far
156 | 		if (t < t_scene && t > 0.001)
157 | 		{
158 | 			t_scene = t;
159 | 			triangle_id = i;
160 | 			geomtype = 3;
161 | 		}
162 | 	}
163 | }
164 | 
165 | 
166 | // AXIS ALIGNED BOXES
167 | 
168 | // helper functions
// component-wise minimum of two vectors (a component of b is kept whenever "a < b" is false)
inline __device__ float3 minf3(float3 a, float3 b){
	float3 lo = b;
	if (a.x < b.x) lo.x = a.x;
	if (a.y < b.y) lo.y = a.y;
	if (a.z < b.z) lo.z = a.z;
	return lo;
}
// component-wise maximum of two vectors (a component of b is kept whenever "a > b" is false)
inline __device__ float3 maxf3(float3 a, float3 b){
	float3 hi = b;
	if (a.x > b.x) hi.x = a.x;
	if (a.y > b.y) hi.y = a.y;
	if (a.z > b.z) hi.z = a.z;
	return hi;
}
// smaller of two floats (b is returned whenever "a < b" is false)
inline __device__ float minf1(float a, float b){
	if (a < b) return a;
	return b;
}
// larger of two floats (b is returned whenever "a > b" is false)
inline __device__ float maxf1(float a, float b){
	if (a > b) return a;
	return b;
}
173 | 
// Axis-aligned box primitive: two extreme corners plus material data.
// The scene table initialises this struct with brace lists, so it has no constructors.
struct Box {

	float3 min; // minimum bounds
	float3 max; // maximum bounds
	float3 emi; // emission
	float3 col; // colour
	Refl_t refl; // material type

	// ray/box intersection (slab test)
	// Returns the distance along the ray at which it enters the box, or 0 when the box is missed.
	// 0 is also returned when the entry distance is not greater than epsilon, which includes
	// every ray whose origin lies inside the box.
	// for theoretical background of the algorithm see 
	// http://www.scratchapixel.com/lessons/3d-basic-rendering/minimal-ray-tracer-rendering-simple-shapes/ray-box-intersection
	// optimised code from http://www.gamedev.net/topic/495636-raybox-collision-intersection-point/
	__device__ float intersect(const Ray &r) const {

		const float epsilon = 0.001f; // required to prevent self intersection

		// ray parameters at which the ray crosses the two bounding planes of each axis
		const float3 tmin = (min - r.orig) / r.dir;
		const float3 tmax = (max - r.orig) / r.dir;

		// per axis: parameter where the ray enters (real_min) and leaves (real_max) the slab
		const float3 real_min = minf3(tmin, tmax);
		const float3 real_max = maxf3(tmin, tmax);

		// the ray is inside the box between the latest entry and the earliest exit
		const float minmax = minf1(minf1(real_max.x, real_max.y), real_max.z);
		const float maxmin = maxf1(maxf1(real_min.x, real_min.y), real_min.z);

		if (!(minmax >= maxmin)) return 0.0f; // slabs do not overlap: miss
		return maxmin > epsilon ? maxmin : 0.0f;
	}

	// calculate normal for point on axis aligned box
	// The point is expected to lie on one of the six faces; the first face found within
	// epsilon of the point decides the normal, and the +z face is the fallback.
	__device__ float3 normalAt(const float3 &point) const {

		const float epsilon = 0.001f;

		if (fabsf(min.x - point.x) < epsilon) return make_float3(-1, 0, 0);
		if (fabsf(max.x - point.x) < epsilon) return make_float3(1, 0, 0);
		if (fabsf(min.y - point.y) < epsilon) return make_float3(0, -1, 0);
		if (fabsf(max.y - point.y) < epsilon) return make_float3(0, 1, 0);
		if (fabsf(min.z - point.z) < epsilon) return make_float3(0, 0, -1);
		return make_float3(0, 0, 1);
	}
};
221 | 
// scene spheres: the 7 active entries describe an outdoor scene (sky, ground, horizon brightener,
// mountains and three smaller spheres); the 9-sphere Cornell box set-up is kept commented out
// the table is stored in constant GPU memory
__constant__ Sphere spheres[] = {
	// FORMAT: { float radius, float3 position, float3 emission, float3 colour, Refl_t material }
	// cornell box
	//{ 1e5f, { 1e5f + 1.0f, 40.8f, 81.6f }, { 0.0f, 0.0f, 0.0f }, { 0.75f, 0.25f, 0.25f }, DIFF }, //Left 1e5f
	//{ 1e5f, { -1e5f + 99.0f, 40.8f, 81.6f }, { 0.0f, 0.0f, 0.0f }, { .25f, .25f, .75f }, DIFF }, //Right 
	//{ 1e5f, { 50.0f, 40.8f, 1e5f }, { 0.0f, 0.0f, 0.0f }, { .75f, .75f, .75f }, DIFF }, //Back 
	//{ 1e5f, { 50.0f, 40.8f, -1e5f + 600.0f }, { 0.0f, 0.0f, 0.0f }, { 0.00f, 0.00f, 0.00f }, DIFF }, //Front 
	//{ 1e5f, { 50.0f, -1e5f, 81.6f }, { 0.0f, 0.0f, 0.0f }, { .75f, .75f, .75f }, DIFF }, //Bottom 
	//{ 1e5f, { 50.0f, -1e5f + 81.6f, 81.6f }, { 0.0f, 0.0f, 0.0f }, { .75f, .75f, .75f }, DIFF }, //Top 
	//{ 16.5f, { 27.0f, 16.5f, 47.0f }, { 0.0f, 0.0f, 0.0f }, { 0.99f, 0.99f, 0.99f }, SPEC }, // small sphere 1
	//{ 16.5f, { 73.0f, 16.5f, 78.0f }, { 0.0f, 0.f, .0f }, { 0.09f, 0.49f, 0.3f }, REFR }, // small sphere 2
	//{ 600.0f, { 50.0f, 681.6f - .5f, 81.6f }, { 3.0f, 2.5f, 2.0f }, { 0.0f, 0.0f, 0.0f }, DIFF }  // Light 12, 10 ,8

	//outdoor scene: radius, position, emission, color, material

	//{ 1600, { 3000.0f, 10, 6000 }, { 37, 34, 30 }, { 0.f, 0.f, 0.f }, DIFF },  // 37, 34, 30 // sun
	//{ 1560, { 3500.0f, 0, 7000 }, { 50, 25, 2.5 }, { 0.f, 0.f, 0.f }, DIFF },  //  150, 75, 7.5 // sun 2
	{ 10000, { 50.0f, 40.8f, -1060 }, { 0.0003, 0.01, 0.15 }, { 0.175f, 0.175f, 0.25f }, DIFF }, // sky
	{ 100000, { 50.0f, -100000, 0 }, { 0.0, 0.0, 0 }, { 0.8f, 0.2f, 0.f }, DIFF }, // ground
	{ 110000, { 50.0f, -110048.5, 0 }, { 3.6, 2.0, 0.2 }, { 0.f, 0.f, 0.f }, DIFF },  // horizon brightener
	{ 4e4, { 50.0f, -4e4 - 30, -3000 }, { 0, 0, 0 }, { 0.2f, 0.2f, 0.2f }, DIFF }, // mountains
	{ 82.5, { 30.0f, 180.5, 42 }, { 16, 12, 6 }, { .6f, .6f, 0.6f }, DIFF },  // small sphere 1 (the only one of the three with non-zero emission)
	{ 12, { 115.0f, 10, 105 }, { 0.0, 0.0, 0.0 }, { 0.9f, 0.9f, 0.9f }, REFR },  // small sphere 2 (refractive)
	{ 22, { 65.0f, 22, 24 }, { 0, 0, 0 }, { 0.9f, 0.9f, 0.9f }, SPEC }, // small sphere 3 (mirror)
};
249 | 
// scene boxes: three grey, non-emitting diffuse axis-aligned boxes, stored in constant GPU memory
__constant__ Box boxes[] = {
	// FORMAT: { float3 minbounds,    float3 maxbounds,         float3 emission,    float3 colour,       Refl_t }
	{ { 5.0f, 0.0f, 70.0f }, { 45.0f, 11.0f, 115.0f }, { .0f, .0f, 0.0f }, { 0.5f, 0.5f, 0.5f }, DIFF },
	{ { 85.0f, 0.0f, 95.0f }, { 95.0f, 20.0f, 105.0f }, { .0f, .0f, 0.0f }, { 0.5f, 0.5f, 0.5f }, DIFF },
	{ { 75.0f, 20.0f, 85.0f }, { 105.0f, 22.0f, 115.0f }, { .0f, .0f, 0.0f }, { 0.5f, 0.5f, 0.5f }, DIFF },
};
256 | 
257 | 
// Finds the closest intersection of ray r with all primitives in the scene (spheres, boxes, triangles).
// Outputs:
//   t            - distance to the closest hit (1e20 when nothing was hit)
//   sphere_id, box_id, triangle_id - updated whenever a primitive of that kind becomes the nearest candidate
//   geomtype     - kind of the closest primitive: 1 = sphere, 2 = box, 3 = triangle
// bbmin and bbmax are the two corners of the bounding box around all triangle meshes
// (accepted in either order). Returns true when any primitive was hit.
__device__ inline bool intersect_scene(const Ray &r, float &t, int &sphere_id, int &box_id, int& triangle_id, const int number_of_triangles, int &geomtype, const float3& bbmin, const float3& bbmax){

	const float inf = 1e20f;
	t = inf;

	// SPHERES
	// intersect all spheres in the scene, keeping the closest hit
	const int numspheres = sizeof(spheres) / sizeof(Sphere);
	for (int i = numspheres; i--;) {
		const float d = spheres[i].intersect(r); // 0 means no hit
		if (d != 0.0f && d < t){ t = d; sphere_id = i; geomtype = 1; }
	}

	// BOXES
	// intersect all boxes in the scene
	const int numboxes = sizeof(boxes) / sizeof(Box);
	for (int i = numboxes; i--;) {
		const float k = boxes[i].intersect(r); // 0 means no hit
		if (k != 0.0f && k < t){ t = k; box_id = i; geomtype = 2; }
	}

	// TRIANGLES
	Box scene_bbox; // bounding box around triangle meshes (only min/max are used)
	scene_bbox.min = minf3(bbmin, bbmax);
	scene_bbox.max = maxf3(bbmin, bbmax);

	// Box::intersect reports 0 ("no hit") when the ray starts inside the box or within its
	// epsilon of the surface. Such rays (e.g. rays refracted into a mesh or bouncing off it)
	// must still be tested against the triangles, so the origin is checked separately.
	const float pad = 0.001f; // same epsilon as used by Box::intersect
	const bool origin_in_bbox =
		r.orig.x >= scene_bbox.min.x - pad && r.orig.x <= scene_bbox.max.x + pad &&
		r.orig.y >= scene_bbox.min.y - pad && r.orig.y <= scene_bbox.max.y + pad &&
		r.orig.z >= scene_bbox.min.z - pad && r.orig.z <= scene_bbox.max.z + pad;

	// if the ray starts in or hits the bounding box of the triangle meshes, intersect it with all triangles
	if (origin_in_bbox || scene_bbox.intersect(r) != 0.0f){
		intersectAllTriangles(r, t, triangle_id, number_of_triangles, geomtype);
	}

	// t is distance to closest intersection of ray with all primitives in the scene (spheres, boxes and triangles)
	return t < inf;
}
293 | 
294 | 
// Wang integer hash, used on the host to turn the frame counter into a fresh seed for each frame
// see http://www.reedbeta.com/blog/2013/01/12/quick-and-easy-gpu-random-numbers-in-d3d11/
uint WangHash(uint a) {
	a ^= 61;
	a ^= a >> 16; // note: the shift uses the value produced by the xor with 61 only in the low bits; see below
	a += a << 3;
	a ^= a >> 4;
	a *= 0x27d4eb2d;
	a ^= a >> 15;
	return a;
}
305 | 
// radiance function
// Follows one light path through the scene and returns the colour it carries back.
// The path is traced iteratively (instead of the recursion of the CPU code); at every hit the
// emission of the surface, weighted by the colour mask built up so far, is added to the result.
//   r                - first ray of the path; it is overwritten with each following path segment
//   randstate        - curand state used for every random decision along the path
//   totaltris        - number of triangles stored in triangle_texture
//   scene_aabb_min / scene_aabb_max - corners of the bounding box around all triangle meshes
__device__ float3 radiance(Ray &r, curandState *randstate, const int totaltris, const float3& scene_aabb_min, const float3& scene_aabb_max){ // returns ray color

	// colour mask: product of the surface colours (and sampling weights) met so far
	float3 mask = make_float3(1.0f, 1.0f, 1.0f);
	// accumulated colour
	float3 accucolor = make_float3(0.0f, 0.0f, 0.0f);

	for (int bounces = 0; bounces < 5; bounces++){  // follow at most 5 path segments

		// reset scene intersection function parameters
		float t = 100000; // distance to intersection 
		int sphere_id = -1;   // index of intersected sphere
		int box_id = -1;      // index of intersected box
		int triangle_id = -1; // index of intersected triangle
		int geomtype = -1;    // 1 = sphere, 2 = box, 3 = triangle
		float3 f;  // primitive colour
		float3 emit; // primitive emission colour
		float3 x; // intersection point
		float3 n; // normal
		float3 nl; // oriented normal
		float3 d; // ray direction of next path segment
		Refl_t refltype;

		// intersect ray with scene
		// intersect_scene keeps track of closest intersected primitive and distance to closest intersection point
		// on a miss the path ends: keep what was gathered on earlier segments instead of discarding it
		if (!intersect_scene(r, t, sphere_id, box_id, triangle_id, totaltris, geomtype, scene_aabb_min, scene_aabb_max))
			return accucolor;

		// we've got a hit with a scene primitive
		x = r.orig + r.dir*t;  // intersection point on object

		// determine geometry type of primitive: sphere/box/triangle
		if (geomtype == 1){
			Sphere &sphere = spheres[sphere_id]; // hit object with closest intersection
			n = normalize(x - sphere.pos);		// normal
			f = sphere.col;   // object colour
			refltype = sphere.refl;
			emit = sphere.emi;  // object emission
		}
		else if (geomtype == 2){
			Box &box = boxes[box_id];
			n = normalize(box.normalAt(x)); // normal
			f = box.col;  // box colour
			refltype = box.refl;
			emit = box.emi; // box emission
		}
		else if (geomtype == 3){
			n = normalize(getTriangleNormal(triangle_id));  // normal 

			// colour, refltype and emit value are hardcoded and apply to all triangles
			// no per triangle material support yet
			f = make_float3(0.9f, 0.4f, 0.1f);  // triangle colour
			refltype = REFR;
			emit = make_float3(0.0f, 0.0f, 0.0f);
		}
		else {
			// unknown primitive kind: nothing sensible to shade, end the path
			return accucolor;
		}

		nl = dot(n, r.dir) < 0 ? n : n * -1; // normal oriented against the incoming ray
		accucolor += (mask * emit);

		// SHADING: diffuse, specular or refractive

		// ideal diffuse reflection (see "Realistic Ray Tracing", P. Shirley)
		if (refltype == DIFF){

			// create 2 random numbers
			float r1 = 2 * M_PI * curand_uniform(randstate);
			float r2 = curand_uniform(randstate);
			float r2s = sqrtf(r2);

			// compute orthonormal coordinate frame uvw with hitpoint as origin 
			float3 w = nl;
			float3 u = normalize(cross((fabsf(w.x) > .1f ? make_float3(0, 1, 0) : make_float3(1, 0, 0)), w));
			float3 v = cross(w, u);

			// compute cosine weighted random ray direction on hemisphere 
			d = normalize(u*cosf(r1)*r2s + v*sinf(r1)*r2s + w*sqrtf(1 - r2));

			// offset origin next path segment to prevent self intersection
			x += nl * 0.03f;

			// multiply mask with colour of object
			mask *= f;
		}

		// ideal specular reflection (mirror) 
		else if (refltype == SPEC){

			// compute reflected ray direction (law of reflection: mirror r.dir about the normal)
			d = r.dir - 2.0f * n * dot(n, r.dir);

			// offset origin next path segment to prevent self intersection
			x += nl * 0.01f;

			// multiply mask with colour of object
			mask *= f;
		}

		// ideal refraction (based on smallpt code by Kevin Beason)
		else if (refltype == REFR){

			bool into = dot(n, nl) > 0; // is ray entering or leaving refractive material?
			float nc = 1.0f;  // Index of Refraction air
			float nt = 1.5f;  // Index of Refraction glass/water
			float nnt = into ? nc / nt : nt / nc;  // IOR ratio of refractive materials
			float ddn = dot(r.dir, nl);
			float cos2t = 1.0f - nnt*nnt * (1.f - ddn*ddn);

			if (cos2t < 0.0f) // total internal reflection 
			{
				d = reflect(r.dir, n); //d = r.dir - 2.0f * n * dot(n, r.dir);
				x += nl * 0.01f;
			}
			else // cos2t > 0
			{
				// compute direction of transmission ray
				float3 tdir = normalize(r.dir * nnt - n * ((into ? 1 : -1) * (ddn*nnt + sqrtf(cos2t))));

				// Schlick approximation of the Fresnel reflectance;
				// the whole (nt + nc)^2 term belongs in the denominator
				float R0 = (nt - nc)*(nt - nc) / ((nt + nc)*(nt + nc));
				float c = 1.f - (into ? -ddn : dot(tdir, n));
				float Re = R0 + (1.f - R0) * c * c * c * c * c;
				float Tr = 1 - Re; // Transmission
				float P = .25f + .5f * Re; // probability of following the reflected ray
				float RP = Re / P;         // weight of the reflected ray when it is chosen with probability P
				float TP = Tr / (1.f - P); // weight of the transmitted ray when it is chosen with probability 1 - P

				// randomly choose reflection or transmission ray
				// the choice must use the same probability P that the weights RP and TP were derived from
				if (curand_uniform(randstate) < P) // reflection ray
				{
					mask *= RP;
					d = reflect(r.dir, n);
					x += nl * 0.02f;
				}
				else // transmission ray
				{
					mask *= TP;
					d = tdir; //r = Ray(x, tdir); 
					x += nl * 0.0005f; // epsilon must be small to avoid artefacts
				}
			}
		}

		// set up origin and direction of next path segment
		r.orig = x;
		r.dir = d;
	}

	// add radiance up to a certain ray depth
	// return accumulated ray colour after all bounces are computed
	return accucolor;
}
469 | 
// required to convert colour to a format that OpenGL can display  
// The render kernel writes four colour bytes into 'components' and stores the same storage,
// read back as the float 'c', in the third float of each output element.
union Colour  // 4 bytes = 4 chars = 1 float
{
	float c;            // the four bytes viewed as one float (only used to carry the bits)
	uchar4 components;  // the four bytes viewed as individual colour channels
};
476 | 
// Path tracing kernel: one thread renders one pixel.
// Launch layout: 2D grid of 2D blocks; pixel (x, y) is derived from block and thread indices,
// and threads that fall outside the width x height image do nothing.
//   output            - per pixel (x, y, packed colour), read by OpenGL as vertex + colour data
//   accumbuffer       - per pixel running sum of all frames rendered so far
//   numtriangles      - number of triangles stored in triangle_texture
//   framenumber       - number of frames accumulated so far (divisor for the average)
//   hashedframenumber - per-frame seed for the random number generator
//   scene_bbmin / scene_bbmax - corners of the bounding box around all triangle meshes
__global__ void render_kernel(float3 *output, float3* accumbuffer, const int numtriangles, int framenumber, uint hashedframenumber, float3 scene_bbmin, float3 scene_bbmax){   // float3 *gputexdata1, int *texoffsets

	// assign a CUDA thread to every pixel by using the threadIndex
	unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
	unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;

	// threads outside the image must not touch the buffers
	if (x >= (unsigned int)width || y >= (unsigned int)height) return;

	// global threadId, see richiesams blogspot
	int threadId = (blockIdx.x + blockIdx.y * gridDim.x) * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;

	// create random number generator, see RichieSams blogspot
	curandState randState; // state of the random number generator, to prevent repetition
	curand_init(hashedframenumber + threadId, 0, 0, &randState);

	Ray cam(firstcamorig, normalize(make_float3(0, -0.042612f, -1)));
	float3 cx = make_float3(width * .5135f / height, 0.0f, 0.0f);  // ray direction offset along X-axis 
	float3 cy = normalize(cross(cx, cam.dir)) * .5135f; // ray dir offset along Y-axis, .5135 is FOV angle
	float3 pixelcol = make_float3(0.0f, 0.0f, 0.0f); // final pixel color, starts at zero for every pixel

	int i = (height - y - 1)*width + x; // pixel index

	for (int s = 0; s < samps; s++){

		// compute primary ray direction
		float3 d = cx*((.25f + x) / width - .5f) + cy*((.25f + y) / height - .5f) + cam.dir;
		// normalize primary ray direction
		d = normalize(d);
		// radiance() overwrites the ray it is given, so it needs a named ray rather than a temporary
		// camera rays are pushed forward (d * 40) to start in the interior of the scene
		Ray primary(cam.orig + d * 40, d);
		// add accumulated colour from path bounces
		pixelcol += radiance(primary, &randState, numtriangles, scene_bbmin, scene_bbmax)*(1.0f / samps);
	}

	// add pixel colour to accumulation buffer (accumulates all samples) 
	accumbuffer[i] += pixelcol;
	// averaged colour: divide colour by the number of calculated frames so far
	float3 tempcol = accumbuffer[i] / framenumber;

	Colour fcolour;
	float3 colour = make_float3(clamp(tempcol.x, 0.0f, 1.0f), clamp(tempcol.y, 0.0f, 1.0f), clamp(tempcol.z, 0.0f, 1.0f));
	// convert from 96-bit to 24-bit colour + perform gamma correction
	fcolour.components = make_uchar4((unsigned char)(powf(colour.x, 1 / 2.2f) * 255), (unsigned char)(powf(colour.y, 1 / 2.2f) * 255), (unsigned char)(powf(colour.z, 1 / 2.2f) * 255), 1);
	// store pixel coordinates and pixelcolour in OpenGL readable outputbuffer
	output[i] = make_float3(x, y, fcolour.c);
}
521 | 
// GLUT timer callback: asks for a redraw and re-registers itself so it fires again 30 ms later
void Timer(int obsolete) {

	(void)obsolete; // the callback argument carries no information here

	// request a call of the display callback
	glutPostRedisplay();
	// schedule the next tick
	glutTimerFunc(30, Timer, 0);
}
527 | 
// device-side global, initialised to zero
// NOTE(review): no use of this variable is visible in this part of the file - confirm it is still needed
__device__ float timer = 0.0f;
529 | 
// host-side helper: limits x to the range [0, 1]
inline float clamp(float x){
	if (x < 0) return 0;
	if (x > 1) return 1;
	return x;
}
531 | 
532 | //inline int toInt(float x){ return int(pow(clamp(x), 1 / 2.2) * 255 + .5); }  // RGB float in range [0,1] to int in range [0, 255]
533 | 
// buffer for accumulating samples over several frames
// (device memory, allocated with cudaMalloc in main and passed to render_kernel)
float3* accumulatebuffer;
// output buffer
// (device pointer obtained in disp by mapping the OpenGL vertex buffer object for CUDA)
float3 *dptr;
538 | 
// GLUT display callback: renders one more frame with the CUDA path tracer into the
// OpenGL vertex buffer object and draws that buffer as one coloured point per pixel.
void disp(void)
{
	frames++;
	cudaDeviceSynchronize();

	// map vertex buffer object for acces by CUDA 
	cudaGLMapBufferObject((void**)&dptr, vbo);

	//clear all pixels:
	glClear(GL_COLOR_BUFFER_BIT);

	// RAY TRACING:
	// dim3 grid(WINDOW / block.x, WINDOW / block.y, 1);
	// dim3 CUDA specific syntax, block and grid are required to schedule CUDA threads over streaming multiprocessors
	dim3 block(16, 16, 1);
	dim3 grid(width / block.x, height / block.y, 1);

	// launch CUDA path tracing kernel, pass in a hashed seed based on number of frames
	// the bounding box is passed as (min, max), matching the order of the kernel parameters
	render_kernel<<<grid, block>>>(dptr, accumulatebuffer, total_number_of_triangles, frames, WangHash(frames), scene_aabbox_min, scene_aabbox_max);  // launches CUDA render kernel from the host

	// a bad launch configuration is only reported through cudaGetLastError
	cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess)
		fprintf(stderr, "ERROR: render_kernel launch failed: %s\n", cudaGetErrorString(status));

	// wait for the kernel; errors raised while it was running surface here
	status = cudaDeviceSynchronize();
	if (status != cudaSuccess)
		fprintf(stderr, "ERROR: render_kernel execution failed: %s\n", cudaGetErrorString(status));

	// unmap buffer
	cudaGLUnmapBufferObject(vbo);
	//glFlush();
	// each element is 12 bytes: two floats (x, y) followed by four colour bytes packed in the third float
	glBindBuffer(GL_ARRAY_BUFFER, vbo);
	glVertexPointer(2, GL_FLOAT, 12, 0);
	glColorPointer(4, GL_UNSIGNED_BYTE, 12, (GLvoid*)8);

	glEnableClientState(GL_VERTEX_ARRAY);
	glEnableClientState(GL_COLOR_ARRAY);
	glDrawArrays(GL_POINTS, 0, width * height);
	glDisableClientState(GL_COLOR_ARRAY);
	glDisableClientState(GL_VERTEX_ARRAY);

	glutSwapBuffers();
	//glutPostRedisplay();
}
576 | 
// load triangle data in a CUDA texture
extern "C"
{
	// Binds the device buffer dev_triangle_p to triangle_texture as a 1D texture of float4 texels.
	// The buffer is expected to hold three float4 per triangle (number_of_triangles * 3 texels).
	// A failure to bind is reported on stderr.
	void bindTriangles(float *dev_triangle_p, unsigned int number_of_triangles)
	{
		triangle_texture.normalized = false;                      // texels are addressed with unnormalized coordinates
		triangle_texture.filterMode = cudaFilterModePoint;        // point mode: texel values are fetched as stored, no interpolation
		triangle_texture.addressMode[0] = cudaAddressModeWrap;    // wrap texture coordinates

		const size_t size = sizeof(float4)*number_of_triangles * 3;
		cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float4>();
		const cudaError_t status = cudaBindTexture(0, triangle_texture, dev_triangle_p, channelDesc, size);
		if (status != cudaSuccess)
			fprintf(stderr, "ERROR: binding the triangle texture failed: %s\n", cudaGetErrorString(status));
	}
}
591 | 
// helpers to load triangle data
// one triangular face of a mesh, as read from an obj "f" line
struct TriangleFace
{
	int v[3]; // vertex indices; 1-based (1 is subtracted before they are used to index the vertex list)
};
597 | 
// a triangle mesh as filled in by loadObj
struct TriangleMesh
{
	std::vector<float3> verts;        // vertex positions
	std::vector<TriangleFace> faces;  // triangles, each referring to three entries of verts
	float3 bounding_box[2];           // axis-aligned bounds: [0] = minimum corner, [1] = maximum corner
};
604 | 
// the two meshes placed in the scene; both are loaded in initCUDAmemoryTriMesh
TriangleMesh mesh1;
TriangleMesh mesh2;

float *dev_triangle_p; // the cuda device pointer that points to the uploaded triangles

void loadObj(const std::string filename, TriangleMesh &mesh); // forward declaration
611 | 
// Appends every face of 'mesh' to 'triangles' as three float4 entries
// (first vertex, edge v1 - v0, edge v2 - v0) after scaling the vertices by 'scale'
// and translating them by 'offset'.
// Faces whose vertex indices fall outside [1, number of vertices] are skipped;
// the number of skipped faces is returned.
static unsigned int appendMeshTriangles(const TriangleMesh &mesh, const float scale, const float3 &offset, std::vector<float4> &triangles)
{
	unsigned int skipped = 0;
	const int vertcount = (int)mesh.verts.size();

	for (size_t i = 0; i < mesh.faces.size(); i++)
	{
		const TriangleFace &face = mesh.faces[i];

		// obj indices are 1-based; reject anything that would index outside the vertex list
		bool valid = true;
		for (int k = 0; k < 3; k++)
			if (face.v[k] < 1 || face.v[k] > vertcount) valid = false;
		if (!valid) { skipped++; continue; }

		// make a local copy of the triangle vertices
		float3 v0 = mesh.verts[face.v[0] - 1];
		float3 v1 = mesh.verts[face.v[1] - 1];
		float3 v2 = mesh.verts[face.v[2] - 1];

		// scale
		v0 *= scale;
		v1 *= scale;
		v2 *= scale;

		// translate
		v0 += offset;
		v1 += offset;
		v2 += offset;

		// store triangle data as float4
		// store two edges per triangle instead of vertices, to save some calculations in the
		// ray triangle intersection test
		triangles.push_back(make_float4(v0.x, v0.y, v0.z, 0));
		triangles.push_back(make_float4(v1.x - v0.x, v1.y - v0.y, v1.z - v0.z, 0));
		triangles.push_back(make_float4(v2.x - v0.x, v2.y - v0.y, v2.z - v0.z, 0));
	}

	return skipped;
}

// 1. load triangle mesh data from obj files
// 2. copy data to CPU memory (into vector<float4> triangles)
// 3. copy to CUDA global memory (allocated with dev_triangle_p pointer)
// 4. copy to CUDA texture memory with bindtriangles()
// Also sets total_number_of_triangles and the scene bounding box (scene_aabbox_min / scene_aabbox_max).
// Exits the program when the triangle data cannot be uploaded to the GPU.
void initCUDAmemoryTriMesh()
{
	loadObj("data/bunny.obj", mesh1);
	loadObj("data/bunny.obj", mesh2);

	// scalefactor and offset to position/scale triangle meshes
	float scalefactor1 = 200;
	float scalefactor2 = 300;  // 300
	float3 offset1 = make_float3(90, 22, 100);// (30, -2, 80);
	float3 offset2 = make_float3(30, -2, 80);

	std::vector<float4> triangles;

	unsigned int skipped = appendMeshTriangles(mesh1, scalefactor1, offset1, triangles);

	// apply the same transform to the bounding box of this mesh
	mesh1.bounding_box[0] *= scalefactor1; mesh1.bounding_box[0] += offset1;
	mesh1.bounding_box[1] *= scalefactor1; mesh1.bounding_box[1] += offset1;

	skipped += appendMeshTriangles(mesh2, scalefactor2, offset2, triangles);

	mesh2.bounding_box[0] *= scalefactor2; mesh2.bounding_box[0] += offset2;
	mesh2.bounding_box[1] *= scalefactor2; mesh2.bounding_box[1] += offset2;

	if (skipped > 0)
		std::cout << "WARNING: skipped " << skipped << " faces with out-of-range vertex indices" << std::endl;

	std::cout << "total number of triangles check:" << mesh1.faces.size() + mesh2.faces.size() << " == " << triangles.size() / 3 << std::endl;

	// calculate total number of triangles in the scene
	size_t triangle_size = triangles.size() * sizeof(float4);
	int total_num_triangles = (int)(triangles.size() / 3);
	total_number_of_triangles = total_num_triangles;

	if (triangle_size > 0)
	{
		// allocate memory for the triangle meshes on the GPU
		cudaError_t status = cudaMalloc((void **)&dev_triangle_p, triangle_size);

		// copy triangle data to GPU
		if (status == cudaSuccess)
			status = cudaMemcpy(dev_triangle_p, &triangles[0], triangle_size, cudaMemcpyHostToDevice);

		if (status != cudaSuccess)
		{
			std::cout << "ERROR: uploading triangle data to the GPU failed: " << cudaGetErrorString(status) << "\n";
			exit(-1);
		}

		// load triangle data into a CUDA texture
		bindTriangles(dev_triangle_p, total_num_triangles);
	}

	// compute scene bounding box by merging bounding boxes of individual meshes 
	scene_aabbox_min = mesh2.bounding_box[0];
	scene_aabbox_max = mesh2.bounding_box[1];
	scene_aabbox_min = fminf(scene_aabbox_min, mesh1.bounding_box[0]);
	scene_aabbox_max = fmaxf(scene_aabbox_max, mesh1.bounding_box[1]);

}
706 | 
// read triangle data from obj file
// Appends the vertices ("v x y z" lines) and triangular faces ("f a b c" lines) of the file
// to 'mesh' and then computes the bounding box of all vertices in 'mesh'.
// All other lines are ignored. The program is terminated when the file cannot be opened
// or when a vertex or face line is not in the expected format.
void loadObj(const std::string filename, TriangleMesh &mesh)
{
	std::ifstream in(filename.c_str());

	if (!in.good())
	{
		std::cout << "ERROR: loading obj:(" << filename << ") file not found or not good" << "\n";
		system("PAUSE"); // keeps a Windows console window open so the message can be read
		exit(-1);
	}

	std::string line;
	float f1, f2, f3;

	// std::getline copes with lines of any length and with a last line that has no trailing newline
	while (std::getline(in, line))
	{
		if (line.size() < 2) continue;

		// the keyword must be followed by whitespace, so that e.g. "vn" and "vt" lines are not taken for vertices
		const bool keyword_ends = (line[1] == ' ' || line[1] == '\t');

		// reading a vertex
		if (line[0] == 'v' && keyword_ends){
			if (sscanf(line.c_str(), "v %f %f %f", &f1, &f2, &f3) == 3){
				mesh.verts.push_back(make_float3(f1, f2, f3));
			}
			else{
				std::cout << "ERROR: vertex not in wanted format in OBJLoader" << "\n";
				exit(-1);
			}
		}

		// reading faceMtls 
		else if (line[0] == 'f' && keyword_ends)
		{
			TriangleFace f;
			int nt = sscanf(line.c_str(), "f %d %d %d", &f.v[0], &f.v[1], &f.v[2]);
			if (nt != 3){
				std::cout << "ERROR: I don't know the format of that FaceMtl" << "\n";
				exit(-1);
			}

			mesh.faces.push_back(f);
		}
	}

	// calculate the bounding box of the mesh
	mesh.bounding_box[0] = make_float3(1000000, 1000000, 1000000);
	mesh.bounding_box[1] = make_float3(-1000000, -1000000, -1000000);
	for (unsigned int i = 0; i < mesh.verts.size(); i++)
	{
		//update min and max value
		mesh.bounding_box[0] = fminf(mesh.verts[i], mesh.bounding_box[0]);
		mesh.bounding_box[1] = fmaxf(mesh.verts[i], mesh.bounding_box[1]);
	}

	std::cout << "obj file loaded: number of faces:" << mesh.faces.size() << " number of vertices:" << mesh.verts.size() << std::endl;
	std::cout << "obj bounding box: min:(" << mesh.bounding_box[0].x << "," << mesh.bounding_box[0].y << "," << mesh.bounding_box[0].z << ") max:"
		<< mesh.bounding_box[1].x << "," << mesh.bounding_box[1].y << "," << mesh.bounding_box[1].z << ")" << std::endl;
}
766 | 
// Creates the OpenGL vertex buffer object that receives the rendered image
// and registers it with CUDA. The buffer id is written to *vbo.
void createVBO(GLuint* vbo)
{
	// storage for one float3 per pixel
	const unsigned int bytes = width * height * sizeof(float3);

	// generate the buffer and give it storage, without uploading any initial data
	glGenBuffers(1, vbo);
	glBindBuffer(GL_ARRAY_BUFFER, *vbo);
	glBufferData(GL_ARRAY_BUFFER, bytes, 0, GL_DYNAMIC_DRAW);
	glBindBuffer(GL_ARRAY_BUFFER, 0);

	// make the buffer known to CUDA
	cudaGLRegisterBufferObject(*vbo);
}
781 | 
// Program entry point: allocates the GPU buffers, loads the triangle meshes,
// sets up the GLUT/OpenGL window and enters the GLUT main loop.
int main(int argc, char** argv){

	// allocate memory for the accumulation buffer on the GPU and clear it:
	// the render kernel only ever adds to this buffer, so it has to start at zero
	const size_t accum_bytes = width * height * sizeof(float3);
	if (cudaMalloc(&accumulatebuffer, accum_bytes) != cudaSuccess ||
		cudaMemset(accumulatebuffer, 0, accum_bytes) != cudaSuccess) {
		fprintf(stderr, "ERROR: could not allocate the accumulation buffer on the GPU\n");
		fflush(stderr);
		exit(-1);
	}
	// load triangle meshes in CUDA memory
	initCUDAmemoryTriMesh();
	// init glut for OpenGL viewport
	glutInit(&argc, argv);
	// specify the display mode to be RGB and double buffering
	glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB);
	// specify the initial window position
	glutInitWindowPosition(100, 100);
	// specify the initial window size
	glutInitWindowSize(width, height);
	// create the window and set title
	glutCreateWindow("Basic triangle mesh path tracer in CUDA");
	// init OpenGL
	glClearColor(0.0, 0.0, 0.0, 0.0);
	glMatrixMode(GL_PROJECTION);
	gluOrtho2D(0.0, width, 0.0, height);
	fprintf(stderr, "OpenGL initialized \n");
	// register callback function to display graphics:
	glutDisplayFunc(disp);
	glewInit();
	if (!glewIsSupported("GL_VERSION_2_0 ")) {
		fprintf(stderr, "ERROR: Support for necessary OpenGL extensions missing.");
		fflush(stderr);
		exit(0);
	}
	fprintf(stderr, "glew initialized  \n");
	// call Timer():
	Timer(0);
	//create VBO (vertex buffer object)
	createVBO(&vbo);
	fprintf(stderr, "VBO created  \n");
	// enter the main loop and process events
	fprintf(stderr, "Entering glutMainLoop...  \n");
	glutMainLoop();

	// free CUDA memory on exit
	// (only reached with GLUT implementations whose glutMainLoop returns)
	cudaFree(accumulatebuffer);
	cudaFree(dev_triangle_p);
	// dptr points into the mapped OpenGL buffer and was not allocated with cudaMalloc,
	// so the buffer is unregistered from CUDA instead of being passed to cudaFree
	cudaGLUnregisterBufferObject(vbo);

	return 0;
}
826 | 


--------------------------------------------------------------------------------