├── README.md ├── gl_interop.h ├── main.cpp ├── opencl_kernel.cl └── opencl_tut3_3.png /README.md: -------------------------------------------------------------------------------- 1 | # OpenCL-path-tracing-tutorial-3-Part-1 2 | Creating an OpenGL viewport (fixed camera) 3 | 4 | Compiling instructions (for Visual Studio on Windows) 5 | 6 | To compile this code, it's recommended to download and install the AMD App SDK (this works for systems with GPUs or CPUs from AMD, Nvidia and Intel, even if your system doesn't have an AMD CPU or GPU installed) since Nvidia's OpenCL implementation is no longer up-to-date. 7 | 8 | - Start an empty Console project in Visual Studio (any recent version should work, including Express and Community) and set it to Release mode 9 | 10 | - In C/C++ > General, add the SDK include path to "Additional Include Directories" (e.g. "C:\Program Files (x86)\AMD APP SDK\2.9-1\include") 11 | 12 | - In Linker > Input, add "opencl.lib" and "glew32.lib" to "Additional Dependencies" 13 | 14 | - In Linker > General, add the OpenCL/OpenGL library path to "Additional Library Directories" (e.g. 
"C:\Program Files (x86)\AMD APP SDK\2.9-1\lib\x86") 15 | 16 | - Disable SAFESEH in Linker > Advanced (set "Image has safe exception handlers" to NO), this is required in order to use the GLEW library which is often built with an older version of Visual Studio 17 | 18 | - Add all the files to the project (or create a new file and paste the code) and build the code 19 | 20 | 21 | For more details, see https://raytracey.blogspot.com/2017/01/opencl-path-tracing-tutorial-3-opengl.html 22 | 23 | For part 2 of this tutorial, see https://github.com/straaljager/OpenCL-path-tracing-tutorial-3-Part-2 24 | 25 | Screenshot: 26 | 27 | ![Image description](https://github.com/straaljager/OpenCL-path-tracing-tutorial-3-Part-1/blob/master/opencl_tut3_3.png) 28 | -------------------------------------------------------------------------------- /gl_interop.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | //#define GL_SHARING_EXTENSION "cl_khr_gl_sharing" 6 | 7 | const int window_width = 1280; 8 | const int window_height = 720; 9 | 10 | // OpenGL vertex buffer object 11 | GLuint vbo; 12 | 13 | void render(); 14 | 15 | void initGL(int argc, char** argv){ 16 | // init GLUT for OpenGL viewport 17 | glutInit(&argc, argv); 18 | // specify the display mode to be RGB and single buffering 19 | glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB); 20 | // specify the initial window position 21 | glutInitWindowPosition(50, 50); 22 | // specify the initial window size 23 | glutInitWindowSize(window_width, window_height); 24 | // create the window and set title 25 | glutCreateWindow("Basic OpenCL path tracer"); 26 | 27 | // register GLUT callback function to display graphics: 28 | glutDisplayFunc(render); 29 | 30 | // initialise OpenGL extensions 31 | glewInit(); 32 | 33 | // initialise OpenGL 34 | glClearColor(0.0, 0.0, 0.0, 1.0); 35 | glMatrixMode(GL_PROJECTION); 36 | gluOrtho2D(0.0, window_width, 0.0, window_height); 37 | } 
38 | 39 | void createVBO(GLuint* vbo) 40 | { 41 | //create vertex buffer object 42 | glGenBuffers(1, vbo); 43 | glBindBuffer(GL_ARRAY_BUFFER, *vbo); 44 | 45 | //initialise VBO 46 | unsigned int size = window_width * window_height * sizeof(cl_float3); 47 | glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW); 48 | glBindBuffer(GL_ARRAY_BUFFER, 0); 49 | } 50 | 51 | void drawGL(){ 52 | 53 | //clear all pixels, then render from the vbo 54 | glClear(GL_COLOR_BUFFER_BIT); 55 | glBindBuffer(GL_ARRAY_BUFFER, vbo); 56 | glVertexPointer(2, GL_FLOAT, 16, 0); // size (2, 3 or 4), type, stride, pointer 57 | glColorPointer(4, GL_UNSIGNED_BYTE, 16, (GLvoid*)8); // size (3 or 4), type, stride, pointer 58 | 59 | glEnableClientState(GL_VERTEX_ARRAY); 60 | glEnableClientState(GL_COLOR_ARRAY); 61 | glDrawArrays(GL_POINTS, 0, window_width * window_height); 62 | glDisableClientState(GL_COLOR_ARRAY); 63 | glDisableClientState(GL_VERTEX_ARRAY); 64 | 65 | // flip backbuffer to screen 66 | glutSwapBuffers(); 67 | } 68 | 69 | void Timer(int value) { 70 | glutPostRedisplay(); 71 | glutTimerFunc(15, Timer, 0); 72 | } 73 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | // OpenCL ray tracing tutorial by Sam Lapere, 2016 2 | // http://raytracey.blogspot.com 3 | 4 | #include 5 | #include 6 | #include 7 | #include "gl_interop.h" 8 | #include 9 | 10 | // TODO 11 | // cleanup() 12 | // check for cl-gl interop 13 | 14 | using namespace std; 15 | using namespace cl; 16 | 17 | const int sphere_count = 9; 18 | 19 | 20 | // OpenCL objects 21 | Device device; 22 | CommandQueue queue; 23 | Kernel kernel; 24 | Context context; 25 | Program program; 26 | Buffer cl_output; 27 | Buffer cl_spheres; 28 | BufferGL cl_vbo; 29 | vector cl_vbos; 30 | 31 | // image buffer (not needed with real-time viewport) 32 | cl_float4* cpu_output; 33 | cl_int err; 34 | unsigned int framenumber = 0; 
35 | 36 | 37 | // padding with dummy variables are required for memory alignment 38 | // float3 is considered as float4 by OpenCL 39 | // alignment can also be enforced by using __attribute__ ((aligned (16))); 40 | // see https://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/attributes-variables.html 41 | 42 | struct Sphere 43 | { 44 | cl_float radius; 45 | cl_float dummy1; 46 | cl_float dummy2; 47 | cl_float dummy3; 48 | cl_float3 position; 49 | cl_float3 color; 50 | cl_float3 emission; 51 | }; 52 | 53 | Sphere cpu_spheres[sphere_count]; 54 | 55 | void pickPlatform(Platform& platform, const vector& platforms){ 56 | 57 | if (platforms.size() == 1) platform = platforms[0]; 58 | else{ 59 | int input = 0; 60 | cout << "\nChoose an OpenCL platform: "; 61 | cin >> input; 62 | 63 | // handle incorrect user input 64 | while (input < 1 || input > platforms.size()){ 65 | cin.clear(); //clear errors/bad flags on cin 66 | cin.ignore(cin.rdbuf()->in_avail(), '\n'); // ignores exact number of chars in cin buffer 67 | cout << "No such option. Choose an OpenCL platform: "; 68 | cin >> input; 69 | } 70 | platform = platforms[input - 1]; 71 | } 72 | } 73 | 74 | void pickDevice(Device& device, const vector& devices){ 75 | 76 | if (devices.size() == 1) device = devices[0]; 77 | else{ 78 | int input = 0; 79 | cout << "\nChoose an OpenCL device: "; 80 | cin >> input; 81 | 82 | // handle incorrect user input 83 | while (input < 1 || input > devices.size()){ 84 | cin.clear(); //clear errors/bad flags on cin 85 | cin.ignore(cin.rdbuf()->in_avail(), '\n'); // ignores exact number of chars in cin buffer 86 | cout << "No such option. 
Choose an OpenCL device: "; 87 | cin >> input; 88 | } 89 | device = devices[input - 1]; 90 | } 91 | } 92 | 93 | void printErrorLog(const Program& program, const Device& device){ 94 | 95 | // Get the error log and print to console 96 | string buildlog = program.getBuildInfo(device); 97 | cerr << "Build log:" << std::endl << buildlog << std::endl; 98 | 99 | // Print the error log to a file 100 | FILE *log = fopen("errorlog.txt", "w"); 101 | fprintf(log, "%s\n", buildlog); 102 | cout << "Error log saved in 'errorlog.txt'" << endl; 103 | system("PAUSE"); 104 | exit(1); 105 | } 106 | 107 | void initOpenCL() 108 | { 109 | // Get all available OpenCL platforms (e.g. AMD OpenCL, Nvidia CUDA, Intel OpenCL) 110 | vector platforms; 111 | Platform::get(&platforms); 112 | cout << "Available OpenCL platforms : " << endl << endl; 113 | for (int i = 0; i < platforms.size(); i++) 114 | cout << "\t" << i + 1 << ": " << platforms[i].getInfo() << endl; 115 | 116 | cout << endl << "WARNING: " << endl << endl; 117 | cout << "OpenCL-OpenGL interoperability is only tested " << endl; 118 | cout << "on discrete GPUs from Nvidia and AMD" << endl; 119 | cout << "Other devices (such as Intel integrated GPUs) may fail" << endl << endl; 120 | 121 | // Pick one platform 122 | Platform platform; 123 | pickPlatform(platform, platforms); 124 | cout << "\nUsing OpenCL platform: \t" << platform.getInfo() << endl; 125 | 126 | // Get available OpenCL devices on platform 127 | vector devices; 128 | platform.getDevices(CL_DEVICE_TYPE_GPU, &devices); 129 | 130 | cout << "Available OpenCL devices on this platform: " << endl << endl; 131 | for (int i = 0; i < devices.size(); i++){ 132 | cout << "\t" << i + 1 << ": " << devices[i].getInfo() << endl; 133 | cout << "\t\tMax compute units: " << devices[i].getInfo() << endl; 134 | cout << "\t\tMax work group size: " << devices[i].getInfo() << endl << endl; 135 | } 136 | 137 | 138 | // Pick one device 139 | //Device device; 140 | pickDevice(device, devices); 141 | 
cout << "\nUsing OpenCL device: \t" << device.getInfo() << endl; 142 | 143 | // Create an OpenCL context on that device. 144 | // Windows specific OpenCL-OpenGL interop 145 | cl_context_properties properties[] = 146 | { 147 | CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(), 148 | CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(), 149 | CL_CONTEXT_PLATFORM, (cl_context_properties)platform(), 150 | 0 151 | }; 152 | 153 | context = Context(device, properties); 154 | 155 | // Create a command queue 156 | queue = CommandQueue(context, device); 157 | 158 | 159 | // Convert the OpenCL source code to a string// Convert the OpenCL source code to a string 160 | string source; 161 | ifstream file("opencl_kernel.cl"); 162 | if (!file){ 163 | cout << "\nNo OpenCL file found!" << endl << "Exiting..." << endl; 164 | system("PAUSE"); 165 | exit(1); 166 | } 167 | while (!file.eof()){ 168 | char line[256]; 169 | file.getline(line, 255); 170 | source += line; 171 | } 172 | 173 | const char* kernel_source = source.c_str(); 174 | 175 | // Create an OpenCL program with source 176 | program = Program(context, kernel_source); 177 | 178 | // Build the program for the selected device 179 | cl_int result = program.build({ device }); // "-cl-fast-relaxed-math" 180 | if (result) cout << "Error during compilation OpenCL code!!!\n (" << result << ")" << endl; 181 | if (result == CL_BUILD_PROGRAM_FAILURE) printErrorLog(program, device); 182 | } 183 | 184 | #define float3(x, y, z) {{x, y, z}} // macro to replace ugly initializer braces 185 | 186 | void initScene(Sphere* cpu_spheres){ 187 | 188 | // left wall 189 | cpu_spheres[0].radius = 200.0f; 190 | cpu_spheres[0].position = float3(-200.6f, 0.0f, 0.0f); 191 | cpu_spheres[0].color = float3(0.75f, 0.25f, 0.25f); 192 | cpu_spheres[0].emission = float3(0.0f, 0.0f, 0.0f); 193 | 194 | // right wall 195 | cpu_spheres[1].radius = 200.0f; 196 | cpu_spheres[1].position = float3(200.6f, 0.0f, 0.0f); 197 | cpu_spheres[1].color = 
float3(0.25f, 0.25f, 0.75f); 198 | cpu_spheres[1].emission = float3(0.0f, 0.0f, 0.0f); 199 | 200 | // floor 201 | cpu_spheres[2].radius = 200.0f; 202 | cpu_spheres[2].position = float3(0.0f, -200.4f, 0.0f); 203 | cpu_spheres[2].color = float3(0.9f, 0.8f, 0.7f); 204 | cpu_spheres[2].emission = float3(0.0f, 0.0f, 0.0f); 205 | 206 | // ceiling 207 | cpu_spheres[3].radius = 200.0f; 208 | cpu_spheres[3].position = float3(0.0f, 200.4f, 0.0f); 209 | cpu_spheres[3].color = float3(0.9f, 0.8f, 0.7f); 210 | cpu_spheres[3].emission = float3(0.0f, 0.0f, 0.0f); 211 | 212 | // back wall 213 | cpu_spheres[4].radius = 200.0f; 214 | cpu_spheres[4].position = float3(0.0f, 0.0f, -200.4f); 215 | cpu_spheres[4].color = float3(0.9f, 0.8f, 0.7f); 216 | cpu_spheres[4].emission = float3(0.0f, 0.0f, 0.0f); 217 | 218 | // front wall 219 | cpu_spheres[5].radius = 200.0f; 220 | cpu_spheres[5].position = float3(0.0f, 0.0f, 202.0f); 221 | cpu_spheres[5].color = float3(0.9f, 0.8f, 0.7f); 222 | cpu_spheres[5].emission = float3(0.0f, 0.0f, 0.0f); 223 | 224 | // left sphere 225 | cpu_spheres[6].radius = 0.16f; 226 | cpu_spheres[6].position = float3(-0.25f, -0.24f, -0.1f); 227 | cpu_spheres[6].color = float3(0.9f, 0.8f, 0.7f); 228 | cpu_spheres[6].emission = float3(0.0f, 0.0f, 0.0f); 229 | 230 | // right sphere 231 | cpu_spheres[7].radius = 0.16f; 232 | cpu_spheres[7].position = float3(0.25f, -0.24f, 0.1f); 233 | cpu_spheres[7].color = float3(0.9f, 0.8f, 0.7f); 234 | cpu_spheres[7].emission = float3(0.0f, 0.0f, 0.0f); 235 | 236 | // lightsource 237 | cpu_spheres[8].radius = 1.0f; 238 | cpu_spheres[8].position = float3(0.0f, 1.36f, 0.0f); 239 | cpu_spheres[8].color = float3(0.0f, 0.0f, 0.0f); 240 | cpu_spheres[8].emission = float3(9.0f, 8.0f, 6.0f); 241 | 242 | } 243 | 244 | void initCLKernel(){ 245 | 246 | // pick a rendermode 247 | unsigned int rendermode = 1; 248 | 249 | // Create a kernel (entry point in the OpenCL source program) 250 | kernel = Kernel(program, "render_kernel"); 251 | 252 | // 
specify OpenCL kernel arguments 253 | //kernel.setArg(0, cl_output); 254 | kernel.setArg(0, cl_spheres); 255 | kernel.setArg(1, window_width); 256 | kernel.setArg(2, window_height); 257 | kernel.setArg(3, sphere_count); 258 | kernel.setArg(4, cl_vbo); 259 | kernel.setArg(5, framenumber); 260 | } 261 | 262 | void runKernel(){ 263 | // every pixel in the image has its own thread or "work item", 264 | // so the total amount of work items equals the number of pixels 265 | std::size_t global_work_size = window_width * window_height; 266 | std::size_t local_work_size = kernel.getWorkGroupInfo(device);; 267 | 268 | // Ensure the global work size is a multiple of local work size 269 | if (global_work_size % local_work_size != 0) 270 | global_work_size = (global_work_size / local_work_size + 1) * local_work_size; 271 | 272 | //Make sure OpenGL is done using the VBOs 273 | glFinish(); 274 | 275 | //this passes in the vector of VBO buffer objects 276 | queue.enqueueAcquireGLObjects(&cl_vbos); 277 | queue.finish(); 278 | 279 | // launch the kernel 280 | queue.enqueueNDRangeKernel(kernel, NULL, global_work_size, local_work_size); // local_work_size 281 | queue.finish(); 282 | 283 | //Release the VBOs so OpenGL can play with them 284 | queue.enqueueReleaseGLObjects(&cl_vbos); 285 | queue.finish(); 286 | } 287 | 288 | 289 | // hash function to calculate new seed for each frame 290 | // see http://www.reedbeta.com/blog/2013/01/12/quick-and-easy-gpu-random-numbers-in-d3d11/ 291 | unsigned int WangHash(unsigned int a) { 292 | a = (a ^ 61) ^ (a >> 16); 293 | a = a + (a << 3); 294 | a = a ^ (a >> 4); 295 | a = a * 0x27d4eb2d; 296 | a = a ^ (a >> 15); 297 | return a; 298 | } 299 | 300 | 301 | void render(){ 302 | 303 | framenumber++; 304 | 305 | cpu_spheres[6].position.s[1] += 0.01; 306 | 307 | queue.enqueueWriteBuffer(cl_spheres, CL_TRUE, 0, sphere_count * sizeof(Sphere), cpu_spheres); 308 | 309 | kernel.setArg(0, cl_spheres); 310 | kernel.setArg(5, WangHash(framenumber)); 311 | 312 | 
runKernel(); 313 | 314 | drawGL(); 315 | } 316 | 317 | void cleanUp(){ 318 | // delete cpu_output; 319 | } 320 | 321 | void main(int argc, char** argv){ 322 | 323 | // initialise OpenGL (GLEW and GLUT window + callback functions) 324 | initGL(argc, argv); 325 | cout << "OpenGL initialized \n"; 326 | 327 | // initialise OpenCL 328 | initOpenCL(); 329 | 330 | // create vertex buffer object 331 | createVBO(&vbo); 332 | 333 | // call Timer(): 334 | Timer(0); 335 | 336 | //make sure OpenGL is finished before we proceed 337 | glFinish(); 338 | 339 | // initialise scene 340 | initScene(cpu_spheres); 341 | 342 | cl_spheres = Buffer(context, CL_MEM_READ_ONLY, sphere_count * sizeof(Sphere)); 343 | queue.enqueueWriteBuffer(cl_spheres, CL_TRUE, 0, sphere_count * sizeof(Sphere), cpu_spheres); 344 | 345 | // create OpenCL buffer from OpenGL vertex buffer object 346 | cl_vbo = BufferGL(context, CL_MEM_WRITE_ONLY, vbo); 347 | cl_vbos.push_back(cl_vbo); 348 | 349 | // intitialise the kernel 350 | initCLKernel(); 351 | 352 | // start rendering continuously 353 | glutMainLoop(); 354 | 355 | // release memory 356 | cleanUp(); 357 | 358 | system("PAUSE"); 359 | } 360 | -------------------------------------------------------------------------------- /opencl_kernel.cl: -------------------------------------------------------------------------------- 1 | /* OpenCL based simple sphere path tracer by Sam Lapere, 2016*/ 2 | /* based on smallpt by Kevin Beason */ 3 | /* http://raytracey.blogspot.com */ 4 | 5 | __constant float EPSILON = 0.00003f; /* required to compensate for limited float precision */ 6 | __constant float PI = 3.14159265359f; 7 | __constant int SAMPLES = 16; 8 | 9 | typedef struct Ray{ 10 | float3 origin; 11 | float3 dir; 12 | } Ray; 13 | 14 | typedef struct Sphere{ 15 | float radius; 16 | float3 pos; 17 | float3 color; 18 | float3 emission; 19 | } Sphere; 20 | 21 | static float get_random(unsigned int *seed0, unsigned int *seed1) { 22 | 23 | /* hash the seeds using bitwise 
AND operations and bitshifts */ 24 | *seed0 = 36969 * ((*seed0) & 65535) + ((*seed0) >> 16); 25 | *seed1 = 18000 * ((*seed1) & 65535) + ((*seed1) >> 16); 26 | 27 | unsigned int ires = ((*seed0) << 16) + (*seed1); 28 | 29 | /* use union struct to convert int to float */ 30 | union { 31 | float f; 32 | unsigned int ui; 33 | } res; 34 | 35 | res.ui = (ires & 0x007fffff) | 0x40000000; /* bitwise AND, bitwise OR */ 36 | return (res.f - 2.0f) / 2.0f; 37 | } 38 | 39 | Ray createCamRay(const int x_coord, const int y_coord, const int width, const int height){ 40 | 41 | float fx = (float)x_coord / (float)width; /* convert int in range [0 - width] to float in range [0-1] */ 42 | float fy = (float)y_coord / (float)height; /* convert int in range [0 - height] to float in range [0-1] */ 43 | 44 | /* calculate aspect ratio */ 45 | float aspect_ratio = (float)(width) / (float)(height); 46 | float fx2 = (fx - 0.5f) * aspect_ratio; 47 | float fy2 = fy - 0.5f; 48 | 49 | /* determine position of pixel on screen */ 50 | float3 pixel_pos = (float3)(fx2, fy2, 0.0f); 51 | 52 | /* create camera ray*/ 53 | Ray ray; 54 | ray.origin = (float3)(0.0f, 0.1f, 2.0f); /* fixed camera position */ 55 | ray.dir = normalize(pixel_pos - ray.origin); /* vector from camera to pixel on screen */ 56 | 57 | return ray; 58 | } 59 | 60 | /* (__global Sphere* sphere, const Ray* ray) */ 61 | float intersect_sphere(const Sphere* sphere, const Ray* ray) /* version using local copy of sphere */ 62 | { 63 | float3 rayToCenter = sphere->pos - ray->origin; 64 | float b = dot(rayToCenter, ray->dir); 65 | float c = dot(rayToCenter, rayToCenter) - sphere->radius*sphere->radius; 66 | float disc = b * b - c; 67 | 68 | if (disc < 0.0f) return 0.0f; 69 | else disc = sqrt(disc); 70 | 71 | if ((b - disc) > EPSILON) return b - disc; 72 | if ((b + disc) > EPSILON) return b + disc; 73 | 74 | return 0.0f; 75 | } 76 | 77 | bool intersect_scene(__constant Sphere* spheres, const Ray* ray, float* t, int* sphere_id, const int 
sphere_count) 78 | { 79 | /* initialise t to a very large number, 80 | so t will be guaranteed to be smaller 81 | when a hit with the scene occurs */ 82 | 83 | float inf = 1e20f; 84 | *t = inf; 85 | 86 | /* check if the ray intersects each sphere in the scene */ 87 | for (int i = 0; i < sphere_count; i++) { 88 | 89 | Sphere sphere = spheres[i]; /* create local copy of sphere */ 90 | 91 | /* float hitdistance = intersect_sphere(&spheres[i], ray); */ 92 | float hitdistance = intersect_sphere(&sphere, ray); 93 | /* keep track of the closest intersection and hitobject found so far */ 94 | if (hitdistance != 0.0f && hitdistance < *t) { 95 | *t = hitdistance; 96 | *sphere_id = i; 97 | } 98 | } 99 | return *t < inf; /* true when ray interesects the scene */ 100 | } 101 | 102 | 103 | /* the path tracing function */ 104 | /* computes a path (starting from the camera) with a defined number of bounces, accumulates light/color at each bounce */ 105 | /* each ray hitting a surface will be reflected in a random direction (by randomly sampling the hemisphere above the hitpoint) */ 106 | /* small optimisation: diffuse ray directions are calculated using cosine weighted importance sampling */ 107 | 108 | float3 trace(__constant Sphere* spheres, const Ray* camray, const int sphere_count, const int* seed0, const int* seed1){ 109 | 110 | Ray ray = *camray; 111 | 112 | float3 accum_color = (float3)(0.0f, 0.0f, 0.0f); 113 | float3 mask = (float3)(1.0f, 1.0f, 1.0f); 114 | 115 | for (int bounces = 0; bounces < 8; bounces++){ 116 | 117 | float t; /* distance to intersection */ 118 | int hitsphere_id = 0; /* index of intersected sphere */ 119 | 120 | /* if ray misses scene, return background colour */ 121 | if (!intersect_scene(spheres, &ray, &t, &hitsphere_id, sphere_count)) 122 | return accum_color += mask * (float3)(0.15f, 0.15f, 0.25f); 123 | 124 | /* else, we've got a hit! 
Fetch the closest hit sphere */ 125 | Sphere hitsphere = spheres[hitsphere_id]; /* version with local copy of sphere */ 126 | 127 | /* compute the hitpoint using the ray equation */ 128 | float3 hitpoint = ray.origin + ray.dir * t; 129 | 130 | /* compute the surface normal and flip it if necessary to face the incoming ray */ 131 | float3 normal = normalize(hitpoint - hitsphere.pos); 132 | float3 normal_facing = dot(normal, ray.dir) < 0.0f ? normal : normal * (-1.0f); 133 | 134 | /* compute two random numbers to pick a random point on the hemisphere above the hitpoint*/ 135 | float rand1 = 2.0f * PI * get_random(seed0, seed1); 136 | float rand2 = get_random(seed0, seed1); 137 | float rand2s = sqrt(rand2); 138 | 139 | /* create a local orthogonal coordinate frame centered at the hitpoint */ 140 | float3 w = normal_facing; 141 | float3 axis = fabs(w.x) > 0.1f ? (float3)(0.0f, 1.0f, 0.0f) : (float3)(1.0f, 0.0f, 0.0f); 142 | float3 u = normalize(cross(axis, w)); 143 | float3 v = cross(w, u); 144 | 145 | /* use the coordinte frame and random numbers to compute the next ray direction */ 146 | float3 newdir = normalize(u * cos(rand1)*rand2s + v*sin(rand1)*rand2s + w*sqrt(1.0f - rand2)); 147 | 148 | /* add a very small offset to the hitpoint to prevent self intersection */ 149 | ray.origin = hitpoint + normal_facing * EPSILON; 150 | ray.dir = newdir; 151 | 152 | /* add the colour and light contributions to the accumulated colour */ 153 | accum_color += mask * hitsphere.emission; 154 | 155 | /* the mask colour picks up surface colours at each bounce */ 156 | mask *= hitsphere.color; 157 | 158 | /* perform cosine-weighted importance sampling for diffuse surfaces*/ 159 | mask *= dot(newdir, normal_facing); 160 | } 161 | 162 | return accum_color; 163 | } 164 | 165 | union Colour{ float c; uchar4 components;}; 166 | 167 | __kernel void render_kernel(__constant Sphere* spheres, const int width, const int height, const int sphere_count, __global float3* output, const int 
hashedframenumber) 168 | { 169 | unsigned int work_item_id = get_global_id(0); /* the unique global id of the work item for the current pixel */ 170 | unsigned int x_coord = work_item_id % width; /* x-coordinate of the pixel */ 171 | unsigned int y_coord = work_item_id / width; /* y-coordinate of the pixel */ 172 | 173 | /* seeds for random number generator */ 174 | unsigned int seed0 = x_coord + hashedframenumber; 175 | unsigned int seed1 = y_coord + hashedframenumber; 176 | 177 | Ray camray = createCamRay(x_coord, y_coord, width, height); 178 | 179 | /* add the light contribution of each sample and average over all samples*/ 180 | float3 finalcolor = (float3)(0.0f, 0.0f, 0.0f); 181 | float invSamples = 1.0f / SAMPLES; 182 | 183 | for (int i = 0; i < SAMPLES; i++) 184 | finalcolor += trace(spheres, &camray, sphere_count, &seed0, &seed1) * invSamples; 185 | 186 | finalcolor = (float3)(clamp(finalcolor.x, 0.0f, 1.0f), 187 | clamp(finalcolor.y, 0.0f, 1.0f), clamp(finalcolor.z, 0.0f, 1.0f)); 188 | 189 | union Colour fcolour; 190 | fcolour.components = (uchar4)( 191 | (unsigned char)(finalcolor.x * 255), 192 | (unsigned char)(finalcolor.y * 255), 193 | (unsigned char)(finalcolor.z * 255), 194 | 1); 195 | 196 | /* store the pixelcolour in the output buffer */ 197 | output[work_item_id] = (float3)(x_coord, y_coord, fcolour.c); 198 | } 199 | -------------------------------------------------------------------------------- /opencl_tut3_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/straaljager/OpenCL-path-tracing-tutorial-3-Part-1/64cc7738eccb679f290b7c99ec1019dca8548b67/opencl_tut3_3.png --------------------------------------------------------------------------------