├── result.png ├── README.md ├── CMakeLists.txt ├── optixInOneWeekend.h ├── optixInOneWeekend.cu └── optixInOneWeekend.cpp /result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sketchbooks99/optixInOneWeekend/HEAD/result.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OptixInOneWeekend 2 | 3 | このリポジトリは [レイトレ Advent Calendar 2021](https://qiita.com/advent-calendar/2021/raytracing) の12月20日担当分 「[NVIDIA OptiXでRay tracing in One Weekend](https://qiita.com/sketchbooks99/items/de98db331f8c8d24628c)」のサンプルコードです。 4 | 5 | ![result.png](result.png) 6 | 7 | 動作するにはOptiXをインストール後、`SDK/`以下に本リポジトリを配置し、SDK/CMakeLists.txtのビルドディレクトリ指定部分に`add_subdirectory(optixInOneWeekend)`を追加してください。 8 | 9 | # 要件 10 | - NVIDIA OptiX 7.4 to 7.7 11 | - OptiX 7.3以前のバージョンで動作するには、使用バージョンに合わせてAPIを修正してください。 -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions 6 | # are met: 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # * Redistributions in binary form must reproduce the above copyright 10 | # notice, this list of conditions and the following disclaimer in the 11 | # documentation and/or other materials provided with the distribution. 12 | # * Neither the name of NVIDIA CORPORATION nor the names of its 13 | # contributors may be used to endorse or promote products derived 14 | # from this software without specific prior written permission. 15 | # 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | # 28 | 29 | OPTIX_add_sample_executable( optixInOneWeekend target_name 30 | optixInOneWeekend.cu 31 | optixInOneWeekend.cpp 32 | optixInOneWeekend.h 33 | OPTIONS -rdc true 34 | ) 35 | 36 | target_link_libraries( ${target_name} 37 | ${CUDA_LIBRARIES} 38 | ) 39 | -------------------------------------------------------------------------------- /optixInOneWeekend.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | // 28 | 29 | struct Params 30 | { 31 | unsigned int subframe_index; 32 | float4* accum_buffer; 33 | uchar4* frame_buffer; 34 | unsigned int width; 35 | unsigned int height; 36 | unsigned int samples_per_launch; 37 | unsigned int max_depth; 38 | 39 | float3 eye; 40 | float3 U; 41 | float3 V; 42 | float3 W; 43 | 44 | OptixTraversableHandle handle; 45 | 46 | // 動的にシーンを変更するための時間とマウス位置 47 | // ホスト側(CPU)で毎フレーム値を更新して、デバイス側(GPU)に転送する 48 | float time; 49 | float mouse_x; 50 | float mouse_y; 51 | }; 52 | 53 | 54 | struct RayGenData 55 | { 56 | }; 57 | 58 | 59 | struct MissData 60 | { 61 | float4 bg_color; 62 | }; 63 | 64 | struct SphereData 65 | { 66 | // 球の中心 67 | float3 center; 68 | // 球の半径 69 | float radius; 70 | }; 71 | 72 | struct MeshData 73 | { 74 | // メッシュの頂点 75 | float3* vertices; 76 | // 三角形を構成するための頂点番号3点 77 | uint3* indices; 78 | }; 79 | 80 | struct LambertianData { 81 | // Lambert マテリアルの色 82 | void* texture_data; 83 | unsigned int texture_prg_id; 84 | }; 85 | 86 | struct DielectricData { 87 | // 誘電体の色 88 | void* texture_data; 89 | unsigned int texture_prg_id; 90 | // 屈折率 91 | float ior; 92 | }; 93 | 94 | struct MetalData { 95 | // 金属の色 96 | void* texture_data; 97 | unsigned int texture_prg_id; 98 | // 金属の疑似粗さを指定するパラメータ 99 | float fuzz; 100 | }; 101 | 102 | struct Material { 103 | // マテリアル(Lambertian, Glass, Metal)のデータ 104 | // デバイス上に確保されたポインタを紐づけておく 105 | // 共用体(union)を使わずに汎用ポインタにすることで、 106 | // 異なるデータ型の構造体を追加したいときに対応しやすくなる。 107 | void* data; 108 | 109 | // マテリアルにおける散乱方向や色を計算するためのCallablesプログラムのID 110 | // OptiX 7.x では仮想関数が使えないので、Callablesプログラムを使って 111 | // 疑似的なポリモーフィズムを実現する 112 | unsigned int prg_id; 113 | }; 114 | 115 | struct ConstantData 116 | { 117 | float4 color; 118 | }; 119 | 120 | struct CheckerData 121 | { 122 | float4 color1; 123 | float4 color2; 124 | float scale; 125 | }; 126 | 127 | struct HitGroupData 128 | { 129 | // 物体形状に関するデータ 130 | // デバイス上に確保されたポインタを紐づける 131 | void* shape_data; 132 | 133 | Material material; 134 | }; 135 | 136 | struct EmptyData 137 | { 138 | 139 | }; -------------------------------------------------------------------------------- /optixInOneWeekend.cu: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | // 28 | #include 29 | 30 | #include "optixInOneWeekend.h" 31 | #include "random.h" 32 | 33 | #include 34 | #include 35 | 36 | extern "C" { 37 | __constant__ Params params; 38 | } 39 | 40 | 41 | //------------------------------------------------------------------------------ 42 | // 43 | // 44 | // 45 | //------------------------------------------------------------------------------ 46 | 47 | struct SurfaceInfo 48 | { 49 | // 発光度 50 | float3 emission; 51 | // 物体表面の色 52 | float3 albedo; 53 | // 衝突位置 54 | float3 p; 55 | // レイの方向 56 | float3 direction; 57 | // 法線 58 | float3 n; 59 | // テクスチャ座標 60 | float2 texcoord; 61 | 62 | // 乱数のシード値 63 | unsigned int seed; 64 | // 光線追跡を終了するか否か 65 | int trace_terminate; 66 | 67 | // マテリアル用のデータとCallablesプログラムのID 68 | Material material; 69 | }; 70 | 71 | //------------------------------------------------------------------------------ 72 | // 73 | // 74 | // 75 | //------------------------------------------------------------------------------ 76 | 77 | static __forceinline__ __device__ void* unpackPointer( unsigned int i0, unsigned int i1 ) 78 | { 79 | const unsigned long long uptr = static_cast( i0 ) << 32 | i1; 80 | void* ptr = reinterpret_cast( uptr ); 81 | return ptr; 82 | } 83 | 84 | // ポインタをunsigned long longに変換してから、前側32bitをi0に、後側32bitをi1に格納する 85 | static __forceinline__ __device__ void packPointer( void* ptr, unsigned int& i0, unsigned int& i1 ) 86 | { 87 | const unsigned long long uptr = reinterpret_cast( ptr ); 88 | i0 = uptr >> 32; 89 | i1 = uptr & 0x00000000ffffffff; 90 | } 91 | 92 | // 0番目と1番目のペイロードにパックされているSurfaceInfoのポインタを取得 93 | static __forceinline__ __device__ SurfaceInfo* getSurfaceInfo() 94 | { 95 | const unsigned int u0 = optixGetPayload_0(); 96 | const unsigned int u1 = optixGetPayload_1(); 97 | return reinterpret_cast( unpackPointer( u0, u1 ) ); 98 | } 99 | 100 | static __forceinline__ __device__ float3 randomInUnitSphere(unsigned int& seed) { 101 | while (true) 102 | { 103 | float3 v = make_float3(rnd(seed) * 2.0f - 1.0f, rnd(seed) * 2.0f - 1.0f, rnd(seed) * 2.0f - 1.0f); 104 | if (dot(v, v) >= 1.0f) continue; 105 | return v; 106 | } 107 | } 108 | 109 | static __forceinline__ __device__ float3 randomSampleHemisphere(unsigned int& seed, const float3& normal) 110 | { 111 | const float3 vec_in_sphere = randomInUnitSphere(seed); 112 | if (dot(vec_in_sphere, normal) > 0.0f) 113 | return vec_in_sphere; 114 | else 115 | return -vec_in_sphere; 116 | } 117 | 118 | static __forceinline__ __device__ float fresnel(float cosine, float ref_idx) 119 | { 120 | float r0 = (1 - ref_idx) / (1 + ref_idx); 121 | r0 = r0 * r0; 122 | return r0 + (1 - r0) * powf((1 - cosine), 5.0f); 123 | } 124 | 125 | static __forceinline__ __device__ float3 refract(const float3& uv, const float3& n, float etai_over_etat) { 126 | auto cos_theta = fminf(dot(-uv, n), 1.0f); 127 | float3 r_out_perp = etai_over_etat * (uv + cos_theta * n); 128 | float3 r_out_parallel = -sqrtf(fabs(1.0f - dot(r_out_perp, r_out_perp))) * n; 129 | return r_out_perp + r_out_parallel; 130 | } 131 | 132 | static __forceinline__ __device__ float3 refract(const float3& wi, const float3& n, float cos_i, float ni, float nt) { 133 | float nt_ni = nt / ni; 134 | float ni_nt = ni / nt; 135 | float D = sqrtf(nt_ni * nt_ni - (1.0f - cos_i * cos_i)) - cos_i; 136 | return ni_nt * (wi - D * n); 137 | } 138 | 139 | static __forceinline__ __device__ void trace( 140 | OptixTraversableHandle handle, 141 | float3 ray_origin, 142 | float3 ray_direction, 143 | float tmin, 144 | float tmax, 145 | SurfaceInfo* si 146 | ) 147 | { 148 | // SurfaceInfoのポインタを2つのペイロードにパックする 149 | unsigned int u0, u1; 150 | packPointer( si, u0, u1 ); 151 | optixTrace( 152 | handle, 153 | ray_origin, 154 | ray_direction, 155 | tmin, 156 | tmax, 157 | 0.0f, // rayTime 158 | OptixVisibilityMask( 1 ), 159 | OPTIX_RAY_FLAG_NONE, 160 | 0, // SBT offset 161 | 1, // SBT stride 162 | 0, // missSBTIndex 163 | u0, u1 ); 164 | } 165 | 166 | //------------------------------------------------------------------------------ 167 | // 168 | // 169 | // 170 | //------------------------------------------------------------------------------ 171 | 172 | extern "C" __global__ void __raygen__pinhole() 173 | { 174 | const int w = params.width; 175 | const int h = params.height; 176 | const float3 eye = params.eye; 177 | const float3 U = params.U; 178 | const float3 V = params.V; 179 | const float3 W = params.W; 180 | const uint3 idx = optixGetLaunchIndex(); 181 | const int subframe_index = params.subframe_index; 182 | const int samples_per_launch = params.samples_per_launch; 183 | 184 | // 現在のスレッドIDから乱数用のシード値を生成 185 | unsigned int seed = tea<4>(idx.y * w + idx.x, subframe_index); 186 | 187 | float3 result = make_float3(0.0f); 188 | for (int i = 0; i < samples_per_launch; i++) 189 | { 190 | const float2 subpixel_jitter = make_float2(rnd(seed), rnd(seed)); 191 | 192 | const float2 d = 2.0f * make_float2( 193 | ((float)idx.x + subpixel_jitter.x) / (float)w, 194 | ((float)idx.y + subpixel_jitter.y) / (float)h 195 | ) - 1.0f; 196 | 197 | // 光線の向きと原点を設定 198 | float3 ray_direction = normalize(d.x * U + d.y * V + W); 199 | float3 ray_origin = eye; 200 | 201 | SurfaceInfo si; 202 | si.emission = make_float3(0.0f); 203 | si.albedo = make_float3(0.0f); 204 | si.trace_terminate = false; 205 | si.seed = seed; 206 | 207 | float3 throughput = make_float3(1.0f); 208 | 209 | int depth = 0; 210 | for (;;) 211 | { 212 | if (depth >= params.max_depth) 213 | break; 214 | 215 | // IASに対してレイトレース 216 | trace(params.handle, ray_origin, ray_direction, 0.01f, 1e16f, &si); 217 | 218 | if (si.trace_terminate) { 219 | result += si.emission * throughput; 220 | break; 221 | } 222 | 223 | // Direct callable関数を使って各マテリアルにおける散乱方向とマテリアルの色を計算 224 | float3 scattered; 225 | optixDirectCall( 226 | si.material.prg_id, &si, si.material.data, scattered 227 | ); 228 | 229 | throughput *= si.albedo; 230 | 231 | ray_origin = si.p; 232 | ray_direction = scattered; 233 | 234 | ++depth; 235 | } 236 | } 237 | 238 | const unsigned int image_index = idx.y * params.width + idx.x; 239 | float3 accum_color = result / static_cast(params.samples_per_launch); 240 | 241 | if (subframe_index > 0) 242 | { 243 | const float a = 1.0f / static_cast(subframe_index + 1); 244 | const float3 accum_color_prev = make_float3(params.accum_buffer[image_index]); 245 | accum_color = lerp(accum_color_prev, accum_color, a); 246 | } 247 | // 取得した輝度値を出力バッファに書き込む 248 | params.accum_buffer[image_index] = make_float4(accum_color, 1.0f); 249 | params.frame_buffer[image_index] = make_color(accum_color); 250 | } 251 | 252 | extern "C" __global__ void __miss__radiance() 253 | { 254 | const MissData* miss = (MissData*)optixGetSbtDataPointer(); 255 | 256 | SurfaceInfo* si = getSurfaceInfo(); 257 | 258 | // ベクトルのy成分から背景色を計算 259 | const float3 unit_direction = normalize(optixGetWorldRayDirection()); 260 | const float t = 0.5f * (unit_direction.y + 1.0f); 261 | si->emission = (1.0f - t) * make_float3(1.0f) + t * make_float3(0.5f, 0.7f, 1.0f); 262 | si->trace_terminate = true; 263 | } 264 | 265 | extern "C" __global__ void __closesthit__mesh() 266 | { 267 | // Shader binding tableからデータを取得 268 | HitGroupData* data = (HitGroupData*)optixGetSbtDataPointer(); 269 | const MeshData* mesh_data = (MeshData*)data->shape_data; 270 | 271 | const int prim_idx = optixGetPrimitiveIndex(); 272 | const float3 direction = optixGetWorldRayDirection(); 273 | const uint3 index = mesh_data->indices[prim_idx]; 274 | 275 | // 三角形の重心座標(u,v)を三角形のテクスチャ座標とする 276 | const float2 texcoord = optixGetTriangleBarycentrics(); 277 | 278 | // メッシュデータから頂点を取得し、法線計算 279 | const float3 v0 = mesh_data->vertices[ index.x ]; 280 | const float3 v1 = mesh_data->vertices[ index.y ]; 281 | const float3 v2 = mesh_data->vertices[ index.z ]; 282 | const float3 N = normalize( cross( v1-v0, v2-v0 ) ); 283 | 284 | // レイと三角形の交点を計算 285 | const float3 P = optixGetWorldRayOrigin() + optixGetRayTmax()*direction; 286 | 287 | // PayloadからSurfaceInfoのポインタを取得し、交点上の情報を格納 288 | SurfaceInfo* si = getSurfaceInfo(); 289 | 290 | // SurfaceInfoに交点における情報を格納する 291 | si->p = P; 292 | si->direction = direction; 293 | si->n = N; 294 | si->texcoord = texcoord; 295 | // HitGroupDataに紐付いているマテリアル情報をSurfaceInfoに紐付ける 296 | si->material = data->material; 297 | } 298 | 299 | extern "C" __global__ void __intersection__sphere() 300 | { 301 | // Shader binding tableからデータを取得 302 | HitGroupData* data = (HitGroupData*)optixGetSbtDataPointer(); 303 | // AABBとの交差判定が認められた球体のGAS内のIDを取得 304 | const int prim_idx = optixGetPrimitiveIndex(); 305 | const SphereData sphere_data = ((SphereData*)data->shape_data)[prim_idx]; 306 | 307 | const float3 center = sphere_data.center; 308 | const float radius = sphere_data.radius; 309 | 310 | // オブジェクト空間におけるレイの原点と方向を取得 311 | const float3 origin = optixGetObjectRayOrigin(); 312 | const float3 direction = optixGetObjectRayDirection(); 313 | // レイの最小距離と最大距離を取得 314 | const float tmin = optixGetRayTmin(); 315 | const float tmax = optixGetRayTmax(); 316 | 317 | // 球体との交差判定処理(判別式を解いて、距離tを計算) 318 | const float3 oc = origin - center; 319 | const float a = dot(direction, direction); 320 | const float half_b = dot(oc, direction); 321 | const float c = dot(oc, oc) - radius * radius; 322 | 323 | const float discriminant = half_b * half_b - a * c; 324 | if (discriminant < 0) return; 325 | 326 | const float sqrtd = sqrtf(discriminant); 327 | 328 | float root = (-half_b - sqrtd) / a; 329 | if (root < tmin || tmax < root) 330 | { 331 | root = (-half_b + sqrtd) / a; 332 | if (root < tmin || tmax < root) 333 | return; 334 | } 335 | 336 | // オブジェクト空間におけるレイと球の交点を計算 337 | const float3 P = origin + root * direction; 338 | const float3 normal = (P - center) / radius; 339 | 340 | // 球体におけるテクスチャ座標を算出 (Z up)と仮定して、xとyから方位角、zから仰角を計算 341 | float phi = atan2(normal.y, normal.x); 342 | if (phi < 0) phi += 2.0f * M_PIf; 343 | const float theta = acosf(normal.z); 344 | const float2 texcoord = make_float2(phi / (2.0f * M_PIf), theta / M_PIf); 345 | 346 | // レイと球の交差判定を認める 347 | optixReportIntersection(root, 0, 348 | __float_as_int(normal.x), __float_as_int(normal.y), __float_as_int(normal.z), 349 | __float_as_int(texcoord.x), __float_as_int(texcoord.y) 350 | ); 351 | } 352 | 353 | extern "C" __global__ void __closesthit__sphere() 354 | { 355 | // Shader binding tableからデータを取得 356 | HitGroupData* data = (HitGroupData*)optixGetSbtDataPointer(); 357 | 358 | // 0 - 2番目のAttributeからIntersectionプログラムで計算した法線を取得 359 | const float3 local_n = make_float3( 360 | __int_as_float(optixGetAttribute_0()), 361 | __int_as_float(optixGetAttribute_1()), 362 | __int_as_float(optixGetAttribute_2()) 363 | ); 364 | // Instanceに紐付いている行列からオブジェクト空間における法線をグローバル空間にマップする 365 | const float3 world_n = normalize(optixTransformNormalFromObjectToWorldSpace(local_n)); 366 | 367 | // 3 - 4番目のAttributeからテクスチャ座標を取得 368 | const float2 texcoord = make_float2( 369 | __int_as_float(optixGetAttribute_3()), 370 | __int_as_float(optixGetAttribute_4()) 371 | ); 372 | 373 | // グローバル空間におけるレイの原点と方向を計算し、交点座標の位置を計算 374 | const float3 origin = optixGetWorldRayOrigin(); 375 | const float3 direction = optixGetWorldRayDirection(); 376 | const float3 P = origin + optixGetRayTmax() * direction; 377 | 378 | // PayloadからSurfaceInfoのポインタを取得し、交点上の情報を格納 379 | SurfaceInfo* si = getSurfaceInfo(); 380 | si->p = P; 381 | si->n = world_n; 382 | si->direction = direction; 383 | si->texcoord = texcoord; 384 | // HitGroupDataに紐付いているマテリアル情報をSurfaceInfoに紐付ける 385 | si->material = data->material; 386 | } 387 | 388 | extern "C" __device__ void __direct_callable__lambertian(SurfaceInfo* si, void* material_data, float3& scattered) 389 | { 390 | const LambertianData* lambertian = (LambertianData*)material_data; 391 | 392 | // Direct callableプログラムによって、テクスチャ色を取得 393 | const float4 color = optixDirectCall( 394 | lambertian->texture_prg_id, si, lambertian->texture_data 395 | ); 396 | si->albedo = make_float3(color); 397 | 398 | si->n = faceforward(si->n, -si->direction, si->n); 399 | 400 | unsigned int seed = si->seed; 401 | float3 wi = randomSampleHemisphere(seed, si->n); 402 | scattered = normalize(wi); 403 | si->trace_terminate = false; 404 | si->emission = make_float3(0.0f); 405 | } 406 | 407 | extern "C" __device__ void __direct_callable__dielectric(SurfaceInfo* si, void* material_data, float3& scattered) 408 | { 409 | const DielectricData* dielectric = (DielectricData*)material_data; 410 | // Direct callableプログラムによって、テクスチャ色を取得 411 | const float4 color = optixDirectCall( 412 | dielectric->texture_prg_id, si, dielectric->texture_data 413 | ); 414 | 415 | const float ior = dielectric->ior; 416 | const float3 in_direction = si->direction; 417 | 418 | si->albedo = make_float3(color); 419 | float cos_theta = dot(in_direction, si->n); 420 | bool into = cos_theta < 0; 421 | const float3 outward_normal = into ? si->n : -si->n; 422 | const float refraction_ratio = into ? (1.0 / ior) : ior; 423 | 424 | float3 unit_direction = normalize(in_direction); 425 | cos_theta = fabs(cos_theta); 426 | float sin_theta = sqrt(1.0 - cos_theta * cos_theta); 427 | 428 | unsigned int seed = si->seed; 429 | bool cannot_refract = refraction_ratio * sin_theta > 1.0; 430 | if (cannot_refract || rnd(seed) < fresnel(cos_theta, refraction_ratio)) 431 | scattered = reflect(unit_direction, si->n); 432 | else 433 | scattered = refract(unit_direction, outward_normal, refraction_ratio); 434 | si->trace_terminate = false; 435 | si->emission = make_float3(0.0f); 436 | si->seed = seed; 437 | } 438 | 439 | extern "C" __device__ void __direct_callable__metal(SurfaceInfo* si, void* material_data, float3& scattered) 440 | { 441 | const MetalData* metal = (MetalData*)material_data; 442 | // Direct callableプログラムによって、テクスチャ色を取得 443 | const float4 color = optixDirectCall( 444 | metal->texture_prg_id, si, metal->texture_data 445 | ); 446 | 447 | unsigned int seed = si->seed; 448 | scattered = reflect(si->direction, si->n) + metal->fuzz * randomInUnitSphere(seed); 449 | si->albedo = make_float3(color); 450 | si->trace_terminate = false; 451 | si->emission = make_float3(0.0f); 452 | si->seed = seed; 453 | } 454 | 455 | extern "C" __device__ float4 __direct_callable__constant(SurfaceInfo* /* si */ , void* texture_data) 456 | { 457 | const ConstantData* constant = (ConstantData*)texture_data; 458 | return constant->color; 459 | } 460 | 461 | extern "C" __device__ float4 __direct_callable__checker(SurfaceInfo* si, void* texture_data) 462 | { 463 | const CheckerData* checker = (CheckerData*)texture_data; 464 | const bool is_odd = sinf(si->texcoord.x * M_PIf * checker->scale) * sinf(si->texcoord.y * M_PIf * checker->scale) < 0; 465 | return is_odd ? checker->color1 : checker->color2; 466 | } -------------------------------------------------------------------------------- /optixInOneWeekend.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions 6 | // are met: 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of NVIDIA CORPORATION nor the names of its 13 | // contributors may be used to endorse or promote products derived 14 | // from this software without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 17 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 20 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | // 28 | 29 | // gl_interopの前にincludeされる必要がある 30 | #include 31 | 32 | #include 33 | #include 34 | 35 | #include 36 | #include 37 | #include 38 | 39 | // sampleConfig.h.in から自動生成されるヘッダーファイルをinclude 40 | // ディレクトリへのパスなどの環境変数が定義されている 41 | #include 42 | 43 | // OptiX SDK 提供のヘッダーファイルのinclude 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | #include 52 | #include 53 | 54 | #include 55 | 56 | #include "optixInOneWeekend.h" 57 | #include "random.h" 58 | 59 | #include 60 | #include 61 | #include 62 | #include 63 | #include 64 | #include 65 | #include 66 | 67 | bool resize_dirty = false; 68 | bool minimized = false; 69 | 70 | // カメラ 71 | bool camera_changed = true; 72 | sutil::Camera camera; 73 | sutil::Trackball trackball; 74 | 75 | // マウス 76 | int32_t mouse_button = -1; 77 | 78 | // 1度のカーネル起動におけるピクセルあたりのサンプル数 79 | int32_t samples_per_launch = 16; 80 | 81 | // Shader binding tableを構成するシェーダーレコードでヘッダーと任意のデータからなる。 82 | // ヘッダーサイズはOptiX 7.4ではOPTIX_SBT_RECORD_HEADER_SIZE (32 bytes)で固定の値である。 83 | // データはユーザー定義のデータ型を格納することが可能。ただし、Shader binding table内で 84 | // 複数のレコードを保持できるHitGroup record, Miss record, Callables recordはそれぞれで 85 | // レコードサイズが等しい必要がある。 86 | template 87 | struct Record 88 | { 89 | __align__( OPTIX_SBT_RECORD_ALIGNMENT ) char header[OPTIX_SBT_RECORD_HEADER_SIZE]; 90 | T data; 91 | }; 92 | 93 | using RayGenRecord = Record; 94 | using MissRecord = Record; 95 | using HitGroupRecord = Record; 96 | using EmptyRecord = Record; 97 | 98 | // Direct/Continuation callable プログラムをデバイス(GPU)側で呼ぶには、 99 | // CallablesプログラムのSBT_IDが必要なので、生成順で番号を割り振って起き、 100 | // その順番でCallables用のSBTを構築するようにする 101 | struct CallableProgram 102 | { 103 | OptixProgramGroup program = nullptr; 104 | uint32_t id = 0; 105 | }; 106 | 107 | // Geometry acceleration structure (GAS) 用 108 | // GASのtraversable handleをOptixInstanceに紐づける際に、 109 | // GASが保持するSBT recordの数がわかると、 110 | // Instanceのsbt offsetを一括で構築しやすい 111 | struct GeometryAccelData 112 | { 113 | OptixTraversableHandle handle; 114 | CUdeviceptr d_output_buffer; 115 | uint32_t num_sbt_records; 116 | }; 117 | 118 | // Instance acceleration structure (IAS) 用 119 | // 120 | struct InstanceAccelData 121 | { 122 | OptixTraversableHandle handle; 123 | CUdeviceptr d_output_buffer; 124 | 125 | // IASを構築しているOptixInstanceのデータを更新できるように、 126 | // デバイス側のポインタを格納しておく 127 | CUdeviceptr d_instances_buffer; 128 | }; 129 | 130 | enum class ShapeType 131 | { 132 | Mesh, 133 | Sphere 134 | }; 135 | 136 | struct OneWeekendState 137 | { 138 | OptixDeviceContext context = 0; 139 | 140 | // シーン全体のInstance acceleration structure 141 | InstanceAccelData ias = {}; 142 | // GPU上におけるシーンの球体データ全てを格納している配列のポインタ 143 | void* d_sphere_data = nullptr; 144 | // GPU上におけるシーンの三角形データ全てを格納している配列のポインタ 145 | void* d_mesh_data = nullptr; 146 | 147 | OptixModule module = nullptr; 148 | OptixPipelineCompileOptions pipeline_compile_options = {}; 149 | OptixPipeline pipeline = nullptr; 150 | 151 | // Ray generation プログラム 152 | OptixProgramGroup raygen_prg = nullptr; 153 | // Miss プログラム 154 | OptixProgramGroup miss_prg = nullptr; 155 | 156 | // 球体用のHitGroup プログラム 157 | OptixProgramGroup sphere_hitgroup_prg = nullptr; 158 | // メッシュ用のHitGroupプログラム 159 | OptixProgramGroup mesh_hitgroup_prg = nullptr; 160 | 161 | // マテリアル用のCallableプログラム 162 | // OptiXでは基底クラスのポインタを介した、派生クラスの関数呼び出し (ポリモーフィズム)が 163 | // 禁止されているため、Callable関数を使って疑似的なポリモーフィズムを実現する 164 | // ここでは、Lambertian, Dielectric, Metal の3種類を実装している 165 | CallableProgram lambertian_prg = {}; 166 | CallableProgram dielectric_prg = {}; 167 | CallableProgram metal_prg = {}; 168 | 169 | // テクスチャ用のCallableプログラム 170 | // Constant ... 単色、Checker ... チェッカーボード 171 | CallableProgram constant_prg = {}; 172 | CallableProgram checker_prg = {}; 173 | 174 | // CUDA stream 175 | CUstream stream = 0; 176 | 177 | // Pipeline launch parameters 178 | // CUDA内で extern "C" __constant__ Params params 179 | // と宣言することで、全モジュールからアクセス可能である。 180 | Params params; 181 | Params* d_params; 182 | 183 | // Shader binding table 184 | OptixShaderBindingTable sbt = {}; 185 | }; 186 | 187 | // GLFW callbacks ------------------------------------------------ 188 | static void mouseButtonCallback(GLFWwindow* window, int button, int action, int mods) 189 | { 190 | double xpos, ypos; 191 | 192 | glfwGetCursorPos(window, &xpos, &ypos); 193 | 194 | if (action == GLFW_PRESS) 195 | { 196 | mouse_button = button; 197 | trackball.startTracking(static_cast(xpos), static_cast(ypos)); 198 | } 199 | else 200 | { 201 | mouse_button = -1; 202 | } 203 | } 204 | 205 | // ----------------------------------------------------------------------- 206 | static void cursorPosCallback(GLFWwindow* window, double xpos, double ypos) 207 | { 208 | Params* params = static_cast(glfwGetWindowUserPointer(window)); 209 | 210 | // 左クリック中にマウスが移動した場合は、注視点を固定してカメラを動かす 211 | if (mouse_button == GLFW_MOUSE_BUTTON_LEFT) 212 | { 213 | trackball.setViewMode(sutil::Trackball::LookAtFixed); 214 | trackball.updateTracking(static_cast(xpos), static_cast(ypos), params->width, params->height); 215 | camera_changed = true; 216 | } 217 | // 右クリック中にマウスが移動した場合は、カメラの原点を固定して注視点を動かす 218 | else if (mouse_button == GLFW_MOUSE_BUTTON_RIGHT) 219 | { 220 | trackball.setViewMode(sutil::Trackball::EyeFixed); 221 | trackball.updateTracking(static_cast(xpos), static_cast(ypos), params->width, params->height); 222 | camera_changed = true; 223 | } 224 | } 225 | 226 | // ----------------------------------------------------------------------- 227 | static void windowSizeCallback( GLFWwindow* window, int32_t res_x, int32_t res_y ) 228 | { 229 | // ウィンドウが最小化された時に、最小化される前のウィンドウの解像度を保存しておく 230 | if( minimized ) 231 | return; 232 | 233 | // ウィンドウサイズが最小でも 1 x 1 になるようにする 234 | sutil::ensureMinimumSize( res_x, res_y ); 235 | 236 | Params* params = static_cast( glfwGetWindowUserPointer( window ) ); 237 | params->width = res_x; 238 | params->height = res_y; 239 | camera_changed = true; 240 | resize_dirty = true; 241 | } 242 | 243 | // ----------------------------------------------------------------------- 244 | static void windowIconifyCallback( GLFWwindow* window, int32_t iconified ) 245 | { 246 | minimized = ( iconified > 0 ); 247 | } 248 | 249 | // ----------------------------------------------------------------------- 250 | static void keyCallback( GLFWwindow* window, int32_t key, int32_t /*scancode*/, int32_t action, int32_t /*mods*/ ) 251 | { 252 | if( action == GLFW_PRESS ) 253 | { 254 | // Q or Esc -> 終了 255 | if( key == GLFW_KEY_Q || key == GLFW_KEY_ESCAPE ) 256 | { 257 | glfwSetWindowShouldClose( window, true ); 258 | } 259 | } 260 | else if( key == GLFW_KEY_G ) 261 | { 262 | // toggle UI draw 263 | } 264 | } 265 | 266 | // ----------------------------------------------------------------------- 267 | static void scrollCallback( GLFWwindow* window, double xscroll, double yscroll ) 268 | { 269 | if( trackball.wheelEvent( (int)yscroll ) ) 270 | camera_changed = true; 271 | } 272 | 273 | // ----------------------------------------------------------------------- 274 | OptixAabb sphereBound(const SphereData& sphere) 275 | { 276 | // 球体のAxis-aligned bounding box (AABB)を返す 277 | const float3 center = sphere.center; 278 | const float radius = sphere.radius; 279 | return OptixAabb { 280 | /* minX = */ center.x - radius, /* minY = */ center.y - radius, /* minZ = */ center.z - radius, 281 | /* maxX = */ center.x + radius, /* maxY = */ center.y + radius, /* maxZ = */ center.z + radius 282 | }; 283 | } 284 | 285 | // ----------------------------------------------------------------------- 286 | void printUsageAndExit(const char* argv0) 287 | { 288 | std::cerr << "Usage : " << argv0 << " [options]\n"; 289 | std::cerr << "Options: --file | -f File for image output\n"; 290 | std::cerr << " --launch-samples | -s Number of samples per pixel per launch (default 16)\n"; 291 | std::cerr << " --no-gl-interop Disable GL interop for display\n"; 292 | std::cerr << " --dim=x Set image dimensions; defaults to 768x768\n"; 293 | std::cerr << " --help | -h Print this usage message\n"; 294 | exit(0); 295 | } 296 | 297 | // ----------------------------------------------------------------------- 298 | // Pipeline launch parameterの初期化 299 | // ----------------------------------------------------------------------- 300 | void initLaunchParams( OneWeekendState& state ) 301 | { 302 | CUDA_CHECK(cudaMalloc( 303 | reinterpret_cast(&state.params.accum_buffer), 304 | state.params.width * state.params.height * sizeof(float4) 305 | )); 306 | state.params.frame_buffer = nullptr; 307 | 308 | state.params.samples_per_launch = samples_per_launch; 309 | state.params.subframe_index = 0u; 310 | state.params.max_depth = 5; 311 | 312 | // レイトレーシングを行うASのtraversableHandleを設定 313 | state.params.handle = state.ias.handle; 314 | 315 | CUDA_CHECK(cudaStreamCreate(&state.stream)); 316 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&state.d_params), sizeof(Params))); 317 | } 318 | 319 | // ----------------------------------------------------------------------- 320 | // カメラの更新処理 321 | // マウス入力等でカメラが動いた際にlaunch parameterも更新する 322 | // ----------------------------------------------------------------------- 323 | void handleCameraUpdate( Params& params ) 324 | { 325 | if (!camera_changed) 326 | return; 327 | 328 | camera_changed = false; 329 | camera.setAspectRatio(static_cast(params.width) / static_cast(params.height)); 330 | params.eye = camera.eye(); 331 | camera.UVWFrame(params.U, params.V, params.W); 332 | } 333 | 334 | // ----------------------------------------------------------------------- 335 | // ウィンドウサイズが変化したときの処理 336 | // レイトレーシングによる計算結果を格納するバッファを更新する 337 | // ----------------------------------------------------------------------- 338 | void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params& params ) 339 | { 340 | if( !resize_dirty ) 341 | return; 342 | resize_dirty = false; 343 | 344 | output_buffer.resize( params.width, params.height ); 345 | 346 | // Realloc accumulation buffer 347 | CUDA_CHECK( cudaFree( reinterpret_cast( params.accum_buffer ) ) ); 348 | CUDA_CHECK( cudaMalloc( 349 | reinterpret_cast( ¶ms.accum_buffer ), 350 | params.width * params.height * sizeof( float4 ) 351 | ) ); 352 | } 353 | 354 | // ----------------------------------------------------------------------- 355 | // カメラとウィンドウサイズの変化を監視 356 | // ----------------------------------------------------------------------- 357 | void updateState( sutil::CUDAOutputBuffer& output_buffer, Params& params ) 358 | { 359 | // Update params on device 360 | if( camera_changed || resize_dirty ) 361 | params.subframe_index = 0; 362 | 363 | handleCameraUpdate( params ); 364 | handleResize( output_buffer, params ); 365 | } 366 | 367 | // ----------------------------------------------------------------------- 368 | // optixLaunchを呼び出し、デバイス側のレイトレーシングカーネルを起動 369 | // ----------------------------------------------------------------------- 370 | void launchSubframe( sutil::CUDAOutputBuffer& output_buffer, OneWeekendState& state ) 371 | { 372 | uchar4* result_buffer_data = output_buffer.map(); 373 | state.params.frame_buffer = result_buffer_data; 374 | CUDA_CHECK( cudaMemcpyAsync( 375 | reinterpret_cast( state.d_params ), 376 | &state.params, sizeof( Params ), 377 | cudaMemcpyHostToDevice, state.stream 378 | ) ); 379 | 380 | OPTIX_CHECK( optixLaunch( 381 | state.pipeline, 382 | state.stream, 383 | reinterpret_cast( state.d_params ), 384 | sizeof( Params ), 385 | &state.sbt, 386 | state.params.width, // launch width 387 | state.params.height, // launch height 388 | 1 // launch depth 389 | ) ); 390 | output_buffer.unmap(); 391 | CUDA_SYNC_CHECK(); 392 | } 393 | 394 | // ----------------------------------------------------------------------- 395 | // OpenGLを介してレンダリング結果を描画 396 | // ----------------------------------------------------------------------- 397 | void displaySubframe( sutil::CUDAOutputBuffer& output_buffer, sutil::GLDisplay& gl_display, GLFWwindow* window ) 398 | { 399 | // Display 400 | int framebuf_res_x = 0; // The display's resolution (could be HDPI res) 401 | int framebuf_res_y = 0; // 402 | glfwGetFramebufferSize( window, &framebuf_res_x, &framebuf_res_y ); 403 | gl_display.display( 404 | output_buffer.width(), 405 | output_buffer.height(), 406 | framebuf_res_x, 407 | framebuf_res_y, 408 | output_buffer.getPBO() 409 | ); 410 | } 411 | 412 | // ----------------------------------------------------------------------- 413 | // デバイス側からのメッセージを取得するためのCallable関数 414 | // OptixDeviceContextを生成する際に、 415 | // OptixDeviceContext::logCallbackFunctionに登録する 416 | // ----------------------------------------------------------------------- 417 | static void contextLogCallback(uint32_t level, const char* tag, const char* msg, void* /* callback_data */) 418 | { 419 | std::cerr << "[" << std::setw(2) << level << "][" << std::setw(12) << tag << "]: " << msg << "\n"; 420 | } 421 | 422 | // ----------------------------------------------------------------------- 423 | // カメラの初期化 424 | // ----------------------------------------------------------------------- 425 | void initCameraState() 426 | { 427 | camera_changed = true; 428 | 429 | camera.setEye(make_float3(13.0f, 2.0f, 3.0f)); 430 | camera.setLookat(make_float3(0.0f, 0.0f, 0.0f)); 431 | camera.setUp(make_float3(0.0f, 1.0f, 0.0f)); 432 | camera.setFovY(20.0f); 433 | camera.setAspectRatio(3.0f / 2.0f); 434 | 435 | trackball.setCamera(&camera); 436 | trackball.setMoveSpeed(10.0f); 437 | trackball.setReferenceFrame( 438 | make_float3(1.0f, 0.0f, 0.0f), 439 | make_float3(0.0f, 0.0f, 1.0f), 440 | make_float3(0.0f, 1.0f, 0.0f) 441 | ); 442 | trackball.setGimbalLock(true); 443 | } 444 | 445 | // ----------------------------------------------------------------------- 446 | // OptixDeviceContextの初期化 447 | // ----------------------------------------------------------------------- 448 | void createContext( OneWeekendState& state ) 449 | { 450 | // CUDAの初期化 451 | CUDA_CHECK( cudaFree( 0 ) ); 452 | 453 | OptixDeviceContext context; 454 | CUcontext cu_ctx = 0; 455 | OPTIX_CHECK( optixInit() ); 456 | OptixDeviceContextOptions options = {}; 457 | options.logCallbackFunction = &contextLogCallback; 458 | // Callbackで取得するメッセージのレベル 459 | // 0 ... disable、メッセージを受け取らない 460 | // 1 ... fatal、修復不可能なエラー。コンテクストやOptiXが不能状態にある 461 | // 2 ... error、修復可能エラー。 462 | // 3 ... warning、意図せぬ挙動や低パフォーマンスを導くような場合に警告してくれる 463 | // 4 ... print、全メッセージを受け取る 464 | options.logCallbackLevel = 4; 465 | OPTIX_CHECK( optixDeviceContextCreate( cu_ctx, &options, &context ) ); 466 | 467 | state.context = context; 468 | } 469 | 470 | // ----------------------------------------------------------------------- 471 | // 重複のないインデックスの個数を数える 472 | // 例) { 0, 0, 0, 1, 1, 2, 2, 2 } -> 3 473 | // ----------------------------------------------------------------------- 474 | uint32_t getNumSbtRecords(const std::vector& sbt_indices) 475 | { 476 | std::vector sbt_counter; 477 | for (const uint32_t& sbt_idx : sbt_indices) 478 | { 479 | auto itr = std::find(sbt_counter.begin(), sbt_counter.end(), sbt_idx); 480 | if (sbt_counter.empty() || itr == sbt_counter.end()) 481 | sbt_counter.emplace_back(sbt_idx); 482 | } 483 | return static_cast(sbt_counter.size()); 484 | } 485 | 486 | // ----------------------------------------------------------------------- 487 | // Geometry acceleration structureの構築 488 | // ----------------------------------------------------------------------- 489 | void buildGAS( OneWeekendState& state, GeometryAccelData& gas, OptixBuildInput& build_input) 490 | { 491 | OptixAccelBuildOptions accel_options = {}; 492 | accel_options.buildFlags = OPTIX_BUILD_FLAG_ALLOW_COMPACTION; // ビルド後のCompactionを許可 493 | accel_options.operation = OPTIX_BUILD_OPERATION_BUILD; // ASの更新の際は OPERATION_UPDATE 494 | 495 | // ASのビルドに必要なメモリ領域を計算 496 | OptixAccelBufferSizes gas_buffer_sizes; 497 | OPTIX_CHECK( optixAccelComputeMemoryUsage( 498 | state.context, 499 | &accel_options, 500 | &build_input, 501 | 1, 502 | &gas_buffer_sizes 503 | )); 504 | 505 | // ASを構築するための一時バッファを確保 506 | CUdeviceptr d_temp_buffer; 507 | CUDA_CHECK( cudaMalloc( reinterpret_cast( &d_temp_buffer ), gas_buffer_sizes.tempSizeInBytes ) ); 508 | 509 | CUdeviceptr d_buffer_temp_output_gas_and_compacted_size; 510 | size_t compacted_size_offset = roundUp( gas_buffer_sizes.outputSizeInBytes, 8ull ); 511 | CUDA_CHECK( cudaMalloc( 512 | reinterpret_cast(&d_buffer_temp_output_gas_and_compacted_size), 513 | compacted_size_offset + 8 514 | )); 515 | 516 | // Compaction後のデータ領域を確保するためのEmit property 517 | OptixAccelEmitDesc emit_property = {}; 518 | emit_property.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE; 519 | emit_property.result = ( CUdeviceptr )( (char*)d_buffer_temp_output_gas_and_compacted_size + compacted_size_offset ); 520 | 521 | // ASのビルド 522 | OPTIX_CHECK(optixAccelBuild( 523 | state.context, 524 | state.stream, 525 | &accel_options, 526 | &build_input, 527 | 1, 528 | d_temp_buffer, 529 | gas_buffer_sizes.tempSizeInBytes, 530 | d_buffer_temp_output_gas_and_compacted_size, 531 | gas_buffer_sizes.outputSizeInBytes, 532 | &gas.handle, 533 | &emit_property, 534 | 1 535 | )); 536 | 537 | // 一時バッファは必要ないので解放 538 | CUDA_CHECK(cudaFree(reinterpret_cast(d_temp_buffer))); 539 | 540 | size_t compacted_gas_size; 541 | CUDA_CHECK(cudaMemcpy(&compacted_gas_size, (void*)emit_property.result, sizeof(size_t), cudaMemcpyDeviceToHost)); 542 | // Compaction後の領域が、Compaction前の領域サイズよりも小さい場合のみ Compactionを行う 543 | if (compacted_gas_size < gas_buffer_sizes.outputSizeInBytes) 544 | { 545 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&gas.d_output_buffer), compacted_gas_size)); 546 | OPTIX_CHECK(optixAccelCompact(state.context, 0, gas.handle, gas.d_output_buffer, compacted_gas_size, &gas.handle)); 547 | CUDA_CHECK(cudaFree((void*)d_buffer_temp_output_gas_and_compacted_size)); 548 | } 549 | else 550 | { 551 | gas.d_output_buffer = d_buffer_temp_output_gas_and_compacted_size; 552 | } 553 | } 554 | 555 | // ----------------------------------------------------------------------- 556 | // Mesh用のGASを構築 557 | // デバイス側のポインタ(state.d_mesh_data)へのデータコピーも同時に行う 558 | // ----------------------------------------------------------------------- 559 | void buildMeshGAS( 560 | OneWeekendState& state, 561 | GeometryAccelData& gas, 562 | const std::vector& vertices, 563 | const std::vector& indices, 564 | const std::vector& sbt_indices 565 | ) 566 | { 567 | // メッシュを構成する頂点情報をGPU上にコピー 568 | CUdeviceptr d_vertices = 0; 569 | const size_t vertices_size = vertices.size() * sizeof(float3); 570 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_vertices), vertices_size)); 571 | CUDA_CHECK(cudaMemcpy( 572 | reinterpret_cast(d_vertices), 573 | vertices.data(), vertices_size, 574 | cudaMemcpyHostToDevice 575 | )); 576 | 577 | // 頂点のつなぎ方を定義するインデックス情報をGPU上にコピー 578 | CUdeviceptr d_indices = 0; 579 | const size_t indices_size = indices.size() * sizeof(uint3); 580 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_indices), indices_size)); 581 | CUDA_CHECK(cudaMemcpy( 582 | reinterpret_cast(d_indices), 583 | indices.data(), indices_size, 584 | cudaMemcpyHostToDevice 585 | )); 586 | 587 | // メッシュデータを構造体に格納し、GPU上にコピー 588 | MeshData mesh_data{reinterpret_cast(d_vertices), reinterpret_cast(d_indices) }; 589 | CUDA_CHECK(cudaMalloc(&state.d_mesh_data, sizeof(MeshData))); 590 | CUDA_CHECK(cudaMemcpy( 591 | state.d_mesh_data, &mesh_data, sizeof(MeshData), cudaMemcpyHostToDevice 592 | )); 593 | 594 | // Instance sbt offsetを基準としたsbt indexの配列をGPUにコピー 595 | CUdeviceptr d_sbt_indices = 0; 596 | const size_t sbt_indices_size = sbt_indices.size() * sizeof(uint32_t); 597 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_sbt_indices), sbt_indices_size)); 598 | CUDA_CHECK(cudaMemcpy( 599 | reinterpret_cast(d_sbt_indices), 600 | sbt_indices.data(), sbt_indices_size, 601 | cudaMemcpyHostToDevice 602 | )); 603 | 604 | // 重複のないsbt_indexの個数を数える 605 | uint32_t num_sbt_records = getNumSbtRecords(sbt_indices); 606 | gas.num_sbt_records = num_sbt_records; 607 | 608 | // 重複のないsbt_indexの分だけflagsを設定する 609 | // Anyhit プログラムを使用したい場合はFLAG_NONE or FLAG_REQUIRE_SINGLE_ANYHIT_CALL に設定する 610 | uint32_t* input_flags = new uint32_t[num_sbt_records]; 611 | for (uint32_t i = 0; i < num_sbt_records; i++) 612 | input_flags[i] = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT; 613 | 614 | // メッシュの頂点情報やインデックスバッファ、SBTレコードのインデックス配列をbuild inputに設定 615 | // num_sbt_recordsはあくまでSBTレコードの数で三角形の数でないことに注意 616 | OptixBuildInput mesh_input = {}; 617 | mesh_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES; 618 | mesh_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3; 619 | mesh_input.triangleArray.vertexStrideInBytes = sizeof(float3); 620 | mesh_input.triangleArray.numVertices = static_cast(vertices.size()); 621 | mesh_input.triangleArray.vertexBuffers = &d_vertices; 622 | mesh_input.triangleArray.flags = input_flags; 623 | mesh_input.triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3; 624 | mesh_input.triangleArray.indexStrideInBytes = sizeof(uint3); 625 | mesh_input.triangleArray.indexBuffer = d_indices; 626 | mesh_input.triangleArray.numIndexTriplets = static_cast(indices.size()); 627 | mesh_input.triangleArray.numSbtRecords = num_sbt_records; 628 | mesh_input.triangleArray.sbtIndexOffsetBuffer = d_sbt_indices; 629 | mesh_input.triangleArray.sbtIndexOffsetSizeInBytes = sizeof(uint32_t); 630 | mesh_input.triangleArray.sbtIndexOffsetStrideInBytes = sizeof(uint32_t); 631 | 632 | buildGAS(state, gas, mesh_input); 633 | } 634 | 635 | // ----------------------------------------------------------------------- 636 | // Sphere用のGASを構築 637 | // デバイス側のポインタ(state.d_sphere_data)へのデータコピーも同時に行う 638 | // ----------------------------------------------------------------------- 639 | void buildSphereGAS( 640 | OneWeekendState& state, 641 | GeometryAccelData& gas, 642 | const std::vector& spheres, 643 | const std::vector& sbt_indices 644 | ) 645 | { 646 | // Sphereの配列からAABBの配列を作る 647 | std::vector aabb; 648 | std::transform(spheres.begin(), spheres.end(), std::back_inserter(aabb), 649 | [](const SphereData& sphere) { return sphereBound(sphere); }); 650 | 651 | // AABBの配列をGPU上にコピー 652 | CUdeviceptr d_aabb_buffer; 653 | const size_t aabb_size = sizeof(OptixAabb) * aabb.size(); 654 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_aabb_buffer), aabb_size)); 655 | CUDA_CHECK(cudaMemcpy( 656 | reinterpret_cast(d_aabb_buffer), 657 | aabb.data(), aabb_size, 658 | cudaMemcpyHostToDevice 659 | )); 660 | 661 | // Instance sbt offsetを基準としたsbt indexの配列をGPUにコピー 662 | CUdeviceptr d_sbt_indices; 663 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_sbt_indices), sizeof(uint32_t) * sbt_indices.size())); 664 | CUDA_CHECK(cudaMemcpy( 665 | reinterpret_cast(d_sbt_indices), 666 | sbt_indices.data(), sizeof(uint32_t) * sbt_indices.size(), 667 | cudaMemcpyHostToDevice 668 | )); 669 | 670 | // 全球体データの配列をGPU上にコピー 671 | // 個々の球体データへのアクセスはoptixGetPrimitiveIndex()を介して行う 672 | CUDA_CHECK(cudaMalloc(&state.d_sphere_data, sizeof(SphereData) * spheres.size())); 673 | CUDA_CHECK(cudaMemcpy(state.d_sphere_data, spheres.data(), sizeof(SphereData) * spheres.size(), cudaMemcpyHostToDevice)); 674 | 675 | // 重複のないsbt_indexの個数を数える 676 | uint32_t num_sbt_records = getNumSbtRecords(sbt_indices); 677 | gas.num_sbt_records = num_sbt_records; 678 | 679 | // 重複のないsbt_indexの分だけflagsを設定する 680 | // Anyhit プログラムを使用したい場合はFLAG_NONE or FLAG_REQUIRE_SINGLE_ANYHIT_CALL に設定する 681 | uint32_t* input_flags = new uint32_t[num_sbt_records]; 682 | for (uint32_t i = 0; i < num_sbt_records; i++) 683 | input_flags[i] = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT; 684 | 685 | // Custom primitives用のAABB配列やSBTレコードのインデックス配列を 686 | // build input に設定する 687 | // num_sbt_recordsはあくまでSBTレコードの数でプリミティブ数でないことに注意 688 | OptixBuildInput sphere_input = {}; 689 | sphere_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES; 690 | sphere_input.customPrimitiveArray.aabbBuffers = &d_aabb_buffer; 691 | sphere_input.customPrimitiveArray.numPrimitives = static_cast(spheres.size()); 692 | sphere_input.customPrimitiveArray.flags = input_flags; 693 | sphere_input.customPrimitiveArray.numSbtRecords = num_sbt_records; 694 | sphere_input.customPrimitiveArray.sbtIndexOffsetBuffer = d_sbt_indices; 695 | sphere_input.customPrimitiveArray.sbtIndexOffsetSizeInBytes = sizeof(uint32_t); 696 | sphere_input.customPrimitiveArray.sbtIndexOffsetStrideInBytes = sizeof(uint32_t); 697 | 698 | buildGAS(state, gas, sphere_input); 699 | } 700 | 701 | // ----------------------------------------------------------------------- 702 | // Instance acceleration structureの構築 703 | // ----------------------------------------------------------------------- 704 | void buildIAS(OneWeekendState& state, InstanceAccelData& ias, const std::vector& instances) 705 | { 706 | CUdeviceptr d_instances; 707 | const size_t instances_size = sizeof(OptixInstance) * instances.size(); 708 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_instances), instances_size)); 709 | CUDA_CHECK(cudaMemcpy( 710 | reinterpret_cast(d_instances), 711 | instances.data(), instances_size, 712 | cudaMemcpyHostToDevice 713 | )); 714 | 715 | OptixBuildInput instance_input = {}; 716 | instance_input.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES; 717 | instance_input.instanceArray.instances = d_instances; 718 | instance_input.instanceArray.numInstances = static_cast(instances.size()); 719 | 720 | OptixAccelBuildOptions accel_options = {}; 721 | accel_options.operation = OPTIX_BUILD_OPERATION_BUILD; 722 | accel_options.buildFlags = OPTIX_BUILD_FLAG_ALLOW_COMPACTION; 723 | 724 | OptixAccelBufferSizes ias_buffer_sizes; 725 | OPTIX_CHECK(optixAccelComputeMemoryUsage( 726 | state.context, 727 | &accel_options, 728 | &instance_input, 729 | 1, // num build input 730 | &ias_buffer_sizes 731 | )); 732 | 733 | size_t d_temp_buffer_size = ias_buffer_sizes.tempSizeInBytes; 734 | 735 | // ASを構築するための一時バッファを確保 736 | CUdeviceptr d_temp_buffer; 737 | CUDA_CHECK(cudaMalloc( 738 | reinterpret_cast(&d_temp_buffer), 739 | d_temp_buffer_size 740 | )); 741 | 742 | CUdeviceptr d_buffer_temp_output_ias_and_compacted_size; 743 | size_t compacted_size_offset = roundUp(ias_buffer_sizes.outputSizeInBytes, 8ull); 744 | CUDA_CHECK(cudaMalloc( 745 | reinterpret_cast(&d_buffer_temp_output_ias_and_compacted_size), 746 | compacted_size_offset + 8 747 | )); 748 | 749 | // Compaction後のデータ領域を確保するためのEmit property 750 | OptixAccelEmitDesc emit_property = {}; 751 | emit_property.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE; 752 | emit_property.result = ( CUdeviceptr )( (char*)d_buffer_temp_output_ias_and_compacted_size + compacted_size_offset ); 753 | 754 | // ASのビルド 755 | OPTIX_CHECK(optixAccelBuild( 756 | state.context, 757 | state.stream, 758 | &accel_options, 759 | &instance_input, 760 | 1, // num build inputs 761 | d_temp_buffer, 762 | d_temp_buffer_size, 763 | // ias.d_output_buffer, 764 | d_buffer_temp_output_ias_and_compacted_size, 765 | ias_buffer_sizes.outputSizeInBytes, 766 | &ias.handle, // emitted property list 767 | nullptr, // num emitted property 768 | 0 769 | )); 770 | 771 | CUDA_CHECK(cudaFree(reinterpret_cast(d_temp_buffer))); 772 | 773 | size_t compacted_ias_size; 774 | CUDA_CHECK(cudaMemcpy(&compacted_ias_size, (void*)emit_property.result, sizeof(size_t), cudaMemcpyDeviceToHost)); 775 | if (compacted_ias_size < ias_buffer_sizes.outputSizeInBytes) 776 | { 777 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&ias.d_output_buffer), compacted_ias_size)); 778 | OPTIX_CHECK(optixAccelCompact(state.context, 0, ias.handle, ias.d_output_buffer, compacted_ias_size, &ias.handle)); 779 | CUDA_CHECK(cudaFree((void*)d_buffer_temp_output_ias_and_compacted_size)); 780 | } 781 | else 782 | { 783 | ias.d_output_buffer = d_buffer_temp_output_ias_and_compacted_size; 784 | } 785 | } 786 | 787 | // ----------------------------------------------------------------------- 788 | // OptixModuleの作成 789 | // ----------------------------------------------------------------------- 790 | void createModule(OneWeekendState& state) 791 | { 792 | OptixModuleCompileOptions module_compile_options = {}; 793 | #if !defined( NDEBUG ) 794 | module_compile_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0; 795 | module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; 796 | #endif 797 | 798 | state.pipeline_compile_options.usesMotionBlur = false; 799 | state.pipeline_compile_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY; 800 | state.pipeline_compile_options.numPayloadValues = 2; 801 | // Attributeの個数設定 802 | // Sphereの交差判定で法線とテクスチャ座標を intersection -> closesthitに渡すので 803 | // (x, y, z) ... 3次元、(s, t) ... 2次元 で計5つのAttributeが必要 804 | // optixinOneWeekend.cu:339行目参照 805 | state.pipeline_compile_options.numAttributeValues = 5; 806 | #ifdef DEBUG 807 | state.pipeline_compile_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_DEBUG | OPTIX_EXCEPTION_FLAG_TRACE_DEPTH | OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW; 808 | #else 809 | state.pipeline_compile_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE; 810 | #endif 811 | // Pipeline launch parameterの変数名 812 | state.pipeline_compile_options.pipelineLaunchParamsVariableName = "params"; 813 | 814 | size_t inputSize = 0; 815 | const char* input = sutil::getInputData(OPTIX_SAMPLE_NAME, OPTIX_SAMPLE_DIR, "optixInOneWeekend.cu", inputSize); 816 | 817 | // PTXからModuleを作成 818 | char log[2048]; 819 | size_t sizeof_log = sizeof(log); 820 | #if OPTIX_VERSION < 70700 821 | OPTIX_CHECK_LOG(optixModuleCreateFromPTX( 822 | state.context, 823 | &module_compile_options, 824 | &state.pipeline_compile_options, 825 | input, 826 | inputSize, 827 | log, 828 | &sizeof_log, 829 | &state.module 830 | )); 831 | #else 832 | OPTIX_CHECK_LOG(optixModuleCreate( 833 | state.context, 834 | &module_compile_options, 835 | &state.pipeline_compile_options, 836 | input, 837 | inputSize, 838 | log, 839 | &sizeof_log, 840 | &state.module 841 | )); 842 | #endif 843 | } 844 | 845 | // ----------------------------------------------------------------------- 846 | // Direct callable プログラムを生成する。生成するごとにcallable_idを1増やす 847 | // ----------------------------------------------------------------------- 848 | void createDirectCallables(const OneWeekendState& state, CallableProgram& callable, const char* dc_function_name, uint32_t& callables_id) 849 | { 850 | OptixProgramGroupOptions prg_options = {}; 851 | 852 | OptixProgramGroupDesc callables_prg_desc = {}; 853 | 854 | char log[2048]; 855 | size_t sizeof_log = sizeof(log); 856 | 857 | callables_prg_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; 858 | callables_prg_desc.callables.moduleDC = state.module; 859 | callables_prg_desc.callables.entryFunctionNameDC = dc_function_name; 860 | sizeof_log = sizeof(log); 861 | OPTIX_CHECK_LOG(optixProgramGroupCreate( 862 | state.context, 863 | &callables_prg_desc, 864 | 1, 865 | &prg_options, 866 | log, 867 | &sizeof_log, 868 | &callable.program 869 | )); 870 | callable.id = callables_id; 871 | callables_id++; 872 | } 873 | 874 | // ----------------------------------------------------------------------- 875 | // 全ProgramGroupの作成 876 | // ----------------------------------------------------------------------- 877 | void createProgramGroups(OneWeekendState& state) 878 | { 879 | OptixProgramGroupOptions prg_options = {}; 880 | 881 | char log[2048]; 882 | size_t sizeof_log = sizeof(log); 883 | 884 | // Raygen program 885 | { 886 | OptixProgramGroupDesc raygen_prg_desc = {}; 887 | raygen_prg_desc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; 888 | raygen_prg_desc.raygen.module = state.module; 889 | raygen_prg_desc.raygen.entryFunctionName = "__raygen__pinhole"; 890 | 891 | OPTIX_CHECK_LOG(optixProgramGroupCreate( 892 | state.context, 893 | &raygen_prg_desc, 894 | 1, // num program groups 895 | &prg_options, 896 | log, 897 | &sizeof_log, 898 | &state.raygen_prg 899 | )); 900 | } 901 | 902 | // Miss program 903 | { 904 | OptixProgramGroupDesc miss_prg_desc = {}; 905 | miss_prg_desc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS; 906 | miss_prg_desc.miss.module = state.module; 907 | miss_prg_desc.miss.entryFunctionName = "__miss__radiance"; 908 | sizeof_log = sizeof(log); 909 | 910 | OPTIX_CHECK_LOG(optixProgramGroupCreate( 911 | state.context, 912 | &miss_prg_desc, 913 | 1, 914 | &prg_options, 915 | log, 916 | &sizeof_log, 917 | &state.miss_prg 918 | )); 919 | } 920 | 921 | // Hitgroup programs 922 | { 923 | // Mesh 924 | OptixProgramGroupDesc hitgroup_prg_desc = {}; 925 | hitgroup_prg_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; 926 | hitgroup_prg_desc.hitgroup.moduleCH = state.module; 927 | hitgroup_prg_desc.hitgroup.entryFunctionNameCH = "__closesthit__mesh"; 928 | sizeof_log = sizeof(log); 929 | OPTIX_CHECK_LOG(optixProgramGroupCreate( 930 | state.context, 931 | &hitgroup_prg_desc, 932 | 1, 933 | &prg_options, 934 | log, 935 | &sizeof_log, 936 | &state.mesh_hitgroup_prg 937 | )); 938 | 939 | // Sphere 940 | memset(&hitgroup_prg_desc, 0, sizeof(OptixProgramGroupDesc)); 941 | hitgroup_prg_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; 942 | hitgroup_prg_desc.hitgroup.moduleIS = state.module; 943 | hitgroup_prg_desc.hitgroup.entryFunctionNameIS = "__intersection__sphere"; 944 | hitgroup_prg_desc.hitgroup.moduleCH = state.module; 945 | hitgroup_prg_desc.hitgroup.entryFunctionNameCH = "__closesthit__sphere"; 946 | sizeof_log = sizeof(log); 947 | OPTIX_CHECK_LOG(optixProgramGroupCreate( 948 | state.context, 949 | &hitgroup_prg_desc, 950 | 1, 951 | &prg_options, 952 | log, 953 | &sizeof_log, 954 | &state.sphere_hitgroup_prg 955 | )); 956 | } 957 | 958 | uint32_t callables_id = 0; 959 | // マテリアル用のCallableプログラム 960 | { 961 | // Lambertian 962 | createDirectCallables(state, state.lambertian_prg, "__direct_callable__lambertian", callables_id); 963 | // Dielectric 964 | createDirectCallables(state, state.dielectric_prg, "__direct_callable__dielectric", callables_id); 965 | // Metal 966 | createDirectCallables(state, state.metal_prg, "__direct_callable__metal", callables_id); 967 | } 968 | 969 | // テクスチャ用のCallableプログラム 970 | { 971 | // Constant texture 972 | createDirectCallables(state, state.constant_prg, "__direct_callable__constant", callables_id); 973 | // Checker texture 974 | createDirectCallables(state, state.checker_prg, "__direct_callable__checker", callables_id); 975 | } 976 | } 977 | 978 | // ----------------------------------------------------------------------- 979 | // OptixPipelineの作成 980 | // ----------------------------------------------------------------------- 981 | void createPipeline(OneWeekendState& state) 982 | { 983 | OptixProgramGroup program_groups[] = 984 | { 985 | state.raygen_prg, 986 | state.miss_prg, 987 | state.mesh_hitgroup_prg, 988 | state.sphere_hitgroup_prg, 989 | state.lambertian_prg.program, 990 | state.dielectric_prg.program, 991 | state.metal_prg.program, 992 | state.constant_prg.program, 993 | state.checker_prg.program 994 | }; 995 | 996 | OptixPipelineLinkOptions pipeline_link_options = {}; 997 | // optixTrace()の呼び出し深度の設定 998 | pipeline_link_options.maxTraceDepth = 2; 999 | #if OPTIX_VERSION < 70700 1000 | pipeline_link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; 1001 | #endif 1002 | 1003 | char log[2048]; 1004 | size_t sizeof_log = sizeof(log); 1005 | OPTIX_CHECK_LOG(optixPipelineCreate( 1006 | state.context, 1007 | &state.pipeline_compile_options, 1008 | &pipeline_link_options, 1009 | program_groups, 1010 | sizeof(program_groups) / sizeof(program_groups[0]), 1011 | log, 1012 | &sizeof_log, 1013 | &state.pipeline 1014 | )); 1015 | 1016 | // 各プログラムからパイプラインによって構築されるCall graphのスタックサイズを計算 1017 | OptixStackSizes stack_sizes = {}; 1018 | #if OPTIX_VERSION < 70700 1019 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.raygen_prg, &stack_sizes)); 1020 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.miss_prg, &stack_sizes)); 1021 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.mesh_hitgroup_prg, &stack_sizes)); 1022 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.sphere_hitgroup_prg, &stack_sizes)); 1023 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.lambertian_prg.program, &stack_sizes)); 1024 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.dielectric_prg.program, &stack_sizes)); 1025 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.metal_prg.program, &stack_sizes)); 1026 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.constant_prg.program, &stack_sizes)); 1027 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.checker_prg.program, &stack_sizes)); 1028 | #else 1029 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.raygen_prg, &stack_sizes, state.pipeline)); 1030 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.miss_prg, &stack_sizes, state.pipeline)); 1031 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.mesh_hitgroup_prg, &stack_sizes, state.pipeline)); 1032 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.sphere_hitgroup_prg, &stack_sizes, state.pipeline)); 1033 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.lambertian_prg.program, &stack_sizes, state.pipeline)); 1034 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.dielectric_prg.program, &stack_sizes, state.pipeline)); 1035 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.metal_prg.program, &stack_sizes, state.pipeline)); 1036 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.constant_prg.program, &stack_sizes, state.pipeline)); 1037 | OPTIX_CHECK(optixUtilAccumulateStackSizes(state.checker_prg.program, &stack_sizes, state.pipeline)); 1038 | #endif 1039 | 1040 | uint32_t max_trace_depth = pipeline_link_options.maxTraceDepth; 1041 | // Continuation callableは使用していないので、0でよい 1042 | uint32_t max_cc_depth = 0; 1043 | // Direct callableの呼び出し深度は最大でも2回 (マテリアル -> テクスチャ) 1044 | uint32_t max_dc_depth = 3; 1045 | uint32_t direct_callable_stack_size_from_traversable; 1046 | uint32_t direct_callable_stack_size_from_state; 1047 | uint32_t continuation_stack_size; 1048 | OPTIX_CHECK(optixUtilComputeStackSizes( 1049 | &stack_sizes, 1050 | max_trace_depth, 1051 | max_cc_depth, 1052 | max_dc_depth, 1053 | &direct_callable_stack_size_from_traversable, 1054 | &direct_callable_stack_size_from_state, 1055 | &continuation_stack_size 1056 | )); 1057 | 1058 | // Traversable graphの深度を設定する 1059 | // 今回のように IAS -> GAS だけで終わるのであれば、traversable graphの深度は2となる 1060 | // IAS -> Motion transform -> GAS となるようであれば、深度は3必要となる 1061 | const uint32_t max_traversal_depth = 2; 1062 | OPTIX_CHECK(optixPipelineSetStackSize( 1063 | state.pipeline, 1064 | direct_callable_stack_size_from_traversable, 1065 | direct_callable_stack_size_from_state, 1066 | continuation_stack_size, 1067 | max_traversal_depth 1068 | )); 1069 | } 1070 | 1071 | // ----------------------------------------------------------------------- 1072 | // Shader binding tableの構築 1073 | // ----------------------------------------------------------------------- 1074 | void createSBT(OneWeekendState& state, const std::vector>& hitgroup_datas) 1075 | { 1076 | // Ray generation 1077 | RayGenRecord raygen_record = {}; 1078 | // RayGenRecordの領域をデバイス側に確保 1079 | CUdeviceptr d_raygen_record; 1080 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_raygen_record), sizeof(RayGenRecord))); 1081 | // SBT recordのヘッダーをプログラムを使って埋める 1082 | OPTIX_CHECK(optixSbtRecordPackHeader(state.raygen_prg, &raygen_record)); 1083 | // RayGenRecordをデバイス側にコピー 1084 | CUDA_CHECK(cudaMemcpy( 1085 | reinterpret_cast(d_raygen_record), 1086 | &raygen_record, 1087 | sizeof(RayGenRecord), 1088 | cudaMemcpyHostToDevice 1089 | )); 1090 | 1091 | // Miss 1092 | MissRecord miss_record = {}; 1093 | // MissRecordの領域をデバイス側に確保 1094 | CUdeviceptr d_miss_record; 1095 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_miss_record), sizeof(MissRecord))); 1096 | // SBT recordのヘッダーをプログラムを使って埋める 1097 | OPTIX_CHECK(optixSbtRecordPackHeader(state.miss_prg, &miss_record)); 1098 | // データを設定 1099 | miss_record.data.bg_color = make_float4(0.0f); 1100 | // MissRecordをデバイス側にコピー 1101 | CUDA_CHECK(cudaMemcpy( 1102 | reinterpret_cast(d_miss_record), 1103 | &miss_record, 1104 | sizeof(MissRecord), 1105 | cudaMemcpyHostToDevice 1106 | )); 1107 | 1108 | // HitGroup 1109 | HitGroupRecord* hitgroup_records = new HitGroupRecord[hitgroup_datas.size()]; 1110 | // HitGroupRecord用の領域をデバイス側に確保 1111 | CUdeviceptr d_hitgroup_records; 1112 | const size_t hitgroup_record_size = sizeof(HitGroupRecord) * hitgroup_datas.size(); 1113 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_hitgroup_records), hitgroup_record_size)); 1114 | 1115 | // HitGroupDataからShader binding tableを構築 1116 | for (size_t i = 0; i < hitgroup_datas.size(); i++) 1117 | { 1118 | ShapeType type = hitgroup_datas[i].first; 1119 | HitGroupData data = hitgroup_datas[i].second; 1120 | // ShapeTypeに応じてヘッダーを埋めるためのプログラムを切り替える 1121 | if (type == ShapeType::Mesh) 1122 | OPTIX_CHECK(optixSbtRecordPackHeader(state.mesh_hitgroup_prg, &hitgroup_records[i])); 1123 | else if (type == ShapeType::Sphere) 1124 | OPTIX_CHECK(optixSbtRecordPackHeader(state.sphere_hitgroup_prg, &hitgroup_records[i])); 1125 | // データを設定 1126 | hitgroup_records[i].data = data; 1127 | } 1128 | // HitGroupRecordをデバイス側にコピー 1129 | CUDA_CHECK(cudaMemcpy( 1130 | reinterpret_cast(d_hitgroup_records), 1131 | hitgroup_records, 1132 | hitgroup_record_size, 1133 | cudaMemcpyHostToDevice 1134 | )); 1135 | 1136 | // 今回はCallable プログラムのShader binding tableへのデータ登録は必要ないので、 1137 | // EmptyRecordを使って空データをコピーする。 1138 | // ただし、データがない場合でもヘッダーをプログラムで埋める必要がある。 1139 | // ここを忘れるとレイトレーシング起動後にInvalid memory accessが起きる 1140 | // デバッグで気づきづらい点なので要注意 1141 | EmptyRecord* callables_records = new EmptyRecord[5]; 1142 | CUdeviceptr d_callables_records; 1143 | const size_t callables_record_size = sizeof(EmptyRecord) * 5; 1144 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&d_callables_records), callables_record_size)); 1145 | 1146 | OPTIX_CHECK(optixSbtRecordPackHeader(state.lambertian_prg.program, &callables_records[state.lambertian_prg.id])); 1147 | OPTIX_CHECK(optixSbtRecordPackHeader(state.dielectric_prg.program, &callables_records[state.dielectric_prg.id])); 1148 | OPTIX_CHECK(optixSbtRecordPackHeader(state.metal_prg.program, &callables_records[state.metal_prg.id])); 1149 | OPTIX_CHECK(optixSbtRecordPackHeader(state.constant_prg.program, &callables_records[state.constant_prg.id])); 1150 | OPTIX_CHECK(optixSbtRecordPackHeader(state.checker_prg.program, &callables_records[state.checker_prg.id])); 1151 | 1152 | CUDA_CHECK(cudaMemcpy( 1153 | reinterpret_cast(d_callables_records), 1154 | callables_records, 1155 | callables_record_size, 1156 | cudaMemcpyHostToDevice 1157 | )); 1158 | 1159 | // 各recordからShader binding tableを構築 1160 | // ここではrecord配列の先頭へのポインタと、shader binding tableのアラインメント、配列数を設定する 1161 | state.sbt.raygenRecord = d_raygen_record; 1162 | state.sbt.missRecordBase = d_miss_record; 1163 | state.sbt.missRecordStrideInBytes = static_cast(sizeof(MissRecord)); 1164 | state.sbt.missRecordCount = 1; 1165 | state.sbt.hitgroupRecordBase = d_hitgroup_records; 1166 | state.sbt.hitgroupRecordStrideInBytes = static_cast(sizeof(HitGroupRecord)); 1167 | state.sbt.hitgroupRecordCount = static_cast(hitgroup_datas.size()); 1168 | state.sbt.callablesRecordBase = d_callables_records; 1169 | state.sbt.callablesRecordCount = 5; 1170 | state.sbt.callablesRecordStrideInBytes = sizeof(EmptyRecord); 1171 | } 1172 | 1173 | // ----------------------------------------------------------------------- 1174 | void finalizeState(OneWeekendState& state) 1175 | { 1176 | OPTIX_CHECK(optixPipelineDestroy(state.pipeline)); 1177 | OPTIX_CHECK(optixProgramGroupDestroy(state.raygen_prg)); 1178 | OPTIX_CHECK(optixProgramGroupDestroy(state.miss_prg)); 1179 | OPTIX_CHECK(optixProgramGroupDestroy(state.mesh_hitgroup_prg)); 1180 | OPTIX_CHECK(optixProgramGroupDestroy(state.sphere_hitgroup_prg)); 1181 | OPTIX_CHECK(optixProgramGroupDestroy(state.lambertian_prg.program)); 1182 | OPTIX_CHECK(optixProgramGroupDestroy(state.dielectric_prg.program)); 1183 | OPTIX_CHECK(optixProgramGroupDestroy(state.metal_prg.program)); 1184 | OPTIX_CHECK(optixProgramGroupDestroy(state.constant_prg.program)); 1185 | OPTIX_CHECK(optixProgramGroupDestroy(state.checker_prg.program)); 1186 | 1187 | CUDA_CHECK(cudaFree(reinterpret_cast(state.sbt.raygenRecord))); 1188 | CUDA_CHECK(cudaFree(reinterpret_cast(state.sbt.missRecordBase))); 1189 | CUDA_CHECK(cudaFree(reinterpret_cast(state.sbt.hitgroupRecordBase))); 1190 | } 1191 | 1192 | // ----------------------------------------------------------------------- 1193 | // デバイス上にデータをコピーして、そのポインタを汎用ポインタで返す 1194 | // ----------------------------------------------------------------------- 1195 | template 1196 | void* copyDataToDevice(T data, size_t size) 1197 | { 1198 | CUdeviceptr device_ptr; 1199 | CUDA_CHECK(cudaMalloc(reinterpret_cast(&device_ptr), size)); 1200 | CUDA_CHECK(cudaMemcpy( 1201 | reinterpret_cast(device_ptr), 1202 | &data, size, 1203 | cudaMemcpyHostToDevice 1204 | )); 1205 | return reinterpret_cast(device_ptr); 1206 | } 1207 | 1208 | // ----------------------------------------------------------------------- 1209 | void createScene(OneWeekendState& state) 1210 | { 1211 | // HitGroupDataとマテリアルデータを格納する配列 1212 | // 今回の場合は、球・メッシュではそれぞれでジオメトリ用のデータは同じ配列を使用し、 1213 | // デバイス側でのoptixGetPrimitiveIndex()で交差するデータを切り替えて 1214 | // マテリアルデータは異なるデータが振り分けられている方式をとっている。 1215 | // そのため、hitgroup_datasの数はmaterialsの数に合わせる 1216 | // <- マテリアルの分だけHitGroupRecordがあれば十分でジオメトリの数用意する必要はない 1217 | std::vector> hitgroup_datas; 1218 | std::vector materials; 1219 | 1220 | // -------------------------------------------------------------------- 1221 | // 球体のシーン構築 1222 | // 球体は全て異なるマテリアルを持っていることとする 1223 | // -------------------------------------------------------------------- 1224 | // 球体用のデータ準備 1225 | std::vector spheres; 1226 | // 球体用の相対的なsbt_indexの配列 1227 | std::vector sphere_sbt_indices; 1228 | uint32_t sphere_sbt_index = 0; 1229 | 1230 | // Ground 1231 | SphereData ground_sphere{ make_float3(0, -1000, 0), 1000 }; 1232 | spheres.emplace_back(ground_sphere); 1233 | // テクスチャ 1234 | CheckerData ground_checker{ make_float4(1.0f), make_float4(0.2f, 0.5f, 0.2f, 1.0f), 5000}; 1235 | // Lambertianマテリアル 1236 | LambertianData ground_lambert{ copyDataToDevice(ground_checker, sizeof(CheckerData)), state.checker_prg.id }; 1237 | materials.push_back(Material{ copyDataToDevice(ground_lambert, sizeof(LambertianData)), state.lambertian_prg.id }); 1238 | // マテリアルを追加したのでsbt_indexも追加 1239 | sphere_sbt_indices.emplace_back(sphere_sbt_index++); 1240 | 1241 | // 疑似乱数用のシード値を生成 1242 | uint32_t seed = tea<4>(0, 0); 1243 | for (int a = -11; a < 11; a++) 1244 | { 1245 | for (int b = -11; b < 11; b++) 1246 | { 1247 | const float choose_mat = rnd(seed); 1248 | const float3 center{ a + 0.9f * rnd(seed), 0.2f, b + 0.9f * rnd(seed) }; 1249 | if (length(center - make_float3(4, 0.2, 0)) > 0.9f) 1250 | { 1251 | // 球体を追加 1252 | spheres.emplace_back( SphereData { center, 0.2f }); 1253 | 1254 | // 確率的にLambertian、Metal、Dielectricマテリアルを作成 1255 | // 追加する際は型に応じたCallableプログラムIDを割り振る 1256 | if (choose_mat < 0.8f) 1257 | { 1258 | // Lambertian 1259 | ConstantData albedo{ make_float4(rnd(seed), rnd(seed), rnd(seed), 1.0f) }; 1260 | LambertianData lambertian{ copyDataToDevice(albedo, sizeof(ConstantData)), state.constant_prg.id }; 1261 | materials.emplace_back(Material{ copyDataToDevice(lambertian, sizeof(LambertianData)), state.lambertian_prg.id }); 1262 | } 1263 | else if (choose_mat < 0.95f) 1264 | { 1265 | // Metal 1266 | ConstantData albedo{ make_float4(0.5f + rnd(seed) * 0.5f) }; 1267 | MetalData metal{ copyDataToDevice(albedo, sizeof(ConstantData)), state.constant_prg.id, /* fuzz = */ rnd(seed) * 0.5f}; 1268 | materials.emplace_back(Material{ copyDataToDevice(metal, sizeof(MetalData)), state.metal_prg.id }); 1269 | } 1270 | else 1271 | { 1272 | // Dielectric 1273 | ConstantData albedo{ make_float4(1.0f) }; 1274 | DielectricData glass{ copyDataToDevice(albedo, sizeof(ConstantData)), state.constant_prg.id, /* ior = */ 1.5f}; 1275 | materials.emplace_back(Material{ copyDataToDevice(glass, sizeof(DielectricData)), state.dielectric_prg.id }); 1276 | } 1277 | sphere_sbt_indices.emplace_back(sphere_sbt_index++); 1278 | } 1279 | } 1280 | } 1281 | 1282 | // Dielectric 1283 | spheres.emplace_back(SphereData{ make_float3(0.0f, 1.0f, 0.0f), 1.0f }); 1284 | ConstantData albedo1{ make_float4(1.0f) }; 1285 | DielectricData material1{ copyDataToDevice(albedo1, sizeof(ConstantData)), state.constant_prg.id, /* ior = */ 1.5f }; 1286 | materials.push_back(Material{ copyDataToDevice(material1, sizeof(DielectricData)), state.dielectric_prg.id }); 1287 | sphere_sbt_indices.emplace_back(sphere_sbt_index++); 1288 | 1289 | // Lambertian 1290 | spheres.emplace_back(SphereData{ make_float3(-4.0f, 1.0f, 0.0f), 1.0f }); 1291 | ConstantData albedo2{ make_float4(0.4f, 0.2f, 0.1f, 1.0f) }; 1292 | LambertianData material2{ copyDataToDevice(albedo2, sizeof(ConstantData)), state.constant_prg.id }; 1293 | materials.push_back(Material{ copyDataToDevice(material2, sizeof(LambertianData)), state.lambertian_prg.id }); 1294 | sphere_sbt_indices.emplace_back(sphere_sbt_index++); 1295 | 1296 | // Metal 1297 | spheres.emplace_back(SphereData{ make_float3(4.0f, 1.0f, 0.0f), 1.0f }); 1298 | ConstantData albedo3{ make_float4(0.7f, 0.6f, 0.5f, 1.0f) }; 1299 | MetalData material3{ copyDataToDevice(albedo3, sizeof(ConstantData)), state.constant_prg.id }; 1300 | materials.emplace_back(Material{ copyDataToDevice(material3, sizeof(MetalData)), state.metal_prg.id }); 1301 | sphere_sbt_indices.emplace_back(sphere_sbt_index++); 1302 | 1303 | // Sphere用のGASを作成 (内部で同時にstate.d_sphere_dataへのデータコピーも行っている) 1304 | GeometryAccelData sphere_gas; 1305 | buildSphereGAS(state, sphere_gas, spheres, sphere_sbt_indices); 1306 | 1307 | // マテリアルと球体データの配列からShader binding table用のデータを用意 1308 | for (auto& m : materials) 1309 | hitgroup_datas.emplace_back(ShapeType::Sphere, HitGroupData{state.d_sphere_data, m}); 1310 | 1311 | // -------------------------------------------------------------------- 1312 | // メッシュののシーン構築 1313 | // メッシュでは100個の三角形に対して割り振るマテリアルは3種類のみ 1314 | // メッシュデータは全マテリアル共通なので、用意するSBT recordも3つのみでよい 1315 | // -------------------------------------------------------------------- 1316 | std::vector mesh_vertices; 1317 | std::vector mesh_indices; 1318 | std::vector mesh_sbt_indices; 1319 | uint32_t mesh_index = 0; 1320 | for (int a = 0; a < 100; a++) { 1321 | float3 center{rnd(seed) * 20.0f - 10.0f, 0.5f + rnd(seed) * 1.0f - 0.5f, rnd(seed) * 20.0f - 10.0f }; 1322 | const float3 p0 = center + make_float3(rnd(seed) * 0.5f, -rnd(seed) * 0.5f, rnd(seed) * 0.5f - 0.25f); 1323 | const float3 p1 = center + make_float3(-rnd(seed) * 0.5f, -rnd(seed) * 0.5f, rnd(seed) * 0.5f - 0.25f); 1324 | const float3 p2 = center + make_float3(rnd(seed) * 0.25f, rnd(seed) * 0.5f, rnd(seed) * 0.5f - 0.25f); 1325 | 1326 | mesh_vertices.emplace_back(p0); 1327 | mesh_vertices.emplace_back(p1); 1328 | mesh_vertices.emplace_back(p2); 1329 | mesh_indices.emplace_back(make_uint3(mesh_index + 0, mesh_index + 1, mesh_index + 2)); 1330 | mesh_index += 3; 1331 | } 1332 | 1333 | const uint32_t red_sbt_index = 0; 1334 | const uint32_t green_sbt_index = 1; 1335 | const uint32_t blue_sbt_index = 2; 1336 | 1337 | // ランダムで赤・緑・青の3色を割り振る 1338 | for (size_t i = 0; i < mesh_indices.size(); i++) 1339 | { 1340 | const float choose_rgb = rnd(seed); 1341 | if (choose_rgb < 0.33f) 1342 | mesh_sbt_indices.push_back(red_sbt_index); 1343 | else if (choose_rgb < 0.67f) 1344 | mesh_sbt_indices.push_back(green_sbt_index); 1345 | else 1346 | mesh_sbt_indices.push_back(blue_sbt_index); 1347 | } 1348 | 1349 | // メッシュ用のGASを作成 1350 | GeometryAccelData mesh_gas; 1351 | buildMeshGAS(state, mesh_gas, mesh_vertices, mesh_indices, mesh_sbt_indices); 1352 | 1353 | // 赤・緑・青のマテリアルを用意し、HitGroupDataを追加 1354 | // 赤 1355 | ConstantData red{ {0.8f, 0.05f, 0.05f, 1.0f} }; 1356 | LambertianData red_lambert{ copyDataToDevice(red, sizeof(ConstantData)), state.constant_prg.id }; 1357 | materials.emplace_back(Material{ copyDataToDevice(red_lambert, sizeof(LambertianData)), state.lambertian_prg.id }); 1358 | hitgroup_datas.emplace_back(ShapeType::Mesh, HitGroupData{ state.d_mesh_data, materials.back() }); 1359 | 1360 | // 緑 1361 | ConstantData green{ {0.05f, 0.8f, 0.05f, 1.0f} }; 1362 | LambertianData green_lambert{ copyDataToDevice(green, sizeof(ConstantData)), state.constant_prg.id }; 1363 | materials.emplace_back(Material{ copyDataToDevice(green_lambert, sizeof(LambertianData)), state.lambertian_prg.id }); 1364 | hitgroup_datas.emplace_back(ShapeType::Mesh, HitGroupData{ state.d_mesh_data, materials.back() }); 1365 | 1366 | // 青 1367 | ConstantData blue{ {0.05f, 0.05f, 0.8f, 1.0f} }; 1368 | LambertianData blue_lambert{ copyDataToDevice(blue, sizeof(ConstantData)), state.constant_prg.id }; 1369 | materials.emplace_back(Material{ copyDataToDevice(blue_lambert, sizeof(LambertianData)), state.lambertian_prg.id }); 1370 | hitgroup_datas.emplace_back(ShapeType::Mesh, HitGroupData{ state.d_mesh_data, materials.back() }); 1371 | 1372 | // IAS用のInstanceを球体用・メッシュ用それぞれ作成 1373 | std::vector instances; 1374 | uint32_t flags = OPTIX_INSTANCE_FLAG_NONE; 1375 | 1376 | uint32_t sbt_offset = 0; 1377 | uint32_t instance_id = 0; 1378 | instances.emplace_back(OptixInstance{ 1379 | {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0}, instance_id, sbt_offset, 255, 1380 | flags, sphere_gas.handle, {0, 0} 1381 | }); 1382 | 1383 | sbt_offset += sphere_gas.num_sbt_records; 1384 | instance_id++; 1385 | // メッシュの方はY軸中心にPI/6だけ回転させる 1386 | const float c = cosf(M_PIf / 6.0f); 1387 | const float s = sinf(M_PIf / 6.0f); 1388 | instances.push_back(OptixInstance{ 1389 | {c, 0, s, 0, 0, 1, 0, 0, -s, 0, c, 0}, instance_id, sbt_offset, 255, 1390 | flags, mesh_gas.handle, {0, 0} 1391 | }); 1392 | 1393 | // IASの作成 1394 | buildIAS(state, state.ias, instances); 1395 | 1396 | // Shader binding tableの作成 1397 | createSBT(state, hitgroup_datas); 1398 | } 1399 | 1400 | // ----------------------------------------------------------------------- 1401 | int main(int argc, char* argv[]) 1402 | { 1403 | OneWeekendState state; 1404 | state.params.width = 1200; 1405 | state.params.height = static_cast(1200.0f / (3.0f / 2.0f)); 1406 | sutil::CUDAOutputBufferType output_buffer_type = sutil::CUDAOutputBufferType::GL_INTEROP; 1407 | 1408 | std::string outfile; 1409 | 1410 | for (int i = 1; i < argc; i++) 1411 | { 1412 | const std::string arg = argv[i]; 1413 | if (arg == "--file" || arg == "-f") 1414 | { 1415 | if (i >= argc - 1) 1416 | printUsageAndExit(argv[0]); 1417 | outfile = argv[++i]; 1418 | } 1419 | else if (arg.substr(0, 6) == "--dim=") 1420 | { 1421 | const std::string dims_arg = arg.substr(6); 1422 | int w, h; 1423 | sutil::parseDimensions(dims_arg.c_str(), w, h); 1424 | state.params.width = w; 1425 | state.params.height = h; 1426 | } 1427 | else if (arg == "--launch-samples" || arg == "-s") 1428 | { 1429 | if (i >= argc - 1) 1430 | printUsageAndExit(argv[0]); 1431 | samples_per_launch = atoi(argv[++i]); 1432 | } 1433 | else 1434 | { 1435 | std::cerr << "Unknown option '" << argv[i] << "'\n"; 1436 | printUsageAndExit(argv[0]); 1437 | } 1438 | } 1439 | 1440 | try 1441 | { 1442 | initCameraState(); 1443 | 1444 | createContext(state); 1445 | createModule(state); 1446 | createProgramGroups(state); 1447 | createPipeline(state); 1448 | createScene(state); 1449 | initLaunchParams(state); 1450 | 1451 | if (outfile.empty()) 1452 | { 1453 | GLFWwindow* window = sutil::initUI("optixInOneWeekend", state.params.width, state.params.height); 1454 | glfwSetMouseButtonCallback(window, mouseButtonCallback); 1455 | glfwSetCursorPosCallback(window, cursorPosCallback); 1456 | glfwSetWindowSizeCallback(window, windowSizeCallback); 1457 | glfwSetWindowIconifyCallback(window, windowIconifyCallback); 1458 | glfwSetKeyCallback(window, keyCallback); 1459 | glfwSetScrollCallback(window, scrollCallback); 1460 | glfwSetWindowUserPointer(window, &state.params); 1461 | 1462 | // 1463 | // Render loop 1464 | // 1465 | { 1466 | sutil::CUDAOutputBuffer output_buffer( 1467 | output_buffer_type, 1468 | state.params.width, 1469 | state.params.height 1470 | ); 1471 | 1472 | output_buffer.setStream(state.stream); 1473 | sutil::GLDisplay gl_display; 1474 | 1475 | std::chrono::duration state_update_time(0.0); 1476 | std::chrono::duration render_time(0.0); 1477 | std::chrono::duration display_time(0.0); 1478 | 1479 | do 1480 | { 1481 | auto t0 = std::chrono::steady_clock::now(); 1482 | glfwPollEvents(); 1483 | 1484 | updateState(output_buffer, state.params); 1485 | auto t1 = std::chrono::steady_clock::now(); 1486 | state_update_time += t1 - t0; 1487 | t0 = t1; 1488 | 1489 | launchSubframe(output_buffer, state); 1490 | t1 = std::chrono::steady_clock::now(); 1491 | render_time += t1 - t0; 1492 | t0 = t1; 1493 | 1494 | displaySubframe(output_buffer, gl_display, window); 1495 | t1 = std::chrono::steady_clock::now(); 1496 | display_time += t1 - t0; 1497 | 1498 | sutil::displayStats(state_update_time, render_time, display_time); 1499 | 1500 | glfwSwapBuffers(window); 1501 | 1502 | ++state.params.subframe_index; 1503 | } while (!glfwWindowShouldClose(window)); 1504 | CUDA_SYNC_CHECK(); 1505 | } 1506 | 1507 | sutil::cleanupUI(window); 1508 | } 1509 | else 1510 | { 1511 | if (output_buffer_type == sutil::CUDAOutputBufferType::GL_INTEROP) 1512 | { 1513 | sutil::initGLFW(); // For GL context 1514 | sutil::initGL(); 1515 | } 1516 | 1517 | sutil::CUDAOutputBuffer output_buffer( 1518 | output_buffer_type, 1519 | state.params.width, 1520 | state.params.height 1521 | ); 1522 | 1523 | handleCameraUpdate(state.params); 1524 | handleResize(output_buffer, state.params); 1525 | for (int i = 0; i < 1024; i += samples_per_launch) { 1526 | launchSubframe(output_buffer, state); 1527 | state.params.subframe_index++; 1528 | } 1529 | 1530 | sutil::ImageBuffer buffer; 1531 | buffer.data = output_buffer.getHostPointer(); 1532 | buffer.width = output_buffer.width(); 1533 | buffer.height = output_buffer.height(); 1534 | buffer.pixel_format = sutil::BufferImageFormat::UNSIGNED_BYTE4; 1535 | 1536 | sutil::saveImage(outfile.c_str(), buffer, false); 1537 | 1538 | if (output_buffer_type == sutil::CUDAOutputBufferType::GL_INTEROP) 1539 | { 1540 | glfwTerminate(); 1541 | } 1542 | } 1543 | 1544 | finalizeState(state); 1545 | } 1546 | catch (std::exception& e) 1547 | { 1548 | std::cerr << "Caught exception: " << e.what() << "\n"; 1549 | return 1; 1550 | } 1551 | return 0; 1552 | } --------------------------------------------------------------------------------