├── .gitignore ├── CMakeLists.txt ├── README.md ├── TODO ├── accel ├── CMakeLists.txt ├── accel.hpp ├── bvh.cpp ├── bvh.hpp ├── bvh2.cpp ├── bvh2.hpp ├── bvh4.cpp ├── bvh4.hpp ├── light_accel.hpp ├── light_array.cpp ├── light_array.hpp ├── light_tree.cpp └── light_tree.hpp ├── basics ├── bbox.hpp ├── bbox2_test.cpp ├── bbox4_test.cpp ├── bbox_test.cpp ├── camera.hpp ├── differential_geometry.hpp ├── instance_id.hpp ├── instance_id_test.cpp ├── intersection.hpp └── ray.hpp ├── cmake └── FindIlmBase.cmake ├── color ├── color.hpp ├── spectra_xyz_5nm_380_780_0.97.h └── spectrum_grid.h ├── config.cpp ├── config.h.in ├── config.hpp ├── docs └── scene_format_example.psy ├── film ├── film.hpp └── raster.hpp ├── format_code.sh ├── global.cpp ├── global.hpp ├── integrator ├── CMakeLists.txt ├── integrator.hpp ├── path_trace_integrator.cpp └── path_trace_integrator.hpp ├── lights ├── light.hpp ├── point_light.hpp ├── rectangle_light.hpp └── sphere_light.hpp ├── main.cpp ├── math ├── CMakeLists.txt ├── matrix.hpp ├── matrix_test.cpp ├── transform.hpp ├── vector.hpp └── vector_test.cpp ├── object ├── CMakeLists.txt ├── bicubic.cpp ├── bicubic.hpp ├── bilinear.cpp ├── bilinear.hpp ├── object.hpp ├── patch_utils.hpp ├── sphere.cpp ├── sphere.hpp ├── subdivision_surface.cpp └── subdivision_surface.hpp ├── parser ├── CMakeLists.txt ├── data_tree.cpp ├── data_tree.hpp ├── parser.cpp ├── parser.hpp └── utf8.hpp ├── psychoblend ├── __init__.py ├── psy_export.py ├── render.py └── ui.py ├── renderer ├── CMakeLists.txt ├── renderer.cpp └── renderer.hpp ├── sampling ├── CMakeLists.txt ├── halton.cpp ├── halton.hpp ├── halton.py ├── image_sampler.cpp ├── image_sampler.hpp ├── samples.hpp ├── sobol.cpp └── sobol.hpp ├── scene ├── assembly.hpp └── scene.hpp ├── shading ├── closure_union.hpp ├── displacement_shader.hpp ├── surface_closure.hpp └── surface_shader.hpp ├── test ├── catch.hpp ├── test.hpp ├── test_float.cpp └── test_main.cpp ├── tracer ├── CMakeLists.txt ├── potentialinter.hpp ├── 
tracer.cpp └── tracer.hpp └── utils ├── bit_stack.hpp ├── blocked_array.hpp ├── blocked_array_disk_cache.hpp ├── chunked_array.hpp ├── chunked_array_test.cpp ├── counting_sort.hpp ├── disk_cache.hpp ├── disk_cache_test.cpp ├── hash.hpp ├── hilbert.hpp ├── job_queue.hpp ├── job_queue_test.cpp ├── low_level.hpp ├── lru_cache.hpp ├── mis.hpp ├── monte_carlo.hpp ├── morton.hpp ├── numtype.h ├── range.hpp ├── ring_buffer.hpp ├── ring_buffer_atomic.hpp ├── ring_buffer_concurrent.hpp ├── ring_buffer_concurrent_test.cpp ├── ring_buffer_test.cpp ├── rng.hpp ├── rng_test.cpp ├── simd.hpp ├── simd_test.cpp ├── spinlock.hpp ├── stack.hpp ├── stack_test.cpp ├── timer.hpp ├── utils.hpp └── utils_test.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Auto-formatting backups 2 | *.orig 3 | *.orig~ 4 | 5 | # Python Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | 9 | .zedstate 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Note 2 | ==== 3 | This repo is the old version of Psychopath, and no further development is happening here. The new repo is at https://github.com/cessen/psychopath 4 | 5 | Overview 6 | ======== 7 | 8 | Psychopath is a path tracer, aimed at rendering animations and VFX for 9 | film. It is currently still in an early prototyping stage of development. You 10 | can view a brief video of some animations rendered with it in 11 | [this (now somewhat out-dated) 12 | video](https://www.youtube.com/watch?v=rydLFAdhseo). 13 | 14 | The long-term goals of Psychopath are to support efficient global illumination 15 | rendering of scenes that are significantly larger than available RAM and/or 16 | that contain procedural elements that need to be generated on-the-fly during 17 | rendering. 
18 | 19 | The approach that Psychopath takes to enable this is to try to access the scene 20 | data in as coherent a fashion as possible via breadth-first ray tracing, 21 | allowing the cost of HDD access, expensive procedurals, etc. to be amortized 22 | over large batches of rays. 23 | 24 | In its current state this principle and its effectiveness are demonstrated by 25 | refining geometry to sub-pixel microgeometry on the fly during the rendering process, somewhat analogous to the Reyes rendering architecture. Even with geometry caching completely disabled, Psychopath is able to render using this technique very efficiently. 26 | 27 | Current Features 28 | ---------------- 29 | - Spheres 30 | - Bilinear patches 31 | - Bicubic Bezier patches 32 | - Catmull-Clark subdivision surfaces 33 | - Spherical light sources 34 | - Rectangular light sources 35 | - Simple shader system (assign BSDFs to objects) 36 | - Multiple importance sampling 37 | - Spectral rendering (via Monte Carlo, not binning) 38 | - Focal blur / DoF 39 | - Camera motion blur 40 | - Deformation motion blur 41 | - Transforms and transform motion blur 42 | - Full hierarchical instancing 43 | 44 | Features Currently In-Progress 45 | ------------------------------ 46 | - A novel method for efficiently handling many (i.e. thousands or millions of) 47 | lights in a scene. See [this thread](http://ompf2.com/viewtopic.php?f=3&t=1938) for an overview. 48 | 49 | 50 | 51 | PsychoBlend 52 | =========== 53 | 54 | Included in the repository is an addon for [Blender](http://www.blender.org) 55 | called "PsychoBlend" that allows you to do basic rendering of Blender scenes 56 | with Psychopath. Most Blender features are not yet supported. 
57 | 58 | Features Supported 59 | ------------------ 60 | - Quad mesh export as bilinear patches 61 | - Mesh export as Catmull-Clark subdivision surfaces (when marked as such) 62 | - Point lights exported as spherical lights (point lights have a "radius" setting) 63 | - Area lights, exported as rectangular area lights 64 | - Simple material system for assigning different BSDFs to different objects 65 | - Focal blur / DoF 66 | - Camera motion blur 67 | - Transform motion blur 68 | - Deformation motion blur 69 | - Exports dupligroups with full hierarchical instancing 70 | - Limited auto-detected instancing of meshes 71 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | - Bugs: 2 | //- Very small sphere lights introduce severe banding in light attenuation. 3 | - Some scenes hang when rendering at high sample rates. 4 | 5 | - Iterative intersections: 6 | - Displacement tests 7 | 8 | - Unit tests: 9 | - BitStack 10 | - BitStack2 11 | 12 | - Ray stream tracing: 13 | - Optimizations... 14 | 15 | //- Multiple importance sampling 16 | 17 | - LightTree: 18 | - Investigate using importance resampling along with leaf nodes that have 19 | more than one light in them. 20 | - Investigate having the BSDFs provide methods for working with the 21 | LightTree, to better optimize light selection based on the BSDF. 22 | 23 | //- Hero wavelength spectral rendering 24 | 25 | - Change assemblies to use a memory arena for object/shader/etc. data. 26 | 27 | - Light sources: 28 | - Infinite lights (e.g. sun lights) will be considered part of the background, 29 | along with e.g. HDRI lighting backgrounds. 30 | - Point lights should be a special case of sphere lights, with radius zero. 31 | - Spot lights 32 | //- Rectangular area lights 33 | - A way to mark other objects as light sources, for MIS (this can't be done 34 | automatically in the general case...?) 
35 | 36 | - Parsing: 37 | - Give useful error messages... 38 | - Support binary geometry files of some kind, for faster scene loading 39 | 40 | - PsychoBlend: 41 | - Allow per-object motion blur segment specification 42 | 43 | - Shading system 44 | //- Stupid simple shaders first 45 | - A few hard-coded displacement shaders, to verify the ideas work (e.g. 46 | with interval arithmetic). 47 | - Then use OSL 48 | 49 | - More geometry types: 50 | - Catmull-Clark subdivision surfaces: 51 | //- Basic proof-of-concept implementation using OpenSubdiv 52 | //- Deformation motion blur support 53 | //- BVH acceleration 54 | - Face-varying data support 55 | - Triangle meshes 56 | 57 | - Film class: 58 | - Make film class more data-type agnostic. It should be the responsibility 59 | of the Renderer and Integrator to make sure pixel data is interpreted and 60 | used correctly. 61 | 62 | - Cleanup: 63 | - Add NaN and Inf catching to key places in code 64 | -------------------------------------------------------------------------------- /accel/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(accel 2 | bvh bvh2 bvh4 light_array light_tree) 3 | -------------------------------------------------------------------------------- /accel/accel.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ACCEL_HPP 2 | #define ACCEL_HPP 3 | 4 | #include "numtype.h" 5 | #include "ray.hpp" 6 | #include "object.hpp" 7 | #include "bbox.hpp" 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | // Forward declaration of Assembly from scene/assembly.hpp 14 | class Assembly; 15 | 16 | /** 17 | * @brief An acceleration structure for a scene hierarchy. 18 | * 19 | * This pure virtual class should never be used directly. Its only purpose 20 | * is to enforce an interface for classes that inherit from it. 
21 | */ 22 | class Accel { 23 | public: 24 | virtual ~Accel() {} 25 | 26 | /** 27 | * @brief Builds the acceleration structure from the given assembly. 28 | */ 29 | virtual void build(const Assembly& assembly) = 0; 30 | 31 | /** 32 | * @brief Returns the spatial bounds of the acceleration structure. 33 | * 34 | * Should not be called until after build() is called. 35 | */ 36 | virtual const std::vector& bounds() const = 0; 37 | }; 38 | 39 | 40 | /** 41 | * @brief An acceleration structure traverser that traverses with many rays at once 42 | * in a breadth-first fashion. 43 | * 44 | * This pure virtual template class should never be used directly. Its only purpose 45 | * is to enforce an interface for classes that inherit from it. 46 | */ 47 | template 48 | class AccelStreamTraverser { 49 | public: 50 | virtual ~AccelStreamTraverser() {} 51 | 52 | /** 53 | * @brief Initializes the traverser for traversing the given 54 | * acceleration structure. 55 | */ 56 | virtual void init_accel(const T& accel) = 0; 57 | 58 | /** 59 | * @brief Initializes the traverser for traversing with 60 | * the given Rays. 61 | * 62 | * This resets any traversal already in progress. 63 | */ 64 | virtual void init_rays(Ray* begin, Ray* end) = 0; 65 | 66 | /** 67 | * @brief Traverses to the next relevant object. 68 | * 69 | * Returns a tuple with a pair of iterators to the begin and end of the 70 | * relevant Rays, and an index to the object instance they need to be 71 | * tested against. 72 | * 73 | * When traversal is complete, begin == end and object == 0. 
74 | */ 75 | virtual std::tuple 76 | next_object() = 0; 77 | }; 78 | 79 | #endif // ACCEL_HPP 80 | -------------------------------------------------------------------------------- /accel/bvh.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BVH_HPP 2 | #define BVH_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "numtype.h" 10 | #include "global.hpp" 11 | 12 | #include "accel.hpp" 13 | #include "object.hpp" 14 | #include "ray.hpp" 15 | #include "bbox.hpp" 16 | #include "utils.hpp" 17 | #include "vector.hpp" 18 | 19 | 20 | 21 | /* 22 | * A bounding volume hierarchy. 23 | */ 24 | class BVH: public Accel { 25 | std::vector _bounds {BBox()}; 26 | public: 27 | virtual ~BVH() {}; 28 | virtual void build(const Assembly& assembly); 29 | virtual const std::vector& bounds() const { 30 | return _bounds; 31 | }; 32 | 33 | // Traversers need access to private data 34 | friend class BVHStreamTraverser; 35 | 36 | enum { 37 | IS_LEAF = 1 << 0 38 | }; 39 | 40 | /* 41 | * A node of a bounding volume hierarchy. 42 | * Contains a bounding box, a flag for whether 43 | * it's a leaf or not, a pointer to its first 44 | * child, and its data if it's a leaf. 45 | */ 46 | struct Node { 47 | size_t bbox_index = 0; 48 | union { 49 | size_t child_index = 0; 50 | size_t data_index; 51 | }; 52 | size_t parent_index = 0; 53 | uint16_t ts = 0; // Time sample count 54 | uint16_t flags = 0; 55 | }; 56 | 57 | /* 58 | * Used to store objects that have yet to be 59 | * inserted into the hierarchy. 60 | * Contains the time 0.5 bounds of the object and its centroid. 61 | */ 62 | struct BVHPrimitive { 63 | size_t instance_index; 64 | Vec3 bmin, bmax, c; 65 | }; 66 | 67 | public: 68 | // This stuff is public because BVH is used as the basis 69 | // for building other BVHs like BVH2 and BVH4, and they need 70 | // direct access. 
71 | std::vector nodes; 72 | std::vector bboxes; 73 | 74 | /** 75 | * @brief Returns the index of the first child 76 | * of the node with the given index. 77 | */ 78 | inline size_t child1(const size_t node_i) const { 79 | return node_i + 1; 80 | } 81 | 82 | /** 83 | * @brief Returns the index of the second child 84 | * of the node with the given index. 85 | */ 86 | inline size_t child2(const size_t node_i) const { 87 | return nodes[node_i].child_index; 88 | } 89 | 90 | /** 91 | * @brief Returns the index of the sibling 92 | * of the node with the given index. 93 | */ 94 | inline size_t sibling(const size_t node_i) const { 95 | const size_t parent_i = nodes[node_i].parent_index; 96 | if (node_i == (parent_i + 1)) 97 | return nodes[parent_i].child_index; 98 | else 99 | return parent_i + 1; 100 | } 101 | 102 | inline bool is_leaf(const size_t node_i) const { 103 | return nodes[node_i].flags & IS_LEAF; 104 | } 105 | 106 | private: 107 | const Assembly* assembly; // Set during build() 108 | //std::vector bbox; 109 | std::vector bag; // Temporary holding spot for objects not yet added to the hierarchy 110 | 111 | bool finalize(); 112 | 113 | /** 114 | * @brief Tests whether a ray intersects a node or not. 115 | */ 116 | inline bool intersect_node(const uint64_t node_i, const Ray& ray, float *near_t, float *far_t) const { 117 | #ifdef GLOBAL_STATS_TOP_LEVEL_BVH_NODE_TESTS 118 | Global::Stats::top_level_bvh_node_tests++; 119 | #endif 120 | const Node& node = nodes[node_i]; 121 | const BBox b = lerp_seq(ray.time, bboxes.cbegin() + node.bbox_index, bboxes.cbegin() + node.bbox_index + node.ts); 122 | return b.intersect_ray(ray, near_t, far_t, ray.max_t); 123 | } 124 | 125 | size_t split_primitives(size_t first_prim, size_t last_prim); 126 | size_t recursive_build(size_t parent, size_t first_prim, size_t last_prim); 127 | }; 128 | 129 | 130 | 131 | /** 132 | * @brief A breadth-first traverser for BVH. 
133 | */ 134 | class BVHStreamTraverser: public AccelStreamTraverser { 135 | public: 136 | virtual ~BVHStreamTraverser() {} 137 | 138 | virtual void init_accel(const BVH& accel) { 139 | bvh = &accel; 140 | } 141 | 142 | virtual void init_rays(Ray* begin, Ray* end) { 143 | rays = begin; 144 | rays_end = end; 145 | 146 | // Initialize stack 147 | stack_ptr = 0; 148 | node_stack[0] = 0; 149 | ray_stack[0].first = rays; 150 | ray_stack[0].second = rays_end; 151 | } 152 | 153 | virtual std::tuple next_object(); 154 | 155 | private: 156 | const BVH* bvh = nullptr; 157 | Ray* rays = nullptr; 158 | Ray* rays_end = nullptr; 159 | 160 | // Stack data 161 | #define BVHST_STACK_SIZE 64 162 | int stack_ptr; 163 | size_t node_stack[BVHST_STACK_SIZE]; 164 | std::pair ray_stack[BVHST_STACK_SIZE]; 165 | 166 | }; 167 | 168 | 169 | #endif // BVH_HPP 170 | -------------------------------------------------------------------------------- /accel/bvh2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "numtype.h" 9 | #include "bvh2.hpp" 10 | 11 | #include "simd.hpp" 12 | #include "ray.hpp" 13 | #include "assembly.hpp" 14 | #include "utils.hpp" 15 | 16 | 17 | 18 | void BVH2::build(const Assembly& assembly) { 19 | // Build a normal BVH as a starting point 20 | BVH bvh; 21 | bvh.build(assembly); 22 | 23 | if (bvh.nodes.size() == 0) 24 | return; 25 | 26 | // Pack BVH into more efficient BVH2 27 | nodes.push_back(Node()); 28 | for (size_t bni = 0; bni < bvh.nodes.size(); ++bni) { 29 | BVH::Node& bn = bvh.nodes[bni]; 30 | size_t ni = nodes.size() - 1; // Node index 31 | 32 | // Set the values that don't depend on whether this 33 | // is a leaf node or not. 
34 | if (bn.flags & IS_RIGHT) 35 | nodes[bn.parent_index].child_index = ni; // Set parent's child_index field to point to this 36 | 37 | // Set the values that _do_ depend on whether this is 38 | // a leaf node or not. 39 | if (bn.flags & BVH::IS_LEAF) { 40 | nodes[ni].child_index = 0; // Indicates that this is a leaf node 41 | nodes[ni].data_index = bn.data_index; 42 | nodes.push_back(Node()); 43 | } else { 44 | BVH::Node& child1 = bvh.nodes[bvh.child1(bni)]; 45 | BVH::Node& child2 = bvh.nodes[bvh.child2(bni)]; 46 | 47 | // Let right child know that it's right 48 | child2.flags |= IS_RIGHT; 49 | 50 | // Set the parent index fields in the child build nodes 51 | // to refer to the parent Node instead of the parent BVH::Node 52 | child1.parent_index = ni; 53 | child2.parent_index = ni; 54 | 55 | // If children have same number of time samples, easy 56 | if (child1.ts == child2.ts) { 57 | nodes[ni].ts = child1.ts; 58 | for (uint16_t i = 0; i < child1.ts; ++i) { 59 | nodes.back().bounds = BBox2(bvh.bboxes[child1.bbox_index+i], bvh.bboxes[child2.bbox_index+i]); 60 | nodes.push_back(Node()); 61 | } 62 | } 63 | // If children have different number of time samples, 64 | // interpolate one or the other 65 | else if (child1.ts > child2.ts) { 66 | nodes[ni].ts = child1.ts; 67 | const float s = child1.ts - 1; 68 | auto cbegin = bvh.bboxes.cbegin() + child2.bbox_index; 69 | auto cend = cbegin + child2.ts; 70 | 71 | for (uint16_t i = 0; i < child1.ts; ++i) { 72 | nodes.back().bounds = BBox2(bvh.bboxes[child1.bbox_index+i], lerp_seq(i/s, cbegin, cend)); 73 | nodes.push_back(Node()); 74 | } 75 | } else { 76 | nodes[ni].ts = child2.ts; 77 | const float s = child2.ts - 1; 78 | auto cbegin = bvh.bboxes.cbegin() + child1.bbox_index; 79 | auto cend = cbegin + child1.ts; 80 | 81 | for (uint16_t i = 0; i < child2.ts; ++i) { 82 | nodes.back().bounds = BBox2(lerp_seq(i/s, cbegin, cend), bvh.bboxes[child2.bbox_index+i]); 83 | nodes.push_back(Node()); 84 | } 85 | } 86 | } 87 | } 88 | 89 | // 
Store top-level bounds 90 | auto begin = bvh.bboxes.begin() + bvh.nodes[0].bbox_index; 91 | auto end = begin + bvh.nodes[0].ts; 92 | _bounds.clear(); 93 | _bounds.insert(_bounds.begin(), begin, end); 94 | } 95 | 96 | 97 | 98 | std::tuple BVH2StreamTraverser::next_object() { 99 | while (stack_ptr >= 0) { 100 | if (bvh->is_leaf(node_stack[stack_ptr])) { 101 | ray_stack[stack_ptr].second = mutable_partition(ray_stack[stack_ptr].first, ray_stack[stack_ptr].second, [&](Ray& ray) { 102 | return !ray.is_done() && (first_call || ray.trav_stack.pop()); 103 | }); 104 | 105 | if (std::distance(ray_stack[stack_ptr].first, ray_stack[stack_ptr].second) > 0) { 106 | auto rv = std::make_tuple(&(*ray_stack[stack_ptr].first), &(*ray_stack[stack_ptr].second), bvh->nodes[node_stack[stack_ptr]].data_index); 107 | --stack_ptr; 108 | return rv; 109 | } else { 110 | --stack_ptr; 111 | } 112 | } else { 113 | const auto cbegin = bvh->nodes.cbegin() + node_stack[stack_ptr]; 114 | const auto cend = cbegin + bvh->nodes[node_stack[stack_ptr]].ts; 115 | 116 | SIMD::float4 near_hits; 117 | bool flip_set = false; 118 | bool flip = false; 119 | 120 | // Test rays against current node's children 121 | ray_stack[stack_ptr].second = mutable_partition(ray_stack[stack_ptr].first, ray_stack[stack_ptr].second, [&](Ray& ray) { 122 | if (!ray.is_done() && (first_call || ray.trav_stack.pop())) { 123 | // Get the time-interpolated bounding box 124 | const BBox2 b = lerp_seq(ray.time, cbegin, cend).bounds; 125 | 126 | // Ray test 127 | const auto hit_mask = b.intersect_ray(ray, &near_hits); 128 | 129 | if (hit_mask != 0) { 130 | if (!flip_set) { 131 | flip_set = true; 132 | flip = near_hits[0] > near_hits[1]; 133 | } 134 | 135 | if (flip) 136 | ray.trav_stack.push((hit_mask >> 1) | (hit_mask << 1), 2); 137 | else 138 | ray.trav_stack.push(hit_mask, 2); 139 | } 140 | 141 | return hit_mask != 0; 142 | } else { 143 | return false; 144 | } 145 | }); 146 | 147 | if (first_call) 148 | first_call = false; 149 | 150 | 
// If any rays hit, traverse deeper 151 | if (std::distance(ray_stack[stack_ptr].first, ray_stack[stack_ptr].second) > 0) { 152 | ray_stack[stack_ptr+1] = ray_stack[stack_ptr]; 153 | 154 | if (flip) { 155 | node_stack[stack_ptr+1] = bvh->child2(node_stack[stack_ptr]); 156 | node_stack[stack_ptr] = bvh->child1(node_stack[stack_ptr]); 157 | } else { 158 | node_stack[stack_ptr+1] = bvh->child1(node_stack[stack_ptr]); 159 | node_stack[stack_ptr] = bvh->child2(node_stack[stack_ptr]); 160 | } 161 | 162 | ++stack_ptr; 163 | } 164 | // If no rays hit, go to next stack item 165 | else { 166 | --stack_ptr; 167 | } 168 | } 169 | } 170 | 171 | // Finished traversal 172 | return std::make_tuple(rays_end, rays_end, 0); 173 | } -------------------------------------------------------------------------------- /accel/bvh2.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BVH2_HPP 2 | #define BVH2_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "numtype.h" 12 | #include "global.hpp" 13 | 14 | #include "accel.hpp" 15 | #include "bvh.hpp" 16 | #include "object.hpp" 17 | #include "ray.hpp" 18 | #include "bbox.hpp" 19 | #include "utils.hpp" 20 | #include "vector.hpp" 21 | 22 | 23 | 24 | 25 | /* 26 | * A bounding volume hierarchy. 27 | */ 28 | class BVH2: public Accel { 29 | public: 30 | virtual void build(const Assembly& assembly); 31 | virtual const std::vector& bounds() const { 32 | return _bounds; 33 | }; 34 | virtual ~BVH2() {}; 35 | 36 | // Traversers need access to private data 37 | friend class BVH2StreamTraverser; 38 | 39 | struct alignas(16) Node { 40 | union { 41 | // If the node is a leaf, we don't need the bounds. 42 | // If the node is not a leaf, it doesn't have Primitive data. 43 | BBox2 bounds {BBox(), BBox()}; 44 | size_t data_index; 45 | }; 46 | size_t child_index = 0; // When zero, indicates that this is a leaf node 47 | uint32_t ts = 0; // Number of time samples. 
48 | 49 | Node() {} 50 | 51 | Node(const Node& n): child_index {n.child_index}, ts {n.ts} { 52 | bounds = n.bounds; 53 | } 54 | 55 | // Operators to allow node bounds to be interpolated conveniently 56 | Node operator+(const Node& b) const { 57 | Node n; 58 | n.bounds = bounds + b.bounds; 59 | return n; 60 | } 61 | 62 | Node operator*(float f) const { 63 | Node n; 64 | n.bounds = bounds * f; 65 | return n; 66 | } 67 | }; 68 | 69 | private: 70 | std::vector nodes; 71 | std::vector _bounds {BBox()}; 72 | 73 | enum { 74 | IS_RIGHT = 1 << 1 75 | }; 76 | 77 | /** 78 | * @brief Returns the index of the first child 79 | * of the node with the given index. 80 | */ 81 | inline size_t child1(const size_t node_i) const { 82 | return node_i + nodes[node_i].ts; 83 | } 84 | 85 | /** 86 | * @brief Returns the index of the second child 87 | * of the node with the given index. 88 | */ 89 | inline size_t child2(const size_t node_i) const { 90 | return nodes[node_i].child_index; 91 | } 92 | 93 | /** 94 | * @brief Returns the number of time samples 95 | * of the node with the given index. 96 | */ 97 | inline uint32_t time_samples(const size_t node_i) const { 98 | return nodes[node_i].ts; 99 | } 100 | 101 | inline bool is_leaf(const size_t node_i) const { 102 | return nodes[node_i].child_index == 0; 103 | } 104 | }; 105 | 106 | 107 | 108 | /** 109 | * @brief A breadth-first traverser for BVH2. 
110 | */ 111 | class BVH2StreamTraverser: public AccelStreamTraverser { 112 | public: 113 | virtual ~BVH2StreamTraverser() {} 114 | 115 | virtual void init_accel(const BVH2& accel) { 116 | bvh = &accel; 117 | } 118 | 119 | virtual void init_rays(Ray* begin, Ray* end) { 120 | rays = begin; 121 | rays_end = end; 122 | first_call = true; 123 | 124 | // Initialize stack 125 | if (bvh == nullptr || bvh->nodes.size() == 0) { 126 | stack_ptr = -1; 127 | } else { 128 | stack_ptr = 0; 129 | } 130 | node_stack[0] = 0; 131 | ray_stack[0].first = rays; 132 | ray_stack[0].second = rays_end; 133 | } 134 | 135 | virtual std::tuple next_object(); 136 | 137 | private: 138 | const BVH2* bvh = nullptr; 139 | Ray* rays = nullptr; 140 | Ray* rays_end = nullptr; 141 | bool first_call = true; 142 | 143 | // Stack data 144 | #define BVH2_STACK_SIZE 64 145 | int stack_ptr; 146 | size_t node_stack[BVH2_STACK_SIZE]; 147 | std::pair ray_stack[BVH2_STACK_SIZE]; 148 | 149 | }; 150 | 151 | #endif // BVH2_HPP -------------------------------------------------------------------------------- /accel/bvh4.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BVH4_HPP 2 | #define BVH4_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "numtype.h" 12 | #include "global.hpp" 13 | 14 | #include "accel.hpp" 15 | #include "bvh.hpp" 16 | #include "object.hpp" 17 | #include "ray.hpp" 18 | #include "bbox.hpp" 19 | #include "utils.hpp" 20 | #include "vector.hpp" 21 | 22 | 23 | 24 | 25 | /* 26 | * A bounding volume hierarchy. 
27 | */ 28 | class BVH4: public Accel { 29 | public: 30 | virtual void build(const Assembly& assembly); 31 | virtual const std::vector& bounds() const { 32 | return _bounds; 33 | }; 34 | virtual ~BVH4() {}; 35 | 36 | // Traversers need access to private data 37 | friend class BVH4StreamTraverser; 38 | 39 | struct alignas(16) Node { 40 | union { 41 | // If the node is a leaf, we don't need the bounds. 42 | // If the node is not a leaf, it doesn't have Primitive data. 43 | BBox4 bounds {BBox(), BBox(), BBox(), BBox()}; 44 | size_t data_index; 45 | }; 46 | size_t child_indices[3] = {0,0,0}; // Indices of children 2, 3, and 4. (Child 1's index is implicit.) 47 | // When first element is 0, indicates that this is a leaf node, 48 | // because a non-leaf node needs at least two children. When the 49 | // second and/or third elements are zero, indicates there is no 50 | // third or fourth child, respectively. 51 | uint32_t ts = 0; // Number of time samples. 52 | 53 | Node() {} 54 | 55 | // Node(const Node& n): child_indices {n.child_indices}, ts {n.ts} { 56 | // bounds = n.bounds; 57 | // } 58 | 59 | // Operators to allow node bounds to be interpolated conveniently 60 | Node operator+(const Node& b) const { 61 | Node n; 62 | n.bounds = bounds + b.bounds; 63 | return n; 64 | } 65 | 66 | Node operator*(float f) const { 67 | Node n; 68 | n.bounds = bounds * f; 69 | return n; 70 | } 71 | }; 72 | 73 | private: 74 | std::vector nodes; 75 | std::vector _bounds {BBox()}; 76 | 77 | enum { 78 | IS_SKIP = 1 << 8, 79 | IS_2ND = 1 << 9, 80 | IS_3RD = 1 << 10, 81 | IS_4TH = 1 << 11 82 | }; 83 | 84 | /** 85 | * @brief Returns the index of the nth (0-3) child 86 | * of the node with the given index. 
87 | */ 88 | inline size_t child(const size_t node_i, const int n) const { 89 | if (n == 0) 90 | return node_i + nodes[node_i].ts; 91 | else 92 | return nodes[node_i].child_indices[n-1]; 93 | } 94 | 95 | /** 96 | * @brief Returns the number of time samples 97 | * of the node with the given index. 98 | */ 99 | inline uint32_t time_samples(const size_t node_i) const { 100 | return nodes[node_i].ts; 101 | } 102 | 103 | /** 104 | * @brief Returns whether the node with the given index is a 105 | * leaf node or not. 106 | */ 107 | inline bool is_leaf(const size_t node_i) const { 108 | return (nodes[node_i].child_indices[0] == 0); 109 | } 110 | 111 | inline int child_count(const size_t node_i) const { 112 | if (nodes[node_i].child_indices[1] == 0) { 113 | return 2; 114 | } else if (nodes[node_i].child_indices[2] == 0) { 115 | return 3; 116 | } else { 117 | return 4; 118 | } 119 | } 120 | }; 121 | 122 | 123 | 124 | 125 | /** 126 | * @brief A breadth-first traverser for BVH4. 127 | */ 128 | class BVH4StreamTraverser: public AccelStreamTraverser { 129 | public: 130 | virtual ~BVH4StreamTraverser() {} 131 | 132 | virtual void init_accel(const BVH4& accel) { 133 | bvh = &accel; 134 | } 135 | 136 | virtual void init_rays(Ray* begin, Ray* end) { 137 | rays = begin; 138 | rays_end = end; 139 | first_call = true; 140 | 141 | // Initialize stack 142 | if (bvh == nullptr || bvh->nodes.size() == 0) { 143 | stack_ptr = -1; 144 | } else { 145 | stack_ptr = 0; 146 | } 147 | node_stack[0] = 0; 148 | ray_stack[0].first = rays; 149 | ray_stack[0].second = rays_end; 150 | } 151 | 152 | virtual std::tuple next_object(); 153 | 154 | private: 155 | const BVH4* bvh = nullptr; 156 | Ray* rays = nullptr; 157 | Ray* rays_end = nullptr; 158 | bool first_call = true; 159 | 160 | // Stack data 161 | #define BVH4_STACK_SIZE 64 162 | int stack_ptr; 163 | size_t node_stack[BVH4_STACK_SIZE]; 164 | std::pair ray_stack[BVH4_STACK_SIZE]; 165 | 166 | }; 167 | 168 | 169 | #endif // BVH4_HPP 
-------------------------------------------------------------------------------- /accel/light_accel.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LIGHT_ACCEL_HPP 2 | #define LIGHT_ACCEL_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "numtype.h" 9 | #include "instance_id.hpp" 10 | #include "ray.hpp" 11 | #include "light.hpp" 12 | #include "transform.hpp" 13 | #include "color.hpp" 14 | 15 | 16 | // Forward declaration of Assembly from scene/assembly.hpp 17 | class Assembly; 18 | 19 | 20 | /** 21 | * Data structure used to query for a light sample. 22 | */ 23 | struct LightQuery { 24 | // In 25 | float n, u, v, w; 26 | Vec3 pos; 27 | Vec3 nor; 28 | Vec3 d; // Direction of the known ray 29 | SurfaceClosure* bsdf; 30 | float wavelength; 31 | float time; 32 | 33 | // Intermediate 34 | Transform xform; 35 | 36 | // Out 37 | InstanceID id; 38 | Vec3 to_light; 39 | SpectralSample spec_samp; 40 | float selection_pdf; // The pdf of selecting the given light 41 | float light_sample_pdf; // The pdf of the sample taken on the selected light 42 | }; 43 | 44 | 45 | 46 | /** 47 | * @brief An acceleration structure for sampling a collection of light sources. 
48 | */ 49 | class LightAccel { 50 | public: 51 | virtual ~LightAccel() {} 52 | 53 | virtual void build(const Assembly& assembly) = 0; 54 | 55 | virtual void sample(LightQuery* query) const = 0; 56 | 57 | virtual const std::vector& bounds() const = 0; 58 | 59 | virtual size_t light_count() const = 0; 60 | 61 | virtual Color total_emitted_color() const = 0; 62 | }; 63 | 64 | 65 | #endif // LIGHT_ACCEL_HPP 66 | -------------------------------------------------------------------------------- /accel/light_array.cpp: -------------------------------------------------------------------------------- 1 | #include "light_array.hpp" 2 | 3 | #include "assembly.hpp" 4 | 5 | void LightArray::build(const Assembly& assembly_) { 6 | assembly = &assembly_; 7 | 8 | for (size_t i = 0; i < assembly->instances.size(); ++i) { 9 | const auto& instance = assembly->instances[i]; // Shorthand 10 | 11 | // If it's an object 12 | if (instance.type == Instance::OBJECT) { 13 | if (assembly->objects[instance.data_index]->get_type() == Object::LIGHT) { 14 | light_indices.push_back(i); 15 | const Light* light = dynamic_cast(assembly->objects[instance.data_index].get()); 16 | total_color += light->total_emitted_color(); 17 | } 18 | } 19 | // If it's an assembly 20 | else if (instance.type == Instance::ASSEMBLY) { 21 | const auto count = assembly->assemblies[instance.data_index]->light_accel.light_count(); 22 | if (count > 0) { 23 | assembly_lights.emplace_back(total_assembly_lights, count, i); 24 | total_assembly_lights += count; 25 | const Assembly* child_assembly = dynamic_cast(assembly->assemblies[instance.data_index].get()); 26 | total_color += child_assembly->light_accel.total_emitted_color(); 27 | } 28 | } 29 | 30 | // Merge bounds 31 | auto instance_bounds = assembly->instance_bounds(i); 32 | for (const auto& bbox: instance_bounds) 33 | bounds_[0].merge_with(bbox); 34 | } 35 | } 36 | 37 | 38 | 39 | void LightArray::sample(LightQuery* query) const { 40 | // Handle empty light accel 41 | if 
(light_indices.size() == 0 && assembly_lights.size() == 0) { 42 | query->spec_samp = SpectralSample(query->spec_samp.hero_wavelength, 0.0f); 43 | return; 44 | } 45 | 46 | const float local_prob = static_cast(light_indices.size()) / (total_assembly_lights + light_indices.size()); 47 | const float child_prob = 1.0f - local_prob; 48 | 49 | // If we're sampling a light in this assembly 50 | if (query->n <= local_prob) { 51 | // Update probabilities 52 | query->n /= local_prob; 53 | 54 | // Get light instance 55 | const auto index = light_indices[static_cast(query->n * light_indices.size()) % light_indices.size()]; 56 | const Instance& instance = assembly->instances[index]; // Shorthand 57 | 58 | // Get light data 59 | Light* light = dynamic_cast(assembly->objects[instance.data_index].get()); 60 | 61 | /// Get transforms if any 62 | if (instance.transform_count > 0) { 63 | auto cbegin = assembly->xforms.cbegin() + instance.transform_index; 64 | auto cend = cbegin + instance.transform_count; 65 | auto instance_xform = lerp_seq(query->time, cbegin, cend); 66 | query->pos = instance_xform.pos_to(query->pos); 67 | query->nor = instance_xform.nor_to(query->nor).normalized(); 68 | query->xform *= instance_xform; 69 | } 70 | 71 | // Sample the light 72 | float p; 73 | query->spec_samp = light->sample(query->pos, query->u, query->v, query->wavelength, query->time, &(query->to_light), &p); 74 | query->to_light = query->xform.dir_from(query->to_light); 75 | query->light_sample_pdf = p; 76 | 77 | // FIll in the light's instance ID 78 | query->id.push_back(index, assembly->element_id_bits()); 79 | } 80 | // If we're sampling a light in a child assembly 81 | else { 82 | // Update probabilities 83 | query->n = (query->n - local_prob) / child_prob; 84 | 85 | // Select assembly 86 | // TODO: a binary search would be faster 87 | size_t index = 0; 88 | const size_t target_index = static_cast(total_assembly_lights * query->n) % total_assembly_lights; 89 | for (const auto& al: 
assembly_lights) { 90 | if (std::get<0>(al) <= target_index && target_index < (std::get<0>(al) + std::get<1>(al))) { 91 | index = std::get<2>(al); 92 | break; 93 | } 94 | } 95 | 96 | // Get assembly instance shorthand 97 | const Instance& instance = assembly->instances[index]; 98 | 99 | // Get assembly 100 | Assembly* child_assembly = assembly->assemblies[instance.data_index].get(); 101 | 102 | // Get transforms if any 103 | if (instance.transform_count > 0) { 104 | auto cbegin = assembly->xforms.cbegin() + instance.transform_index; 105 | auto cend = cbegin + instance.transform_count; 106 | auto instance_xform = lerp_seq(query->time, cbegin, cend); 107 | query->pos = instance_xform.pos_to(query->pos); 108 | query->xform *= instance_xform; 109 | } 110 | 111 | // Push the assembly's instance ID 112 | query->id.push_back(index, assembly->element_id_bits()); 113 | 114 | // Traverse into child assembly 115 | child_assembly->light_accel.sample(query); 116 | } 117 | 118 | // Selection PDF is just one, since all lights have equal probability of 119 | // being selected. 
120 | query->selection_pdf = 1.0f; 121 | } 122 | -------------------------------------------------------------------------------- /accel/light_array.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LIGHT_ARRAY_HPP 2 | #define LIGHT_ARRAY_HPP 3 | 4 | #include "light_accel.hpp" 5 | 6 | class LightArray final: public LightAccel { 7 | const Assembly* assembly; 8 | std::vector light_indices; 9 | std::vector> assembly_lights; // 1: accumulated total lights, 2: number of light, 3: assembly instance index 10 | size_t total_assembly_lights; 11 | Color total_color; 12 | std::vector bounds_ {BBox()}; 13 | 14 | public: 15 | ~LightArray() {} 16 | 17 | virtual void build(const Assembly& assembly); 18 | 19 | virtual void sample(LightQuery* query) const; 20 | 21 | virtual const std::vector& bounds() const { 22 | return bounds_; 23 | } 24 | 25 | virtual size_t light_count() const { 26 | return total_assembly_lights + light_indices.size(); 27 | } 28 | 29 | virtual Color total_emitted_color() const { 30 | return total_color; 31 | } 32 | }; 33 | 34 | #endif // LIGHT_ARRAY_HPP 35 | -------------------------------------------------------------------------------- /accel/light_tree.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LIGHT_TREE_HPP 2 | #define LIGHT_TREE_HPP 3 | 4 | #include "light_accel.hpp" 5 | #include 6 | 7 | class LightTree final: public LightAccel { 8 | struct BuildNode { 9 | size_t instance_index; 10 | Vec3 center; 11 | BBox bbox; 12 | float energy; 13 | }; 14 | 15 | struct Node { 16 | std::vector bounds; 17 | float energy; 18 | 19 | size_t index1; 20 | size_t index2; 21 | 22 | bool is_leaf; 23 | size_t instance_index; 24 | }; 25 | 26 | const Assembly* assembly; 27 | std::vector build_nodes; 28 | std::vector nodes; 29 | std::vector bounds_; 30 | float total_energy {0.0f}; 31 | size_t total_lights {0}; 32 | 33 | std::vector::iterator split_lights(std::vector::iterator 
start, std::vector::iterator end); 34 | size_t recursive_build(std::vector::iterator start, std::vector::iterator end); 35 | 36 | float node_prob(const LightQuery& lq, uint32_t index) const; 37 | 38 | 39 | public: 40 | ~LightTree() {} 41 | 42 | virtual void build(const Assembly& assembly) override; 43 | 44 | virtual void sample(LightQuery* query) const override; 45 | 46 | 47 | virtual const std::vector& bounds() const override { 48 | return bounds_; 49 | } 50 | 51 | 52 | // TODO 53 | virtual size_t light_count() const override { 54 | return total_lights; 55 | } 56 | 57 | 58 | virtual Color total_emitted_color() const override { 59 | return Color(total_energy); 60 | } 61 | }; 62 | 63 | #endif // LIGHT_TREE_HPP 64 | -------------------------------------------------------------------------------- /basics/bbox_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | #include "vector.hpp" 7 | #include "ray.hpp" 8 | #include "bbox.hpp" 9 | #include "utils.hpp" 10 | 11 | 12 | /* 13 | ************************************************************************ 14 | * Testing suite for BBox. 
15 | ************************************************************************ 16 | */ 17 | 18 | TEST_CASE("bbox") { 19 | // Test for the first constructor 20 | SECTION("constructor_1") { 21 | BBox bb; 22 | 23 | REQUIRE(bb.min == Vec3(std::numeric_limits::infinity(), std::numeric_limits::infinity(), std::numeric_limits::infinity())); 24 | REQUIRE(bb.max == Vec3(-std::numeric_limits::infinity(), -std::numeric_limits::infinity(), -std::numeric_limits::infinity())); 25 | } 26 | 27 | // Test for the second constructor 28 | SECTION("constructor_2") { 29 | BBox bb(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0)); 30 | 31 | REQUIRE(bb.min == Vec3(1.0, -2.5, 0.5)); 32 | REQUIRE(bb.max == Vec3(8.0, 7.25, 2.0)); 33 | } 34 | 35 | 36 | // Test for the add operator 37 | SECTION("add") { 38 | BBox bb1(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0)); 39 | BBox bb2(Vec3(-1.0, -1.5, -2.0), Vec3(8.0, 4.75, -1.0)); 40 | 41 | BBox bb = bb1 + bb2; 42 | 43 | REQUIRE(bb.min == Vec3(0.0, -4.0, -1.5)); 44 | REQUIRE(bb.max == Vec3(16.0, 12.0, 1.0)); 45 | } 46 | 47 | 48 | // Test for the subtract operator 49 | SECTION("subtract") { 50 | BBox bb1(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0)); 51 | BBox bb2(Vec3(-1.0, -1.5, -2.0), Vec3(8.0, 4.75, -1.0)); 52 | 53 | BBox bb = bb1 - bb2; 54 | 55 | REQUIRE(bb.min == Vec3(2.0, -1.0, 2.5)); 56 | REQUIRE(bb.max == Vec3(0.0, 2.5, 3.0)); 57 | } 58 | 59 | 60 | // Test for the multiply operator 61 | SECTION("multiply") { 62 | BBox bb1(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0)); 63 | 64 | BBox bb = bb1 * -2.0; 65 | 66 | REQUIRE(bb.min == Vec3(-2.0, 5.0, -1.0)); 67 | REQUIRE(bb.max == Vec3(-16.0, -14.5, -4.0)); 68 | } 69 | 70 | 71 | // Test for the divide operator 72 | SECTION("divide") { 73 | BBox bb1(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0)); 74 | 75 | BBox bb = bb1 / -2.0; 76 | 77 | REQUIRE(bb.min == Vec3(-0.5, 1.25, -0.25)); 78 | REQUIRE(bb.max == Vec3(-4.0, -3.625, -1.0)); 79 | } 80 | 81 | 82 | // Test for ::merge_with() 83 | SECTION("merge_with") { 84 | BBox 
bb1(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0)); 85 | BBox bb2(Vec3(-1.0, -1.5, -2.0), Vec3(8.0, 4.75, -1.0)); 86 | 87 | bb1.merge_with(bb2); 88 | 89 | REQUIRE(bb1.min == Vec3(-1.0, -2.5, -2.0)); 90 | REQUIRE(bb1.max == Vec3(8.0, 7.25, 2.0)); 91 | } 92 | 93 | 94 | // Test for ::surface_area() 95 | SECTION("surface_area") { 96 | BBox bb(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0)); 97 | 98 | REQUIRE(bb.surface_area() == 186.75); 99 | } 100 | 101 | 102 | // Tests for ::intersect_ray() 103 | SECTION("intersect_ray_1") { 104 | // Simple intersection 105 | Ray r(Vec3(0.125, -8.0, 0.25), Vec3(0.0, 1.0, 0.0)); 106 | r.finalize(); 107 | BBox bb(Vec3(-1.0, -2.5, -0.5), Vec3(8.0, 7.25, 2.0)); 108 | float hitt0=0.0, hitt1=0.0; 109 | bool hit=false; 110 | 111 | hit = bb.intersect_ray(r, &hitt0, &hitt1); 112 | 113 | REQUIRE(hit == true); 114 | REQUIRE(hitt0 == 5.5); 115 | REQUIRE(hitt1 >= 15.25); 116 | REQUIRE(hitt1 <= 15.25001); 117 | } 118 | 119 | 120 | SECTION("intersect_ray_2") { 121 | // Simple intersection with unnormalized ray 122 | Ray r(Vec3(0.125, -8.0, 0.25), Vec3(0.0, 2.0, 0.0)); 123 | r.update_accel(); 124 | BBox bb(Vec3(-1.0, -2.5, -0.5), Vec3(8.0, 7.25, 2.0)); 125 | float hitt0=0.0, hitt1=0.0; 126 | bool hit=false; 127 | 128 | hit = bb.intersect_ray(r, &hitt0, &hitt1); 129 | 130 | REQUIRE(hit == true); 131 | REQUIRE(hitt0 == 2.75); 132 | REQUIRE(hitt1 >= 7.625); 133 | REQUIRE(hitt1 <= (7.62501)); 134 | } 135 | 136 | SECTION("intersect_ray_3") { 137 | // Simple miss 138 | Ray r(Vec3(20.0, -8.0, 0.25), Vec3(0.0, 1.0, 0.0)); 139 | r.finalize(); 140 | BBox bb(Vec3(-1.0, -2.5, -0.5), Vec3(8.0, 7.25, 2.0)); 141 | 142 | REQUIRE(bb.intersect_ray(r) == false); 143 | } 144 | 145 | SECTION("intersect_ray_4") { 146 | // Intersection from ray that starts inside the bbox 147 | Ray r(Vec3(0.0, 0.0, 0.0), Vec3(0, 1.0, 0)); 148 | r.finalize(); 149 | BBox bb(Vec3(-1.0, -2.5, -0.5), Vec3(8.0, 7.25, 2.0)); 150 | float hitt0=0.0, hitt1=0.0; 151 | bool hit=false; 152 | 153 | hit = 
bb.intersect_ray(r, &hitt0, &hitt1); 154 | 155 | REQUIRE(hit == true); 156 | REQUIRE(hitt0 == 0.0); 157 | REQUIRE(hitt1 >= 7.25); 158 | REQUIRE(hitt1 <= 7.25001); 159 | } 160 | 161 | SECTION("intersect_ray_5") { 162 | // Intersection from ray that grazes the side of the bbox 163 | Ray r(Vec3(-1.0001, -8.0, 0.25), Vec3(0, 1.0, 0)); 164 | r.finalize(); 165 | BBox bb(Vec3(-1.0, -2.5, -0.5), Vec3(8.0, 7.25, 2.0)); 166 | 167 | REQUIRE(bb.intersect_ray(r) == false); 168 | } 169 | 170 | SECTION("intersect_ray_6") { 171 | // Intersection with collapsed BBox, should be true 172 | Ray r(Vec3(-4.0, 0.0, 0.0), Vec3(1.0, 0.0, 0.0)); 173 | r.finalize(); 174 | BBox bb(Vec3(1.0, -1.0, -1.0), Vec3(1.0, 1.0, 1.0)); 175 | 176 | float hitt0=0.0, hitt1=0.0; 177 | bool hit=false; 178 | 179 | hit = bb.intersect_ray(r, &hitt0, &hitt1); 180 | 181 | REQUIRE(hit == true); 182 | REQUIRE(hitt0 == 5.0); 183 | REQUIRE(hitt1 >= 5.0); 184 | REQUIRE(hitt1 <= 5.00001); 185 | 186 | } 187 | 188 | SECTION("intersect_ray_7") { 189 | // Intersection with collapsed BBox with ray at an angle, should be true 190 | Ray r(Vec3(-4.0, 0.0, 0.0), Vec3(0.5, 0.5, 0.5)); 191 | r.finalize(); 192 | BBox bb(Vec3(1.0, -20.0, -20.0), Vec3(1.0, 20.0, 20.0)); 193 | 194 | float hitt0=0.0, hitt1=0.0; 195 | bool hit=false; 196 | 197 | hit = bb.intersect_ray(r, &hitt0, &hitt1); 198 | 199 | REQUIRE(hit == true); 200 | REQUIRE(hitt0 == 10.0); 201 | REQUIRE(hitt1 >= 10.0); 202 | REQUIRE(hitt1 <= 10.00001); 203 | } 204 | } 205 | 206 | // TODO: - diagonal rays 207 | // - rays with different tmin/tmax value 208 | -------------------------------------------------------------------------------- /basics/camera.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CAMERA_HPP 2 | #define CAMERA_HPP 3 | 4 | #include "numtype.h" 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | #include "config.hpp" 11 | #include "utils.hpp" 12 | #include "monte_carlo.hpp" 13 | #include "vector.hpp" 14 | 
#include "matrix.hpp"
#include "transform.hpp"
#include "ray.hpp"

/*
 * A virtual camera.
 *
 * Every setting is stored as a sequence of values that is interpolated
 * over time, which is how animated (motion blurred) cameras are expressed.
 */
class Camera {
public:
    std::vector<Transform> transforms;
    std::vector<float> fovs;
    std::vector<float> tfovs; // tan(fov / 2) for each fov
    std::vector<float> aperture_radii;
    std::vector<float> focus_distances;

    /*
     * Builds a camera from time-sampled settings.
     *
     * Focal blur needs both an aperture radius and a focus distance; if
     * either is missing, or any focus distance is not positive, focal blur
     * is disabled by falling back to a pinhole camera.
     */
    Camera(const std::vector<Transform> &transforms_, const std::vector<float> &fovs_, const std::vector<float> &aperture_radii_, const std::vector<float> &focus_distances_):
        transforms(transforms_),
        fovs(fovs_),
        aperture_radii(aperture_radii_),
        focus_distances(focus_distances_) {
        // Make sure we have needed values for everything
        if (transforms.empty())
            std::cout << "WARNING: camera has no transform(s)!\n";

        if (fovs.empty())
            std::cout << "WARNING: camera has no fov(s)!\n";

        // Work out what is missing *before* overwriting the vectors,
        // otherwise the warnings below could never fire.
        const bool no_aperture = aperture_radii.empty();
        const bool no_focus = focus_distances.empty();
        if (no_aperture || no_focus) {
            if (!no_aperture && no_focus)
                std::cout << "WARNING: camera has aperture radius but no focus distance. Disabling focal blur.\n";
            else if (no_aperture && !no_focus)
                std::cout << "WARNING: camera has focus distance but no aperture radius. Disabling focal blur.\n";

            aperture_radii = {0.0f};
            focus_distances = {1.0f};
        }

        // Convert angle fov into linear fov
        tfovs.clear();
        tfovs.reserve(fovs.size());
        for (const float fov: fovs)
            tfovs.emplace_back(sin(fov/2) / cos(fov/2));
        fovs.clear();

        // Can't have focus distance of zero
        for (const float f: focus_distances) {
            if (f <= 0.0f) {
                std::cout << "WARNING: camera focal distance is zero or less. Disabling focal blur.\n";
                aperture_radii = {0.0f};
                focus_distances = {1.0f};
                break; // focus_distances was just replaced: stop iterating it
            }
        }
    }

    /*
     * Generates a camera ray based on the given information.
     *
     * x, y are the image plane coordinates, dx, dy their differentials,
     * and u, v the random numbers used to pick a point on the aperture.
     */
    WorldRay generate_ray(float x, float y, float dx, float dy, float time, float u, float v) const {
        WorldRay wray;

        wray.type = WorldRay::CAMERA;
        wray.time = time;

        // Get time-interpolated camera settings
        const Transform transform = lerp_seq(time, transforms);
        const float tfov = lerp_seq(time, tfovs);
        const float aperture_radius = lerp_seq(time, aperture_radii);
        const float focus_distance = lerp_seq(time, focus_distances);

        // Ray origin
        wray.o.x = aperture_radius * ((u * 2) - 1);
        wray.o.y = aperture_radius * ((v * 2) - 1);
        wray.o.z = 0.0;
        square_to_circle(&wray.o.x, &wray.o.y);

        // Ray direction
        wray.d.x = (x * tfov) - (wray.o.x / focus_distance);
        wray.d.y = (y * tfov) - (wray.o.y / focus_distance);
        wray.d.z = 1.0;
        wray.d.normalize();

        // Ray image plane differentials
        wray.odx = Vec3(0.0f, 0.0f, 0.0f);
        wray.ody = Vec3(0.0f, 0.0f, 0.0f);
        wray.ddx = Vec3(dx*tfov, 0.0f, 0.0f);
        wray.ddy = Vec3(0.0f, dy*tfov, 0.0f);

        // Transform the ray
        return wray.transformed(transform);
    }
};

#endif

--------------------------------------------------------------------------------
/basics/differential_geometry.hpp:
--------------------------------------------------------------------------------
#ifndef DIFFERENTIAL_GEOMETRY_HPP
#define DIFFERENTIAL_GEOMETRY_HPP

#include <utility>

#include "numtype.h"
#include "vector.hpp"
#include "transform.hpp"

// NOTE(review): clamp_dd() below uses WorldRay, which this header does not
// declare itself; it presumably relies on ray.hpp being included first.

/*
 * The local geometry of a surface at a point: position, normal, and
 * their derivatives with respect to the surface's uv parameters.
 */
struct DifferentialGeometry {
    float u, v;

    // Point position
    Vec3 p;
    Vec3 dpdu, dpdv;

    // Surface normal
    Vec3 n;
    Vec3 dndu, dndv;


    /*
     * Returns a copy transformed out of the space of xform.  The normal
     * is re-normalized and its derivatives are scaled by the same factor.
     */
    DifferentialGeometry transformed_from(const Transform& xform) const {
        DifferentialGeometry geo;
        geo.u = u;
        geo.v = v;

        geo.p = xform.pos_from(p);
        geo.dpdu = xform.dir_from(dpdu);
        geo.dpdv = xform.dir_from(dpdv);

        // TODO: figure out how to transform surface normal differentials
        // properly
        geo.n = xform.nor_from(n);
        geo.dndu = xform.nor_from(dndu);
        geo.dndv = xform.nor_from(dndv);
        const float il = 1.0f / geo.n.length();
        geo.n *= il;
        geo.dndu *= il;
        geo.dndv *= il;

        return geo;
    }


    /*
     * Returns a copy transformed into the space of xform.  The normal
     * is re-normalized and its derivatives are scaled by the same factor.
     */
    DifferentialGeometry transformed_to(const Transform& xform) const {
        DifferentialGeometry geo;
        geo.u = u;
        geo.v = v;

        geo.p = xform.pos_to(p);
        geo.dpdu = xform.dir_to(dpdu);
        geo.dpdv = xform.dir_to(dpdv);

        // TODO: figure out how to transform surface normal differentials
        // properly
        geo.n = xform.nor_to(n);
        geo.dndu = xform.nor_to(dndu);
        geo.dndv = xform.nor_to(dndv);
        const float il = 1.0f / geo.n.length();
        geo.n *= il;
        geo.dndu *= il;
        geo.dndv *= il;

        return geo;
    }


    // Flips the normal and its derivatives to face the other way
    void flip_normal() {
        n *= -1.0f;
        dndu *= -1.0f;
        dndv *= -1.0f;
    }
};

/*
 * Transfers a ray differential onto a surface intersection.
 * This assumes that both normal and d are normalized.
 *
 * t is the distance along the primary ray to the intersection
 * normal is the surface normal at the intersection
 * d is the primary ray's direction
 * od is the ray origin differential
 * dd is the ray direction differential
 *
 * Returns the origin differential transferred onto the surface intersection.
 */
static inline Vec3 transfer_ray_origin_differential(const float t, const Vec3 normal, const Vec3 d,
        const Vec3 od, const Vec3 dd) {
    const Vec3 temp = od + (dd * t);
    const float td = -dot(temp, normal) / dot(d, normal);

    const Vec3 real_projected = temp + (d * td);

    // Scaled to the non-projected ray footprint at the hit point.
    // This is important because otherwise the ray footprint ends up
    // being larger than the dicing rate, and the next bounce ray often
    // ends up with false self-intersections, especially for incoming
    // rays with grazing angles.
    return real_projected.normalized() * temp.length();
}


/*
 * Reflects a ray differential off of a surface intersection as a
 * perfect mirror.
 * This assumes that 'normal' is normalized.
 *
 * normal is the surface normal at the intersection
 * normal_d is the surface normal differential for the intersection
 * d is the primary ray's direction
 * dd is the ray direction differential
 *
 * Returns the direction differential reflected off the surface.
 */
static inline Vec3 reflect_ray_direction_differential(const Vec3 normal, const Vec3 normal_d, const Vec3 d, const Vec3 dd) {
    const auto ddn = dot(dd, normal) + dot(d, normal_d);
    const auto tmp = (normal_d * dot(d, normal)) + (normal * ddn);
    return dd - (tmp * 2.0f);
}


/**
 * Clamps the direction differentials of a ray so that their length is
 * at most 0.9 times the length of the ray direction, which keeps their
 * slopes safely below 1.0.  This is important to prevent
 * self-intersections with micro-geometry.
 */
static inline void clamp_dd(WorldRay* ray) {
    const float len_d = ray->d.length();
    const float len_dx = ray->ddx.length();
    const float len_dy = ray->ddy.length();

    if ((len_dx / len_d) > 0.9f)
        ray->ddx *= 0.9f * len_d / len_dx;

    if ((len_dy / len_d) > 0.9f)
        ray->ddy *= 0.9f * len_d / len_dy;
}


/**
 * Calculates the uv coordinate differentials at the given differential
 * hit point.
 *
 * TODO: apparently this is wrong.  See pg. 508 of PBRT for a correct
 * implementation.
144 | */ 145 | static inline std::pair calc_uv_differentials(const Vec3 dp, const Vec3 dpdu, const Vec3 dpdv) { 146 | const float dpdu_ilen = 1.0f / dpdu.length(); 147 | const Vec3 dpdu_n = dpdu * dpdu_ilen; 148 | 149 | const float dpdv_ilen = 1.0f / dpdv.length(); 150 | const Vec3 dpdv_n = dpdv * dpdv_ilen; 151 | 152 | float dudp = dot(dp, dpdu_n) * dpdu_ilen; 153 | float dvdp = dot(dp, dpdv_n) * dpdv_ilen;; 154 | 155 | return std::make_pair(dudp, dvdp); 156 | } 157 | 158 | #endif // DIFFERENTIAL_GEOMETRY_HPP -------------------------------------------------------------------------------- /basics/instance_id.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INSTANCE_ID_HPP 2 | #define INSTANCE_ID_HPP 3 | 4 | #include "numtype.h" 5 | #include 6 | 7 | static constexpr int MAX_ID_BITS = 64; 8 | 9 | struct InstanceID { 10 | uint64_t id; 11 | int pos = 0; 12 | 13 | void clear() { 14 | id = 0; 15 | pos = 0; 16 | } 17 | 18 | void push_back(uint64_t sub_id, int bit_length) { 19 | assert((pos + bit_length) <= MAX_ID_BITS); 20 | id <<= bit_length; 21 | id |= sub_id & ((1<= 0); 27 | const uint64_t value = id & ((1<>= bit_length; 29 | pos -= bit_length; 30 | return value; 31 | } 32 | 33 | uint64_t pop_front(int bit_length) { 34 | assert((pos - bit_length) >= 0); 35 | const int offset = pos - bit_length; 36 | const uint64_t value = (id & (((1<> offset; 37 | pos -= bit_length; 38 | return value; 39 | } 40 | }; 41 | 42 | #endif // INSTANCE_ID_HPP -------------------------------------------------------------------------------- /basics/instance_id_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | 3 | #include "instance_id.hpp" 4 | 5 | 6 | /* 7 | ************************************************************************ 8 | * Testing suite for InstanceID. 
9 | ************************************************************************ 10 | */ 11 | 12 | TEST_CASE("InstanceID") { 13 | // Test for the first constructor 14 | SECTION("push/pop back") { 15 | InstanceID id; 16 | 17 | id.push_back(1, 1); 18 | id.push_back(3, 2); 19 | id.push_back(63, 10); 20 | id.push_back(7, 5); 21 | 22 | REQUIRE(id.pop_back(5) == 7); 23 | REQUIRE(id.pop_back(10) == 63); 24 | REQUIRE(id.pop_back(2) == 3); 25 | REQUIRE(id.pop_back(1) == 1); 26 | } 27 | 28 | // Test for the first constructor 29 | SECTION("push back, pop front") { 30 | InstanceID id; 31 | 32 | id.push_back(1, 1); 33 | id.push_back(3, 2); 34 | id.push_back(63, 10); 35 | id.push_back(7, 5); 36 | 37 | REQUIRE(id.pop_front(1) == 1); 38 | REQUIRE(id.pop_front(2) == 3); 39 | REQUIRE(id.pop_front(10) == 63); 40 | REQUIRE(id.pop_front(5) == 7); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /basics/intersection.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INTERSECTION_HPP 2 | #define INTERSECTION_HPP 3 | 4 | #include "numtype.h" 5 | 6 | #include 7 | #include 8 | 9 | #include "instance_id.hpp" 10 | #include "transform.hpp" 11 | #include "vector.hpp" 12 | #include "color.hpp" 13 | #include "surface_closure.hpp" 14 | #include "closure_union.hpp" 15 | #include "differential_geometry.hpp" 16 | 17 | #define DIFFERENTIAL_DOT_EPSILON 0.0000f 18 | 19 | /* 20 | * Contains the information from a ray intersection. 
21 | */ 22 | struct Intersection { 23 | // Whether there's a hit or not 24 | bool hit {false}; 25 | 26 | // The GUID of the object instance that was hit 27 | InstanceID id; 28 | 29 | // Information about the intersection point 30 | float t {std::numeric_limits::infinity()}; // T-parameter along the ray at the intersection 31 | bool backfacing {false}; // Whether it hit the backface of the surface 32 | float light_pdf {9999.0f}; // Pdf of selecting this hit point and ray via light sampling 33 | 34 | // The space that the intersection took place in, relative to world space. 35 | Transform space; 36 | 37 | // Differential geometry at the hit point 38 | DifferentialGeometry geo; 39 | 40 | // Offset for subsequent spawned rays to avoid self-intersection 41 | // Should be added for reflection, subtracted for transmission 42 | Vec3 offset {0.0f, 0.0f, 0.0f}; 43 | 44 | // The surface closure at the intersection, along with the probability 45 | // of that closure having been selected amongst multuple possible 46 | // closures. 47 | SurfaceClosureUnion surface_closure; 48 | float closure_prob {1.0f}; 49 | }; 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /color/spectrum_grid.h: -------------------------------------------------------------------------------- 1 | /* 2 | * XYZ -> spectrum 3 | * From the paper "Physically Meaningful Rendering using Tristimulus Colours" 4 | * by Hanika et al. 5 | */ 6 | 7 | #ifndef SPECTRUM_GRID_H 8 | #define SPECTRUM_GRID_H 9 | 10 | #include "spectra_xyz_5nm_380_780_0.97.h" 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | /* 17 | * Evaluate the spectrum for xyz at the given wavelength. 
18 | * 19 | * 20 | */ 21 | static inline float spectrum_xyz_to_p(const float lambda, const float *xyz) { 22 | assert(lambda >= spectrum_sample_min); 23 | assert(lambda <= spectrum_sample_max); 24 | float xyY[3], uv[2]; 25 | 26 | const float norm = 1.0/(xyz[0] + xyz[1] + xyz[2]); 27 | if (!(norm < FLT_MAX)) { 28 | return 0.0f; 29 | } 30 | // convert to xy chromaticities 31 | xyY[0] = xyz[0] * norm; 32 | xyY[1] = xyz[1] * norm; 33 | xyY[2] = xyz[1]; 34 | 35 | // rotate to align with grid 36 | spectrum_xy_to_uv(xyY, uv); 37 | 38 | if (uv[0] < 0.0f || uv[0] >= spectrum_grid_width || 39 | uv[1] < 0.0f || uv[1] >= spectrum_grid_height) { 40 | return 0.f; 41 | } 42 | 43 | int uvi[2] = {(int)uv[0], (int)uv[1]}; 44 | assert(uvi[0] < spectrum_grid_width); 45 | assert(uvi[1] < spectrum_grid_height); 46 | 47 | const int cell_idx = uvi[0] + spectrum_grid_width * uvi[1]; 48 | assert(cell_idx < spectrum_grid_width*spectrum_grid_height); 49 | assert(cell_idx >= 0); 50 | 51 | const spectrum_grid_cell_t* cell = spectrum_grid + cell_idx; 52 | const int inside = cell->inside; 53 | const int *idx = cell->idx; 54 | const int num = cell->num_points; 55 | 56 | // get linearly interpolated spectral power for the corner vertices: 57 | float p[num]; 58 | // this clamping is only necessary if lambda is not sure to be >= spectrum_sample_min and <= spectrum_sample_max: 59 | const float sb = //fminf(spectrum_num_samples-1e-4, fmaxf(0.0f, 60 | (lambda - spectrum_sample_min)/(spectrum_sample_max-spectrum_sample_min) * (spectrum_num_samples-1);//)); 61 | assert(sb >= 0.f); 62 | assert(sb <= spectrum_num_samples); 63 | 64 | const int sb0 = (int)sb; 65 | const int sb1 = sb+1 < spectrum_num_samples ? 
sb+1 : spectrum_num_samples-1; 66 | const float sbf = sb - sb0; 67 | for (int i=0; i= 0); 69 | assert(sb0 < spectrum_num_samples); 70 | assert(sb1 < spectrum_num_samples); 71 | const float* spectrum = spectrum_data_points[idx[i]].spectrum; 72 | p[i] = spectrum[sb0] * (1.0f-sbf) + spectrum[sb1] * sbf; 73 | } 74 | 75 | float interpolated_p = 0.0f; 76 | 77 | if (inside) { 78 | // fast path for normal inner quads: 79 | uv[0] -= uvi[0]; 80 | uv[1] -= uvi[1]; 81 | 82 | assert(uv[0] >= 0 && uv[0] <= 1.f); 83 | assert(uv[1] >= 0 && uv[1] <= 1.f); 84 | 85 | // the layout of the vertices in the quad is: 86 | // 2 3 87 | // 0 1 88 | interpolated_p = 89 | p[0] * (1.0f-uv[0]) * (1.0f-uv[1]) + p[2] * (1.0f-uv[0]) * uv[1] + 90 | p[3] * uv[0] * uv[1] + p[1] * uv[0] * (1.0f-uv[1]); 91 | } else { 92 | // need to go through triangulation :( 93 | // we get the indices in such an order that they form a triangle fan around idx[0]. 94 | // compute barycentric coordinates of our xy* point for all triangles in the fan: 95 | const float ex = uv[0] - spectrum_data_points[idx[0]].uv[0]; 96 | const float ey = uv[1] - spectrum_data_points[idx[0]].uv[1]; 97 | float e0x = spectrum_data_points[idx[1]].uv[0] - spectrum_data_points[idx[0]].uv[0]; 98 | float e0y = spectrum_data_points[idx[1]].uv[1] - spectrum_data_points[idx[0]].uv[1]; 99 | float uu = e0x*ey - ex*e0y; 100 | for (int i=0; i} 12 | # 13 | # The contents of a non-leaf property is other properties, both 14 | # leaf and non-leaf. 15 | 16 | # Leaf properties follow the format: 17 | # TypeName [] 18 | # 19 | # Note the square brackets instead of curly braces. 20 | # The contents of a leaf property can be any utf8 text 21 | # that is properly escaped (see below). 22 | 23 | # Backslashes (\) are used for escaping characters inside property names and 24 | # leaf property contents. Escaping is not allowed anywhere else. 
25 | # In these contexts, Any character immediately following a backslash is 26 | # interpreted literally and is stripped of any semantic meaning. This allows, 27 | # for example, closing square brackets and hash symbols (] and #) to be 28 | # included in leaf contents. It also allows white space, hashes, and opening 29 | # square and curly braces to be in property names. 30 | 31 | # A scene defines a single frame to be rendered 32 | Scene $yar_0001 { 33 | # The output section defines how the rendered image should be output to disk 34 | Output { 35 | Path ["/home/cessen/test/psychopath/render/yar_0001.png"] 36 | Format [png] 37 | ColorSpace [srgb] 38 | Dither [random 1.0] 39 | } 40 | 41 | # Render settings... fairly self explanatory 42 | RenderSettings { 43 | Resolution [1280 720] 44 | SamplesPerPixel [16] 45 | DicingRate [0.25] 46 | PixelAspect [1.0] 47 | Filter [gaussian 1.5] 48 | Seed [1] 49 | } 50 | 51 | # Each scene contains a single camera 52 | Camera { 53 | Fov [0.785398163] # In radians 54 | FocalDistance [23.1] 55 | ApertureRadius [0.1] 56 | 57 | # Multiple of the same property listed in the same context implies animation. 58 | # In this case, the camera has an animated transform. 59 | Transform [ 60 | 1 0 0 0 61 | 0 1 0 0 62 | 0 0 1 0 63 | 0 0 0 1 64 | ] 65 | 66 | Transform [ 67 | 1 0 0 0 68 | 0 0.9 0 0 69 | 0 0 0.8 0 70 | 0 0 0 1.3 71 | ] 72 | } 73 | 74 | # Each scene contains a single world description, which includes everything 75 | # of infinite extent. For example: background shader, distant lights, volumes 76 | # that occupy the entire world, etc. 77 | World { 78 | BackgroundShader { 79 | Type [Color] 80 | Color [0.8 0.8 0.8] 81 | } 82 | 83 | DistantDiskLight { 84 | Direction [1.0 0.5 0.5] 85 | Radius [0.00872664] # In radians 86 | Color [1.0 1.0 1.0] 87 | } 88 | } 89 | 90 | # Each scene contains a single root assembly. 91 | # All further scene description occurs within this assembly, 92 | # or in other files referenced by this assembly. 
    Assembly {
        # Shaders, objects, sub-assemblies, and instances can be listed in any
        # order, as long as all data precedes any references to it.
        SurfaceShader $grey_diffuse {
            Type [Lambert]
            Color [0.9 0.9 0.9]
        }

        SurfaceShader $mirror {
            Type [GTR]
            Color [0.9 0.9 0.9]
            Roughness [0.0]
            TailShape [2.0]
            Fresnel [0.25]
        }

        SphereLight $light.001 {
            Location [20 20 20]
            Radius [1.0]
            Color [1.0 1.0 1.0]
        }

        CatmullClarkSubdiv $subdiv_test {
            GeometryFile ["/home/cessen/thing.obj"]
            SurfaceShaderBind [$mirror] # Referencing the shader defined previously
        }

        # Assemblies can contain other assemblies
        Assembly $gruble {
            SurfaceShader $complex_shader {
                Type [OSL]
                FilePath ["cool_shader.osl"]
            }

            # Assembly namespaces are local, so $subdiv_test here does
            # not conflict with $subdiv_test in the parent assembly.
            CatmullClarkSubdiv $subdiv_test {
                GeometryFile ["/home/cessen/thing2.obj"]
                SurfaceShaderBind [$mirror] # Referencing the shader defined previously
            }

            Instance {
                Data [$subdiv_test]
            }
        }

        # Objects and sub-assemblies don't directly manifest inside
        # this assembly. They must be instanced into it. An object or
        # sub-assembly can be instanced any number of times within the
        # assembly.
143 | Instance { 144 | Data [$light.001] 145 | Transform [ 146 | 1 0 0 0 147 | 0 1 0 0 148 | 0 0 1 0 149 | 0 0 0 1 150 | ] 151 | } 152 | 153 | Instance { 154 | Data [$subdiv_test] 155 | # Transforms are not necessary: an instance can have no transforms 156 | } 157 | 158 | Instance { 159 | Data [$subdiv_test] 160 | Transform [ 161 | 2 0 0 0 162 | 0 3 4 0 163 | 1 0 -1 0 164 | 0 0 0 1 165 | ] 166 | Transform [ 167 | 3 0 0 0 168 | 0 4 -2 1 169 | 1 0 -1 0 170 | 0 0 0 1 171 | ] 172 | } 173 | 174 | Instance { 175 | Data [$gruble] 176 | Transform [ 177 | 2 0 0 0 178 | 0 3 4 0 179 | 1 0 -1 0 180 | 0 0 0 1 181 | ] 182 | } 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /film/raster.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RASTER_HPP 2 | #define RASTER_HPP 3 | 4 | #include "numtype.h" 5 | 6 | #include 7 | 8 | /** 9 | * A lightweight raster image buffer. 10 | * Includes a mapping to 2d coordinates. 11 | * Pixels are stored in left-to-right, top-to-bottom order, with all the 12 | * channels of a pixel stored next to each other. 13 | */ 14 | template 15 | class Raster { 16 | public: 17 | uint16_t width, height; // Resolution of the image 18 | float min_x, min_y; // Minimum x/y coordinates of the image 19 | float max_x, max_y; // Maximum x/y coordinates of the image 20 | uint16_t channels; // Channels per pixel 21 | PIXFMT *pixels; // Pixel data 22 | 23 | /** 24 | * @brief Constructor. 25 | * 26 | * Creates a new Raster buffer. All pixel data is initialized to zero. 27 | */ 28 | Raster(int w, int h, int cc, float x1, float y1, float x2, float y2) { 29 | width = w; 30 | height = h; 31 | min_x = x1 < x2 ? x1 : x2; 32 | min_y = y1 < y2 ? y1 : y2; 33 | max_x = x1 > x2 ? x1 : x2; 34 | max_y = y1 > y2 ? 
y1 : y2; 35 | 36 | channels = cc; 37 | pixels = new PIXFMT[w*h*cc]; 38 | for (int i=0; i < w*h*cc; i++) 39 | pixels[i] = 0; 40 | } 41 | /* Releases the pixel buffer allocated by the constructor. */ 42 | ~Raster() { 43 | delete [] pixels; 44 | } 45 | /* Non-copyable: `pixels` is an owning raw pointer, so a shallow copy would be freed twice. */ Raster(const Raster&) = delete; Raster& operator=(const Raster&) = delete; 46 | /** 47 | * @brief Fetches a pointer to the requested pixel's data. The pointer addresses the first of the pixel's `channels` consecutive values; x and y are asserted to be in range. 48 | */ 49 | PIXFMT *pixel(int x, int y) { 50 | assert(x >= 0 && x < width && y >= 0 && y < height); 51 | return &(pixels[(y*width + x)*channels]); 52 | } 53 | }; 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /format_code.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Auto-formats all C/C++ code with Artistic Style (http://astyle.sourceforge.net/) 3 | # to adhere to a consistent coding style. Should be run before committing. 4 | astyle --options=none --suffix=".orig~" -Q -A2 -tSHU -R "*.hpp" "*.cpp" "*.h" "*.c" 5 | 6 | -------------------------------------------------------------------------------- /global.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | #include 4 | #include "numtype.h" 5 | 6 | namespace Global { 7 | std::atomic next_object_uid {0}; 8 | 9 | namespace Stats { 10 | std::atomic rays_shot(0); 11 | std::atomic split_count(0); 12 | std::atomic object_ray_tests(0); 13 | std::atomic top_level_bvh_node_tests(0); 14 | 15 | std::atomic nan_count(0); 16 | std::atomic inf_count(0); 17 | } // Stats 18 | } // Global 19 | -------------------------------------------------------------------------------- /global.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PSYCHO_GLOBAL_HPP 2 | #define PSYCHO_GLOBAL_HPP 3 | 4 | #include 5 | #include "numtype.h" 6 | 7 | //#define GLOBAL_STATS_TOP_LEVEL_BVH_NODE_TESTS 8 | 9 | namespace Global { 10 | extern std::atomic next_object_uid; 11 | 12 | namespace Stats { 13 | extern std::atomic rays_shot; 14 | extern std::atomic split_count; 15 | extern 
std::atomic object_ray_tests; 16 | extern std::atomic top_level_bvh_node_tests; 17 | 18 | extern std::atomic nan_count; 19 | extern std::atomic inf_count; 20 | /** @brief Resets every Stats counter to zero. Declared inline so that including this header does not create one private copy of the function per translation unit. */ 21 | inline void clear() { 22 | rays_shot = 0; 23 | split_count = 0; 24 | object_ray_tests = 0; 25 | top_level_bvh_node_tests = 0; 26 | 27 | nan_count = 0; 28 | inf_count = 0; 29 | } 30 | 31 | } // Stats 32 | } // Global 33 | 34 | #endif // PSYCHO_GLOBAL_HPP -------------------------------------------------------------------------------- /integrator/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(integrator 2 | path_trace_integrator) 3 | -------------------------------------------------------------------------------- /integrator/integrator.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file defines the interface to Integrator classes, which decide where 3 | * to shoot rays and how to combine their results into a final image or images. 4 | */ 5 | #ifndef INTEGRATOR_HPP 6 | #define INTEGRATOR_HPP 7 | 8 | #include "numtype.h" 9 | 10 | #include "raster.hpp" 11 | #include "scene.hpp" 12 | #include "tracer.hpp" 13 | 14 | /** 15 | * @brief An integrator for the rendering equation. 16 | * 17 | * The Integrator's job is to solve the rendering equation, using the Tracer 18 | * for ray intersection testing and the shading system for shading. 19 | * 20 | * It can, for example, implement Whitted style ray tracing, or 21 | * bidirectional path tracing, or Metropolis light transport, etc. 22 | * Although Markov chain algorithms may play poorly with the Tracer, which is 23 | * designed to trace rays in bulk. 24 | */ 25 | class Integrator { 26 | public: 27 | virtual ~Integrator() {} 28 | 29 | /** 30 | * @brief Begins integration. 
31 | */ 32 | virtual void integrate() = 0; 33 | 34 | }; 35 | 36 | #endif // INTEGRATOR_HPP 37 | 38 | -------------------------------------------------------------------------------- /integrator/path_trace_integrator.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file and path_trace_integrator.cpp define the PathTraceIntegrator class, which decides where 3 | * to shoot rays and how to combine their results into a final image or images. 4 | */ 5 | #ifndef PATH_TRACE_INTEGRATOR_HPP 6 | #define PATH_TRACE_INTEGRATOR_HPP 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include "numtype.h" 15 | 16 | #include "spinlock.hpp" 17 | 18 | #include "integrator.hpp" 19 | #include "film.hpp" 20 | #include "image_sampler.hpp" 21 | #include "scene.hpp" 22 | #include "tracer.hpp" 23 | #include "color.hpp" 24 | 25 | #include "ring_buffer_concurrent.hpp" 26 | 27 | /** 28 | * @brief An integrator for the rendering equation. 29 | * 30 | * The Integrator's job is to solve the rendering equation, using the Tracer 31 | * for ray intersection testing and the shading system for shading. 32 | * 33 | * It will implement path tracing with next event estimation. But it 34 | * could instead, for example, implement Whitted style ray tracing, or 35 | * bidirectional path tracing, or Metropolis light transport, etc. 36 | * Although Markov chain algorithms may play poorly with the Tracer, which is 37 | * designed to trace rays in bulk. 38 | * Inherits publicly so that an instance can be used through the Integrator interface. */ 39 | class PathTraceIntegrator final: public Integrator { 40 | struct PixelBlock { 41 | int x, y; 42 | int w, h; 43 | }; 44 | 45 | /* 46 | * A path tracing path state. 47 | * Stores state of a path in progress. 
48 | */ 49 | struct PTState { 50 | Sampler sampler; 51 | float time; 52 | int step = 0; 53 | short pix_x, pix_y; // Pixel coordinates of the path 54 | Intersection inter {}; 55 | WorldRay prev_ray {}; 56 | float wavelength; // The wavelength of light of the path (in nm) 57 | float last_pdf = 0.0f; 58 | SpectralSample col {0.0f, 0.0f}; // Color of the sample collected so far 59 | SpectralSample fcol {0.0f, 1.0f}; // Accumulated filter color from light path 60 | SpectralSample lcol {0.0f, 0.0f}; // Temporary storage for incoming light color 61 | 62 | bool done {false}; 63 | }; 64 | 65 | void init_path(PTState* pstate, Sampler s, short x, short y); 66 | WorldRay next_ray_for_path(const WorldRay& prev_ray, PTState* pstate); 67 | void update_path(PTState* pstate, const WorldRay& ray, const Intersection& inter); 68 | 69 | 70 | 71 | size_t total_items = 0; 72 | size_t completed_items = 0; 73 | SpinLock progress_lock; 74 | void print_progress() { 75 | std::cout << "\rRendering: " << std::fixed << std::setprecision(2) << (float(completed_items) / total_items) * 100 << "%" << std::flush; 76 | } 77 | 78 | 79 | 80 | public: 81 | Scene *scene; 82 | Film *image; 83 | std::mutex image_mut; 84 | int spp; 85 | int spp_max; 86 | float image_variance_max; 87 | uint seed; 88 | int path_length; 89 | int thread_count; 90 | std::function callback; 91 | 92 | RingBufferConcurrent blocks; // Queue for pending blocks of pixels to be rendered 93 | 94 | /** 95 | * @brief Constructor. 96 | * 97 | * @param[in] scene_ A pointer to the scene to render. Should be fully 98 | * finalized for rendering. 99 | * @param[out] image_ The image to render to. Should be already 100 | * initialized with 3 channels, for rgb. 101 | * @param spp_ The number of samples to take per pixel for integration. 
102 | */ 103 | PathTraceIntegrator(Scene *scene_, Film *image_, int spp_, int spp_max_, float variance_max_, uint seed_, int thread_count_=1, std::function callback_ = std::function()) { 104 | scene = scene_; 105 | image = image_; 106 | spp = spp_; 107 | spp_max = spp_max_; 108 | image_variance_max = variance_max_; 109 | seed = seed_; 110 | thread_count = thread_count_; 111 | path_length = 4; 112 | callback = callback_; 113 | // Size the pending-block queue to two entries per render thread. 114 | blocks.resize(thread_count_ * 2); 115 | } 116 | 117 | /** 118 | * @brief Begins integration. 119 | */ 120 | virtual void integrate() override; 121 | 122 | /** 123 | * Watches the block queue for blocks of pixels to render. 124 | */ 125 | void render_blocks(); 126 | }; 127 | 128 | #endif // PATH_TRACE_INTEGRATOR_HPP 129 | 130 | -------------------------------------------------------------------------------- /lights/light.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LIGHT_HPP 2 | #define LIGHT_HPP 3 | 4 | #include "object.hpp" 5 | #include "bbox.hpp" 6 | #include "vector.hpp" 7 | #include "color.hpp" 8 | 9 | /** 10 | * @brief An interface for light sources. 11 | */ 12 | class Light: public Object { 13 | public: 14 | virtual ~Light() {} 15 | 16 | Object::Type get_type() const final { 17 | return Object::LIGHT; 18 | } 19 | 20 | /** 21 | * @brief Samples the light source for a given point to be illuminated. 22 | * 23 | * @param arr The point to be illuminated. 24 | * @param u Random parameter U. 25 | * @param v Random parameter V. 26 | * @param wavelength The wavelength of light to sample at. 27 | * @param time The time to sample at. 28 | * @param[out] shadow_vec The world-space direction to cast a shadow ray 29 | * for visibility testing. Its length determines the extent 30 | * that the shadow ray should have, unless the light source 31 | * is infinite (see is_infinite()) in which case the extent 32 | * should be infinite. 
This vector also doubles to inform 33 | * What direction the light is arriving from (just invert 34 | * the vector). 35 | * 36 | * @returns The light arriving at the point arr. 37 | */ 38 | virtual SpectralSample sample(const Vec3 &arr, float u, float v, float wavelength, float time, 39 | Vec3 *shadow_vec, float* pdf) const = 0; 40 | 41 | 42 | /** 43 | * @brief Calculates the pdf of sampling the given 44 | * sample_dir/sample_u/sample_v from the given point arr. This is used 45 | * primarily to calculate probabilities for multiple importance sampling. 46 | * 47 | * NOTE: this function CAN assume that sample_dir, sample_u, and sample_v 48 | * are a valid sample for the light source (i.e. hits/lies on the light 49 | * source). No guarantees are made about the correctness of the return 50 | * value if they are not valid. 51 | */ 52 | virtual float sample_pdf(const Vec3 &arr, const Vec3 &sample_dir, float sample_u, float sample_v, float wavelength, float time) const = 0; 53 | 54 | 55 | /** 56 | * @brief Returns the color emitted in the given direction from the 57 | * given parameters on the light. 58 | * 59 | * @param dir The direction of the outgoing light. 60 | * @param u Random parameter U. 61 | * @param v Random parameter V. 62 | * @param wavelength The wavelength of light to sample at. 63 | * @param time The time to sample at. 64 | */ 65 | virtual SpectralSample outgoing(const Vec3 &dir, float u, float v, float wavelength, float time) const = 0; 66 | 67 | 68 | 69 | /** 70 | * @brief Returns whether the light has a delta distribution. 71 | * 72 | * If a light has no chance of a ray hitting it through random process 73 | * then it is a delta light source. For example, point light sources, 74 | * lights that only emit in a single direction, etc. 75 | */ 76 | virtual bool is_delta() const = 0; 77 | 78 | 79 | /** 80 | * @brief Tests a ray against the light. 
81 | */ 82 | virtual bool intersect_ray(const Ray &ray, Intersection *intersection=nullptr) const = 0; 83 | }; 84 | 85 | #endif // LIGHT_HPP 86 | -------------------------------------------------------------------------------- /lights/point_light.hpp: -------------------------------------------------------------------------------- 1 | #ifndef POINT_LIGHT_HPP 2 | #define POINT_LIGHT_HPP 3 | 4 | #include "light.hpp" 5 | 6 | /** 7 | * @brief A point light source. 8 | * 9 | * Super simple point light source. Practically an example of how 10 | * to write a finite light source. 11 | */ 12 | class PointLight final: public Light { 13 | Vec3 pos; 14 | Color col; 15 | std::vector bounds_; 16 | 17 | public: 18 | PointLight(Vec3 pos_, Color col_): pos {pos_}, col {col_}, bounds_ {BBox(pos_, pos_)} 19 | {} 20 | 21 | virtual SpectralSample sample(const Vec3 &arr, float u, float v, float wavelength, float time, 22 | Vec3 *shadow_vec, float* pdf) const override { 23 | *pdf = 1.0f; 24 | *shadow_vec = pos - arr; 25 | float d2 = shadow_vec->length2(); 26 | if (d2 > 0) 27 | return Color_to_SpectralSample(col / d2, wavelength); 28 | else 29 | return Color_to_SpectralSample(col, wavelength); // Fudge for divide by zero. 
30 | } 31 | 32 | virtual float sample_pdf(const Vec3 &arr, const Vec3 &sample_dir, float sample_u, float sample_v, float wavelength, float time) const override { 33 | return 0.0f; 34 | } 35 | 36 | virtual SpectralSample outgoing(const Vec3 &dir, float u, float v, float wavelength, float time) const override { 37 | return Color_to_SpectralSample(col, wavelength); 38 | } 39 | 40 | virtual bool is_delta() const override { 41 | return true; 42 | } 43 | 44 | virtual Color total_emitted_color() const override { 45 | return col; 46 | } 47 | 48 | virtual bool intersect_ray(const Ray &ray, Intersection *intersection=nullptr) const override { 49 | return false; 50 | } 51 | 52 | virtual const std::vector &bounds() const override { 53 | return bounds_; 54 | } 55 | }; 56 | 57 | #endif // POINT_LIGHT_HPP 58 | -------------------------------------------------------------------------------- /math/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | #add_library(math 2 | # ) 3 | -------------------------------------------------------------------------------- /math/vector.hpp: -------------------------------------------------------------------------------- 1 | #ifndef VECTOR_HPP 2 | #define VECTOR_HPP 3 | 4 | #include "numtype.h" 5 | #include 6 | #include 7 | #include 8 | 9 | #include "ImathVec.h" 10 | 11 | // 3D vector 12 | typedef Imath::Vec3 ImathVec3; 13 | 14 | 15 | #if 0 16 | /** 17 | * @brief A 3d vector. 18 | * 19 | * Optionally accelerated by SSE instructions. 
20 | */ 21 | struct __attribute__((aligned(16))) Vec3 { 22 | union { 23 | struct { 24 | float x,y,z,w; 25 | }; 26 | __m128 m128; 27 | }; 28 | 29 | // Constructors 30 | Vec3() {} 31 | Vec3(float v) { 32 | x = v; 33 | y = v; 34 | z = v; 35 | w = v; 36 | } 37 | Vec3(float x_, float y_, float z_, float w_=1.0f) { 38 | x = x_; 39 | y = y_; 40 | z = z_; 41 | w = w_; 42 | } 43 | Vec3(__m128 m) { 44 | m128 = m; 45 | } 46 | 47 | // Element access 48 | float &operator[](size_t n) { 49 | assert(n < 4); 50 | return (&x)[n]; 51 | } 52 | const float &operator[](size_t n) const { 53 | assert(n < 4); 54 | return (&x)[n]; 55 | } 56 | 57 | // Comparisons 58 | bool operator==(const Vec3 &b) const { 59 | return (x==b.x && y==b.y && z==b.z); 60 | } 61 | 62 | // Multiplication and division by scalar 63 | Vec3 operator*(float b) const { 64 | return (Vec3)_mm_mul_ps(m128, _mm_set_ps(b,b,b,b)); 65 | } 66 | Vec3 operator/(float b) const { 67 | return (Vec3)_mm_div_ps(m128, _mm_set_ps(b,b,b,b)); 68 | } 69 | 70 | Vec3 &operator*=(float b) { 71 | m128 = _mm_mul_ps(m128, _mm_set_ps(b,b,b,b)); 72 | return *this; 73 | } 74 | Vec3 &operator/=(float b) { 75 | m128 = _mm_div_ps(m128, _mm_set_ps(b,b,b,b)); 76 | return *this; 77 | } 78 | 79 | // Component-wise arithmetic 80 | Vec3 operator+(const Vec3& b) const { 81 | return (Vec3)_mm_add_ps(m128, b.m128); 82 | } 83 | Vec3 operator-(const Vec3& b) const { 84 | return (Vec3)_mm_sub_ps(m128, b.m128); 85 | } 86 | Vec3 operator*(const Vec3& b) const { 87 | return (Vec3)_mm_mul_ps(m128, b.m128); 88 | } 89 | Vec3 operator/(const Vec3& b) const { 90 | return (Vec3)_mm_div_ps(m128, b.m128); 91 | } 92 | 93 | Vec3 &operator+=(const Vec3& b) { 94 | m128 = _mm_add_ps(m128, b.m128); 95 | return *this; 96 | } 97 | Vec3 &operator-=(const Vec3& b) { 98 | m128 = _mm_sub_ps(m128, b.m128); 99 | return *this; 100 | } 101 | Vec3 &operator*=(const Vec3& b) { 102 | m128 = _mm_mul_ps(m128, b.m128); 103 | return *this; 104 | } 105 | Vec3 &operator/=(const Vec3& b) { 106 | 
m128 = _mm_div_ps(m128, b.m128); 107 | return *this; 108 | } 109 | 110 | // Products 111 | float dot(const Vec3 &b) const { 112 | return x*b.x + y*b.y + z*b.z; 113 | } 114 | Vec3 cross(const Vec3 &b) const { 115 | return (Vec3)_mm_sub_ps( 116 | _mm_mul_ps( 117 | _mm_shuffle_ps(m128, m128, _MM_SHUFFLE(3, 0, 2, 1)), 118 | _mm_shuffle_ps(b.m128, b.m128, _MM_SHUFFLE(3, 1, 0, 2))), 119 | _mm_mul_ps( 120 | _mm_shuffle_ps(m128, m128, _MM_SHUFFLE(3, 1, 0, 2)), 121 | _mm_shuffle_ps(b.m128, b.m128, _MM_SHUFFLE(3, 0, 2, 1))) 122 | ); 123 | } 124 | 125 | // Component-wise min and max 126 | Vec3 min(const Vec3 &b) const { 127 | return (Vec3)_mm_min_ps(m128, b.m128); 128 | } 129 | Vec3 max(const Vec3 &b) const { 130 | return (Vec3)_mm_max_ps(m128, b.m128); 131 | } 132 | 133 | float length() const { 134 | Vec3 a = *this; 135 | a.w = 0.0f; 136 | 137 | __m128 &D = a.m128; 138 | D = _mm_mul_ps(D, D); 139 | D = _mm_hadd_ps(D, D); 140 | D = _mm_hadd_ps(D, D); 141 | 142 | D = _mm_sqrt_ps(D); 143 | 144 | return a.x; 145 | } 146 | 147 | float length2() const { 148 | Vec3 a = *this; 149 | a.w = 0.0f; 150 | 151 | __m128 &D = a.m128; 152 | D = _mm_mul_ps(D, D); 153 | D = _mm_hadd_ps(D, D); 154 | D = _mm_hadd_ps(D, D); 155 | 156 | return a.x; 157 | } 158 | 159 | const Vec3 &normalize() { 160 | w = 0.f; 161 | 162 | __m128 D = m128; 163 | D = _mm_mul_ps(D, D); 164 | D = _mm_hadd_ps(D, D); 165 | D = _mm_hadd_ps(D, D); 166 | 167 | // 1 iteration of Newton-raphson -- Idea from Intel's Embree. 
168 | __m128 r = _mm_rsqrt_ps(D); 169 | r = _mm_add_ps( 170 | _mm_mul_ps(_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f), r), 171 | _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(D, _mm_set_ps(-0.5f, -0.5f, -0.5f, -0.5f)), r), _mm_mul_ps(r, r))); 172 | 173 | m128 = _mm_mul_ps(m128, r); 174 | 175 | return *this; 176 | } 177 | 178 | Vec3 normalized() const { 179 | Vec3 v = *this; 180 | v.normalize(); 181 | return v; 182 | } 183 | 184 | }; 185 | #else 186 | typedef Imath::Vec3 Vec3; 187 | #endif 188 | 189 | // Dot product of two vectors; forwards to the type's own dot() method. 190 | template 191 | static inline float dot(const T &a, const T &b) { 192 | return a.dot(b); 193 | } 194 | 195 | // Normalized dot product (i.e. the cosine of the angle between two vectors). Both vectors must have non-zero length (asserted). 196 | template 197 | static inline float dot_norm(const T& a, const T& b) { 198 | const float length_product = a.length() * b.length(); 199 | assert(length_product > 0.0f); 200 | return ((a.x * b.x) + (a.y * b.y) + (a.z * b.z)) / length_product; 201 | } 202 | // Cross product of two vectors; forwards to the type's own cross() method. 203 | template 204 | static inline T cross(const T &a, const T &b) { 205 | return a.cross(b); 206 | } 207 | // Component-wise minimum of two vectors. 208 | static inline Vec3 min(const Vec3 &a, const Vec3 &b) { 209 | Vec3 c; 210 | for (int i = 0; i < 3; i++) 211 | c[i] = a[i] < b[i] ? a[i] : b[i]; 212 | return c; 213 | } 214 | // Component-wise maximum of two vectors. 215 | static inline Vec3 max(const Vec3 &a, const Vec3 &b) { 216 | Vec3 c; 217 | for (int i = 0; i < 3; i++) 218 | c[i] = a[i] > b[i] ? a[i] : b[i]; 219 | return c; 220 | } 221 | // Returns the largest absolute component of v (a magnitude, not an axis index). 222 | static inline float longest_axis(const Vec3 &v) { 223 | return std::max(std::max(std::abs(v.x), std::abs(v.y)), std::abs(v.z)); 224 | } 225 | 226 | /** 227 | * Returns 'in' reflected off a surface with surface normal 'nn'. 228 | * 'in' is an incoming direction, i.e. pointing towards the surface. 229 | * 'nn' must be normalized. 
230 | */ 231 | static inline Vec3 reflect_vec(Vec3 in, Vec3 nn) { 232 | return in - (nn * 2.0f * dot(in, nn)); 233 | } 234 | 235 | #endif // VECTOR_HPP 236 | -------------------------------------------------------------------------------- /math/vector_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | 3 | #include 4 | #include "vector.hpp" 5 | 6 | /* 7 | ************************************************************************ 8 | * Test suite for Vec3. 9 | ************************************************************************ 10 | */ 11 | 12 | TEST_CASE("vector") { 13 | // Test for the constructor 14 | SECTION("constructor") { 15 | Vec3 v1(0.0, 0.0, 0.0); 16 | Vec3 v2(1.5, 0.0, -64.0); 17 | 18 | bool t1 = v1.x == 0.0 && v1.y == 0.0 && v1.z == 0.0; 19 | REQUIRE(t1); 20 | bool t2 = v2.x == 1.5 && v2.y == 0.0 && v2.z == -64.0; 21 | REQUIRE(t2); 22 | } 23 | 24 | // Test for ::operator[] 25 | SECTION("op_square_bracket") { 26 | Vec3 v1(1.5, 0.0, -64.0); 27 | const Vec3 v2(1.5, 0.0, -64.0); 28 | 29 | // Access 30 | bool t1 = v1[0] == 1.5 && v1[1] == 0.0 && v1[2] == -64.0; 31 | REQUIRE(t1); 32 | bool t2 = v2[0] == 1.5 && v2[1] == 0.0 && v2[2] == -64.0; 33 | REQUIRE(t2); 34 | 35 | // Modification 36 | v1[0] = 1.0; 37 | v1[1] = 2.0; 38 | v1[2] = 3.0; 39 | bool t3 = v1[0] == 1.0 && v1[1] == 2.0 && v1[2] == 3.0; 40 | REQUIRE(t3); 41 | } 42 | 43 | // Test for ::operator+ 44 | SECTION("op_add") { 45 | Vec3 v1(1.2, -2.6, 1.0); 46 | Vec3 v2(-23.4, 2.0, 9.0); 47 | 48 | Vec3 v3 = v1 + v2; 49 | 50 | REQUIRE(v3.x == Approx(-22.2).epsilon(0.00001)); 51 | REQUIRE(v3.y == Approx(-0.6).epsilon(0.0001)); 52 | REQUIRE(v3.z == 10.0); 53 | } 54 | 55 | // Test for ::operator- 56 | SECTION("op_subtract") { 57 | Vec3 v1(1.2, -2.6, 1.0); 58 | Vec3 v2(-23.4, 2.2, 9.0); 59 | 60 | Vec3 v3 = v1 - v2; 61 | 62 | REQUIRE(v3.x == Approx(24.6).epsilon(0.00001)); 63 | REQUIRE(v3.y == Approx(-4.8).epsilon(0.00001)); 64 | REQUIRE(v3.z == 
-8.0); 65 | } 66 | 67 | // Test for ::operator* 68 | SECTION("op_multiply") { 69 | Vec3 v1(1.2, -2.6, 1.0); 70 | 71 | Vec3 v2 = v1 * 1.5; 72 | 73 | REQUIRE(v2.x == Approx(1.8).epsilon(0.00001)); 74 | REQUIRE(v2.y == Approx(-3.9).epsilon(0.00001)); 75 | REQUIRE(v2.z == 1.5); 76 | } 77 | 78 | // Test for ::operator/ 79 | SECTION("op_divide") { 80 | Vec3 v1(1.2, -2.6, 1.0); 81 | 82 | Vec3 v2 = v1 / 1.5; 83 | 84 | REQUIRE(v2.x == Approx(0.8).epsilon(0.00001)); 85 | REQUIRE(v2.y == Approx(-1.7333333333333333333333333).epsilon(0.00001)); 86 | REQUIRE(v2.z == Approx(0.6666666666666666666666666).epsilon(0.00001)); 87 | } 88 | 89 | // Test for ::length() 90 | SECTION("length") { 91 | Vec3 v1(1.2, -2.6, 1.0); 92 | 93 | REQUIRE(v1.length() == Approx(3.03315017762062).epsilon(0.0001)); 94 | } 95 | 96 | // Test for ::length2() 97 | SECTION("length2") { 98 | Vec3 v1(1.2, -2.6, 1.0); 99 | 100 | REQUIRE(v1.length2() == Approx(9.2).epsilon(0.0001)); 101 | } 102 | 103 | // Test for ::normalize() 104 | SECTION("normalize") { 105 | Vec3 v(1.2, -2.6, 1.0); 106 | 107 | float l = v.length(); 108 | v.normalize(); 109 | 110 | REQUIRE(l == Approx(3.03315017762062).epsilon(0.0001)); 111 | REQUIRE(v.x == Approx(0.39562828403747).epsilon(0.0001)); 112 | REQUIRE(v.y == Approx(-0.85719461541452).epsilon(0.0001)); 113 | REQUIRE(v.z == Approx(0.32969023669789).epsilon(0.0001)); 114 | } 115 | 116 | // Test for dot() 117 | SECTION("dot_") { 118 | Vec3 v1(1.2, -2.6, 1.0); 119 | Vec3 v2(-23.4, 2.2, 9.0); 120 | 121 | float d = dot(v1, v2); 122 | 123 | REQUIRE(d == Approx(-24.8).epsilon(0.00001)); 124 | } 125 | 126 | // Test for cross() 127 | SECTION("cross_") { 128 | Vec3 v1(1.2, -2.6, 1.0); 129 | Vec3 v2(-23.4, 2.2, 9.0); 130 | 131 | Vec3 v3 = cross(v1, v2); 132 | 133 | REQUIRE(v3.x == Approx(-25.6).epsilon(0.00001)); 134 | REQUIRE(v3.y == Approx(-34.2).epsilon(0.00001)); 135 | REQUIRE(v3.z == Approx(-58.2).epsilon(0.00001)); 136 | } 137 | } 138 | 139 | 
-------------------------------------------------------------------------------- /object/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(object 2 | bilinear bicubic sphere subdivision_surface) 3 | -------------------------------------------------------------------------------- /object/bicubic.cpp: -------------------------------------------------------------------------------- 1 | #include "numtype.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "stack.hpp" 11 | #include "bicubic.hpp" 12 | #include "config.hpp" 13 | #include "global.hpp" 14 | 15 | #include "surface_closure.hpp" 16 | #include "closure_union.hpp" 17 | 18 | 19 | 20 | 21 | Bicubic::Bicubic(Vec3 v1, Vec3 v2, Vec3 v3, Vec3 v4, 22 | Vec3 v5, Vec3 v6, Vec3 v7, Vec3 v8, 23 | Vec3 v9, Vec3 v10, Vec3 v11, Vec3 v12, 24 | Vec3 v13, Vec3 v14, Vec3 v15, Vec3 v16) { 25 | verts.resize(1); 26 | 27 | verts[0][0] = v1; 28 | verts[0][1] = v2; 29 | verts[0][2] = v3; 30 | verts[0][3] = v4; 31 | 32 | verts[0][4] = v5; 33 | verts[0][5] = v6; 34 | verts[0][6] = v7; 35 | verts[0][7] = v8; 36 | 37 | verts[0][8] = v9; 38 | verts[0][9] = v10; 39 | verts[0][10] = v11; 40 | verts[0][11] = v12; 41 | 42 | verts[0][12] = v13; 43 | verts[0][13] = v14; 44 | verts[0][14] = v15; 45 | verts[0][15] = v16; 46 | } 47 | 48 | 49 | void Bicubic::add_time_sample(Vec3 v1, Vec3 v2, Vec3 v3, Vec3 v4, 50 | Vec3 v5, Vec3 v6, Vec3 v7, Vec3 v8, 51 | Vec3 v9, Vec3 v10, Vec3 v11, Vec3 v12, 52 | Vec3 v13, Vec3 v14, Vec3 v15, Vec3 v16) { 53 | const auto i = verts.size(); 54 | verts.resize(verts.size()+1); 55 | 56 | verts[i][0] = v1; 57 | verts[i][1] = v2; 58 | verts[i][2] = v3; 59 | verts[i][3] = v4; 60 | 61 | verts[i][4] = v5; 62 | verts[i][5] = v6; 63 | verts[i][6] = v7; 64 | verts[i][7] = v8; 65 | 66 | verts[i][8] = v9; 67 | verts[i][9] = v10; 68 | verts[i][10] = v11; 69 | verts[i][11] = v12; 70 | 71 | verts[i][12] = v13; 72 
| verts[i][13] = v14; 73 | verts[i][14] = v15; 74 | verts[i][15] = v16; 75 | } 76 | 77 | void Bicubic::add_time_sample(std::array patch) { 78 | verts.emplace_back(patch); 79 | } 80 | 81 | 82 | void Bicubic::finalize() { 83 | // Calculate bounds 84 | bbox.resize(verts.size()); 85 | for (size_t time = 0; time < verts.size(); time++) { 86 | bbox[time] = bound(verts[time]); 87 | 88 | // Extend bounds for displacements 89 | for (int i = 0; i < 3; i++) { 90 | bbox[time].min[i] -= Config::displace_distance; 91 | bbox[time].max[i] += Config::displace_distance; 92 | } 93 | } 94 | } 95 | 96 | 97 | const std::vector &Bicubic::bounds() const { 98 | return bbox; 99 | } 100 | 101 | -------------------------------------------------------------------------------- /object/bilinear.cpp: -------------------------------------------------------------------------------- 1 | #include "numtype.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "bilinear.hpp" 9 | #include "config.hpp" 10 | #include "global.hpp" 11 | 12 | 13 | 14 | 15 | Bilinear::Bilinear(Vec3 v1, Vec3 v2, Vec3 v3, Vec3 v4) { 16 | verts.push_back({{v1,v2,v3,v4}}); 17 | } 18 | 19 | void Bilinear::finalize() { 20 | // Calculate bounds 21 | bbox.resize(verts.size()); 22 | for (size_t time = 0; time < verts.size(); time++) { 23 | bbox[time] = bound(verts[time]); 24 | 25 | // Extend bounds for displacements 26 | for (int i = 0; i < 3; i++) { 27 | bbox[time].min[i] -= Config::displace_distance; 28 | bbox[time].max[i] += Config::displace_distance; 29 | } 30 | } 31 | } 32 | 33 | 34 | void Bilinear::add_time_sample(Vec3 v1, Vec3 v2, Vec3 v3, Vec3 v4) { 35 | verts.push_back({{v1,v2,v3,v4}}); 36 | } 37 | 38 | 39 | const std::vector &Bilinear::bounds() const { 40 | return bbox; 41 | } 42 | -------------------------------------------------------------------------------- /object/bilinear.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BILINEAR_HPP 2 | 
#define BILINEAR_HPP 3 | 4 | #include "numtype.h" 5 | 6 | #include 7 | #include 8 | #include "utils.hpp" 9 | #include "stack.hpp" 10 | #include "vector.hpp" 11 | #include "object.hpp" 12 | 13 | /* 14 | * A bilinear patch. 15 | * Vertices arranged like this: 16 | * u--> 17 | * v1----v2 18 | * v | | 19 | * | v3----v4 20 | * \/ 21 | */ 22 | class Bilinear final: public PatchSurface { 23 | public: 24 | std::vector> verts; 25 | std::vector bbox; 26 | 27 | Bilinear() {} 28 | Bilinear(Vec3 v1, Vec3 v2, Vec3 v3, Vec3 v4); 29 | virtual ~Bilinear() {} 30 | // Computes one bounding box per time sample (overrides Object::finalize()). 31 | void finalize() override; 32 | 33 | void add_time_sample(Vec3 v1, Vec3 v2, Vec3 v3, Vec3 v4); 34 | 35 | virtual const std::vector &bounds() const override; 36 | virtual Color total_emitted_color() const override { 37 | return Color(0.0f); 38 | } 39 | 40 | 41 | // For being traced by intersect_rays_with_patch() in tracer.cpp 42 | typedef std::array store_type; 43 | // Blends each of the four control points of p1 and p2 with lerp() at parameter alpha. 44 | static store_type interpolate_patch(float alpha, const store_type& p1, const store_type& p2) { 45 | store_type p3; 46 | for (int i = 0; i < 4; ++i) { 47 | p3[i] = lerp(alpha, p1[i], p2[i]); 48 | } 49 | return p3; 50 | } 51 | // Extent of the patch along u, measured between control points 0 and 1. 52 | __attribute__((always_inline)) 53 | static float ulen(const store_type& p) { 54 | return longest_axis(p[0] - p[1]); 55 | } 56 | // Extent of the patch along v, measured between control points 0 and 2. 57 | __attribute__((always_inline)) 58 | static float vlen(const store_type& p) { 59 | return longest_axis(p[0] - p[2]); 60 | } 61 | // Splits the patch at the u midpoint: p1 keeps the p[0]/p[2] side, p2 keeps the p[1]/p[3] side. 62 | __attribute__((always_inline)) 63 | static void split_u(const store_type& p, store_type* p1, store_type* p2) { 64 | (*p2)[0] = (p[0] + p[1]) * 0.5f; 65 | (*p2)[1] = p[1]; 66 | (*p2)[2] = (p[2] + p[3]) * 0.5f; 67 | (*p2)[3] = p[3]; 68 | 69 | (*p1)[0] = p[0]; 70 | (*p1)[1] = (p[0] + p[1]) * 0.5f; 71 | (*p1)[2] = p[2]; 72 | (*p1)[3] = (p[2] + p[3]) * 0.5f; 73 | } 74 | // Splits the patch at the v midpoint: p1 keeps the p[0]/p[1] side, p2 keeps the p[2]/p[3] side. 75 | __attribute__((always_inline)) 76 | static void split_v(const store_type& p, store_type* p1, store_type* p2) { 77 | (*p2)[0] = (p[0] + p[2]) * 0.5f; 78 | (*p2)[1] = (p[1] + p[3]) * 0.5f; 79 | (*p2)[2] = p[2]; 
80 | (*p2)[3] = p[3]; 81 | 82 | (*p1)[0] = p[0]; 83 | (*p1)[1] = p[1]; 84 | (*p1)[2] = (p[0] + p[2]) * 0.5f; 85 | (*p1)[3] = (p[1] + p[3]) * 0.5f; 86 | } 87 | 88 | static Vec3 eval_p(float u, const Vec3 p0, const Vec3 p1) { 89 | const float b0 = 1.0f - u; 90 | const float b1 = u; 91 | 92 | return (p0 * b0) + (p1 * b1); 93 | } 94 | 95 | static Vec3 eval_pd(float u, const Vec3 p0, const Vec3 p1) { 96 | const float d0 = -1.0f; 97 | const float d1 = 1.0f; 98 | 99 | return (p0 * d0) + (p1 * d1); 100 | } 101 | 102 | /** 103 | * Returns 104 | */ 105 | static std::tuple differential_geometry(const store_type& p, float u, float v) { 106 | // Calculate first derivatives and surface normal 107 | const Vec3 dpdu = eval_pd(u, eval_p(v, p[0], p[2]), eval_p(v, p[1], p[3])); 108 | const Vec3 dpdv = eval_pd(v, eval_p(u, p[0], p[1]), eval_p(u, p[2], p[3])); 109 | const Vec3 n = cross(dpdv, dpdu).normalized(); 110 | 111 | // Calculate second derivatives 112 | const Vec3 d2pduu = Vec3(0.0f); 113 | const Vec3 d2pduv = eval_pd(v, eval_pd(u, p[0], p[1]), eval_pd(u, p[2], p[3])); 114 | const Vec3 d2pdvv = Vec3(0.0f); 115 | 116 | // Calculate surface normal derivatives 117 | const float E = dot(dpdu, dpdu); 118 | const float F = dot(dpdu, dpdv); 119 | const float G = dot(dpdv, dpdv); 120 | const float e = dot(n, d2pduu); 121 | const float f = dot(n, d2pduv); 122 | const float g = dot(n, d2pdvv); 123 | 124 | const float invEGF2 = 1.0f / ((E*G) - (F*F)); 125 | const Vec3 dndu = (((f*F) - (e*G)) * invEGF2 * dpdu) + (((e*F) - (f*E)) * invEGF2 * dpdv); 126 | const Vec3 dndv = (((g*F) - (f*G)) * invEGF2 * dpdu) + (((f*F) - (g*E)) * invEGF2 * dpdv); 127 | 128 | return std::make_tuple(n, dpdu, dpdv, dndu, dndv); 129 | } 130 | 131 | __attribute__((always_inline)) 132 | static BBox bound(const store_type& p) { 133 | BBox bb = BBox(p[0], p[0]);; 134 | 135 | for (int i = 1; i < 4; ++i) { 136 | bb.min = min(bb.min, p[i]); 137 | bb.max = max(bb.max, p[i]); 138 | } 139 | 140 | return bb; 141 | } 142 | }; 
143 | 144 | #endif 145 | -------------------------------------------------------------------------------- /object/object.hpp: -------------------------------------------------------------------------------- 1 | #ifndef OBJECT_HPP 2 | #define OBJECT_HPP 3 | 4 | #include "numtype.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "stack.hpp" 11 | #include "ray.hpp" 12 | #include "intersection.hpp" 13 | #include "bbox.hpp" 14 | #include "transform.hpp" 15 | #include "surface_shader.hpp" 16 | 17 | 18 | /** 19 | * @brief Base object class, from which all other objects inherit. 20 | */ 21 | class Object { 22 | public: 23 | // Virtual destructor, and don't delete default copy/move constructors 24 | Object() = default; 25 | virtual ~Object() = default; 26 | Object(const Object&) = default; 27 | Object(Object&&) = default; 28 | Object& operator=(const Object&) = default; 29 | Object& operator=(Object&&) = default; 30 | 31 | /** 32 | * @brief An enum type for describing the type of an object. 33 | */ 34 | enum Type { 35 | SURFACE, 36 | COMPLEX_SURFACE, 37 | PATCH_SURFACE, 38 | LIGHT, 39 | ASSEMBLY_INSTANCE 40 | }; 41 | 42 | // Unique ID, used by Scene and Tracer for various purposes 43 | // Sub-classes should ignore it. 44 | size_t uid; 45 | 46 | /** 47 | * @brief Returns the type of the object. 48 | */ 49 | virtual Type get_type() const = 0; 50 | 51 | /** 52 | * Finalizes an object after parsing is complete, if needed. 53 | */ 54 | virtual void finalize() {} 55 | 56 | /** 57 | * @brief Returns the bounds of the object. 58 | */ 59 | virtual const std::vector &bounds() const = 0; 60 | 61 | /** 62 | * Returns the total amount of energy emitted by the object. 63 | * 64 | * This does not need to be 100% accurate, as it is only used 65 | * for sampling decisions. But it should be approximately 66 | * correct. 67 | * 68 | * TODO: remove this function! This is NOT where this should be handled. 
69 | * This needs to be handled at a point where the material of the object 70 | * is known. 71 | */ 72 | virtual Color total_emitted_color() const = 0; 73 | }; 74 | 75 | 76 | /** 77 | * @brief An interface for traditional surface objects that can be easily 78 | * directly tested against a single ray at a time. 79 | */ 80 | class Surface: public Object { 81 | public: 82 | virtual ~Surface() {} 83 | 84 | Object::Type get_type() const final { 85 | return Object::SURFACE; 86 | } 87 | 88 | /** 89 | * @brief Tests a ray against the surface. 90 | */ 91 | virtual bool intersect_ray(const Ray &ray, Intersection *intersection=nullptr) = 0; 92 | }; 93 | 94 | 95 | /** 96 | * @brief An interface for surfaces that require more complex handling 97 | * and which require fast scratch memory. 98 | */ 99 | class ComplexSurface: public Object { 100 | public: 101 | virtual ~ComplexSurface() {} 102 | 103 | Object::Type get_type() const final { 104 | return Object::COMPLEX_SURFACE; 105 | } 106 | 107 | /** 108 | * @brief Tests a batch of rays against the surface. 109 | */ 110 | virtual void intersect_rays(Ray* rays_begin, Ray* rays_end, 111 | Intersection *intersections, 112 | const Range parent_xforms, 113 | Stack* data_stack, 114 | const SurfaceShader* surface_shader, 115 | const InstanceID& element_id) const = 0; 116 | }; 117 | 118 | 119 | /** 120 | * @brief An interface for surface patches with inherent UV coordinates, and 121 | * which can be easily recursively split into smaller patches. 122 | * 123 | * Other than defining get_type() there are no methods defined in this class. 124 | * However, subclasses of this must nevertheless adhere to an interface and 125 | * provide certain static methods that certain templated functions end up 126 | * using. C++14 and earlier are, unfortunately, not able to describe such 127 | * interfaces. Hopefully Concepts Lite in C++17 will allow this. 
In the mean 128 | * time, look at the Bilinear and Bicubic classes for examples of the required 129 | * interface. 130 | */ 131 | class PatchSurface: public Object { 132 | public: 133 | virtual ~PatchSurface() {} 134 | 135 | Object::Type get_type() const final { 136 | return Object::PATCH_SURFACE; 137 | } 138 | }; 139 | 140 | #endif // OBJECT_HPP 141 | -------------------------------------------------------------------------------- /object/sphere.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SPHERE_HPP 2 | #define SPHERE_HPP 3 | 4 | #include "numtype.h" 5 | 6 | #include 7 | #include "vector.hpp" 8 | #include "object.hpp" 9 | 10 | /** 11 | * @brief A sphere primitive. 12 | * 13 | * This serves as a simple example of how to implement a surface primitive. 14 | */ 15 | class Sphere final: public Surface { 16 | public: 17 | std::vector center; 18 | std::vector radius; 19 | 20 | std::vector bbox; 21 | 22 | Sphere(Vec3 center_, float radius_); 23 | Sphere(uint8_t res_time_); 24 | virtual ~Sphere() {}; 25 | 26 | void add_time_sample(int samp, Vec3 center_, float radius_); 27 | 28 | void finalize(); 29 | 30 | virtual bool intersect_ray(const Ray &ray, Intersection *intersection=nullptr); 31 | virtual const std::vector &bounds() const; 32 | virtual Color total_emitted_color() const override final { 33 | return Color(0.0f); 34 | } 35 | }; 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /object/subdivision_surface.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SUBDIVISION_SURFACE_HPP 2 | #define SUBDIVISION_SURFACE_HPP 3 | 4 | #include 5 | 6 | #include "object.hpp" 7 | #include "intersection.hpp" 8 | #include "ray.hpp" 9 | #include "stack.hpp" 10 | #include "vector.hpp" 11 | #include "bbox.hpp" 12 | #include "bilinear.hpp" 13 | #include "bicubic.hpp" 14 | 15 | class SubdivisionSurface final: public ComplexSurface { 16 | 
struct Node { 17 | Range bounds; 18 | Node* children[2]; 19 | Bicubic* leaf_data; 20 | }; 21 | 22 | void build_bvh(); 23 | Node* build_bvh_recursive(Node* begin, Node* end); 24 | 25 | public: 26 | // Final data 27 | std::vector patches; 28 | std::vector bbox; 29 | std::vector bvh_nodes; 30 | std::vector bvh_bboxes; 31 | Node* bvh_root; 32 | int max_depth; 33 | 34 | // Intermediate data 35 | int depth; 36 | int motion_samples = 0; 37 | int verts_per_motion_sample = 0; 38 | std::vector verts; 39 | std::vector face_vert_counts; 40 | std::vector face_vert_indices; 41 | 42 | // Construction 43 | SubdivisionSurface() {} 44 | void set_verts(std::vector&& verts_, int verts_per_motion_sample_) { 45 | verts = std::move(verts_); 46 | verts_per_motion_sample = verts_per_motion_sample_; 47 | motion_samples = verts.size() / verts_per_motion_sample; 48 | } 49 | void set_face_vert_counts(std::vector&& vert_counts) { 50 | face_vert_counts = std::move(vert_counts); 51 | } 52 | void set_face_vert_indices(std::vector&& vert_indices) { 53 | face_vert_indices = std::move(vert_indices); 54 | } 55 | void finalize(); 56 | 57 | virtual const std::vector &bounds() const override { 58 | return bbox; 59 | } 60 | 61 | virtual Color total_emitted_color() const override { 62 | return Color(0.0f); 63 | } 64 | 65 | virtual void intersect_rays(Ray* rays_begin, Ray* rays_end, 66 | Intersection *intersections, 67 | const Range parent_xforms, 68 | Stack* data_stack, 69 | const SurfaceShader* surface_shader, 70 | const InstanceID& element_id) const override; 71 | 72 | }; 73 | 74 | #endif // SUBDIVISION_SURFACE_HPP -------------------------------------------------------------------------------- /parser/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(parser 2 | parser data_tree) 3 | -------------------------------------------------------------------------------- /parser/data_tree.hpp: 
-------------------------------------------------------------------------------- 1 | #ifndef DATA_TREE_HPP 2 | #define DATA_TREE_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace DataTree { 8 | 9 | struct Node { 10 | std::string type; 11 | std::string name; 12 | std::vector children; // If size is zero, indicates the node is a leaf 13 | std::string leaf_contents; // Only for leaf nodes 14 | }; 15 | 16 | 17 | /** 18 | * Builds a data tree from the contents of a file. 19 | * 20 | * Returns the root node. 21 | */ 22 | Node build_from_file(const char* file_path); 23 | 24 | 25 | /** 26 | * Prints a DataTree to the console, for debugging purposes. 27 | */ 28 | void print_tree(const Node& node, const std::string& indent = ""); 29 | 30 | } 31 | 32 | #endif // DATA_TREE_HPP 33 | -------------------------------------------------------------------------------- /parser/parser.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_HPP 2 | #define PARSER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "data_tree.hpp" 9 | 10 | #include "sphere_light.hpp" 11 | #include "rectangle_light.hpp" 12 | #include "bilinear.hpp" 13 | #include "bicubic.hpp" 14 | #include "subdivision_surface.hpp" 15 | #include "sphere.hpp" 16 | 17 | #include "renderer.hpp" 18 | #include "scene.hpp" 19 | 20 | 21 | class Parser { 22 | DataTree::Node tree; 23 | unsigned int node_index = 0; 24 | 25 | // Methods 26 | 27 | /** 28 | * @brief Parses a transform matrix. 29 | */ 30 | Matrix44 parse_matrix(const std::string line); 31 | 32 | /** 33 | * @brief Parses a Camera section. 34 | */ 35 | std::unique_ptr parse_camera(const DataTree::Node& node); 36 | 37 | /** 38 | * @brief Parses an Assembly section. 39 | */ 40 | std::unique_ptr parse_assembly(const DataTree::Node& node, const Assembly* parent_assembly); 41 | 42 | 43 | /** 44 | * @brief Parses a bilinear patch section. 
45 | */ 46 | std::unique_ptr parse_bilinear_patch(const DataTree::Node& node); 47 | 48 | /** 49 | * @brief Parses a bicubic patch section. 50 | */ 51 | std::unique_ptr parse_bicubic_patch(const DataTree::Node& node); 52 | 53 | /** 54 | * @brief Parses a subdivision surface section. 55 | */ 56 | std::unique_ptr parse_subdivision_surface(const DataTree::Node& node); 57 | 58 | /** 59 | * @brief Parses a sphere section. 60 | */ 61 | std::unique_ptr parse_sphere(const DataTree::Node& node); 62 | 63 | /** 64 | * @brief Parses a surface shader section. 65 | */ 66 | std::unique_ptr parse_surface_shader(const DataTree::Node& node); 67 | 68 | /** 69 | * @brief Parses a sphere light section. 70 | */ 71 | std::unique_ptr parse_sphere_light(const DataTree::Node& node); 72 | 73 | /** 74 | * @brief Parses a rectangle light section. 75 | */ 76 | std::unique_ptr parse_rectangle_light(const DataTree::Node& node); 77 | 78 | public: 79 | Parser(std::string input_path) { 80 | tree = DataTree::build_from_file(input_path.c_str()); 81 | //DataTree::print_tree(tree); 82 | } 83 | 84 | /** 85 | * @brief Parses the next frame in the file, and returns the 86 | * resulting scene, ready for rendering. 87 | */ 88 | std::unique_ptr parse_next_frame(); 89 | }; 90 | 91 | #endif // PARSER_HPP 92 | -------------------------------------------------------------------------------- /parser/utf8.hpp: -------------------------------------------------------------------------------- 1 | #ifndef UTF8_HPP 2 | #define UTF8_HPP 3 | 4 | #include 5 | #include 6 | 7 | class utf8_parse_error: std::exception { 8 | public: 9 | virtual const char* what() const noexcept { 10 | return "Invalid UTF8 sequence."; 11 | } 12 | }; 13 | 14 | /** 15 | * Fetches a single, complete UTF8 code point, returning it as a std::string. 16 | * Returns an empty string on a malformed codepoint. 17 | * 18 | * @param in Reference to a const string iterator where the parsing is to begin. 
19 | * @param end Reference to the corresponding end iterator for the string. 20 | * 21 | * Throws a utf8_parse_error exception on malformed utf8 input. 22 | */ 23 | static inline std::string cur_utf8(const std::string::const_iterator& in, const std::string::const_iterator& end) { 24 | const unsigned char* c = reinterpret_cast(&(*in)); 25 | 26 | if (in == end) 27 | return std::string(""); 28 | 29 | // Determine the length of the encoded codepoint 30 | int len = 0; 31 | if (c[0] < 0b10000000) 32 | len = 1; 33 | else if (c[0] < 0b11000000) 34 | throw utf8_parse_error {}; // Malformed: continuation byte as first byte 35 | else if (c[0] < 0b11100000) 36 | len = 2; 37 | else if (c[0] < 0b11110000) 38 | len = 3; 39 | else if (c[0] < 0b11111000) 40 | len = 4; 41 | else 42 | throw utf8_parse_error {}; // Malformed: current utf8 standard only allows up to four bytes 43 | 44 | if (len == 0 || len > (end-in)) 45 | throw utf8_parse_error {}; // Malformed: not enough bytes 46 | 47 | // Read the rest of the bytes of the codepoint, 48 | // making sure they're proper continuation bytes 49 | for (int i = 1; i < len; ++i) { 50 | if ((c[i] & 0b11000000) != 0b10000000) 51 | throw utf8_parse_error {}; // Malformed: not a continuation byte 52 | } 53 | 54 | // Success! 55 | return std::string(in, in+len); 56 | } 57 | 58 | /** 59 | * Like cur_utf8, except it advances the string iterator after parsing the token. 
60 | */ 61 | static inline std::string next_utf8(std::string::const_iterator& in, const std::string::const_iterator& end) { 62 | std::string c = cur_utf8(in, end); 63 | 64 | in += c.length(); 65 | 66 | return c; 67 | } 68 | 69 | #endif // UTF8_HPP -------------------------------------------------------------------------------- /psychoblend/__init__.py: -------------------------------------------------------------------------------- 1 | bl_info = { 2 | "name": "PsychoBlend", 3 | "version": (0, 1), 4 | "author": "Nathan Vegdahl", 5 | "blender": (2, 70, 0), 6 | "description": "Psychopath renderer integration", 7 | "location": "", 8 | "wiki_url": "https://github.com/cessen/psychopath/wiki", 9 | "tracker_url": "https://github.com/cessen/psychopath/issues", 10 | "category": "Render"} 11 | 12 | 13 | if "bpy" in locals(): 14 | import imp 15 | imp.reload(ui) 16 | imp.reload(psy_export) 17 | imp.reload(render) 18 | else: 19 | from . import ui, psy_export, render 20 | 21 | import bpy 22 | from bpy.types import (AddonPreferences, 23 | PropertyGroup, 24 | Operator, 25 | ) 26 | from bpy.props import (StringProperty, 27 | BoolProperty, 28 | IntProperty, 29 | FloatProperty, 30 | FloatVectorProperty, 31 | EnumProperty, 32 | PointerProperty, 33 | ) 34 | 35 | 36 | # Custom Scene settings 37 | class RenderPsychopathSettingsScene(PropertyGroup): 38 | spp = IntProperty( 39 | name="Samples Per Pixel", description="Total number of samples to take per pixel", 40 | min=1, max=65536, default=16 41 | ) 42 | 43 | dicing_rate = FloatProperty( 44 | name="Dicing Rate", description="The target microgeometry width in pixels", 45 | min=0.0001, max=100.0, soft_min=0.125, soft_max=1.0, default=0.25 46 | ) 47 | 48 | motion_blur_segments = IntProperty( 49 | name="Motion Segments", description="The number of segments to use in motion blur. Zero means no motion blur. 
Will be rounded down to the nearest power of two.", 50 | min=0, max=256, default=0 51 | ) 52 | 53 | shutter_start = FloatProperty( 54 | name="Shutter Open", description="The time during the frame that the shutter opens, for motion blur", 55 | min=-1.0, max=1.0, soft_min=0.0, soft_max=1.0, default=0.0 56 | ) 57 | 58 | shutter_end = FloatProperty( 59 | name="Shutter Close", description="The time during the frame that the shutter closes, for motion blur", 60 | min=-1.0, max=1.0, soft_min=0.0, soft_max=1.0, default=0.5 61 | ) 62 | 63 | export_path = StringProperty( 64 | name="Export Path", description="The path to where the .psy files should be exported when rendering. If left blank, /tmp or the equivalent is used.", 65 | subtype='FILE_PATH' 66 | ) 67 | 68 | # Custom Camera properties 69 | class PsychopathCamera(bpy.types.PropertyGroup): 70 | aperture_radius = FloatProperty( 71 | name="Aperture Radius", description="Size of the camera's aperture, for DoF", 72 | min=0.0, max=10000.0, soft_min=0.0, soft_max=2.0, default=0.0 73 | ) 74 | 75 | # Custom Mesh properties 76 | class PsychopathMesh(bpy.types.PropertyGroup): 77 | is_subdivision_surface = BoolProperty( 78 | name="Is Subdivision Surface", description="Whether this is a sibdivision surface or just a normal mesh", 79 | default=False 80 | ) 81 | 82 | # Psychopath material 83 | class PsychopathMaterial(bpy.types.PropertyGroup): 84 | surface_shader_type = EnumProperty( 85 | name="Surface Shader Type", description="", 86 | items=[('Emit', 'Emit', ""), ('Lambert', 'Lambert', ""), ('GTR', 'GTR', "")], 87 | default="Lambert" 88 | ) 89 | 90 | color = FloatVectorProperty( 91 | name="Color", description="", 92 | subtype='COLOR', 93 | min=0.0, soft_min=0.0, soft_max = 1.0, 94 | default=[0.8,0.8,0.8] 95 | ) 96 | 97 | roughness = FloatProperty( 98 | name="Roughness", description="", 99 | min=-1.0, max=1.0, soft_min=0.0, soft_max=1.0, default=0.1 100 | ) 101 | 102 | tail_shape = FloatProperty( 103 | name="Tail Shape", 
description="", 104 | min=0.0, max=8.0, soft_min=1.0, soft_max=3.0, default=2.0 105 | ) 106 | 107 | fresnel = FloatProperty( 108 | name="Fresnel", description="", 109 | min=0.0, max=1.0, soft_min=0.0, soft_max=1.0, default=0.9 110 | ) 111 | 112 | 113 | # Addon Preferences 114 | class PsychopathPreferences(AddonPreferences): 115 | bl_idname = __name__ 116 | 117 | filepath_psychopath = StringProperty( 118 | name="Psychopath Location", 119 | description="Path to renderer executable", 120 | subtype='DIR_PATH', 121 | ) 122 | 123 | def draw(self, context): 124 | layout = self.layout 125 | layout.prop(self, "filepath_psychopath") 126 | 127 | 128 | ##### REGISTER ##### 129 | def register(): 130 | bpy.utils.register_class(PsychopathPreferences) 131 | bpy.utils.register_class(RenderPsychopathSettingsScene) 132 | bpy.utils.register_class(PsychopathCamera) 133 | bpy.utils.register_class(PsychopathMesh) 134 | bpy.utils.register_class(PsychopathMaterial) 135 | bpy.types.Scene.psychopath = PointerProperty(type=RenderPsychopathSettingsScene) 136 | bpy.types.Camera.psychopath = PointerProperty(type=PsychopathCamera) 137 | bpy.types.Mesh.psychopath = PointerProperty(type=PsychopathMesh) 138 | bpy.types.Material.psychopath = PointerProperty(type=PsychopathMaterial) 139 | render.register() 140 | ui.register() 141 | 142 | 143 | def unregister(): 144 | bpy.utils.unregister_class(PsychopathPreferences) 145 | bpy.utils.unregister_class(RenderPsychopathSettingsScene) 146 | bpy.utils.unregister_class(PsychopathCamera) 147 | bpy.utils.unregister_class(PsychopathMesh) 148 | bpy.utils.unregister_class(PsychopathMaterial) 149 | del bpy.types.Scene.psychopath 150 | del bpy.types.Camera.psychopath 151 | del bpy.types.Mesh.psychopath 152 | del bpy.types.Material.psychopath 153 | render.unregister() 154 | ui.unregister() 155 | -------------------------------------------------------------------------------- /psychoblend/render.py: 
-------------------------------------------------------------------------------- 1 | import bpy 2 | import time 3 | import os 4 | import subprocess 5 | import tempfile 6 | from . import psy_export 7 | 8 | def get_temp_filename(suffix=""): 9 | tmpf = tempfile.mkstemp(suffix=suffix, prefix='tmp') 10 | os.close(tmpf[0]) 11 | return(tmpf[1]) 12 | 13 | class PsychopathRender(bpy.types.RenderEngine): 14 | bl_idname = 'PSYCHOPATH_RENDER' 15 | bl_label = "Psychopath" 16 | DELAY = 1.0 17 | 18 | @staticmethod 19 | def _locate_binary(): 20 | addon_prefs = bpy.context.user_preferences.addons[__package__].preferences 21 | 22 | # Use the system preference if its set. 23 | psy_binary = addon_prefs.filepath_psychopath 24 | if psy_binary: 25 | if os.path.exists(psy_binary): 26 | return psy_binary 27 | else: 28 | print("User Preference to psychopath %r NOT FOUND, checking $PATH" % psy_binary) 29 | 30 | # search the path all os's 31 | psy_binary_default = "psychopath" 32 | 33 | os_path_ls = os.getenv("PATH").split(':') + [""] 34 | 35 | for dir_name in os_path_ls: 36 | psy_binary = os.path.join(dir_name, psy_binary_default) 37 | if os.path.exists(psy_binary): 38 | return psy_binary 39 | return "" 40 | 41 | def _export(self, scene, export_path, render_image_path): 42 | exporter = psy_export.PsychoExporter(scene) 43 | exporter.export_psy(export_path, render_image_path) 44 | 45 | def _render(self, scene, psy_filepath): 46 | psy_binary = PsychopathRender._locate_binary() 47 | if not psy_binary: 48 | print("Psychopath: could not execute psychopath, possibly Psychopath isn't installed") 49 | return False 50 | 51 | # TODO: figure out command line options 52 | args = ["-i", psy_filepath] 53 | 54 | # Start Rendering! 
55 | try: 56 | self._process = subprocess.Popen([psy_binary] + args, 57 | stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 58 | except OSError: 59 | # TODO, report api 60 | print("Psychopath: could not execute '%s'" % psy_binary) 61 | import traceback 62 | traceback.print_exc() 63 | print ("***-DONE-***") 64 | return False 65 | 66 | return True 67 | 68 | 69 | def _cleanup(self): 70 | # for f in (self._temp_file_in, self._temp_file_ini, self._temp_file_out): 71 | # for i in range(5): 72 | # try: 73 | # os.unlink(f) 74 | # break 75 | # except OSError: 76 | # # Wait a bit before retrying file might be still in use by Blender, 77 | # # and Windows does not know how to delete a file in use! 78 | # time.sleep(self.DELAY) 79 | # for i in unpacked_images: 80 | # for c in range(5): 81 | # try: 82 | # os.unlink(i) 83 | # break 84 | # except OSError: 85 | # # Wait a bit before retrying file might be still in use by Blender, 86 | # # and Windows does not know how to delete a file in use! 87 | # time.sleep(self.DELAY) 88 | pass 89 | 90 | def render(self, scene): 91 | # has to be called to update the frame on exporting animations 92 | scene.frame_set(scene.frame_current) 93 | 94 | export_path = scene.psychopath.export_path 95 | if export_path != "": 96 | export_path += "_%d.psy" % scene.frame_current 97 | else: 98 | # Create a temporary file for exporting 99 | export_path = get_temp_filename('.psy') 100 | 101 | # Create a temporary file to render into 102 | render_image_path = get_temp_filename('.png') 103 | 104 | # start export 105 | self.update_stats("", "Psychopath: Exporting data from Blender") 106 | self._export(scene, export_path, render_image_path) 107 | 108 | # Start rendering 109 | self.update_stats("", "Psychopath: Rendering from exported file") 110 | if not self._render(scene, export_path): 111 | self.update_stats("", "Psychopath: Not found") 112 | return 113 | 114 | r = scene.render 115 | # compute resolution 116 | x = int(r.resolution_x * r.resolution_percentage) 
117 | y = int(r.resolution_y * r.resolution_percentage) 118 | 119 | result = self.begin_result(0, 0, x, y) 120 | lay = result.layers[0] 121 | 122 | # TODO: Update viewport with render result while rendering 123 | while self._process.poll() == None: 124 | # Wait for self.DELAY seconds, but check for render cancels 125 | # while waiting. 126 | t = 0.0 127 | while t < self.DELAY: 128 | if self.test_break(): 129 | self._process.terminate() 130 | break 131 | time.sleep(0.05) 132 | t += 0.05 133 | # # Update viewport image with latest render output 134 | # if os.path.exists(render_image_path): 135 | # # This assumes the file has been fully written We wait a bit, just in case! 136 | # try: 137 | # lay.load_from_file(render_image_path) 138 | # self.update_result(result) 139 | # except RuntimeError: 140 | # pass 141 | 142 | # Load final image 143 | lay.load_from_file(render_image_path) 144 | self.end_result(result) 145 | 146 | # Delete temporary image file 147 | os.remove(render_image_path) 148 | 149 | def register(): 150 | bpy.utils.register_class(PsychopathRender) 151 | 152 | def unregister(): 153 | bpy.utils.unregister_class(PsychopathRender) 154 | -------------------------------------------------------------------------------- /renderer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(renderer 2 | renderer) 3 | -------------------------------------------------------------------------------- /renderer/renderer.cpp: -------------------------------------------------------------------------------- 1 | #include "renderer.hpp" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "numtype.h" 9 | 10 | #include "timer.hpp" 11 | 12 | #include "rng.hpp" 13 | #include "integrator.hpp" 14 | #include "path_trace_integrator.hpp" 15 | #include "tracer.hpp" 16 | #include "scene.hpp" 17 | #include "film.hpp" 18 | 19 | #include "config.hpp" 20 | #include "global.hpp" 21 | 22 | #define GAMMA 2.2 23 | 24 | void 
write_png_from_film(Film *image, std::string path, float min_time=4.0) { 25 | static Timer<> timer; 26 | 27 | if ((timer.time() > min_time || min_time == 0.0f) && !Config::no_output) { 28 | timer.reset(); 29 | 30 | // Convert to dithered sRGB 31 | std::vector im {image->scanline_image_8bbc()}; 32 | // Save image 33 | std::unique_ptr out {OpenImageIO::ImageOutput::create(".png")}; 34 | if (!out) { 35 | return; 36 | } 37 | OpenImageIO::ImageSpec spec(image->width, image->height, 3, OpenImageIO::TypeDesc::UINT8); 38 | out->open(path, spec); 39 | out->write_image(OpenImageIO::TypeDesc::UINT8, &(im[0])); 40 | out->close(); 41 | } 42 | } 43 | 44 | 45 | bool Renderer::render(int thread_count) { 46 | Timer<> timer; // Start timer 47 | 48 | // Clear rendering statistics 49 | Global::Stats::clear(); 50 | 51 | RNG rng; 52 | std::unique_ptr image {new Film(res_x, res_y, 53 | -1.0, -((static_cast(res_y))/res_x), 54 | 1.0, ((static_cast(res_y))/res_x)) 55 | }; 56 | image->si_x1 = subimage_x1; 57 | image->si_y1 = subimage_y1; 58 | image->si_x2 = subimage_x2; 59 | image->si_y2 = subimage_y2; 60 | 61 | // Save blank image before rendering 62 | write_png_from_film(image.get(), output_path, 0.0f); 63 | 64 | // Image writer callback 65 | std::function image_writer = std::bind(write_png_from_film, image.get(), output_path, 10.0); 66 | 67 | PathTraceIntegrator integrator(scene.get(), image.get(), spp, spp_max, variance_max, seed, thread_count, image_writer); 68 | 69 | std::cout << "Integrator prep time (seconds): " << timer.time() << std::endl; 70 | timer.reset(); 71 | 72 | std::cout << "Rendering" << std::flush; 73 | integrator.integrate(); 74 | std::cout << std::endl; 75 | 76 | 77 | // Save image 78 | write_png_from_film(image.get(), output_path, 0.0f); 79 | 80 | #if 0 81 | // Print statistics 82 | 83 | std::cout << "Rays shot while rendering: " << Global::Stats::rays_shot << std::endl; 84 | #ifdef GLOBAL_STATS_TOP_LEVEL_BVH_NODE_TESTS 85 | std::cout << "Top-level BVH node tests: " << 
Global::Stats::top_level_bvh_node_tests << std::endl; 86 | #endif 87 | std::cout << "Primitive-ray tests during rendering: " << Global::Stats::primitive_ray_tests << std::endl; 88 | std::cout << "Splits during rendering: " << Global::Stats::split_count << std::endl; 89 | std::cout << "MicroSurface cache misses during rendering: " << Global::Stats::cache_misses << std::endl; 90 | std::cout << "NaN's encountered: " << Global::Stats::nan_count << std::endl; 91 | std::cout << "Bad Inf's encountered: " << Global::Stats::inf_count << std::endl; 92 | #endif 93 | 94 | std::cout << "Render time (seconds): " << timer.time() << std::endl; 95 | 96 | 97 | // Finished 98 | return true; 99 | } 100 | -------------------------------------------------------------------------------- /renderer/renderer.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file and renderer.cpp define a Renderer class, which serves as 3 | * as the API for setting up, running, and controlling a render. 4 | */ 5 | #ifndef RENDERER_HPP 6 | #define RENDERER_HPP 7 | 8 | #include "numtype.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "scene.hpp" 15 | 16 | /** 17 | * @brief Manages a render. 18 | * 19 | * The Renderer is responsible for doing the actual rendering. It is given 20 | * 3d scene that has already been setup, and it dives in and tears it 21 | * to pieces. The result is an image or images. 22 | * 23 | * The Renderer is responsible for knowing: 24 | * - Where to output the render result (e.g. to a file, or files, to 25 | * another process...) 26 | * - What "passes" to output (light path expressions) and in what format. 27 | * - What resolution to render with. 28 | * - How to manage resources during rendering (number of threads to use, RAM 29 | * usage limits, max grid size, bucket size, ray buffer size...) 
30 | * - Render quality settings (number of samples, adaptive sampling settings, 31 | * dicing rate, color clamping...). 32 | * 33 | * Essentially, anything that is not part of the scene description is entirely 34 | * the responsibility of the renderer. 35 | */ 36 | class Renderer { 37 | private: 38 | uint res_x, res_y; 39 | uint subimage_x1, subimage_y1, subimage_x2, subimage_y2; 40 | uint spp; 41 | uint spp_max; 42 | float variance_max; 43 | uint seed; 44 | std::string output_path; 45 | 46 | public: 47 | std::unique_ptr scene; 48 | 49 | Renderer(Scene *scene, uint res_x, uint res_y, uint spp, uint spp_max, float variance_max, uint seed, std::string output_path): 50 | res_x {res_x}, 51 | res_y {res_y}, 52 | subimage_x1 {0}, subimage_y1 {0}, subimage_x2 {res_x}, subimage_y2 {res_y}, 53 | spp {spp}, 54 | spp_max {spp_max}, 55 | variance_max {variance_max}, 56 | seed {seed}, 57 | output_path {output_path}, 58 | scene {scene} 59 | {} 60 | 61 | void set_resolution(int res_x_, int res_y_) { 62 | res_x = res_x_; 63 | res_y = res_y_; 64 | } 65 | 66 | void set_subimage(int subimage_x1_, int subimage_y1_, int subimage_x2_, int subimage_y2_) { 67 | subimage_x1 = subimage_x1_; 68 | subimage_y1 = subimage_y1_; 69 | subimage_x2 = subimage_x2_; 70 | subimage_y2 = subimage_y2_; 71 | } 72 | 73 | void set_spp(int spp_) { 74 | spp = spp_; 75 | } 76 | 77 | void set_spp_max(int spp_max_) { 78 | spp_max = spp_max_; 79 | } 80 | 81 | void set_variance_max(float variance_max_) { 82 | variance_max = variance_max_; 83 | } 84 | 85 | // Starts a render with the given number of threads. 
86 | bool render(int thread_count=1); 87 | }; 88 | 89 | 90 | 91 | #endif // RENDERER_HPP 92 | 93 | -------------------------------------------------------------------------------- /sampling/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(sampling 2 | image_sampler sobol halton) 3 | -------------------------------------------------------------------------------- /sampling/halton.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 Leonhard Gruenschloss (leonhard@gruenschloss.org) 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights to 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | // of the Software, and to permit persons to whom the Software is furnished to do 8 | // so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | // SOFTWARE. 
20 | 21 | #ifndef HALTON_HPP 22 | #define HALTON_HPP 23 | 24 | #include "numtype.h" 25 | 26 | namespace Halton { 27 | 28 | uint32_t max_dimension(); 29 | 30 | float sample(const uint32_t dimension, const uint32_t index); 31 | 32 | } 33 | 34 | #endif // HALTON_HPP 35 | 36 | -------------------------------------------------------------------------------- /sampling/image_sampler.cpp: -------------------------------------------------------------------------------- 1 | #include "numtype.h" 2 | 3 | #include "halton.hpp" 4 | #include "rng.hpp" 5 | #include "image_sampler.hpp" 6 | #include "hilbert.hpp" 7 | #include "morton.hpp" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | ImageSampler::ImageSampler(uint spp, 18 | uint res_x, uint res_y, 19 | uint seed): 20 | spp {spp}, res_x {res_x}, res_y {res_y}, rng {seed}, hash {seed}, seed {seed} { 21 | 22 | x = 0; 23 | y = 0; 24 | s = 0; 25 | 26 | samp_taken = 0; 27 | tot_samp = spp * res_x * res_y; 28 | 29 | // Determine square power of two resolution to cover entire image 30 | uint dim = res_x > res_y ? res_x : res_y; 31 | uint curve_order = 1; 32 | curve_res = 2; 33 | while (curve_res < dim) { 34 | curve_res <<= 1; 35 | curve_order++; 36 | } 37 | points_traversed = 0; 38 | } 39 | 40 | 41 | ImageSampler::~ImageSampler() { 42 | } 43 | 44 | 45 | void ImageSampler::get_sample(uint32_t x, uint32_t y, uint32_t d, uint32_t ns, float *sample, uint16_t *coords) { 46 | if (coords != nullptr) { 47 | coords[0] = x; 48 | coords[1] = y; 49 | } 50 | 51 | static const std::array d_order {{7, 6, 5, 4, 2, 9, 8, 3, 1, 0}}; // Reorder the first several dimensions for least image variance 52 | 53 | // Hash the x and y indices of the pixel and use that as an offset 54 | // into the LDS sequence. This gives the image a more random appearance 55 | // before converging, which is less distracting than the LDS patterns. 
56 | // But since within each pixel the samples are contiguous LDS sequences 57 | // this still gives very good convergence properties. 58 | // This also means that each pixel can keep drawing samples in a 59 | // "bottomless" kind of way, which is nice for e.g. adaptive sampling. 60 | uint32_t h = x ^ ((y >> 16) | (y << 16)); 61 | const uint32_t samp_i = d + hash.get_int(h); 62 | 63 | // Generate the sample 64 | size_t i = 0; 65 | for (; i < ns && i < d_order.size(); ++i) 66 | sample[i] = Halton::sample(d_order[i], samp_i); 67 | for (; i < ns; ++i) 68 | sample[i] = Halton::sample(i, samp_i); 69 | } 70 | 71 | 72 | /** 73 | * @brief Itteratively produces samples for an image. 74 | * 75 | * It provides x, y, u, v, and t coordinates always. 76 | * On top of that, additional coordinates can be requested via the ns 77 | * parameter. 78 | * 79 | * @param[out] sample A pointer where the sample is stored. 80 | * @param ns The number of additional coordinates to provide. 81 | */ 82 | //#define PROGRESSIVE_SAMPLING 83 | #ifndef PROGRESSIVE_SAMPLING 84 | bool ImageSampler::get_next_sample(uint32_t ns, float *sample, uint16_t *coords) { 85 | //std::cout << s << " " << x << " " << y << std::endl; 86 | // Check if we're done 87 | if (points_traversed >= (curve_res*curve_res)) 88 | return false; 89 | 90 | get_sample(x, y, s, ns, sample, coords); 91 | 92 | // increment to next sample 93 | samp_taken++; 94 | s++; 95 | if (s >= spp) { 96 | s = 0; 97 | 98 | // Space-filling curve traverses pixels 99 | do { 100 | Morton::d2xy(points_traversed, &x, &y); 101 | points_traversed++; 102 | if (points_traversed >= (curve_res*curve_res)) 103 | return false; 104 | } while (x >= res_x || y >= res_y); 105 | } 106 | 107 | return true; 108 | } 109 | #else 110 | bool ImageSampler::get_next_sample(uint32_t ns, float *sample, uint16_t *coords) { 111 | //std::cout << s << " " << x << " " << y << std::endl; 112 | // Check if we're done 113 | if (points_traversed >= (curve_res*curve_res) && s >= spp) 
114 | return false; 115 | 116 | get_sample(x, y, s, ns, sample, coords); 117 | 118 | samp_taken++; 119 | 120 | // Space-filling curve traverses pixels 121 | do { 122 | Morton::d2xy(points_traversed, &x, &y); 123 | points_traversed++; 124 | if (points_traversed >= (curve_res*curve_res)) { 125 | x = y = points_traversed = 0; 126 | 127 | // increment to next sample 128 | s++; 129 | if (s >= spp) 130 | return false; 131 | } 132 | } while (x >= res_x || y >= res_y); 133 | 134 | 135 | return true; 136 | } 137 | #endif 138 | -------------------------------------------------------------------------------- /sampling/image_sampler.hpp: -------------------------------------------------------------------------------- 1 | #ifndef IMAGE_SAMPLER_HPP 2 | #define IMAGE_SAMPLER_HPP 3 | 4 | #include "numtype.h" 5 | 6 | #include "halton.hpp" 7 | #include "rng.hpp" 8 | #include "hash.hpp" 9 | #include 10 | 11 | 12 | 13 | /** 14 | * A sampler for a single "item" which requires a multi-dimensional sample. 15 | */ 16 | struct Sampler { 17 | uint32_t offset; 18 | uint32_t dim = 0; 19 | 20 | Sampler(): offset {0} {} 21 | Sampler(uint32_t x, uint32_t y, uint32_t n, uint32_t seed) { 22 | offset = hash_u32(x ^ ((y >> 16) | (y << 16)), seed) + n; 23 | } 24 | 25 | float get_sample(const uint32_t dimension) const { 26 | static const std::array d_order {{10, 7, 6, 5, 4, 2, 9, 8, 3, 1, 0}}; // Reorder the first several dimensions for least image variance 27 | 28 | if (dimension < d_order.size()) { 29 | return Halton::sample(d_order[dimension], offset); 30 | } else { 31 | return Halton::sample(dimension, offset); 32 | } 33 | } 34 | 35 | float next() { 36 | return get_sample(dim++); 37 | } 38 | }; 39 | 40 | 41 | /* 42 | * An image sampler. Returns samples for use by the renderer. 43 | * Image plane samples are returned on the [0,1] square, + edge buffer for filtering. 44 | * Lens samples are returned on the [0,1) square. 45 | * Time samples are returned on the [0,1) line. 
46 | * All 1d, 2d, and 3d samples are returned on the [0,1) line, square, 47 | * and cube respectively. 48 | * The renderer is expected to transform sample ranges as necessary. 49 | */ 50 | class ImageSampler { 51 | private: 52 | /* General settings. */ 53 | uint spp; // Approximate number of samples per pixel 54 | uint res_x, res_y; // Image resolution in pixels 55 | 56 | /* State information. */ 57 | uint curve_res; // Space filling curve resolution 58 | uint points_traversed; 59 | uint32_t x, y, s; 60 | 61 | /* For reporting percentages. */ 62 | uint samp_taken; 63 | uint tot_samp; 64 | 65 | /* Random number generator. */ 66 | RNG rng; 67 | Hash hash; 68 | uint32_t seed; 69 | 70 | public: 71 | ImageSampler(uint spp, 72 | uint res_x, uint res_y, 73 | uint seed=0); 74 | ~ImageSampler(); 75 | 76 | void init_tile(); 77 | Sampler get_single_sampler(uint32_t x, uint32_t y, uint32_t i) { 78 | return Sampler(x, y, i, seed); 79 | } 80 | void get_sample(uint32_t x, uint32_t y, uint32_t d, uint32_t ns, float *sample, uint16_t *coords=nullptr); 81 | bool get_next_sample(uint32_t ns, float *sample, uint16_t *coords=nullptr); 82 | 83 | float percentage() const { 84 | return ((float)(samp_taken)) / tot_samp; 85 | } 86 | }; 87 | 88 | 89 | 90 | #endif 91 | -------------------------------------------------------------------------------- /sampling/samples.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLES_HPP 2 | #define SAMPLES_HPP 3 | 4 | 5 | 6 | #endif // SAMPLES_HPP -------------------------------------------------------------------------------- /sampling/sobol.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 Leonhard Gruenschloss (leonhard@gruenschloss.org) 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without 
namespace Sobol {

/**
 * Generator matrices for the Sobol' sequence.
 *
 * The table itself is only declared here.  It is indexed as
 * matrices[dimension * size + bit], i.e. each dimension owns `size`
 * consecutive entries.
 */
struct Matrices {
    static const unsigned num_dimensions = 1024;
    static const unsigned size = 52;
    static const unsigned long long matrices[];
};

/**
 * Compute one component of the Sobol'-sequence, where the component
 * corresponds to the dimension parameter, and the index specifies
 * the point inside the sequence.  The scramble parameter can be used
 * to permute elementary intervals, and might be chosen randomly to
 * generate a randomized QMC sequence.  Only the Matrices::size least
 * significant bits of the scramble value are used.
 *
 * Likewise only the Matrices::size least significant bits of index are
 * used: each dimension has exactly that many matrix entries, so higher
 * bits have no entry to select and are ignored rather than reading into
 * the next dimension's entries (or past the end of the table).
 *
 * @param dimension Sequence dimension, must be < Matrices::num_dimensions.
 * @param index     Index of the point within the sequence.
 * @param scramble  Optional scrambling value.
 *
 * @return The sample value, scaled by 1 / 2^Matrices::size.
 */
inline double sample(
    const unsigned dimension,
    unsigned long long index,
    const unsigned long long scramble = 0ULL) {
    assert(dimension < Matrices::num_dimensions);

    // Mask selecting the low Matrices::size bits
    const unsigned long long low_bits = (1ULL << Matrices::size) - 1ULL;
    unsigned long long result = scramble & low_bits;

    // First matrix entry belonging to this dimension
    const unsigned base = dimension * Matrices::size;

    for (unsigned bit = 0; index != 0 && bit < Matrices::size; index >>= 1, ++bit) {
        if (index & 1ULL)
            result ^= Matrices::matrices[base + bit];
    }

    return result * (1.0 / (1ULL << Matrices::size));
}

} // namespace Sobol
32 | */ 33 | struct Scene { 34 | std::string name; 35 | 36 | Color background_color; 37 | 38 | std::unique_ptr camera; // The camera of the scene 39 | 40 | std::unique_ptr root; // The root assembly of the scene 41 | 42 | 43 | Scene() { 44 | background_color = Color(0.0f, 0.0f, 0.0f); 45 | root = std::unique_ptr(new Assembly()); 46 | } 47 | 48 | 49 | // Finalizes the scene for rendering 50 | void finalize() { 51 | root->finalize(); 52 | } 53 | }; 54 | 55 | #endif // SCENE_H 56 | -------------------------------------------------------------------------------- /shading/closure_union.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CLOSURE_UNION_HPP 2 | #define CLOSURE_UNION_HPP 3 | 4 | #include 5 | #include "surface_closure.hpp" 6 | 7 | /** 8 | * A structure that uses type erasure to contain any surface closure. 9 | * 10 | * init() should be used to initialize the structure from a surface closure 11 | * of some kind. 12 | * 13 | * get() should be used to utilize the contained closure via the returned 14 | * SurfaceClosure pointer. 15 | */ 16 | struct SurfaceClosureUnion { 17 | // The following should always be the size and alignment of the 18 | // largest and largest-aligning surface closure, respectively. 19 | alignas(GTRClosure) char data[sizeof(GTRClosure)]; 20 | 21 | /** 22 | * Properly initialize the struct from any surface closure. 23 | */ 24 | template 25 | void init(CLOSURE_TYPE closure) { 26 | static_assert(std::is_base_of::value, "CLOSURE_TYPE is not derived from SurfaceClosure."); 27 | new(reinterpret_cast(data)) CLOSURE_TYPE(closure); 28 | } 29 | 30 | /** 31 | * Return a pointer to the underlying SurfaceClosure. 32 | */ 33 | SurfaceClosure* get() { 34 | return reinterpret_cast(data); 35 | } 36 | 37 | /** 38 | * Return a pointer to the underlying SurfaceClosure. 
39 | */ 40 | const SurfaceClosure* get() const { 41 | return reinterpret_cast(data); 42 | } 43 | }; 44 | 45 | #endif // CLOSURE_UNION_HPP -------------------------------------------------------------------------------- /shading/displacement_shader.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DISPLACEMENT_SHADER_HPP 2 | #define DISPLACEMENT_SHADER_HPP 3 | 4 | class DisplacementShader { 5 | ~DisplacementShader() {} 6 | 7 | /** 8 | * @brief Evaluates the displacement shader for the given surface 9 | * parameters. 10 | * 11 | * TODO: differential geometry as input. 12 | * TODO: surface normal and normal differentials in output. 13 | * 14 | * @param u Surface U parameter. 15 | * @param v Surface V parameter. 16 | * @param id Surface id number. 17 | * 18 | * @return A BBox, with min an max displacement coordinates 19 | */ 20 | virtual BBox evaluate(float32 u, float32 v, uint_i id) = 0; 21 | }; 22 | 23 | #endif // DISPLACEMENT_SHADER_HPP -------------------------------------------------------------------------------- /shading/surface_shader.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SURFACE_SHADER_HPP 2 | #define SURFACE_SHADER_HPP 3 | 4 | #include "numtype.h" 5 | 6 | #include "intersection.hpp" 7 | #include "surface_closure.hpp" 8 | #include "closure_union.hpp" 9 | 10 | class SurfaceShader { 11 | public: 12 | virtual ~SurfaceShader() {} 13 | 14 | /** 15 | * @brief Calculates the SurfaceClosure(s) and their pdfs for the given 16 | * intersection. 17 | * 18 | * @param inter The surface intersection data. This is an in/out parameter: 19 | * the geometry, transform, ray data, etc. is 'in' and the 20 | * closure data is 'out'. 21 | * 22 | * @returns True on success, false on failure. 
23 | */ 24 | virtual bool shade(Intersection* inter) const = 0; 25 | }; 26 | 27 | 28 | class EmitShader: public SurfaceShader { 29 | public: 30 | Color col; 31 | 32 | EmitShader(Color col): col {col} {} 33 | 34 | virtual bool shade(Intersection* inter) const override final { 35 | inter->surface_closure.init(EmitClosure(col)); 36 | inter->closure_prob = 1.0f; 37 | return true; 38 | } 39 | }; 40 | 41 | 42 | class LambertShader: public SurfaceShader { 43 | public: 44 | Color col; 45 | 46 | LambertShader(Color col): col {col} {} 47 | 48 | virtual bool shade(Intersection* inter) const override final { 49 | inter->surface_closure.init(LambertClosure(col)); 50 | inter->closure_prob = 1.0f; 51 | return true; 52 | } 53 | }; 54 | 55 | 56 | class GTRShader: public SurfaceShader { 57 | public: 58 | Color col; 59 | float roughness; 60 | float tail_shape; 61 | float fresnel; 62 | 63 | GTRShader(Color col, float roughness, float tail_shape, float fresnel): col {col}, roughness {roughness}, tail_shape {tail_shape}, fresnel {fresnel} 64 | {} 65 | 66 | virtual bool shade(Intersection* inter) const override final { 67 | inter->surface_closure.init(GTRClosure(col, roughness, tail_shape, fresnel)); 68 | inter->closure_prob = 1.0f; 69 | return true; 70 | } 71 | }; 72 | 73 | #endif // SURFACE_SHADER_HPP -------------------------------------------------------------------------------- /test/test.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TEST_HPP 2 | #define TEST_HPP 3 | 4 | #include "catch.hpp" 5 | 6 | #endif // TEST_HPP 7 | 8 | -------------------------------------------------------------------------------- /test/test_float.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | 3 | #include 4 | #include 5 | 6 | 7 | /* 8 | ************************************************************************ 9 | * Testing suite for floating point values. 
10 | ************************************************************************ 11 | */ 12 | 13 | TEST_CASE("float") { 14 | SECTION("inf_1") { 15 | float yar = std::numeric_limits::infinity(); 16 | float foo = -std::numeric_limits::infinity(); 17 | 18 | REQUIRE(std::isinf(yar)); 19 | REQUIRE(std::isinf(foo)); 20 | } 21 | 22 | SECTION("inf_2") { 23 | float yar1 = 1.0f / 0.0f; 24 | float yar2 = 1.0f / -0.0f; 25 | float foo1 = -1.0f / 0.0f; 26 | float foo2 = -1.0f / -0.0f; 27 | 28 | REQUIRE(std::isinf(yar1)); 29 | REQUIRE(std::isinf(yar2)); 30 | REQUIRE(std::isinf(foo1)); 31 | REQUIRE(std::isinf(foo2)); 32 | } 33 | 34 | 35 | SECTION("nan_1") { 36 | float yar = std::numeric_limits::quiet_NaN(); 37 | float foo = std::numeric_limits::signaling_NaN(); 38 | 39 | REQUIRE(std::isnan(yar)); 40 | REQUIRE(std::isnan(foo)); 41 | REQUIRE(yar != yar); 42 | REQUIRE(foo != foo); 43 | } 44 | 45 | SECTION("nan_2") { 46 | float yar1 = 0.0f / 0.0f; 47 | float yar2 = 0.0f / -0.0f; 48 | float yar3 = -0.0f / 0.0f; 49 | float yar4 = -0.0f / -0.0f; 50 | 51 | REQUIRE(std::isnan(yar1)); 52 | REQUIRE(std::isnan(yar2)); 53 | REQUIRE(std::isnan(yar3)); 54 | REQUIRE(std::isnan(yar4)); 55 | } 56 | 57 | SECTION("nan_3") { 58 | float yar1 = 1.0f + std::numeric_limits::quiet_NaN(); 59 | float yar2 = 1.0f - std::numeric_limits::quiet_NaN(); 60 | float yar3 = 1.0f * std::numeric_limits::quiet_NaN(); 61 | float yar4 = 1.0f / std::numeric_limits::quiet_NaN(); 62 | 63 | REQUIRE(std::isnan(yar1)); 64 | REQUIRE(std::isnan(yar2)); 65 | REQUIRE(std::isnan(yar3)); 66 | REQUIRE(std::isnan(yar4)); 67 | } 68 | } 69 | 70 | 71 | -------------------------------------------------------------------------------- /test/test_main.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_MAIN 2 | #include "catch.hpp" -------------------------------------------------------------------------------- /tracer/CMakeLists.txt: 
/**
 * @brief Records information about a potential intersection with an object.
 */
struct PotentialInter {
    size_t object_id;
    size_t ray_index;
    float nearest_hit_t; // The nearest possible hit distance along the ray
    bool valid; // The potential intersection data is filled and valid
    uint8_t tag; // Used for misc purposes

    // Orders potential intersections by object id only
    bool operator<(const PotentialInter &b) const {
        return object_id < b.object_id;
    }
};

/**
 * @brief Comparison function ordering potential intersections by object id.
 *
 * Declared inline rather than static: this lives in a header, and a static
 * function would give every including translation unit its own copy (and
 * an unused-function warning wherever it is not called).
 *
 * @return True if a's object id is less than b's.
 */
inline bool compare_potint(const PotentialInter &a, const PotentialInter &b) {
    // Sort by object id
    return a < b;
}

/**
 * @brief Returns the key used to index a potential intersection: its
 *        object id.
 */
inline size_t index_potint(const PotentialInter &a) {
    return a.object_id;
}
26 | * It does _not_ manage the specific integration algorithm, or shading. Only 27 | * the tracing of rays and calculating the relevant information about ray 28 | * hits. 29 | * 30 | * It is specifically designed to handle tracing a large number of rays 31 | * (ideally > a million, as ram allows) simultaneously to gain efficiency 32 | * in various ways. The rays do not need to be related to each other or 33 | * coherent in any way. 34 | * 35 | * It is, of course, also capable of tracing a single ray at a time or a small 36 | * number of rays at a time if necessary. But doing so may be far less 37 | * efficient depending on the scene. 38 | * 39 | * The simplest usage is to add a bunch of rays to the Tracer's queue with 40 | * queue_rays(), and then trace them all by calling trace_rays(). The 41 | * resulting intersection data is stored in the rays' data structures directly. 42 | * Wash, rinse, repeat. 43 | */ 44 | class Tracer { 45 | public: 46 | Scene *scene; 47 | Range w_rays; // Rays to trace 48 | Range intersections; // Resulting intersections 49 | std::vector rays; 50 | RNG rng; 51 | std::vector surface_shader_stack; 52 | Stack xform_stack; // Stack for transforms as we traverse into transform hierarchies 53 | Stack data_stack; // Stack for arbitrary POD data, passed to other functions 54 | InstanceID element_id; 55 | int element_id_pos = 0; 56 | 57 | Tracer(): xform_stack(16*4*256*64, 256), data_stack(1024*1024*8, 256) { 58 | surface_shader_stack.reserve(64); 59 | } 60 | 61 | Tracer(Scene *scene_): scene {scene_}, xform_stack(16*4*256*64, 256), data_stack(1024*1024*8, 256) { 62 | surface_shader_stack.reserve(64); 63 | } 64 | 65 | void set_seed(uint32_t seed) { 66 | rng.seed(seed); 67 | } 68 | 69 | 70 | /** 71 | * Traces the provided rays, filling in the corresponding intersections. 72 | * 73 | * @param [in] rays_ The rays to be traced. 74 | * @param [out] intersections_ The resulting intersections. 
75 | */ 76 | uint32_t trace(const WorldRay* w_rays_begin, const WorldRay* w_rays_end, Intersection* intersections_begin, Intersection* intersections_end); 77 | 78 | private: 79 | // Various methods for tracing different object types 80 | void trace_assembly(Assembly* assembly, Ray* rays, Ray* rays_end); 81 | void trace_surface(Surface* surface, Ray* rays, Ray* end); 82 | void trace_complex_surface(ComplexSurface* surface, Ray* rays, Ray* end); 83 | void trace_patch_surface(PatchSurface* surface, Ray* rays, Ray* end); 84 | void trace_lightsource(Light* light, Ray* rays, Ray* end); 85 | }; 86 | 87 | #endif // TRACER_HPP 88 | 89 | -------------------------------------------------------------------------------- /utils/bit_stack.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BIT_STACK_HPP 2 | #define BIT_STACK_HPP 3 | 4 | #include 5 | 6 | /** 7 | * A bit field for use as a stack of boolean values, with 8 | * push, pop, and peek operations. 9 | */ 10 | template 11 | class BitStack { 12 | enum { 13 | NUM_BITS = sizeof(INT_TYPE) * 8 14 | }; 15 | 16 | INT_TYPE bits = 0; 17 | 18 | public: 19 | BitStack() {} 20 | BitStack(INT_TYPE i): bits {i} {} 21 | 22 | /** 23 | * Push a bit onto the top of the stack. 24 | */ 25 | void push(bool value) { 26 | assert(bits >> (NUM_BITS-1) == 0); // Verify no stack overflow 27 | bits <<= 1; 28 | bits |= static_cast(value); 29 | } 30 | 31 | /** 32 | * Push n bits onto the top of the stack. The input 33 | * bits are passed as an integer, with the bit that 34 | * will be on top in the least significant digit, and 35 | * the rest following in order from there. 36 | */ 37 | void push(uint32_t value, int n) { 38 | assert(n < NUM_BITS && (bits >> (NUM_BITS-n)) == 0); // Verify no stack overflow 39 | assert(n < 32); // Verify staying within input size 40 | bits <<= n; 41 | bits |= value & ((1<>= 1; 50 | return b; 51 | } 52 | 53 | /** 54 | * Pop the top n bits off the stack. 
The bits are returned as 55 | * an integer, with the top bit in the least significant digit, 56 | * and the rest following in order from there. 57 | */ 58 | uint32_t pop(int n) { 59 | assert(n < NUM_BITS); // Can't pop more than we have 60 | assert(n < 32); // Can't pop more than the return type can hold 61 | const uint32_t b = static_cast(bits) & ((1<>= n; 63 | return b; 64 | } 65 | 66 | /** 67 | * Read the top bit of the stack without popping it. 68 | */ 69 | bool peek() const { 70 | return bits & 1; 71 | } 72 | 73 | /** 74 | * Read the top n bits of the stack without popping them. The bits 75 | * are returned as an integer, with the top bit in the least 76 | * significant digit, and the rest following in order from there. 77 | */ 78 | bool peek(int n) const { 79 | assert(n < NUM_BITS); // Can't return more than we have 80 | assert(n < 32); // Can't return more than the return type can hold 81 | return static_cast(bits) & ((1<> pos) & 1; 90 | } 91 | }; 92 | 93 | 94 | /** 95 | * A bit field for use as a stack of boolean values, with 96 | * push, pop, and peek operations. Uses two integer types 97 | * to give twice the stack size at BitStack. 98 | */ 99 | template 100 | class BitStack2 { 101 | enum { 102 | INT_SIZE = sizeof(INT_TYPE) * 8, 103 | NUM_BITS = sizeof(INT_TYPE) * 16 104 | }; 105 | 106 | INT_TYPE bits1, bits2; 107 | 108 | public: 109 | BitStack2() {} 110 | BitStack2(INT_TYPE i): bits1 {i} {} 111 | 112 | /** 113 | * Push a bit onto the top of the stack. 114 | */ 115 | void push(bool value) { 116 | assert(bits2 >> (INT_SIZE-1) == 0); // Verify no stack overflow 117 | bits2 = (bits2 << 1) | (bits1 >> (INT_SIZE-1)); 118 | bits1 <<= 1; 119 | bits1 |= static_cast(value); 120 | } 121 | 122 | /** 123 | * Push n bits onto the top of the stack. The input 124 | * bits are passed as an integer, with the bit that 125 | * will be on top in the least significant digit, and 126 | * the rest following in order from there. 
127 | */ 128 | void push(uint32_t value, int n) { 129 | assert(n < NUM_BITS && (bits2 >> (INT_SIZE-n)) == 0); // Verify no stack overflow 130 | assert(n < 32); // Verify staying within input size 131 | bits2 = (bits2 << n) | (bits1 >> (INT_SIZE-n)); 132 | bits1 <<= n; 133 | bits1 |= value & ((1<> 1) | (bits2 << (INT_SIZE-1)); 142 | bits2 >>= 1; 143 | return b; 144 | } 145 | 146 | /** 147 | * Pop the top n bits off the stack. The bits are returned as 148 | * an integer, with the top bit in the least significant digit, 149 | * and the rest following in order from there. 150 | */ 151 | uint32_t pop(int n) { 152 | assert(n < NUM_BITS); // Can't pop more than we have 153 | assert(n < 32); // Can't pop more than the return type can hold 154 | const uint32_t b = static_cast(bits1) & ((1<> n) | (bits2 << (INT_SIZE-n)); 156 | bits2 >>= n; 157 | return b; 158 | } 159 | 160 | /** 161 | * Read the top bit of the stack without popping it. 162 | */ 163 | bool peek() const { 164 | return bits1 & 1; 165 | } 166 | 167 | /** 168 | * Read the top n bits of the stack without popping them. The bits 169 | * are returned as an integer, with the top bit in the least 170 | * significant digit, and the rest following in order from there. 171 | */ 172 | bool peek(int n) const { 173 | assert(n < NUM_BITS); // Can't return more than we have 174 | assert(n < 32); // Can't return more than the return type can hold 175 | return static_cast(bits1) & ((1<> pos) & 1; 184 | } 185 | }; 186 | 187 | 188 | 189 | 190 | #endif // BIT_STACK_HPP -------------------------------------------------------------------------------- /utils/blocked_array.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BLOCKED_ARRAY_HPP 2 | #define BLOCKED_ARRAY_HPP 3 | 4 | #include 5 | #include 6 | 7 | #include "morton.hpp" 8 | 9 | /** 10 | * @brief A 2d array optimized for cache coherency. 
11 | */ 12 | template 13 | class BlockedArray { 14 | private: 15 | uint32_t block_size {1 << LOG_BLOCK_SIZE}; 16 | uint32_t block_mask {block_size - 1}; 17 | uint32_t u_blocks {0}; 18 | uint32_t v_blocks {0}; 19 | uint32_t block_elements {block_size * block_size}; 20 | 21 | std::vector data {}; 22 | 23 | public: 24 | uint32_t width {0}; 25 | uint32_t height {0}; 26 | 27 | BlockedArray() {} 28 | 29 | BlockedArray(uint32_t w, uint32_t h) { 30 | init(w, h); 31 | 32 | } 33 | 34 | void init(uint32_t w, uint32_t h) { 35 | width = w; 36 | height = h; 37 | 38 | // Round width and height up to the nearest multiple of block_size 39 | if (width % block_size) 40 | width = width - (width % block_size) + block_size; 41 | if (height % block_size) 42 | height = height - (height % block_size) + block_size; 43 | 44 | // Calculate the number of blocks in the horizontal direction 45 | u_blocks = width >> LOG_BLOCK_SIZE; 46 | 47 | data.resize(width*height); 48 | } 49 | 50 | uint32_t index(uint32_t u, uint32_t v) const { 51 | // Find the start of the block 52 | const uint32_t bu = u >> LOG_BLOCK_SIZE; 53 | const uint32_t bv = v >> LOG_BLOCK_SIZE; 54 | const uint32_t i1 = block_elements * ((bv * u_blocks) + bu); 55 | 56 | // Find the index within the block 57 | u &= block_mask; 58 | v &= block_mask; 59 | const uint32_t i2 = Morton::xy2d(u, v); 60 | 61 | return i1 + i2; 62 | } 63 | 64 | // Element addressing 65 | T &operator()(uint32_t u, uint32_t v) { 66 | return data[index(u, v)]; 67 | } 68 | 69 | const T &operator()(uint32_t u, uint32_t v) const { 70 | return data[index(u, v)]; 71 | } 72 | 73 | }; 74 | 75 | #endif // BLOCKED_ARRAY_HPP 76 | -------------------------------------------------------------------------------- /utils/blocked_array_disk_cache.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BLOCKED_ARRAY_DISK_CACHE_HPP 2 | #define BLOCKED_ARRAY_DISK_CACHE_HPP 3 | 4 | #include 5 | #include 6 | 7 | #include "morton.hpp" 8 | 
#include "disk_cache.hpp" 9 | 10 | #define BLOCK_CACHE_SIZE 64 11 | 12 | /** 13 | * @brief A 2d array optimized for cache coherency, and which pages 14 | * large data to disk. 15 | * 16 | * TODO: This class is currently NOT thread safe, even for reading. This 17 | * should be addressed in the DiskCache class eventually. * NOTE(review): init() never assigns v_blocks (it stays 0), and width*height is evaluated in 32-bit arithmetic before being passed to data.init(). 18 | * 19 | */ 20 | template 21 | class BlockedArrayDiskCache { 22 | private: 23 | uint32_t block_size {1 << LOG_BLOCK_SIZE}; 24 | uint32_t block_mask {block_size - 1}; 25 | uint32_t u_blocks {0}; 26 | uint32_t v_blocks {0}; 27 | uint32_t block_elements {block_size * block_size}; 28 | 29 | DiskCache::Cache data {}; 30 | 31 | public: 32 | uint32_t width {0}; 33 | uint32_t height {0}; 34 | 35 | BlockedArrayDiskCache() {} 36 | 37 | BlockedArrayDiskCache(uint32_t w, uint32_t h): BlockedArrayDiskCache() { 38 | init(w, h); 39 | } 40 | 41 | void init(uint32_t w, uint32_t h) { 42 | width = w; 43 | height = h; 44 | 45 | // Round width and height up to the nearest multiple of block_size 46 | if (width % block_size) 47 | width = width - (width % block_size) + block_size; 48 | if (height % block_size) 49 | height = height - (height % block_size) + block_size; 50 | 51 | // Calculate the number of blocks in the horizontal direction 52 | u_blocks = width >> LOG_BLOCK_SIZE; 53 | 54 | data.init(width*height, BLOCK_CACHE_SIZE); 55 | } 56 | 57 | uint32_t index(uint32_t u, uint32_t v) const { 58 | // Find the start of the block 59 | const uint32_t bu = u >> LOG_BLOCK_SIZE; 60 | const uint32_t bv = v >> LOG_BLOCK_SIZE; 61 | const uint32_t i1 = block_elements * ((bv * u_blocks) + bu); 62 | 63 | // Find the index within the block 64 | u &= block_mask; 65 | v &= block_mask; 66 | const uint32_t i2 = Morton::xy2d(u, v); 67 | 68 | return i1 + i2; 69 | } 70 | 71 | // Element addressing 72 | T &operator()(uint32_t u, uint32_t v) { 73 | return data[index(u, v)]; 74 | } 75 | 76 | const T &operator()(uint32_t u, uint32_t v) const { 77 | return data[index(u, v)]; 78 | } 79 | 80 | 
}; 81 | 82 | #endif // BLOCKED_ARRAY_DISK_CACHE_HPP 83 | -------------------------------------------------------------------------------- /utils/chunked_array_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | 3 | #include "chunked_array.hpp" 4 | 5 | #define INITIAL_VALUE 123456 6 | 7 | struct MyInt { 8 | int n; 9 | MyInt() { 10 | n = INITIAL_VALUE; 11 | } 12 | }; 13 | 14 | 15 | 16 | TEST_CASE("chunked_array") { 17 | SECTION("constructor_1") { 18 | ChunkedArray ar; 19 | REQUIRE(ar.size() == 0); 20 | } 21 | 22 | SECTION("constructor_2") { 23 | ChunkedArray ar(1013); 24 | REQUIRE(ar.size() == 1013); 25 | } 26 | 27 | SECTION("constructor_3") { 28 | ChunkedArray ar(1013); 29 | 30 | bool eq = true; 31 | for (size_t i = 0; i < ar.size(); i++) 32 | eq = eq && (ar[i].n == INITIAL_VALUE); 33 | 34 | REQUIRE(eq); 35 | } 36 | 37 | SECTION("read_write_1") { 38 | ChunkedArray ar(4011); 39 | 40 | for (size_t i = 0; i < ar.size(); i++) 41 | ar[i] = i; 42 | 43 | bool eq = true; 44 | for (size_t i = 0; i < ar.size(); i++) 45 | eq = eq && (ar[i] == (int)i); 46 | 47 | REQUIRE(eq); 48 | } 49 | 50 | SECTION("resize_1") { 51 | ChunkedArray ar; 52 | ar.resize(1013); 53 | 54 | REQUIRE(ar.size() == 1013); 55 | } 56 | 57 | SECTION("resize_2") { 58 | ChunkedArray ar(12); 59 | ar.resize(1013); 60 | 61 | bool eq = true; 62 | for (size_t i = 0; i < ar.size(); i++) 63 | eq = eq && (ar[i].n == INITIAL_VALUE); 64 | 65 | REQUIRE(eq); 66 | REQUIRE(ar.size() == 1013); 67 | } 68 | 69 | SECTION("resize_3") { 70 | ChunkedArray ar(40000); 71 | ar.resize(1013); 72 | 73 | bool eq = true; 74 | for (size_t i = 0; i < ar.size(); i++) 75 | eq = eq && (ar[i].n == INITIAL_VALUE); 76 | 77 | REQUIRE(eq); 78 | REQUIRE(ar.size() == 1013); 79 | } 80 | 81 | SECTION("resize_4") { 82 | ChunkedArray ar(40000); 83 | ar.resize(0); 84 | ar.resize(6230); 85 | ar.resize(10000); 86 | ar.resize(943); 87 | ar.resize(302853); 88 | ar.resize(0); 89 | 
ar.resize(1013); 90 | 91 | bool eq = true; 92 | for (size_t i = 0; i < ar.size(); i++) 93 | eq = eq && (ar[i].n == INITIAL_VALUE); 94 | 95 | REQUIRE(eq); 96 | REQUIRE(ar.size() == 1013); 97 | } 98 | 99 | SECTION("iterator_1") { 100 | ChunkedArray ar(1234); 101 | ChunkedArray::iterator it = ar.begin(); 102 | 103 | for (size_t i = 0; i < ar.size(); i++) 104 | it[i] = i; 105 | 106 | bool eq = true; 107 | it = ar.begin(); 108 | for (size_t i = 0; i < ar.size(); i++) 109 | eq = eq && ((size_t)it[i] == i); 110 | 111 | REQUIRE(eq); 112 | } 113 | 114 | SECTION("iterator_2") { 115 | ChunkedArray ar(1234); 116 | ChunkedArray::iterator it = ar.begin() + 23; 117 | 118 | for (size_t i = 23; i < ar.size(); i++) 119 | it[i-23] = i; 120 | 121 | bool eq = true; 122 | it = ar.begin() + 23; 123 | for (size_t i = 23; i < ar.size(); i++) 124 | eq = eq && ((size_t)it[i-23] == i); 125 | 126 | REQUIRE(eq); 127 | } 128 | 129 | SECTION("iterator_3") { 130 | ChunkedArray ar(1234); 131 | ChunkedArray::iterator it = ar.begin() + 23; 132 | 133 | ar[23] = 54321; 134 | 135 | REQUIRE(*it == 54321); 136 | } 137 | 138 | SECTION("iterator_4") { 139 | ChunkedArray ar(1234); 140 | ChunkedArray::iterator it = ar.begin(); 141 | 142 | for (size_t i = 0; i < ar.size(); i++) 143 | ar[i] = i; 144 | 145 | bool eq = true; 146 | for (size_t i = 0; i < ar.size(); i++) { 147 | eq = eq && (ar[i] == *it); 148 | ++it; 149 | } 150 | 151 | REQUIRE(eq); 152 | } 153 | } 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /utils/counting_sort.hpp: -------------------------------------------------------------------------------- 1 | #ifndef COUNTING_SORT_HPP 2 | #define COUNTING_SORT_HPP 3 | 4 | #include 5 | #include 6 | #include "numtype.h" 7 | 8 | namespace CountingSort { 9 | 10 | /** 11 | * @brief Counting sort algorithm. 12 | * 13 | * Works on any array whose items can be sorted as non-negative 14 | * integers (e.g. 
there are a finite and countable number of possible 15 | * values). However, to be practical the maximum integer can't be 16 | * too absurdly large. 17 | * 18 | * The benefit of counting sort is that it sorts in linear time to the 19 | * length of the array (makes 6*array_length accesses to the data), and 20 | * there is extremely efficient for very large array sizes. 21 | * 22 | * @param list Pointer to the beginning of the array. 23 | * @param list_length Length of the array. 24 | * @param max_items The largest integer that can come out of an item in the array. 25 | * @param indexer Pointer to a function that can turn type T into an integer. 26 | */ 27 | template 28 | bool sort(T *list, size_t list_length, size_t max_items, size_t(*indexer)(const T &)) { 29 | size_t item_counts[max_items]; 30 | for (size_t i = 0; i < max_items; i++) { 31 | item_counts[i] = 0; 32 | } 33 | 34 | // Count the items 35 | for (size_t i = 0; i < list_length; i++) { 36 | item_counts[indexer(list[i])]++; 37 | } 38 | 39 | // Set up start-index array 40 | size_t item_start_indices[max_items]; 41 | size_t running_count = 0; 42 | for (size_t i = 0; i < max_items; i++) { 43 | item_start_indices[i] = running_count; 44 | running_count += item_counts[i]; 45 | } 46 | 47 | // Set up filled-so-far-count array 48 | size_t item_fill_counts[max_items]; 49 | for (size_t i = 0; i < max_items; i++) { 50 | item_fill_counts[i] = 0; 51 | } 52 | 53 | // Sort the list 54 | size_t traversal = 0; 55 | size_t i = 0; 56 | while (i < list_length) { 57 | const size_t index = indexer(list[i]); 58 | const size_t next_place = item_start_indices[index] + item_fill_counts[index]; 59 | 60 | if (i >= item_start_indices[index] && i < next_place) { 61 | i++; 62 | } else { 63 | std::swap(list[i], list[next_place]); 64 | item_fill_counts[index]++; 65 | } 66 | traversal++; 67 | } 68 | 69 | return true; 70 | } 71 | 72 | 73 | } 74 | #endif // COUNTING_SORT_HPP 
-------------------------------------------------------------------------------- /utils/disk_cache_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | 3 | #include "disk_cache.hpp" 4 | #include "rng.hpp" 5 | 6 | 7 | TEST_CASE("disk_cache") { 8 | // Constructors 9 | SECTION("constructor") { 10 | DiskCache::Cache cache1(100000, 32); 11 | DiskCache::Cache cache2(30001, 33); 12 | 13 | REQUIRE(cache1.block_size() == 1024); 14 | REQUIRE(cache2.block_size() == 213); 15 | REQUIRE(cache1.element_count() >= 100000); 16 | REQUIRE(cache2.element_count() >= 30001); 17 | } 18 | 19 | SECTION("manual_init") { 20 | DiskCache::Cache cache1; 21 | DiskCache::Cache cache2; 22 | 23 | cache1.init(100000, 32); 24 | cache2.init(30001, 33); 25 | 26 | REQUIRE(cache1.block_size() == 1024); 27 | REQUIRE(cache2.block_size() == 213); 28 | REQUIRE(cache1.element_count() >= 100000); 29 | REQUIRE(cache2.element_count() >= 30001); 30 | } 31 | 32 | SECTION("write_read") { 33 | RNG rng(1); 34 | DiskCache::Cache cache(1000000, 32); 35 | 36 | for (int i = 0; i < 1000000; i++) { 37 | cache[i] = rng.next_float(); 38 | } 39 | 40 | rng.seed(1); 41 | bool match = true; 42 | for (int i = 0; i < 1000000; i++) { 43 | match = match && cache[i] == rng.next_float(); 44 | } 45 | 46 | REQUIRE(match); 47 | } 48 | } 49 | 50 | -------------------------------------------------------------------------------- /utils/hash.hpp: -------------------------------------------------------------------------------- 1 | #ifndef HASH_HPP 2 | #define HASH_HPP 3 | 4 | #include 5 | 6 | static inline uint32_t hash_u32(uint32_t n, uint32_t seed) { 7 | uint32_t hash = n; 8 | 9 | for (uint32_t i=0; i < 3; ++i) { 10 | hash *= 1936502639; 11 | hash ^= hash >> 16; 12 | hash += seed; 13 | } 14 | 15 | return hash; 16 | } 17 | 18 | static inline float hash_f32(uint32_t n, uint32_t seed) { 19 | uint32_t hash = hash_u32(n, seed); 20 | 21 | union { 22 | float w; 23 | uint32_t a; 24 | 
}; 25 | a = hash >> 9; // Take upper 23 bits 26 | a |= 0x3F800000; // Make a float from bits 27 | return w-1.f; 28 | } 29 | 30 | /** 31 | * @brief A seedable hash class. 32 | * 33 | * Takes 32 bit unsigned ints as input, and can output either 34 | * unsigned 32 bit ints or floats. 35 | */ 36 | class Hash { 37 | private: 38 | uint32_t seed {42}; 39 | 40 | public: 41 | Hash(uint32_t seed): seed {seed} {} 42 | 43 | /** 44 | * @brief Takes an int and returns an int. 45 | */ 46 | uint32_t get_int(uint32_t n) { 47 | return hash_u32(n, seed); 48 | } 49 | 50 | /** 51 | * @brief Takes an int and returns a float in [0, 1). 52 | */ 53 | float get_float(uint32_t n) { 54 | uint32_t hash = hash_u32(n, seed); 55 | 56 | union { 57 | float w; 58 | uint32_t a; 59 | }; 60 | a = hash >> 9; // Take upper 23 bits 61 | a |= 0x3F800000; // Make a float from bits 62 | return w-1.f; 63 | } 64 | }; 65 | 66 | #endif // HASH_HPP -------------------------------------------------------------------------------- /utils/hilbert.hpp: -------------------------------------------------------------------------------- 1 | /* Hilbert curve transforms. 2 | */ 3 | 4 | #ifndef HILBERT_HPP 5 | #define HILBERT_HPP 6 | 7 | #include "numtype.h" 8 | 9 | namespace Hilbert { 10 | 11 | // Utility function used by the functions below. 12 | static inline void hil_rot(uint32_t n, uint32_t &x, uint32_t &y, uint32_t rx, uint32_t ry) { 13 | if (ry == 0) { 14 | if (rx == 1) { 15 | x = n-1 - x; 16 | y = n-1 - y; 17 | } 18 | const uint32_t t = x; 19 | x = y; 20 | y = t; 21 | } 22 | } 23 | 24 | /** 25 | * @brief Convert (x,y) to hilbert curve index. 26 | * 27 | * @param x The x coordinate. Must be a positive integer no greater than n. 28 | * @param y The y coordinate. Must be a positive integer no greater than n. 29 | * 30 | * @returns The hilbert curve index corresponding to the (x,y) coordinates given. 
31 | */ 32 | static inline uint32_t xy2d(uint32_t x, uint32_t y) { 33 | const uint32_t n = 1 << 16; 34 | uint32_t rx, ry, s, d=0; 35 | for (s=n>>1; s>0; s>>=1) { 36 | rx = (x & s) > 0; 37 | ry = (y & s) > 0; 38 | d += s * s * ((3 * rx) ^ ry); 39 | hil_rot(s, x, y, rx, ry); 40 | } 41 | return d; 42 | } 43 | 44 | 45 | /** 46 | * @brief Convert hilbert curve index to (x,y). 47 | * 48 | * @param d The hilbert curve index. 49 | * @param[out] x Pointer where the x coordinate will be stored. 50 | * @param[out] y Pointer where the y coordinate will be stored. 51 | */ 52 | static inline void d2xy(uint32_t d, uint32_t *x, uint32_t *y) { 53 | const uint32_t n = 1 << 16; 54 | uint32_t rx, ry, s, t=d; 55 | *x = *y = 0; 56 | for (s=1; s>1); 58 | ry = 1 & (t ^ rx); 59 | hil_rot(s, *x, *y, rx, ry); 60 | *x += s * rx; 61 | *y += s * ry; 62 | t >>= 2; 63 | } 64 | } 65 | 66 | } 67 | 68 | #endif // HILBERT_HPP 69 | 70 | 71 | -------------------------------------------------------------------------------- /utils/job_queue.hpp: -------------------------------------------------------------------------------- 1 | #ifndef JOB_QUEUE_HPP 2 | #define JOB_QUEUE_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "ring_buffer_concurrent.hpp" 10 | 11 | /** 12 | * @brief A job queue for the producer/consumer model of managing threads. 13 | * 14 | * Consumer threads are created and managed by the queue. To use this, 15 | * simply add jobs to the queue and they will be processed. All jobs 16 | * must be thread-safe, as multiple jobs can be processed concurrently. 17 | * 18 | * A job can be any object that is callable without parameters. A good 19 | * choice is std::function 20 | */ 21 | template > 22 | class JobQueue { 23 | RingBufferConcurrent queue; 24 | std::vector threads; 25 | 26 | bool done; 27 | 28 | // A consumer thread, which watches the queue for jobs and 29 | // executes them. 
30 | void run_consumer() { 31 | T job; 32 | while (pop(&job)) { 33 | job(); 34 | } 35 | } 36 | 37 | public: 38 | /** 39 | * @brief Constructor. 40 | * 41 | * By default uses 1 thread and creates a queue 4 times the size 42 | * of the thread count. 43 | * 44 | * @param thread_count Number of consumer threads to spawn for processing jobs. 45 | * @param queue_size Size of the job queue buffer. Zero means determine 46 | * automatically from number of threads. 47 | */ 48 | explicit JobQueue(size_t thread_count=1, size_t queue_size=0) { 49 | done = false; 50 | 51 | // Set up queue 52 | if (queue_size == 0) 53 | queue_size = thread_count * 4; 54 | queue.resize(queue_size); 55 | 56 | // Create and start consumer threads 57 | threads.resize(thread_count); 58 | for (auto &thread: threads) 59 | thread = std::thread(&JobQueue::run_consumer, this); 60 | } 61 | 62 | // Destructor. Joins and deletes threads. 63 | ~JobQueue() { 64 | finish(); 65 | } 66 | 67 | 68 | /** 69 | * @brief Marks the queue as done, and waits for all 70 | * jobs to finish. 71 | * 72 | * Once the queue is done, producers can no longer add jobs to 73 | * the queue, and consumers will be notified when the queue is 74 | * empty so they can terminate. 75 | */ 76 | void finish() { 77 | if (!done) { 78 | // Notify all threads that the queue is done 79 | done = true; 80 | queue.disallow_blocking(); 81 | 82 | // Wait for threads to finish 83 | for (auto &thread: threads) 84 | thread.join(); 85 | } 86 | } 87 | 88 | 89 | /** 90 | * @brief Adds a job to the queue. 91 | * 92 | * @param job The job to add. 93 | * 94 | * @return True on success, false if the queue is closed. 95 | */ 96 | bool push(const T &job) { 97 | // Add job to queue 98 | return queue.push_blocking(job); 99 | } 100 | 101 | 102 | /** 103 | * @brief Gets the next job, removing it from the queue. 104 | * 105 | * @param [out] job The popped job is copied into here. Must be a 106 | * pointer to valid memory. 
107 | * 108 | * @return True on success, false if the queue is empty and closed. 109 | */ 110 | bool pop(T *job) { 111 | // Pop the next job 112 | return queue.pop_blocking(job); 113 | } 114 | }; 115 | 116 | #endif // JOB_QUEUE_HPP -------------------------------------------------------------------------------- /utils/job_queue_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | #include "job_queue.hpp" 3 | 4 | // Simple callable class that does nothing more than set an integer 5 | // value. 6 | class TestJob { 7 | int *inc; 8 | int value; 9 | public: 10 | TestJob() {} 11 | 12 | TestJob(int *incr, int val) { 13 | inc = incr; 14 | value = val; 15 | } 16 | 17 | void operator()() { 18 | *inc = value; 19 | } 20 | }; 21 | 22 | 23 | 24 | 25 | TEST_CASE("job_queue") { 26 | SECTION("basic_usage") { 27 | JobQueue q; 28 | int ints[100]; 29 | for (int i = 0; i < 100; i++) 30 | q.push(TestJob(&(ints[i]), i)); 31 | q.finish(); 32 | 33 | bool test = true; 34 | for (int i = 0; i < 100; i++) 35 | test = test && ints[i] == i; 36 | 37 | REQUIRE(test); 38 | } 39 | 40 | SECTION("queue_bottleneck") { 41 | JobQueue q(1000, 2); // 1000 threads, queue size of 2 42 | int ints[100]; 43 | for (int i = 0; i < 100; i++) 44 | q.push(TestJob(&(ints[i]), i)); 45 | q.finish(); 46 | 47 | bool test = true; 48 | for (int i = 0; i < 100; i++) 49 | test = test && ints[i] == i; 50 | 51 | REQUIRE(test); 52 | } 53 | 54 | SECTION("destruct") { 55 | JobQueue *q; 56 | q = new JobQueue; 57 | int ints[100]; 58 | for (int i = 0; i < 100; i++) 59 | q->push(TestJob(&(ints[i]), i)); 60 | delete q; // Should call finish() via destructor 61 | 62 | bool test = true; 63 | for (int i = 0; i < 100; i++) 64 | test = test && ints[i] == i; 65 | 66 | REQUIRE(test); 67 | } 68 | } 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /utils/low_level.hpp: 
-------------------------------------------------------------------------------- 1 | #ifndef LOW_LEVEL_HPP 2 | #define LOW_LEVEL_HPP 3 | 4 | #include 5 | 6 | namespace LowLevel { 7 | 8 | static const int cache_line_size = 64; 9 | 10 | template 11 | inline void prefetch_L1(T* address) { 12 | constexpr int lines = (sizeof(T)/cache_line_size) + ((sizeof(T)%cache_line_size) == 0 ? 0 : 1); 13 | for (int i = 0; i < lines; ++i) { 14 | _mm_prefetch(address+i, _MM_HINT_T0); 15 | } 16 | } 17 | 18 | template 19 | inline void prefetch_L2(T* address) { 20 | constexpr int lines = (sizeof(T)/cache_line_size) + ((sizeof(T)%cache_line_size) == 0 ? 0 : 1); 21 | for (int i = 0; i < lines; ++i) { 22 | _mm_prefetch(address+i, _MM_HINT_T1); 23 | } 24 | } 25 | 26 | template 27 | inline void prefetch_L3(T* address) { 28 | constexpr int lines = (sizeof(T)/cache_line_size) + ((sizeof(T)%cache_line_size) == 0 ? 0 : 1); 29 | for (int i = 0; i < lines; ++i) { 30 | _mm_prefetch(address+i, _MM_HINT_T2); 31 | } 32 | } 33 | 34 | } 35 | 36 | #endif // LOW_LEVEL_HPP -------------------------------------------------------------------------------- /utils/lru_cache.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LRU_CACHE_HPP 2 | #define LRU_CACHE_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "spinlock.hpp" 11 | 12 | 13 | // Should be overloaded for more complex types 14 | template 15 | size_t size_in_bytes(const T& data) { 16 | return sizeof(T); 17 | } 18 | 19 | /* 20 | * A thread-safe Least-Recently-Used cache. 
21 | */ 22 | template 23 | class LRUCache { 24 | struct LRUPair { 25 | K key; 26 | std::shared_ptr data_ptr; 27 | }; 28 | 29 | SpinLock slock; 30 | 31 | size_t max_bytes; 32 | size_t byte_count {0}; 33 | 34 | // A map from indices to iterators into the list 35 | std::unordered_map::iterator> map; 36 | 37 | // A list that contains the index and a pointer to the data of each element 38 | std::list elements; 39 | 40 | // The number of bytes each item takes up, aside from the size of the item itself. 41 | // In other words, the overhead of the LRUCache per-item. 42 | // Estimated for now as the size of an LRUPair plus the size of 43 | // 2 pointers (for the list) plus the size of an LRUKEY and list 44 | // iterator (for the map). 45 | // TODO: more accurate estimate 46 | const size_t per_item_size_cost = sizeof(LRUPair) + (sizeof(void*)*2) + sizeof(K) + sizeof(typename std::list::iterator); 47 | 48 | public: 49 | LRUCache(size_t max_bytes_=40): max_bytes {max_bytes_} {} 50 | 51 | ~LRUCache() { 52 | } 53 | 54 | /* 55 | * Sets the maximum number of bytes in the cache. 56 | * Should only be called once right after construction. 57 | */ 58 | void set_max_size(size_t size) { 59 | max_bytes = size; 60 | } 61 | 62 | /* 63 | * Adds the given item to the cache using the given key. 64 | * If the key already exists, the existing item will be 65 | * replaced. 66 | * 67 | * Returns the key. 68 | */ 69 | K put(std::shared_ptr data_ptr, K key) { 70 | std::unique_lock lock(slock); 71 | 72 | // Check if the key exists, and erase it if it does 73 | const auto exists = static_cast(map.count(key)); 74 | if (exists) 75 | erase(key); 76 | 77 | // Add data to the cache 78 | add(data_ptr, key); 79 | 80 | return key; 81 | } 82 | 83 | /** 84 | * @brief Fetches the data associated with a key. 85 | * 86 | * @param key The key of the data to fetch. 87 | * 88 | * @return shared_ptr to the data on success, nullptr if the data isn't 89 | * in the cache. 
90 | * 91 | * Example usage: 92 | * std::shared_ptr p = cache.get(12345); 93 | * if (p) { 94 | * // Do things with the data here 95 | * } 96 | */ 97 | std::shared_ptr get(K key) { 98 | std::unique_lock lock(slock); 99 | 100 | // Check if the key exists 101 | const auto exists = static_cast(map.count(key)); 102 | if (!exists) 103 | return nullptr; 104 | 105 | touch(key); 106 | 107 | return map[key]->data_ptr; 108 | } 109 | 110 | /** 111 | * @brief Erases all items from the cache.get 112 | */ 113 | void clear() { 114 | std::unique_lock lock(slock); 115 | 116 | map.clear(); 117 | elements.clear(); 118 | byte_count = 0; 119 | } 120 | 121 | private: 122 | /* 123 | * Adds an item to the cache with the given key. 124 | */ 125 | void add(std::shared_ptr& data_ptr, K key) { 126 | byte_count += size_in_bytes(*data_ptr) + per_item_size_cost; 127 | 128 | // Remove last element(s) if necessary to make room 129 | while (byte_count >= max_bytes) { 130 | if (!erase_last()) 131 | break; 132 | } 133 | 134 | // Add the new data 135 | auto it = elements.begin(); 136 | it = elements.insert(it, LRUPair {key, data_ptr}); 137 | 138 | // Log it in the map 139 | map[key] = it; 140 | } 141 | 142 | /* 143 | * Erases the given key and associated data from the cache. 144 | */ 145 | void erase(K key) { 146 | byte_count -= size_in_bytes(*(map[key]->data_ptr)) + per_item_size_cost; 147 | elements.erase(map[key]); 148 | map.erase(key); 149 | } 150 | 151 | /* 152 | * Erases the last inactive element in the cache. 153 | */ 154 | bool erase_last() { 155 | for (auto rit = elements.rbegin(); rit != elements.rend(); ++rit) { 156 | erase(rit->key); 157 | return true; 158 | } 159 | return false; 160 | } 161 | 162 | /* 163 | * Moves a given item to the front of the cache. 
164 | */ 165 | void touch(K key) { 166 | elements.splice(elements.begin(), elements, map[key]); 167 | } 168 | }; 169 | 170 | #endif 171 | -------------------------------------------------------------------------------- /utils/mis.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MIS_HPP 2 | #define MIS_HPP 3 | 4 | // Utility functions for multiple importance sampling 5 | 6 | template 7 | T balance_heuristic(T a, T b) { 8 | return a / (a + b); 9 | } 10 | 11 | template 12 | T power_heuristic(T a, T b) { 13 | const auto a2 = a * a; 14 | const auto b2 = b * b; 15 | return a2 / (a2 + b2); 16 | } 17 | 18 | #endif // MIS_HPP -------------------------------------------------------------------------------- /utils/morton.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Morton code (a.k.a. z-curve) transforms. 3 | */ 4 | 5 | #ifndef MORTON_HPP 6 | #define MORTON_HPP 7 | 8 | namespace Morton { 9 | 10 | /** 11 | * @brief Encodes x and y coordinates into a morton code index. 12 | * 13 | * In practice x and y need to be within the range of an unsigned 16 bit 14 | * integer, since the output is a single 32 bit index. 15 | */ 16 | static inline uint32_t xy2d(uint32_t x, uint32_t y) { 17 | x &= 0x0000ffff; 18 | y &= 0x0000ffff; 19 | x |= (x << 8); 20 | y |= (y << 8); 21 | x &= 0x00ff00ff; 22 | y &= 0x00ff00ff; 23 | x |= (x << 4); 24 | y |= (y << 4); 25 | x &= 0x0f0f0f0f; 26 | y &= 0x0f0f0f0f; 27 | x |= (x << 2); 28 | y |= (y << 2); 29 | x &= 0x33333333; 30 | y &= 0x33333333; 31 | x |= (x << 1); 32 | y |= (y << 1); 33 | x &= 0x55555555; 34 | y &= 0x55555555; 35 | return x | (y << 1); 36 | } 37 | 38 | /** 39 | * @brief Decodes a morton code index into x and y coordinates. 
40 | */ 41 | static inline void d2xy(uint32_t d, uint32_t *x, uint32_t *y) { 42 | *x = d; 43 | *y = (*x >> 1); 44 | *x &= 0x55555555; 45 | *y &= 0x55555555; 46 | *x |= (*x >> 1); 47 | *y |= (*y >> 1); 48 | *x &= 0x33333333; 49 | *y &= 0x33333333; 50 | *x |= (*x >> 2); 51 | *y |= (*y >> 2); 52 | *x &= 0x0f0f0f0f; 53 | *y &= 0x0f0f0f0f; 54 | *x |= (*x >> 4); 55 | *y |= (*y >> 4); 56 | *x &= 0x00ff00ff; 57 | *y &= 0x00ff00ff; 58 | *x |= (*x >> 8); 59 | *y |= (*y >> 8); 60 | *x &= 0x0000ffff; 61 | *y &= 0x0000ffff; 62 | } 63 | 64 | } 65 | 66 | #endif // MORTON_HPP 67 | -------------------------------------------------------------------------------- /utils/numtype.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Defines basic numerical types for use throughout psychopath. 3 | */ 4 | 5 | #ifndef NUMTYPE_H 6 | #define NUMTYPE_H 7 | 8 | #include 9 | #include 10 | 11 | typedef unsigned int uint; 12 | 13 | 14 | #endif // NUMTYPE_H 15 | -------------------------------------------------------------------------------- /utils/range.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RANGE_HPP 2 | #define RANGE_HPP 3 | 4 | #include 5 | #include 6 | 7 | /** 8 | * A Range class, based on the proposal for std::range at: 9 | * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2012/n3350.html 10 | * But with a few things omitted. 
11 | */ 12 | template 13 | class Range { 14 | private: 15 | Iterator iter_begin; 16 | Iterator iter_end; 17 | 18 | public: 19 | // types 20 | typedef typename std::iterator_traits::iterator_category iterator_category; 21 | typedef typename std::iterator_traits::value_type value_type; 22 | typedef typename std::iterator_traits::difference_type difference_type; 23 | typedef typename std::iterator_traits::reference reference; 24 | typedef typename std::iterator_traits::pointer pointer; 25 | 26 | // constructors 27 | Range() {} 28 | constexpr Range(Iterator begin, Iterator end): iter_begin {begin}, iter_end {end} {} 29 | constexpr Range(std::pair iter_pair): iter_begin {iter_pair.first}, iter_end {iter_pair.second} {} 30 | 31 | // iterator access 32 | constexpr Iterator begin() const { 33 | return iter_begin; 34 | } 35 | constexpr Iterator end() const { 36 | return iter_end; 37 | } 38 | constexpr const Iterator cbegin() const { 39 | return iter_begin; 40 | } 41 | constexpr const Iterator cend() const { 42 | return iter_end; 43 | } 44 | 45 | // element access 46 | constexpr reference front() const { 47 | return *iter_begin; 48 | } 49 | constexpr reference back() const { 50 | return *iter_end; 51 | } 52 | constexpr reference operator[](difference_type index) const { 53 | return *(iter_begin + index); 54 | } 55 | 56 | // size 57 | constexpr bool empty() const { 58 | return iter_begin == iter_end; 59 | } 60 | constexpr difference_type size() const { 61 | return std::distance(iter_begin, iter_end); 62 | } 63 | 64 | // creating derived ranges 65 | //pair< range, range > split(difference_type index) const; 66 | //Range slice(difference_type start, difference_type stop) const; 67 | //Range slice(difference_type start) const; 68 | }; 69 | 70 | // deducing constructor wrappers 71 | template 72 | constexpr Range make_range(Iterator begin, Iterator end) { 73 | return Range(begin, end); 74 | } 75 | 76 | #endif // RANGE_HPP 77 | 
-------------------------------------------------------------------------------- /utils/ring_buffer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RING_BUFFER 2 | #define RING_BUFFER 3 | 4 | #include 5 | #include 6 | 7 | /** 8 | * @brief A ring buffer, or circular buffer. 9 | * 10 | * Acts as a limited-size FIFO queue, where overflow simply results in 11 | * the queue overwriting itself from the back. 12 | */ 13 | template 14 | class RingBuffer { 15 | private: 16 | std::vector buffer {T{}}; // Default 1 item large 17 | 18 | size_t next {0}; // Index of the next item to be consumed 19 | size_t count {0}; // Number of unconsumed items in the buffer 20 | 21 | public: 22 | /** 23 | * @brief Default constructor. 24 | */ 25 | RingBuffer() {} 26 | 27 | /** 28 | * @brief Constructor. 29 | * 30 | * @param size Size of the buffer in number-of-items. 31 | */ 32 | RingBuffer(size_t buffer_size): buffer(buffer_size) {} 33 | 34 | /** 35 | * @brief Resizes the buffer. 36 | * 37 | * @warning Significant data loss can occur if this is done on 38 | * a non-empty buffer. Check is_empty() before calling 39 | * this. 40 | * 41 | * @param size New size of the buffer in number-of-items 42 | * 43 | * TODO: minimize data loss when running this. 44 | */ 45 | void resize(size_t buffer_size) { 46 | next = 0; 47 | count = 0; 48 | buffer.resize(buffer_size); 49 | } 50 | 51 | /** 52 | * @brief Returns the size of the buffer. 53 | */ 54 | size_t size() { 55 | return buffer.size(); 56 | } 57 | 58 | /** 59 | * @brief Pushes an item onto the front of the buffer. 60 | * 61 | * If the buffer is full, this will start over-writing 62 | * the tail of the buffer. Make sure to check is_full() 63 | * if you don't want this behavior. 64 | * 65 | * @param item The item to push. 
66 | */ 67 | void push(const T &item) { 68 | buffer[(next+count)%buffer.size()] = item; 69 | count++; 70 | 71 | // If we overwrote a non-empty item in the buffer 72 | if (count > buffer.size()) { 73 | next = (next + 1) % buffer.size(); 74 | count = buffer.size(); 75 | } 76 | } 77 | 78 | /** 79 | * @brief Pops an item off the back of the buffer. 80 | * 81 | * If the buffer is empty, this will return garbage. 82 | * Make sure to check is_empty(). 83 | * 84 | * @return The popped item. 85 | */ 86 | T pop() { 87 | const size_t i = next; 88 | if (count > 0) { 89 | next = (next + 1) % buffer.size(); 90 | count--; 91 | } 92 | 93 | return buffer[i]; 94 | } 95 | 96 | /** 97 | * @brief Returns whether the buffer is full or not. 98 | */ 99 | bool is_full() { 100 | return count == buffer.size(); 101 | } 102 | 103 | /** 104 | * @brief Returns whether the buffer is empty or not. 105 | */ 106 | bool is_empty() { 107 | return count == 0; 108 | } 109 | }; 110 | 111 | #endif // RING_BUFFER -------------------------------------------------------------------------------- /utils/ring_buffer_atomic.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RING_BUFFER_ATOMIC 2 | #define RING_BUFFER_ATOMIC 3 | /* Guard name is specific to this header so it can be included alongside other ring buffer headers. */ 4 | #include 5 | #include 6 | #include 7 | 8 | /* One slot of a RingBufferAtomic: the stored item plus a per-slot flag. Declared as a struct because RingBufferAtomic reads and writes both members directly. */ 9 | template 10 | struct RingBufferAtomicItem { 11 | T item; 12 | std::atomic_flag taken; 13 | }; 14 | 15 | 16 | /** 17 | * WIP: push() and pop() do not consult the per-slot `taken` flags yet, and both advance `next` with a separate atomic load and store rather than one read-modify-write. 18 | * @brief A ring buffer, or circular buffer, that uses atomics to be 19 | * thread-safe for consumers. Only single-producer is supported at 20 | * the moment. 21 | * 22 | * Acts as a limited-size FIFO queue, where overflow simply results in 23 | * the queue overwriting itself from the back. 
24 | */ 25 | template 26 | class RingBufferAtomic { 27 | private: 28 | std::vector> buffer; 29 | 30 | std::atomic next; // Index of the next item to be consumed 31 | std::atomic count; // Number of unconsumed items in the buffer 32 | 33 | public: 34 | /** 35 | * @brief Default constructor, buffer size of 1. Every slot's `taken` flag starts out set. 36 | */ 37 | RingBufferAtomic(): buffer(1), next {0}, count {0} { 38 | for (auto& item: buffer) 39 | item.taken.test_and_set(std::memory_order_acquire); 40 | } 41 | 42 | /** 43 | * @brief Constructor. Every slot's `taken` flag starts out set. 44 | * 45 | * @param buffer_size Size of the buffer in number-of-items. 46 | */ 47 | RingBufferAtomic(size_t buffer_size): buffer(buffer_size), next {0}, count {0} { 48 | for (auto& item: buffer) 49 | item.taken.test_and_set(std::memory_order_acquire); 50 | } 51 | 52 | /** 53 | * @brief Resizes the buffer. Resets `next` and `count` to zero and sets every slot's `taken` flag. 54 | * 55 | * @warning Significant data loss and/or loss of proper 56 | * synchronization between threads can happen if this is called 57 | * at the wrong time. Only call this before any reading or writing 58 | * is done. 59 | * 60 | * @param buffer_size New size of the buffer in number-of-items 61 | */ 62 | void resize(size_t buffer_size) { 63 | next = 0; 64 | count = 0; 65 | /* Slots contain a std::atomic_flag and therefore cannot be moved, which vector::resize() requires; replace the storage with a freshly sized vector instead. */ buffer = decltype(buffer)(buffer_size); 66 | for (auto& item: buffer) 67 | item.taken.test_and_set(std::memory_order_acquire); 68 | } 69 | 70 | /** 71 | * @brief Returns the size of the buffer. 72 | */ 73 | size_t size() const { 74 | return buffer.size(); 75 | } 76 | 77 | /** 78 | * @brief Pushes an item onto the front of the buffer. 79 | * 80 | * If the buffer is full, this will start over-writing 81 | * the tail of the buffer. Make sure to check is_full() 82 | * if you don't want this behavior. 83 | * 84 | * @param item The item to push. 
85 | */ 86 | void push(const T &item) { 87 | buffer[(next+count)%buffer.size()].item = item; 88 | count++; 89 | 90 | // If we overwrote a non-empty item in the buffer 91 | if (count > buffer.size()) { 92 | next = (next + 1) % buffer.size(); 93 | count = buffer.size(); 94 | } 95 | } 96 | 97 | /** 98 | * @brief Pops an item off the back of the buffer. 99 | * 100 | * If the buffer is empty, this will return garbage. 101 | * Make sure to check is_empty(). 102 | * 103 | * @return The popped item. 104 | */ 105 | T pop() { 106 | const size_t i = next; 107 | if (count > 0) { 108 | next = (next + 1) % buffer.size(); 109 | count--; 110 | } 111 | /* Return the stored value, not the slot wrapper that also carries the flag. */ 112 | return buffer[i].item; 113 | } 114 | 115 | /** 116 | * @brief Returns whether the buffer is full or not. 117 | */ 118 | bool is_full() { 119 | return count == buffer.size(); 120 | } 121 | 122 | /** 123 | * @brief Returns whether the buffer is empty or not. 124 | */ 125 | bool is_empty() { 126 | return count == 0; 127 | } 128 | }; 129 | 130 | #endif // RING_BUFFER_ATOMIC -------------------------------------------------------------------------------- /utils/ring_buffer_concurrent.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RING_BUFFER_CONCURRENT 2 | #define RING_BUFFER_CONCURRENT 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "ring_buffer.hpp" 10 | 11 | /** 12 | * @brief A thread-safe ring buffer, or circular buffer. 13 | * 14 | * Acts as a limited-size FIFO queue. 15 | */ 16 | template 17 | class RingBufferConcurrent { 18 | private: 19 | RingBuffer buffer; 20 | 21 | std::mutex mut; 22 | std::condition_variable full; 23 | std::condition_variable empty; 24 | 25 | bool stop; 26 | std::atomic blocker_count; // Counters for blocking pushers and poppers. 27 | 28 | public: 29 | /** 30 | * @brief Default constructor, buffer size of 1. 31 | */ 32 | RingBufferConcurrent(): buffer(1), stop {false}, blocker_count {0} {} 33 | 34 | /** 35 | * @brief Constructor. 
36 | * 37 | * @param buffer_size Size of the buffer in number-of-items. 38 | */ 39 | RingBufferConcurrent(size_t buffer_size): buffer(buffer_size), stop {false}, blocker_count {0} {} 40 | 41 | /** 42 | * @brief Resizes the buffer. Takes the mutex for the duration of the resize. 43 | * 44 | * @warning Significant data loss can occur if this is done on 45 | * a non-empty buffer. Check is_empty() before calling 46 | * this. NOTE(review): no is_empty() is declared on this class in the code shown here; confirm how callers are expected to check. 47 | * 48 | * @param buffer_size New size of the buffer in number-of-items 49 | * 50 | * TODO: minimize data loss when running this. 51 | */ 52 | void resize(size_t buffer_size) { 53 | std::unique_lock lock(mut); 54 | buffer.resize(buffer_size); 55 | } 56 | 57 | /** 58 | * @brief Returns the size of the buffer. Reads the underlying buffer's size without taking the mutex. 59 | */ 60 | size_t size() { 61 | return buffer.size(); 62 | } 63 | 64 | /** 65 | * @brief Forces current blocking calls to end and return false. 66 | * 67 | * Any currently waiting call to push_blocking() or pop_blocking() 68 | * will be stopped and will return false. Sets the stop flag and wakes all waiters, spins until blocker_count reaches zero, then clears the stop flag again. 69 | */ 70 | void stop_blocking() { 71 | mut.lock(); 72 | stop = true; 73 | full.notify_all(); 74 | empty.notify_all(); 75 | mut.unlock(); 76 | 77 | // Wait for all blockers to stop 78 | /* Busy-wait: the loop body is empty, so this thread spins until the counter drops to zero. */ while (blocker_count > 0) {} 79 | 80 | mut.lock(); 81 | stop = false; 82 | mut.unlock(); 83 | } 84 | 85 | /** 86 | * @brief Stops all blocking calls and prevents further blocking 87 | * calls. Unlike stop_blocking(), the stop flag is left set and this does not wait for the blocked callers to return. 88 | */ 89 | void disallow_blocking() { 90 | mut.lock(); 91 | stop = true; 92 | full.notify_all(); 93 | empty.notify_all(); 94 | mut.unlock(); 95 | } 96 | 97 | /** 98 | * @brief Pushes an item onto the front of the buffer. 99 | * 100 | * @param [in] item The item to push. 101 | * 102 | * @return Whether the item was successfully pushed or not. 
103 | */ 104 | /* Non-blocking: returns false immediately, without pushing, when the buffer is full. */ bool push(const T &item) { 105 | std::unique_lock lock(mut); 106 | if (buffer.is_full()) 107 | return false; 108 | 109 | // Push item 110 | buffer.push(item); 111 | 112 | // Notify waiting poppers that there's an item in the queue 113 | empty.notify_all(); 114 | 115 | return true; 116 | } 117 | 118 | /** 119 | * @brief Pushes an item onto the front of the buffer. 120 | * 121 | * If the buffer is full, this will block until there is space 122 | * to successfully push. 123 | * 124 | * @param [in] item The item to push. 125 | * 126 | * @return Whether the item was successfully pushed or not. False is returned only when the buffer is full and the stop flag is set. 127 | */ 128 | bool push_blocking(const T &item) { 129 | std::unique_lock lock(mut); 130 | blocker_count++; 131 | 132 | // Wait for open space in the buffer 133 | /* Re-checks the full condition after every wake-up, so spurious wake-ups are harmless. */ while (buffer.is_full()) { 134 | if (stop) { 135 | blocker_count--; 136 | return false; 137 | } else { 138 | full.wait(lock); 139 | } 140 | } 141 | 142 | // Push item 143 | buffer.push(item); 144 | 145 | // Notify waiting poppers that there's an item in the queue 146 | empty.notify_all(); 147 | 148 | blocker_count--; 149 | return true; 150 | } 151 | 152 | /** 153 | * @brief Pops an item off the back of the buffer. Non-blocking: returns false immediately when the buffer is empty, leaving *item untouched. 154 | * 155 | * @param [out] item Popped item is copied to this memory location. 156 | * @return Whether an item was successfully popped or not. 157 | */ 158 | bool pop(T* item) { 159 | std::unique_lock lock(mut); 160 | if (buffer.is_empty()) 161 | return false; 162 | 163 | // Pop item 164 | *item = buffer.pop(); 165 | 166 | // Notify waiting pushers that there's space free 167 | full.notify_all(); 168 | 169 | return true; 170 | } 171 | 172 | /** 173 | * @brief Pops an item off the back of the buffer. 174 | * 175 | * If the buffer is empty, this will block until there is an item 176 | * to pop. 177 | * 178 | * @param [out] item Popped item is copied to this memory location. 179 | * @return Whether an item was popped or not. 
180 | */ 181 | bool pop_blocking(T* item) { 182 | std::unique_lock lock(mut); 183 | blocker_count++; 184 | 185 | // Wait for an item to become available in the buffer 186 | while (buffer.is_empty()) { 187 | if (stop) { 188 | blocker_count--; 189 | return false; 190 | } else { 191 | empty.wait(lock); 192 | } 193 | } 194 | 195 | // Pop item 196 | *item = buffer.pop(); 197 | 198 | // Notify waiting pushers that there's space free 199 | full.notify_all(); 200 | 201 | blocker_count--; 202 | return true; 203 | } 204 | }; 205 | 206 | #endif // RING_BUFFER_CONCURRENT -------------------------------------------------------------------------------- /utils/ring_buffer_concurrent_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | #include "ring_buffer_concurrent.hpp" 3 | 4 | /* Single-threaded tests of RingBufferConcurrent: construction, resize, and the non-blocking push()/pop() pair. */ 5 | 6 | TEST_CASE("ring_buffer_concurrent") { 7 | /* constructor tests */ 8 | SECTION("constructor_1") { 9 | RingBufferConcurrent rb; 10 | 11 | REQUIRE(rb.size() == 1); 12 | } 13 | 14 | SECTION("constructor_2") { 15 | RingBufferConcurrent rb(100); 16 | 17 | REQUIRE(rb.size() == 100); 18 | } 19 | 20 | 21 | 22 | 23 | /* resize() tests */ 24 | SECTION("resize_1") { 25 | RingBufferConcurrent rb; 26 | rb.resize(100); 27 | 28 | REQUIRE(rb.size() == 100); 29 | } 30 | 31 | SECTION("resize_2") { 32 | RingBufferConcurrent rb(50); 33 | rb.resize(100); 34 | 35 | REQUIRE(rb.size() == 100); 36 | } 37 | 38 | 39 | 40 | 41 | /* push()/pop() tests */ 42 | SECTION("push_pop_1") { 43 | // Partially fill buffer, then empty it 44 | RingBufferConcurrent rb(100); 45 | bool test = true; 46 | for (int i = 0; i < 50; i++) 47 | rb.push(i); 48 | int result {0}; 49 | for (int i = 0; i < 50; i++) { 50 | rb.pop(&result); 51 | test = test && (result == i); 52 | } 53 | 54 | REQUIRE(test); 55 | } 56 | 57 | SECTION("push_pop_2") { 58 | // Fully fill buffer, then empty it 59 | RingBufferConcurrent rb(100); 60 | bool test = true; 61 | for (int i = 0; i < 100; i++) 62 | rb.push(i); 63 | int 
result {0}; 64 | for (int i = 0; i < 100; i++) { 65 | rb.pop(&result); 66 | test = test && (result == i); 67 | } 68 | 69 | REQUIRE(test); 70 | } 71 | 72 | SECTION("push_pop_3") { 73 | // Push and pop repeatedly 74 | RingBufferConcurrent rb(100); 75 | bool test = true; 76 | int result {0}; 77 | for (int i = 0; i < 350; i++) { 78 | rb.push(i); 79 | rb.pop(&result); 80 | test = test && (result == i); 81 | } 82 | 83 | REQUIRE(test); 84 | } 85 | 86 | SECTION("push_pop_4") { 87 | // Overflow buffer 88 | RingBufferConcurrent rb(100); 89 | bool test = true; 90 | for (int i = 0; i < 350; i++) 91 | test = test && rb.push(i); // Should return false when overflowing 92 | 93 | REQUIRE(!test); 94 | } 95 | 96 | SECTION("push_pop_5") { 97 | // Overempty buffer 98 | RingBufferConcurrent rb(100); 99 | bool test = true; 100 | for (int i = 0; i < 50; i++) 101 | rb.push(i); 102 | int result {0}; 103 | for (int i = 0; i < 60; i++) 104 | test = test && rb.pop(&result); // Should return false when empty 105 | 106 | REQUIRE(!test); 107 | } 108 | } 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /utils/ring_buffer_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | #include "ring_buffer.hpp" 3 | 4 | /* Tests of the plain RingBuffer: construction, resize, is_empty(), is_full(), and FIFO ordering of push()/pop(). */ 5 | TEST_CASE("ring_buffer") { 6 | /* constructor tests */ 7 | SECTION("constructor_1") { 8 | RingBuffer rb; 9 | 10 | REQUIRE(rb.size() == 1); 11 | } 12 | 13 | SECTION("constructor_2") { 14 | RingBuffer rb(100); 15 | 16 | REQUIRE(rb.size() == 100); 17 | } 18 | 19 | 20 | 21 | 22 | /* resize() tests */ 23 | SECTION("resize_1") { 24 | RingBuffer rb; 25 | rb.resize(100); 26 | 27 | REQUIRE(rb.size() == 100); 28 | } 29 | 30 | SECTION("resize_2") { 31 | RingBuffer rb(50); 32 | rb.resize(100); 33 | 34 | REQUIRE(rb.size() == 100); 35 | } 36 | 37 | 38 | 39 | 40 | /* is_empty() tests */ 41 | SECTION("is_empty_1") { 42 | // No items added 43 | RingBuffer 
rb(100); 44 | 45 | REQUIRE(rb.is_empty()); 46 | } 47 | 48 | SECTION("is_empty_2") { 49 | // A few items added 50 | RingBuffer rb(100); 51 | for (int i = 0; i < 5; i++) 52 | rb.push(i); 53 | 54 | REQUIRE(!rb.is_empty()); 55 | } 56 | 57 | SECTION("is_empty_3") { 58 | // Max out buffer with items 59 | RingBuffer rb(100); 60 | for (int i = 0; i < 100; i++) 61 | rb.push(i); 62 | 63 | REQUIRE(!rb.is_empty()); 64 | } 65 | 66 | SECTION("is_empty_4") { 67 | // Overflow buffer with items 68 | RingBuffer rb(100); 69 | for (int i = 0; i < 350; i++) 70 | rb.push(i); 71 | 72 | REQUIRE(!rb.is_empty()); 73 | } 74 | 75 | SECTION("is_empty_5") { 76 | // Items added and all removed 77 | RingBuffer rb(100); 78 | for (int i = 0; i < 50; i++) 79 | rb.push(i); 80 | for (int i = 0; i < 50; i++) 81 | rb.pop(); 82 | 83 | REQUIRE(rb.is_empty()); 84 | } 85 | 86 | SECTION("is_empty_6") { 87 | // Items added and some removed 88 | RingBuffer rb(100); 89 | for (int i = 0; i < 50; i++) 90 | rb.push(i); 91 | for (int i = 0; i < 25; i++) 92 | rb.pop(); 93 | 94 | REQUIRE(!rb.is_empty()); 95 | } 96 | 97 | 98 | 99 | 100 | /* is_full() tests */ 101 | SECTION("is_full_1") { 102 | // No items added 103 | RingBuffer rb(100); 104 | 105 | REQUIRE(!rb.is_full()); 106 | } 107 | 108 | SECTION("is_full_2") { 109 | // A few items added 110 | RingBuffer rb(100); 111 | for (int i = 0; i < 5; i++) 112 | rb.push(i); 113 | 114 | REQUIRE(!rb.is_full()); 115 | } 116 | 117 | SECTION("is_full_3") { 118 | // Max out buffer with items 119 | RingBuffer rb(100); 120 | for (int i = 0; i < 100; i++) 121 | rb.push(i); 122 | 123 | REQUIRE(rb.is_full()); 124 | } 125 | 126 | SECTION("is_full_4") { 127 | // Overflow buffer with items 128 | RingBuffer rb(100); 129 | for (int i = 0; i < 350; i++) 130 | rb.push(i); 131 | 132 | REQUIRE(rb.is_full()); 133 | } 134 | 135 | SECTION("is_full_5") { 136 | // Items added and all removed 137 | RingBuffer rb(100); 138 | for (int i = 0; i < 50; i++) 139 | rb.push(i); 140 | for (int i = 0; i < 50; 
i++) 141 | rb.pop(); 142 | 143 | REQUIRE(!rb.is_full()); 144 | } 145 | 146 | SECTION("is_full_6") { 147 | // Items added and some removed 148 | RingBuffer rb(100); 149 | for (int i = 0; i < 50; i++) 150 | rb.push(i); 151 | for (int i = 0; i < 25; i++) 152 | rb.pop(); 153 | 154 | REQUIRE(!rb.is_full()); 155 | } 156 | 157 | 158 | 159 | 160 | /* push()/pop() tests */ 161 | SECTION("push_pop_1") { 162 | // Partially fill buffer, then empty it 163 | RingBuffer rb(100); 164 | bool test = true; 165 | for (int i = 0; i < 50; i++) 166 | rb.push(i); 167 | for (int i = 0; i < 50; i++) 168 | test = test && (rb.pop() == i); 169 | 170 | REQUIRE(test); 171 | } 172 | 173 | SECTION("push_pop_2") { 174 | // Fully fill buffer, then empty it 175 | RingBuffer rb(100); 176 | bool test = true; 177 | for (int i = 0; i < 100; i++) 178 | rb.push(i); 179 | for (int i = 0; i < 100; i++) 180 | test = test && (rb.pop() == i); 181 | 182 | REQUIRE(test); 183 | } 184 | 185 | SECTION("push_pop_3") { 186 | // Overflow buffer, then empty it 187 | RingBuffer rb(100); 188 | bool test = true; 189 | for (int i = 0; i < 350; i++) 190 | rb.push(i); 191 | /* Only the newest 100 values (250..349) are expected to survive the overflow. */ for (int i = 250; i < 350; i++) 192 | test = test && (rb.pop() == i); 193 | 194 | REQUIRE(test); 195 | } 196 | 197 | SECTION("push_pop_4") { 198 | // Push and pop repeatedly 199 | RingBuffer rb(100); 200 | bool test = true; 201 | for (int i = 0; i < 350; i++) { 202 | rb.push(i); 203 | test = test && (rb.pop() == i); 204 | } 205 | 206 | REQUIRE(test); 207 | } 208 | 209 | SECTION("push_pop_5") { 210 | // Push and pop repeatedly in chunks 211 | RingBuffer rb(100); 212 | bool test = true; 213 | for (int i = 0; i < 350; i++) { 214 | rb.push(i); 215 | rb.push(i+1); 216 | rb.push(i+2); 217 | rb.push(i+3); 218 | rb.push(i+4); 219 | rb.push(i+5); 220 | test = test && (rb.pop() == i); 221 | test = test && (rb.pop() == i+1); 222 | test = test && (rb.pop() == i+2); 223 | test = test && (rb.pop() == i+3); 224 | test = test && (rb.pop() == i+4); 225 | test = test && 
(rb.pop() == i+5); 226 | } 227 | 228 | REQUIRE(test); 229 | } 230 | } 231 | 232 | 233 | 234 | -------------------------------------------------------------------------------- /utils/rng.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RNG_HPP 2 | #define RNG_HPP 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | /** 13 | * @brief A pseudo-random number generator. 14 | * 15 | * Based on the JKISS generator from the paper 16 | * "Good Practice in (Pseudo) Random Number 17 | * Generation for Bioinformatics Applications" 18 | * by David Jones. 19 | * 20 | * This generator is surprisingly robust for how simple it is, passing all of 21 | * the Dieharder tests as well as the complete Big Crush test set in TestU01. 22 | * This robustness is comparable to the Mersenne Twister, excepting for the 23 | * smaller period (~2^127 compared to MT's enormous ~2^19937 period). 24 | * 25 | * This PRNG should be more than sufficient for most purposes. 26 | */ 27 | class RNG { 28 | private: 29 | uint32_t x, y, z, c; 30 | 31 | /** 32 | * @brief Core algorithm of the RNG. 33 | * 34 | * Progresses an RNG with state variables x_, y_, z_, c_. x_ advances by a linear congruential step, y_ by three xor-shifts, and z_/c_ by a multiply-with-carry step done in 64 bits; all four arguments are updated in place. 35 | * 36 | * @return The next unsigned 32-bit integer in the random sequence. 37 | */ 38 | uint32_t n(uint32_t &x_, uint32_t &y_, uint32_t &z_, uint32_t &c_) { 39 | uint64_t t; 40 | 41 | x_ = 314527869 * x_ + 1234567; 42 | 43 | y_ ^= y_ << 5; 44 | y_ ^= y_ >> 7; 45 | y_ ^= y_ << 22; 46 | 47 | t = 4294584393ULL * z_ + c_; 48 | c_ = t >> 32; 49 | z_ = t; 50 | 51 | return x_ + y_ + z_; 52 | } 53 | 54 | public: 55 | /** 56 | * @brief Constructor. 57 | * 58 | * Initializes the RNG with a thread-safe unique random seed. 59 | * Code that uses this constructor can depend on all RNG's from it 60 | * being independent with a high level of confidence. 
61 | */ 62 | RNG() { 63 | // The seeder is seeded with a combination of random_device, 64 | // large primes, and the current time. The idea is that if 65 | // random_device doesn't function well, the time and the 66 | // primes function as an okay fall-back. But ideally 67 | // random_device functions well. 68 | std::random_device rd; 69 | static uint32_t seeder_x = rd() + 2123403127 + std::chrono::high_resolution_clock::now().time_since_epoch().count(); 70 | static uint32_t seeder_y = rd() + 1987607653 + std::chrono::high_resolution_clock::now().time_since_epoch().count(); 71 | static uint32_t seeder_z = rd() + 3569508323 + std::chrono::high_resolution_clock::now().time_since_epoch().count(); 72 | static uint32_t seeder_c = rd() + 5206151 + std::chrono::high_resolution_clock::now().time_since_epoch().count(); 73 | 74 | // Use the seeder to create subsequent RNG's that are 75 | // unique from each other. 76 | static std::mutex mut; 77 | mut.lock(); 78 | seed(n(seeder_x, seeder_y, seeder_z, seeder_c), 79 | n(seeder_x, seeder_y, seeder_z, seeder_c), 80 | n(seeder_x, seeder_y, seeder_z, seeder_c), 81 | n(seeder_x, seeder_y, seeder_z, seeder_c)); 82 | mut.unlock(); 83 | } 84 | 85 | /** 86 | * @brief Constructor. 87 | * 88 | * Initializes the RNG with the given seed. Full 128-bit variant. 89 | */ 90 | RNG(uint32_t seed_a, uint32_t seed_b, uint32_t seed_c, uint32_t seed_d) { 91 | seed(seed_a, seed_b, seed_c, seed_d); 92 | } 93 | 94 | /** 95 | * @brief Constructor. 96 | * 97 | * Initializes the RNG with the given seed. 32-bit variant. 98 | */ 99 | RNG(uint32_t seed_) { 100 | seed(seed_); 101 | } 102 | 103 | /** 104 | * @brief Sets the seed of the RNG. 105 | * 106 | * Full 128-bit variant. 107 | */ 108 | void seed(uint32_t seed_a, uint32_t seed_b, uint32_t seed_c, uint32_t seed_d) { 109 | x = seed_a; 110 | y = seed_b; 111 | z = seed_c; 112 | c = seed_d; 113 | } 114 | 115 | /** 116 | * @brief Sets the seed of the RNG. 117 | * 118 | * 32-bit variant, for convenience. 
119 | */ 120 | void seed(uint32_t seed_) { 121 | // Scramble up the seed with offsets and multiplications 122 | // by large primes. 123 | seed((seed_+ 5) * 3885701021, 124 | (seed_ + 43) * 653005939, 125 | (seed_ + 13) * 1264700623, 126 | (seed_ + 67) * 37452703); 127 | 128 | // Run the RNG a couple of times 129 | n(x, y, z, c); 130 | n(x, y, z, c); 131 | } 132 | 133 | /** 134 | * @brief Returns a random unsigned 32-bit integer. Advances the generator state by one step. 135 | */ 136 | uint32_t next_uint() { 137 | return n(x, y, z, c); 138 | } 139 | 140 | /** 141 | * @brief Returns a random 32-bit float in the interval [0.0, 1.0). Advances the generator state by one step. 142 | */ 143 | float next_float() { 144 | // The following assumes an IEEE 32-bit binary floating point format. 145 | // Alternatively, you could just do "next_uint() / 4294967296.0" which 146 | // would accomplish the same thing, albeit slower. 147 | /* NOTE(review): `a` is written and `w` is then read from the same union (type punning); confirm every target compiler supports this. */ union { 148 | float w; 149 | uint32_t a; 150 | }; 151 | a = n(x, y, z, c) >> 9; // Take upper 23 bits 152 | a |= 0x3F800000; // Make a float from bits 153 | /* 0x3F800000 is the bit pattern of 1.0f, so w lies in [1.0, 2.0) before the subtraction. */ return w-1.f; 154 | } 155 | 156 | /** 157 | * @brief Returns a random 32-bit float in the interval [-0.5, 0.5). 158 | */ 159 | float next_float_c() { 160 | return next_float() - 0.5f; 161 | } 162 | }; 163 | 164 | #endif // RNG_HPP 165 | 166 | -------------------------------------------------------------------------------- /utils/rng_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | 3 | #include "rng.hpp" 4 | 5 | /* 6 | * Test suite for the random number generator. 
7 | */ 8 | TEST_CASE("RNG") { 9 | // Test to see if the RNG gives consistent results 10 | // when given the same seed 11 | SECTION("consistent") { 12 | RNG rng1; 13 | RNG rng2; 14 | bool equals = true; 15 | 16 | rng1.seed(42); 17 | rng2.seed(42); 18 | for (int i = 0; i < 100000; i++) { 19 | equals = equals && (rng1.next_uint() == rng2.next_uint()); 20 | equals = equals && (rng1.next_float() == rng2.next_float()); 21 | } 22 | 23 | REQUIRE(equals); 24 | } 25 | 26 | 27 | 28 | // Test to see if factory-spawned RNG's get different seeds 29 | SECTION("factory_seed_1") { 30 | RNG rng1; 31 | RNG rng2; 32 | bool equals = true; 33 | 34 | /* Passes as soon as any one of the compared draws differs between the two generators. */ for (int i = 0; i < 4; i++) { 35 | equals = equals && (rng1.next_uint() == rng2.next_uint()); 36 | equals = equals && (rng1.next_float() == rng2.next_float()); 37 | } 38 | 39 | REQUIRE(!equals); 40 | } 41 | 42 | // Test to see if the factory code works properly inside object 43 | // initializations 44 | class RNGTest { 45 | public: 46 | RNG rng; 47 | RNGTest() {} 48 | }; 49 | 50 | SECTION("factory_seed_2") { 51 | RNGTest rng1; 52 | RNGTest rng2; 53 | bool equals = true; 54 | 55 | for (int i = 0; i < 4; i++) { 56 | equals = equals && (rng1.rng.next_uint() == rng2.rng.next_uint()); 57 | equals = equals && (rng1.rng.next_float() == rng2.rng.next_float()); 58 | } 59 | 60 | REQUIRE(!equals); 61 | } 62 | } 63 | 64 | -------------------------------------------------------------------------------- /utils/simd_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | #include 3 | 4 | #include "simd.hpp" 5 | 6 | using namespace SIMD; 7 | 8 | /* Checks the four float4 constructors exercised below: default, single value, four values, and from an array. */ 9 | TEST_CASE("simd") { 10 | /* constructor tests */ 11 | SECTION("constructor_1") { 12 | float4 f; 13 | 14 | REQUIRE(f[0] == 0.0f); 15 | REQUIRE(f[1] == 0.0f); 16 | REQUIRE(f[2] == 0.0f); 17 | REQUIRE(f[3] == 0.0f); 18 | } 19 | 20 | SECTION("constructor_2") { 21 | float4 f(2.0f); 22 | 23 | REQUIRE(f[0] == 2.0f); 24 | REQUIRE(f[1] == 2.0f); 25 | 
REQUIRE(f[2] == 2.0f); 26 | REQUIRE(f[3] == 2.0f); 27 | } 28 | 29 | SECTION("constructor_3") { 30 | float4 f(1.0f, 2.0f, 3.0f, 4.0f); 31 | 32 | REQUIRE(f[0] == 1.0f); 33 | REQUIRE(f[1] == 2.0f); 34 | REQUIRE(f[2] == 3.0f); 35 | REQUIRE(f[3] == 4.0f); 36 | } 37 | 38 | SECTION("constructor_4") { 39 | float fs[4] = {1.0f, 2.0f, 3.0f, 4.0f}; 40 | float4 f(fs); 41 | 42 | REQUIRE(f[0] == 1.0f); 43 | REQUIRE(f[1] == 2.0f); 44 | REQUIRE(f[2] == 3.0f); 45 | REQUIRE(f[3] == 4.0f); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /utils/spinlock.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PSYCHOPATH_SPINLOCK_HPP 2 | #define PSYCHOPATH_SPINLOCK_HPP 3 | 4 | #include 5 | 6 | 7 | /* 8 | ** @brief A simple spinlock. 9 | * 10 | * Useful for low-contention thread synchronization, where the lock is 11 | * not held for very long compared to other work done. For locks that 12 | * are held for long periods of time, a mutex is generally better. 13 | */ 14 | class SpinLock { 15 | std::atomic_flag lock_flag {ATOMIC_FLAG_INIT}; 16 | public: 17 | /** 18 | * @brief Acquires the lock, spinning until success. The loop body is empty, so the calling thread busy-waits. 19 | */ 20 | void lock() { 21 | while (lock_flag.test_and_set(std::memory_order_acquire)); 22 | } 23 | 24 | /** 25 | * @brief Attempts to acquire the lock once, returning true on 26 | * success and false on failure. 27 | */ 28 | bool try_lock() { 29 | /* test_and_set returns the previous value, so a false result means the flag was clear and is now held by this caller. */ return !lock_flag.test_and_set(std::memory_order_acquire); 30 | } 31 | 32 | /** 33 | * @brief Releases the lock. 34 | */ 35 | void unlock() { 36 | lock_flag.clear(std::memory_order_release); 37 | } 38 | }; 39 | 40 | 41 | /** 42 | * @brief A reader-writer spinlock. 43 | * 44 | * Allows multiple readers to acquire the lock, but only one writer 45 | * at a time. Useful for cases where writers are rare compared to 46 | * readers and where the locks are generally only held for short 47 | * periods. 
48 | */ 49 | class SpinLockRW { 50 | std::atomic_flag w_lock {ATOMIC_FLAG_INIT}; // Writer lock 51 | std::atomic r_lock_count {0}; // Reader lock count 52 | public: 53 | /** 54 | * @brief Acquires the writer lock, spinning until success. Takes the writer flag first, then spins until the reader count drops to zero; readers cannot register while the flag is held. 55 | */ 56 | void lock_w() { 57 | while (w_lock.test_and_set(std::memory_order_acquire)); 58 | 59 | while (r_lock_count > 0); 60 | } 61 | 62 | /** 63 | * @brief Attempts to acquire the writer lock once, returning true 64 | * on success and false on failure. Fails if the writer flag is already set or if any reader is registered; in the latter case the flag is released again before returning. 65 | */ 66 | bool try_lock_w() { 67 | if (!w_lock.test_and_set(std::memory_order_acquire)) { 68 | if (r_lock_count == 0) { 69 | return true; 70 | } else { 71 | w_lock.clear(std::memory_order_release); 72 | return false; 73 | } 74 | } 75 | return false; 76 | } 77 | 78 | /** 79 | * @brief Releases the writer lock. 80 | */ 81 | void unlock_w() { 82 | w_lock.clear(std::memory_order_release); 83 | } 84 | 85 | /** 86 | * @brief Acquires a reader lock, spinning until success. The writer flag is held only long enough to increment the reader count, then released. 87 | */ 88 | void lock_r() { 89 | while (w_lock.test_and_set(std::memory_order_acquire)); 90 | ++r_lock_count; 91 | w_lock.clear(std::memory_order_release); 92 | } 93 | 94 | /** 95 | * @brief Attempts to acquire a reader lock once, returning true 96 | * on success and false on failure. Fails whenever the writer flag is set at that instant, which includes the brief moment another reader holds it while registering. 97 | */ 98 | bool try_lock_r() { 99 | if (w_lock.test_and_set(std::memory_order_acquire)) 100 | return false; 101 | ++r_lock_count; 102 | w_lock.clear(std::memory_order_release); 103 | return true; 104 | } 105 | 106 | /** 107 | * @brief Releases a reader lock. Only decrements the reader count; the writer flag is not touched. 108 | */ 109 | void unlock_r() { 110 | --r_lock_count; 111 | } 112 | }; 113 | 114 | #endif // PSYCHOPATH_SPINLOCK_HPP -------------------------------------------------------------------------------- /utils/stack.hpp: -------------------------------------------------------------------------------- 1 | #ifndef STACK_HPP 2 | #define STACK_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /** 10 | * A type-erased stack that can store arrays of POD data. 
11 | * 12 | * Do _not_ use this to store RAII types, as their destructors 13 | * will not be run. Also, you must keep track of the types 14 | * you store yourself. 15 | */ 16 | class Stack { 17 | std::vector data; 18 | std::vector> frames; 19 | 20 | public: 21 | Stack() = delete; 22 | /* Allocates data_capacity bytes of storage and installs an empty base frame at the start of it, so frames.back() is always valid. */ Stack(size_t data_capacity, size_t reserved_frames): data(data_capacity) { 23 | frames.reserve(reserved_frames+1); 24 | /* data() instead of &data[0]: well-defined even when data_capacity is zero. */ frames.emplace_back(std::make_pair(data.data(), data.data())); 25 | } 26 | 27 | 28 | /** 29 | * Pushes space for element_count items of type T, and returns pointers to 30 | * the beginning and just-past-the-end of the resulting array. The new frame starts at the first address at or after the end of the current top frame that is suitably aligned for T. Debug builds assert that the frame fits in the remaining storage. 31 | */ 32 | template 33 | std::pair push_frame(size_t element_count) { 34 | // Figure out how much padding we need between elements for proper 35 | // memory alignment if we put them in an array. 36 | /* sizeof(T) is always a multiple of alignof(T), so this evaluates to zero. */ constexpr auto array_pad = (alignof(T) - (sizeof(T) % alignof(T))) % alignof(T); 37 | 38 | // Total needed bytes for the requested array of data 39 | const auto needed_bytes = (sizeof(T) * element_count) + (array_pad * (element_count - 1)); 40 | 41 | // Figure out how much padding we need at the beginning to put the 42 | // first element in the right place for memory alignment. 43 | const auto mem_addr = reinterpret_cast(frames.back().second); 44 | const auto begin_pad = (alignof(T) - (mem_addr % alignof(T))) % alignof(T); 45 | /* The frame, including its leading alignment padding, must fit in the storage still free after the current top frame. */ assert(begin_pad + needed_bytes <= static_cast<size_t>((data.data() + data.size()) - frames.back().second)); 46 | // Push onto the stack 47 | char* begin = reinterpret_cast(mem_addr) + begin_pad; 48 | auto end = begin + needed_bytes; 49 | frames.emplace_back(std::make_pair(begin, end)); 50 | 51 | return std::make_pair(reinterpret_cast(begin), reinterpret_cast(end)); 52 | } 53 | 54 | /** 55 | * Returns the top frame, as pointers with the specified type T. No check is made that T matches the type the frame was pushed with. 56 | */ 57 | template 58 | std::pair top_frame() { 59 | return std::make_pair(reinterpret_cast(frames.back().first), reinterpret_cast(frames.back().second)); 60 | } 61 | 62 | /** 63 | * Returns a frame walking backwards from the top frame. Zero means the 64 | * top frame. 
65 | */ 66 | template 67 | std::pair prev_frame(size_t i) { 68 | assert(i < frames.size()); 69 | const auto i2 = frames.size() - i - 1; 70 | return std::make_pair(reinterpret_cast(frames[i2].first), reinterpret_cast(frames[i2].second)); 71 | } 72 | 73 | /** 74 | * Pops the top frame off the stack. 75 | * 76 | * This invalidates any pointers to that stack frame's memory, as that 77 | * memory may be used again for a subsequent stack frame push. 78 | */ 79 | void pop_frame() { 80 | frames.pop_back(); 81 | } 82 | 83 | /** 84 | * Clears the stack, as if no pushes had ever taken place. 85 | */ 86 | void clear() { 87 | frames.clear(); 88 | frames.emplace_back(std::make_pair(&(data[0]), &(data[0]))); 89 | } 90 | }; 91 | 92 | #endif // STACK_HPP -------------------------------------------------------------------------------- /utils/stack_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | 3 | #include "stack.hpp" 4 | 5 | struct alignas(64) Yar { 6 | int a, b; 7 | }; 8 | 9 | TEST_CASE("Stack") { 10 | SECTION("ints") { 11 | Stack s(1024, 64); 12 | 13 | auto f = s.push_frame(4); 14 | f.first[0] = 0; 15 | f.first[1] = 1; 16 | f.first[2] = 2; 17 | f.first[3] = 3; 18 | 19 | f = s.push_frame(4); 20 | f.first[0] = 4; 21 | f.first[1] = 5; 22 | f.first[2] = 6; 23 | f.first[3] = 7; 24 | 25 | auto tf = s.top_frame(); 26 | 27 | REQUIRE(tf.first[0] == 4); 28 | REQUIRE(tf.first[1] == 5); 29 | REQUIRE(tf.first[2] == 6); 30 | REQUIRE(tf.first[3] == 7); 31 | 32 | s.pop_frame(); 33 | 34 | tf = s.top_frame(); 35 | 36 | REQUIRE(tf.first[0] == 0); 37 | REQUIRE(tf.first[1] == 1); 38 | REQUIRE(tf.first[2] == 2); 39 | REQUIRE(tf.first[3] == 3); 40 | 41 | s.pop_frame(); 42 | } 43 | 44 | SECTION("alignment") { 45 | Stack s(1024, 64); 46 | 47 | s.push_frame(1); 48 | 49 | auto f = s.push_frame(4); 50 | 51 | auto tf = s.top_frame(); 52 | 53 | REQUIRE((reinterpret_cast(f.first) % 64) == 0); 54 | REQUIRE((reinterpret_cast(tf.second) % 64) 
== 0); 55 | REQUIRE(&(tf.first[4]) == tf.second); 56 | } 57 | } -------------------------------------------------------------------------------- /utils/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PSYCHOPATH_TIMER_HPP 2 | #define PSYCHOPATH_TIMER_HPP 3 | 4 | #include 5 | 6 | template 7 | class Timer { 8 | std::chrono::time_point start {CLOCK::now()}; 9 | 10 | public: 11 | /** 12 | * Reports the time elapsed so far in seconds. 13 | */ 14 | float time() { 15 | const auto end = CLOCK::now(); 16 | const float dur = static_cast((end-start).count()); 17 | return (dur * CLOCK::period::num) / CLOCK::period::den; 18 | } 19 | 20 | void reset() { 21 | start = CLOCK::now(); 22 | } 23 | }; 24 | 25 | #endif // PSYCHOPATH_TIMER_HPP -------------------------------------------------------------------------------- /utils/utils_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | 3 | #include 4 | #include "utils.hpp" 5 | 6 | TEST_CASE("mutable_partition") { 7 | SECTION("already_partitioned") { 8 | std::vector v {1, 1, 1, 1, 2, 2, 2, 2}; 9 | 10 | auto p = mutable_partition(v.begin(), v.end(), [](int& i) { 11 | return i == 1; 12 | }); 13 | 14 | REQUIRE(p == v.begin() + 4); 15 | REQUIRE(v[0] == 1); 16 | REQUIRE(v[1] == 1); 17 | REQUIRE(v[2] == 1); 18 | REQUIRE(v[3] == 1); 19 | REQUIRE(v[4] == 2); 20 | REQUIRE(v[5] == 2); 21 | REQUIRE(v[6] == 2); 22 | REQUIRE(v[7] == 2); 23 | } 24 | 25 | SECTION("reverse") { 26 | std::vector v {2, 2, 2, 2, 1, 1, 1, 1}; 27 | 28 | auto p = mutable_partition(v.begin(), v.end(), [](int& i) { 29 | return i == 1; 30 | }); 31 | 32 | REQUIRE(p == v.begin() + 4); 33 | REQUIRE(v[0] == 1); 34 | REQUIRE(v[1] == 1); 35 | REQUIRE(v[2] == 1); 36 | REQUIRE(v[3] == 1); 37 | REQUIRE(v[4] == 2); 38 | REQUIRE(v[5] == 2); 39 | REQUIRE(v[6] == 2); 40 | REQUIRE(v[7] == 2); 41 | } 42 | 43 | SECTION("interleaved") { 44 | std::vector v {2, 1, 2, 1, 2, 1, 2, 
1}; 45 | 46 | auto p = mutable_partition(v.begin(), v.end(), [](int& i) { 47 | return i == 1; 48 | }); 49 | 50 | REQUIRE(p == v.begin() + 4); 51 | REQUIRE(v[0] == 1); 52 | REQUIRE(v[1] == 1); 53 | REQUIRE(v[2] == 1); 54 | REQUIRE(v[3] == 1); 55 | REQUIRE(v[4] == 2); 56 | REQUIRE(v[5] == 2); 57 | REQUIRE(v[6] == 2); 58 | REQUIRE(v[7] == 2); 59 | } 60 | 61 | SECTION("all_true") { 62 | std::vector v {1, 1, 1, 1, 1, 1, 1, 1}; 63 | 64 | auto p = mutable_partition(v.begin(), v.end(), [](int& i) { 65 | return i == 1; 66 | }); 67 | 68 | REQUIRE(p == v.end()); 69 | REQUIRE(v[0] == 1); 70 | REQUIRE(v[1] == 1); 71 | REQUIRE(v[2] == 1); 72 | REQUIRE(v[3] == 1); 73 | REQUIRE(v[4] == 1); 74 | REQUIRE(v[5] == 1); 75 | REQUIRE(v[6] == 1); 76 | REQUIRE(v[7] == 1); 77 | } 78 | 79 | SECTION("all_false") { 80 | std::vector v {2, 2, 2, 2, 2, 2, 2, 2}; 81 | 82 | auto p = mutable_partition(v.begin(), v.end(), [](int& i) { 83 | return i == 1; 84 | }); 85 | 86 | REQUIRE(p == v.begin()); 87 | REQUIRE(v[0] == 2); 88 | REQUIRE(v[1] == 2); 89 | REQUIRE(v[2] == 2); 90 | REQUIRE(v[3] == 2); 91 | REQUIRE(v[4] == 2); 92 | REQUIRE(v[5] == 2); 93 | REQUIRE(v[6] == 2); 94 | REQUIRE(v[7] == 2); 95 | } 96 | 97 | SECTION("predicate_run_once_per_element") { 98 | std::vector v {2, 1, 2, 1, 2, 1, 2, 1}; 99 | int n = 0; 100 | 101 | auto p = mutable_partition(v.begin(), v.end(), [&](int& i) { 102 | ++n; 103 | return i == 1; 104 | }); 105 | 106 | REQUIRE(n == 8); 107 | REQUIRE(p == v.begin() + 4); 108 | REQUIRE(v[0] == 1); 109 | REQUIRE(v[1] == 1); 110 | REQUIRE(v[2] == 1); 111 | REQUIRE(v[3] == 1); 112 | REQUIRE(v[4] == 2); 113 | REQUIRE(v[5] == 2); 114 | REQUIRE(v[6] == 2); 115 | REQUIRE(v[7] == 2); 116 | } 117 | } --------------------------------------------------------------------------------