├── .gitignore
├── CMakeLists.txt
├── README.md
├── TODO
├── accel
    ├── CMakeLists.txt
    ├── accel.hpp
    ├── bvh.cpp
    ├── bvh.hpp
    ├── bvh2.cpp
    ├── bvh2.hpp
    ├── bvh4.cpp
    ├── bvh4.hpp
    ├── light_accel.hpp
    ├── light_array.cpp
    ├── light_array.hpp
    ├── light_tree.cpp
    └── light_tree.hpp
├── basics
    ├── bbox.hpp
    ├── bbox2_test.cpp
    ├── bbox4_test.cpp
    ├── bbox_test.cpp
    ├── camera.hpp
    ├── differential_geometry.hpp
    ├── instance_id.hpp
    ├── instance_id_test.cpp
    ├── intersection.hpp
    └── ray.hpp
├── cmake
    └── FindIlmBase.cmake
├── color
    ├── color.hpp
    ├── spectra_xyz_5nm_380_780_0.97.h
    └── spectrum_grid.h
├── config.cpp
├── config.h.in
├── config.hpp
├── docs
    └── scene_format_example.psy
├── film
    ├── film.hpp
    └── raster.hpp
├── format_code.sh
├── global.cpp
├── global.hpp
├── integrator
    ├── CMakeLists.txt
    ├── integrator.hpp
    ├── path_trace_integrator.cpp
    └── path_trace_integrator.hpp
├── lights
    ├── light.hpp
    ├── point_light.hpp
    ├── rectangle_light.hpp
    └── sphere_light.hpp
├── main.cpp
├── math
    ├── CMakeLists.txt
    ├── matrix.hpp
    ├── matrix_test.cpp
    ├── transform.hpp
    ├── vector.hpp
    └── vector_test.cpp
├── object
    ├── CMakeLists.txt
    ├── bicubic.cpp
    ├── bicubic.hpp
    ├── bilinear.cpp
    ├── bilinear.hpp
    ├── object.hpp
    ├── patch_utils.hpp
    ├── sphere.cpp
    ├── sphere.hpp
    ├── subdivision_surface.cpp
    └── subdivision_surface.hpp
├── parser
    ├── CMakeLists.txt
    ├── data_tree.cpp
    ├── data_tree.hpp
    ├── parser.cpp
    ├── parser.hpp
    └── utf8.hpp
├── psychoblend
    ├── __init__.py
    ├── psy_export.py
    ├── render.py
    └── ui.py
├── renderer
    ├── CMakeLists.txt
    ├── renderer.cpp
    └── renderer.hpp
├── sampling
    ├── CMakeLists.txt
    ├── halton.cpp
    ├── halton.hpp
    ├── halton.py
    ├── image_sampler.cpp
    ├── image_sampler.hpp
    ├── samples.hpp
    ├── sobol.cpp
    └── sobol.hpp
├── scene
    ├── assembly.hpp
    └── scene.hpp
├── shading
    ├── closure_union.hpp
    ├── displacement_shader.hpp
    ├── surface_closure.hpp
    └── surface_shader.hpp
├── test
    ├── catch.hpp
    ├── test.hpp
    ├── test_float.cpp
    └── test_main.cpp
├── tracer
    ├── CMakeLists.txt
    ├── potentialinter.hpp
    ├── tracer.cpp
    └── tracer.hpp
└── utils
    ├── bit_stack.hpp
    ├── blocked_array.hpp
    ├── blocked_array_disk_cache.hpp
    ├── chunked_array.hpp
    ├── chunked_array_test.cpp
    ├── counting_sort.hpp
    ├── disk_cache.hpp
    ├── disk_cache_test.cpp
    ├── hash.hpp
    ├── hilbert.hpp
    ├── job_queue.hpp
    ├── job_queue_test.cpp
    ├── low_level.hpp
    ├── lru_cache.hpp
    ├── mis.hpp
    ├── monte_carlo.hpp
    ├── morton.hpp
    ├── numtype.h
    ├── range.hpp
    ├── ring_buffer.hpp
    ├── ring_buffer_atomic.hpp
    ├── ring_buffer_concurrent.hpp
    ├── ring_buffer_concurrent_test.cpp
    ├── ring_buffer_test.cpp
    ├── rng.hpp
    ├── rng_test.cpp
    ├── simd.hpp
    ├── simd_test.cpp
    ├── spinlock.hpp
    ├── stack.hpp
    ├── stack_test.cpp
    ├── timer.hpp
    ├── utils.hpp
    └── utils_test.cpp


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Auto-formatting backups
 2 | *.orig
 3 | *.orig~
 4 | 
 5 | # Python Byte-compiled / optimized / DLL files
 6 | __pycache__/
 7 | *.py[cod]
 8 | 
 9 | .zedstate
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Note
 2 | ====
 3 | This repo is the old version of Psychopath, and no further development is happening here.  The new repo is at https://github.com/cessen/psychopath
 4 | 
 5 | Overview
 6 | ========
 7 | 
 8 | Psychopath is a path tracer, aimed at rendering animations and VFX for
 9 | film.  It is currently still in an early prototyping stage of development.  You
10 | can view a brief video of some animations rendered with it in
11 | [this (now somewhat out-dated)
12 | video](https://www.youtube.com/watch?v=rydLFAdhseo).
13 | 
14 | The long-term goals of Psychopath are to support efficient global illumination
15 | rendering of scenes that are significantly larger than available RAM and/or
16 | that contain procedural elements that need to be generated on-the-fly during
17 | rendering.
18 | 
19 | The approach that Psychopath takes to enable this is to try to access the scene
20 | data in as coherent a fashion as possible via breadth-first ray tracing,
21 | allowing the cost of HDD access, expensive procedurals, etc. to be amortized
22 | over large batches of rays.
23 | 
 24 | In its current state this principle and its effectiveness are demonstrated by
 25 | refining geometry to sub-pixel microgeometry on the fly during the rendering process, somewhat analogous to the Reyes rendering architecture.  Even with geometry caching completely disabled, Psychopath is able to render using this technique very efficiently.
26 | 
27 | Current Features
28 | ----------------
29 | - Spheres
30 | - Bilinear patches
31 | - Bicubic bezier patches
32 | - Catmull-Clark subdivision surfaces
33 | - Spherical light sources
34 | - Rectangular light sources
35 | - Simple shader system (assign BSDF's to objects)
36 | - Multiple importance sampling
37 | - Spectral rendering (via monte carlo, not binning)
38 | - Focal blur / DoF
39 | - Camera motion blur
40 | - Deformation motion blur
41 | - Transforms and transform motion blur
42 | - Full hierarchical instancing
43 | 
44 | Features Currently In-Progress
45 | ------------------------------
46 | - A novel method for efficiently handling many (i.e. thousands or millions) of
47 |   lights in a scene.  See [this thread](http://ompf2.com/viewtopic.php?f=3&t=1938) for an overview.
48 | 
49 | 
50 | 
51 | PsychoBlend
52 | ===========
53 | 
54 | Included in the repository is an addon for [Blender](http://www.blender.org)
55 | called "PsychoBlend" that allows you to do basic rendering of Blender scenes
56 | with Psychopath.  Most Blender features are not yet supported.
57 | 
58 | Features Supported
59 | ------------------
60 | - Quad mesh export as bilinear patches
61 | - Mesh export as Catmull-Clark subdivision surfaces (when marked as such)
62 | - Point lights exported as spherical lights (point lights have a "radius" setting)
63 | - Area lights, exported as rectangular area lights
64 | - Simple material system for assigning different BSDF's to different objects
65 | - Focal blur / DoF
66 | - Camera motion blur
67 | - Transform motion blur
68 | - Deformation motion blur
69 | - Exports dupligroups with full hierarchical instancing
70 | - Limited auto-detected instancing of meshes
71 | 


--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
 1 | - Bugs:
 2 | 	//- Very small sphere lights introduce severe banding in light attenuation.
 3 | 	- Some scenes hang when rendering at high sample rates.
 4 | 
 5 | - Iterative intersections:
 6 |     - Displacement tests
 7 | 
 8 | - Unit tests:
 9 | 	- BitStack
10 | 	- BitStack2
11 | 
12 | - Ray stream tracing:
13 | 	- Optimizations...
14 | 
15 | //- Multiple importance sampling
16 | 
17 | - LightTree:
18 |     - Investigate using importance resampling along with leaf nodes that have
19 |       more than one light in them.
20 |     - Investigate having the BSDF's provide methods for working with the
21 |       LightTree, to better optimize light selection based on the BSDF.
22 | 
23 | //- Hero wavelength spectral rendering
24 | 
25 | - Change assemblies to use a memory arena for object/shader/etc. data.
26 | 
27 | - Light sources:
28 |     - Infinite lights (e.g. sun lights) will be considered part of the background,
29 |       along with e.g. HDRI lighting backgrounds.
30 |     - Point lights should be a special-case of sphere lights, with radius zero.
31 |     - Spot lights
32 |     //- Rectangular area lights
33 |     - A way to mark other objects as light sources, for MIS (this can't be done
34 |       automatically in the general case...?)
35 | 
36 | - Parsing:
37 | 	- Give useful error messages...
38 | 	- Support binary geometry files of some kind, for faster scene loading
39 | 
40 | - PsychoBlend:
41 | 	- Allow per-object motion blur segment specification
42 | 
43 | - Shading system
44 | 	//- Stupid simple shaders first
45 | 	- A few hard-coded displacement shaders, to verify the ideas work (e.g.
46 | 	  with interval arithmetic).
47 | 	- Then use OSL
48 | 
49 | - More geometry types:
50 | 	- Catmull-clark subdivision surfaces:
51 | 	    //- Basic proof-of-concept implementation using OpenSubdiv
52 | 	    //- Deformation motion blur support
53 | 	    //- BVH acceleration
54 | 	    - Face-varying data support
55 | 	- Triangle meshes
56 | 
57 | - Film class:
58 | 	- Make film class more data-type agnostic.  It should be the responsibility
 59 | 	  of the Renderer and Integrator to make sure pixel data is interpreted and
60 | 	  used correctly.
61 | 
62 | - Cleanup:
63 | 	- Add NaN and Inf catching to key places in code
64 | 


--------------------------------------------------------------------------------
/accel/CMakeLists.txt:
--------------------------------------------------------------------------------
# Static library containing the ray and light acceleration structures.
# Source files are listed with explicit extensions: relying on CMake to
# guess the extension is deprecated (policy CMP0115, CMake >= 3.20).
add_library(accel
    bvh.cpp bvh2.cpp bvh4.cpp light_array.cpp light_tree.cpp)
3 | 


--------------------------------------------------------------------------------
/accel/accel.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef ACCEL_HPP
 2 | #define ACCEL_HPP
 3 | 
 4 | #include "numtype.h"
 5 | #include "ray.hpp"
 6 | #include "object.hpp"
 7 | #include "bbox.hpp"
 8 | 
 9 | #include <vector>
10 | #include <tuple>
11 | #include <memory>
12 | 
13 | // Forward declaration of Assembly from scene/assembly.hpp
14 | class Assembly;
15 | 
16 | /**
17 |  * @brief An acceleration structure for a scene hierarchy.
18 |  *
19 |  * This pure virtual class should never be used directly.  It's only purpose
20 |  * is to enforce an interface for classes that inherit from it.
21 |  */
22 | class Accel {
23 | public:
24 | 	virtual ~Accel() {}
25 | 
26 | 	/**
27 | 	 * @brief Builds the acceleration structure from the given assembly.
28 | 	 */
29 | 	virtual void build(const Assembly& assembly) = 0;
30 | 
31 | 	/**
32 | 	 * @brief Returns the spatial bounds of the acceleration structure.
33 | 	 *
34 | 	 * Should not be called until after build() is called.
35 | 	 */
36 | 	virtual const std::vector<BBox>& bounds() const = 0;
37 | };
38 | 
39 | 
40 | /**
41 |  * @brief An acceleration structure traverser that traverses with many rays at once
42 |  * in a breadth-first fashion.
43 |  *
44 |  * This pure virtual template class should never be used directly.  It's only purpose
45 |  * is to enforce an interface for classes that inherit from it.
46 |  */
47 | template <typename T>
48 | class AccelStreamTraverser {
49 | public:
50 | 	virtual ~AccelStreamTraverser() {}
51 | 
52 | 	/**
53 | 	 * @brief Initializes the traverser for traversing the given
54 | 	 * acceleration structure.
55 | 	 */
56 | 	virtual void init_accel(const T& accel) = 0;
57 | 
58 | 	/**
59 | 	 * @brief Initializes the traverser for traversing with
60 | 	 * the given WorldRays.
61 | 	 *
62 | 	 * This resets any traversal already in progress.
63 | 	 */
64 | 	virtual void init_rays(Ray* begin, Ray* end) = 0;
65 | 
66 | 	/**
67 | 	 * @brief Traverses to the next relevant object.
68 | 	 *
69 | 	 * Returns a tuple with a pair of iterators to the begin and end of the
70 | 	 * relevant Rays, and an index to the object instance they need to be
71 | 	 * tested against.
72 | 	 *
73 | 	 * When traversal is complete, begin == end and object == 0.
74 | 	 */
75 | 	virtual std::tuple<Ray*, Ray*, size_t>
76 | 	next_object() = 0;
77 | };
78 | 
79 | #endif // ACCEL_HPP
80 | 


--------------------------------------------------------------------------------
/accel/bvh.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef BVH_HPP
  2 | #define BVH_HPP
  3 | 
  4 | #include <stdlib.h>
  5 | #include <iostream>
  6 | #include <vector>
  7 | #include <memory>
  8 | 
  9 | #include "numtype.h"
 10 | #include "global.hpp"
 11 | 
 12 | #include "accel.hpp"
 13 | #include "object.hpp"
 14 | #include "ray.hpp"
 15 | #include "bbox.hpp"
 16 | #include "utils.hpp"
 17 | #include "vector.hpp"
 18 | 
 19 | 
 20 | 
 21 | /*
 22 |  * A bounding volume hierarchy.
 23 |  */
 24 | class BVH: public Accel {
 25 | 	std::vector<BBox> _bounds {BBox()};
 26 | public:
 27 | 	virtual ~BVH() {};
 28 | 	virtual void build(const Assembly& assembly);
 29 | 	virtual const std::vector<BBox>& bounds() const {
 30 | 		return _bounds;
 31 | 	};
 32 | 
 33 | 	// Traversers need access to private data
 34 | 	friend class BVHStreamTraverser;
 35 | 
 36 | 	enum {
 37 | 		IS_LEAF = 1 << 0
 38 | 	};
 39 | 
 40 | 	/*
 41 | 	 * A node of a bounding volume hierarchy.
 42 | 	 * Contains a bounding box, a flag for whether
 43 | 	 * it's a leaf or not, a pointer to its first
 44 | 	 * child, and it's data if it's a leaf.
 45 | 	 */
 46 | 	struct Node {
 47 | 		size_t bbox_index = 0;
 48 | 		union {
 49 | 			size_t child_index = 0;
 50 | 			size_t data_index;
 51 | 		};
 52 | 		size_t parent_index = 0;
 53 | 		uint16_t ts = 0; // Time sample count
 54 | 		uint16_t flags = 0;
 55 | 	};
 56 | 
 57 | 	/*
 58 | 	 * Used to store objects that have yet to be
 59 | 	 * inserted into the hierarchy.
 60 | 	 * Contains the time 0.5 bounds of the object and it's centroid.
 61 | 	 */
 62 | 	struct BVHPrimitive {
 63 | 		size_t instance_index;
 64 | 		Vec3 bmin, bmax, c;
 65 | 	};
 66 | 
 67 | public:
 68 | 	// This stuff is public because BVH is used as the basis
 69 | 	// for building other BVH's like BVH2 and BVH4, and they need
 70 | 	// direct access.
 71 | 	std::vector<Node> nodes;
 72 | 	std::vector<BBox> bboxes;
 73 | 
 74 | 	/**
 75 | 	 * @brief Returns the index of the first child
 76 | 	 * of the node with the given index.
 77 | 	 */
 78 | 	inline size_t child1(const size_t node_i) const {
 79 | 		return node_i + 1;
 80 | 	}
 81 | 
 82 | 	/**
 83 | 	 * @brief Returns the index of the second child
 84 | 	 * of the node with the given index.
 85 | 	 */
 86 | 	inline size_t child2(const size_t node_i) const {
 87 | 		return nodes[node_i].child_index;
 88 | 	}
 89 | 
 90 | 	/**
 91 | 	 * @brief Returns the index of the sibling
 92 | 	 * of the node with the given index.
 93 | 	 */
 94 | 	inline size_t sibling(const size_t node_i) const {
 95 | 		const size_t parent_i = nodes[node_i].parent_index;
 96 | 		if (node_i == (parent_i + 1))
 97 | 			return nodes[parent_i].child_index;
 98 | 		else
 99 | 			return parent_i + 1;
100 | 	}
101 | 
102 | 	inline bool is_leaf(const size_t node_i) const {
103 | 		return nodes[node_i].flags & IS_LEAF;
104 | 	}
105 | 
106 | private:
107 | 	const Assembly* assembly; // Set during build()
108 | 	//std::vector<BBox> bbox;
109 | 	std::vector<BVHPrimitive> bag;  // Temporary holding spot for objects not yet added to the hierarchy
110 | 
111 | 	bool finalize();
112 | 
113 | 	/**
114 | 	 * @brief Tests whether a ray intersects a node or not.
115 | 	 */
116 | 	inline bool intersect_node(const uint64_t node_i, const Ray& ray, float *near_t, float *far_t) const {
117 | #ifdef GLOBAL_STATS_TOP_LEVEL_BVH_NODE_TESTS
118 | 		Global::Stats::top_level_bvh_node_tests++;
119 | #endif
120 | 		const Node& node = nodes[node_i];
121 | 		const BBox b = lerp_seq(ray.time, bboxes.cbegin() + node.bbox_index, bboxes.cbegin() + node.bbox_index + node.ts);
122 | 		return b.intersect_ray(ray, near_t, far_t, ray.max_t);
123 | 	}
124 | 
125 | 	size_t split_primitives(size_t first_prim, size_t last_prim);
126 | 	size_t recursive_build(size_t parent, size_t first_prim, size_t last_prim);
127 | };
128 | 
129 | 
130 | 
131 | /**
132 |  * @brief A breadth-first traverser for BVH.
133 |  */
134 | class BVHStreamTraverser: public AccelStreamTraverser<BVH> {
135 | public:
136 | 	virtual ~BVHStreamTraverser() {}
137 | 
138 | 	virtual void init_accel(const BVH& accel) {
139 | 		bvh = &accel;
140 | 	}
141 | 
142 | 	virtual void init_rays(Ray* begin, Ray* end) {
143 | 		rays = begin;
144 | 		rays_end = end;
145 | 
146 | 		// Initialize stack
147 | 		stack_ptr = 0;
148 | 		node_stack[0] = 0;
149 | 		ray_stack[0].first = rays;
150 | 		ray_stack[0].second = rays_end;
151 | 	}
152 | 
153 | 	virtual std::tuple<Ray*, Ray*, size_t> next_object();
154 | 
155 | private:
156 | 	const BVH* bvh = nullptr;
157 | 	Ray* rays = nullptr;
158 | 	Ray* rays_end = nullptr;
159 | 
160 | 	// Stack data
161 | #define BVHST_STACK_SIZE 64
162 | 	int stack_ptr;
163 | 	size_t node_stack[BVHST_STACK_SIZE];
164 | 	std::pair<Ray*, Ray*> ray_stack[BVHST_STACK_SIZE];
165 | 
166 | };
167 | 
168 | 
169 | #endif // BVH_HPP
170 | 


--------------------------------------------------------------------------------
/accel/bvh2.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <algorithm>
  3 | #include <memory>
  4 | #include <tuple>
  5 | #include <iterator>
  6 | #include <cmath>
  7 | 
  8 | #include "numtype.h"
  9 | #include "bvh2.hpp"
 10 | 
 11 | #include "simd.hpp"
 12 | #include "ray.hpp"
 13 | #include "assembly.hpp"
 14 | #include "utils.hpp"
 15 | 
 16 | 
 17 | 
 18 | void BVH2::build(const Assembly& assembly) {
 19 | 	// Build a normal BVH as a starting point
 20 | 	BVH bvh;
 21 | 	bvh.build(assembly);
 22 | 
 23 | 	if (bvh.nodes.size() == 0)
 24 | 		return;
 25 | 
 26 | 	// Pack BVH into more efficient BVH2
 27 | 	nodes.push_back(Node());
 28 | 	for (size_t bni = 0; bni < bvh.nodes.size(); ++bni) {
 29 | 		BVH::Node& bn = bvh.nodes[bni];
 30 | 		size_t ni = nodes.size() - 1; // Node index
 31 | 
 32 | 		// Set the values that don't depend on whether this
 33 | 		// is a leaf node or not.
 34 | 		if (bn.flags & IS_RIGHT)
 35 | 			nodes[bn.parent_index].child_index = ni;  // Set parent's child_index field to point to this
 36 | 
 37 | 		// Set the values that _do_ depend on whether this is
 38 | 		// a leaf node or not.
 39 | 		if (bn.flags & BVH::IS_LEAF) {
 40 | 			nodes[ni].child_index = 0; // Indicates that this is a leaf node
 41 | 			nodes[ni].data_index = bn.data_index;
 42 | 			nodes.push_back(Node());
 43 | 		} else {
 44 | 			BVH::Node& child1 = bvh.nodes[bvh.child1(bni)];
 45 | 			BVH::Node& child2 = bvh.nodes[bvh.child2(bni)];
 46 | 
 47 | 			// Let right child know that it's right
 48 | 			child2.flags |= IS_RIGHT;
 49 | 
 50 | 			// Set the parent index fields in the child build nodes
 51 | 			// to refer to the parent Node instead of the parent BVH::Node
 52 | 			child1.parent_index = ni;
 53 | 			child2.parent_index = ni;
 54 | 
 55 | 			// If children have same number of time samples, easy
 56 | 			if (child1.ts == child2.ts) {
 57 | 				nodes[ni].ts = child1.ts;
 58 | 				for (uint16_t i = 0; i < child1.ts; ++i) {
 59 | 					nodes.back().bounds = BBox2(bvh.bboxes[child1.bbox_index+i], bvh.bboxes[child2.bbox_index+i]);
 60 | 					nodes.push_back(Node());
 61 | 				}
 62 | 			}
 63 | 			// If children have different number of time samples,
 64 | 			// interpolate one or the other
 65 | 			else if (child1.ts > child2.ts) {
 66 | 				nodes[ni].ts = child1.ts;
 67 | 				const float s = child1.ts - 1;
 68 | 				auto cbegin = bvh.bboxes.cbegin() + child2.bbox_index;
 69 | 				auto cend = cbegin + child2.ts;
 70 | 
 71 | 				for (uint16_t i = 0; i < child1.ts; ++i) {
 72 | 					nodes.back().bounds = BBox2(bvh.bboxes[child1.bbox_index+i], lerp_seq(i/s, cbegin, cend));
 73 | 					nodes.push_back(Node());
 74 | 				}
 75 | 			} else {
 76 | 				nodes[ni].ts = child2.ts;
 77 | 				const float s = child2.ts - 1;
 78 | 				auto cbegin = bvh.bboxes.cbegin() + child1.bbox_index;
 79 | 				auto cend = cbegin + child1.ts;
 80 | 
 81 | 				for (uint16_t i = 0; i < child2.ts; ++i) {
 82 | 					nodes.back().bounds = BBox2(lerp_seq(i/s, cbegin, cend), bvh.bboxes[child2.bbox_index+i]);
 83 | 					nodes.push_back(Node());
 84 | 				}
 85 | 			}
 86 | 		}
 87 | 	}
 88 | 
 89 | 	// Store top-level bounds
 90 | 	auto begin = bvh.bboxes.begin() + bvh.nodes[0].bbox_index;
 91 | 	auto end = begin + bvh.nodes[0].ts;
 92 | 	_bounds.clear();
 93 | 	_bounds.insert(_bounds.begin(), begin, end);
 94 | }
 95 | 
 96 | 
 97 | 
/**
 * @brief Continues the traversal until a leaf with at least one relevant
 * ray is reached, or until the traversal stack is exhausted.
 *
 * @return A tuple of (begin of relevant rays, end of relevant rays,
 * data index of the leaf reached).  When the stack is exhausted the
 * tuple is (rays_end, rays_end, 0).
 *
 * NOTE(review): nothing here checks stack_ptr+1 against BVH2_STACK_SIZE
 * before writing to the stacks.
 */
std::tuple<Ray*, Ray*, size_t> BVH2StreamTraverser::next_object() {
	while (stack_ptr >= 0) {
		if (bvh->is_leaf(node_stack[stack_ptr])) {
			// Keep only the rays that are not done and that are meant to
			// visit this node.  On the very first call no ray has anything
			// on its traversal stack yet, so the pop is skipped.
			// (Presumably mutable_partition() returns the end of the range
			// of rays satisfying the predicate -- confirm in utils.hpp.)
			ray_stack[stack_ptr].second = mutable_partition(ray_stack[stack_ptr].first, ray_stack[stack_ptr].second, [&](Ray& ray) {
				return !ray.is_done() && (first_call || ray.trav_stack.pop());
			});

			// Hand the leaf back to the caller only if some rays remain;
			// either way this stack entry is finished.
			if (std::distance(ray_stack[stack_ptr].first, ray_stack[stack_ptr].second) > 0) {
				auto rv = std::make_tuple(&(*ray_stack[stack_ptr].first), &(*ray_stack[stack_ptr].second), bvh->nodes[node_stack[stack_ptr]].data_index);
				--stack_ptr;
				return rv;
			} else {
				--stack_ptr;
			}
		} else {
			// Range of Node entries holding this node's time samples
			const auto cbegin = bvh->nodes.cbegin() + node_stack[stack_ptr];
			const auto cend = cbegin + bvh->nodes[node_stack[stack_ptr]].ts;

			SIMD::float4 near_hits;
			// `flip` is decided once per node, by the first ray that hits
			// anything, and is then applied to every ray in the batch.
			bool flip_set = false;
			bool flip = false;

			// Test rays against current node's children
			ray_stack[stack_ptr].second = mutable_partition(ray_stack[stack_ptr].first, ray_stack[stack_ptr].second, [&](Ray& ray) {
				if (!ray.is_done() && (first_call || ray.trav_stack.pop())) {
					// Get the time-interpolated bounding box
					const BBox2 b = lerp_seq(ray.time, cbegin, cend).bounds;

					// Ray test
					const auto hit_mask = b.intersect_ray(ray, &near_hits);

					if (hit_mask != 0) {
						if (!flip_set) {
							flip_set = true;
							// Flip when the second child's near hit is closer
							flip = near_hits[0] > near_hits[1];
						}

						// Record which children this ray hit, swapping the
						// two bits when the child order is flipped.
						// (Presumably push(x, 2) only stores the low two
						// bits of x -- confirm in bit_stack.hpp.)
						if (flip)
							ray.trav_stack.push((hit_mask >> 1) | (hit_mask << 1), 2);
						else
							ray.trav_stack.push(hit_mask, 2);
					}

					return hit_mask != 0;
				} else {
					return false;
				}
			});

			if (first_call)
				first_call = false;

			// If any rays hit, traverse deeper
			if (std::distance(ray_stack[stack_ptr].first, ray_stack[stack_ptr].second) > 0) {
				// Both children get the same ray range; the per-ray
				// traversal stacks decide which rays visit which child.
				ray_stack[stack_ptr+1] = ray_stack[stack_ptr];

				// The child placed on top of the stack is visited first.
				if (flip) {
					node_stack[stack_ptr+1] = bvh->child2(node_stack[stack_ptr]);
					node_stack[stack_ptr] = bvh->child1(node_stack[stack_ptr]);
				} else {
					node_stack[stack_ptr+1] = bvh->child1(node_stack[stack_ptr]);
					node_stack[stack_ptr] = bvh->child2(node_stack[stack_ptr]);
				}

				++stack_ptr;
			}
			// If no rays hit, go to next stack item
			else {
				--stack_ptr;
			}
		}
	}

	// Finished traversal
	return std::make_tuple(rays_end, rays_end, 0);
}


--------------------------------------------------------------------------------
/accel/bvh2.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef BVH2_HPP
  2 | #define BVH2_HPP
  3 | 
  4 | #include <stdlib.h>
  5 | #include <iostream>
  6 | #include <vector>
  7 | #include <deque>
  8 | #include <memory>
  9 | #include <tuple>
 10 | 
 11 | #include "numtype.h"
 12 | #include "global.hpp"
 13 | 
 14 | #include "accel.hpp"
 15 | #include "bvh.hpp"
 16 | #include "object.hpp"
 17 | #include "ray.hpp"
 18 | #include "bbox.hpp"
 19 | #include "utils.hpp"
 20 | #include "vector.hpp"
 21 | 
 22 | 
 23 | 
 24 | 
/*
 * A bounding volume hierarchy in which each inner node stores the
 * bounds of both of its children together (as a BBox2).
 *
 * It is built by first building a regular BVH and then packing it
 * into this layout (see BVH2::build()).
 */
class BVH2: public Accel {
public:
	virtual void build(const Assembly& assembly);
	virtual const std::vector<BBox>& bounds() const {
		return _bounds;
	};
	virtual ~BVH2() {};

	// Traversers need access to private data
	friend class BVH2StreamTraverser;

	/*
	 * A node of the BVH2.
	 *
	 * An inner node with N time samples occupies N consecutive Node
	 * entries, one set of child bounds per time sample; only the first
	 * of those entries has its `ts` and `child_index` set by build().
	 */
	struct alignas(16) Node {
	    union {
	        // If the node is a leaf, we don't need the bounds.
	        // If the node is not a leaf, it doesn't have Primitive data.
	        BBox2 bounds {BBox(), BBox()};
	        size_t data_index;
	    };
	    size_t child_index = 0; // When zero, indicates that this is a leaf node
	    uint32_t ts = 0;  // Number of time samples.

	Node() {}

	// Copies the union through `bounds`.
	// NOTE(review): this also carries data_index over for leaf nodes only
	// if BBox2 covers the storage of data_index -- confirm.
	Node(const Node& n): child_index {n.child_index}, ts {n.ts} {
		bounds = n.bounds;
	}

	// Operators to allow node bounds to be interpolated conveniently.
	// Only the bounds are combined; child_index and ts of the result
	// are left at their defaults (zero).
	Node operator+(const Node& b) const {
		Node n;
		n.bounds = bounds + b.bounds;
		return n;
	}

	Node operator*(float f) const {
		Node n;
		n.bounds = bounds * f;
		return n;
	}
	};

private:
	std::vector<Node> nodes;
	// Top-level bounds, as returned by bounds().
	std::vector<BBox> _bounds {BBox()};

	// Flag set on BVH::Node::flags during build() to mark second children.
	// It uses a different bit than BVH::IS_LEAF.
	enum {
		IS_RIGHT = 1 << 1
	};

	/**
	 * @brief Returns the index of the first child
	 * of the node with the given index.
	 *
	 * The first child is stored directly after the node's
	 * time-sample entries.
	 */
	inline size_t child1(const size_t node_i) const {
		return node_i + nodes[node_i].ts;
	}

	/**
	 * @brief Returns the index of the second child
	 * of the node with the given index.
	 */
	inline size_t child2(const size_t node_i) const {
		return nodes[node_i].child_index;
	}

	/**
	 * @brief Returns the number of time samples
	 * of the node with the given index.
	 */
	inline uint32_t time_samples(const size_t node_i) const {
		return nodes[node_i].ts;
	}

	/**
	 * @brief Returns whether the node with the given index is a
	 * leaf node, i.e. whether its child_index is zero.
	 */
	inline bool is_leaf(const size_t node_i) const {
		return nodes[node_i].child_index == 0;
	}
};
105 | 
106 | 
107 | 
108 | /**
109 |  * @brief A breadth-first traverser for BVH2.
110 |  */
111 | class BVH2StreamTraverser: public AccelStreamTraverser<BVH2> {
112 | public:
113 | 	virtual ~BVH2StreamTraverser() {}
114 | 
115 | 	virtual void init_accel(const BVH2& accel) {
116 | 		bvh = &accel;
117 | 	}
118 | 
119 | 	virtual void init_rays(Ray* begin, Ray* end) {
120 | 		rays = begin;
121 | 		rays_end = end;
122 | 		first_call = true;
123 | 
124 | 		// Initialize stack
125 | 		if (bvh == nullptr || bvh->nodes.size() == 0) {
126 | 			stack_ptr = -1;
127 | 		} else {
128 | 			stack_ptr = 0;
129 | 		}
130 | 		node_stack[0] = 0;
131 | 		ray_stack[0].first = rays;
132 | 		ray_stack[0].second = rays_end;
133 | 	}
134 | 
135 | 	virtual std::tuple<Ray*, Ray*, size_t> next_object();
136 | 
137 | private:
138 | 	const BVH2* bvh = nullptr;
139 | 	Ray* rays = nullptr;
140 | 	Ray* rays_end = nullptr;
141 | 	bool first_call = true;
142 | 
143 | 	// Stack data
144 | #define BVH2_STACK_SIZE 64
145 | 	int stack_ptr;
146 | 	size_t node_stack[BVH2_STACK_SIZE];
147 | 	std::pair<Ray*, Ray*> ray_stack[BVH2_STACK_SIZE];
148 | 
149 | };
150 | 
151 | #endif // BVH2_HPP


--------------------------------------------------------------------------------
/accel/bvh4.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef BVH4_HPP
  2 | #define BVH4_HPP
  3 | 
  4 | #include <stdlib.h>
  5 | #include <iostream>
  6 | #include <vector>
  7 | #include <deque>
  8 | #include <memory>
  9 | #include <tuple>
 10 | 
 11 | #include "numtype.h"
 12 | #include "global.hpp"
 13 | 
 14 | #include "accel.hpp"
 15 | #include "bvh.hpp"
 16 | #include "object.hpp"
 17 | #include "ray.hpp"
 18 | #include "bbox.hpp"
 19 | #include "utils.hpp"
 20 | #include "vector.hpp"
 21 | 
 22 | 
 23 | 
 24 | 
/*
 * A bounding volume hierarchy in which each inner node has between
 * two and four children, and stores the bounds of all of them
 * together (as a BBox4).
 */
class BVH4: public Accel {
public:
	virtual void build(const Assembly& assembly);
	virtual const std::vector<BBox>& bounds() const {
		return _bounds;
	};
	virtual ~BVH4() {};

	// Traversers need access to private data
	friend class BVH4StreamTraverser;

	/*
	 * A node of the BVH4.
	 */
	struct alignas(16) Node {
	    union {
	        // If the node is a leaf, we don't need the bounds.
	        // If the node is not a leaf, it doesn't have Primitive data.
	        BBox4 bounds {BBox(), BBox(), BBox(), BBox()};
	        size_t data_index;
	    };
	    size_t child_indices[3] = {0,0,0}; // Indices of children 2, 3, and 4. (Child 1's index is implicit.)
	    // When first element is 0, indicates that this is a leaf node,
	    // because a non-leaf node needs at least two children.  When the
	    // second and/or third elements are zero, indicates there is no
	    // third or fourth child, respectively.
	    uint32_t ts = 0;  // Number of time samples.

	Node() {}

//		Node(const Node& n): child_indices {n.child_indices}, ts {n.ts} {
//			bounds = n.bounds;
//		}

	// Operators to allow node bounds to be interpolated conveniently.
	// Only the bounds are combined; child_indices and ts of the result
	// are left at their defaults (zero).
	Node operator+(const Node& b) const {
		Node n;
		n.bounds = bounds + b.bounds;
		return n;
	}

	Node operator*(float f) const {
		Node n;
		n.bounds = bounds * f;
		return n;
	}
	};

private:
	std::vector<Node> nodes;
	// Top-level bounds, as returned by bounds().
	std::vector<BBox> _bounds {BBox()};

	// Flag bits that do not overlap with BVH::IS_LEAF.
	// NOTE(review): presumably set on BVH::Node::flags while packing the
	// BVH into a BVH4 -- confirm in bvh4.cpp.
	enum {
		IS_SKIP = 1 << 8,
		IS_2ND  = 1 << 9,
		IS_3RD  = 1 << 10,
		IS_4TH  = 1 << 11
	};

	/**
	 * @brief Returns the index of the nth (0-3) child
	 * of the node with the given index.
	 *
	 * Child 0 is implicit: it is located directly after the node's
	 * time-sample entries.  The other children are looked up in
	 * child_indices.  n is not range-checked.
	 */
	inline size_t child(const size_t node_i, const int n) const {
		if (n == 0)
			return node_i + nodes[node_i].ts;
		else
			return nodes[node_i].child_indices[n-1];
	}

	/**
	 * @brief Returns the number of time samples
	 * of the node with the given index.
	 */
	inline uint32_t time_samples(const size_t node_i) const {
		return nodes[node_i].ts;
	}

	/**
	 * @brief Returns whether the node with the given index is a
	 * leaf node or not.
	 */
	inline bool is_leaf(const size_t node_i) const {
		return (nodes[node_i].child_indices[0] == 0);
	}

	/**
	 * @brief Returns the number of children (2-4) of the node with
	 * the given index.
	 *
	 * Does not check for leaf nodes: for a leaf (whose child_indices
	 * are all zero) this returns 2.
	 */
	inline int child_count(const size_t node_i) const {
		if (nodes[node_i].child_indices[1] == 0) {
			return 2;
		} else if (nodes[node_i].child_indices[2] == 0) {
			return 3;
		} else {
			return 4;
		}
	}
};
121 | 
122 | 
123 | 
124 | 
125 | /**
126 |  * @brief A breadth-first traverser for BVH4.
127 |  *
128 |  * init_accel() should be called before init_rays(): init_rays() consults the accel to set up the stack.
129 |  */
130 | class BVH4StreamTraverser: public AccelStreamTraverser<BVH4> {
131 | public:
132 | 	virtual ~BVH4StreamTraverser() {}
133 | 
134 | 	/** @brief Stores a pointer to the BVH4 to traverse (accel itself is not copied). */
135 | 	virtual void init_accel(const BVH4& accel) {
136 | 		bvh = &accel;
137 | 	}
138 | 
139 | 	/** @brief Sets the ray range [begin, end) to traverse with and resets the traversal stack. */
140 | 	virtual void init_rays(Ray* begin, Ray* end) {
141 | 		rays = begin;
142 | 		rays_end = end;
143 | 		first_call = true;
144 | 
145 | 		// With no accel set, or an accel without nodes, stack_ptr is -1.
146 | 		// Otherwise the stack holds one entry: node 0 paired with all the rays.
147 | 		stack_ptr = (bvh == nullptr || bvh->nodes.size() == 0) ? -1 : 0;
148 | 		node_stack[0] = 0;
149 | 		ray_stack[0].first = rays;
150 | 		ray_stack[0].second = rays_end;
151 | 	}
152 | 
153 | 	virtual std::tuple<Ray*, Ray*, size_t> next_object();
154 | 
155 | private:
156 | 	const BVH4* bvh = nullptr;
157 | 	Ray* rays = nullptr;
158 | 	Ray* rays_end = nullptr;
159 | 	bool first_call = true;
160 | 
161 | 	// Stack data.  stack_ptr defaults to -1, the value init_rays() uses when there is nothing to traverse.
162 | #define BVH4_STACK_SIZE 64
163 | 	int stack_ptr = -1;
164 | 	size_t node_stack[BVH4_STACK_SIZE];
165 | 	std::pair<Ray*, Ray*> ray_stack[BVH4_STACK_SIZE];
166 | };
167 | 
168 | 
169 | #endif // BVH4_HPP


--------------------------------------------------------------------------------
/accel/light_accel.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef LIGHT_ACCEL_HPP
 2 | #define LIGHT_ACCEL_HPP
 3 | 
 4 | #include <vector>
 5 | #include <tuple>
 6 | #include <memory>
 7 | 
 8 | #include "numtype.h"
 9 | #include "instance_id.hpp"
10 | #include "ray.hpp"
11 | #include "light.hpp"
12 | #include "transform.hpp"
13 | #include "color.hpp"
14 | 
15 | 
16 | // Forward declaration of Assembly from scene/assembly.hpp
17 | class Assembly;
18 | 
19 | 
20 | /** Data structure used to query for a light sample. */
21 | struct LightQuery {
22 | 	// In
23 | 	float n;  // Used by LightAccel::sample() implementations to choose the light
24 | 	float u;  // u and v are handed on to Light::sample()
25 | 	float v;
26 | 	float w;
27 | 	Vec3 pos;
28 | 	Vec3 nor;
29 | 	Vec3 d;  // Direction of the known ray
30 | 	SurfaceClosure* bsdf;
31 | 	float wavelength;
32 | 	float time;
33 | 
34 | 	// Intermediate
35 | 	Transform xform;
36 | 	// Out
37 | 	InstanceID id;
38 | 	Vec3 to_light;
39 | 	SpectralSample spec_samp;
40 | 	float selection_pdf;  // The pdf of selecting the given light
41 | 	float light_sample_pdf;  // The pdf of the sample taken on the selected light
42 | };
43 | 
44 | 
45 | 
46 | /**
47 |  * @brief An acceleration structure for sampling a collection of light sources.
48 |  */
49 | class LightAccel {
50 | public:
51 | 	virtual ~LightAccel() = default;
52 | 	/// Builds the structure from the contents of the given assembly.
53 | 	virtual void build(const Assembly& assembly) = 0;
54 | 	/// Takes a light sample: inputs are read from, and results written to, *query.
55 | 	virtual void sample(LightQuery* query) const = 0;
56 | 	/// Returns the bounding boxes held by the structure.
57 | 	virtual const std::vector<BBox>& bounds() const = 0;
58 | 	/// Returns the number of lights in the structure.
59 | 	virtual size_t light_count() const = 0;
60 | 	/// Returns the total emitted color of the lights in the structure.
61 | 	virtual Color total_emitted_color() const = 0;
62 | };
63 | 
64 | 
65 | #endif // LIGHT_ACCEL_HPP
66 | 


--------------------------------------------------------------------------------
/accel/light_array.cpp:
--------------------------------------------------------------------------------
  1 | #include "light_array.hpp"
  2 | 
  3 | #include "assembly.hpp"
  4 | 
  5 | // Gathers the lights instanced in the given assembly (directly, or through child
  6 | // assemblies) and merges the bounds of all of its instances.  Everything that is
  7 | // accumulated is reset first, so build() may safely be called more than once.
  8 | void LightArray::build(const Assembly& assembly_) {
  9 | 	assembly = &assembly_;
 10 | 	// Reset everything that is accumulated below.
 11 | 	light_indices.clear();
 12 | 	assembly_lights.clear();
 13 | 	total_assembly_lights = 0;
 14 | 	total_color = Color(0.0f);
 15 | 	bounds_.assign(1, BBox());
 16 | 	for (size_t i = 0; i < assembly->instances.size(); ++i) {
 17 | 		const auto& instance = assembly->instances[i]; // Shorthand
 18 | 		if (instance.type == Instance::OBJECT && assembly->objects[instance.data_index]->get_type() == Object::LIGHT) {
 19 | 			light_indices.push_back(i);
 20 | 			const Light* light = dynamic_cast<const Light*>(assembly->objects[instance.data_index].get());
 21 | 			total_color += light->total_emitted_color();
 22 | 		} else if (instance.type == Instance::ASSEMBLY) {
 23 | 			const auto& child_accel = assembly->assemblies[instance.data_index]->light_accel;
 24 | 			const auto count = child_accel.light_count();
 25 | 			if (count > 0) {
 26 | 				assembly_lights.emplace_back(total_assembly_lights, count, i);
 27 | 				total_assembly_lights += count;
 28 | 				total_color += child_accel.total_emitted_color();
 29 | 			}
 30 | 		}
 31 | 		// Merge bounds
 32 | 		for (const auto& bbox: assembly->instance_bounds(i))
 33 | 			bounds_[0].merge_with(bbox);
 34 | 	}
 35 | }
 36 | 
 37 | 
 38 | 
 39 | // Picks one light -- either a light instanced directly in this assembly or, by
 40 | // recursing into a child assembly's light accel, one of its lights -- using
 41 | // query->n, then samples it and writes the results into *query.
 42 | void LightArray::sample(LightQuery* query) const {
 43 | 	// Handle empty light accel
 44 | 	if (light_indices.size() == 0 && assembly_lights.size() == 0) {
 45 | 		query->spec_samp = SpectralSample(query->spec_samp.hero_wavelength, 0.0f);
 46 | 		return;
 47 | 	}
 48 | 
 49 | 	const size_t local_count = light_indices.size();
 50 | 	const float local_prob = static_cast<double>(local_count) / (total_assembly_lights + local_count);
 51 | 	const float child_prob = 1.0f - local_prob;
 52 | 	// Only enter a branch that actually contains lights.  A plain
 53 | 	// `n <= local_prob` test would pick the local branch for n == 0 even when
 54 | 	// every light lives in a child assembly (local_prob == 0), and then divide
 55 | 	// by zero and index light_indices with a modulo by zero.
 56 | 	const bool sample_local = (total_assembly_lights == 0) || (local_count > 0 && query->n < local_prob);
 57 | 
 58 | 	// If we're sampling a light in this assembly
 59 | 	if (sample_local) {
 60 | 		// Update probabilities, then get the light instance
 61 | 		query->n /= local_prob;
 62 | 		const auto index = light_indices[static_cast<uint32_t>(query->n * local_count) % local_count];
 63 | 		const Instance& instance = assembly->instances[index]; // Shorthand
 64 | 
 65 | 		// Get light data
 66 | 		Light* light = dynamic_cast<Light*>(assembly->objects[instance.data_index].get());
 67 | 
 68 | 		// Get transforms if any
 69 | 		if (instance.transform_count > 0) {
 70 | 			auto cbegin = assembly->xforms.cbegin() + instance.transform_index;
 71 | 			auto cend = cbegin + instance.transform_count;
 72 | 			auto instance_xform = lerp_seq(query->time, cbegin, cend);
 73 | 			query->pos = instance_xform.pos_to(query->pos);
 74 | 			query->nor = instance_xform.nor_to(query->nor).normalized();
 75 | 			query->xform *= instance_xform;
 76 | 		}
 77 | 
 78 | 		// Sample the light
 79 | 		float p;
 80 | 		query->spec_samp = light->sample(query->pos, query->u, query->v, query->wavelength, query->time, &(query->to_light), &p);
 81 | 		query->to_light = query->xform.dir_from(query->to_light);
 82 | 		query->light_sample_pdf = p;
 83 | 
 84 | 		// Fill in the light's instance ID
 85 | 		query->id.push_back(index, assembly->element_id_bits());
 86 | 	}
 87 | 	// If we're sampling a light in a child assembly
 88 | 	else {
 89 | 		// Update probabilities
 90 | 		query->n = (query->n - local_prob) / child_prob;
 91 | 
 92 | 		// Select assembly
 93 | 		// TODO: a binary search would be faster
 94 | 		size_t index = 0;
 95 | 		const size_t target_index = static_cast<size_t>(total_assembly_lights * query->n) % total_assembly_lights;
 96 | 		for (const auto& al: assembly_lights) {
 97 | 			if (std::get<0>(al) <= target_index && target_index < (std::get<0>(al) + std::get<1>(al))) {
 98 | 				index = std::get<2>(al);
 99 | 				break;
100 | 			}
101 | 		}
102 | 
103 | 		// Get the assembly instance (shorthand) and the assembly itself
104 | 		const Instance& instance = assembly->instances[index];
105 | 		Assembly* child_assembly = assembly->assemblies[instance.data_index].get();
106 | 
107 | 		// Get transforms if any (NOTE(review): unlike the local branch, query->nor is not transformed here -- confirm)
108 | 		if (instance.transform_count > 0) {
109 | 			auto cbegin = assembly->xforms.cbegin() + instance.transform_index;
110 | 			auto cend = cbegin + instance.transform_count;
111 | 			auto instance_xform = lerp_seq(query->time, cbegin, cend);
112 | 			query->pos = instance_xform.pos_to(query->pos);
113 | 			query->xform *= instance_xform;
114 | 		}
115 | 		// Push the assembly's instance ID, then traverse into the child assembly
116 | 		query->id.push_back(index, assembly->element_id_bits());
117 | 		child_assembly->light_accel.sample(query);
118 | 	}
119 | 	// Selection PDF is just one, since all lights have equal probability of being selected.
120 | 	query->selection_pdf = 1.0f;
121 | }
122 | 


--------------------------------------------------------------------------------
/accel/light_array.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef LIGHT_ARRAY_HPP
 2 | #define LIGHT_ARRAY_HPP
 3 | 
 4 | #include "light_accel.hpp"
 5 | 
 6 | /// A LightAccel that keeps the lights of an assembly in flat lists.
 7 | class LightArray final: public LightAccel {
 8 | 	const Assembly* assembly {nullptr};  // Set by build()
 9 | 	std::vector<size_t> light_indices;  // Instance indices of the light objects found by build()
10 | 	// One entry per child assembly with lights: 1: lights accumulated before this entry, 2: number of lights, 3: assembly instance index
11 | 	std::vector<std::tuple<size_t, size_t, size_t>> assembly_lights;
12 | 	size_t total_assembly_lights {0};  // Must start at zero: build() accumulates into it
13 | 	Color total_color;
14 | 	std::vector<BBox> bounds_ {BBox()};
15 | public:
16 | 	~LightArray() {}
17 | 
18 | 	virtual void build(const Assembly& assembly) override;
19 | 	virtual void sample(LightQuery* query) const override;
20 | 
21 | 	virtual const std::vector<BBox>& bounds() const override {
22 | 		return bounds_;
23 | 	}
24 | 
25 | 	virtual size_t light_count() const override {
26 | 		return total_assembly_lights + light_indices.size();
27 | 	}
28 | 
29 | 	virtual Color total_emitted_color() const override {
30 | 		return total_color;
31 | 	}
32 | };
33 | 
34 | #endif // LIGHT_ARRAY_HPP
35 | 


--------------------------------------------------------------------------------
/accel/light_tree.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef LIGHT_TREE_HPP
 2 | #define LIGHT_TREE_HPP
 3 | 
 4 | #include "light_accel.hpp"
 5 | #include <iostream>
 6 | 
 7 | // LightAccel implementation whose build() and sample() are defined out of line.
 8 | class LightTree final: public LightAccel {
 9 | 	// Element type of build_nodes, which split_lights() and recursive_build() work on.
10 | 	struct BuildNode {
11 | 		size_t instance_index;
12 | 		Vec3 center;
13 | 		BBox bbox;
14 | 		float energy;
15 | 	};
16 | 
17 | 	// Element type of the nodes array.
18 | 	struct Node {
19 | 		std::vector<BBox> bounds;
20 | 		float energy;
21 | 
22 | 		size_t index1;
23 | 		size_t index2;
24 | 		bool is_leaf;
25 | 		size_t instance_index;
26 | 	};
27 | 
28 | 	using BuildIter = std::vector<BuildNode>::iterator;
29 | 
30 | 	const Assembly* assembly;
31 | 	std::vector<BuildNode> build_nodes;
32 | 	std::vector<Node> nodes;
33 | 	std::vector<BBox> bounds_;
34 | 	float total_energy = 0.0f;
35 | 	size_t total_lights = 0;
36 | 
37 | 	BuildIter split_lights(BuildIter start, BuildIter end);
38 | 	size_t recursive_build(BuildIter start, BuildIter end);
39 | 	float node_prob(const LightQuery& lq, uint32_t index) const;
40 | 
41 | public:
42 | 	~LightTree() = default;
43 | 
44 | 	void build(const Assembly& assembly) override;
45 | 	void sample(LightQuery* query) const override;
46 | 
47 | 	const std::vector<BBox>& bounds() const override {
48 | 		return bounds_;
49 | 	}
50 | 
51 | 	// TODO
52 | 	size_t light_count() const override {
53 | 		return total_lights;
54 | 	}
55 | 
56 | 	// The emitted color is constructed from the scalar total_energy.
57 | 	Color total_emitted_color() const override {
58 | 		const Color emitted(total_energy);
59 | 		return emitted;
60 | 	}
61 | };
62 | 
63 | #endif // LIGHT_TREE_HPP
64 | 


--------------------------------------------------------------------------------
/basics/bbox_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test.hpp"
  2 | 
  3 | #include <cmath>
  4 | #include <limits>
  5 | #include <iostream>
  6 | #include "vector.hpp"
  7 | #include "ray.hpp"
  8 | #include "bbox.hpp"
  9 | #include "utils.hpp"
 10 | 
 11 | 
 12 | /*
 13 |  ************************************************************************
 14 |  * Testing suite for BBox.
 15 |  ************************************************************************
 16 |  */
 17 | 
 18 | TEST_CASE("bbox") {
 19 | 	// Default constructor: expected to give an inverted box (min = +inf, max = -inf)
 20 | 	SECTION("constructor_1") {
 21 | 		BBox bb;
 22 | 
 23 | 		REQUIRE(bb.min == Vec3(std::numeric_limits<float>::infinity(), std::numeric_limits<float>::infinity(), std::numeric_limits<float>::infinity()));
 24 | 		REQUIRE(bb.max == Vec3(-std::numeric_limits<float>::infinity(), -std::numeric_limits<float>::infinity(), -std::numeric_limits<float>::infinity()));
 25 | 	}
 26 | 
 27 | 	// Constructor taking explicit min and max corners
 28 | 	SECTION("constructor_2") {
 29 | 		BBox bb(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0));
 30 | 
 31 | 		REQUIRE(bb.min == Vec3(1.0, -2.5, 0.5));
 32 | 		REQUIRE(bb.max == Vec3(8.0, 7.25, 2.0));
 33 | 	}
 34 | 
 35 | 	// Test for the add operator.  The expected values are the
 36 | 	// component-wise sums min + min and max + max.
 37 | 	SECTION("add") {
 38 | 		BBox bb1(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0));
 39 | 		BBox bb2(Vec3(-1.0, -1.5, -2.0), Vec3(8.0, 4.75, -1.0));
 40 | 
 41 | 		BBox bb = bb1 + bb2;
 42 | 
 43 | 		REQUIRE(bb.min == Vec3(0.0, -4.0, -1.5));
 44 | 		REQUIRE(bb.max == Vec3(16.0, 12.0, 1.0));
 45 | 	}
 46 | 
 47 | 	// Test for the subtract operator.  The expected values are the
 48 | 	// component-wise differences min - min and max - max.
 49 | 	SECTION("subtract") {
 50 | 		BBox bb1(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0));
 51 | 		BBox bb2(Vec3(-1.0, -1.5, -2.0), Vec3(8.0, 4.75, -1.0));
 52 | 
 53 | 		BBox bb = bb1 - bb2;
 54 | 
 55 | 		REQUIRE(bb.min == Vec3(2.0, -1.0, 2.5));
 56 | 		REQUIRE(bb.max == Vec3(0.0, 2.5, 3.0));
 57 | 	}
 58 | 
 59 | 	// Test for the multiply operator.  min and max are each expected to be
 60 | 	// scaled as-is, so the negative factor leaves min and max un-swapped.
 61 | 	SECTION("multiply") {
 62 | 		BBox bb1(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0));
 63 | 
 64 | 		BBox bb = bb1 * -2.0;
 65 | 
 66 | 		REQUIRE(bb.min == Vec3(-2.0, 5.0, -1.0));
 67 | 		REQUIRE(bb.max == Vec3(-16.0, -14.5, -4.0));
 68 | 	}
 69 | 
 70 | 
 71 | 	// Test for the divide operator (same expectations as multiply, with a division)
 72 | 	SECTION("divide") {
 73 | 		BBox bb1(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0));
 74 | 
 75 | 		BBox bb = bb1 / -2.0;
 76 | 
 77 | 		REQUIRE(bb.min == Vec3(-0.5, 1.25, -0.25));
 78 | 		REQUIRE(bb.max == Vec3(-4.0, -3.625, -1.0));
 79 | 	}
 80 | 
 81 | 	// Test for ::merge_with().  The expected result is the component-wise
 82 | 	// minimum of the two mins and maximum of the two maxes.
 83 | 	SECTION("merge_with") {
 84 | 		BBox bb1(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0));
 85 | 		BBox bb2(Vec3(-1.0, -1.5, -2.0), Vec3(8.0, 4.75, -1.0));
 86 | 
 87 | 		bb1.merge_with(bb2);
 88 | 
 89 | 		REQUIRE(bb1.min == Vec3(-1.0, -2.5, -2.0));
 90 | 		REQUIRE(bb1.max == Vec3(8.0, 7.25, 2.0));
 91 | 	}
 92 | 
 93 | 	// Test for ::surface_area().  The box measures 7 x 9.75 x 1.5, so the
 94 | 	// expected area is 2 * (68.25 + 10.5 + 14.625) = 186.75.
 95 | 	SECTION("surface_area") {
 96 | 		BBox bb(Vec3(1.0, -2.5, 0.5), Vec3(8.0, 7.25, 2.0));
 97 | 
 98 | 		REQUIRE(bb.surface_area() == 186.75);
 99 | 	}
100 | 
101 | 
102 | 	// Tests for ::intersect_ray()
103 | 	SECTION("intersect_ray_1") {
104 | 		// Simple intersection: travelling along +y from y = -8, the ray reaches y = -2.5 at t = 5.5 and y = 7.25 at t = 15.25
105 | 		Ray r(Vec3(0.125, -8.0, 0.25), Vec3(0.0, 1.0, 0.0));
106 | 		r.finalize();
107 | 		BBox bb(Vec3(-1.0, -2.5, -0.5), Vec3(8.0, 7.25, 2.0));
108 | 		float hitt0=0.0, hitt1=0.0;
109 | 		bool hit=false;
110 | 
111 | 		hit = bb.intersect_ray(r, &hitt0, &hitt1);
112 | 
113 | 		REQUIRE(hit == true);
114 | 		REQUIRE(hitt0 == 5.5);
115 | 		REQUIRE(hitt1 >= 15.25);
116 | 		REQUIRE(hitt1 <= 15.25001);
117 | 	}
118 | 
119 | 
120 | 	SECTION("intersect_ray_2") {
121 | 		// Simple intersection with unnormalized ray: the direction is twice as long as in intersect_ray_1, so the expected t values are halved
122 | 		Ray r(Vec3(0.125, -8.0, 0.25), Vec3(0.0, 2.0, 0.0));
123 | 		r.update_accel();  // NOTE(review): every other section calls r.finalize() here -- confirm this is intentional
124 | 		BBox bb(Vec3(-1.0, -2.5, -0.5), Vec3(8.0, 7.25, 2.0));
125 | 		float hitt0=0.0, hitt1=0.0;
126 | 		bool hit=false;
127 | 
128 | 		hit = bb.intersect_ray(r, &hitt0, &hitt1);
129 | 
130 | 		REQUIRE(hit == true);
131 | 		REQUIRE(hitt0 == 2.75);
132 | 		REQUIRE(hitt1 >= 7.625);
133 | 		REQUIRE(hitt1 <= (7.62501));
134 | 	}
135 | 
136 | 	SECTION("intersect_ray_3") {
137 | 		// Simple miss: the ray runs parallel to the y axis at x = 20, beyond the box's max x of 8
138 | 		Ray r(Vec3(20.0, -8.0, 0.25), Vec3(0.0, 1.0, 0.0));
139 | 		r.finalize();
140 | 		BBox bb(Vec3(-1.0, -2.5, -0.5), Vec3(8.0, 7.25, 2.0));
141 | 
142 | 		REQUIRE(bb.intersect_ray(r) == false);
143 | 	}
144 | 
145 | 	SECTION("intersect_ray_4") {
146 | 		// Intersection from ray that starts inside the bbox: the near hit is expected at t = 0
147 | 		Ray r(Vec3(0.0, 0.0, 0.0), Vec3(0, 1.0, 0));
148 | 		r.finalize();
149 | 		BBox bb(Vec3(-1.0, -2.5, -0.5), Vec3(8.0, 7.25, 2.0));
150 | 		float hitt0=0.0, hitt1=0.0;
151 | 		bool hit=false;
152 | 
153 | 		hit = bb.intersect_ray(r, &hitt0, &hitt1);
154 | 
155 | 		REQUIRE(hit == true);
156 | 		REQUIRE(hitt0 == 0.0);
157 | 		REQUIRE(hitt1 >= 7.25);
158 | 		REQUIRE(hitt1 <= 7.25001);
159 | 	}
160 | 
161 | 	SECTION("intersect_ray_5") {
162 | 		// Ray that passes just outside the min-x side of the bbox (x = -1.0001 vs. -1.0), should be a miss
163 | 		Ray r(Vec3(-1.0001, -8.0, 0.25), Vec3(0, 1.0, 0));
164 | 		r.finalize();
165 | 		BBox bb(Vec3(-1.0, -2.5, -0.5), Vec3(8.0, 7.25, 2.0));
166 | 
167 | 		REQUIRE(bb.intersect_ray(r) == false);
168 | 	}
169 | 
170 | 	SECTION("intersect_ray_6") {
171 | 		// Intersection with collapsed BBox (zero extent in x), should be true
172 | 		Ray r(Vec3(-4.0, 0.0, 0.0), Vec3(1.0, 0.0, 0.0));
173 | 		r.finalize();
174 | 		BBox bb(Vec3(1.0, -1.0, -1.0), Vec3(1.0, 1.0, 1.0));
175 | 
176 | 		float hitt0=0.0, hitt1=0.0;
177 | 		bool hit=false;
178 | 
179 | 		hit = bb.intersect_ray(r, &hitt0, &hitt1);
180 | 
181 | 		REQUIRE(hit == true);
182 | 		REQUIRE(hitt0 == 5.0);
183 | 		REQUIRE(hitt1 >= 5.0);
184 | 		REQUIRE(hitt1 <= 5.00001);
185 | 
186 | 	}
187 | 
188 | 	SECTION("intersect_ray_7") {
189 | 		// Intersection with collapsed BBox with ray at an angle, should be true (x advances 0.5 per unit t, covering the 5 units to x = 1 at t = 10)
190 | 		Ray r(Vec3(-4.0, 0.0, 0.0), Vec3(0.5, 0.5, 0.5));
191 | 		r.finalize();
192 | 		BBox bb(Vec3(1.0, -20.0, -20.0), Vec3(1.0, 20.0, 20.0));
193 | 
194 | 		float hitt0=0.0, hitt1=0.0;
195 | 		bool hit=false;
196 | 
197 | 		hit = bb.intersect_ray(r, &hitt0, &hitt1);
198 | 
199 | 		REQUIRE(hit == true);
200 | 		REQUIRE(hitt0 == 10.0);
201 | 		REQUIRE(hitt1 >= 10.0);
202 | 		REQUIRE(hitt1 <= 10.00001);
203 | 	}
204 | }
205 | 
206 | // TODO: - diagonal rays
207 | //       - rays with different tmin/tmax value
208 | 


--------------------------------------------------------------------------------
/basics/camera.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef CAMERA_HPP
  2 | #define CAMERA_HPP
  3 | 
  4 | #include "numtype.h"
  5 | #include <cmath>
  6 | #include <vector>
  7 | #include <iostream>
  8 | 
  9 | 
 10 | #include "config.hpp"
 11 | #include "utils.hpp"
 12 | #include "monte_carlo.hpp"
 13 | #include "vector.hpp"
 14 | #include "matrix.hpp"
 15 | #include "transform.hpp"
 16 | #include "ray.hpp"
 17 | 
 18 | /*
 19 |  * A virtual camera.  Settings are stored as sequences that generate_ray() interpolates over time.
 20 |  */
 21 | class Camera {
 22 | public:
 23 | 	std::vector<Transform> transforms;
 24 | 	std::vector<float> fovs;  // Emptied by the constructor once tfovs has been filled
 25 | 	std::vector<float> tfovs;  // sin(fov/2) / cos(fov/2) for each fov given to the constructor
 26 | 	std::vector<float> aperture_radii;
 27 | 	std::vector<float> focus_distances;
 28 | 
 29 | 	// Copies the settings.  Focal blur is disabled (aperture radius 0, focus distance 1) if either of its inputs is missing or a focus distance is <= 0.
 30 | 	Camera(std::vector<Transform> &transforms_, std::vector<float> &fovs_, std::vector<float> &aperture_radii_, std::vector<float> &focus_distances_) {
 31 | 		transforms = transforms_;
 32 | 		fovs = fovs_;
 33 | 		aperture_radii = aperture_radii_;
 34 | 		focus_distances = focus_distances_;
 35 | 
 36 | 		// Make sure we have needed values for everything
 37 | 		if (transforms.size() == 0)
 38 | 			std::cout << "WARNING: camera has no transform(s)!\n";
 39 | 		if (fovs.size() == 0)
 40 | 			std::cout << "WARNING: camera has no fov(s)!\n";
 41 | 		// Focal blur needs both values.  Which warning applies has to be decided
 42 | 		// before the defaults are assigned, while the vectors still show what was given.
 43 | 		const bool has_aperture = aperture_radii.size() != 0;
 44 | 		const bool has_focus = focus_distances.size() != 0;
 45 | 		if (!has_aperture || !has_focus) {
 46 | 			if (has_aperture)
 47 | 				std::cout << "WARNING: camera has aperture radius but no focus distance.  Disabling focal blur.\n";
 48 | 			else if (has_focus)
 49 | 				std::cout << "WARNING: camera has focus distance but no aperture radius.  Disabling focal blur.\n";
 50 | 			aperture_radii = {0.0f};
 51 | 			focus_distances = {1.0f};
 52 | 		}
 53 | 
 54 | 		// Convert angle fov into linear fov
 55 | 		tfovs.clear();
 56 | 		for (auto&& i: fovs)
 57 | 			tfovs.emplace_back(sin(i/2) / cos(i/2));
 58 | 		fovs.clear();
 59 | 
 60 | 		// Can't have focus distance of zero
 61 | 		for (auto&& f: focus_distances) {
 62 | 			if (f <= 0.0f) {
 63 | 				std::cout << "WARNING: camera focal distance is zero or less.  Disabling focal blur.\n";
 64 | 				aperture_radii = {0.0f};
 65 | 				focus_distances = {1.0f};
 66 | 				break;
 67 | 			}
 68 | 		}
 69 | 	}
 70 | 
 71 | 	// Generates a camera ray for image position (x, y) at the given time; (u, v) picks the point on the aperture and (dx, dy) sets the image plane differentials.
 72 | 	WorldRay generate_ray(float x, float y, float dx, float dy, float time, float u, float v) const {
 73 | 		WorldRay wray;
 74 | 
 75 | 		wray.type = WorldRay::CAMERA;
 76 | 		wray.time = time;
 77 | 
 78 | 		// Get time-interpolated camera settings
 79 | 		const Transform transform = lerp_seq(time, transforms);
 80 | 		const float tfov = lerp_seq(time, tfovs);
 81 | 		const float aperture_radius = lerp_seq(time, aperture_radii);
 82 | 		const float focus_distance = lerp_seq(time, focus_distances);
 83 | 
 84 | 		// Ray origin
 85 | 		wray.o.x = aperture_radius * ((u * 2) - 1);
 86 | 		wray.o.y = aperture_radius * ((v * 2) - 1);
 87 | 		wray.o.z = 0.0;
 88 | 		square_to_circle(&wray.o.x, &wray.o.y);
 89 | 
 90 | 		// Ray direction
 91 | 		wray.d.x = (x * tfov) - (wray.o.x / focus_distance);
 92 | 		wray.d.y = (y * tfov) - (wray.o.y / focus_distance);
 93 | 		wray.d.z = 1.0;
 94 | 		wray.d.normalize();
 95 | 
 96 | 		// Ray image plane differentials
 97 | 		wray.odx = Vec3(0.0f, 0.0f, 0.0f);
 98 | 		wray.ody = Vec3(0.0f, 0.0f, 0.0f);
 99 | 		wray.ddx = Vec3(dx*tfov, 0.0f, 0.0f);
100 | 		wray.ddy = Vec3(0.0f, dy*tfov, 0.0f);
101 | 
102 | 		// Transform the ray
103 | 		return wray.transformed(transform);
104 | 	}
105 | };
106 | 
107 | #endif
108 | 


--------------------------------------------------------------------------------
/basics/differential_geometry.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef DIFFERENTIAL_GEOMETRY_HPP
  2 | #define DIFFERENTIAL_GEOMETRY_HPP
  3 | 
  4 | #include "numtype.h"
  5 | #include "vector.hpp"
  6 | #include "transform.hpp"
  7 | 
  8 | struct DifferentialGeometry {
  9 | 	float u, v;
 10 | 
 11 | 	// Point position
 12 | 	Vec3 p;
 13 | 	Vec3 dpdu, dpdv;
 14 | 
 15 | 	// Surface normal
 16 | 	Vec3 n;
 17 | 	Vec3 dndu, dndv;
 18 | 
 19 | 	// Returns this geometry mapped through xform's pos_from/dir_from/nor_from, with the normal renormalized.
 20 | 	DifferentialGeometry transformed_from(const Transform& xform) const {
 21 | 		DifferentialGeometry result(*this);  // u and v carry over as-is
 22 | 
 23 | 		// Position and its derivatives
 24 | 		result.p = xform.pos_from(p);
 25 | 		result.dpdu = xform.dir_from(dpdu);
 26 | 		result.dpdv = xform.dir_from(dpdv);
 27 | 
 28 | 		// TODO: figure out how to transform surface normal differentials
 29 | 		// properly
 30 | 		result.n = xform.nor_from(n);
 31 | 		result.dndu = xform.nor_from(dndu);
 32 | 		result.dndv = xform.nor_from(dndv);
 33 | 
 34 | 		// Renormalize n, scaling its differentials by the same factor
 35 | 		const float inv_len = 1.0f / result.n.length();
 36 | 		result.n *= inv_len;
 37 | 		result.dndu *= inv_len;
 38 | 		result.dndv *= inv_len;
 39 | 		return result;
 40 | 	}
 41 | 
 42 | 	// Returns this geometry mapped through xform's pos_to/dir_to/nor_to, with the normal renormalized.
 43 | 	DifferentialGeometry transformed_to(const Transform& xform) const {
 44 | 		DifferentialGeometry result(*this);  // u and v carry over as-is
 45 | 
 46 | 		// Position and its derivatives
 47 | 		result.p = xform.pos_to(p);
 48 | 		result.dpdu = xform.dir_to(dpdu);
 49 | 		result.dpdv = xform.dir_to(dpdv);
 50 | 
 51 | 		// TODO: figure out how to transform surface normal differentials
 52 | 		// properly
 53 | 		result.n = xform.nor_to(n);
 54 | 		result.dndu = xform.nor_to(dndu);
 55 | 		result.dndv = xform.nor_to(dndv);
 56 | 
 57 | 		// Renormalize n, scaling its differentials by the same factor
 58 | 		const float inv_len = 1.0f / result.n.length();
 59 | 		result.n *= inv_len;
 60 | 		result.dndu *= inv_len;
 61 | 		result.dndv *= inv_len;
 62 | 		return result;
 63 | 	}
 64 | 
 65 | 	// Negates the normal and both of its differentials.
 66 | 	void flip_normal() {
 67 | 		dndv *= -1.0f;
 68 | 		dndu *= -1.0f;
 69 | 		n *= -1.0f;
 70 | 	}
 71 | };
 72 | 
 73 | /*
 74 |  * Transfers a ray differential onto a surface intersection.
 75 |  * Both normal and d are assumed to be normalized.
 76 |  *
 77 |  * t       distance along the primary ray to the intersection
 78 |  * normal  surface normal at the intersection
 79 |  * d       direction of the primary ray
 80 |  * od      ray origin differential
 81 |  * dd      ray direction differential
 82 |  *
 83 |  * Returns the origin differential transferred onto the surface intersection.
 84 |  */
 85 | static inline Vec3 transfer_ray_origin_differential(const float t, const Vec3 normal, const Vec3 d,
 86 |         const Vec3 od, const Vec3 dd) {
 87 | 	// Differential offset after travelling the distance t
 88 | 	const Vec3 footprint = od + (dd * t);
 89 | 	// Slide along d until the offset is perpendicular to normal
 90 | 	const Vec3 on_surface = footprint + (d * (-dot(footprint, normal) / dot(d, normal)));
 91 | 
 92 | 	// Scaled to the non-projected ray footprint at the hit point.
 93 | 	// This is important because otherwise the ray footprint ends up
 94 | 	// being larger than the dicing rate, and the next bounce ray often
 95 | 	// ends up with false self-intersections, especially for incoming
 96 | 	// rays with grazing angles.
 97 | 	return on_surface.normalized() * footprint.length();
 98 | }
 99 | 
100 | 
101 | /*
102 |  * Reflects a ray differential off of a surface intersection as a
103 |  * perfect mirror.
104 |  * 'normal' is assumed to be normalized.
105 |  *
106 |  * normal    surface normal at the intersection
107 |  * normal_d  surface normal differential for the intersection
108 |  * d         direction of the primary ray
109 |  * dd        ray direction differential
110 |  *
111 |  * Returns the direction differential reflected off the surface.
112 |  */
113 | static inline Vec3 reflect_ray_direction_differential(const Vec3 normal, const Vec3 normal_d, const Vec3 d, const Vec3 dd) {
114 | 	const auto d_dot_n_diff = dot(dd, normal) + dot(d, normal_d);  // Differential of dot(d, normal)
115 | 	const auto half_change = (normal_d * dot(d, normal)) + (normal * d_dot_n_diff);
116 | 	return dd - (half_change * 2.0f);
117 | }
118 | 
119 | 
120 | /**
121 |  * Clamps the direction differentials of a ray so that neither is longer
122 |  * than 0.9 times the ray's direction.  This is important to prevent
123 |  * self-intersections with micro-geometry.
124 |  */
125 | static inline void clamp_dd(WorldRay* ray) {
126 | 	const float max_slope = 0.9f;
127 | 	const float d_len = ray->d.length();
128 | 	const float dx_len = ray->ddx.length();
129 | 	if ((dx_len / d_len) > max_slope)
130 | 		ray->ddx *= max_slope * d_len / dx_len;
131 | 
132 | 	const float dy_len = ray->ddy.length();
133 | 	if ((dy_len / d_len) > max_slope)
134 | 		ray->ddy *= max_slope * d_len / dy_len;
135 | }
136 | 
137 | 
138 | /**
139 |  * Calculates the uv coordinate differentials at the given differential
140 |  * hit point.
141 |  *
142 |  * TODO: apparently this is wrong.  See pg. 508 of PBRT for a correct
143 |  * implementation.
144 |  */
145 | static inline std::pair<float, float> calc_uv_differentials(const Vec3 dp, const Vec3 dpdu, const Vec3 dpdv) {
146 | 	// Inverse lengths of the two surface derivatives
147 | 	const float inv_len_u = 1.0f / dpdu.length();
148 | 	const float inv_len_v = 1.0f / dpdv.length();
149 | 
150 | 	// Unit-length directions of the two surface derivatives
151 | 	const Vec3 unit_u = dpdu * inv_len_u;
152 | 	const Vec3 unit_v = dpdv * inv_len_v;
153 | 
154 | 	// Project dp onto each direction, then divide by that derivative's length
155 | 	return std::pair<float, float>(dot(dp, unit_u) * inv_len_u, dot(dp, unit_v) * inv_len_v);
156 | }
157 | 
158 | #endif // DIFFERENTIAL_GEOMETRY_HPP


--------------------------------------------------------------------------------
/basics/instance_id.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef INSTANCE_ID_HPP
 2 | #define INSTANCE_ID_HPP
 3 | 
 4 | #include "numtype.h"
 5 | #include <cassert>
 6 | 
 7 | static constexpr int MAX_ID_BITS = 64;
 8 | 
 9 | // A stack of variable-width bit fields packed into one 64-bit integer; the most recently pushed field sits in the lowest bits.
10 | struct InstanceID {
11 | 	uint64_t id = 0;
12 | 	int pos = 0;  // Number of bits currently in use
13 | 	// Mask of the low bit_length bits, built in 64-bit arithmetic (`1 << n` on an int breaks from 31 bits up, and a 64-bit shift by 64 is undefined).
14 | 	static uint64_t low_mask(int bit_length) { return (bit_length >= MAX_ID_BITS) ? ~uint64_t(0) : ((uint64_t(1) << bit_length) - 1); }
15 | 
16 | 	void clear() { id = 0; pos = 0; }
17 | 
18 | 	// Appends the low bit_length bits of sub_id below the bits already stored.
19 | 	void push_back(uint64_t sub_id, int bit_length) {
20 | 		assert((pos + bit_length) <= MAX_ID_BITS);
21 | 		id = (bit_length >= MAX_ID_BITS) ? 0 : (id << bit_length);
22 | 		id |= sub_id & low_mask(bit_length);
23 | 		pos += bit_length;
24 | 	}
25 | 
26 | 	// Removes and returns the most recently pushed bit_length bits.
27 | 	uint64_t pop_back(int bit_length) {
28 | 		assert((pos - bit_length) >= 0);
29 | 		const uint64_t value = id & low_mask(bit_length);
30 | 		id = (bit_length >= MAX_ID_BITS) ? 0 : (id >> bit_length);
31 | 		pos -= bit_length;
32 | 		return value;
33 | 	}
34 | 	// Returns the topmost bit_length bits still tracked by pos and lowers pos; the bits in id are left in place.
35 | 	uint64_t pop_front(int bit_length) {
36 | 		assert((pos - bit_length) >= 0);
37 | 		pos -= bit_length;  // pos is now the offset of the wanted field
38 | 		return (pos >= MAX_ID_BITS) ? 0 : ((id >> pos) & low_mask(bit_length));
39 | 	}
40 | };
41 | 
42 | #endif // INSTANCE_ID_HPP


--------------------------------------------------------------------------------
/basics/instance_id_test.cpp:
--------------------------------------------------------------------------------
 1 | #include "test.hpp"
 2 | 
 3 | #include "instance_id.hpp"
 4 | 
 5 | 
 6 | /*
 7 |  ************************************************************************
 8 |  * Testing suite for InstanceID.
 9 |  ************************************************************************
10 |  */
11 | 
12 | TEST_CASE("InstanceID") {
13 | 	// Pushes the four fields that both sections pop again:
14 | 	// (value, bit width) = (1, 1), (3, 2), (63, 10), (7, 5)
15 | 	const auto push_fields = [](InstanceID& target) {
16 | 		target.push_back(1, 1);
17 | 		target.push_back(3, 2);
18 | 		target.push_back(63, 10);
19 | 		target.push_back(7, 5);
20 | 	};
21 | 	// pop_back() is expected to return the fields in reverse push order
22 | 	SECTION("push/pop back") {
23 | 		InstanceID id;
24 | 		push_fields(id);
25 | 
26 | 		REQUIRE(id.pop_back(5) == 7);
27 | 		REQUIRE(id.pop_back(10) == 63);
28 | 		REQUIRE(id.pop_back(2) == 3);
29 | 		REQUIRE(id.pop_back(1) == 1);
30 | 	}
31 | 
32 | 	// pop_front() is expected to return the fields in push order
33 | 	SECTION("push back, pop front") {
34 | 		InstanceID id;
35 | 		push_fields(id);
36 | 
37 | 		REQUIRE(id.pop_front(1) == 1);
38 | 		REQUIRE(id.pop_front(2) == 3);
39 | 		REQUIRE(id.pop_front(10) == 63);
40 | 		REQUIRE(id.pop_front(5) == 7);
41 | 	}
42 | }
43 | 


--------------------------------------------------------------------------------
/basics/intersection.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef INTERSECTION_HPP
 2 | #define INTERSECTION_HPP
 3 | 
 4 | #include "numtype.h"
 5 | 
 6 | #include <limits>
 7 | #include <memory>
 8 | 
 9 | #include "instance_id.hpp"
10 | #include "transform.hpp"
11 | #include "vector.hpp"
12 | #include "color.hpp"
13 | #include "surface_closure.hpp"
14 | #include "closure_union.hpp"
15 | #include "differential_geometry.hpp"
16 | 
17 | #define DIFFERENTIAL_DOT_EPSILON 0.0000f
18 | 
19 | /*
20 |  * Holds the result of intersecting a ray with the scene.
21 |  */
22 | struct Intersection {
23 | 	// True if something was hit
24 | 	bool hit = false;
25 | 
26 | 	// The GUID of the object instance that was hit
27 | 	InstanceID id;
28 | 
29 | 	// Information about the intersection point
30 | 	float t = std::numeric_limits<float>::infinity(); // T-parameter along the ray at the intersection
31 | 	bool backfacing = false; // Whether it hit the backface of the surface
32 | 	float light_pdf = 9999.0f;  // Pdf of selecting this hit point and ray via light sampling
33 | 
34 | 	// The space that the intersection took place in, relative to world space.
35 | 	Transform space;
36 | 
37 | 	// Differential geometry at the hit point
38 | 	DifferentialGeometry geo;
39 | 
40 | 	// Offset for subsequent spawned rays to avoid self-intersection.
41 | 	// Should be added for reflection, subtracted for transmission.
42 | 	Vec3 offset = Vec3(0.0f, 0.0f, 0.0f);
43 | 
44 | 	// The surface closure at the intersection, along with the probability
45 | 	// of that closure having been selected amongst multiple possible
46 | 	// closures.
47 | 	SurfaceClosureUnion surface_closure;
48 | 	float closure_prob = 1.0f;
49 | };
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/color/spectrum_grid.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * XYZ -> spectrum
  3 |  * From the paper "Physically Meaningful Rendering using Tristimulus Colours"
  4 |  * by Hanika et al.
  5 |  */
  6 | 
  7 | #ifndef SPECTRUM_GRID_H
  8 | #define SPECTRUM_GRID_H
  9 | 
 10 | #include "spectra_xyz_5nm_380_780_0.97.h"
 11 | 
 12 | #include <math.h>
 13 | #include <float.h>
 14 | #include <assert.h>
 15 | 
 16 | /*
 17 |  * Evaluate the spectrum for xyz at the given wavelength.
 18 |  * lambda must lie in [spectrum_sample_min, spectrum_sample_max] (asserted).
 19 |  * Returns 0 if 1/(X+Y+Z) is not below FLT_MAX or uv falls outside the grid.
 20 |  */
 21 | static inline float spectrum_xyz_to_p(const float lambda, const float *xyz) {
 22 | 	assert(lambda >= spectrum_sample_min);
 23 | 	assert(lambda <= spectrum_sample_max);
 24 | 	float xyY[3], uv[2];
 25 | 
 26 | 	const float norm = 1.0/(xyz[0] + xyz[1] + xyz[2]);
 27 | 	if (!(norm < FLT_MAX)) {
 28 | 		return 0.0f;
 29 | 	}
 30 | 	// convert to xy chromaticities
 31 | 	xyY[0] = xyz[0] * norm;
 32 | 	xyY[1] = xyz[1] * norm;
 33 | 	xyY[2] = xyz[1];
 34 | 
 35 | 	// rotate to align with grid
 36 | 	spectrum_xy_to_uv(xyY, uv);
 37 | 
 38 | 	if (uv[0] < 0.0f || uv[0] >= spectrum_grid_width ||
 39 | 	        uv[1] < 0.0f || uv[1] >= spectrum_grid_height) {
 40 | 		return 0.f;
 41 | 	}
 42 | 
 43 | 	int uvi[2] = {(int)uv[0], (int)uv[1]};
 44 | 	assert(uvi[0] < spectrum_grid_width);
 45 | 	assert(uvi[1] < spectrum_grid_height);
 46 | 
 47 | 	const int cell_idx = uvi[0] + spectrum_grid_width * uvi[1];
 48 | 	assert(cell_idx < spectrum_grid_width*spectrum_grid_height);
 49 | 	assert(cell_idx >= 0);
 50 | 
 51 | 	const spectrum_grid_cell_t* cell = spectrum_grid + cell_idx;
 52 | 	const int inside = cell->inside;
 53 | 	const int *idx   = cell->idx;
 54 | 	const int num    = cell->num_points;
 55 | 
 56 | 	// get linearly interpolated spectral power for the corner vertices:
 57 | 	float p[num]; // NOTE: runtime-sized (variable-length) array, a compiler extension in C++
 58 | 	// this clamping is only necessary if lambda is not sure to be >= spectrum_sample_min and <= spectrum_sample_max:
 59 | 	const float sb = //fminf(spectrum_num_samples-1e-4, fmaxf(0.0f,
 60 | 	    (lambda - spectrum_sample_min)/(spectrum_sample_max-spectrum_sample_min) * (spectrum_num_samples-1);//));
 61 | 	assert(sb >= 0.f);
 62 | 	assert(sb <= spectrum_num_samples);
 63 | 	// sb1 is derived from the truncated index sb0: the float sum sb+1 can round up and skip a sample.
 64 | 	const int sb0 = (int)sb;
 65 | 	const int sb1 = sb0+1 < spectrum_num_samples ? sb0+1 : spectrum_num_samples-1;
 66 | 	const float sbf = sb - sb0;
 67 | 	for (int i=0; i<num; ++i) {
 68 | 		assert(idx[i] >= 0);
 69 | 		assert(sb0 < spectrum_num_samples);
 70 | 		assert(sb1 < spectrum_num_samples);
 71 | 		const float* spectrum = spectrum_data_points[idx[i]].spectrum;
 72 | 		p[i] = spectrum[sb0] * (1.0f-sbf) + spectrum[sb1] * sbf;
 73 | 	}
 74 | 
 75 | 	float interpolated_p = 0.0f;
 76 | 
 77 | 	if (inside) {
 78 | 		// fast path for normal inner quads:
 79 | 		uv[0] -= uvi[0];
 80 | 		uv[1] -= uvi[1];
 81 | 
 82 | 		assert(uv[0] >= 0 && uv[0] <= 1.f);
 83 | 		assert(uv[1] >= 0 && uv[1] <= 1.f);
 84 | 
 85 | 		// the layout of the vertices in the quad is:
 86 | 		//  2  3
 87 | 		//  0  1
 88 | 		interpolated_p =
 89 | 		    p[0] * (1.0f-uv[0]) * (1.0f-uv[1]) + p[2] * (1.0f-uv[0])  * uv[1] +
 90 | 		    p[3] * uv[0]        * uv[1]        + p[1] * uv[0]         * (1.0f-uv[1]);
 91 | 	} else {
 92 | 		// need to go through triangulation :(
 93 | 		// we get the indices in such an order that they form a triangle fan around idx[0].
 94 | 		// compute barycentric coordinates of our xy* point for all triangles in the fan:
 95 | 		const float ex = uv[0] - spectrum_data_points[idx[0]].uv[0];
 96 | 		const float ey = uv[1] - spectrum_data_points[idx[0]].uv[1];
 97 | 		float e0x = spectrum_data_points[idx[1]].uv[0] - spectrum_data_points[idx[0]].uv[0];
 98 | 		float e0y = spectrum_data_points[idx[1]].uv[1] - spectrum_data_points[idx[0]].uv[1];
 99 | 		float uu = e0x*ey - ex*e0y;
100 | 		for (int i=0; i<num-1; i++) {
101 | 			float e1x, e1y;
102 | 			if (i == num-2) {
103 | 				// close the circle
104 | 				e1x = spectrum_data_points[idx[1]].uv[0] - spectrum_data_points[idx[0]].uv[0];
105 | 				e1y = spectrum_data_points[idx[1]].uv[1] - spectrum_data_points[idx[0]].uv[1];
106 | 			} else {
107 | 				e1x = spectrum_data_points[idx[i+2]].uv[0] - spectrum_data_points[idx[0]].uv[0];
108 | 				e1y = spectrum_data_points[idx[i+2]].uv[1] - spectrum_data_points[idx[0]].uv[1];
109 | 			}
110 | 			float vv = ex*e1y - e1x*ey;
111 | 
112 | 			// TODO: with some sign magic, this division could be deferred to the last iteration!
113 | 			const float area = e0x*e1y - e1x*e0y;
114 | 			// normalise
115 | 			const float u = uu / area;
116 | 			const float v = vv / area;
117 | 			float w = 1.0f - u - v;
118 | 			// outside spectral locus (quantized version at least) or outside grid
119 | 			if (u < 0.0 || v < 0.0 || w < 0.0) {
120 | 				uu = -vv;
121 | 				e0x = e1x;
122 | 				e0y = e1y;
123 | 				continue;
124 | 			}
125 | 
126 | 			// This seems to be the triangle we've been looking for.
127 | 			interpolated_p = p[0] * w + p[i+1] * v + p[(i == num-2) ? 1 : (i+2)] * u;
128 | 			break;
129 | 		}
130 | 	}
131 | 
132 | 	// now we have a spectrum which corresponds to the xy chromaticities of the input. need to scale according to the
133 | 	// input brightness X+Y+Z now:
134 | 	return interpolated_p / norm;
135 | }
136 | 
137 | #endif
138 | 
139 | 


--------------------------------------------------------------------------------
/config.cpp:
--------------------------------------------------------------------------------
 1 | #include "numtype.h"
 2 | 
 3 | #include "config.hpp"
 4 | 
 5 | namespace Config {
 6 | bool no_output = false; // Suppress writing the image to disk, for better timing tests without as much I/O latency
 7 | float dice_rate = 0.25; // 0.7 is about half pixel area
 8 | float min_upoly_size = 0.00001; // Approximate minimum micropolygon size in world space
 9 | uint8_t max_grid_size = 16; // NOTE(review): presumably the maximum micropolygon grid resolution -- confirm against the dicing code
10 | float grid_cache_size = 64.0; // In MB
11 | 
12 | int samples_per_bucket = 1 << 16; // The number of samples to aim to take per-bucket (used in auto-sizing buckets)
13 | 
14 | float displace_distance = 0.00f; // NOTE(review): units and usage are not visible in this file -- confirm
15 | } // namespace Config
16 | 


--------------------------------------------------------------------------------
/config.h.in:
--------------------------------------------------------------------------------
1 | #ifndef CONFIG_H
2 | #define CONFIG_H
3 | 
4 | #define VERSION_MAJOR ${Psychopath_VERSION_MAJOR}
5 | #define VERSION_MINOR ${Psychopath_VERSION_MINOR}
6 | #define VERSION_PATCH ${Psychopath_VERSION_PATCH}
7 | 
8 | #endif // CONFIG_H
9 | 


--------------------------------------------------------------------------------
/config.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef CONFIG_HPP
 2 | #define CONFIG_HPP
 3 | 
 4 | #include "numtype.h"
 5 | 
 6 | namespace Config {
 7 | extern bool no_output; // Suppresses writing the image to disk
 8 | extern float dice_rate;
 9 | extern float min_upoly_size; // Approximate minimum micropolygon size in world space
10 | extern uint8_t max_grid_size;
11 | extern float grid_cache_size; // In MB
12 | 
13 | extern int samples_per_bucket; // Number of samples to aim for per bucket (used in auto-sizing buckets)
14 | 
15 | extern float displace_distance;
16 | } // namespace Config
17 | 
18 | #endif
19 | 


--------------------------------------------------------------------------------
/docs/scene_format_example.psy:
--------------------------------------------------------------------------------
  1 | # Psy files must be valid utf8 files
  2 | 
  3 | # Comments start with a hash and go to the end of the line.  Comments are
  4 | # considered whitespace.
  5 | 
  6 | # Contiguous strings of whitespace characters (and comments) are always
  7 | # semantically equivalent to a single space.  Whitespace formatting is purely
  8 | # for human readability and does not affect how the file is interpreted.
  9 | 
 10 | # Non-leaf properties follow the format:
 11 | #   TypeName $optional_name {<contents>}
 12 | #
 13 | # The contents of a non-leaf property is other properties, both
 14 | # leaf and non-leaf.
 15 | 
 16 | # Leaf properties follow the format:
 17 | #   TypeName [<contents>]
 18 | #
 19 | # Note the square brackets instead of curly braces.
 20 | # The contents of a leaf property can be any utf8 text
 21 | # that is properly escaped (see below).
 22 | 
 23 | # Backslashes (\) are used for escaping characters inside property names and
 24 | # leaf property contents.  Escaping is not allowed anywhere else.
 25 | # In these contexts, any character immediately following a backslash is
 26 | # interpreted literally and is stripped of any semantic meaning.  This allows,
 27 | # for example, closing square brackets and hash symbols (] and #) to be
 28 | # included in leaf contents.  It also allows white space, hashes, and opening
 29 | # square and curly braces to be in property names.
 30 | 
 31 | # A scene defines a single frame to be rendered
 32 | Scene $yar_0001 {
 33 | 	# The output section defines how the rendered image should be output to disk
 34 | 	Output {
 35 | 		Path ["/home/cessen/test/psychopath/render/yar_0001.png"]
 36 | 		Format [png]
 37 | 		ColorSpace [srgb]
 38 | 		Dither [random 1.0]
 39 | 	}
 40 | 
 41 | 	# Render settings... fairly self explanatory
 42 | 	RenderSettings {
 43 | 		Resolution [1280 720]
 44 | 		SamplesPerPixel [16]
 45 | 		DicingRate [0.25]
 46 | 		PixelAspect [1.0]
 47 | 		Filter [gaussian 1.5]
 48 | 		Seed [1]
 49 | 	}
 50 | 
 51 | 	# Each scene contains a single camera
 52 | 	Camera {
 53 | 		Fov [0.785398163] # In radians
 54 | 		FocalDistance [23.1]
 55 | 		ApertureRadius [0.1]
 56 | 
 57 | 		# Multiple of the same property listed in the same context implies animation.
 58 | 		# In this case, the camera has an animated transform.
 59 | 		Transform [
 60 | 			1 0 0 0
 61 | 			0 1 0 0
 62 | 			0 0 1 0
 63 | 			0 0 0 1
 64 | 		]
 65 | 
 66 | 		Transform [
 67 | 			1 0 0 0
 68 | 			0 0.9 0 0
 69 | 			0 0 0.8 0
 70 | 			0 0 0 1.3
 71 | 		]
 72 | 	}
 73 | 
 74 | 	# Each scene contains a single world description, which includes everything
 75 | 	# of infinite extent.  For example: background shader, distant lights, volumes
 76 | 	# that occupy the entire world, etc.
 77 | 	World {
 78 | 		BackgroundShader {
 79 | 		    Type [Color]
 80 | 			Color [0.8 0.8 0.8]
 81 | 		}
 82 | 
 83 | 		DistantDiskLight {
 84 | 			Direction [1.0 0.5 0.5]
 85 | 			Radius [0.00872664] # In radians
 86 | 			Color [1.0 1.0 1.0]
 87 | 		}
 88 | 	}
 89 | 
 90 | 	# Each scene contains a single root assembly.
 91 | 	# All further scene description occurs within this assembly,
 92 | 	# or in other files referenced by this assembly.
 93 | 	Assembly {
 94 | 		# Shaders, objects, sub-assemblies, and instances can be listed in any
 95 | 		# order, as long as all data precedes any references to it.
 96 | 		SurfaceShader $grey_diffuse {
 97 | 			Type [Lambert]
 98 | 			Color [0.9 0.9 0.9]
 99 | 		}
100 | 
101 | 		SurfaceShader $mirror {
102 | 			Type [GTR]
103 | 			Color [0.9 0.9 0.9]
104 | 			Roughness [0.0]
105 | 			TailShape [2.0]
106 | 			Fresnel [0.25]
107 | 		}
108 | 
109 | 		SphereLight $light.001 {
110 | 			Location [20 20 20]
111 | 			Radius [1.0]
112 | 			Color [1.0 1.0 1.0]
113 | 		}
114 | 
115 | 		CatmullClarkSubdiv $subdiv_test {
116 | 			GeometryFile ["/home/cessen/thing.obj"]
117 | 			SurfaceShaderBind [$mirror] # Referencing the shader defined previously
118 | 		}
119 | 
120 | 		# Assemblies can contain other assemblies
121 | 		Assembly $gruble {
122 | 			SurfaceShader $complex_shader {
123 | 				Type [OSL]
124 | 				FilePath ["cool_shader.osl"]
125 | 			}
126 | 
127 | 			# Assembly namespaces are local, so $subdiv_test here does
128 | 			# not conflict with $subdiv_test in the parent assembly.
129 | 			CatmullClarkSubdiv $subdiv_test {
130 | 				GeometryFile ["/home/cessen/thing2.obj"]
131 | 				SurfaceShaderBind [$mirror] # Referencing the shader defined previously
132 | 			}
133 | 
134 | 			Instance {
135 | 				Data [$subdiv_test]
136 | 			}
137 | 		}
138 | 
139 | 		# The Objects and SubAssemblies don't directly manifest inside
140 | 		# this assembly.  They must be instanced into it.  An object or
141 | 		# sub-assembly can be instanced any number of times within the
142 | 		# assembly.
143 | 		Instance {
144 | 			Data [$light.001]
145 | 			Transform [
146 | 				1 0 0 0
147 | 				0 1 0 0
148 | 				0 0 1 0
149 | 				0 0 0 1
150 | 			]
151 | 		}
152 | 
153 | 		Instance {
154 | 			Data [$subdiv_test]
155 | 			# Transforms are not necessary: an instance can have no transforms
156 | 		}
157 | 
158 | 		Instance {
159 | 			Data [$subdiv_test]
160 | 			Transform [
161 | 				2 0 0 0
162 | 				0 3 4 0
163 | 				1 0 -1 0
164 | 				0 0 0 1
165 | 			]
166 | 			Transform [
167 | 				3 0 0 0
168 | 				0 4 -2 1
169 | 				1 0 -1 0
170 | 				0 0 0 1
171 | 			]
172 | 		}
173 | 
174 | 		Instance {
175 | 			Data [$gruble]
176 | 			Transform [
177 | 				2 0 0 0
178 | 				0 3 4 0
179 | 				1 0 -1 0
180 | 				0 0 0 1
181 | 			]
182 | 		}
183 | 	}
184 | }
185 | 


--------------------------------------------------------------------------------
/film/raster.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef RASTER_HPP
 2 | #define RASTER_HPP
 3 | 
 4 | #include "numtype.h"
 5 | 
 6 | #include <assert.h>
 7 | 
 8 | /**
 9 |  * A lightweight raster image buffer.
10 |  * Includes a mapping to 2d coordinates.
11 |  * Pixels are stored in left-to-right, top-to-bottom order, with all the
12 |  * channels of a pixel stored next to each other.
13 |  */
14 | template <class PIXFMT>
15 | class Raster {
16 | public:
17 | 	uint16_t width, height; // Resolution of the image
18 | 	float min_x, min_y; // Minimum x/y coordinates of the image
19 | 	float max_x, max_y; // Maximum x/y coordinates of the image
20 | 	uint16_t channels; // Channels per pixel
21 | 	PIXFMT *pixels; // Pixel data, owned by this object (allocated in the constructor)
22 | 
23 | 	/**
24 | 	 * @brief Constructor.
25 | 	 * Creates a new w-by-h Raster buffer with cc channels per pixel.  All pixel data is initialized to zero.
26 | 	 * (x1, y1) and (x2, y2) are opposite corners of the coordinate range, given in either order.
27 | 	 */
28 | 	Raster(int w, int h, int cc, float x1, float y1, float x2, float y2) {
29 | 		width = w;
30 | 		height = h;
31 | 		min_x = x1 < x2 ? x1 : x2;
32 | 		min_y = y1 < y2 ? y1 : y2;
33 | 		max_x = x1 > x2 ? x1 : x2;
34 | 		max_y = y1 > y2 ? y1 : y2;
35 | 
36 | 		channels = cc;
37 | 		pixels = new PIXFMT[w*h*cc];
38 | 		for (int i=0; i < w*h*cc; i++)
39 | 			pixels[i] = 0;
40 | 	}
41 | 
42 | 	// Non-copyable: an implicit member-wise copy would share `pixels` and delete [] it twice.
43 | 	Raster(const Raster &) = delete;
44 | 	Raster &operator=(const Raster &) = delete;
45 | 	~Raster() { delete [] pixels; }
46 | 	/**
47 | 	 * @brief Fetches a pointer to the requested pixel's data.
48 | 	 */
49 | 	PIXFMT *pixel(int x, int y) {
50 | 		assert(x >= 0 && x < width && y >= 0 && y < height);
51 | 		return &(pixels[(y*width + x)*channels]);
52 | 	}
53 | };
54 | 
55 | #endif
56 | 


--------------------------------------------------------------------------------
/format_code.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Auto-formats all C/C++ code with Artistic Style (http://astyle.sourceforge.net/)
3 | # to adhere to a consistent coding style.  Should be run before committing.
4 | astyle --options=none --suffix=".orig~" -Q -A2 -tSHU  -R "*.hpp" "*.cpp" "*.h" "*.c"
5 | 
6 | 


--------------------------------------------------------------------------------
/global.cpp:
--------------------------------------------------------------------------------
 1 | #include "global.hpp"
 2 | 
 3 | #include <atomic>
 4 | #include "numtype.h"
 5 | 
 6 | namespace Global {
 7 | std::atomic<size_t> next_object_uid {0};
 8 | // Definitions of the statistics counters; every counter starts at zero.
 9 | namespace Stats {
10 | std::atomic<uint64_t> rays_shot {0};
11 | std::atomic<uint64_t> split_count {0};
12 | std::atomic<size_t> object_ray_tests {0};
13 | std::atomic<size_t> top_level_bvh_node_tests {0};
14 | 
15 | std::atomic<uint64_t> nan_count {0};
16 | std::atomic<uint64_t> inf_count {0};
17 | } // namespace Stats
18 | } // namespace Global
19 | 


--------------------------------------------------------------------------------
/global.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef PSYCHO_GLOBAL_HPP
 2 | #define PSYCHO_GLOBAL_HPP
 3 | 
 4 | #include <atomic>
 5 | #include "numtype.h"
 6 | 
 7 | //#define GLOBAL_STATS_TOP_LEVEL_BVH_NODE_TESTS
 8 | 
 9 | namespace Global {
10 | extern std::atomic<size_t> next_object_uid;
11 | 
12 | namespace Stats {
13 | extern std::atomic<uint64_t> rays_shot;
14 | extern std::atomic<uint64_t> split_count;
15 | extern std::atomic<size_t> object_ray_tests;
16 | extern std::atomic<size_t> top_level_bvh_node_tests;
17 | 
18 | extern std::atomic<uint64_t> nan_count;
19 | extern std::atomic<uint64_t> inf_count;
20 | // Zeroes every counter above.  inline (not static): one shared definition, no unused-function warnings per includer.
21 | inline void clear() {
22 | 	rays_shot = 0;
23 | 	split_count = 0;
24 | 	object_ray_tests = 0;
25 | 	top_level_bvh_node_tests = 0;
26 | 
27 | 	nan_count = 0;
28 | 	inf_count = 0;
29 | }
30 | 
31 | } // namespace Stats
32 | } // namespace Global
33 | 
34 | #endif // PSYCHO_GLOBAL_HPP


--------------------------------------------------------------------------------
/integrator/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library(integrator
2 | 	path_trace_integrator)
3 | 


--------------------------------------------------------------------------------
/integrator/integrator.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file defines the interface to Integrator classes, which decide where
 3 |  * to shoot rays and how to combine their results into a final image or images.
 4 |  */
 5 | #ifndef INTEGRATOR_HPP
 6 | #define INTEGRATOR_HPP
 7 | 
 8 | #include "numtype.h"
 9 | 
10 | #include "raster.hpp"
11 | #include "scene.hpp"
12 | #include "tracer.hpp"
13 | 
14 | /**
15 |  * @brief Abstract interface for integrators of the rendering equation.
16 |  *
17 |  * An Integrator solves the rendering equation, relying on the Tracer for
18 |  * ray intersection testing and on the shading system for shading.
19 |  *
20 |  * A concrete integrator can, for example, implement Whitted style ray
21 |  * tracing, bidirectional path tracing, or Metropolis light transport.
22 |  * Note that Markov chain algorithms may play poorly with the Tracer, which
23 |  * is designed to trace rays in bulk.
24 |  */
25 | class Integrator {
26 | public:
27 | 	// Virtual, so that concrete integrators can be destroyed through this interface.
28 | 	virtual ~Integrator() = default;
29 | 
30 | 	/**
31 | 	 * @brief Begins integration.  Implemented by each concrete integrator.
32 | 	 */
33 | 	virtual void integrate() = 0;
34 | };
35 | 
36 | #endif // INTEGRATOR_HPP
37 | 
38 | 


--------------------------------------------------------------------------------
/integrator/path_trace_integrator.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file and path_trace_integrator.cpp define the PathTraceIntegrator class, which decides where
  3 |  * to shoot rays and how to combine their results into a final image or images.
  4 |  */
  5 | #ifndef PATH_TRACE_INTEGRATOR_HPP
  6 | #define PATH_TRACE_INTEGRATOR_HPP
  7 | 
  8 | #include <iostream>
  9 | #include <iomanip>
 10 | 
 11 | #include <functional>
 12 | #include <mutex>
 13 | 
 14 | #include "numtype.h"
 15 | 
 16 | #include "spinlock.hpp"
 17 | 
 18 | #include "integrator.hpp"
 19 | #include "film.hpp"
 20 | #include "image_sampler.hpp"
 21 | #include "scene.hpp"
 22 | #include "tracer.hpp"
 23 | #include "color.hpp"
 24 | 
 25 | #include "ring_buffer_concurrent.hpp"
 26 | 
 27 | /**
 28 |  * @brief A path tracing integrator for the rendering equation.
 29 |  *
 30 |  * Solves the rendering equation using the Tracer for ray intersection
 31 |  * testing and the shading system for shading.  It is intended to implement
 32 |  * path tracing with next event estimation.
 33 |  *
 34 |  * Integrator is inherited publicly so that an instance can be handed out as
 35 |  * an Integrator pointer or reference; with the default (private) inheritance
 36 |  * of a `class`, that conversion is inaccessible outside this class, which
 37 |  * defeats the purpose of the common interface.
 38 |  */
 39 | class PathTraceIntegrator final: public Integrator {
 40 | 	struct PixelBlock {
 41 | 		int x, y;
 42 | 		int w, h;
 43 | 	};
 44 | 
 45 | 	/*
 46 | 	 * A path tracing path state.
 47 | 	 * Stores state of a path in progress.
 48 | 	 */
 49 | 	struct PTState {
 50 | 		Sampler sampler;
 51 | 		float time;
 52 | 		int step = 0;
 53 | 		short pix_x, pix_y;  // Pixel coordinates of the path
 54 | 		Intersection inter {};
 55 | 		WorldRay prev_ray {};
 56 | 		float wavelength;  // The wavelength of light of the path (in nm)
 57 | 		float last_pdf = 0.0f;
 58 | 		SpectralSample col {0.0f, 0.0f}; // Color of the sample collected so far
 59 | 		SpectralSample fcol {0.0f, 1.0f}; // Accumulated filter color from light path
 60 | 		SpectralSample lcol {0.0f, 0.0f}; // Temporary storage for incoming light color
 61 | 
 62 | 		bool done {false};
 63 | 	};
 64 | 
 65 | 	void init_path(PTState* pstate, Sampler s, short x, short y);
 66 | 	WorldRay next_ray_for_path(const WorldRay& prev_ray, PTState* pstate);
 67 | 	void update_path(PTState* pstate, const WorldRay& ray, const Intersection& inter);
 68 | 
 69 | 
 70 | 
 71 | 	size_t total_items = 0;
 72 | 	size_t completed_items = 0;
 73 | 	SpinLock progress_lock;
 74 | 	void print_progress() {
 75 | 		std::cout << "\rRendering: " << std::fixed << std::setprecision(2) << (float(completed_items) / total_items) * 100 << "%" << std::flush;
 76 | 	}
 77 | 
 78 | 
 79 | 
 80 | public:
 81 | 	Scene *scene;
 82 | 	Film *image;
 83 | 	std::mutex image_mut;
 84 | 	int spp;
 85 | 	int spp_max;
 86 | 	float image_variance_max;
 87 | 	uint seed;
 88 | 	int path_length;
 89 | 	int thread_count;
 90 | 	std::function<void()> callback;
 91 | 
 92 | 	RingBufferConcurrent<PixelBlock> blocks; // Queue for pending blocks of pixels to be rendered
 93 | 
 94 | 	/**
 95 | 	 * @brief Constructor.
 96 | 	 *
 97 | 	 * @param[in] scene_ A pointer to the scene to render.  Should be fully
 98 | 	 *                   finalized for rendering.
 99 | 	 * @param[out] image_ The image to render to.  Should be already
100 | 	 *                    initialized with 3 channels, for rgb.
101 | 	 * @param spp_ The number of samples to take per pixel for integration.
102 | 	 */
103 | 	PathTraceIntegrator(Scene *scene_, Film *image_, int spp_, int spp_max_, float variance_max_, uint seed_, int thread_count_=1, std::function<void()> callback_ = std::function<void()>()) {
104 | 		scene = scene_;
105 | 		image = image_;
106 | 		spp = spp_;
107 | 		spp_max = spp_max_;
108 | 		image_variance_max = variance_max_;
109 | 		seed = seed_;
110 | 		thread_count = thread_count_;
111 | 		path_length = 4;
112 | 		callback = callback_;
113 | 
114 | 		blocks.resize(thread_count_ * 2);
115 | 	}
116 | 
117 | 	/**
118 | 	 * @brief Begins integration.
119 | 	 */
120 | 	virtual void integrate() override;
121 | 
122 | 	/**
123 | 	 * Watches the block queue for blocks of pixels to render.
124 | 	 */
125 | 	void render_blocks();
126 | };
127 | 
128 | #endif // PATH_TRACE_INTEGRATOR_HPP
129 | 
130 | 


--------------------------------------------------------------------------------
/lights/light.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef LIGHT_HPP
 2 | #define LIGHT_HPP
 3 | 
 4 | #include "object.hpp"
 5 | #include "bbox.hpp"
 6 | #include "vector.hpp"
 7 | #include "color.hpp"
 8 | 
 9 | /**
10 |  * @brief An interface for light sources.
11 |  */
12 | class Light: public Object {
13 | public:
14 | 	virtual ~Light() {}
15 | 
16 | 	Object::Type get_type() const final {
17 | 		return Object::LIGHT;
18 | 	}
19 | 
20 | 	/**
21 | 	 * @brief Samples the light source for a given point to be illuminated.
22 | 	 *
23 | 	 * @param arr The point to be illuminated.
24 | 	 * @param u Random parameter U.
25 | 	 * @param v Random parameter V.
26 | 	 * @param wavelength The wavelength of light to sample at.
27 | 	 * @param time The time to sample at.
28 | 	 * @param[out] shadow_vec The world-space direction to cast a shadow ray
29 | 	 *               for visibility testing.  Its length determines the extent
30 | 	 *               that the shadow ray should have, unless the light source
31 | 	 *               is infinite (see is_infinite()) in which case the extent
32 | 	 *               should be infinite.  This vector also doubles to inform
33 | 	 *               what direction the light is arriving from (just invert
34 | 	 *               the vector).
35 | 	 * @param[out] pdf Receives the pdf of the generated sample.
36 | 	 * @returns The light arriving at the point arr.
37 | 	 */
38 | 	virtual SpectralSample sample(const Vec3 &arr, float u, float v, float wavelength, float time,
39 | 	                              Vec3 *shadow_vec, float* pdf) const = 0;
40 | 
41 | 
42 | 	/**
43 | 	 * @brief Calculates the pdf of sampling the given
44 | 	 * sample_dir/sample_u/sample_v from the given point arr.  This is used
45 | 	 * primarily to calculate probabilities for multiple importance sampling.
46 | 	 *
47 | 	 * NOTE: this function CAN assume that sample_dir, sample_u, and sample_v
48 | 	 * are a valid sample for the light source (i.e. hits/lies on the light
49 | 	 * source).  No guarantees are made about the correctness of the return
50 | 	 * value if they are not valid.
51 | 	 */
52 | 	virtual float sample_pdf(const Vec3 &arr, const Vec3 &sample_dir, float sample_u, float sample_v, float wavelength, float time) const = 0;
53 | 
54 | 
55 | 	/**
56 | 	 * @brief Returns the color emitted in the given direction from the
57 | 	 * given parameters on the light.
58 | 	 *
59 | 	 * @param dir The direction of the outgoing light.
60 | 	 * @param u Random parameter U.
61 | 	 * @param v Random parameter V.
62 | 	 * @param wavelength The wavelength of light to sample at.
63 | 	 * @param time The time to sample at.
64 | 	 */
65 | 	virtual SpectralSample outgoing(const Vec3 &dir, float u, float v, float wavelength, float time) const = 0;
66 | 
67 | 
68 | 
69 | 	/**
70 | 	 * @brief Returns whether the light has a delta distribution.
71 | 	 *
72 | 	 * If a light has no chance of a ray hitting it through random process
73 | 	 * then it is a delta light source.  For example, point light sources,
74 | 	 * lights that only emit in a single direction, etc.
75 | 	 */
76 | 	virtual bool is_delta() const = 0;
77 | 
78 | 
79 | 	/**
80 | 	 * @brief Tests a ray against the light.
81 | 	 */
82 | 	virtual bool intersect_ray(const Ray &ray, Intersection *intersection=nullptr) const = 0;
83 | };
84 | 
85 | #endif // LIGHT_HPP
86 | 


--------------------------------------------------------------------------------
/lights/point_light.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef POINT_LIGHT_HPP
 2 | #define POINT_LIGHT_HPP
 3 | 
 4 | #include "light.hpp"
 5 | 
 6 | /**
 7 |  * @brief A point light source.
 8 |  *
 9 |  * A deliberately minimal light: it doubles as an example of how to
10 |  * implement a finite light source.
11 |  */
12 | class PointLight final: public Light {
13 | 	Vec3 pos;
14 | 	Color col;
15 | 	std::vector<BBox> bounds_;
16 | 
17 | public:
18 | 	// The bounds are a single box whose two corners are both the light's position.
19 | 	PointLight(Vec3 pos_, Color col_): pos {pos_}, col {col_}, bounds_ {BBox(pos_, pos_)} {}
20 | 
21 | 	SpectralSample sample(const Vec3 &arr, float u, float v, float wavelength, float time,
22 | 	                      Vec3 *shadow_vec, float* pdf) const override {
23 | 		*pdf = 1.0f;
24 | 		const Vec3 to_light = pos - arr;
25 | 		*shadow_vec = to_light;
26 | 		const float dist2 = to_light.length2();
27 | 		if (!(dist2 > 0)) // Fudge for divide by zero.
28 | 			return Color_to_SpectralSample(col, wavelength);
29 | 		return Color_to_SpectralSample(col / dist2, wavelength);
30 | 	}
31 | 
32 | 	// Always returns a pdf of zero.
33 | 	float sample_pdf(const Vec3 &arr, const Vec3 &sample_dir, float sample_u, float sample_v, float wavelength, float time) const override {
34 | 		return 0.0f;
35 | 	}
36 | 
37 | 	// The emitted color is the same for every direction and every u/v.
38 | 	SpectralSample outgoing(const Vec3 &dir, float u, float v, float wavelength, float time) const override {
39 | 		return Color_to_SpectralSample(col, wavelength);
40 | 	}
41 | 
42 | 	bool is_delta() const override { return true; }
43 | 
44 | 	Color total_emitted_color() const override { return col; }
45 | 
46 | 	// Always returns false: no ray is ever reported as hitting the light.
47 | 	bool intersect_ray(const Ray &ray, Intersection *intersection=nullptr) const override {
48 | 		return false;
49 | 	}
50 | 
51 | 	// The box list built in the constructor.
52 | 	const std::vector<BBox> &bounds() const override {
53 | 		return bounds_;
54 | 	}
55 | };
56 | 
57 | #endif // POINT_LIGHT_HPP
58 | 


--------------------------------------------------------------------------------
/math/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | #add_library(math
2 | #            )
3 | 


--------------------------------------------------------------------------------
/math/vector.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef VECTOR_HPP
  2 | #define VECTOR_HPP
  3 | 
  4 | #include "numtype.h"
  5 | #include <assert.h>
  6 | #include <x86intrin.h>
  7 | #include <algorithm>
  8 | #include <cmath>
  9 | #include "ImathVec.h"
 10 | 
 11 | // 3D vector
 12 | typedef Imath::Vec3<float> ImathVec3;
 13 | 
 14 | 
 15 | #if 0
 16 | /**
 17 |  * @brief A 3d vector.
 18 |  *
 19 |  * Optionally accelerated by SSE instructions.
 20 |  */
 21 | struct __attribute__((aligned(16))) Vec3 {
 22 | 	union {
 23 | 		struct {
 24 | 			float x,y,z,w;
 25 | 		};
 26 | 		__m128 m128;
 27 | 	};
 28 | 
 29 | 	// Constructors
 30 | 	Vec3() {}
 31 | 	Vec3(float v) {
 32 | 		x = v;
 33 | 		y = v;
 34 | 		z = v;
 35 | 		w = v;
 36 | 	}
 37 | 	Vec3(float x_, float y_, float z_, float w_=1.0f) {
 38 | 		x = x_;
 39 | 		y = y_;
 40 | 		z = z_;
 41 | 		w = w_;
 42 | 	}
 43 | 	Vec3(__m128 m) {
 44 | 		m128 = m;
 45 | 	}
 46 | 
 47 | // Element access
 48 | 	float &operator[](size_t n) {
 49 | 		assert(n < 4);
 50 | 		return (&x)[n];
 51 | 	}
 52 | 	const float &operator[](size_t n) const {
 53 | 		assert(n < 4);
 54 | 		return (&x)[n];
 55 | 	}
 56 | 
 57 | // Comparisons
 58 | 	bool operator==(const Vec3 &b) const {
 59 | 		return (x==b.x && y==b.y && z==b.z);
 60 | 	}
 61 | 
 62 | // Multiplication and division by scalar
 63 | 	Vec3 operator*(float b) const {
 64 | 		return (Vec3)_mm_mul_ps(m128, _mm_set_ps(b,b,b,b));
 65 | 	}
 66 | 	Vec3 operator/(float b) const {
 67 | 		return (Vec3)_mm_div_ps(m128, _mm_set_ps(b,b,b,b));
 68 | 	}
 69 | 
 70 | 	Vec3 &operator*=(float b) {
 71 | 		m128 = _mm_mul_ps(m128, _mm_set_ps(b,b,b,b));
 72 | 		return *this;
 73 | 	}
 74 | 	Vec3 &operator/=(float b) {
 75 | 		m128 = _mm_div_ps(m128, _mm_set_ps(b,b,b,b));
 76 | 		return *this;
 77 | 	}
 78 | 
 79 | // Component-wise arithmetic
 80 | 	Vec3 operator+(const Vec3& b) const {
 81 | 		return (Vec3)_mm_add_ps(m128, b.m128);
 82 | 	}
 83 | 	Vec3 operator-(const Vec3& b) const {
 84 | 		return (Vec3)_mm_sub_ps(m128, b.m128);
 85 | 	}
 86 | 	Vec3 operator*(const Vec3& b) const {
 87 | 		return (Vec3)_mm_mul_ps(m128, b.m128);
 88 | 	}
 89 | 	Vec3 operator/(const Vec3& b) const {
 90 | 		return (Vec3)_mm_div_ps(m128, b.m128);
 91 | 	}
 92 | 
 93 | 	Vec3 &operator+=(const Vec3& b) {
 94 | 		m128 = _mm_add_ps(m128, b.m128);
 95 | 		return *this;
 96 | 	}
 97 | 	Vec3 &operator-=(const Vec3& b) {
 98 | 		m128 = _mm_sub_ps(m128, b.m128);
 99 | 		return *this;
100 | 	}
101 | 	Vec3 &operator*=(const Vec3& b) {
102 | 		m128 = _mm_mul_ps(m128, b.m128);
103 | 		return *this;
104 | 	}
105 | 	Vec3 &operator/=(const Vec3& b) {
106 | 		m128 = _mm_div_ps(m128, b.m128);
107 | 		return *this;
108 | 	}
109 | 
110 | // Products
111 | 	float dot(const Vec3 &b) const {
112 | 		return x*b.x + y*b.y + z*b.z;
113 | 	}
114 | 	Vec3 cross(const Vec3 &b) const {
115 | 		return (Vec3)_mm_sub_ps(
116 | 		           _mm_mul_ps(
117 | 		               _mm_shuffle_ps(m128, m128, _MM_SHUFFLE(3, 0, 2, 1)),
118 | 		               _mm_shuffle_ps(b.m128, b.m128, _MM_SHUFFLE(3, 1, 0, 2))),
119 | 		           _mm_mul_ps(
120 | 		               _mm_shuffle_ps(m128, m128, _MM_SHUFFLE(3, 1, 0, 2)),
121 | 		               _mm_shuffle_ps(b.m128, b.m128, _MM_SHUFFLE(3, 0, 2, 1)))
122 | 		       );
123 | 	}
124 | 
125 | // Component-wise min and max
126 | 	Vec3 min(const Vec3 &b) const {
127 | 		return (Vec3)_mm_min_ps(m128, b.m128);
128 | 	}
129 | 	Vec3 max(const Vec3 &b) const {
130 | 		return (Vec3)_mm_max_ps(m128, b.m128);
131 | 	}
132 | 
133 | 	float length() const {
134 | 		Vec3 a = *this;
135 | 		a.w = 0.0f;
136 | 
137 | 		__m128 &D = a.m128;
138 | 		D = _mm_mul_ps(D, D);
139 | 		D = _mm_hadd_ps(D, D);
140 | 		D = _mm_hadd_ps(D, D);
141 | 
142 | 		D = _mm_sqrt_ps(D);
143 | 
144 | 		return a.x;
145 | 	}
146 | 
147 | 	float length2() const {
148 | 		Vec3 a = *this;
149 | 		a.w = 0.0f;
150 | 
151 | 		__m128 &D = a.m128;
152 | 		D = _mm_mul_ps(D, D);
153 | 		D = _mm_hadd_ps(D, D);
154 | 		D = _mm_hadd_ps(D, D);
155 | 
156 | 		return a.x;
157 | 	}
158 | 
159 | 	const Vec3 &normalize() {
160 | 		w = 0.f;
161 | 
162 | 		__m128 D = m128;
163 | 		D = _mm_mul_ps(D, D);
164 | 		D = _mm_hadd_ps(D, D);
165 | 		D = _mm_hadd_ps(D, D);
166 | 
167 | 		// 1 iteration of Newton-raphson -- Idea from Intel's Embree.
168 | 		__m128 r = _mm_rsqrt_ps(D);
169 | 		r = _mm_add_ps(
170 | 		        _mm_mul_ps(_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f), r),
171 | 		        _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(D, _mm_set_ps(-0.5f, -0.5f, -0.5f, -0.5f)), r), _mm_mul_ps(r, r)));
172 | 
173 | 		m128 = _mm_mul_ps(m128, r);
174 | 
175 | 		return *this;
176 | 	}
177 | 
178 | 	Vec3 normalized() const {
179 | 		Vec3 v = *this;
180 | 		v.normalize();
181 | 		return v;
182 | 	}
183 | 
184 | };
185 | #else
186 | typedef Imath::Vec3<float> Vec3;
187 | #endif
188 | 
189 | // Free-function form of a.dot(b).
190 | template <class T>
191 | static inline float dot(const T &a, const T &b) {
192 | 	return a.dot(b);
193 | }
194 | 
195 | // Normalized dot product (i.e. the cosine of the angle between two vectors).
196 | template <class T>
197 | static inline float dot_norm(const T& a, const T& b) {
198 | 	const float length_product = a.length() * b.length();
199 | 	assert(length_product > 0.0f); // a zero-length input would make the division below divide by zero
200 | 	return ((a.x * b.x) + (a.y * b.y) + (a.z * b.z)) / length_product;
201 | }
202 | 
/**
 * Free-function form of the cross product; forwards to T::cross().
 */
template <class T>
static inline T cross(const T &a, const T &b) {
	T result = a.cross(b);
	return result;
}
207 | 
208 | static inline Vec3 min(const Vec3 &a, const Vec3 &b) {
209 | 	Vec3 c;
210 | 	for (int i = 0; i < 3; i++)
211 | 		c[i] = a[i] < b[i] ? a[i] : b[i];
212 | 	return c;
213 | }
214 | 
215 | static inline Vec3 max(const Vec3 &a, const Vec3 &b) {
216 | 	Vec3 c;
217 | 	for (int i = 0; i < 3; i++)
218 | 		c[i] = a[i] > b[i] ? a[i] : b[i];
219 | 	return c;
220 | }
221 | 
222 | static inline float longest_axis(const Vec3 &v) {
223 | 	return std::max(std::max(std::abs(v.x), std::abs(v.y)), std::abs(v.z));
224 | }
225 | 
226 | /**
227 |  * Returns 'in' reflected off a surface with surface normal 'nn'.
228 |  * 'in' is an incoming direction, i.e. pointing towards the surface.
229 |  * 'nn' must be normalized.
230 |  */
231 | static inline Vec3 reflect_vec(Vec3 in, Vec3 nn) {
232 | 	return in - (nn * 2.0f * dot(in, nn));
233 | }
234 | 
235 | #endif // VECTOR_HPP
236 | 


--------------------------------------------------------------------------------
/math/vector_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test.hpp"
  2 | 
  3 | #include <iostream>
  4 | #include "vector.hpp"
  5 | 
  6 | /*
  7 |  ************************************************************************
  8 |  * Test suite for Vec3.
  9 |  ************************************************************************
 10 |  */
 11 | 
 12 | TEST_CASE("vector") {
 13 | 	// Test for the constructor
 14 | 	SECTION("constructor") {
 15 | 		Vec3 v1(0.0, 0.0, 0.0);
 16 | 		Vec3 v2(1.5, 0.0, -64.0);
 17 | 
 18 | 		bool t1 = v1.x == 0.0 && v1.y == 0.0 && v1.z == 0.0;
 19 | 		REQUIRE(t1);
 20 | 		bool t2 = v2.x == 1.5 && v2.y == 0.0 && v2.z == -64.0;
 21 | 		REQUIRE(t2);
 22 | 	}
 23 | 
 24 | 	// Test for ::operator[]
 25 | 	SECTION("op_square_bracket") {
 26 | 		Vec3 v1(1.5, 0.0, -64.0);
 27 | 		const Vec3 v2(1.5, 0.0, -64.0);
 28 | 
 29 | 		// Access
 30 | 		bool t1 = v1[0] == 1.5 && v1[1] == 0.0 && v1[2] == -64.0;
 31 | 		REQUIRE(t1);
 32 | 		bool t2 = v2[0] == 1.5 && v2[1] == 0.0 && v2[2] == -64.0;
 33 | 		REQUIRE(t2);
 34 | 
 35 | 		// Modification
 36 | 		v1[0] = 1.0;
 37 | 		v1[1] = 2.0;
 38 | 		v1[2] = 3.0;
 39 | 		bool t3 = v1[0] == 1.0 && v1[1] == 2.0 && v1[2] == 3.0;
 40 | 		REQUIRE(t3);
 41 | 	}
 42 | 
 43 | 	// Test for ::operator+
 44 | 	SECTION("op_add") {
 45 | 		Vec3 v1(1.2, -2.6, 1.0);
 46 | 		Vec3 v2(-23.4, 2.0, 9.0);
 47 | 
 48 | 		Vec3 v3 = v1 + v2;
 49 | 
 50 | 		REQUIRE(v3.x == Approx(-22.2).epsilon(0.00001));
 51 | 		REQUIRE(v3.y == Approx(-0.6).epsilon(0.0001));
 52 | 		REQUIRE(v3.z == 10.0);
 53 | 	}
 54 | 
 55 | 	// Test for ::operator-
 56 | 	SECTION("op_subtract") {
 57 | 		Vec3 v1(1.2, -2.6, 1.0);
 58 | 		Vec3 v2(-23.4, 2.2, 9.0);
 59 | 
 60 | 		Vec3 v3 = v1 - v2;
 61 | 
 62 | 		REQUIRE(v3.x == Approx(24.6).epsilon(0.00001));
 63 | 		REQUIRE(v3.y == Approx(-4.8).epsilon(0.00001));
 64 | 		REQUIRE(v3.z == -8.0);
 65 | 	}
 66 | 
 67 | 	// Test for ::operator*
 68 | 	SECTION("op_multiply") {
 69 | 		Vec3 v1(1.2, -2.6, 1.0);
 70 | 
 71 | 		Vec3 v2 = v1 * 1.5;
 72 | 
 73 | 		REQUIRE(v2.x == Approx(1.8).epsilon(0.00001));
 74 | 		REQUIRE(v2.y == Approx(-3.9).epsilon(0.00001));
 75 | 		REQUIRE(v2.z == 1.5);
 76 | 	}
 77 | 
 78 | 	// Test for ::operator/
 79 | 	SECTION("op_divide") {
 80 | 		Vec3 v1(1.2, -2.6, 1.0);
 81 | 
 82 | 		Vec3 v2 = v1 / 1.5;
 83 | 
 84 | 		REQUIRE(v2.x == Approx(0.8).epsilon(0.00001));
 85 | 		REQUIRE(v2.y == Approx(-1.7333333333333333333333333).epsilon(0.00001));
 86 | 		REQUIRE(v2.z == Approx(0.6666666666666666666666666).epsilon(0.00001));
 87 | 	}
 88 | 
 89 | 	// Test for ::length()
 90 | 	SECTION("length") {
 91 | 		Vec3 v1(1.2, -2.6, 1.0);
 92 | 
 93 | 		REQUIRE(v1.length() == Approx(3.03315017762062).epsilon(0.0001));
 94 | 	}
 95 | 
 96 | 	// Test for ::length2()
 97 | 	SECTION("length2") {
 98 | 		Vec3 v1(1.2, -2.6, 1.0);
 99 | 
100 | 		REQUIRE(v1.length2() == Approx(9.2).epsilon(0.0001));
101 | 	}
102 | 
103 | 	// Test for ::normalize()
104 | 	SECTION("normalize") {
105 | 		Vec3 v(1.2, -2.6, 1.0);
106 | 
107 | 		float l = v.length();
108 | 		v.normalize();
109 | 
110 | 		REQUIRE(l   == Approx(3.03315017762062).epsilon(0.0001));
111 | 		REQUIRE(v.x == Approx(0.39562828403747).epsilon(0.0001));
112 | 		REQUIRE(v.y == Approx(-0.85719461541452).epsilon(0.0001));
113 | 		REQUIRE(v.z == Approx(0.32969023669789).epsilon(0.0001));
114 | 	}
115 | 
116 | 	// Test for dot()
117 | 	SECTION("dot_") {
118 | 		Vec3 v1(1.2, -2.6, 1.0);
119 | 		Vec3 v2(-23.4, 2.2, 9.0);
120 | 
121 | 		float d = dot(v1, v2);
122 | 
123 | 		REQUIRE(d == Approx(-24.8).epsilon(0.00001));
124 | 	}
125 | 
126 | 	// Test for cross()
127 | 	SECTION("cross_") {
128 | 		Vec3 v1(1.2, -2.6, 1.0);
129 | 		Vec3 v2(-23.4, 2.2, 9.0);
130 | 
131 | 		Vec3 v3 = cross(v1, v2);
132 | 
133 | 		REQUIRE(v3.x == Approx(-25.6).epsilon(0.00001));
134 | 		REQUIRE(v3.y == Approx(-34.2).epsilon(0.00001));
135 | 		REQUIRE(v3.z == Approx(-58.2).epsilon(0.00001));
136 | 	}
137 | }
138 | 
139 | 


--------------------------------------------------------------------------------
/object/CMakeLists.txt:
--------------------------------------------------------------------------------
# Library with the renderable object primitives.
# Source extensions are spelled out explicitly: relying on CMake to guess
# them is deprecated (policy CMP0115).
add_library(object
            bilinear.cpp bicubic.cpp sphere.cpp subdivision_surface.cpp)
3 | 


--------------------------------------------------------------------------------
/object/bicubic.cpp:
--------------------------------------------------------------------------------
  1 | #include "numtype.h"
  2 | 
  3 | #include <algorithm>
  4 | #include <iostream>
  5 | #include <limits>
  6 | #include <stdlib.h>
  7 | #include <cmath>
  8 | #include <utility>
  9 | #include <vector>
 10 | #include "stack.hpp"
 11 | #include "bicubic.hpp"
 12 | #include "config.hpp"
 13 | #include "global.hpp"
 14 | 
 15 | #include "surface_closure.hpp"
 16 | #include "closure_union.hpp"
 17 | 
 18 | 
 19 | 
 20 | 
 21 | Bicubic::Bicubic(Vec3 v1,  Vec3 v2,  Vec3 v3,  Vec3 v4,
 22 |                  Vec3 v5,  Vec3 v6,  Vec3 v7,  Vec3 v8,
 23 |                  Vec3 v9,  Vec3 v10, Vec3 v11, Vec3 v12,
 24 |                  Vec3 v13, Vec3 v14, Vec3 v15, Vec3 v16) {
 25 | 	verts.resize(1);
 26 | 
 27 | 	verts[0][0]  = v1;
 28 | 	verts[0][1]  = v2;
 29 | 	verts[0][2]  = v3;
 30 | 	verts[0][3]  = v4;
 31 | 
 32 | 	verts[0][4]  = v5;
 33 | 	verts[0][5]  = v6;
 34 | 	verts[0][6]  = v7;
 35 | 	verts[0][7]  = v8;
 36 | 
 37 | 	verts[0][8]  = v9;
 38 | 	verts[0][9]  = v10;
 39 | 	verts[0][10] = v11;
 40 | 	verts[0][11] = v12;
 41 | 
 42 | 	verts[0][12] = v13;
 43 | 	verts[0][13] = v14;
 44 | 	verts[0][14] = v15;
 45 | 	verts[0][15] = v16;
 46 | }
 47 | 
 48 | 
 49 | void Bicubic::add_time_sample(Vec3 v1,  Vec3 v2,  Vec3 v3,  Vec3 v4,
 50 |                               Vec3 v5,  Vec3 v6,  Vec3 v7,  Vec3 v8,
 51 |                               Vec3 v9,  Vec3 v10, Vec3 v11, Vec3 v12,
 52 |                               Vec3 v13, Vec3 v14, Vec3 v15, Vec3 v16) {
 53 | 	const auto i = verts.size();
 54 | 	verts.resize(verts.size()+1);
 55 | 
 56 | 	verts[i][0]  = v1;
 57 | 	verts[i][1]  = v2;
 58 | 	verts[i][2]  = v3;
 59 | 	verts[i][3]  = v4;
 60 | 
 61 | 	verts[i][4]  = v5;
 62 | 	verts[i][5]  = v6;
 63 | 	verts[i][6]  = v7;
 64 | 	verts[i][7]  = v8;
 65 | 
 66 | 	verts[i][8]  = v9;
 67 | 	verts[i][9]  = v10;
 68 | 	verts[i][10] = v11;
 69 | 	verts[i][11] = v12;
 70 | 
 71 | 	verts[i][12] = v13;
 72 | 	verts[i][13] = v14;
 73 | 	verts[i][14] = v15;
 74 | 	verts[i][15] = v16;
 75 | }
 76 | 
 77 | void Bicubic::add_time_sample(std::array<Vec3, 16> patch) {
 78 | 	verts.emplace_back(patch);
 79 | }
 80 | 
 81 | 
 82 | void Bicubic::finalize() {
 83 | 	// Calculate bounds
 84 | 	bbox.resize(verts.size());
 85 | 	for (size_t time = 0; time < verts.size(); time++) {
 86 | 		bbox[time] = bound(verts[time]);
 87 | 
 88 | 		// Extend bounds for displacements
 89 | 		for (int i = 0; i < 3; i++) {
 90 | 			bbox[time].min[i] -= Config::displace_distance;
 91 | 			bbox[time].max[i] += Config::displace_distance;
 92 | 		}
 93 | 	}
 94 | }
 95 | 
 96 | 
 97 | const std::vector<BBox> &Bicubic::bounds() const {
 98 | 	return bbox;
 99 | }
100 | 
101 | 


--------------------------------------------------------------------------------
/object/bilinear.cpp:
--------------------------------------------------------------------------------
 1 | #include "numtype.h"
 2 | 
 3 | #include <algorithm>
 4 | #include <iostream>
 5 | #include <limits>
 6 | #include <stdlib.h>
 7 | #include <cmath>
 8 | #include "bilinear.hpp"
 9 | #include "config.hpp"
10 | #include "global.hpp"
11 | 
12 | 
13 | 
14 | 
15 | Bilinear::Bilinear(Vec3 v1, Vec3 v2, Vec3 v3, Vec3 v4) {
16 | 	verts.push_back({{v1,v2,v3,v4}});
17 | }
18 | 
19 | void Bilinear::finalize() {
20 | 	// Calculate bounds
21 | 	bbox.resize(verts.size());
22 | 	for (size_t time = 0; time < verts.size(); time++) {
23 | 		bbox[time] = bound(verts[time]);
24 | 
25 | 		// Extend bounds for displacements
26 | 		for (int i = 0; i < 3; i++) {
27 | 			bbox[time].min[i] -= Config::displace_distance;
28 | 			bbox[time].max[i] += Config::displace_distance;
29 | 		}
30 | 	}
31 | }
32 | 
33 | 
34 | void Bilinear::add_time_sample(Vec3 v1, Vec3 v2, Vec3 v3, Vec3 v4) {
35 | 	verts.push_back({{v1,v2,v3,v4}});
36 | }
37 | 
38 | 
39 | const std::vector<BBox> &Bilinear::bounds() const {
40 | 	return bbox;
41 | }
42 | 


--------------------------------------------------------------------------------
/object/bilinear.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef BILINEAR_HPP
  2 | #define BILINEAR_HPP
  3 | 
  4 | #include "numtype.h"
  5 | 
  6 | #include <vector>
  7 | #include <array>
  8 | #include "utils.hpp"
  9 | #include "stack.hpp"
 10 | #include "vector.hpp"
 11 | #include "object.hpp"
 12 | 
 13 | /*
 14 |  * A bilinear patch.
 15 |  * Vertices arranged like this:
 16 |  *     u-->
 17 |  *   v1----v2
 18 |  * v  |    |
 19 |  * | v3----v4
 20 |  * \/
 21 |  */
 22 | class Bilinear final: public PatchSurface {
 23 | public:
 24 | 	std::vector<std::array<Vec3, 4>> verts;
 25 | 	std::vector<BBox> bbox;
 26 | 
 27 | 	Bilinear() {}
 28 | 	Bilinear(Vec3 v1, Vec3 v2, Vec3 v3, Vec3 v4);
 29 | 	virtual ~Bilinear() {}
 30 | 
 31 | 	void finalize();
 32 | 
 33 | 	void add_time_sample(Vec3 v1, Vec3 v2, Vec3 v3, Vec3 v4);
 34 | 
 35 | 	virtual const std::vector<BBox> &bounds() const override;
 36 | 	virtual Color total_emitted_color() const override {
 37 | 		return Color(0.0f);
 38 | 	}
 39 | 
 40 | 
 41 | 	// For being traced by intersect_rays_with_patch() in tracer.cpp
 42 | 	typedef std::array<Vec3, 4> store_type;
 43 | 
 44 | 	static store_type interpolate_patch(float alpha, const store_type& p1, const store_type& p2) {
 45 | 		store_type p3;
 46 | 		for (int i = 0; i < 4; ++i) {
 47 | 			p3[i] = lerp(alpha, p1[i], p2[i]);
 48 | 		}
 49 | 		return p3;
 50 | 	}
 51 | 
 52 | 	__attribute__((always_inline))
 53 | 	static float ulen(const store_type& p) {
 54 | 		return longest_axis(p[0] - p[1]);
 55 | 	}
 56 | 
 57 | 	__attribute__((always_inline))
 58 | 	static float vlen(const store_type& p) {
 59 | 		return longest_axis(p[0] - p[2]);
 60 | 	}
 61 | 
 62 | 	__attribute__((always_inline))
 63 | 	static void split_u(const store_type& p, store_type* p1, store_type* p2) {
 64 | 		(*p2)[0] = (p[0] + p[1]) * 0.5f;
 65 | 		(*p2)[1] = p[1];
 66 | 		(*p2)[2] = (p[2] + p[3]) * 0.5f;
 67 | 		(*p2)[3] = p[3];
 68 | 
 69 | 		(*p1)[0] = p[0];
 70 | 		(*p1)[1] = (p[0] + p[1]) * 0.5f;
 71 | 		(*p1)[2] = p[2];
 72 | 		(*p1)[3] = (p[2] + p[3]) * 0.5f;
 73 | 	}
 74 | 
 75 | 	__attribute__((always_inline))
 76 | 	static void split_v(const store_type& p, store_type* p1, store_type* p2) {
 77 | 		(*p2)[0] = (p[0] + p[2]) * 0.5f;
 78 | 		(*p2)[1] = (p[1] + p[3]) * 0.5f;
 79 | 		(*p2)[2] = p[2];
 80 | 		(*p2)[3] = p[3];
 81 | 
 82 | 		(*p1)[0] = p[0];
 83 | 		(*p1)[1] = p[1];
 84 | 		(*p1)[2] = (p[0] + p[2]) * 0.5f;
 85 | 		(*p1)[3] = (p[1] + p[3]) * 0.5f;
 86 | 	}
 87 | 
 88 | 	static Vec3 eval_p(float u, const Vec3 p0, const Vec3 p1) {
 89 | 		const float b0 = 1.0f - u;
 90 | 		const float b1 = u;
 91 | 
 92 | 		return (p0 * b0) + (p1 * b1);
 93 | 	}
 94 | 
 95 | 	static Vec3 eval_pd(float u, const Vec3 p0, const Vec3 p1) {
 96 | 		const float d0 = -1.0f;
 97 | 		const float d1 = 1.0f;
 98 | 
 99 | 		return (p0 * d0) + (p1 * d1);
100 | 	}
101 | 
102 | 	/**
103 | 	 * Returns <n, dpdu, dpdv, dndu, dndv>
104 | 	 */
105 | 	static std::tuple<Vec3, Vec3, Vec3, Vec3, Vec3> differential_geometry(const store_type& p, float u, float v) {
106 | 		// Calculate first derivatives and surface normal
107 | 		const Vec3 dpdu = eval_pd(u, eval_p(v, p[0], p[2]), eval_p(v, p[1], p[3]));
108 | 		const Vec3 dpdv = eval_pd(v, eval_p(u, p[0], p[1]), eval_p(u, p[2], p[3]));
109 | 		const Vec3 n = cross(dpdv, dpdu).normalized();
110 | 
111 | 		// Calculate second derivatives
112 | 		const Vec3 d2pduu = Vec3(0.0f);
113 | 		const Vec3 d2pduv = eval_pd(v, eval_pd(u, p[0], p[1]), eval_pd(u, p[2], p[3]));
114 | 		const Vec3 d2pdvv = Vec3(0.0f);
115 | 
116 | 		// Calculate surface normal derivatives
117 | 		const float E = dot(dpdu, dpdu);
118 | 		const float F = dot(dpdu, dpdv);
119 | 		const float G = dot(dpdv, dpdv);
120 | 		const float e = dot(n, d2pduu);
121 | 		const float f = dot(n, d2pduv);
122 | 		const float g = dot(n, d2pdvv);
123 | 
124 | 		const float invEGF2 = 1.0f / ((E*G) - (F*F));
125 | 		const Vec3 dndu = (((f*F) - (e*G)) * invEGF2 * dpdu) + (((e*F) - (f*E)) * invEGF2 * dpdv);
126 | 		const Vec3 dndv = (((g*F) - (f*G)) * invEGF2 * dpdu) + (((f*F) - (g*E)) * invEGF2 * dpdv);
127 | 
128 | 		return std::make_tuple(n, dpdu, dpdv, dndu, dndv);
129 | 	}
130 | 
131 | 	__attribute__((always_inline))
132 | 	static BBox bound(const store_type& p) {
133 | 		BBox bb = BBox(p[0], p[0]);;
134 | 
135 | 		for (int i = 1; i < 4; ++i) {
136 | 			bb.min = min(bb.min, p[i]);
137 | 			bb.max = max(bb.max, p[i]);
138 | 		}
139 | 
140 | 		return bb;
141 | 	}
142 | };
143 | 
144 | #endif
145 | 


--------------------------------------------------------------------------------
/object/object.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef OBJECT_HPP
  2 | #define OBJECT_HPP
  3 | 
  4 | #include "numtype.h"
  5 | 
  6 | #include <memory>
  7 | #include <vector>
  8 | #include <iostream>
  9 | #include <stdlib.h>
 10 | #include "stack.hpp"
 11 | #include "ray.hpp"
 12 | #include "intersection.hpp"
 13 | #include "bbox.hpp"
 14 | #include "transform.hpp"
 15 | #include "surface_shader.hpp"
 16 | 
 17 | 
 18 | /**
 19 |  * @brief Base object class, from which all other objects inherit.
 20 |  */
 21 | class Object {
 22 | public:
 23 | 	// Virtual destructor, and don't delete default copy/move constructors
 24 | 	Object() = default;
 25 | 	virtual ~Object() = default;
 26 | 	Object(const Object&) = default;
 27 | 	Object(Object&&) = default;
 28 | 	Object& operator=(const Object&) = default;
 29 | 	Object& operator=(Object&&) = default;
 30 | 
 31 | 	/**
 32 | 	 * @brief An enum type for describing the type of an object.
 33 | 	 */
 34 | 	enum Type {
 35 | 		SURFACE,
 36 | 		COMPLEX_SURFACE,
 37 | 		PATCH_SURFACE,
 38 | 		LIGHT,
 39 | 		ASSEMBLY_INSTANCE
 40 | 	};
 41 | 
 42 | 	// Unique ID, used by Scene and Tracer for various purposes
 43 | 	// Sub-classes should ignore it.
 44 | 	size_t uid;
 45 | 
 46 | 	/**
 47 | 	 * @brief Returns the type of the object.
 48 | 	 */
 49 | 	virtual Type get_type() const = 0;
 50 | 
 51 | 	/**
 52 | 	 * Finalizes an object after parsing is complete, if needed.
 53 | 	 */
 54 | 	virtual void finalize() {}
 55 | 
 56 | 	/**
 57 | 	 * @brief Returns the bounds of the object.
 58 | 	 */
 59 | 	virtual const std::vector<BBox> &bounds() const = 0;
 60 | 
 61 | 	/**
 62 | 	 * Returns the total amount of energy emitted by the object.
 63 | 	 *
 64 | 	 * This does not need to be 100% accurate, as it is only used
 65 | 	 * for sampling decisions.  But it should be approximately
 66 | 	 * correct.
 67 | 	 *
 68 | 	 * TODO: remove this function!  This is NOT where this should be handled.
 69 | 	 * This needs to be handled at a point where the material of the object
 70 | 	 * is known.
 71 | 	 */
 72 | 	virtual Color total_emitted_color() const = 0;
 73 | };
 74 | 
 75 | 
 76 | /**
 77 |  * @brief An interface for traditional surface objects that can be easily
 78 |  * directly tested against a single ray at a time.
 79 |  */
 80 | class Surface: public Object {
 81 | public:
 82 | 	virtual ~Surface() {}
 83 | 
 84 | 	Object::Type get_type() const final {
 85 | 		return Object::SURFACE;
 86 | 	}
 87 | 
 88 | 	/**
 89 | 	 * @brief Tests a ray against the surface.
 90 | 	 */
 91 | 	virtual bool intersect_ray(const Ray &ray, Intersection *intersection=nullptr) = 0;
 92 | };
 93 | 
 94 | 
 95 | /**
 96 |  * @brief An interface for surfaces that require more complex handling
 97 |  * and which require fast scratch memory.
 98 |  */
 99 | class ComplexSurface: public Object {
100 | public:
101 | 	virtual ~ComplexSurface() {}
102 | 
103 | 	Object::Type get_type() const final {
104 | 		return Object::COMPLEX_SURFACE;
105 | 	}
106 | 
107 | 	/**
108 | 	 * @brief Tests a batch of rays against the surface.
109 | 	 */
110 | 	virtual void intersect_rays(Ray* rays_begin, Ray* rays_end,
111 | 	                            Intersection *intersections,
112 | 	                            const Range<const Transform*> parent_xforms,
113 | 	                            Stack* data_stack,
114 | 	                            const SurfaceShader* surface_shader,
115 | 	                            const InstanceID& element_id) const = 0;
116 | };
117 | 
118 | 
119 | /**
120 |  * @brief An interface for surface patches with inherent UV coordinates, and
121 |  * which can be easily recursively split into smaller patches.
122 |  *
123 |  * Other than defining get_type() there are no methods defined in this class.
124 |  * However, subclasses of this must nevertheless adhere to an interface and
125 |  * provide certain static methods that certain templated functions end up
126 |  * using.  C++14 and earlier are, unfortunately, not able to describe such
127 |  * interfaces.  Hopefully Concepts Lite in C++17 will allow this.  In the mean
128 |  * time, look at the Bilinear and Bicubic classes for examples of the required
129 |  * interface.
130 |  */
131 | class PatchSurface: public Object {
132 | public:
133 | 	virtual ~PatchSurface() {}
134 | 
135 | 	Object::Type get_type() const final {
136 | 		return Object::PATCH_SURFACE;
137 | 	}
138 | };
139 | 
140 | #endif // OBJECT_HPP
141 | 


--------------------------------------------------------------------------------
/object/sphere.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SPHERE_HPP
 2 | #define SPHERE_HPP
 3 | 
 4 | #include "numtype.h"
 5 | 
 6 | #include <vector>
 7 | #include "vector.hpp"
 8 | #include "object.hpp"
 9 | 
10 | /**
11 |  * @brief A sphere primitive.
12 |  *
13 |  * This serves as a simple example of how to implement a surface primitive.
14 |  */
15 | class Sphere final: public Surface {
16 | public:
17 | 	std::vector<Vec3> center;
18 | 	std::vector<float> radius;
19 | 
20 | 	std::vector<BBox> bbox;
21 | 
22 | 	Sphere(Vec3 center_, float radius_);
23 | 	Sphere(uint8_t res_time_);
24 | 	virtual ~Sphere() {};
25 | 
26 | 	void add_time_sample(int samp, Vec3 center_, float radius_);
27 | 
28 | 	void finalize();
29 | 
30 | 	virtual bool intersect_ray(const Ray &ray, Intersection *intersection=nullptr);
31 | 	virtual const std::vector<BBox> &bounds() const;
32 | 	virtual Color total_emitted_color() const override final {
33 | 		return Color(0.0f);
34 | 	}
35 | };
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/object/subdivision_surface.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SUBDIVISION_SURFACE_HPP
 2 | #define SUBDIVISION_SURFACE_HPP
 3 | 
 4 | #include <vector>
 5 | 
 6 | #include "object.hpp"
 7 | #include "intersection.hpp"
 8 | #include "ray.hpp"
 9 | #include "stack.hpp"
10 | #include "vector.hpp"
11 | #include "bbox.hpp"
12 | #include "bilinear.hpp"
13 | #include "bicubic.hpp"
14 | 
15 | class SubdivisionSurface final: public ComplexSurface {
16 | 	struct Node {
17 | 		Range<BBox*> bounds;
18 | 		Node* children[2];
19 | 		Bicubic* leaf_data;
20 | 	};
21 | 
22 | 	void build_bvh();
23 | 	Node* build_bvh_recursive(Node* begin, Node* end);
24 | 
25 | public:
26 | 	// Final data
27 | 	std::vector<Bicubic> patches;
28 | 	std::vector<BBox> bbox;
29 | 	std::vector<Node> bvh_nodes;
30 | 	std::vector<BBox> bvh_bboxes;
31 | 	Node* bvh_root;
32 | 	int max_depth;
33 | 
34 | 	// Intermediate data
35 | 	int depth;
36 | 	int motion_samples = 0;
37 | 	int verts_per_motion_sample = 0;
38 | 	std::vector<Vec3> verts;
39 | 	std::vector<int> face_vert_counts;
40 | 	std::vector<int> face_vert_indices;
41 | 
42 | 	// Construction
43 | 	SubdivisionSurface() {}
44 | 	void set_verts(std::vector<Vec3>&& verts_, int verts_per_motion_sample_) {
45 | 		verts = std::move(verts_);
46 | 		verts_per_motion_sample = verts_per_motion_sample_;
47 | 		motion_samples = verts.size() / verts_per_motion_sample;
48 | 	}
49 | 	void set_face_vert_counts(std::vector<int>&& vert_counts) {
50 | 		face_vert_counts = std::move(vert_counts);
51 | 	}
52 | 	void set_face_vert_indices(std::vector<int>&& vert_indices) {
53 | 		face_vert_indices = std::move(vert_indices);
54 | 	}
55 | 	void finalize();
56 | 
57 | 	virtual const std::vector<BBox> &bounds() const override {
58 | 		return bbox;
59 | 	}
60 | 
61 | 	virtual Color total_emitted_color() const override {
62 | 		return Color(0.0f);
63 | 	}
64 | 
65 | 	virtual void intersect_rays(Ray* rays_begin, Ray* rays_end,
66 | 	                            Intersection *intersections,
67 | 	                            const Range<const Transform*> parent_xforms,
68 | 	                            Stack* data_stack,
69 | 	                            const SurfaceShader* surface_shader,
70 | 	                            const InstanceID& element_id) const override;
71 | 
72 | };
73 | 
74 | #endif // SUBDIVISION_SURFACE_HPP


--------------------------------------------------------------------------------
/parser/CMakeLists.txt:
--------------------------------------------------------------------------------
# Library with the scene-file parser.
# Source extensions are spelled out explicitly: relying on CMake to guess
# them is deprecated (policy CMP0115).
add_library(parser
	parser.cpp data_tree.cpp)
3 | 


--------------------------------------------------------------------------------
/parser/data_tree.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef DATA_TREE_HPP
 2 | #define DATA_TREE_HPP
 3 | 
 4 | #include <string>
 5 | #include <vector>
 6 | 
namespace DataTree {

/**
 * A single node of the data tree.
 *
 * A node is either an internal node (non-empty 'children') or a leaf
 * (empty 'children', with its text in 'leaf_contents').
 */
struct Node {
	std::string type; // NOTE(review): presumably the node's type label from the file -- confirm in data_tree.cpp
	std::string name; // NOTE(review): presumably the node's optional name from the file -- confirm in data_tree.cpp
	std::vector<Node> children; // If size is zero, indicates the node is a leaf
	std::string leaf_contents; // Only for leaf nodes
};


/**
 * Builds a data tree from the contents of a file.
 *
 * Returns the root node.
 */
Node build_from_file(const char* file_path);


/**
 * Prints a DataTree to the console, for debugging purposes.
 *
 * 'indent' defaults to the empty string.
 */
void print_tree(const Node& node, const std::string& indent = "");

}
31 | 
32 | #endif // DATA_TREE_HPP
33 | 


--------------------------------------------------------------------------------
/parser/parser.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef PARSER_HPP
 2 | #define PARSER_HPP
 3 | 
 4 | #include <string>
 5 | #include <fstream>
 6 | #include <memory>
 7 | 
 8 | #include "data_tree.hpp"
 9 | 
10 | #include "sphere_light.hpp"
11 | #include "rectangle_light.hpp"
12 | #include "bilinear.hpp"
13 | #include "bicubic.hpp"
14 | #include "subdivision_surface.hpp"
15 | #include "sphere.hpp"
16 | 
17 | #include "renderer.hpp"
18 | #include "scene.hpp"
19 | 
20 | 
21 | class Parser {
22 | 	DataTree::Node tree;
23 | 	unsigned int node_index = 0;
24 | 
25 | 	// Methods
26 | 
27 | 	/**
28 | 	* @brief Parses a transform matrix.
29 | 	*/
30 | 	Matrix44 parse_matrix(const std::string line);
31 | 
32 | 	/**
33 | 	* @brief Parses a Camera section.
34 | 	*/
35 | 	std::unique_ptr<Camera> parse_camera(const DataTree::Node& node);
36 | 
37 | 	/**
38 | 	 * @brief Parses an Assembly section.
39 | 	 */
40 | 	std::unique_ptr<Assembly> parse_assembly(const DataTree::Node& node, const Assembly* parent_assembly);
41 | 
42 | 
43 | 	/**
44 | 	 * @brief Parses a bilinear patch section.
45 | 	 */
46 | 	std::unique_ptr<Bilinear> parse_bilinear_patch(const DataTree::Node& node);
47 | 
48 | 	/**
49 | 	 * @brief Parses a bicubic patch section.
50 | 	 */
51 | 	std::unique_ptr<Bicubic> parse_bicubic_patch(const DataTree::Node& node);
52 | 
53 | 	/**
54 | 	 * @brief Parses a subdivision surface section.
55 | 	 */
56 | 	std::unique_ptr<SubdivisionSurface> parse_subdivision_surface(const DataTree::Node& node);
57 | 
58 | 	/**
59 | 	 * @brief Parses a sphere section.
60 | 	 */
61 | 	std::unique_ptr<Sphere> parse_sphere(const DataTree::Node& node);
62 | 
63 | 	/**
64 | 	 * @brief Parses a surface shader section.
65 | 	 */
66 | 	std::unique_ptr<SurfaceShader> parse_surface_shader(const DataTree::Node& node);
67 | 
68 | 	/**
69 | 	 * @brief Parses a sphere light section.
70 | 	 */
71 | 	std::unique_ptr<SphereLight> parse_sphere_light(const DataTree::Node& node);
72 | 
73 | 	/**
74 | 	 * @brief Parses a rectangle light section.
75 | 	 */
76 | 	std::unique_ptr<RectangleLight> parse_rectangle_light(const DataTree::Node& node);
77 | 
78 | public:
79 | 	Parser(std::string input_path) {
80 | 		tree = DataTree::build_from_file(input_path.c_str());
81 | 		//DataTree::print_tree(tree);
82 | 	}
83 | 
84 | 	/**
85 | 	 * @brief Parses the next frame in the file, and returns the
86 | 	 * resulting scene, ready for rendering.
87 | 	 */
88 | 	std::unique_ptr<Renderer> parse_next_frame();
89 | };
90 | 
91 | #endif // PARSER_HPP
92 | 


--------------------------------------------------------------------------------
/parser/utf8.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef UTF8_HPP
 2 | #define UTF8_HPP
 3 | 
 4 | #include <string>
 5 | #include <exception>
 6 | 
 7 | class utf8_parse_error: std::exception {
 8 | public:
 9 | 	virtual const char* what() const noexcept {
10 | 		return "Invalid UTF8 sequence.";
11 | 	}
12 | };
13 | 
14 | /**
15 |  * Fetches a single, complete UTF8 code point, returning it as a std::string.
16 |  * Returns an empty string on a malformed codepoint.
17 |  *
18 |  * @param in  Reference to a const string iterator where the parsing is to begin.
19 |  * @param end Reference to the corresponding end iterator for the string.
20 |  *
21 |  * Throws a utf8_parse_error exception on malformed utf8 input.
22 |  */
23 | static inline std::string cur_utf8(const std::string::const_iterator& in, const std::string::const_iterator& end) {
24 | 	const unsigned char* c = reinterpret_cast<const unsigned char*>(&(*in));
25 | 
26 | 	if (in == end)
27 | 		return std::string("");
28 | 
29 | 	// Determine the length of the encoded codepoint
30 | 	int len = 0;
31 | 	if (c[0] < 0b10000000)
32 | 		len = 1;
33 | 	else if (c[0] < 0b11000000)
34 | 		throw utf8_parse_error {}; // Malformed: continuation byte as first byte
35 | 	else if (c[0] < 0b11100000)
36 | 		len = 2;
37 | 	else if (c[0] < 0b11110000)
38 | 		len = 3;
39 | 	else if (c[0] < 0b11111000)
40 | 		len = 4;
41 | 	else
42 | 		throw utf8_parse_error {}; // Malformed: current utf8 standard only allows up to four bytes
43 | 
44 | 	if (len == 0 || len > (end-in))
45 | 		throw utf8_parse_error {}; // Malformed: not enough bytes
46 | 
47 | 	// Read the rest of the bytes of the codepoint,
48 | 	// making sure they're proper continuation bytes
49 | 	for (int i = 1; i < len; ++i) {
50 | 		if ((c[i] & 0b11000000) != 0b10000000)
51 | 			throw utf8_parse_error {}; // Malformed: not a continuation byte
52 | 	}
53 | 
54 | 	// Success!
55 | 	return std::string(in, in+len);
56 | }
57 | 
58 | /**
59 |  * Like cur_utf8, except it advances the string iterator after parsing the token.
60 |  */
61 | static inline std::string next_utf8(std::string::const_iterator& in, const std::string::const_iterator& end) {
62 | 	std::string c = cur_utf8(in, end);
63 | 
64 | 	in += c.length();
65 | 
66 | 	return c;
67 | }
68 | 
69 | #endif // UTF8_HPP


--------------------------------------------------------------------------------
/psychoblend/__init__.py:
--------------------------------------------------------------------------------
  1 | bl_info = {
  2 |     "name": "PsychoBlend",
  3 |     "version": (0, 1),
  4 |     "author": "Nathan Vegdahl",
  5 |     "blender": (2, 70, 0),
  6 |     "description": "Psychopath renderer integration",
  7 |     "location": "",
  8 |     "wiki_url": "https://github.com/cessen/psychopath/wiki",
  9 |     "tracker_url": "https://github.com/cessen/psychopath/issues",
 10 |     "category": "Render"}
 11 | 
 12 | 
 13 | if "bpy" in locals():
 14 |     import imp
 15 |     imp.reload(ui)
 16 |     imp.reload(psy_export)
 17 |     imp.reload(render)
 18 | else:
 19 |     from . import ui, psy_export, render
 20 | 
 21 | import bpy
 22 | from bpy.types import (AddonPreferences,
 23 |                        PropertyGroup,
 24 |                        Operator,
 25 |                        )
 26 | from bpy.props import (StringProperty,
 27 |                        BoolProperty,
 28 |                        IntProperty,
 29 |                        FloatProperty,
 30 |                        FloatVectorProperty,
 31 |                        EnumProperty,
 32 |                        PointerProperty,
 33 |                        )
 34 | 
 35 | 
 36 | # Custom Scene settings
 37 | class RenderPsychopathSettingsScene(PropertyGroup):
 38 |     spp = IntProperty(
 39 |         name="Samples Per Pixel", description="Total number of samples to take per pixel",
 40 |         min=1, max=65536, default=16
 41 |         )
 42 | 
 43 |     dicing_rate = FloatProperty(
 44 |         name="Dicing Rate", description="The target microgeometry width in pixels",
 45 |         min=0.0001, max=100.0, soft_min=0.125, soft_max=1.0, default=0.25
 46 |         )
 47 | 
 48 |     motion_blur_segments = IntProperty(
 49 |         name="Motion Segments", description="The number of segments to use in motion blur.  Zero means no motion blur.  Will be rounded down to the nearest power of two.",
 50 |         min=0, max=256, default=0
 51 |         )
 52 | 
 53 |     shutter_start = FloatProperty(
 54 |         name="Shutter Open", description="The time during the frame that the shutter opens, for motion blur",
 55 |         min=-1.0, max=1.0, soft_min=0.0, soft_max=1.0, default=0.0
 56 |         )
 57 | 
 58 |     shutter_end = FloatProperty(
 59 |         name="Shutter Close", description="The time during the frame that the shutter closes, for motion blur",
 60 |         min=-1.0, max=1.0, soft_min=0.0, soft_max=1.0, default=0.5
 61 |         )
 62 | 
 63 |     export_path = StringProperty(
 64 |         name="Export Path", description="The path to where the .psy files should be exported when rendering.  If left blank, /tmp or the equivalent is used.",
 65 |         subtype='FILE_PATH'
 66 |         )
 67 | 
 68 | # Custom Camera properties
 69 | class PsychopathCamera(bpy.types.PropertyGroup):
 70 |     aperture_radius = FloatProperty(
 71 |         name="Aperture Radius", description="Size of the camera's aperture, for DoF",
 72 |         min=0.0, max=10000.0, soft_min=0.0, soft_max=2.0, default=0.0
 73 |         )
 74 | 
 75 | # Custom Mesh properties
 76 | class PsychopathMesh(bpy.types.PropertyGroup):
 77 |     is_subdivision_surface = BoolProperty(
 78 |         name="Is Subdivision Surface", description="Whether this is a sibdivision surface or just a normal mesh",
 79 |         default=False
 80 |         )
 81 | 
 82 | # Psychopath material settings, exposed on every material as `material.psychopath` (see register()).
 83 | class PsychopathMaterial(bpy.types.PropertyGroup):
 84 |     surface_shader_type = EnumProperty(
 85 |         name="Surface Shader Type", description="",
 86 |         items=[('Emit', 'Emit', ""), ('Lambert', 'Lambert', ""), ('GTR', 'GTR', "")],
 87 |         default="Lambert"
 88 |         )
 89 | 
 90 |     color = FloatVectorProperty(
 91 |         name="Color", description="",
 92 |         subtype='COLOR',
 93 |         min=0.0, soft_min=0.0, soft_max = 1.0,  # no hard maximum: values above 1.0 can be entered
 94 |         default=[0.8,0.8,0.8]
 95 |         )
 96 | 
 97 |     roughness = FloatProperty(
 98 |         name="Roughness", description="",
 99 |         min=-1.0, max=1.0, soft_min=0.0, soft_max=1.0, default=0.1  # NOTE(review): hard minimum is -1.0 while the soft range is [0, 1] — confirm negative roughness is intended
100 |         )
101 | 
102 |     tail_shape = FloatProperty(
103 |         name="Tail Shape", description="",
104 |         min=0.0, max=8.0, soft_min=1.0, soft_max=3.0, default=2.0
105 |         )
106 | 
107 |     fresnel = FloatProperty(
108 |         name="Fresnel", description="",
109 |         min=0.0, max=1.0, soft_min=0.0, soft_max=1.0, default=0.9
110 |         )
111 | 
112 | 
113 | # Addon Preferences
114 | class PsychopathPreferences(AddonPreferences):
115 |     bl_idname = __name__
116 | 
117 |     filepath_psychopath = StringProperty(
118 |                 name="Psychopath Location",
119 |                 description="Path to renderer executable",
120 |                 subtype='DIR_PATH',
121 |                 )
122 | 
123 |     def draw(self, context):
124 |         layout = self.layout
125 |         layout.prop(self, "filepath_psychopath")
126 | 
127 | 
128 | ##### REGISTER #####
129 | def register():  # Registers the add-on's classes, attaches the settings to Blender's types, then registers the sub-modules.
130 |     bpy.utils.register_class(PsychopathPreferences)
131 |     bpy.utils.register_class(RenderPsychopathSettingsScene)
132 |     bpy.utils.register_class(PsychopathCamera)
133 |     bpy.utils.register_class(PsychopathMesh)
134 |     bpy.utils.register_class(PsychopathMaterial)
135 |     bpy.types.Scene.psychopath = PointerProperty(type=RenderPsychopathSettingsScene)  # each PointerProperty is added after its class is registered above
136 |     bpy.types.Camera.psychopath = PointerProperty(type=PsychopathCamera)
137 |     bpy.types.Mesh.psychopath = PointerProperty(type=PsychopathMesh)
138 |     bpy.types.Material.psychopath = PointerProperty(type=PsychopathMaterial)
139 |     render.register()
140 |     ui.register()
141 | 
142 | 
143 | def unregister():
144 |     bpy.utils.unregister_class(PsychopathPreferences)
145 |     bpy.utils.unregister_class(RenderPsychopathSettingsScene)
146 |     bpy.utils.unregister_class(PsychopathCamera)
147 |     bpy.utils.unregister_class(PsychopathMesh)
148 |     bpy.utils.unregister_class(PsychopathMaterial)
149 |     del bpy.types.Scene.psychopath
150 |     del bpy.types.Camera.psychopath
151 |     del bpy.types.Mesh.psychopath
152 |     del bpy.types.Material.psychopath
153 |     render.unregister()
154 |     ui.unregister()
155 | 


--------------------------------------------------------------------------------
/psychoblend/render.py:
--------------------------------------------------------------------------------
  1 | import bpy
  2 | import time
  3 | import os
  4 | import subprocess
  5 | import tempfile
  6 | from . import psy_export
  7 | 
  8 | def get_temp_filename(suffix=""):
  9 |     tmpf = tempfile.mkstemp(suffix=suffix, prefix='tmp')
 10 |     os.close(tmpf[0])
 11 |     return(tmpf[1])
 12 | 
 13 | class PsychopathRender(bpy.types.RenderEngine):
 14 |     bl_idname = 'PSYCHOPATH_RENDER'
 15 |     bl_label = "Psychopath"
 16 |     DELAY = 1.0
 17 | 
 18 |     @staticmethod
 19 |     def _locate_binary():
 20 |         addon_prefs = bpy.context.user_preferences.addons[__package__].preferences
 21 | 
 22 |         # Use the system preference if its set.
 23 |         psy_binary = addon_prefs.filepath_psychopath
 24 |         if psy_binary:
 25 |             if os.path.exists(psy_binary):
 26 |                 return psy_binary
 27 |             else:
 28 |                 print("User Preference to psychopath %r NOT FOUND, checking $PATH" % psy_binary)
 29 | 
 30 |         # search the path all os's
 31 |         psy_binary_default = "psychopath"
 32 | 
 33 |         os_path_ls = os.getenv("PATH").split(':') + [""]
 34 | 
 35 |         for dir_name in os_path_ls:
 36 |             psy_binary = os.path.join(dir_name, psy_binary_default)
 37 |             if os.path.exists(psy_binary):
 38 |                 return psy_binary
 39 |         return ""
 40 | 
 41 |     def _export(self, scene, export_path, render_image_path):
 42 |         exporter = psy_export.PsychoExporter(scene)
 43 |         exporter.export_psy(export_path, render_image_path)
 44 | 
 45 |     def _render(self, scene, psy_filepath):
 46 |         psy_binary = PsychopathRender._locate_binary()
 47 |         if not psy_binary:
 48 |             print("Psychopath: could not execute psychopath, possibly Psychopath isn't installed")
 49 |             return False
 50 | 
 51 |         # TODO: figure out command line options
 52 |         args = ["-i", psy_filepath]
 53 | 
 54 |         # Start Rendering!
 55 |         try:
 56 |             self._process = subprocess.Popen([psy_binary] + args,
 57 |                                              stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 58 |         except OSError:
 59 |             # TODO, report api
 60 |             print("Psychopath: could not execute '%s'" % psy_binary)
 61 |             import traceback
 62 |             traceback.print_exc()
 63 |             print ("***-DONE-***")
 64 |             return False
 65 | 
 66 |         return True
 67 | 
 68 | 
 69 |     def _cleanup(self):
 70 |         # for f in (self._temp_file_in, self._temp_file_ini, self._temp_file_out):
 71 |         #     for i in range(5):
 72 |         #         try:
 73 |         #             os.unlink(f)
 74 |         #             break
 75 |         #         except OSError:
 76 |         #             # Wait a bit before retrying file might be still in use by Blender,
 77 |         #             # and Windows does not know how to delete a file in use!
 78 |         #             time.sleep(self.DELAY)
 79 |         # for i in unpacked_images:
 80 |         #     for c in range(5):
 81 |         #         try:
 82 |         #             os.unlink(i)
 83 |         #             break
 84 |         #         except OSError:
 85 |         #             # Wait a bit before retrying file might be still in use by Blender,
 86 |         #             # and Windows does not know how to delete a file in use!
 87 |         #             time.sleep(self.DELAY)
 88 |         pass
 89 | 
 90 |     def render(self, scene):
 91 |         # has to be called to update the frame on exporting animations
 92 |         scene.frame_set(scene.frame_current)
 93 | 
 94 |         export_path = scene.psychopath.export_path
 95 |         if export_path != "":
 96 |             export_path += "_%d.psy" % scene.frame_current
 97 |         else:
 98 |             # Create a temporary file for exporting
 99 |             export_path = get_temp_filename('.psy')
100 | 
101 |         # Create a temporary file to render into
102 |         render_image_path = get_temp_filename('.png')
103 | 
104 |         # start export
105 |         self.update_stats("", "Psychopath: Exporting data from Blender")
106 |         self._export(scene, export_path, render_image_path)
107 | 
108 |         # Start rendering
109 |         self.update_stats("", "Psychopath: Rendering from exported file")
110 |         if not self._render(scene, export_path):
111 |             self.update_stats("", "Psychopath: Not found")
112 |             return
113 | 
114 |         r = scene.render
115 |         # compute resolution
116 |         x = int(r.resolution_x * r.resolution_percentage)
117 |         y = int(r.resolution_y * r.resolution_percentage)
118 | 
119 |         result = self.begin_result(0, 0, x, y)
120 |         lay = result.layers[0]
121 | 
122 |         # TODO: Update viewport with render result while rendering
123 |         while self._process.poll() == None:
124 |             # Wait for self.DELAY seconds, but check for render cancels
125 |             # while waiting.
126 |             t = 0.0
127 |             while t < self.DELAY:
128 |                 if self.test_break():
129 |                     self._process.terminate()
130 |                     break
131 |                 time.sleep(0.05)
132 |                 t += 0.05
133 |             # # Update viewport image with latest render output
134 |             # if os.path.exists(render_image_path):
135 |             #     # This assumes the file has been fully written We wait a bit, just in case!
136 |             #     try:
137 |             #         lay.load_from_file(render_image_path)
138 |             #         self.update_result(result)
139 |             #     except RuntimeError:
140 |             #         pass
141 | 
142 |         # Load final image
143 |         lay.load_from_file(render_image_path)
144 |         self.end_result(result)
145 | 
146 |         # Delete temporary image file
147 |         os.remove(render_image_path)
148 | 
149 | def register():  # Registers the PsychopathRender engine class with Blender.
150 |     bpy.utils.register_class(PsychopathRender)
151 | 
152 | def unregister():  # Removes the PsychopathRender engine class again.
153 |     bpy.utils.unregister_class(PsychopathRender)
154 | 


--------------------------------------------------------------------------------
/renderer/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library(renderer
2 |             renderer.cpp)
3 | 


--------------------------------------------------------------------------------
/renderer/renderer.cpp:
--------------------------------------------------------------------------------
  1 | #include "renderer.hpp"
  2 | 
  3 | #include <functional>
  4 | #include <memory>
  5 | 
  6 | #include <OpenImageIO/imageio.h>
  7 | 
  8 | #include "numtype.h"
  9 | 
 10 | #include "timer.hpp"
 11 | 
 12 | #include "rng.hpp"
 13 | #include "integrator.hpp"
 14 | #include "path_trace_integrator.hpp"
 15 | #include "tracer.hpp"
 16 | #include "scene.hpp"
 17 | #include "film.hpp"
 18 | 
 19 | #include "config.hpp"
 20 | #include "global.hpp"
 21 | 
 22 | #define GAMMA 2.2
 23 | 
 24 | void write_png_from_film(Film *image, std::string path, float min_time=4.0) {
 25 | 	static Timer<> timer; // Shared by all calls, so the throttle below applies across call sites
 26 | 	// Writes the film to `path` as a PNG, but only if more than min_time seconds have passed (always when min_time is 0).
 27 | 	if ((timer.time() > min_time || min_time == 0.0f) && !Config::no_output) {
 28 | 		timer.reset();
 29 | 
 30 | 		// Convert to dithered sRGB
 31 | 		std::vector<uint8_t> im {image->scanline_image_8bbc()};
 32 | 		// Save image
 33 | 		std::unique_ptr<OpenImageIO::ImageOutput> out {OpenImageIO::ImageOutput::create(".png")};
 34 | 		if (!out) {
 35 | 			return;
 36 | 		}
 37 | 		OpenImageIO::ImageSpec spec(image->width, image->height, 3, OpenImageIO::TypeDesc::UINT8);
 38 | 		if (!out->open(path, spec)) return; // Nothing to write into if the file could not be opened
 39 | 		out->write_image(OpenImageIO::TypeDesc::UINT8, im.data()); // data() stays valid even for an empty buffer
 40 | 		out->close();
 41 | 	}
 42 | }
 43 | 
 44 | 
 45 | bool Renderer::render(int thread_count) {
 46 | 	Timer<> stopwatch; // Times the prep phase first, then the render itself
 47 | 
 48 | 	// Reset the global statistics for this run
 49 | 	Global::Stats::clear();
 50 | 
 51 | 	RNG rng;
 52 | 	// Height-to-width ratio of the output resolution
 53 | 	const float aspect = static_cast<float>(res_y) / res_x;
 54 | 	std::unique_ptr<Film> image {new Film(res_x, res_y, -1.0, -aspect, 1.0, aspect)};
 55 | 	Film* const film = image.get();
 56 | 	film->si_x1 = subimage_x1;
 57 | 	film->si_y1 = subimage_y1;
 58 | 	film->si_x2 = subimage_x2;
 59 | 	film->si_y2 = subimage_y2;
 60 | 
 61 | 	// Write out the still-blank image before any rendering happens
 62 | 	write_png_from_film(film, output_path, 0.0f);
 63 | 
 64 | 	// Callback handed to the integrator for writing the image
 65 | 	std::function<void()> image_writer = std::bind(write_png_from_film, film, output_path, 10.0);
 66 | 
 67 | 	PathTraceIntegrator integrator(scene.get(), film, spp, spp_max, variance_max, seed, thread_count, image_writer);
 68 | 
 69 | 	std::cout << "Integrator prep time (seconds): " << stopwatch.time() << std::endl;
 70 | 	stopwatch.reset();
 71 | 
 72 | 	std::cout << "Rendering" << std::flush;
 73 | 	integrator.integrate();
 74 | 	std::cout << std::endl;
 75 | 
 76 | 
 77 | 	// Final write of the finished image
 78 | 	write_png_from_film(film, output_path, 0.0f);
 79 | 
 80 | #if 0
 81 | 	// Print statistics
 82 | 
 83 | 	std::cout << "Rays shot while rendering: " << Global::Stats::rays_shot << std::endl;
 84 | #ifdef GLOBAL_STATS_TOP_LEVEL_BVH_NODE_TESTS
 85 | 	std::cout << "Top-level BVH node tests: " << Global::Stats::top_level_bvh_node_tests << std::endl;
 86 | #endif
 87 | 	std::cout << "Primitive-ray tests during rendering: " << Global::Stats::primitive_ray_tests << std::endl;
 88 | 	std::cout << "Splits during rendering: " << Global::Stats::split_count << std::endl;
 89 | 	std::cout << "MicroSurface cache misses during rendering: " << Global::Stats::cache_misses << std::endl;
 90 | 	std::cout << "NaN's encountered: " <<  Global::Stats::nan_count << std::endl;
 91 | 	std::cout << "Bad Inf's encountered: " <<  Global::Stats::inf_count << std::endl;
 92 | #endif
 93 | 
 94 | 	std::cout << "Render time (seconds): " << stopwatch.time() << std::endl;
 95 | 
 96 | 
 97 | 	// Finished
 98 | 	return true;
 99 | }
100 | 


--------------------------------------------------------------------------------
/renderer/renderer.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file and renderer.cpp define a Renderer class, which serves as
 3 |  * the API for setting up, running, and controlling a render.
 4 |  */
 5 | #ifndef RENDERER_HPP
 6 | #define RENDERER_HPP
 7 | 
 8 | #include "numtype.h"
 9 | #include <memory>
10 | #include <string>
11 | #include <vector>
12 | #include <OpenImageIO/imageio.h>
13 | 
14 | #include "scene.hpp"
15 | 
16 | /**
17 |  * @brief Manages a render.
18 |  *
19 |  * The Renderer is responsible for doing the actual rendering.  It is given
20 |  * a 3d scene that has already been set up, and it dives in and tears it
21 |  * to pieces.  The result is an image or images.
22 |  *
23 |  * The Renderer is responsible for knowing:
24 |  * - Where to output the render result (e.g. to a file, or files, to
25 |  *   another process...)
26 |  * - What "passes" to output (light path expressions) and in what format.
27 |  * - What resolution to render with.
28 |  * - How to manage resources during rendering (number of threads to use, RAM
29 |  *   usage limits, max grid size, bucket size, ray buffer size...)
30 |  * - Render quality settings (number of samples, adaptive sampling settings,
31 |  *   dicing rate, color clamping...).
32 |  *
33 |  * Essentially, anything that is not part of the scene description is entirely
34 |  * the responsibility of the renderer.
35 |  */
36 | class Renderer { // Holds the render settings plus the scene, and runs the render via render()
37 | private:
38 | 	uint res_x, res_y; // Resolution handed to the Film in render()
39 | 	uint subimage_x1, subimage_y1, subimage_x2, subimage_y2; // Sub-image bounds; the constructor sets them to (0, 0)-(res_x, res_y)
40 | 	uint spp;
41 | 	uint spp_max;
42 | 	float variance_max;
43 | 	uint seed;
44 | 	std::string output_path; // Passed to write_png_from_film() by render()
45 | 
46 | public:
47 | 	std::unique_ptr<Scene> scene; // Owning: initialised from the raw Scene* given to the constructor
48 | 
49 | 	Renderer(Scene *scene, uint res_x, uint res_y, uint spp, uint spp_max, float variance_max, uint seed, std::string output_path):
50 | 		res_x {res_x},
51 | 	      res_y {res_y},
52 | 	      subimage_x1 {0}, subimage_y1 {0}, subimage_x2 {res_x}, subimage_y2 {res_y},
53 | 	      spp {spp},
54 | 	      spp_max {spp_max},
55 | 	      variance_max {variance_max},
56 | 	      seed {seed},
57 | 	      output_path {output_path},
58 | 	      scene {scene}
59 | 	{}
60 | 
61 | 	void set_resolution(int res_x_, int res_y_) { // NOTE(review): leaves the subimage bounds at their previous values
62 | 		res_x = res_x_;
63 | 		res_y = res_y_;
64 | 	}
65 | 
66 | 	void set_subimage(int subimage_x1_, int subimage_y1_, int subimage_x2_, int subimage_y2_) { // int parameters are stored into unsigned members
67 | 		subimage_x1 = subimage_x1_;
68 | 		subimage_y1 = subimage_y1_;
69 | 		subimage_x2 = subimage_x2_;
70 | 		subimage_y2 = subimage_y2_;
71 | 	}
72 | 
73 | 	void set_spp(int spp_) {
74 | 		spp = spp_;
75 | 	}
76 | 
77 | 	void set_spp_max(int spp_max_) {
78 | 		spp_max = spp_max_;
79 | 	}
80 | 
81 | 	void set_variance_max(float variance_max_) {
82 | 		variance_max = variance_max_;
83 | 	}
84 | 
85 | 	// Starts a render with the given number of threads.
86 | 	bool render(int thread_count=1);
87 | };
88 | 
89 | 
90 | 
91 | #endif // RENDERER_HPP
92 | 
93 | 


--------------------------------------------------------------------------------
/sampling/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library(sampling
2 |     image_sampler.cpp sobol.cpp halton.cpp)
3 | 


--------------------------------------------------------------------------------
/sampling/halton.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2012 Leonhard Gruenschloss (leonhard@gruenschloss.org)
 2 | //
 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | // of this software and associated documentation files (the "Software"), to deal
 5 | // in the Software without restriction, including without limitation the rights to
 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 7 | // of the Software, and to permit persons to whom the Software is furnished to do
 8 | // so, subject to the following conditions:
 9 | //
10 | // The above copyright notice and this permission notice shall be included in
11 | // all copies or substantial portions of the Software.
12 | //
13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | // SOFTWARE.
20 | 
21 | #ifndef HALTON_HPP
22 | #define HALTON_HPP
23 | 
24 | #include "numtype.h"
25 | 
26 | namespace Halton {
27 | // Declarations only; the definitions are not in this header.  Presumably the number of dimensions sample() supports — TODO confirm.
28 | uint32_t max_dimension();
29 | // One float per (dimension, index) pair; presumably the index-th Halton point's coordinate in [0,1) — confirm in halton.cpp.
30 | float sample(const uint32_t dimension, const uint32_t index);
31 | 
32 | }
33 | 
34 | #endif // HALTON_HPP
35 | 
36 | 


--------------------------------------------------------------------------------
/sampling/image_sampler.cpp:
--------------------------------------------------------------------------------
  1 | #include "numtype.h"
  2 | 
  3 | #include "halton.hpp"
  4 | #include "rng.hpp"
  5 | #include "image_sampler.hpp"
  6 | #include "hilbert.hpp"
  7 | #include "morton.hpp"
  8 | 
  9 | #include <array>
 10 | #include <limits.h>
 11 | #include <stdlib.h>
 12 | #include <iostream>
 13 | #include <math.h>
 14 | #include <algorithm>
 15 | 
 16 | 
 17 | ImageSampler::ImageSampler(uint spp,
 18 |                            uint res_x, uint res_y,
 19 |                            uint seed):
 20 | 	spp {spp}, res_x {res_x}, res_y {res_y}, rng {seed}, hash {seed}, seed {seed} {
 21 | 	// Sets up a sampler for a res_x by res_y image with spp samples per pixel.
 22 | 	x = 0;
 23 | 	y = 0;
 24 | 	s = 0;
 25 | 
 26 | 	samp_taken = 0;
 27 | 	tot_samp = spp * res_x * res_y;
 28 | 
 29 | 	// Determine square power of two resolution to cover entire image:
 30 | 	// double curve_res (starting at 2) until it reaches the larger of
 31 | 	// the two image dimensions.
 32 | 	const uint dim = std::max(res_x, res_y);
 33 | 	curve_res = 2;
 34 | 	while (curve_res < dim) {
 35 | 		curve_res <<= 1;
 36 | 	}
 37 | 	points_traversed = 0;
 38 | }
 39 | 
 40 | 
 41 | ImageSampler::~ImageSampler() { // Intentionally empty: no manual cleanup is performed here
 42 | }
 43 | 
 44 | 
 45 | void ImageSampler::get_sample(uint32_t x, uint32_t y, uint32_t d, uint32_t ns, float *sample, uint16_t *coords) {
 46 | 	// Writes ns sample values for pixel (x, y) into sample[]; d offsets
 47 | 	// into the pixel's sequence.  The pixel is echoed into coords if given.
 48 | 	if (coords) {
 49 | 		coords[0] = x;
 50 | 		coords[1] = y;
 51 | 	}
 52 | 
 53 | 	// The first several dimensions are remapped for least image variance
 54 | 	static const std::array<size_t, 10> d_order {{7, 6, 5, 4, 2, 9, 8, 3, 1, 0}};
 55 | 
 56 | 	// Each pixel starts at its own offset into the LDS sequence, obtained
 57 | 	// by hashing its coordinates.  That makes the unconverged image look
 58 | 	// random instead of showing LDS patterns, while the samples within a
 59 | 	// pixel stay contiguous in the sequence (good convergence, and a
 60 | 	// pixel can keep drawing samples indefinitely, e.g. when adaptive).
 61 | 	uint32_t pixel_key = x ^ ((y >> 16) | (y << 16));
 62 | 	const uint32_t seq_index = d + hash.get_int(pixel_key);
 63 | 
 64 | 	// Generate the sample
 65 | 	for (size_t i = 0; i < ns; ++i) {
 66 | 		const size_t dimension = (i < d_order.size()) ? d_order[i] : i;
 67 | 		sample[i] = Halton::sample(dimension, seq_index);
 68 | 	}
 69 | }
 70 | 
 71 | 
 72 | /**
 73 |  * @brief Iteratively produces samples for an image.
 74 |  *
 75 |  * It provides x, y, u, v, and t coordinates always.
 76 |  * On top of that, additional coordinates can be requested via the ns
 77 |  * parameter.
 78 |  *
 79 |  * @param[out] sample A pointer where the sample is stored.
 80 |  * @param ns The number of additional coordinates to provide.
 81 |  */
 82 | //#define PROGRESSIVE_SAMPLING
 83 | #ifndef PROGRESSIVE_SAMPLING
 84 | bool ImageSampler::get_next_sample(uint32_t ns, float *sample, uint16_t *coords) {
 85 | 	// Default variant: takes all spp samples of a pixel, then moves on to the next pixel.
 86 | 	// Check if we're done
 87 | 	if (points_traversed >= (curve_res*curve_res))
 88 | 		return false;
 89 | 
 90 | 	get_sample(x, y, s, ns, sample, coords);
 91 | 
 92 | 	// increment to next sample
 93 | 	samp_taken++;
 94 | 	s++;
 95 | 	if (s >= spp) {
 96 | 		s = 0;
 97 | 
 98 | 		// Space-filling curve traverses pixels; curve points outside the image are skipped
 99 | 		do {
100 | 			Morton::d2xy(points_traversed, &x, &y); // NOTE(review): the first decode uses index 0; if that maps to (0,0), the starting pixel is sampled twice — confirm
101 | 			points_traversed++;
102 | 			if (points_traversed >= (curve_res*curve_res))
103 | 				return false; // NOTE(review): returns false although sample[] was filled by this call, and the point just decoded is never sampled — confirm intended
104 | 		} while (x >= res_x || y >= res_y);
105 | 	}
106 | 
107 | 	return true;
108 | }
109 | #else
110 | bool ImageSampler::get_next_sample(uint32_t ns, float *sample, uint16_t *coords) {
111 | 	// Progressive variant: takes one sample per pixel per pass over the image, for spp passes.
112 | 	// Check if we're done
113 | 	if (points_traversed >= (curve_res*curve_res) && s >= spp)
114 | 		return false;
115 | 
116 | 	get_sample(x, y, s, ns, sample, coords);
117 | 
118 | 	samp_taken++;
119 | 
120 | 	// Space-filling curve traverses pixels
121 | 	do {
122 | 		Morton::d2xy(points_traversed, &x, &y);
123 | 		points_traversed++;
124 | 		if (points_traversed >= (curve_res*curve_res)) {
125 | 			x = y = points_traversed = 0; // End of the curve: restart the traversal for the next pass
126 | 
127 | 			// increment to next sample
128 | 			s++;
129 | 			if (s >= spp)
130 | 				return false;
131 | 		}
132 | 	} while (x >= res_x || y >= res_y);
133 | 
134 | 
135 | 	return true;
136 | }
137 | #endif
138 | 


--------------------------------------------------------------------------------
/sampling/image_sampler.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef IMAGE_SAMPLER_HPP
 2 | #define IMAGE_SAMPLER_HPP
 3 | 
 4 | #include "numtype.h"
 5 | 
 6 | #include "halton.hpp"
 7 | #include "rng.hpp"
 8 | #include "hash.hpp"
 9 | #include <array>
10 | #include <vector>
11 | 
12 | 
13 | /**
14 |  * A sampler for a single "item" which requires a multi-dimensional sample.
15 |  */
16 | struct Sampler {
17 | 	uint32_t offset;  // Index into the Halton sequence, shared by every dimension
18 | 	uint32_t dim = 0; // Next dimension that next() will hand out
19 | 
20 | 	Sampler(): offset(0) {}
21 | 	Sampler(uint32_t x, uint32_t y, uint32_t n, uint32_t seed):
22 | 		offset(hash_u32(x ^ ((y >> 16) | (y << 16)), seed) + n)
23 | 	{}
24 | 
25 | 	float get_sample(const uint32_t dimension) const {
26 | 		static const std::array<size_t, 11> d_order {{10, 7, 6, 5, 4, 2, 9, 8, 3, 1, 0}}; // Reordering of the first several dimensions, for least image variance
27 | 		const bool remapped = dimension < d_order.size();
28 | 
29 | 		// Dimensions beyond the table are used as-is
30 | 		const size_t halton_dim = remapped ? d_order[dimension] : dimension;
31 | 		return Halton::sample(halton_dim, offset);
32 | 	}
33 | 
34 | 	// Returns the sample for the current dimension and advances to the next one.
35 | 	float next() {
36 | 		return get_sample(dim++);
37 | 	}
38 | };
39 | 
40 | 
41 | /*
42 |  * An image sampler.  Returns samples for use by the renderer.
43 |  * Image plane <x,y> samples are returned on the [0,1] square, + edge buffer for filtering.
44 |  * Lens <u,v> samples are returned on the [0,1) square.
45 |  * Time samples are returned on the [0,1) line.
46 |  * All 1d, 2d, and 3d samples are returned on the [0,1) line, square,
47 |  * and cube respectively.
48 |  * The renderer is expected to transform sample ranges as necessary.
49 |  */
50 | class ImageSampler { // Hands out sample values for the pixels of a res_x by res_y image
51 | private:
52 | 	/* General settings. */
53 | 	uint spp;  // Approximate number of samples per pixel
54 | 	uint res_x, res_y;  // Image resolution in pixels
55 | 
56 | 	/* State information. */
57 | 	uint curve_res; // Space filling curve resolution
58 | 	uint points_traversed; // Number of curve points consumed so far by get_next_sample()
59 | 	uint32_t x, y, s; // Current pixel and sample number used by get_next_sample()
60 | 
61 | 	/* For reporting percentages. */
62 | 	uint samp_taken;
63 | 	uint tot_samp; // Set to spp * res_x * res_y by the constructor
64 | 
65 | 	/* Random number generator. */
66 | 	RNG rng;
67 | 	Hash hash;
68 | 	uint32_t seed;
69 | 
70 | public:
71 | 	ImageSampler(uint spp,
72 | 	             uint res_x, uint res_y,
73 | 	             uint seed=0);
74 | 	~ImageSampler();
75 | 
76 | 	void init_tile(); // NOTE(review): declared here, but image_sampler.cpp contains no definition for it
77 | 	Sampler get_single_sampler(uint32_t x, uint32_t y, uint32_t i) { // Standalone Sampler built from (x, y, i) and this sampler's seed
78 | 		return Sampler(x, y, i, seed);
79 | 	}
80 | 	void get_sample(uint32_t x, uint32_t y, uint32_t d, uint32_t ns, float *sample, uint16_t *coords=nullptr);
81 | 	bool get_next_sample(uint32_t ns, float *sample, uint16_t *coords=nullptr);
82 | 
83 | 	float percentage() const { // Ratio of samples taken to tot_samp (a fraction, not scaled to 100)
84 | 		return ((float)(samp_taken)) / tot_samp;
85 | 	}
86 | };
87 | 
88 | 
89 | 
90 | #endif
91 | 


--------------------------------------------------------------------------------
/sampling/samples.hpp:
--------------------------------------------------------------------------------
1 | #ifndef SAMPLES_HPP
2 | #define SAMPLES_HPP
3 | 
4 | 
5 | 
6 | #endif // SAMPLES_HPP


--------------------------------------------------------------------------------
/sampling/sobol.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2012 Leonhard Gruenschloss (leonhard@gruenschloss.org)
 2 | //
 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | // of this software and associated documentation files (the "Software"), to deal
 5 | // in the Software without restriction, including without limitation the rights to
 6 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 7 | // of the Software, and to permit persons to whom the Software is furnished to do
 8 | // so, subject to the following conditions:
 9 | //
10 | // The above copyright notice and this permission notice shall be included in
11 | // all copies or substantial portions of the Software.
12 | //
13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | // SOFTWARE.
20 | 
21 | #ifndef SOBOL_HPP
22 | #define SOBOL_HPP
23 | 
24 | #include <cassert>
25 | 
26 | namespace Sobol {
27 | 
28 | struct Matrices {
29 | 	static const unsigned num_dimensions = 1024;
30 | 	static const unsigned size = 52; // Entries per dimension: sample() starts reading at dimension * size
31 | 	static const unsigned long long matrices[]; // Only declared here; the definition is not in this header
32 | };
33 | 
34 | // Compute one component of the Sobol'-sequence, where the component
35 | // corresponds to the dimension parameter, and the index specifies
36 | // the point inside the sequence. The scramble parameter can be used
37 | // to permute elementary intervals, and might be chosen randomly to
38 | // generate a randomized QMC sequence. Only the Matrices::size least
39 | // significant bits of the scramble value are used.
40 | inline double sample(
41 |     const unsigned dimension,
42 |     unsigned long long index,
43 |     const unsigned long long scramble = 0ULL) {
44 | 	assert(dimension < Matrices::num_dimensions);
45 | 
46 | 	unsigned long long result = scramble & ~-(1ULL << Matrices::size); // ~-(x) == x - 1, i.e. a mask of the low `size` bits
47 | 	for (unsigned i = dimension * Matrices::size; index; index >>= 1, ++i) { // One matrix entry per bit of index, lowest bit first
48 | 		if (index & 1)
49 | 			result ^= Matrices::matrices[i];
50 | 	}
51 | 
52 | 	return result * (1.0 / (1ULL << Matrices::size)); // Scale by 2^-size
53 | }
54 | 
55 | } // namespace Sobol
56 | 
57 | #endif
58 | 
59 | 


--------------------------------------------------------------------------------
/scene/scene.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file and scene.cpp define a Scene class, which is used to build and
 3 |  * store a scene description to be rendered.
 4 |  */
 5 | #ifndef SCENE_HPP
 6 | #define SCENE_HPP
 7 | 
 8 | #include "numtype.h"
 9 | 
10 | #include "global.hpp"
11 | #include "camera.hpp"
12 | #include "bvh.hpp"
13 | #include "light_array.hpp"
14 | #include "light_tree.hpp"
15 | #include "assembly.hpp"
16 | 
17 | #include <vector>
18 | #include <unordered_map>
19 | #include <string>
20 | #include <memory>
21 | 
22 | #include <iostream>
23 | 
24 | 
25 | 
26 | 
27 | /**
28 |  * @brief A 3D scene for rendering.
29 |  *
30 |  * The Scene class is used to build and store the complete description of a 3d
31 |  * scene to be rendered.
32 |  */
33 | struct Scene {
34 | 	std::string name;
35 | 
36 | 	Color background_color; // Starts out as Color(0, 0, 0)
37 | 
38 | 	std::unique_ptr<Camera> camera; // Scene camera; not set by the constructor
39 | 
40 | 	std::unique_ptr<Assembly> root; // Root assembly; always created by the constructor
41 | 
42 | 	// Creates a scene with an all-zero background color and a fresh root assembly.
43 | 	Scene():
44 | 		background_color(0.0f, 0.0f, 0.0f),
45 | 		root(new Assembly())
46 | 	{}
47 | 
48 | 
49 | 	// Prepares the scene for rendering by finalizing the root assembly.
50 | 	void finalize() {
51 | 		root->finalize();
52 | 	}
53 | };
54 | 
55 | #endif // SCENE_HPP
56 | 


--------------------------------------------------------------------------------
/shading/closure_union.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef CLOSURE_UNION_HPP
 2 | #define CLOSURE_UNION_HPP
 3 | 
 4 | #include <type_traits>
 5 | #include "surface_closure.hpp"
 6 | 
 7 | /**
 8 |  * A structure that uses type erasure to contain any surface closure.
 9 |  *
10 |  * init() should be used to initialize the structure from a surface closure
11 |  * of some kind.
12 |  *
13 |  * get() should be used to utilize the contained closure via the returned
14 |  * SurfaceClosure pointer.
15 |  */
16 | struct SurfaceClosureUnion {
17 | 	// The following should always be the size and alignment of the
18 | 	// largest and largest-aligning surface closure, respectively.
19 | 	alignas(GTRClosure) char data[sizeof(GTRClosure)];
20 | 
21 | 	// Copy-constructs `closure` into data.  A closure type that would not fit the storage is rejected at compile time.
22 | 	template <class CLOSURE_TYPE>
23 | 	void init(CLOSURE_TYPE closure) {
24 | 		static_assert(std::is_base_of<SurfaceClosure, CLOSURE_TYPE>::value, "CLOSURE_TYPE is not derived from SurfaceClosure.");
25 | 		static_assert(sizeof(CLOSURE_TYPE) <= sizeof(GTRClosure), "CLOSURE_TYPE is larger than the storage; update the size of data.");
26 | 		static_assert(alignof(CLOSURE_TYPE) <= alignof(GTRClosure), "CLOSURE_TYPE is over-aligned for the storage; update the alignment of data.");
27 | 		new(reinterpret_cast<CLOSURE_TYPE*>(data)) CLOSURE_TYPE(closure);
28 | 	}
29 | 
30 | 	/**
31 | 	 * Return a pointer to the underlying SurfaceClosure.
32 | 	 */
33 | 	SurfaceClosure* get() {
34 | 		return reinterpret_cast<SurfaceClosure*>(data);
35 | 	}
36 | 
37 | 	/**
38 | 	 * Return a pointer to the underlying SurfaceClosure.
39 | 	 */
40 | 	const SurfaceClosure* get() const {
41 | 		return reinterpret_cast<const SurfaceClosure*>(data);
42 | 	}
43 | };
44 | 
45 | #endif // CLOSURE_UNION_HPP


--------------------------------------------------------------------------------
/shading/displacement_shader.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef DISPLACEMENT_SHADER_HPP
 2 | #define DISPLACEMENT_SHADER_HPP
 3 | 
 4 | class DisplacementShader {
 5 | 	~DisplacementShader() {}
 6 | 
 7 | 	/**
 8 | 	 * @brief Evaluates the displacement shader for the given surface
 9 | 	 *        parameters.
10 | 	 *
11 | 	 * TODO: differential geometry as input.
12 | 	 * TODO: surface normal and normal differentials in output.
13 | 	 *
14 | 	 * @param u Surface U parameter.
15 | 	 * @param v Surface V parameter.
16 | 	 * @param id Surface id number.
17 | 	 *
18 | 	 * @return A BBox, with min an max displacement coordinates
19 | 	 */
20 | 	virtual BBox evaluate(float32 u, float32 v, uint_i id) = 0;
21 | };
22 | 
23 | #endif // DISPLACEMENT_SHADER_HPP


--------------------------------------------------------------------------------
/shading/surface_shader.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SURFACE_SHADER_HPP
 2 | #define SURFACE_SHADER_HPP
 3 | 
 4 | #include "numtype.h"
 5 | 
 6 | #include "intersection.hpp"
 7 | #include "surface_closure.hpp"
 8 | #include "closure_union.hpp"
 9 | 
10 | class SurfaceShader {
11 | public:
12 | 	virtual ~SurfaceShader() {}
13 | 
14 | 	/**
15 | 	 * @brief Calculates the SurfaceClosure(s) and their pdfs for the given
16 | 	 * intersection.
17 | 	 *
18 | 	 * @param inter The surface intersection data.  This is an in/out parameter:
19 | 	 *              the geometry, transform, ray data, etc. is 'in' and the
20 | 	 *              closure data is 'out'.
21 | 	 *
22 | 	 * @returns True on success, false on failure.
23 | 	 */
24 | 	virtual bool shade(Intersection* inter) const = 0;
25 | };
26 | 
27 | 
28 | class EmitShader: public SurfaceShader {
29 | public:
30 | 	Color col;
31 | 
32 | 	EmitShader(Color col): col {col} {}
33 | 
34 | 	virtual bool shade(Intersection* inter) const override final {
35 | 		inter->surface_closure.init(EmitClosure(col));
36 | 		inter->closure_prob = 1.0f;
37 | 		return true;
38 | 	}
39 | };
40 | 
41 | 
42 | class LambertShader: public SurfaceShader {
43 | public:
44 | 	Color col;
45 | 
46 | 	LambertShader(Color col): col {col} {}
47 | 
48 | 	virtual bool shade(Intersection* inter) const override final {
49 | 		inter->surface_closure.init(LambertClosure(col));
50 | 		inter->closure_prob = 1.0f;
51 | 		return true;
52 | 	}
53 | };
54 | 
55 | 
56 | class GTRShader: public SurfaceShader {
57 | public:
58 | 	Color col;
59 | 	float roughness;
60 | 	float tail_shape;
61 | 	float fresnel;
62 | 
63 | 	GTRShader(Color col, float roughness, float tail_shape, float fresnel): col {col}, roughness {roughness}, tail_shape {tail_shape}, fresnel {fresnel}
64 | 	{}
65 | 
66 | 	virtual bool shade(Intersection* inter) const override final {
67 | 		inter->surface_closure.init(GTRClosure(col, roughness, tail_shape, fresnel));
68 | 		inter->closure_prob = 1.0f;
69 | 		return true;
70 | 	}
71 | };
72 | 
73 | #endif // SURFACE_SHADER_HPP


--------------------------------------------------------------------------------
/test/test.hpp:
--------------------------------------------------------------------------------
1 | #ifndef TEST_HPP
2 | #define TEST_HPP
3 | 
4 | #include "catch.hpp"
5 | 
6 | #endif // TEST_HPP
7 | 
8 | 


--------------------------------------------------------------------------------
/test/test_float.cpp:
--------------------------------------------------------------------------------
 1 | #include "test.hpp"
 2 | 
 3 | #include <cmath>
 4 | #include <limits>
 5 | 
 6 | 
 7 | /*
 8 |  ************************************************************************
 9 |  * Testing suite for floating point values.
10 |  ************************************************************************
11 |  */
12 | 
13 | TEST_CASE("float") {
14 | 	SECTION("inf_1") {
15 | 		float yar = std::numeric_limits<float>::infinity();
16 | 		float foo = -std::numeric_limits<float>::infinity();
17 | 
18 | 		REQUIRE(std::isinf(yar));
19 | 		REQUIRE(std::isinf(foo));
20 | 	}
21 | 
22 | 	SECTION("inf_2") {
23 | 		float yar1 = 1.0f / 0.0f;
24 | 		float yar2 = 1.0f / -0.0f;
25 | 		float foo1 = -1.0f / 0.0f;
26 | 		float foo2 = -1.0f / -0.0f;
27 | 
28 | 		REQUIRE(std::isinf(yar1));
29 | 		REQUIRE(std::isinf(yar2));
30 | 		REQUIRE(std::isinf(foo1));
31 | 		REQUIRE(std::isinf(foo2));
32 | 	}
33 | 
34 | 
35 | 	SECTION("nan_1") {
36 | 		float yar = std::numeric_limits<float>::quiet_NaN();
37 | 		float foo = std::numeric_limits<float>::signaling_NaN();
38 | 
39 | 		REQUIRE(std::isnan(yar));
40 | 		REQUIRE(std::isnan(foo));
41 | 		REQUIRE(yar != yar);
42 | 		REQUIRE(foo != foo);
43 | 	}
44 | 
45 | 	SECTION("nan_2") {
46 | 		float yar1 = 0.0f / 0.0f;
47 | 		float yar2 = 0.0f / -0.0f;
48 | 		float yar3 = -0.0f / 0.0f;
49 | 		float yar4 = -0.0f / -0.0f;
50 | 
51 | 		REQUIRE(std::isnan(yar1));
52 | 		REQUIRE(std::isnan(yar2));
53 | 		REQUIRE(std::isnan(yar3));
54 | 		REQUIRE(std::isnan(yar4));
55 | 	}
56 | 
57 | 	SECTION("nan_3") {
58 | 		float yar1 = 1.0f + std::numeric_limits<float>::quiet_NaN();
59 | 		float yar2 = 1.0f - std::numeric_limits<float>::quiet_NaN();
60 | 		float yar3 = 1.0f * std::numeric_limits<float>::quiet_NaN();
61 | 		float yar4 = 1.0f / std::numeric_limits<float>::quiet_NaN();
62 | 
63 | 		REQUIRE(std::isnan(yar1));
64 | 		REQUIRE(std::isnan(yar2));
65 | 		REQUIRE(std::isnan(yar3));
66 | 		REQUIRE(std::isnan(yar4));
67 | 	}
68 | }
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/test/test_main.cpp:
--------------------------------------------------------------------------------
1 | #define CATCH_CONFIG_MAIN
2 | #include "catch.hpp"


--------------------------------------------------------------------------------
/tracer/CMakeLists.txt:
--------------------------------------------------------------------------------
# Name the source file with its extension; relying on CMake to guess the
# extension is deprecated (policy CMP0115).
add_library(tracer
	tracer.cpp)
3 | 


--------------------------------------------------------------------------------
/tracer/potentialinter.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef POTENTIALINTER_HPP
 2 | #define POTENTIALINTER_HPP
 3 | 
 4 | #include "numtype.h"
 5 | 
 6 | /**
 7 |  * @brief Records information about a potential intersection with an object.
 8 |  */
 9 | struct PotentialInter {
10 | 	size_t object_id;
11 | 	size_t ray_index;
12 | 	float nearest_hit_t; // The nearest possible hit distance along the ray
13 | 	bool valid; // The potential intersection data is filled and valid
14 | 	uint8_t tag; // Used for misc purposes
15 | 
16 | 	bool operator<(const PotentialInter &b) const {
17 | 		return object_id < b.object_id;
18 | 	}
19 | };
20 | 
21 | static bool compare_potint(const PotentialInter &a, const PotentialInter &b) {
22 | 	// Sort by object id
23 | 	return a.object_id < b.object_id;
24 | }
25 | 
26 | static size_t index_potint(const PotentialInter &a) {
27 | 	return a.object_id;
28 | }
29 | 
30 | #endif // POTENTIALINTER_HPP
31 | 


--------------------------------------------------------------------------------
/tracer/tracer.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file and tracer.cpp define a Tracer class, which manages the tracing
 3 |  * of rays in a scene.
 4 |  */
 5 | #ifndef TRACER_HPP
 6 | #define TRACER_HPP
 7 | 
 8 | #include <vector>
 9 | 
10 | #include "numtype.h"
11 | #include "range.hpp"
12 | #include "rng.hpp"
13 | #include "stack.hpp"
14 | 
15 | #include "instance_id.hpp"
16 | #include "ray.hpp"
17 | #include "intersection.hpp"
18 | #include "potentialinter.hpp"
19 | #include "scene.hpp"
20 | 
21 | 
22 | /**
23 |  * @brief Traces rays in a scene.
24 |  *
25 |  * The Tracer is responsible for doing the actual ray-tracing in a scene.
26 |  * It does _not_ manage the specific integration algorithm, or shading.  Only
27 |  * the tracing of rays and calculating the relevant information about ray
28 |  * hits.
29 |  *
30 |  * It is specifically designed to handle tracing a large number of rays
31 |  * (ideally > a million, as ram allows) simultaneously to gain efficiency
32 |  * in various ways.  The rays do not need to be related to each other or
33 |  * coherent in any way.
34 |  *
35 |  * It is, of course, also capable of tracing a single ray at a time or a small
36 |  * number of rays at a time if necessary. But doing so may be far less
37 |  * efficient depending on the scene.
38 |  *
39 |  * The simplest usage is to add a bunch of rays to the Tracer's queue with
40 |  * queue_rays(), and then trace them all by calling trace_rays().  The
41 |  * resulting intersection data is stored in the rays' data structures directly.
42 |  * Wash, rinse, repeat.
43 |  */
44 | class Tracer {
45 | public:
46 | 	Scene *scene;
47 | 	Range<const WorldRay*> w_rays; // Rays to trace
48 | 	Range<Intersection*> intersections; // Resulting intersections
49 | 	std::vector<Ray> rays;
50 | 	RNG rng;
51 | 	std::vector<const SurfaceShader*> surface_shader_stack;
52 | 	Stack xform_stack; // Stack for transforms as we traverse into transform hierarchies
53 | 	Stack data_stack; // Stack for arbitrary POD data, passed to other functions
54 | 	InstanceID element_id;
55 | 	int element_id_pos = 0;
56 | 
57 | 	Tracer(): xform_stack(16*4*256*64, 256), data_stack(1024*1024*8, 256) {
58 | 		surface_shader_stack.reserve(64);
59 | 	}
60 | 
61 | 	Tracer(Scene *scene_): scene {scene_}, xform_stack(16*4*256*64, 256), data_stack(1024*1024*8, 256) {
62 | 		surface_shader_stack.reserve(64);
63 | 	}
64 | 
65 | 	void set_seed(uint32_t seed) {
66 | 		rng.seed(seed);
67 | 	}
68 | 
69 | 
70 | 	/**
71 | 	 * Traces the provided rays, filling in the corresponding intersections.
72 | 	 *
73 | 	 * @param [in] rays_ The rays to be traced.
74 | 	 * @param [out] intersections_ The resulting intersections.
75 | 	 */
76 | 	uint32_t trace(const WorldRay* w_rays_begin, const WorldRay* w_rays_end, Intersection* intersections_begin, Intersection* intersections_end);
77 | 
78 | private:
79 | 	// Various methods for tracing different object types
80 | 	void trace_assembly(Assembly* assembly, Ray* rays, Ray* rays_end);
81 | 	void trace_surface(Surface* surface, Ray* rays, Ray* end);
82 | 	void trace_complex_surface(ComplexSurface* surface, Ray* rays, Ray* end);
83 | 	void trace_patch_surface(PatchSurface* surface, Ray* rays, Ray* end);
84 | 	void trace_lightsource(Light* light, Ray* rays, Ray* end);
85 | };
86 | 
87 | #endif // TRACER_HPP
88 | 
89 | 


--------------------------------------------------------------------------------
/utils/bit_stack.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef BIT_STACK_HPP
  2 | #define BIT_STACK_HPP
  3 | 
  4 | #include <cassert>
  5 | 
  6 | /**
  7 |  * A bit field for use as a stack of boolean values, with
  8 |  * push, pop, and peek operations.
  9 |  */
 10 | template <typename INT_TYPE>
 11 | class BitStack {
 12 | 	enum {
 13 | 		NUM_BITS = sizeof(INT_TYPE) * 8
 14 | 	};
 15 | 
 16 | 	INT_TYPE bits = 0;
 17 | 
 18 | public:
 19 | 	BitStack() {}
 20 | 	BitStack(INT_TYPE i): bits {i} {}
 21 | 
 22 | 	/**
 23 | 	 * Push a bit onto the top of the stack.
 24 | 	 */
 25 | 	void push(bool value) {
 26 | 		assert(bits >> (NUM_BITS-1) == 0); // Verify no stack overflow
 27 | 		bits <<= 1;
 28 | 		bits |= static_cast<uint32_t>(value);
 29 | 	}
 30 | 
 31 | 	/**
 32 | 	 * Push n bits onto the top of the stack.  The input
 33 | 	 * bits are passed as an integer, with the bit that
 34 | 	 * will be on top in the least significant digit, and
 35 | 	 * the rest following in order from there.
 36 | 	 */
 37 | 	void push(uint32_t value, int n) {
 38 | 		assert(n < NUM_BITS && (bits >> (NUM_BITS-n)) ==  0); // Verify no stack overflow
 39 | 		assert(n < 32); // Verify staying within input size
 40 | 		bits <<= n;
 41 | 		bits |= value & ((1<<n)-1);
 42 | 	}
 43 | 
 44 | 	/**
 45 | 	 * Pop the top bit off the stack.
 46 | 	 */
 47 | 	bool pop() {
 48 | 		const bool b = bits & 1;
 49 | 		bits >>= 1;
 50 | 		return b;
 51 | 	}
 52 | 
 53 | 	/**
 54 | 	 * Pop the top n bits off the stack.  The bits are returned as
 55 | 	 * an integer, with the top bit in the least significant digit,
 56 | 	 * and the rest following in order from there.
 57 | 	 */
 58 | 	uint32_t pop(int n) {
 59 | 		assert(n < NUM_BITS); // Can't pop more than we have
 60 | 		assert(n < 32); // Can't pop more than the return type can hold
 61 | 		const uint32_t b = static_cast<uint32_t>(bits) & ((1<<n)-1);
 62 | 		bits >>= n;
 63 | 		return b;
 64 | 	}
 65 | 
 66 | 	/**
 67 | 	 * Read the top bit of the stack without popping it.
 68 | 	 */
 69 | 	bool peek() const {
 70 | 		return bits & 1;
 71 | 	}
 72 | 
 73 | 	/**
 74 | 	 * Read the top n bits of the stack without popping them.  The bits
 75 | 	 * are returned as an integer, with the top bit in the least
 76 | 	 * significant digit, and the rest following in order from there.
 77 | 	 */
 78 | 	bool peek(int n) const {
 79 | 		assert(n < NUM_BITS); // Can't return more than we have
 80 | 		assert(n < 32); // Can't return more than the return type can hold
 81 | 		return static_cast<uint32_t>(bits) & ((1<<n)-1);
 82 | 	}
 83 | 
 84 | 	/**
 85 | 	 * Read any bit of the stack, by index.
 86 | 	 */
 87 | 	bool operator[](int pos) const {
 88 | 		assert(pos < NUM_BITS); // Verify access bounds
 89 | 		return (bits >> pos) & 1;
 90 | 	}
 91 | };
 92 | 
 93 | 
 94 | /**
 95 |  * A bit field for use as a stack of boolean values, with
 96 |  * push, pop, and peek operations.  Uses two integer types
 97 |  * to give twice the stack size at BitStack.
 98 |  */
 99 | template <typename INT_TYPE>
100 | class BitStack2 {
101 | 	enum {
102 | 		INT_SIZE = sizeof(INT_TYPE) * 8,
103 | 		NUM_BITS = sizeof(INT_TYPE) * 16
104 | 	};
105 | 
106 | 	INT_TYPE bits1, bits2;
107 | 
108 | public:
109 | 	BitStack2() {}
110 | 	BitStack2(INT_TYPE i): bits1 {i} {}
111 | 
112 | 	/**
113 | 	 * Push a bit onto the top of the stack.
114 | 	 */
115 | 	void push(bool value) {
116 | 		assert(bits2 >> (INT_SIZE-1) == 0); // Verify no stack overflow
117 | 		bits2 = (bits2 << 1) | (bits1 >> (INT_SIZE-1));
118 | 		bits1 <<= 1;
119 | 		bits1 |= static_cast<uint32_t>(value);
120 | 	}
121 | 
122 | 	/**
123 | 	 * Push n bits onto the top of the stack.  The input
124 | 	 * bits are passed as an integer, with the bit that
125 | 	 * will be on top in the least significant digit, and
126 | 	 * the rest following in order from there.
127 | 	 */
128 | 	void push(uint32_t value, int n) {
129 | 		assert(n < NUM_BITS && (bits2 >> (INT_SIZE-n)) ==  0); // Verify no stack overflow
130 | 		assert(n < 32); // Verify staying within input size
131 | 		bits2 = (bits2 << n) | (bits1 >> (INT_SIZE-n));
132 | 		bits1 <<= n;
133 | 		bits1 |= value & ((1<<n)-1);
134 | 	}
135 | 
136 | 	/**
137 | 	 * Pop the top bit off the stack.
138 | 	 */
139 | 	bool pop() {
140 | 		const bool b = bits1 & 1;
141 | 		bits1 = (bits1 >> 1) | (bits2 << (INT_SIZE-1));
142 | 		bits2 >>= 1;
143 | 		return b;
144 | 	}
145 | 
146 | 	/**
147 | 	 * Pop the top n bits off the stack.  The bits are returned as
148 | 	 * an integer, with the top bit in the least significant digit,
149 | 	 * and the rest following in order from there.
150 | 	 */
151 | 	uint32_t pop(int n) {
152 | 		assert(n < NUM_BITS); // Can't pop more than we have
153 | 		assert(n < 32); // Can't pop more than the return type can hold
154 | 		const uint32_t b = static_cast<uint32_t>(bits1) & ((1<<n)-1);
155 | 		bits1 = (bits1 >> n) | (bits2 << (INT_SIZE-n));
156 | 		bits2 >>= n;
157 | 		return b;
158 | 	}
159 | 
160 | 	/**
161 | 	 * Read the top bit of the stack without popping it.
162 | 	 */
163 | 	bool peek() const {
164 | 		return bits1 & 1;
165 | 	}
166 | 
167 | 	/**
168 | 	 * Read the top n bits of the stack without popping them.  The bits
169 | 	 * are returned as an integer, with the top bit in the least
170 | 	 * significant digit, and the rest following in order from there.
171 | 	 */
172 | 	bool peek(int n) const {
173 | 		assert(n < NUM_BITS); // Can't return more than we have
174 | 		assert(n < 32); // Can't return more than the return type can hold
175 | 		return static_cast<uint32_t>(bits1) & ((1<<n)-1);
176 | 	}
177 | 
178 | 	/**
179 | 	 * Read any bit of the stack, by index.
180 | 	 */
181 | 	bool operator[](int pos) const {
182 | 		assert(pos < NUM_BITS); // Verify access bounds
183 | 		return (bits1 >> pos) & 1;
184 | 	}
185 | };
186 | 
187 | 
188 | 
189 | 
190 | #endif // BIT_STACK_HPP


--------------------------------------------------------------------------------
/utils/blocked_array.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef BLOCKED_ARRAY_HPP
 2 | #define BLOCKED_ARRAY_HPP
 3 | 
 4 | #include <iostream>
 5 | #include <vector>
 6 | 
 7 | #include "morton.hpp"
 8 | 
 9 | /**
10 |  * @brief A 2d array optimized for cache coherency.
11 |  */
12 | template <class T, uint32_t LOG_BLOCK_SIZE>
13 | class BlockedArray {
14 | private:
15 | 	uint32_t block_size {1 << LOG_BLOCK_SIZE};
16 | 	uint32_t block_mask {block_size - 1};
17 | 	uint32_t u_blocks {0};
18 | 	uint32_t v_blocks {0};
19 | 	uint32_t block_elements {block_size * block_size};
20 | 
21 | 	std::vector<T> data {};
22 | 
23 | public:
24 | 	uint32_t width {0};
25 | 	uint32_t height {0};
26 | 
27 | 	BlockedArray() {}
28 | 
29 | 	BlockedArray(uint32_t w, uint32_t h) {
30 | 		init(w, h);
31 | 
32 | 	}
33 | 
34 | 	void init(uint32_t w, uint32_t h) {
35 | 		width = w;
36 | 		height = h;
37 | 
38 | 		// Round width and height up to the nearest multiple of block_size
39 | 		if (width % block_size)
40 | 			width = width - (width % block_size) + block_size;
41 | 		if (height % block_size)
42 | 			height = height - (height % block_size) + block_size;
43 | 
44 | 		// Calculate the number of blocks in the horizontal direction
45 | 		u_blocks = width >> LOG_BLOCK_SIZE;
46 | 
47 | 		data.resize(width*height);
48 | 	}
49 | 
50 | 	uint32_t index(uint32_t u, uint32_t v) const {
51 | 		// Find the start of the block
52 | 		const uint32_t bu = u >> LOG_BLOCK_SIZE;
53 | 		const uint32_t bv = v >> LOG_BLOCK_SIZE;
54 | 		const uint32_t i1 = block_elements * ((bv * u_blocks) + bu);
55 | 
56 | 		// Find the index within the block
57 | 		u &= block_mask;
58 | 		v &= block_mask;
59 | 		const uint32_t i2 = Morton::xy2d(u, v);
60 | 
61 | 		return i1 + i2;
62 | 	}
63 | 
64 | 	// Element addressing
65 | 	T &operator()(uint32_t u, uint32_t v) {
66 | 		return data[index(u, v)];
67 | 	}
68 | 
69 | 	const T &operator()(uint32_t u, uint32_t v) const {
70 | 		return data[index(u, v)];
71 | 	}
72 | 
73 | };
74 | 
75 | #endif // BLOCKED_ARRAY_HPP
76 | 


--------------------------------------------------------------------------------
/utils/blocked_array_disk_cache.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef BLOCKED_ARRAY_DISK_CACHE_HPP
 2 | #define BLOCKED_ARRAY_DISK_CACHE_HPP
 3 | 
 4 | #include <iostream>
 5 | #include <vector>
 6 | 
 7 | #include "morton.hpp"
 8 | #include "disk_cache.hpp"
 9 | 
10 | #define BLOCK_CACHE_SIZE 64
11 | 
12 | /**
13 |  * @brief A 2d array optimized for cache coherency, and which pages
14 |  * large data to disk.
15 |  *
16 |  * TODO: This class is currently NOT thread safe, even for reading.  This
17 |  * should be addressed in the DiskCache class eventually.
18 |  *
19 |  */
20 | template <class T, uint32_t LOG_BLOCK_SIZE>
21 | class BlockedArrayDiskCache {
22 | private:
23 | 	uint32_t block_size {1 << LOG_BLOCK_SIZE};
24 | 	uint32_t block_mask {block_size - 1};
25 | 	uint32_t u_blocks {0};
26 | 	uint32_t v_blocks {0};
27 | 	uint32_t block_elements {block_size * block_size};
28 | 
29 | 	DiskCache::Cache<T, (1<<LOG_BLOCK_SIZE)> data {};
30 | 
31 | public:
32 | 	uint32_t width {0};
33 | 	uint32_t height {0};
34 | 
35 | 	BlockedArrayDiskCache() {}
36 | 
37 | 	BlockedArrayDiskCache(uint32_t w, uint32_t h): BlockedArrayDiskCache() {
38 | 		init(w, h);
39 | 	}
40 | 
41 | 	void init(uint32_t w, uint32_t h) {
42 | 		width = w;
43 | 		height = h;
44 | 
45 | 		// Round width and height up to the nearest multiple of block_size
46 | 		if (width % block_size)
47 | 			width = width - (width % block_size) + block_size;
48 | 		if (height % block_size)
49 | 			height = height - (height % block_size) + block_size;
50 | 
51 | 		// Calculate the number of blocks in the horizontal direction
52 | 		u_blocks = width >> LOG_BLOCK_SIZE;
53 | 
54 | 		data.init(width*height, BLOCK_CACHE_SIZE);
55 | 	}
56 | 
57 | 	uint32_t index(uint32_t u, uint32_t v) const {
58 | 		// Find the start of the block
59 | 		const uint32_t bu = u >> LOG_BLOCK_SIZE;
60 | 		const uint32_t bv = v >> LOG_BLOCK_SIZE;
61 | 		const uint32_t i1 = block_elements * ((bv * u_blocks) + bu);
62 | 
63 | 		// Find the index within the block
64 | 		u &= block_mask;
65 | 		v &= block_mask;
66 | 		const uint32_t i2 = Morton::xy2d(u, v);
67 | 
68 | 		return i1 + i2;
69 | 	}
70 | 
71 | 	// Element addressing
72 | 	T &operator()(uint32_t u, uint32_t v) {
73 | 		return data[index(u, v)];
74 | 	}
75 | 
76 | 	const T &operator()(uint32_t u, uint32_t v) const {
77 | 		return data[index(u, v)];
78 | 	}
79 | 
80 | };
81 | 
82 | #endif // BLOCKED_ARRAY_DISK_CACHE_HPP
83 | 


--------------------------------------------------------------------------------
/utils/chunked_array_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test.hpp"
  2 | 
  3 | #include "chunked_array.hpp"
  4 | 
  5 | #define INITIAL_VALUE 123456
  6 | 
  7 | struct MyInt {
  8 | 	int n;
  9 | 	MyInt() {
 10 | 		n = INITIAL_VALUE;
 11 | 	}
 12 | };
 13 | 
 14 | 
 15 | 
 16 | TEST_CASE("chunked_array") {
 17 | 	SECTION("constructor_1") {
 18 | 		ChunkedArray<int, 10> ar;
 19 | 		REQUIRE(ar.size() == 0);
 20 | 	}
 21 | 
 22 | 	SECTION("constructor_2") {
 23 | 		ChunkedArray<int, 10> ar(1013);
 24 | 		REQUIRE(ar.size() == 1013);
 25 | 	}
 26 | 
 27 | 	SECTION("constructor_3") {
 28 | 		ChunkedArray<MyInt, 10> ar(1013);
 29 | 
 30 | 		bool eq = true;
 31 | 		for (size_t i = 0; i < ar.size(); i++)
 32 | 			eq = eq && (ar[i].n == INITIAL_VALUE);
 33 | 
 34 | 		REQUIRE(eq);
 35 | 	}
 36 | 
 37 | 	SECTION("read_write_1") {
 38 | 		ChunkedArray<int, 10> ar(4011);
 39 | 
 40 | 		for (size_t i = 0; i < ar.size(); i++)
 41 | 			ar[i] = i;
 42 | 
 43 | 		bool eq = true;
 44 | 		for (size_t i = 0; i < ar.size(); i++)
 45 | 			eq = eq && (ar[i] == (int)i);
 46 | 
 47 | 		REQUIRE(eq);
 48 | 	}
 49 | 
 50 | 	SECTION("resize_1") {
 51 | 		ChunkedArray<int, 10> ar;
 52 | 		ar.resize(1013);
 53 | 
 54 | 		REQUIRE(ar.size() == 1013);
 55 | 	}
 56 | 
 57 | 	SECTION("resize_2") {
 58 | 		ChunkedArray<MyInt, 10> ar(12);
 59 | 		ar.resize(1013);
 60 | 
 61 | 		bool eq = true;
 62 | 		for (size_t i = 0; i < ar.size(); i++)
 63 | 			eq = eq && (ar[i].n == INITIAL_VALUE);
 64 | 
 65 | 		REQUIRE(eq);
 66 | 		REQUIRE(ar.size() == 1013);
 67 | 	}
 68 | 
 69 | 	SECTION("resize_3") {
 70 | 		ChunkedArray<MyInt, 10> ar(40000);
 71 | 		ar.resize(1013);
 72 | 
 73 | 		bool eq = true;
 74 | 		for (size_t i = 0; i < ar.size(); i++)
 75 | 			eq = eq && (ar[i].n == INITIAL_VALUE);
 76 | 
 77 | 		REQUIRE(eq);
 78 | 		REQUIRE(ar.size() == 1013);
 79 | 	}
 80 | 
 81 | 	SECTION("resize_4") {
 82 | 		ChunkedArray<MyInt, 10> ar(40000);
 83 | 		ar.resize(0);
 84 | 		ar.resize(6230);
 85 | 		ar.resize(10000);
 86 | 		ar.resize(943);
 87 | 		ar.resize(302853);
 88 | 		ar.resize(0);
 89 | 		ar.resize(1013);
 90 | 
 91 | 		bool eq = true;
 92 | 		for (size_t i = 0; i < ar.size(); i++)
 93 | 			eq = eq && (ar[i].n == INITIAL_VALUE);
 94 | 
 95 | 		REQUIRE(eq);
 96 | 		REQUIRE(ar.size() == 1013);
 97 | 	}
 98 | 
 99 | 	SECTION("iterator_1") {
100 | 		ChunkedArray<int, 10> ar(1234);
101 | 		ChunkedArray<int, 10>::iterator it = ar.begin();
102 | 
103 | 		for (size_t i = 0; i < ar.size(); i++)
104 | 			it[i] = i;
105 | 
106 | 		bool eq = true;
107 | 		it = ar.begin();
108 | 		for (size_t i = 0; i < ar.size(); i++)
109 | 			eq = eq && ((size_t)it[i] == i);
110 | 
111 | 		REQUIRE(eq);
112 | 	}
113 | 
114 | 	SECTION("iterator_2") {
115 | 		ChunkedArray<int, 10> ar(1234);
116 | 		ChunkedArray<int, 10>::iterator it = ar.begin() + 23;
117 | 
118 | 		for (size_t i = 23; i < ar.size(); i++)
119 | 			it[i-23] = i;
120 | 
121 | 		bool eq = true;
122 | 		it = ar.begin() + 23;
123 | 		for (size_t i = 23; i < ar.size(); i++)
124 | 			eq = eq && ((size_t)it[i-23] == i);
125 | 
126 | 		REQUIRE(eq);
127 | 	}
128 | 
129 | 	SECTION("iterator_3") {
130 | 		ChunkedArray<int, 10> ar(1234);
131 | 		ChunkedArray<int, 10>::iterator it = ar.begin() + 23;
132 | 
133 | 		ar[23] = 54321;
134 | 
135 | 		REQUIRE(*it == 54321);
136 | 	}
137 | 
138 | 	SECTION("iterator_4") {
139 | 		ChunkedArray<int, 10> ar(1234);
140 | 		ChunkedArray<int, 10>::iterator it = ar.begin();
141 | 
142 | 		for (size_t i = 0; i < ar.size(); i++)
143 | 			ar[i] = i;
144 | 
145 | 		bool eq = true;
146 | 		for (size_t i = 0; i < ar.size(); i++) {
147 | 			eq = eq && (ar[i] == *it);
148 | 			++it;
149 | 		}
150 | 
151 | 		REQUIRE(eq);
152 | 	}
153 | }
154 | 
155 | 
156 | 
157 | 


--------------------------------------------------------------------------------
/utils/counting_sort.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef COUNTING_SORT_HPP
 2 | #define COUNTING_SORT_HPP
 3 | 
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

#include "numtype.h"
 7 | 
 8 | namespace CountingSort {
 9 | 
10 | /**
11 |  * @brief Counting sort algorithm.
12 |  *
13 |  * Works on any array whose items can be sorted as non-negative
14 |  * integers (e.g. there are a finite and countable number of possible
15 |  * values).  However, to be practical the maximum integer can't be
16 |  * too absurdly large.
17 |  *
18 |  * The benefit of counting sort is that it sorts in linear time to the
19 |  * length of the array (makes 6*array_length accesses to the data), and
20 |  * there is extremely efficient for very large array sizes.
21 |  *
22 |  * @param list Pointer to the beginning of the array.
23 |  * @param list_length Length of the array.
24 |  * @param max_items The largest integer that can come out of an item in the array.
25 |  * @param indexer Pointer to a function that can turn type T into an integer.
26 |  */
27 | template <class T>
28 | bool sort(T *list, size_t list_length, size_t max_items, size_t(*indexer)(const T &)) {
29 | 	size_t item_counts[max_items];
30 | 	for (size_t i = 0; i < max_items; i++) {
31 | 		item_counts[i] = 0;
32 | 	}
33 | 
34 | 	// Count the items
35 | 	for (size_t i = 0; i < list_length; i++) {
36 | 		item_counts[indexer(list[i])]++;
37 | 	}
38 | 
39 | 	// Set up start-index array
40 | 	size_t item_start_indices[max_items];
41 | 	size_t running_count = 0;
42 | 	for (size_t i = 0; i < max_items; i++) {
43 | 		item_start_indices[i] = running_count;
44 | 		running_count += item_counts[i];
45 | 	}
46 | 
47 | 	// Set up filled-so-far-count array
48 | 	size_t item_fill_counts[max_items];
49 | 	for (size_t i = 0; i < max_items; i++) {
50 | 		item_fill_counts[i] = 0;
51 | 	}
52 | 
53 | 	// Sort the list
54 | 	size_t traversal = 0;
55 | 	size_t i = 0;
56 | 	while (i < list_length) {
57 | 		const size_t index = indexer(list[i]);
58 | 		const size_t next_place = item_start_indices[index] + item_fill_counts[index];
59 | 
60 | 		if (i >= item_start_indices[index] && i < next_place) {
61 | 			i++;
62 | 		} else {
63 | 			std::swap(list[i], list[next_place]);
64 | 			item_fill_counts[index]++;
65 | 		}
66 | 		traversal++;
67 | 	}
68 | 
69 | 	return true;
70 | }
71 | 
72 | 
73 | }
74 | #endif // COUNTING_SORT_HPP


--------------------------------------------------------------------------------
/utils/disk_cache_test.cpp:
--------------------------------------------------------------------------------
 1 | #include "test.hpp"
 2 | 
 3 | #include "disk_cache.hpp"
 4 | #include "rng.hpp"
 5 | 
 6 | 
 7 | TEST_CASE("disk_cache") {
 8 | 	// Constructors
 9 | 	SECTION("constructor") {
10 | 		DiskCache::Cache<float, 1024> cache1(100000, 32);
11 | 		DiskCache::Cache<float, 213> cache2(30001, 33);
12 | 
13 | 		REQUIRE(cache1.block_size() == 1024);
14 | 		REQUIRE(cache2.block_size() == 213);
15 | 		REQUIRE(cache1.element_count() >= 100000);
16 | 		REQUIRE(cache2.element_count() >= 30001);
17 | 	}
18 | 
19 | 	SECTION("manual_init") {
20 | 		DiskCache::Cache<float, 1024> cache1;
21 | 		DiskCache::Cache<float, 213> cache2;
22 | 
23 | 		cache1.init(100000, 32);
24 | 		cache2.init(30001, 33);
25 | 
26 | 		REQUIRE(cache1.block_size() == 1024);
27 | 		REQUIRE(cache2.block_size() == 213);
28 | 		REQUIRE(cache1.element_count() >= 100000);
29 | 		REQUIRE(cache2.element_count() >= 30001);
30 | 	}
31 | 
32 | 	SECTION("write_read") {
33 | 		RNG rng(1);
34 | 		DiskCache::Cache<float, 1024> cache(1000000, 32);
35 | 
36 | 		for (int i = 0; i < 1000000; i++) {
37 | 			cache[i] = rng.next_float();
38 | 		}
39 | 
40 | 		rng.seed(1);
41 | 		bool match = true;
42 | 		for (int i = 0; i < 1000000; i++) {
43 | 			match = match && cache[i] == rng.next_float();
44 | 		}
45 | 
46 | 		REQUIRE(match);
47 | 	}
48 | }
49 | 
50 | 


--------------------------------------------------------------------------------
/utils/hash.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef HASH_HPP
 2 | #define HASH_HPP
 3 | 
 4 | #include <cstdint>
 5 | 
 6 | static inline uint32_t hash_u32(uint32_t n, uint32_t seed) {
 7 | 	uint32_t hash = n;
 8 | 
 9 | 	for (uint32_t i=0; i < 3; ++i) {
10 | 		hash *= 1936502639;
11 | 		hash ^= hash >> 16;
12 | 		hash += seed;
13 | 	}
14 | 
15 | 	return hash;
16 | }
17 | 
18 | static inline float hash_f32(uint32_t n, uint32_t seed) {
19 | 	uint32_t hash = hash_u32(n, seed);
20 | 
21 | 	union {
22 | 		float w;
23 | 		uint32_t a;
24 | 	};
25 | 	a = hash >> 9; // Take upper 23 bits
26 | 	a |= 0x3F800000; // Make a float from bits
27 | 	return w-1.f;
28 | }
29 | 
30 | /**
31 |  * @brief A seedable hash class.
32 |  *
33 |  * Takes 32 bit unsigned ints as input, and can output either
34 |  * unsigned 32 bit ints or floats.
35 |  */
36 | class Hash {
37 | private:
38 | 	uint32_t seed {42};
39 | 
40 | public:
41 | 	Hash(uint32_t seed): seed {seed} {}
42 | 
43 | 	/**
44 | 	 * @brief Takes an int and returns an int.
45 | 	 */
46 | 	uint32_t get_int(uint32_t n) {
47 | 		return hash_u32(n, seed);
48 | 	}
49 | 
50 | 	/**
51 | 	 * @brief Takes an int and returns a float in [0, 1).
52 | 	 */
53 | 	float get_float(uint32_t n) {
54 | 		uint32_t hash = hash_u32(n, seed);
55 | 
56 | 		union {
57 | 			float w;
58 | 			uint32_t a;
59 | 		};
60 | 		a = hash >> 9; // Take upper 23 bits
61 | 		a |= 0x3F800000; // Make a float from bits
62 | 		return w-1.f;
63 | 	}
64 | };
65 | 
66 | #endif // HASH_HPP


--------------------------------------------------------------------------------
/utils/hilbert.hpp:
--------------------------------------------------------------------------------
 1 | /* Hilbert curve transforms.
 2 |  */
 3 | 
 4 | #ifndef HILBERT_HPP
 5 | #define HILBERT_HPP
 6 | 
 7 | #include "numtype.h"
 8 | 
 9 | namespace Hilbert {
10 | 
11 | // Utility function used by the functions below.
12 | static inline void hil_rot(uint32_t n, uint32_t &x, uint32_t &y, uint32_t rx, uint32_t ry) {
13 | 	if (ry == 0) {
14 | 		if (rx == 1) {
15 | 			x = n-1 - x;
16 | 			y = n-1 - y;
17 | 		}
18 | 		const uint32_t t = x;
19 | 		x = y;
20 | 		y = t;
21 | 	}
22 | }
23 | 
24 | /**
25 |  * @brief Convert (x,y) to hilbert curve index.
26 |  *
27 |  * @param x The x coordinate.  Must be a positive integer no greater than n.
28 |  * @param y The y coordinate.  Must be a positive integer no greater than n.
29 |  *
30 |  * @returns The hilbert curve index corresponding to the (x,y) coordinates given.
31 |  */
32 | static inline uint32_t xy2d(uint32_t x, uint32_t y) {
33 | 	const uint32_t n = 1 << 16;
34 | 	uint32_t rx, ry, s, d=0;
35 | 	for (s=n>>1; s>0; s>>=1) {
36 | 		rx = (x & s) > 0;
37 | 		ry = (y & s) > 0;
38 | 		d += s * s * ((3 * rx) ^ ry);
39 | 		hil_rot(s, x, y, rx, ry);
40 | 	}
41 | 	return d;
42 | }
43 | 
44 | 
45 | /**
46 |  * @brief Convert hilbert curve index to (x,y).
47 |  *
48 |  * @param d The hilbert curve index.
49 |  * @param[out] x Pointer where the x coordinate will be stored.
50 |  * @param[out] y Pointer where the y coordinate will be stored.
51 |  */
52 | static inline void d2xy(uint32_t d, uint32_t *x, uint32_t *y) {
53 | 	const uint32_t n = 1 << 16;
54 | 	uint32_t rx, ry, s, t=d;
55 | 	*x = *y = 0;
56 | 	for (s=1; s<n; s<<=1) {
57 | 		rx = 1 & (t>>1);
58 | 		ry = 1 & (t ^ rx);
59 | 		hil_rot(s, *x, *y, rx, ry);
60 | 		*x += s * rx;
61 | 		*y += s * ry;
62 | 		t >>= 2;
63 | 	}
64 | }
65 | 
66 | }
67 | 
68 | #endif // HILBERT_HPP
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/utils/job_queue.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef JOB_QUEUE_HPP
  2 | #define JOB_QUEUE_HPP
  3 | 
  4 | #include <cstdlib>
  5 | #include <vector>
  6 | #include <thread>
  7 | #include <functional>
  8 | 
  9 | #include "ring_buffer_concurrent.hpp"
 10 | 
 11 | /**
 12 |  * @brief A job queue for the producer/consumer model of managing threads.
 13 |  *
 14 |  * Consumer threads are created and managed by the queue.  To use this,
 15 |  * simply add jobs to the queue and they will be processed.  All jobs
 16 |  * must be thread-safe, as multiple jobs can be processed concurrently.
 17 |  *
 18 |  * A job can be any object that is callable without parameters.  A good
 19 |  * choice is std::function<void()>
 20 |  */
 21 | template <class T=std::function<void()>>
 22 | class JobQueue {
 23 | 	RingBufferConcurrent<T> queue;
 24 | 	std::vector<std::thread> threads;
 25 | 
 26 | 	bool done;
 27 | 
 28 | 	// A consumer thread, which watches the queue for jobs and
 29 | 	// executes them.
 30 | 	void run_consumer() {
 31 | 		T job;
 32 | 		while (pop(&job)) {
 33 | 			job();
 34 | 		}
 35 | 	}
 36 | 
 37 | public:
 38 | 	/**
 39 | 	 * @brief Constructor.
 40 | 	 *
 41 | 	 * By default uses 1 thread and creates a queue 4 times the size
 42 | 	 * of the thread count.
 43 | 	 *
 44 | 	 * @param thread_count Number of consumer threads to spawn for processing jobs.
 45 | 	 * @param queue_size Size of the job queue buffer.  Zero means determine
 46 | 	 *                   automatically from number of threads.
 47 | 	 */
 48 | 	explicit JobQueue(size_t thread_count=1, size_t queue_size=0) {
 49 | 		done = false;
 50 | 
 51 | 		// Set up queue
 52 | 		if (queue_size == 0)
 53 | 			queue_size = thread_count * 4;
 54 | 		queue.resize(queue_size);
 55 | 
 56 | 		// Create and start consumer threads
 57 | 		threads.resize(thread_count);
 58 | 		for (auto &thread: threads)
 59 | 			thread = std::thread(&JobQueue<T>::run_consumer, this);
 60 | 	}
 61 | 
 62 | 	// Destructor. Joins and deletes threads.
 63 | 	~JobQueue() {
 64 | 		finish();
 65 | 	}
 66 | 
 67 | 
 68 | 	/**
 69 | 	 * @brief Marks the queue as done, and waits for all
 70 | 	 *        jobs to finish.
 71 | 	 *
 72 | 	 * Once the queue is done, producers can no longer add jobs to
 73 | 	 * the queue, and consumers will be notified when the queue is
 74 | 	 * empty so they can terminate.
 75 | 	 */
 76 | 	void finish() {
 77 | 		if (!done) {
 78 | 			// Notify all threads that the queue is done
 79 | 			done = true;
 80 | 			queue.disallow_blocking();
 81 | 
 82 | 			// Wait for threads to finish
 83 | 			for (auto &thread: threads)
 84 | 				thread.join();
 85 | 		}
 86 | 	}
 87 | 
 88 | 
 89 | 	/**
 90 | 	 * @brief Adds a job to the queue.
 91 | 	 *
 92 | 	 * @param job The job to add.
 93 | 	 *
 94 | 	 * @return True on success, false if the queue is closed.
 95 | 	 */
 96 | 	bool push(const T &job) {
 97 | 		// Add job to queue
 98 | 		return queue.push_blocking(job);
 99 | 	}
100 | 
101 | 
102 | 	/**
103 | 	 * @brief Gets the next job, removing it from the queue.
104 | 	 *
105 | 	 * @param [out] job The popped job is copied into here.  Must be a
106 | 	 *                  pointer to valid memory.
107 | 	 *
108 | 	 * @return True on success, false if the queue is empty and closed.
109 | 	 */
110 | 	bool pop(T *job) {
111 | 		// Pop the next job
112 | 		return queue.pop_blocking(job);
113 | 	}
114 | };
115 | 
116 | #endif // JOB_QUEUE_HPP


--------------------------------------------------------------------------------
/utils/job_queue_test.cpp:
--------------------------------------------------------------------------------
 1 | #include "test.hpp"
 2 | #include "job_queue.hpp"
 3 | 
 4 | // Simple callable class that does nothing more than set an integer
 5 | // value.
 6 | class TestJob {
 7 | 	int *inc;
 8 | 	int value;
 9 | public:
10 | 	TestJob() {}
11 | 
12 | 	TestJob(int *incr, int val) {
13 | 		inc = incr;
14 | 		value = val;
15 | 	}
16 | 
17 | 	void operator()() {
18 | 		*inc = value;
19 | 	}
20 | };
21 | 
22 | 
23 | 
24 | 
25 | TEST_CASE("job_queue") {
26 | 	SECTION("basic_usage") {
27 | 		JobQueue<TestJob> q;
28 | 		int ints[100];
29 | 		for (int i = 0; i < 100; i++)
30 | 			q.push(TestJob(&(ints[i]), i));
31 | 		q.finish();
32 | 
33 | 		bool test = true;
34 | 		for (int i = 0; i < 100; i++)
35 | 			test = test && ints[i] == i;
36 | 
37 | 		REQUIRE(test);
38 | 	}
39 | 
40 | 	SECTION("queue_bottleneck") {
41 | 		JobQueue<TestJob> q(1000, 2);  // 1000 threads, queue size of 2
42 | 		int ints[100];
43 | 		for (int i = 0; i < 100; i++)
44 | 			q.push(TestJob(&(ints[i]), i));
45 | 		q.finish();
46 | 
47 | 		bool test = true;
48 | 		for (int i = 0; i < 100; i++)
49 | 			test = test && ints[i] == i;
50 | 
51 | 		REQUIRE(test);
52 | 	}
53 | 
54 | 	SECTION("destruct") {
55 | 		JobQueue<TestJob> *q;
56 | 		q = new JobQueue<TestJob>;
57 | 		int ints[100];
58 | 		for (int i = 0; i < 100; i++)
59 | 			q->push(TestJob(&(ints[i]), i));
60 | 		delete q; // Should call finish() via destructor
61 | 
62 | 		bool test = true;
63 | 		for (int i = 0; i < 100; i++)
64 | 			test = test && ints[i] == i;
65 | 
66 | 		REQUIRE(test);
67 | 	}
68 | }
69 | 
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/utils/low_level.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef LOW_LEVEL_HPP
 2 | #define LOW_LEVEL_HPP
 3 | 
 4 | #include <mmintrin.h>
 5 | 
 6 | namespace LowLevel {
 7 | 
 8 | static const int cache_line_size = 64;
 9 | 
10 | template <typename T>
11 | inline void prefetch_L1(T* address) {
12 | 	constexpr int lines = (sizeof(T)/cache_line_size) + ((sizeof(T)%cache_line_size) == 0 ? 0 : 1);
13 | 	for (int i = 0; i < lines; ++i) {
14 | 		_mm_prefetch(address+i, _MM_HINT_T0);
15 | 	}
16 | }
17 | 
18 | template <typename T>
19 | inline void prefetch_L2(T* address) {
20 | 	constexpr int lines = (sizeof(T)/cache_line_size) + ((sizeof(T)%cache_line_size) == 0 ? 0 : 1);
21 | 	for (int i = 0; i < lines; ++i) {
22 | 		_mm_prefetch(address+i, _MM_HINT_T1);
23 | 	}
24 | }
25 | 
26 | template <typename T>
27 | inline void prefetch_L3(T* address) {
28 | 	constexpr int lines = (sizeof(T)/cache_line_size) + ((sizeof(T)%cache_line_size) == 0 ? 0 : 1);
29 | 	for (int i = 0; i < lines; ++i) {
30 | 		_mm_prefetch(address+i, _MM_HINT_T2);
31 | 	}
32 | }
33 | 
34 | }
35 | 
36 | #endif // LOW_LEVEL_HPP


--------------------------------------------------------------------------------
/utils/lru_cache.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef LRU_CACHE_HPP
  2 | #define LRU_CACHE_HPP
  3 | 
  4 | #include <cstdlib>
  5 | #include <unordered_map>
  6 | #include <list>
  7 | #include <memory>
  8 | #include <mutex>
  9 | 
 10 | #include "spinlock.hpp"
 11 | 
 12 | 
 13 | // Should be overloaded for more complex types
 14 | template <class T>
 15 | size_t size_in_bytes(const T& data) {
 16 | 	return sizeof(T);
 17 | }
 18 | 
 19 | /*
 20 |  * A thread-safe Least-Recently-Used cache.
 21 |  */
 22 | template <class K, class T>
 23 | class LRUCache {
 24 | 	struct LRUPair {
 25 | 		K key;
 26 | 		std::shared_ptr<T> data_ptr;
 27 | 	};
 28 | 
 29 | 	SpinLock slock;
 30 | 
 31 | 	size_t max_bytes;
 32 | 	size_t byte_count {0};
 33 | 
 34 | 	// A map from indices to iterators into the list
 35 | 	std::unordered_map<K, typename std::list<LRUPair>::iterator> map;
 36 | 
 37 | 	// A list that contains the index and a pointer to the data of each element
 38 | 	std::list<LRUPair> elements;
 39 | 
 40 | 	// The number of bytes each item takes up, aside from the size of the item itself.
 41 | 	// In other words, the overhead of the LRUCache per-item.
 42 | 	// Estimated for now as the size of an LRUPair plus the size of
 43 | 	// 2 pointers (for the list) plus the size of an LRUKEY and list
 44 | 	// iterator (for the map).
 45 | 	// TODO: more accurate estimate
 46 | 	const size_t per_item_size_cost = sizeof(LRUPair) + (sizeof(void*)*2) + sizeof(K) + sizeof(typename std::list<LRUPair>::iterator);
 47 | 
 48 | public:
 49 | 	LRUCache(size_t max_bytes_=40): max_bytes {max_bytes_} {}
 50 | 
 51 | 	~LRUCache() {
 52 | 	}
 53 | 
 54 | 	/*
 55 | 	 * Sets the maximum number of bytes in the cache.
 56 | 	 * Should only be called once right after construction.
 57 | 	 */
 58 | 	void set_max_size(size_t size) {
 59 | 		max_bytes = size;
 60 | 	}
 61 | 
 62 | 	/*
 63 | 	 * Adds the given item to the cache using the given key.
 64 | 	 * If the key already exists, the existing item will be
 65 | 	 * replaced.
 66 | 	 *
 67 | 	 * Returns the key.
 68 | 	 */
 69 | 	K put(std::shared_ptr<T> data_ptr, K key) {
 70 | 		std::unique_lock<SpinLock> lock(slock);
 71 | 
 72 | 		// Check if the key exists, and erase it if it does
 73 | 		const auto exists = static_cast<bool>(map.count(key));
 74 | 		if (exists)
 75 | 			erase(key);
 76 | 
 77 | 		// Add data to the cache
 78 | 		add(data_ptr, key);
 79 | 
 80 | 		return key;
 81 | 	}
 82 | 
 83 | 	/**
 84 | 	 * @brief Fetches the data associated with a key.
 85 | 	 *
 86 | 	 * @param key The key of the data to fetch.
 87 | 	 *
 88 | 	 * @return shared_ptr to the data on success, nullptr if the data isn't
 89 | 	 *         in the cache.
 90 | 	 *
 91 | 	 * Example usage:
 92 | 	 * std::shared_ptr<Data> p = cache.get(12345);
 93 | 	 * if (p) {
 94 | 	 *     // Do things with the data here
 95 | 	 * }
 96 | 	 */
 97 | 	std::shared_ptr<T> get(K key) {
 98 | 		std::unique_lock<SpinLock> lock(slock);
 99 | 
100 | 		// Check if the key exists
101 | 		const auto exists = static_cast<bool>(map.count(key));
102 | 		if (!exists)
103 | 			return nullptr;
104 | 
105 | 		touch(key);
106 | 
107 | 		return map[key]->data_ptr;
108 | 	}
109 | 
110 | 	/**
111 | 	 * @brief Erases all items from the cache.get
112 | 	 */
113 | 	void clear() {
114 | 		std::unique_lock<SpinLock> lock(slock);
115 | 
116 | 		map.clear();
117 | 		elements.clear();
118 | 		byte_count = 0;
119 | 	}
120 | 
121 | private:
122 | 	/*
123 | 	 * Adds an item to the cache with the given key.
124 | 	 */
125 | 	void add(std::shared_ptr<T>& data_ptr, K key) {
126 | 		byte_count += size_in_bytes(*data_ptr) + per_item_size_cost;
127 | 
128 | 		// Remove last element(s) if necessary to make room
129 | 		while (byte_count >= max_bytes) {
130 | 			if (!erase_last())
131 | 				break;
132 | 		}
133 | 
134 | 		// Add the new data
135 | 		auto it = elements.begin();
136 | 		it = elements.insert(it, LRUPair {key, data_ptr});
137 | 
138 | 		// Log it in the map
139 | 		map[key] = it;
140 | 	}
141 | 
142 | 	/*
143 | 	 * Erases the given key and associated data from the cache.
144 | 	 */
145 | 	void erase(K key) {
146 | 		byte_count -= size_in_bytes(*(map[key]->data_ptr)) + per_item_size_cost;
147 | 		elements.erase(map[key]);
148 | 		map.erase(key);
149 | 	}
150 | 
151 | 	/*
152 | 	 * Erases the last inactive element in the cache.
153 | 	 */
154 | 	bool erase_last() {
155 | 		for (auto rit = elements.rbegin(); rit != elements.rend(); ++rit) {
156 | 			erase(rit->key);
157 | 			return true;
158 | 		}
159 | 		return false;
160 | 	}
161 | 
162 | 	/*
163 | 	 * Moves a given item to the front of the cache.
164 | 	 */
165 | 	void touch(K key) {
166 | 		elements.splice(elements.begin(), elements, map[key]);
167 | 	}
168 | };
169 | 
170 | #endif
171 | 


--------------------------------------------------------------------------------
/utils/mis.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef MIS_HPP
 2 | #define MIS_HPP
 3 | 
 4 | // Utility functions for multiple importance sampling
 5 | 
 6 | template <typename T>
 7 | T balance_heuristic(T a, T b) {
 8 | 	return a / (a + b);
 9 | }
10 | 
// Multiple importance sampling weight using the power heuristic with
// an exponent of two: a^2 / (a^2 + b^2).
template <typename T>
T power_heuristic(T a, T b) {
	const auto a_squared = a * a;
	return a_squared / (a_squared + (b * b));
}
17 | 
18 | #endif // MIS_HPP


--------------------------------------------------------------------------------
/utils/morton.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Morton code (a.k.a. z-curve) transforms.
 3 |  */
 4 | 
 5 | #ifndef MORTON_HPP
 6 | #define MORTON_HPP
 7 | 
 8 | namespace Morton {
 9 | 
10 | /**
11 |  * @brief Encodes x and y coordinates into a morton code index.
12 |  *
13 |  * In practice x and y need to be within the range of an unsigned 16 bit
14 |  * integer, since the output is a single 32 bit index.
15 |  */
16 | static inline uint32_t xy2d(uint32_t x, uint32_t y) {
17 | 	x &= 0x0000ffff;
18 | 	y &= 0x0000ffff;
19 | 	x |= (x << 8);
20 | 	y |= (y << 8);
21 | 	x &= 0x00ff00ff;
22 | 	y &= 0x00ff00ff;
23 | 	x |= (x << 4);
24 | 	y |= (y << 4);
25 | 	x &= 0x0f0f0f0f;
26 | 	y &= 0x0f0f0f0f;
27 | 	x |= (x << 2);
28 | 	y |= (y << 2);
29 | 	x &= 0x33333333;
30 | 	y &= 0x33333333;
31 | 	x |= (x << 1);
32 | 	y |= (y << 1);
33 | 	x &= 0x55555555;
34 | 	y &= 0x55555555;
35 | 	return x | (y << 1);
36 | }
37 | 
38 | /**
39 |  * @brief Decodes a morton code index into x and y coordinates.
40 |  */
41 | static inline void d2xy(uint32_t d, uint32_t *x, uint32_t *y) {
42 | 	*x = d;
43 | 	*y = (*x >> 1);
44 | 	*x &= 0x55555555;
45 | 	*y &= 0x55555555;
46 | 	*x |= (*x >> 1);
47 | 	*y |= (*y >> 1);
48 | 	*x &= 0x33333333;
49 | 	*y &= 0x33333333;
50 | 	*x |= (*x >> 2);
51 | 	*y |= (*y >> 2);
52 | 	*x &= 0x0f0f0f0f;
53 | 	*y &= 0x0f0f0f0f;
54 | 	*x |= (*x >> 4);
55 | 	*y |= (*y >> 4);
56 | 	*x &= 0x00ff00ff;
57 | 	*y &= 0x00ff00ff;
58 | 	*x |= (*x >> 8);
59 | 	*y |= (*y >> 8);
60 | 	*x &= 0x0000ffff;
61 | 	*y &= 0x0000ffff;
62 | }
63 | 
64 | }
65 | 
66 | #endif // MORTON_HPP
67 | 


--------------------------------------------------------------------------------
/utils/numtype.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Defines basic numerical types for use throughout psychopath.
 3 |  */
 4 | 
 5 | #ifndef NUMTYPE_H
 6 | #define NUMTYPE_H
 7 | 
 8 | #include <stdlib.h>
 9 | #include <stdint.h>
10 | 
// Shorthand name for `unsigned int`.
typedef unsigned int		uint;
12 | 
13 | 
14 | #endif // NUMTYPE_H
15 | 


--------------------------------------------------------------------------------
/utils/range.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef RANGE_HPP
 2 | #define RANGE_HPP
 3 | 
 4 | #include <iterator>
 5 | #include <utility>
 6 | 
 7 | /**
 8 |  * A Range class, based on the proposal for std::range at:
 9 |  * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2012/n3350.html
10 |  * But with a few things omitted.
11 |  */
12 | template<typename Iterator>
13 | class Range {
14 | private:
15 | 	Iterator iter_begin;
16 | 	Iterator iter_end;
17 | 
18 | public:
19 | 	// types
20 | 	typedef typename std::iterator_traits<Iterator>::iterator_category iterator_category;
21 | 	typedef typename std::iterator_traits<Iterator>::value_type value_type;
22 | 	typedef typename std::iterator_traits<Iterator>::difference_type difference_type;
23 | 	typedef typename std::iterator_traits<Iterator>::reference reference;
24 | 	typedef typename std::iterator_traits<Iterator>::pointer pointer;
25 | 
26 | 	// constructors
27 | 	Range() {}
28 | 	constexpr Range(Iterator begin, Iterator end): iter_begin {begin}, iter_end {end} {}
29 | 	constexpr Range(std::pair<Iterator, Iterator> iter_pair): iter_begin {iter_pair.first}, iter_end {iter_pair.second} {}
30 | 
31 | 	// iterator access
32 | 	constexpr Iterator begin() const {
33 | 		return iter_begin;
34 | 	}
35 | 	constexpr Iterator end() const {
36 | 		return iter_end;
37 | 	}
38 | 	constexpr const Iterator cbegin() const {
39 | 		return iter_begin;
40 | 	}
41 | 	constexpr const Iterator cend() const {
42 | 		return iter_end;
43 | 	}
44 | 
45 | 	// element access
46 | 	constexpr reference front() const {
47 | 		return *iter_begin;
48 | 	}
49 | 	constexpr reference back() const {
50 | 		return *iter_end;
51 | 	}
52 | 	constexpr reference operator[](difference_type index) const {
53 | 		return *(iter_begin + index);
54 | 	}
55 | 
56 | 	// size
57 | 	constexpr bool empty() const {
58 | 		return iter_begin == iter_end;
59 | 	}
60 | 	constexpr difference_type size() const {
61 | 		return std::distance(iter_begin, iter_end);
62 | 	}
63 | 
64 | 	// creating derived ranges
65 | 	//pair< range, range > split(difference_type index) const;
66 | 	//Range slice(difference_type start, difference_type stop) const;
67 | 	//Range slice(difference_type start) const;
68 | };
69 | 
70 | // deducing constructor wrappers
71 | template<typename Iterator>
72 | constexpr Range<Iterator> make_range(Iterator begin, Iterator end) {
73 | 	return Range<Iterator>(begin, end);
74 | }
75 | 
76 | #endif // RANGE_HPP
77 | 


--------------------------------------------------------------------------------
/utils/ring_buffer.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef RING_BUFFER
  2 | #define RING_BUFFER
  3 | 
  4 | #include <cstdlib>
  5 | #include <vector>
  6 | 
  7 | /**
  8 |  * @brief A ring buffer, or circular buffer.
  9 |  *
 10 |  * Acts as a limited-size FIFO queue, where overflow simply results in
 11 |  * the queue overwriting itself from the back.
 12 |  */
 13 | template <class T>
 14 | class RingBuffer {
 15 | private:
 16 | 	std::vector<T> buffer {T{}}; // Default 1 item large
 17 | 
 18 | 	size_t next {0};  // Index of the next item to be consumed
 19 | 	size_t count {0};  // Number of unconsumed items in the buffer
 20 | 
 21 | public:
 22 | 	/**
 23 | 	 * @brief Default constructor.
 24 | 	 */
 25 | 	RingBuffer() {}
 26 | 
 27 | 	/**
 28 | 	 * @brief Constructor.
 29 | 	 *
 30 | 	 * @param size Size of the buffer in number-of-items.
 31 | 	 */
 32 | 	RingBuffer(size_t buffer_size): buffer(buffer_size) {}
 33 | 
 34 | 	/**
 35 | 	 * @brief Resizes the buffer.
 36 | 	 *
 37 | 	 * @warning Significant data loss can occur if this is done on
 38 | 	 *          a non-empty buffer.  Check is_empty() before calling
 39 | 	 *          this.
 40 | 	 *
 41 | 	 * @param size New size of the buffer in number-of-items
 42 | 	 *
 43 | 	 * TODO: minimize data loss when running this.
 44 | 	 */
 45 | 	void resize(size_t buffer_size) {
 46 | 		next = 0;
 47 | 		count = 0;
 48 | 		buffer.resize(buffer_size);
 49 | 	}
 50 | 
 51 | 	/**
 52 | 	 * @brief Returns the size of the buffer.
 53 | 	 */
 54 | 	size_t size() {
 55 | 		return buffer.size();
 56 | 	}
 57 | 
 58 | 	/**
 59 | 	 * @brief Pushes an item onto the front of the buffer.
 60 | 	 *
 61 | 	 * If the buffer is full, this will start over-writing
 62 | 	 * the tail of the buffer.  Make sure to check is_full()
 63 | 	 * if you don't want this behavior.
 64 | 	 *
 65 | 	 * @param item The item to push.
 66 | 	 */
 67 | 	void push(const T &item) {
 68 | 		buffer[(next+count)%buffer.size()] = item;
 69 | 		count++;
 70 | 
 71 | 		// If we overwrote a non-empty item in the buffer
 72 | 		if (count > buffer.size()) {
 73 | 			next = (next + 1) % buffer.size();
 74 | 			count = buffer.size();
 75 | 		}
 76 | 	}
 77 | 
 78 | 	/**
 79 | 	 * @brief Pops an item off the back of the buffer.
 80 | 	 *
 81 | 	 * If the buffer is empty, this will return garbage.
 82 | 	 * Make sure to check is_empty().
 83 | 	 *
 84 | 	 * @return The popped item.
 85 | 	 */
 86 | 	T pop() {
 87 | 		const size_t i = next;
 88 | 		if (count > 0) {
 89 | 			next = (next + 1) % buffer.size();
 90 | 			count--;
 91 | 		}
 92 | 
 93 | 		return buffer[i];
 94 | 	}
 95 | 
 96 | 	/**
 97 | 	 * @brief Returns whether the buffer is full or not.
 98 | 	 */
 99 | 	bool is_full() {
100 | 		return count == buffer.size();
101 | 	}
102 | 
103 | 	/**
104 | 	 * @brief Returns whether the buffer is empty or not.
105 | 	 */
106 | 	bool is_empty() {
107 | 		return count == 0;
108 | 	}
109 | };
110 | 
111 | #endif // RING_BUFFER


--------------------------------------------------------------------------------
/utils/ring_buffer_atomic.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef RING_BUFFER
  2 | #define RING_BUFFER
  3 | 
  4 | #include <cstdlib>
  5 | #include <vector>
  6 | #include <atomic>
  7 | 
  8 | 
  9 | template <class T>
 10 | class RingBufferAtomicItem {
 11 | 	T item;
 12 | 	std::atomic_flag taken;
 13 | };
 14 | 
 15 | 
 16 | /**
 17 |  * WIP
 18 |  * @brief A ring buffer, or circular buffer, that uses atomics to be
 19 |  * thread-safe for consumers.  Only single-producer is supported at
 20 |  * the moment.
 21 |  *
 22 |  * Acts as a limited-size FIFO queue, where overflow simply results in
 23 |  * the queue overwriting itself from the back.
 24 |  */
 25 | template <class T>
 26 | class RingBufferAtomic {
 27 | private:
 28 | 	std::vector<RingBufferAtomicItem<T>> buffer;
 29 | 
 30 | 	std::atomic<size_t> next;  // Index of the next item to be consumed
 31 | 	std::atomic<size_t> count;  // Number of unconsumed items in the buffer
 32 | 
 33 | public:
 34 | 	/**
 35 | 	 * @brief Default constructor, buffer size of 1.
 36 | 	 */
 37 | 	RingBuffer(): buffer(1), next {0}, count {0} {
 38 | 		for (auto& item: buffer)
 39 | 			item.taken.test_and_set(std::memory_order_acquire);
 40 | 	}
 41 | 
 42 | 	/**
 43 | 	 * @brief Constructor.
 44 | 	 *
 45 | 	 * @param size Size of the buffer in number-of-items.
 46 | 	 */
 47 | 	RingBuffer(size_t buffer_size): buffer(buffer_size), next {0}, count {0} {
 48 | 		for (auto& item: buffer)
 49 | 			item.taken.test_and_set(std::memory_order_acquire);
 50 | 	}
 51 | 
 52 | 	/**
 53 | 	 * @brief Resizes the buffer.
 54 | 	 *
 55 | 	 * @warning Significant data loss and/or loss of proper
 56 | 	 * syncronization between threads can happen if this is called
 57 | 	 * at the wrong time.  Only call this before any reading or writing
 58 | 	 * is done.
 59 | 	 *
 60 | 	 * @param size New size of the buffer in number-of-items
 61 | 	 */
 62 | 	void resize(size_t buffer_size) {
 63 | 		next = 0;
 64 | 		count = 0;
 65 | 		buffer.resize(buffer_size);
 66 | 		for (auto& item: buffer)
 67 | 			item.taken.test_and_set(std::memory_order_acquire);
 68 | 	}
 69 | 
 70 | 	/**
 71 | 	 * @brief Returns the size of the buffer.
 72 | 	 */
 73 | 	size_t size() const {
 74 | 		return buffer.size();
 75 | 	}
 76 | 
 77 | 	/**
 78 | 	 * @brief Pushes an item onto the front of the buffer.
 79 | 	 *
 80 | 	 * If the buffer is full, this will start over-writing
 81 | 	 * the tail of the buffer.  Make sure to check is_full()
 82 | 	 * if you don't want this behavior.
 83 | 	 *
 84 | 	 * @param item The item to push.
 85 | 	 */
 86 | 	void push(const T &item) {
 87 | 		buffer[(next+count)%buffer.size()].item = item;
 88 | 		count++;
 89 | 
 90 | 		// If we overwrote a non-empty item in the buffer
 91 | 		if (count > buffer.size()) {
 92 | 			next = (next + 1) % buffer.size();
 93 | 			count = buffer.size();
 94 | 		}
 95 | 	}
 96 | 
 97 | 	/**
 98 | 	 * @brief Pops an item off the back of the buffer.
 99 | 	 *
100 | 	 * If the buffer is empty, this will return garbage.
101 | 	 * Make sure to check is_empty().
102 | 	 *
103 | 	 * @return The popped item.
104 | 	 */
105 | 	T pop() {
106 | 		const size_t i = next;
107 | 		if (count > 0) {
108 | 			next = (next + 1) % buffer.size();
109 | 			count--;
110 | 		}
111 | 
112 | 		return buffer[i];
113 | 	}
114 | 
115 | 	/**
116 | 	 * @brief Returns whether the buffer is full or not.
117 | 	 */
118 | 	bool is_full() {
119 | 		return count == buffer.size();
120 | 	}
121 | 
122 | 	/**
123 | 	 * @brief Returns whether the buffer is empty or not.
124 | 	 */
125 | 	bool is_empty() {
126 | 		return count == 0;
127 | 	}
128 | };
129 | 
130 | #endif // RING_BUFFER


--------------------------------------------------------------------------------
/utils/ring_buffer_concurrent.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef RING_BUFFER_CONCURRENT
  2 | #define RING_BUFFER_CONCURRENT
  3 | 
  4 | #include <cstdlib>
  5 | #include <atomic>
  6 | #include <mutex>
  7 | #include <condition_variable>
  8 | 
  9 | #include "ring_buffer.hpp"
 10 | 
 11 | /**
 12 |  * @brief A thread-safe ring buffer, or circular buffer.
 13 |  *
 14 |  * Acts as a limited-size FIFO queue.
 15 |  */
 16 | template <class T>
 17 | class RingBufferConcurrent {
 18 | private:
 19 | 	RingBuffer<T> buffer;
 20 | 
 21 | 	std::mutex mut;
 22 | 	std::condition_variable full;
 23 | 	std::condition_variable empty;
 24 | 
 25 | 	bool stop;
 26 | 	std::atomic<size_t> blocker_count;  // Counters for blocking pushers and poppers.
 27 | 
 28 | public:
 29 | 	/**
 30 | 	 * @brief Default constructor, buffer size of 1.
 31 | 	 */
 32 | 	RingBufferConcurrent(): buffer(1), stop {false}, blocker_count {0} {}
 33 | 
 34 | 	/**
 35 | 	 * @brief Constructor.
 36 | 	 *
 37 | 	 * @param size Size of the buffer in number-of-items.
 38 | 	 */
 39 | 	RingBufferConcurrent(size_t buffer_size): buffer(buffer_size), stop {false}, blocker_count {0} {}
 40 | 
 41 | 	/**
 42 | 	 * @brief Resizes the buffer.
 43 | 	 *
 44 | 	 * @warning Significant data loss can occur if this is done on
 45 | 	 *          a non-empty buffer.  Check is_empty() before calling
 46 | 	 *          this.
 47 | 	 *
 48 | 	 * @param size New size of the buffer in number-of-items
 49 | 	 *
 50 | 	 * TODO: minimize data loss when running this.
 51 | 	 */
 52 | 	void resize(size_t buffer_size) {
 53 | 		std::unique_lock<std::mutex> lock(mut);
 54 | 		buffer.resize(buffer_size);
 55 | 	}
 56 | 
 57 | 	/**
 58 | 	 * @brief Returns the size of the buffer.
 59 | 	 */
 60 | 	size_t size() {
 61 | 		return buffer.size();
 62 | 	}
 63 | 
 64 | 	/**
 65 | 	 * @brief Forces current blocking calls to end and return false.
 66 | 	 *
 67 | 	 * Any currently waiting call to push_blocking() or pop_blocking()
 68 | 	 * will be stopped and will return false.
 69 | 	 */
 70 | 	void stop_blocking() {
 71 | 		mut.lock();
 72 | 		stop = true;
 73 | 		full.notify_all();
 74 | 		empty.notify_all();
 75 | 		mut.unlock();
 76 | 
 77 | 		// Wait for all blockers to stop
 78 | 		while (blocker_count > 0) {}
 79 | 
 80 | 		mut.lock();
 81 | 		stop = false;
 82 | 		mut.unlock();
 83 | 	}
 84 | 
 85 | 	/**
 86 | 	 * @brief Stops all blocking calls and prevents further blocking
 87 | 	 * calls.
 88 | 	 */
 89 | 	void disallow_blocking() {
 90 | 		mut.lock();
 91 | 		stop = true;
 92 | 		full.notify_all();
 93 | 		empty.notify_all();
 94 | 		mut.unlock();
 95 | 	}
 96 | 
 97 | 	/**
 98 | 	 * @brief Pushes an item onto the front of the buffer.
 99 | 	 *
100 | 	 * @param [in] item The item to push.
101 | 	 *
102 | 	 * @return Whether the item was successfully pushed or not.
103 | 	 */
104 | 	bool push(const T &item) {
105 | 		std::unique_lock<std::mutex> lock(mut);
106 | 		if (buffer.is_full())
107 | 			return false;
108 | 
109 | 		// Push item
110 | 		buffer.push(item);
111 | 
112 | 		// Notify waiting poppers that there's an item in the queue
113 | 		empty.notify_all();
114 | 
115 | 		return true;
116 | 	}
117 | 
118 | 	/**
119 | 	 * @brief Pushes an item onto the front of the buffer.
120 | 	 *
121 | 	 * If the buffer is full, this will block until there is space
122 | 	 * to successfully push.
123 | 	 *
124 | 	 * @param [in] item The item to push.
125 | 	 *
126 | 	 * @return Whether the item was successfully pushed or not.
127 | 	 */
128 | 	bool push_blocking(const T &item) {
129 | 		std::unique_lock<std::mutex> lock(mut);
130 | 		blocker_count++;
131 | 
132 | 		// Wait for open space in the buffer
133 | 		while (buffer.is_full()) {
134 | 			if (stop) {
135 | 				blocker_count--;
136 | 				return false;
137 | 			} else {
138 | 				full.wait(lock);
139 | 			}
140 | 		}
141 | 
142 | 		// Push item
143 | 		buffer.push(item);
144 | 
145 | 		// Notify waiting poppers that there's an item in the queue
146 | 		empty.notify_all();
147 | 
148 | 		blocker_count--;
149 | 		return true;
150 | 	}
151 | 
152 | 	/**
153 | 	 * @brief Pops an item off the back of the buffer.
154 | 	 *
155 | 	 * @param [out] item Popped item is copied to this memory location.
156 | 	 * @return Whether an item was successfully popped or not.
157 | 	 */
158 | 	bool pop(T* item) {
159 | 		std::unique_lock<std::mutex> lock(mut);
160 | 		if (buffer.is_empty())
161 | 			return false;
162 | 
163 | 		// Pop item
164 | 		*item = buffer.pop();
165 | 
166 | 		// Notify waiting pushers that there's space free
167 | 		full.notify_all();
168 | 
169 | 		return true;
170 | 	}
171 | 
172 | 	/**
173 | 	 * @brief Pops an item off the back of the buffer.
174 | 	 *
175 | 	 * If the buffer is empty, this will block until there is an item
176 | 	 * to pop.
177 | 	 *
178 | 	 * @param [out] item Popped item is copied to this memory location.
179 | 	 * @return Whether an item was popped or not.
180 | 	 */
181 | 	bool pop_blocking(T* item) {
182 | 		std::unique_lock<std::mutex> lock(mut);
183 | 		blocker_count++;
184 | 
185 | 		// Wait for open space in the buffer
186 | 		while (buffer.is_empty()) {
187 | 			if (stop) {
188 | 				blocker_count--;
189 | 				return false;
190 | 			} else {
191 | 				empty.wait(lock);
192 | 			}
193 | 		}
194 | 
195 | 		// Pop item
196 | 		*item = buffer.pop();
197 | 
198 | 		// Notify waiting pushers that there's space free
199 | 		full.notify_all();
200 | 
201 | 		blocker_count--;
202 | 		return true;
203 | 	}
204 | };
205 | 
206 | #endif // RING_BUFFER_CONCURRENT


--------------------------------------------------------------------------------
/utils/ring_buffer_concurrent_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test.hpp"
  2 | #include "ring_buffer_concurrent.hpp"
  3 | 
  4 | 
  5 | 
  6 | TEST_CASE("ring_buffer_concurrent") {
  7 | 	/* constructor tests */
  8 | 	SECTION("constructor_1") {
  9 | 		RingBufferConcurrent<int> rb;
 10 | 
 11 | 		REQUIRE(rb.size() == 1);
 12 | 	}
 13 | 
 14 | 	SECTION("constructor_2") {
 15 | 		RingBufferConcurrent<int> rb(100);
 16 | 
 17 | 		REQUIRE(rb.size() == 100);
 18 | 	}
 19 | 
 20 | 
 21 | 
 22 | 
 23 | 	/* resize() tests */
 24 | 	SECTION("resize_1") {
 25 | 		RingBufferConcurrent<int> rb;
 26 | 		rb.resize(100);
 27 | 
 28 | 		REQUIRE(rb.size() == 100);
 29 | 	}
 30 | 
 31 | 	SECTION("resize_2") {
 32 | 		RingBufferConcurrent<int> rb(50);
 33 | 		rb.resize(100);
 34 | 
 35 | 		REQUIRE(rb.size() == 100);
 36 | 	}
 37 | 
 38 | 
 39 | 
 40 | 
 41 | 	/* push()/pop() tests */
 42 | 	SECTION("push_pop_1") {
 43 | 		// Partially fill buffer, then empty it
 44 | 		RingBufferConcurrent<int> rb(100);
 45 | 		bool test = true;
 46 | 		for (int i = 0; i < 50; i++)
 47 | 			rb.push(i);
 48 | 		int result {0};
 49 | 		for (int i = 0; i < 50; i++) {
 50 | 			rb.pop(&result);
 51 | 			test = test && (result == i);
 52 | 		}
 53 | 
 54 | 		REQUIRE(test);
 55 | 	}
 56 | 
 57 | 	SECTION("push_pop_2") {
 58 | 		// Fully fill buffer, then empty it
 59 | 		RingBufferConcurrent<int> rb(100);
 60 | 		bool test = true;
 61 | 		for (int i = 0; i < 100; i++)
 62 | 			rb.push(i);
 63 | 		int result {0};
 64 | 		for (int i = 0; i < 100; i++) {
 65 | 			rb.pop(&result);
 66 | 			test = test && (result == i);
 67 | 		}
 68 | 
 69 | 		REQUIRE(test);
 70 | 	}
 71 | 
 72 | 	SECTION("push_pop_3") {
 73 | 		// Push and pop repeatedly
 74 | 		RingBufferConcurrent<int> rb(100);
 75 | 		bool test = true;
 76 | 		int result {0};
 77 | 		for (int i = 0; i < 350; i++) {
 78 | 			rb.push(i);
 79 | 			rb.pop(&result);
 80 | 			test = test && (result == i);
 81 | 		}
 82 | 
 83 | 		REQUIRE(test);
 84 | 	}
 85 | 
 86 | 	SECTION("push_pop_4") {
 87 | 		// Overflow buffer
 88 | 		RingBufferConcurrent<int> rb(100);
 89 | 		bool test = true;
 90 | 		for (int i = 0; i < 350; i++)
 91 | 			test = test && rb.push(i); // Should return false when overflowing
 92 | 
 93 | 		REQUIRE(!test);
 94 | 	}
 95 | 
 96 | 	SECTION("push_pop_5") {
 97 | 		// Overempty buffer
 98 | 		RingBufferConcurrent<int> rb(100);
 99 | 		bool test = true;
100 | 		for (int i = 0; i < 50; i++)
101 | 			rb.push(i);
102 | 		int result {0};
103 | 		for (int i = 0; i < 60; i++)
104 | 			test = test && rb.pop(&result); // Should return false when empty
105 | 
106 | 		REQUIRE(!test);
107 | 	}
108 | }
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 


--------------------------------------------------------------------------------
/utils/ring_buffer_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test.hpp"
  2 | #include "ring_buffer.hpp"
  3 | 
  4 | 
  5 | TEST_CASE("ring_buffer") {
  6 | 	/* constructor tests */
  7 | 	SECTION("constructor_1") {
  8 | 		RingBuffer<int> rb;
  9 | 
 10 | 		REQUIRE(rb.size() == 1);
 11 | 	}
 12 | 
 13 | 	SECTION("constructor_2") {
 14 | 		RingBuffer<int> rb(100);
 15 | 
 16 | 		REQUIRE(rb.size() == 100);
 17 | 	}
 18 | 
 19 | 
 20 | 
 21 | 
 22 | 	/* resize() tests */
 23 | 	SECTION("resize_1") {
 24 | 		RingBuffer<int> rb;
 25 | 		rb.resize(100);
 26 | 
 27 | 		REQUIRE(rb.size() == 100);
 28 | 	}
 29 | 
 30 | 	SECTION("resize_2") {
 31 | 		RingBuffer<int> rb(50);
 32 | 		rb.resize(100);
 33 | 
 34 | 		REQUIRE(rb.size() == 100);
 35 | 	}
 36 | 
 37 | 
 38 | 
 39 | 
 40 | 	/* is_empty() tests */
 41 | 	SECTION("is_empty_1") {
 42 | 		// No items added
 43 | 		RingBuffer<int> rb(100);
 44 | 
 45 | 		REQUIRE(rb.is_empty());
 46 | 	}
 47 | 
 48 | 	SECTION("is_empty_2") {
 49 | 		// A few items added
 50 | 		RingBuffer<int> rb(100);
 51 | 		for (int i = 0; i < 5; i++)
 52 | 			rb.push(i);
 53 | 
 54 | 		REQUIRE(!rb.is_empty());
 55 | 	}
 56 | 
 57 | 	SECTION("is_empty_3") {
 58 | 		// Max out buffer with items
 59 | 		RingBuffer<int> rb(100);
 60 | 		for (int i = 0; i < 100; i++)
 61 | 			rb.push(i);
 62 | 
 63 | 		REQUIRE(!rb.is_empty());
 64 | 	}
 65 | 
 66 | 	SECTION("is_empty_4") {
 67 | 		// Overflow buffer with items
 68 | 		RingBuffer<int> rb(100);
 69 | 		for (int i = 0; i < 350; i++)
 70 | 			rb.push(i);
 71 | 
 72 | 		REQUIRE(!rb.is_empty());
 73 | 	}
 74 | 
 75 | 	SECTION("is_empty_5") {
 76 | 		// Items added and all removed
 77 | 		RingBuffer<int> rb(100);
 78 | 		for (int i = 0; i < 50; i++)
 79 | 			rb.push(i);
 80 | 		for (int i = 0; i < 50; i++)
 81 | 			rb.pop();
 82 | 
 83 | 		REQUIRE(rb.is_empty());
 84 | 	}
 85 | 
 86 | 	SECTION("is_empty_6") {
 87 | 		// Items added and some removed
 88 | 		RingBuffer<int> rb(100);
 89 | 		for (int i = 0; i < 50; i++)
 90 | 			rb.push(i);
 91 | 		for (int i = 0; i < 25; i++)
 92 | 			rb.pop();
 93 | 
 94 | 		REQUIRE(!rb.is_empty());
 95 | 	}
 96 | 
 97 | 
 98 | 
 99 | 
100 | 	/* is_full() tests */
101 | 	SECTION("is_full_1") {
102 | 		// No items added
103 | 		RingBuffer<int> rb(100);
104 | 
105 | 		REQUIRE(!rb.is_full());
106 | 	}
107 | 
108 | 	SECTION("is_full_2") {
109 | 		// A few items added
110 | 		RingBuffer<int> rb(100);
111 | 		for (int i = 0; i < 5; i++)
112 | 			rb.push(i);
113 | 
114 | 		REQUIRE(!rb.is_full());
115 | 	}
116 | 
117 | 	SECTION("is_full_3") {
118 | 		// Max out buffer with items
119 | 		RingBuffer<int> rb(100);
120 | 		for (int i = 0; i < 100; i++)
121 | 			rb.push(i);
122 | 
123 | 		REQUIRE(rb.is_full());
124 | 	}
125 | 
126 | 	SECTION("is_full_4") {
127 | 		// Overflow buffer with items
128 | 		RingBuffer<int> rb(100);
129 | 		for (int i = 0; i < 350; i++)
130 | 			rb.push(i);
131 | 
132 | 		REQUIRE(rb.is_full());
133 | 	}
134 | 
135 | 	SECTION("is_full_5") {
136 | 		// Items added and all removed
137 | 		RingBuffer<int> rb(100);
138 | 		for (int i = 0; i < 50; i++)
139 | 			rb.push(i);
140 | 		for (int i = 0; i < 50; i++)
141 | 			rb.pop();
142 | 
143 | 		REQUIRE(!rb.is_full());
144 | 	}
145 | 
146 | 	SECTION("is_full_6") {
147 | 		// Items added and some removed
148 | 		RingBuffer<int> rb(100);
149 | 		for (int i = 0; i < 50; i++)
150 | 			rb.push(i);
151 | 		for (int i = 0; i < 25; i++)
152 | 			rb.pop();
153 | 
154 | 		REQUIRE(!rb.is_full());
155 | 	}
156 | 
157 | 
158 | 
159 | 
160 | 	/* push()/pop() tests */
161 | 	SECTION("push_pop_1") {
162 | 		// Partially fill buffer, then empty it
163 | 		RingBuffer<int> rb(100);
164 | 		bool test = true;
165 | 		for (int i = 0; i < 50; i++)
166 | 			rb.push(i);
167 | 		for (int i = 0; i < 50; i++)
168 | 			test = test && (rb.pop() == i);
169 | 
170 | 		REQUIRE(test);
171 | 	}
172 | 
173 | 	SECTION("push_pop_2") {
174 | 		// Fully fill buffer, then empty it
175 | 		RingBuffer<int> rb(100);
176 | 		bool test = true;
177 | 		for (int i = 0; i < 100; i++)
178 | 			rb.push(i);
179 | 		for (int i = 0; i < 100; i++)
180 | 			test = test && (rb.pop() == i);
181 | 
182 | 		REQUIRE(test);
183 | 	}
184 | 
185 | 	SECTION("push_pop_3") {
186 | 		// Overflow buffer, then empty it
187 | 		RingBuffer<int> rb(100);
188 | 		bool test = true;
189 | 		for (int i = 0; i < 350; i++)
190 | 			rb.push(i);
191 | 		for (int i = 250; i < 350; i++)
192 | 			test = test && (rb.pop() == i);
193 | 
194 | 		REQUIRE(test);
195 | 	}
196 | 
197 | 	SECTION("push_pop_4") {
198 | 		// Push and pop repeatedly
199 | 		RingBuffer<int> rb(100);
200 | 		bool test = true;
201 | 		for (int i = 0; i < 350; i++) {
202 | 			rb.push(i);
203 | 			test = test && (rb.pop() == i);
204 | 		}
205 | 
206 | 		REQUIRE(test);
207 | 	}
208 | 
209 | 	SECTION("push_pop_5") {
210 | 		// Push and pop repeatedly in chunks
211 | 		RingBuffer<int> rb(100);
212 | 		bool test = true;
213 | 		for (int i = 0; i < 350; i++) {
214 | 			rb.push(i);
215 | 			rb.push(i+1);
216 | 			rb.push(i+2);
217 | 			rb.push(i+3);
218 | 			rb.push(i+4);
219 | 			rb.push(i+5);
220 | 			test = test && (rb.pop() == i);
221 | 			test = test && (rb.pop() == i+1);
222 | 			test = test && (rb.pop() == i+2);
223 | 			test = test && (rb.pop() == i+3);
224 | 			test = test && (rb.pop() == i+4);
225 | 			test = test && (rb.pop() == i+5);
226 | 		}
227 | 
228 | 		REQUIRE(test);
229 | 	}
230 | }
231 | 
232 | 
233 | 
234 | 


--------------------------------------------------------------------------------
/utils/rng.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef RNG_HPP
  2 | #define RNG_HPP
  3 | 
#include <cstdint>
#include <cstring>
#include <ctime>

#include <chrono>
#include <mutex>
#include <random>
 10 | 
 11 | 
 12 | /**
 13 |  * @brief A psuedo-random number generator.
 14 |  *
 15 |  * Based on the JKISS generator from the paper
 16 |  * "Good Practice in (Pseudo) Random Number
 17 |  *  Generation for Bioinformatics Applications"
 18 |  * by David Jones.
 19 |  *
 20 |  * This generator is surprisingly robust for how simple it is, passing all of
 21 |  * the Dieharder tests as well as the complete Big Crush test set in TestU01.
 22 |  * This robustness is comparable to the Mersenne Twister, excepting for the
 23 |  * smaller period (~2^127 compared to MT's enormous ~2^19937 period).
 24 |  *
 25 |  * This PRNG should be more than sufficient for most purposes.
 26 |  */
 27 | class RNG {
 28 | private:
 29 | 	uint32_t x, y, z, c;
 30 | 
 31 | 	/**
 32 | 	 * @brief Core algorithm of the RNG.
 33 | 	 *
 34 | 	 * Progresses an RNG with state variables x_, y_, z_, c_.
 35 | 	 *
 36 | 	 * @return The next unsigned 32-bit integer in the random sequence.
 37 | 	 */
 38 | 	uint32_t n(uint32_t &x_, uint32_t &y_, uint32_t &z_, uint32_t &c_) {
 39 | 		uint64_t t;
 40 | 
 41 | 		x_ = 314527869 * x_ + 1234567;
 42 | 
 43 | 		y_ ^= y_ << 5;
 44 | 		y_ ^= y_ >> 7;
 45 | 		y_ ^= y_ << 22;
 46 | 
 47 | 		t = 4294584393ULL * z_ + c_;
 48 | 		c_ = t >> 32;
 49 | 		z_ = t;
 50 | 
 51 | 		return x_ + y_ + z_;
 52 | 	}
 53 | 
 54 | public:
 55 | 	/**
 56 | 	 * @brief Constructor.
 57 | 	 *
 58 | 	 * Initializes the RNG with a thread-safe unique random seed.
 59 | 	 * Code that uses this constructor can depend on all RNG's from it
 60 | 	 * being independant with a high level of confidence.
 61 | 	 */
 62 | 	RNG() {
 63 | 		// The seeder is seeded with a combination of random_device,
 64 | 		// large primes, and the current time.  The idea is that if
 65 | 		// random_device doesn't function well, the time and the
 66 | 		// primes function as an okay fall-back.  But ideally
 67 | 		// random_device functions well.
 68 | 		std::random_device rd;
 69 | 		static uint32_t seeder_x = rd() + 2123403127 + std::chrono::high_resolution_clock::now().time_since_epoch().count();
 70 | 		static uint32_t seeder_y = rd() + 1987607653 + std::chrono::high_resolution_clock::now().time_since_epoch().count();
 71 | 		static uint32_t seeder_z = rd() + 3569508323 + std::chrono::high_resolution_clock::now().time_since_epoch().count();
 72 | 		static uint32_t seeder_c = rd() + 5206151 + std::chrono::high_resolution_clock::now().time_since_epoch().count();
 73 | 
 74 | 		// Use the seeder to create subsequent RNG's that are
 75 | 		// unique from each other.
 76 | 		static std::mutex mut;
 77 | 		mut.lock();
 78 | 		seed(n(seeder_x, seeder_y, seeder_z, seeder_c),
 79 | 		     n(seeder_x, seeder_y, seeder_z, seeder_c),
 80 | 		     n(seeder_x, seeder_y, seeder_z, seeder_c),
 81 | 		     n(seeder_x, seeder_y, seeder_z, seeder_c));
 82 | 		mut.unlock();
 83 | 	}
 84 | 
 85 | 	/**
 86 | 	 * @brief Constructor.
 87 | 	 *
 88 | 	 * Initializes the RNG with the given seed.  Full 128-bit variant.
 89 | 	 */
 90 | 	RNG(uint32_t seed_a, uint32_t seed_b, uint32_t seed_c, uint32_t seed_d) {
 91 | 		seed(seed_a, seed_b, seed_c, seed_d);
 92 | 	}
 93 | 
 94 | 	/**
 95 | 	 * @brief Constructor.
 96 | 	 *
 97 | 	 * Initializes the RNG with the given seed.  32-bit variant.
 98 | 	 */
 99 | 	RNG(uint32_t seed_) {
100 | 		seed(seed_);
101 | 	}
102 | 
103 | 	/**
104 | 	 * @brief Sets the seed of the RNG.
105 | 	 *
106 | 	 * Full 128-bit variant.
107 | 	 */
108 | 	void seed(uint32_t seed_a, uint32_t seed_b, uint32_t seed_c, uint32_t seed_d) {
109 | 		x = seed_a;
110 | 		y = seed_b;
111 | 		z = seed_c;
112 | 		c = seed_d;
113 | 	}
114 | 
115 | 	/**
116 | 	 * @brief Sets the seed of the RNG.
117 | 	 *
118 | 	 * 32-bit variant, for convenience.
119 | 	 */
120 | 	void seed(uint32_t seed_) {
121 | 		// Scramble up the seed with offsets and multiplications
122 | 		// by large primes.
123 | 		seed((seed_+ 5) * 3885701021,
124 | 		     (seed_ + 43) * 653005939,
125 | 		     (seed_ + 13) * 1264700623,
126 | 		     (seed_ + 67) * 37452703);
127 | 
128 | 		// Run the RNG a couple of times
129 | 		n(x, y, z, c);
130 | 		n(x, y, z, c);
131 | 	}
132 | 
133 | 	/**
134 | 	 * @brief Returns a random unsigned 32-bit integer.
135 | 	 */
136 | 	uint32_t next_uint() {
137 | 		return n(x, y, z, c);
138 | 	}
139 | 
140 | 	/**
141 | 	 * @brief Returns a random 32-bit float in the interval [0.0, 1.0).
142 | 	 */
143 | 	float next_float() {
144 | 		// The following assumes an IEEE 32-bit binary floating point format.
145 | 		// Alternatively, you could just do "next_uint() / 4294967296.0" which
146 | 		// would accomplish the same thing, albeit slower.
147 | 		union {
148 | 			float w;
149 | 			uint32_t a;
150 | 		};
151 | 		a = n(x, y, z, c) >> 9; // Take upper 23 bits
152 | 		a |= 0x3F800000; // Make a float from bits
153 | 		return w-1.f;
154 | 	}
155 | 
156 | 	/**
157 | 	 * @brief Returns a random 32-bit float in the interval [-0.5, 0.5).
158 | 	 */
159 | 	float next_float_c() {
160 | 		return next_float() - 0.5f;
161 | 	}
162 | };
163 | 
164 | #endif // RNG_HPP
165 | 
166 | 


--------------------------------------------------------------------------------
/utils/rng_test.cpp:
--------------------------------------------------------------------------------
 1 | #include "test.hpp"
 2 | 
 3 | #include "rng.hpp"
 4 | 
 5 | /*
 6 |  * Test suite for the random number generator.
 7 |  */
 8 | TEST_CASE("RNG") {
 9 | 	// Test to see if the RNG gives consistent results
10 | 	// when given the same seed
11 | 	SECTION("consistent") {
12 | 		RNG rng1;
13 | 		RNG rng2;
14 | 		bool equals = true;
15 | 
16 | 		rng1.seed(42);
17 | 		rng2.seed(42);
18 | 		for (int i = 0; i < 100000; i++) {
19 | 			equals = equals && (rng1.next_uint() == rng2.next_uint());
20 | 			equals = equals && (rng1.next_float() == rng2.next_float());
21 | 		}
22 | 
23 | 		REQUIRE(equals);
24 | 	}
25 | 
26 | 
27 | 
28 | 	// Test to see if factory-spawned RNG's get different seeds
29 | 	SECTION("factory_seed_1") {
30 | 		RNG rng1;
31 | 		RNG rng2;
32 | 		bool equals = true;
33 | 
34 | 		for (int i = 0; i < 4; i++) {
35 | 			equals = equals && (rng1.next_uint() == rng2.next_uint());
36 | 			equals = equals && (rng1.next_float() == rng2.next_float());
37 | 		}
38 | 
39 | 		REQUIRE(!equals);
40 | 	}
41 | 
42 | 	// Test to see if the factory code works properly inside object
43 | 	// initializations
44 | 	class RNGTest {
45 | 	public:
46 | 		RNG rng;
47 | 		RNGTest() {}
48 | 	};
49 | 
50 | 	SECTION("factory_seed_2") {
51 | 		RNGTest rng1;
52 | 		RNGTest rng2;
53 | 		bool equals = true;
54 | 
55 | 		for (int i = 0; i < 4; i++) {
56 | 			equals = equals && (rng1.rng.next_uint() == rng2.rng.next_uint());
57 | 			equals = equals && (rng1.rng.next_float() == rng2.rng.next_float());
58 | 		}
59 | 
60 | 		REQUIRE(!equals);
61 | 	}
62 | }
63 | 
64 | 


--------------------------------------------------------------------------------
/utils/simd_test.cpp:
--------------------------------------------------------------------------------
 1 | #include "test.hpp"
 2 | #include <iostream>
 3 | 
 4 | #include "simd.hpp"
 5 | 
 6 | using namespace SIMD;
 7 | 
 8 | 
 9 | TEST_CASE("simd") {
10 | 	/* constructor tests */
11 | 	SECTION("constructor_1") {
12 | 		float4 f;
13 | 
14 | 		REQUIRE(f[0] == 0.0f);
15 | 		REQUIRE(f[1] == 0.0f);
16 | 		REQUIRE(f[2] == 0.0f);
17 | 		REQUIRE(f[3] == 0.0f);
18 | 	}
19 | 
20 | 	SECTION("constructor_2") {
21 | 		float4 f(2.0f);
22 | 
23 | 		REQUIRE(f[0] == 2.0f);
24 | 		REQUIRE(f[1] == 2.0f);
25 | 		REQUIRE(f[2] == 2.0f);
26 | 		REQUIRE(f[3] == 2.0f);
27 | 	}
28 | 
29 | 	SECTION("constructor_3") {
30 | 		float4 f(1.0f, 2.0f, 3.0f, 4.0f);
31 | 
32 | 		REQUIRE(f[0] == 1.0f);
33 | 		REQUIRE(f[1] == 2.0f);
34 | 		REQUIRE(f[2] == 3.0f);
35 | 		REQUIRE(f[3] == 4.0f);
36 | 	}
37 | 
38 | 	SECTION("constructor_4") {
39 | 		float fs[4] = {1.0f, 2.0f, 3.0f, 4.0f};
40 | 		float4 f(fs);
41 | 
42 | 		REQUIRE(f[0] == 1.0f);
43 | 		REQUIRE(f[1] == 2.0f);
44 | 		REQUIRE(f[2] == 3.0f);
45 | 		REQUIRE(f[3] == 4.0f);
46 | 	}
47 | }
48 | 


--------------------------------------------------------------------------------
/utils/spinlock.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef PSYCHOPATH_SPINLOCK_HPP
  2 | #define PSYCHOPATH_SPINLOCK_HPP
  3 | 
  4 | #include <atomic>
  5 | 
  6 | 
  7 | /*
  8 |  ** @brief A simple spinlock.
  9 |  *
 10 |  * Useful for low-contention thread syncronization, where the lock is
 11 |  * not held for very long compared to other work done.  For locks that
 12 |  * are held for long periods of time, a mutex is generally better.
 13 |  */
 14 | class SpinLock {
 15 | 	std::atomic_flag lock_flag {ATOMIC_FLAG_INIT};
 16 | public:
 17 | 	/**
 18 | 	 * @brief Acquires the lock, spinning until success.
 19 | 	 */
 20 | 	void lock() {
 21 | 		while (lock_flag.test_and_set(std::memory_order_acquire));
 22 | 	}
 23 | 
 24 | 	/**
 25 | 	 * @brief Attempts to acquire the lock once, returning true on
 26 | 	 * success and false on failure.
 27 | 	 */
 28 | 	bool try_lock() {
 29 | 		return !lock_flag.test_and_set(std::memory_order_acquire);
 30 | 	}
 31 | 
 32 | 	/**
 33 | 	 * @brief Releases the lock.
 34 | 	 */
 35 | 	void unlock() {
 36 | 		lock_flag.clear(std::memory_order_release);
 37 | 	}
 38 | };
 39 | 
 40 | 
 41 | /**
 42 |  * @brief A reader-writer spinlock.
 43 |  *
 44 |  * Allows multiple readers to acquire the lock, but only one writer
 45 |  * at a time.  Useful for cases where writers are rare compared to
 46 |  * readers and where the locks are generally only held for short
 47 |  * periods.
 48 |  */
 49 | class SpinLockRW {
 50 | 	std::atomic_flag w_lock {ATOMIC_FLAG_INIT}; // Writer lock
 51 | 	std::atomic<unsigned int> r_lock_count {0}; // Reader lock count
 52 | public:
 53 | 	/**
 54 | 	 * @brief Acquires the writer lock, spinning until success.
 55 | 	 */
 56 | 	void lock_w() {
 57 | 		while (w_lock.test_and_set(std::memory_order_acquire));
 58 | 
 59 | 		while (r_lock_count > 0);
 60 | 	}
 61 | 
 62 | 	/**
 63 | 	 * @brief Attempts to acquire the writer lock once, returning true
 64 | 	 * on success and false on failure.
 65 | 	 */
 66 | 	bool try_lock_w() {
 67 | 		if (!w_lock.test_and_set(std::memory_order_acquire)) {
 68 | 			if (r_lock_count == 0) {
 69 | 				return true;
 70 | 			} else {
 71 | 				w_lock.clear(std::memory_order_release);
 72 | 				return false;
 73 | 			}
 74 | 		}
 75 | 		return false;
 76 | 	}
 77 | 
 78 | 	/**
 79 | 	 * @brief Releases the writer lock.
 80 | 	 */
 81 | 	void unlock_w() {
 82 | 		w_lock.clear(std::memory_order_release);
 83 | 	}
 84 | 
 85 | 	/**
 86 | 	 * @brief Acquires a reader lock, spinning until success.
 87 | 	 */
 88 | 	void lock_r() {
 89 | 		while (w_lock.test_and_set(std::memory_order_acquire));
 90 | 		++r_lock_count;
 91 | 		w_lock.clear(std::memory_order_release);
 92 | 	}
 93 | 
 94 | 	/**
 95 | 	 * @brief Attempts to acquire a reader lock once, returning true
 96 | 	 * on success and false on failure.
 97 | 	 */
 98 | 	bool try_lock_r() {
 99 | 		if (w_lock.test_and_set(std::memory_order_acquire))
100 | 			return false;
101 | 		++r_lock_count;
102 | 		w_lock.clear(std::memory_order_release);
103 | 		return true;
104 | 	}
105 | 
106 | 	/**
107 | 	 * @brief Releases a reader lock.
108 | 	 */
109 | 	void unlock_r() {
110 | 		--r_lock_count;
111 | 	}
112 | };
113 | 
114 | #endif // PSYCHOPATH_SPINLOCK_HPP


--------------------------------------------------------------------------------
/utils/stack.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef STACK_HPP
 2 | #define STACK_HPP
 3 | 
 4 | #include <vector>
 5 | #include <utility>
 6 | #include <cassert>
 7 | #include <stdint.h>
 8 | 
 9 | /**
10 |  * A type-erased stack that can store arrays of POD data.
11 |  *
12 |  * Do _not_ use this to store RAII types, as their destructors
13 |  * will not be run.  Also, you must keep track of the types
14 |  * you store yourself.
15 |  */
16 | class Stack {
17 | 	std::vector<char> data;
18 | 	std::vector<std::pair<char*, char*>> frames;
19 | 
20 | public:
21 | 	Stack() = delete;
22 | 	Stack(size_t data_capacity, size_t reserved_frames): data(data_capacity) {
23 | 		frames.reserve(reserved_frames+1);
24 | 		frames.emplace_back(std::make_pair(&(data[0]), &(data[0])));
25 | 	}
26 | 
27 | 
28 | 	/**
29 | 	 * Pushes space for element_count items or type T, and returns pointers to
30 | 	 * the beginning and just-past-the-end of the resulting array.
31 | 	 */
32 | 	template <typename T>
33 | 	std::pair<T*, T*> push_frame(size_t element_count) {
34 | 		// Figure out how much padding we need between elements for proper
35 | 		// memory alignment if we put them in an array.
36 | 		constexpr auto array_pad = (alignof(T) - (sizeof(T) % alignof(T))) % alignof(T);
37 | 
38 | 		// Total needed bytes for the requested array of data
39 | 		const auto needed_bytes = (sizeof(T) * element_count) + (array_pad * (element_count - 1));
40 | 
41 | 		// Figure out how much padding we need at the beginning to put the
42 | 		// first element in the right place for memory alignment.
43 | 		const auto mem_addr = reinterpret_cast<uintptr_t>(frames.back().second);
44 | 		const auto begin_pad = (alignof(T) - (mem_addr % alignof(T))) % alignof(T);
45 | 
46 | 		// Push onto the stack
47 | 		char* begin = reinterpret_cast<char*>(mem_addr) + begin_pad;
48 | 		auto end = begin + needed_bytes;
49 | 		frames.emplace_back(std::make_pair(begin, end));
50 | 
51 | 		return std::make_pair(reinterpret_cast<T*>(begin), reinterpret_cast<T*>(end));
52 | 	}
53 | 
54 | 	/**
55 | 	 * Returns the top frame, as pointers with the specified type T.
56 | 	 */
57 | 	template <typename T>
58 | 	std::pair<T*, T*> top_frame() {
59 | 		return std::make_pair(reinterpret_cast<T*>(frames.back().first), reinterpret_cast<T*>(frames.back().second));
60 | 	}
61 | 
62 | 	/**
63 | 	 * Returns a frame walking backwards from the top frame.  Zero means the
64 | 	 * top frame.
65 | 	 */
66 | 	template <typename T>
67 | 	std::pair<T*, T*> prev_frame(size_t i) {
68 | 		assert(i < frames.size());
69 | 		const auto i2 = frames.size() - i - 1;
70 | 		return std::make_pair(reinterpret_cast<T*>(frames[i2].first), reinterpret_cast<T*>(frames[i2].second));
71 | 	}
72 | 
73 | 	/**
74 | 	 * Pops the top frame off the stack.
75 | 	 *
76 | 	 * This invalidates any pointers to that stack frame's memory, as that
77 | 	 * memory may be used again for a subsequent stack frame push.
78 | 	 */
79 | 	void pop_frame() {
80 | 		frames.pop_back();
81 | 	}
82 | 
83 | 	/**
84 | 	 * Clears the stack, as if no pushes had ever taken place.
85 | 	 */
86 | 	void clear() {
87 | 		frames.clear();
88 | 		frames.emplace_back(std::make_pair(&(data[0]), &(data[0])));
89 | 	}
90 | };
91 | 
92 | #endif // STACK_HPP


--------------------------------------------------------------------------------
/utils/stack_test.cpp:
--------------------------------------------------------------------------------
 1 | #include "test.hpp"
 2 | 
 3 | #include "stack.hpp"
 4 | 
 5 | struct alignas(64) Yar {
 6 | 	int a, b;
 7 | };
 8 | 
 9 | TEST_CASE("Stack") {
10 | 	SECTION("ints") {
11 | 		Stack s(1024, 64);
12 | 
13 | 		auto f = s.push_frame<int>(4);
14 | 		f.first[0] = 0;
15 | 		f.first[1] = 1;
16 | 		f.first[2] = 2;
17 | 		f.first[3] = 3;
18 | 
19 | 		f = s.push_frame<int>(4);
20 | 		f.first[0] = 4;
21 | 		f.first[1] = 5;
22 | 		f.first[2] = 6;
23 | 		f.first[3] = 7;
24 | 
25 | 		auto tf = s.top_frame<int>();
26 | 
27 | 		REQUIRE(tf.first[0] == 4);
28 | 		REQUIRE(tf.first[1] == 5);
29 | 		REQUIRE(tf.first[2] == 6);
30 | 		REQUIRE(tf.first[3] == 7);
31 | 
32 | 		s.pop_frame();
33 | 
34 | 		tf = s.top_frame<int>();
35 | 
36 | 		REQUIRE(tf.first[0] == 0);
37 | 		REQUIRE(tf.first[1] == 1);
38 | 		REQUIRE(tf.first[2] == 2);
39 | 		REQUIRE(tf.first[3] == 3);
40 | 
41 | 		s.pop_frame();
42 | 	}
43 | 
44 | 	SECTION("alignment") {
45 | 		Stack s(1024, 64);
46 | 
47 | 		s.push_frame<char>(1);
48 | 
49 | 		auto f = s.push_frame<Yar>(4);
50 | 
51 | 		auto tf = s.top_frame<Yar>();
52 | 
53 | 		REQUIRE((reinterpret_cast<uintptr_t>(f.first) % 64) == 0);
54 | 		REQUIRE((reinterpret_cast<uintptr_t>(tf.second) % 64) == 0);
55 | 		REQUIRE(&(tf.first[4]) == tf.second);
56 | 	}
57 | }


--------------------------------------------------------------------------------
/utils/timer.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef PSYCHOPATH_TIMER_HPP
 2 | #define PSYCHOPATH_TIMER_HPP
 3 | 
 4 | #include <chrono>
 5 | 
 6 | template <class CLOCK=std::chrono::high_resolution_clock>
 7 | class Timer {
 8 | 	std::chrono::time_point<CLOCK> start {CLOCK::now()};
 9 | 
10 | public:
11 | 	/**
12 | 	 * Reports the time elapsed so far in seconds.
13 | 	 */
14 | 	float time() {
15 | 		const auto end = CLOCK::now();
16 | 		const float dur = static_cast<float>((end-start).count());
17 | 		return (dur * CLOCK::period::num) / CLOCK::period::den;
18 | 	}
19 | 
20 | 	void reset() {
21 | 		start = CLOCK::now();
22 | 	}
23 | };
24 | 
25 | #endif // PSYCHOPATH_TIMER_HPP


--------------------------------------------------------------------------------
/utils/utils_test.cpp:
--------------------------------------------------------------------------------
  1 | #include "test.hpp"
  2 | 
  3 | #include <vector>
  4 | #include "utils.hpp"
  5 | 
  6 | TEST_CASE("mutable_partition") {
  7 | 	SECTION("already_partitioned") {
  8 | 		std::vector<int> v {1, 1, 1, 1, 2, 2, 2, 2};
  9 | 
 10 | 		auto p = mutable_partition(v.begin(), v.end(), [](int& i) {
 11 | 			return i == 1;
 12 | 		});
 13 | 
 14 | 		REQUIRE(p == v.begin() + 4);
 15 | 		REQUIRE(v[0] == 1);
 16 | 		REQUIRE(v[1] == 1);
 17 | 		REQUIRE(v[2] == 1);
 18 | 		REQUIRE(v[3] == 1);
 19 | 		REQUIRE(v[4] == 2);
 20 | 		REQUIRE(v[5] == 2);
 21 | 		REQUIRE(v[6] == 2);
 22 | 		REQUIRE(v[7] == 2);
 23 | 	}
 24 | 
 25 | 	SECTION("reverse") {
 26 | 		std::vector<int> v {2, 2, 2, 2, 1, 1, 1, 1};
 27 | 
 28 | 		auto p = mutable_partition(v.begin(), v.end(), [](int& i) {
 29 | 			return i == 1;
 30 | 		});
 31 | 
 32 | 		REQUIRE(p == v.begin() + 4);
 33 | 		REQUIRE(v[0] == 1);
 34 | 		REQUIRE(v[1] == 1);
 35 | 		REQUIRE(v[2] == 1);
 36 | 		REQUIRE(v[3] == 1);
 37 | 		REQUIRE(v[4] == 2);
 38 | 		REQUIRE(v[5] == 2);
 39 | 		REQUIRE(v[6] == 2);
 40 | 		REQUIRE(v[7] == 2);
 41 | 	}
 42 | 
 43 | 	SECTION("interleaved") {
 44 | 		std::vector<int> v {2, 1, 2, 1, 2, 1, 2, 1};
 45 | 
 46 | 		auto p = mutable_partition(v.begin(), v.end(), [](int& i) {
 47 | 			return i == 1;
 48 | 		});
 49 | 
 50 | 		REQUIRE(p == v.begin() + 4);
 51 | 		REQUIRE(v[0] == 1);
 52 | 		REQUIRE(v[1] == 1);
 53 | 		REQUIRE(v[2] == 1);
 54 | 		REQUIRE(v[3] == 1);
 55 | 		REQUIRE(v[4] == 2);
 56 | 		REQUIRE(v[5] == 2);
 57 | 		REQUIRE(v[6] == 2);
 58 | 		REQUIRE(v[7] == 2);
 59 | 	}
 60 | 
 61 | 	SECTION("all_true") {
 62 | 		std::vector<int> v {1, 1, 1, 1, 1, 1, 1, 1};
 63 | 
 64 | 		auto p = mutable_partition(v.begin(), v.end(), [](int& i) {
 65 | 			return i == 1;
 66 | 		});
 67 | 
 68 | 		REQUIRE(p == v.end());
 69 | 		REQUIRE(v[0] == 1);
 70 | 		REQUIRE(v[1] == 1);
 71 | 		REQUIRE(v[2] == 1);
 72 | 		REQUIRE(v[3] == 1);
 73 | 		REQUIRE(v[4] == 1);
 74 | 		REQUIRE(v[5] == 1);
 75 | 		REQUIRE(v[6] == 1);
 76 | 		REQUIRE(v[7] == 1);
 77 | 	}
 78 | 
 79 | 	SECTION("all_false") {
 80 | 		std::vector<int> v {2, 2, 2, 2, 2, 2, 2, 2};
 81 | 
 82 | 		auto p = mutable_partition(v.begin(), v.end(), [](int& i) {
 83 | 			return i == 1;
 84 | 		});
 85 | 
 86 | 		REQUIRE(p == v.begin());
 87 | 		REQUIRE(v[0] == 2);
 88 | 		REQUIRE(v[1] == 2);
 89 | 		REQUIRE(v[2] == 2);
 90 | 		REQUIRE(v[3] == 2);
 91 | 		REQUIRE(v[4] == 2);
 92 | 		REQUIRE(v[5] == 2);
 93 | 		REQUIRE(v[6] == 2);
 94 | 		REQUIRE(v[7] == 2);
 95 | 	}
 96 | 
 97 | 	SECTION("predicate_run_once_per_element") {
 98 | 		std::vector<int> v {2, 1, 2, 1, 2, 1, 2, 1};
 99 | 		int n = 0;
100 | 
101 | 		auto p = mutable_partition(v.begin(), v.end(), [&](int& i) {
102 | 			++n;
103 | 			return i == 1;
104 | 		});
105 | 
106 | 		REQUIRE(n == 8);
107 | 		REQUIRE(p == v.begin() + 4);
108 | 		REQUIRE(v[0] == 1);
109 | 		REQUIRE(v[1] == 1);
110 | 		REQUIRE(v[2] == 1);
111 | 		REQUIRE(v[3] == 1);
112 | 		REQUIRE(v[4] == 2);
113 | 		REQUIRE(v[5] == 2);
114 | 		REQUIRE(v[6] == 2);
115 | 		REQUIRE(v[7] == 2);
116 | 	}
117 | }


--------------------------------------------------------------------------------