├── README.md
├── example_images
    ├── stairs_triangles_not_optimized.png
    └── stairs_triangles_optimized.png
└── seamoptimizer.h


/README.md:
--------------------------------------------------------------------------------
 1 | # seamoptimizer
 2 | A C/C++ single-file library that minimizes the hard transition errors of disjoint edges in lightmaps.
 3 | It is based on an idea presented by Michał Iwanicki in the talk [Lighting Technology of "The Last Of Us"](http://miciwan.com/SIGGRAPH2013/Lighting%20Technology%20of%20The%20Last%20Of%20Us.pdf).
 4 | A least squares solver is used to find a minimal error solution to the problem of sampling along the edges between triangles that are mapped with disjoint lightmap regions.
 5 | This can improve the visual appearance at these discontinuities or "seams".
 6 | 
 7 | 
 8 | To paste the implementation into your project, insert the following lines:
 9 | ```
10 | #define SEAMOPTIMIZER_IMPLEMENTATION
11 | #include "seamoptimizer.h"
12 | ```
13 | 
14 | Before optimizing a very bad UV mapping (each triangle edge is a seam):
15 | ![Seam Optimizer Before](https://github.com/ands/seamoptimizer/raw/master/example_images/stairs_triangles_not_optimized.png)
16 | After optimizing the seams of the bad UV mapping:
17 | ![Seam Optimizer After](https://github.com/ands/seamoptimizer/raw/master/example_images/stairs_triangles_optimized.png)
18 | The seams are not all completely gone, but, especially on the walls, there is a very noticeable improvement.
19 | 
20 | # Example Usage
21 | The following example finds and optimizes all the seams for some mesh geometry on a lightmap.
22 | ```
23 | // only optimize seams between triangles that are on the same plane
24 | // (where abs(dot(A.normal, B.normal)) > cosNormalThreshold):
25 | const float cosNormalThreshold = 0.99f;
26 | 
27 | // how "important" the original color values are:
28 | const float lambda = 0.1f;
29 | 
30 | 
31 | printf("Searching for separate seams...\n");
32 | so_seam_t *seams = so_seams_find(
33 | 	(float*)mesh->positions, (float*)mesh->texcoords, mesh->vertexCount,
34 | 	cosNormalThreshold,
35 | 	lightmap->data, lightmap->width, lightmap->height, lightmap->channelCount);
36 | 
37 | 
38 | printf("Optimizing seams...\n");
39 | for (so_seam_t *seam = seams; seam; seam = so_seam_next(seam))
40 | {
41 | 	// NOTE: seams can also be optimized in parallel on separate threads!
42 | 	if (!so_seam_optimize(seam, lightmap->data, lightmap->width, lightmap->height, lightmap->channelCount, lambda))
43 | 		printf("Could not optimize a seam (Cholesky decomposition failed).\n");
44 | }
45 | 
46 | printf("Done!\n");
47 | so_seams_free(seams);
48 | ```
49 | 
50 | # Thanks
51 | - To Michał Iwanicki for helping me with the transformation of the problem into a problem that can be solved by a least-squares solver
52 | - To Dominik Lazarek for pointing me to Michał's presentation
53 | 


--------------------------------------------------------------------------------
/example_images/stairs_triangles_not_optimized.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ands/seamoptimizer/19b835c6e52d2100a8e6b58e19fe8da88d271368/example_images/stairs_triangles_not_optimized.png


--------------------------------------------------------------------------------
/example_images/stairs_triangles_optimized.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ands/seamoptimizer/19b835c6e52d2100a8e6b58e19fe8da88d271368/example_images/stairs_triangles_optimized.png


--------------------------------------------------------------------------------
/seamoptimizer.h:
--------------------------------------------------------------------------------
   1 | /***********************************************************
   2 | * A single header file lightmap seam optimization library  *
   3 | * https://github.com/ands/seamoptimizer                    *
   4 | * no warranty implied | use at your own risk               *
   5 | * author: Andreas Mantler (ands) | last change: 05.03.2017 *
   6 | *                                                          *
   7 | * License:                                                 *
   8 | * This software is in the public domain.                   *
   9 | * Where that dedication is not recognized,                 *
  10 | * you are granted a perpetual, irrevocable license to copy *
  11 | * and modify this file however you want.                   *
  12 | ***********************************************************/
  13 | 
  14 | #ifndef SEAMOPTIMIZER_H
  15 | #define SEAMOPTIMIZER_H
  16 | 
  17 | #ifndef SO_CALLOC
  18 | #include <malloc.h> // calloc, free, alloca
  19 | #define SO_CALLOC(count, size) calloc(count, size)
  20 | #define SO_FREE(ptr) free(ptr)
  21 | #endif
  22 | 
  23 | typedef int so_bool;
  24 | #define SO_FALSE 0
  25 | #define SO_TRUE  1
  26 | 
  27 | typedef struct so_seam_t so_seam_t;
  28 | 
  29 | // API
  30 | 
  31 | // so_seams_find:
  32 | // Find all seams according to the specified triangulated geometry and its texture coordinates.
  33 | // This searches for edges that are shared by triangles, but are disjoint in UV space.
  34 | 
  35 | // positions: triangle array 3d positions ((x0, y0, z0), (x1, y1, z1), (x2, y2, z2)), ((x0, y0, z0), (x1, y1, z1), (x2, y2, z2)), ...
  36 | // texcoords: triangle array 2d uv coords (    (u0, v0),     (u1, v1),     (u2, v2)), (    (u0, v0),     (u1, v1),     (u2, v2)), ...
  37 | // vertices: total number of vertices ( = triangles * 3)
  38 | 
  39 | // cosNormalThreshold controls at which angles between neighbour triangles a seam should be considered.
  40 | // if abs(dot(triangle A normal, triangle B normal)) > cosNormalThreshold then the seam is included in the returned set.
  41 | 
  42 | // data, w, h, c specifies the lightmap data (data should be a w * h * c array of floats).
  43 | // w = lightmap width, h = lightmap height, c = number of lightmap channels (1..4).
  44 | 
  45 | // returns a linked list of the found seams.
  46 | 
  47 | // Warning: The data may be modified! Empty (zeroed) texels along the seam edges are filled with the average of their closest non-empty neighbours.
  48 | so_seam_t *so_seams_find(
  49 | 	float *positions, float *texcoords, int vertices,
  50 | 	float cosNormalThreshold,
  51 | 	float *data, int w, int h, int c);
  52 | 
  53 | 
  54 | // so_seam_optimize:
  55 | // Optimize a single seam. Seams can be optimized in parallel on different threads.
  56 | // lambda: Weight that controls the deviation from the original color values (must be > 0).
  57 | //         Higher values => Less deviation from the original edge colors => more obvious seams.
  58 | //         Too low values => Optimizer may just choose black as the perfect color for all seam pixels.
  59 | // returns whether the optimization was successful.
  60 | so_bool so_seam_optimize(
  61 | 	so_seam_t *seam,
  62 | 	float *data, int w, int h, int c,
  63 | 	float lambda);
  64 | 
  65 | // so_seam_next: Retrieves the next seam in the linked list.
  66 | so_seam_t *so_seam_next(
  67 | 	so_seam_t *seam);
  68 | 
  69 | // so_seams_free: Free the resources for all seams in the list.
  70 | void so_seams_free(
  71 | 	so_seam_t *seams);
  72 | 
  73 | #endif
  74 | ////////////////////// END OF HEADER //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  75 | #ifdef SEAMOPTIMIZER_IMPLEMENTATION
  76 | #undef SEAMOPTIMIZER_IMPLEMENTATION
  77 | 
  78 | #include <stdlib.h> // qsort
  79 | #include <stdio.h> // printf (TODO)
  80 | #include <string.h> // memcpy
  81 | #include <stdint.h>
  82 | #include <math.h>
  83 | #include <float.h>
  84 | #include <assert.h>
  85 | 
  86 | #define SO_EPSILON 0.00001f
  87 | 
  88 | #ifdef _DEBUG
  89 | #define SO_NOT_ZERO(v) (v > SO_EPSILON || v < -SO_EPSILON) // a lot faster in debug
  90 | #else
  91 | #define SO_NOT_ZERO(v) (fabsf(v) > SO_EPSILON) // faster in release
  92 | #endif
  93 | 
  94 | 
  95 | #ifdef SO_APPROX_RSQRT
  96 | #include "xmmintrin.h"
  97 | static inline float so_rsqrtf(float v)
  98 | {
  99 | 	return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(v)));
 100 | }
 101 | #else
 102 | static inline float so_rsqrtf(float v)
 103 | {
 104 | 	return 1.0f / sqrtf(v);
 105 | }
 106 | #endif
 107 | 
 108 | static inline int16_t  so_min16i    (int16_t a, int16_t b) { return a < b ? a : b; }
 109 | static inline int16_t  so_max16i    (int16_t a, int16_t b) { return a > b ? a : b; }
 110 | static inline float    so_minf      (float   a, float   b) { return a < b ? a : b; }
 111 | static inline float    so_maxf      (float   a, float   b) { return a > b ? a : b; }
 112 | static inline float    so_absf      (float   a           ) { return a < 0.0f ? -a : a; }
 113 | 
 114 | typedef struct so_vec2 { float x, y; } so_vec2;
 115 | static inline so_vec2  so_v2i       (int     x, int     y) { so_vec2 v = { (float)x, (float)y }; return v; }
 116 | static inline so_vec2  so_v2        (float   x, float   y) { so_vec2 v = { x, y }; return v; }
 117 | static inline so_vec2  so_add2      (so_vec2 a, so_vec2 b) { return so_v2(a.x + b.x, a.y + b.y); }
 118 | static inline so_vec2  so_sub2      (so_vec2 a, so_vec2 b) { return so_v2(a.x - b.x, a.y - b.y); }
 119 | static inline so_vec2  so_mul2      (so_vec2 a, so_vec2 b) { return so_v2(a.x * b.x, a.y * b.y); }
 120 | static inline so_vec2  so_scale2    (so_vec2 a, float   b) { return so_v2(a.x * b, a.y * b); }
 121 | static inline float    so_length2sq (so_vec2 a           ) { return a.x * a.x + a.y * a.y; }
 122 | static inline float    so_length2   (so_vec2 a           ) { return sqrtf(so_length2sq(a)); }
 123 | 
 124 | typedef struct so_vec3 { float x, y, z; } so_vec3;
 125 | static inline so_vec3  so_v3        (float   x, float   y, float   z) { so_vec3 v = { x, y, z }; return v; }
 126 | static inline so_vec3  so_sub3      (so_vec3 a, so_vec3 b) { return so_v3(a.x - b.x, a.y - b.y, a.z - b.z); }
 127 | static inline so_vec3  so_mul3      (so_vec3 a, so_vec3 b) { return so_v3(a.x * b.x, a.y * b.y, a.z * b.z); }
 128 | static inline so_vec3  so_scale3    (so_vec3 a, float   b) { return so_v3(a.x * b, a.y * b, a.z * b); }
 129 | static inline so_vec3  so_div3      (so_vec3 a, float   b) { return so_scale3(a, 1.0f / b); }
 130 | static inline so_vec3  so_min3      (so_vec3 a, so_vec3 b) { return so_v3(so_minf(a.x, b.x), so_minf(a.y, b.y), so_minf(a.z, b.z)); }
 131 | static inline so_vec3  so_max3      (so_vec3 a, so_vec3 b) { return so_v3(so_maxf(a.x, b.x), so_maxf(a.y, b.y), so_maxf(a.z, b.z)); }
 132 | static inline float    so_dot3      (so_vec3 a, so_vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
 133 | static inline so_vec3  so_cross3    (so_vec3 a, so_vec3 b) { return so_v3(a.y * b.z - b.y * a.z, a.z * b.x - b.z * a.x, a.x * b.y - b.x * a.y); }
 134 | static inline float    so_length3sq (so_vec3 a           ) { return a.x * a.x + a.y * a.y + a.z * a.z; }
 135 | static inline float    so_length3   (so_vec3 a           ) { return sqrtf(so_length3sq(a)); }
 136 | static inline so_vec3  so_normalize3(so_vec3 a           ) { return so_div3(a, so_length3(a)); }
 137 | 
 138 | //#define SO_CHECK_FOR_MEMORY_LEAKS // check for memory leaks. don't use this in multithreaded code!
 139 | 
 140 | #ifdef SO_CHECK_FOR_MEMORY_LEAKS
 141 | static uint64_t so_allocated = 0;
 142 | static uint64_t so_allocated_max = 0;
 143 | 
 144 | static void *so_alloc_void(size_t size)
 145 | {
 146 | 	void *memory = SO_CALLOC(1, size + sizeof(size_t));
 147 | 	(*(size_t*)memory) = size;
 148 | 	so_allocated += size;
 149 | 	if (so_allocated > so_allocated_max)
 150 | 		so_allocated_max = so_allocated;
 151 | 	return (size_t*)memory + 1;
 152 | }
 153 | static void so_free(void *memory)
 154 | {
 155 | 	size_t size = ((size_t*)memory)[-1];
 156 | 	so_allocated -= size;
 157 | 	SO_FREE(((size_t*)memory) - 1);
 158 | }
 159 | #else
 160 | static void *so_alloc_void(size_t size)
 161 | {
 162 | 	return SO_CALLOC(1, size);
 163 | }
 164 | static void so_free(void *memory)
 165 | {
 166 | 	SO_FREE(memory);
 167 | }
 168 | #endif
 169 | 
 170 | #define so_alloc(type, count) ((type*)so_alloc_void(sizeof(type) * (count)))
 171 | 
 172 | static inline so_bool so_accumulate_texel(float *sums, int x, int y, float *data, int w, int h, int c)
 173 | {
 174 | 	so_bool exists = SO_FALSE;
 175 | 	for (int i = 0; i < c; i++)
 176 | 	{
 177 | 		float v = data[(y * w + x) * c + i];
 178 | 		sums[i] += v;
 179 | 		exists |= v > 0.0f;
 180 | 	}
 181 | 	return exists;
 182 | }
 183 | 
 184 | static void so_fill_with_closest(int x, int y, float *data, int w, int h, int c, int depth = 2)
 185 | {
 186 | 	assert(c <= 4);
 187 | 
 188 | 	for (int i = 0; i < c; i++)
 189 | 		if (data[(y * w + x) * c + i] > 0.0f)
 190 | 			return;
 191 | 
 192 | 	float sums[4] = {};
 193 | 	int n = 0;
 194 | 
 195 | 	if (x     > 0 && so_accumulate_texel(sums, x - 1, y, data, w, h, c)) n++;
 196 | 	if (x + 1 < w && so_accumulate_texel(sums, x + 1, y, data, w, h, c)) n++;
 197 | 	if (y     > 0 && so_accumulate_texel(sums, x, y - 1, data, w, h, c)) n++;
 198 | 	if (y + 1 < h && so_accumulate_texel(sums, x, y + 1, data, w, h, c)) n++;
 199 | 
 200 | 	if (!n && depth)
 201 | 	{
 202 | 		--depth;
 203 | 		if (x > 0)
 204 | 		{
 205 | 			so_fill_with_closest(x - 1, y, data, w, h, c, depth);
 206 | 			if (so_accumulate_texel(sums, x - 1, y, data, w, h, c)) n++;
 207 | 		}
 208 | 		if (x + 1 < w)
 209 | 		{
 210 | 			so_fill_with_closest(x + 1, y, data, w, h, c, depth);
 211 | 			if (so_accumulate_texel(sums, x + 1, y, data, w, h, c)) n++;
 212 | 		}
 213 | 		if (y > 0)
 214 | 		{
 215 | 			so_fill_with_closest(x, y - 1, data, w, h, c, depth);
 216 | 			if (so_accumulate_texel(sums, x, y - 1, data, w, h, c)) n++;
 217 | 		}
 218 | 		if (y + 1 < h)
 219 | 		{
 220 | 			so_fill_with_closest(x, y + 1, data, w, h, c, depth);
 221 | 			if (so_accumulate_texel(sums, x, y + 1, data, w, h, c)) n++;
 222 | 		}
 223 | 	}
 224 | 
 225 | 	if (n)
 226 | 	{
 227 | 		float ni = 1.0f / (float)n;
 228 | 		for (int i = 0; i < c; i++)
 229 | 			data[(y * w + x) * c + i] = sums[i] * ni;
 230 | 	}
 231 | }
 232 | 
 233 | typedef struct
 234 | {
 235 | 	int16_t x, y;
 236 | } so_texel_t;
 237 | 
 238 | static inline int so_texel_cmp(const void *l, const void *r)
 239 | {
 240 | 	const so_texel_t *lt = (const so_texel_t*)l;
 241 | 	const so_texel_t *rt = (const so_texel_t*)r;
 242 | 	if (lt->y < rt->y) return -1;
 243 | 	if (lt->y > rt->y) return 1;
 244 | 	if (lt->x < rt->x) return -1;
 245 | 	if (lt->x > rt->x) return 1;
 246 | 	return 0;
 247 | }
 248 | 
 249 | typedef struct
 250 | {
 251 | 	so_texel_t texels[4];
 252 | 	float weights[4];
 253 | } so_bilinear_sample_t;
 254 | 
 255 | typedef struct
 256 | {
 257 | 	so_bilinear_sample_t sides[2];
 258 | } so_stitching_point_t;
 259 | 
 260 | typedef struct 
 261 | {
 262 | 	so_texel_t *texels;
 263 | 	uint32_t count;
 264 | 	uint32_t capacity;
 265 | } so_texel_set_t;
 266 | 
 267 | static inline uint32_t so_texel_hash(so_texel_t texel, uint32_t capacity)
 268 | {
 269 | 	return (texel.y * 104173 + texel.x * 86813) % capacity;
 270 | }
 271 | 
 272 | static void so_texel_set_add(so_texel_set_t *set, so_texel_t *texels, int entries, int arrayLength = 0)
 273 | {
 274 | 	if (set->count + entries > set->capacity * 3 / 4) // leave some free space to avoid having many collisions
 275 | 	{
 276 | 		int newCapacity = set->capacity > 64 ? set->capacity * 2 : 64;
 277 | 		while (set->count + entries > newCapacity * 3 / 4)
 278 | 			newCapacity *= 2;
 279 | 
 280 | 		so_texel_t *newTexels = so_alloc(so_texel_t, newCapacity);
 281 | 
 282 | 		for (int i = 0; i < newCapacity; i++)
 283 | 			newTexels[i].x = -1;
 284 | 
 285 | 		if (set->texels)
 286 | 		{
 287 | 			for (int i = 0; i < set->capacity; i++) // rehash all old texels
 288 | 			{
 289 | 				if (set->texels[i].x != -1)
 290 | 				{
 291 | 					uint32_t hash = so_texel_hash(set->texels[i], newCapacity);
 292 | 					while (newTexels[hash].x != -1) // collisions
 293 | 						hash = (hash + 1) % newCapacity;
 294 | 					newTexels[hash] = set->texels[i];
 295 | 				}
 296 | 			}
 297 | 			so_free(set->texels);
 298 | 		}
 299 | 
 300 | 		set->texels = newTexels;
 301 | 		set->capacity = newCapacity;
 302 | 	}
 303 | 
 304 | 	if (arrayLength == 0)
 305 | 		arrayLength = entries;
 306 | 
 307 | 	for (int i = 0; i < arrayLength; i++)
 308 | 	{
 309 | 		if (texels[i].x != -1)
 310 | 		{
 311 | 			uint32_t hash = so_texel_hash(texels[i], set->capacity);
 312 | 			while (set->texels[hash].x != -1) // collisions
 313 | 			{
 314 | 				if (set->texels[hash].x == texels[i].x && set->texels[hash].y == texels[i].y)
 315 | 					break; // texel is already in the set
 316 | 				hash = (hash + 1) % set->capacity;
 317 | 			}
 318 | 
 319 | 			if (set->texels[hash].x == -1)
 320 | 			{
 321 | 				set->texels[hash] = texels[i];
 322 | 				set->count++;
 323 | 			}
 324 | 		}
 325 | 	}
 326 | }
 327 | 
 328 | static so_bool so_texel_set_contains(so_texel_set_t *set, so_texel_t texel)
 329 | {
 330 | 	uint32_t hash = so_texel_hash(texel, set->capacity);
 331 | 	while (set->texels[hash].x != -1) // entries with same hash
 332 | 	{
 333 | 		if (set->texels[hash].x == texel.x && set->texels[hash].y == texel.y)
 334 | 			return SO_TRUE; // texel is already in the set
 335 | 		hash = (hash + 1) % set->capacity;
 336 | 	}
 337 | 	return SO_FALSE;
 338 | }
 339 | 
 340 | static void so_texel_set_free(so_texel_set_t *set)
 341 | {
 342 | 	so_free(set->texels);
 343 | 	*set = {0};
 344 | }
 345 | 
 346 | typedef struct 
 347 | {
 348 | 	so_stitching_point_t *points;
 349 | 	uint32_t count;
 350 | 	uint32_t capacity;
 351 | } so_stitching_points_t;
 352 | 
 353 | static void so_stitching_points_alloc(so_stitching_points_t *points, uint32_t n)
 354 | {
 355 | 	points->points = so_alloc(so_stitching_point_t, n);
 356 | 	points->capacity = n;
 357 | 	points->count = 0;
 358 | }
 359 | static void so_stitching_points_free(so_stitching_points_t *points)
 360 | {
 361 | 	so_free(points->points);
 362 | 	*points = {0};
 363 | }
 364 | static void so_stitching_points_add(so_stitching_points_t *points, so_stitching_point_t *point)
 365 | {
 366 | 	assert(points->count < points->capacity);
 367 | 	points->points[points->count++] = *point;
 368 | }
 369 | static void so_stitching_points_append(so_stitching_points_t *points, so_stitching_points_t *other)
 370 | {
 371 | 	so_stitching_point_t *newPoints = so_alloc(so_stitching_point_t, points->capacity + other->capacity);
 372 | 	memcpy(newPoints, points->points, sizeof(so_stitching_point_t) * points->count);
 373 | 	memcpy(newPoints + points->count, other->points, sizeof(so_stitching_point_t) * other->count);
 374 | 	so_free(points->points);
 375 | 	points->points = newPoints;
 376 | 	points->capacity = points->capacity + other->capacity;
 377 | 	points->count = points->count + other->count;
 378 | }
 379 | 
 380 | struct so_seam_t
 381 | {
 382 | 	int16_t x_min, y_min, x_max, y_max;
 383 | 	so_texel_set_t texels;
 384 | 	so_stitching_points_t stitchingPoints;
 385 | 	so_seam_t *next;
 386 | };
 387 | 
 388 | so_seam_t *so_seam_next(so_seam_t *seam)
 389 | {
 390 | 	return seam->next;
 391 | }
 392 | 
 393 | static void so_seam_alloc(so_seam_t *seam, uint32_t stitchingPointCount)
 394 | {
 395 | 	so_stitching_points_alloc(&seam->stitchingPoints, stitchingPointCount);
 396 | }
 397 | static void so_seam_free(so_seam_t *seam)
 398 | {
 399 | 	so_texel_set_free(&seam->texels);
 400 | 	so_stitching_points_free(&seam->stitchingPoints);
 401 | }
 402 | 
 403 | static void so_seam_add(so_seam_t *seam, so_stitching_point_t *point)
 404 | {
 405 | 	for (int side = 0; side < 2; side++)
 406 | 	{
 407 | 		for (int texel = 0; texel < 4; texel++)
 408 | 		{
 409 | 			so_texel_t t = point->sides[side].texels[texel];
 410 | 			seam->x_min = t.x < seam->x_min ? t.x : seam->x_min;
 411 | 			seam->y_min = t.y < seam->y_min ? t.y : seam->y_min;
 412 | 			seam->x_max = t.x > seam->x_max ? t.x : seam->x_max;
 413 | 			seam->y_max = t.y > seam->y_max ? t.y : seam->y_max;
 414 | 		}
 415 | 		so_texel_set_add(&seam->texels, point->sides[side].texels, 4);
 416 | 	}
 417 | 
 418 | 	so_stitching_points_add(&seam->stitchingPoints, point);
 419 | }
 420 | 
 421 | static so_bool so_seams_intersect(so_seam_t *a, so_seam_t *b)
 422 | {
 423 | 	// compare bounding boxes first
 424 | 	if (a->x_min > b->x_max || b->x_min >= a->x_max ||
 425 | 		a->y_min > b->y_max || b->y_min >= a->y_max)
 426 | 		return SO_FALSE;
 427 | 
 428 | 	// bounds intersect -> check each individual texel for intersection
 429 | 	if (a->texels.capacity > b->texels.capacity) // swap so that we always loop over the smaller set
 430 | 	{
 431 | 		so_seam_t *tmp = a;
 432 | 		a = b;
 433 | 		b = tmp;
 434 | 	}
 435 | 
 436 | 	for (int i = 0; i < a->texels.capacity; i++)
 437 | 		if (a->texels.texels[i].x != -1)
 438 | 			if (so_texel_set_contains(&b->texels, a->texels.texels[i]))
 439 | 				return SO_TRUE;
 440 | 	return SO_FALSE;
 441 | }
 442 | 
 443 | static void so_seams_in_place_merge(so_seam_t *dst, so_seam_t *src)
 444 | {
 445 | 	// expand bounding box
 446 | 	dst->x_min = src->x_min < dst->x_min ? src->x_min : dst->x_min;
 447 | 	dst->y_min = src->y_min < dst->y_min ? src->y_min : dst->y_min;
 448 | 	dst->x_max = src->x_max > dst->x_max ? src->x_max : dst->x_max;
 449 | 	dst->y_max = src->y_max > dst->y_max ? src->y_max : dst->y_max;
 450 | 
 451 | 	// insert src elements
 452 | 	so_texel_set_add(&dst->texels, src->texels.texels, src->texels.count, src->texels.capacity);
 453 | 	so_stitching_points_append(&dst->stitchingPoints, &src->stitchingPoints);
 454 | }
 455 | 
 456 | static void so_seams_add_seam(so_seam_t **seams, so_vec2 a0, so_vec2 a1, so_vec2 b0, so_vec2 b1, float *data, int w, int h, int c)
 457 | {
 458 | 	so_vec2 s = so_v2i(w, h);
 459 | 	a0 = so_mul2(a0, s);
 460 | 	a1 = so_mul2(a1, s);
 461 | 	b0 = so_mul2(b0, s);
 462 | 	b1 = so_mul2(b1, s);
 463 | 	so_vec2 ad = so_sub2(a1, a0);
 464 | 	so_vec2 bd = so_sub2(b1, b0);
 465 | 	float l = so_length2(ad);
 466 | 	int iterations = (int)(l * 5.0f); // TODO: is this the best value?
 467 | 	float step = 1.0f / iterations;
 468 | 
 469 | 	so_seam_t currentSeam = {0};
 470 | 	currentSeam.x_min = w; currentSeam.y_min = h;
 471 | 	currentSeam.x_max = 0; currentSeam.y_max = 0;
 472 | 
 473 | 	so_seam_alloc(&currentSeam, iterations + 1);
 474 | 
 475 | 	for (int i = 0; i <= iterations; i++)
 476 | 	{
 477 | 		float t = i * step;
 478 | 		so_vec2 a = so_add2(a0, so_scale2(ad, t));
 479 | 		so_vec2 b = so_add2(b0, so_scale2(bd, t));
 480 | 		int16_t ax = (int16_t)roundf(a.x), ay = (int16_t)roundf(a.y);
 481 | 		int16_t bx = (int16_t)roundf(b.x), by = (int16_t)roundf(b.y);
 482 | 		float au = a.x - ax, av = a.y - ay, nau = 1.0f - au, nav = 1.0f - av;
 483 | 		float bu = b.x - bx, bv = b.y - by, nbu = 1.0f - bu, nbv = 1.0f - bv;
 484 | 
 485 | 		so_texel_t ta0 = { ax                      , ay                       };
 486 | 		so_texel_t ta1 = { so_min16i(ax + 1, w - 1), ay                       };
 487 | 		so_texel_t ta2 = { ax                      , so_min16i(ay + 1, h - 1) };
 488 | 		so_texel_t ta3 = { so_min16i(ax + 1, w - 1), so_min16i(ay + 1, h - 1) };
 489 | 
 490 | 		so_texel_t tb0 = { bx                      , by                       };
 491 | 		so_texel_t tb1 = { so_min16i(bx + 1, w - 1), by                       };
 492 | 		so_texel_t tb2 = { bx                      , so_min16i(by + 1, h - 1) };
 493 | 		so_texel_t tb3 = { so_min16i(bx + 1, w - 1), so_min16i(by + 1, h - 1) };
 494 | 
 495 | 		so_fill_with_closest(ta0.x, ta0.y, data, w, h, c);
 496 | 		so_fill_with_closest(ta1.x, ta1.y, data, w, h, c);
 497 | 		so_fill_with_closest(ta2.x, ta2.y, data, w, h, c);
 498 | 		so_fill_with_closest(ta3.x, ta3.y, data, w, h, c);
 499 | 
 500 | 		so_fill_with_closest(tb0.x, tb0.y, data, w, h, c);
 501 | 		so_fill_with_closest(tb1.x, tb1.y, data, w, h, c);
 502 | 		so_fill_with_closest(tb2.x, tb2.y, data, w, h, c);
 503 | 		so_fill_with_closest(tb3.x, tb3.y, data, w, h, c);
 504 | 
 505 | 		so_stitching_point_t sp;
 506 | 		sp.sides[0].texels[0] = ta0;
 507 | 		sp.sides[0].texels[1] = ta1;
 508 | 		sp.sides[0].texels[2] = ta2;
 509 | 		sp.sides[0].texels[3] = ta3;
 510 | 		
 511 | 		sp.sides[0].weights[0] = nau * nav;
 512 | 		sp.sides[0].weights[1] = au * nav;
 513 | 		sp.sides[0].weights[2] = nau * av;
 514 | 		sp.sides[0].weights[3] = au * av;
 515 | 
 516 | 		sp.sides[1].texels[0] = tb0;
 517 | 		sp.sides[1].texels[1] = tb1;
 518 | 		sp.sides[1].texels[2] = tb2;
 519 | 		sp.sides[1].texels[3] = tb3;
 520 | 
 521 | 		sp.sides[1].weights[0] = nbu * nbv;
 522 | 		sp.sides[1].weights[1] = bu * nbv;
 523 | 		sp.sides[1].weights[2] = nbu * bv;
 524 | 		sp.sides[1].weights[3] = bu * bv;
 525 | 
 526 | 		so_seam_add(&currentSeam, &sp);
 527 | 	}
 528 | 
 529 | 	so_seam_t *dstSeam = 0;
 530 | 	for (so_seam_t **seam = seams; *seam; seam = &(*seam)->next)
 531 | 	{
 532 | 		retry:
 533 | 		if (so_seams_intersect(&currentSeam, *seam))
 534 | 		{
 535 | 			if (!dstSeam) // found a seam that the edge is connected to -> add current edge to that seam
 536 | 			{
 537 | 				so_seams_in_place_merge(*seam, &currentSeam);
 538 | 				dstSeam = *seam;
 539 | 			}
 540 | 			else // found another seam that the edge is connected to -> merge those seams
 541 | 			{
 542 | 				so_seams_in_place_merge(dstSeam, *seam);
 543 | 
 544 | 				// remove current seam from seams
 545 | 				so_seam_t *toDelete = *seam;
 546 | 				*seam = (*seam)->next;
 547 | 				so_seam_free(toDelete);
 548 | 				so_free(toDelete);
 549 | 				if (*seam)
 550 | 					goto retry; // don't move to next since we already did that by deleting the current seam
 551 | 				else
 552 | 					break;
 553 | 			}
 554 | 		}
 555 | 	}
 556 | 	if (!dstSeam) // did not find a seam that the edge is connected to -> make a new one
 557 | 	{
 558 | 		currentSeam.next = *seams;
 559 | 		*seams = so_alloc(so_seam_t, 1);
 560 | 		**seams = currentSeam;
 561 | 	}
 562 | 	else
 563 | 		so_seam_free(&currentSeam);
 564 | }
 565 | 
 566 | void so_seams_free(so_seam_t *seams)
 567 | {
 568 | 	so_seam_t *seam = seams;
 569 | 	while (seam)
 570 | 	{
 571 | 		so_seam_t *next = seam->next;
 572 | 		so_seam_free(seam);
 573 | 		so_free(seam);
 574 | 		seam = next;
 575 | 	}
 576 | 
 577 | #ifdef SO_CHECK_FOR_MEMORY_LEAKS
 578 | 	assert(so_allocated == 0);
 579 | 	printf("Allocated max %d MB. Not freed: %d bytes.\n", so_allocated_max / (1024 * 1024), so_allocated);
 580 | 	printf("These results are only correct if the lib was used single-threaded.\n");
 581 | 	so_allocated_max = 0;
 582 | #endif
 583 | }
 584 | 
 585 | static int so_should_optimize(so_vec3 *tria, so_vec3 *trib, float cosThreshold)
 586 | {
 587 | 	so_vec3 n0 = so_normalize3(so_cross3(so_sub3(tria[1], tria[0]), so_sub3(tria[2], tria[0])));
 588 | 	so_vec3 n1 = so_normalize3(so_cross3(so_sub3(trib[1], trib[0]), so_sub3(trib[2], trib[0])));
 589 | 	return so_absf(so_dot3(n0, n1)) > cosThreshold;
 590 | }
 591 | 
 592 | so_seam_t *so_seams_find(float *positions, float *texcoords, int vertices, float cosNormalThreshold, float *data, int w, int h, int c)
 593 | {
 594 | 	so_vec3 *pos = (so_vec3*)positions;
 595 | 	so_vec2 *uv = (so_vec2*)texcoords;
 596 | 
 597 | 	so_vec3 bbmin = so_v3(FLT_MAX, FLT_MAX, FLT_MAX);
 598 | 	so_vec3 bbmax = so_v3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
 599 | 	int *hashmap = so_alloc(int, vertices * 2);
 600 | 	for (int i = 0; i < vertices; i++)
 601 | 	{
 602 | 		bbmin = so_min3(bbmin, pos[i]);
 603 | 		bbmax = so_max3(bbmax, pos[i]);
 604 | 		hashmap[i * 2 + 0] = -1;
 605 | 		hashmap[i * 2 + 1] = -1;
 606 | 	}
 607 | 
 608 | 	so_vec3 bbscale = so_v3(15.9f / bbmax.x, 15.9f / bbmax.y, 15.9f / bbmax.z);
 609 | 
 610 | 	so_seam_t *seams = 0;
 611 | 
 612 | 	for (int i0 = 0; i0 < vertices; i0++)
 613 | 	{
 614 | 		int tri = i0 - (i0 % 3);
 615 | 		int i1 = tri + ((i0 + 1) % 3);
 616 | 		int i2 = tri + ((i0 + 2) % 3);
 617 | 		so_vec3 p = so_mul3(so_sub3(pos[i0], bbmin), bbscale);
 618 | 		int hash = (281 * (int)p.x + 569 * (int)p.y + 1447 * (int)p.z) % (vertices * 2);
 619 | 		while (hashmap[hash] >= 0)
 620 | 		{
 621 | 			int oi0 = hashmap[hash];
 622 | #define SO_EQUAL(a, b) so_length3sq(so_sub3(pos[a], pos[b])) < 0.0000001f
 623 | 			if (SO_EQUAL(oi0, i0))
 624 | 			{
 625 | 				int otri = oi0 - (oi0 % 3);
 626 | 				int oi1 = otri + ((oi0 + 1) % 3);
 627 | 				int oi2 = otri + ((oi0 + 2) % 3);
 628 | 				if (SO_EQUAL(oi1, i1) && so_should_optimize(pos + tri, pos + otri, cosNormalThreshold))
 629 | 					so_seams_add_seam(&seams, uv[i0], uv[i1], uv[oi0], uv[oi1], data, w, h, c);
 630 | 				//else if (SO_EQUAL(oi1, i2) && so_should_optimize(pos + tri, pos + otri, cosNormalThreshold)) // this will already be detected by the other side of the seam!
 631 | 				//	so_seams_add_seam(&seams, uv[i0], uv[i2], uv[oi0], uv[oi1], data, w, h, c);
 632 | 				else if (SO_EQUAL(oi2, i1) && so_should_optimize(pos + tri, pos + otri, cosNormalThreshold))
 633 | 					so_seams_add_seam(&seams, uv[i0], uv[i1], uv[oi0], uv[oi2], data, w, h, c);
 634 | 				//break;
 635 | 			}
 636 | 			if (++hash == vertices * 2)
 637 | 				hash = 0;
 638 | 		}
 639 | 		hashmap[hash] = i0;
 640 | 	}
 641 | 
 642 | 	so_free(hashmap);
 643 | 	return seams;
 644 | }
 645 | 
 646 | static int so_texel_binary_search(so_texel_t *texels, int n, so_texel_t toFind)
 647 | {
 648 | 	int n_half = n / 2;
 649 | 	so_texel_t *center = texels + n_half;
 650 | 	if (toFind.y == center->y && toFind.x == center->x)
 651 | 		return n_half;
 652 | 	if (n <= 1)
 653 | 		return -1;
 654 | 	if (toFind.y < center->y || (toFind.y == center->y && toFind.x < center->x))
 655 | 		return so_texel_binary_search(texels, n_half, toFind);
 656 | 	else
 657 | 	{
 658 | 		int result = so_texel_binary_search(center + 1, n - n_half - 1, toFind);
 659 | 		return result == -1 ? -1 : n_half + 1 + result;
 660 | 	}
 661 | }
 662 | 
 663 | typedef struct
 664 | {
 665 | 	int index;
 666 | 	float value;
 667 | } so_sparse_entry_t;
 668 | 
 669 | static int so_sparse_entry_cmp(const void *a, const void *b)
 670 | {
 671 | 	so_sparse_entry_t *ae = (so_sparse_entry_t*)a;
 672 | 	so_sparse_entry_t *be = (so_sparse_entry_t*)b;
 673 | 	return ae->index - be->index;
 674 | }
 675 | 
 676 | typedef struct
 677 | {
 678 | 	so_sparse_entry_t *entries;
 679 | 	int count;
 680 | 	int capacity;
 681 | } so_sparse_entries_t;
 682 | 
 683 | static void so_sparse_matrix_alloc(so_sparse_entries_t *matrix, int capacity)
 684 | {
 685 | 	matrix->entries = so_alloc(so_sparse_entry_t, capacity);
 686 | 	matrix->capacity = capacity;
 687 | 	matrix->count = 0;
 688 | }
 689 | 
 690 | static void so_sparse_matrix_free(so_sparse_entries_t *matrix)
 691 | {
 692 | 	so_free(matrix->entries);
 693 | 	*matrix = { 0 };
 694 | }
 695 | 
 696 | static void so_sparse_matrix_add(so_sparse_entries_t *matrix, int index, float value)
 697 | {
 698 | 	if (matrix->count == matrix->capacity)
 699 | 	{
 700 | 		int newCapacity = matrix->capacity * 2;
 701 | 		if (newCapacity < 64)
 702 | 			newCapacity = 64;
 703 | 		so_sparse_entry_t *newEntries = so_alloc(so_sparse_entry_t, newCapacity);
 704 | 		for (int i = 0; i < matrix->count; i++)
 705 | 			newEntries[i] = matrix->entries[i];
 706 | 		so_free(matrix->entries);
 707 | 		matrix->entries = newEntries;
 708 | 		matrix->capacity = newCapacity;
 709 | 	}
 710 | 
 711 | 	int entryIndex = matrix->count++;
 712 | 	matrix->entries[entryIndex].index = index;
 713 | 	matrix->entries[entryIndex].value = value;
 714 | }
 715 | 
 716 | static void so_sparse_matrix_add(so_sparse_entries_t *matrix, so_sparse_entry_t *entry)
 717 | {
 718 | 	so_sparse_matrix_add(matrix, entry->index, entry->value);
 719 | }
 720 | 
 721 | static void so_sparse_matrix_sort(so_sparse_entries_t *matrix)
 722 | {
 723 | 	qsort(matrix->entries, matrix->count, sizeof(so_sparse_entry_t), so_sparse_entry_cmp);
 724 | }
 725 | 
 726 | static so_bool so_sparse_matrix_advance_to_index(so_sparse_entries_t *matrix, int *position, int index, float *outValue)
 727 | {
 728 | 	int localPosition = *position;
 729 | 	while (localPosition < matrix->count && matrix->entries[localPosition].index < index)
 730 | 		++localPosition;
 731 | 	*position = localPosition;
 732 | 
 733 | 	if (localPosition < matrix->count && matrix->entries[localPosition].index == index)
 734 | 	{
 735 | 		*outValue = matrix->entries[localPosition].value;
 736 | 		return SO_TRUE;
 737 | 	}
 738 | 
 739 | 	return SO_FALSE;
 740 | }
 741 | 
 742 | static inline uint32_t so_sparse_entry_hash(int entryIndex, uint32_t capacity)
 743 | {
 744 | 	return (entryIndex * 104173) % capacity;
 745 | }
 746 | 
 747 | static void so_sparse_entry_set_alloc(so_sparse_entries_t *set, int capacity)
 748 | {
 749 | 	set->entries = so_alloc(so_sparse_entry_t, capacity);
 750 | 	for (int i = 0; i < capacity; i++)
 751 | 		set->entries[i].index = -1;
 752 | 	set->capacity = capacity;
 753 | 	set->count = 0;
 754 | }
 755 | 
 756 | static so_sparse_entry_t *so_sparse_entry_set_get_or_add(so_sparse_entries_t *set, int index)
 757 | {
 758 | 	if (set->count + 1 > set->capacity * 3 / 4) // leave some free space to avoid having many collisions
 759 | 	{
 760 | 		int newCapacity = set->capacity >= 64 ? set->capacity * 2 : 64;
 761 | 		so_sparse_entry_t *newEntries = so_alloc(so_sparse_entry_t, newCapacity);
 762 | 		for (int i = 0; i < newCapacity; i++)
 763 | 			newEntries[i].index = -1;
 764 | 
 765 | 		for (int i = 0; i < set->capacity; i++) // rehash all old entries
 766 | 		{
 767 | 			if (set->entries[i].index != -1)
 768 | 			{
 769 | 				uint32_t hash = so_sparse_entry_hash(set->entries[i].index, newCapacity);
 770 | 				while (newEntries[hash].index != -1) // collisions
 771 | 					hash = (hash + 1) % newCapacity;
 772 | 				newEntries[hash] = set->entries[i];
 773 | 			}
 774 | 		}
 775 | 		so_free(set->entries);
 776 | 		set->entries = newEntries;
 777 | 		set->capacity = newCapacity;
 778 | 	}
 779 | 
 780 | 	uint32_t hash = so_sparse_entry_hash(index, set->capacity);
 781 | 	while (set->entries[hash].index != -1) // collisions
 782 | 	{
 783 | 		if (set->entries[hash].index == index)
 784 | 			return &set->entries[hash]; // entry is already in the set
 785 | 		hash = (hash + 1) % set->capacity;
 786 | 	}
 787 | 
 788 | 	if (set->entries[hash].index == -1) // make new entry
 789 | 	{
 790 | 		set->entries[hash].index = index;
 791 | 		set->entries[hash].value = 0.0f;
 792 | 		set->count++;
 793 | 		return &set->entries[hash];
 794 | 	}
 795 | 
 796 | 	return 0; // shouldn't happen
 797 | }
 798 | 
 799 | static so_sparse_entries_t so_matrix_At_times_A(const float *A, const int *sparseIndices, int maxRowIndices, int m, int n)
 800 | {
 801 | 	so_sparse_entries_t AtA;
 802 | 	so_sparse_entry_set_alloc(&AtA, (n / 16) * (n / 16));
 803 | 
 804 | 	// compute lower left triangle only since the result is symmetric
 805 | 	for (int k = 0; k < m; k++)
 806 | 	{
 807 | 		const float *srcPtr = A + k * maxRowIndices;
 808 | 		const int *indexPtr = sparseIndices + k * maxRowIndices;
 809 | 		for (int i = 0; i < maxRowIndices; i++)
 810 | 		{
 811 | 			int index_i = indexPtr[i];
 812 | 			if (index_i < 0) break;
 813 | 			float v = srcPtr[i];
 814 | 			//float *dstPtr = AtA + index_i * n;
 815 | 			for (int j = 0; j < maxRowIndices; j++)
 816 | 			{
 817 | 				int index_j = indexPtr[j];
 818 | 				if (index_j < 0) break;
 819 | 				//dstPtr[index_j] += v * srcPtr[j];
 820 | 				int index = index_i * n + index_j;
 821 | 
 822 | 				so_sparse_entry_t *entry = so_sparse_entry_set_get_or_add(&AtA, index);
 823 | 				entry->value += v * srcPtr[j];
 824 | 			}
 825 | 		}
 826 | 	}
 827 | 
 828 | 	// compaction step (make a compact array from the scattered hash set values)
 829 | 	for (int i = 0, j = 0; i < AtA.capacity; i++)
 830 | 		if (AtA.entries[i].index != -1)
 831 | 			AtA.entries[j++] = AtA.entries[i];
 832 | 
 833 | 	// sort by index -> this is a sparse matrix now
 834 | 	so_sparse_matrix_sort(&AtA);
 835 | 
 836 | 	return AtA;
 837 | }
 838 | 
 839 | static void so_matrix_At_times_b(const float *A, int m, int n, const float *b, float *Atb, const int *sparseIndices, int maxRowIndices)
 840 | {
 841 | 	memset(Atb, 0, sizeof(float) * n);
 842 | 	for (int j = 0; j < m; j++)
 843 | 	{
 844 | 		const int *rowIndices = sparseIndices + j * maxRowIndices;
 845 | 		for (int i = 0; i < maxRowIndices; i++)
 846 | 		{
 847 | 			int index = rowIndices[i];
 848 | 			if (index < 0) break;
 849 | 			Atb[index] += A[j * maxRowIndices + i] * b[j];
 850 | 		}
 851 | 	}
 852 | }
 853 | 
 854 | static so_sparse_entries_t so_matrix_cholesky_prepare(so_sparse_entries_t *AtA, int n)
 855 | {
 856 | 	// dense
 857 | 	//for (int i = 0; i < n; i++)
 858 | 	//{
 859 | 	//	float *a = L + i * n;
 860 | 	//	for (int j = 0; j <= i; j++)
 861 | 	//	{
 862 | 	//		float *b = L + j * n;
 863 | 	//		float sum = A[i * n + j];// + (i == j ? 0.0001 : 0.0); // some regularization
 864 | 	//		for (int k = 0; k < j; k++)
 865 | 	//			sum -= a[k] * b[k];
 866 | 	//		if (i > j)
 867 | 	//			a[j] = sum / b[j];
 868 | 	//		else // i == j
 869 | 	//		{
 870 | 	//			if (sum <= 0.0)
 871 | 	//				return SO_FALSE;
 872 | 	//			a[i] = sqrtf(sum);
 873 | 	//		}
 874 | 	//	}
 875 | 	//}
 876 | 	
 877 | 	// sparse
 878 | 	int *indices_i;
 879 | 	float *row_i;
 880 | 	float *invDiag;
 881 | 
 882 | 	if (n > 4096)
 883 | 	{
 884 | 		indices_i = so_alloc(int, n);
 885 | 		row_i = so_alloc(float, n);
 886 | 		invDiag = so_alloc(float, n);
 887 | 	}
 888 | 	else
 889 | 	{
 890 | 		indices_i = (int*)alloca(sizeof(int) * n);
 891 | 		row_i = (float*)alloca(sizeof(float) * n);
 892 | 		invDiag = (float*)alloca(sizeof(float) * n);
 893 | 	}
 894 | 
 895 | 	so_sparse_entries_t L;
 896 | 	so_sparse_matrix_alloc(&L, (n / 16) * (n / 16));
 897 | 
 898 | 	int AtAindex = 0;
 899 | 	for (int i = 0; i < n; i++)
 900 | 	{
 901 | 		int index_i_count = 0;
 902 | 
 903 | 		int row_j_index = 0;
 904 | 		for (int j = 0; j <= i; j++)
 905 | 		{
 906 | 			//float sum = A[i * n + j]; // + (i == j ? 0.0001 : 0.0); // regularization
 907 | 			int index = i * n + j;
 908 | 			float sum = 0.0f;
 909 | 			so_sparse_matrix_advance_to_index(AtA, &AtAindex, index, &sum);
 910 | 
 911 | 			for (int k = 0; k < index_i_count; k++)
 912 | 			{
 913 | 				int index_i = indices_i[k];
 914 | 				float Lvalue;
 915 | 				if (so_sparse_matrix_advance_to_index(&L, &row_j_index, j * n + index_i, &Lvalue))
 916 | 					sum -= row_i[index_i] * Lvalue;
 917 | 			}
 918 | 
 919 | 			if (i == j)
 920 | 			{
 921 | 				if (sum <= 0.0f)
 922 | 				{
 923 | 					so_sparse_matrix_free(&L);
 924 | 					return L;
 925 | 				}
 926 | 				invDiag[i] = so_rsqrtf(sum);
 927 | 			}
 928 | 
 929 | 			if (SO_NOT_ZERO(sum))
 930 | 			{
 931 | 				row_i[j] = sum * invDiag[j];
 932 | 				indices_i[index_i_count++] = j;
 933 | 				so_sparse_matrix_add(&L, index, row_i[j]);
 934 | 			}
 935 | 			else
 936 | 				row_i[j] = 0.0f;
 937 | 		}
 938 | 	}
 939 | 
 940 | 	if (n > 4096)
 941 | 	{
 942 | 		so_free(indices_i);
 943 | 		so_free(row_i);
 944 | 		so_free(invDiag);
 945 | 	}
 946 | 
 947 | 	return L;
 948 | }
 949 | 
 950 | static void so_matrix_cholesky_solve(so_sparse_entries_t *Lrows, so_sparse_entries_t *Lcols, float *x, const float *b, int n)
 951 | {
 952 | 	float *y = (float*)alloca(sizeof(float) * n);
 953 | 
 954 | 	// L * y = b
 955 | 	int Lindex = 0;
 956 | 	for (int i = 0; i < n; i++)
 957 | 	{
 958 | 		float sum = b[i];
 959 | 		while (Lindex < Lrows->count && Lrows->entries[Lindex].index < i * (n + 1))
 960 | 		{
 961 | 			sum -= Lrows->entries[Lindex].value * y[Lrows->entries[Lindex].index - i * n];
 962 | 			++Lindex;
 963 | 		}
 964 | 		assert(Lrows->entries[Lindex].index == i * (n + 1));
 965 | 		y[i] = sum / Lrows->entries[Lindex].value;
 966 | 		++Lindex;
 967 | 	}
 968 | 
 969 | 	// L' * x = y
 970 | 	Lindex = Lcols->count - 1;
 971 | 	for (int i = n - 1; i >= 0; i--)
 972 | 	{
 973 | 		float sum = y[i];
 974 | 		while (Lindex >= 0 && Lcols->entries[Lindex].index > i * (n + 1))
 975 | 		{
 976 | 			sum -= Lcols->entries[Lindex].value * x[Lcols->entries[Lindex].index - i * n];
 977 | 			--Lindex;
 978 | 		}
 979 | 		assert(Lcols->entries[Lindex].index == i * (n + 1));
 980 | 		x[i] = sum / Lcols->entries[Lindex].value;
 981 | 		--Lindex;
 982 | 	}
 983 | }
 984 | 
 985 | so_bool so_seam_optimize(so_seam_t *seam, float *data, int w, int h, int c, float lambda)
 986 | {
 987 | 	so_texel_set_t *texels = &seam->texels;
 988 | 	so_stitching_points_t *stitchingPoints = &seam->stitchingPoints;
 989 | 
 990 | 	size_t m = stitchingPoints->count;
 991 | 	size_t n = texels->count;
 992 | 
 993 | 	void *memoryBlock = so_alloc_void(
 994 | 		sizeof(so_texel_t) * n +
 995 | 		sizeof(float) * (m + n) * 8 +
 996 | 		sizeof(int) * (m + n) * 8 +
 997 | 		sizeof(float) * (m + n) +
 998 | 		sizeof(float) * n +
 999 | 		sizeof(float) * n);
1000 | 
1001 | 	uint8_t *memoryStart = (uint8_t*)memoryBlock;
1002 | 
1003 | 	so_texel_t *texelsFlat = (so_texel_t*)memoryStart;
1004 | 	memoryStart += sizeof(so_texel_t) * n;
1005 | 
1006 | 	float *A = (float*)memoryStart;
1007 | 	memoryStart += sizeof(float) * (m + n) * 8;
1008 | 
1009 | 	int *AsparseIndices = (int*)memoryStart;
1010 | 	memoryStart += sizeof(int) * (m + n) * 8;
1011 | 
1012 | 	float *b = (float*)memoryStart;
1013 | 	memoryStart += sizeof(float) * (m + n);
1014 | 
1015 | 	float *Atb = (float*)memoryStart;
1016 | 	memoryStart += sizeof(float) * n;
1017 | 
1018 | 	float *x = (float*)memoryStart;
1019 | 	memoryStart += sizeof(float) * n;
1020 | 
1021 | 	for (int i = 0, j = 0; i < texels->capacity && j < n; i++)
1022 | 		if (texels->texels[i].x != -1)
1023 | 			texelsFlat[j++] = texels->texels[i];
1024 | 
1025 | 	qsort(texelsFlat, n, sizeof(so_texel_t), so_texel_cmp);
1026 | 
1027 | 	size_t r = 0;
1028 | 	for (int i = 0; i < m; i++)
1029 | 	{
1030 | 		ptrdiff_t column0[4];
1031 | 		ptrdiff_t column1[4];
1032 | 		so_bool side0valid = SO_FALSE, side1valid = SO_FALSE;
1033 | 		for (int k = 0; k < 4; k++)
1034 | 		{
1035 | 			so_texel_t t0 = stitchingPoints->points[i].sides[0].texels[k];
1036 | 			so_texel_t t1 = stitchingPoints->points[i].sides[1].texels[k];
1037 | 			column0[k] = so_texel_binary_search(texelsFlat, n, t0);
1038 | 			column1[k] = so_texel_binary_search(texelsFlat, n, t1);
1039 | 
1040 | 			if (column0[k] == -1) { side0valid = SO_FALSE; break; }
1041 | 			if (column1[k] == -1) { side1valid = SO_FALSE; break; }
1042 | 
1043 | 			// test for validity of stitching point
1044 | 			for (int ci = 0; ci < c; ci++)
1045 | 			{
1046 | 				side0valid |= data[(t0.y * w + t0.x) * c + ci] > 0.0f;
1047 | 				side1valid |= data[(t1.y * w + t1.x) * c + ci] > 0.0f;
1048 | 			}
1049 | 		}
1050 | 
1051 | 		if (side0valid && side1valid)
1052 | 		{
1053 | 			for (int k = 0; k < 4; k++)
1054 | 			{
1055 | 				A[r * 8 + k * 2 + 0] = stitchingPoints->points[i].sides[0].weights[k];
1056 | 				AsparseIndices[r * 8 + k * 2 + 0] = column0[k];
1057 | 				A[r * 8 + k * 2 + 1] = -stitchingPoints->points[i].sides[1].weights[k];
1058 | 				AsparseIndices[r * 8 + k * 2 + 1] = column1[k];
1059 | 			}
1060 | 			r++;
1061 | 		}
1062 | 	}
1063 | 
1064 | 	m = r;
1065 | 
1066 | 	// add error terms for deviation from original pixel value (scaled by lambda)
1067 | 	for (int i = 0; i < n; i++)
1068 | 	{
1069 | 		A[(m + i) * 8] = lambda;
1070 | 		AsparseIndices[(m + i) * 8 + 0] = i;
1071 | 		AsparseIndices[(m + i) * 8 + 1] = -1;
1072 | 	}
1073 | 
1074 | 	so_sparse_entries_t AtA = so_matrix_At_times_A(A, AsparseIndices, 8, m + n, n);
1075 | 	so_sparse_entries_t L = so_matrix_cholesky_prepare(&AtA, n);
1076 | 	so_sparse_matrix_free(&AtA);
1077 | 
1078 | 	if (!L.count)
1079 | 	{
1080 | 		so_free(memoryBlock);
1081 | 		return SO_FALSE; // Cholesky decomposition failed
1082 | 	}
1083 | 
1084 | 	so_sparse_entries_t Lcols;
1085 | 	so_sparse_matrix_alloc(&Lcols, L.count);
1086 | 	for (int i = 0; i < L.count; i++)
1087 | 		so_sparse_matrix_add(&Lcols, (L.entries[i].index % n) * n + (L.entries[i].index / n), L.entries[i].value);
1088 | 	so_sparse_matrix_sort(&Lcols);
1089 | 
1090 | 	// solve each color channel independently
1091 |   	for (int ci = 0; ci < c; ci++)
1092 | 	{
1093 | 		for (int i = 0; i < n; i++)
1094 | 			b[m + i] = lambda * data[(texelsFlat[i].y * w + texelsFlat[i].x) * c + ci];
1095 | 
1096 | 		so_matrix_At_times_b(A, m + n, n, b, Atb, AsparseIndices, 8);
1097 | 		so_matrix_cholesky_solve(&L, &Lcols, x, Atb, n);
1098 | 
1099 | 		// write out results
1100 | 		for (int i = 0; i < n; i++)
1101 | 			data[(texelsFlat[i].y * w + texelsFlat[i].x) * c + ci] = x[i];
1102 | 	}
1103 | 
1104 | 	so_free(memoryBlock);
1105 | 	so_sparse_matrix_free(&L);
1106 | 	so_sparse_matrix_free(&Lcols);
1107 | 
1108 | 	return SO_TRUE;
1109 | }
1110 | 
1111 | #endif // SEAMOPTIMIZER_IMPLEMENTATION
1112 | 


--------------------------------------------------------------------------------