├── README.md ├── example_images ├── stairs_triangles_not_optimized.png └── stairs_triangles_optimized.png └── seamoptimizer.h /README.md: -------------------------------------------------------------------------------- 1 | # seamoptimizer 2 | A C/C++ single-file library that minimizes the hard transition errors of disjoint edges in lightmaps. 3 | It is based on an idea presented by Michał Iwanicki in the talk [Lighting Technology of "The Last Of Us"](http://miciwan.com/SIGGRAPH2013/Lighting%20Technology%20of%20The%20Last%20Of%20Us.pdf). 4 | A least squares solver is used to find a minimal error solution to the problem of sampling along the edges between triangles that are mapped with disjoint lightmap regions. 5 | This can improve the visual appearance at these discontinuities or "seams". 6 | 7 | 8 | To paste the implementation into your project, insert the following lines: 9 | ``` 10 | #define SEAMOPTIMIZER_IMPLEMENTATION 11 | #include "seamoptimizer.h" 12 | ``` 13 | 14 | Before optimizing a very bad UV mapping (each triangle edge is a seam): 15 | ![Seam Optimizer Before](https://github.com/ands/seamoptimizer/raw/master/example_images/stairs_triangles_not_optimized.png) 16 | After optimizing the seams of the bad UV mapping: 17 | ![Seam Optimizer After](https://github.com/ands/seamoptimizer/raw/master/example_images/stairs_triangles_optimized.png) 18 | The seams are not all completely gone, but, especially on the walls, there is a very noticeable improvement. 19 | 20 | # Example Usage 21 | The following example finds and optimizes all the seams for some mesh geometry on a lightmap. 
22 | ``` 23 | // only optimize seams between triangles that are on the same plane 24 | // (where dot(A.normal, B.normal) > cosNormalThreshold): 25 | const float cosNormalThreshold = 0.99f; 26 | 27 | // how "important" the original color values are: 28 | const float lambda = 0.1f; 29 | 30 | 31 | printf("Searching for separate seams...\n"); 32 | so_seam_t *seams = so_seams_find( 33 | (float*)mesh->positions, (float*)mesh->texcoords, mesh->vertexCount, 34 | cosNormalThreshold, 35 | lightmap->data, lightmap->width, lightmap->height, lightmap->channelCount); 36 | 37 | 38 | printf("Optimizing seams...\n"); 39 | for (so_seam_t *seam = seams; seam; seam = so_seam_next(seam)) 40 | { 41 | // NOTE: seams can also be optimized in parallel on separate threads! 42 | if (!so_seam_optimize(seam, lightmap->data, lightmap->width, lightmap->height, lightmap->channelCount, lambda)) 43 | printf("Could not optimize a seam (Cholesky decomposition failed).\n"); 44 | } 45 | 46 | printf("Done!\n"); 47 | so_seams_free(seams); 48 | ``` 49 | 50 | # Thanks 51 | - To Michał Iwanicki for helping me with the transformation of the problem into a problem that can be solved by a least-squares solver 52 | - To Dominik Lazarek for pointing me to Michał's presentation 53 | -------------------------------------------------------------------------------- /example_images/stairs_triangles_not_optimized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ands/seamoptimizer/19b835c6e52d2100a8e6b58e19fe8da88d271368/example_images/stairs_triangles_not_optimized.png -------------------------------------------------------------------------------- /example_images/stairs_triangles_optimized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ands/seamoptimizer/19b835c6e52d2100a8e6b58e19fe8da88d271368/example_images/stairs_triangles_optimized.png 
-------------------------------------------------------------------------------- /seamoptimizer.h: -------------------------------------------------------------------------------- 1 | /*********************************************************** 2 | * A single header file lightmap seam optimization library * 3 | * https://github.com/ands/seamoptimizer * 4 | * no warranty implied | use at your own risk * 5 | * author: Andreas Mantler (ands) | last change: 05.03.2017 * 6 | * * 7 | * License: * 8 | * This software is in the public domain. * 9 | * Where that dedication is not recognized, * 10 | * you are granted a perpetual, irrevocable license to copy * 11 | * and modify this file however you want. * 12 | ***********************************************************/ 13 | 14 | #ifndef SEAMOPTIMIZER_H 15 | #define SEAMOPTIMIZER_H 16 | 17 | #ifndef SO_CALLOC 18 | #include <stdlib.h> // calloc, free, alloca 19 | #define SO_CALLOC(count, size) calloc(count, size) 20 | #define SO_FREE(ptr) free(ptr) 21 | #endif 22 | 23 | typedef int so_bool; 24 | #define SO_FALSE 0 25 | #define SO_TRUE 1 26 | 27 | typedef struct so_seam_t so_seam_t; 28 | 29 | // API 30 | 31 | // so_seams_find: 32 | // Find all seams according to the specified triangulated geometry and its texture coordinates. 33 | // This searches for edges that are shared by triangles, but are disjoint in UV space. 34 | 35 | // positions: triangle array 3d positions ((x0, y0, z0), (x1, y1, z1), (x2, y2, z2)), ((x0, y0, z0), (x1, y1, z1), (x2, y2, z2)), ... 36 | // texcoords: triangle array 2d uv coords ( (u0, v0), (u1, v1), (u2, v2)), ( (u0, v0), (u1, v1), (u2, v2)), ... 37 | // vertices: total number of vertices ( = triangles * 3) 38 | 39 | // cosNormalThreshold controls at which angles between neighbour triangles a seam should be considered. 40 | // if dot(triangle A normal, triangle B normal) > cosNormalThreshold then the seam is included into the returned set. 
41 | 42 | // data, w, h, c specifies the lightmap data (data should be a w * h * c array of floats). 43 | // w = lightmap width, h = lightmap height, c = number of lightmap channels (1..4). 44 | 45 | // returns a linked list of the found seams. 46 | 47 | // Warning: The data may be modified to fill empty (zeroed) edge texels with one of their closest neighbours if they are empty! 48 | so_seam_t *so_seams_find( 49 | float *positions, float *texcoords, int vertices, 50 | float cosNormalThreshold, 51 | float *data, int w, int h, int c); 52 | 53 | 54 | // so_seam_optimize: 55 | // Optimize a single seam. Seams can be optimized in parallel on different threads. 56 | // lambda: Weight that controls the deviation from the original color values (must be > 0). 57 | // Higher values => Less deviation from the original edge colors => more obvious seams. 58 | // Too low values => Optimizer may just choose black as the perfect color for all seam pixels. 59 | // returns whether the optimization was successful. 60 | so_bool so_seam_optimize( 61 | so_seam_t *seam, 62 | float *data, int w, int h, int c, 63 | float lambda); 64 | 65 | // so_seam_next: Retrieves the next seam in the linked list. 66 | so_seam_t *so_seam_next( 67 | so_seam_t *seam); 68 | 69 | // so_seams_free: Free the resources for all seams in the list. 
70 | void so_seams_free( 71 | so_seam_t *seams); 72 | 73 | #endif 74 | ////////////////////// END OF HEADER ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 75 | #ifdef SEAMOPTIMIZER_IMPLEMENTATION 76 | #undef SEAMOPTIMIZER_IMPLEMENTATION 77 | 78 | #include <stdlib.h> // qsort 79 | #include <stdio.h> // printf (TODO) 80 | #include <string.h> // memcpy 81 | #include <math.h> 82 | #include <float.h> 83 | #include <assert.h> 84 | #include <stdint.h> 85 | 86 | #define SO_EPSILON 0.00001f 87 | 88 | #ifdef _DEBUG 89 | #define SO_NOT_ZERO(v) (v > SO_EPSILON || v < -SO_EPSILON) // a lot faster in debug 90 | #else 91 | #define SO_NOT_ZERO(v) (fabsf(v) > SO_EPSILON) // faster in release 92 | #endif 93 | 94 | 95 | #ifdef SO_APPROX_RSQRT 96 | #include "xmmintrin.h" 97 | static inline float so_rsqrtf(float v) 98 | { 99 | return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(v))); 100 | } 101 | #else 102 | static inline float so_rsqrtf(float v) 103 | { 104 | return 1.0f / sqrtf(v); 105 | } 106 | #endif 107 | 108 | static inline int16_t so_min16i (int16_t a, int16_t b) { return a < b ? a : b; } 109 | static inline int16_t so_max16i (int16_t a, int16_t b) { return a > b ? a : b; } 110 | static inline float so_minf (float a, float b) { return a < b ? a : b; } 111 | static inline float so_maxf (float a, float b) { return a > b ? a : b; } 112 | static inline float so_absf (float a ) { return a < 0.0f ? 
-a : a; } 113 | 114 | typedef struct so_vec2 { float x, y; } so_vec2; 115 | static inline so_vec2 so_v2i (int x, int y) { so_vec2 v = { (float)x, (float)y }; return v; } 116 | static inline so_vec2 so_v2 (float x, float y) { so_vec2 v = { x, y }; return v; } 117 | static inline so_vec2 so_add2 (so_vec2 a, so_vec2 b) { return so_v2(a.x + b.x, a.y + b.y); } 118 | static inline so_vec2 so_sub2 (so_vec2 a, so_vec2 b) { return so_v2(a.x - b.x, a.y - b.y); } 119 | static inline so_vec2 so_mul2 (so_vec2 a, so_vec2 b) { return so_v2(a.x * b.x, a.y * b.y); } 120 | static inline so_vec2 so_scale2 (so_vec2 a, float b) { return so_v2(a.x * b, a.y * b); } 121 | static inline float so_length2sq (so_vec2 a ) { return a.x * a.x + a.y * a.y; } 122 | static inline float so_length2 (so_vec2 a ) { return sqrtf(so_length2sq(a)); } 123 | 124 | typedef struct so_vec3 { float x, y, z; } so_vec3; 125 | static inline so_vec3 so_v3 (float x, float y, float z) { so_vec3 v = { x, y, z }; return v; } 126 | static inline so_vec3 so_sub3 (so_vec3 a, so_vec3 b) { return so_v3(a.x - b.x, a.y - b.y, a.z - b.z); } 127 | static inline so_vec3 so_mul3 (so_vec3 a, so_vec3 b) { return so_v3(a.x * b.x, a.y * b.y, a.z * b.z); } 128 | static inline so_vec3 so_scale3 (so_vec3 a, float b) { return so_v3(a.x * b, a.y * b, a.z * b); } 129 | static inline so_vec3 so_div3 (so_vec3 a, float b) { return so_scale3(a, 1.0f / b); } 130 | static inline so_vec3 so_min3 (so_vec3 a, so_vec3 b) { return so_v3(so_minf(a.x, b.x), so_minf(a.y, b.y), so_minf(a.z, b.z)); } 131 | static inline so_vec3 so_max3 (so_vec3 a, so_vec3 b) { return so_v3(so_maxf(a.x, b.x), so_maxf(a.y, b.y), so_maxf(a.z, b.z)); } 132 | static inline float so_dot3 (so_vec3 a, so_vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; } 133 | static inline so_vec3 so_cross3 (so_vec3 a, so_vec3 b) { return so_v3(a.y * b.z - b.y * a.z, a.z * b.x - b.z * a.x, a.x * b.y - b.x * a.y); } 134 | static inline float so_length3sq (so_vec3 a ) { return a.x * a.x + a.y * 
a.y + a.z * a.z; } 135 | static inline float so_length3 (so_vec3 a ) { return sqrtf(so_length3sq(a)); } 136 | static inline so_vec3 so_normalize3(so_vec3 a ) { return so_div3(a, so_length3(a)); } 137 | 138 | //#define SO_CHECK_FOR_MEMORY_LEAKS // check for memory leaks. don't use this in multithreaded code! 139 | 140 | #ifdef SO_CHECK_FOR_MEMORY_LEAKS 141 | static uint64_t so_allocated = 0; 142 | static uint64_t so_allocated_max = 0; 143 | 144 | static void *so_alloc_void(size_t size) 145 | { 146 | void *memory = SO_CALLOC(1, size + sizeof(size_t)); 147 | (*(size_t*)memory) = size; 148 | so_allocated += size; 149 | if (so_allocated > so_allocated_max) 150 | so_allocated_max = so_allocated; 151 | return (size_t*)memory + 1; 152 | } 153 | static void so_free(void *memory) 154 | { 155 | size_t size = ((size_t*)memory)[-1]; 156 | so_allocated -= size; 157 | SO_FREE(((size_t*)memory) - 1); 158 | } 159 | #else 160 | static void *so_alloc_void(size_t size) 161 | { 162 | return SO_CALLOC(1, size); 163 | } 164 | static void so_free(void *memory) 165 | { 166 | SO_FREE(memory); 167 | } 168 | #endif 169 | 170 | #define so_alloc(type, count) ((type*)so_alloc_void(sizeof(type) * (count))) 171 | 172 | static inline so_bool so_accumulate_texel(float *sums, int x, int y, float *data, int w, int h, int c) 173 | { 174 | so_bool exists = SO_FALSE; 175 | for (int i = 0; i < c; i++) 176 | { 177 | float v = data[(y * w + x) * c + i]; 178 | sums[i] += v; 179 | exists |= v > 0.0f; 180 | } 181 | return exists; 182 | } 183 | 184 | static void so_fill_with_closest(int x, int y, float *data, int w, int h, int c, int depth = 2) 185 | { 186 | assert(c <= 4); 187 | 188 | for (int i = 0; i < c; i++) 189 | if (data[(y * w + x) * c + i] > 0.0f) 190 | return; 191 | 192 | float sums[4] = {}; 193 | int n = 0; 194 | 195 | if (x > 0 && so_accumulate_texel(sums, x - 1, y, data, w, h, c)) n++; 196 | if (x + 1 < w && so_accumulate_texel(sums, x + 1, y, data, w, h, c)) n++; 197 | if (y > 0 && 
so_accumulate_texel(sums, x, y - 1, data, w, h, c)) n++; 198 | if (y + 1 < h && so_accumulate_texel(sums, x, y + 1, data, w, h, c)) n++; 199 | 200 | if (!n && depth) 201 | { 202 | --depth; 203 | if (x > 0) 204 | { 205 | so_fill_with_closest(x - 1, y, data, w, h, c, depth); 206 | if (so_accumulate_texel(sums, x - 1, y, data, w, h, c)) n++; 207 | } 208 | if (x + 1 < w) 209 | { 210 | so_fill_with_closest(x + 1, y, data, w, h, c, depth); 211 | if (so_accumulate_texel(sums, x + 1, y, data, w, h, c)) n++; 212 | } 213 | if (y > 0) 214 | { 215 | so_fill_with_closest(x, y - 1, data, w, h, c, depth); 216 | if (so_accumulate_texel(sums, x, y - 1, data, w, h, c)) n++; 217 | } 218 | if (y + 1 < h) 219 | { 220 | so_fill_with_closest(x, y + 1, data, w, h, c, depth); 221 | if (so_accumulate_texel(sums, x, y + 1, data, w, h, c)) n++; 222 | } 223 | } 224 | 225 | if (n) 226 | { 227 | float ni = 1.0f / (float)n; 228 | for (int i = 0; i < c; i++) 229 | data[(y * w + x) * c + i] = sums[i] * ni; 230 | } 231 | } 232 | 233 | typedef struct 234 | { 235 | int16_t x, y; 236 | } so_texel_t; 237 | 238 | static inline int so_texel_cmp(const void *l, const void *r) 239 | { 240 | const so_texel_t *lt = (const so_texel_t*)l; 241 | const so_texel_t *rt = (const so_texel_t*)r; 242 | if (lt->y < rt->y) return -1; 243 | if (lt->y > rt->y) return 1; 244 | if (lt->x < rt->x) return -1; 245 | if (lt->x > rt->x) return 1; 246 | return 0; 247 | } 248 | 249 | typedef struct 250 | { 251 | so_texel_t texels[4]; 252 | float weights[4]; 253 | } so_bilinear_sample_t; 254 | 255 | typedef struct 256 | { 257 | so_bilinear_sample_t sides[2]; 258 | } so_stitching_point_t; 259 | 260 | typedef struct 261 | { 262 | so_texel_t *texels; 263 | uint32_t count; 264 | uint32_t capacity; 265 | } so_texel_set_t; 266 | 267 | static inline uint32_t so_texel_hash(so_texel_t texel, uint32_t capacity) 268 | { 269 | return (texel.y * 104173 + texel.x * 86813) % capacity; 270 | } 271 | 272 | static void so_texel_set_add(so_texel_set_t 
*set, so_texel_t *texels, int entries, int arrayLength = 0) 273 | { 274 | if (set->count + entries > set->capacity * 3 / 4) // leave some free space to avoid having many collisions 275 | { 276 | int newCapacity = set->capacity > 64 ? set->capacity * 2 : 64; 277 | while (set->count + entries > newCapacity * 3 / 4) 278 | newCapacity *= 2; 279 | 280 | so_texel_t *newTexels = so_alloc(so_texel_t, newCapacity); 281 | 282 | for (int i = 0; i < newCapacity; i++) 283 | newTexels[i].x = -1; 284 | 285 | if (set->texels) 286 | { 287 | for (int i = 0; i < set->capacity; i++) // rehash all old texels 288 | { 289 | if (set->texels[i].x != -1) 290 | { 291 | uint32_t hash = so_texel_hash(set->texels[i], newCapacity); 292 | while (newTexels[hash].x != -1) // collisions 293 | hash = (hash + 1) % newCapacity; 294 | newTexels[hash] = set->texels[i]; 295 | } 296 | } 297 | so_free(set->texels); 298 | } 299 | 300 | set->texels = newTexels; 301 | set->capacity = newCapacity; 302 | } 303 | 304 | if (arrayLength == 0) 305 | arrayLength = entries; 306 | 307 | for (int i = 0; i < arrayLength; i++) 308 | { 309 | if (texels[i].x != -1) 310 | { 311 | uint32_t hash = so_texel_hash(texels[i], set->capacity); 312 | while (set->texels[hash].x != -1) // collisions 313 | { 314 | if (set->texels[hash].x == texels[i].x && set->texels[hash].y == texels[i].y) 315 | break; // texel is already in the set 316 | hash = (hash + 1) % set->capacity; 317 | } 318 | 319 | if (set->texels[hash].x == -1) 320 | { 321 | set->texels[hash] = texels[i]; 322 | set->count++; 323 | } 324 | } 325 | } 326 | } 327 | 328 | static so_bool so_texel_set_contains(so_texel_set_t *set, so_texel_t texel) 329 | { 330 | uint32_t hash = so_texel_hash(texel, set->capacity); 331 | while (set->texels[hash].x != -1) // entries with same hash 332 | { 333 | if (set->texels[hash].x == texel.x && set->texels[hash].y == texel.y) 334 | return SO_TRUE; // texel is already in the set 335 | hash = (hash + 1) % set->capacity; 336 | } 337 | return 
SO_FALSE; 338 | } 339 | 340 | static void so_texel_set_free(so_texel_set_t *set) 341 | { 342 | so_free(set->texels); 343 | *set = {0}; 344 | } 345 | 346 | typedef struct 347 | { 348 | so_stitching_point_t *points; 349 | uint32_t count; 350 | uint32_t capacity; 351 | } so_stitching_points_t; 352 | 353 | static void so_stitching_points_alloc(so_stitching_points_t *points, uint32_t n) 354 | { 355 | points->points = so_alloc(so_stitching_point_t, n); 356 | points->capacity = n; 357 | points->count = 0; 358 | } 359 | static void so_stitching_points_free(so_stitching_points_t *points) 360 | { 361 | so_free(points->points); 362 | *points = {0}; 363 | } 364 | static void so_stitching_points_add(so_stitching_points_t *points, so_stitching_point_t *point) 365 | { 366 | assert(points->count < points->capacity); 367 | points->points[points->count++] = *point; 368 | } 369 | static void so_stitching_points_append(so_stitching_points_t *points, so_stitching_points_t *other) 370 | { 371 | so_stitching_point_t *newPoints = so_alloc(so_stitching_point_t, points->capacity + other->capacity); 372 | memcpy(newPoints, points->points, sizeof(so_stitching_point_t) * points->count); 373 | memcpy(newPoints + points->count, other->points, sizeof(so_stitching_point_t) * other->count); 374 | so_free(points->points); 375 | points->points = newPoints; 376 | points->capacity = points->capacity + other->capacity; 377 | points->count = points->count + other->count; 378 | } 379 | 380 | struct so_seam_t 381 | { 382 | int16_t x_min, y_min, x_max, y_max; 383 | so_texel_set_t texels; 384 | so_stitching_points_t stitchingPoints; 385 | so_seam_t *next; 386 | }; 387 | 388 | so_seam_t *so_seam_next(so_seam_t *seam) 389 | { 390 | return seam->next; 391 | } 392 | 393 | static void so_seam_alloc(so_seam_t *seam, uint32_t stitchingPointCount) 394 | { 395 | so_stitching_points_alloc(&seam->stitchingPoints, stitchingPointCount); 396 | } 397 | static void so_seam_free(so_seam_t *seam) 398 | { 399 | 
so_texel_set_free(&seam->texels); 400 | so_stitching_points_free(&seam->stitchingPoints); 401 | } 402 | 403 | static void so_seam_add(so_seam_t *seam, so_stitching_point_t *point) 404 | { 405 | for (int side = 0; side < 2; side++) 406 | { 407 | for (int texel = 0; texel < 4; texel++) 408 | { 409 | so_texel_t t = point->sides[side].texels[texel]; 410 | seam->x_min = t.x < seam->x_min ? t.x : seam->x_min; 411 | seam->y_min = t.y < seam->y_min ? t.y : seam->y_min; 412 | seam->x_max = t.x > seam->x_max ? t.x : seam->x_max; 413 | seam->y_max = t.y > seam->y_max ? t.y : seam->y_max; 414 | } 415 | so_texel_set_add(&seam->texels, point->sides[side].texels, 4); 416 | } 417 | 418 | so_stitching_points_add(&seam->stitchingPoints, point); 419 | } 420 | // so_seams_intersect: returns SO_TRUE if seams a and b have at least one texel in common, SO_FALSE otherwise. 421 | static so_bool so_seams_intersect(so_seam_t *a, so_seam_t *b) 422 | { 423 | // compare bounding boxes first (x_min..x_max / y_min..y_max are inclusive texel bounds, so boxes that merely touch can still share texels and must not be rejected) 424 | if (a->x_min > b->x_max || b->x_min > a->x_max || 425 | a->y_min > b->y_max || b->y_min > a->y_max) 426 | return SO_FALSE; 427 | 428 | // bounds intersect -> check each individual texel for intersection 429 | if (a->texels.capacity > b->texels.capacity) // swap so that we always loop over the smaller set 430 | { 431 | so_seam_t *tmp = a; 432 | a = b; 433 | b = tmp; 434 | } 435 | 436 | for (int i = 0; i < a->texels.capacity; i++) 437 | if (a->texels.texels[i].x != -1) 438 | if (so_texel_set_contains(&b->texels, a->texels.texels[i])) 439 | return SO_TRUE; 440 | return SO_FALSE; 441 | } 442 | 443 | static void so_seams_in_place_merge(so_seam_t *dst, so_seam_t *src) 444 | { 445 | // expand bounding box 446 | dst->x_min = src->x_min < dst->x_min ? src->x_min : dst->x_min; 447 | dst->y_min = src->y_min < dst->y_min ? src->y_min : dst->y_min; 448 | dst->x_max = src->x_max > dst->x_max ? src->x_max : dst->x_max; 449 | dst->y_max = src->y_max > dst->y_max ? 
src->y_max : dst->y_max; 450 | 451 | // insert src elements 452 | so_texel_set_add(&dst->texels, src->texels.texels, src->texels.count, src->texels.capacity); 453 | so_stitching_points_append(&dst->stitchingPoints, &src->stitchingPoints); 454 | } 455 | 456 | static void so_seams_add_seam(so_seam_t **seams, so_vec2 a0, so_vec2 a1, so_vec2 b0, so_vec2 b1, float *data, int w, int h, int c) 457 | { 458 | so_vec2 s = so_v2i(w, h); 459 | a0 = so_mul2(a0, s); 460 | a1 = so_mul2(a1, s); 461 | b0 = so_mul2(b0, s); 462 | b1 = so_mul2(b1, s); 463 | so_vec2 ad = so_sub2(a1, a0); 464 | so_vec2 bd = so_sub2(b1, b0); 465 | float l = so_length2(ad); 466 | int iterations = (int)(l * 5.0f); // TODO: is this the best value? 467 | float step = 1.0f / iterations; 468 | 469 | so_seam_t currentSeam = {0}; 470 | currentSeam.x_min = w; currentSeam.y_min = h; 471 | currentSeam.x_max = 0; currentSeam.y_max = 0; 472 | 473 | so_seam_alloc(¤tSeam, iterations + 1); 474 | 475 | for (int i = 0; i <= iterations; i++) 476 | { 477 | float t = i * step; 478 | so_vec2 a = so_add2(a0, so_scale2(ad, t)); 479 | so_vec2 b = so_add2(b0, so_scale2(bd, t)); 480 | int16_t ax = (int16_t)roundf(a.x), ay = (int16_t)roundf(a.y); 481 | int16_t bx = (int16_t)roundf(b.x), by = (int16_t)roundf(b.y); 482 | float au = a.x - ax, av = a.y - ay, nau = 1.0f - au, nav = 1.0f - av; 483 | float bu = b.x - bx, bv = b.y - by, nbu = 1.0f - bu, nbv = 1.0f - bv; 484 | 485 | so_texel_t ta0 = { ax , ay }; 486 | so_texel_t ta1 = { so_min16i(ax + 1, w - 1), ay }; 487 | so_texel_t ta2 = { ax , so_min16i(ay + 1, h - 1) }; 488 | so_texel_t ta3 = { so_min16i(ax + 1, w - 1), so_min16i(ay + 1, h - 1) }; 489 | 490 | so_texel_t tb0 = { bx , by }; 491 | so_texel_t tb1 = { so_min16i(bx + 1, w - 1), by }; 492 | so_texel_t tb2 = { bx , so_min16i(by + 1, h - 1) }; 493 | so_texel_t tb3 = { so_min16i(bx + 1, w - 1), so_min16i(by + 1, h - 1) }; 494 | 495 | so_fill_with_closest(ta0.x, ta0.y, data, w, h, c); 496 | so_fill_with_closest(ta1.x, ta1.y, 
data, w, h, c); 497 | so_fill_with_closest(ta2.x, ta2.y, data, w, h, c); 498 | so_fill_with_closest(ta3.x, ta3.y, data, w, h, c); 499 | 500 | so_fill_with_closest(tb0.x, tb0.y, data, w, h, c); 501 | so_fill_with_closest(tb1.x, tb1.y, data, w, h, c); 502 | so_fill_with_closest(tb2.x, tb2.y, data, w, h, c); 503 | so_fill_with_closest(tb3.x, tb3.y, data, w, h, c); 504 | 505 | so_stitching_point_t sp; 506 | sp.sides[0].texels[0] = ta0; 507 | sp.sides[0].texels[1] = ta1; 508 | sp.sides[0].texels[2] = ta2; 509 | sp.sides[0].texels[3] = ta3; 510 | 511 | sp.sides[0].weights[0] = nau * nav; 512 | sp.sides[0].weights[1] = au * nav; 513 | sp.sides[0].weights[2] = nau * av; 514 | sp.sides[0].weights[3] = au * av; 515 | 516 | sp.sides[1].texels[0] = tb0; 517 | sp.sides[1].texels[1] = tb1; 518 | sp.sides[1].texels[2] = tb2; 519 | sp.sides[1].texels[3] = tb3; 520 | 521 | sp.sides[1].weights[0] = nbu * nbv; 522 | sp.sides[1].weights[1] = bu * nbv; 523 | sp.sides[1].weights[2] = nbu * bv; 524 | sp.sides[1].weights[3] = bu * bv; 525 | 526 | so_seam_add(¤tSeam, &sp); 527 | } 528 | 529 | so_seam_t *dstSeam = 0; 530 | for (so_seam_t **seam = seams; *seam; seam = &(*seam)->next) 531 | { 532 | retry: 533 | if (so_seams_intersect(¤tSeam, *seam)) 534 | { 535 | if (!dstSeam) // found a seam that the edge is connected to -> add current edge to that seam 536 | { 537 | so_seams_in_place_merge(*seam, ¤tSeam); 538 | dstSeam = *seam; 539 | } 540 | else // found another seam that the edge is connected to -> merge those seams 541 | { 542 | so_seams_in_place_merge(dstSeam, *seam); 543 | 544 | // remove current seam from seams 545 | so_seam_t *toDelete = *seam; 546 | *seam = (*seam)->next; 547 | so_seam_free(toDelete); 548 | so_free(toDelete); 549 | if (*seam) 550 | goto retry; // don't move to next since we already did that by deleting the current seam 551 | else 552 | break; 553 | } 554 | } 555 | } 556 | if (!dstSeam) // did not find a seam that the edge is connected to -> make a new one 557 | { 
558 | currentSeam.next = *seams; 559 | *seams = so_alloc(so_seam_t, 1); 560 | **seams = currentSeam; 561 | } 562 | else 563 | so_seam_free(¤tSeam); 564 | } 565 | 566 | void so_seams_free(so_seam_t *seams) 567 | { 568 | so_seam_t *seam = seams; 569 | while (seam) 570 | { 571 | so_seam_t *next = seam->next; 572 | so_seam_free(seam); 573 | so_free(seam); 574 | seam = next; 575 | } 576 | 577 | #ifdef SO_CHECK_FOR_MEMORY_LEAKS 578 | assert(so_allocated == 0); 579 | printf("Allocated max %d MB. Not freed: %d bytes.\n", so_allocated_max / (1024 * 1024), so_allocated); 580 | printf("These results are only correct if the lib was used single-threaded.\n"); 581 | so_allocated_max = 0; 582 | #endif 583 | } 584 | 585 | static int so_should_optimize(so_vec3 *tria, so_vec3 *trib, float cosThreshold) 586 | { 587 | so_vec3 n0 = so_normalize3(so_cross3(so_sub3(tria[1], tria[0]), so_sub3(tria[2], tria[0]))); 588 | so_vec3 n1 = so_normalize3(so_cross3(so_sub3(trib[1], trib[0]), so_sub3(trib[2], trib[0]))); 589 | return so_absf(so_dot3(n0, n1)) > cosThreshold; 590 | } 591 | 592 | so_seam_t *so_seams_find(float *positions, float *texcoords, int vertices, float cosNormalThreshold, float *data, int w, int h, int c) 593 | { 594 | so_vec3 *pos = (so_vec3*)positions; 595 | so_vec2 *uv = (so_vec2*)texcoords; 596 | 597 | so_vec3 bbmin = so_v3(FLT_MAX, FLT_MAX, FLT_MAX); 598 | so_vec3 bbmax = so_v3(-FLT_MAX, -FLT_MAX, -FLT_MAX); 599 | int *hashmap = so_alloc(int, vertices * 2); 600 | for (int i = 0; i < vertices; i++) 601 | { 602 | bbmin = so_min3(bbmin, pos[i]); 603 | bbmax = so_max3(bbmax, pos[i]); 604 | hashmap[i * 2 + 0] = -1; 605 | hashmap[i * 2 + 1] = -1; 606 | } 607 | 608 | so_vec3 bbscale = so_v3(15.9f / bbmax.x, 15.9f / bbmax.y, 15.9f / bbmax.z); 609 | 610 | so_seam_t *seams = 0; 611 | 612 | for (int i0 = 0; i0 < vertices; i0++) 613 | { 614 | int tri = i0 - (i0 % 3); 615 | int i1 = tri + ((i0 + 1) % 3); 616 | int i2 = tri + ((i0 + 2) % 3); 617 | so_vec3 p = so_mul3(so_sub3(pos[i0], 
bbmin), bbscale); 618 | int hash = (281 * (int)p.x + 569 * (int)p.y + 1447 * (int)p.z) % (vertices * 2); 619 | while (hashmap[hash] >= 0) 620 | { 621 | int oi0 = hashmap[hash]; 622 | #define SO_EQUAL(a, b) so_length3sq(so_sub3(pos[a], pos[b])) < 0.0000001f 623 | if (SO_EQUAL(oi0, i0)) 624 | { 625 | int otri = oi0 - (oi0 % 3); 626 | int oi1 = otri + ((oi0 + 1) % 3); 627 | int oi2 = otri + ((oi0 + 2) % 3); 628 | if (SO_EQUAL(oi1, i1) && so_should_optimize(pos + tri, pos + otri, cosNormalThreshold)) 629 | so_seams_add_seam(&seams, uv[i0], uv[i1], uv[oi0], uv[oi1], data, w, h, c); 630 | //else if (SO_EQUAL(oi1, i2) && so_should_optimize(pos + tri, pos + otri, cosNormalThreshold)) // this will already be detected by the other side of the seam! 631 | // so_seams_add_seam(&seams, uv[i0], uv[i2], uv[oi0], uv[oi1], data, w, h, c); 632 | else if (SO_EQUAL(oi2, i1) && so_should_optimize(pos + tri, pos + otri, cosNormalThreshold)) 633 | so_seams_add_seam(&seams, uv[i0], uv[i1], uv[oi0], uv[oi2], data, w, h, c); 634 | //break; 635 | } 636 | if (++hash == vertices * 2) 637 | hash = 0; 638 | } 639 | hashmap[hash] = i0; 640 | } 641 | 642 | so_free(hashmap); 643 | return seams; 644 | } 645 | 646 | static int so_texel_binary_search(so_texel_t *texels, int n, so_texel_t toFind) 647 | { 648 | int n_half = n / 2; 649 | so_texel_t *center = texels + n_half; 650 | if (toFind.y == center->y && toFind.x == center->x) 651 | return n_half; 652 | if (n <= 1) 653 | return -1; 654 | if (toFind.y < center->y || (toFind.y == center->y && toFind.x < center->x)) 655 | return so_texel_binary_search(texels, n_half, toFind); 656 | else 657 | { 658 | int result = so_texel_binary_search(center + 1, n - n_half - 1, toFind); 659 | return result == -1 ? 
-1 : n_half + 1 + result; 660 | } 661 | } 662 | 663 | typedef struct 664 | { 665 | int index; 666 | float value; 667 | } so_sparse_entry_t; 668 | 669 | static int so_sparse_entry_cmp(const void *a, const void *b) 670 | { 671 | so_sparse_entry_t *ae = (so_sparse_entry_t*)a; 672 | so_sparse_entry_t *be = (so_sparse_entry_t*)b; 673 | return ae->index - be->index; 674 | } 675 | 676 | typedef struct 677 | { 678 | so_sparse_entry_t *entries; 679 | int count; 680 | int capacity; 681 | } so_sparse_entries_t; 682 | 683 | static void so_sparse_matrix_alloc(so_sparse_entries_t *matrix, int capacity) 684 | { 685 | matrix->entries = so_alloc(so_sparse_entry_t, capacity); 686 | matrix->capacity = capacity; 687 | matrix->count = 0; 688 | } 689 | 690 | static void so_sparse_matrix_free(so_sparse_entries_t *matrix) 691 | { 692 | so_free(matrix->entries); 693 | *matrix = { 0 }; 694 | } 695 | 696 | static void so_sparse_matrix_add(so_sparse_entries_t *matrix, int index, float value) 697 | { 698 | if (matrix->count == matrix->capacity) 699 | { 700 | int newCapacity = matrix->capacity * 2; 701 | if (newCapacity < 64) 702 | newCapacity = 64; 703 | so_sparse_entry_t *newEntries = so_alloc(so_sparse_entry_t, newCapacity); 704 | for (int i = 0; i < matrix->count; i++) 705 | newEntries[i] = matrix->entries[i]; 706 | so_free(matrix->entries); 707 | matrix->entries = newEntries; 708 | matrix->capacity = newCapacity; 709 | } 710 | 711 | int entryIndex = matrix->count++; 712 | matrix->entries[entryIndex].index = index; 713 | matrix->entries[entryIndex].value = value; 714 | } 715 | 716 | static void so_sparse_matrix_add(so_sparse_entries_t *matrix, so_sparse_entry_t *entry) 717 | { 718 | so_sparse_matrix_add(matrix, entry->index, entry->value); 719 | } 720 | 721 | static void so_sparse_matrix_sort(so_sparse_entries_t *matrix) 722 | { 723 | qsort(matrix->entries, matrix->count, sizeof(so_sparse_entry_t), so_sparse_entry_cmp); 724 | } 725 | 726 | static so_bool 
so_sparse_matrix_advance_to_index(so_sparse_entries_t *matrix, int *position, int index, float *outValue) 727 | { 728 | int localPosition = *position; 729 | while (localPosition < matrix->count && matrix->entries[localPosition].index < index) 730 | ++localPosition; 731 | *position = localPosition; 732 | 733 | if (localPosition < matrix->count && matrix->entries[localPosition].index == index) 734 | { 735 | *outValue = matrix->entries[localPosition].value; 736 | return SO_TRUE; 737 | } 738 | 739 | return SO_FALSE; 740 | } 741 | // so_sparse_entry_hash: maps an entry index to a start slot in [0, capacity) of the entry hash set. 742 | static inline uint32_t so_sparse_entry_hash(int entryIndex, uint32_t capacity) 743 | { 744 | return ((uint32_t)entryIndex * 104173u) % capacity; // multiply as unsigned: wraps around instead of overflowing a signed int for large entry indices 745 | } 746 | 747 | static void so_sparse_entry_set_alloc(so_sparse_entries_t *set, int capacity) 748 | { 749 | set->entries = so_alloc(so_sparse_entry_t, capacity); 750 | for (int i = 0; i < capacity; i++) 751 | set->entries[i].index = -1; 752 | set->capacity = capacity; 753 | set->count = 0; 754 | } 755 | 756 | static so_sparse_entry_t *so_sparse_entry_set_get_or_add(so_sparse_entries_t *set, int index) 757 | { 758 | if (set->count + 1 > set->capacity * 3 / 4) // leave some free space to avoid having many collisions 759 | { 760 | int newCapacity = set->capacity >= 64 ? 
set->capacity * 2 : 64; 761 | so_sparse_entry_t *newEntries = so_alloc(so_sparse_entry_t, newCapacity); 762 | for (int i = 0; i < newCapacity; i++) 763 | newEntries[i].index = -1; 764 | 765 | for (int i = 0; i < set->capacity; i++) // rehash all old entries 766 | { 767 | if (set->entries[i].index != -1) 768 | { 769 | uint32_t hash = so_sparse_entry_hash(set->entries[i].index, newCapacity); 770 | while (newEntries[hash].index != -1) // collisions 771 | hash = (hash + 1) % newCapacity; 772 | newEntries[hash] = set->entries[i]; 773 | } 774 | } 775 | so_free(set->entries); 776 | set->entries = newEntries; 777 | set->capacity = newCapacity; 778 | } 779 | 780 | uint32_t hash = so_sparse_entry_hash(index, set->capacity); 781 | while (set->entries[hash].index != -1) // collisions 782 | { 783 | if (set->entries[hash].index == index) 784 | return &set->entries[hash]; // entry is already in the set 785 | hash = (hash + 1) % set->capacity; 786 | } 787 | 788 | if (set->entries[hash].index == -1) // make new entry 789 | { 790 | set->entries[hash].index = index; 791 | set->entries[hash].value = 0.0f; 792 | set->count++; 793 | return &set->entries[hash]; 794 | } 795 | 796 | return 0; // shouldn't happen 797 | } 798 | 799 | static so_sparse_entries_t so_matrix_At_times_A(const float *A, const int *sparseIndices, int maxRowIndices, int m, int n) 800 | { 801 | so_sparse_entries_t AtA; 802 | so_sparse_entry_set_alloc(&AtA, (n / 16) * (n / 16)); 803 | 804 | // compute lower left triangle only since the result is symmetric 805 | for (int k = 0; k < m; k++) 806 | { 807 | const float *srcPtr = A + k * maxRowIndices; 808 | const int *indexPtr = sparseIndices + k * maxRowIndices; 809 | for (int i = 0; i < maxRowIndices; i++) 810 | { 811 | int index_i = indexPtr[i]; 812 | if (index_i < 0) break; 813 | float v = srcPtr[i]; 814 | //float *dstPtr = AtA + index_i * n; 815 | for (int j = 0; j < maxRowIndices; j++) 816 | { 817 | int index_j = indexPtr[j]; 818 | if (index_j < 0) break; 819 | 
//dstPtr[index_j] += v * srcPtr[j]; 820 | int index = index_i * n + index_j; 821 | 822 | so_sparse_entry_t *entry = so_sparse_entry_set_get_or_add(&AtA, index); 823 | entry->value += v * srcPtr[j]; 824 | } 825 | } 826 | } 827 | 828 | // compaction step (make a compact array from the scattered hash set values) 829 | for (int i = 0, j = 0; i < AtA.capacity; i++) 830 | if (AtA.entries[i].index != -1) 831 | AtA.entries[j++] = AtA.entries[i]; 832 | 833 | // sort by index -> this is a sparse matrix now 834 | so_sparse_matrix_sort(&AtA); 835 | 836 | return AtA; 837 | } 838 | 839 | static void so_matrix_At_times_b(const float *A, int m, int n, const float *b, float *Atb, const int *sparseIndices, int maxRowIndices) 840 | { 841 | memset(Atb, 0, sizeof(float) * n); 842 | for (int j = 0; j < m; j++) 843 | { 844 | const int *rowIndices = sparseIndices + j * maxRowIndices; 845 | for (int i = 0; i < maxRowIndices; i++) 846 | { 847 | int index = rowIndices[i]; 848 | if (index < 0) break; 849 | Atb[index] += A[j * maxRowIndices + i] * b[j]; 850 | } 851 | } 852 | } 853 | 854 | static so_sparse_entries_t so_matrix_cholesky_prepare(so_sparse_entries_t *AtA, int n) 855 | { 856 | // dense 857 | //for (int i = 0; i < n; i++) 858 | //{ 859 | // float *a = L + i * n; 860 | // for (int j = 0; j <= i; j++) 861 | // { 862 | // float *b = L + j * n; 863 | // float sum = A[i * n + j];// + (i == j ? 
0.0001 : 0.0); // some regularization 864 | // for (int k = 0; k < j; k++) 865 | // sum -= a[k] * b[k]; 866 | // if (i > j) 867 | // a[j] = sum / b[j]; 868 | // else // i == j 869 | // { 870 | // if (sum <= 0.0) 871 | // return SO_FALSE; 872 | // a[i] = sqrtf(sum); 873 | // } 874 | // } 875 | //} 876 | 877 | // sparse 878 | int *indices_i; 879 | float *row_i; 880 | float *invDiag; 881 | 882 | if (n > 4096) 883 | { 884 | indices_i = so_alloc(int, n); 885 | row_i = so_alloc(float, n); 886 | invDiag = so_alloc(float, n); 887 | } 888 | else 889 | { 890 | indices_i = (int*)alloca(sizeof(int) * n); 891 | row_i = (float*)alloca(sizeof(float) * n); 892 | invDiag = (float*)alloca(sizeof(float) * n); 893 | } 894 | 895 | so_sparse_entries_t L; 896 | so_sparse_matrix_alloc(&L, (n / 16) * (n / 16)); 897 | 898 | int AtAindex = 0; 899 | for (int i = 0; i < n; i++) 900 | { 901 | int index_i_count = 0; 902 | 903 | int row_j_index = 0; 904 | for (int j = 0; j <= i; j++) 905 | { 906 | //float sum = A[i * n + j]; // + (i == j ? 
0.0001 : 0.0); // regularization 907 | int index = i * n + j; 908 | float sum = 0.0f; 909 | so_sparse_matrix_advance_to_index(AtA, &AtAindex, index, &sum); 910 | 911 | for (int k = 0; k < index_i_count; k++) 912 | { 913 | int index_i = indices_i[k]; 914 | float Lvalue; 915 | if (so_sparse_matrix_advance_to_index(&L, &row_j_index, j * n + index_i, &Lvalue)) 916 | sum -= row_i[index_i] * Lvalue; 917 | } 918 | 919 | if (i == j) 920 | { 921 | if (sum <= 0.0f) 922 | { 923 | so_sparse_matrix_free(&L); 924 | return L; 925 | } 926 | invDiag[i] = so_rsqrtf(sum); 927 | } 928 | 929 | if (SO_NOT_ZERO(sum)) 930 | { 931 | row_i[j] = sum * invDiag[j]; 932 | indices_i[index_i_count++] = j; 933 | so_sparse_matrix_add(&L, index, row_i[j]); 934 | } 935 | else 936 | row_i[j] = 0.0f; 937 | } 938 | } 939 | 940 | if (n > 4096) 941 | { 942 | so_free(indices_i); 943 | so_free(row_i); 944 | so_free(invDiag); 945 | } 946 | 947 | return L; 948 | } 949 | 950 | static void so_matrix_cholesky_solve(so_sparse_entries_t *Lrows, so_sparse_entries_t *Lcols, float *x, const float *b, int n) 951 | { 952 | float *y = (float*)alloca(sizeof(float) * n); 953 | 954 | // L * y = b 955 | int Lindex = 0; 956 | for (int i = 0; i < n; i++) 957 | { 958 | float sum = b[i]; 959 | while (Lindex < Lrows->count && Lrows->entries[Lindex].index < i * (n + 1)) 960 | { 961 | sum -= Lrows->entries[Lindex].value * y[Lrows->entries[Lindex].index - i * n]; 962 | ++Lindex; 963 | } 964 | assert(Lrows->entries[Lindex].index == i * (n + 1)); 965 | y[i] = sum / Lrows->entries[Lindex].value; 966 | ++Lindex; 967 | } 968 | 969 | // L' * x = y 970 | Lindex = Lcols->count - 1; 971 | for (int i = n - 1; i >= 0; i--) 972 | { 973 | float sum = y[i]; 974 | while (Lindex >= 0 && Lcols->entries[Lindex].index > i * (n + 1)) 975 | { 976 | sum -= Lcols->entries[Lindex].value * x[Lcols->entries[Lindex].index - i * n]; 977 | --Lindex; 978 | } 979 | assert(Lcols->entries[Lindex].index == i * (n + 1)); 980 | x[i] = sum / 
Lcols->entries[Lindex].value; 981 | --Lindex; 982 | } 983 | } 984 | 985 | so_bool so_seam_optimize(so_seam_t *seam, float *data, int w, int h, int c, float lambda) 986 | { 987 | so_texel_set_t *texels = &seam->texels; 988 | so_stitching_points_t *stitchingPoints = &seam->stitchingPoints; 989 | 990 | size_t m = stitchingPoints->count; 991 | size_t n = texels->count; 992 | 993 | void *memoryBlock = so_alloc_void( 994 | sizeof(so_texel_t) * n + 995 | sizeof(float) * (m + n) * 8 + 996 | sizeof(int) * (m + n) * 8 + 997 | sizeof(float) * (m + n) + 998 | sizeof(float) * n + 999 | sizeof(float) * n); 1000 | 1001 | uint8_t *memoryStart = (uint8_t*)memoryBlock; 1002 | 1003 | so_texel_t *texelsFlat = (so_texel_t*)memoryStart; 1004 | memoryStart += sizeof(so_texel_t) * n; 1005 | 1006 | float *A = (float*)memoryStart; 1007 | memoryStart += sizeof(float) * (m + n) * 8; 1008 | 1009 | int *AsparseIndices = (int*)memoryStart; 1010 | memoryStart += sizeof(int) * (m + n) * 8; 1011 | 1012 | float *b = (float*)memoryStart; 1013 | memoryStart += sizeof(float) * (m + n); 1014 | 1015 | float *Atb = (float*)memoryStart; 1016 | memoryStart += sizeof(float) * n; 1017 | 1018 | float *x = (float*)memoryStart; 1019 | memoryStart += sizeof(float) * n; 1020 | 1021 | for (int i = 0, j = 0; i < texels->capacity && j < n; i++) 1022 | if (texels->texels[i].x != -1) 1023 | texelsFlat[j++] = texels->texels[i]; 1024 | 1025 | qsort(texelsFlat, n, sizeof(so_texel_t), so_texel_cmp); 1026 | 1027 | size_t r = 0; 1028 | for (int i = 0; i < m; i++) 1029 | { 1030 | ptrdiff_t column0[4]; 1031 | ptrdiff_t column1[4]; 1032 | so_bool side0valid = SO_FALSE, side1valid = SO_FALSE; 1033 | for (int k = 0; k < 4; k++) 1034 | { 1035 | so_texel_t t0 = stitchingPoints->points[i].sides[0].texels[k]; 1036 | so_texel_t t1 = stitchingPoints->points[i].sides[1].texels[k]; 1037 | column0[k] = so_texel_binary_search(texelsFlat, n, t0); 1038 | column1[k] = so_texel_binary_search(texelsFlat, n, t1); 1039 | 1040 | if (column0[k] == 
-1) { side0valid = SO_FALSE; break; } 1041 | if (column1[k] == -1) { side1valid = SO_FALSE; break; } 1042 | 1043 | // test for validity of stitching point 1044 | for (int ci = 0; ci < c; ci++) 1045 | { 1046 | side0valid |= data[(t0.y * w + t0.x) * c + ci] > 0.0f; 1047 | side1valid |= data[(t1.y * w + t1.x) * c + ci] > 0.0f; 1048 | } 1049 | } 1050 | 1051 | if (side0valid && side1valid) 1052 | { 1053 | for (int k = 0; k < 4; k++) 1054 | { 1055 | A[r * 8 + k * 2 + 0] = stitchingPoints->points[i].sides[0].weights[k]; 1056 | AsparseIndices[r * 8 + k * 2 + 0] = column0[k]; 1057 | A[r * 8 + k * 2 + 1] = -stitchingPoints->points[i].sides[1].weights[k]; 1058 | AsparseIndices[r * 8 + k * 2 + 1] = column1[k]; 1059 | } 1060 | r++; 1061 | } 1062 | } 1063 | 1064 | m = r; 1065 | 1066 | // add error terms for deviation from original pixel value (scaled by lambda) 1067 | for (int i = 0; i < n; i++) 1068 | { 1069 | A[(m + i) * 8] = lambda; 1070 | AsparseIndices[(m + i) * 8 + 0] = i; 1071 | AsparseIndices[(m + i) * 8 + 1] = -1; 1072 | } 1073 | 1074 | so_sparse_entries_t AtA = so_matrix_At_times_A(A, AsparseIndices, 8, m + n, n); 1075 | so_sparse_entries_t L = so_matrix_cholesky_prepare(&AtA, n); 1076 | so_sparse_matrix_free(&AtA); 1077 | 1078 | if (!L.count) 1079 | { 1080 | so_free(memoryBlock); 1081 | return SO_FALSE; // Cholesky decomposition failed 1082 | } 1083 | 1084 | so_sparse_entries_t Lcols; 1085 | so_sparse_matrix_alloc(&Lcols, L.count); 1086 | for (int i = 0; i < L.count; i++) 1087 | so_sparse_matrix_add(&Lcols, (L.entries[i].index % n) * n + (L.entries[i].index / n), L.entries[i].value); 1088 | so_sparse_matrix_sort(&Lcols); 1089 | 1090 | // solve each color channel independently 1091 | for (int ci = 0; ci < c; ci++) 1092 | { 1093 | for (int i = 0; i < n; i++) 1094 | b[m + i] = lambda * data[(texelsFlat[i].y * w + texelsFlat[i].x) * c + ci]; 1095 | 1096 | so_matrix_At_times_b(A, m + n, n, b, Atb, AsparseIndices, 8); 1097 | so_matrix_cholesky_solve(&L, &Lcols, x, Atb, n); 
1098 | 1099 | // write out results 1100 | for (int i = 0; i < n; i++) 1101 | data[(texelsFlat[i].y * w + texelsFlat[i].x) * c + ci] = x[i]; 1102 | } 1103 | 1104 | so_free(memoryBlock); 1105 | so_sparse_matrix_free(&L); 1106 | so_sparse_matrix_free(&Lcols); 1107 | 1108 | return SO_TRUE; 1109 | } 1110 | 1111 | #endif // SEAMOPTIMIZER_IMPLEMENTATION 1112 | --------------------------------------------------------------------------------