├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── details
    ├── cie1931.h
    └── lu.h
├── rgb2spec.c
├── rgb2spec.h
└── rgb2spec_opt.cpp


/.gitignore:
--------------------------------------------------------------------------------
1 | Makefile
2 | CMakeCache.txt
3 | CMakeFiles
4 | *.cmake
5 | rgb2spec_opt
6 | librgb2spec.a
7 | .ninja_*
8 | *.ninja
9 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required (VERSION 3.2.0)
 2 | project(rgb2spec)
 3 | 
 4 | if (POLICY CMP0068)
 5 |   cmake_policy(SET CMP0068 NEW)
 6 | endif()
 7 | 
 8 | # Set a default build configuration (Release)
 9 | if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
10 |   message(STATUS "Setting build type to 'Release' as none was specified.")
11 |   set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
12 |   set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release"
13 |     "MinSizeRel" "RelWithDebInfo")
14 | endif()
15 | 
16 | if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang|Intel")
17 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
18 | endif()
19 | 
20 | 
21 | if (TARGET tbb)
22 |   add_definitions(-DRGB2SPEC_USE_TBB=1)
23 |   include_directories(${TBB_INCLUDE_DIRS})
24 | else ()
25 |   include(FindOpenMP)
26 |   if(OPENMP_FOUND)
27 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
28 |     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
29 |   endif()
30 | endif()
31 | 
32 | set(CMAKE_POSITION_INDEPENDENT_CODE ON)
33 | 
34 | add_executable(rgb2spec_opt rgb2spec_opt.cpp)
35 | add_library(rgb2spec STATIC rgb2spec.c)
36 | 
37 | if (TARGET tbb)
38 |   target_link_libraries(rgb2spec_opt PRIVATE tbb)
39 | 
40 |   add_custom_command(TARGET rgb2spec_opt POST_BUILD
41 |     COMMAND ${CMAKE_COMMAND} -E copy_if_different
42 |       $<TARGET_FILE:tbb>
43 |       $<TARGET_FILE_DIR:rgb2spec_opt>
44 |   )
45 | endif()
46 | 
47 | add_custom_command(
48 |   OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/srgb.coeff
49 |   DEPENDS rgb2spec_opt
50 |   COMMAND ${CMAKE_COMMAND} -E env LD_LIBRARY_PATH=.:$ENV{LD_LIBRARY_PATH}
51 |   $<TARGET_FILE:rgb2spec_opt> 64 ${CMAKE_CURRENT_BINARY_DIR}/srgb.coeff
52 | )
53 | 
54 | add_custom_target(
55 |   rgb2spec_opt_run
56 |   DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/srgb.coeff
57 | )
58 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2020 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are met:
 5 | 
 6 | 1. Redistributions of source code must retain the above copyright notice, this
 7 |    list of conditions and the following disclaimer.
 8 | 
 9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 |    this list of conditions and the following disclaimer in the documentation
11 |    and/or other materials provided with the distribution.
12 | 
13 | 3. Neither the name of the copyright holder nor the names of its contributors
14 |    may be used to endorse or promote products derived from this software
15 |    without specific prior written permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
This repository contains an implementation of the paper [A Low-Dimensional
Function Space for Efficient Spectral
Upsampling](http://rgl.epfl.ch/publications/Jakob2019Spectral) by Wenzel Jakob
and Johannes Hanika.

In comparison to the supplemental material of the original paper, which
optimized polynomial coefficients using Google's Ceres solver, the code here
relies on a much simpler and self-contained Gauss-Newton solver. Mitsuba and
PBRT use a variant of this code that is simply executed as part of the CMake
build system.
10 | 


--------------------------------------------------------------------------------
/details/cie1931.h:
--------------------------------------------------------------------------------
/**
 * This file contains:
 *
 * 1. CIE 1931 curves sampled at 5nm intervals
 *
 * 2. CIE D65 and D50 spectra sampled at 5nm intervals.
 *    Both are normalized to have unit luminance.
 *    Tables for the E and D60 illuminants ("cie_e", "cie_d60") are
 *    provided as well, each scaled by its own constant.
 *
 * 3. XYZ <-> sRGB conversion matrices
 *    XYZ <-> ProPhoto RGB conversion matrices
 *    XYZ <-> ERGB, ACES2065-1 and Rec. 2020 conversion matrices,
 *    plus the identity matrix "xyz_to_xyz"
 *
 * 4. A convenience function "cie_interp" to access the discretized
 *    data at arbitrary wavelengths (with linear interpolation)
 */
 16 | #pragma once
 17 | #define CIE_LAMBDA_MIN 360.0
 18 | #define CIE_LAMBDA_MAX 830.0
 19 | #define CIE_SAMPLES    95
 20 | 
 21 | const double cie_x[CIE_SAMPLES] = {
 22 |     0.000129900000, 0.000232100000, 0.000414900000, 0.000741600000,
 23 |     0.001368000000, 0.002236000000, 0.004243000000, 0.007650000000,
 24 |     0.014310000000, 0.023190000000, 0.043510000000, 0.077630000000,
 25 |     0.134380000000, 0.214770000000, 0.283900000000, 0.328500000000,
 26 |     0.348280000000, 0.348060000000, 0.336200000000, 0.318700000000,
 27 |     0.290800000000, 0.251100000000, 0.195360000000, 0.142100000000,
 28 |     0.095640000000, 0.057950010000, 0.032010000000, 0.014700000000,
 29 |     0.004900000000, 0.002400000000, 0.009300000000, 0.029100000000,
 30 |     0.063270000000, 0.109600000000, 0.165500000000, 0.225749900000,
 31 |     0.290400000000, 0.359700000000, 0.433449900000, 0.512050100000,
 32 |     0.594500000000, 0.678400000000, 0.762100000000, 0.842500000000,
 33 |     0.916300000000, 0.978600000000, 1.026300000000, 1.056700000000,
 34 |     1.062200000000, 1.045600000000, 1.002600000000, 0.938400000000,
 35 |     0.854449900000, 0.751400000000, 0.642400000000, 0.541900000000,
 36 |     0.447900000000, 0.360800000000, 0.283500000000, 0.218700000000,
 37 |     0.164900000000, 0.121200000000, 0.087400000000, 0.063600000000,
 38 |     0.046770000000, 0.032900000000, 0.022700000000, 0.015840000000,
 39 |     0.011359160000, 0.008110916000, 0.005790346000, 0.004109457000,
 40 |     0.002899327000, 0.002049190000, 0.001439971000, 0.000999949300,
 41 |     0.000690078600, 0.000476021300, 0.000332301100, 0.000234826100,
 42 |     0.000166150500, 0.000117413000, 0.000083075270, 0.000058706520,
 43 |     0.000041509940, 0.000029353260, 0.000020673830, 0.000014559770,
 44 |     0.000010253980, 0.000007221456, 0.000005085868, 0.000003581652,
 45 |     0.000002522525, 0.000001776509, 0.000001251141 };
 46 | 
 47 | const double cie_y[CIE_SAMPLES] = {
 48 |     0.000003917000, 0.000006965000, 0.000012390000, 0.000022020000,
 49 |     0.000039000000, 0.000064000000, 0.000120000000, 0.000217000000,
 50 |     0.000396000000, 0.000640000000, 0.001210000000, 0.002180000000,
 51 |     0.004000000000, 0.007300000000, 0.011600000000, 0.016840000000,
 52 |     0.023000000000, 0.029800000000, 0.038000000000, 0.048000000000,
 53 |     0.060000000000, 0.073900000000, 0.090980000000, 0.112600000000,
 54 |     0.139020000000, 0.169300000000, 0.208020000000, 0.258600000000,
 55 |     0.323000000000, 0.407300000000, 0.503000000000, 0.608200000000,
 56 |     0.710000000000, 0.793200000000, 0.862000000000, 0.914850100000,
 57 |     0.954000000000, 0.980300000000, 0.994950100000, 1.000000000000,
 58 |     0.995000000000, 0.978600000000, 0.952000000000, 0.915400000000,
 59 |     0.870000000000, 0.816300000000, 0.757000000000, 0.694900000000,
 60 |     0.631000000000, 0.566800000000, 0.503000000000, 0.441200000000,
 61 |     0.381000000000, 0.321000000000, 0.265000000000, 0.217000000000,
 62 |     0.175000000000, 0.138200000000, 0.107000000000, 0.081600000000,
 63 |     0.061000000000, 0.044580000000, 0.032000000000, 0.023200000000,
 64 |     0.017000000000, 0.011920000000, 0.008210000000, 0.005723000000,
 65 |     0.004102000000, 0.002929000000, 0.002091000000, 0.001484000000,
 66 |     0.001047000000, 0.000740000000, 0.000520000000, 0.000361100000,
 67 |     0.000249200000, 0.000171900000, 0.000120000000, 0.000084800000,
 68 |     0.000060000000, 0.000042400000, 0.000030000000, 0.000021200000,
 69 |     0.000014990000, 0.000010600000, 0.000007465700, 0.000005257800,
 70 |     0.000003702900, 0.000002607800, 0.000001836600, 0.000001293400,
 71 |     0.000000910930, 0.000000641530, 0.000000451810
 72 | };
 73 | 
 74 | const double cie_z[CIE_SAMPLES] = {
 75 |     0.000606100000, 0.001086000000, 0.001946000000, 0.003486000000,
 76 |     0.006450001000, 0.010549990000, 0.020050010000, 0.036210000000,
 77 |     0.067850010000, 0.110200000000, 0.207400000000, 0.371300000000,
 78 |     0.645600000000, 1.039050100000, 1.385600000000, 1.622960000000,
 79 |     1.747060000000, 1.782600000000, 1.772110000000, 1.744100000000,
 80 |     1.669200000000, 1.528100000000, 1.287640000000, 1.041900000000,
 81 |     0.812950100000, 0.616200000000, 0.465180000000, 0.353300000000,
 82 |     0.272000000000, 0.212300000000, 0.158200000000, 0.111700000000,
 83 |     0.078249990000, 0.057250010000, 0.042160000000, 0.029840000000,
 84 |     0.020300000000, 0.013400000000, 0.008749999000, 0.005749999000,
 85 |     0.003900000000, 0.002749999000, 0.002100000000, 0.001800000000,
 86 |     0.001650001000, 0.001400000000, 0.001100000000, 0.001000000000,
 87 |     0.000800000000, 0.000600000000, 0.000340000000, 0.000240000000,
 88 |     0.000190000000, 0.000100000000, 0.000049999990, 0.000030000000,
 89 |     0.000020000000, 0.000010000000, 0.000000000000, 0.000000000000,
 90 |     0.000000000000, 0.000000000000, 0.000000000000, 0.000000000000,
 91 |     0.000000000000, 0.000000000000, 0.000000000000, 0.000000000000,
 92 |     0.000000000000, 0.000000000000, 0.000000000000, 0.000000000000,
 93 |     0.000000000000, 0.000000000000, 0.000000000000, 0.000000000000,
 94 |     0.000000000000, 0.000000000000, 0.000000000000, 0.000000000000,
 95 |     0.000000000000, 0.000000000000, 0.000000000000, 0.000000000000,
 96 |     0.000000000000, 0.000000000000, 0.000000000000, 0.000000000000,
 97 |     0.000000000000, 0.000000000000, 0.000000000000, 0.000000000000,
 98 |     0.000000000000, 0.000000000000, 0.000000000000
 99 | };
100 | 
101 | #define N(x) (x / 10566.864005283874576)
102 | 
103 | const double cie_d65[CIE_SAMPLES] = {
104 |     N(46.6383),  N(49.3637),  N(52.0891),  N(51.0323),  N(49.9755),  N(52.3118),  N(54.6482),  N(68.7015),
105 |     N(82.7549),  N(87.1204),  N(91.486),   N(92.4589),  N(93.4318),  N(90.057),   N(86.6823),  N(95.7736),
106 |     N(104.865),  N(110.936),  N(117.008),  N(117.41),   N(117.812),  N(116.336),  N(114.861),  N(115.392),
107 |     N(115.923),  N(112.367),  N(108.811),  N(109.082),  N(109.354),  N(108.578),  N(107.802),  N(106.296),
108 |     N(104.79),   N(106.239),  N(107.689),  N(106.047),  N(104.405),  N(104.225),  N(104.046),  N(102.023),
109 |     N(100.0),    N(98.1671),  N(96.3342),  N(96.0611),  N(95.788),   N(92.2368),  N(88.6856),  N(89.3459),
110 |     N(90.0062),  N(89.8026),  N(89.5991),  N(88.6489),  N(87.6987),  N(85.4936),  N(83.2886),  N(83.4939),
111 |     N(83.6992),  N(81.863),   N(80.0268),  N(80.1207),  N(80.2146),  N(81.2462),  N(82.2778),  N(80.281),
112 |     N(78.2842),  N(74.0027),  N(69.7213),  N(70.6652),  N(71.6091),  N(72.979),   N(74.349),   N(67.9765),
113 |     N(61.604),   N(65.7448),  N(69.8856),  N(72.4863),  N(75.087),   N(69.3398),  N(63.5927),  N(55.0054),
114 |     N(46.4182),  N(56.6118),  N(66.8054),  N(65.0941),  N(63.3828),  N(63.8434),  N(64.304),   N(61.8779),
115 |     N(59.4519),  N(55.7054),  N(51.959),   N(54.6998),  N(57.4406),  N(58.8765),  N(60.3125)
116 | };
117 | 
118 | #undef N
119 | 
120 | #define N(x) (x / 106.8)
121 | const double cie_e[CIE_SAMPLES] = {
122 |     N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0),  N(1.0),
123 |     N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0),  N(1.0),
124 |     N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0),  N(1.0),
125 |     N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0),  N(1.0),
126 |     N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0),  N(1.0),
127 |     N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0),  N(1.0),
128 |     N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0),  N(1.0),
129 |     N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0),  N(1.0),
130 |     N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0),  N(1.0),
131 |     N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0),  N(1.0),
132 |     N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0),  N(1.0),
133 |     N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0), N(1.0)
134 | };
135 | #undef N
136 | 
137 | #define N(x) (x / 10503.2)
138 | 
139 | const double cie_d50[CIE_SAMPLES] = {
140 |     N(23.942000),  N(25.451000),  N(26.961000),  N(25.724000),  N(24.488000),
141 |     N(27.179000),  N(29.871000),  N(39.589000),  N(49.308000),  N(52.910000),
142 |     N(56.513000),  N(58.273000),  N(60.034000),  N(58.926000),  N(57.818000),
143 |     N(66.321000),  N(74.825000),  N(81.036000),  N(87.247000),  N(88.930000),
144 |     N(90.612000),  N(90.990000),  N(91.368000),  N(93.238000),  N(95.109000),
145 |     N(93.536000),  N(91.963000),  N(93.843000),  N(95.724000),  N(96.169000),
146 |     N(96.613000),  N(96.871000),  N(97.129000),  N(99.614000),  N(102.099000),
147 |     N(101.427000), N(100.755000), N(101.536000), N(102.317000), N(101.159000),
148 |     N(100.000000), N(98.868000),  N(97.735000),  N(98.327000),  N(98.918000),
149 |     N(96.208000),  N(93.499000),  N(95.593000),  N(97.688000),  N(98.478000),
150 |     N(99.269000),  N(99.155000),  N(99.042000),  N(97.382000),  N(95.722000),
151 |     N(97.290000),  N(98.857000),  N(97.262000),  N(95.667000),  N(96.929000),
152 |     N(98.190000),  N(100.597000), N(103.003000), N(101.068000), N(99.133000),
153 |     N(93.257000),  N(87.381000),  N(89.492000),  N(91.604000),  N(92.246000),
154 |     N(92.889000),  N(84.872000),  N(76.854000),  N(81.683000),  N(86.511000),
155 |     N(89.546000),  N(92.580000),  N(85.405000),  N(78.230000),  N(67.961000),
156 |     N(57.692000),  N(70.307000),  N(82.923000),  N(80.599000),  N(78.274000),
157 |     N(0),          N(0),          N(0),          N(0),          N(0),
158 |     N(0),          N(0),          N(0),          N(0)
159 | };
160 | 
161 | #undef N
162 | 
163 | #define N(x) (x / 10536.3)
164 | 
165 | const double cie_d60[CIE_SAMPLES] = {
166 |     N(38.683115),  N(41.014457),  N(42.717548),  N(42.264182),  N(41.454941),
167 |     N(41.763698),  N(46.605319),  N(59.226938),  N(72.278594),  N(78.231500),
168 |     N(80.440600),  N(82.739580),  N(82.915027),  N(79.009168),  N(77.676264),
169 |     N(85.163609),  N(95.681274),  N(103.267764), N(107.954821), N(109.777964),
170 |     N(109.559187), N(108.418402), N(107.758141), N(109.071548), N(109.671404),
171 |     N(106.734741), N(103.707873), N(103.981942), N(105.232199), N(105.235867),
172 |     N(104.427667), N(103.052881), N(102.522934), N(104.371416), N(106.052671),
173 |     N(104.948900), N(103.315154), N(103.416286), N(103.538599), N(102.099304),
174 |     N(100.000000), N(97.992725),  N(96.751421),  N(97.102402),  N(96.712823),
175 |     N(93.174457),  N(89.921479),  N(90.351933),  N(91.999793),  N(92.384009),
176 |     N(92.098710),  N(91.722859),  N(90.646003),  N(88.327552),  N(86.526483),
177 |     N(87.034239),  N(87.579186),  N(85.884584),  N(83.976140),  N(83.743140),
178 |     N(84.724074),  N(86.450818),  N(87.493491),  N(86.546330),  N(83.483070),
179 |     N(78.268785),  N(74.172451),  N(74.275184),  N(76.620385),  N(79.423856),
180 |     N(79.051849),  N(71.763360),  N(65.471371),  N(67.984085),  N(74.106079),
181 |     N(78.556612),  N(79.527120),  N(75.584935),  N(67.307163),  N(55.275106),
182 |     N(49.273538),  N(59.008629),  N(70.892412),  N(70.950115),  N(67.163996),
183 |     N(67.445480),  N(68.171371),  N(66.466636),  N(62.989809),  N(58.067786),
184 |     N(54.990892),  N(56.915942),  N(60.825601),  N(62.987850)
185 | };
186 | 
187 | #undef N
188 | 
189 | const double xyz_to_srgb[3][3] = {
190 |     { 3.240479, -1.537150, -0.498535 },
191 |     {-0.969256,  1.875991,  0.041556 },
192 |     { 0.055648, -0.204043,  1.057311 }
193 | };
194 | 
195 | const double srgb_to_xyz[3][3] = {
196 |     { 0.412453, 0.357580, 0.180423 },
197 |     { 0.212671, 0.715160, 0.072169 },
198 |     { 0.019334, 0.119193, 0.950227 }
199 | };
200 | 
201 | const double xyz_to_xyz[3][3] = {
202 |   { 1.0, 0.0, 0.0 },
203 |   { 0.0, 1.0, 0.0 },
204 |   { 0.0, 0.0, 1.0 },
205 | };
206 | 
207 | const double xyz_to_ergb[3][3] = {
208 |   {  2.689989, -1.276020, -0.413844},
209 |   { -1.022095,  1.978261,  0.043821},
210 |   {  0.061203, -0.224411,  1.162859},
211 | };
212 | 
213 | const double ergb_to_xyz[3][3] = {
214 |   { 0.496859,  0.339094,  0.164047 },
215 |   { 0.256193,  0.678188,  0.065619 },
216 |   { 0.023290,  0.113031,  0.863978 },
217 | };
218 | 
219 | const double xyz_to_prophoto_rgb[3][3] = {
220 |     { 1.3459433,  -0.2556075, -0.0511118 },
221 |     { -0.5445989,  1.5081673,  0.0205351 },
222 |     {  0.0000000,  0.0000000,  1.2118128 }
223 | };
224 | 
225 | const double prophoto_rgb_to_xyz[3][3] = {
226 |     { 0.7976749,  0.1351917,  0.0313534 },
227 |     { 0.2880402,  0.7118741,  0.0000857 },
228 |     { 0.0000000,  0.0000000,  0.8252100 }
229 | };
230 | 
231 | const double xyz_to_aces2065_1[3][3] = {
232 |     {  1.0498110175, 0.0000000000, -0.0000974845 },
233 |     { -0.4959030231, 1.3733130458, 0.0982400361 },
234 |     {  0.0000000000, 0.0000000000, 0.9912520182 }
235 | };
236 | 
237 | const double aces2065_1_to_xyz[3][3] = {
238 |     { 0.9525523959, 0.0000000000, 0.0000936786 },
239 |     { 0.3439664498, 0.7281660966, -0.0721325464 },
240 |     { 0.0000000000, 0.0000000000, 1.0088251844 }
241 | };
242 | 
243 | const double xyz_to_rec2020[3][3] = {
244 |     {  1.7166511880, -0.3556707838, -0.2533662814 },
245 |     { -0.6666843518,  1.6164812366,  0.0157685458 },
246 |     {  0.0176398574, -0.0427706133,  0.9421031212 }
247 | };
248 | 
249 | const double rec2020_to_xyz[3][3] = {
250 |     { 0.6369580483, 0.1446169036, 0.1688809752 },
251 |     { 0.2627002120, 0.6779980715, 0.0593017165 },
252 |     { 0.0000000000, 0.0280726930, 1.0609850577 }
253 | };
254 | 
255 | double cie_interp(const double *data, double x) {
256 |     x -= CIE_LAMBDA_MIN;
257 |     x *= (CIE_SAMPLES - 1) / (CIE_LAMBDA_MAX - CIE_LAMBDA_MIN);
258 |     int offset = (int) x;
259 |     if (offset < 0)
260 |         offset = 0;
261 |     if (offset > CIE_SAMPLES - 2)
262 |         offset = CIE_SAMPLES - 2;
263 |     double weight = x - offset;
264 |     return (1.0 - weight) * data[offset] + weight * data[offset + 1];
265 | }
266 | 


--------------------------------------------------------------------------------
/details/lu.h:
--------------------------------------------------------------------------------
 1 | // LU decomposition & triangular solving code lifted from Wikipedia
 2 | 
 3 | /* INPUT: A - array of pointers to rows of a square matrix having dimension N
 4 |  *        Tol - small tolerance number to detect failure when the matrix is near degenerate
 5 |  * OUTPUT: Matrix A is changed, it contains both matrices L-E and U as A=(L-E)+U such that P*A=L*U.
 6 |  *        The permutation matrix is not stored as a matrix, but in an integer vector P of size N+1
 7 |  *        containing column indexes where the permutation matrix has "1". The last element P[N]=S+N,
 8 |  *        where S is the number of row exchanges needed for determinant computation, det(P)=(-1)^S
 9 |  */
10 | int LUPDecompose(double **A, int N, double Tol, int *P) {
11 | 
12 |     int i, j, k, imax;
13 |     double maxA, *ptr, absA;
14 | 
15 |     for (i = 0; i <= N; i++)
16 |         P[i] = i; //Unit permutation matrix, P[N] initialized with N
17 | 
18 |     for (i = 0; i < N; i++) {
19 |         maxA = 0.0;
20 |         imax = i;
21 | 
22 |         for (k = i; k < N; k++)
23 |             if ((absA = fabs(A[k][i])) > maxA) {
24 |                 maxA = absA;
25 |                 imax = k;
26 |             }
27 | 
28 |         if (maxA < Tol) return 0; //failure, matrix is degenerate
29 | 
30 |         if (imax != i) {
31 |             //pivoting P
32 |             j = P[i];
33 |             P[i] = P[imax];
34 |             P[imax] = j;
35 | 
36 |             //pivoting rows of A
37 |             ptr = A[i];
38 |             A[i] = A[imax];
39 |             A[imax] = ptr;
40 | 
41 |             //counting pivots starting from N (for determinant)
42 |             P[N]++;
43 |         }
44 | 
45 |         for (j = i + 1; j < N; j++) {
46 |             A[j][i] /= A[i][i];
47 | 
48 |             for (k = i + 1; k < N; k++)
49 |                 A[j][k] -= A[j][i] * A[i][k];
50 |         }
51 |     }
52 | 
53 |     return 1;  //decomposition done
54 | }
55 | 
/* Solve A*x=b using a factorization computed by LUPDecompose.
 *
 * INPUT:  A,P - factors and permutation as filled in by LUPDecompose
 *         b   - right hand side vector
 *         N   - dimension
 * OUTPUT: x   - solution vector of A*x=b
 */
void LUPSolve(double **A, int *P, double *b, int N, double *x) {
    /* Forward substitution with the unit lower triangular factor, reading
       the right hand side in permuted order */
    for (int row = 0; row < N; row++) {
        double acc = b[P[row]];

        for (int col = 0; col < row; col++)
            acc -= A[row][col] * x[col];

        x[row] = acc;
    }

    /* Back substitution with the upper triangular factor */
    for (int row = N; row-- > 0; ) {
        double acc = x[row];

        for (int col = row + 1; col < N; col++)
            acc -= A[row][col] * x[col];

        x[row] = acc / A[row][row];
    }
}
75 | 


--------------------------------------------------------------------------------
/rgb2spec.c:
--------------------------------------------------------------------------------
  1 | #include "rgb2spec.h"
  2 | #include <stdlib.h>
  3 | #include <stdio.h>
  4 | #include <string.h>
  5 | #include <assert.h>
  6 | #include <math.h>
  7 | 
  8 | #define rgb2spec_min(a, b) (((a) < (b)) ? (a) : (b))
  9 | #define rgb2spec_max(a, b) (((a) > (b)) ? (a) : (b))
 10 | 
 11 | /// Load a RGB2Spec model from disk
 12 | RGB2Spec *rgb2spec_load(const char *filename) {
 13 |     FILE *f = fopen(filename, "rb");
 14 |     if (!f)
 15 |         return NULL;
 16 | 
 17 |     char header[4];
 18 |     if (fread(header, 4, 1, f) != 1 || memcmp(header, "SPEC", 4) != 0) {
 19 |         fclose(f);
 20 |         return NULL;
 21 |     }
 22 | 
 23 |     RGB2Spec *m = (RGB2Spec *) malloc(sizeof(RGB2Spec));
 24 |     if (!m || fread(&m->res, sizeof(uint32_t), 1, f) != 1) {
 25 |         fclose(f);
 26 |         free(m);
 27 |         return NULL;
 28 |     }
 29 | 
 30 |     size_t size_scale = sizeof(float) * m->res,
 31 |            size_data  = sizeof(float) * m->res * m->res *
 32 |                         m->res * 3 * RGB2SPEC_N_COEFFS;
 33 | 
 34 |     m->scale = (float *) malloc(size_scale);
 35 |     m->data = (float *) malloc(size_data);
 36 | 
 37 |     if (!m->data || !m->scale ||
 38 |         fread(m->scale, size_scale, 1, f) != 1 ||
 39 |         fread(m->data, size_data, 1, f) != 1) {
 40 |         fclose(f);
 41 |         rgb2spec_free(m);
 42 |         return NULL;
 43 |     }
 44 | 
 45 |     fclose(f);
 46 |     return m;
 47 | }
 48 | 
 49 | /// Release all memory associated with a RGB2Spec model
 50 | void rgb2spec_free(RGB2Spec *model) {
 51 |     free(model->scale);
 52 |     free(model->data);
 53 |     free(model);
 54 | }
 55 | 
 56 | static int rgb2spec_find_interval(float *values, int size_, float x) {
 57 |     int left = 0,
 58 |         last_interval = size_ - 2,
 59 |         size = last_interval;
 60 | 
 61 |     while (size > 0) {
 62 |         int half   = size >> 1,
 63 |             middle = left + half + 1;
 64 | 
 65 |         if (values[middle] <= x) {
 66 |             left = middle;
 67 |             size -= half + 1;
 68 |         } else {
 69 |             size = half;
 70 |         }
 71 |     }
 72 | 
 73 |     return rgb2spec_min(left, last_interval);
 74 | }
 75 | 
 76 | /// Convert an RGB value into a RGB2Spec coefficient representation
 77 | void rgb2spec_fetch(RGB2Spec *model, float rgb_[3], float out[RGB2SPEC_N_COEFFS]) {
 78 |     /* Determine largest RGB component */
 79 |     int i = 0, res = model->res;
 80 |     float rgb[3];
 81 |     for (int j = 0; j < 3; ++j)
 82 |         rgb[j] = rgb2spec_max(rgb2spec_min(rgb_[j], 1.f), 0.f);
 83 | 
 84 |     for (int j = 1; j < 3; ++j)
 85 |         if (rgb[j] >= rgb[i])
 86 |             i = j;
 87 | 
 88 |     float z     = rgb[i],
 89 |           scale = (res - 1) / z,
 90 |           x     = rgb[(i + 1) % 3] * scale,
 91 |           y     = rgb[(i + 2) % 3] * scale;
 92 | 
 93 |     /* Trilinearly interpolated lookup */
 94 |     uint32_t xi = rgb2spec_min((uint32_t) x, (uint32_t) (res - 2)),
 95 |              yi = rgb2spec_min((uint32_t) y, (uint32_t) (res - 2)),
 96 |              zi = rgb2spec_find_interval(model->scale, model->res, z),
 97 |              offset = (((i * res + zi) * res + yi) * res + xi) * RGB2SPEC_N_COEFFS,
 98 |              dx = RGB2SPEC_N_COEFFS,
 99 |              dy = RGB2SPEC_N_COEFFS * res,
100 |              dz = RGB2SPEC_N_COEFFS * res * res;
101 | 
102 |     float x1 = x - xi, x0 = 1.f - x1,
103 |           y1 = y - yi, y0 = 1.f - y1,
104 |           z1 = (z - model->scale[zi]) /
105 |                (model->scale[zi + 1] - model->scale[zi]),
106 |           z0 = 1.f - z1;
107 | 
108 |     for (int j = 0; j < RGB2SPEC_N_COEFFS; ++j) {
109 |         out[j] = ((model->data[offset               ] * x0 +
110 |                    model->data[offset + dx          ] * x1) * y0 +
111 |                   (model->data[offset + dy          ] * x0 +
112 |                    model->data[offset + dy + dx     ] * x1) * y1) * z0 +
113 |                  ((model->data[offset + dz          ] * x0 +
114 |                    model->data[offset + dz + dx     ] * x1) * y0 +
115 |                   (model->data[offset + dz + dy     ] * x0 +
116 |                    model->data[offset + dz + dy + dx] * x1) * y1) * z1;
117 |         offset++;
118 |     }
119 | }
120 | 
/// Scalar multiply-add: computes a * b + c
static inline float rgb2spec_fma(float a, float b, float c) {
#if !defined(__FMA__)
    /* No hardware FMA: a plain multiply and add is the faster choice */
    return a*b + c;
#else
    /* fmaf() maps to a single instruction when FMA is available */
    return fmaf(a, b, c);
#endif
}
129 | 
130 | float rgb2spec_eval_precise(float coeff[RGB2SPEC_N_COEFFS], float lambda) {
131 |     float x = rgb2spec_fma(rgb2spec_fma(coeff[0], lambda, coeff[1]), lambda, coeff[2]),
132 |           y = 1.f / sqrtf(rgb2spec_fma(x, x, 1.f));
133 |     return rgb2spec_fma(.5f * x, y, .5f);
134 | }
135 | 
136 | float rgb2spec_eval_fast(float coeff[RGB2SPEC_N_COEFFS], float lambda) {
137 |     float x = rgb2spec_fma(rgb2spec_fma(coeff[0], lambda, coeff[1]), lambda, coeff[2]),
138 |           y = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(rgb2spec_fma(x, x, 1.f))));
139 |     return rgb2spec_fma(.5f * x, y, .5f);
140 | }
141 | 
#if defined(__SSE4_2__)
/// Multiply-add (a * b + c) on four packed floats
static inline __m128 rgb2spec_fma128(__m128 a, __m128 b, __m128 c) {
#if !defined(__FMA__)
    /// Fallback for pre-Haswell architectures
    return _mm_add_ps(_mm_mul_ps(a, b), c);
#else
    return _mm_fmadd_ps(a, b, c);
#endif
}

/// SSE 4.2 version -- evaluates 4 wavelengths at once
__m128 rgb2spec_eval_sse(float coeff[RGB2SPEC_N_COEFFS], __m128 lambda) {
    const __m128 half = _mm_set1_ps(.5f),
                 one  = _mm_set1_ps(1.f);

    /* Horner evaluation of the polynomial with broadcast coefficients */
    __m128 poly = rgb2spec_fma128(_mm_set1_ps(coeff[0]), lambda,
                                  _mm_set1_ps(coeff[1]));
    poly = rgb2spec_fma128(poly, lambda, _mm_set1_ps(coeff[2]));

    /* Approximate 1 / sqrt(poly^2 + 1) */
    __m128 inv_norm = _mm_rsqrt_ps(rgb2spec_fma128(poly, poly, one));

    return rgb2spec_fma128(_mm_mul_ps(half, poly), inv_norm, half);
}
#endif
163 | 
#if defined(__AVX__)
/// Multiply-add (a * b + c) on eight packed floats
/* Internal helper: 'static inline' like rgb2spec_fma128, so that a symbol
   which rgb2spec.h does not declare is not exported from the library. */
static inline __m256 rgb2spec_fma256(__m256 a, __m256 b, __m256 c) {
    #if defined(__FMA__)
        return _mm256_fmadd_ps(a, b, c);
    #else
        /// Fallback for pre-Haswell architectures
        return _mm256_add_ps(_mm256_mul_ps(a, b), c);
    #endif
}

/// AVX version -- evaluates 8 wavelengths at once
/* Same formula as the scalar variants, with an approximate reciprocal
   square root: 0.5 * p / sqrt(p * p + 1) + 0.5 for the polynomial p. */
__m256 rgb2spec_eval_avx(float coeff[RGB2SPEC_N_COEFFS], __m256 lambda) {
    __m256 c0 = _mm256_set1_ps(coeff[0]), c1 = _mm256_set1_ps(coeff[1]),
           c2 = _mm256_set1_ps(coeff[2]), h = _mm256_set1_ps(.5f),
           o = _mm256_set1_ps(1.f);

    __m256 x = rgb2spec_fma256(rgb2spec_fma256(c0, lambda, c1), lambda, c2),
           y = _mm256_rsqrt_ps(rgb2spec_fma256(x, x, o));

    return rgb2spec_fma256(_mm256_mul_ps(h, x), y, h);
}
#endif
185 | 
#if defined(__AVX512F__)
/// AVX512 version -- evaluates 16 wavelengths at once
__m512 rgb2spec_eval_avx512(float coeff[RGB2SPEC_N_COEFFS], __m512 lambda) {
    const __m512 half = _mm512_set1_ps(.5f),
                 one  = _mm512_set1_ps(1.f);

    /* Horner evaluation of the polynomial with broadcast coefficients */
    __m512 poly = _mm512_fmadd_ps(_mm512_set1_ps(coeff[0]), lambda,
                                  _mm512_set1_ps(coeff[1]));
    poly = _mm512_fmadd_ps(poly, lambda, _mm512_set1_ps(coeff[2]));

    /* Approximate 1 / sqrt(poly^2 + 1) */
    __m512 inv_norm = _mm512_rsqrt14_ps(_mm512_fmadd_ps(poly, poly, one));

    return _mm512_fmadd_ps(_mm512_mul_ps(half, poly), inv_norm, half);
}
#endif
198 | 
199 | 


--------------------------------------------------------------------------------
/rgb2spec.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <stdint.h>
 4 | #include <immintrin.h>
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C"
 8 | {
 9 | #endif
10 | 
11 | /// Number of polynomial coefficients per coefficient vector
12 | #define RGB2SPEC_N_COEFFS 3
13 | 
14 | /// Underlying representation of a model, as returned by rgb2spec_load()
15 | typedef struct {
16 |     uint32_t res;
17 |     float *scale;
18 |     float *data;
19 | } RGB2Spec;
20 | 
21 | /// Load an RGB2Spec model from the file 'filename' on disk
22 | RGB2Spec *rgb2spec_load(const char *filename);
23 | 
24 | /// Release all memory associated with an RGB2Spec model
25 | void rgb2spec_free(RGB2Spec *model);
26 | 
27 | /// Convert an RGB value into an RGB2Spec coefficient representation ('out')
28 | void rgb2spec_fetch(RGB2Spec *model, float rgb[3], float out[RGB2SPEC_N_COEFFS]);
29 | 
30 | /// Evaluate the coefficient representation 'coeff' at wavelength 'lambda'
31 | float rgb2spec_eval_precise(float coeff[RGB2SPEC_N_COEFFS], float lambda);
32 | 
33 | /// Evaluate 'coeff' at wavelength 'lambda' (fast, with reciprocal square root)
34 | float rgb2spec_eval_fast(float coeff[RGB2SPEC_N_COEFFS], float lambda);
35 | 
36 | #if defined(__SSE4_2__)
37 |     /// SSE 4.2 version -- evaluates 4 wavelengths at once (needs __SSE4_2__)
38 |     __m128 rgb2spec_eval_sse(float coeff[RGB2SPEC_N_COEFFS], __m128 lambda);
39 | #endif
40 | 
41 | #if defined(__AVX__)
42 |    /// AVX version -- evaluates 8 wavelengths at once (needs __AVX__)
43 |    __m256 rgb2spec_eval_avx(float coeff[RGB2SPEC_N_COEFFS], __m256 lambda);
44 | #endif
45 | 
46 | #if defined(__AVX512F__)
47 |    /// AVX512 version -- evaluates 16 wavelengths at once (needs __AVX512F__)
48 |    __m512 rgb2spec_eval_avx512(float coeff[RGB2SPEC_N_COEFFS], __m512 lambda);
49 | #endif
50 | 
51 | #ifdef __cplusplus
52 | } // extern "C"
53 | #endif
54 | 


--------------------------------------------------------------------------------
/rgb2spec_opt.cpp:
--------------------------------------------------------------------------------
  1 | #if defined(_MSC_VER)
  2 | #  define NOMINMAX
  3 | #  define strcasecmp _stricmp
  4 | #endif
  5 | 
  6 | #include <cmath>
  7 | #include <cstring>
  8 | #include <stdexcept>
  9 | #include <iostream>
 10 | #include <algorithm>
 11 | 
 12 | #include "details/cie1931.h"
 13 | #include "details/lu.h"
 14 | 
 15 | // Choose a parallelization scheme
 16 | #if defined(RGB2SPEC_USE_TBB)
 17 | #  include <tbb/tbb.h>
 18 | #elif defined(_OPENMP)
 19 | #  define RGB2SPEC_USE_OPENMP 1
 20 | #elif defined(__APPLE__)
 21 | #  define RGB2SPEC_USE_GCD    1
 22 | #  include <dispatch/dispatch.h>
 23 | #endif
 24 | 
 25 | /// Number of quadrature nodes: 3 per interval between CIE_SAMPLES points, plus 1
 26 | #define CIE_FINE_SAMPLES ((CIE_SAMPLES - 1) * 3 + 1)
 27 | #define RGB2SPEC_EPSILON 1e-4 // finite-difference step used by eval_jacobian()
 28 | 
 29 | /// Precomputed tables for fast spectral -> RGB conversion (see init_tables())
 30 | double lambda_tbl[CIE_FINE_SAMPLES],    // wavelength of each quadrature node
 31 |        rgb_tbl[3][CIE_FINE_SAMPLES],    // per-channel integration weights
 32 |        rgb_to_xyz[3][3],                // matrices copied for the chosen gamut
 33 |        xyz_to_rgb[3][3],
 34 |        xyz_whitepoint[3];               // XYZ integral of the illuminant
 35 | 
 36 | /// Currently supported gamuts (comments: illuminant chosen by init_tables())
 37 | enum Gamut {
 38 |     SRGB,        // cie_d65
 39 |     ProPhotoRGB, // cie_d50
 40 |     ACES2065_1,  // cie_d60
 41 |     REC2020,     // cie_d65
 42 |     ERGB,        // cie_e
 43 |     XYZ,         // cie_e; xyz_to_xyz is used for both conversion matrices
 44 |     NO_GAMUT,    // returned by parse_gamut() for unrecognized names
 45 | };
 46 | 
 47 | /// Algebraic sigmoid 1/2 + x / (2 sqrt(1 + x^2)); equals 0.5 at x = 0.
 48 | /// eval_residual() applies it to the value of the spectral polynomial.
 49 | double sigmoid(double x) { return 0.5 + 0.5 * x / std::sqrt(x * x + 1.0); }
 50 | 
 51 | /// Cubic smoothstep polynomial 3x^2 - 2x^3 (the argument is not clamped)
 52 | double smoothstep(double x) { return (3.0 - 2.0 * x) * (x * x); }
 53 | 
 54 | /// Square of x
 55 | double sqr(double x) { return x * x; }
 56 | 
 57 | /// Convert the RGB triple 'p' to CIE L*a*b* in place (uses the global
 58 | /// rgb_to_xyz matrix and xyz_whitepoint as the reference white)
 59 | void cie_lab(double *p) {
 60 |     // RGB -> XYZ, accumulated one input channel at a time
 61 |     double xyz[3] = { 0.0, 0.0, 0.0 };
 62 |     for (int col = 0; col < 3; ++col)
 63 |         for (int row = 0; row < 3; ++row)
 64 |             xyz[row] += p[col] * rgb_to_xyz[row][col];
 65 | 
 66 |     // Lab transfer function: cube root with a linear part below (6/29)^3
 67 |     auto f = [](double t) -> double {
 68 |         const double delta = 6.0 / 29.0;
 69 |         return t > delta*delta*delta ? cbrt(t)
 70 |                                      : t / (delta*delta * 3.0) + (4.0 / 29.0);
 71 |     };
 72 | 
 73 |     const double fx = f(xyz[0] / xyz_whitepoint[0]),
 74 |                  fy = f(xyz[1] / xyz_whitepoint[1]),
 75 |                  fz = f(xyz[2] / xyz_whitepoint[2]);
 76 | 
 77 |     p[0] = 116.0 * fy - 16.0;
 78 |     p[1] = 500.0 * (fx - fy);
 79 |     p[2] = 200.0 * (fy - fz);
 80 | }
 81 | 
 82 | /**
 83 |  * This function precomputes the tables used to convert arbitrary spectra to
 84 |  * RGB in the color space 'gamut' (throws std::runtime_error if unsupported).
 85 |  *
 86 |  * A composite quadrature rule integrates the CIE curves, reflectance, and
 87 |  * illuminant spectrum over each 5nm segment in the 360..830nm range using
 88 |  * Simpson's 3/8 rule (4th-order accurate), which evaluates the integrand at
 89 |  * four positions per segment. While the CIE curves and illuminant spectrum are
 90 |  * linear over the segment, the reflectance could have arbitrary behavior,
 91 |  * hence the extra precautions.
 92 |  */
 93 | void init_tables(Gamut gamut) {
 94 |     memset(rgb_tbl, 0, sizeof(rgb_tbl));
 95 |     memset(xyz_whitepoint, 0, sizeof(xyz_whitepoint));
 96 | 
 97 |     // Spacing between two consecutive quadrature nodes
 98 |     const double h = (CIE_LAMBDA_MAX - CIE_LAMBDA_MIN) / (CIE_FINE_SAMPLES - 1);
 99 |     const double *illuminant = nullptr;
100 | 
101 |     switch (gamut) {
102 |         case SRGB:
103 |             illuminant = cie_d65;
104 |             memcpy(xyz_to_rgb, xyz_to_srgb, sizeof(xyz_to_rgb));
105 |             memcpy(rgb_to_xyz, srgb_to_xyz, sizeof(rgb_to_xyz));
106 |             break;
107 | 
108 |         case ERGB:
109 |             illuminant = cie_e;
110 |             memcpy(xyz_to_rgb, xyz_to_ergb, sizeof(xyz_to_rgb));
111 |             memcpy(rgb_to_xyz, ergb_to_xyz, sizeof(rgb_to_xyz));
112 |             break;
113 | 
114 |         case XYZ:
115 |             illuminant = cie_e;
116 |             memcpy(xyz_to_rgb, xyz_to_xyz, sizeof(xyz_to_rgb));
117 |             memcpy(rgb_to_xyz, xyz_to_xyz, sizeof(rgb_to_xyz));
118 |             break;
119 | 
120 |         case ProPhotoRGB:
121 |             illuminant = cie_d50;
122 |             memcpy(xyz_to_rgb, xyz_to_prophoto_rgb, sizeof(xyz_to_rgb));
123 |             memcpy(rgb_to_xyz, prophoto_rgb_to_xyz, sizeof(rgb_to_xyz));
124 |             break;
125 | 
126 |         case ACES2065_1:
127 |             illuminant = cie_d60;
128 |             memcpy(xyz_to_rgb, xyz_to_aces2065_1, sizeof(xyz_to_rgb));
129 |             memcpy(rgb_to_xyz, aces2065_1_to_xyz, sizeof(rgb_to_xyz));
130 |             break;
131 | 
132 |         case REC2020:
133 |             illuminant = cie_d65;
134 |             memcpy(xyz_to_rgb, xyz_to_rec2020, sizeof(xyz_to_rgb));
135 |             memcpy(rgb_to_xyz, rec2020_to_xyz, sizeof(rgb_to_xyz));
136 |             break;
137 | 
138 |         default:
139 |             throw std::runtime_error("init_tables(): invalid/unsupported gamut.");
140 |     }
141 | 
142 |     for (int i = 0; i < CIE_FINE_SAMPLES; ++i) {
143 |         const double lambda = CIE_LAMBDA_MIN + i * h;
144 | 
145 |         const double xyz[3] = { cie_interp(cie_x, lambda),
146 |                                 cie_interp(cie_y, lambda),
147 |                                 cie_interp(cie_z, lambda) },
148 |                      I = cie_interp(illuminant, lambda);
149 | 
150 |         // Composite Simpson 3/8 weights (times 3h/8): 1 at both end points,
151 |         // 2 where two segments meet (every third node), 3 everywhere else
152 |         double weight = 3.0 / 8.0 * h;
153 |         if (i != 0 && i != CIE_FINE_SAMPLES - 1)
154 |             weight *= (i % 3 == 0) ? 2.0 : 3.0;
155 | 
156 |         lambda_tbl[i] = lambda;
157 |         for (int k = 0; k < 3; ++k)
158 |             for (int j = 0; j < 3; ++j)
159 |                 rgb_tbl[k][i] += xyz_to_rgb[k][j] * xyz[j] * I * weight;
160 | 
161 |         // Integrating the illuminant alone yields the white point; a separate
162 |         // index name avoids shadowing the sample index 'i'
163 |         for (int c = 0; c < 3; ++c)
164 |             xyz_whitepoint[c] += xyz[c] * I * weight;
165 |     }
166 | }
167 | 
168 | /// Lab-space residual between 'rgb' and the color of the spectrum
169 | /// sigmoid(polynomial(coeffs)): residual = cie_lab(rgb) - cie_lab(integral)
170 | void eval_residual(const double *coeffs, const double *rgb, double *residual) {
171 |     double integrated[3] = { 0.0, 0.0, 0.0 };
172 | 
173 |     for (int s = 0; s < CIE_FINE_SAMPLES; ++s) {
174 |         // Wavelength rescaled to the 0..1 range
175 |         const double t = (lambda_tbl[s] - CIE_LAMBDA_MIN) /
176 |                          (CIE_LAMBDA_MAX - CIE_LAMBDA_MIN);
177 | 
178 |         // Horner evaluation of the polynomial, followed by the sigmoid
179 |         double poly = 0.0;
180 |         for (int c = 0; c < 3; ++c)
181 |             poly = poly * t + coeffs[c];
182 |         const double value = sigmoid(poly);
183 | 
184 |         // Accumulate against the precomputed per-channel weights
185 |         for (int ch = 0; ch < 3; ++ch)
186 |             integrated[ch] += rgb_tbl[ch][s] * value;
187 |     }
188 |     cie_lab(integrated);
189 |     for (int ch = 0; ch < 3; ++ch)
190 |         residual[ch] = rgb[ch];
191 |     cie_lab(residual);
192 |     for (int ch = 0; ch < 3; ++ch)
193 |         residual[ch] -= integrated[ch];
194 | }
195 | 
196 | /// Central-difference Jacobian: jac[j][i] = d residual[j] / d coeffs[i]
197 | void eval_jacobian(const double *coeffs, const double *rgb, double **jac) {
198 |     double below[3], above[3];
199 |     double probe[3] = { coeffs[0], coeffs[1], coeffs[2] };
200 | 
201 |     for (int col = 0; col < 3; ++col) {
202 |         probe[col] = coeffs[col] - RGB2SPEC_EPSILON;
203 |         eval_residual(probe, rgb, below);
204 |         probe[col] = coeffs[col] + RGB2SPEC_EPSILON;
205 |         eval_residual(probe, rgb, above);
206 |         probe[col] = coeffs[col]; // undo the perturbation
207 | 
208 |         for (int row = 0; row < 3; ++row)
209 |             jac[row][col] = (above[row] - below[row]) / (2 * RGB2SPEC_EPSILON);
210 |     }
211 | }
212 | 
213 | /// Fit 'coeffs' to 'rgb' with at most 'it' Gauss-Newton steps; returns the norm
214 | /// of the last evaluated residual. Throws if LUPDecompose() does not return 1.
215 | double gauss_newton(const double rgb[3], double coeffs[3], int it = 15) {
216 |     double sq_norm = 0;
217 |     for (int iter = 0; iter < it; ++iter) {
218 |         double rows[3][3], *jac[3] = { rows[0], rows[1], rows[2] };
219 |         double residual[3], step[3];
220 |         int perm[4];
221 | 
222 |         eval_residual(coeffs, rgb, residual);
223 |         eval_jacobian(coeffs, rgb, jac);
224 | 
225 |         if (LUPDecompose(jac, 3, 1e-15, perm) != 1) {
226 |             std::cout << "RGB " << rgb[0] << " " << rgb[1] << " " << rgb[2] << std::endl;
227 |             std::cout << "-> " << coeffs[0] << " " << coeffs[1] << " " << coeffs[2] << std::endl;
228 |             throw std::runtime_error("LU decomposition failed!");
229 |         }
230 |         LUPSolve(jac, perm, residual, 3, step);
231 | 
232 |         sq_norm = 0.0;
233 |         for (int j = 0; j < 3; ++j) {
234 |             sq_norm += residual[j] * residual[j];
235 |             coeffs[j] -= step[j];
236 |         }
237 | 
238 |         // Rescale all coefficients when the largest one exceeds 200
239 |         const double largest = std::max(std::max(coeffs[0], coeffs[1]), coeffs[2]);
240 |         if (largest > 200) {
241 |             const double shrink = 200 / largest;
242 |             for (int j = 0; j < 3; ++j)
243 |                 coeffs[j] *= shrink;
244 |         }
245 | 
246 |         if (sq_norm < 1e-6)
247 |             break;
248 |     }
249 |     return std::sqrt(sq_norm);
250 | }
251 | 
252 | /// Map a case-insensitive gamut name to its Gamut value (NO_GAMUT if unknown)
253 | static Gamut parse_gamut(const char *str) {
254 |     static const struct { const char *name; Gamut gamut; } known[] = {
255 |         { "sRGB",        SRGB        },
256 |         { "eRGB",        ERGB        },
257 |         { "XYZ",         XYZ         },
258 |         { "ProPhotoRGB", ProPhotoRGB },
259 |         { "ACES2065_1",  ACES2065_1  },
260 |         { "REC2020",     REC2020     },
261 |     };
262 |     for (const auto &entry : known)
263 |         if (strcasecmp(str, entry.name) == 0)
264 |             return entry.gamut;
265 |     return NO_GAMUT;
266 | }
267 | 
268 | /**
269 |  * Entry point: rgb2spec_opt <resolution> <output> [<gamut>]
270 |  *
271 |  * Runs gauss_newton() for every cell of a 3 x res x res x res grid of RGB
272 |  * values and writes the tag "SPEC", the resolution, the scale table and the
273 |  * fitted coefficients to <output>. Exits with status -1 when the arguments
274 |  * are invalid or the output file cannot be created or written.
275 |  */
276 | int main(int argc, char **argv) {
277 |     if (argc < 3) {
278 |         printf("Syntax: rgb2spec_opt <resolution> <output> [<gamut>]\n"
279 |                "where <gamut> is one of sRGB,eRGB,XYZ,ProPhotoRGB,ACES2065_1,REC2020\n");
280 |         exit(-1);
281 |     }
282 |     Gamut gamut = SRGB;
283 |     if (argc > 3) gamut = parse_gamut(argv[3]);
284 |     if (gamut == NO_GAMUT) {
285 |         fprintf(stderr, "Could not parse gamut `%s'!\n", argv[3]);
286 |         exit(-1);
287 |     }
288 |     init_tables(gamut);
289 | 
290 |     // Grid coordinates are computed as index / (res - 1), so fewer than two
291 |     // samples per axis would divide by zero (and res < 0 cannot be allocated)
292 |     const int res = atoi(argv[1]);
293 |     if (res < 2) {
294 |         printf("Invalid resolution!\n");
295 |         exit(-1);
296 |     }
297 | 
298 |     printf("Optimizing spectra ");
299 | 
300 |     float *scale = new float[res];
301 |     for (int k = 0; k < res; ++k)
302 |         scale[k] = (float) smoothstep(smoothstep(k / double(res - 1)));
303 | 
304 |     // Sizes and indices use size_t: 9 * res^3 overflows 'int' for large grids
305 |     const size_t n = (size_t) res;
306 |     const size_t bufsize = 3 * 3 * n * n * n;
307 |     float *out = new float[bufsize];
308 | 
309 |     // No default(none)/shared(...) list: with default(none), newer GCC versions
310 |     // require const variables such as 'res' in shared() while older ones reject
311 |     // them there, and 'stdout' is a macro rather than a plain variable name
312 | #if defined(RGB2SPEC_USE_OPENMP)
313 | #  pragma omp parallel for collapse(2) schedule(dynamic)
314 | #endif
315 |     for (int l = 0; l < 3; ++l) {
316 | #if defined(RGB2SPEC_USE_TBB)
317 |         tbb::parallel_for(0, res, [&](size_t j) {
318 | #elif defined(RGB2SPEC_USE_GCD)
319 |         dispatch_apply(res, dispatch_get_global_queue(0, 0), ^(size_t j) {
320 | #else
321 |         for (int j = 0; j < res; ++j) {
322 | #endif
323 |             const double y = j / double(res - 1);
324 |             printf(".");
325 |             fflush(stdout);
326 |             for (int i = 0; i < res; ++i) {
327 |                 const double x = i / double(res - 1);
328 |                 double coeffs[3], rgb[3];
329 | 
330 |                 // Fits the cell with scale index k, warm-started from the
331 |                 // current 'coeffs', and stores the result in 'out'
332 |                 auto fit_cell = [&](int k) {
333 |                     const double b = (double) scale[k];
334 | 
335 |                     rgb[l] = b;
336 |                     rgb[(l + 1) % 3] = x*b;
337 |                     rgb[(l + 2) % 3] = y*b;
338 | 
339 |                     gauss_newton(rgb, coeffs);
340 | 
341 |                     // Re-express the polynomial in t = (lambda - c0) * c1 as
342 |                     // a polynomial in lambda (360 and 830 are hard-coded here)
343 |                     const double c0 = 360.0, c1 = 1.0 / (830.0 - 360.0);
344 |                     const double A = coeffs[0], B = coeffs[1], C = coeffs[2];
345 | 
346 |                     const size_t idx = (((size_t) l * n + k) * n + j) * n + i;
347 | 
348 |                     out[3*idx + 0] = float(A*(sqr(c1)));
349 |                     out[3*idx + 1] = float(B*c1 - 2*A*c0*(sqr(c1)));
350 |                     out[3*idx + 2] = float(C - B*c0*c1 + A*(sqr(c0*c1)));
351 |                 };
352 | 
353 |                 const int start = res / 5;
354 | 
355 |                 // Sweep upwards from 'start', starting from zero coefficients
356 |                 memset(coeffs, 0, sizeof(double)*3);
357 |                 for (int k = start; k < res; ++k)
358 |                     fit_cell(k);
359 | 
360 |                 // Restart from zero coefficients and sweep down to index 0
361 |                 memset(coeffs, 0, sizeof(double)*3);
362 |                 for (int k = start; k >= 0; --k)
363 |                     fit_cell(k);
364 |             }
365 |         }
366 | #if defined(RGB2SPEC_USE_TBB) || defined(RGB2SPEC_USE_GCD)
367 |         );
368 | #endif
369 |     }
370 | 
371 |     FILE *f = fopen(argv[2], "wb");
372 |     if (f == nullptr) {
373 |         fprintf(stderr, "\nCould not create file `%s'!\n", argv[2]);
374 |         exit(-1);
375 |     }
376 | 
377 |     // Every fwrite() below transfers a single item, so any return value other
378 |     // than 1 signals a failed write; the result of fclose() is checked as well
379 |     const uint32_t resolution = (uint32_t) res;
380 |     bool ok = fwrite("SPEC", 4, 1, f) == 1;
381 |     ok = ok && fwrite(&resolution, sizeof(uint32_t), 1, f) == 1;
382 |     ok = ok && fwrite(scale, n * sizeof(float), 1, f) == 1;
383 |     ok = ok && fwrite(out, bufsize * sizeof(float), 1, f) == 1;
384 |     ok = (fclose(f) == 0) && ok;
385 | 
386 |     delete[] out;
387 |     delete[] scale;
388 | 
389 |     if (!ok) {
390 |         fprintf(stderr, "\nCould not write to file `%s'!\n", argv[2]);
391 |         exit(-1);
392 |     }
393 |     printf(" done.\n");
394 |     return 0;
395 | }
396 | 


--------------------------------------------------------------------------------