├── README.md ├── test.cpp ├── tinyremo.h └── tinyremo_eigen.h /README.md: -------------------------------------------------------------------------------- 1 | # 🥁 TinyReMo 🥁 2 | 3 | Tiny header-only, minimal dependency reverse-mode automatic differentiation library for C++. 4 | 5 | Based on the tape-based rust implementation in the tutorial at https://rufflewind.com/2016-12-30/reverse-mode-automatic-differentiation 6 | 7 | ## Compile and Run Tests 8 | 9 | ```bash 10 | clang++ -std=c++20 -I . -I [path/to/eigen] -I -o test test.cpp 11 | ./test 12 | ``` 13 | 14 | ## Gradient of a function of scalar variables 15 | 16 | Variables for which derivatives are required must be registered with a common `Tape`. 17 | 18 | ```cpp 19 | #include "tinyremo.h" 20 | int main() 21 | { 22 | using namespace tinyremo; 23 | 24 | Tape tape; 25 | 26 | Var x(&tape, tape.push_scalar(), 0.5); 27 | Var y(&tape, tape.push_scalar(), 4.2); 28 | 29 | Var z = x * y + sin(x) + pow(x, y) + log(y) + exp(x) + cos(y); 30 | 31 | std::cout << "z = " << z.getValue() << std::endl; 32 | 33 | auto grads = z.grad(); 34 | std::cout << "∂z/∂x = " << grads[x.getIndex()] << std::endl; 35 | std::cout << "∂z/∂y = " << grads[y.getIndex()] << std::endl; 36 | } 37 | ``` 38 | 39 | 40 | ## As a custom scalar type for Eigen matrices 41 | 42 | ```cpp 43 | Tape tape; 44 | Eigen::Matrix, N, 1> x; 45 | // x has unregistered entries. Initialize them and put on tape 46 | for (int i = 0; i < x.rows(); ++i) 47 | { 48 | x(i) = Var(&tape, tape.push_scalar(), i+1); 49 | } 50 | ``` 51 | 52 | ## Higher-order derivatives 53 | 54 | For each derivative we wish to take we must create a new `Tape`. 55 | 56 | ```cpp 57 | Tape< double > inner_tape; 58 | Tape< Var > outer_tape; 59 | 60 | // Inner variable 61 | Var x_inner(&inner_tape, inner_tape.push_scalar(), 0.5); 62 | // Outer variable (we'll actually use this one) 63 | Var> x(&outer_tape, outer_tape.push_scalar(), x_inner); 64 | // Construct outer variable in one line 65 | Var> y(&outer_tape, outer_tape.push_scalar(), {&inner_tape, inner_tape.push_scalar(), 4.2}); 66 | 67 | auto z = x * y + sin(x) + pow(x, y) + log(y) + exp(x) + cos(y); 68 | 69 | // First derivatives 70 | auto dzd = z.grad(); 71 | printf("z = %g\n", z.getValue().getValue()); 72 | Var dzdx = dzd[x.getIndex()]; 73 | printf("∂z/∂x = %g\n", dzdx.getValue()); 74 | Var dzdy = dzd[y.getIndex()]; 75 | printf("∂z/∂y = %g\n", dzdy.getValue()); 76 | 77 | // Second derivatives 78 | auto d2zdxd = dzdx.grad(); 79 | printf("∂²z/∂x² = %g\n", d2zdxd[x.getValue().getIndex()]); 80 | printf("∂²z/∂x∂y = %g\n", d2zdxd[y.getValue().getIndex()]); 81 | auto d2zdyd = dzdy.grad(); 82 | printf("∂²z/∂y∂x = %g\n", d2zdyd[x.getValue().getIndex()]); 83 | printf("∂²z/∂y² = %g\n", d2zdyd[y.getValue().getIndex()]); 84 | ``` 85 | 86 | ## Easier construction of recorded variables 87 | 88 | Call `record_scalar` to record a value on a bunch of tapes (in order) to create 89 | appropriately nested `Var` object in one line. In this case, `x` will be a 90 | twice-differentiable `Var>`. 91 | 92 | ```cpp 93 | Tape tape_1; 94 | Tape> tape_2; 95 | auto x = record_scalar(0.5,tape_1,tape_2); 96 | ``` 97 | 98 | Similarly, for Eigen matrices, `record_matrix` will copy `double` values from a 99 | matrix and record them on a bunch of tapes in order to create a matrix of `Var` 100 | objects. In this case, `Y` will be a twice-differentiable 101 | `Eigen::Matrix>,Eigen::Dynamic,Eigen::Dynamic>`. 102 | 103 | ```cpp 104 | Eigen::MatrixXd X = Eigen::MatrixXd::Random(3, 3); 105 | Tape tape_1; 106 | Tape> tape_2; 107 | auto Y = record_matrix(X,tape_1,tape_2); 108 | ``` 109 | 110 | ## Gradients with respect to Eigen matrices as Eigen matrices 111 | 112 | For a once-differentiable `Var` object `f`, `f.grad()` will return a 113 | `std::vector` of the gradient of `f` with respect to _everything_ on the 114 | tape, including auxiliary variables. If `f` is a function of some 115 | `Eigen::Matrix` types then we can call `gradient` to get the gradient of `f` in 116 | appropriately sized matrices. 117 | 118 | ```cpp 119 | Tape tape; 120 | Eigen::MatrixXd X1 = Eigen::MatrixXd::Random(3, 3); 121 | Eigen::MatrixXd X2 = Eigen::MatrixXd::Random(3, 3); 122 | auto Y1 = record_matrix(X1,tape); 123 | auto Y2 = record_matrix(X2,tape); 124 | auto f = Y1.array().sin().sum() * Y2.array().cos().sum(); 125 | auto [dfdY1, dfdY2] = gradient(f, Y1, Y2); 126 | ``` 127 | 128 | ## Hessian with respect to Eigen matrices 129 | For a twice-differentiable scalar `Var` object `f`, `hessian` will 130 | populate a `Eigen::Matrix` with the Hessian of `f`. The rows and columns will correspond 131 | to second partial derivatives with respect to pairs of elements of the 132 | matrices provided as input in their respective storage orders (`Eigen::RowMajor` 133 | or `Eigen::ColMajor`). The output matrix will be `Eigen::ColMajor` unless all inputs 134 | are `Eigen::RowMajor`. The output matrix will have `Eigen::Dynamic` rows and 135 | cols, unless all inputs have fixed sizes. 136 | 137 | ```cpp 138 | Tape tape; 139 | Tape> tape_2; 140 | Eigen::MatrixXd X1 = Eigen::MatrixXd::Random(3, 3); 141 | Eigen::MatrixXd X2 = Eigen::MatrixXd::Random(3, 3); 142 | auto Y1 = record_matrix(X1,tape_1,tape_2); 143 | auto Y2 = record_matrix(X2,tape_1,tape_2); 144 | auto f = Y1.array().sin().sum() * Y2.array().cos().sum(); 145 | auto H = gradient(f, Y1, Y2); 146 | ``` 147 | 148 | ## Sparse Jacobian as `Eigen::SparseMatrix` 149 | 150 | For a once-differentiable vector of `Var` objects `F`, `sparse_jacobian` 151 | will populate a `Eigen::SparseMatrix` with the Jacobian of `F`. The rows 152 | correspond to the elements of `F` and the columns correspond to the elements of 153 | the matrices provided as input in their respective storage orders 154 | (`Eigen::RowMajor` or `Eigen::ColMajor`). 155 | 156 | ```cpp 157 | Tape tape; 158 | Eigen::MatrixXd X1 = Eigen::MatrixXd::Random(3, 3); 159 | Eigen::MatrixXd X2 = Eigen::MatrixXd::Random(3, 3); 160 | auto Y1 = record_matrix(X1,tape); 161 | auto Y2 = record_matrix(X2,tape); 162 | // expression involving colwise sum resulting in a column matrix 163 | auto F = (Y1.array().sin().colwise().sum() * Y2.array().cos().colwise().sum()).matrix(); 164 | auto J = sparse_jacobian(F, Y1, Y2); 165 | ``` 166 | 167 | ## Sparse Hessian as `Eigen::SparseMatrix` 168 | 169 | Similarly, for a twice-differentiable `Var>` object `f`, 170 | `sparse_hessian` will populate a `Eigen::SparseMatrix` with the Hessian 171 | of `f`. The rows and columns correspond to the elements of `f` to the elements 172 | of input matrices in their respective storage orders (`Eigen::RowMajor` or 173 | `Eigen::ColMajor`). 174 | 175 | ```cpp 176 | Tape tape_1; 177 | Tape> tape_2; 178 | auto Y1 = record_matrix(X1,tape); 179 | auto Y2 = record_matrix(X2,tape); 180 | auto f = Y1.array().sin().sum() * Y2.array().cos().sum(); 181 | auto H = sparse_hessian(f, Y1, Y2); 182 | ``` 183 | -------------------------------------------------------------------------------- /test.cpp: -------------------------------------------------------------------------------- 1 | #include "tinyremo.h" 2 | #include "tinyremo_eigen.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace tinyremo; 10 | // tictoc function to return time since last call in seconds 11 | double tictoc() 12 | { 13 | static std::chrono::time_point last = std::chrono::high_resolution_clock::now(); 14 | auto now = std::chrono::high_resolution_clock::now(); 15 | double elapsed = std::chrono::duration_cast(now - last).count() * 1e-6; 16 | last = now; 17 | return elapsed; 18 | } 19 | 20 | 21 | // Benchmark ∂y/∂x for y = ‖ A(x)⁻¹ x ‖². This test chokes autodiff.github.io 22 | // and https://github.com/Rookfighter/autodiff-cpp/ 23 | template 24 | void call_and_time_llt() 25 | { 26 | using std::printf; 27 | using Scalar = double; 28 | tictoc(); 29 | std::vector grads; 30 | Eigen::Matrix, N, 1> x; 31 | const int max_iters = 1000; 32 | for(int iter = 0; iter < max_iters; ++iter) 33 | { 34 | Tape tape; 35 | // x has unregistered entries. Initialize them and put on tape 36 | for (int i = 0; i < x.rows(); ++i) 37 | { 38 | x(i) = Var(&tape, tape.push_scalar(), i+1); 39 | } 40 | Eigen::Matrix, N, N> A; 41 | for (int i = 0; i < A.rows(); ++i) 42 | { 43 | for (int j = 0; j < A.cols(); ++j) 44 | { 45 | A(i,j) = x(i) + x(j); 46 | } 47 | A(i,i) = A(i,i)+A(i,i); 48 | } 49 | Eigen::Matrix, N, 1> b = A.llt().solve(x); 50 | Var y = b.array().square().sum(); 51 | grads = y.grad(); 52 | } 53 | double t_elapsed = tictoc(); 54 | printf("%d %g\n",N,t_elapsed/max_iters); 55 | printf(" ... %% "); 56 | for (int i = 0; i < N; ++i) 57 | { 58 | printf("%g ",grads[x(i).getIndex()]); 59 | } 60 | printf("\n"); 61 | } 62 | 63 | // Generate a sequence of calls to call_and_time 64 | template 65 | void call_and_time_all_llt() 66 | { 67 | if constexpr (N > M) 68 | { 69 | return; 70 | }else 71 | { 72 | call_and_time_llt(); 73 | call_and_time_all_llt(); 74 | } 75 | } 76 | 77 | 78 | template 79 | Eigen::Matrix spring_residuals( 80 | const Eigen::Matrix& X, const Eigen::MatrixXi& E, const double l_rest) 81 | { 82 | Eigen::Matrix f(E.rows()); 83 | for(int e = 0; e < E.rows(); ++e) 84 | { 85 | f(e) = (X.row(E(e,0)) - X.row(E(e,1))).norm() - Scalar(l_rest); 86 | } 87 | return f; 88 | } 89 | 90 | template 91 | Scalar spring_energy(const Eigen::Matrix& X, const Eigen::MatrixXi& E, const double l_rest) 92 | { 93 | return Scalar(0.5) * spring_residuals(X,E,l_rest).squaredNorm(); 94 | } 95 | 96 | template 97 | void mass_spring_sparse_hessian() 98 | { 99 | // Spring mass positions 100 | Eigen::Matrix X_double(M*N,2); 101 | for(int i = 0; i < M; ++i) 102 | { 103 | for(int j = 0; j < N; ++j) 104 | { 105 | X_double(i*N+j,0) = i; 106 | X_double(i*N+j,1) = j; 107 | } 108 | } 109 | // Spring rest length 110 | const double l_rest = 2.0; 111 | // Grid of spring edges 112 | Eigen::MatrixXi E(M*(N-1)+(M-1)*N,2); 113 | { 114 | int e = 0; 115 | for(int i = 0; i < M; ++i) 116 | { 117 | for(int j = 0; j < N; ++j) 118 | { 119 | if(j tape_1; 126 | Tape> tape_2; 127 | // Copy from double to Var> and record on tapes 128 | Eigen::Matrix>, Eigen::Dynamic, 2> X = record_matrix(X_double, tape_1, tape_2); 129 | 130 | Var> f = spring_energy(X, E, l_rest); 131 | // Dense first derivatives because we know that they're dense and that every 132 | // row of Hessian has at least one entry. 133 | auto df_d = f.grad(); 134 | auto H = sparse_hessian(f, X); 135 | 136 | if constexpr(print) 137 | { 138 | // If we're printing then let's also check the Jacobian 139 | // It's not required, but since Jacobian only needs first derivatives, let's 140 | // stip off the outer layer. This way J will contain doubles instead of 141 | // Var. 142 | auto X_lower = X.unaryExpr([](const Var>& x){return x.getValue();}).eval(); 143 | auto F = spring_residuals(X_lower,E,l_rest); 144 | auto J = sparse_jacobian(F,X_lower); 145 | 146 | // Print matlab friendly variables 147 | std::cout<<"X=["<& A, const std::string name) 171 | { 172 | std::cout<::InnerIterator it(A,k); it; ++it) 176 | { 177 | std::cout< 191 | void call_and_time_mass_spring() 192 | { 193 | using std::printf; 194 | tictoc(); 195 | mass_spring_sparse_hessian(); 196 | printf("%d %g\n",N*N,tictoc()); 197 | } 198 | 199 | template 200 | void call_and_time_all_mass_spring() 201 | { 202 | if constexpr (N > M) 203 | { 204 | return; 205 | }else 206 | { 207 | call_and_time_mass_spring(); 208 | call_and_time_all_mass_spring<2*N,M>(); 209 | } 210 | } 211 | 212 | int main() { 213 | // Some simple tests 214 | { 215 | printf("# Some simple tests\n"); 216 | using Scalar = double; 217 | Tape tape; 218 | 219 | Var x(&tape, tape.push_scalar(), 0.5); 220 | Var y(&tape, tape.push_scalar(), 4.2); 221 | // This variable should not show up in sparse_grad 222 | Var a(&tape, tape.push_scalar(), 0.0/0.0); 223 | 224 | Var z = x * y + sin(x) + pow(x, y) + log(y) + exp(x) + cos(y); 225 | 226 | std::cout << "z = " << z.getValue() << std::endl; 227 | 228 | auto grads = z.grad(); 229 | auto sparse_grads = z.sparse_grad(); 230 | std::cout << "∂z/∂a " << (sparse_grads.find(a.getIndex()) == sparse_grads.end() ? "correctly not found" : "incorrectly found") << std::endl; 231 | std::cout << "∂z/∂x = " << sparse_grads[x.getIndex()] << std::endl; 232 | std::cout << "∂z/∂y = " << sparse_grads[y.getIndex()] << std::endl; 233 | } 234 | 235 | // Performance benchmark 236 | { 237 | printf("\n# Performance Benchmark with Eigen::Cholesky\n"); 238 | call_and_time_all_llt<1,16>(); 239 | } 240 | 241 | // Second derivatives 242 | { 243 | printf("\n# Second derivatives\n"); 244 | Tape< double > tape_1; 245 | Tape< Var > tape_2; 246 | Var> x(&tape_2, tape_2.push_scalar(), {&tape_1, tape_1.push_scalar(), 0.5}); 247 | Var> y(&tape_2, tape_2.push_scalar(), {&tape_1, tape_1.push_scalar(), 4.2}); 248 | auto z = x * y + sin(x) + pow(x, y) + log(y) + exp(x) + cos(y); 249 | auto dzd = z.grad(); 250 | printf("z = %g\n", z.getValue().getValue()); 251 | Var dzdx = dzd[x.getIndex()]; 252 | printf("∂z/∂x = %g\n", dzdx.getValue()); 253 | Var dzdy = dzd[y.getIndex()]; 254 | printf("∂z/∂y = %g\n", dzdy.getValue()); 255 | auto d2zdxd = dzdx.grad(); 256 | printf("∂²z/∂x² = %g\n", d2zdxd[x.getValue().getIndex()]); 257 | printf("∂²z/∂x∂y = %g\n", d2zdxd[y.getValue().getIndex()]); 258 | auto d2zdyd = dzdy.grad(); 259 | printf("∂²z/∂y∂x = %g\n", d2zdyd[x.getValue().getIndex()]); 260 | printf("∂²z/∂y² = %g\n", d2zdyd[y.getValue().getIndex()]); 261 | } 262 | 263 | // Second derivatives with Eigen 264 | { 265 | printf("\n# Second derivatives with Eigen\n"); 266 | Tape< double > tape_1; 267 | Tape< Var > tape_2; 268 | const int N = 2; 269 | Eigen::Matrix>, N, 1> x; 270 | for (int i = 0; i < x.rows(); ++i) 271 | { 272 | x(i) = Var>(&tape_2, tape_2.push_scalar(), {&tape_1, tape_1.push_scalar(), i+1}); 273 | } 274 | Eigen::Matrix>, N, N> A; 275 | for (int i = 0; i < A.rows(); ++i) 276 | { 277 | for (int j = 0; j < A.cols(); ++j) 278 | { 279 | A(i,j) = x(i) + x(j); 280 | } 281 | A(i,i) = A(i,i)+A(i,i); 282 | } 283 | Eigen::Matrix>, N, 1> b = A.llt().solve(x); 284 | Var> y = b.array().square().sum(); 285 | auto dyd = y.grad(); 286 | for (int i = 0; i < N; ++i) 287 | { 288 | printf("∂y/∂x(%d) = %g\n", i, dyd[x(i).getIndex()].getValue()); 289 | } 290 | for (int i = 0; i < N; ++i) 291 | { 292 | auto d2ydxi = dyd[x(i).getIndex()].grad(); 293 | for (int j = 0; j < N; ++j) 294 | { 295 | printf("∂²y/∂x(%d)∂x(%d) = %g\n", i, j, d2ydxi[x(j).getValue().getIndex()]); 296 | } 297 | } 298 | } 299 | 300 | { 301 | const int M = 3; 302 | const int N = 2; 303 | std::cout<<"# 2D mass-spring system with "<(); 305 | } 306 | { 307 | std::cout<(); 309 | } 310 | 311 | return 0; 312 | } 313 | 314 | -------------------------------------------------------------------------------- /tinyremo.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2024 Alec Jacobson 2 | #pragma once 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | // Adapted from the rust implementation at https://rufflewind.com/2016-12-30/reverse-mode-automatic-differentiation 12 | namespace tinyremo 13 | { 14 | template using index_map = std::unordered_map; 15 | 16 | template 17 | struct Node 18 | { 19 | std::array weights; 20 | std::array deps; 21 | }; 22 | 23 | template 24 | class Tape 25 | { 26 | public: 27 | size_t push_scalar() 28 | { 29 | size_t index = nodes.size(); 30 | nodes.push_back({{Scalar(0),Scalar(0)}, {index, index}}); 31 | return index; 32 | } 33 | 34 | size_t push_unary(size_t dep0, Scalar weight0) 35 | { 36 | size_t index = nodes.size(); 37 | nodes.push_back({{weight0, Scalar(0)}, {dep0, index}}); 38 | return index; 39 | } 40 | 41 | size_t push_binary(size_t dep0, Scalar weight0, size_t dep1, Scalar weight1) 42 | { 43 | size_t index = nodes.size(); 44 | nodes.push_back({{weight0, weight1}, {dep0, dep1}}); 45 | return index; 46 | } 47 | 48 | size_t size() const { return nodes.size(); } 49 | 50 | const Node& operator[](size_t index) const { return nodes[index]; } 51 | 52 | private: 53 | std::vector> nodes; 54 | }; 55 | 56 | template 57 | class Var { 58 | public: 59 | // `Eigen::Matrix,…>` will call `Var(0)` so we need a 60 | // construct that takes a literal. 61 | // 62 | // Since won't know the tape, so we can never follow grads through these, 63 | // but we claim this is ok because they are always constants. 64 | // 65 | // Eigen also calls math operators on constants: 66 | // `Var(0)/Var(1)` kind of thing. So we need to support 67 | // those below (some are asserted to be unsupported currently). 68 | Var(Scalar value = Scalar(0)): tape_ptr(nullptr), index(0), value(value) { } 69 | 70 | // Constructor for scalar types (e.g., int, double), but not for Var types. 71 | // SFINAE is used to enable this constructor only for scalar types. 72 | template::value>::type> 73 | constexpr explicit Var(T v) : tape_ptr(nullptr), index(0), value(v) { } 74 | 75 | // Construct for a tracked variable 76 | Var(Tape* tape_ptr, size_t index, const Scalar & value) : tape_ptr(tape_ptr), index(index), value(value) {} 77 | 78 | Var operator+(const Var& other) const 79 | { 80 | assert(!tape_ptr || !other.tape_ptr || tape_ptr == other.tape_ptr); 81 | if(tape_ptr && other.tape_ptr) { return Var(tape_ptr, tape_ptr->push_binary(index, Scalar(1), other.index, Scalar(1)), value + other.value); } 82 | if(tape_ptr) { return Var(tape_ptr, tape_ptr->push_unary(index, Scalar(1)), value + other.value); } 83 | if(other.tape_ptr) { return Var(other.tape_ptr, other.tape_ptr->push_unary(other.index, Scalar(1)), value + other.value); } 84 | return Var(value + other.value); 85 | } 86 | 87 | Var operator-(const Var& other) const 88 | { 89 | return *this + (-other); 90 | } 91 | 92 | Var operator*(const Var& other) const 93 | { 94 | assert(!tape_ptr || !other.tape_ptr || tape_ptr == other.tape_ptr); 95 | if(tape_ptr && other.tape_ptr) { return Var(tape_ptr, tape_ptr->push_binary(index, other.value, other.index, value), value * other.value); } 96 | if(tape_ptr) { return Var(tape_ptr, tape_ptr->push_unary(index, other.value), value * other.value); } 97 | if(other.tape_ptr) { return Var(other.tape_ptr, other.tape_ptr->push_unary(other.index, value), value * other.value); } 98 | return Var(value * other.value); 99 | } 100 | 101 | Var operator/(const Var& other) const 102 | { 103 | assert(other.value != Scalar(0)); 104 | return *this * other.invert(); 105 | } 106 | 107 | Var invert() const 108 | { 109 | if(tape_ptr) 110 | { 111 | return Var(tape_ptr, tape_ptr->push_unary(index, Scalar(-1) / (value * value)), Scalar(1) / value); 112 | }else 113 | { 114 | return Var(Scalar(1) / value); 115 | } 116 | } 117 | 118 | // Unary negation 119 | Var operator-() const 120 | { 121 | if(tape_ptr) 122 | { 123 | return Var(tape_ptr, tape_ptr->push_unary(index, Scalar(-1)), -value); 124 | }else 125 | { 126 | return Var(-value); 127 | } 128 | } 129 | 130 | // -= operator 131 | Var& operator-=(const Var& other) { *this = *this - other; return *this; } 132 | Var& operator+=(const Var& other) { *this = *this + other; return *this; } 133 | Var& operator*=(const Var& other) { *this = *this * other; return *this; } 134 | Var& operator/=(const Var& other) { *this = *this / other; return *this; } 135 | 136 | // > operator 137 | bool operator>(const Var& other) const { return value > other.value; } 138 | bool operator>=(const Var& other) const { return value >= other.value; } 139 | bool operator<(const Var& other) const { return value < other.value; } 140 | bool operator<=(const Var& other) const { return value <= other.value; } 141 | bool operator==(const Var& other) const { return value == other.value; } 142 | 143 | friend Var sqrt(const Var& v) 144 | { 145 | using std::sqrt; 146 | if(v.tape_ptr) 147 | { 148 | return Var(v.tape_ptr, v.tape_ptr->push_unary(v.index, Scalar(1) / (Scalar(2)*sqrt(v.value))), sqrt(v.value)); 149 | }else 150 | { 151 | return Var(sqrt(v.value)); 152 | } 153 | } 154 | friend Var sin(const Var& v) 155 | { 156 | using std::cos; 157 | using std::sin; 158 | if(v.tape_ptr) 159 | { 160 | return Var(v.tape_ptr, v.tape_ptr->push_unary(v.index, cos(v.value)), sin(v.value)); 161 | }else 162 | { 163 | return Var(sin(v.value)); 164 | } 165 | } 166 | friend Var cos(const Var& v) 167 | { 168 | using std::cos; 169 | using std::sin; 170 | if(v.tape_ptr) 171 | { 172 | return Var(v.tape_ptr, v.tape_ptr->push_unary(v.index, -sin(v.value)), cos(v.value)); 173 | }else 174 | { 175 | return Var(cos(v.value)); 176 | } 177 | } 178 | friend Var exp(const Var& v){ 179 | using std::exp; 180 | if(v.tape_ptr) 181 | { 182 | return Var(v.tape_ptr, v.tape_ptr->push_unary(v.index, exp(v.value)), exp(v.value)); 183 | }else 184 | { 185 | return Var(exp(v.value)); 186 | } 187 | } 188 | friend Var log(const Var& v) 189 | { 190 | using std::log; 191 | if(v.tape_ptr) 192 | { 193 | return Var(v.tape_ptr, v.tape_ptr->push_unary(v.index, Scalar(1) / v.value), log(v.value)); 194 | }else 195 | { 196 | return Var(log(v.value)); 197 | } 198 | } 199 | friend Var pow(const Var& x, const Var& p) 200 | { 201 | using std::pow; 202 | // Mirror operator+ above 203 | if(x.tape_ptr) 204 | { 205 | if(p.tape_ptr) 206 | { 207 | assert(x.tape_ptr == p.tape_ptr); 208 | return Var(x.tape_ptr, x.tape_ptr->push_binary(x.index, p.value * pow(x.value, p.value - Scalar(1)), p.index, pow(x.value, p.value) * log(x.value)), pow(x.value, p.value)); 209 | }else 210 | { 211 | return Var(x.tape_ptr, x.tape_ptr->push_unary(x.index, p.value * pow(x.value, p.value - Scalar(1))), pow(x.value, p.value)); 212 | } 213 | }else 214 | { 215 | if(p.tape_ptr) 216 | { 217 | return Var(p.tape_ptr, p.tape_ptr->push_unary(p.index, pow(x.value, p.value) * log(x.value)), pow(x.value, p.value)); 218 | }else 219 | { 220 | return Var(pow(x.value, p.value)); 221 | } 222 | } 223 | } 224 | friend Var abs(const Var& v) 225 | { 226 | using std::abs; 227 | if(v.tape_ptr) 228 | { 229 | return Var(v.tape_ptr, v.tape_ptr->push_unary(v.index, v.value < Scalar(0) ? Scalar(-1) : Scalar(1)), abs(v.value)); 230 | }else 231 | { 232 | return Var(abs(v.value)); 233 | } 234 | } 235 | 236 | std::vector grad() const 237 | { 238 | std::vector derivs(tape_ptr->size(), Scalar(0)); 239 | derivs[index] = Scalar(1); 240 | 241 | for (int i = tape_ptr->size() - 1; i >= 0; --i) { 242 | const auto& node = (*tape_ptr)[i]; 243 | for (int j = 0; j < 2; ++j) { 244 | derivs[node.deps[j]] += node.weights[j] * derivs[i]; 245 | } 246 | } 247 | return derivs; 248 | } 249 | 250 | index_map sparse_grad() const 251 | { 252 | index_map derivs; 253 | compute_sparse_grad(derivs, Scalar(1), index); 254 | return derivs; 255 | } 256 | 257 | Scalar getValue() const { return value; } 258 | size_t getIndex() const { return index; } 259 | 260 | private: 261 | void compute_sparse_grad(index_map& derivs, Scalar grad_value, size_t idx) const 262 | { 263 | assert(idx < tape_ptr->size() && idx >= 0); 264 | if (derivs.find(idx) != derivs.end()) { 265 | derivs[idx] += grad_value; 266 | } else { 267 | derivs[idx] = grad_value; 268 | } 269 | const Node& node = (*tape_ptr)[idx]; 270 | for (int i = 0; i < 2; ++i) { 271 | size_t dep_idx = node.deps[i]; 272 | if (dep_idx < idx) { 273 | Scalar weight = node.weights[i]; 274 | compute_sparse_grad(derivs, grad_value * weight, dep_idx); 275 | } 276 | } 277 | } 278 | 279 | Tape * tape_ptr; 280 | size_t index; 281 | Scalar value; 282 | }; 283 | 284 | // ``` 285 | // Tape tape_1; 286 | // Tape> tape_2; 287 | // Var> x = record_scalar(0.5,tape_1,tape_2); 288 | // ``` 289 | template 290 | auto record_scalar(Scalar value, FirstTape& first_tape, RestTapes&... rest_tapes) 291 | { 292 | Var x(&first_tape, first_tape.push_scalar(), value); 293 | if constexpr (sizeof...(RestTapes) == 0) { return x; } 294 | else { return record_scalar(x, rest_tapes...); } 295 | } 296 | } 297 | -------------------------------------------------------------------------------- /tinyremo_eigen.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2024 Alec Jacobson 2 | #pragma once 3 | #include "tinyremo.h" 4 | #include 5 | #include 6 | #include 7 | 8 | namespace Eigen 9 | { 10 | template 11 | struct NumTraits> : NumTraits 12 | { 13 | typedef tinyremo::Var Real; 14 | typedef tinyremo::Var Literal; 15 | enum { 16 | IsComplex = NumTraits::IsComplex, 17 | IsInteger = 0, 18 | IsSigned = 1, 19 | RequireInitialization = 0, 20 | // Not sure if this should be an additive or multiplicative constant 21 | ReadCost = NumTraits::ReadCost + 2, 22 | AddCost = NumTraits::AddCost + 2, 23 | MulCost = NumTraits::MulCost + 2 24 | }; 25 | }; 26 | } 27 | 28 | namespace tinyremo 29 | { 30 | // ``` 31 | // Eigen::Matrix X; 32 | // X << 0, 2, 33 | // 1, 4, 34 | // 9, 8; 35 | // Tape tape_1; 36 | // Tape> tape_2; 37 | // Eigen::Matrix>, 3,2> Y = record_matrix(X, tape_1, tape_2); 38 | // ``` 39 | template< typename Derived, typename... Tapes> 40 | auto record_matrix(const Eigen::MatrixBase& X, Tapes&... tapes) 41 | { 42 | return X.unaryExpr([& ](auto&& x) { return record_scalar(x, tapes...); }).matrix().eval(); 43 | } 44 | 45 | template 46 | inline void iterateMatrix(const Matrix& X, const Func & func) 47 | { 48 | if constexpr (Matrix::IsRowMajor) { 49 | for (Eigen::Index i = 0; i < X.rows(); ++i) 50 | for (Eigen::Index j = 0; j < X.cols(); ++j) 51 | func(i, j); 52 | } else { 53 | for (Eigen::Index j = 0; j < X.cols(); ++j) 54 | for (Eigen::Index i = 0; i < X.rows(); ++i) 55 | func(i, j); 56 | } 57 | } 58 | 59 | // ``` 60 | // auto [dfdX,dfdY] = gradient(z,X,Y); 61 | // ``` 62 | template 63 | auto gradient( 64 | const Var & f, 65 | Matrices &... matrices) 66 | { 67 | auto grads = f.grad(); 68 | auto compute_grad_matrix = [&grads](auto& matrix) 69 | { 70 | using MatrixType = std::decay_t; 71 | // Rebuild MatrixType with Var replaced by Scalar 72 | using GradMatrixType = Eigen::Matrix; 73 | GradMatrixType grad_matrix(matrix.rows(), matrix.cols()); 74 | iterateMatrix(matrix, [&](Eigen::Index i, Eigen::Index j) { grad_matrix(i, j) = grads[matrix(i, j).getIndex()]; }); 75 | return grad_matrix; 76 | }; 77 | // Helper lambda to apply compute_grad_matrix to each matrix and return a 78 | // tuple of results 79 | auto tuple_of_gradients = [&](auto&... mats) { 80 | return std::make_tuple(compute_grad_matrix(mats)...); 81 | }; 82 | return std::apply(tuple_of_gradients, std::tie(matrices...)); 83 | } 84 | 85 | template < typename Matrix, typename Container> 86 | size_t collect_indices( const Matrix & X, Container & row, size_t index = 0) 87 | { 88 | // Depending on the containers this is non-trivial to parallelize 89 | iterateMatrix(X, [&](Eigen::Index i, Eigen::Index j) 90 | { 91 | auto k = X(i,j).getIndex(); 92 | row[k] = index; 93 | ++index; 94 | }); 95 | return index; 96 | } 97 | 98 | template < typename Matrix, typename OuterContainer, typename InnerContainer> 99 | size_t collect_outer_and_inner_indices( 100 | const Matrix & X, 101 | OuterContainer & outer_row, 102 | InnerContainer & inner_col, 103 | size_t index = 0) 104 | { 105 | // Depending on the containers this is non-trivial to parallelize 106 | iterateMatrix(X, [&](Eigen::Index i, Eigen::Index j) 107 | { 108 | auto k = X(i,j).getIndex(); 109 | //assert(k < outer_row.size()); 110 | if constexpr (std::is_same_v>) { assert(k < outer_row.size()); } 111 | 112 | outer_row[k] = index; 113 | const auto & Xij = X(i,j); 114 | const auto & Xij_value = Xij.getValue(); 115 | const auto & Xij_value_index = Xij_value.getIndex(); 116 | //inner_col[X(i,j).getValue().getIndex()] = index; 117 | if constexpr (std::is_same_v>) { assert(inner_col.find(Xij_value_index) == inner_col.end()); } 118 | inner_col[Xij_value_index] = index; 119 | ++index; 120 | }); 121 | return index; 122 | } 123 | 124 | template 125 | size_t max_index( Matrices &... matrices) 126 | { 127 | size_t max_so_far = 0; 128 | auto max_index_helper = [&max_so_far](auto& X) 129 | { 130 | iterateMatrix(X, [&](Eigen::Index i, Eigen::Index j) 131 | { 132 | max_so_far = std::max(max_so_far, X(i,j).getIndex()); 133 | }); 134 | }; 135 | (max_index_helper(matrices), ...); 136 | return max_so_far; 137 | } 138 | 139 | template 140 | size_t max_value_index( Matrices &... matrices) 141 | { 142 | size_t max_so_far = 0; 143 | auto max_value_index_helper = [&max_so_far](auto& X) 144 | { 145 | iterateMatrix(X, [&](Eigen::Index i, Eigen::Index j) 146 | { 147 | max_so_far = std::max(max_so_far, X(i,j).getValue().getIndex()); 148 | }); 149 | }; 150 | (max_value_index_helper(matrices), ...); 151 | return max_so_far; 152 | } 153 | 154 | template 155 | Eigen::SparseMatrix sparse_jacobian_from_indices( 156 | const FType & F, 157 | const InnerContainer & inner_col, 158 | const size_t max_index) 159 | { 160 | static_assert(std::is_same_v>); 161 | std::vector> triplets; 162 | for(size_t i = 0; i < F.size(); ++i) 163 | { 164 | auto dFi_d = F[i].sparse_grad(); 165 | for(auto& [index, value] : dFi_d) 166 | { 167 | //if(inner_col.find(index) != inner_col.end()) 168 | //{ 169 | // size_t j = inner_col[index]; 170 | // inner_col may be const 171 | auto it = inner_col.find(index); 172 | if(it != inner_col.end()) 173 | { 174 | size_t j = it->second; 175 | triplets.emplace_back(i, j, value); 176 | } 177 | } 178 | }; 179 | Eigen::SparseMatrix J(F.size(), max_index); 180 | J.setFromTriplets(triplets.begin(),triplets.end()); 181 | return J; 182 | } 183 | 184 | template 185 | Eigen::SparseMatrix sparse_jacobian_generic( 186 | const FType & F, 187 | Matrices &... matrices) 188 | { 189 | index_map col; 190 | int index = 0; 191 | auto collect_all_indices = [&index,&col](auto& X) 192 | { 193 | index = collect_indices(X, col , index); 194 | }; 195 | (collect_all_indices(matrices), ...); 196 | return sparse_jacobian_from_indices< Scalar>(F, col, index); 197 | } 198 | 199 | template 200 | Eigen::SparseMatrix sparse_jacobian( 201 | const std::vector> & F, 202 | Matrices &... matrices) 203 | { 204 | return sparse_jacobian_generic(F, matrices...); 205 | } 206 | 207 | template < 208 | typename Scalar, 209 | int Rows, 210 | int RowsAtCompileTime, 211 | typename... Matrices> 212 | Eigen::SparseMatrix sparse_jacobian( 213 | const Eigen::Matrix, Rows, 1, Eigen::ColMajor, RowsAtCompileTime, 1 > & F, 214 | Matrices &... matrices) 215 | { 216 | return sparse_jacobian_generic(F, matrices...); 217 | } 218 | 219 | template 220 | constexpr int any_dynamic() { 221 | return ((Matrices::ColsAtCompileTime == Eigen::Dynamic || Matrices::RowsAtCompileTime == Eigen::Dynamic) || ...); 222 | } 223 | 224 | // retrun Eigen::RowMajor if all Matrices are RowMajor, otherwise Eigen::ColMajor 225 | template 226 | constexpr int row_major_if_all() { 227 | return ((Matrices::IsRowMajor) && ...) ? Eigen::RowMajor : Eigen::ColMajor; 228 | } 229 | 230 | // Assuming non are dynamic 231 | template 232 | constexpr int total_compile_time_size() { 233 | // Better to just assume otherwise logic get's annoying later 234 | //static_assert(!any_dynamic()); 235 | return (0 + ... + (Matrices::ColsAtCompileTime * Matrices::RowsAtCompileTime)); 236 | } 237 | 238 | template 239 | auto hessian(const Var> & f, Matrices &... matrices) 240 | { 241 | constexpr bool has_dynamic = any_dynamic(); 242 | constexpr int compile_time_size = total_compile_time_size(); 243 | constexpr int N = has_dynamic ? Eigen::Dynamic : compile_time_size; 244 | constexpr int order = row_major_if_all(); 245 | int N_run = N; 246 | if constexpr (N == Eigen::Dynamic) 247 | { 248 | N_run = (0 + ... + (matrices.cols() + matrices.rows())); 249 | } 250 | Eigen::Matrix H(N_run, N_run); 251 | 252 | // Dense gradient 253 | auto df_d_raw = f.grad(); 254 | int outer_i = 0; 255 | auto outer_loop = [&](auto & X) 256 | { 257 | iterateMatrix(X, [&](Eigen::Index i, Eigen::Index j) 258 | { 259 | auto df_dij = df_d_raw[X(i,j).getIndex()]; 260 | auto d2f_dij_d_raw = df_dij.grad(); 261 | 262 | int inner_j = 0; 263 | auto inner_loop = [&](auto & Y) 264 | { 265 | iterateMatrix(Y, [&](Eigen::Index k, Eigen::Index l) 266 | { 267 | auto d2f_dij_dkl = d2f_dij_d_raw[Y(k,l).getValue().getIndex()]; 268 | H(outer_i, inner_j) = d2f_dij_dkl; 269 | inner_j++; 270 | }); 271 | }; 272 | (inner_loop(matrices), ...); 273 | 274 | outer_i++; 275 | }); 276 | }; 277 | (outer_loop(matrices), ...); 278 | 279 | return H; 280 | } 281 | 282 | // Assume that the Hessian is sparse but every row or column has at least one 283 | // entry. 284 | template 285 | Eigen::SparseMatrix sparse_hessian( 286 | const Var> & f, 287 | Matrices &... matrices) 288 | { 289 | // Dense gradient 290 | auto df_d_raw = f.grad(); 291 | std::vector outer_row(max_index(matrices...)+1); 292 | index_map inner_col; 293 | int index = 0; 294 | auto collect_indices = [&inner_col,&outer_row,&index](auto& X) 295 | { 296 | index = collect_outer_and_inner_indices(X, outer_row, inner_col, index); 297 | }; 298 | (collect_indices(matrices), ...); 299 | 300 | // Filter df_d to only include the indices that are in the outer_row 301 | std::vector> df_d(index); 302 | auto collect_df_d_entries = [&](auto&X) 303 | { 304 | // This implements 305 | // for each i; A[J[i]] = B[i] 306 | // Would it be better to have 307 | // for each j: A[j] = B[I[j]] 308 | iterateMatrix(X, [&](Eigen::Index i, Eigen::Index j) 309 | { 310 | auto df_dij = df_d_raw[X(i,j).getIndex()]; 311 | size_t outer_row_ij = outer_row[X(i,j).getIndex()]; 312 | df_d[outer_row_ij] = df_dij; 313 | }); 314 | }; 315 | (collect_df_d_entries(matrices), ...); 316 | return sparse_jacobian_from_indices(df_d, inner_col, index); 317 | } 318 | } 319 | --------------------------------------------------------------------------------