├── .github └── workflows │ └── cmake-multi-platform.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── examples ├── CMakeLists.txt ├── binary-classifier.cpp ├── c-plus-equals-cycle.dot ├── c-plus-equals-cycle.svg ├── c-plus-equals-rewrite.svg ├── example-usage-cycle.cpp ├── example-usage.cpp ├── graph.cpp ├── graph.svg ├── loss.svg ├── mlp1.cpp ├── mlp1.svg ├── neuron.cpp ├── neuron.svg └── regression0.cpp ├── include ├── array.h ├── backprop.h ├── graph.h ├── loss.h ├── mac.h ├── nn.h ├── randomdata.h ├── tuple.h └── value.h └── tests ├── CMakeLists.txt ├── mac-test.cpp ├── multivariate-test.cpp └── value-test.cpp /.github/workflows/cmake-multi-platform.yml: -------------------------------------------------------------------------------- 1 | # This starter workflow is for a CMake project running on multiple platforms. There is a different starter workflow if you just want a single platform. 2 | # See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml 3 | name: CMake on multiple platforms 4 | 5 | on: 6 | push: 7 | branches: [ "main" ] 8 | pull_request: 9 | branches: [ "main" ] 10 | 11 | jobs: 12 | build: 13 | runs-on: ${{ matrix.os }} 14 | 15 | strategy: 16 | # Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable. 17 | fail-fast: false 18 | 19 | # Set up a matrix to run the following 3 configurations: 20 | # 1. 21 | # 2. 22 | # 3. 23 | # 24 | # To add more build types (Release, Debug, RelWithDebInfo, etc.) customize the build_type list. 25 | matrix: 26 | os: [ubuntu-latest, windows-latest] 27 | build_type: [Release] 28 | c_compiler: [gcc, cl] 29 | include: 30 | - os: windows-latest 31 | c_compiler: cl 32 | cpp_compiler: cl 33 | - os: ubuntu-latest 34 | c_compiler: gcc 35 | cpp_compiler: g++ 36 | exclude: 37 | - os: windows-latest 38 | c_compiler: gcc 39 | - os: ubuntu-latest 40 | c_compiler: cl 41 | 42 | steps: 43 | - uses: actions/checkout@v3 44 | 45 | - name: Set reusable strings 46 | # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file. 47 | id: strings 48 | shell: bash 49 | run: | 50 | echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT" 51 | 52 | - name: Configure CMake 53 | # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 54 | # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type 55 | run: > 56 | cmake -B ${{ steps.strings.outputs.build-output-dir }} 57 | -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} 58 | -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} 59 | -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} 60 | -S ${{ github.workspace }} 61 | 62 | - name: Build 63 | # Build your program with the given configuration. Note that --config is needed because the default Windows generator is a multi-config generator (Visual Studio generator). 64 | run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} 65 | 66 | - name: Test 67 | working-directory: ${{ steps.strings.outputs.build-output-dir }} 68 | # Execute tests defined by the CMake configuration. Note that --build-config is needed because the default Windows generator is a multi-config generator (Visual Studio generator). 
69 | # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail 70 | run: ctest --build-config ${{ matrix.build_type }} 71 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | *.swp 3 | 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | enable_testing() 4 | 5 | project(ai-play) 6 | 7 | set(CMAKE_CXX_STANDARD 23) 8 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 9 | set(CMAKE_CXX_EXTENSIONS OFF) 10 | 11 | # Global compiler flags 12 | if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") 13 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -Wpedantic -g") 14 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wall -Wextra -O3 -march=native -mtune=native -mavx2 -ffast-math") 15 | endif() 16 | 17 | if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") 18 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -Wpedantic -g") 19 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wall -Wextra -O3 -march=native -mtune=native -mavx2 -ffast-math") 20 | endif() 21 | 22 | 23 | add_subdirectory(tests) 24 | 25 | add_subdirectory(examples) 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Conrad Parker 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CMake on multiple platforms](https://github.com/kfish/micrograd-cpp-2023/actions/workflows/cmake-multi-platform.yml/badge.svg)](https://github.com/kfish/micrograd-cpp-2023/actions/workflows/cmake-multi-platform.yml) 2 | 3 | # micrograd-cpp-2023 4 | 5 | A C++ implementation of 6 | [karpathy/micrograd](https://github.com/karpathy/micrograd). 7 | Each step of the first episode of *Neural Nets: Zero to Hero*: 8 | [The spelled-out intro to neural networks and backpropagation: building micrograd](https://youtu.be/VMj-3S1tku0) 9 | is included. 
10 | 11 | ![](https://i.ytimg.com/vi/VMj-3S1tku0/hqdefault.jpg) 12 | 13 | This roughly follows the flow of Karpathy's YouTube tutorial, with details specific to this C++ implementation: 14 | 15 | * [What is micrograd-cpp and why is it interesting?](#what-is-micrograd-cpp-and-why-is-it-interesting) 16 | - [Example usage](#example-usage) 17 | - [C++ implementation notes](#c-implementation-notes) 18 | * [Building out the Value object](#building-out-the-value-object) 19 | * [Visualizing the expression graph](#visualizing-the-expression-graph) 20 | * [Backpropagation](#backpropagation) 21 | * [Backpropagation through a neuron](#backpropagation-through-a-neuron) 22 | - [Activation function](#activation-function) 23 | - [Math operations](#math-operations) 24 | - [Multiply-Accumulate](#multiply-accumulate) 25 | - [randomValue, randomArray](#randomvalue-randomarray) 26 | * [Multi-Layer Perceptron](#multi-layer-perceptron) 27 | - [Layer](#layer) 28 | - [BuildLayers](#buildlayers) 29 | - [MLP](#mlp) 30 | - [MLP1](#mlp1) 31 | * [Loss function](#loss-function) 32 | - [MSELoss](#mseloss) 33 | * [Gradient descent](#gradient-descent) 34 | - [Adjusting parameters](#adjusting-parameters) 35 | - [CanBackProp](#canbackprop) 36 | - [BackProp](#backprop) 37 | - [Binary Classifier](#binary-classifier) 38 | 39 | with [References](#references) at the end for further reading about automatic differentiation and C++ implementations. 40 | 41 | See [kfish/makemore-cpp-2023](https://github.com/kfish/makemore-cpp-2023) for a continuation to other 42 | videos in the series, expanding the codebase to handle automatic differentiation of vectors and matrices. 43 | 44 | ## What is micrograd-cpp and why is it interesting? 45 | 46 | micrograd-cpp introduces some nuances of backpropagation (reverse-mode autodiff) and its 47 | implementation. At its core is an expression graph which can be evaluated forwards, where 48 | an expression like `a+b` is implemented using `operator+`, and differentiated in reverse 49 | using a `std::function` attached to each graph node to calculate its gradient. 50 | 51 | This implementation allows computations using `Value` objects to be written as 52 | normal-looking C++ code. It also includes generic classes for evaluation and learning for anything that can produce `Value`. 53 | 54 | Like micrograd, the point is still to be educational, with a focus on some implementation details 55 | and flexibility for exploring learning algorithms. 56 | 57 | ### Building 58 | 59 | For simplicity, there are no code dependencies. Data is manipulated using `std::array<>` and graphing and plotting is done 60 | with external tools like `dot` and `gnuplot`. 61 | 62 | Build with CMake, eg 63 | 64 | ```bash 65 | $ mkdir build 66 | $ cd build 67 | $ cmake .. 68 | $ make 69 | ``` 70 | 71 | ### Example usage 72 | 73 | This is [examples/example-usage.cpp](examples/example-usage.cpp): 74 | 75 | ```c++ 76 | #include 77 | 78 | #include "value.h" 79 | 80 | using namespace ai; 81 | 82 | int main(int argc, char *argv[]) 83 | { 84 | auto a = make_value(-4.0); 85 | auto b = make_value(2.0); 86 | 87 | auto c = a + b; 88 | auto d = a * b + pow(b, 3); 89 | 90 | c += c + 1; 91 | c += 1 + c + (-a); 92 | d += d * 2 + relu(b + a); 93 | d += 3 * d + relu(b - a); 94 | auto e = c - d; 95 | auto f = pow(e, 2); 96 | auto g = f / 2.0; 97 | g += 10.0 / f; 98 | printf("%.4f\n", g->data()); // prints 24.7041, the outcome of this forward pass 99 | backward(g); 100 | printf("%.4f\n", a->grad()); // prints 138.8338, i.e. 
the numerical value of dg/da 101 | printf("%.4f\n", b->grad()); // prints 645.5773, i.e. the numerical value of dg/db 102 | } 103 | ``` 104 | 105 | ### C++ implementation notes 106 | 107 | 1. Data type 108 | 109 | Neural nets generally don't require many bits of precision on individual node values, 110 | so let's not limit ourselves to `float` or `double` 111 | (or [FP8](https://github.com/opencomputeproject/FP8/blob/main/ofp8_references.pdf)). 112 | We template using `Value`. 113 | 114 | 2. Sharing 115 | 116 | Nodes may appear as inputs to multiple other nodes in the expression graph, 117 | especially for neural networks, so we use a `shared_ptr`: 118 | 119 | ```c++ 120 | using Value = std::shared_ptr>; 121 | ``` 122 | 123 | 3. Removal of cycles 124 | 125 | The expression `c += c + 1` refers to itself, so it contains a cycle. This cycle needs to be 126 | removed in order to implement backpropagation. 127 | 128 | ![c += c + 1](examples/c-plus-equals-cycle.svg) 129 | 130 | In Python, `x += y` usually translates to `x.__iadd__(y)` which modifies `x` in-place. 131 | However, the `Value` objects in `micrograd` don't implement `__iadd__`, so Python falls back to using `__add__` 132 | followed by assignment. That means `a += b` is roughly equivalent to `a = a + b`. Each time the + operator 133 | is invoked, a new Value object is created and the graph gets extended, so it is not modifying the existing 134 | objects in-place. 135 | 136 | In C++, `operator+=` requires an explicit implementation which modifies its value in-place. 137 | We create a copy of the old value and re-write all earlier references in the expression graph 138 | to point to the copy. 139 | 140 | ![c += c + 1](examples/c-plus-equals-rewrite.svg) 141 | 142 | Note that this aspect of the implementation is peculiar to the operational semantics of C++ 143 | and in-place assignment operators. It is straightforward to implement a neural network 144 | without calling these operators, so the overhead of node copying and graph rewriting could 145 | easily be removed. We include it here only for the translation of micrograd to C++. 146 | 147 | ## Building out the Value object 148 | 149 | > Neural nets are some pretty scary expressions. We need some data structures to maintain 150 | > these expressions. 151 | 152 | In order to handle basic expressions like: 153 | 154 | ```c++ 155 | auto a = make_value(2.0, "a"); 156 | auto b = make_value(-3.0, "b"); 157 | auto c = make_value(10.0, "c"); 158 | 159 | auto d = (a*b) + c; 160 | std::cout << d << std::endl; 161 | ``` 162 | 163 | we start sketching out the underlying `RawValue` class, implementing operators for `+` 164 | and `*`, and storing the inputs (children) of each for the evaluation graph. 165 | 166 | ```c++ 167 | template 168 | class RawValue { 169 | public: 170 | using ptr = std::shared_ptr>; 171 | 172 | private: 173 | RawValue(const T& data, const std::string& label="") 174 | : data_(data), label_(label) 175 | {} 176 | 177 | RawValue(const T& data, std::set& children, const std::string& op="") 178 | : data_(data), prev_(children), op_(op) 179 | {} 180 | 181 | public: 182 | template 183 | static ptr make(Args&&... 
args) { 184 | return ptr(new RawValue(std::forward(args)...)); 185 | } 186 | 187 | friend ptr operator+(const ptr& a, const ptr& b) { 188 | std::set children = {a, b}; 189 | return make(a->data() + b->data(), children, "+"); 190 | } 191 | 192 | friend ptr operator*(const ptr& a, const ptr& b) { 193 | std::set children = {a, b}; 194 | return make(a->data() * b->data(), children, "*"); 195 | } 196 | 197 | private: 198 | T data_; 199 | std::set prev_{}; 200 | std::string op_{""}; 201 | }; 202 | 203 | template 204 | static inline std::ostream& operator<<(std::ostream& os, const RawValue& value) { 205 | return os << "Value(" 206 | << "data=" << value.data() << ", " 207 | << "op=" << value.op() 208 | << ")"; 209 | } 210 | ``` 211 | 212 | In code we use `Value`, which is an alias for `shared_ptr>`: 213 | 214 | ```c++ 215 | template 216 | using Value = typename RawValue::ptr; 217 | 218 | template 219 | static Value make_value(const T& data, Args&&... args) { 220 | return RawValue::make(data, std::forward(args)...); 221 | } 222 | 223 | template 224 | static inline std::ostream& operator<<(std::ostream& os, const std::shared_ptr>& value) { 225 | return os << value.get() << "=&" << *value; 226 | } 227 | ``` 228 | 229 | ## Visualizing the expression graph 230 | 231 | We provide a `Graph` class that can wrap any `Value`. It has a custom `operator<<` that writes in `dot` 232 | language. The implementation is in [include/graph.h](include/graph.h). We also introduce a `label` to the `Value` 233 | object for labelling graph nodes, and an `expr` factory function for creating labelled expressions. 234 | 235 | We can pipe the output of a program to `dot -Tsvg` to produce an svg image, or to `xdot` to view it interactively: 236 | 237 | ```bash 238 | $ build/examples/graph | dot -Tsvg -o graph.svg 239 | $ build/examples/graph | xdot - 240 | ``` 241 | 242 | ![Example graph](examples/graph.svg) 243 | 244 | ## Backpropagation 245 | 246 | We add a member variable `grad_` that maintains the gradient with respect to the final output. 247 | 248 | How each operation affects the output is written as a lambda function, `backward_`. 249 | It copies the `Value` `shared_ptr`s of each node's children in order to increment their reference counts. 
250 | 251 | ```c++ 252 | friend ptr operator+(const ptr& a, const ptr& b) { 253 | auto out = make(a->data() + b->data(), children, "+"); 254 | 255 | out->backward_ = [=]() { 256 | a->grad_ += out->grad_; 257 | b->grad_ += out->grad_; 258 | }; 259 | 260 | return out; 261 | } 262 | 263 | friend ptr operator*(const ptr& a, const ptr& b) { 264 | std::set children = {a, b}; 265 | auto out = make(a->data() * b->data(), children, "*"); 266 | 267 | out->backward_ = [=]() { 268 | a->grad_ += b->data() * out->grad(); 269 | b->grad_ += a->data() * out->grad(); 270 | }; 271 | 272 | return out; 273 | } 274 | ``` 275 | 276 | We recursively apply the local derivatives using the chain rule backwards through the expression graph: 277 | 278 | ```c++ 279 | friend void backward(const ptr& node) { 280 | std::vector*> topo; 281 | std::set*> visited; 282 | 283 | std::function build_topo = [&](const ptr& v) { 284 | if (!visited.contains(v.get())) { 285 | visited.insert(v.get()); 286 | for (auto && c : v->children()) { 287 | build_topo(c); 288 | } 289 | topo.push_back(v.get()); 290 | } 291 | }; 292 | 293 | build_topo(node); 294 | 295 | for (auto & v : topo) { 296 | v->grad_ = 0.0; 297 | } 298 | 299 | node->grad_ = 1.0; 300 | 301 | for (auto it = topo.rbegin(); it != topo.rend(); ++it) { 302 | const RawValue* v = *it; 303 | auto f = v->backward_; 304 | if (f) f(); 305 | } 306 | } 307 | ``` 308 | 309 | ## Backpropagation through a neuron 310 | 311 | We begin the implementation of a neuron, in [include/nn.h](include/nn.h): 312 | 313 | ```c++ 314 | template 315 | class Neuron { 316 | public: 317 | Neuron() 318 | : weights_(randomArray()), bias_(randomValue()) 319 | { 320 | } 321 | 322 | Value operator()(const std::array, Nin>& x) const { 323 | Value y = mac(weights_, x, bias_); 324 | return expr(tanh(y), "n"); 325 | } 326 | 327 | ... 328 | }; 329 | ``` 330 | 331 | The resulting expression graph for a neuron with four inputs (code in [examples/neuron.cpp](examples/neuron.cpp)): 332 | 333 | ![Neuron graph](examples/neuron.svg) 334 | 335 | ### Activation function 336 | 337 | In general an activation function modifies the output of a neuron, perhaps so that all neurons have similar ranges of output value or to smooth or filter large and negative values. 338 | Whichever activation function we use, we need to implement a `backward_` function. 339 | This implementation includes `relu` (which just replaces any negative values with zero) and `tanh`, which squashes the output into the range ±1.0. `tanh` is used in the video and has an obvious and continuous effect on the gradient: 340 | 341 | ```c++ 342 | friend ptr tanh(const ptr& a) { 343 | std::set children = {a}; 344 | double x = a->data(); 345 | double e2x = exp(2.0*x); 346 | double t = (e2x-1)/(e2x+1); 347 | auto out = make(t, children, "tanh"); 348 | 349 | out->backward_ = [=]() { 350 | a->grad_ += (1.0 - t*t) * out->grad_; 351 | }; 352 | 353 | return out; 354 | } 355 | ``` 356 | 357 | ### Math operations 358 | 359 | We must implement all required math operations on `Value`, including pow, exp, and division, 360 | so that we can accumulate gradients and run backpropagation. 
361 | 362 | For convenience we also provide operator specializations where one operand is an arithmetic value, so that instead of 363 | writing `a * make_value(7.0)` you can write `a * 7.0` or `7.0 * a`: 364 | 365 | ```c++ 366 | template::value, int> = 0> 367 | friend ptr operator*(const ptr& a, N n) { return a * make(n); } 368 | 369 | template::value, int> = 0> 370 | friend ptr operator*(N n, const ptr& a) { return make(n) * a; } 371 | ``` 372 | 373 | ### Multiply-Accumulate 374 | 375 | A neuron takes a number of input values, applies a weight to each, and sums the result. We can abstract this out as a common multiply-accumulate function. 376 | It is usual to use a hardware-optimized, eg. GPU, implementation. 377 | In order to use our explicit `Value` object, we provide a generic implementation is in [include/mac.h](include/mac.h). 378 | This uses `std::execution` to allow the compiler to choose an optimized execution method, allowing parallel and vectorized execution: 379 | 380 | ```c++ 381 | template 382 | T mac(const std::array& a, const std::array& b, T init = T{}) { 383 | return std::transform_reduce( 384 | std::execution::par_unseq, // Use parallel and vectorized execution 385 | a.begin(), a.end(), // Range of first vector 386 | b.begin(), // Range of second vector 387 | init, //static_cast(0), // Initial value 388 | std::plus<>(), // Accumulate 389 | std::multiplies<>() // Multiply 390 | ); 391 | } 392 | ``` 393 | 394 | ### randomValue, randomArray 395 | 396 | We provide helper functions to create random values statically, in deterministic order. This helps with reproducibility for debugging. 397 | 398 | The implementation is in [include/random.h](include/random.h). 399 | 400 | ```c++ 401 | // Static inline function to generate a random T 402 | template 403 | static inline Value randomValue() { 404 | static unsigned int seed = 42; 405 | static thread_local std::mt19937 gen(seed++); 406 | std::uniform_real_distribution dist(-1.0, 1.0); 407 | seed = gen(); // update seed for next time 408 | return make_value(dist(gen)); 409 | } 410 | 411 | // Static inline function to generate a random std::array 412 | template 413 | static inline std::array, N> randomArray() { 414 | std::array, N> arr; 415 | for (auto& element : arr) { 416 | element = randomValue(); 417 | } 418 | return arr; 419 | } 420 | ``` 421 | 422 | ## Multi-Layer Perceptron 423 | 424 | We arrange neurons in a series of layers. Each layer is just an array of neurons. 425 | 426 | A layer `Layer` consists of `Nout` neurons, and is callable: 427 | * The same input (array of `Nin` values) is passed to each of the neurons 428 | * Each neuron produces a single output value 429 | * These output values are collected into an output array of `Nout` values. 430 | 431 | ### Layer 432 | 433 | ```c++ 434 | template 435 | class Layer { 436 | public: 437 | 438 | std::array, Nout> operator()(const std::array, Nin>& x) { 439 | std::array, Nout> output{}; 440 | std::transform(std::execution::par_unseq, neurons_.begin(), neurons_.end(), 441 | output.begin(), [&](const auto& n) { return n(x); }); 442 | return output; 443 | } 444 | 445 | private: 446 | std::array, Nout> neurons_{}; 447 | }; 448 | ``` 449 | 450 | ### BuildLayers 451 | 452 | We introduce a helper type that allows us to specify a sequence of layers of different sizes. 
453 | 454 | ```c++ 455 | template 456 | struct BuildLayers; 457 | 458 | template 459 | struct BuildLayers { 460 | using type = decltype(std::tuple_cat( 461 | std::tuple>{}, 462 | typename BuildLayers::type{} 463 | )); 464 | static constexpr size_t nout = BuildLayers::nout; 465 | }; 466 | 467 | template 468 | struct BuildLayers { 469 | using type = std::tuple>; 470 | static constexpr size_t nout = Last; 471 | }; 472 | ``` 473 | 474 | We make an alias for the type of such a sequence, like `Layers<3, 4, 4, 1>`: 475 | 476 | ```c++ 477 | template 478 | using Layers = typename BuildLayers::type; 479 | ``` 480 | 481 | and a helper to extract the final number of outputs, eg. `LayersNout<3, 4, 4, 1>` is 1: 482 | 483 | ```c++ 484 | template 485 | static constexpr size_t LayersNout = BuildLayers::nout; 486 | ``` 487 | 488 | ### MLP 489 | 490 | Finaly we use `Layers<>` in a class `MLP<>`, which: 491 | * Forwards its input to the first layer 492 | * Passes the output of each layer to the next layer, in turn 493 | * Returns the output of the last layer 494 | 495 | ```c++ 496 | template 497 | class MLP { 498 | public: 499 | static constexpr size_t Nout = LayersNout; 500 | 501 | std::array, Nout> operator()(const std::array, Nin>& input) { 502 | return forward<0, Nin, Nouts...>(input); 503 | } 504 | 505 | std::array, Nout> operator()(const std::array& input) { 506 | return this->operator()(value_array(input)); 507 | } 508 | 509 | private: 510 | template 511 | auto forward(const std::array, NinCurr>& input) -> decltype(auto) { 512 | auto & p = std::get(layers_); 513 | auto output = std::get(layers_)(input); 514 | if constexpr (sizeof...(NoutsRest) > 0) { 515 | return forward(output); 516 | } else { 517 | return output; 518 | } 519 | } 520 | 521 | private: 522 | Layers layers_; 523 | 524 | }; 525 | ``` 526 | 527 | ### MLP1 528 | 529 | If we want a single-valued output from our neural network, we create a wrapper class `MLP1<>` that returns only the first element 530 | of the output of the wrapped `MLP<>`: 531 | 532 | ```c++ 533 | template 534 | class MLP1 : public MLP 535 | { 536 | public: 537 | MLP1() 538 | : MLP() 539 | {} 540 | 541 | Value operator()(const std::array, Nin>& input) { 542 | return MLP::operator()(input)[0]; 543 | } 544 | 545 | Value operator()(const std::array& input) { 546 | return MLP::operator()(input)[0]; 547 | } 548 | }; 549 | ``` 550 | 551 | With the following code from [examples/mlp1.cpp](examples/mlp1.cpp) we can make a 3 layer neural net with 1 output, 552 | run backpropagation over it and show the resulting expression graph: 553 | 554 | ```c++ 555 | MLP1 n; 556 | 557 | std::array input = {{ 2.0, 3.0, -1.0 }}; 558 | auto output = n(input); 559 | 560 | backward(output); 561 | 562 | std::cout << Graph(output) << std::endl; 563 | ``` 564 | 565 | ![MLP1 graph](examples/mlp1.svg) 566 | 567 | ## Loss function 568 | 569 | Now that we can make a neural net, run it forwards to produce a value and backwards to calculate gradients, we can begin adjusting it to learn. 570 | 571 | We introduce generic evaluation and learning classes for anything that can produce `Value`. 572 | 573 | ### MSELoss 574 | 575 | We evaluate a prediction against a known "ground truth". The difference between these is the *Error*, and we 576 | take the square of the error to approximate distance. 577 | We average these out when considering an array of predictions and their ground truths. This is the Mean Squared Error. 578 | 579 | Implementation in [include/loss.h](include/loss.h). 
580 | 581 | ```c++ 582 | template 583 | Value mse_loss(const Value& predicted, const Value& ground_truth) { 584 | static_assert(std::is_arithmetic::value, "Type must be arithmetic"); 585 | return pow(predicted - ground_truth, 2); 586 | } 587 | ``` 588 | 589 | ```c++ 590 | template 591 | Value mse_loss(const std::array, N>& predictions, const std::array& ground_truth) { 592 | Value sum_squared_error = std::inner_product(predictions.begin(), predictions.end(), ground_truth.begin(), make_value(0), 593 | std::plus<>(), 594 | [](Value pred, T truth) { return pow(pred - truth, 2); } 595 | ); 596 | return sum_squared_error / make_value(N); 597 | } 598 | ``` 599 | 600 | We provide a wrapper class to calculate the Mean Squared Error of any `std::function(const Arg&)>`: 601 | 602 | ```c++ 603 | template 604 | class MSELoss { 605 | public: 606 | MSELoss(const std::function(const Arg&)>& func) 607 | : func_(func) 608 | { 609 | } 610 | 611 | Value operator()(std::array& input, const std::array& ground_truth, bool verbose=false) { 612 | if (verbose) std::cerr << "Predictions: "; 613 | for (size_t i = 0; i < N; ++i) { 614 | predictions_[i] = func_(input[i]); 615 | if (verbose) std::cerr << predictions_[i]->data() << " "; 616 | } 617 | if (verbose) std::cerr << '\n'; 618 | return mse_loss(predictions_, ground_truth); 619 | } 620 | 621 | private: 622 | const std::function(const Arg&)> func_; 623 | std::array, N> predictions_; 624 | }; 625 | ``` 626 | 627 | ## Gradient descent 628 | 629 | The gradients calculated by running `backward(loss)` annotate how each parameter contributes to the error loss. 630 | By adjusting each parameter down (against the gradient) we aim to minimize the error. 631 | 632 | ### Adjusting parameters 633 | 634 | We introduce an `adjust()` function in `Value` to modify `data_` according to the calculated gradient `grad_`. 635 | This takes a parameter `learning_rate`, usually in the range `[0.0 .. 1.0]` to scale of the adjustment: 636 | 637 | ```c++ 638 | void adjust(const T& learning_rate) { 639 | data_ += -learning_rate * grad_; 640 | } 641 | ``` 642 | 643 | We then provide `adjust()` functions to adjust all the parameters of an neural net: the weights and bias of a Neuron: 644 | 645 | ```c++ 646 | template 647 | class Neuron { 648 | ... 649 | const std::array, Nin>& weights() const { 650 | return weights_; 651 | } 652 | 653 | Value bias() const { 654 | return bias_; 655 | } 656 | 657 | void adjust_weights(const T& learning_rate) { 658 | for (const auto& w : weights_) { 659 | w->adjust(learning_rate); 660 | } 661 | } 662 | 663 | void adjust_bias(const T& learning_rate) { 664 | bias_->adjust(learning_rate); 665 | } 666 | 667 | void adjust(const T& learning_rate) { 668 | adjust_weights(learning_rate); 669 | adjust_bias(learning_rate); 670 | } 671 | ... 672 | }; 673 | ``` 674 | 675 | then adjust all the Neurons in a Layer: 676 | 677 | ```c++ 678 | template 679 | class Layer { 680 | ... 681 | void adjust(const T& learning_rate) { 682 | for (auto & n : neurons_) { 683 | n.adjust(learning_rate); 684 | } 685 | } 686 | ... 687 | }; 688 | ``` 689 | 690 | and all the Layers in an MLP: 691 | 692 | ```c++ 693 | template 694 | void layers_adjust(std::tuple& layers, const T& learning_rate) { 695 | std::apply([&learning_rate](auto&... layer) { 696 | // Use fold expression to call adjust on each layer 697 | (..., layer.adjust(learning_rate)); 698 | }, layers); 699 | } 700 | ``` 701 | 702 | ```c++ 703 | template 704 | class MLP { 705 | ... 
706 | void adjust(const T& learning_rate) { 707 | layers_adjust(layers_, learning_rate); 708 | } 709 | ... 710 | }; 711 | ``` 712 | 713 | ### CanBackProp 714 | 715 | We introduce a concept `CanBackProp` to describe any function that can be evaluated and adjusted: 716 | 717 | ```c++ 718 | template 719 | concept CanBackProp = requires(F f, Arg arg, T learning_rate) { 720 | { f(arg) } -> std::convertible_to>; 721 | { f.adjust(learning_rate) } -> std::convertible_to; 722 | }; 723 | ``` 724 | 725 | For example, `CanBackProp` is true for `MLP1`. 726 | 727 | ### BackProp 728 | 729 | We create a wrapper class for any function that matches the `CanBackProp` concept. This class is callable 730 | with input and ground truth arguments, which are used to iteratively: 731 | * make predictions 732 | * evaluate error loss against ground truth 733 | * adjust parameters to minimize loss 734 | 735 | The loss at each step is recorded in an output file `loss_path`. 736 | 737 | ```c++ 738 | template 739 | class BackPropImpl { 740 | public: 741 | BackPropImpl(const F& func, const std::string& loss_path) 742 | : func_(func), loss_output_(loss_path) 743 | { 744 | } 745 | 746 | MSELoss loss_function() const { 747 | return MSELoss(func_); 748 | } 749 | 750 | T operator()(std::array& input, const std::array& ground_truth, 751 | T learning_rate, int iterations, bool verbose=false) 752 | { 753 | auto loss_f = loss_function(); 754 | T result; 755 | 756 | for (int i=0; i < iterations; ++i) { 757 | Value loss = loss_f(input, ground_truth, verbose); 758 | 759 | result = loss->data(); 760 | loss_output_ << iter_ << '\t' << result << '\n'; 761 | 762 | if (verbose) { 763 | std::cerr << "Loss (" << iter_ << "):\t" << result << std::endl; 764 | } 765 | 766 | backward(loss); 767 | 768 | func_.adjust(learning_rate); 769 | 770 | ++iter_; 771 | } 772 | 773 | return result; 774 | } 775 | 776 | private: 777 | F func_; 778 | std::ofstream loss_output_; 779 | int iter_{0}; 780 | }; 781 | ``` 782 | 783 | The helper `BackProp` template allows us to instantiate without specifying the type of `F`, as the compiler can infer it from the constructor argument: 784 | 785 | ```c++ 786 | template 787 | requires CanBackProp 788 | auto BackProp(const F& func, const std::string& loss_path) 789 | { 790 | return BackPropImpl(func, loss_path); 791 | } 792 | ``` 793 | 794 | ### Binary Classifier 795 | 796 | Full example: [binary-classifier.cpp](examples/binary-classifier.cpp): 797 | 798 | ```c++ 799 | #include 800 | 801 | #include "backprop.h" 802 | #include "nn.h" 803 | 804 | using namespace ai; 805 | 806 | int main(int argc, char *argv[]) 807 | { 808 | // Define a neural net 809 | MLP1 n; 810 | std::cerr << n << std::endl; 811 | 812 | // A set of training inputs 813 | std::array, 4> input = {{ 814 | {2.0, 3.0, -1.0}, 815 | {3.0, -1.0, 0.5}, 816 | {0.5, 1.0, 1.0}, 817 | {1.0, 1.0, -1.0} 818 | }}; 819 | 820 | // Corresponding ground truth values for these inputs 821 | std::array y = {1.0, -1.0, -1.0, 1.0}; 822 | std::cerr << "y (gt):\t" << PrettyArray(y) << std::endl; 823 | 824 | double learning_rate = 0.9; 825 | 826 | auto backprop = BackProp, 4>(n, "loss.tsv"); 827 | 828 | // Run backprop for 20 iterations, verbose=true 829 | double loss = backprop(input, y, learning_rate, 20, true); 830 | } 831 | ``` 832 | 833 | This quickly converges close to the ground truth `y = {1.0, -1.0, -1.0, 1.0}`: 834 | 835 | ``` 836 | Predictions: 0.773488 0.796802 0.870344 0.736159 837 | Loss (0): 1.7119 838 | Predictions: -0.316783 -0.714319 -0.588441 -0.396673 839 | Loss 
(1): 0.983902 840 | Predictions: 0.997675 0.996129 0.996903 0.99767 841 | Loss (2): 1.99304 842 | Predictions: 0.997454 0.995671 0.996576 0.997448 843 | Loss (3): 1.99226 844 | Predictions: 0.997183 0.995088 0.996169 0.997177 845 | Loss (4): 1.99127 846 | Predictions: 0.996845 0.99432 0.995649 0.996838 847 | Loss (5): 1.98999 848 | Predictions: 0.996409 0.993259 0.99496 0.996403 849 | Loss (6): 1.98824 850 | Predictions: 0.995827 0.991692 0.994 0.995819 851 | Loss (7): 1.98573 852 | Predictions: 0.995001 0.989122 0.992564 0.994993 853 | Loss (8): 1.98174 854 | Predictions: 0.993729 0.984072 0.990152 0.993718 855 | Loss (9): 1.97433 856 | Predictions: 0.991462 0.969564 0.985135 0.991443 857 | Loss (10): 1.95502 858 | Predictions: 0.985916 0.862539 0.967274 0.985854 859 | Loss (11): 1.8349 860 | Predictions: 0.947625 -0.182752 0.497795 0.945489 861 | Loss (12): 0.72925 862 | Predictions: -0.0306315 -0.996451 -0.996155 -0.512822 863 | Loss (13): 0.837715 864 | Predictions: 0.999406 -0.977287 -0.542644 0.999393 865 | Loss (14): 0.0524229 866 | Predictions: 0.999027 -0.997707 -0.997583 0.998998 867 | Loss (15): 3.26197e-06 868 | Predictions: 0.999027 -0.997707 -0.997584 0.998998 869 | Loss (16): 3.26134e-06 870 | Predictions: 0.999027 -0.997708 -0.997584 0.998998 871 | Loss (17): 3.26072e-06 872 | Predictions: 0.999027 -0.997708 -0.997584 0.998998 873 | Loss (18): 3.26009e-06 874 | Predictions: 0.999027 -0.997708 -0.997585 0.998998 875 | Loss (19): 3.25946e-06 876 | ``` 877 | 878 | ## Conclusion 879 | 880 | We ported all the features of micrograd introduced in Karpathy's YouTube tutorial to C++, giving a different perspective on implementation details. 881 | We also considered some more generic aspects of model evaluation and iterative learning to develop re-usable C++ classes. 
882 | 883 | ## References 884 | 885 | ### Automatic differentiation in C++ 886 | * [Differentiable Programming in C++ - Vassil Vassilev & William Moses - CppCon 2021](https://www.youtube.com/watch?v=1QQj1mAV-eY) [YouTube] 887 | * [Automatic Differentiation in C++](https://compiler-research.org/assets/presentations/CladInROOT_15_02_2020.pdf)[PDF] 888 | * [FastAD: Expression Template-Based C++ Library for Fast and Memory-Efficient Automatic Differentiation](https://arxiv.org/abs/2102.03681) [PDF] 889 | 890 | ### Automatic differentiation 891 | * [ad](https://hackage.haskell.org/package/ad) 892 | 893 | ## Next up: [kfish/makemore-cpp-2023](https://github.com/kfish/makemore-cpp-2023) 894 | 895 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | set(CMAKE_CXX_STANDARD 23) 4 | set(CMAKE_CXX_FLAGS_RELEASE "-O3") 5 | 6 | enable_testing() 7 | 8 | project(examples) 9 | 10 | get_filename_component(PARENT_DIR ${PROJECT_SOURCE_DIR} DIRECTORY) 11 | 12 | include_directories( 13 | ${PARENT_DIR}/include 14 | ) 15 | 16 | add_executable(example-usage example-usage.cpp) 17 | add_test(NAME example-usage 18 | COMMAND example-usage) 19 | 20 | add_executable(example-usage-cycle example-usage-cycle.cpp) 21 | add_test(NAME example-usage-cycle 22 | COMMAND example-usage-cycle) 23 | 24 | add_executable(graph graph.cpp) 25 | add_test(NAME graph 26 | COMMAND graph) 27 | 28 | add_executable(neuron neuron.cpp) 29 | add_test(NAME neuron 30 | COMMAND neuron) 31 | 32 | add_executable(mlp1 mlp1.cpp) 33 | add_test(NAME mlp1 34 | COMMAND mlp1) 35 | 36 | add_executable(regression0 regression0.cpp) 37 | add_test(NAME regression0 38 | COMMAND regression0) 39 | 40 | add_executable(binary-classifier binary-classifier.cpp) 41 | add_test(NAME binary-classifier 42 | COMMAND binary-classifier) 43 | -------------------------------------------------------------------------------- /examples/binary-classifier.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "backprop.h" 4 | #include "nn.h" 5 | #include "graph.h" 6 | 7 | using namespace ai; 8 | 9 | int main(int argc, char *argv[]) 10 | { 11 | // Define a neural net 12 | MLP1 n; 13 | std::cerr << n << std::endl; 14 | 15 | // A set of training inputs 16 | std::array, 4> input = {{ 17 | {2.0, 3.0, -1.0}, 18 | {3.0, -1.0, 0.5}, 19 | {0.5, 1.0, 1.0}, 20 | {1.0, 1.0, -1.0} 21 | }}; 22 | 23 | // Corresponding ground truth values for these inputs 24 | std::array y = {1.0, -1.0, -1.0, 1.0}; 25 | std::cerr << "y (gt):\t" << PrettyArray(y) << std::endl; 26 | 27 | double learning_rate = 0.9; 28 | 29 | auto backprop = BackProp, 4>(n, "loss.tsv"); 30 | 31 | // Run backprop for 20 iterations, verbose=true 32 | double loss = backprop(input, y, learning_rate, 20, true); 33 | } 34 | 35 | -------------------------------------------------------------------------------- /examples/c-plus-equals-cycle.dot: -------------------------------------------------------------------------------- 1 | digraph G { 2 | rankdir = "LR"; 3 | "node0x55628c103eb0" [label = "{ | data=-4.0000 | grad=0.0000 }", shape="record"] 4 | "node0x55628c103fb0" [label = "{ | data=2.0000 | grad=0.0000 }", shape="record"] 5 | "node0x55628c104130" [label = "{ | data=-3.0000 | grad=0.0000 }", shape="record"] 6 | "node0x55628c104130+" [label = "+"] 7 | "node0x55628c104130+" -> "node0x55628c104130"; 8 
| "node0x55628c104930" [label = "{ | data=1.0000 | grad=0.0000 }", shape="record"] 9 | "node0x55628c104a30" [label = "{ | data=-1.0000 | grad=0.0000 }", shape="record"] 10 | "node0x55628c104a30+" [label = "+"] 11 | "node0x55628c104a30+" -> "node0x55628c104a30"; 12 | "node0x55628c103eb0" -> "node0x55628c104130+"; 13 | "node0x55628c103fb0" -> "node0x55628c104130+"; 14 | "node0x55628c104130" -> "node0x55628c104a30+"; 15 | "node0x55628c104930" -> "node0x55628c104a30+"; 16 | "node0x55628c104a30" -> "node0x55628c104130+"; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /examples/c-plus-equals-cycle.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 9 | 10 | G 11 | 12 | 13 | 14 | node0x55a7a4822eb0 15 | 16 | a 17 | 18 | data=-4.0000 19 | 20 | grad=0.0000 21 | 22 | 23 | 24 | node0x55a7a4823130+ 25 | 26 | + 27 | 28 | 29 | 30 | node0x55a7a4822eb0->node0x55a7a4823130+ 31 | 32 | 33 | 34 | 35 | 36 | node0x55a7a4822fb0 37 | 38 | b 39 | 40 | data=2.0000 41 | 42 | grad=0.0000 43 | 44 | 45 | 46 | node0x55a7a4822fb0->node0x55a7a4823130+ 47 | 48 | 49 | 50 | 51 | 52 | node0x55a7a4823130 53 | 54 | c 55 | 56 | data=-3.0000 57 | 58 | grad=0.0000 59 | 60 | 61 | 62 | node0x55a7a4823a30+ 63 | 64 | + 65 | 66 | 67 | 68 | node0x55a7a4823130->node0x55a7a4823a30+ 69 | 70 | 71 | 72 | 73 | 74 | node0x55a7a4823130+->node0x55a7a4823130 75 | 76 | 77 | 78 | 79 | 80 | node0x55a7a4823930 81 | 82 | 83 | 84 | data=1.0000 85 | 86 | grad=0.0000 87 | 88 | 89 | 90 | node0x55a7a4823930->node0x55a7a4823a30+ 91 | 92 | 93 | 94 | 95 | 96 | node0x55a7a4823a30 97 | 98 | 99 | 100 | data=-1.0000 101 | 102 | grad=0.0000 103 | 104 | 105 | 106 | node0x55a7a4823a30->node0x55a7a4823130+ 107 | 108 | 109 | 110 | 111 | 112 | node0x55a7a4823a30+->node0x55a7a4823a30 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /examples/c-plus-equals-rewrite.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 9 | 10 | G 11 | 12 | 13 | 14 | node0x56434b372eb0 15 | 16 | a 17 | 18 | data=-4.0000 19 | 20 | grad=0.0000 21 | 22 | 23 | 24 | node0x56434b373bf0+ 25 | 26 | + 27 | 28 | 29 | 30 | node0x56434b372eb0->node0x56434b373bf0+ 31 | 32 | 33 | 34 | 35 | 36 | node0x56434b372fb0 37 | 38 | b 39 | 40 | data=2.0000 41 | 42 | grad=0.0000 43 | 44 | 45 | 46 | node0x56434b372fb0->node0x56434b373bf0+ 47 | 48 | 49 | 50 | 51 | 52 | node0x56434b373130 53 | 54 | 55 | 56 | data=-3.0000 57 | 58 | grad=0.0000 59 | 60 | 61 | 62 | node0x56434b373130+ 63 | 64 | + 65 | 66 | 67 | 68 | node0x56434b373130+->node0x56434b373130 69 | 70 | 71 | 72 | 73 | 74 | node0x56434b373930 75 | 76 | 77 | 78 | data=1.0000 79 | 80 | grad=0.0000 81 | 82 | 83 | 84 | node0x56434b373a30+ 85 | 86 | + 87 | 88 | 89 | 90 | node0x56434b373930->node0x56434b373a30+ 91 | 92 | 93 | 94 | 95 | 96 | node0x56434b373a30 97 | 98 | 99 | 100 | data=-1.0000 101 | 102 | grad=0.0000 103 | 104 | 105 | 106 | node0x56434b373a30->node0x56434b373130+ 107 | 108 | 109 | 110 | 111 | 112 | node0x56434b373a30+->node0x56434b373a30 113 | 114 | 115 | 116 | 117 | 118 | node0x56434b373bf0 119 | 120 | c 121 | 122 | data=-2.0000 123 | 124 | grad=0.0000 125 | 126 | 127 | 128 | node0x56434b373bf0->node0x56434b373130+ 129 | 130 | 131 | 132 | 133 | 134 | node0x56434b373bf0->node0x56434b373a30+ 135 | 136 | 137 | 138 | 139 | 140 | node0x56434b373bf0+->node0x56434b373bf0 141 | 142 | 143 | 144 | 145 | 146 | 
-------------------------------------------------------------------------------- /examples/example-usage-cycle.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "value.h" 4 | #include "graph.h" 5 | 6 | using namespace ai; 7 | 8 | int main(int argc, char *argv[]) 9 | { 10 | auto a = make_value(-4.0, "a"); 11 | auto b = make_value(2.0, "b"); 12 | 13 | auto c = expr(a + b, "c");; 14 | auto d = a * b + pow(b, 3); 15 | 16 | c += c + 1; 17 | 18 | std::cout << Graph(c) << std::endl; 19 | } 20 | -------------------------------------------------------------------------------- /examples/example-usage.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "value.h" 4 | 5 | using namespace ai; 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | auto a = make_value(-4.0); 10 | auto b = make_value(2.0); 11 | 12 | auto c = a + b; 13 | auto d = a * b + pow(b, 3); 14 | 15 | c += c + 1; 16 | c += 1 + c + (-a); 17 | d += d * 2 + relu(b + a); 18 | d += 3 * d + relu(b - a); 19 | auto e = c - d; 20 | auto f = pow(e, 2); 21 | auto g = f / 2.0; 22 | g += 10.0 / f; 23 | printf("%.4f\n", g->data()); // prints 24.7041, the outcome of this forward pass 24 | backward(g); 25 | printf("%.4f\n", a->grad()); // prints 138.8338, i.e. the numerical value of dg/da 26 | printf("%.4f\n", b->grad()); // prints 645.5773, i.e. the numerical value of dg/db 27 | } 28 | -------------------------------------------------------------------------------- /examples/graph.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "graph.h" 4 | 5 | using namespace ai; 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | auto a = make_value(2.0, "a"); 10 | auto b = make_value(-3.0, "b"); 11 | auto c = make_value(10.0, "c"); 12 | 13 | auto e = expr(a*b, "e"); 14 | auto d = expr(e+c, "d"); 15 | 16 | auto f = make_value(-2.0, "f"); 17 | auto L = expr(d * f, "L"); 18 | 19 | std::cout << Graph(L) << std::endl; 20 | } 21 | -------------------------------------------------------------------------------- /examples/graph.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 9 | 10 | G 11 | 12 | 13 | 14 | node0x561d478f3eb0 15 | 16 | a 17 | 18 | data=2.0000 19 | 20 | grad=0.0000 21 | 22 | 23 | 24 | node0x561d478f4230* 25 | 26 | * 27 | 28 | 29 | 30 | node0x561d478f3eb0->node0x561d478f4230* 31 | 32 | 33 | 34 | 35 | 36 | node0x561d478f3fb0 37 | 38 | b 39 | 40 | data=-3.0000 41 | 42 | grad=0.0000 43 | 44 | 45 | 46 | node0x561d478f3fb0->node0x561d478f4230* 47 | 48 | 49 | 50 | 51 | 52 | node0x561d478f40b0 53 | 54 | c 55 | 56 | data=10.0000 57 | 58 | grad=0.0000 59 | 60 | 61 | 62 | node0x561d478f43f0+ 63 | 64 | + 65 | 66 | 67 | 68 | node0x561d478f40b0->node0x561d478f43f0+ 69 | 70 | 71 | 72 | 73 | 74 | node0x561d478f4230 75 | 76 | e 77 | 78 | data=-6.0000 79 | 80 | grad=0.0000 81 | 82 | 83 | 84 | node0x561d478f4230->node0x561d478f43f0+ 85 | 86 | 87 | 88 | 89 | 90 | node0x561d478f4230*->node0x561d478f4230 91 | 92 | 93 | 94 | 95 | 96 | node0x561d478f43f0 97 | 98 | d 99 | 100 | data=4.0000 101 | 102 | grad=0.0000 103 | 104 | 105 | 106 | node0x561d478f46b0* 107 | 108 | * 109 | 110 | 111 | 112 | node0x561d478f43f0->node0x561d478f46b0* 113 | 114 | 115 | 116 | 117 | 118 | node0x561d478f43f0+->node0x561d478f43f0 119 | 120 | 121 | 122 | 123 | 124 | node0x561d478f45b0 125 | 126 | f 127 | 128 | data=-2.0000 129 | 130 | grad=0.0000 131 | 132 
| 133 | 134 | node0x561d478f45b0->node0x561d478f46b0* 135 | 136 | 137 | 138 | 139 | 140 | node0x561d478f46b0 141 | 142 | L 143 | 144 | data=-8.0000 145 | 146 | grad=0.0000 147 | 148 | 149 | 150 | node0x561d478f46b0*->node0x561d478f46b0 151 | 152 | 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /examples/mlp1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nn.h" 4 | #include "graph.h" 5 | 6 | using namespace ai; 7 | 8 | int main(int argc, char *argv[]) 9 | { 10 | // Define a neural net 11 | MLP1 n; 12 | 13 | std::array input = {{ 2.0, 3.0, -1.0 }}; 14 | auto output = n(input); 15 | 16 | backward(output); 17 | 18 | std::cout << Graph(output) << std::endl; 19 | } 20 | 21 | 22 | -------------------------------------------------------------------------------- /examples/neuron.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "graph.h" 4 | 5 | using namespace ai; 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | // Inputs x1, x2 10 | auto x1 = make_value(2.0, "x1"); 11 | auto x2 = make_value(0.0, "x2"); 12 | 13 | // Weights w1, w2 14 | auto w1 = make_value(-3.0, "w1"); 15 | auto w2 = make_value(1.0, "w2"); 16 | 17 | // Bias of the neuron 18 | auto b = make_value(6.8813735870195432, "b"); 19 | 20 | auto x1w1 = expr(x1*w1, "x1*w1"); 21 | auto x2w2 = expr(x2*w2, "x2*w2"); 22 | 23 | auto x1w1x2w2 = expr(x1w1 + x2w2, "x1w1+x2w2"); 24 | auto n = expr(x1w1x2w2 + b, "n"); 25 | 26 | auto o = expr(tanh(n), "o"); 27 | 28 | backward(o); 29 | 30 | std::cout << Graph(o) << std::endl; 31 | } 32 | -------------------------------------------------------------------------------- /examples/neuron.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 9 | 10 | G 11 | 12 | 13 | 14 | node0x55fe2e941eb0 15 | 16 | x1 17 | 18 | data=2.0000 19 | 20 | grad=-1.5000 21 | 22 | 23 | 24 | node0x55fe2e942430* 25 | 26 | * 27 | 28 | 29 | 30 | node0x55fe2e941eb0->node0x55fe2e942430* 31 | 32 | 33 | 34 | 35 | 36 | node0x55fe2e941fb0 37 | 38 | x2 39 | 40 | data=0.0000 41 | 42 | grad=0.5000 43 | 44 | 45 | 46 | node0x55fe2e9425f0* 47 | 48 | * 49 | 50 | 51 | 52 | node0x55fe2e941fb0->node0x55fe2e9425f0* 53 | 54 | 55 | 56 | 57 | 58 | node0x55fe2e9420b0 59 | 60 | w1 61 | 62 | data=-3.0000 63 | 64 | grad=1.0000 65 | 66 | 67 | 68 | node0x55fe2e9420b0->node0x55fe2e942430* 69 | 70 | 71 | 72 | 73 | 74 | node0x55fe2e9421b0 75 | 76 | w2 77 | 78 | data=1.0000 79 | 80 | grad=0.0000 81 | 82 | 83 | 84 | node0x55fe2e9421b0->node0x55fe2e9425f0* 85 | 86 | 87 | 88 | 89 | 90 | node0x55fe2e9422b0 91 | 92 | b 93 | 94 | data=6.8814 95 | 96 | grad=0.5000 97 | 98 | 99 | 100 | node0x55fe2e942970+ 101 | 102 | + 103 | 104 | 105 | 106 | node0x55fe2e9422b0->node0x55fe2e942970+ 107 | 108 | 109 | 110 | 111 | 112 | node0x55fe2e942430 113 | 114 | x1*w1 115 | 116 | data=-6.0000 117 | 118 | grad=0.5000 119 | 120 | 121 | 122 | node0x55fe2e9427b0+ 123 | 124 | + 125 | 126 | 127 | 128 | node0x55fe2e942430->node0x55fe2e9427b0+ 129 | 130 | 131 | 132 | 133 | 134 | node0x55fe2e942430*->node0x55fe2e942430 135 | 136 | 137 | 138 | 139 | 140 | node0x55fe2e9425f0 141 | 142 | x2*w2 143 | 144 | data=0.0000 145 | 146 | grad=0.5000 147 | 148 | 149 | 150 | node0x55fe2e9425f0->node0x55fe2e9427b0+ 151 | 152 | 153 | 154 | 155 | 156 | node0x55fe2e9425f0*->node0x55fe2e9425f0 157 | 158 | 159 | 160 | 161 | 162 | node0x55fe2e9427b0 163 | 164 | x1w1+x2w2 
165 | 166 | data=-6.0000 167 | 168 | grad=0.5000 169 | 170 | 171 | 172 | node0x55fe2e9427b0->node0x55fe2e942970+ 173 | 174 | 175 | 176 | 177 | 178 | node0x55fe2e9427b0+->node0x55fe2e9427b0 179 | 180 | 181 | 182 | 183 | 184 | node0x55fe2e942970 185 | 186 | n 187 | 188 | data=0.8814 189 | 190 | grad=0.5000 191 | 192 | 193 | 194 | node0x55fe2e942b30tanh 195 | 196 | tanh 197 | 198 | 199 | 200 | node0x55fe2e942970->node0x55fe2e942b30tanh 201 | 202 | 203 | 204 | 205 | 206 | node0x55fe2e942970+->node0x55fe2e942970 207 | 208 | 209 | 210 | 211 | 212 | node0x55fe2e942b30 213 | 214 | o 215 | 216 | data=0.7071 217 | 218 | grad=1.0000 219 | 220 | 221 | 222 | node0x55fe2e942b30tanh->node0x55fe2e942b30 223 | 224 | 225 | 226 | 227 | 228 | -------------------------------------------------------------------------------- /examples/regression0.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "backprop.h" 4 | #include "nn.h" 5 | #include "graph.h" 6 | 7 | using namespace ai; 8 | 9 | class Regression0 { 10 | public: 11 | Regression0() 12 | : weight_(randomValue()) 13 | {} 14 | 15 | Value weight() const { 16 | return weight_; 17 | } 18 | 19 | Value operator()(const Value& x) const { 20 | return weight_ * x; 21 | } 22 | 23 | Value operator()(const double& x) const { 24 | return this->operator()(make_value(x)); 25 | } 26 | 27 | void adjust(const double& learning_rate) { 28 | weight_->adjust(learning_rate); 29 | } 30 | 31 | private: 32 | Value weight_; 33 | }; 34 | 35 | static inline std::ostream& operator<<(std::ostream& os, const Regression0& r) 36 | { 37 | return os << "Regression0{weight=" << r.weight() << "}"; 38 | } 39 | 40 | int main(int argc, char *argv[]) 41 | { 42 | Regression0 n; 43 | 44 | std::cerr << n << std::endl; 45 | 46 | std::array input = { 47 | {-7.0, -3.0, 1.0, 4.0}, 48 | }; 49 | 50 | std::array y = {-21.0, -9.0, 3.0, 12.0}; 51 | std::cerr << "y (gt):\t" << PrettyArray(y) << std::endl; 52 | 53 | // Run backprop 54 | double learning_rate = 0.01; 55 | 56 | auto backprop = BackProp(n, "loss.tsv"); 57 | 58 | double loss = backprop(input, y, learning_rate, 40, true); 59 | 60 | std::cout << Graph(backprop.loss_function()(input, y)) << std::endl; 61 | } 62 | 63 | -------------------------------------------------------------------------------- /include/array.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | template 7 | class PrettyArray : public std::array { 8 | public: 9 | PrettyArray() = default; 10 | PrettyArray(const std::array& arr) : std::array(arr) {} 11 | 12 | friend std::ostream& operator<<(std::ostream& os, const PrettyArray& arr) { 13 | os << "[\n"; 14 | for (const auto& elem : arr) { 15 | os << '\t' << elem << '\n'; 16 | } 17 | os << ']'; 18 | return os; 19 | } 20 | }; 21 | -------------------------------------------------------------------------------- /include/backprop.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "loss.h" 9 | 10 | namespace ai { 11 | 12 | template 13 | concept CanBackProp = requires(F f, Arg arg, T learning_rate) { 14 | { f(arg) } -> std::convertible_to>; 15 | { f.adjust(learning_rate) } -> std::convertible_to; 16 | }; 17 | 18 | 19 | template 20 | class BackPropImpl { 21 | public: 22 | BackPropImpl(const F& func, const std::string& loss_path) 23 | : func_(func), loss_output_(loss_path) 24 | 
{ 25 | } 26 | 27 | MSELoss loss_function() const { 28 | return MSELoss(func_); 29 | } 30 | 31 | T operator()(std::array& input, const std::array& ground_truth, 32 | T learning_rate, int iterations, bool verbose=false) 33 | { 34 | auto loss_f = loss_function(); 35 | T result; 36 | 37 | for (int i=0; i < iterations; ++i) { 38 | Value loss = loss_f(input, ground_truth, verbose); 39 | 40 | result = loss->data(); 41 | loss_output_ << iter_ << '\t' << result << '\n'; 42 | 43 | if (verbose) { 44 | std::cerr << "Loss (" << iter_ << "):\t" << result << std::endl; 45 | } 46 | 47 | backward(loss); 48 | 49 | func_.adjust(learning_rate); 50 | 51 | ++iter_; 52 | } 53 | 54 | return result; 55 | } 56 | 57 | private: 58 | F func_; 59 | std::ofstream loss_output_; 60 | int iter_{0}; 61 | }; 62 | 63 | template 64 | requires CanBackProp 65 | auto BackProp(const F& func, const std::string& loss_path) 66 | { 67 | return BackPropImpl(func, loss_path); 68 | } 69 | 70 | } // namespace ai 71 | -------------------------------------------------------------------------------- /include/graph.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "value.h" 7 | 8 | namespace ai { 9 | 10 | template 11 | class Trace { 12 | public: 13 | Trace(const Value& root) 14 | { 15 | build(root); 16 | } 17 | 18 | const std::set*>& nodes() const { 19 | return nodes_; 20 | } 21 | 22 | const std::set*, RawValue*>> edges() const { 23 | return edges_; 24 | } 25 | 26 | private: 27 | void build(const Value& v) { 28 | if (!nodes_.contains(v.get())) { 29 | nodes_.insert(v.get()); 30 | for (auto && c : v->children()) { 31 | edges_.insert({c.get(), v.get()}); 32 | build(c); 33 | } 34 | } 35 | } 36 | 37 | private: 38 | std::set*> nodes_{}; 39 | std::set*, RawValue*>> edges_{}; 40 | }; 41 | 42 | template 43 | class NodeName { 44 | public: 45 | NodeName(const RawValue* ptr) 46 | : ptr_(ptr) 47 | {} 48 | 49 | const RawValue* get() const { 50 | return ptr_; 51 | } 52 | 53 | private: 54 | const RawValue* ptr_; 55 | }; 56 | 57 | template 58 | static inline std::ostream& operator<<(std::ostream& os, const NodeName& node) { 59 | return os << "\"node" << node.get() << "\""; 60 | } 61 | 62 | template 63 | class NodeOp { 64 | public: 65 | NodeOp(const RawValue* ptr) 66 | : ptr_(ptr) 67 | {} 68 | 69 | const RawValue* get() const { 70 | return ptr_; 71 | } 72 | 73 | private: 74 | const RawValue* ptr_; 75 | }; 76 | 77 | template 78 | static inline std::ostream& operator<<(std::ostream& os, const NodeOp& node) { 79 | return os << "\"node" << node.get() << node.get()->op() << "\""; 80 | } 81 | 82 | template 83 | class Graph { 84 | public: 85 | Graph(const std::shared_ptr>& root) 86 | : trace_(root) 87 | { 88 | } 89 | 90 | std::ostream& dump(std::ostream& os) const { 91 | auto old_precision = os.precision(); 92 | 93 | os << "digraph G {\n" 94 | << " rankdir = \"LR\";" 95 | << std::endl; 96 | 97 | os << std::fixed << std::setprecision(4); 98 | 99 | for (const RawValue* node : trace_.nodes()) { 100 | // For any value in the graph, create a rectangular ("record") node 101 | // for it 102 | os << " " << NodeName(node) 103 | << " [label = \"{ " << node->label() 104 | << " | data=" << node->data() 105 | << " | grad=" << node->grad() 106 | << " }\", shape=\"record\"]" 107 | << std::endl; 108 | 109 | if (!node->op().empty()) { 110 | // If this value is the result of an operation, create 111 | // an op node for it 112 | os << " " << NodeOp(node) 113 | << " [label = \"" << 
node->op() << "\"]" 114 | << std::endl; 115 | 116 | // And connect the op to it 117 | os << " " << NodeOp(node) 118 | << " -> " << NodeName(node) << ";" 119 | << std::endl; 120 | } 121 | } 122 | 123 | // Edges 124 | for (auto && [n1, n2] : trace_.edges()) { 125 | // Connect n1 to the op node of n2 126 | os << " " << NodeName(n1) << " -> " << NodeOp(n2) << ";" << std::endl; 127 | } 128 | 129 | os << "}" << std::endl; 130 | 131 | os << std::setprecision(old_precision); 132 | 133 | return os; 134 | } 135 | 136 | private: 137 | Trace trace_; 138 | }; 139 | 140 | template 141 | static inline std::ostream& operator<<(std::ostream& os, const Graph& graph) { 142 | return graph.dump(os); 143 | } 144 | 145 | } 146 | -------------------------------------------------------------------------------- /include/loss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "value.h" 8 | 9 | namespace ai { 10 | 11 | // Template function for double 12 | template 13 | double mse_loss(const T& predicted, const T& ground_truth) { 14 | static_assert(std::is_arithmetic::value, "Type must be arithmetic"); 15 | return std::pow(predicted - ground_truth, 2); 16 | } 17 | 18 | // Overloaded function for Value 19 | template 20 | Value mse_loss(const Value& predicted, const T& ground_truth) { 21 | static_assert(std::is_arithmetic::value, "Type must be arithmetic"); 22 | return pow(predicted - ground_truth, 2); 23 | } 24 | 25 | template 26 | Value mse_loss(const Value& predicted, const Value& ground_truth) { 27 | static_assert(std::is_arithmetic::value, "Type must be arithmetic"); 28 | return pow(predicted - ground_truth, 2); 29 | } 30 | 31 | // Wrapper function for containers 32 | template