├── .github
│   └── workflows
│       └── cmake.yml
├── .gitignore
├── .gitmodules
├── .gitpod.Dockerfile
├── .gitpod.yml
├── CMakeLists.txt
├── LICENSE
├── README.md
├── experimental
│   ├── benchmark.cpp
│   ├── cuda.cu
│   ├── experimental.cpp
│   └── mnist.cpp
├── include
│   ├── activation.hpp
│   ├── allocator.hpp
│   ├── autograd
│   │   ├── activation.hpp
│   │   ├── autograd.hpp
│   │   ├── cpu_kernels.hpp
│   │   ├── function.hpp
│   │   ├── gradient_queue.hpp
│   │   ├── iseq.hpp
│   │   ├── ml.hpp
│   │   ├── optimizer.hpp
│   │   └── train.hpp
│   ├── cast.hpp
│   ├── common.hpp
│   ├── complex.hpp
│   ├── core
│   │   ├── common.hpp
│   │   └── kernels.hpp
│   ├── counter.hpp
│   ├── cuda
│   │   ├── error.cuh
│   │   ├── essentials.cuh
│   │   ├── matrix.cuh
│   │   ├── nvarena.cuh
│   │   └── vector.cuh
│   ├── dataset.hpp
│   ├── display.hpp
│   ├── dnn.hpp
│   ├── dnnopt.hpp
│   ├── engine.hpp
│   ├── equation.hpp
│   ├── erf.hpp
│   ├── field.hpp
│   ├── filter.hpp
│   ├── fixed_vector.hpp
│   ├── fourier.hpp
│   ├── function.hpp
│   ├── gnn.hpp
│   ├── gradient.hpp
│   ├── image.hpp
│   ├── io
│   │   └── print.hpp
│   ├── layer.hpp
│   ├── linalg.hpp
│   ├── matrix.hpp
│   ├── matrix_cpu.hpp
│   ├── netnode.hpp
│   ├── operand.hpp
│   ├── optimizer.hpp
│   ├── parametrization.hpp
│   ├── polynomial.hpp
│   ├── range.hpp
│   ├── rational.hpp
│   ├── registration.hpp
│   ├── sparse.hpp
│   ├── std
│   │   ├── activation_derivatives.hpp
│   │   ├── activations.hpp
│   │   ├── algorithms.hpp
│   │   ├── calculus.hpp
│   │   ├── combinatorial.hpp
│   │   ├── erf_derivatives.hpp
│   │   ├── erfs.hpp
│   │   ├── filters.hpp
│   │   ├── functions.hpp
│   │   ├── initializers.hpp
│   │   ├── interval.hpp
│   │   ├── loaders.hpp
│   │   ├── numtheory.hpp
│   │   └── optimizers.hpp
│   ├── tensor.hpp
│   ├── timer.hpp
│   ├── token.hpp
│   ├── training.hpp
│   ├── vector.hpp
│   └── vector_type.hpp
├── source
│   ├── autograd
│   │   ├── autograd.cpp
│   │   ├── iseq.cpp
│   │   └── ml.cpp
│   ├── cuda
│   │   └── nvarena.cu
│   ├── image.cpp
│   ├── io.cpp
│   ├── linalg.cpp
│   ├── polynomial.cpp
│   ├── range.cpp
│   ├── std
│   │   ├── functions.cpp
│   │   └── interval.cpp
│   └── timer.cpp
├── testing
│   ├── activation.cpp
│   ├── calculus.cpp
│   ├── fourier.cpp
│   ├── global.hpp
│   ├── interval.cpp
│   ├── linalg.cpp
│   ├── main.cpp
│   ├── matrix.cpp
│   ├── polynomial.cpp
│   ├── printing.cpp
│   ├── special.cpp
│   ├── tensor.cpp
│   ├── timers.cpp
│   └── vector.cpp
├── zhetapi.hpp
└── zhetapi_logo.svg

/.github/workflows/cmake.yml:
--------------------------------------------------------------------------------
1 | name: Build
2 |
3 | on:
4 |   push:
5 |     branches: [ "focused" ]
6 |   pull_request:
7 |     branches: [ "focused" ]
8 |
9 | env:
10 |   # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
11 |   BUILD_TYPE: Release
12 |
13 | jobs:
14 |   build:
15 |     # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac.
16 |     # You can convert this to a matrix build if you need cross-platform coverage.
17 |     # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
18 |     runs-on: ubuntu-latest
19 |
20 |     steps:
21 |     - uses: actions/checkout@v3
22 |       with:
23 |         submodules: 'true'
24 |
25 |     - name: Install dependencies
26 |       run: sudo apt install libpng-dev
27 |
28 |     - name: Compile Google benchmarks
29 |       run: |
30 |         cd ${{github.workspace}}/vendor/benchmark
31 |         cmake -E make_directory "build"
32 |         cmake -E chdir "build" cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on -DCMAKE_BUILD_TYPE=Release ../
33 |         cmake --build "build" --config Release
34 |         sudo cmake --build "build" --config Release --target install
35 |         cd ${{github.workspace}}
36 |
37 |     - name: Configure CMake
38 |       run: cmake -B ${{github.workspace}}/build -DZHETAPI_ENABLE_CUDA=OFF
39 |
40 |     - name: Build
41 |       # Build your program with the given configuration
42 |       run: cmake --build ${{github.workspace}}/build
43 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.a
2 | *.dSYM
3 | *.import
4 | *.log
5 | *.ninja
6 | *.out
7 | *.output
8 | *.so
9 | *.sw*
10 | *.zhplib
11 | *ubyte
12 | *ubyte
13 | *~
14 | .cache
15 | .env
16 | .ninja_deps
17 | .ninja_log
18 | .smake
19 | .vscode
20 | /.vs
21 | /Makefile
22 | CMakeCache.txt
23 | CMakeFiles
24 | __gen*
25 | __pycache__
26 | bin
27 | build
28 | cmake_install.cmake
29 | compile_commands.json
30 | coverage.info
31 | data
32 | debug
33 | docs/_build
34 | docs/_static
35 | docs/_templates
36 | docs/html
37 | docs/latex
38 | docs/latex
39 | docs/xml
40 | engine/engine
41 | gcov-files
42 | gcov-out
43 | gen
44 | htests
45 | portability
46 | samples/rl/res
47 | tasks.json
48 | tmp
49 | vendor/benchmark
50 | zhetapi
51 | zhetapi-header
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "vendor/indicators"]
2 | 	path = vendor/indicators
3 | 	url = https://github.com/p-ranav/indicators
4 | [submodule "vendor/jitify"]
5 | 	path = vendor/jitify
6 | 	url = https://github.com/NVIDIA/jitify
7 | [submodule "vendor/benchmark"]
8 | 	path = vendor/benchmark
9 | 	url = https://github.com/google/benchmark.git
--------------------------------------------------------------------------------
/.gitpod.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gitpod/workspace-full
2 |
3 | USER root
4 |
5 | RUN apt-get -yq update \
6 |     && apt-get install -yq gcc-8 g++-8 \
7 |     && apt-get install -yq clang-8 \
8 |     && apt-get install -yq valgrind \
9 |     && apt-get install -yq libboost-all-dev \
10 |     && apt-get install -yq asciidoctor \
11 |     && apt-get install -yq libcurl4-gnutls-dev \
12 |     && apt-get install -yq doxygen \
13 |     && apt-get install -yq texlive-latex-base \
14 |     && apt-get install -yq texlive-fonts-recommended \
15 |     && apt-get install -yq texlive-fonts-extra \
16 |     && apt-get install -yq texlive-latex-extra \
17 |     && apt-get install -yq graphviz \
18 |     && apt-get install -yq clang-tidy-8 \
19 |     && apt-get install -yq lcov \
20 |     && apt-get install -yq ninja-build \
21 |     && apt-get install -yq libsfml-dev \
22 |     && pip install smake \
23 |     && apt-get clean \
24 |     && rm -rf /var/lib/apt/lists/*
25 |
--------------------------------------------------------------------------------
/.gitpod.yml:
--------------------------------------------------------------------------------
1 | image:
2 |   file: .gitpod.Dockerfile
3 |
4 | # List the ports you want to expose and what to do when they are served. See https://www.gitpod.io/docs/config-ports/
5 | ports:
6 |   - port: 3000
7 |     onOpen: open-preview
8 |
9 | # List the start up tasks. You can start them in parallel in multiple terminals. See https://www.gitpod.io/docs/config-start-tasks/
10 | tasks:
11 |   - init: echo 'init script' # runs during prebuild
12 |     command: echo 'start script'
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.12)
2 |
3 | # All projects
4 | project(zhetapi CXX)
5 |
6 | # Check existence of CUDA
7 | option(ZHETAPI_ENABLE_CUDA "Enable CUDA for Zhetapi" ON)
8 |
9 | if (ZHETAPI_ENABLE_CUDA)
10 | 	enable_language(CUDA)
11 | endif()
12 |
13 | # CXX options
14 | set(CMAKE_CXX_STANDARD 20)
15 |
16 | # Compiler (clang default)
17 | if (NOT CMAKE_CXX_COMPILER)
18 | 	set(CMAKE_CXX_COMPILER clang++)
19 | endif()
20 |
21 | # Color output
22 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
23 |
24 | # Build type (Release default)
25 | if (NOT CMAKE_BUILD_TYPE)
26 | 	set(CMAKE_BUILD_TYPE Release)
27 | endif()
28 |
29 | # Required packages
30 | find_package(PNG REQUIRED)
31 | find_package(benchmark REQUIRED)
32 | find_package(OpenMP REQUIRED)
33 |
34 | # Include directories
35 | include_directories(
36 | 	.
37 | 	vendor
38 | 	vendor/indicators/include
39 | 	vendor/benchmark/include
40 | 	${PNG_INCLUDE_DIRS}
41 | )
42 |
43 | # Zhetapi library sources
44 | set(Zhetapi_SOURCES
45 | 	source/autograd/autograd.cpp
46 | 	source/autograd/iseq.cpp
47 | 	source/autograd/ml.cpp
48 | 	source/image.cpp
49 | 	source/io.cpp
50 | 	source/linalg.cpp
51 | 	source/polynomial.cpp
52 | 	source/range.cpp
53 | 	source/std/functions.cpp
54 | 	source/std/interval.cpp
55 | 	source/timer.cpp
56 | )
57 |
58 | # Common object library
59 | add_library(Zhetapi_COMMON OBJECT ${Zhetapi_SOURCES})
60 |
61 | # Project executables
62 | add_executable(mnist experimental/mnist.cpp $<TARGET_OBJECTS:Zhetapi_COMMON>)
63 | add_executable(experimental experimental/experimental.cpp $<TARGET_OBJECTS:Zhetapi_COMMON>)
64 |
65 | if (ZHETAPI_ENABLE_CUDA)
66 | 	# add_executable(experimental_cuda experimental/cuda.cu ${Zhetapi_SOURCES})
67 | endif()
68 |
69 | add_executable(benchmark experimental/benchmark.cpp $<TARGET_OBJECTS:Zhetapi_COMMON>)
70 |
71 | set(ESSENTIAL_LIBS PNG::PNG OpenMP::OpenMP_CXX)
72 |
73 | target_link_libraries(mnist ${ESSENTIAL_LIBS})
74 | target_link_libraries(experimental ${ESSENTIAL_LIBS})
75 | # target_link_libraries(experimental_cuda ${ESSENTIAL_LIBS} cuda cudart nvrtc)
76 | target_link_libraries(benchmark benchmark::benchmark ${ESSENTIAL_LIBS})
77 |
78 | if (ZHETAPI_ENABLE_CUDA)
79 | 	target_link_libraries(experimental ${ESSENTIAL_LIBS})
80 | endif()
81 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Venkataram Edavamadathil Sivaram
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom
the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ![Zhetapi Logo](zhetapi_logo.svg)
2 |
3 | ![build badge](https://github.com/vedavamadathil/zhetapi/actions/workflows/cmake.yml/badge.svg)
4 |
5 | Zhetapi is a modern C++ machine learning and numerical analysis library with an
6 | emphasis on intuitive usage.
7 |
8 | # Linear Algebra
9 |
10 | Zhetapi provides a basic linear algebra interface using C++20 concepts. The
11 | `Field` structure enforces a constraint on types to ensure that they behave like
12 | algebraic fields. As a result, these structures support basic arithmetic along
13 | with other useful methods.
14 |
15 | For now, the following template classes are provided: `Tensor`, `Matrix`,
16 | `Vector`.
17 |
18 | # Auto Differentiation
19 |
20 | The auto differentiation facilities in Zhetapi belong in the `zhetapi::autograd`
21 | namespace. All operations that depend on autodiff use `float`s as the underlying
22 | type; in particular, `Constant` is a `Tensor <float>` and is the basis of all
23 | numerical values in this module.
24 |
25 | To provide a seamless, operator-based interface into the autodiff facilities,
26 | two notable classes are provided, `Variable` and `Function`. As one would expect,
27 | `Variable`s can store arbitrary `Constant` values, and `Function`s are
28 | compositions of `Variable`s under varying operations. For example, where `x` and `y` are `Variable`s:
29 |
30 | ```cpp
31 | Function f = x + y;
32 | Function g = x * y;
33 |
34 | // f and g are now functions of *two* variables
35 | Constant a = f(1, 2);
36 | Constant b = g(1, 2);
37 |
38 | // Composition of functions is done likewise
39 | Function h = f(x, g(x, y)); // NOTE: h is still a function of two variables
40 | ```
41 |
42 | ## Symbolic Differentiation
43 |
44 | ## Backward Pass
45 |
46 | Currently, only backward mode is enabled for autograd.
47 |
48 | # Building
49 |
50 | Zhetapi is primarily a header-only library, but for now there are some examples
51 | that one can play around with in the `experimental` directory.
52 |
53 | This project is developed using C++20. Additional dependencies include PNG
54 | (`libpng-dev` on Ubuntu systems), OpenMP (optional) and CUDA (optional).
55 |
56 | Generate the build configuration using CMake as follows:
57 |
58 | ```
59 | $ cd zhetapi
60 | $ mkdir build && cd build
61 | $ cmake .. -DZHETAPI_ENABLE_CUDA=ON # ON by default
62 | ```
63 |
64 | And build the targets as one would usually do (e.g. `make` or `ninja`).
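As a quick smoke test of the backward pass, the following is a minimal sketch
adapted from `experimental/experimental.cpp`; it evaluates a single dense layer
on a constant input and pulls a gradient back through it:

```cpp
#include <iostream>

#include "include/autograd/ml.hpp"

using namespace zhetapi;
using namespace zhetapi::autograd;

int main()
{
	// A small dense layer (3 inputs -> 3 outputs)
	auto model = ml::dense(3, 3);

	// Constant input filled with ones
	Constant input { Constant::shape_type {3, 3, 3}, 1.0f };

	// Forward pass
	std::cout << "model(input) = " << model(input) << "\n";

	// Backward pass: seed the output gradient with ones
	Constant igrad { Constant::shape_type {3, 3, 3}, 1.0f };
	Gradient grads = model.gradient({input}, {igrad});

	// Gradient of the output with respect to the input
	std::cout << "igrads[0] = " << grads.igrads[0] << "\n";
}
```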
65 |
--------------------------------------------------------------------------------
/experimental/benchmark.cpp:
--------------------------------------------------------------------------------
1 | // Benchmark headers
2 | #include <benchmark/benchmark.h>
3 |
4 | // Library headers
5 | #include "../include/autograd/autograd.hpp"
6 | #include "../include/autograd/ml.hpp"
7 |
8 | using namespace zhetapi;
9 | using namespace zhetapi::autograd;
10 |
11 | // Performance of a single dense layer
12 | static void dense_layer(benchmark::State &state)
13 | {
14 | 	Variable x;
15 | 	Function model = ml::dense(1000, 2000)(x);
16 | 	Constant in(
17 | 		{1000}, // NOTE: must match the layer's input size (was {2000})
18 | 		[](size_t i) {
19 | 			return 1.0f;
20 | 		}
21 | 	);
22 |
23 | 	for (auto _ : state)
24 | 		model(in);
25 | }
26 |
27 | BENCHMARK(dense_layer)->Unit(benchmark::kMillisecond);
28 |
29 | // Performance of a deep dense network
30 | static void dense_network(benchmark::State &state)
31 | {
32 | 	Variable x;
33 | 	Function model = ml::dense(1000, 2000)(x);
34 | 	model = ml::dense(2000, 2000)(model);
35 | 	model = ml::dense(2000, 1000)(model);
36 |
37 | 	Constant in(
38 | 		{1000}, // NOTE: must match the first layer's input size (was {2000})
39 | 		[](size_t i) {
40 | 			return 1.0f;
41 | 		}
42 | 	);
43 |
44 | 	for (auto _ : state)
45 | 		model(in);
46 | }
47 |
48 | BENCHMARK(dense_network)->Unit(benchmark::kMillisecond);
49 |
50 | // Benchmarking matrix multiplication
51 | template <class T>
52 | Matrix <T> simple_fma(const Matrix <T> &a, const Matrix <T> &b, const Matrix <T> &c)
53 | {
54 | 	Matrix <T> out = a * b;
55 | 	out += c;
56 |
57 | 	return out;
58 | }
59 |
60 | template <class T>
61 | void inline_fma(T *out, const T *matrix, const T *bias, const T *input, size_t rows, size_t cols)
62 | {
63 | 	for (size_t i = 0; i < rows; i++) {
64 | 		T sum = 0;
65 |
66 | 		for (size_t j = 0; j < cols; j++)
67 | 			sum += matrix[i * cols + j] * input[j];
68 |
69 | 		out[i] = sum + bias[i];
70 | 	}
71 | }
72 |
73 | template <class T>
74 | void parallel_fma(T *out, const T *matrix, const T *bias, const T *input, size_t rows, size_t cols)
75 | {
76 | 	#pragma omp parallel for
77 | 	for (long int i = 0; i < rows; i++) {
78 | 		T sum = 0;
79 |
80 | 		const T *c = &matrix[i * cols];
81 | 		for (size_t j = 0; j < cols; j++)
82 | 			sum += c[j] * input[j]; // NOTE: was c[i], which read the wrong element
83 |
84 | 		out[i] = sum + bias[i];
85 | 	}
86 | }
87 |
88 | static void matrix_multiply(benchmark::State &state)
89 | {
90 | 	Matrix <float> w1 {1000, 2000, [](size_t i) { return 1.0f; }};
91 | 	Matrix <float> b1 {1000, 1, [](size_t i) { return 1.0f; }};
92 |
93 | 	Matrix <float> in {2000, 1, [](size_t i) { return 1.0f; }};
94 |
95 | 	for (auto _ : state)
96 | 		simple_fma(w1, in, b1);
97 | }
98 |
99 | BENCHMARK(matrix_multiply)->Unit(benchmark::kMillisecond);
100 |
101 | static void matrix_multiply_inline_fma(benchmark::State &state)
102 | {
103 | 	Matrix <float> w1 {1000, 2000, [](size_t i) { return 1.0f; }};
104 | 	Matrix <float> b1 {1000, 1, [](size_t i) { return 1.0f; }};
105 |
106 | 	Matrix <float> in {2000, 1, [](size_t i) { return 1.0f; }};
107 | 	Matrix <float> out {1000, 1, [](size_t i) { return 0.0f; }};
108 |
109 | 	for (auto _ : state)
110 | 		inline_fma(out.data(), w1.data(), b1.data(), in.data(), 1000, 2000); // NOTE: cols was 1000, but w1 is 1000 x 2000
111 | }
112 |
113 | BENCHMARK(matrix_multiply_inline_fma)->Unit(benchmark::kMillisecond);
114 |
115 | static void matrix_multiply_parallel_fma(benchmark::State &state)
116 | {
117 | 	Matrix <float> w1 {1000, 2000, [](size_t i) { return 1.0f; }};
118 | 	Matrix <float> b1 {1000, 1, [](size_t i) { return 1.0f; }};
119 |
120 | 	Matrix <float> in {2000, 1, [](size_t i) { return 1.0f; }};
121 | 	Matrix <float> out {1000, 1, [](size_t i) { return 0.0f; }};
122 |
123 | 	for (auto _ : state)
124 | 		parallel_fma(out.data(), w1.data(), b1.data(), in.data(), 1000, 2000); // NOTE: cols was 1000, but w1 is 1000 x 2000
125 | }
126 |
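// NOTE: the three FMA kernels above all compute out = W * in + b with a
// 1000 x 2000 weight matrix W, a 2000-vector in and a 1000-vector b, which
// mirrors the work done per layer in the dense benchmarks at the top of
// this file.
127 |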
BENCHMARK(matrix_multiply_parallel_fma)->Unit(benchmark::kMillisecond); 128 | 129 | // Main 130 | BENCHMARK_MAIN(); 131 | -------------------------------------------------------------------------------- /experimental/cuda.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "include/tensor.hpp" 4 | 5 | using namespace zhetapi; 6 | 7 | struct CuTensor { 8 | size_t dimensions; 9 | size_t *shape; 10 | float *array; // Borrows data from Tensor 11 | }; 12 | 13 | struct CuMatrix { 14 | size_t rows; 15 | size_t columns; 16 | float *array; // Borrows data from Tensor 17 | }; 18 | 19 | int main() 20 | { 21 | { 22 | Tensor a = Tensor ::ones({2, 2}); 23 | Tensor b = Tensor ::zeros({2, 2}); 24 | 25 | std::cout << "a:" << a << " => " << a.verbose() << std::endl; 26 | std::cout << "b:" << b << " => " << b.verbose() << std::endl; 27 | 28 | Tensor ::set_variant(eCUDA); 29 | 30 | Tensor c = a + b; 31 | 32 | std::cout << "c: " << c << " => " << c.verbose() << std::endl; 33 | 34 | // jitify and fill memory as such... 35 | Tensor d = Tensor (Tensor ::shape_type {2, 2}); 36 | std::cout << "d:" << d << " => " << d.verbose() << std::endl; 37 | 38 | Tensor e = Tensor (Tensor ::shape_type {2, 2}); 39 | std::cout << "e:" << d << " => " << d.verbose() << std::endl; 40 | 41 | c.copy(a); 42 | c.copy(d); 43 | 44 | d.copy(a); 45 | e.copy(d); 46 | 47 | std::cout << "\nc:" << c << " => " << c.verbose() << std::endl; 48 | std::cout << "d:" << d << " => " << d.verbose() << std::endl; 49 | std::cout << "e:" << e << " => " << e.verbose() << std::endl; 50 | 51 | // TODO: manual array copy 52 | 53 | detail::MemoryTracker::report(); 54 | } 55 | 56 | detail::MemoryTracker::report(); 57 | } 58 | -------------------------------------------------------------------------------- /experimental/experimental.cpp: -------------------------------------------------------------------------------- 1 | #include "include/autograd/ml.hpp" 2 | #include "include/autograd/activation.hpp" 3 | #include "include/autograd/gradient_queue.hpp" 4 | 5 | using namespace zhetapi; 6 | using namespace zhetapi::autograd; 7 | 8 | int main() 9 | { 10 | { 11 | Variable x; 12 | auto model = ml::dense(3, 3); 13 | 14 | Constant input { 15 | Constant::shape_type {3, 3, 3}, 16 | 1.0f 17 | }; 18 | 19 | std::cout << "input = " << input.verbose() << "\n"; 20 | std::cout << "model(input): = " << model(input).verbose() << "\n"; 21 | 22 | Constant igrad { Constant::shape_type {3, 3, 3}, 1.0f }; 23 | Gradient grads = model.gradient({input}, {igrad}); 24 | 25 | std::cout << "grads = " << grads.igrads[0].verbose() << "\n"; 26 | } 27 | 28 | detail::MemoryTracker::report(); 29 | } 30 | -------------------------------------------------------------------------------- /experimental/mnist.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #include "../include/autograd/activation.hpp" 8 | #include "../include/autograd/autograd.hpp" 9 | #include "../include/autograd/ml.hpp" 10 | #include "../include/autograd/optimizer.hpp" 11 | #include "../include/autograd/train.hpp" 12 | #include "../include/common.hpp" 13 | 14 | using namespace zhetapi; 15 | using namespace zhetapi::autograd; 16 | 17 | // Files required 18 | static const std::map files { 19 | { 20 | "train-images-idx3-ubyte", 21 | "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz" 22 | }, 23 | 24 | { 25 | "train-labels-idx1-ubyte", 26 | 
"http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz" 27 | }, 28 | 29 | { 30 | "t10k-images-idx3-ubyte", 31 | "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz" 32 | }, 33 | 34 | { 35 | "t10k-labels-idx1-ubyte", 36 | "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz" 37 | } 38 | }; 39 | 40 | // Check if file exists 41 | bool file_exists(const std::string &path) 42 | { 43 | struct stat buffer; 44 | return (stat(path.c_str(), &buffer) == 0); 45 | } 46 | 47 | #ifdef _OPENMP 48 | #define OPENMP_ENABLED 1 49 | #else 50 | #define OPENMP_ENABLED 0 51 | #endif 52 | 53 | int main() 54 | { 55 | const size_t TRAIN_IMAGES = 60000; 56 | const size_t VALIDATION_IMAGES = 100; 57 | const size_t DIMENSIONS = 784; 58 | 59 | std::cout << "Backend: CPU, OpenMP: " << OPENMP_ENABLED << std::endl; 60 | 61 | // TODO: try to use a single tensor for all data, then splice it 62 | const Constant::shape_type IMAGE_SHAPE = {DIMENSIONS}; 63 | const Constant::shape_type LABEL_SHAPE = {10}; 64 | 65 | // MSE function (5 inputs) 66 | Variable x; 67 | Variable y; 68 | 69 | /* auto loss = -1.0f * autograd::dot(autograd::log(x), y); 70 | auto dloss = (-1.0f * y/x).refactored(x, y); */ 71 | 72 | auto loss = square(length(x - y))/Constant {10}; 73 | auto dloss = 2 * (x - y)/Constant {10}; 74 | 75 | std::cout << "Loss:\n" << loss.summary() << std::endl; 76 | std::cout << "dLoss:\n" << dloss.summary() << std::endl; 77 | 78 | // Model 79 | auto model = ml::dense(DIMENSIONS, 30)(x); 80 | model = ml::sigmoid(model); 81 | model = ml::dense(30, 10)(model); 82 | model = ml::softmax(model); 83 | 84 | std::cout << "\nModel:\n" << model.summary() << std::endl; 85 | 86 | // Optimizer 87 | auto optimizer = ml::Adam(model.parameters(), 0.01); 88 | 89 | // First load the MNIST dataset 90 | system("mkdir -p data"); 91 | for (auto &file : files) { 92 | if (!file_exists("data/" + file.first)) { 93 | std::cout << "Downloading " << file.second << std::endl; 94 | system(("wget " + file.second).c_str()); 95 | system(("gunzip " + file.first + ".gz").c_str()); 96 | system(("mv " + file.first + " data/").c_str()); 97 | } else { 98 | std::cout << "Found " << file.first << std::endl; 99 | } 100 | } 101 | 102 | std::cout << "\nLoading MNIST dataset..." 
<< std::endl; 103 | 104 | // Load the data 105 | ml::Data train_data; 106 | ml::Data validation_data; 107 | 108 | std::vector train_labels; 109 | std::vector validation_labels; 110 | 111 | std::ifstream f_train_images("data/train-images-idx3-ubyte"); 112 | std::ifstream f_validation_images("data/t10k-images-idx3-ubyte"); 113 | 114 | std::ifstream f_train_labels("data/train-labels-idx1-ubyte"); 115 | std::ifstream f_validation_labels("data/t10k-labels-idx1-ubyte"); 116 | 117 | // Read the headers 118 | char header[16]; 119 | 120 | f_train_images.read(header, 16); 121 | f_validation_images.read(header, 16); 122 | 123 | f_train_labels.read(header, 8); 124 | f_validation_labels.read(header, 8); 125 | 126 | // Read the data 127 | for (int i = 0; i < TRAIN_IMAGES; i++) { 128 | // Read the image 129 | unsigned char image[DIMENSIONS]; 130 | std::vector image_data; 131 | 132 | f_train_images.read((char *) image, DIMENSIONS); 133 | for (int j = 0; j < DIMENSIONS; j++) 134 | image_data.push_back(image[j]/255.0f); 135 | 136 | train_data.push_back({ 137 | Constant {IMAGE_SHAPE, image_data} 138 | }); 139 | 140 | // Read the label 141 | unsigned char label; 142 | f_train_labels.read((char *) &label, 1); 143 | 144 | train_labels.push_back( 145 | Constant {LABEL_SHAPE, 146 | [&](size_t i) { 147 | return i == label ? 1 : 0; 148 | } 149 | } 150 | ); 151 | } 152 | 153 | for (int i = 0; i < VALIDATION_IMAGES; i++) { 154 | // Read the image 155 | unsigned char image[DIMENSIONS]; 156 | std::vector image_data; 157 | 158 | f_validation_images.read((char *) image, DIMENSIONS); 159 | for (int j = 0; j < DIMENSIONS; j++) 160 | image_data.push_back(image[j]/255.0f); 161 | 162 | validation_data.push_back({ 163 | Constant {IMAGE_SHAPE, image_data} 164 | }); 165 | 166 | // Read the label 167 | unsigned char label; 168 | f_validation_labels.read((char *) &label, 1); 169 | 170 | validation_labels.push_back( 171 | Constant {LABEL_SHAPE, 172 | [&](size_t i) { 173 | return i == label ? 1 : 0; 174 | } 175 | } 176 | ); 177 | } 178 | 179 | // Validator 180 | auto validator = [](const Constant &a, const Constant &b) { 181 | int ai = argmax(a); 182 | int bi = argmax(b); 183 | return ai == bi; 184 | }; 185 | 186 | std::cout << "Training data loaded" << std::endl; 187 | std::cout << "\tcurrent accuracy: " << ml::accuracy(model, train_data, train_labels, validator) << "\n" << std::endl; 188 | std::cout << "\toutput on input 0 = " << model(train_data[0]) << std::endl; 189 | std::cout << "\tlabel on input 0 = " << train_labels[0] << std::endl; 190 | std::cout << "\tloss = " << loss(model(train_data[0]).flat(), train_labels[0]) << std::endl; 191 | std::cout << "\tmatch? " << validator(model(train_data[0]), train_labels[0]) << std::endl; 192 | 193 | auto training_suite = ml::TrainingSuite { 194 | .loss = loss, 195 | .dloss = dloss, 196 | .iterations = 100, 197 | .batch_size = 100, 198 | .reporter = std::make_shared (validation_data, validation_labels, validator) 199 | }; 200 | 201 | ml::fit(model, train_data, train_labels, optimizer, training_suite); 202 | 203 | std::cout << "\n\nTraining finished" << std::endl; 204 | std::cout << "\tcurrent accuracy: " << ml::accuracy(model, train_data, train_labels, validator) << std::endl; 205 | std::cout << "\toutput on input 0 = " << model(train_data[0]) << std::endl; 206 | std::cout << "\tlabel on input 0 = " << train_labels[0] << std::endl; 207 | std::cout << "\tmatch? 
" << validator(model(train_data[0]), train_labels[0]) << std::endl; 208 | 209 | // TODO: multithreaded training 210 | 211 | // TODO: some way to weight the gradients for each input (maybe by error) 212 | // TODO: learning rate scheduling 213 | // TODO: dropout and regularization 214 | // TODO: some method to propogate parameters through ftunctions, 215 | // ie. {"dropout", 0.5}, {"batch_norm", true} (a map for now) 216 | 217 | } 218 | -------------------------------------------------------------------------------- /include/allocator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_ALLOCATOR_H_ 2 | #define ZHETAPI_ALLOCATOR_H_ 3 | 4 | // Standard headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | // Check CUDA availability 12 | #ifdef __CUDACC__ 13 | #define ZHETAPI_CUDA 1 14 | #else 15 | #define ZHETAPI_CUDA 0 16 | #endif 17 | 18 | namespace zhetapi { 19 | 20 | // Memory variant 21 | enum Variant { 22 | eCPU, 23 | eCUDA // TODO: only enable if CUDA is available 24 | }; 25 | 26 | namespace detail { 27 | 28 | // Memory allocation tracker 29 | // TODO: make thread safe 30 | class MemoryTracker { 31 | // TODO: pack into structs... 32 | long long int m_cpu_allocs = 0; 33 | long long int m_cpu_frees = 0; 34 | long long int m_cpu_inuse = 0; 35 | 36 | std::unordered_map m_cpu_map; 37 | 38 | long long int m_cuda_allocs = 0; 39 | long long int m_cuda_frees = 0; 40 | long long int m_cuda_inuse = 0; 41 | 42 | std::unordered_map m_cuda_map; 43 | 44 | // TODO: variant based allocation 45 | template 46 | T *alloc(size_t elements, Variant variant) { 47 | // TODO: option to throw or not 48 | if (elements == 0) 49 | throw std::runtime_error("Must allocate non-zero number of elements"); 50 | 51 | T *ptr = nullptr; 52 | if (variant == eCPU) { 53 | ptr = new T[elements]; 54 | 55 | m_cpu_allocs++; 56 | m_cpu_inuse += elements * sizeof(T); 57 | m_cpu_map[ptr] = elements * sizeof(T); 58 | } else if (variant == eCUDA) { 59 | if constexpr (!ZHETAPI_CUDA) 60 | throw std::runtime_error("CUDA is not available"); 61 | 62 | #ifdef __CUDACC__ 63 | cudaMalloc(&ptr, elements * sizeof(T)); 64 | 65 | m_cuda_allocs++; 66 | m_cuda_inuse += elements * sizeof(T); 67 | m_cuda_map[ptr] = elements * sizeof(T); 68 | #endif 69 | } 70 | 71 | return ptr; 72 | } 73 | 74 | template 75 | void deallocate(T *ptr, Variant variant) { 76 | if (variant == eCPU) { 77 | if (m_cpu_map.find(ptr) == m_cpu_map.end()) 78 | throw std::runtime_error("Attempt to free unallocated memory"); 79 | 80 | m_cpu_frees++; 81 | m_cpu_inuse -= m_cpu_map[ptr]; 82 | m_cpu_map.erase(ptr); 83 | 84 | delete[] ptr; 85 | } else if (variant == eCUDA) { 86 | if constexpr (!ZHETAPI_CUDA) 87 | throw std::runtime_error("CUDA is not available"); 88 | 89 | #ifdef __CUDACC__ 90 | if (m_cuda_map.find(ptr) == m_cuda_map.end()) 91 | throw std::runtime_error("Attempt to free unallocated memory"); 92 | 93 | m_cuda_frees++; 94 | m_cuda_inuse -= m_cuda_map[ptr]; 95 | m_cuda_map.erase(ptr); 96 | 97 | cudaFree(ptr); 98 | 99 | #endif 100 | } 101 | } 102 | 103 | template 104 | void copy(T *dst, T *src, size_t elements, Variant variant) { 105 | if (variant == eCPU) { 106 | if (m_cpu_map.find(dst) == m_cpu_map.end()) 107 | throw std::runtime_error("Attempt to copy to unallocated memory"); 108 | 109 | if (m_cpu_map.find(src) == m_cpu_map.end()) 110 | throw std::runtime_error("Attempt to copy from unallocated memory"); 111 | 112 | std::copy(src, src + elements, dst); 113 | } else if (variant == eCUDA) { 114 
| if constexpr (!ZHETAPI_CUDA) 115 | throw std::runtime_error("CUDA is not available"); 116 | 117 | #ifdef __CUDACC__ 118 | if (m_cuda_map.find(dst) == m_cuda_map.end()) 119 | throw std::runtime_error("Attempt to copy to unallocated memory"); 120 | 121 | if (m_cuda_map.find(src) == m_cuda_map.end()) 122 | throw std::runtime_error("Attempt to copy from unallocated memory"); 123 | 124 | cudaMemcpy(dst, src, elements * sizeof(T), cudaMemcpyDeviceToDevice); 125 | #endif 126 | } 127 | } 128 | 129 | static MemoryTracker &one() { 130 | static MemoryTracker singleton; 131 | return singleton; 132 | } 133 | public: 134 | static void report() { 135 | MemoryTracker &t = one(); 136 | 137 | // TODO: table 138 | double MB = 1024.0 * 1024.0; 139 | std::cout << "Memory allocation report:" << std::endl; 140 | 141 | std::cout << "\tAllocations: " << t.m_cpu_allocs 142 | << ", Frees: " << t.m_cpu_frees 143 | << ", Net: " << t.m_cpu_allocs - t.m_cpu_frees << std::endl; 144 | std::cout << "\tIn use: " << t.m_cpu_inuse/MB << " MB" << std::endl; 145 | 146 | if constexpr (ZHETAPI_CUDA) { 147 | std::cout << "\n\tCUDA Allocations: " << t.m_cuda_allocs 148 | << ", CUDA Frees: " << t.m_cuda_frees 149 | << ", Net: " << t.m_cuda_allocs - t.m_cuda_frees << std::endl; 150 | std::cout << "\tCUDA In use: " << t.m_cuda_inuse/MB << " MB" << std::endl; 151 | } 152 | } 153 | 154 | template 155 | friend T *allocate(size_t, Variant); 156 | 157 | template 158 | friend void deallocate(T *, Variant); 159 | 160 | template 161 | friend void copy(const std::shared_ptr &, 162 | const std::shared_ptr &, 163 | size_t, Variant); 164 | }; 165 | 166 | template 167 | T *allocate(size_t n, Variant variant) 168 | { 169 | return MemoryTracker::one().alloc (n, variant); 170 | } 171 | 172 | template 173 | void deallocate(T *ptr, Variant variant) 174 | { 175 | MemoryTracker::one().deallocate(ptr, variant); 176 | } 177 | 178 | template 179 | std::shared_ptr make_shared_array(size_t elements, Variant variant) 180 | { 181 | return std::shared_ptr ( 182 | allocate (elements, variant), 183 | [variant](T *ptr) { 184 | deallocate(ptr, variant); 185 | } 186 | ); 187 | } 188 | 189 | template 190 | void copy(const std::shared_ptr &dst, 191 | const std::shared_ptr &src, 192 | size_t elements, Variant variant) 193 | { 194 | MemoryTracker::one().copy(dst.get(), src.get(), elements, variant); 195 | } 196 | 197 | } 198 | 199 | } 200 | 201 | #endif 202 | -------------------------------------------------------------------------------- /include/autograd/activation.hpp: -------------------------------------------------------------------------------- 1 | #include "autograd.hpp" 2 | #include "function.hpp" 3 | #include "iseq.hpp" 4 | 5 | namespace zhetapi { 6 | 7 | namespace autograd { 8 | 9 | namespace ml { 10 | 11 | // RELU activation function 12 | class _relu : public ISeq { 13 | public: 14 | struct kernel : public _function { 15 | kernel() : _function(1) {} 16 | 17 | Constant compute(const Input &ins) override { 18 | return ins[0].copy().transform( 19 | [](float x) { 20 | return x > 0 ? x : 0; 21 | } 22 | ); 23 | } 24 | 25 | Gradient gradient(const Input &ins, const Input &igrads) override { 26 | Constant out = Constant(igrads[0].shape(), 27 | [&](size_t i) { 28 | float x = ins[0].get(i); 29 | return (x > 0 ? 
1 : 0) * igrads[0].get(i); 30 | } 31 | ); 32 | 33 | return Gradient { 34 | .igrads = {out} 35 | }; 36 | } 37 | 38 | std::string summary() const override { 39 | return "RELU"; 40 | } 41 | }; 42 | 43 | _relu() : ISeq(new_ftn_ (), 1) {} 44 | }; 45 | 46 | inline Function relu(const Function &function) 47 | { 48 | return (new_ <_relu> ())(function); 49 | } 50 | 51 | // Leaky RELU activation function 52 | class _leaky_relu : public ISeq { 53 | public: 54 | struct kernel : public _function { 55 | float _alpha; 56 | 57 | kernel(float alpha) : _function(1), _alpha(alpha) {} 58 | 59 | Constant compute(const Input &ins) override { 60 | return ins[0].copy().transform( 61 | [this](float x) { 62 | return x > 0 ? x : _alpha * x; 63 | } 64 | ); 65 | } 66 | 67 | Gradient gradient(const Input &ins, const Input &igrads) override { 68 | Constant out = Constant(igrads[0].shape(), 69 | [&](size_t i) { 70 | float x = ins[0].get(i); 71 | return (x > 0 ? 1 : _alpha) * igrads[0].get(i); 72 | } 73 | ); 74 | 75 | return Gradient { 76 | .igrads = {out} 77 | }; 78 | } 79 | 80 | std::string summary() const override { 81 | return "LEAKY RELU"; 82 | } 83 | }; 84 | 85 | _leaky_relu(float alpha) : ISeq(new_ftn_ (alpha), 1) {} 86 | }; 87 | 88 | inline Function leaky_relu(float alpha) 89 | { 90 | return new_ <_leaky_relu> (alpha); 91 | } 92 | 93 | // Sigmoid activation function 94 | class _sigmoid : public ISeq { 95 | public: 96 | struct kernel : public _function { 97 | kernel() : _function(1) {} 98 | 99 | Constant compute(const Input &ins) override { 100 | return ins[0].copy().transform( 101 | [](float x) { 102 | return 1 / (1 + std::exp(-x)); 103 | } 104 | ); 105 | } 106 | 107 | Gradient gradient(const Input &ins, const Input &igrads) override { 108 | Constant out = Constant(igrads[0].shape(), 109 | [&](size_t i) { 110 | float x = ins[0].get(i); 111 | float y = 1 / (1 + std::exp(-x)); 112 | return y * (1 - y) * igrads[0].get(i); 113 | } 114 | ); 115 | 116 | return Gradient { 117 | .igrads = {out} 118 | }; 119 | } 120 | 121 | std::string summary() const override { 122 | return "SIGMOID"; 123 | } 124 | }; 125 | 126 | _sigmoid() : ISeq(new_ftn_ (), 1) {} 127 | }; 128 | 129 | inline Function sigmoid(const Function &function) 130 | { 131 | return (new_ <_sigmoid> ())(function); 132 | } 133 | 134 | // Tanh activation function 135 | class _tanh : public ISeq { 136 | public: 137 | struct kernel : public _function { 138 | kernel() : _function(1) {} 139 | 140 | Constant compute(const Input &ins) override { 141 | return ins[0].copy().transform( 142 | [](float x) { 143 | return std::tanh(x); 144 | } 145 | ); 146 | } 147 | 148 | Gradient gradient(const Input &ins, const Input &igrads) override { 149 | Constant out = Constant(igrads[0].shape(), 150 | [&](size_t i) { 151 | float x = ins[0].get(i); 152 | float y = std::tanh(x); 153 | return (1 - y * y) * igrads[0].get(i); 154 | } 155 | ); 156 | 157 | return Gradient { 158 | .igrads = {out} 159 | }; 160 | } 161 | 162 | std::string summary() const override { 163 | return "TANH"; 164 | } 165 | }; 166 | 167 | _tanh() : ISeq(new_ftn_ (), 1) {} 168 | }; 169 | 170 | inline Function tanh(const Function &function) 171 | { 172 | return (new_ <_tanh> ())(function); 173 | } 174 | 175 | // Softmax activation function 176 | class _softmax : public ISeq { 177 | public: 178 | struct kernel : public _function { 179 | kernel() : _function(1) {} 180 | 181 | Constant compute(const Input &ins) override { 182 | auto o = ins[0].copy(); 183 | 184 | float omax = max(o); 185 | o -= omax; 186 | 187 | float osum = 
sum(o, expf); 188 | return o.transform( 189 | [osum](float x) { 190 | return std::exp(x)/osum; 191 | } 192 | ); 193 | } 194 | 195 | Gradient gradient(const Input &ins, const Input &igrads) override { 196 | auto o = ins[0].copy(); 197 | 198 | auto omax = max(o); 199 | o -= omax; 200 | 201 | auto osum = sum(o, expf); 202 | 203 | Constant out({o.size()}, 204 | [&](size_t i) { 205 | float x = o.get(i); 206 | float e = std::exp(x); 207 | float t = e * (osum - e) / (osum * osum); 208 | return t * igrads[0].get(i); 209 | } 210 | ); 211 | 212 | return Gradient { 213 | .igrads = {out} 214 | }; 215 | } 216 | 217 | std::string summary() const override { 218 | return "SOFTMAX"; 219 | } 220 | }; 221 | 222 | _softmax() : ISeq(new_ftn_ (), 1) {} 223 | }; 224 | 225 | inline Function softmax(const Function &function) 226 | { 227 | return (new_ <_softmax> ())(function); 228 | } 229 | 230 | } 231 | 232 | } 233 | 234 | } 235 | -------------------------------------------------------------------------------- /include/autograd/cpu_kernels.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_AUTOGRAD_CPU_KERNELS_H_ 2 | #define ZHETAPI_AUTOGRAD_CPU_KERNELS_H_ 3 | 4 | // Standard headers 5 | #include 6 | 7 | namespace zhetapi { 8 | 9 | namespace detail { 10 | 11 | namespace autograd { 12 | 13 | // TODO: put in source file 14 | inline void fma_matrix_vector(float *out, const float *matrix, const float *bias, const float *input, size_t rows, size_t cols) 15 | { 16 | #pragma omp parallel for 17 | for (long int i = 0; i < rows; i++) { 18 | float sum = 0; 19 | 20 | for (long int j = 0; j < cols; j++) 21 | sum += matrix[i * cols + j] * input[j]; 22 | 23 | out[i] = sum + bias[i]; 24 | } 25 | } 26 | 27 | inline void mul_vector_vector_transpose(float *out, const float *a, const float *b, size_t na, size_t nb) 28 | { 29 | #pragma omp parallel for 30 | for (long int i = 0; i < na; i++) { 31 | for (long int j = 0; j < nb; j++) 32 | out[i * nb + j] = a[i] * b[j]; 33 | } 34 | } 35 | 36 | inline void mul_matrix_transpose_vector(float *out, const float *matrix, const float *vector, size_t na, size_t nb) 37 | { 38 | #pragma omp parallel for 39 | for (long int i = 0; i < na; i++) { 40 | float sum = 0; 41 | 42 | for (long int j = 0; j < nb; j++) 43 | sum += matrix[i + j * na] * vector[j]; 44 | 45 | out[i] = sum; 46 | } 47 | } 48 | 49 | } 50 | 51 | } 52 | 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /include/autograd/gradient_queue.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_AUTOGRAD_GQ_H_ 2 | #define ZHETAPI_AUTOGRAD_GQ_H_ 3 | 4 | // Standard headers 5 | #include 6 | 7 | // Library headers 8 | #include "../tensor.hpp" 9 | 10 | namespace zhetapi { 11 | 12 | namespace autograd { 13 | 14 | // Constants are just tensors 15 | using Constant = Tensor ; 16 | 17 | // GradientQueue class is a deque with extra operations 18 | class GradientQueue : public std::deque { 19 | public: 20 | // Constructors 21 | GradientQueue() = default; 22 | 23 | // Initializer list 24 | GradientQueue(std::initializer_list l) 25 | : std::deque (l) {} 26 | 27 | // Arithematic operations 28 | GradientQueue &operator+=(const GradientQueue &rhs) { 29 | assert(size() == rhs.size()); 30 | for (size_t i = 0; i < size(); i++) 31 | at(i) += rhs[i]; 32 | return *this; 33 | } 34 | 35 | GradientQueue &operator-=(const GradientQueue &rhs) { 36 | assert(size() == rhs.size()); 37 | for (size_t i = 0; i 
< size(); i++) 38 | at(i) -= rhs[i]; 39 | return *this; 40 | } 41 | 42 | GradientQueue &operator*=(const GradientQueue &rhs) { 43 | assert(size() == rhs.size()); 44 | for (size_t i = 0; i < size(); i++) 45 | at(i) *= rhs[i]; 46 | return *this; 47 | } 48 | 49 | GradientQueue &operator/=(const GradientQueue &rhs) { 50 | assert(size() == rhs.size()); 51 | for (size_t i = 0; i < size(); i++) 52 | at(i) /= rhs[i]; 53 | return *this; 54 | } 55 | 56 | // Single constant operations 57 | GradientQueue &operator+=(const Constant &rhs) { 58 | for (auto &x : *this) 59 | x += rhs; 60 | return *this; 61 | } 62 | 63 | GradientQueue &operator-=(const Constant &rhs) { 64 | for (auto &x : *this) 65 | x -= rhs; 66 | return *this; 67 | } 68 | 69 | GradientQueue &operator*=(const Constant &rhs) { 70 | for (auto &x : *this) 71 | x *= rhs; 72 | return *this; 73 | } 74 | 75 | GradientQueue &operator/=(const Constant &rhs) { 76 | for (auto &x : *this) 77 | x /= rhs; 78 | return *this; 79 | } 80 | }; 81 | 82 | // More operators 83 | // TODO: source file 84 | inline GradientQueue operator*(const GradientQueue &lhs, const float &rhs) 85 | { 86 | GradientQueue gq = lhs; 87 | gq *= rhs; 88 | return gq; 89 | } 90 | 91 | inline GradientQueue operator*(const float &lhs, const GradientQueue &rhs) 92 | { 93 | GradientQueue gq = rhs; 94 | gq *= lhs; 95 | return gq; 96 | } 97 | 98 | inline GradientQueue operator/(const GradientQueue &lhs, const float &rhs) 99 | { 100 | GradientQueue gq = lhs; 101 | gq /= rhs; 102 | return gq; 103 | } 104 | 105 | inline GradientQueue operator/(const float &lhs, const GradientQueue &rhs) 106 | { 107 | GradientQueue gq = rhs; 108 | gq /= lhs; 109 | return gq; 110 | } 111 | 112 | } 113 | 114 | } 115 | 116 | #endif 117 | -------------------------------------------------------------------------------- /include/autograd/iseq.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_ISEQ_H_ 2 | #define ZHETAPI_ISEQ_H_ 3 | 4 | // Standard headers 5 | #include 6 | #include 7 | #include 8 | 9 | // Library jeaders 10 | #include "function.hpp" 11 | 12 | #include "../io/print.hpp" 13 | 14 | namespace zhetapi { 15 | 16 | namespace autograd { 17 | 18 | struct _node; 19 | 20 | // Cache structure for automatic differentiation 21 | using Cache = std::unordered_map <_function *, _function::Input>; 22 | using Node = std::shared_ptr <_node>; 23 | 24 | // Tree structure 25 | struct _node { 26 | _function::Ptr fptr; 27 | 28 | // TODO: should be a vector of plain nodes 29 | std::vector children; 30 | 31 | // Constructors 32 | _node(const _function::Ptr &); 33 | _node(const _function::Ptr &, const std::vector &); 34 | 35 | static Node make(const _function::Ptr &); 36 | static Node make(const _function::Ptr &, const std::vector &); 37 | 38 | // Printing the tree 39 | std::string str(int = 0) const; 40 | }; 41 | 42 | // Instruction sequence for a function 43 | class ISeq : public _function { 44 | public: 45 | // Public aliases 46 | using Instructions = std::vector <_function::Ptr>; 47 | using ConstantCache = std::vector ; 48 | private: 49 | // Private aliases 50 | using Var = std::shared_ptr <_variable>; 51 | using Variables = std::vector ; 52 | 53 | // Information about cache usage, for optimization 54 | struct _cache_info { 55 | int refs = 0; 56 | Node value; 57 | 58 | // Constructor 59 | _cache_info(); 60 | _cache_info(int, const Node &); 61 | }; 62 | 63 | using _cache_map = std::unordered_map ; 64 | 65 | // Reindexing map 66 | using _reindex_map = std::unordered_map ; 
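	// (Presumably used to remap variable indices when the instruction
	// sequence is rebuilt after optimization; see _generate_reindex_map.)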
67 | 68 | // TODO: JIT function to compile into object code 69 | // this should be possible since the types 70 | // are homogenous 71 | 72 | // Member variables 73 | Instructions _instrs; // Instruction sequence 74 | Variables _vars; // Variables 75 | ConstantCache _consts; // Fixed constants 76 | mutable ConstantCache _cache; // More cache for flow of execution 77 | 78 | // TODO: should really be finput = nullptr 79 | // TODO: actually cached input is currently obsolete 80 | Input _cached_in; 81 | Cache _cached_finputs; 82 | 83 | // TODO: cache tree? 84 | 85 | // Append helpers 86 | void append_variable(const _variable *); 87 | void append_iseq(const ISeq *const); 88 | int index_of(const _variable *) const; 89 | void _append_function(const Ptr &); 90 | 91 | template 92 | void _append(const _function::Ptr &, Args ...); 93 | 94 | // Computation helpers 95 | void _load(const Input &); 96 | void storec(std::stack &, int) const; 97 | bool _ispec(const Ptr &, std::stack &); 98 | void _exec(const Ptr &, std::stack &); 99 | 100 | // Composing functions and variables 101 | _function::Ptr _compose(const Compositions &) const override; 102 | 103 | // Tree building and rebuilding 104 | Node _tree(_cache_map &) const; 105 | void _tree_walk(const Ptr &, std::stack &, 106 | _cache_map &) const; 107 | void _rebuild(const Node &, Instructions &, 108 | ConstantCache &, _cache_map &, 109 | const ConstantCache &) const; 110 | 111 | // Optimization functions 112 | void _optimize(); 113 | _reindex_map _generate_reindex_map() const; 114 | 115 | // TODO: remove const 116 | // Differentiation functions 117 | friend Node _diff_tree(const Node &, int); 118 | 119 | _function::Ptr diff(const int) const override; 120 | protected: 121 | // Protected constructors 122 | ISeq(const _function::Ptr &, int); 123 | ISeq(const std::vector <_function::Ptr> &, 124 | std::vector , int); 125 | ISeq(const std::vector <_function::Ptr> &, 126 | std::vector , int, 127 | const _reindex_map &); 128 | 129 | std::pair <_function *, const MethodTable &> method_table() override; 130 | public: 131 | // TODO: check function to make sure only 132 | // one element remains on the stack 133 | 134 | // Empty constructor 135 | ISeq(); 136 | 137 | // Get a variable 138 | // TODO: protected? 139 | const Var &get(int) const; 140 | 141 | // Append a sequence of instructions 142 | template 143 | void append(Args ...); 144 | 145 | // Evaluate the sequence 146 | Constant compute(const Input &) override; 147 | 148 | // Evaluate gradient 149 | Gradient gradient(const Input &, const Input &) override; 150 | 151 | // Apply gradients 152 | void update_parameters(GradientQueue &) override; 153 | 154 | // Permute the order of variables 155 | void refactor(const std::vector &); 156 | _function::Ptr refactor(const std::vector &) const; 157 | 158 | // Info about parameters 159 | int parameters() const override; 160 | int tunable_parameters() const override; 161 | 162 | // Dump instructions for debugging 163 | std::string summary() const override; 164 | }; 165 | 166 | // Append a sequence of instructions 167 | template 168 | void ISeq::_append(const _function::Ptr &fptr, Args ... args) 169 | { 170 | _append_function(fptr); 171 | if constexpr (sizeof ... (args) > 0) 172 | _append(args ...); 173 | } 174 | 175 | template 176 | void ISeq::append(Args ... 
args) 177 | { 178 | // Append all, then optimize 179 | _append(args...); 180 | _optimize(); 181 | } 182 | 183 | 184 | } 185 | 186 | } 187 | 188 | #endif 189 | -------------------------------------------------------------------------------- /include/autograd/ml.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_AUTOGRAD_ML_H_ 2 | #define ZHETAPI_AUTOGRAD_ML_H_ 3 | 4 | // Standard headers 5 | #include 6 | #include 7 | 8 | // Library headers 9 | #include "../matrix.hpp" 10 | #include "../vector.hpp" 11 | #include "../std/interval.hpp" 12 | #include "autograd.hpp" 13 | #include "cpu_kernels.hpp" 14 | 15 | namespace zhetapi { 16 | 17 | namespace autograd { 18 | 19 | namespace ml { 20 | 21 | class _kdense : public _function { 22 | // Input and output shape 23 | size_t m_isize; 24 | size_t m_osize; 25 | std::string m_init; 26 | float m_dropout; 27 | 28 | // Weight matrix 29 | Matrix m_weights; 30 | 31 | // Bias 32 | Vector m_biases; 33 | 34 | // Cached resources 35 | Vector m_output; 36 | 37 | // Static random number generator 38 | static utility::Interval <1> rng; 39 | public: 40 | _kdense(size_t isize, size_t osize, const std::string &initializer = "xavier") 41 | : _function(1), m_isize(isize), m_osize(osize), m_output(osize) 42 | { 43 | // Lower case initializer 44 | for (auto &c : initializer) 45 | m_init += std::tolower(c); 46 | 47 | // Initializer 48 | std::function lambda = [](size_t) { return rng(); }; 49 | 50 | std::random_device rd; 51 | std::mt19937 gen(rd()); 52 | 53 | std::normal_distribution dist; 54 | 55 | int normal = 0; 56 | if (m_init == "lecun") { 57 | dist = std::normal_distribution (0, 1.0 / std::sqrt(isize)); 58 | normal++; 59 | } else if (m_init == "he") { 60 | dist = std::normal_distribution (0, 2.0/std::sqrt(isize)); 61 | normal++; 62 | } else if (m_init == "xavier") { 63 | float avg = (isize + osize) / 2.0f; 64 | dist = std::normal_distribution (0, 1.0/std::sqrt(avg)); 65 | normal++; 66 | } 67 | 68 | if (normal) 69 | lambda = [&](size_t i) { return dist(gen); }; 70 | else if (m_init == "debug") 71 | lambda = [&](size_t i) { return 1.0f; }; 72 | else 73 | lambda = [&](size_t i) { return 0.0f; }; 74 | 75 | m_weights = Matrix (m_osize, m_isize, lambda); 76 | m_biases = Vector (m_osize, lambda); 77 | } 78 | 79 | // Forward pass 80 | Constant compute(const Input &ins) override { 81 | // NOTE: Single input only 82 | // TODO: check if batching... 83 | // Convert first argument into a matrix 84 | detail::autograd::fma_matrix_vector( 85 | m_output.data(), m_weights.data(), 86 | m_biases.data(), ins[0].data(), 87 | m_osize, m_isize 88 | ); 89 | 90 | return m_output; 91 | } 92 | 93 | // Machine learning functions 94 | virtual Gradient gradient(const Input &ins, const Input &igrads) override { 95 | // igrad is the gradient of the output of the 96 | // function wrt to the desired function 97 | Vector igrad(m_isize); 98 | Matrix wgrad(m_osize, m_isize); 99 | Vector bgrad(m_osize); 100 | 101 | detail::autograd::mul_vector_vector_transpose( 102 | wgrad.data(), igrads[0].data(), ins[0].data(), 103 | m_osize, m_isize 104 | ); 105 | 106 | // TODO: Copy and computation in parallel? 107 | detail::autograd::mul_matrix_transpose_vector( 108 | igrad.data(), m_weights.data(), igrads[0].data(), 109 | m_isize, m_osize 110 | ); 111 | 112 | bgrad.copy(igrads[0]); 113 | 114 | // TODO: avoid the need to copy... reduce required allocations 115 | // Debug copy issues when using persistent gradient storage... 
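		// To summarize the kernels above (for a single column-vector input):
		//   wgrad = igrads[0] * ins[0]^T  (outer product; gradient wrt the weights)
		//   igrad = W^T * igrads[0]       (gradient passed back to the layer input)
		//   bgrad = igrads[0]             (gradient wrt the biases)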
116 | Gradient gradient; 117 | gradient.igrads = { igrad }; 118 | gradient.grads = { wgrad, bgrad }; 119 | return gradient; 120 | } 121 | 122 | // Apply gradient 123 | virtual void update_parameters(GradientQueue &grads) override { 124 | // Convert first argument into a matrix 125 | Vector bgrad(grads.back()); 126 | grads.pop_back(); 127 | 128 | Matrix wgrad(grads.back(), m_osize, m_isize); 129 | grads.pop_back(); 130 | 131 | m_weights += wgrad; 132 | m_biases += bgrad; 133 | } 134 | 135 | // Info about parameters 136 | virtual int parameters() const override { 137 | return 2; 138 | } 139 | 140 | virtual int tunable_parameters() const override { 141 | return m_weights.size() + m_biases.size(); 142 | } 143 | 144 | // Method table 145 | std::pair <_function *, const MethodTable &> method_table() override { 146 | static const MethodTable _map { 147 | {"dropout", [](_function *f, const Arguments &args) { 148 | _kdense *kf = dynamic_cast <_kdense *> (f); 149 | 150 | assert(kf); 151 | if (args.size() > 0) 152 | kf->m_dropout = std::get (args[0]); 153 | 154 | return kf->m_dropout; 155 | }} 156 | }; 157 | 158 | return {this, _map}; 159 | } 160 | 161 | // Summary of the function 162 | std::string summary() const override { 163 | std::ostringstream oss; 164 | oss << "DENSE(" << m_isize << " x " << m_osize; 165 | if (m_dropout > 0) 166 | oss << ", dropout = " << std::setprecision(2) << m_dropout; 167 | oss << ", " << m_init << ")"; 168 | return oss.str(); 169 | } 170 | }; 171 | 172 | class _dense : public ISeq { 173 | public: 174 | _dense(size_t isize, size_t osize, const std::string &initializer = "xavier") 175 | : ISeq(new_ftn_ <_kdense> (isize, osize, initializer), 1) {} 176 | }; 177 | 178 | // Dense layer factory 179 | inline Function dense(size_t isize, size_t osize, const std::string &initializer = "xavier") 180 | { 181 | return Function(new_ftn_ <_dense> (isize, osize, initializer)); 182 | } 183 | 184 | } 185 | 186 | } 187 | 188 | } 189 | 190 | #endif 191 | -------------------------------------------------------------------------------- /include/autograd/optimizer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_AUTOGRAD_OPTIMIZER_H_ 2 | #define ZHETAPI_AUTOGRAD_OPTIMIZER_H_ 3 | 4 | #include "function.hpp" 5 | 6 | namespace zhetapi { 7 | 8 | namespace autograd { 9 | 10 | namespace ml { 11 | 12 | // Interface for optimizers 13 | class _optimizer { 14 | protected: 15 | size_t _parameters = 0; 16 | public: 17 | float alpha; 18 | 19 | _optimizer(size_t parameters, float alpha_ = 0.001) 20 | : _parameters(parameters), alpha(alpha_) {} 21 | 22 | // Computation 23 | virtual void optimize(GradientQueue &) = 0; 24 | 25 | void operator()(GradientQueue &gq) { 26 | optimize(gq); 27 | } 28 | }; 29 | 30 | // Stochastic gradient descent 31 | struct SGD : public _optimizer { 32 | SGD(size_t parameters, float alpha) 33 | : _optimizer(parameters, alpha) {} 34 | 35 | // Optimize 36 | void optimize(GradientQueue &gq) override { 37 | for (auto &g : gq) 38 | g *= -alpha; 39 | } 40 | }; 41 | 42 | // Momentum 43 | class Momentum : public _optimizer { 44 | GradientQueue _v; 45 | public: 46 | float mu; 47 | 48 | Momentum(size_t parameters, float alpha, float mu_ = 0.9) 49 | : _optimizer(parameters, alpha), mu(mu_) { 50 | _v.resize(parameters); 51 | } 52 | 53 | // Optimize 54 | void optimize(GradientQueue &gq) override { 55 | // TODO: assert that gq.size() == _v.size() 56 | for (size_t i = 0; i < _parameters; i++) { 57 | if (_v[i].shape() != gq[i].shape()) 58 | 
_v[i] = Constant(gq[i].shape(), 0); 59 | 60 | _v[i] = mu * _v[i] - alpha * gq[i]; 61 | gq[i] = _v[i]; 62 | } 63 | } 64 | }; 65 | 66 | // RMSprop 67 | class RMSprop : public _optimizer { 68 | GradientQueue _v; 69 | public: 70 | float beta; 71 | 72 | RMSprop(size_t parameters, float alpha, float beta_ = 0.9) 73 | : _optimizer(parameters, alpha), beta(beta_) { 74 | _v.resize(parameters); 75 | } 76 | 77 | // Optimize 78 | void optimize(GradientQueue &gq) override { 79 | // TODO: assert that gq.size() == _v.size() 80 | for (size_t i = 0; i < _parameters; i++) { 81 | if (_v[i].shape() != gq[i].shape()) 82 | _v[i] = Constant(gq[i].shape(), 0); 83 | 84 | _v[i] = beta * _v[i] + (1 - beta) * gq[i] * gq[i]; 85 | gq[i] = -alpha * gq[i] / _v[i].transform( 86 | [](float x) { 87 | return std::sqrt(x) + 1e-10; 88 | } 89 | ); 90 | } 91 | } 92 | }; 93 | 94 | // Adam 95 | class Adam : public _optimizer { 96 | GradientQueue _v, _m; 97 | size_t _iter = 1; 98 | public: 99 | float beta1, beta2; 100 | 101 | Adam(size_t parameters, float alpha, float beta1_ = 0.9, float beta2_ = 0.999) 102 | : _optimizer(parameters, alpha), beta1(beta1_), beta2(beta2_) { 103 | _v.resize(parameters); 104 | _m.resize(parameters); 105 | } 106 | 107 | // Reset iteration 108 | void reset() { 109 | _iter = 1; 110 | } 111 | 112 | // Optimize 113 | void optimize(GradientQueue &gq) override { 114 | // TODO: assert that gq.size() == _v.size() 115 | for (size_t i = 0; i < _parameters; i++) { 116 | if (_v[i].shape() != gq[i].shape()) 117 | _v[i] = Constant(gq[i].shape(), 0); 118 | if (_m[i].shape() != gq[i].shape()) 119 | _m[i] = Constant(gq[i].shape(), 0); 120 | 121 | _v[i] = beta1 * _v[i] - (1 - beta1) * gq[i]; 122 | _m[i] = beta2 * _m[i] + (1 - beta2) * gq[i] * gq[i]; 123 | 124 | auto _vh = _v[i]/float(1 - std::pow(beta1, _iter)); 125 | auto _mh = _m[i]/float(1 - std::pow(beta2, _iter)); 126 | 127 | gq[i] = alpha * _vh / _mh.transform( 128 | [](float x) { 129 | return std::sqrt(x) + 1e-10; 130 | } 131 | ); 132 | } 133 | 134 | _iter++; 135 | } 136 | }; 137 | 138 | } 139 | 140 | } 141 | 142 | } 143 | 144 | #endif 145 | -------------------------------------------------------------------------------- /include/autograd/train.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_AUTOGRAD_TRAIN_H_ 2 | #define ZHETAPI_AUTOGRAD_TRAIN_H_ 3 | 4 | // Standard headers 5 | #include 6 | 7 | // Extra headers 8 | #include 9 | 10 | // Library headers 11 | #include "function.hpp" 12 | #include "optimizer.hpp" 13 | #include "autograd.hpp" 14 | 15 | #include 16 | 17 | namespace zhetapi { 18 | 19 | namespace autograd { 20 | 21 | namespace ml { 22 | 23 | // Useful aliases 24 | using Data = std::vector <_function::Input>; 25 | using Validator = std::function ; 26 | 27 | // Check accuracy of model wrt data set, returns proportion 28 | // TODO: source file 29 | inline float accuracy(_function::Ptr &model, 30 | const Data &X, const std::vector &Y, 31 | const Validator &validator) 32 | { 33 | int count = 0; 34 | std::vector indices; 35 | for (int i = 0; i < X.size(); i++) { 36 | Constant output = model->compute(X[i]); 37 | if (validator(output, Y[i])) { 38 | indices.push_back(i); 39 | count++; 40 | } 41 | } 42 | 43 | return float(count) / X.size(); 44 | } 45 | 46 | // Progress reporters 47 | struct _reporter { 48 | struct Info { 49 | size_t epoch; 50 | size_t batch; 51 | size_t total_epochs; 52 | float avg_error; 53 | float time; 54 | _function::Ptr &model; 55 | }; 56 | 57 | virtual void report(const Info &) const 
{} 58 | }; 59 | 60 | // Progress bar 61 | class ProgressBar : public _reporter { 62 | mutable indicators::ProgressBar bar; 63 | public: 64 | ProgressBar() : bar { 65 | indicators::option::BarWidth {50}, 66 | indicators::option::Start {" ["}, 67 | indicators::option::Fill {"█"}, 68 | indicators::option::Lead {"█"}, 69 | indicators::option::Remainder {"-"}, 70 | indicators::option::End {"]"}, 71 | indicators::option::PrefixText {"Training model"}, 72 | indicators::option::ShowElapsedTime {true}, 73 | indicators::option::ShowRemainingTime {true} 74 | } {} 75 | 76 | void report(const Info &info) const override { 77 | float progress = 100.0f * (float) info.epoch / info.total_epochs; 78 | bar.set_progress(progress); 79 | } 80 | }; 81 | 82 | // Validate (requires data set) 83 | class Validate : public _reporter { 84 | const Data &X; 85 | const std::vector &Y; 86 | Validator validator; 87 | public: 88 | Validate(const Data &X_, const std::vector &Y_, const Validator &validator_) 89 | : X {X_}, Y {Y_}, validator {validator_} {} 90 | 91 | void report(const Info &info) const override { 92 | float accuracy = ml::accuracy(info.model, X, Y, validator); 93 | std::cout << "Accuracy: " << accuracy 94 | << ", Time: " << std::setprecision(2) << info.time << "s" 95 | << ", Average error = " << info.avg_error << std::endl; 96 | 97 | // TODO: make this optional 98 | detail::MemoryTracker::report(); 99 | } 100 | }; 101 | 102 | // TODO: source file 103 | // TODO: some function/interface to report progress 104 | // TODO: default optimizer and loss function 105 | 106 | // Information relevant to training 107 | struct TrainingSuite { 108 | _function::Ptr &loss; 109 | _function::Ptr &dloss; 110 | size_t iterations; 111 | size_t batch_size; 112 | std::shared_ptr <_reporter> reporter = std::make_shared (); 113 | }; 114 | 115 | inline void fit(_function::Ptr &f, const Data &X, const std::vector &Y, 116 | _optimizer &optimizer, const TrainingSuite &suite) 117 | { 118 | // Setup timer 119 | std::chrono::steady_clock::time_point start; 120 | 121 | // TODO: assert that X.size() == Y.size() 122 | for (size_t i = 0; i < suite.iterations; i++) { 123 | start = std::chrono::steady_clock::now(); 124 | 125 | // TODO: implement verbose 126 | // TODO: batching 127 | GradientQueue gq; 128 | int elements = 0; 129 | 130 | float serror = 0; 131 | for (size_t j = 0; j < X.size(); j++) { 132 | Constant y = f->compute(X[j]).flat(); 133 | Constant igrad = suite.dloss->compute({y, Y[j]}); 134 | _function::Gradient grads = f->gradient(X[j], {igrad}); 135 | serror += suite.loss->compute({y, Y[j]}).length(); 136 | 137 | elements++; 138 | if (gq.empty()) 139 | gq = grads.grads; 140 | else 141 | gq += grads.grads; 142 | 143 | if (elements >= suite.batch_size) { 144 | gq /= float(elements); 145 | optimizer.optimize(gq); 146 | f->update_parameters(gq); 147 | 148 | gq.clear(); 149 | elements = 0; 150 | } 151 | } 152 | 153 | // Report progress 154 | float time = std::chrono::duration_cast ( 155 | std::chrono::steady_clock::now() - start 156 | ).count() / 1000.0f; 157 | 158 | suite.reporter->report({i, 0, suite.iterations, serror/X.size(), time, f}); 159 | } 160 | } 161 | 162 | } 163 | 164 | } 165 | 166 | } 167 | 168 | #endif 169 | -------------------------------------------------------------------------------- /include/cast.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CAST_H_ 2 | #define CAST_H_ 3 | 4 | // Engine headers 5 | #include "token.hpp" 6 | 7 | /** 8 | * @file cast.hpp 9 | * @brief 
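Putting the pieces of train.hpp together, a minimal training sketch looks as follows. Here `model`, `mse`, `dmse`, `X` and `Y` are hypothetical stand-ins for objects assembled elsewhere with this API, and `argmax` is a hypothetical helper:

\code{.cpp}
using namespace zhetapi::autograd;

// Hypothetical model, e.g. a stack of ml::dense layers
_function::Ptr model = /* ... */;

ml::Validator validator = [](const Constant &out, const Constant &target) {
	return argmax(out) == argmax(target);   // argmax: hypothetical helper
};

ml::SGD optimizer(model->parameters(), 0.01f);

ml::TrainingSuite suite {
	mse, dmse,   // loss and its derivative with respect to the prediction
	100,         // iterations (epochs over X)
	32,          // batch size: gradients are averaged before each step
	std::make_shared <ml::Validate> (X, Y, validator)
};

ml::fit(model, X, Y, optimizer, suite);
\endcode

Note that `fit` averages the accumulated `GradientQueue` over each batch, hands it to the optimizer in place, and only then calls `update_parameters`, so an optimizer is purely a transformation on raw gradients.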
Contains functions to help casting pointers to Tokens using 10 | * `dynamic_cast`. Helpful for dealing with different overloads in Registrables. 11 | * For example, suppose we have the following Registrable that is supposed to 12 | * take up to three integers. 13 | * 14 | * \code{.cpp} 15 | * 16 | * ZHETAPI_REGISTRABLE(my_registrable) 17 | * { 18 | * OpZ o1, o2, o3; 19 | * 20 | * // Performing overload switching with zhetapi_cast (should be used for very 21 | * // specific or seemingly random overloads) 22 | * if (zhetapi_cast(inputs, o1, o2, o3)) { 23 | * // do the function for three integers (o1, o2, o3) 24 | * } else if (zhetapi_cast(inputs, o1, o2)) { 25 | * // do the function for two integers (o1, o2) 26 | * } else if (zhetapi_cast(inputs, o1)) { 27 | * // do the function for two integers (o1) 28 | * } else { 29 | * // Terminating branch... 30 | * } 31 | * 32 | * // ...or use zhetapi_cc_cast (should be used for sequences of partial 33 | * // overloads) 34 | * switch (zhetapi_cc_cast(inputs, o1, o2, o3)) { 35 | * case 3: 36 | * // do the function for three integers (o1, o2, o3) 37 | * case 2: 38 | * // do the function for two integers (o1, o2) 39 | * case 1: 40 | * // do the function for one integer (o1) 41 | * default: 42 | * break; 43 | * } 44 | * 45 | * // As the terminating action either 46 | * return nullptr; 47 | * 48 | * // ...or throw 49 | * throw my_exception(); 50 | * } 51 | * 52 | * \endcode 53 | */ 54 | 55 | namespace zhetapi { 56 | 57 | // TODO: rename cc cast to partial cast 58 | // and add another cast method that goes down the list until fully casted 59 | template 60 | bool zhetapi_cast_process( 61 | const std::vector &tokens, 62 | size_t i, 63 | T &tptr) 64 | { 65 | if (i >= tokens.size()) 66 | return false; 67 | 68 | tptr = dynamic_cast (tokens[i]); 69 | 70 | if (!tptr) 71 | return false; 72 | 73 | return true; 74 | } 75 | 76 | #ifndef SKIP_DOXYGEN // Breathe cannot parse variadics 77 | 78 | template 79 | bool zhetapi_cast_process( 80 | const std::vector &tokens, 81 | size_t i, 82 | T &tptr, 83 | A &... args) 84 | { 85 | if (i >= tokens.size()) 86 | return false; 87 | 88 | tptr = dynamic_cast (tokens[i]); 89 | 90 | if (!tptr) 91 | return false; 92 | 93 | return zhetapi_cast_process(tokens, i + 1, args ...); 94 | } 95 | 96 | template 97 | bool zhetapi_cast(const std::vector &tokens, A &... args) 98 | { 99 | return zhetapi_cast_process(tokens, 0, args ...); 100 | } 101 | 102 | #endif 103 | 104 | // Counting alternatives 105 | template 106 | void zhetapi_cast_cc_process( 107 | const std::vector &tokens, 108 | size_t &i, 109 | T &tptr) 110 | { 111 | if (i >= tokens.size()) 112 | return; 113 | 114 | tptr = dynamic_cast (tokens[i]); 115 | 116 | if (!tptr) 117 | return; 118 | 119 | i++; 120 | } 121 | 122 | #ifndef SKIP_DOXYGEN // Breathe cannot parse variadics 123 | 124 | template 125 | void zhetapi_cast_cc_process( 126 | const std::vector &tokens, 127 | size_t &i, 128 | T &tptr, 129 | A &... args) 130 | { 131 | if (i >= tokens.size()) 132 | return; 133 | 134 | tptr = dynamic_cast (tokens[i]); 135 | 136 | if (!tptr) 137 | return; 138 | 139 | zhetapi_cast_cc_process(tokens, ++i, args ...); 140 | } 141 | 142 | template 143 | size_t zhetapi_cast_cc(const std::vector &tokens, A &... 
args) 144 | { 145 | size_t success = 0; 146 | zhetapi_cast_cc_process(tokens, success, args ...); 147 | return success; 148 | } 149 | 150 | #endif 151 | 152 | } 153 | 154 | #endif 155 | -------------------------------------------------------------------------------- /include/common.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_COMMON_H_ 2 | #define ZHETAPI_COMMON_H_ 3 | 4 | // Printing vectors 5 | template 6 | std::ostream &operator<<(std::ostream &os, const std::vector &v) 7 | { 8 | os << "{"; 9 | for (size_t i = 0; i < v.size(); i++) { 10 | os << v[i]; 11 | if (i != v.size() - 1) 12 | os << ", "; 13 | } 14 | 15 | return os << "}"; 16 | } 17 | 18 | // Printing deques 19 | template 20 | std::ostream &operator<<(std::ostream &os, const std::deque &v) 21 | { 22 | os << "{"; 23 | for (size_t i = 0; i < v.size(); i++) { 24 | os << v[i]; 25 | if (i != v.size() - 1) 26 | os << ", "; 27 | } 28 | 29 | return os << "}"; 30 | } 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /include/complex.hpp: -------------------------------------------------------------------------------- 1 | #ifndef COMPLEX_H_ 2 | #define COMPLEX_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace zhetapi { 11 | 12 | /** 13 | * The Complex class is an 14 | * extension of the std::complex 15 | * class which includes a more convenient 16 | * set of methods, such as normalization. 17 | */ 18 | template 19 | class Complex : public std::complex { 20 | public: 21 | // Constructor 22 | Complex(); 23 | Complex(const T &, const T &); 24 | Complex(const std::complex &); 25 | 26 | // Fake constructors for conversion 27 | Complex(int, bool, bool); 28 | 29 | template 30 | Complex(A); 31 | 32 | // Getters 33 | T magnitude() const; 34 | 35 | bool is_real() const; 36 | 37 | // Operators 38 | operator long double() const; 39 | operator long long int() const; 40 | 41 | // Functional Methods 42 | Complex normalize() const; 43 | 44 | // Output Methods 45 | template 46 | friend std::string std::to_string(const Complex &); 47 | 48 | template 49 | friend std::ostream &operator<<(std::ostream &, const Complex &); 50 | }; 51 | 52 | ////////////////////////////////////////// 53 | // Constructors 54 | ////////////////////////////////////////// 55 | template 56 | Complex ::Complex() {} 57 | 58 | template 59 | template 60 | Complex ::Complex(A a) 61 | { 62 | if (typeid(T) == typeid(A)) 63 | this->real((T) a); 64 | } 65 | 66 | template 67 | Complex ::Complex(const T &re, const T &im) 68 | : std::complex (re, im) {} 69 | 70 | template 71 | Complex ::Complex(const std::complex &z) 72 | : std::complex (z) {} 73 | 74 | ////////////////////////////////////////// 75 | // Fake Constructors 76 | ////////////////////////////////////////// 77 | 78 | template 79 | Complex ::Complex(int a, bool b, bool c) {} 80 | 81 | ////////////////////////////////////////// 82 | // Getters 83 | ////////////////////////////////////////// 84 | 85 | template 86 | T Complex ::magnitude() const 87 | { 88 | return sqrt(norm(*this)); 89 | } 90 | 91 | template 92 | bool Complex ::is_real() const 93 | { 94 | return this->imag() == 0; 95 | } 96 | 97 | template 98 | Complex ::operator long double() const 99 | { 100 | return (long double) this->real(); 101 | } 102 | 103 | template 104 | Complex ::operator long long int() const 105 | { 106 | return (long long int) this->real(); 107 | } 108 | 109 | template 110 | Complex Complex 
::normalize() const 111 | { 112 | return *this/magnitude(); 113 | } 114 | 115 | template 116 | std::string to_string(const Complex &z) 117 | { 118 | std::string str; 119 | 120 | bool pl = false; 121 | 122 | if (z.real()) { 123 | pl = true; 124 | str += to_string(z.real()); 125 | } 126 | 127 | if (z.imag()) { 128 | if (pl) 129 | str += " + "; 130 | str += to_string(z.imag()) + "i"; 131 | } 132 | 133 | return str; 134 | } 135 | 136 | template 137 | std::ostream &operator<<(std::ostream &os, const Complex &z) 138 | { 139 | bool pl = false; 140 | 141 | if (!(z.real() || z.imag())) { 142 | os << "0"; 143 | return os; 144 | } 145 | 146 | if (z.real()) { 147 | pl = true; 148 | os << z.real(); 149 | } 150 | 151 | if (z.imag()) { 152 | if (pl) 153 | os << " + "; 154 | 155 | if (z.imag() != T(1)) 156 | os << z.imag(); 157 | 158 | os << "i"; 159 | } 160 | 161 | return os; 162 | } 163 | 164 | } 165 | 166 | #endif 167 | -------------------------------------------------------------------------------- /include/core/common.hpp: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H_ 2 | #define COMMON_H_ 3 | 4 | // C/C++ heaaders 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // Engine headers 11 | #include "../token.hpp" 12 | 13 | namespace zhetapi { 14 | 15 | // This file contains miscellaneous functions that 16 | // do not really fit into any other header 17 | template 18 | void collect(std::vector &bin, T x) 19 | { 20 | bin.push_back(x); 21 | } 22 | 23 | template 24 | void collect(std::vector &bin, T x, U ... rest) 25 | { 26 | bin.push_back(x); 27 | 28 | collect(bin, rest...); 29 | } 30 | 31 | } 32 | 33 | #endif -------------------------------------------------------------------------------- /include/core/kernels.hpp: -------------------------------------------------------------------------------- 1 | #ifndef KERNELS_H_ 2 | #define KERNELS_H_ 3 | 4 | // Engine headers 5 | #include "../matrix.hpp" 6 | #include "../vector.hpp" 7 | 8 | // TODO: remove this header, it is quite useless 9 | 10 | /** 11 | * @file kernels.hpp 12 | * @brief This file contains CPU "kernels" which speed up computation in other parts of 13 | * the library, such as Neural Network training. Some of these kernels may be 14 | * moved to become part of the public API. 15 | */ 16 | 17 | namespace zhetapi { 18 | 19 | /** 20 | * Computes M * V', where V' is V with 1 appended to the top. Speed-up is due to 21 | * the fact that a new vector is not being created and copied. 22 | */ 23 | template 24 | Vector apt_and_mult(const Matrix &M, const Vector &V) 25 | { 26 | size_t rs = M.get_rows(); 27 | size_t cs = M.get_cols(); 28 | 29 | Vector out(rs, T(0)); 30 | 31 | size_t k = V.size(); 32 | for (size_t i = 0; i < rs; i++) { 33 | T acc = M._array[i * cs]; 34 | 35 | for (size_t j = 0; j < k; j++) 36 | acc += M._array[i * cs + 1 + j] * V._array[j]; 37 | 38 | out._array[i] = acc; 39 | } 40 | 41 | return out; 42 | } 43 | 44 | /** 45 | * Computes U', where U = M * V and U' is U without the first element. Speed-up 46 | * is again due to the fact that a new vector is not being created. 
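In component form, the `apt_and_mult` loop above computes

\f[
\texttt{apt\_and\_mult}(M, V)_i = M_{i,0} + \sum_{j=0}^{k-1} M_{i,j+1} V_j = \left( M \begin{bmatrix} 1 \\ V \end{bmatrix} \right)_i,
\f]

i.e. the usual trick of folding the bias column into a single matrix multiply, without ever materializing the augmented vector. (The companion kernel `vvt_mult` further below is, likewise, the outer product \f$V V_t^\top\f$ written without forming the transpose explicitly.)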
47 | */ 48 | template 49 | Vector rmt_and_mult(const Matrix &M, const Vector &V) 50 | { 51 | /*size_t rs = M.get_rows(); 52 | size_t cs = M.get_cols(); 53 | 54 | Vector out(cs - 1, T(0)); 55 | for (size_t i = 1; i < cs; i++) { 56 | T acc = 0; 57 | 58 | for (size_t k = 0; k < rs; k++) 59 | acc += M._array[k * cs + i] * V._array[k]; 60 | 61 | out._array[i - 1] = acc; 62 | } */ 63 | 64 | /* Reverse loops 65 | for (size_t k = 0; k < rs; k++) { 66 | const T *arr = &(M._array[k * cs]); 67 | T v = V._array[k]; 68 | 69 | for (size_t i = 1; i < cs; i++) 70 | out._array[i - 1] = arr[i] * v; 71 | } 72 | 73 | return out; */ 74 | 75 | return Vector (M.transpose() * V).remove_top(); 76 | } 77 | 78 | /** 79 | * Computes V * (Vt)^T (transpose). Speed-up comes from the fact that we avoid 80 | * creating the transpose vector. 81 | */ 82 | template 83 | Matrix vvt_mult(const Vector &V, const Vector &Vt) 84 | { 85 | size_t rs = V.size(); 86 | size_t cs = Vt.size(); 87 | 88 | size_t n = rs * cs; 89 | 90 | /* T *tmp = new T[n]; 91 | for (size_t i = 0; i < n; i++) 92 | tmp[i] = V._array[i / cs] * Vt._array[i % cs]; 93 | 94 | return Matrix (rs, cs, tmp, false); */ 95 | 96 | return Matrix (rs, cs, [&] (size_t i, size_t j) { 97 | return V._array[i] * Vt._array[j]; 98 | }); 99 | } 100 | 101 | } 102 | 103 | #endif 104 | -------------------------------------------------------------------------------- /include/counter.hpp: -------------------------------------------------------------------------------- 1 | #ifndef COUNTER_H_ 2 | #define COUNTER_H_ 3 | 4 | namespace zhetapi { 5 | 6 | template 7 | class Counter { 8 | T _min; 9 | T _max; 10 | T _alpha; 11 | 12 | T _count; 13 | public: 14 | Counter(T, T, T); 15 | 16 | T operator()() const; 17 | }; 18 | 19 | template 20 | Counter ::Counter(T mn, T mx, T alpha) : _min(mn), _max(mx), 21 | _alpha(alpha) {} 22 | 23 | template 24 | T Counter ::operator()() const 25 | { 26 | return (_count = min(max(_count + _alpha, _min), _max)); 27 | } 28 | 29 | } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /include/cuda/error.cuh: -------------------------------------------------------------------------------- 1 | #ifndef ERROR_CUH_ 2 | #define ERROR_CUH_ 3 | 4 | cudaError_t e; 5 | 6 | // Error checking macro 7 | #define cudaCheckError(addr) \ 8 | e = cudaGetLastError(); \ 9 | if (e != cudaSuccess) { \ 10 | printf("Cuda failure %s:%d: '%s' (addr = %p)\n", __FILE__, \ 11 | __LINE__, cudaGetErrorString(e), addr); \ 12 | exit(-1); \ 13 | } 14 | 15 | // Allocation 16 | #define cuda_device_alloc(ptr, size) \ 17 | cudaMalloc(ptr, size); \ 18 | cudaCheckError(ptr); 19 | 20 | // Copying 21 | #define cuda_host_to_device_memcpy(dst, src, size) \ 22 | cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice); \ 23 | cudaCheckError(dst); 24 | 25 | #define cuda_device_to_host_memcpy(dst, src, size) \ 26 | cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost); \ 27 | cudaCheckError(dst); 28 | 29 | // Deallocation 30 | #define cuda_device_free(ptr) { \ 31 | cudaFree(ptr); \ 32 | cudaCheckError(ptr); \ 33 | } 34 | 35 | // Memory status 36 | void cuda_check_memory_status(const char *file, size_t line) 37 | { 38 | size_t free_mem; 39 | size_t total_mem; 40 | 41 | cudaMemGetInfo(&free_mem, &total_mem); 42 | 43 | printf("At [%s:%lu]: %lu bytes total, of which %lu bytes are free.\n", 44 | file, line, total_mem, free_mem); 45 | } 46 | 47 | #define cuda_show_mem() \ 48 | cuda_check_memory_status(__FILE__, __LINE__); 49 | 50 | #endif 51 | 
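The checking macros above wrap the raw CUDA runtime calls with immediate error reporting. A typical sequence, as a sketch (pointer and sizes are illustrative):

\code{.cpp}
float *dev = nullptr;
cuda_device_alloc(&dev, 256 * sizeof(float));          // cudaMalloc + check

float host[256] = {};
cuda_host_to_device_memcpy(dev, host, sizeof(host));   // upload + check
cuda_device_to_host_memcpy(host, dev, sizeof(host));   // download + check

cuda_device_free(dev);                                 // cudaFree + check
cuda_show_mem();   // prints total and free device memory with file:line
\endcode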
-------------------------------------------------------------------------------- /include/cuda/essentials.cuh: -------------------------------------------------------------------------------- 1 | #ifndef ESSENTIALS_H_ 2 | #define ESSENTIALS_H_ 3 | 4 | #ifdef __CUDACC__ 5 | 6 | #define __cuda_dual__ __host__ __device__ 7 | 8 | #else 9 | 10 | #define __cuda_dual__ 11 | 12 | #endif // CUDA active 13 | 14 | // Use when we want to define a new variable cudaError_t error 15 | #define __cuda_check_error() \ 16 | cudaError_t error = cudaGetLastError(); \ 17 | if (error != cudaSuccess) { \ 18 | printf("CUDA error: %s\n", \ 19 | cudaGetErrorString(error)); \ 20 | exit(-1); \ 21 | } 22 | 23 | // Use when cudaError_t error has already been defined 24 | #define __cuda_check_perror() \ 25 | error = cudaGetLastError(); \ 26 | if (error != cudaSuccess) { \ 27 | printf("CUDA error: %s\n", \ 28 | cudaGetErrorString(error)); \ 29 | exit(-1); \ 30 | } 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /include/cuda/matrix.cuh: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX_CUH_ 2 | #define MATRIX_CUH_ 3 | 4 | namespace zhetapi { 5 | 6 | 7 | 8 | } 9 | 10 | #endif -------------------------------------------------------------------------------- /include/cuda/nvarena.cuh: -------------------------------------------------------------------------------- 1 | #ifndef NVARENA_CUH_ 2 | #define NVARENA_CUH_ 3 | 4 | #define ZHP_CUDA 5 | 6 | // C/C++ headers 7 | #include <map> 8 | #include <stdexcept> 9 | 10 | // Engine headers 11 | #include 12 | 13 | // Namespace external functions 14 | struct __addr_cmp { 15 | bool operator()(void *a, void *b) { 16 | return (intptr_t) a < (intptr_t) b; 17 | } 18 | }; 19 | 20 | namespace zhetapi { 21 | 22 | /** 23 | * @brief An allocator class for Nvidia GPUs. Has additional features like 24 | * warnings for memory leaks and copy bound errors. Overall, it is a more convenient 25 | * interface to GPU memory than standard CUDA operations like \c cudaMalloc and 26 | * \c cudaMemcpy. 27 | */ 28 | class NVArena { 29 | public: 30 | // TODO: need to select the specific GPU 31 | using memmap = std::map <void *, size_t, __addr_cmp>; 32 | 33 | // TODO: bad alloc 34 | 35 | /** 36 | * @brief This exception is thrown if the user tries to free a piece of 37 | * memory that was never allocated. 38 | */ 39 | class segfault : public std::runtime_error { 40 | public: 41 | segfault() : std::runtime_error("NVArena: segmentation fault.") {} 42 | }; 43 | 44 | /** 45 | * @brief This exception is thrown if the user frees a piece of memory 46 | * more than once. The allocator keeps track of all allocated blocks for 47 | * this.
48 | */ 49 | class double_free : public std::runtime_error { 50 | public: 51 | double_free() : std::runtime_error("NVArena: double free.") {} 52 | }; 53 | private: 54 | // Whole pool 55 | void * _pool = nullptr; 56 | 57 | // Free list (ordered by address) 58 | memmap _flist; 59 | 60 | // Warning flag 61 | bool _warn = true; 62 | public: 63 | explicit NVArena(size_t); 64 | 65 | // Disable copying of any sort 66 | NVArena(const NVArena &) = delete; 67 | NVArena &operator=(const NVArena &) = delete; 68 | 69 | ~NVArena(); 70 | 71 | // Allocation 72 | void *alloc(size_t = 1); 73 | 74 | template 75 | T *alloc(size_t = 1); 76 | 77 | // Deallocation 78 | void free(void *); 79 | 80 | template 81 | void free(T *); 82 | 83 | // TODO: Warn with memcpy 84 | void write(void *, void *, size_t); 85 | void read(void *, void *, size_t); 86 | 87 | // void memcpy(void *, size_t); 88 | 89 | // Only allow template for homogenous pointers 90 | // (no implicit size for heterogenous types) 91 | template 92 | void write(T *, T *, size_t = 1); 93 | 94 | template 95 | void read(T *, T *, size_t = 1); 96 | 97 | // Memory map 98 | void show_mem_map() const; 99 | }; 100 | 101 | /** 102 | * @brief Allocates a block of items of a specific type. 103 | * 104 | * @tparam t the specific type of item to allocate. 105 | * 106 | * @param items the number of items to allocate. 107 | * 108 | * @return the allocated block. 109 | */ 110 | template 111 | T *NVArena::alloc(size_t items) 112 | { 113 | void *data = alloc(items * sizeof(T)); 114 | 115 | return (T *) data; 116 | } 117 | 118 | /** 119 | * @brief Frees a block of items of a specific type. 120 | * 121 | * @tparam T the specific type of item to free. 122 | * 123 | * @param ptr the block of memory to be freed. 124 | */ 125 | template 126 | void NVArena::free(T *ptr) 127 | { 128 | free((void *) ptr); 129 | } 130 | 131 | /** 132 | * @brief Copies a block of memory from host memory to GPU memory, using \c 133 | * cudaMemcpy. 134 | * 135 | * @tparam T the type of each element in the blocks of memory. 136 | * 137 | * @param dst the pointer to the destination in GPU memory. 138 | * @param src the pointer to the block in host memory. 139 | * @param n the number of items to copy (note that this copies `n * 140 | * sizeof(T)` bytes in total). 141 | */ 142 | template 143 | void NVArena::write(T *dst, T *src, size_t n) 144 | { 145 | write((void *) dst, (void *) src, n * sizeof(T)); 146 | } 147 | 148 | /** 149 | * @brief Copies a block of memory from GPU memory to host memory, using \c 150 | * cudaMemcpy. 151 | * 152 | * @tparam T the type of each element in the blocks of memory. 153 | * 154 | * @param dst the pointer to the destination in host memory. 155 | * @param src the pointer to the block in GPU memory. 156 | * @param n the number of items to copy (note that this copies `n * 157 | * sizeof(T)` bytes in total). 
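Taken together, the templated helpers give a compact allocate/copy/free cycle. A minimal sketch, assuming the constructor takes the pool size in bytes:

\code{.cpp}
zhetapi::NVArena arena(1 << 20);   // 1 MB pool (unit is an assumption)

float *dev = arena.alloc <float> (256);   // 256 floats on the device

float host[256] = {};
arena.write(dev, host, 256);   // host -> GPU, 256 * sizeof(float) bytes
arena.read(host, dev, 256);    // GPU -> host

arena.free(dev);   // a second free(dev) would throw NVArena::double_free
\endcode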
158 | */ 159 | template 160 | void NVArena::read(T *dst, T *src, size_t n) 161 | { 162 | read((void *) dst, (void *) src, n * sizeof(T)); 163 | } 164 | 165 | } 166 | 167 | #endif 168 | -------------------------------------------------------------------------------- /include/cuda/vector.cuh: -------------------------------------------------------------------------------- 1 | #ifndef VECTOR_CUH_ 2 | #define VECTOR_CUH_ 3 | 4 | namespace zhetapi { 5 | 6 | // hc is half copy vector 7 | template 8 | void Vector ::cuda_read(Vector *hc) 9 | { 10 | if (hc->_size != this->_size) { 11 | // Add a clear array function (and clear dim) 12 | delete[] this->_array; 13 | 14 | this->_array = new T[hc->_size]; 15 | } 16 | 17 | hc->_arena->read(this->_array, hc->_array, hc->_size); 18 | hc->_arena->read(this->_dim, hc->_dim, 2); 19 | } 20 | 21 | // returns a vector with only _dim and _array in device memory 22 | // requires the callee to be fully in host memory 23 | template 24 | Vector *Vector ::cuda_half_copy(NVArena *arena) const 25 | { 26 | size_t *dim = arena->alloc (2); 27 | T *array = arena->alloc (this->_size); 28 | 29 | arena->write(dim, this->_dim, 2); 30 | arena->write(array, this->_array, this->_size); 31 | 32 | // Host copy 33 | Vector *hc = new Vector ; 34 | memcpy(hc, this, sizeof(Vector )); 35 | 36 | // Edit hc with the correct values 37 | hc->_array = array; 38 | hc->_dim = dim; 39 | hc->_on_device = true; 40 | hc->_arena = arena; 41 | 42 | return hc; 43 | } 44 | 45 | // returns a vector fully in device memory 46 | // requires the callee to be partially in device memory (_dim and _array) 47 | template 48 | Vector *Vector ::cuda_full_copy(NVArena *arena) 49 | { 50 | Vector *fc = arena->alloc > (); 51 | arena->write(fc, this); 52 | // cudaMemcpy(fc, this, sizeof(Vector ), cudaMemcpyHostToDevice); 53 | return fc; 54 | } 55 | 56 | } 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /include/dataset.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DATASET_H_ 2 | #define DATASET_H_ 3 | 4 | #ifndef __AVR 5 | 6 | // C/C++ headers 7 | #include 8 | 9 | // Engine headers 10 | #include "vector.hpp" 11 | 12 | namespace zhetapi { 13 | 14 | template 15 | using DataSet = std::vector >; 16 | 17 | template 18 | std::vector > split(const DataSet &dset, size_t len) 19 | { 20 | std::vector > batched; 21 | 22 | DataSet batch; 23 | 24 | size_t size = dset.size(); 25 | for (int i = 0; i < size; i++) { 26 | batch.push_back(dset[i]); 27 | 28 | if (i % len == len - 1 || i == size - 1) { 29 | batched.push_back(batch); 30 | 31 | batch.clear(); 32 | } 33 | } 34 | 35 | return batched; 36 | } 37 | 38 | // General sets of N-dimensional data 39 | template 40 | class NumericalData { 41 | // TODO: make more efficient? 42 | Vector _stddev() { 43 | Vector sum(N, 0); 44 | for (const auto &vec : dataset) { 45 | Vector dx = (vec - _mean); 46 | sum += shur(dx, dx); 47 | } 48 | return sum/(dataset.size() - sample); 49 | } 50 | 51 | // TODO: fixed vector? 
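The `split` helper above chops a `DataSet` into contiguous batches, the last of which simply absorbs any remainder. For instance:

\code{.cpp}
// Ten 2-component samples, batch length 4  ->  batch sizes 4, 4, 2
zhetapi::DataSet <double> dset(10, zhetapi::Vector <double> (2, 1.0));

auto batches = zhetapi::split(dset, 4);
// batches.size() == 3 and batches[2].size() == 2
\endcode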
52 | Vector _mean() { 53 | Vector sum(N, 0); 54 | for (const auto &vec : dataset) 55 | sum += vec; 56 | return sum/dataset.size(); 57 | } 58 | public: 59 | DataSet dataset; 60 | Vector mean; 61 | Vector stddev; 62 | bool sample; 63 | 64 | // Sample indicates whether the dataset is a 65 | // sample or the entire population 66 | NumericalData(const DataSet &set, bool sample = false) 67 | : dataset(set), mean(_mean()), 68 | stddev(_stddev()), sample(sample) {} 69 | }; 70 | 71 | // Dimensional 72 | template 73 | using BivariateData = NumericalData ; 74 | 75 | } 76 | 77 | #else 78 | 79 | #warning Zhetapi does not support zhetapi::Dataset for AVR systems. 80 | 81 | #endif 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /include/display.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DISPLAY_H_ 2 | #define DISPLAY_H_ 3 | 4 | #ifndef __AVR // Does not support AVR 5 | 6 | #include 7 | 8 | #endif // Does not support AVR 9 | 10 | namespace zhetapi { 11 | 12 | /** 13 | * Display: 14 | * 15 | * Display is a struct of display options during neural network training. 16 | */ 17 | struct Display { 18 | typedef uint8_t type; 19 | 20 | static const uint8_t epoch; 21 | static const uint8_t batch; 22 | static const uint8_t graph; 23 | }; 24 | 25 | } 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /include/dnnopt.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DNN_OPT_H_ 2 | #define DNN_OPT_H_ 3 | 4 | // Engine headers 5 | #include "dataset.hpp" 6 | #include "gradient.hpp" 7 | 8 | namespace zhetapi { 9 | 10 | template 11 | class DnnOpt { 12 | protected: 13 | // Cached 14 | Vector * __a = nullptr; 15 | Vector * __z = nullptr; 16 | 17 | T __eta = 0; 18 | 19 | size_t __size = 0; 20 | 21 | bool __switch = false; 22 | 23 | // Functions 24 | DnnOpt(T); 25 | 26 | virtual Matrix *raw_gradient( 27 | Layer *, 28 | size_t, 29 | const Vector &, 30 | const Vector &, 31 | Erf *); 32 | 33 | virtual Matrix *raw_batch_gradient( 34 | Layer *, 35 | size_t, 36 | const DataSet &, 37 | const DataSet &, 38 | Erf *); 39 | 40 | virtual Matrix *update( 41 | Matrix *, 42 | size_t) = 0; 43 | public: 44 | virtual ~DnnOpt(); 45 | 46 | void set_learning_rate(T); 47 | 48 | Matrix *gradient( 49 | Layer *, 50 | size_t, 51 | const Vector &, 52 | const Vector &, 53 | Erf *); 54 | 55 | Matrix *batch_gradient( 56 | Layer *, 57 | size_t, 58 | const DataSet &, 59 | const DataSet &, 60 | Erf *); 61 | }; 62 | 63 | template 64 | DnnOpt ::DnnOpt(T lr) : __eta(lr) {} 65 | 66 | template 67 | DnnOpt ::~DnnOpt() 68 | { 69 | delete[] __a; 70 | delete[] __z; 71 | } 72 | 73 | template 74 | void DnnOpt ::set_learning_rate(T lr) 75 | { 76 | __eta = lr; 77 | } 78 | 79 | template 80 | Matrix *DnnOpt ::raw_gradient( 81 | Layer *layers, 82 | size_t size, 83 | const Vector &in, 84 | const Vector &out, 85 | Erf *cost) 86 | { 87 | if (size != __size) { 88 | delete[] __a; 89 | delete[] __z; 90 | 91 | __size = size; 92 | 93 | __a = new Vector [__size + 1]; 94 | __z = new Vector [__size]; 95 | 96 | __switch = true; 97 | } else { 98 | __switch = false; 99 | } 100 | 101 | return simple_gradient( 102 | layers, 103 | size, 104 | __a, 105 | __z, 106 | in, 107 | out, 108 | cost); 109 | } 110 | 111 | template 112 | Matrix *DnnOpt ::raw_batch_gradient( 113 | Layer *layers, 114 | size_t size, 115 | const DataSet &ins, 116 | const DataSet &outs, 117 | Erf *cost) 118 | { 119 | 
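Stepping back to dataset.hpp for a moment: `NumericalData` computes its statistics once at construction, and the `sample` flag applies Bessel's correction (divisor \f$n - 1\f$ instead of \f$n\f$). Note that `_stddev`, as written, returns the component-wise variance; the square root is never taken. A sketch, assuming `<T, N>` template parameters:

\code{.cpp}
zhetapi::DataSet <double> points {
	zhetapi::Vector <double> (2, 0.0),   // (0, 0)
	zhetapi::Vector <double> (2, 2.0)    // (2, 2)
};

// Treat the data as a sample of a larger population
zhetapi::NumericalData <double, 2> data(points, true);

// data.mean   == (1, 1)
// data.stddev == (2, 2): component-wise sample variance, no square root
\endcode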
if (size != __size) { 120 | delete[] __a; 121 | delete[] __z; 122 | 123 | __size = size; 124 | 125 | __a = new Vector [__size + 1]; 126 | __z = new Vector [__size]; 127 | 128 | __switch = true; 129 | } else { 130 | __switch = false; 131 | } 132 | 133 | return simple_batch_gradient( 134 | layers, 135 | size, 136 | __a, 137 | __z, 138 | ins, 139 | outs, 140 | cost); 141 | } 142 | 143 | template 144 | Matrix *DnnOpt ::gradient( 145 | Layer *layers, 146 | size_t size, 147 | const Vector &in, 148 | const Vector &out, 149 | Erf *cost) 150 | { 151 | 152 | return update(raw_gradient( 153 | layers, 154 | size, 155 | in, 156 | out, 157 | cost), size); 158 | } 159 | 160 | template 161 | Matrix *DnnOpt ::batch_gradient( 162 | Layer *layers, 163 | size_t size, 164 | const DataSet &ins, 165 | const DataSet &outs, 166 | Erf *cost) 167 | { 168 | return update(raw_batch_gradient( 169 | layers, 170 | size, 171 | ins, 172 | outs, 173 | cost), size); 174 | } 175 | 176 | } 177 | 178 | } 179 | 180 | #endif 181 | -------------------------------------------------------------------------------- /include/engine.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ENGINE_H_ 2 | #define ENGINE_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | 7 | // Engine headers 8 | #include "function.hpp" 9 | #include "registration.hpp" 10 | 11 | #include "core/algorithm.hpp" 12 | #include "core/common.hpp" 13 | 14 | namespace zhetapi { 15 | 16 | // Aliases 17 | template 18 | using Symtab = std::unordered_map ; 19 | 20 | class Engine { 21 | // Broader scope 22 | Engine * _stack = nullptr; 23 | 24 | Symtab _var_table; 25 | 26 | // Private methods 27 | void set_origin_stack(Engine *); 28 | public: 29 | Engine(bool = false); 30 | Engine(const Engine &); 31 | 32 | Engine &operator=(const Engine &); 33 | 34 | ~Engine(); 35 | 36 | // List all symbols 37 | Args symbol_list() const; 38 | 39 | // Actions 40 | Engine *new_stack(); 41 | Engine *get_stack(); 42 | 43 | void put(const std::string &, Token *); 44 | 45 | Token *get(const std::string &); 46 | 47 | void list() const; 48 | void list_registered(std::string) const; 49 | }; 50 | 51 | Engine *push_and_ret_stack(Engine *); 52 | Engine *pop_and_del_stack(Engine *); 53 | 54 | } 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /include/equation.hpp: -------------------------------------------------------------------------------- 1 | #ifndef EQUATION_H_ 2 | #define EQUATION_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | 8 | // Engine headers 9 | #include "engine.hpp" 10 | 11 | namespace zhetapi { 12 | 13 | using Solutions = std::vector >; 14 | 15 | class Equation { 16 | std::vector _expressions = {}; 17 | std::vector _args = {}; 18 | Engine * _engine = nullptr; 19 | public: 20 | Equation(const std::vector &); 21 | 22 | // Properties 23 | size_t args() const; 24 | 25 | // Methods 26 | Solutions solve() const; 27 | 28 | std::string representation() const; 29 | 30 | // Exceptions 31 | class bad_input_size : std::runtime_error { 32 | public: 33 | bad_input_size() 34 | : std::runtime_error("Bad input size for equation") {} 35 | }; 36 | }; 37 | 38 | std::ostream &operator<<(std::ostream &, const Equation &); 39 | 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /include/erf.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ERF_H_ 2 | #define ERF_H_ 3 | 4 | #ifndef __AVR // Does not 
support AVR 5 | 6 | // C/C++ headers 7 | #include 8 | 9 | #endif // Does not support AVR 10 | 11 | // Engine headers 12 | #ifdef ZHP_CUDA 13 | 14 | #include "cuda/vector.cuh" 15 | 16 | #else 17 | 18 | #include "vector.hpp" 19 | 20 | #endif 21 | 22 | #include "cuda/essentials.cuh" 23 | 24 | namespace zhetapi { 25 | 26 | namespace ml { 27 | 28 | template 29 | class Erf { 30 | public: 31 | // TODO: Replace with a string 32 | enum erf_type { 33 | OPT_Default, 34 | OPT_SE, 35 | OPT_MSE, 36 | }; 37 | 38 | // TODO: Add a vector constructor for JSON 39 | __cuda_dual__ 40 | Erf(); 41 | 42 | __cuda_dual__ 43 | Vector compute(const Vector &, const Vector &) const; 44 | 45 | __cuda_dual__ 46 | virtual Vector operator()(const Vector &, const Vector &) const; 47 | 48 | __cuda_dual__ 49 | virtual Erf *derivative() const; 50 | 51 | __cuda_dual__ 52 | int get_erf_type() const; 53 | 54 | template 55 | __cuda_dual__ 56 | friend Erf *copy(Erf *); 57 | 58 | // Exceptions 59 | class dimension_mismatch {}; 60 | protected: 61 | static void assert_size(const Vector &, const Vector &); 62 | 63 | erf_type kind; 64 | }; 65 | 66 | template 67 | void Erf ::assert_size(const Vector &a, const Vector &b) 68 | { 69 | if (a.size() != b.size()) 70 | throw dimension_mismatch(); 71 | } 72 | 73 | #ifndef ZHP_CUDA 74 | 75 | template 76 | Erf ::Erf() : kind(OPT_Default) {} 77 | 78 | // TODO: Reverse compute and operator() 79 | template 80 | Vector Erf ::operator()(const Vector &comp, const Vector &in) const 81 | { 82 | return {(comp - in).norm()}; 83 | } 84 | 85 | template 86 | Vector Erf ::compute(const Vector &comp, const Vector &in) const 87 | { 88 | return (*this)(comp, in); 89 | } 90 | 91 | template 92 | Erf *Erf ::derivative() const 93 | { 94 | return new Erf(); 95 | } 96 | 97 | template 98 | int Erf ::get_erf_type() const 99 | { 100 | return kind; 101 | } 102 | 103 | #endif 104 | 105 | } 106 | 107 | } 108 | 109 | #endif 110 | -------------------------------------------------------------------------------- /include/field.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Standard headers 4 | #include 5 | 6 | namespace zhetapi { 7 | 8 | template 9 | class Tensor; 10 | 11 | #ifndef __CUDACC__ 12 | 13 | // Concept for objects which behave and interact like Matrices... 
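Returning briefly to the `Erf` interface above: a custom error functional only needs to override `operator()`. Here is a mean-squared-error sketch; it assumes `norm()` is the Euclidean norm used by the default `Erf`, and reuses the protected `assert_size` check:

\code{.cpp}
template <class T>
class MSE : public zhetapi::ml::Erf <T> {
public:
	zhetapi::Vector <T> operator()(const zhetapi::Vector <T> &comp,
			const zhetapi::Vector <T> &in) const override {
		zhetapi::ml::Erf <T>::assert_size(comp, in);

		T nrm = (comp - in).norm();
		return { nrm * nrm / T(comp.size()) };
	}
};
\endcode

A full implementation would also override `derivative()` to supply the corresponding gradient functional.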
14 | template <class T, class Type> 15 | concept concept_field = std::is_base_of_v <Tensor <T>, Type> && \ 16 | requires (Type &a, Type &b, const Type &ct, T k) { 17 | // Properties 18 | {a.length()} -> std::convertible_to <T>; 19 | 20 | // Operations 21 | {a + b} -> std::convertible_to <Type>; 22 | {a - b} -> std::convertible_to <Type>; 23 | {k * ct} -> std::convertible_to <Type>; 24 | {ct * k} -> std::convertible_to <Type>; 25 | {ct / k} -> std::convertible_to <Type>; 26 | 27 | // Functions 28 | {min(a)} -> std::convertible_to <T>; 29 | {max(a)} -> std::convertible_to <T>; 30 | }; 31 | 32 | // Derivable struct to ensure that a type behaves like a Field 33 | template <class T, class Type> 34 | struct Field { 35 | Field() { 36 | static_assert( 37 | concept_field <T, Type>, 38 | "Type does not behave like a Field" 39 | ); 40 | } 41 | }; 42 | 43 | #else 44 | 45 | // Derivable struct to ensure that a type behaves like a Field 46 | template <class T, class Type> 47 | struct Field { 48 | Field() { 49 | static_assert( 50 | std::is_base_of_v <Tensor <T>, Type>, 51 | "Type does not behave like a Field" 52 | ); 53 | } 54 | }; 55 | 56 | #endif 57 | 58 | } 59 | -------------------------------------------------------------------------------- /include/filter.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FILTER_H_ 2 | #define FILTER_H_ 3 | 4 | // Engine headers 5 | #include "tensor.hpp" 6 | 7 | namespace zhetapi { 8 | 9 | namespace ml { 10 | 11 | // Type aliases 12 | template <class T> 13 | using Pipe = std::vector <Tensor <T> *>; 14 | 15 | template <class T> 16 | class Filter { 17 | public: 18 | /** 19 | * @brief Process method: takes in a set of inputs, performs the 20 | * necessary computations, and places the results into the locations 21 | * specified by the second vector of pointers. Note that the inputs 22 | * are also passed as a list of pointers. 23 | */ 24 | virtual void propogate(const Pipe <T> &, Pipe <T> &) = 0; 25 | virtual void gradient(const Pipe <T> &, Pipe <T> &) = 0; 26 | virtual void apply_gradient(const Pipe <T> &) = 0; 27 | }; 28 | 29 | } 30 | 31 | } 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /include/fourier.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FOURIER_H_ 2 | #define FOURIER_H_ 3 | 4 | // Engine headers 5 | #include "vector.hpp" 6 | 7 | namespace zhetapi { 8 | 9 | template <class T> 10 | class FourierSeries { 11 | Vector <T> _a; // Cosine coefficients 12 | Vector <T> _b; // Sine coefficients 13 | 14 | size_t _asize; 15 | size_t _bsize; 16 | public: 17 | FourierSeries(const Vector <T> &); 18 | FourierSeries(const Vector <T> &, const Vector <T> &); 19 | 20 | T evaluate(const T &) const; 21 | T operator()(const T &) const; 22 | }; 23 | 24 | // TODO: Fix indices 25 | // 26 | // a0, a1, b1, a2, b2, ...
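This flat layout corresponds to the truncated series

\f[
f(x) \approx \frac{a_0}{2} + \sum_{n = 1}^{N} \big( a_n \cos(nx) + b_n \sin(nx) \big),
\f]

with the \f$\tfrac{1}{2}\f$ on \f$a_0\f$ supplied by the `k_cos` kernel below, which returns \f$0.5\f$ at index 0.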
27 | template <class T> 28 | FourierSeries <T> ::FourierSeries(const Vector <T> &coeffs) 29 | { 30 | // Remove vector middle operation 31 | std::vector <T> a; 32 | std::vector <T> b; 33 | 34 | size_t n = coeffs.size(); 35 | 36 | size_t i = 0; 37 | 38 | a.push_back(coeffs[i++]); 39 | while (i + 1 < n) { 40 | a.push_back(coeffs[i]); 41 | b.push_back(coeffs[i + 1]); 42 | 43 | i += 2; 44 | } 45 | 46 | _asize = a.size(); 47 | _bsize = b.size(); 48 | 49 | _a = Vector <T> (a); 50 | _b = Vector <T> (b); 51 | } 52 | 53 | template <class T> 54 | FourierSeries <T> ::FourierSeries(const Vector <T> &a, const Vector <T> &b) 55 | : _a(a), _b(b), _asize(a.size()), _bsize(b.size()) {} 56 | 57 | template <class T> 58 | T FourierSeries <T> ::evaluate(const T &x) const 59 | { 60 | // Make more efficient construction kernels for vectors 61 | // (maybe don't even create a vector: a custom kernel 62 | // for modified inner products) 63 | Vector <T> k_cos(_asize, 64 | [&](size_t i) { 65 | if (i == 0) 66 | return T(0.5); 67 | 68 | return std::cos(i * x); 69 | } 70 | ); 71 | 72 | Vector <T> k_sin(_bsize, 73 | [&](size_t i) { 74 | return std::sin((i + 1) * x); 75 | } 76 | ); 77 | 78 | return inner(_a, k_cos) + inner(_b, k_sin); 79 | } 80 | 81 | template <class T> 82 | T FourierSeries <T> ::operator()(const T &x) const 83 | { 84 | return evaluate(x); 85 | } 86 | 87 | } 88 | 89 | #endif 90 | -------------------------------------------------------------------------------- /include/function.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FUNCTION_H_ 2 | #define FUNCTION_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | // Engine headers 11 | #include "core/common.hpp" 12 | #include "core/functor.hpp" 13 | #include "core/node_manager.hpp" 14 | #include "core/method_table.hpp" 15 | 16 | namespace zhetapi { 17 | 18 | class Function : public Functor { 19 | std::string _symbol; 20 | std::vector <std::string> _params; 21 | node_manager _manager; 22 | size_t _threads; 23 | public: 24 | Function(); 25 | Function(const char *); 26 | Function(const std::string &, Engine * = shared_context); 27 | 28 | Function(const std::string &, 29 | const std::vector <std::string> &, 30 | const node_manager &); 31 | 32 | Function(const Function &); 33 | 34 | bool is_variable(const std::string &) const; 35 | 36 | std::string &symbol(); 37 | const std::string symbol() const; 38 | 39 | void set_threads(size_t); 40 | 41 | Token *evaluate(Engine *, const std::vector <Token *> &) override; 42 | 43 | Token *compute(const std::vector <Token *> &, Engine * = shared_context); 44 | Token *operator()(const std::vector <Token *> &, Engine * = shared_context); 45 | 46 | template <class ... A> 47 | Token *operator()(A ...); 48 | 49 | template 50 | Token *operator()(A ...); 51 | 52 | template <class ... A> 53 | Token *derivative(const std::string &, A ...); 54 | 55 | Function differentiate(const std::string &) const; 56 | 57 | friend bool operator<(const Function &, const Function &); 58 | friend bool operator>(const Function &, const Function &); 59 | 60 | // Virtual overloads 61 | Token::type caller() const override; 62 | std::string dbg_str() const override; 63 | Token *copy() const override; 64 | bool operator==(Token *) const override; 65 | 66 | // Printing 67 | void print() const; 68 | 69 | std::string display() const; 70 | 71 | 72 | friend std::ostream &operator<<(std::ostream &, const Function &); 73 | private: 74 | template <class A> 75 | void gather(std::vector <Token *> &, A); 76 | 77 | template <class A, class ... B> 78 | void gather(std::vector <Token *> &, A, B ...); 79 | 80 | size_t index(const std::string &) const; 81 | public: 82 | // Exception classes 83 | class invalid_definition {}; 84 | 85 |
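For orientation, the intended call pattern for `Function` is sketched below; the `"f(x) = ..."` definition syntax is an assumption extrapolated from the string constructors above:

\code{.cpp}
// Assumed definition syntax; parsing is handled by node_manager
zhetapi::Function f("f(x) = x^2 + 1");

// The variadic operator() wraps each raw argument in an Operand
zhetapi::Token *out = f(2.0);   // evaluates to 5
\endcode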
// Static variables 86 | static Engine *shared_context; 87 | static double h; 88 | 89 | // Methods 90 | friend ZHP_TOKEN_METHOD(ftn_deriv_method); 91 | 92 | // Static method table 93 | static MethodTable mtable; 94 | }; 95 | 96 | template 97 | Token *Function::operator()(A ... args) 98 | { 99 | std::vector tokens; 100 | 101 | gather(tokens, args...); 102 | 103 | assert(tokens.size() == _params.size()); 104 | 105 | return _manager.substitute_and_compute(shared_context, tokens); 106 | } 107 | 108 | // Gathering facilities 109 | template 110 | void Function::gather(std::vector &toks, A in) 111 | { 112 | toks.push_back(new Operand (in)); 113 | } 114 | 115 | template 116 | void Function::gather(std::vector &toks, A in, B ... args) 117 | { 118 | toks.push_back(new Operand (in)); 119 | 120 | gather(toks, args...); 121 | } 122 | 123 | } 124 | 125 | #endif 126 | -------------------------------------------------------------------------------- /include/gnn.hpp: -------------------------------------------------------------------------------- 1 | #ifndef GNN_H_ 2 | #define GNN_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | 9 | // Engine headers 10 | #include "netnode.hpp" 11 | 12 | namespace zhetapi { 13 | 14 | namespace ml { 15 | 16 | /** 17 | * @brief General neural network (GNN): 18 | * 19 | * Represents a neural network whose structure is theoretically unlimited, i.e. 20 | * a neural network with various kinds of connections (skip connections, etc.) 21 | * and different types of layers (deep layer, convolutional layer, recurrent layer). 22 | * 23 | * The true representation of the network is as a series of "pipes" between 24 | * nodes. Each of these pipes contains a Tensor object representing the pipes 25 | * current state of execution. The two most important sets of pipes are the 26 | * input and output pipes which, as their name implies, carry the inputs and 27 | * outputs. 28 | */ 29 | template 30 | class GNN { 31 | std::vector *> _ins = {}; 32 | std::vector *> _outs = {}; 33 | 34 | // Variadic constructor helpers 35 | void init(NetNode *); 36 | 37 | template 38 | void init(NetNode *, U ...); 39 | 40 | // Initialize outs 41 | void getouts(); 42 | public: 43 | GNN(); 44 | explicit GNN(NetNode *); 45 | explicit GNN(const std::vector *> &); 46 | 47 | // Variadic constructor 48 | template 49 | explicit GNN(U ...); 50 | 51 | // Extraction 52 | inline NetNode &ipipe(size_t); 53 | inline NetNode &operator[](size_t); 54 | 55 | // Retrieval 56 | inline const NetNode &opipe(size_t) const; 57 | inline const NetNode &operator[](size_t) const; 58 | 59 | // Passing 60 | void pass(std::vector > &) const; 61 | void pass(std::vector > &&) const; 62 | 63 | void trace() const; 64 | }; 65 | 66 | template 67 | GNN ::GNN() {} 68 | 69 | template 70 | GNN ::GNN(NetNode *nnptr) : _ins({nnptr}) 71 | { 72 | getouts(); 73 | } 74 | 75 | template 76 | GNN ::GNN(const std::vector *> &ins) 77 | : _ins(ins) {} 78 | 79 | template 80 | template 81 | GNN ::GNN(U ... args) 82 | { 83 | init(args...); 84 | } 85 | 86 | template 87 | void GNN ::init(NetNode *nnptr) 88 | { 89 | _ins.push_back(nnptr); 90 | 91 | getouts(); 92 | } 93 | 94 | template 95 | template 96 | void GNN ::init(NetNode *nnptr, U ... 
args) 97 | { 98 | _ins.push_back(nnptr); 99 | 100 | init(args...); 101 | } 102 | 103 | template 104 | void GNN ::getouts() 105 | { 106 | // Set of visited nodes 107 | std::set *> vis; 108 | 109 | // BFS queue 110 | std::queue *> queue; 111 | 112 | for (NetNode *nnptr : _ins) 113 | queue.emplace(nnptr); 114 | 115 | while (!queue.empty()) { 116 | NetNode *cptr = queue.top(); 117 | 118 | queue.pop(); 119 | 120 | if (vis.find() != vis.end()) 121 | continue; 122 | 123 | auto vfrw = cptr->forward(); 124 | if (vfrw.empty()) { 125 | _outs.push_back(cptr); 126 | } else { 127 | for (auto frw : vfrw) 128 | queue.push(frw->_fr); 129 | } 130 | } 131 | } 132 | 133 | /** 134 | * @brief Modifies an input pipe (when assigned, such as `gnn.ipipe() = 135 | * tensor`). 136 | * 137 | * @param i the input pipe index. 138 | */ 139 | template 140 | inline NetNode &GNN ::ipipe(size_t i) 141 | { 142 | return *(_ins[i]); 143 | } 144 | 145 | /** 146 | * @brief Modifies an input pipe (when assigned, such as `gnn[0] = tensor`). 147 | * 148 | * @param i the input pipe index. 149 | */ 150 | template 151 | inline NetNode &GNN ::operator[](size_t i) 152 | { 153 | return *(_ins[i]); 154 | } 155 | 156 | /** 157 | * @brief Retrieves an output pipe. 158 | * 159 | * @param i the output pipe index. 160 | */ 161 | template 162 | inline const NetNode &GNN ::opipe(size_t i) const 163 | { 164 | return *(_outs[i]); 165 | } 166 | 167 | /** 168 | * @brief Retrieves an output pipe. 169 | * 170 | * @param i the output pipe index. 171 | */ 172 | template 173 | inline const NetNode &GNN ::operator[](size_t i) const 174 | { 175 | return *(_outs[i]); 176 | } 177 | 178 | // Passing 179 | template 180 | void GNN ::pass(std::vector > &args) const 181 | { 182 | size_t i = 0; 183 | while (!args.empty() && i < _ins.size()) { 184 | _ins[i].pass(args); 185 | 186 | i++; 187 | } 188 | } 189 | 190 | template 191 | void GNN ::pass(std::vector > &&rargs) const 192 | { 193 | std::vector > args = std::move(rargs); 194 | 195 | pass(args); 196 | } 197 | 198 | template 199 | void GNN ::trace() const 200 | { 201 | for (NetNode *nn : _ins) 202 | nn->trace(); 203 | } 204 | 205 | } 206 | 207 | } 208 | 209 | #endif 210 | -------------------------------------------------------------------------------- /include/image.hpp: -------------------------------------------------------------------------------- 1 | #ifndef IMAGE_H_ 2 | #define IMAGE_H_ 3 | 4 | // C++ headers 5 | #include 6 | #include 7 | #include 8 | 9 | // PNG library 10 | #include 11 | 12 | // Engine headers 13 | #include "tensor.hpp" 14 | #include "vector.hpp" 15 | 16 | namespace zhetapi { 17 | 18 | namespace image { 19 | 20 | // Global type aliases 21 | using byte = unsigned char; 22 | 23 | // Global exceptions 24 | class bad_hex_string {}; 25 | 26 | // Color structure 27 | // 28 | // TODO: Derive Color from FixedVector 29 | struct Color { 30 | byte r = 0; 31 | byte g = 0; 32 | byte b = 0; 33 | 34 | Color(); 35 | Color(const char *); // Hex constructor 36 | Color(const std::string &); // Hex constructor 37 | Color(byte = 0, byte = 0, byte = 0); // Value constructor 38 | 39 | uint32_t value() const; 40 | }; 41 | 42 | // Standard colors 43 | extern const Color RED; 44 | extern const Color GREEN; 45 | extern const Color BLUE; 46 | extern const Color YELLOW; 47 | extern const Color ORANGE; 48 | extern const Color CYAN; 49 | extern const Color WHITE; 50 | extern const Color BLACK; 51 | extern const Color GREY; 52 | 53 | /** 54 | * @brief A parametrized gradient class, from color A to B, and operating on a 55 | 
* range a to b. A value c in the range \f$[a, b]\f$ will equate to a color 56 | * appropriately in between A and B. 57 | * 58 | * This class can essentially be thought of as a slider from color A to color B 59 | * (with the slider value ranging from a to b). 60 | * 61 | * The reason we do not restrict a and b to 0 and 1 is to allow for more 62 | * meaningful values. For example, if the gradient is intended to represent 63 | * heat, the Celcius measurements in \f$[0, 100]\f$ are more meaningful to use than 64 | * are the values in \f$[0, 1]\f$. 65 | */ 66 | class Gradient { 67 | Color _base; 68 | 69 | long double _dr = 0; 70 | long double _dg = 0; 71 | long double _db = 0; 72 | 73 | long double _start = 0; 74 | long double _end = 0; 75 | public: 76 | Gradient(const Color &, const Color &, 77 | long double = 0, long double = 1); 78 | Gradient(const std::string &, const std::string &, 79 | long double = 0, long double = 1); 80 | 81 | Color get(long double); 82 | Color operator()(long double); 83 | 84 | /** 85 | * @brief Thrown in the \c get method if the passed value is out of the 86 | * bounds of the starting and ending values of the Gradient's scale. 87 | */ 88 | class bad_value : public std::runtime_error { 89 | public: 90 | bad_value(long double x) : std::runtime_error("Gradient value " 91 | + std::to_string(x) 92 | + " is out of bounds of the Gradient object's scale.") {} 93 | }; 94 | }; 95 | 96 | /** 97 | * @brief Represents an image. 98 | */ 99 | class Image : public Tensor { 100 | public: 101 | // Using declararations 102 | using pixel = std::pair ; 103 | 104 | Image(); // Default 105 | Image(size_t, size_t, size_t, byte = 0); // Value 106 | Image(size_t, size_t, size_t, const Color &); // Color 107 | Image(size_t, size_t, size_t, const std::string &); // Color 108 | Image(byte *, size_t, size_t, size_t = 1); // Contigous array 109 | Image(byte **, size_t, size_t, size_t); // List of rows 110 | Image(png_bytep *, size_t, size_t, size_t, size_t); // (Pretty much the same as above) 111 | 112 | Image(const Vector &, size_t, size_t); // Grayscale from vector 113 | 114 | size_t width() const; 115 | size_t height() const; 116 | size_t channels() const; 117 | 118 | // Pixel value setter 119 | void set(const pixel &, const Color &); // Color 120 | void set(const pixel &, size_t, byte); 121 | void set(const pixel &, const Vector &); 122 | 123 | void set_hex(const pixel &, size_t); 124 | void set_hex(const pixel &, const std::string &); 125 | 126 | // Pixel value getter 127 | uint32_t color(const pixel &) const; 128 | 129 | // Image extractors 130 | Image channel(size_t) const; 131 | Image crop(const pixel &, const pixel &) const; 132 | 133 | const unsigned char *const raw() const; 134 | 135 | unsigned char **row_bytes() const; 136 | 137 | #ifndef ZHP_NO_GUI 138 | 139 | int show() const; 140 | 141 | #endif 142 | 143 | class out_of_bounds {}; 144 | class bad_input_order {}; 145 | 146 | // Friends 147 | template 148 | friend class Convolution; 149 | protected: 150 | bool in_bounds(const pixel &) const; 151 | }; 152 | 153 | // Thrown when the file cannot be accessed (replace with std) 154 | class bad_file {}; 155 | 156 | // Thrown when the file being read is not in PNG format 157 | class bad_png {}; 158 | 159 | // Image loading and saving 160 | Image load_png(std::ifstream &); 161 | 162 | Image load_png(const char *); 163 | Image load_png(const std::string &); 164 | 165 | void save_png(const Image &, const char *); 166 | 167 | } 168 | 169 | // Literal operators 170 | image::Image 
operator""_png(const char *, size_t); 171 | 172 | } 173 | 174 | #endif 175 | -------------------------------------------------------------------------------- /include/io/print.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PRINT_H_ 2 | #define PRINT_H_ 3 | 4 | // Standard headers 5 | #include 6 | #include 7 | 8 | namespace zhetapi { 9 | 10 | namespace io { 11 | 12 | using Args = std::vector ; 13 | 14 | std::string table(const Args &, const std::vector &); 15 | 16 | } 17 | 18 | } 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /include/operand.hpp: -------------------------------------------------------------------------------- 1 | #ifndef OPERAND_H_ 2 | #define OPERAND_H_ 3 | 4 | // C++ headers 5 | #include 6 | 7 | // Engine headers 8 | #include "token.hpp" 9 | #include "core/raw_types.hpp" 10 | #include "core/common.hpp" 11 | 12 | // Macros to taste 13 | #define forward_ids(type) \ 14 | template <> \ 15 | uint8_t Operand ::id() const; 16 | 17 | namespace zhetapi { 18 | 19 | // Operand class 20 | template 21 | class Operand : public Token { 22 | T _val = T(); 23 | public: 24 | Operand(); 25 | Operand(const T &); 26 | Operand(const Operand &); 27 | 28 | Operand &operator=(const Operand &); 29 | 30 | T &get(); 31 | const T &get() const; 32 | 33 | void set(const T &); 34 | 35 | // Virtual functionss 36 | type caller() const override; 37 | std::string dbg_str() const override; 38 | Token *copy() const override; 39 | bool operator==(Token *) const override; 40 | }; 41 | 42 | // Constructors 43 | template 44 | Operand ::Operand () {} 45 | 46 | template 47 | Operand ::Operand(const T &data) : _val(data) {} 48 | 49 | template 50 | Operand ::Operand(const Operand &other) : _val(other._val) {} 51 | 52 | template 53 | Operand &Operand ::operator=(const Operand &other) 54 | { 55 | if (this != &other) 56 | _val = other._val; 57 | 58 | return *this; 59 | } 60 | 61 | // Getters and setters 62 | template 63 | T &Operand ::get() 64 | { 65 | return _val; 66 | } 67 | 68 | template 69 | const T &Operand ::get() const 70 | { 71 | return _val; 72 | } 73 | 74 | template 75 | void Operand ::set(const T &x) 76 | { 77 | _val = x; 78 | } 79 | 80 | // Virtual overrides 81 | template 82 | Token::type Operand ::caller() const 83 | { 84 | return opd; 85 | } 86 | 87 | template 88 | std::string Operand ::dbg_str() const 89 | { 90 | std::ostringstream oss; 91 | 92 | oss << _val; 93 | 94 | return oss.str(); 95 | } 96 | 97 | template 98 | Token *Operand ::copy() const 99 | { 100 | return new Operand(_val); 101 | } 102 | 103 | template 104 | bool Operand ::operator==(Token *tptr) const 105 | { 106 | Operand *opd = dynamic_cast (tptr); 107 | 108 | if (!opd) 109 | return false; 110 | 111 | return (opd->_val == _val); 112 | } 113 | 114 | // Forward declare specializations 115 | template <> 116 | std::string Operand ::dbg_str() const; 117 | 118 | template <> 119 | std::string Operand ::dbg_str() const; 120 | 121 | template <> 122 | std::string Operand > ::dbg_str() const; 123 | 124 | template <> 125 | std::string Operand > ::dbg_str() const; 126 | 127 | } 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /include/optimizer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef OPTIMIZER_H_ 2 | #define OPTIMIZER_H_ 3 | 4 | // Engine headers 5 | #include "dataset.hpp" 6 | #include "gradient.hpp" 7 | 8 | namespace zhetapi { 9 | 10 
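`Operand` is the concrete `Token` that carries raw values, which makes it the usual target of the `zhetapi_cast` helpers from cast.hpp. A small sketch:

\code{.cpp}
zhetapi::Token *t = new zhetapi::Operand <int> (42);

zhetapi::Token *u = t->copy();    // independent Operand <int> holding 42
bool eq = (*t == u);              // true: compares dynamic type and value
std::string s = t->dbg_str();     // "42"

zhetapi::Operand <int> *opd = nullptr;
bool ok = zhetapi::zhetapi_cast({t}, opd);   // recovers the typed pointer
\endcode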
| namespace ml { 11 | 12 | // Optimizer class 13 | template 14 | class Optimizer { 15 | protected: 16 | T _eta = 0; 17 | size_t _size = 0; 18 | bool _switch = false; 19 | 20 | // Functions 21 | Optimizer(T); 22 | public: 23 | void register_size(size_t); 24 | void set_learning_rate(T); 25 | 26 | virtual Matrix *update( 27 | Matrix *, 28 | size_t) = 0; 29 | }; 30 | 31 | template 32 | Optimizer ::Optimizer(T lr) : _eta(lr) {} 33 | 34 | template 35 | void Optimizer ::register_size(size_t size) 36 | { 37 | if (_size != size) { 38 | _size = size; 39 | _switch = true; 40 | } else { 41 | _switch = false; 42 | } 43 | } 44 | 45 | template 46 | void Optimizer ::set_learning_rate(T lr) 47 | { 48 | _eta = lr; 49 | } 50 | 51 | } 52 | 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /include/parametrization.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETRIZATION_H_ 2 | #define PARAMETRIZATION_H_ 3 | 4 | // C++ headers 5 | #include 6 | 7 | // Engine headers 8 | #include "vector.hpp" 9 | 10 | namespace zhetapi { 11 | 12 | /** 13 | * @brief Represents the parametrization of a curve. With parameters T, P and F, 14 | * the value of the parametrization at any value t (of type T) is \f$F(P(t))\f$ 15 | * 16 | * Mathematically, given that T is a set of some quantity representing a scalar, 17 | * then 18 | * 19 | * \f$P: T \to T^m\f$ 20 | * 21 | * and 22 | * 23 | * \f$F: T^m \to T^n.\f$ 24 | * 25 | * The parametrization function is thus the map \f$T \to T^m.\f$ 26 | * 27 | * Terminology: 28 | * 29 | * - The \b time of the parametrization is the value \f$t\f$ that ranges from 30 | * the starting and ending T values. 31 | * - The \b position of the parametrization, \f$P(t)\f$ is the value of the 32 | * secondary transformation at a current time. 33 | * - The \b value of the parametrization, \f$F(P(t))\f$ is the value of the 34 | * primary transformation at the value of the secondary transformation at the 35 | * current time. 36 | * 37 | * @tparam T the basic operating type. 38 | * @tparam P the type of the single variable vector function (the secondary 39 | * transformation). 40 | * @tparam F the type of the multivariable vector function (the primary 41 | * transformation). 42 | */ 43 | template 44 | class Parametrization { 45 | T _start; 46 | T _end; 47 | 48 | T _pos; // Current position 49 | T _inc; // Increment 50 | 51 | P _par; // Single variable vector function 52 | F _ftn; // Multivariable vector function 53 | public: 54 | Parametrization(F, P, T, T); 55 | 56 | Vector value() const; // Value at position 57 | Vector pos() const; // Vector position 58 | T time() const; 59 | 60 | Vector dpos() const; // Derivative of vector position 61 | 62 | bool step(); // Step through the parametrization 63 | 64 | void reset(); // Reset the position 65 | 66 | static const size_t partition_size; 67 | }; 68 | 69 | /** 70 | * @brief The default number of steps that a parametrization will undergo. 71 | * Default value is 1000. 72 | */ 73 | template 74 | const size_t Parametrization ::partition_size = 1000; 75 | 76 | /** 77 | * @brief Constructs a parametrization with given primary and secondary 78 | * transformations, and the bounds of the parametrization. The state of the 79 | * parametrization is set to the start. 80 | * 81 | * @param ftn the primary transformation. 82 | * @param par the secondary transformation. 83 | * @param start the beginning value of the parametrization. 
84 | * @param end the ending value of the parametrization. 85 | */ 86 | template 87 | Parametrization ::Parametrization(F ftn, P par, T start, T end) 88 | : _ftn(ftn), _par(par), 89 | _start(start), _end(end), 90 | _pos(start) 91 | { 92 | _inc = (end - start) / (T) partition_size; 93 | } 94 | 95 | /** 96 | * @return the value of the parametrization at the current time. 97 | */ 98 | template 99 | Vector Parametrization ::value() const 100 | { 101 | return _ftn(_par(_pos)); 102 | } 103 | 104 | /** 105 | * @return the position of the parametrization at the current time. 106 | */ 107 | template 108 | Vector Parametrization ::pos() const 109 | { 110 | return _par(_pos); 111 | } 112 | 113 | /** 114 | * @return the current time. 115 | */ 116 | template 117 | T Parametrization ::time() const 118 | { 119 | return _pos; 120 | } 121 | 122 | /** 123 | * @brief The derivative of the secondary transformation, evaluated using a 124 | * forward difference. 125 | * 126 | * @return the derivative of the secondary transformation at the current time. 127 | */ 128 | template 129 | Vector Parametrization ::dpos() const 130 | { 131 | return (_par(_pos + _inc) - _par(_pos))/_inc; 132 | } 133 | 134 | /** 135 | * @brief Increments the current time of the parametrization according to the 136 | * static variable \p partition_size. 137 | * 138 | * @return \c true if the parametrization has reached the end of the its bounds 139 | * and \c false otherwise. 140 | */ 141 | template 142 | bool Parametrization ::step() 143 | { 144 | // TODO: allow multiple steps at once 145 | _pos += _inc; 146 | 147 | // Cycle back to the start; 148 | if (_pos > _end) { 149 | _pos = _start; 150 | 151 | return true; 152 | } 153 | 154 | return false; 155 | } 156 | 157 | /** 158 | * @brief Resets the current time of the parametrization to the starting time. 159 | */ 160 | template 161 | void Parametrization ::reset() 162 | { 163 | _pos = _start; 164 | } 165 | 166 | } 167 | 168 | #endif 169 | -------------------------------------------------------------------------------- /include/range.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RANGE_H_ 2 | #define RANGE_H_ 3 | 4 | // Standard headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace zhetapi { 13 | 14 | // Arithmetic kernel for comparison and arithmetic operations 15 | // TODO: should this go in a separate file? 
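// Usage sketch (relying on the float/int specializations below): for
// floating-point types eq() compares within an epsilon tolerance and mod()
// goes through std::fmod, while the integral specialization uses exact ==
// and operator%. For example:
//   arithmetic_kernel<double>::eq(0.1 + 0.2, 0.3)  // true despite rounding
//   arithmetic_kernel<int>::mod(7, 2)              // 1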
16 | template ::type> 17 | struct arithmetic_kernel { 18 | // Equality with tolerance 19 | static bool eq(T a, T b, T tolerance = std::numeric_limits ::epsilon()) { 20 | return std::abs(a - b) <= tolerance; 21 | } 22 | 23 | // Modulus 24 | static T mod(T a, T b) { 25 | return std::fmod(a, b); 26 | } 27 | }; 28 | 29 | template 30 | struct arithmetic_kernel { 31 | // Equality with tolerance 32 | static bool eq(T a, T b, T tolerance = std::numeric_limits ::epsilon()) { 33 | return a == b; 34 | } 35 | 36 | // Modulus 37 | static T mod(T a, T b) { 38 | return a % b; 39 | } 40 | }; 41 | 42 | // Range class 43 | template 44 | struct Range { 45 | T start; 46 | T term; 47 | T step; 48 | 49 | // Iterator type 50 | struct iterator { 51 | T value; 52 | T step; 53 | 54 | // Constructor 55 | iterator(T value, T step) : value(value), step(step) {} 56 | 57 | // Pre-increment 58 | iterator& operator++() { 59 | value += step; 60 | return *this; 61 | } 62 | 63 | // Post-increment 64 | iterator operator++(int) { 65 | iterator tmp(*this); 66 | value += step; 67 | return tmp; 68 | } 69 | 70 | // Pre-decrement 71 | iterator& operator--() { 72 | value -= step; 73 | return *this; 74 | } 75 | 76 | // Post-decrement 77 | iterator operator--(int) { 78 | iterator tmp(*this); 79 | value -= step; 80 | return tmp; 81 | } 82 | 83 | // Dereference 84 | T &operator*() { 85 | return value; 86 | } 87 | 88 | // Comparison operators 89 | bool operator==(const iterator &other) const { 90 | static T eps = T(100) * std::numeric_limits::epsilon(); 91 | if (std::is_floating_point ::value) 92 | return std::fabs(value - other.value) < eps; 93 | 94 | return value == other.value; 95 | } 96 | 97 | bool operator!=(const iterator& other) const { 98 | return !(*this == other); 99 | } 100 | 101 | bool operator<(const iterator& other) const { 102 | return value < other.value; 103 | } 104 | 105 | bool operator<=(const iterator& other) const { 106 | return value <= other.value; 107 | } 108 | 109 | bool operator>(const iterator& other) const { 110 | return value > other.value; 111 | } 112 | 113 | bool operator>=(const iterator& other) const { 114 | return value >= other.value; 115 | } 116 | }; 117 | 118 | // Constructors 119 | template ::value> 120 | Range(T pstart, T pterm, T pstep = T(1)) 121 | : start(pstart), term(pterm), step(pstep) {} 122 | 123 | // Default is an empty range 124 | template ::value> 125 | Range(T pterm = T(0)) 126 | : start(T(0)), term(pterm), step(T(1)) {} 127 | 128 | // Compute range at a given number of steps 129 | T compute(size_t nsteps) const { 130 | return start + static_cast (step) * nsteps; 131 | } 132 | 133 | T operator()(size_t nsteps) const { 134 | return compute(nsteps); 135 | } 136 | 137 | // Compose two ranges 138 | Range operator()(const Range &other) const { 139 | T a = start + step * other.start; 140 | T b = step * other.step; 141 | T c = std::min(term, other.term); 142 | 143 | return Range(a, c, b); 144 | } 145 | 146 | // Properties 147 | T length() const { 148 | return (term - start); 149 | } 150 | 151 | size_t size() const { 152 | // T rem = std::modulus {}.((term - start), step); 153 | T rem = arithmetic_kernel ::mod(term - start, step); 154 | return (rem == 0) ? 
length() / step 155 | : length() / step + 1; 156 | } 157 | 158 | // Iterators 159 | iterator begin() const { 160 | return iterator(start, step); 161 | } 162 | 163 | iterator end() const { 164 | return iterator(term, step); 165 | } 166 | 167 | // Boolean operators 168 | // TODO: account for eps in comparison 169 | bool operator==(const Range& other) const { 170 | return (start == other.start) 171 | && (term == other.term) 172 | && (step == other.step); 173 | } 174 | 175 | bool operator!=(const Range& other) const { 176 | return !(*this == other); 177 | } 178 | 179 | // All the elements 180 | static const Range all; 181 | }; 182 | 183 | // All for tensor slicing 184 | extern Range all; 185 | 186 | // Printing 187 | template 188 | std::ostream &operator<<(std::ostream &os, const Range &range) 189 | { 190 | return os << "(" << range.start << ", " << range.term 191 | << ", " << range.step << ")"; 192 | } 193 | 194 | } 195 | 196 | #endif 197 | -------------------------------------------------------------------------------- /include/rational.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RATIONAL_H_ 2 | #define RATIONAL_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | 8 | /** 9 | * @brief Represents the Rational 10 | * number a/b where a and b are 11 | * both of type T. 12 | */ 13 | template 14 | class Rational { 15 | public: 16 | class non_integral_type {}; 17 | private: 18 | T a; 19 | T b; 20 | public: 21 | Rational(T = 0, T = 1); 22 | 23 | operator bool() const; 24 | 25 | explicit operator double() const; 26 | 27 | bool is_inf() const; 28 | 29 | /* Mathematical Operators - Members */ 30 | Rational &operator+=(const Rational &); 31 | Rational &operator-=(const Rational &); 32 | Rational &operator*=(const Rational &); 33 | Rational &operator/=(const Rational &); 34 | 35 | /* Mathematical Operators - Non-Members */ 36 | template 37 | friend Rational operator+(const Rational &, const Rational &); 38 | 39 | template 40 | friend Rational operator-(const Rational &, const Rational &); 41 | 42 | template 43 | friend Rational operator*(const Rational &, const Rational &); 44 | 45 | template 46 | friend Rational operator/(const Rational &, const Rational &); 47 | 48 | /* Boolean Operators - Non Members */ 49 | template 50 | friend bool operator==(const Rational &, const Rational &); 51 | 52 | template 53 | friend bool operator!=(const Rational &, const Rational &); 54 | 55 | template 56 | friend bool operator>(const Rational &, const Rational &); 57 | 58 | template 59 | friend bool operator<(const Rational &, const Rational &); 60 | 61 | template 62 | friend bool operator>=(const Rational &, const Rational &); 63 | 64 | template 65 | friend bool operator<=(const Rational &, const Rational &); 66 | 67 | template 68 | friend Rational abs(const Rational &); 69 | 70 | /* Output Functions */ 71 | template 72 | friend std::ostream &operator<<(std::ostream &, const Rational &); 73 | private: 74 | void simplify(); 75 | 76 | static T gcd(T, T); 77 | }; 78 | 79 | ////////////////////////////////////////// 80 | // Constructors 81 | ////////////////////////////////////////// 82 | 83 | template 84 | Rational ::Rational(T p, T q) : a(p), b(q) 85 | { 86 | if (!std::is_integral ::value) 87 | throw non_integral_type(); 88 | 89 | simplify(); 90 | } 91 | 92 | ////////////////////////////////////////// 93 | // Conversion Operators 94 | ////////////////////////////////////////// 95 | 96 | template 97 | Rational ::operator bool() const 98 | { 99 | return a != 0; 100 
| } 101 | 102 | template 103 | Rational ::operator double() const 104 | { 105 | return (double) a / (double) b; 106 | } 107 | 108 | template 109 | bool Rational ::is_inf() const 110 | { 111 | return b == 0; 112 | } 113 | 114 | ////////////////////////////////////////// 115 | // Arithmetic Operators 116 | ////////////////////////////////////////// 117 | 118 | template 119 | Rational &Rational ::operator+=(const Rational &other) 120 | { 121 | a = a * other.b + b * other.a; 122 | b *= other.b; 123 | 124 | simplify(); 125 | 126 | return *this; 127 | } 128 | 129 | template 130 | Rational &Rational ::operator-=(const Rational &other) 131 | { 132 | a = a * other.b - b * other.a; 133 | b *= other.b; 134 | 135 | simplify(); 136 | 137 | return *this; 138 | } 139 | 140 | template 141 | Rational &Rational ::operator*=(const Rational &other) 142 | { 143 | using namespace std; 144 | 145 | a *= other.a; 146 | b *= other.b; 147 | 148 | simplify(); 149 | 150 | return *this; 151 | } 152 | 153 | template 154 | Rational &Rational ::operator/=(const Rational &other) 155 | { 156 | a *= other.b; 157 | b *= other.a; 158 | 159 | simplify(); 160 | 161 | return *this; 162 | } 163 | 164 | template 165 | Rational operator+(const Rational &a, const Rational &b) 166 | { 167 | Rational out = a; 168 | 169 | out += b; 170 | 171 | return out; 172 | } 173 | 174 | template 175 | Rational operator-(const Rational &a, const Rational &b) 176 | { 177 | Rational out = a; 178 | 179 | out -= b; 180 | 181 | return out; 182 | } 183 | 184 | template 185 | Rational operator*(const Rational &a, const Rational &b) 186 | { 187 | Rational out = a; 188 | 189 | out *= b; 190 | 191 | return out; 192 | } 193 | 194 | template 195 | Rational operator/(const Rational &a, const Rational &b) 196 | { 197 | Rational out = a; 198 | 199 | out /= b; 200 | 201 | return out; 202 | } 203 | 204 | ////////////////////////////////////////// 205 | // Boolean Operators 206 | ////////////////////////////////////////// 207 | 208 | template 209 | bool operator==(const Rational &a, const Rational &b) 210 | { 211 | return (a.a == b.a) && (a.b == b.b); 212 | } 213 | 214 | template 215 | bool operator!=(const Rational &a, const Rational &b) 216 | { 217 | return !(a == b); 218 | } 219 | 220 | template 221 | bool operator>(const Rational &a, const Rational &b) 222 | { 223 | return (a.a * b.b) > (a.b * b.a); 224 | } 225 | 226 | template 227 | bool operator<(const Rational &a, const Rational &b) 228 | { 229 | return (a.a * b.b) < (a.b * b.a); 230 | } 231 | 232 | template 233 | bool operator>=(const Rational &a, const Rational &b) 234 | { 235 | return (a == b) || (a > b); 236 | } 237 | 238 | template 239 | bool operator<=(const Rational &a, const Rational &b) 240 | { 241 | return (a == b) || (a < b); 242 | } 243 | 244 | ////////////////////////////////////////// 245 | // I/O Functions 246 | ////////////////////////////////////////// 247 | 248 | template 249 | std::ostream &operator<<(std::ostream &os, const Rational &rat) 250 | { 251 | if (rat.a == 0) 252 | os << 0; 253 | else if (rat.b == 1) 254 | os << rat.a; 255 | else 256 | os << rat.a << "/" << rat.b; 257 | 258 | return os; 259 | } 260 | 261 | ////////////////////////////////////////// 262 | // Private Methods 263 | ////////////////////////////////////////// 264 | 265 | template 266 | void Rational ::simplify() 267 | { 268 | if (b < 0) { 269 | a *= -1; 270 | b *= -1; 271 | } 272 | 273 | T tmp = gcd(a, b); 274 | 275 | a /= tmp; 276 | b /= tmp; 277 | } 278 | 279 | template 280 | T Rational ::gcd(T a, T b) 281 | { 282 | 
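// Euclidean algorithm: after guarding the zero inputs, repeatedly reduce the
// larger value modulo the smaller; e.g. gcd(12, 18): 18 % 12 = 6, then
// 12 % 6 = 0, leaving 6.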
if (a == 0 || b == 0) 283 | return 1; 284 | 285 | a = std::abs(a); 286 | b = std::abs(b); 287 | 288 | if (a > b) 289 | std::swap(a, b); 290 | 291 | while (b % a != 0) { 292 | b %= a; 293 | 294 | if (a > b) 295 | std::swap(a, b); 296 | } 297 | 298 | return std::min(a, b); 299 | } 300 | 301 | // Extra functions 302 | 303 | template 304 | Rational abs(const Rational &a) 305 | { 306 | if (a < Rational {0, 1}) 307 | return {-a.a, a.b}; 308 | 309 | return a; 310 | } 311 | 312 | #endif 313 | -------------------------------------------------------------------------------- /include/registration.hpp: -------------------------------------------------------------------------------- 1 | #ifndef REGISTRATION_H_ 2 | #define REGISTRATION_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | 9 | // Engine headers 10 | #include "core/functor.hpp" 11 | 12 | // TODO: separate casting from registration 13 | namespace zhetapi { 14 | 15 | class Registrable : public Functor { 16 | public: 17 | using Evaluator = std::function &)>; 18 | private: 19 | Evaluator _ftn; 20 | 21 | std::string _ident; 22 | public: 23 | Registrable(); 24 | Registrable(const Registrable &); 25 | Registrable(const std::string &, Evaluator); 26 | 27 | // TODO: get rid of this 28 | Token *operator()(const std::vector &) const; 29 | 30 | Token *evaluate(Engine *, const std::vector &) override; 31 | 32 | std::string dbg_str() const override; 33 | type caller() const override; 34 | Token *copy() const override; 35 | bool operator==(Token *) const override; 36 | }; 37 | 38 | } 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /include/sparse.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SPARSE_H_ 2 | #define SPARSE_H_ 3 | 4 | // C++ headers 5 | #include 6 | 7 | // Engine headers 8 | #include "matrix.hpp" 9 | 10 | namespace zhetapi { 11 | 12 | template 13 | class SparseMatrix { 14 | struct elem { 15 | elem * _next = nullptr; // Next element 16 | size_t _ci = 0; // Column index 17 | }; 18 | 19 | elem ** _rows = nullptr; 20 | public: 21 | SparseMatrix(); 22 | SparseMatrix(const Matrix &, T); 23 | }; 24 | 25 | template 26 | SparseMatrix ::SparseMatrix() {} 27 | 28 | template 29 | SparseMatrix ::SparseMatrix(const Matrix &mat, T exc) 30 | { 31 | size_t rs = mat.get_rows(); 32 | size_t cs = mat.get_cols(); 33 | 34 | _rows = new elem[rs]; 35 | for (size_t i = 0; i < rs; i++) { 36 | _rows[i] = new elem; 37 | 38 | for (size_t i = 0; i < cs; i++) { 39 | 40 | } 41 | } 42 | } 43 | 44 | } 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /include/std/activation_derivatives.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ACTIVATION_DERIVATIVES_H_ 2 | #define ACTIVATION_DERIVATIVES_H_ 3 | 4 | // TODO: put back in std 5 | namespace zhetapi { 6 | 7 | namespace ml { 8 | 9 | template 10 | class _DLinear : public Activation { 11 | T _alpha; 12 | public: 13 | __cuda_dual__ 14 | explicit _DLinear(const T &alpha = T(1)) : _alpha(alpha) {} 15 | 16 | __cuda_dual__ 17 | Vector compute(const Vector &x) const { 18 | return Vector (x.size(), _alpha); 19 | } 20 | }; 21 | 22 | // ReLU activation class 23 | template 24 | class _DReLU : public Activation { 25 | public: 26 | __cuda_dual__ 27 | Vector compute(const Vector &x) const { 28 | /* T *arr = new T[x.size()]; 29 | for (size_t i = 0; i < x.size(); i++) { 30 | arr[i] = (x[i] > 0) ? 
1 : 0; 31 | } 32 | return Vector (x.size(), arr, false); */ 33 | return Vector (x.size(), 34 | [&](size_t i) { 35 | return (x[i] > 0) ? 1 : 0; 36 | } 37 | ); 38 | } 39 | }; 40 | 41 | // Leaky ReLU activation class 42 | template 43 | class _DLeakyReLU : public Activation { 44 | T _alpha; 45 | public: 46 | _DLeakyReLU(const T &alpha = 1) : _alpha(alpha) {} 47 | 48 | Vector compute(const Vector &x) const { 49 | return Vector (x.size(), 50 | [&](size_t i) { 51 | return (x[i] < 0) ? _alpha : 1; 52 | } 53 | ); 54 | } 55 | }; 56 | 57 | // Sigmoid activation class 58 | template 59 | class _DSigmoid : public Activation { 60 | public: 61 | 62 | #ifndef ZHP_CUDA 63 | 64 | Vector compute(const Vector &x) const { 65 | return Vector (x.size(), 66 | [&](size_t i) { 67 | T tmp = 1.0/(1.0 + exp(-x[i])); 68 | 69 | return tmp * (T (1.0) - tmp); 70 | } 71 | ); 72 | } 73 | 74 | #else 75 | 76 | _host_ _device_ 77 | Vector compute(const Vector &x) const { 78 | return Vector (x.size(), 79 | [x] _host_ _device_ (size_t i) { 80 | T tmp = 1.0/(1.0 + exp(-x[i])); 81 | 82 | return tmp * (T (1.0) - tmp); 83 | } 84 | ); 85 | } 86 | 87 | #endif 88 | 89 | }; 90 | 91 | // Scaled Sigmoid activation class 92 | template 93 | class _DScaledSigmoid : public Activation { 94 | T _alpha; 95 | public: 96 | _DScaledSigmoid(const T &alpha) : _alpha(alpha) {} 97 | 98 | Vector compute(const Vector &x) const { 99 | return Vector (x.size(), [&](size_t i) {return 100 | _d_scaled_sigmoid(x[i], _alpha);}); 101 | } 102 | }; 103 | 104 | // Probability activation class 105 | template 106 | class _DSoftmax : public Activation { 107 | public: 108 | Vector compute(const Vector &x) const { 109 | // Subtract by max for numerical stability 110 | T _max = x[0]; 111 | for (size_t i = 1; i < x.size(); i++) 112 | _max = (_max > x[i]) ? _max : x[i]; 113 | 114 | T _sum = 0; 115 | for (size_t i = 0; i < x.size(); i++) 116 | _sum += exp(x[i] - _max); 117 | 118 | return Vector (x.size(), 119 | [&](size_t i) { 120 | return exp(x[i] - _max) 121 | * (_sum - exp(x[i] - _max)) 122 | / (_sum * _sum); 123 | } 124 | ); 125 | } 126 | }; 127 | 128 | } 129 | 130 | } 131 | 132 | #endif 133 | -------------------------------------------------------------------------------- /include/std/calculus.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CALCULUS_H_ 2 | #define CALCULUS_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // Engine headers 11 | #include "../vector.hpp" 12 | #include "../parametrization.hpp" 13 | 14 | namespace zhetapi { 15 | 16 | namespace utility { 17 | 18 | // Single variable, scalar integration of f over [a, b] 19 | template 20 | T sv_integral(F f, T a, T b, size_t partition_size = 1000) 21 | { 22 | T step = (b - a)/partition_size; 23 | 24 | // Use a more accurate method later (Simpon's rule, etc.) 
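// Left Riemann sum: accumulates f(x) * step over x = a, a + step, ...; e.g.
// sv_integral([](double x) { return x * x; }, 0.0, 1.0) is roughly 1/3,
// with O(step) error.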
25 | T sum = 0; 26 | for (T x = a; x <= b; x += step) 27 | sum += f(x) * step; 28 | 29 | return sum; 30 | } 31 | 32 | // Euler's method for single variable scalar functions 33 | template 34 | T eulers_method(Df df, Vector given, T x, size_t partition_size = 1000) 35 | { 36 | T step = (x - given[0])/partition_size; 37 | 38 | T stop = x; 39 | while (fabs(given[0] - stop) > 1e-10) { 40 | given[1] += df(given[0]) * step; 41 | given[0] += step; 42 | } 43 | 44 | return given[1]; 45 | } 46 | 47 | template 48 | T line_integral(Parametrization prm) 49 | { 50 | // Always start from the beginning 51 | prm.reset(); 52 | 53 | T sum = 0; 54 | 55 | do { 56 | sum += inner(prm.value(), prm.dpos()); 57 | } while (prm.step()); 58 | 59 | return sum; 60 | } 61 | 62 | /** 63 | * @brief Solves the homogenous linear differential 64 | * equation (with constant coefficients) whose coefficients 65 | * are represented by the polynomial that is passed into the 66 | * function. 67 | * 68 | * @tparam T Represents the scalar field; the complex roots of 69 | * the polynomial are AUTOMATICALLY generated from this function. 70 | * 71 | * @return out Represents the basis of functions such that a 72 | * linear combination of the functions is a solution to the homogenous 73 | * linear differential equation with constant coefficients. 74 | 75 | template 76 | std::vector solve_hlde_constant(const polynomial > &p, 77 | size_t rounds = 10000, const Complex &eps = 1E-100L, 78 | const Complex &start = {0.4, 0.9}) 79 | { 80 | std::vector > roots = p.roots(rounds, eps, start); 81 | 82 | std::vector >> out; 83 | 84 | std::vector > inserted; 85 | 86 | table > tbl { 87 | Variable > {"e", false, exp(1)} 88 | }; 89 | 90 | for (auto vl : roots) { 91 | if (vl == Complex {0, 0}) 92 | continue; 93 | 94 | auto itr = ::std::find_if(inserted.begin(), inserted.end(), [&](const Complex &a) { 95 | return pow(norm(vl - a), 10.5) < norm(eps); 96 | }); 97 | 98 | if (itr != inserted.end()) { 99 | size_t deg = ::std::count_if(inserted.begin(), inserted.end(), [&](const Complex &a) { 100 | return pow(norm(vl - a), 10.5) < norm(eps); 101 | }); 102 | 103 | if (vl.is_real()) { 104 | out.push_back({"f", {"x"}, "x^" + ::std::to_string(deg) + " * e^(" 105 | + ::std::to_string(vl.real()) + " * x)", tbl}); 106 | } else { 107 | out.push_back({"f", {"x"}, "x^" + ::std::to_string(deg) + " * e^(" 108 | + ::std::to_string(vl.real()) + " * x)" + " * cos(" 109 | + ::std::to_string(vl.imag()) + " * x)", tbl}); 110 | } 111 | } else { 112 | inserted.push_back(vl); 113 | 114 | if (vl.is_real()) { 115 | out.push_back({"f", {"x"}, "e^(" + ::std::to_string(vl.real()) + " * x)", tbl}); 116 | } else { 117 | out.push_back({"f", {"x"}, "e^(" + ::std::to_string(vl.real()) + " * x)" 118 | + " * cos(" + ::std::to_string(vl.imag()) + " * x)", tbl}); 119 | } 120 | } 121 | } 122 | 123 | size_t deg = ::std::count(roots.begin(), roots.end(), Complex {0, 0}); 124 | 125 | if (deg > 0) 126 | out.push_back({"f", {"x"}, "x^" + ::std::to_string(deg - 1), tbl}); 127 | 128 | return out; 129 | } */ 130 | 131 | } 132 | 133 | } 134 | 135 | #endif 136 | -------------------------------------------------------------------------------- /include/std/combinatorial.hpp: -------------------------------------------------------------------------------- 1 | #ifndef COMBINATORIAL_H_ 2 | #define COMBINATORIAL_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | 8 | // Engine headers 9 | #include "../rational.hpp" 10 | 11 | namespace zhetapi { 12 | 13 | namespace utility { 14 | 15 | 
////////////////////////////////////////// 16 | // Exceptions 17 | ////////////////////////////////////////// 18 | 19 | /** 20 | * @brief Exception for asserting that 21 | * a certain input be non-negative. 22 | */ 23 | class negative_block_exception {}; 24 | 25 | /** 26 | * @brief Exception for asserting that 27 | * a certain input be strictly positive. 28 | */ 29 | class positive_flow_exception {}; 30 | 31 | ////////////////////////////////////////// 32 | // Preliminary Helper Functions 33 | ////////////////////////////////////////// 34 | 35 | /** 36 | * @brief Factorial function for integral 37 | * types, provided to avoid the tgamma 38 | * library function. 39 | */ 40 | template 41 | T integral_factorial(T n) 42 | { 43 | T val = 1; 44 | for (T i = 1; i <= n; i++) 45 | val *= i; 46 | 47 | return val; 48 | } 49 | 50 | /** 51 | * @brief Falling factorial function, available 52 | * for all types that support the arithmetic 53 | * operations. 54 | */ 55 | template 56 | T falling_power(T n, T k) 57 | { 58 | T val = 1; 59 | for (T i = 0; i < k; i++) 60 | val *= (n - i); 61 | 62 | return val; 63 | } 64 | 65 | ////////////////////////////////////////// 66 | // Binomial Coefficients 67 | ////////////////////////////////////////// 68 | 69 | /** 70 | * @brief General binomial, computed using the falling 71 | * factorial and the gamma function. The gamma 72 | * function defaults to the library function 73 | * tgamma. 74 | * 75 | * @param gamma Used gamma function, defaults to ::std::tgamma 76 | */ 77 | template 78 | T binom(T n, T k, T (*gamma)(T) = ::std::tgamma) 79 | { 80 | return falling_power(n, k) / gamma(k + 1); 81 | } 82 | 83 | /** 84 | * @brief Integral binomial, utilizes the falling 85 | * power function as well as the integral factorial 86 | * function. 87 | */ 88 | template 89 | T integral_binom(T n, T k) 90 | { 91 | return falling_power(n, k) / integral_factorial(k); 92 | } 93 | 94 | ////////////////////////////////////////// 95 | // GCD and LCM 96 | ////////////////////////////////////////// 97 | 98 | /** 99 | * @brief The Euclidean algorithm for determining 100 | * the GCD (Greatest Common Divisor) of two 101 | * numbers. Includes overhead from the passed 102 | * function. 103 | */ 104 | template 105 | T gcd(T a, T b, T (*mod)(T, T) = std::fmod, T eps = 0) 106 | { 107 | if (a == 0 || b == 0) 108 | return 1; 109 | 110 | a = std::abs(a); 111 | b = std::abs(b); 112 | 113 | if (a > b) 114 | std::swap(a, b); 115 | 116 | while (std::abs(mod(b, a)) != 0) { 117 | b = mod(b, a); 118 | 119 | if (a > b) 120 | std::swap(a, b); 121 | } 122 | 123 | return std::min(a, b); 124 | } 125 | 126 | /** 127 | * @brief The LCM (Lowest Common Multiple) algorithm, 128 | * which uses the fact that 129 | * (a, b) * [a, b] = ab. Includes overhead 130 | * from the modulus function which is passed. 131 | */ 132 | template 133 | T lcm(T a, T b, T(*mod)(T, T) = std::fmod, T eps = 0) 134 | { 135 | return a * b / gcd(a, b, mod, eps); 136 | } 137 | 138 | /** 139 | * @brief Bernoulli sequence generator: generates 140 | * a list (array, vector) of the first n 141 | * Bernoulli numbers. Uses the general 142 | * binomial function.
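 *
 * A sketch of the recurrence implemented below (B_1 = -1/2, and odd-index
 * numbers beyond B_1 are zero):
 *
 *   B_m = -(1 / (m + 1)) * sum_{j = 0}^{m - 1} C(m + 1, j) * B_j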
143 | */ 144 | template 145 | std::vector bernoulli_sequence_real(T n, T (*gamma)(T) = ::std::tgamma) 146 | { 147 | ::std::vector ibs = {1}; 148 | 149 | T tmp; 150 | for (T i = 1; i <= n; i++) { 151 | tmp = 0; 152 | 153 | if (i == 1) { 154 | ibs.push_back(-0.5); 155 | continue; 156 | } 157 | 158 | if (::std::fmod(i, 2) == 1) { 159 | ibs.push_back(0); 160 | continue; 161 | } 162 | 163 | for (T j = 0; j < i; j++) 164 | tmp += binom(i + 1, j, gamma) * ibs[j]; 165 | 166 | ibs.push_back(-tmp/(i + 1)); 167 | } 168 | 169 | return ibs; 170 | } 171 | 172 | /** 173 | * @brief Returns the specified Bernoulli number, 174 | * using the Bernoulli sequence generator. 175 | */ 176 | template 177 | T bernoulli_number_real(T n, T (*gamma)(T) = ::std::tgamma) 178 | { 179 | if (n <= 0) 180 | throw positive_flow_exception(); 181 | 182 | return bernoulli_sequence_real(n, gamma)[n - 1]; 183 | } 184 | 185 | /** 186 | * @brief Rational equivalent of the real Bernoulli 187 | * sequence generator: a list of Rational 188 | * numbers is returned instead. Use this variant when 189 | * precision must be preserved. Note that if the returned 190 | * sequence appears to be incorrect, the range of 191 | * the template parameter is likely too 192 | * small (overflow). 193 | */ 194 | template 195 | ::std::vector > bernoulli_sequence_rational(T n) 196 | { 197 | ::std::vector > ibs = {{1, 1}}; 198 | 199 | Rational tmp; 200 | for (T i = 1; i <= n; i++) { 201 | tmp = {0, 1}; 202 | 203 | if (i == 1) { 204 | ibs.push_back({-1, 2}); 205 | continue; 206 | } 207 | 208 | if (i % 2 == 1) { 209 | ibs.push_back({0, 1}); 210 | continue; 211 | } 212 | 213 | for (T j = 0; j < i; j++) 214 | tmp += Rational {integral_binom(i + 1, j), 1} * ibs[j]; 215 | 216 | ibs.push_back(Rational {-1, (i + 1)} * tmp); 217 | } 218 | 219 | return ibs; 220 | } 221 | 222 | /** 223 | * @brief Returns the specified Bernoulli number as a 224 | * Rational number using the Rational Bernoulli sequence 225 | * generator.
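 * (The gamma function pointer parameter below is unused by this rational
 * variant; it appears to be kept only for symmetry with
 * bernoulli_number_real.)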
226 | */ 227 | template 228 | T bernoulli_number_rational(T n, T (*gamma)(T) = ::std::tgamma) 229 | { 230 | if (n <= 0) 231 | throw positive_flow_exception(); 232 | 233 | return bernoulli_sequence_rational(n)[n - 1]; 234 | } 235 | 236 | } 237 | 238 | } 239 | 240 | #endif 241 | -------------------------------------------------------------------------------- /include/std/erf_derivatives.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ERF_DERIVATIVES_H_ 2 | #define ERF_DERIVATIVES_H_ 3 | 4 | namespace zhetapi { 5 | 6 | namespace ml { 7 | 8 | // Squared error 9 | template 10 | class _DSE : public Erf { 11 | public: 12 | __cuda_dual__ 13 | Vector operator()(const Vector &comp, const Vector &in) const { 14 | return -T(2) * (comp - in); 15 | } 16 | }; 17 | 18 | // M squared error 19 | template 20 | class _DMSE : public Erf { 21 | public: 22 | __cuda_dual__ 23 | Vector operator()(const Vector &comp, const Vector &in) const { 24 | return -T(2)/T(comp.size()) * (comp - in); 25 | } 26 | }; 27 | 28 | } 29 | 30 | } 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /include/std/erfs.hpp: -------------------------------------------------------------------------------- 1 | #ifndef STD_ERFS_H_ 2 | #define STD_ERFS_H_ 3 | 4 | // Engine headers 5 | #include "../erf.hpp" 6 | #include "../std/erf_derivatives.hpp" 7 | 8 | // Engine CUDA headers 9 | #include "../cuda/essentials.cuh" 10 | 11 | namespace zhetapi { 12 | 13 | namespace ml { 14 | 15 | /* 16 | * All Erf classes have inlined member functions for the same 17 | * reason that the activation classes are inlined. Obscure naming is 18 | * also done for the same reason. 19 | */ 20 | 21 | template 22 | class SE : public Erf { 23 | public: 24 | __cuda_dual__ 25 | SE() { 26 | this->kind = Erf ::OPT_SE; 27 | } 28 | 29 | __cuda_dual__ 30 | Vector operator()(const Vector &comp, const Vector &in) const { 31 | Erf ::assert_size(comp, in); 32 | 33 | T sum = 0; 34 | 35 | for (size_t i = 0; i < comp.size(); i++) 36 | sum += (comp[i] - in[i]) * (comp[i] - in[i]); 37 | 38 | return Vector (1, sum); 39 | } 40 | 41 | __cuda_dual__ 42 | Erf *derivative() const 43 | { 44 | return new _DSE (); 45 | } 46 | }; 47 | 48 | template 49 | class MSE : public Erf { 50 | public: 51 | __cuda_dual__ 52 | MSE() { 53 | this->kind = Erf ::OPT_MSE; 54 | } 55 | 56 | __cuda_dual__ 57 | Vector operator()(const Vector &comp, const Vector &in) const { 58 | Erf ::assert_size(comp, in); 59 | 60 | T sum = 0; 61 | 62 | for (size_t i = 0; i < comp.size(); i++) 63 | sum += (comp[i] - in[i]) * (comp[i] - in[i]); 64 | 65 | return Vector (1, sum / T(comp.size())); 66 | } 67 | 68 | __cuda_dual__ 69 | Erf *derivative() const { 70 | return new _DMSE (); 71 | } 72 | }; 73 | 74 | // Copy base activations 75 | template 76 | __cuda_dual__ 77 | Erf *copy(Erf *opt) 78 | { 79 | switch (opt->kind) { 80 | case Erf ::OPT_Default: 81 | return new Erf (); 82 | case Erf ::OPT_SE: 83 | return new SE (); 84 | case Erf ::OPT_MSE: 85 | return new MSE (); 86 | } 87 | 88 | return nullptr; 89 | } 90 | 91 | } 92 | 93 | } 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /include/std/filters.hpp: -------------------------------------------------------------------------------- 1 | #ifndef STD_FILTERS_H_ 2 | #define STD_FILTERS_H_ 3 | 4 | // Engine headrers 5 | #include "filter.hpp" 6 | #include "matrix.hpp" 7 | #include "vector.hpp" 8 | #include "image.hpp" 9 | 10 | #include 
"std/initializers.hpp" 11 | 12 | namespace zhetapi { 13 | 14 | namespace ml { 15 | 16 | template 17 | class FeedForward : public Filter { 18 | Matrix _weight = Matrix (); 19 | 20 | Activation *_act = nullptr; 21 | Activation *_dact = nullptr; 22 | 23 | long double _dropout = 0; 24 | 25 | Vector _acache = Vector (); 26 | Vector _zcache = Vector (); 27 | 28 | // For batch inputs 29 | // Matrix _Acache; 30 | // Matrix _Zcache; 31 | public: 32 | // Input size, output size 33 | FeedForward(size_t isize, size_t osize, Activation *act, std::function init = RandomInitializer ()) 34 | : _weight(isize, osize + 1), // +1 for bias 35 | _act(act->copy()), 36 | _dact(act->derivative()) 37 | { 38 | _weight.randomize(init); 39 | } 40 | 41 | void propogate(const Pipe &in, Pipe &out) 42 | { 43 | // Slice the input (+1 for bias) 44 | Vector vin = (in[0]->cast_to_vector()).append_above(1); 45 | 46 | _acache = vin; 47 | 48 | Vector mul = _weight * vin; 49 | 50 | _zcache = _dact->compute(mul); 51 | 52 | // Send to output pipe 53 | *out[0] = _act->compute(mul); 54 | } 55 | 56 | void gradient(const Pipe &delin, Pipe &grads) 57 | { 58 | // TODO: Check sizes later 59 | 60 | // Move shur/stable shur to tensor base 61 | *delin[0] = shur(delin[0]->cast_to_vector(), _zcache); 62 | 63 | Matrix J = delin[0]->cast_to_vector() * _acache.transpose(); 64 | 65 | // Use the kernel function here 66 | *grads[0] = J; 67 | } 68 | 69 | void apply_gradient(const Pipe &grads) 70 | { 71 | // TODO: check with gradeint checking 72 | Matrix J = grads[0]->cast_to_matrix( 73 | _weight.get_rows(), 74 | _weight.get_cols()); 75 | 76 | _weight += J; 77 | } 78 | }; 79 | 80 | #define for_img(i, j, w, h) \ 81 | for (int i = 0; i < w; i++) { \ 82 | for (int j = 0; j < h; j++) 83 | 84 | // Assumes that the input tensor is an image 85 | template 86 | class Convolution : public Filter { 87 | Matrix _filter; 88 | size_t _dim; 89 | 90 | // Type aliases 91 | using byte = image::byte; 92 | using mbyte = Matrix ; 93 | using vbyte = Vector ; 94 | using vfilt = Vector ; 95 | public: 96 | Convolution(const Matrix &filter) 97 | : _filter(filter), 98 | _dim(filter.get_rows()) {} 99 | 100 | // Assume equal padding for now 101 | image::Image process(const image::Image &in, int depth = -1) { 102 | image::Image out = in; 103 | 104 | int w = in.width(); 105 | int h = in.height(); 106 | int c = in.channels(); 107 | 108 | // depth = c; 109 | // Choose color channels only 110 | if (depth < 0) 111 | depth = (c > 1) ? c - 1 : c; 112 | 113 | int n = (_dim - 1)/2; 114 | 115 | byte *data = in._array; 116 | 117 | using namespace std; 118 | for_img(x, y, w, h) { 119 | vbyte t(depth, byte(0)); 120 | 121 | int ymin = y - n; 122 | int ymax = y + n; 123 | 124 | int xmin = x - n; 125 | int xmax = x + n; 126 | 127 | Vector tmp(depth, T(0)); 128 | for (int k = 0; k < _dim; k++) { 129 | size_t ti = x + k - n; 130 | 131 | if (xmin + k < 0 || xmin + k >= h) 132 | continue; 133 | 134 | size_t off = ymin; 135 | size_t len = _dim; 136 | 137 | if (ymin < 0) { 138 | off = 0; 139 | len += ymin; 140 | } 141 | 142 | if (ymax >= w) 143 | len -= (ymax - w + 1); 144 | 145 | size_t i = c * ((x + k - n) * w + off); 146 | 147 | byte *img = &(data[i]); 148 | T *flt = &(_filter[k][off - ymin]); 149 | 150 | for (size_t ch = 0; ch < depth; ch++) { 151 | T s = 0; 152 | 153 | for (size_t i = 0; i < len; i++) 154 | s += flt[i] * ((T) img[i * c + ch]); 155 | 156 | tmp[ch] += s; 157 | } 158 | } 159 | 160 | for (size_t i = 0; i < depth; i++) 161 | t[i] = (tmp[i] > 0) ? 
tmp[i] : 0; 162 | 163 | out.set({x, y}, t); 164 | }} 165 | 166 | return out; 167 | } 168 | }; 169 | 170 | } 171 | 172 | } 173 | 174 | #endif 175 | -------------------------------------------------------------------------------- /include/std/functions.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FUNCTIONS_H_ 2 | #define FUNCTIONS_H_ 3 | 4 | // C++ headers 5 | #include 6 | 7 | #define FACTORIAL_BUFFER_SIZE 2000 8 | 9 | namespace zhetapi { 10 | 11 | namespace special { 12 | 13 | double ln_gamma(double); 14 | double ln_factorial(int); 15 | 16 | double poission(double, int); 17 | 18 | } 19 | 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /include/std/initializers.hpp: -------------------------------------------------------------------------------- 1 | #ifndef STD_INITIALIZERS_H_ 2 | #define STD_INITIALIZERS_H_ 3 | 4 | #ifdef __AVR // Does not support AVR 5 | 6 | #include "avr/random.hpp" 7 | 8 | #else 9 | 10 | // C++ headers 11 | #include 12 | 13 | #endif // Does not support AVR 14 | 15 | namespace zhetapi { 16 | 17 | namespace ml { 18 | 19 | #ifdef __AVR 20 | 21 | template 22 | T RandomInitializer() { 23 | static avr::RandomEngine reng(16183); 24 | 25 | return reng.ldouble(); 26 | } 27 | 28 | #else 29 | 30 | template 31 | struct RandomInitializer { 32 | // Use interval later 33 | T operator()() { 34 | return T (0.5 - rand()/((double) RAND_MAX)); 35 | } 36 | }; 37 | 38 | #endif 39 | 40 | #ifndef __AVR // Does not support AVR 41 | 42 | std::random_device _rd; 43 | std::mt19937 _mt(_rd()); 44 | 45 | template 46 | struct LeCun { 47 | std::normal_distribution _dbt; 48 | public: 49 | explicit LeCun(size_t fan_in) 50 | : _dbt(0, sqrt(T(1) / fan_in)) {} 51 | 52 | T operator()() { 53 | return _dbt(_mt); 54 | } 55 | }; 56 | 57 | template 58 | struct He { 59 | std::normal_distribution _dbt; 60 | public: 61 | explicit He(size_t fan_in) 62 | : _dbt(0, sqrt(T(2) / fan_in)) {} 63 | 64 | T operator()() { 65 | return _dbt(_mt); 66 | } 67 | }; 68 | 69 | template 70 | struct Xavier { 71 | std::normal_distribution _dbt; 72 | public: 73 | explicit Xavier(size_t fan_avg) 74 | : _dbt(0, sqrt(T(1) / fan_avg)) {} 75 | 76 | T operator()() { 77 | return _dbt(_mt); 78 | } 79 | }; 80 | 81 | #endif 82 | 83 | } 84 | 85 | } 86 | 87 | #endif 88 | -------------------------------------------------------------------------------- /include/std/interval.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INTERVAL_H_ 2 | #define INTERVAL_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | // Engine headers 12 | #include "../fixed_vector.hpp" 13 | 14 | namespace zhetapi { 15 | 16 | // TODO: inspect the random-ness of the interval class 17 | namespace utility { 18 | 19 | extern std::random_device rd; 20 | 21 | // Typedefs for sanity 22 | using dre = std::mt19937; 23 | using udb = std::uniform_real_distribution ; 24 | 25 | // TODO: extend to long double 26 | 27 | // Keep inlined here for header only purposes 28 | struct disjoint { 29 | static dre gen; 30 | static udb distro; 31 | 32 | using pflt = std::pair ; 33 | 34 | double left = 0; 35 | double right = 0; 36 | bool closed = true; 37 | 38 | // c should represent compact instead of closed? 
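// Example: disjoint(0.0, 1.0) models the closed interval [0, 1], and
// uniform() draws a sample from it; approximate() nudges open endpoints
// inward by a tiny epsilon so the ordering comparisons below stay sound.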
39 | disjoint(double l = 0.0, double r = 0.0, bool c = true) 40 | : left(l), right(r), closed(c) {} 41 | 42 | double length() const 43 | { 44 | return right - left; 45 | } 46 | 47 | pflt approximate() const 48 | { 49 | static double epsilon = 1e-10; 50 | 51 | if (closed) 52 | return {left, right}; 53 | 54 | return {left + epsilon, right - epsilon}; 55 | } 56 | 57 | // Use a real uniform distro later 58 | double uniform() const 59 | { 60 | return left + distro(gen) * (right - left); 61 | } 62 | 63 | // Check disjointed-ness 64 | bool is_disjoint(const disjoint &dj) const 65 | { 66 | // If either interval is greater, 67 | // then it must be disjoint 68 | return (*this > dj) || (*this < dj); 69 | } 70 | 71 | // The interval is completely to the left 72 | bool operator<(const disjoint &dj) const 73 | { 74 | pflt tapp = approximate(); 75 | pflt oapp = dj.approximate(); 76 | 77 | return (tapp.second < oapp.first) && (tapp.first < oapp.first); 78 | } 79 | 80 | // The interval is completely to the right 81 | bool operator>(const disjoint &dj) const 82 | { 83 | pflt tapp = approximate(); 84 | pflt oapp = dj.approximate(); 85 | 86 | return (tapp.second > oapp.second) && (tapp.first > oapp.second); 87 | } 88 | 89 | bool operator==(const disjoint &dj) const 90 | { 91 | return (left == dj.left) && (right == dj.right) && (closed == dj.closed); 92 | } 93 | }; 94 | 95 | // N is the number of dimensions 96 | // NOTE: for now multidim intervals 97 | // can only be one "box", see the TODO 98 | // below 99 | 100 | /** 101 | * @brief Random generator class, that can uniformly generated Vectors (or 102 | * scalars) with elements that are randomly sampled from a distribution 103 | * (currently each element can be sampled only from a uniform distribution 104 | * that is the union of disjoint intervals). 105 | * 106 | * @tparam N the dimension that the corresponding random Vectors should have. 107 | */ 108 | template 109 | class Interval { 110 | // TODO: this will not work, 111 | // we need a disjoint equivalent for N dimensions 112 | // (think about boxes as N-dim intervals) 113 | disjoint *axes = nullptr; 114 | public: 115 | Interval() : Interval(1.0L) {} 116 | 117 | Interval(long double x) { 118 | axes = new disjoint[N]; 119 | 120 | for (size_t i = 0; i < N; i++) 121 | axes[i] = disjoint(0, x, true); 122 | } 123 | 124 | FixedVector operator()() const { 125 | return uniform(); 126 | } 127 | 128 | FixedVector uniform() const { 129 | // First check that the axes are not null 130 | if (axes == nullptr) 131 | throw null_axes(); 132 | 133 | return FixedVector ( 134 | [&](size_t i) -> double { 135 | return axes[i].uniform(); 136 | }, N 137 | ); 138 | } 139 | 140 | template 141 | friend std::ostream &operator<<(std::ostream &, 142 | const Interval &); 143 | 144 | // Exceptions 145 | class null_axes : public std::runtime_error { 146 | public: 147 | null_axes() : std::runtime_error("Axes of Interval " 148 | " are null") {} 149 | }; 150 | }; 151 | 152 | // TODO: Switch from double to long double 153 | /** 154 | * @brief Single dimensional (scalar) random generator. Can sample uniformly 155 | * from a union of intervals. 
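 *
 * For example (a sketch using operator| declared below):
 *
 *   Interval <1> a(0.0, 1.0);
 *   Interval <1> b(2.0, 3.0);
 *   double x = (a | b).uniform(); // drawn from [0, 1] or [2, 3], each
 *                                 // sub-interval weighted by its length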
156 | */ 157 | template <> 158 | class Interval <1> { 159 | // For random generation 160 | static dre gen; 161 | static udb distro; 162 | 163 | // Should always contain disjoint intervals 164 | std::set _union; 165 | 166 | // Assumes that the intervals in un are disjoint 167 | explicit Interval(const std::set &un) : _union(un) {} 168 | 169 | // Checks that the new 'disjoint' interval is indeed disjoint 170 | bool is_disjoint(const disjoint &djx) const { 171 | for (const disjoint &dj : _union) { 172 | if (!dj.is_disjoint(djx)) 173 | return false; 174 | } 175 | 176 | return true; 177 | } 178 | public: 179 | // Defaults to [0, 1] 180 | Interval() : Interval(1.0L) {} 181 | 182 | explicit Interval(unsigned long long int x) 183 | : Interval((long double) x) {} 184 | 185 | explicit Interval(long double x) 186 | : Interval(0, x) {} 187 | 188 | Interval(double left, double right, bool closed = true) { 189 | disjoint dj {left, right, closed}; 190 | 191 | _union.insert(_union.begin(), dj); 192 | } 193 | 194 | // Properties 195 | double size() const { 196 | double len = 0; 197 | 198 | for (disjoint dj : _union) 199 | len += dj.length(); 200 | 201 | return len; 202 | } 203 | 204 | operator bool() const { 205 | return size() > 0; 206 | } 207 | 208 | double operator()() const { 209 | return uniform(); 210 | } 211 | 212 | // Sampling 213 | double uniform() const { 214 | // TODO: Cover case where the interval is not closed 215 | double len = size(); 216 | 217 | double *db = new double[_union.size() + 1]; 218 | 219 | size_t i = 0; 220 | 221 | db[i++] = 0; 222 | for (disjoint dj : _union) { 223 | db[i] = db[i - 1] + dj.length()/len; 224 | 225 | i++; 226 | } 227 | 228 | double rnd = distro(gen); 229 | 230 | for (i = 0; i < _union.size(); i++) { 231 | if ((rnd > db[i]) && (rnd < db[i + 1])) 232 | break; 233 | } 234 | 235 | delete[] db; 236 | 237 | auto itr = _union.begin(); 238 | 239 | std::advance(itr, i); 240 | 241 | return itr->uniform(); 242 | } 243 | 244 | // Operations 245 | Interval &operator|=(const Interval &itv) { 246 | auto iset = itv._union; 247 | 248 | using namespace std; 249 | 250 | // Check for disjointed-ness 251 | for (const disjoint &dj : iset) { 252 | if (is_disjoint(dj)) 253 | _union.insert(_union.begin(), dj); 254 | else 255 | cout << "Adding a non-disjoint interval" << endl; 256 | } 257 | 258 | return *this; 259 | } 260 | 261 | // Binary operations 262 | friend Interval operator|(const Interval &, const Interval &); 263 | friend Interval operator&(const Interval &, const Interval &); 264 | 265 | friend std::ostream &operator<<(std::ostream &, const Interval &); 266 | }; 267 | 268 | Interval <1> operator|(const Interval <1> &, const Interval <1> &); 269 | 270 | std::ostream &operator<<(std::ostream &, const Interval <1> &); 271 | 272 | // Literal constructor 273 | Interval <1> operator""_I(unsigned long long int); 274 | Interval <1> operator""_I(long double); 275 | 276 | } 277 | 278 | } 279 | 280 | #endif 281 | -------------------------------------------------------------------------------- /include/std/loaders.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LOADERS_H_ 2 | #define LOADERS_H_ 3 | 4 | #ifndef __AVR // Does not support AVR 5 | 6 | namespace zhetapi { 7 | 8 | namespace ml { 9 | 10 | // Forward declarations 11 | template 12 | class Linear; 13 | 14 | template 15 | class ReLU; 16 | 17 | template 18 | class Sigmoid; 19 | 20 | template 21 | class Softmax; 22 | 23 | // Loaders 24 | template 25 | Activation *load_linear(const 
std::vector &args) 26 | { 27 | return new Linear (args[0]); 28 | } 29 | 30 | template 31 | Activation *load_relu(const std::vector &args) 32 | { 33 | return new ReLU (); 34 | } 35 | 36 | template 37 | Activation *load_sigmoid(const std::vector &args) 38 | { 39 | return new Sigmoid (); 40 | } 41 | 42 | template 43 | Activation *load_softmax(const std::vector &args) 44 | { 45 | return new Softmax (); 46 | } 47 | 48 | } 49 | 50 | } 51 | 52 | #endif // Does not support AVR 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /include/std/numtheory.hpp: -------------------------------------------------------------------------------- 1 | #ifndef NUMBER_THEORY_H_ 2 | #define NUMBER_THEORY_H_ 3 | 4 | // Standard headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace zhetapi { 11 | 12 | namespace number_theory { 13 | 14 | // TODO: use f convention for gcd/lcm 15 | 16 | /** 17 | * @brief Integral equivalent of the general 18 | * gcd function, preferably used for integer 19 | * types. 20 | */ 21 | template 22 | T integral_gcd(T a, T b) 23 | { 24 | if (a == 0 || b == 0) 25 | return 1; 26 | 27 | a = ::std::abs(a); 28 | b = ::std::abs(b); 29 | 30 | if (a > b) 31 | ::std::swap(a, b); 32 | 33 | while (b % a != 0) { 34 | b %= a; 35 | 36 | if (a > b) 37 | ::std::swap(a, b); 38 | } 39 | 40 | return ::std::min(a, b); 41 | } 42 | 43 | /** 44 | * @brief Integral equivalent of the 45 | * lcm function. 46 | */ 47 | template 48 | T integral_lcm(T a, T b) 49 | { 50 | return a * b / integral_gcd(a, b); 51 | } 52 | 53 | // TODO: add totient function 54 | 55 | template 56 | T modmul(T a, T b, T mod) 57 | { 58 | // Checks 59 | if (a == 0 || b == 0) 60 | return 0; 61 | 62 | if (a == 1) 63 | return (b % mod); 64 | 65 | if (b == 1) 66 | return (a % mod); 67 | 68 | T hmul = modmul(a, b/2, mod); 69 | if ((b & 1) == 0) 70 | return (hmul + hmul) % mod; 71 | else 72 | return ((a % mod) + (hmul + hmul)) % mod; 73 | } 74 | 75 | // Integral only (add f variants) 76 | template 77 | T modexp(T base, T exp, T mod) 78 | { 79 | // Add a string to print on failure (throw actually) 80 | assert(mod > 1); 81 | 82 | if (!exp) 83 | return 1; 84 | else if (exp == 1) 85 | return (base % mod); 86 | 87 | T hexp = exp >> 1; // Halve the exponent (not double it) before recursing 88 | T tmp = modexp(base, hexp, mod); 89 | 90 | tmp = modmul(tmp, tmp, mod); 91 | if (exp & 1) 92 | tmp = modmul(base, tmp, mod); 93 | 94 | return tmp; 95 | } 96 | 97 | /* template 98 | T modexp(T base, T exp, T mod, T totient) 99 | { 100 | // Add a string to print on failure (throw actually) 101 | assert(mod > 1); 102 | 103 | if (!exp) 104 | return 1; 105 | else if (exp == 1) 106 | return (base % mod); 107 | 108 | T hexp = exp >> 1; 109 | T tmp = modexp(base, hexp, mod); 110 | 111 | tmp = (tmp * tmp) % mod; 112 | if (exp & 0x1) 113 | tmp = (tmp * base) % mod; 114 | 115 | return tmp; 116 | } */ 117 | 118 | // TODO: change to only integral types 119 | template 120 | std::vector sieve(T lim) 121 | { 122 | std::vector primes = {2}; 123 | 124 | if (lim <= 2) // lim is exclusive, so there are no primes below it 125 | return {}; 126 | 127 | for (T i = 3; i < lim; i++) { 128 | bool prime = true; 129 | 130 | for (size_t j = 0; primes[j] <= sqrt(i); j++) { 131 | if (i % primes[j] == 0) { 132 | prime = false; 133 | 134 | break; 135 | } 136 | } 137 | 138 | if (prime) 139 | primes.push_back(i); 140 | } 141 | 142 | return primes; 143 | } 144 | 145 | // Prime factorization 146 | template 147 | std::unordered_map factorize(T n) 148 | { 149 | std::unordered_map factors; 150 | 151 | T low = 2; 152 | while (n > 1) { 153 |
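// Trial division: find the smallest remaining factor i <= sqrt(n), divide
// it out fully, and record its multiplicity; e.g. factorize(12) yields
// {2: 2, 3: 1}.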
uint32_t exp = 0; 154 | 155 | T lim = sqrt(n); 156 | T i; 157 | 158 | for (i = low; i <= lim; i++) { 159 | if (n % i == 0) { 160 | do { 161 | n /= i; 162 | exp++; 163 | } while (n % i == 0); 164 | 165 | break; 166 | } 167 | } 168 | 169 | if (exp > 0) { 170 | factors[i] = exp; 171 | } else { 172 | // No factor found up to sqrt(n): n itself is prime, so record it and stop 173 | factors[n] = 1; break; 174 | } 175 | } 176 | 177 | return factors; 178 | } 179 | 180 | } 181 | 182 | } 183 | 184 | #endif 185 | -------------------------------------------------------------------------------- /include/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TIMER_H_ 2 | #define TIMER_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | 7 | namespace zhetapi { 8 | 9 | class Timer { 10 | public: 11 | using clk = std::chrono::high_resolution_clock; 12 | using time = clk::time_point; 13 | private: 14 | clk _clk; 15 | time _start; 16 | time _end; 17 | public: 18 | Timer(); 19 | 20 | void start(); 21 | void stop(); 22 | 23 | time now(); 24 | 25 | long double dt(); 26 | long double split(); 27 | }; 28 | 29 | } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /include/token.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TOKEN_H_ 2 | #define TOKEN_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace zhetapi { 15 | 16 | // Forward declarations 17 | class Engine; 18 | class MethodTable; 19 | 20 | // TODO: clean this file and its comments 21 | 22 | /** 23 | * @brief The basic unit of computation for the ZHP scripting language and 24 | * framework. 25 | */ 26 | class Token { 27 | MethodTable *_mtable = nullptr; 28 | public: 29 | // Keep for this release 30 | // Use more concise names (also refactor to Type) 31 | enum type { 32 | undefined, 33 | alg, 34 | opd, 35 | oph, 36 | opn, 37 | var, 38 | vrh, 39 | vcl, 40 | ftn, 41 | ndr, 42 | ndd, 43 | reg, 44 | token_wildcard, 45 | token_lvalue, 46 | token_rvalue, 47 | token_node_list, 48 | token_module, 49 | token_collection, 50 | token_dictionary 51 | }; 52 | public: 53 | Token(); 54 | Token(MethodTable *); 55 | // Token(const std::vector > &); 56 | 57 | virtual ~Token(); 58 | 59 | // TODO: also need to add documentation for methods 60 | virtual Token *attr(const std::string &, Engine *, 61 | const std::vector &, size_t); 62 | virtual void list_attributes(std::ostream & = std::cout) const; 63 | 64 | bool operator!=(Token *) const; 65 | 66 | // Change caller to a public member (static) 67 | 68 | /* 69 | * Inspector function passed on to all derived classes; helps decide 70 | * what to do with Tokens of different types. 71 | */ 72 | virtual type caller() const; 73 | 74 | /* 75 | * Returns a representation of the Token, regardless of its 76 | * type. 77 | */ 78 | virtual std::string dbg_str() const; 79 | 80 | // TODO: Add a virtual display method 81 | 82 | /* 83 | * Compares Tokens and returns whether they are equivalent. Used for node 84 | * matching. 85 | */ 86 | virtual bool operator==(Token *) const; 87 | 88 | // Read and write 89 | virtual void write(std::ostream &) const; 90 | 91 | /** 92 | * @brief Returns a copy of the Token (with the same data: the resulting 93 | * Token should equal the original with ==). Pure virtual because any 94 | * Tokens used will be copied at some point.
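 *
 * The caller assumes ownership of the returned pointer and is responsible
 * for freeing it.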
95 | */ 96 | virtual Token *copy() const = 0; 97 | 98 | /** 99 | * @brief Thrown if the program requests a Token for an attribute or 100 | * method it does not have. 101 | */ 102 | class unknown_attribute : public std::runtime_error { 103 | public: 104 | unknown_attribute(const std::string &msg) 105 | : std::runtime_error(msg) {} 106 | }; 107 | 108 | /** 109 | * @brief Thrown if the Token does not have a write function. 110 | */ 111 | class empty_io : public std::runtime_error { 112 | public: 113 | empty_io() : std::runtime_error("Empty IO functions (write)...") {} 114 | }; 115 | }; 116 | 117 | // Token id macro 118 | #define zhp_token_id(type) \ 119 | size_t type::id() const { \ 120 | return zhp_id (); \ 121 | } 122 | 123 | // Comparing tokens 124 | bool tokcmp(Token *, Token *); 125 | 126 | // Printing a list of tokens 127 | std::ostream &operator<<(std::ostream &, const std::vector &); 128 | 129 | // Macro for defining methods 130 | #define TOKEN_METHOD(name) \ 131 | Token *name(Token *tptr, const Targs &args) 132 | 133 | } 134 | 135 | #endif 136 | -------------------------------------------------------------------------------- /include/training.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TRAINING_H_ 2 | #define TRAINING_H_ 3 | 4 | // Engine headers 5 | #include "dataset.hpp" 6 | #include "display.hpp" 7 | #include "dnn.hpp" 8 | #include "erf.hpp" 9 | #include "optimizer.hpp" 10 | 11 | #include "linalg.hpp" 12 | 13 | namespace zhetapi { 14 | 15 | namespace ml { 16 | 17 | /* 18 | * TODO: Abstract the training methods into a single training program class, 19 | * which stores the dataset(s) for its entire life. This way, training can be 20 | * made much faster in the GPU, by preloading the datasets into the GPU and 21 | * releasing them only when they are to be deconstructed. 
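 *
 * Typical usage of the free functions below (a sketch, assuming a DNN dnn,
 * DataSets inputs and outputs, an Erf erf and an Optimizer opt):
 *
 *   train_dataset(dnn, inputs, outputs, 32);  // mini-batches of 32
 *   PerformanceStatistics stats = train_dataset_perf(dnn, inputs,
 *           outputs, 32, &erf, &opt);         // with cost/accuracy metrics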
22 | */
23 | 
24 | // Diagnosing function for training
25 | template <class T>
26 | using Comparator = bool (*)(const Vector <T> &, const Vector <T> &);
27 | 
28 | // Default diagnoser
29 | template <class T>
30 | bool _def_cmp(const Vector <T> &a, const Vector <T> &e)
31 | {
32 | 	return a == e;
33 | }
34 | 
35 | // Training statistics
36 | template <class T>
37 | struct PerformanceStatistics {
38 | 	T _cost = T(0);
39 | 	size_t _passed = 0;
40 | 	double _kernel_time = 0;
41 | };
42 | 
43 | // Fitting a single I/O pair
44 | template <class T>
45 | void fit(
46 | 		DNN <T> &dnn,
47 | 		const Vector <T> &in,
48 | 		const Vector <T> &out,
49 | 		Erf <T> *erf,
50 | 		Optimizer <T> *opt)
51 | {
52 | 	Erf <T> *derf = erf->derivative();
53 | 
54 | 	// Use cached compute later
55 | 	Vector <T> actual = dnn(in);
56 | 	Vector <T> delta = derf->compute(out, actual);
57 | 
58 | 	// TODO: should be a vector of matrices
59 | 	Matrix <T> *J;
60 | 	/* Matrix <T> *Jp;
61 | 	Matrix <T> *Q; */
62 | 
63 | 	J = dnn.jacobian_delta(in, delta);
64 | 
65 | 	// Checked
66 | 	// Q = dnn.jacobian_check(in, out, erf);
67 | 
68 | 	/* std::cout << "Diff:" << std::endl;
69 | 	for (size_t i = 0; i < dnn.size(); i++)
70 | 		std::cout << "Diff[i] = " << Q[i] - J[i] << std::endl;
71 | 
72 | 	// TODO: gotta free
73 | 	Vector <T> *ta = new Vector <T> [dnn._size + 1];
74 | 	Vector <T> *tz = new Vector <T> [dnn._size];
75 | 	Jp = simple_gradient(
76 | 		dnn._layers,
77 | 		dnn._size,
78 | 		ta, tz,
79 | 		in, out,
80 | 		erf
81 | 	);
82 | 
83 | 	std::cout << "Diff2:" << std::endl;
84 | 	for (size_t i = 0; i < dnn.size(); i++)
85 | 		std::cout << "Diff2[i] = " << Q[i] - Jp[i] << std::endl; */
86 | 
87 | 	// Continue regardless
88 | 	J = opt->update(J, dnn.size());
89 | 	dnn.apply_gradient(J);
90 | 
91 | 	delete[] J;
92 | 	delete derf;
93 | }
94 | 
95 | template <class T>
96 | void fit(
97 | 		DNN <T> &dnn,
98 | 		const DataSet <T> &ins,
99 | 		const DataSet <T> &outs,
100 | 		Erf <T> *erf,
101 | 		Optimizer <T> *opt)
102 | {
103 | 	/* if (ins.size() != outs.size())
104 | 		throw bad_io_dimensions();
105 | 
106 | 	if ((ins[0].size() != _isize) || (outs[0].size() != _osize))
107 | 		throw bad_io_dimensions();
108 | 
109 | 	if (!_opt)
110 | 		throw null_optimizer();
111 | 
112 | 	if (!_cost)
113 | 		throw null_loss_function(); */
114 | 
115 | 	Matrix <T> *J;
116 | 
117 | 	// TODO: put the batch gradient (multithreading etc.) in a DNN method (batch jacobian)
118 | 	J = simple_batch_gradient(dnn.layers(),
119 | 		dnn.size(), dnn.acache(), dnn.zcache(),
120 | 		ins, outs, erf
121 | 	);
122 | 
123 | 	J = opt->update(J, dnn.size());
124 | 
125 | 	dnn.apply_gradient(J);
126 | 
127 | 	delete[] J;
128 | }
129 | 
130 | template <class T>
131 | void multithreaded_fit(
132 | 		DNN <T> &dnn,
133 | 		const DataSet <T> &ins,
134 | 		const DataSet <T> &outs,
135 | 		Erf <T> *erf,
136 | 		Optimizer <T> *opt,
137 | 		size_t threads)
138 | {
139 | 	/* if (ins.size() != outs.size())
140 | 		throw bad_io_dimensions();
141 | 
142 | 	if ((ins[0].size() != _isize) || (outs[0].size() != _osize))
143 | 		throw bad_io_dimensions();
144 | 
145 | 	if (!_opt)
146 | 		throw null_optimizer();
147 | 
148 | 	if (!_cost)
149 | 		throw null_loss_function(); */
150 | 
151 | 	Matrix <T> *J;
152 | 
153 | 	J = simple_multithreaded_batch_gradient(
154 | 		dnn.layers(),
155 | 		dnn.size(),
156 | 		ins,
157 | 		outs,
158 | 		erf,
159 | 		threads);
160 | 	J = opt->update(J, dnn.size());
161 | 
162 | 	dnn.apply_gradient(J);
163 | 
164 | 	delete[] J;
165 | }
166 | 
167 | // Non-statistical methods (without performance statistics)
168 | template <class T>
169 | void train_dataset(
170 | 		DNN <T> &dnn,
171 | 		const DataSet <T> &ins,
172 | 		const DataSet <T> &outs,
173 | 		size_t batch_size,
174 | 		size_t threads = 1)
175 | {
176 | 	assert(ins.size() == outs.size());
177 | 
178 | 	std::vector <DataSet <T>> input_batches = split(ins, batch_size);
179 | 	std::vector <DataSet <T>> output_batches = split(outs, batch_size);
180 | 
181 | 	size_t n;
182 | 
183 | 	n = input_batches.size();
184 | 	for (size_t i = 0; i < n; i++) {
185 | 		if (threads > 1)
186 | 			dnn.multithreaded_fit(input_batches[i], output_batches[i], threads);
187 | 		else
188 | 			dnn.fit(input_batches[i], output_batches[i]);
189 | 	}
190 | }
191 | 
192 | // Statistical counterparts of the above (with performance metrics)
193 | template <class T>
194 | PerformanceStatistics <T> train_mini_batch_perf(
195 | 		DNN <T> &dnn,
196 | 		const DataSet <T> &ins,
197 | 		const DataSet <T> &outs,
198 | 		Erf <T> *erf,
199 | 		Optimizer <T> *opt,
200 | 		Comparator <T> cmp = _def_cmp <T>,
201 | 		Display::type display = 0,
202 | 		size_t threads = 1)
203 | {
204 | 	assert(ins.size() == outs.size());
205 | 
206 | 	PerformanceStatistics <T> ns;
207 | 	Vector <T> to;
208 | 	T perr;
209 | 	size_t n;
210 | 
211 | 	perr = 0;
212 | 	n = ins.size();
213 | 
214 | 	// Performance statistics first
215 | 	for (size_t i = 0; i < n; i++) {
216 | 		to = dnn(ins[i]);
217 | 		ns._cost += erf->compute(to, outs[i]).x();
218 | 		ns._passed += cmp(to, outs[i]);
219 | 
220 | 		perr += fabs((to - outs[i]).norm() / outs[i].norm());
221 | 	}
222 | 
223 | 	if (threads > 1)
224 | 		multithreaded_fit(dnn, ins, outs, erf, opt, threads);
225 | 	else
226 | 		fit(dnn, ins, outs, erf, opt);
227 | 
228 | 	perr /= n;
229 | 	if (display & Display::batch) {
230 | 		std::cout << "Batch done:"
231 | 			<< " %-err = " << 100 * perr << "%"
232 | 			<< " %-passed = " << (100.0 * ns._passed)/n << "%"
233 | 			<< " #passed = " << ns._passed
234 | 			<< std::endl;
235 | 	}
236 | 
237 | 	return ns;
238 | }
239 | 
240 | template <class T>
241 | PerformanceStatistics <T> train_dataset_perf(
242 | 		DNN <T> &dnn,
243 | 		const DataSet <T> &ins,
244 | 		const DataSet <T> &outs,
245 | 		size_t batch_size,
246 | 		Erf <T> *erf,
247 | 		Optimizer <T> *opt,
248 | 		Display::type display = 0,
249 | 		size_t threads = 1,
250 | 		Comparator <T> cmp = _def_cmp <T>)
251 | {
252 | 	assert(ins.size() == outs.size());
253 | 
254 | 	std::vector <DataSet <T>> input_batches = split(ins, batch_size);
255 | 	std::vector <DataSet <T>> output_batches = split(outs, batch_size);
256 | 
257 | 	PerformanceStatistics <T> ns;
258 | 	PerformanceStatistics <T> bs;
259 | 	size_t n;
260 | 
261 | 	n = input_batches.size();
262 | 	for (size_t i = 0; i < n; i++) {
263 | 		bs = train_mini_batch_perf(dnn,
264 | 			input_batches[i],
265 | 			output_batches[i],
266 | 			erf,
267 | 			opt,
268 | 			cmp,
269 | 			display,
270 | 			threads);
271 | 
272 | 		ns._cost += bs._cost;
273 | 		ns._passed += bs._passed;
274 | 	}
275 | 
276 | 	return ns;
277 | }
278 | 
279 | }
280 | 
281 | }
282 | 
283 | #endif
284 | 
--------------------------------------------------------------------------------
/include/vector_type.hpp:
--------------------------------------------------------------------------------
1 | #ifndef VECTOR_TYPE_H_
2 | #define VECTOR_TYPE_H_
3 | 
4 | // Standard headers
5 | #include <cassert>
6 | 
7 | namespace zhetapi {
8 | 
9 | // Vector type interface
10 | template <class T>
11 | class VectorType {
12 | public:
13 | 	// Required functions
14 | 	virtual size_t size() const = 0;
15 | 
16 | 	virtual T &get(size_t) = 0;
17 | 	virtual const T &get(size_t) const = 0;
18 | 
19 | 	virtual T &operator[](size_t) = 0;
20 | 	virtual const T &operator[](size_t) const = 0;
21 | 
22 | 	// Also add a normalize which returns a new object
23 | 	// virtual void normalize() = 0;
24 | 	virtual T norm() const = 0;
25 | 
26 | 	virtual VectorType &operator+=(const VectorType &) = 0;
27 | 	virtual VectorType &operator-=(const VectorType &) = 0;
28 | 
29 | 	virtual VectorType &operator*=(const T &) = 0;
30 | 	virtual VectorType &operator/=(const T &) = 0;
31 | 
32 | 	// Friend operations
33 | 	template <class U>
34 | 	friend U dot(const VectorType <U> &, const VectorType <U> &);
35 | };
36 | 
37 | template <class T>
38 | T dot(const VectorType <T> &a, const VectorType <T> &b)
39 | {
40 | 	assert(a.size() == b.size());
41 | 
42 | 	T sum = 0;
43 | 	for (size_t i = 0; i < a.size(); i++)
44 | 		sum += a[i] * b[i];
45 | 
46 | 	return sum;
47 | }
48 | 
49 | }
50 | 
51 | #endif
52 | 
--------------------------------------------------------------------------------
/source/autograd/ml.cpp:
--------------------------------------------------------------------------------
1 | #include "../../include/autograd/ml.hpp"
2 | 
3 | namespace zhetapi {
4 | 
5 | namespace autograd {
6 | 
7 | namespace ml {
8 | 
9 | //////////////////////
10 | // Static variables //
11 | //////////////////////
12 | 
13 | utility::Interval <1> _kdense::rng;
14 | 
15 | }
16 | 
17 | }
18 | 
19 | }
20 | 
--------------------------------------------------------------------------------
/source/cuda/nvarena.cu:
--------------------------------------------------------------------------------
1 | #include <cuda/nvarena.cuh>
2 | 
3 | #include <cuda/error.cuh>
4 | 
5 | namespace zhetapi {
6 | 
7 | /**
8 |  * @brief Initializes the allocator with a specific amount of memory.
9 |  *
10 |  * @param mb the number of megabytes (not bytes!) that the allocator should hold
11 |  * on to and serve.
12 |  */
13 | NVArena::NVArena(size_t mb)
14 | {
15 | 	size_t bytes = mb << 20;
16 | 
17 | 	cudaMalloc(&_pool, bytes);
18 | 
19 | 	__cuda_check_error();
20 | }
21 | 
22 | /**
23 |  * @brief Destructor. The allocator releases its pool of memory and notifies
24 |  * the user of blocks of memory that are still allocated.
25 |  */
26 | NVArena::~NVArena()
27 | {
28 | 	if (_warn) {
29 | 		for (const auto &pr : _flist) {
30 | 			if (pr.second != 0) {
31 | 				std::cout << "NVArena: untracked block @"
32 | 					<< pr.first << " [size=" << pr.second
33 | 					<< "]" << std::endl;
34 | 			}
35 | 		}
36 | 	}
37 | 
38 | 	cudaFree(_pool);
39 | }
40 | 
41 | /**
42 |  * @brief Allocates a block of memory.
43 |  *
44 |  * @param bytes the number of bytes to allocate.
45 |  *
46 |  * @return the allocated block.
47 |  */
48 | void *NVArena::alloc(size_t bytes)
49 | {
50 | 	// Case where _flist is empty
51 | 	if (_flist.empty()) {
52 | 		// Assign to the free list
53 | 		_flist[_pool] = bytes;
54 | 
55 | 		return _pool;
56 | 	}
57 | 
58 | 	// Get the last block
59 | 	auto last = _flist.rbegin();
60 | 
61 | 	// TODO: throw bad_alloc if there is no more space
62 | 
63 | 	// Allocation strategy: allocate from the end of the arena
64 | 	void *laddr = last->first + last->second;
65 | 
66 | 	// Assign to the free list
67 | 	_flist[laddr] = bytes;
68 | 
69 | 	return laddr;
70 | }
71 | 
72 | /**
73 |  * @brief Frees a block of memory.
74 |  *
75 |  * @param ptr the block of memory to be freed.
76 |  */
77 | void NVArena::free(void *ptr)
78 | {
79 | 	if (_flist.find(ptr) == _flist.end())
80 | 		throw segfault();
81 | 
82 | 	if (_flist[ptr] == 0)
83 | 		throw double_free();
84 | 
85 | 	_flist[ptr] = 0;
86 | }
87 | 
88 | /**
89 |  * @brief Copies a block of memory from host memory to GPU memory, using \c
90 |  * cudaMemcpy. Warns if the number of bytes to copy exceeds the block size on
91 |  * the GPU (assuming the allocator's warning flag is turned on).
92 |  *
93 |  * @param dst the pointer to the destination in GPU memory.
94 |  * @param src the pointer to the block in host memory.
95 |  * @param bytes the number of bytes to copy.
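 *
 * A minimal usage sketch (the sizes here are hypothetical, and assume the
 * arena was constructed with enough memory):
 *
 * @code
 * NVArena arena(4);				// 4 MB pool on the GPU
 * float host[256] = {1, 2, 3};
 * void *dev = arena.alloc(sizeof(host));	// device-side block
 * arena.write(dev, host, sizeof(host));	// host -> device copy
 * @endcode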
96 |  */
97 | void NVArena::write(void *dst, void *src, size_t bytes)
98 | {
99 | 	// Do some checks before copying (the lookup assumes dst is the base address of a block)
100 | 	if (_warn) {
101 | 		auto lb = _flist.lower_bound(dst);
102 | 
103 | 		if (lb == _flist.end()) {
104 | 			std::cout << "NVArena: @" << dst
105 | 				<< " was never allocated"
106 | 				<< std::endl;
107 | 		} else {
108 | 			void *lim = lb->first + lb->second;
109 | 
110 | 			if (dst + bytes > lim) {
111 | 				std::cout << "NVArena: writing " << bytes
112 | 					<< " bytes to block @" << lb->first
113 | 					<< " [offset +"
114 | 					<< ((char *) dst - (char *) lb->first)
115 | 					<< "] with only " << lb->second
116 | 					<< " bytes allocated" << std::endl;
117 | 			}
118 | 		}
119 | 	}
120 | 
121 | 	cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice);
122 | 
123 | 	__cuda_check_error();
124 | }
125 | 
126 | /**
127 |  * @brief Copies a block of memory from GPU memory to host memory, using \c
128 |  * cudaMemcpy. Warns if the number of bytes to copy exceeds the block size on
129 |  * the GPU (assuming the allocator's warning flag is turned on).
130 |  *
131 |  * @param dst the pointer to the destination in host memory.
132 |  * @param src the pointer to the block in GPU memory.
133 |  * @param bytes the number of bytes to copy.
134 |  */
135 | void NVArena::read(void *dst, void *src, size_t bytes)
136 | {
137 | 	// Do some checks before copying
138 | 	if (_warn) {
139 | 		auto lb = _flist.lower_bound(src);
140 | 
141 | 		if (lb == _flist.end()) {
142 | 			std::cout << "NVArena: @" << src
143 | 				<< " was never allocated"
144 | 				<< std::endl;
145 | 		} else {
146 | 			void *lim = lb->first + lb->second;
147 | 
148 | 			if (src + bytes > lim) {
149 | 				std::cout << "NVArena: read " << bytes
150 | 					<< " bytes from block @" << lb->first
151 | 					<< " [offset +"
152 | 					<< ((char *) src - (char *) lb->first)
153 | 					<< "] with only " << lb->second
154 | 					<< " bytes allocated" << std::endl;
155 | 			}
156 | 		}
157 | 	}
158 | 
159 | 	cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost);
160 | 
161 | 	__cuda_check_error();
162 | }
163 | 
164 | /**
165 |  * @brief Prints each block that has been allocated (or freed). Use for
166 |  * debugging purposes.
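 *
 * For example (a sketch; actual addresses vary from run to run):
 *
 * @code
 * NVArena arena(4);
 * void *a = arena.alloc(256);
 * void *b = arena.alloc(512);
 * arena.free(a);
 * arena.show_mem_map();	// first block prints 0 bytes and "[freed]",
 *				// second block prints 512 bytes
 * @endcode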
167 | */ 168 | void NVArena::show_mem_map() const 169 | { 170 | for (const auto &pr : _flist) { 171 | std::cout << "block @" << pr.first << ": " << pr.second 172 | << " bytes"; 173 | 174 | if (pr.second == 0) 175 | std::cout << "\t[freed]"; 176 | 177 | std::cout << std::endl; 178 | } 179 | } 180 | 181 | } 182 | -------------------------------------------------------------------------------- /source/io.cpp: -------------------------------------------------------------------------------- 1 | // Standard headers 2 | #include 3 | #include 4 | #include 5 | 6 | // Library headers 7 | #include "../include/io/print.hpp" 8 | 9 | namespace zhetapi { 10 | 11 | namespace io { 12 | 13 | // TODO: create a separate (single header) library for this 14 | std::string table(const Args &headers, 15 | const std::vector &rows) 16 | { 17 | assert(headers.size() == rows[0].size()); 18 | 19 | // Special characters 20 | std::string vert = "\u2502"; 21 | std::string horiz = "\u2500"; 22 | 23 | // Top corners 24 | std::string tl = "\u250C"; 25 | std::string tr = "\u2510"; 26 | 27 | // Bottom corners 28 | std::string bl = "\u2514"; 29 | std::string br = "\u2518"; 30 | 31 | // Calculate column widths 32 | std::vector widths(headers.size(), 0); 33 | for (int i = 0; i < headers.size(); i++) 34 | widths[i] = headers[i].length(); 35 | 36 | for (const auto &row : rows) { 37 | for (size_t i = 0; i < row.size(); i++) 38 | widths[i] = std::max(widths[i], row[i].size()); 39 | } 40 | 41 | // Stream 42 | std::stringstream ss; 43 | 44 | // Print the top 45 | ss << tl; 46 | for (size_t i = 0; i < headers.size(); i++) { 47 | for (int n = 0; n < widths[i] + 2; n++) 48 | ss << horiz; 49 | 50 | if (i < headers.size() - 1) 51 | ss << "\u252C"; 52 | else 53 | ss << tr; 54 | } 55 | ss << "\n"; 56 | 57 | // Print the header 58 | for (int i = 0; i < headers.size(); i++) { 59 | ss << vert << " " << std::setw(widths[i]) 60 | << headers[i] << " "; 61 | } 62 | ss << vert << "\n"; 63 | 64 | // Post header separator 65 | ss << "\u251C"; 66 | for (size_t i = 0; i < headers.size(); i++) { 67 | for (int n = 0; n < widths[i] + 2; n++) 68 | ss << horiz; 69 | 70 | if (i < headers.size() - 1) 71 | ss << "\u253C"; 72 | else 73 | ss << "\u2524"; 74 | } 75 | ss << "\n"; 76 | 77 | // Print the rows 78 | for (const auto &row : rows) { 79 | for (int i = 0; i < row.size(); i++) { 80 | ss << vert << " " << std::setw(widths[i]) 81 | << row[i] << " "; 82 | } 83 | ss << vert << std::endl; 84 | } 85 | 86 | // Post row separator 87 | ss << bl; 88 | for (size_t i = 0; i < headers.size(); i++) { 89 | for (int n = 0; n < widths[i] + 2; n++) 90 | ss << horiz; 91 | 92 | if (i < headers.size() - 1) 93 | ss << "\u2534"; 94 | else 95 | ss << br; 96 | } 97 | ss << "\n"; 98 | 99 | return ss.str(); 100 | } 101 | 102 | } 103 | 104 | } 105 | -------------------------------------------------------------------------------- /source/linalg.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/linalg.hpp" 2 | 3 | namespace zhetapi { 4 | 5 | namespace linalg { 6 | 7 | const long double GAMMA = 1.15470053837925152901 + 1e-4; 8 | const long double EPSILON = 1e-10; 9 | 10 | static Mat size_reduce(const Mat &H) 11 | { 12 | // Dimensions 13 | size_t n = H.get_rows(); 14 | 15 | // Unimodular matrix 16 | Mat D = Mat::identity(n); 17 | 18 | for (size_t i = 1; i < n; i++) { 19 | for (int j = i - 1; j >= 0; j--) { 20 | long double q = std::floor(0.5 + H[i][j]/H[j][j]); 21 | 22 | for (size_t k = 0; k < n; k++) 23 | D[i][k] -= q * D[j][k]; 
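			// q is the nearest integer to H[i][j]/H[j][j]; subtracting
			// q times row j of D keeps D unimodular while performing
			// the Hermite size-reduction step of PSLQ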
24 | 		}
25 | 	}
26 | 
27 | 	return D;
28 | }
29 | 
30 | static std::pair <size_t, Mat> bergman_swap(const Mat &H, long double gamma)
31 | {
32 | 	// Dimensions
33 | 	size_t n = H.get_rows();
34 | 
35 | 	// Unimodular matrix
36 | 	Mat D = Mat::identity(n);
37 | 
38 | 	long double max = 0;
39 | 
40 | 	size_t r = -1;
41 | 	for (size_t i = 0; i < n - 1; i++) {
42 | 		long double tmp = pow(gamma, i) * std::abs(H[i][i]);
43 | 
44 | 		if (tmp > max) {
45 | 			max = tmp;
46 | 
47 | 			r = i;
48 | 		}
49 | 	}
50 | 
51 | 	D.swap_rows(r, r + 1);
52 | 
53 | 	return {r, D};
54 | }
55 | 
56 | static Mat corner(const Mat &H, size_t r)
57 | {
58 | 	// Dimensions
59 | 	size_t n = H.get_rows();
60 | 
61 | 	// Cached constants
62 | 	long double eta = H[r][r];
63 | 	long double beta = H[r + 1][r];
64 | 	long double lambda = H[r + 1][r + 1];
65 | 	long double delta = sqrt(beta * beta + lambda * lambda);
66 | 
67 | 	// Orthogonal matrix Q
68 | 	return Mat(n - 1, n - 1,
69 | 		[&](size_t i, size_t j) -> long double {
70 | 			if (i == j) {
71 | 				if ((i == r) || (i == r + 1))
72 | 					return beta/delta;
73 | 				else
74 | 					return 1;
75 | 			} else if ((i == r) && (j == r + 1)) {
76 | 				return -lambda/delta;
77 | 			} else if ((i == r + 1) && (j == r)) {
78 | 				return lambda/delta;
79 | 			}
80 | 
81 | 			return 0;
82 | 		}
83 | 	);
84 | }
85 | 
86 | // Using the PSLQe algorithm from https://arxiv.org/abs/1707.05037
87 | Vec pslq(const Vec &a, long double gamma, long double epsilon)
88 | {
89 | 	// Length of a
90 | 	size_t n = a.size();
91 | 
92 | 	// Save a copy of a (take normalized value)
93 | 	Mat alpha = a.normalized().transpose();
94 | 
95 | 	// Partial sums
96 | 	Vec s(n,
97 | 		[&](size_t j) -> long double {
98 | 			long double sum = 0;
99 | 
100 | 			for (size_t k = j; k < n; k++)
101 | 				sum += alpha[0][k] * alpha[0][k];
102 | 
103 | 			return sqrt(sum);
104 | 		}
105 | 	);
106 | 
107 | 	// Construct the matrix H_alpha
108 | 	Mat H_alpha(n, n - 1,
109 | 		[&](size_t i, size_t j) -> long double {
110 | 			if ((i < j) && (j < n - 1))
111 | 				return 0;
112 | 			else if ((i == j) && (i < n - 1))
113 | 				return s[i + 1]/s[i];
114 | 
115 | 			return -(alpha[0][i] * alpha[0][j])/(s[j] * s[j + 1]);
116 | 		}
117 | 	);
118 | 
119 | 	Mat H = H_alpha;
120 | 
121 | 	Mat A = Mat::identity(n);
122 | 	Mat B = Mat::identity(n);
123 | 
124 | 	Mat D = size_reduce(H);
125 | 
126 | 	// Update lambda: returns false if H has a zero on the diagonal
127 | 	auto update = [&]() -> bool {
128 | 		Mat D_inv = D.inverse();
129 | 
130 | 		alpha = alpha * D_inv;
131 | 		H = D * H;
132 | 		A = D * A;
133 | 		B = B * D_inv;
134 | 
135 | 		// Check diagonal elements for non-zero
136 | 		for (size_t i = 0; i < H.get_cols(); i++) {
137 | 			if (std::abs(H[i][i]) < epsilon)
138 | 				return false;
139 | 		}
140 | 
141 | 		return true;
142 | 	};
143 | 
144 | 	// Update once first
145 | 	update();
146 | 
147 | 	// Main loop
148 | 	while (std::abs(H[n - 1][n - 2]) >= epsilon) {
149 | 		auto Dr = bergman_swap(H, gamma);
150 | 
151 | 		D = Dr.second;
152 | 
153 | 		if (!update())
154 | 			break;
155 | 
156 | 		if (Dr.first < n - 2)
157 | 			H *= corner(H, Dr.first);
158 | 
159 | 		D = size_reduce(H);
160 | 
161 | 		if (!update())
162 | 			break;
163 | 	}
164 | 
165 | 	return B.get_column(n - 2);
166 | }
167 | 
168 | }
169 | 
170 | }
171 | 
--------------------------------------------------------------------------------
/source/polynomial.cpp:
--------------------------------------------------------------------------------
1 | #include "../include/polynomial.hpp"
2 | 
3 | namespace zhetapi {
4 | 
5 | // Next power of 2
6 | size_t npow2(size_t k)
7 | {
8 | 	size_t v = k;
9 | 
10 | 	// Assuming size_t is 64-bit
11 | 	v--;
12 | 	v |= v >> 1;
13 | 	v |= v >> 2;
14 | 	v |= v >> 4;
15 | 	v |= v >> 8;
16 | 	v |= v >> 16;
17 | 	v |= v >> 32;
18 | 	v++;
19 | 
20 | 	return v;
21 | }
22 | 
23 | }
24 | 
--------------------------------------------------------------------------------
/source/range.cpp:
--------------------------------------------------------------------------------
1 | #include "../include/range.hpp"
2 | 
3 | namespace zhetapi {
4 | 
5 | Range all = Range (-1, 0, -1);
6 | 
7 | }
8 | 
--------------------------------------------------------------------------------
/source/std/functions.cpp:
--------------------------------------------------------------------------------
1 | #include "../../include/std/functions.hpp"
2 | 
3 | namespace zhetapi {
4 | 
5 | namespace special {
6 | 
7 | double ln_gamma(double x)
8 | {
9 | 	if (x <= 0)
10 | 		throw("ln_gamma: expected a positive argument.");
11 | 
12 | 	static const int N = 14;
13 | 
14 | 	static const double C[] = {
15 | 		57.1562356658629235,
16 | 		-59.5979603554754912,
17 | 		14.1360979747417471,
18 | 		-0.491913816097620199,
19 | 		0.339946499848118887e-4,
20 | 		0.465236289270485756e-4,
21 | 		-0.983744753048795646e-4,
22 | 		0.158088703224912494e-3,
23 | 		-0.210264441724104883e-3,
24 | 		0.217439618115212643e-3,
25 | 		-0.164318106536763890e-3,
26 | 		0.844182239838527433e-4,
27 | 		-0.261908384015814087e-4,
28 | 		0.368991826595316234e-5
29 | 	};
30 | 
31 | 	double tx;
32 | 	double ty;
33 | 	double tmp;
34 | 	double ser;
35 | 
36 | 	ty = tx = x;
37 | 
38 | 	tmp = tx + 5.24218750000000000;
39 | 	tmp = (tx + 0.5) * log(tmp) - tmp;
40 | 	ser = 0.999999999999997092;
41 | 
42 | 	int i = 0;
43 | 	while (i < N)
44 | 		ser += C[i++]/(++ty);
45 | 
46 | 	return tmp + log(2.5066282746310005 * ser / tx);
47 | }
48 | 
49 | double ln_factorial(int x)
50 | {
51 | 	static double table[FACTORIAL_BUFFER_SIZE];
52 | 	static bool init = true;
53 | 
54 | 	if (init) {
55 | 		init = false;
56 | 
57 | 		for (int i = 0; i < FACTORIAL_BUFFER_SIZE; i++)
58 | 			table[i] = ln_gamma(i + 1.0);
59 | 	}
60 | 
61 | 	if (x < 0)
62 | 		throw("ln_factorial: cannot have a negative argument.");
63 | 
64 | 	if (x < FACTORIAL_BUFFER_SIZE)
65 | 		return table[x];
66 | 
67 | 	return ln_gamma(x + 1.0);
68 | }
69 | 
70 | double poisson(double lambda, int k)
71 | {
72 | 	double lp = -lambda + k * log(lambda) - ln_gamma(k + 1);	// log of lambda^k e^(-lambda) / k!
73 | 	return exp(lp);
74 | }
75 | 
76 | }
77 | 
78 | }
79 | 
--------------------------------------------------------------------------------
/source/std/interval.cpp:
--------------------------------------------------------------------------------
1 | #include "../../include/std/interval.hpp"
2 | 
3 | namespace zhetapi {
4 | 
5 | namespace utility {
6 | 
7 | // Static
8 | std::random_device rd;
9 | 
10 | dre disjoint::gen(rd());
11 | udb disjoint::distro = udb(0, 1);
12 | 
13 | dre Interval <1> ::gen(rd());
14 | udb Interval <1> ::distro = udb(0, 1);
15 | 
16 | Interval <1> runit;
17 | 
18 | // Functions
19 | Interval <1> operator|(const Interval <1> &a, const Interval <1> &b)
20 | {
21 | 	Interval <1> out = a;
22 | 
23 | 	return out |= b;
24 | }
25 | 
26 | std::ostream &operator<<(std::ostream &os, const Interval <1> &itv)
27 | {
28 | 	size_t sz = itv._union.size();
29 | 
30 | 	for (size_t i = 0; i < sz; i++) {
31 | 		auto itr = itv._union.begin();
32 | 
33 | 		std::advance(itr, i);
34 | 
35 | 		if (itr->closed)
36 | 			os << "[";
37 | 		else
38 | 			os << "(";
39 | 
40 | 		os << itr->left << ", " << itr->right;
41 | 
42 | 		if (itr->closed)
43 | 			os << "]";
44 | 		else
45 | 			os << ")";
46 | 
47 | 		if (i < sz - 1)
48 | 			os << " U ";
49 | 	}
50 | 
51 | 	return os;
52 | }
53 | 
54 | // Literal constructor
55 | Interval <1> operator""_I(unsigned long long int x)
56 | {
57 | 	return
Interval <1> (x); 58 | } 59 | 60 | Interval <1> operator""_I(long double x) 61 | { 62 | return Interval <1> (x); 63 | } 64 | 65 | /* 66 | template 67 | Interval operator*(const Interval &, const Interval &) 68 | { 69 | } */ 70 | 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /source/timer.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/timer.hpp" 2 | 3 | namespace zhetapi { 4 | 5 | Timer::Timer() {} 6 | 7 | void Timer::start() 8 | { 9 | _start = _clk.now(); 10 | } 11 | 12 | void Timer::stop() 13 | { 14 | _end = _clk.now(); 15 | } 16 | 17 | Timer::time Timer::now() 18 | { 19 | return _clk.now(); 20 | } 21 | 22 | long double Timer::dt() 23 | { 24 | return (std::chrono::duration_cast 25 | (_end - _start)).count(); 26 | } 27 | 28 | long double Timer::split() 29 | { 30 | stop(); 31 | 32 | return dt(); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /testing/activation.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | using namespace zhetapi; 4 | using namespace zhetapi::ml; 5 | 6 | static bool act_general(ostream &oss, 7 | const std::string &prefix, 8 | const vector *> &acts, 9 | const vector > &ins, 10 | const vector >> &outs) 11 | { 12 | static double epsilon = 1e-10; 13 | 14 | oss << "Inputs:" << endl; 15 | for (auto v : ins) 16 | oss << "\t" << v << endl; 17 | 18 | for (size_t i = 0; i < acts.size(); i++) { 19 | oss << endl; 20 | oss << "Next activation:" << endl; 21 | for (size_t j = 0; j < ins.size(); j++) { 22 | oss << prefix << (i + 1) << "(input #" << (j + 1) 23 | << ") = " << acts[i]->compute(ins[j]) << endl; 24 | oss << "should equal " << outs[i][j] << endl; 25 | oss << "diff = " << (acts[i]->compute(ins[j]) - outs[i][j]).norm() << endl; 26 | 27 | if ((acts[i]->compute(ins[j]) - outs[i][j]).norm() > epsilon) 28 | return false; 29 | } 30 | } 31 | 32 | vector *> dacts; 33 | for (auto act : acts) 34 | dacts.push_back(act->derivative()); 35 | 36 | for (size_t i = 0; i < dacts.size(); i++) { 37 | oss << endl; 38 | oss << "Next activation derivative:" << endl; 39 | for (size_t j = 0; j < ins.size(); j++) { 40 | Vector dout(ins[j].size()); 41 | 42 | // Use gradient checking 43 | for (size_t k = 0; k < ins[j].size(); k++) { 44 | Vector back = ins[j]; 45 | Vector forward = ins[j]; 46 | 47 | back[k] -= epsilon; 48 | forward[k] += epsilon; 49 | 50 | dout[k] = (acts[i]->compute(forward) 51 | - acts[i]->compute(back))[k]/(2 * epsilon); 52 | } 53 | 54 | oss << prefix << (i + 1) << "(input #" << (j + 1) 55 | << ") = " << dacts[i]->compute(ins[j]) << endl; 56 | oss << "should equal " << dout << endl; 57 | oss << "diff = " << (dacts[i]->compute(ins[j]) - dout).norm() << endl; 58 | 59 | if ((dacts[i]->compute(ins[j]) - dout).norm() > 1e-5) 60 | return false; 61 | } 62 | } 63 | 64 | for (auto act : acts) 65 | delete act; 66 | 67 | for (auto dact : dacts) 68 | delete dact; 69 | 70 | return true; 71 | } 72 | 73 | TEST(act_linear) 74 | { 75 | return act_general(oss, 76 | "linear", 77 | { 78 | new Linear (), 79 | new Linear (2) 80 | }, 81 | { 82 | Vector {1, 2, 3, 4} 83 | }, 84 | { 85 | {Vector {1, 2, 3, 4}}, 86 | {Vector {2, 4, 6, 8}} 87 | }); 88 | } 89 | 90 | TEST(act_relu) 91 | { 92 | return act_general(oss, 93 | "relu", 94 | { 95 | new ReLU () 96 | }, 97 | { 98 | Vector {1, 2, 3, 4}, 99 | Vector {1, -1, 3, -1} 100 | }, 101 | { 102 | { 103 | Vector {1, 2, 3, 
4}, 104 | Vector {1, 0, 3, 0} 105 | } 106 | }); 107 | } 108 | 109 | TEST(act_leaky_relu) 110 | { 111 | return act_general(oss, 112 | "leaky relu", 113 | { 114 | new LeakyReLU (0.2) 115 | }, 116 | { 117 | Vector {1, 2, 3, 4}, 118 | Vector {1, -1, 3, -2} 119 | }, 120 | { 121 | { 122 | Vector {1, 2, 3, 4}, 123 | Vector {1, -0.2, 3, -0.4} 124 | } 125 | }); 126 | } 127 | 128 | TEST(act_sigmoid) 129 | { 130 | return act_general(oss, 131 | "sigmoid", 132 | { 133 | new Sigmoid () 134 | }, 135 | { 136 | Vector {0.5, 2, 0, 4}, 137 | Vector {1, -1, 3, -2} 138 | }, 139 | { 140 | { 141 | Vector { 142 | 0.622459331202, 143 | 0.880797077978, 144 | 0.5, 145 | 0.982013790038}, 146 | Vector { 147 | 0.73105857863, 148 | 0.26894142137, 149 | 0.952574126822, 150 | 0.119202922022} 151 | } 152 | }); 153 | } 154 | -------------------------------------------------------------------------------- /testing/calculus.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(integration) 4 | { 5 | using namespace zhetapi::utility; 6 | 7 | auto f = [](double x) { 8 | return x * x + x; 9 | }; 10 | 11 | auto df = [](double x) { 12 | return 2 * x + 1; 13 | }; 14 | 15 | oss << "f(4) = " << f(4) << endl; 16 | oss << "f(4) = " << eulers_method(df, {2.0, f(2)}, 4.0) << endl; 17 | 18 | return true; 19 | } 20 | -------------------------------------------------------------------------------- /testing/fourier.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(fourier_series) 4 | { 5 | using namespace zhetapi; 6 | 7 | FourierSeries fsa({1, 2, 3, 7}, {5, 6, 8}); 8 | 9 | oss << "fsa @ 0 = " << fsa(0) << endl; 10 | oss << "fsa @ 10 = " << fsa(10) << endl; 11 | 12 | FourierSeries fsb({1, 2, 5, 3, 6, 7, 8}); 13 | 14 | oss << "fsb @ 0 = " << fsb(0) << endl; 15 | oss << "fsb @ 10 = " << fsb(10) << endl; 16 | 17 | if (fsa(0) != fsb(0) || fsa(10) != fsb(10)) { 18 | oss << "Unequal values..." 
<< endl; 19 | 20 | return false; 21 | } 22 | 23 | return true; 24 | } 25 | -------------------------------------------------------------------------------- /testing/global.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PORT_H_ 2 | #define PORT_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | // Engine headers 17 | #include "../zhetapi.hpp" 18 | 19 | // Macros 20 | #define TEST(name) bool name(ostream &oss, int cout) 21 | #define RIG(name) {#name, &name} 22 | 23 | // Namespaces 24 | using namespace std; 25 | 26 | // Typedefs 27 | using tclk = chrono::high_resolution_clock; 28 | using tpoint = chrono::high_resolution_clock::time_point; 29 | 30 | // Timers 31 | extern tclk clk; 32 | extern tpoint tmp; 33 | 34 | // Bench marking structures 35 | struct bench { 36 | tpoint epoch; 37 | 38 | bench() : epoch(clk.now()) {} 39 | bench(const tpoint &t) : epoch(t) {} 40 | }; 41 | 42 | ostream &operator<<(ostream &, const bench &); 43 | 44 | // Coloring 45 | struct term_colors { 46 | string color; 47 | }; 48 | 49 | extern term_colors reset; 50 | 51 | extern term_colors bred; 52 | extern term_colors bgreen; 53 | extern term_colors byellow; 54 | 55 | ostream &operator<<(ostream &, const term_colors &); 56 | 57 | struct term_ok {}; 58 | struct term_err {}; 59 | 60 | extern term_ok ok; 61 | extern term_err err; 62 | 63 | ostream &operator<<(ostream &, const term_ok &); 64 | ostream &operator<<(ostream &, const term_err &); 65 | 66 | // Test functions 67 | TEST(gamma_and_factorial); 68 | 69 | TEST(vector_construction_and_memory); 70 | TEST(vector_operations); 71 | 72 | TEST(matrix_construction_and_memory); 73 | TEST(kernel_apt_and_mult); 74 | TEST(kernel_rmt_and_mult); 75 | TEST(kernel_vvt_mult); 76 | 77 | TEST(tensor_construction_and_memory); 78 | 79 | TEST(integration); 80 | 81 | TEST(interval_construction); 82 | TEST(interval_sampling); 83 | 84 | TEST(diag_matrix); 85 | TEST(qr_decomp); 86 | TEST(lq_decomp); 87 | TEST(qr_alg); 88 | TEST(matrix_props); 89 | 90 | TEST(fourier_series); 91 | 92 | TEST(polynomial_construction); 93 | TEST(polynomial_comparison); 94 | TEST(polynomial_arithmetic); 95 | 96 | TEST(act_linear); 97 | TEST(act_relu); 98 | TEST(act_leaky_relu); 99 | TEST(act_sigmoid); 100 | 101 | #endif 102 | -------------------------------------------------------------------------------- /testing/interval.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(interval_construction) 4 | { 5 | using namespace zhetapi::utility; 6 | 7 | Interval <> i(5, 10); 8 | 9 | oss << i << endl; 10 | 11 | i = 100.0_I; 12 | 13 | oss << i << endl; 14 | 15 | i = 50_I; 16 | 17 | oss << i << endl; 18 | 19 | i = 1_I; 20 | 21 | oss << i << endl; 22 | 23 | i = 0_I; 24 | 25 | oss << i << endl; 26 | 27 | i = Interval <> (); 28 | 29 | oss << i << endl; 30 | 31 | return true; 32 | } 33 | 34 | TEST(interval_sampling) 35 | { 36 | using namespace zhetapi::utility; 37 | 38 | Interval <> i = 100_I; 39 | 40 | for (size_t k = 0; k < 10; k++) { 41 | long double x = i.uniform(); 42 | 43 | oss << "sampled " << x << endl; 44 | 45 | if (x < 0 || x > 100) { 46 | oss << "\tbad value" << endl; 47 | 48 | return false; 49 | } 50 | } 51 | 52 | return true; 53 | } 54 | -------------------------------------------------------------------------------- /testing/linalg.cpp: 
-------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(diag_matrix) 4 | { 5 | using namespace zhetapi; 6 | using namespace zhetapi::linalg; 7 | 8 | Matrix A { 9 | {1, 0, 0, 0}, 10 | {0, 2, 0, 0}, 11 | {0, 0, 3, 0}, 12 | {0, 0, 0, 4} 13 | }; 14 | 15 | vector cs {1, 2, 3, 4}; 16 | 17 | Matrix B = diag(cs); 18 | Matrix C = diag(1, 2, 3, 4); 19 | 20 | oss << "A = " << A << endl; 21 | oss << "B = " << B << endl; 22 | oss << "C = " << C << endl; 23 | 24 | if ((A != B) || (A != C) || (B != C)) { 25 | oss << "Not equal..." << endl; 26 | 27 | return false; 28 | } 29 | 30 | return true; 31 | } 32 | 33 | TEST(qr_decomp) 34 | { 35 | using namespace zhetapi; 36 | using namespace zhetapi::linalg; 37 | 38 | Matrix A = diag(1.0, 4.0, 1.0, 5.0); 39 | 40 | oss << "A = " << A << endl; 41 | 42 | auto qr = qr_decompose(A); 43 | 44 | oss << "\tQ = " << qr.q() << endl; 45 | oss << "\tR = " << qr.r() << endl; 46 | oss << "\tQR = " << qr.product() << endl; 47 | 48 | oss << "Error = " << (qr.product() - A).norm() << endl; 49 | 50 | if ((qr.product() - A).norm() > 1e-10) { 51 | oss << "Failure: A != QR" << endl; 52 | 53 | return false; 54 | } 55 | 56 | return true; 57 | } 58 | 59 | TEST(lq_decomp) 60 | { 61 | using namespace zhetapi; 62 | using namespace zhetapi::linalg; 63 | 64 | Matrix A = diag(1.0, 4.0, 1.0, 5.0); 65 | 66 | oss << "A = " << A << endl; 67 | 68 | auto lq = lq_decompose(A); 69 | 70 | oss << "\tL = " << lq.l() << endl; 71 | oss << "\tQ = " << lq.q() << endl; 72 | oss << "\tLQ = " << lq.product() << endl; 73 | 74 | oss << "Error = " << (lq.product() - A).norm() << endl; 75 | 76 | if ((lq.product() - A).norm() > 1e-10) { 77 | oss << "Failure: A != LQ" << endl; 78 | 79 | return false; 80 | } 81 | 82 | return true; 83 | } 84 | 85 | TEST(qr_alg) 86 | { 87 | using namespace zhetapi; 88 | using namespace zhetapi::linalg; 89 | 90 | // Test on diagonal matrices: 91 | // eigenvalues should be equal to 92 | // the diagonal entries 93 | Matrix A = diag(1.0, 2.0, 3.0, 4.0); 94 | 95 | oss << "A = " << A << endl; 96 | 97 | Vector E = qr_algorithm(A); 98 | 99 | oss << "E = " << E << endl; 100 | 101 | for (size_t i = 0; i < E.size(); i++) { 102 | if (E[i] != A[i][i]) { 103 | oss << "Incorrect eigenvalues..." << endl; 104 | 105 | return false; 106 | } 107 | } 108 | 109 | // Test on Fibonacci matrix 110 | A = {{1, 1}, {1, 0}}; 111 | 112 | E = qr_algorithm(A); 113 | 114 | Vector G { 115 | (double) (1 + sqrt(5.0))/2.0, 116 | (double) (1 - sqrt(5.0))/2.0 117 | }; 118 | 119 | oss << "Fib. matrix = " << A << endl; 120 | oss << "E = " << E << endl; 121 | oss << "G = " << G << endl; 122 | 123 | oss << "\nError = " << (E - G).norm() << endl; 124 | 125 | return true; 126 | } 127 | 128 | TEST(matrix_props) 129 | { 130 | using namespace zhetapi; 131 | using namespace zhetapi::linalg; 132 | 133 | Matrix A = diag(1.0, 2.0, 3.0, 4.0); 134 | Matrix I = Matrix ::identity(4); 135 | 136 | auto qr = qr_decompose(A); 137 | auto lq = lq_decompose(A); 138 | 139 | oss << "Is A diagonal? " << (is_diagonal(A) ? "yes" : "no") << endl; 140 | if (!is_diagonal(A)) { 141 | oss << "\tWrong answer..." << endl; 142 | 143 | return false; 144 | } 145 | 146 | oss << "Is I identity? " << (is_identity(I) ? "yes" : "no") << endl; 147 | if (!is_identity(I)) { 148 | oss << "\tWrong answer..." << endl; 149 | 150 | return false; 151 | } 152 | 153 | oss << "Is Q orthogonal? " << (is_orthogonal(qr.q()) ? "yes" : "no") << endl; 154 | if (!is_orthogonal(qr.q())) { 155 | oss << "\tWrong answer..." 
<< endl; 156 | 157 | return false; 158 | } 159 | 160 | oss << "Is R upper triangular? " << (is_upper_triangular(qr.r()) ? "yes" : "no") << endl; 161 | if (!is_upper_triangular(qr.r())) { 162 | oss << "\tWrong answer..." << endl; 163 | 164 | return false; 165 | } 166 | 167 | oss << "Is L lower triangular? " << (is_lower_triangular(lq.l()) ? "yes" : "no") << endl; 168 | if (!is_lower_triangular(lq.l())) { 169 | oss << "\tWrong answer..." << endl; 170 | 171 | return false; 172 | } 173 | 174 | return true; 175 | } 176 | -------------------------------------------------------------------------------- /testing/main.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | #define THREADS 8 4 | #define DEBUG_EXCEPTION 5 | // #define PASSTHROUGH_EXCEPTION 6 | // #define HANDLE_SEGFAULT 7 | // #define SINGLET kernel_vvt_mult 8 | 9 | // TODO: need a method to conduct a single test 10 | typedef pair singlet; 11 | 12 | // Testing rig 13 | vector rig { 14 | RIG(gamma_and_factorial), 15 | RIG(vector_construction_and_memory), 16 | RIG(matrix_construction_and_memory), 17 | RIG(tensor_construction_and_memory), 18 | RIG(integration), 19 | RIG(vector_operations), 20 | RIG(interval_construction), 21 | RIG(interval_sampling), 22 | RIG(diag_matrix), 23 | RIG(qr_decomp), 24 | RIG(lq_decomp), 25 | RIG(qr_alg), 26 | RIG(matrix_props), 27 | RIG(fourier_series), 28 | RIG(polynomial_construction), 29 | RIG(polynomial_comparison), 30 | RIG(polynomial_arithmetic), 31 | RIG(act_linear), 32 | RIG(act_relu), 33 | RIG(act_leaky_relu), 34 | RIG(act_sigmoid), 35 | RIG(kernel_apt_and_mult), 36 | RIG(kernel_rmt_and_mult), 37 | RIG(kernel_vvt_mult) 38 | }; 39 | 40 | vector failed; 41 | 42 | #ifdef HANDLE_SEGFAULT 43 | 44 | // Segfault handler 45 | void segfault_sigaction(int signal, siginfo_t *si, void *arg) 46 | { 47 | printf("\nCaught segfault at address %p\n", si->si_addr); 48 | exit(-1); 49 | } 50 | 51 | #endif 52 | 53 | // Timers 54 | tclk clk; 55 | 56 | // Main program 57 | int main() 58 | { 59 | 60 | #ifdef HANDLE_SEGFAULT 61 | 62 | // Setup segfault handler 63 | struct sigaction sa; 64 | 65 | memset(&sa, 0, sizeof(struct sigaction)); 66 | 67 | sigemptyset(&sa.sa_mask); 68 | 69 | sa.sa_sigaction = segfault_sigaction; 70 | sa.sa_flags = SA_SIGINFO; 71 | 72 | sigaction(SIGSEGV, &sa, NULL); 73 | 74 | #endif 75 | 76 | #ifdef SINGLET 77 | 78 | // ostringstream oss; 79 | SINGLET(cout, 0); 80 | 81 | // std::cout << "OUTPUT:\n" << oss.str(); 82 | return 0; 83 | 84 | #else 85 | 86 | // Setup times 87 | tpoint epoch = clk.now(); 88 | 89 | bench mark(epoch); 90 | 91 | mutex io_mtx; // I/O mutex 92 | mutex tk_mtx; // Task acquisition mutex 93 | mutex fl_mtx; // Task failure mutex 94 | 95 | int count = 0; 96 | int task = 0; 97 | 98 | size_t size = rig.size(); 99 | auto singleter = [&](singlet s, size_t t) { 100 | ostringstream oss; 101 | 102 | oss << string(100, '=') << endl; 103 | oss << mark << "Running \"" << s.first 104 | << "\" test [" << t << "/" 105 | << size << "]:\n" << endl; 106 | 107 | bool tmp = true; 108 | 109 | #if defined(DEBUG_EXCEPTION) 110 | 111 | oss << string(100, '-') << endl; 112 | tmp = s.second(oss, 0); 113 | oss << string(100, '-') << endl; 114 | 115 | #elif defined(PASSTHROUGH_EXCEPTION) 116 | 117 | try { 118 | oss << string(100, '-') << endl; 119 | tmp = s.second(oss, 0); 120 | oss << string(100, '-') << endl; 121 | } catch (const std::runtime_error &e) { 122 | oss << bred << "CAUGHT RUNTIME EXCEPTION (in test \"" 123 | << s.first << "\"):" << 
endl; 124 | oss << "\t" << e.what() << endl; 125 | oss << "PASSING THROUGH FOR NOW." << reset << endl; 126 | 127 | tmp = false; 128 | } catch (...) { 129 | cout << bred << "CAUGHT UNKNOWN EXCEPTION (in test \"" 130 | << s.first << "\"), PASSING THROUGH FOR NOW." 131 | << reset << endl; 132 | 133 | tmp = false; 134 | } 135 | 136 | #else 137 | 138 | try { 139 | oss << string(100, '-') << endl; 140 | tmp = s.second(oss, 0); 141 | oss << string(100, '-') << endl; 142 | } catch (...) { 143 | cout << bred << "CAUGHT UNKNOWN EXCEPTION (in test \"" 144 | << s.first << "\"), TERMINATING." << reset << endl; 145 | 146 | throw; 147 | } 148 | 149 | #endif 150 | 151 | if (tmp) { 152 | oss << endl << bgreen << "\"" << s.first 153 | << "\" test PASSED." << reset << endl; 154 | } else { 155 | // Add to list of failed tasks 156 | fl_mtx.lock(); 157 | 158 | failed.push_back(s); 159 | 160 | fl_mtx.unlock(); 161 | 162 | oss << endl << bred << "\"" << s.first 163 | << "\" test FAILED." << reset << endl; 164 | } 165 | 166 | oss << string(100, '=') << endl; 167 | 168 | io_mtx.lock(); 169 | 170 | cout << oss.str() << endl; 171 | count += (tmp) ? 1 : 0; 172 | 173 | io_mtx.unlock(); 174 | }; 175 | 176 | auto tasker = [&]() { 177 | while (true) { 178 | int t = -1; 179 | 180 | tk_mtx.lock(); 181 | 182 | if (task < (int) size) { 183 | t = task; 184 | 185 | task++; 186 | } 187 | 188 | tk_mtx.unlock(); 189 | 190 | if (t < 0) 191 | break; 192 | 193 | singleter(rig[t], t + 1); 194 | } 195 | }; 196 | 197 | thread *army = new thread[THREADS]; 198 | for (size_t i = 0; i < THREADS; i++) 199 | army[i] = thread(tasker); 200 | 201 | for (size_t i = 0; i < THREADS; i++) 202 | army[i].join(); 203 | 204 | cout << endl << mark << "Summary: passed " 205 | << count << "/" << rig.size() 206 | << " tests." << endl; 207 | 208 | if (failed.size()) { 209 | cout << endl << string(100, '=') << endl; 210 | 211 | cout << "Failed tests [" << failed.size() 212 | << "/" << rig.size() << "]:" << endl; 213 | 214 | for (auto task : failed) { 215 | cout << "\t" << task.first << endl; 216 | } 217 | 218 | cout << string(100, '=') << endl; 219 | } 220 | 221 | return (failed.size() == 0) ? 0 : 1; 222 | 223 | #endif 224 | 225 | } 226 | -------------------------------------------------------------------------------- /testing/matrix.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(matrix_construction_and_memory) 4 | { 5 | using namespace zhetapi; 6 | 7 | Matrix tmp; 8 | 9 | oss << "Default constructor: " << tmp << endl; 10 | 11 | return true; 12 | } 13 | 14 | TEST(kernel_apt_and_mult) 15 | { 16 | using namespace zhetapi; 17 | using Mat = Matrix ; 18 | using Vec = Vector ; 19 | 20 | static const size_t rounds = 3; 21 | static const long double limit = 5; 22 | static const utility::Interval <> elemd(limit); 23 | 24 | for (size_t i = 0; i < rounds; i++) { 25 | // M is 4 x 5 and V is 4 x 1 26 | Mat M(4, 5, 27 | [](size_t i, size_t j) -> double { 28 | return elemd.uniform(); 29 | } 30 | ); 31 | 32 | Vec V(4, 33 | [](size_t i) -> double { 34 | return elemd.uniform(); 35 | } 36 | ); 37 | 38 | Vec out1 = Vec(M * V.append_above(1)); 39 | Vec out2 = apt_and_mult(M, V); 40 | 41 | oss << "Outputs:" << std::endl; 42 | oss << "\tout1 = " << out1 << std::endl; 43 | oss << "\tout2 = " << out2 << std::endl; 44 | 45 | double error = (out1 - out2).norm(); 46 | 47 | oss << "\terror = " << error << std::endl; 48 | 49 | if (error > 1e-10) { 50 | oss << "\t\tToo high!" 
<< std::endl; 51 | return false; 52 | } 53 | } 54 | 55 | return true; 56 | } 57 | 58 | TEST(kernel_rmt_and_mult) 59 | { 60 | using namespace zhetapi; 61 | using Mat = Matrix ; 62 | using Vec = Vector ; 63 | 64 | static const size_t rounds = 3; 65 | static const long double limit = 5; 66 | static const utility::Interval <> elemd(limit); 67 | 68 | for (size_t i = 0; i < rounds; i++) { 69 | // M is 4 x 5 and V is 4 x 1 70 | Mat M(4, 5, 71 | [](size_t i, size_t j) -> double { 72 | return elemd.uniform(); 73 | } 74 | ); 75 | 76 | Vec V(4, 77 | [](size_t i) -> double { 78 | return elemd.uniform(); 79 | } 80 | ); 81 | 82 | Vec out1 = Vec(M.transpose() * V).remove_top(); 83 | Vec out2 = rmt_and_mult(M, V); 84 | 85 | oss << "Outputs:" << std::endl; 86 | oss << "\tout1 = " << out1 << std::endl; 87 | oss << "\tout2 = " << out2 << std::endl; 88 | 89 | double error = (out1 - out2).norm(); 90 | 91 | oss << "\terror = " << error << std::endl; 92 | 93 | if (error > 1e-10) { 94 | oss << "\t\tToo high!" << std::endl; 95 | return false; 96 | } 97 | } 98 | 99 | return true; 100 | } 101 | 102 | TEST(kernel_vvt_mult) 103 | { 104 | using namespace zhetapi; 105 | using Mat = Matrix ; 106 | using Vec = Vector ; 107 | 108 | static const size_t rounds = 3; 109 | static const long double limit = 5; 110 | static const utility::Interval <> elemd(limit); 111 | 112 | for (size_t i = 0; i < rounds; i++) { 113 | Vec v1(5, 114 | [](size_t i) -> double { 115 | return elemd.uniform(); 116 | } 117 | ); 118 | 119 | Vec v2(4, 120 | [](size_t i) -> double { 121 | return elemd.uniform(); 122 | } 123 | ); 124 | 125 | Mat out1 = v1 * v2.transpose(); 126 | Mat out2 = vvt_mult(v1, v2); 127 | 128 | oss << "Outputs:" << std::endl; 129 | oss << "\tout1 = " << out1 << std::endl; 130 | oss << "\tout1.rows = " << out1.get_rows() << std::endl; 131 | oss << "\tout1.cols = " << out1.get_cols() << std::endl; 132 | oss << "\tout2 = " << out2 << std::endl; 133 | oss << "\tout2.rows = " << out2.get_rows() << std::endl; 134 | oss << "\tout2.cols = " << out2.get_cols() << std::endl; 135 | 136 | double error = (out1 - out2).norm(); 137 | 138 | oss << "\terror = " << error << std::endl; 139 | 140 | if (error > 1e-10) { 141 | oss << "\t\tToo high!" 
<< std::endl; 142 | return false; 143 | } 144 | } 145 | 146 | return true; 147 | } 148 | -------------------------------------------------------------------------------- /testing/polynomial.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(polynomial_construction) 4 | { 5 | using namespace zhetapi; 6 | 7 | // Test-global resources 8 | int coeffs[] {1, 2, 3, 4}; 9 | 10 | // Tests 11 | Polynomial f {1, 2, 3, 4, 5}; 12 | 13 | oss << "f: " << f << endl; 14 | oss << "\tdeg(f) = " << f.degree() << endl; 15 | oss << "\tf(1) = " << f(1) << endl; 16 | oss << "\tf(1) = " << f.evaluate(1) << endl; 17 | 18 | if (f.degree() != 4) { 19 | oss << "INCORRECT DEGREE (for f)" << endl; 20 | 21 | return false; 22 | } 23 | 24 | if (f(1) != 15) { 25 | oss << "INCORRECT VALUE (for f)" << endl; 26 | 27 | return false; 28 | } 29 | 30 | Polynomial g {1, 2, 3, 4}; 31 | 32 | oss << "g: " << g << endl; 33 | oss << "\tdeg(f) = " << g.degree() << endl; 34 | oss << "\tg(1) = " << g(1) << endl; 35 | oss << "\tg(1) = " << g.evaluate(1) << endl; 36 | 37 | if (g.degree() != 3) { 38 | oss << "INCORRECT DEGREE (for g)" << endl; 39 | 40 | return false; 41 | } 42 | 43 | if (g(1) != 10) { 44 | oss << "INCORRECT VALUE (for g)" << endl; 45 | 46 | return false; 47 | } 48 | 49 | Polynomial h(coeffs, 3); 50 | 51 | oss << "h: " << h << endl; 52 | oss << "\tdeg(h) = " << h.degree() << endl; 53 | oss << "\th(1) = " << h(1) << endl; 54 | oss << "\th(1) = " << h.evaluate(1) << endl; 55 | 56 | if (h.degree() != 2) { 57 | oss << "INCORRECT DEGREE (for h)" << endl; 58 | 59 | return false; 60 | } 61 | 62 | if (h(1) != 6) { 63 | oss << "INCORRECT VALUE (for h)" << endl; 64 | 65 | return false; 66 | } 67 | 68 | return true; 69 | } 70 | 71 | TEST(polynomial_comparison) 72 | { 73 | using namespace zhetapi; 74 | 75 | Polynomial f {1, 2, 3, 4, 5}; 76 | Polynomial fp1 {1, 2, 3, 4, 5}; 77 | Polynomial fp2 {1, 2, 3, 4, 6}; 78 | Polynomial fp3 {1, 2, 3, 4}; 79 | 80 | Polynomial fcpy1(f); 81 | Polynomial fcpy2 = f; 82 | 83 | oss << boolalpha; 84 | oss << "f == fp1: " << (f == fp1) << endl; 85 | oss << "f == fp2: " << (f == fp2) << endl; 86 | oss << "f == fp3: " << (f == fp3) << endl; 87 | 88 | // TODO: Add assert tests 89 | if (f != fp1 || f == fp2 || f == fp3) 90 | return false; 91 | 92 | oss << "fcpy1 = " << fcpy1 << endl; 93 | oss << "fcpy2 = " << fcpy2 << endl; 94 | 95 | oss << "f == fcpy1: " << (f == fcpy1) << endl; 96 | oss << "f == fcpy2: " << (f == fcpy2) << endl; 97 | 98 | if (f != fcpy1 || f != fcpy2) 99 | return false; 100 | 101 | return true; 102 | } 103 | 104 | TEST(polynomial_arithmetic) 105 | { 106 | using namespace zhetapi; 107 | 108 | Polynomial f {1, 2, 3, 4, 5}; 109 | Polynomial g {1, 2, 3, 4}; 110 | 111 | oss << "f + g = " << f + g << endl; 112 | oss << "f - g = " << f - g << endl; 113 | oss << "g - f = " << g - f << endl; 114 | 115 | if (f + g != Polynomial {2, 4, 6, 8, 5}) 116 | return false; 117 | 118 | if (f - g != Polynomial {0, 0, 0, 0, 5}) 119 | return false; 120 | 121 | if (g - f != Polynomial {0, 0, 0, 0, -5}) 122 | return false; 123 | 124 | return true; 125 | } 126 | -------------------------------------------------------------------------------- /testing/printing.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | // Terminal objects 4 | term_colors reset {"0"}; 5 | 6 | term_colors bred {"1;31"}; 7 | term_colors bgreen {"1;32"}; 8 | term_colors byellow {"1;33"}; 9 | 10 | term_ok ok; 
11 | term_err err; 12 | 13 | ostream &operator<<(ostream &os, const term_colors &tc) 14 | { 15 | return (os << "\033[" << tc.color << "m"); 16 | } 17 | 18 | ostream &operator<<(ostream &os, const term_ok &tok) 19 | { 20 | return (os << bgreen << "[OK]" << reset); 21 | } 22 | 23 | ostream &operator<<(ostream &os, const term_err &terr) 24 | { 25 | return (os << bred << "[ERR]" << reset); 26 | } -------------------------------------------------------------------------------- /testing/special.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(gamma_and_factorial) 4 | { 5 | using namespace zhetapi::special; 6 | 7 | for (double i = 0; i < 10; i++) 8 | oss << "ln_gamma(" << (i + 1) << ") = " << ln_gamma(i + 1) << endl; 9 | 10 | for (double i = 0; i < 10; i++) 11 | oss << "ln_factorial(" << i << ") = " << ln_factorial(i) << endl; 12 | 13 | try { 14 | ln_gamma(0); 15 | 16 | return false; 17 | } catch (const char *err) { 18 | oss << "\terr: " << err << endl; 19 | } 20 | 21 | try { 22 | ln_factorial(-1); 23 | 24 | return false; 25 | } catch (const char *err) { 26 | oss << "\terr: " << err << endl; 27 | } 28 | 29 | return true; 30 | } 31 | -------------------------------------------------------------------------------- /testing/tensor.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(tensor_construction_and_memory) 4 | { 5 | using namespace zhetapi; 6 | 7 | Tensor t1; 8 | 9 | oss << "Default: " << t1 << endl; 10 | 11 | Tensor t2({4, 5, 6}); 12 | 13 | oss << "Dimension constructor: " << t2 << endl; 14 | 15 | return true; 16 | } 17 | -------------------------------------------------------------------------------- /testing/timers.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | ostream &operator<<(ostream &os, const bench &b) 4 | { 5 | tpoint tmp = clk.now(); 6 | 7 | double mcs = chrono::duration_cast 8 | 9 | (tmp - b.epoch).count(); 10 | 11 | string unit = " µs"; 12 | 13 | if (mcs > 1e6) { 14 | mcs /= 1e6; 15 | unit = " s"; 16 | } else if (mcs > 1e3) { 17 | mcs /= 1e3; 18 | unit = " ms"; 19 | } 20 | 21 | os << byellow << "["; 22 | os << mcs; 23 | os << unit; 24 | os << "]\t" << reset; 25 | 26 | return os; 27 | } 28 | -------------------------------------------------------------------------------- /testing/vector.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(vector_construction_and_memory) 4 | { 5 | using namespace zhetapi; 6 | 7 | Vector tmp; 8 | 9 | oss << "Default constructor: " << tmp << endl; 10 | 11 | tmp = Vector {1, 1, 1, 4, 6}; 12 | 13 | oss << "Initializer list constructor: " << tmp << endl; 14 | 15 | vector nums {1, 6, 3, 8, 1}; 16 | 17 | tmp = Vector (nums); 18 | 19 | oss << "Vector construction and assignment operator: " << tmp << endl; 20 | 21 | tmp = Vector (4, 6); 22 | 23 | oss << "Size and default value constructor: " << tmp << endl; 24 | 25 | /* double *arr = new double[10]; 26 | for (size_t i = 0; i < 10; i++) 27 | arr[i] = pow(i, 3.14); 28 | 29 | tmp = Vector (10, arr); 30 | 31 | oss << "Size and pointer constructor: " << tmp << endl; 32 | 33 | tmp = Vector (5, arr); 34 | 35 | oss << "Cropped size and pointer constructor: " << tmp << endl; 36 | 37 | // Free resources 38 | delete[] arr; */ 39 | 40 | return true; 41 | } 42 | 43 | TEST(vector_operations) 44 | { 45 | using namespace zhetapi; 46 
| 47 | Vector a = {1, 2, 3}; 48 | Vector b = {4, 5, 6}; 49 | Vector c = {1, 2, 3, 4, 5, 6}; 50 | 51 | Vector cc = { 52 | 1, 2, 3, 4, 5, 6, 53 | 1, 2, 3, 4, 5, 6, 54 | }; 55 | 56 | oss << "a = " << a << endl; 57 | oss << "b = " << b << endl; 58 | 59 | oss << concat(a, b) << endl; 60 | oss << c << endl; 61 | 62 | if (c != concat(a, b)) { 63 | oss << "Concatenation is incorrect." << endl; 64 | 65 | return false; 66 | } else { 67 | oss << "Concatenation passed." << endl; 68 | } 69 | 70 | oss << "\na o b = " << concat(a, b) << endl; 71 | oss << "c = " << c << endl; 72 | 73 | oss << cc << endl; 74 | oss << concat(a, b, c) << endl; 75 | 76 | if (cc != concat(a, b, c)) { 77 | oss << "(Variadic) Concatenation is incorrect." << endl; 78 | 79 | return false; 80 | } else { 81 | oss << "(Variadic) Concatenation passed." << endl; 82 | } 83 | 84 | return true; 85 | } 86 | -------------------------------------------------------------------------------- /zhetapi.hpp: -------------------------------------------------------------------------------- 1 | #include "include/fourier.hpp" 2 | #include "include/linalg.hpp" 3 | #include "include/matrix.hpp" 4 | #include "include/polynomial.hpp" 5 | #include "include/tensor.hpp" 6 | #include "include/vector.hpp" 7 | 8 | #include "include/std/calculus.hpp" 9 | #include "include/std/functions.hpp" 10 | #include "include/std/interval.hpp" 11 | #include "include/std/activations.hpp" 12 | #include "include/std/erfs.hpp" 13 | 14 | #include "include/core/kernels.hpp" --------------------------------------------------------------------------------
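A closing usage sketch (illustrative only): the umbrella header above pulls in
the modules exercised by the tests, so a minimal program against this library
might look like the following, assuming the repository root is on the include
path.

	#include <iostream>

	#include "zhetapi.hpp"

	int main()
	{
		using namespace zhetapi;

		// Polynomials: coefficients {1, 2, 3, 4, 5}, so f(1) = 15
		Polynomial <double> f {1, 2, 3, 4, 5};
		std::cout << "f(1) = " << f(1) << std::endl;

		// Linear algebra: eigenvalues of a diagonal matrix via the QR algorithm
		auto A = linalg::diag(1.0, 2.0, 3.0, 4.0);
		std::cout << "E = " << linalg::qr_algorithm(A) << std::endl;
	}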