├── .github
│   └── workflows
│       └── cmake.yml
├── .gitignore
├── .gitmodules
├── .gitpod.Dockerfile
├── .gitpod.yml
├── CMakeLists.txt
├── LICENSE
├── README.md
├── experimental
│   ├── benchmark.cpp
│   ├── cuda.cu
│   ├── experimental.cpp
│   └── mnist.cpp
├── include
│   ├── activation.hpp
│   ├── allocator.hpp
│   ├── autograd
│   │   ├── activation.hpp
│   │   ├── autograd.hpp
│   │   ├── cpu_kernels.hpp
│   │   ├── function.hpp
│   │   ├── gradient_queue.hpp
│   │   ├── iseq.hpp
│   │   ├── ml.hpp
│   │   ├── optimizer.hpp
│   │   └── train.hpp
│   ├── cast.hpp
│   ├── common.hpp
│   ├── complex.hpp
│   ├── core
│   │   ├── common.hpp
│   │   └── kernels.hpp
│   ├── counter.hpp
│   ├── cuda
│   │   ├── error.cuh
│   │   ├── essentials.cuh
│   │   ├── matrix.cuh
│   │   ├── nvarena.cuh
│   │   └── vector.cuh
│   ├── dataset.hpp
│   ├── display.hpp
│   ├── dnn.hpp
│   ├── dnnopt.hpp
│   ├── engine.hpp
│   ├── equation.hpp
│   ├── erf.hpp
│   ├── field.hpp
│   ├── filter.hpp
│   ├── fixed_vector.hpp
│   ├── fourier.hpp
│   ├── function.hpp
│   ├── gnn.hpp
│   ├── gradient.hpp
│   ├── image.hpp
│   ├── io
│   │   └── print.hpp
│   ├── layer.hpp
│   ├── linalg.hpp
│   ├── matrix.hpp
│   ├── matrix_cpu.hpp
│   ├── netnode.hpp
│   ├── operand.hpp
│   ├── optimizer.hpp
│   ├── parametrization.hpp
│   ├── polynomial.hpp
│   ├── range.hpp
│   ├── rational.hpp
│   ├── registration.hpp
│   ├── sparse.hpp
│   ├── std
│   │   ├── activation_derivatives.hpp
│   │   ├── activations.hpp
│   │   ├── algorithms.hpp
│   │   ├── calculus.hpp
│   │   ├── combinatorial.hpp
│   │   ├── erf_derivatives.hpp
│   │   ├── erfs.hpp
│   │   ├── filters.hpp
│   │   ├── functions.hpp
│   │   ├── initializers.hpp
│   │   ├── interval.hpp
│   │   ├── loaders.hpp
│   │   ├── numtheory.hpp
│   │   └── optimizers.hpp
│   ├── tensor.hpp
│   ├── timer.hpp
│   ├── token.hpp
│   ├── training.hpp
│   ├── vector.hpp
│   └── vector_type.hpp
├── source
│   ├── autograd
│   │   ├── autograd.cpp
│   │   ├── iseq.cpp
│   │   └── ml.cpp
│   ├── cuda
│   │   └── nvarena.cu
│   ├── image.cpp
│   ├── io.cpp
│   ├── linalg.cpp
│   ├── polynomial.cpp
│   ├── range.cpp
│   ├── std
│   │   ├── functions.cpp
│   │   └── interval.cpp
│   └── timer.cpp
├── testing
│   ├── activation.cpp
│   ├── calculus.cpp
│   ├── fourier.cpp
│   ├── global.hpp
│   ├── interval.cpp
│   ├── linalg.cpp
│   ├── main.cpp
│   ├── matrix.cpp
│   ├── polynomial.cpp
│   ├── printing.cpp
│   ├── special.cpp
│   ├── tensor.cpp
│   ├── timers.cpp
│   └── vector.cpp
├── zhetapi.hpp
└── zhetapi_logo.svg

/.github/workflows/cmake.yml:
--------------------------------------------------------------------------------
1 | name: Build
2 |
3 | on:
4 |   push:
5 |     branches: [ "focused" ]
6 |   pull_request:
7 |     branches: [ "focused" ]
8 |
9 | env:
10 |   # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
11 |   BUILD_TYPE: Release
12 |
13 | jobs:
14 |   build:
15 |     # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac.
16 |     # You can convert this to a matrix build if you need cross-platform coverage.
17 |     # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
18 |     runs-on: ubuntu-latest
19 |
20 |     steps:
21 |     - uses: actions/checkout@v3
22 |       with:
23 |         submodules: 'true'
24 |
25 |     - name: Install dependencies
26 |       run: sudo apt install libpng-dev
27 |
28 |     - name: Compile Google benchmarks
29 |       run: |
30 |         cd ${{github.workspace}}/vendor/benchmark
31 |         cmake -E make_directory "build"
32 |         cmake -E chdir "build" cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on -DCMAKE_BUILD_TYPE=Release ../
33 |         cmake --build "build" --config Release
34 |         sudo cmake --build "build" --config Release --target install
35 |         cd ${{github.workspace}}
36 |
37 |     - name: Configure CMake
38 |       run: cmake -B ${{github.workspace}}/build -DZHETAPI_ENABLE_CUDA=OFF
39 |
40 |     - name: Build
41 |       # Build your program with the given configuration
42 |       run: cmake --build ${{github.workspace}}/build
43 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.a
2 | *.dSYM
3 | *.import
4 | *.log
5 | *.ninja
6 | *.out
7 | *.output
8 | *.so
9 | *.sw*
10 | *.zhplib
11 | *ubyte
12 | *ubyte
13 | *~
14 | .cache
15 | .env
16 | .ninja_deps
17 | .ninja_log
18 | .smake
19 | .vscode
20 | /.vs
21 | /Makefile
22 | CMakeCache.txt
23 | CMakeFiles
24 | __gen*
25 | __pycache__
26 | bin
27 | build
28 | cmake_install.cmake
29 | compile_commands.json
30 | coverage.info
31 | data
32 | debug
33 | docs/_build
34 | docs/_static
35 | docs/_templates
36 | docs/html
37 | docs/latex
38 | docs/latex
39 | docs/xml
40 | engine/engine
41 | gcov-files
42 | gcov-out
43 | gen
44 | htests
45 | portability
46 | samples/rl/res
47 | tasks.json
48 | tmp
49 | vendor/benchmark
50 | zhetapi
51 | zhetapi-header
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "vendor/indicators"]
2 | 	path = vendor/indicators
3 | 	url = https://github.com/p-ranav/indicators
4 | [submodule "vendor/jitify"]
5 | 	path = vendor/jitify
6 | 	url = https://github.com/NVIDIA/jitify
7 | [submodule "vendor/benchmark"]
8 | 	path = vendor/benchmark
9 | 	url = https://github.com/google/benchmark.git
--------------------------------------------------------------------------------
/.gitpod.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gitpod/workspace-full
2 |
3 | USER root
4 |
5 | RUN apt-get -yq update \
6 |     && apt-get install -yq gcc-8 g++-8 \
7 |     && apt-get install -yq clang-8 \
8 |     && apt-get install -yq valgrind \
9 |     && apt-get install -yq libboost-all-dev \
10 |     && apt-get install -yq asciidoctor \
11 |     && apt-get install -yq libcurl4-gnutls-dev \
12 |     && apt-get install -yq doxygen \
13 |     && apt-get install -yq texlive-latex-base \
14 |     && apt-get install -yq texlive-fonts-recommended \
15 |     && apt-get install -yq texlive-fonts-extra \
16 |     && apt-get install -yq texlive-latex-extra \
17 |     && apt-get install -yq graphviz \
18 |     && apt-get install -yq clang-tidy-8 \
19 |     && apt-get install -yq lcov \
20 |     && apt-get install -yq ninja-build \
21 |     && apt-get install -yq libsfml-dev \
22 |     && pip install smake \
23 |     && apt-get clean \
24 |     && rm -rf /var/lib/apt/lists/*
25 |
--------------------------------------------------------------------------------
/.gitpod.yml:
--------------------------------------------------------------------------------
1 | image:
2 |   file: .gitpod.Dockerfile
3 |
4 | # List the ports you want to expose and what to do when they are served. See https://www.gitpod.io/docs/config-ports/
5 | ports:
6 |   - port: 3000
7 |     onOpen: open-preview
8 |
9 | # List the start up tasks. You can start them in parallel in multiple terminals. See https://www.gitpod.io/docs/config-start-tasks/
10 | tasks:
11 |   - init: echo 'init script' # runs during prebuild
12 |     command: echo 'start script'
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.12)
2 |
3 | # All projects
4 | project(zhetapi CXX)
5 |
6 | # Check existence of CUDA
7 | option(ZHETAPI_ENABLE_CUDA "Enable CUDA for Zhetapi" ON)
8 |
9 | if (ZHETAPI_ENABLE_CUDA)
10 | 	enable_language(CUDA)
11 | endif()
12 |
13 | # CXX options
14 | set(CMAKE_CXX_STANDARD 20)
15 |
16 | # Compiler (clang default)
17 | if (NOT CMAKE_CXX_COMPILER)
18 | 	set(CMAKE_CXX_COMPILER clang++)
19 | endif()
20 |
21 | # Color output
22 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
23 |
24 | # Build type (Release default)
25 | if (NOT CMAKE_BUILD_TYPE)
26 | 	set(CMAKE_BUILD_TYPE Release)
27 | endif()
28 |
29 | # Required packages
30 | find_package(PNG REQUIRED)
31 | find_package(benchmark REQUIRED)
32 | find_package(OpenMP REQUIRED)
33 |
34 | # Include directories
35 | include_directories(
36 | 	.
37 | 	vendor
38 | 	vendor/indicators/include
39 | 	vendor/benchmark/include
40 | 	${PNG_INCLUDE_DIRS}
41 | )
42 |
43 | # Zhetapi library sources
44 | set(Zhetapi_SOURCES
45 | 	source/autograd/autograd.cpp
46 | 	source/autograd/iseq.cpp
47 | 	source/autograd/ml.cpp
48 | 	source/image.cpp
49 | 	source/io.cpp
50 | 	source/linalg.cpp
51 | 	source/polynomial.cpp
52 | 	source/range.cpp
53 | 	source/std/functions.cpp
54 | 	source/std/interval.cpp
55 | 	source/timer.cpp
56 | )
57 |
58 | # Common object library
59 | add_library(Zhetapi_COMMON OBJECT ${Zhetapi_SOURCES})
60 |
61 | # Project executables
62 | add_executable(mnist experimental/mnist.cpp $<TARGET_OBJECTS:Zhetapi_COMMON>)
63 | add_executable(experimental experimental/experimental.cpp $<TARGET_OBJECTS:Zhetapi_COMMON>)
64 |
65 | if (ZHETAPI_ENABLE_CUDA)
66 | 	# add_executable(experimental_cuda experimental/cuda.cu ${Zhetapi_SOURCES})
67 | endif()
68 |
69 | add_executable(benchmark experimental/benchmark.cpp $<TARGET_OBJECTS:Zhetapi_COMMON>)
70 |
71 | set(ESSENTIAL_LIBS PNG::PNG OpenMP::OpenMP_CXX)
72 |
73 | target_link_libraries(mnist ${ESSENTIAL_LIBS})
74 | target_link_libraries(experimental ${ESSENTIAL_LIBS})
75 | # target_link_libraries(experimental_cuda ${ESSENTIAL_LIBS} cuda cudart nvrtc)
76 | target_link_libraries(benchmark benchmark::benchmark ${ESSENTIAL_LIBS})
77 |
78 | if (ZHETAPI_ENABLE_CUDA)
79 | 	target_link_libraries(experimental ${ESSENTIAL_LIBS})
80 | endif()
81 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Venkataram Edavamadathil Sivaram
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom
the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ![Zhetapi Logo](zhetapi_logo.svg)
2 |
3 | ![build badge](https://github.com/vedavamadathil/zhetapi/actions/workflows/cmake.yml/badge.svg)
4 |
5 | Zhetapi is a modern C++ machine learning and numerical analysis library with an
6 | emphasis on intuitive usage.
7 |
8 | # Linear Algebra
9 |
10 | Zhetapi provides a basic linear algebra interface using C++20 concepts. The
11 | `Field` structure enforces a constraint on types to ensure that they behave like
12 | algebraic fields. As a result, these structures support basic arithmetic along
13 | with other useful methods.
14 |
15 | For now, the following template classes are provided: `Tensor`, `Matrix`,
16 | `Vector`.
17 |
18 | # Auto Differentiation
19 |
20 | The auto differentiation facilities in Zhetapi belong in the `zhetapi::autograd`
21 | namespace. All operations that depend on autodiff use `float`s as the underlying
22 | type; in particular, `Constant` is a `Tensor <float>` and is the basis of all
23 | numerical values in this module.
24 |
25 | To provide a seamless, operator-based interface into the autodiff facilities,
26 | two notable classes are provided, `Variable` and `Function`. As one would expect,
27 | `Variable`s can store arbitrary `Constant` values, and `Function`s are
28 | compositions of `Variable`s under varying operations. For example, where `x` and `y` are `Variable`s:
29 |
30 | ```cpp
31 | Function f = x + y;
32 | Function g = x * y;
33 |
34 | // f and g are now functions of *two* variables
35 | Constant a = f(1, 2);
36 | Constant b = g(1, 2);
37 |
38 | // Composition of functions is done likewise
39 | Function h = f(x, g(x, y)); // NOTE: h is still a function of two variables
40 | ```
41 |
42 | ## Symbolic Differentiation
43 |
44 | ## Backward Pass
45 |
46 | Currently, only backward mode is enabled for autograd.
47 |
48 | # Building
49 |
50 | Zhetapi is primarily a header-only library, but for now there are some examples
51 | that one can play around with in the `experimental` directory.
52 |
53 | This project is developed using C++20. Additional dependencies include PNG
54 | (`libpng-dev` on Ubuntu systems), OpenMP (optional) and CUDA (optional).
55 |
56 | Generate the build configuration using CMake as follows:
57 |
58 | ```
59 | $ cd zhetapi
60 | $ mkdir build && cd build
61 | $ cmake .. -DZHETAPI_ENABLE_CUDA=ON # ON by default
62 | ```
63 |
64 | And build the targets as one would usually do (e.g. `make` or `ninja`).
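As a quick smoke test of the backward pass, the following is a minimal sketch
adapted from `experimental/experimental.cpp`; it evaluates a single dense layer
on a constant input and pulls a gradient back through it:

```cpp
#include <iostream>

#include "include/autograd/ml.hpp"

using namespace zhetapi;
using namespace zhetapi::autograd;

int main()
{
	// A small dense layer (3 inputs -> 3 outputs)
	auto model = ml::dense(3, 3);

	// Constant input filled with ones
	Constant input { Constant::shape_type {3, 3, 3}, 1.0f };

	// Forward pass
	std::cout << "model(input) = " << model(input) << "\n";

	// Backward pass: seed the output gradient with ones
	Constant igrad { Constant::shape_type {3, 3, 3}, 1.0f };
	Gradient grads = model.gradient({input}, {igrad});

	// Gradient of the output with respect to the input
	std::cout << "igrads[0] = " << grads.igrads[0] << "\n";
}
```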
65 |
--------------------------------------------------------------------------------
/experimental/benchmark.cpp:
--------------------------------------------------------------------------------
1 | // Benchmark headers
2 | #include <benchmark/benchmark.h>
3 |
4 | // Library headers
5 | #include "../include/autograd/autograd.hpp"
6 | #include "../include/autograd/ml.hpp"
7 |
8 | using namespace zhetapi;
9 | using namespace zhetapi::autograd;
10 |
11 | // Performance of a single dense layer
12 | static void dense_layer(benchmark::State &state)
13 | {
14 | 	Variable x;
15 | 	Function model = ml::dense(1000, 2000)(x);
16 | 	Constant in(
17 | 		{1000}, // NOTE: must match the layer's input size (was {2000})
18 | 		[](size_t i) {
19 | 			return 1.0f;
20 | 		}
21 | 	);
22 |
23 | 	for (auto _ : state)
24 | 		model(in);
25 | }
26 |
27 | BENCHMARK(dense_layer)->Unit(benchmark::kMillisecond);
28 |
29 | // Performance of a deep dense network
30 | static void dense_network(benchmark::State &state)
31 | {
32 | 	Variable x;
33 | 	Function model = ml::dense(1000, 2000)(x);
34 | 	model = ml::dense(2000, 2000)(model);
35 | 	model = ml::dense(2000, 1000)(model);
36 |
37 | 	Constant in(
38 | 		{1000}, // NOTE: must match the first layer's input size (was {2000})
39 | 		[](size_t i) {
40 | 			return 1.0f;
41 | 		}
42 | 	);
43 |
44 | 	for (auto _ : state)
45 | 		model(in);
46 | }
47 |
48 | BENCHMARK(dense_network)->Unit(benchmark::kMillisecond);
49 |
50 | // Benchmarking matrix multiplication
51 | template <class T>
52 | Matrix <T> simple_fma(const Matrix <T> &a, const Matrix <T> &b, const Matrix <T> &c)
53 | {
54 | 	Matrix <T> out = a * b;
55 | 	out += c;
56 |
57 | 	return out;
58 | }
59 |
60 | template <class T>
61 | void inline_fma(T *out, const T *matrix, const T *bias, const T *input, size_t rows, size_t cols)
62 | {
63 | 	for (size_t i = 0; i < rows; i++) {
64 | 		T sum = 0;
65 |
66 | 		for (size_t j = 0; j < cols; j++)
67 | 			sum += matrix[i * cols + j] * input[j];
68 |
69 | 		out[i] = sum + bias[i];
70 | 	}
71 | }
72 |
73 | template <class T>
74 | void parallel_fma(T *out, const T *matrix, const T *bias, const T *input, size_t rows, size_t cols)
75 | {
76 | 	#pragma omp parallel for
77 | 	for (long int i = 0; i < rows; i++) {
78 | 		T sum = 0;
79 |
80 | 		const T *c = &matrix[i * cols];
81 | 		for (size_t j = 0; j < cols; j++)
82 | 			sum += c[j] * input[j]; // NOTE: was c[i], which read the wrong element
83 |
84 | 		out[i] = sum + bias[i];
85 | 	}
86 | }
87 |
88 | static void matrix_multiply(benchmark::State &state)
89 | {
90 | 	Matrix <float> w1 {1000, 2000, [](size_t i) { return 1.0f; }};
91 | 	Matrix <float> b1 {1000, 1, [](size_t i) { return 1.0f; }};
92 |
93 | 	Matrix <float> in {2000, 1, [](size_t i) { return 1.0f; }};
94 |
95 | 	for (auto _ : state)
96 | 		simple_fma(w1, in, b1);
97 | }
98 |
99 | BENCHMARK(matrix_multiply)->Unit(benchmark::kMillisecond);
100 |
101 | static void matrix_multiply_inline_fma(benchmark::State &state)
102 | {
103 | 	Matrix <float> w1 {1000, 2000, [](size_t i) { return 1.0f; }};
104 | 	Matrix <float> b1 {1000, 1, [](size_t i) { return 1.0f; }};
105 |
106 | 	Matrix <float> in {2000, 1, [](size_t i) { return 1.0f; }};
107 | 	Matrix <float> out {1000, 1, [](size_t i) { return 0.0f; }};
108 |
109 | 	for (auto _ : state)
110 | 		inline_fma(out.data(), w1.data(), b1.data(), in.data(), 1000, 2000); // NOTE: cols was 1000, but w1 is 1000 x 2000
111 | }
112 |
113 | BENCHMARK(matrix_multiply_inline_fma)->Unit(benchmark::kMillisecond);
114 |
115 | static void matrix_multiply_parallel_fma(benchmark::State &state)
116 | {
117 | 	Matrix <float> w1 {1000, 2000, [](size_t i) { return 1.0f; }};
118 | 	Matrix <float> b1 {1000, 1, [](size_t i) { return 1.0f; }};
119 |
120 | 	Matrix <float> in {2000, 1, [](size_t i) { return 1.0f; }};
121 | 	Matrix <float> out {1000, 1, [](size_t i) { return 0.0f; }};
122 |
123 | 	for (auto _ : state)
124 | 		parallel_fma(out.data(), w1.data(), b1.data(), in.data(), 1000, 2000); // NOTE: cols was 1000, but w1 is 1000 x 2000
125 | }
126 |
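// NOTE: the three FMA kernels above all compute out = W * in + b with a
// 1000 x 2000 weight matrix W, a 2000-vector in and a 1000-vector b, which
// mirrors the work done per layer in the dense benchmarks at the top of
// this file.
127 |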
BENCHMARK(matrix_multiply_parallel_fma)->Unit(benchmark::kMillisecond); 128 | 129 | // Main 130 | BENCHMARK_MAIN(); 131 | -------------------------------------------------------------------------------- /experimental/cuda.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "include/tensor.hpp" 4 | 5 | using namespace zhetapi; 6 | 7 | struct CuTensor { 8 | size_t dimensions; 9 | size_t *shape; 10 | float *array; // Borrows data from Tensor 11 | }; 12 | 13 | struct CuMatrix { 14 | size_t rows; 15 | size_t columns; 16 | float *array; // Borrows data from Tensor 17 | }; 18 | 19 | int main() 20 | { 21 | { 22 | Tensor a = Tensor ::ones({2, 2}); 23 | Tensor b = Tensor ::zeros({2, 2}); 24 | 25 | std::cout << "a:" << a << " => " << a.verbose() << std::endl; 26 | std::cout << "b:" << b << " => " << b.verbose() << std::endl; 27 | 28 | Tensor ::set_variant(eCUDA); 29 | 30 | Tensor c = a + b; 31 | 32 | std::cout << "c: " << c << " => " << c.verbose() << std::endl; 33 | 34 | // jitify and fill memory as such... 35 | Tensor d = Tensor (Tensor ::shape_type {2, 2}); 36 | std::cout << "d:" << d << " => " << d.verbose() << std::endl; 37 | 38 | Tensor e = Tensor (Tensor ::shape_type {2, 2}); 39 | std::cout << "e:" << d << " => " << d.verbose() << std::endl; 40 | 41 | c.copy(a); 42 | c.copy(d); 43 | 44 | d.copy(a); 45 | e.copy(d); 46 | 47 | std::cout << "\nc:" << c << " => " << c.verbose() << std::endl; 48 | std::cout << "d:" << d << " => " << d.verbose() << std::endl; 49 | std::cout << "e:" << e << " => " << e.verbose() << std::endl; 50 | 51 | // TODO: manual array copy 52 | 53 | detail::MemoryTracker::report(); 54 | } 55 | 56 | detail::MemoryTracker::report(); 57 | } 58 | -------------------------------------------------------------------------------- /experimental/experimental.cpp: -------------------------------------------------------------------------------- 1 | #include "include/autograd/ml.hpp" 2 | #include "include/autograd/activation.hpp" 3 | #include "include/autograd/gradient_queue.hpp" 4 | 5 | using namespace zhetapi; 6 | using namespace zhetapi::autograd; 7 | 8 | int main() 9 | { 10 | { 11 | Variable x; 12 | auto model = ml::dense(3, 3); 13 | 14 | Constant input { 15 | Constant::shape_type {3, 3, 3}, 16 | 1.0f 17 | }; 18 | 19 | std::cout << "input = " << input.verbose() << "\n"; 20 | std::cout << "model(input): = " << model(input).verbose() << "\n"; 21 | 22 | Constant igrad { Constant::shape_type {3, 3, 3}, 1.0f }; 23 | Gradient grads = model.gradient({input}, {igrad}); 24 | 25 | std::cout << "grads = " << grads.igrads[0].verbose() << "\n"; 26 | } 27 | 28 | detail::MemoryTracker::report(); 29 | } 30 | -------------------------------------------------------------------------------- /experimental/mnist.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #include "../include/autograd/activation.hpp" 8 | #include "../include/autograd/autograd.hpp" 9 | #include "../include/autograd/ml.hpp" 10 | #include "../include/autograd/optimizer.hpp" 11 | #include "../include/autograd/train.hpp" 12 | #include "../include/common.hpp" 13 | 14 | using namespace zhetapi; 15 | using namespace zhetapi::autograd; 16 | 17 | // Files required 18 | static const std::map files { 19 | { 20 | "train-images-idx3-ubyte", 21 | "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz" 22 | }, 23 | 24 | { 25 | "train-labels-idx1-ubyte", 26 | 
"http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz" 27 | }, 28 | 29 | { 30 | "t10k-images-idx3-ubyte", 31 | "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz" 32 | }, 33 | 34 | { 35 | "t10k-labels-idx1-ubyte", 36 | "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz" 37 | } 38 | }; 39 | 40 | // Check if file exists 41 | bool file_exists(const std::string &path) 42 | { 43 | struct stat buffer; 44 | return (stat(path.c_str(), &buffer) == 0); 45 | } 46 | 47 | #ifdef _OPENMP 48 | #define OPENMP_ENABLED 1 49 | #else 50 | #define OPENMP_ENABLED 0 51 | #endif 52 | 53 | int main() 54 | { 55 | const size_t TRAIN_IMAGES = 60000; 56 | const size_t VALIDATION_IMAGES = 100; 57 | const size_t DIMENSIONS = 784; 58 | 59 | std::cout << "Backend: CPU, OpenMP: " << OPENMP_ENABLED << std::endl; 60 | 61 | // TODO: try to use a single tensor for all data, then splice it 62 | const Constant::shape_type IMAGE_SHAPE = {DIMENSIONS}; 63 | const Constant::shape_type LABEL_SHAPE = {10}; 64 | 65 | // MSE function (5 inputs) 66 | Variable x; 67 | Variable y; 68 | 69 | /* auto loss = -1.0f * autograd::dot(autograd::log(x), y); 70 | auto dloss = (-1.0f * y/x).refactored(x, y); */ 71 | 72 | auto loss = square(length(x - y))/Constant {10}; 73 | auto dloss = 2 * (x - y)/Constant {10}; 74 | 75 | std::cout << "Loss:\n" << loss.summary() << std::endl; 76 | std::cout << "dLoss:\n" << dloss.summary() << std::endl; 77 | 78 | // Model 79 | auto model = ml::dense(DIMENSIONS, 30)(x); 80 | model = ml::sigmoid(model); 81 | model = ml::dense(30, 10)(model); 82 | model = ml::softmax(model); 83 | 84 | std::cout << "\nModel:\n" << model.summary() << std::endl; 85 | 86 | // Optimizer 87 | auto optimizer = ml::Adam(model.parameters(), 0.01); 88 | 89 | // First load the MNIST dataset 90 | system("mkdir -p data"); 91 | for (auto &file : files) { 92 | if (!file_exists("data/" + file.first)) { 93 | std::cout << "Downloading " << file.second << std::endl; 94 | system(("wget " + file.second).c_str()); 95 | system(("gunzip " + file.first + ".gz").c_str()); 96 | system(("mv " + file.first + " data/").c_str()); 97 | } else { 98 | std::cout << "Found " << file.first << std::endl; 99 | } 100 | } 101 | 102 | std::cout << "\nLoading MNIST dataset..." 
<< std::endl; 103 | 104 | // Load the data 105 | ml::Data train_data; 106 | ml::Data validation_data; 107 | 108 | std::vector train_labels; 109 | std::vector validation_labels; 110 | 111 | std::ifstream f_train_images("data/train-images-idx3-ubyte"); 112 | std::ifstream f_validation_images("data/t10k-images-idx3-ubyte"); 113 | 114 | std::ifstream f_train_labels("data/train-labels-idx1-ubyte"); 115 | std::ifstream f_validation_labels("data/t10k-labels-idx1-ubyte"); 116 | 117 | // Read the headers 118 | char header[16]; 119 | 120 | f_train_images.read(header, 16); 121 | f_validation_images.read(header, 16); 122 | 123 | f_train_labels.read(header, 8); 124 | f_validation_labels.read(header, 8); 125 | 126 | // Read the data 127 | for (int i = 0; i < TRAIN_IMAGES; i++) { 128 | // Read the image 129 | unsigned char image[DIMENSIONS]; 130 | std::vector image_data; 131 | 132 | f_train_images.read((char *) image, DIMENSIONS); 133 | for (int j = 0; j < DIMENSIONS; j++) 134 | image_data.push_back(image[j]/255.0f); 135 | 136 | train_data.push_back({ 137 | Constant {IMAGE_SHAPE, image_data} 138 | }); 139 | 140 | // Read the label 141 | unsigned char label; 142 | f_train_labels.read((char *) &label, 1); 143 | 144 | train_labels.push_back( 145 | Constant {LABEL_SHAPE, 146 | [&](size_t i) { 147 | return i == label ? 1 : 0; 148 | } 149 | } 150 | ); 151 | } 152 | 153 | for (int i = 0; i < VALIDATION_IMAGES; i++) { 154 | // Read the image 155 | unsigned char image[DIMENSIONS]; 156 | std::vector image_data; 157 | 158 | f_validation_images.read((char *) image, DIMENSIONS); 159 | for (int j = 0; j < DIMENSIONS; j++) 160 | image_data.push_back(image[j]/255.0f); 161 | 162 | validation_data.push_back({ 163 | Constant {IMAGE_SHAPE, image_data} 164 | }); 165 | 166 | // Read the label 167 | unsigned char label; 168 | f_validation_labels.read((char *) &label, 1); 169 | 170 | validation_labels.push_back( 171 | Constant {LABEL_SHAPE, 172 | [&](size_t i) { 173 | return i == label ? 1 : 0; 174 | } 175 | } 176 | ); 177 | } 178 | 179 | // Validator 180 | auto validator = [](const Constant &a, const Constant &b) { 181 | int ai = argmax(a); 182 | int bi = argmax(b); 183 | return ai == bi; 184 | }; 185 | 186 | std::cout << "Training data loaded" << std::endl; 187 | std::cout << "\tcurrent accuracy: " << ml::accuracy(model, train_data, train_labels, validator) << "\n" << std::endl; 188 | std::cout << "\toutput on input 0 = " << model(train_data[0]) << std::endl; 189 | std::cout << "\tlabel on input 0 = " << train_labels[0] << std::endl; 190 | std::cout << "\tloss = " << loss(model(train_data[0]).flat(), train_labels[0]) << std::endl; 191 | std::cout << "\tmatch? " << validator(model(train_data[0]), train_labels[0]) << std::endl; 192 | 193 | auto training_suite = ml::TrainingSuite { 194 | .loss = loss, 195 | .dloss = dloss, 196 | .iterations = 100, 197 | .batch_size = 100, 198 | .reporter = std::make_shared (validation_data, validation_labels, validator) 199 | }; 200 | 201 | ml::fit(model, train_data, train_labels, optimizer, training_suite); 202 | 203 | std::cout << "\n\nTraining finished" << std::endl; 204 | std::cout << "\tcurrent accuracy: " << ml::accuracy(model, train_data, train_labels, validator) << std::endl; 205 | std::cout << "\toutput on input 0 = " << model(train_data[0]) << std::endl; 206 | std::cout << "\tlabel on input 0 = " << train_labels[0] << std::endl; 207 | std::cout << "\tmatch? 
" << validator(model(train_data[0]), train_labels[0]) << std::endl; 208 | 209 | // TODO: multithreaded training 210 | 211 | // TODO: some way to weight the gradients for each input (maybe by error) 212 | // TODO: learning rate scheduling 213 | // TODO: dropout and regularization 214 | // TODO: some method to propogate parameters through ftunctions, 215 | // ie. {"dropout", 0.5}, {"batch_norm", true} (a map for now) 216 | 217 | } 218 | -------------------------------------------------------------------------------- /include/allocator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_ALLOCATOR_H_ 2 | #define ZHETAPI_ALLOCATOR_H_ 3 | 4 | // Standard headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | // Check CUDA availability 12 | #ifdef __CUDACC__ 13 | #define ZHETAPI_CUDA 1 14 | #else 15 | #define ZHETAPI_CUDA 0 16 | #endif 17 | 18 | namespace zhetapi { 19 | 20 | // Memory variant 21 | enum Variant { 22 | eCPU, 23 | eCUDA // TODO: only enable if CUDA is available 24 | }; 25 | 26 | namespace detail { 27 | 28 | // Memory allocation tracker 29 | // TODO: make thread safe 30 | class MemoryTracker { 31 | // TODO: pack into structs... 32 | long long int m_cpu_allocs = 0; 33 | long long int m_cpu_frees = 0; 34 | long long int m_cpu_inuse = 0; 35 | 36 | std::unordered_map m_cpu_map; 37 | 38 | long long int m_cuda_allocs = 0; 39 | long long int m_cuda_frees = 0; 40 | long long int m_cuda_inuse = 0; 41 | 42 | std::unordered_map m_cuda_map; 43 | 44 | // TODO: variant based allocation 45 | template 46 | T *alloc(size_t elements, Variant variant) { 47 | // TODO: option to throw or not 48 | if (elements == 0) 49 | throw std::runtime_error("Must allocate non-zero number of elements"); 50 | 51 | T *ptr = nullptr; 52 | if (variant == eCPU) { 53 | ptr = new T[elements]; 54 | 55 | m_cpu_allocs++; 56 | m_cpu_inuse += elements * sizeof(T); 57 | m_cpu_map[ptr] = elements * sizeof(T); 58 | } else if (variant == eCUDA) { 59 | if constexpr (!ZHETAPI_CUDA) 60 | throw std::runtime_error("CUDA is not available"); 61 | 62 | #ifdef __CUDACC__ 63 | cudaMalloc(&ptr, elements * sizeof(T)); 64 | 65 | m_cuda_allocs++; 66 | m_cuda_inuse += elements * sizeof(T); 67 | m_cuda_map[ptr] = elements * sizeof(T); 68 | #endif 69 | } 70 | 71 | return ptr; 72 | } 73 | 74 | template 75 | void deallocate(T *ptr, Variant variant) { 76 | if (variant == eCPU) { 77 | if (m_cpu_map.find(ptr) == m_cpu_map.end()) 78 | throw std::runtime_error("Attempt to free unallocated memory"); 79 | 80 | m_cpu_frees++; 81 | m_cpu_inuse -= m_cpu_map[ptr]; 82 | m_cpu_map.erase(ptr); 83 | 84 | delete[] ptr; 85 | } else if (variant == eCUDA) { 86 | if constexpr (!ZHETAPI_CUDA) 87 | throw std::runtime_error("CUDA is not available"); 88 | 89 | #ifdef __CUDACC__ 90 | if (m_cuda_map.find(ptr) == m_cuda_map.end()) 91 | throw std::runtime_error("Attempt to free unallocated memory"); 92 | 93 | m_cuda_frees++; 94 | m_cuda_inuse -= m_cuda_map[ptr]; 95 | m_cuda_map.erase(ptr); 96 | 97 | cudaFree(ptr); 98 | 99 | #endif 100 | } 101 | } 102 | 103 | template 104 | void copy(T *dst, T *src, size_t elements, Variant variant) { 105 | if (variant == eCPU) { 106 | if (m_cpu_map.find(dst) == m_cpu_map.end()) 107 | throw std::runtime_error("Attempt to copy to unallocated memory"); 108 | 109 | if (m_cpu_map.find(src) == m_cpu_map.end()) 110 | throw std::runtime_error("Attempt to copy from unallocated memory"); 111 | 112 | std::copy(src, src + elements, dst); 113 | } else if (variant == eCUDA) { 114 
| if constexpr (!ZHETAPI_CUDA) 115 | throw std::runtime_error("CUDA is not available"); 116 | 117 | #ifdef __CUDACC__ 118 | if (m_cuda_map.find(dst) == m_cuda_map.end()) 119 | throw std::runtime_error("Attempt to copy to unallocated memory"); 120 | 121 | if (m_cuda_map.find(src) == m_cuda_map.end()) 122 | throw std::runtime_error("Attempt to copy from unallocated memory"); 123 | 124 | cudaMemcpy(dst, src, elements * sizeof(T), cudaMemcpyDeviceToDevice); 125 | #endif 126 | } 127 | } 128 | 129 | static MemoryTracker &one() { 130 | static MemoryTracker singleton; 131 | return singleton; 132 | } 133 | public: 134 | static void report() { 135 | MemoryTracker &t = one(); 136 | 137 | // TODO: table 138 | double MB = 1024.0 * 1024.0; 139 | std::cout << "Memory allocation report:" << std::endl; 140 | 141 | std::cout << "\tAllocations: " << t.m_cpu_allocs 142 | << ", Frees: " << t.m_cpu_frees 143 | << ", Net: " << t.m_cpu_allocs - t.m_cpu_frees << std::endl; 144 | std::cout << "\tIn use: " << t.m_cpu_inuse/MB << " MB" << std::endl; 145 | 146 | if constexpr (ZHETAPI_CUDA) { 147 | std::cout << "\n\tCUDA Allocations: " << t.m_cuda_allocs 148 | << ", CUDA Frees: " << t.m_cuda_frees 149 | << ", Net: " << t.m_cuda_allocs - t.m_cuda_frees << std::endl; 150 | std::cout << "\tCUDA In use: " << t.m_cuda_inuse/MB << " MB" << std::endl; 151 | } 152 | } 153 | 154 | template 155 | friend T *allocate(size_t, Variant); 156 | 157 | template 158 | friend void deallocate(T *, Variant); 159 | 160 | template 161 | friend void copy(const std::shared_ptr &, 162 | const std::shared_ptr &, 163 | size_t, Variant); 164 | }; 165 | 166 | template 167 | T *allocate(size_t n, Variant variant) 168 | { 169 | return MemoryTracker::one().alloc (n, variant); 170 | } 171 | 172 | template 173 | void deallocate(T *ptr, Variant variant) 174 | { 175 | MemoryTracker::one().deallocate(ptr, variant); 176 | } 177 | 178 | template 179 | std::shared_ptr make_shared_array(size_t elements, Variant variant) 180 | { 181 | return std::shared_ptr ( 182 | allocate (elements, variant), 183 | [variant](T *ptr) { 184 | deallocate(ptr, variant); 185 | } 186 | ); 187 | } 188 | 189 | template 190 | void copy(const std::shared_ptr &dst, 191 | const std::shared_ptr &src, 192 | size_t elements, Variant variant) 193 | { 194 | MemoryTracker::one().copy(dst.get(), src.get(), elements, variant); 195 | } 196 | 197 | } 198 | 199 | } 200 | 201 | #endif 202 | -------------------------------------------------------------------------------- /include/autograd/activation.hpp: -------------------------------------------------------------------------------- 1 | #include "autograd.hpp" 2 | #include "function.hpp" 3 | #include "iseq.hpp" 4 | 5 | namespace zhetapi { 6 | 7 | namespace autograd { 8 | 9 | namespace ml { 10 | 11 | // RELU activation function 12 | class _relu : public ISeq { 13 | public: 14 | struct kernel : public _function { 15 | kernel() : _function(1) {} 16 | 17 | Constant compute(const Input &ins) override { 18 | return ins[0].copy().transform( 19 | [](float x) { 20 | return x > 0 ? x : 0; 21 | } 22 | ); 23 | } 24 | 25 | Gradient gradient(const Input &ins, const Input &igrads) override { 26 | Constant out = Constant(igrads[0].shape(), 27 | [&](size_t i) { 28 | float x = ins[0].get(i); 29 | return (x > 0 ? 
1 : 0) * igrads[0].get(i); 30 | } 31 | ); 32 | 33 | return Gradient { 34 | .igrads = {out} 35 | }; 36 | } 37 | 38 | std::string summary() const override { 39 | return "RELU"; 40 | } 41 | }; 42 | 43 | _relu() : ISeq(new_ftn_ (), 1) {} 44 | }; 45 | 46 | inline Function relu(const Function &function) 47 | { 48 | return (new_ <_relu> ())(function); 49 | } 50 | 51 | // Leaky RELU activation function 52 | class _leaky_relu : public ISeq { 53 | public: 54 | struct kernel : public _function { 55 | float _alpha; 56 | 57 | kernel(float alpha) : _function(1), _alpha(alpha) {} 58 | 59 | Constant compute(const Input &ins) override { 60 | return ins[0].copy().transform( 61 | [this](float x) { 62 | return x > 0 ? x : _alpha * x; 63 | } 64 | ); 65 | } 66 | 67 | Gradient gradient(const Input &ins, const Input &igrads) override { 68 | Constant out = Constant(igrads[0].shape(), 69 | [&](size_t i) { 70 | float x = ins[0].get(i); 71 | return (x > 0 ? 1 : _alpha) * igrads[0].get(i); 72 | } 73 | ); 74 | 75 | return Gradient { 76 | .igrads = {out} 77 | }; 78 | } 79 | 80 | std::string summary() const override { 81 | return "LEAKY RELU"; 82 | } 83 | }; 84 | 85 | _leaky_relu(float alpha) : ISeq(new_ftn_ (alpha), 1) {} 86 | }; 87 | 88 | inline Function leaky_relu(float alpha) 89 | { 90 | return new_ <_leaky_relu> (alpha); 91 | } 92 | 93 | // Sigmoid activation function 94 | class _sigmoid : public ISeq { 95 | public: 96 | struct kernel : public _function { 97 | kernel() : _function(1) {} 98 | 99 | Constant compute(const Input &ins) override { 100 | return ins[0].copy().transform( 101 | [](float x) { 102 | return 1 / (1 + std::exp(-x)); 103 | } 104 | ); 105 | } 106 | 107 | Gradient gradient(const Input &ins, const Input &igrads) override { 108 | Constant out = Constant(igrads[0].shape(), 109 | [&](size_t i) { 110 | float x = ins[0].get(i); 111 | float y = 1 / (1 + std::exp(-x)); 112 | return y * (1 - y) * igrads[0].get(i); 113 | } 114 | ); 115 | 116 | return Gradient { 117 | .igrads = {out} 118 | }; 119 | } 120 | 121 | std::string summary() const override { 122 | return "SIGMOID"; 123 | } 124 | }; 125 | 126 | _sigmoid() : ISeq(new_ftn_ (), 1) {} 127 | }; 128 | 129 | inline Function sigmoid(const Function &function) 130 | { 131 | return (new_ <_sigmoid> ())(function); 132 | } 133 | 134 | // Tanh activation function 135 | class _tanh : public ISeq { 136 | public: 137 | struct kernel : public _function { 138 | kernel() : _function(1) {} 139 | 140 | Constant compute(const Input &ins) override { 141 | return ins[0].copy().transform( 142 | [](float x) { 143 | return std::tanh(x); 144 | } 145 | ); 146 | } 147 | 148 | Gradient gradient(const Input &ins, const Input &igrads) override { 149 | Constant out = Constant(igrads[0].shape(), 150 | [&](size_t i) { 151 | float x = ins[0].get(i); 152 | float y = std::tanh(x); 153 | return (1 - y * y) * igrads[0].get(i); 154 | } 155 | ); 156 | 157 | return Gradient { 158 | .igrads = {out} 159 | }; 160 | } 161 | 162 | std::string summary() const override { 163 | return "TANH"; 164 | } 165 | }; 166 | 167 | _tanh() : ISeq(new_ftn_ (), 1) {} 168 | }; 169 | 170 | inline Function tanh(const Function &function) 171 | { 172 | return (new_ <_tanh> ())(function); 173 | } 174 | 175 | // Softmax activation function 176 | class _softmax : public ISeq { 177 | public: 178 | struct kernel : public _function { 179 | kernel() : _function(1) {} 180 | 181 | Constant compute(const Input &ins) override { 182 | auto o = ins[0].copy(); 183 | 184 | float omax = max(o); 185 | o -= omax; 186 | 187 | float osum = 
sum(o, expf); 188 | return o.transform( 189 | [osum](float x) { 190 | return std::exp(x)/osum; 191 | } 192 | ); 193 | } 194 | 195 | Gradient gradient(const Input &ins, const Input &igrads) override { 196 | auto o = ins[0].copy(); 197 | 198 | auto omax = max(o); 199 | o -= omax; 200 | 201 | auto osum = sum(o, expf); 202 | 203 | Constant out({o.size()}, 204 | [&](size_t i) { 205 | float x = o.get(i); 206 | float e = std::exp(x); 207 | float t = e * (osum - e) / (osum * osum); 208 | return t * igrads[0].get(i); 209 | } 210 | ); 211 | 212 | return Gradient { 213 | .igrads = {out} 214 | }; 215 | } 216 | 217 | std::string summary() const override { 218 | return "SOFTMAX"; 219 | } 220 | }; 221 | 222 | _softmax() : ISeq(new_ftn_ (), 1) {} 223 | }; 224 | 225 | inline Function softmax(const Function &function) 226 | { 227 | return (new_ <_softmax> ())(function); 228 | } 229 | 230 | } 231 | 232 | } 233 | 234 | } 235 | -------------------------------------------------------------------------------- /include/autograd/cpu_kernels.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_AUTOGRAD_CPU_KERNELS_H_ 2 | #define ZHETAPI_AUTOGRAD_CPU_KERNELS_H_ 3 | 4 | // Standard headers 5 | #include 6 | 7 | namespace zhetapi { 8 | 9 | namespace detail { 10 | 11 | namespace autograd { 12 | 13 | // TODO: put in source file 14 | inline void fma_matrix_vector(float *out, const float *matrix, const float *bias, const float *input, size_t rows, size_t cols) 15 | { 16 | #pragma omp parallel for 17 | for (long int i = 0; i < rows; i++) { 18 | float sum = 0; 19 | 20 | for (long int j = 0; j < cols; j++) 21 | sum += matrix[i * cols + j] * input[j]; 22 | 23 | out[i] = sum + bias[i]; 24 | } 25 | } 26 | 27 | inline void mul_vector_vector_transpose(float *out, const float *a, const float *b, size_t na, size_t nb) 28 | { 29 | #pragma omp parallel for 30 | for (long int i = 0; i < na; i++) { 31 | for (long int j = 0; j < nb; j++) 32 | out[i * nb + j] = a[i] * b[j]; 33 | } 34 | } 35 | 36 | inline void mul_matrix_transpose_vector(float *out, const float *matrix, const float *vector, size_t na, size_t nb) 37 | { 38 | #pragma omp parallel for 39 | for (long int i = 0; i < na; i++) { 40 | float sum = 0; 41 | 42 | for (long int j = 0; j < nb; j++) 43 | sum += matrix[i + j * na] * vector[j]; 44 | 45 | out[i] = sum; 46 | } 47 | } 48 | 49 | } 50 | 51 | } 52 | 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /include/autograd/gradient_queue.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_AUTOGRAD_GQ_H_ 2 | #define ZHETAPI_AUTOGRAD_GQ_H_ 3 | 4 | // Standard headers 5 | #include 6 | 7 | // Library headers 8 | #include "../tensor.hpp" 9 | 10 | namespace zhetapi { 11 | 12 | namespace autograd { 13 | 14 | // Constants are just tensors 15 | using Constant = Tensor ; 16 | 17 | // GradientQueue class is a deque with extra operations 18 | class GradientQueue : public std::deque { 19 | public: 20 | // Constructors 21 | GradientQueue() = default; 22 | 23 | // Initializer list 24 | GradientQueue(std::initializer_list l) 25 | : std::deque (l) {} 26 | 27 | // Arithematic operations 28 | GradientQueue &operator+=(const GradientQueue &rhs) { 29 | assert(size() == rhs.size()); 30 | for (size_t i = 0; i < size(); i++) 31 | at(i) += rhs[i]; 32 | return *this; 33 | } 34 | 35 | GradientQueue &operator-=(const GradientQueue &rhs) { 36 | assert(size() == rhs.size()); 37 | for (size_t i = 0; i 
< size(); i++) 38 | at(i) -= rhs[i]; 39 | return *this; 40 | } 41 | 42 | GradientQueue &operator*=(const GradientQueue &rhs) { 43 | assert(size() == rhs.size()); 44 | for (size_t i = 0; i < size(); i++) 45 | at(i) *= rhs[i]; 46 | return *this; 47 | } 48 | 49 | GradientQueue &operator/=(const GradientQueue &rhs) { 50 | assert(size() == rhs.size()); 51 | for (size_t i = 0; i < size(); i++) 52 | at(i) /= rhs[i]; 53 | return *this; 54 | } 55 | 56 | // Single constant operations 57 | GradientQueue &operator+=(const Constant &rhs) { 58 | for (auto &x : *this) 59 | x += rhs; 60 | return *this; 61 | } 62 | 63 | GradientQueue &operator-=(const Constant &rhs) { 64 | for (auto &x : *this) 65 | x -= rhs; 66 | return *this; 67 | } 68 | 69 | GradientQueue &operator*=(const Constant &rhs) { 70 | for (auto &x : *this) 71 | x *= rhs; 72 | return *this; 73 | } 74 | 75 | GradientQueue &operator/=(const Constant &rhs) { 76 | for (auto &x : *this) 77 | x /= rhs; 78 | return *this; 79 | } 80 | }; 81 | 82 | // More operators 83 | // TODO: source file 84 | inline GradientQueue operator*(const GradientQueue &lhs, const float &rhs) 85 | { 86 | GradientQueue gq = lhs; 87 | gq *= rhs; 88 | return gq; 89 | } 90 | 91 | inline GradientQueue operator*(const float &lhs, const GradientQueue &rhs) 92 | { 93 | GradientQueue gq = rhs; 94 | gq *= lhs; 95 | return gq; 96 | } 97 | 98 | inline GradientQueue operator/(const GradientQueue &lhs, const float &rhs) 99 | { 100 | GradientQueue gq = lhs; 101 | gq /= rhs; 102 | return gq; 103 | } 104 | 105 | inline GradientQueue operator/(const float &lhs, const GradientQueue &rhs) 106 | { 107 | GradientQueue gq = rhs; 108 | gq /= lhs; 109 | return gq; 110 | } 111 | 112 | } 113 | 114 | } 115 | 116 | #endif 117 | -------------------------------------------------------------------------------- /include/autograd/iseq.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_ISEQ_H_ 2 | #define ZHETAPI_ISEQ_H_ 3 | 4 | // Standard headers 5 | #include 6 | #include 7 | #include 8 | 9 | // Library jeaders 10 | #include "function.hpp" 11 | 12 | #include "../io/print.hpp" 13 | 14 | namespace zhetapi { 15 | 16 | namespace autograd { 17 | 18 | struct _node; 19 | 20 | // Cache structure for automatic differentiation 21 | using Cache = std::unordered_map <_function *, _function::Input>; 22 | using Node = std::shared_ptr <_node>; 23 | 24 | // Tree structure 25 | struct _node { 26 | _function::Ptr fptr; 27 | 28 | // TODO: should be a vector of plain nodes 29 | std::vector children; 30 | 31 | // Constructors 32 | _node(const _function::Ptr &); 33 | _node(const _function::Ptr &, const std::vector &); 34 | 35 | static Node make(const _function::Ptr &); 36 | static Node make(const _function::Ptr &, const std::vector &); 37 | 38 | // Printing the tree 39 | std::string str(int = 0) const; 40 | }; 41 | 42 | // Instruction sequence for a function 43 | class ISeq : public _function { 44 | public: 45 | // Public aliases 46 | using Instructions = std::vector <_function::Ptr>; 47 | using ConstantCache = std::vector ; 48 | private: 49 | // Private aliases 50 | using Var = std::shared_ptr <_variable>; 51 | using Variables = std::vector ; 52 | 53 | // Information about cache usage, for optimization 54 | struct _cache_info { 55 | int refs = 0; 56 | Node value; 57 | 58 | // Constructor 59 | _cache_info(); 60 | _cache_info(int, const Node &); 61 | }; 62 | 63 | using _cache_map = std::unordered_map ; 64 | 65 | // Reindexing map 66 | using _reindex_map = std::unordered_map ; 
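	// (Presumably used to remap variable indices when the instruction
	// sequence is rebuilt after optimization; see _generate_reindex_map.)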
67 | 68 | // TODO: JIT function to compile into object code 69 | // this should be possible since the types 70 | // are homogenous 71 | 72 | // Member variables 73 | Instructions _instrs; // Instruction sequence 74 | Variables _vars; // Variables 75 | ConstantCache _consts; // Fixed constants 76 | mutable ConstantCache _cache; // More cache for flow of execution 77 | 78 | // TODO: should really be finput = nullptr 79 | // TODO: actually cached input is currently obsolete 80 | Input _cached_in; 81 | Cache _cached_finputs; 82 | 83 | // TODO: cache tree? 84 | 85 | // Append helpers 86 | void append_variable(const _variable *); 87 | void append_iseq(const ISeq *const); 88 | int index_of(const _variable *) const; 89 | void _append_function(const Ptr &); 90 | 91 | template 92 | void _append(const _function::Ptr &, Args ...); 93 | 94 | // Computation helpers 95 | void _load(const Input &); 96 | void storec(std::stack &, int) const; 97 | bool _ispec(const Ptr &, std::stack &); 98 | void _exec(const Ptr &, std::stack &); 99 | 100 | // Composing functions and variables 101 | _function::Ptr _compose(const Compositions &) const override; 102 | 103 | // Tree building and rebuilding 104 | Node _tree(_cache_map &) const; 105 | void _tree_walk(const Ptr &, std::stack &, 106 | _cache_map &) const; 107 | void _rebuild(const Node &, Instructions &, 108 | ConstantCache &, _cache_map &, 109 | const ConstantCache &) const; 110 | 111 | // Optimization functions 112 | void _optimize(); 113 | _reindex_map _generate_reindex_map() const; 114 | 115 | // TODO: remove const 116 | // Differentiation functions 117 | friend Node _diff_tree(const Node &, int); 118 | 119 | _function::Ptr diff(const int) const override; 120 | protected: 121 | // Protected constructors 122 | ISeq(const _function::Ptr &, int); 123 | ISeq(const std::vector <_function::Ptr> &, 124 | std::vector , int); 125 | ISeq(const std::vector <_function::Ptr> &, 126 | std::vector , int, 127 | const _reindex_map &); 128 | 129 | std::pair <_function *, const MethodTable &> method_table() override; 130 | public: 131 | // TODO: check function to make sure only 132 | // one element remains on the stack 133 | 134 | // Empty constructor 135 | ISeq(); 136 | 137 | // Get a variable 138 | // TODO: protected? 139 | const Var &get(int) const; 140 | 141 | // Append a sequence of instructions 142 | template 143 | void append(Args ...); 144 | 145 | // Evaluate the sequence 146 | Constant compute(const Input &) override; 147 | 148 | // Evaluate gradient 149 | Gradient gradient(const Input &, const Input &) override; 150 | 151 | // Apply gradients 152 | void update_parameters(GradientQueue &) override; 153 | 154 | // Permute the order of variables 155 | void refactor(const std::vector &); 156 | _function::Ptr refactor(const std::vector &) const; 157 | 158 | // Info about parameters 159 | int parameters() const override; 160 | int tunable_parameters() const override; 161 | 162 | // Dump instructions for debugging 163 | std::string summary() const override; 164 | }; 165 | 166 | // Append a sequence of instructions 167 | template 168 | void ISeq::_append(const _function::Ptr &fptr, Args ... args) 169 | { 170 | _append_function(fptr); 171 | if constexpr (sizeof ... (args) > 0) 172 | _append(args ...); 173 | } 174 | 175 | template 176 | void ISeq::append(Args ... 
args) 177 | { 178 | // Append all, then optimize 179 | _append(args...); 180 | _optimize(); 181 | } 182 | 183 | 184 | } 185 | 186 | } 187 | 188 | #endif 189 | -------------------------------------------------------------------------------- /include/autograd/ml.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_AUTOGRAD_ML_H_ 2 | #define ZHETAPI_AUTOGRAD_ML_H_ 3 | 4 | // Standard headers 5 | #include 6 | #include 7 | 8 | // Library headers 9 | #include "../matrix.hpp" 10 | #include "../vector.hpp" 11 | #include "../std/interval.hpp" 12 | #include "autograd.hpp" 13 | #include "cpu_kernels.hpp" 14 | 15 | namespace zhetapi { 16 | 17 | namespace autograd { 18 | 19 | namespace ml { 20 | 21 | class _kdense : public _function { 22 | // Input and output shape 23 | size_t m_isize; 24 | size_t m_osize; 25 | std::string m_init; 26 | float m_dropout; 27 | 28 | // Weight matrix 29 | Matrix m_weights; 30 | 31 | // Bias 32 | Vector m_biases; 33 | 34 | // Cached resources 35 | Vector m_output; 36 | 37 | // Static random number generator 38 | static utility::Interval <1> rng; 39 | public: 40 | _kdense(size_t isize, size_t osize, const std::string &initializer = "xavier") 41 | : _function(1), m_isize(isize), m_osize(osize), m_output(osize) 42 | { 43 | // Lower case initializer 44 | for (auto &c : initializer) 45 | m_init += std::tolower(c); 46 | 47 | // Initializer 48 | std::function lambda = [](size_t) { return rng(); }; 49 | 50 | std::random_device rd; 51 | std::mt19937 gen(rd()); 52 | 53 | std::normal_distribution dist; 54 | 55 | int normal = 0; 56 | if (m_init == "lecun") { 57 | dist = std::normal_distribution (0, 1.0 / std::sqrt(isize)); 58 | normal++; 59 | } else if (m_init == "he") { 60 | dist = std::normal_distribution (0, 2.0/std::sqrt(isize)); 61 | normal++; 62 | } else if (m_init == "xavier") { 63 | float avg = (isize + osize) / 2.0f; 64 | dist = std::normal_distribution (0, 1.0/std::sqrt(avg)); 65 | normal++; 66 | } 67 | 68 | if (normal) 69 | lambda = [&](size_t i) { return dist(gen); }; 70 | else if (m_init == "debug") 71 | lambda = [&](size_t i) { return 1.0f; }; 72 | else 73 | lambda = [&](size_t i) { return 0.0f; }; 74 | 75 | m_weights = Matrix (m_osize, m_isize, lambda); 76 | m_biases = Vector (m_osize, lambda); 77 | } 78 | 79 | // Forward pass 80 | Constant compute(const Input &ins) override { 81 | // NOTE: Single input only 82 | // TODO: check if batching... 83 | // Convert first argument into a matrix 84 | detail::autograd::fma_matrix_vector( 85 | m_output.data(), m_weights.data(), 86 | m_biases.data(), ins[0].data(), 87 | m_osize, m_isize 88 | ); 89 | 90 | return m_output; 91 | } 92 | 93 | // Machine learning functions 94 | virtual Gradient gradient(const Input &ins, const Input &igrads) override { 95 | // igrad is the gradient of the output of the 96 | // function wrt to the desired function 97 | Vector igrad(m_isize); 98 | Matrix wgrad(m_osize, m_isize); 99 | Vector bgrad(m_osize); 100 | 101 | detail::autograd::mul_vector_vector_transpose( 102 | wgrad.data(), igrads[0].data(), ins[0].data(), 103 | m_osize, m_isize 104 | ); 105 | 106 | // TODO: Copy and computation in parallel? 107 | detail::autograd::mul_matrix_transpose_vector( 108 | igrad.data(), m_weights.data(), igrads[0].data(), 109 | m_isize, m_osize 110 | ); 111 | 112 | bgrad.copy(igrads[0]); 113 | 114 | // TODO: avoid the need to copy... reduce required allocations 115 | // Debug copy issues when using persistent gradient storage... 
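		// To summarize the kernels above (for a single column-vector input):
		//   wgrad = igrads[0] * ins[0]^T  (outer product; gradient wrt the weights)
		//   igrad = W^T * igrads[0]       (gradient passed back to the layer input)
		//   bgrad = igrads[0]             (gradient wrt the biases)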
116 | Gradient gradient; 117 | gradient.igrads = { igrad }; 118 | gradient.grads = { wgrad, bgrad }; 119 | return gradient; 120 | } 121 | 122 | // Apply gradient 123 | virtual void update_parameters(GradientQueue &grads) override { 124 | // Convert first argument into a matrix 125 | Vector bgrad(grads.back()); 126 | grads.pop_back(); 127 | 128 | Matrix wgrad(grads.back(), m_osize, m_isize); 129 | grads.pop_back(); 130 | 131 | m_weights += wgrad; 132 | m_biases += bgrad; 133 | } 134 | 135 | // Info about parameters 136 | virtual int parameters() const override { 137 | return 2; 138 | } 139 | 140 | virtual int tunable_parameters() const override { 141 | return m_weights.size() + m_biases.size(); 142 | } 143 | 144 | // Method table 145 | std::pair <_function *, const MethodTable &> method_table() override { 146 | static const MethodTable _map { 147 | {"dropout", [](_function *f, const Arguments &args) { 148 | _kdense *kf = dynamic_cast <_kdense *> (f); 149 | 150 | assert(kf); 151 | if (args.size() > 0) 152 | kf->m_dropout = std::get (args[0]); 153 | 154 | return kf->m_dropout; 155 | }} 156 | }; 157 | 158 | return {this, _map}; 159 | } 160 | 161 | // Summary of the function 162 | std::string summary() const override { 163 | std::ostringstream oss; 164 | oss << "DENSE(" << m_isize << " x " << m_osize; 165 | if (m_dropout > 0) 166 | oss << ", dropout = " << std::setprecision(2) << m_dropout; 167 | oss << ", " << m_init << ")"; 168 | return oss.str(); 169 | } 170 | }; 171 | 172 | class _dense : public ISeq { 173 | public: 174 | _dense(size_t isize, size_t osize, const std::string &initializer = "xavier") 175 | : ISeq(new_ftn_ <_kdense> (isize, osize, initializer), 1) {} 176 | }; 177 | 178 | // Dense layer factory 179 | inline Function dense(size_t isize, size_t osize, const std::string &initializer = "xavier") 180 | { 181 | return Function(new_ftn_ <_dense> (isize, osize, initializer)); 182 | } 183 | 184 | } 185 | 186 | } 187 | 188 | } 189 | 190 | #endif 191 | -------------------------------------------------------------------------------- /include/autograd/optimizer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_AUTOGRAD_OPTIMIZER_H_ 2 | #define ZHETAPI_AUTOGRAD_OPTIMIZER_H_ 3 | 4 | #include "function.hpp" 5 | 6 | namespace zhetapi { 7 | 8 | namespace autograd { 9 | 10 | namespace ml { 11 | 12 | // Interface for optimizers 13 | class _optimizer { 14 | protected: 15 | size_t _parameters = 0; 16 | public: 17 | float alpha; 18 | 19 | _optimizer(size_t parameters, float alpha_ = 0.001) 20 | : _parameters(parameters), alpha(alpha_) {} 21 | 22 | // Computation 23 | virtual void optimize(GradientQueue &) = 0; 24 | 25 | void operator()(GradientQueue &gq) { 26 | optimize(gq); 27 | } 28 | }; 29 | 30 | // Stochastic gradient descent 31 | struct SGD : public _optimizer { 32 | SGD(size_t parameters, float alpha) 33 | : _optimizer(parameters, alpha) {} 34 | 35 | // Optimize 36 | void optimize(GradientQueue &gq) override { 37 | for (auto &g : gq) 38 | g *= -alpha; 39 | } 40 | }; 41 | 42 | // Momentum 43 | class Momentum : public _optimizer { 44 | GradientQueue _v; 45 | public: 46 | float mu; 47 | 48 | Momentum(size_t parameters, float alpha, float mu_ = 0.9) 49 | : _optimizer(parameters, alpha), mu(mu_) { 50 | _v.resize(parameters); 51 | } 52 | 53 | // Optimize 54 | void optimize(GradientQueue &gq) override { 55 | // TODO: assert that gq.size() == _v.size() 56 | for (size_t i = 0; i < _parameters; i++) { 57 | if (_v[i].shape() != gq[i].shape()) 58 | 
_v[i] = Constant(gq[i].shape(), 0); 59 | 60 | _v[i] = mu * _v[i] - alpha * gq[i]; 61 | gq[i] = _v[i]; 62 | } 63 | } 64 | }; 65 | 66 | // RMSprop 67 | class RMSprop : public _optimizer { 68 | GradientQueue _v; 69 | public: 70 | float beta; 71 | 72 | RMSprop(size_t parameters, float alpha, float beta_ = 0.9) 73 | : _optimizer(parameters, alpha), beta(beta_) { 74 | _v.resize(parameters); 75 | } 76 | 77 | // Optimize 78 | void optimize(GradientQueue &gq) override { 79 | // TODO: assert that gq.size() == _v.size() 80 | for (size_t i = 0; i < _parameters; i++) { 81 | if (_v[i].shape() != gq[i].shape()) 82 | _v[i] = Constant(gq[i].shape(), 0); 83 | 84 | _v[i] = beta * _v[i] + (1 - beta) * gq[i] * gq[i]; 85 | gq[i] = -alpha * gq[i] / _v[i].transform( 86 | [](float x) { 87 | return std::sqrt(x) + 1e-10; 88 | } 89 | ); 90 | } 91 | } 92 | }; 93 | 94 | // Adam 95 | class Adam : public _optimizer { 96 | GradientQueue _v, _m; 97 | size_t _iter = 1; 98 | public: 99 | float beta1, beta2; 100 | 101 | Adam(size_t parameters, float alpha, float beta1_ = 0.9, float beta2_ = 0.999) 102 | : _optimizer(parameters, alpha), beta1(beta1_), beta2(beta2_) { 103 | _v.resize(parameters); 104 | _m.resize(parameters); 105 | } 106 | 107 | // Reset iteration 108 | void reset() { 109 | _iter = 1; 110 | } 111 | 112 | // Optimize 113 | void optimize(GradientQueue &gq) override { 114 | // TODO: assert that gq.size() == _v.size() 115 | for (size_t i = 0; i < _parameters; i++) { 116 | if (_v[i].shape() != gq[i].shape()) 117 | _v[i] = Constant(gq[i].shape(), 0); 118 | if (_m[i].shape() != gq[i].shape()) 119 | _m[i] = Constant(gq[i].shape(), 0); 120 | 121 | _v[i] = beta1 * _v[i] - (1 - beta1) * gq[i]; 122 | _m[i] = beta2 * _m[i] + (1 - beta2) * gq[i] * gq[i]; 123 | 124 | auto _vh = _v[i]/float(1 - std::pow(beta1, _iter)); 125 | auto _mh = _m[i]/float(1 - std::pow(beta2, _iter)); 126 | 127 | gq[i] = alpha * _vh / _mh.transform( 128 | [](float x) { 129 | return std::sqrt(x) + 1e-10; 130 | } 131 | ); 132 | } 133 | 134 | _iter++; 135 | } 136 | }; 137 | 138 | } 139 | 140 | } 141 | 142 | } 143 | 144 | #endif 145 | -------------------------------------------------------------------------------- /include/autograd/train.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_AUTOGRAD_TRAIN_H_ 2 | #define ZHETAPI_AUTOGRAD_TRAIN_H_ 3 | 4 | // Standard headers 5 | #include 6 | 7 | // Extra headers 8 | #include 9 | 10 | // Library headers 11 | #include "function.hpp" 12 | #include "optimizer.hpp" 13 | #include "autograd.hpp" 14 | 15 | #include 16 | 17 | namespace zhetapi { 18 | 19 | namespace autograd { 20 | 21 | namespace ml { 22 | 23 | // Useful aliases 24 | using Data = std::vector <_function::Input>; 25 | using Validator = std::function ; 26 | 27 | // Check accuracy of model wrt data set, returns proportion 28 | // TODO: source file 29 | inline float accuracy(_function::Ptr &model, 30 | const Data &X, const std::vector &Y, 31 | const Validator &validator) 32 | { 33 | int count = 0; 34 | std::vector indices; 35 | for (int i = 0; i < X.size(); i++) { 36 | Constant output = model->compute(X[i]); 37 | if (validator(output, Y[i])) { 38 | indices.push_back(i); 39 | count++; 40 | } 41 | } 42 | 43 | return float(count) / X.size(); 44 | } 45 | 46 | // Progress reporters 47 | struct _reporter { 48 | struct Info { 49 | size_t epoch; 50 | size_t batch; 51 | size_t total_epochs; 52 | float avg_error; 53 | float time; 54 | _function::Ptr &model; 55 | }; 56 | 57 | virtual void report(const Info &) const 
{} 58 | }; 59 | 60 | // Progress bar 61 | class ProgressBar : public _reporter { 62 | mutable indicators::ProgressBar bar; 63 | public: 64 | ProgressBar() : bar { 65 | indicators::option::BarWidth {50}, 66 | indicators::option::Start {" ["}, 67 | indicators::option::Fill {"█"}, 68 | indicators::option::Lead {"█"}, 69 | indicators::option::Remainder {"-"}, 70 | indicators::option::End {"]"}, 71 | indicators::option::PrefixText {"Training model"}, 72 | indicators::option::ShowElapsedTime {true}, 73 | indicators::option::ShowRemainingTime {true} 74 | } {} 75 | 76 | void report(const Info &info) const override { 77 | float progress = 100.0f * (float) info.epoch / info.total_epochs; 78 | bar.set_progress(progress); 79 | } 80 | }; 81 | 82 | // Validate (requires data set) 83 | class Validate : public _reporter { 84 | const Data &X; 85 | const std::vector &Y; 86 | Validator validator; 87 | public: 88 | Validate(const Data &X_, const std::vector &Y_, const Validator &validator_) 89 | : X {X_}, Y {Y_}, validator {validator_} {} 90 | 91 | void report(const Info &info) const override { 92 | float accuracy = ml::accuracy(info.model, X, Y, validator); 93 | std::cout << "Accuracy: " << accuracy 94 | << ", Time: " << std::setprecision(2) << info.time << "s" 95 | << ", Average error = " << info.avg_error << std::endl; 96 | 97 | // TODO: make this optional 98 | detail::MemoryTracker::report(); 99 | } 100 | }; 101 | 102 | // TODO: source file 103 | // TODO: some function/interface to report progress 104 | // TODO: default optimizer and loss function 105 | 106 | // Information relevant to training 107 | struct TrainingSuite { 108 | _function::Ptr &loss; 109 | _function::Ptr &dloss; 110 | size_t iterations; 111 | size_t batch_size; 112 | std::shared_ptr <_reporter> reporter = std::make_shared (); 113 | }; 114 | 115 | inline void fit(_function::Ptr &f, const Data &X, const std::vector &Y, 116 | _optimizer &optimizer, const TrainingSuite &suite) 117 | { 118 | // Setup timer 119 | std::chrono::steady_clock::time_point start; 120 | 121 | // TODO: assert that X.size() == Y.size() 122 | for (size_t i = 0; i < suite.iterations; i++) { 123 | start = std::chrono::steady_clock::now(); 124 | 125 | // TODO: implement verbose 126 | // TODO: batching 127 | GradientQueue gq; 128 | int elements = 0; 129 | 130 | float serror = 0; 131 | for (size_t j = 0; j < X.size(); j++) { 132 | Constant y = f->compute(X[j]).flat(); 133 | Constant igrad = suite.dloss->compute({y, Y[j]}); 134 | _function::Gradient grads = f->gradient(X[j], {igrad}); 135 | serror += suite.loss->compute({y, Y[j]}).length(); 136 | 137 | elements++; 138 | if (gq.empty()) 139 | gq = grads.grads; 140 | else 141 | gq += grads.grads; 142 | 143 | if (elements >= suite.batch_size) { 144 | gq /= float(elements); 145 | optimizer.optimize(gq); 146 | f->update_parameters(gq); 147 | 148 | gq.clear(); 149 | elements = 0; 150 | } 151 | } 152 | 153 | // Report progress 154 | float time = std::chrono::duration_cast ( 155 | std::chrono::steady_clock::now() - start 156 | ).count() / 1000.0f; 157 | 158 | suite.reporter->report({i, 0, suite.iterations, serror/X.size(), time, f}); 159 | } 160 | } 161 | 162 | } 163 | 164 | } 165 | 166 | } 167 | 168 | #endif 169 | -------------------------------------------------------------------------------- /include/cast.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CAST_H_ 2 | #define CAST_H_ 3 | 4 | // Engine headers 5 | #include "token.hpp" 6 | 7 | /** 8 | * @file cast.hpp 9 | * @brief 
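Putting the pieces of train.hpp together, a minimal training sketch looks as follows. Here `model`, `mse`, `dmse`, `X` and `Y` are hypothetical stand-ins for objects assembled elsewhere with this API, and `argmax` is a hypothetical helper:

\code{.cpp}
using namespace zhetapi::autograd;

// Hypothetical model, e.g. a stack of ml::dense layers
_function::Ptr model = /* ... */;

ml::Validator validator = [](const Constant &out, const Constant &target) {
	return argmax(out) == argmax(target);   // argmax: hypothetical helper
};

ml::SGD optimizer(model->parameters(), 0.01f);

ml::TrainingSuite suite {
	mse, dmse,   // loss and its derivative with respect to the prediction
	100,         // iterations (epochs over X)
	32,          // batch size: gradients are averaged before each step
	std::make_shared <ml::Validate> (X, Y, validator)
};

ml::fit(model, X, Y, optimizer, suite);
\endcode

Note that `fit` averages the accumulated `GradientQueue` over each batch, hands it to the optimizer in place, and only then calls `update_parameters`, so an optimizer is purely a transformation on raw gradients.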
Contains functions to help casting pointers to Tokens using 10 | * `dynamic_cast`. Helpful for dealing with different overloads in Registrables. 11 | * For example, suppose we have the following Registrable that is supposed to 12 | * take up to three integers. 13 | * 14 | * \code{.cpp} 15 | * 16 | * ZHETAPI_REGISTRABLE(my_registrable) 17 | * { 18 | * OpZ o1, o2, o3; 19 | * 20 | * // Performing overload switching with zhetapi_cast (should be used for very 21 | * // specific or seemingly random overloads) 22 | * if (zhetapi_cast(inputs, o1, o2, o3)) { 23 | * // do the function for three integers (o1, o2, o3) 24 | * } else if (zhetapi_cast(inputs, o1, o2)) { 25 | * // do the function for two integers (o1, o2) 26 | * } else if (zhetapi_cast(inputs, o1)) { 27 | * // do the function for two integers (o1) 28 | * } else { 29 | * // Terminating branch... 30 | * } 31 | * 32 | * // ...or use zhetapi_cc_cast (should be used for sequences of partial 33 | * // overloads) 34 | * switch (zhetapi_cc_cast(inputs, o1, o2, o3)) { 35 | * case 3: 36 | * // do the function for three integers (o1, o2, o3) 37 | * case 2: 38 | * // do the function for two integers (o1, o2) 39 | * case 1: 40 | * // do the function for one integer (o1) 41 | * default: 42 | * break; 43 | * } 44 | * 45 | * // As the terminating action either 46 | * return nullptr; 47 | * 48 | * // ...or throw 49 | * throw my_exception(); 50 | * } 51 | * 52 | * \endcode 53 | */ 54 | 55 | namespace zhetapi { 56 | 57 | // TODO: rename cc cast to partial cast 58 | // and add another cast method that goes down the list until fully casted 59 | template 60 | bool zhetapi_cast_process( 61 | const std::vector &tokens, 62 | size_t i, 63 | T &tptr) 64 | { 65 | if (i >= tokens.size()) 66 | return false; 67 | 68 | tptr = dynamic_cast (tokens[i]); 69 | 70 | if (!tptr) 71 | return false; 72 | 73 | return true; 74 | } 75 | 76 | #ifndef SKIP_DOXYGEN // Breathe cannot parse variadics 77 | 78 | template 79 | bool zhetapi_cast_process( 80 | const std::vector &tokens, 81 | size_t i, 82 | T &tptr, 83 | A &... args) 84 | { 85 | if (i >= tokens.size()) 86 | return false; 87 | 88 | tptr = dynamic_cast (tokens[i]); 89 | 90 | if (!tptr) 91 | return false; 92 | 93 | return zhetapi_cast_process(tokens, i + 1, args ...); 94 | } 95 | 96 | template 97 | bool zhetapi_cast(const std::vector &tokens, A &... args) 98 | { 99 | return zhetapi_cast_process(tokens, 0, args ...); 100 | } 101 | 102 | #endif 103 | 104 | // Counting alternatives 105 | template 106 | void zhetapi_cast_cc_process( 107 | const std::vector &tokens, 108 | size_t &i, 109 | T &tptr) 110 | { 111 | if (i >= tokens.size()) 112 | return; 113 | 114 | tptr = dynamic_cast (tokens[i]); 115 | 116 | if (!tptr) 117 | return; 118 | 119 | i++; 120 | } 121 | 122 | #ifndef SKIP_DOXYGEN // Breathe cannot parse variadics 123 | 124 | template 125 | void zhetapi_cast_cc_process( 126 | const std::vector &tokens, 127 | size_t &i, 128 | T &tptr, 129 | A &... args) 130 | { 131 | if (i >= tokens.size()) 132 | return; 133 | 134 | tptr = dynamic_cast (tokens[i]); 135 | 136 | if (!tptr) 137 | return; 138 | 139 | zhetapi_cast_cc_process(tokens, ++i, args ...); 140 | } 141 | 142 | template 143 | size_t zhetapi_cast_cc(const std::vector &tokens, A &... 
args) 144 | { 145 | size_t success = 0; 146 | zhetapi_cast_cc_process(tokens, success, args ...); 147 | return success; 148 | } 149 | 150 | #endif 151 | 152 | } 153 | 154 | #endif 155 | -------------------------------------------------------------------------------- /include/common.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ZHETAPI_COMMON_H_ 2 | #define ZHETAPI_COMMON_H_ 3 | 4 | // Printing vectors 5 | template 6 | std::ostream &operator<<(std::ostream &os, const std::vector &v) 7 | { 8 | os << "{"; 9 | for (size_t i = 0; i < v.size(); i++) { 10 | os << v[i]; 11 | if (i != v.size() - 1) 12 | os << ", "; 13 | } 14 | 15 | return os << "}"; 16 | } 17 | 18 | // Printing deques 19 | template 20 | std::ostream &operator<<(std::ostream &os, const std::deque &v) 21 | { 22 | os << "{"; 23 | for (size_t i = 0; i < v.size(); i++) { 24 | os << v[i]; 25 | if (i != v.size() - 1) 26 | os << ", "; 27 | } 28 | 29 | return os << "}"; 30 | } 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /include/complex.hpp: -------------------------------------------------------------------------------- 1 | #ifndef COMPLEX_H_ 2 | #define COMPLEX_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace zhetapi { 11 | 12 | /** 13 | * The Complex class is an 14 | * extension of the std::complex 15 | * class which includes a more convenient 16 | * set of methods, such as normalization. 17 | */ 18 | template 19 | class Complex : public std::complex { 20 | public: 21 | // Constructor 22 | Complex(); 23 | Complex(const T &, const T &); 24 | Complex(const std::complex &); 25 | 26 | // Fake constructors for conversion 27 | Complex(int, bool, bool); 28 | 29 | template 30 | Complex(A); 31 | 32 | // Getters 33 | T magnitude() const; 34 | 35 | bool is_real() const; 36 | 37 | // Operators 38 | operator long double() const; 39 | operator long long int() const; 40 | 41 | // Functional Methods 42 | Complex normalize() const; 43 | 44 | // Output Methods 45 | template 46 | friend std::string std::to_string(const Complex &); 47 | 48 | template 49 | friend std::ostream &operator<<(std::ostream &, const Complex &); 50 | }; 51 | 52 | ////////////////////////////////////////// 53 | // Constructors 54 | ////////////////////////////////////////// 55 | template 56 | Complex ::Complex() {} 57 | 58 | template 59 | template 60 | Complex ::Complex(A a) 61 | { 62 | if (typeid(T) == typeid(A)) 63 | this->real((T) a); 64 | } 65 | 66 | template 67 | Complex ::Complex(const T &re, const T &im) 68 | : std::complex (re, im) {} 69 | 70 | template 71 | Complex ::Complex(const std::complex &z) 72 | : std::complex (z) {} 73 | 74 | ////////////////////////////////////////// 75 | // Fake Constructors 76 | ////////////////////////////////////////// 77 | 78 | template 79 | Complex ::Complex(int a, bool b, bool c) {} 80 | 81 | ////////////////////////////////////////// 82 | // Getters 83 | ////////////////////////////////////////// 84 | 85 | template 86 | T Complex ::magnitude() const 87 | { 88 | return sqrt(norm(*this)); 89 | } 90 | 91 | template 92 | bool Complex ::is_real() const 93 | { 94 | return this->imag() == 0; 95 | } 96 | 97 | template 98 | Complex ::operator long double() const 99 | { 100 | return (long double) this->real(); 101 | } 102 | 103 | template 104 | Complex ::operator long long int() const 105 | { 106 | return (long long int) this->real(); 107 | } 108 | 109 | template 110 | Complex Complex 
::normalize() const 111 | { 112 | return *this/magnitude(); 113 | } 114 | 115 | template 116 | std::string to_string(const Complex &z) 117 | { 118 | std::string str; 119 | 120 | bool pl = false; 121 | 122 | if (z.real()) { 123 | pl = true; 124 | str += to_string(z.real()); 125 | } 126 | 127 | if (z.imag()) { 128 | if (pl) 129 | str += " + "; 130 | str += to_string(z.imag()) + "i"; 131 | } 132 | 133 | return str; 134 | } 135 | 136 | template 137 | std::ostream &operator<<(std::ostream &os, const Complex &z) 138 | { 139 | bool pl = false; 140 | 141 | if (!(z.real() || z.imag())) { 142 | os << "0"; 143 | return os; 144 | } 145 | 146 | if (z.real()) { 147 | pl = true; 148 | os << z.real(); 149 | } 150 | 151 | if (z.imag()) { 152 | if (pl) 153 | os << " + "; 154 | 155 | if (z.imag() != T(1)) 156 | os << z.imag(); 157 | 158 | os << "i"; 159 | } 160 | 161 | return os; 162 | } 163 | 164 | } 165 | 166 | #endif 167 | -------------------------------------------------------------------------------- /include/core/common.hpp: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H_ 2 | #define COMMON_H_ 3 | 4 | // C/C++ heaaders 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // Engine headers 11 | #include "../token.hpp" 12 | 13 | namespace zhetapi { 14 | 15 | // This file contains miscellaneous functions that 16 | // do not really fit into any other header 17 | template 18 | void collect(std::vector &bin, T x) 19 | { 20 | bin.push_back(x); 21 | } 22 | 23 | template 24 | void collect(std::vector &bin, T x, U ... rest) 25 | { 26 | bin.push_back(x); 27 | 28 | collect(bin, rest...); 29 | } 30 | 31 | } 32 | 33 | #endif -------------------------------------------------------------------------------- /include/core/kernels.hpp: -------------------------------------------------------------------------------- 1 | #ifndef KERNELS_H_ 2 | #define KERNELS_H_ 3 | 4 | // Engine headers 5 | #include "../matrix.hpp" 6 | #include "../vector.hpp" 7 | 8 | // TODO: remove this header, it is quite useless 9 | 10 | /** 11 | * @file kernels.hpp 12 | * @brief This file contains CPU "kernels" which speed up computation in other parts of 13 | * the library, such as Neural Network training. Some of these kernels may be 14 | * moved to become part of the public API. 15 | */ 16 | 17 | namespace zhetapi { 18 | 19 | /** 20 | * Computes M * V', where V' is V with 1 appended to the top. Speed-up is due to 21 | * the fact that a new vector is not being created and copied. 22 | */ 23 | template 24 | Vector apt_and_mult(const Matrix &M, const Vector &V) 25 | { 26 | size_t rs = M.get_rows(); 27 | size_t cs = M.get_cols(); 28 | 29 | Vector out(rs, T(0)); 30 | 31 | size_t k = V.size(); 32 | for (size_t i = 0; i < rs; i++) { 33 | T acc = M._array[i * cs]; 34 | 35 | for (size_t j = 0; j < k; j++) 36 | acc += M._array[i * cs + 1 + j] * V._array[j]; 37 | 38 | out._array[i] = acc; 39 | } 40 | 41 | return out; 42 | } 43 | 44 | /** 45 | * Computes U', where U = M * V and U' is U without the first element. Speed-up 46 | * is again due to the fact that a new vector is not being created. 
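In component form, the `apt_and_mult` loop above computes

\f[
\texttt{apt\_and\_mult}(M, V)_i = M_{i,0} + \sum_{j=0}^{k-1} M_{i,j+1} V_j = \left( M \begin{bmatrix} 1 \\ V \end{bmatrix} \right)_i,
\f]

i.e. the usual trick of folding the bias column into a single matrix multiply, without ever materializing the augmented vector. (The companion kernel `vvt_mult` further below is, likewise, the outer product \f$V V_t^\top\f$ written without forming the transpose explicitly.)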
47 | */ 48 | template 49 | Vector rmt_and_mult(const Matrix &M, const Vector &V) 50 | { 51 | /*size_t rs = M.get_rows(); 52 | size_t cs = M.get_cols(); 53 | 54 | Vector out(cs - 1, T(0)); 55 | for (size_t i = 1; i < cs; i++) { 56 | T acc = 0; 57 | 58 | for (size_t k = 0; k < rs; k++) 59 | acc += M._array[k * cs + i] * V._array[k]; 60 | 61 | out._array[i - 1] = acc; 62 | } */ 63 | 64 | /* Reverse loops 65 | for (size_t k = 0; k < rs; k++) { 66 | const T *arr = &(M._array[k * cs]); 67 | T v = V._array[k]; 68 | 69 | for (size_t i = 1; i < cs; i++) 70 | out._array[i - 1] = arr[i] * v; 71 | } 72 | 73 | return out; */ 74 | 75 | return Vector (M.transpose() * V).remove_top(); 76 | } 77 | 78 | /** 79 | * Computes V * (Vt)^T (transpose). Speed-up comes from the fact that we avoid 80 | * creating the transpose vector. 81 | */ 82 | template 83 | Matrix vvt_mult(const Vector &V, const Vector &Vt) 84 | { 85 | size_t rs = V.size(); 86 | size_t cs = Vt.size(); 87 | 88 | size_t n = rs * cs; 89 | 90 | /* T *tmp = new T[n]; 91 | for (size_t i = 0; i < n; i++) 92 | tmp[i] = V._array[i / cs] * Vt._array[i % cs]; 93 | 94 | return Matrix (rs, cs, tmp, false); */ 95 | 96 | return Matrix (rs, cs, [&] (size_t i, size_t j) { 97 | return V._array[i] * Vt._array[j]; 98 | }); 99 | } 100 | 101 | } 102 | 103 | #endif 104 | -------------------------------------------------------------------------------- /include/counter.hpp: -------------------------------------------------------------------------------- 1 | #ifndef COUNTER_H_ 2 | #define COUNTER_H_ 3 | 4 | namespace zhetapi { 5 | 6 | template 7 | class Counter { 8 | T _min; 9 | T _max; 10 | T _alpha; 11 | 12 | T _count; 13 | public: 14 | Counter(T, T, T); 15 | 16 | T operator()() const; 17 | }; 18 | 19 | template 20 | Counter ::Counter(T mn, T mx, T alpha) : _min(mn), _max(mx), 21 | _alpha(alpha) {} 22 | 23 | template 24 | T Counter ::operator()() const 25 | { 26 | return (_count = min(max(_count + _alpha, _min), _max)); 27 | } 28 | 29 | } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /include/cuda/error.cuh: -------------------------------------------------------------------------------- 1 | #ifndef ERROR_CUH_ 2 | #define ERROR_CUH_ 3 | 4 | cudaError_t e; 5 | 6 | // Error checking macro 7 | #define cudaCheckError(addr) \ 8 | e = cudaGetLastError(); \ 9 | if (e != cudaSuccess) { \ 10 | printf("Cuda failure %s:%d: '%s' (addr = %p)\n", __FILE__, \ 11 | __LINE__, cudaGetErrorString(e), addr); \ 12 | exit(-1); \ 13 | } 14 | 15 | // Allocation 16 | #define cuda_device_alloc(ptr, size) \ 17 | cudaMalloc(ptr, size); \ 18 | cudaCheckError(ptr); 19 | 20 | // Copying 21 | #define cuda_host_to_device_memcpy(dst, src, size) \ 22 | cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice); \ 23 | cudaCheckError(dst); 24 | 25 | #define cuda_device_to_host_memcpy(dst, src, size) \ 26 | cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost); \ 27 | cudaCheckError(dst); 28 | 29 | // Deallocation 30 | #define cuda_device_free(ptr) { \ 31 | cudaFree(ptr); \ 32 | cudaCheckError(ptr); \ 33 | } 34 | 35 | // Memory status 36 | void cuda_check_memory_status(const char *file, size_t line) 37 | { 38 | size_t free_mem; 39 | size_t total_mem; 40 | 41 | cudaMemGetInfo(&free_mem, &total_mem); 42 | 43 | printf("At [%s:%lu]: %lu bytes total, of which %lu bytes are free.\n", 44 | file, line, total_mem, free_mem); 45 | } 46 | 47 | #define cuda_show_mem() \ 48 | cuda_check_memory_status(__FILE__, __LINE__); 49 | 50 | #endif 51 | 
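The checking macros above wrap the raw CUDA runtime calls with immediate error reporting. A typical sequence, as a sketch (pointer and sizes are illustrative):

\code{.cpp}
float *dev = nullptr;
cuda_device_alloc(&dev, 256 * sizeof(float));          // cudaMalloc + check

float host[256] = {};
cuda_host_to_device_memcpy(dev, host, sizeof(host));   // upload + check
cuda_device_to_host_memcpy(host, dev, sizeof(host));   // download + check

cuda_device_free(dev);                                 // cudaFree + check
cuda_show_mem();   // prints total and free device memory with file:line
\endcode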
-------------------------------------------------------------------------------- /include/cuda/essentials.cuh: -------------------------------------------------------------------------------- 1 | #ifndef ESSENTIALS_H_ 2 | #define ESSENTIALS_H_ 3 | 4 | #ifdef __CUDACC__ 5 | 6 | #define __cuda_dual__ __host__ __device__ 7 | 8 | #else 9 | 10 | #define __cuda_dual__ 11 | 12 | #endif // CUDA active 13 | 14 | // Use when we want to define a new variable cudaError_t error 15 | #define __cuda_check_error() \ 16 | cudaError_t error = cudaGetLastError(); \ 17 | if (error != cudaSuccess) { \ 18 | printf("CUDA error: %s\n", \ 19 | cudaGetErrorString(error)); \ 20 | exit(-1); \ 21 | } 22 | 23 | // Use when cudaError_t error has already been defined 24 | #define __cuda_check_perror() \ 25 | error = cudaGetLastError(); \ 26 | if (error != cudaSuccess) { \ 27 | printf("CUDA error: %s\n", \ 28 | cudaGetErrorString(error)); \ 29 | exit(-1); \ 30 | } 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /include/cuda/matrix.cuh: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX_CUH_ 2 | #define MATRIX_CUH_ 3 | 4 | namespace zhetapi { 5 | 6 | 7 | 8 | } 9 | 10 | #endif -------------------------------------------------------------------------------- /include/cuda/nvarena.cuh: -------------------------------------------------------------------------------- 1 | #ifndef NVARENA_CUH_ 2 | #define NVARENA_CUH_ 3 | 4 | #define ZHP_CUDA 5 | 6 | // C/C++ headers 7 | #include <map> 8 | #include <stdexcept> 9 | 10 | // Engine headers 11 | #include 12 | 13 | // Namespace external functions 14 | struct __addr_cmp { 15 | bool operator()(void *a, void *b) { 16 | return (intptr_t) a < (intptr_t) b; 17 | } 18 | }; 19 | 20 | namespace zhetapi { 21 | 22 | /** 23 | * @brief An allocator class for Nvidia GPUs. Has additional features like 24 | * warnings for memory leaks and copy bound errors. Overall, it is a more convenient 25 | * interface to GPU memory than standard CUDA operations like \c cudaMalloc and 26 | * \c cudaMemcpy. 27 | */ 28 | class NVArena { 29 | public: 30 | // TODO: need to select the specific GPU 31 | using memmap = std::map <void *, size_t, __addr_cmp>; 32 | 33 | // TODO: bad alloc 34 | 35 | /** 36 | * @brief This exception is thrown if the user tries to free a piece of 37 | * memory that was never allocated. 38 | */ 39 | class segfault : public std::runtime_error { 40 | public: 41 | segfault() : std::runtime_error("NVArena: segmentation fault.") {} 42 | }; 43 | 44 | /** 45 | * @brief This exception is thrown if the user frees a piece of memory 46 | * more than once. The allocator keeps track of all allocated blocks for 47 | * this.
48 | */ 49 | class double_free : public std::runtime_error { 50 | public: 51 | double_free() : std::runtime_error("NVArena: double free.") {} 52 | }; 53 | private: 54 | // Whole pool 55 | void * _pool = nullptr; 56 | 57 | // Free list (ordered by address) 58 | memmap _flist; 59 | 60 | // Warning flag 61 | bool _warn = true; 62 | public: 63 | explicit NVArena(size_t); 64 | 65 | // Disable copying of any sort 66 | NVArena(const NVArena &) = delete; 67 | NVArena &operator=(const NVArena &) = delete; 68 | 69 | ~NVArena(); 70 | 71 | // Allocation 72 | void *alloc(size_t = 1); 73 | 74 | template 75 | T *alloc(size_t = 1); 76 | 77 | // Deallocation 78 | void free(void *); 79 | 80 | template 81 | void free(T *); 82 | 83 | // TODO: Warn with memcpy 84 | void write(void *, void *, size_t); 85 | void read(void *, void *, size_t); 86 | 87 | // void memcpy(void *, size_t); 88 | 89 | // Only allow template for homogenous pointers 90 | // (no implicit size for heterogenous types) 91 | template 92 | void write(T *, T *, size_t = 1); 93 | 94 | template 95 | void read(T *, T *, size_t = 1); 96 | 97 | // Memory map 98 | void show_mem_map() const; 99 | }; 100 | 101 | /** 102 | * @brief Allocates a block of items of a specific type. 103 | * 104 | * @tparam t the specific type of item to allocate. 105 | * 106 | * @param items the number of items to allocate. 107 | * 108 | * @return the allocated block. 109 | */ 110 | template 111 | T *NVArena::alloc(size_t items) 112 | { 113 | void *data = alloc(items * sizeof(T)); 114 | 115 | return (T *) data; 116 | } 117 | 118 | /** 119 | * @brief Frees a block of items of a specific type. 120 | * 121 | * @tparam T the specific type of item to free. 122 | * 123 | * @param ptr the block of memory to be freed. 124 | */ 125 | template 126 | void NVArena::free(T *ptr) 127 | { 128 | free((void *) ptr); 129 | } 130 | 131 | /** 132 | * @brief Copies a block of memory from host memory to GPU memory, using \c 133 | * cudaMemcpy. 134 | * 135 | * @tparam T the type of each element in the blocks of memory. 136 | * 137 | * @param dst the pointer to the destination in GPU memory. 138 | * @param src the pointer to the block in host memory. 139 | * @param n the number of items to copy (note that this copies `n * 140 | * sizeof(T)` bytes in total). 141 | */ 142 | template 143 | void NVArena::write(T *dst, T *src, size_t n) 144 | { 145 | write((void *) dst, (void *) src, n * sizeof(T)); 146 | } 147 | 148 | /** 149 | * @brief Copies a block of memory from GPU memory to host memory, using \c 150 | * cudaMemcpy. 151 | * 152 | * @tparam T the type of each element in the blocks of memory. 153 | * 154 | * @param dst the pointer to the destination in host memory. 155 | * @param src the pointer to the block in GPU memory. 156 | * @param n the number of items to copy (note that this copies `n * 157 | * sizeof(T)` bytes in total). 
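Taken together, the templated helpers give a compact allocate/copy/free cycle. A minimal sketch, assuming the constructor takes the pool size in bytes:

\code{.cpp}
zhetapi::NVArena arena(1 << 20);   // 1 MB pool (unit is an assumption)

float *dev = arena.alloc <float> (256);   // 256 floats on the device

float host[256] = {};
arena.write(dev, host, 256);   // host -> GPU, 256 * sizeof(float) bytes
arena.read(host, dev, 256);    // GPU -> host

arena.free(dev);   // a second free(dev) would throw NVArena::double_free
\endcode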
158 | */ 159 | template 160 | void NVArena::read(T *dst, T *src, size_t n) 161 | { 162 | read((void *) dst, (void *) src, n * sizeof(T)); 163 | } 164 | 165 | } 166 | 167 | #endif 168 | -------------------------------------------------------------------------------- /include/cuda/vector.cuh: -------------------------------------------------------------------------------- 1 | #ifndef VECTOR_CUH_ 2 | #define VECTOR_CUH_ 3 | 4 | namespace zhetapi { 5 | 6 | // hc is half copy vector 7 | template 8 | void Vector ::cuda_read(Vector *hc) 9 | { 10 | if (hc->_size != this->_size) { 11 | // Add a clear array function (and clear dim) 12 | delete[] this->_array; 13 | 14 | this->_array = new T[hc->_size]; 15 | } 16 | 17 | hc->_arena->read(this->_array, hc->_array, hc->_size); 18 | hc->_arena->read(this->_dim, hc->_dim, 2); 19 | } 20 | 21 | // returns a vector with only _dim and _array in device memory 22 | // requires the callee to be fully in host memory 23 | template 24 | Vector *Vector ::cuda_half_copy(NVArena *arena) const 25 | { 26 | size_t *dim = arena->alloc (2); 27 | T *array = arena->alloc (this->_size); 28 | 29 | arena->write(dim, this->_dim, 2); 30 | arena->write(array, this->_array, this->_size); 31 | 32 | // Host copy 33 | Vector *hc = new Vector ; 34 | memcpy(hc, this, sizeof(Vector )); 35 | 36 | // Edit hc with the correct values 37 | hc->_array = array; 38 | hc->_dim = dim; 39 | hc->_on_device = true; 40 | hc->_arena = arena; 41 | 42 | return hc; 43 | } 44 | 45 | // returns a vector fully in device memory 46 | // requires the callee to be partially in device memory (_dim and _array) 47 | template 48 | Vector *Vector ::cuda_full_copy(NVArena *arena) 49 | { 50 | Vector *fc = arena->alloc > (); 51 | arena->write(fc, this); 52 | // cudaMemcpy(fc, this, sizeof(Vector ), cudaMemcpyHostToDevice); 53 | return fc; 54 | } 55 | 56 | } 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /include/dataset.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DATASET_H_ 2 | #define DATASET_H_ 3 | 4 | #ifndef __AVR 5 | 6 | // C/C++ headers 7 | #include 8 | 9 | // Engine headers 10 | #include "vector.hpp" 11 | 12 | namespace zhetapi { 13 | 14 | template 15 | using DataSet = std::vector >; 16 | 17 | template 18 | std::vector > split(const DataSet &dset, size_t len) 19 | { 20 | std::vector > batched; 21 | 22 | DataSet batch; 23 | 24 | size_t size = dset.size(); 25 | for (int i = 0; i < size; i++) { 26 | batch.push_back(dset[i]); 27 | 28 | if (i % len == len - 1 || i == size - 1) { 29 | batched.push_back(batch); 30 | 31 | batch.clear(); 32 | } 33 | } 34 | 35 | return batched; 36 | } 37 | 38 | // General sets of N-dimensional data 39 | template 40 | class NumericalData { 41 | // TODO: make more efficient? 42 | Vector _stddev() { 43 | Vector sum(N, 0); 44 | for (const auto &vec : dataset) { 45 | Vector dx = (vec - _mean); 46 | sum += shur(dx, dx); 47 | } 48 | return sum/(dataset.size() - sample); 49 | } 50 | 51 | // TODO: fixed vector? 
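The `split` helper above chops a `DataSet` into contiguous batches, the last of which simply absorbs any remainder. For instance:

\code{.cpp}
// Ten 2-component samples, batch length 4  ->  batch sizes 4, 4, 2
zhetapi::DataSet <double> dset(10, zhetapi::Vector <double> (2, 1.0));

auto batches = zhetapi::split(dset, 4);
// batches.size() == 3 and batches[2].size() == 2
\endcode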
52 | Vector _mean() { 53 | Vector sum(N, 0); 54 | for (const auto &vec : dataset) 55 | sum += vec; 56 | return sum/dataset.size(); 57 | } 58 | public: 59 | DataSet dataset; 60 | Vector mean; 61 | Vector stddev; 62 | bool sample; 63 | 64 | // Sample indicates whether the dataset is a 65 | // sample or the entire population 66 | NumericalData(const DataSet &set, bool sample = false) 67 | : dataset(set), mean(_mean()), 68 | stddev(_stddev()), sample(sample) {} 69 | }; 70 | 71 | // Dimensional 72 | template 73 | using BivariateData = NumericalData ; 74 | 75 | } 76 | 77 | #else 78 | 79 | #warning Zhetapi does not support zhetapi::Dataset for AVR systems. 80 | 81 | #endif 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /include/display.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DISPLAY_H_ 2 | #define DISPLAY_H_ 3 | 4 | #ifndef __AVR // Does not support AVR 5 | 6 | #include 7 | 8 | #endif // Does not support AVR 9 | 10 | namespace zhetapi { 11 | 12 | /** 13 | * Display: 14 | * 15 | * Display is a struct of display options during neural network training. 16 | */ 17 | struct Display { 18 | typedef uint8_t type; 19 | 20 | static const uint8_t epoch; 21 | static const uint8_t batch; 22 | static const uint8_t graph; 23 | }; 24 | 25 | } 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /include/dnnopt.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DNN_OPT_H_ 2 | #define DNN_OPT_H_ 3 | 4 | // Engine headers 5 | #include "dataset.hpp" 6 | #include "gradient.hpp" 7 | 8 | namespace zhetapi { 9 | 10 | template 11 | class DnnOpt { 12 | protected: 13 | // Cached 14 | Vector * __a = nullptr; 15 | Vector * __z = nullptr; 16 | 17 | T __eta = 0; 18 | 19 | size_t __size = 0; 20 | 21 | bool __switch = false; 22 | 23 | // Functions 24 | DnnOpt(T); 25 | 26 | virtual Matrix *raw_gradient( 27 | Layer *, 28 | size_t, 29 | const Vector &, 30 | const Vector &, 31 | Erf *); 32 | 33 | virtual Matrix *raw_batch_gradient( 34 | Layer *, 35 | size_t, 36 | const DataSet &, 37 | const DataSet &, 38 | Erf *); 39 | 40 | virtual Matrix *update( 41 | Matrix *, 42 | size_t) = 0; 43 | public: 44 | virtual ~DnnOpt(); 45 | 46 | void set_learning_rate(T); 47 | 48 | Matrix *gradient( 49 | Layer *, 50 | size_t, 51 | const Vector &, 52 | const Vector &, 53 | Erf *); 54 | 55 | Matrix *batch_gradient( 56 | Layer *, 57 | size_t, 58 | const DataSet &, 59 | const DataSet &, 60 | Erf *); 61 | }; 62 | 63 | template 64 | DnnOpt ::DnnOpt(T lr) : __eta(lr) {} 65 | 66 | template 67 | DnnOpt ::~DnnOpt() 68 | { 69 | delete[] __a; 70 | delete[] __z; 71 | } 72 | 73 | template 74 | void DnnOpt ::set_learning_rate(T lr) 75 | { 76 | __eta = lr; 77 | } 78 | 79 | template 80 | Matrix *DnnOpt ::raw_gradient( 81 | Layer *layers, 82 | size_t size, 83 | const Vector &in, 84 | const Vector &out, 85 | Erf *cost) 86 | { 87 | if (size != __size) { 88 | delete[] __a; 89 | delete[] __z; 90 | 91 | __size = size; 92 | 93 | __a = new Vector [__size + 1]; 94 | __z = new Vector [__size]; 95 | 96 | __switch = true; 97 | } else { 98 | __switch = false; 99 | } 100 | 101 | return simple_gradient( 102 | layers, 103 | size, 104 | __a, 105 | __z, 106 | in, 107 | out, 108 | cost); 109 | } 110 | 111 | template 112 | Matrix *DnnOpt ::raw_batch_gradient( 113 | Layer *layers, 114 | size_t size, 115 | const DataSet &ins, 116 | const DataSet &outs, 117 | Erf *cost) 118 | { 119 | 
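Stepping back to dataset.hpp for a moment: `NumericalData` computes its statistics once at construction, and the `sample` flag applies Bessel's correction (divisor \f$n - 1\f$ instead of \f$n\f$). Note that `_stddev`, as written, returns the component-wise variance; the square root is never taken. A sketch, assuming `<T, N>` template parameters:

\code{.cpp}
zhetapi::DataSet <double> points {
	zhetapi::Vector <double> (2, 0.0),   // (0, 0)
	zhetapi::Vector <double> (2, 2.0)    // (2, 2)
};

// Treat the data as a sample of a larger population
zhetapi::NumericalData <double, 2> data(points, true);

// data.mean   == (1, 1)
// data.stddev == (2, 2): component-wise sample variance, no square root
\endcode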
if (size != __size) { 120 | delete[] __a; 121 | delete[] __z; 122 | 123 | __size = size; 124 | 125 | __a = new Vector [__size + 1]; 126 | __z = new Vector [__size]; 127 | 128 | __switch = true; 129 | } else { 130 | __switch = false; 131 | } 132 | 133 | return simple_batch_gradient( 134 | layers, 135 | size, 136 | __a, 137 | __z, 138 | ins, 139 | outs, 140 | cost); 141 | } 142 | 143 | template 144 | Matrix *DnnOpt ::gradient( 145 | Layer *layers, 146 | size_t size, 147 | const Vector &in, 148 | const Vector &out, 149 | Erf *cost) 150 | { 151 | 152 | return update(raw_gradient( 153 | layers, 154 | size, 155 | in, 156 | out, 157 | cost), size); 158 | } 159 | 160 | template 161 | Matrix *DnnOpt ::batch_gradient( 162 | Layer *layers, 163 | size_t size, 164 | const DataSet &ins, 165 | const DataSet &outs, 166 | Erf *cost) 167 | { 168 | return update(raw_batch_gradient( 169 | layers, 170 | size, 171 | ins, 172 | outs, 173 | cost), size); 174 | } 175 | 176 | } 177 | 178 | } 179 | 180 | #endif 181 | -------------------------------------------------------------------------------- /include/engine.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ENGINE_H_ 2 | #define ENGINE_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | 7 | // Engine headers 8 | #include "function.hpp" 9 | #include "registration.hpp" 10 | 11 | #include "core/algorithm.hpp" 12 | #include "core/common.hpp" 13 | 14 | namespace zhetapi { 15 | 16 | // Aliases 17 | template 18 | using Symtab = std::unordered_map ; 19 | 20 | class Engine { 21 | // Broader scope 22 | Engine * _stack = nullptr; 23 | 24 | Symtab _var_table; 25 | 26 | // Private methods 27 | void set_origin_stack(Engine *); 28 | public: 29 | Engine(bool = false); 30 | Engine(const Engine &); 31 | 32 | Engine &operator=(const Engine &); 33 | 34 | ~Engine(); 35 | 36 | // List all symbols 37 | Args symbol_list() const; 38 | 39 | // Actions 40 | Engine *new_stack(); 41 | Engine *get_stack(); 42 | 43 | void put(const std::string &, Token *); 44 | 45 | Token *get(const std::string &); 46 | 47 | void list() const; 48 | void list_registered(std::string) const; 49 | }; 50 | 51 | Engine *push_and_ret_stack(Engine *); 52 | Engine *pop_and_del_stack(Engine *); 53 | 54 | } 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /include/equation.hpp: -------------------------------------------------------------------------------- 1 | #ifndef EQUATION_H_ 2 | #define EQUATION_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | 8 | // Engine headers 9 | #include "engine.hpp" 10 | 11 | namespace zhetapi { 12 | 13 | using Solutions = std::vector >; 14 | 15 | class Equation { 16 | std::vector _expressions = {}; 17 | std::vector _args = {}; 18 | Engine * _engine = nullptr; 19 | public: 20 | Equation(const std::vector &); 21 | 22 | // Properties 23 | size_t args() const; 24 | 25 | // Methods 26 | Solutions solve() const; 27 | 28 | std::string representation() const; 29 | 30 | // Exceptions 31 | class bad_input_size : std::runtime_error { 32 | public: 33 | bad_input_size() 34 | : std::runtime_error("Bad input size for equation") {} 35 | }; 36 | }; 37 | 38 | std::ostream &operator<<(std::ostream &, const Equation &); 39 | 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /include/erf.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ERF_H_ 2 | #define ERF_H_ 3 | 4 | #ifndef __AVR // Does not 
support AVR 5 | 6 | // C/C++ headers 7 | #include 8 | 9 | #endif // Does not support AVR 10 | 11 | // Engine headers 12 | #ifdef ZHP_CUDA 13 | 14 | #include "cuda/vector.cuh" 15 | 16 | #else 17 | 18 | #include "vector.hpp" 19 | 20 | #endif 21 | 22 | #include "cuda/essentials.cuh" 23 | 24 | namespace zhetapi { 25 | 26 | namespace ml { 27 | 28 | template 29 | class Erf { 30 | public: 31 | // TODO: Replace with a string 32 | enum erf_type { 33 | OPT_Default, 34 | OPT_SE, 35 | OPT_MSE, 36 | }; 37 | 38 | // TODO: Add a vector constructor for JSON 39 | __cuda_dual__ 40 | Erf(); 41 | 42 | __cuda_dual__ 43 | Vector compute(const Vector &, const Vector &) const; 44 | 45 | __cuda_dual__ 46 | virtual Vector operator()(const Vector &, const Vector &) const; 47 | 48 | __cuda_dual__ 49 | virtual Erf *derivative() const; 50 | 51 | __cuda_dual__ 52 | int get_erf_type() const; 53 | 54 | template 55 | __cuda_dual__ 56 | friend Erf *copy(Erf *); 57 | 58 | // Exceptions 59 | class dimension_mismatch {}; 60 | protected: 61 | static void assert_size(const Vector &, const Vector &); 62 | 63 | erf_type kind; 64 | }; 65 | 66 | template 67 | void Erf ::assert_size(const Vector &a, const Vector &b) 68 | { 69 | if (a.size() != b.size()) 70 | throw dimension_mismatch(); 71 | } 72 | 73 | #ifndef ZHP_CUDA 74 | 75 | template 76 | Erf ::Erf() : kind(OPT_Default) {} 77 | 78 | // TODO: Reverse compute and operator() 79 | template 80 | Vector Erf ::operator()(const Vector &comp, const Vector &in) const 81 | { 82 | return {(comp - in).norm()}; 83 | } 84 | 85 | template 86 | Vector Erf ::compute(const Vector &comp, const Vector &in) const 87 | { 88 | return (*this)(comp, in); 89 | } 90 | 91 | template 92 | Erf *Erf ::derivative() const 93 | { 94 | return new Erf(); 95 | } 96 | 97 | template 98 | int Erf ::get_erf_type() const 99 | { 100 | return kind; 101 | } 102 | 103 | #endif 104 | 105 | } 106 | 107 | } 108 | 109 | #endif 110 | -------------------------------------------------------------------------------- /include/field.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Standard headers 4 | #include 5 | 6 | namespace zhetapi { 7 | 8 | template 9 | class Tensor; 10 | 11 | #ifndef __CUDACC__ 12 | 13 | // Concept for objects which behave and interact like Matrices... 
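Returning briefly to the `Erf` interface above: a custom error functional only needs to override `operator()`. Here is a mean-squared-error sketch; it assumes `norm()` is the Euclidean norm used by the default `Erf`, and reuses the protected `assert_size` check:

\code{.cpp}
template <class T>
class MSE : public zhetapi::ml::Erf <T> {
public:
	zhetapi::Vector <T> operator()(const zhetapi::Vector <T> &comp,
			const zhetapi::Vector <T> &in) const override {
		zhetapi::ml::Erf <T>::assert_size(comp, in);

		T nrm = (comp - in).norm();
		return { nrm * nrm / T(comp.size()) };
	}
};
\endcode

A full implementation would also override `derivative()` to supply the corresponding gradient functional.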
14 | template <class T, class Type> 15 | concept concept_field = std::is_base_of_v <Tensor <T>, Type> && \ 16 | requires (Type &a, Type &b, const Type &ct, T k) { 17 | // Properties 18 | {a.length()} -> std::convertible_to <T>; 19 | 20 | // Operations 21 | {a + b} -> std::convertible_to <Type>; 22 | {a - b} -> std::convertible_to <Type>; 23 | {k * ct} -> std::convertible_to <Type>; 24 | {ct * k} -> std::convertible_to <Type>; 25 | {ct / k} -> std::convertible_to <Type>; 26 | 27 | // Functions 28 | {min(a)} -> std::convertible_to <T>; 29 | {max(a)} -> std::convertible_to <T>; 30 | }; 31 | 32 | // Derivable struct to ensure that a type behaves like a Field 33 | template <class T, class Type> 34 | struct Field { 35 | Field() { 36 | static_assert( 37 | concept_field <T, Type>, 38 | "Type does not behave like a Field" 39 | ); 40 | } 41 | }; 42 | 43 | #else 44 | 45 | // Derivable struct to ensure that a type behaves like a Field 46 | template <class T, class Type> 47 | struct Field { 48 | Field() { 49 | static_assert( 50 | std::is_base_of_v <Tensor <T>, Type>, 51 | "Type does not behave like a Field" 52 | ); 53 | } 54 | }; 55 | 56 | #endif 57 | 58 | } 59 | -------------------------------------------------------------------------------- /include/filter.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FILTER_H_ 2 | #define FILTER_H_ 3 | 4 | // Engine headers 5 | #include "tensor.hpp" 6 | 7 | namespace zhetapi { 8 | 9 | namespace ml { 10 | 11 | // Type aliases 12 | template <class T> 13 | using Pipe = std::vector <Tensor <T> *>; 14 | 15 | template <class T> 16 | class Filter { 17 | public: 18 | /** 19 | * @brief Process method: takes in a set of inputs, performs the 20 | * necessary computations, and places the results into the locations 21 | * specified by the second vector of pointers. Note that the inputs 22 | * are also passed as a list of pointers. 23 | */ 24 | virtual void propogate(const Pipe <T> &, Pipe <T> &) = 0; 25 | virtual void gradient(const Pipe <T> &, Pipe <T> &) = 0; 26 | virtual void apply_gradient(const Pipe <T> &) = 0; 27 | }; 28 | 29 | } 30 | 31 | } 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /include/fourier.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FOURIER_H_ 2 | #define FOURIER_H_ 3 | 4 | // Engine headers 5 | #include "vector.hpp" 6 | 7 | namespace zhetapi { 8 | 9 | template <class T> 10 | class FourierSeries { 11 | Vector <T> _a; // Cosine coefficients 12 | Vector <T> _b; // Sine coefficients 13 | 14 | size_t _asize; 15 | size_t _bsize; 16 | public: 17 | FourierSeries(const Vector <T> &); 18 | FourierSeries(const Vector <T> &, const Vector <T> &); 19 | 20 | T evaluate(const T &) const; 21 | T operator()(const T &) const; 22 | }; 23 | 24 | // TODO: Fix indices 25 | // 26 | // a0, a1, b1, a2, b2, ...
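This flat layout corresponds to the truncated series

\f[
f(x) \approx \frac{a_0}{2} + \sum_{n = 1}^{N} \big( a_n \cos(nx) + b_n \sin(nx) \big),
\f]

with the \f$\tfrac{1}{2}\f$ on \f$a_0\f$ supplied by the `k_cos` kernel below, which returns \f$0.5\f$ at index 0.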
27 | template <class T> 28 | FourierSeries <T> ::FourierSeries(const Vector <T> &coeffs) 29 | { 30 | // Remove vector middle operation 31 | std::vector <T> a; 32 | std::vector <T> b; 33 | 34 | size_t n = coeffs.size(); 35 | 36 | size_t i = 0; 37 | 38 | a.push_back(coeffs[i++]); 39 | while (i + 1 < n) { 40 | a.push_back(coeffs[i]); 41 | b.push_back(coeffs[i + 1]); 42 | 43 | i += 2; 44 | } 45 | 46 | _asize = a.size(); 47 | _bsize = b.size(); 48 | 49 | _a = Vector <T> (a); 50 | _b = Vector <T> (b); 51 | } 52 | 53 | template <class T> 54 | FourierSeries <T> ::FourierSeries(const Vector <T> &a, const Vector <T> &b) 55 | : _a(a), _b(b), _asize(a.size()), _bsize(b.size()) {} 56 | 57 | template <class T> 58 | T FourierSeries <T> ::evaluate(const T &x) const 59 | { 60 | // Make more efficient construction kernels for vectors 61 | // (maybe don't even create a vector: a custom kernel 62 | // for modified inner products) 63 | Vector <T> k_cos(_asize, 64 | [&](size_t i) { 65 | if (i == 0) 66 | return T(0.5); 67 | 68 | return std::cos(i * x); 69 | } 70 | ); 71 | 72 | Vector <T> k_sin(_bsize, 73 | [&](size_t i) { 74 | return std::sin((i + 1) * x); 75 | } 76 | ); 77 | 78 | return inner(_a, k_cos) + inner(_b, k_sin); 79 | } 80 | 81 | template <class T> 82 | T FourierSeries <T> ::operator()(const T &x) const 83 | { 84 | return evaluate(x); 85 | } 86 | 87 | } 88 | 89 | #endif 90 | -------------------------------------------------------------------------------- /include/function.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FUNCTION_H_ 2 | #define FUNCTION_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | // Engine headers 11 | #include "core/common.hpp" 12 | #include "core/functor.hpp" 13 | #include "core/node_manager.hpp" 14 | #include "core/method_table.hpp" 15 | 16 | namespace zhetapi { 17 | 18 | class Function : public Functor { 19 | std::string _symbol; 20 | std::vector <std::string> _params; 21 | node_manager _manager; 22 | size_t _threads; 23 | public: 24 | Function(); 25 | Function(const char *); 26 | Function(const std::string &, Engine * = shared_context); 27 | 28 | Function(const std::string &, 29 | const std::vector <std::string> &, 30 | const node_manager &); 31 | 32 | Function(const Function &); 33 | 34 | bool is_variable(const std::string &) const; 35 | 36 | std::string &symbol(); 37 | const std::string symbol() const; 38 | 39 | void set_threads(size_t); 40 | 41 | Token *evaluate(Engine *, const std::vector <Token *> &) override; 42 | 43 | Token *compute(const std::vector <Token *> &, Engine * = shared_context); 44 | Token *operator()(const std::vector <Token *> &, Engine * = shared_context); 45 | 46 | template <class ... A> 47 | Token *operator()(A ...); 48 | 49 | template 50 | Token *operator()(A ...); 51 | 52 | template <class ... A> 53 | Token *derivative(const std::string &, A ...); 54 | 55 | Function differentiate(const std::string &) const; 56 | 57 | friend bool operator<(const Function &, const Function &); 58 | friend bool operator>(const Function &, const Function &); 59 | 60 | // Virtual overloads 61 | Token::type caller() const override; 62 | std::string dbg_str() const override; 63 | Token *copy() const override; 64 | bool operator==(Token *) const override; 65 | 66 | // Printing 67 | void print() const; 68 | 69 | std::string display() const; 70 | 71 | 72 | friend std::ostream &operator<<(std::ostream &, const Function &); 73 | private: 74 | template <class A> 75 | void gather(std::vector <Token *> &, A); 76 | 77 | template <class A, class ... B> 78 | void gather(std::vector <Token *> &, A, B ...); 79 | 80 | size_t index(const std::string &) const; 81 | public: 82 | // Exception classes 83 | class invalid_definition {}; 84 | 85 |
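For orientation, the intended call pattern for `Function` is sketched below; the `"f(x) = ..."` definition syntax is an assumption extrapolated from the string constructors above:

\code{.cpp}
// Assumed definition syntax; parsing is handled by node_manager
zhetapi::Function f("f(x) = x^2 + 1");

// The variadic operator() wraps each raw argument in an Operand
zhetapi::Token *out = f(2.0);   // evaluates to 5
\endcode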
// Static variables 86 | static Engine *shared_context; 87 | static double h; 88 | 89 | // Methods 90 | friend ZHP_TOKEN_METHOD(ftn_deriv_method); 91 | 92 | // Static method table 93 | static MethodTable mtable; 94 | }; 95 | 96 | template 97 | Token *Function::operator()(A ... args) 98 | { 99 | std::vector tokens; 100 | 101 | gather(tokens, args...); 102 | 103 | assert(tokens.size() == _params.size()); 104 | 105 | return _manager.substitute_and_compute(shared_context, tokens); 106 | } 107 | 108 | // Gathering facilities 109 | template 110 | void Function::gather(std::vector &toks, A in) 111 | { 112 | toks.push_back(new Operand (in)); 113 | } 114 | 115 | template 116 | void Function::gather(std::vector &toks, A in, B ... args) 117 | { 118 | toks.push_back(new Operand (in)); 119 | 120 | gather(toks, args...); 121 | } 122 | 123 | } 124 | 125 | #endif 126 | -------------------------------------------------------------------------------- /include/gnn.hpp: -------------------------------------------------------------------------------- 1 | #ifndef GNN_H_ 2 | #define GNN_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | 9 | // Engine headers 10 | #include "netnode.hpp" 11 | 12 | namespace zhetapi { 13 | 14 | namespace ml { 15 | 16 | /** 17 | * @brief General neural network (GNN): 18 | * 19 | * Represents a neural network whose structure is theoretically unlimited, i.e. 20 | * a neural network with various kinds of connections (skip connections, etc.) 21 | * and different types of layers (deep layer, convolutional layer, recurrent layer). 22 | * 23 | * The true representation of the network is as a series of "pipes" between 24 | * nodes. Each of these pipes contains a Tensor object representing the pipes 25 | * current state of execution. The two most important sets of pipes are the 26 | * input and output pipes which, as their name implies, carry the inputs and 27 | * outputs. 28 | */ 29 | template 30 | class GNN { 31 | std::vector *> _ins = {}; 32 | std::vector *> _outs = {}; 33 | 34 | // Variadic constructor helpers 35 | void init(NetNode *); 36 | 37 | template 38 | void init(NetNode *, U ...); 39 | 40 | // Initialize outs 41 | void getouts(); 42 | public: 43 | GNN(); 44 | explicit GNN(NetNode *); 45 | explicit GNN(const std::vector *> &); 46 | 47 | // Variadic constructor 48 | template 49 | explicit GNN(U ...); 50 | 51 | // Extraction 52 | inline NetNode &ipipe(size_t); 53 | inline NetNode &operator[](size_t); 54 | 55 | // Retrieval 56 | inline const NetNode &opipe(size_t) const; 57 | inline const NetNode &operator[](size_t) const; 58 | 59 | // Passing 60 | void pass(std::vector > &) const; 61 | void pass(std::vector > &&) const; 62 | 63 | void trace() const; 64 | }; 65 | 66 | template 67 | GNN ::GNN() {} 68 | 69 | template 70 | GNN ::GNN(NetNode *nnptr) : _ins({nnptr}) 71 | { 72 | getouts(); 73 | } 74 | 75 | template 76 | GNN ::GNN(const std::vector *> &ins) 77 | : _ins(ins) {} 78 | 79 | template 80 | template 81 | GNN ::GNN(U ... args) 82 | { 83 | init(args...); 84 | } 85 | 86 | template 87 | void GNN ::init(NetNode *nnptr) 88 | { 89 | _ins.push_back(nnptr); 90 | 91 | getouts(); 92 | } 93 | 94 | template 95 | template 96 | void GNN ::init(NetNode *nnptr, U ... 
args) 97 | { 98 | _ins.push_back(nnptr); 99 | 100 | init(args...); 101 | } 102 | 103 | template 104 | void GNN ::getouts() 105 | { 106 | // Set of visited nodes 107 | std::set *> vis; 108 | 109 | // BFS queue 110 | std::queue *> queue; 111 | 112 | for (NetNode *nnptr : _ins) 113 | queue.emplace(nnptr); 114 | 115 | while (!queue.empty()) { 116 | NetNode *cptr = queue.top(); 117 | 118 | queue.pop(); 119 | 120 | if (vis.find() != vis.end()) 121 | continue; 122 | 123 | auto vfrw = cptr->forward(); 124 | if (vfrw.empty()) { 125 | _outs.push_back(cptr); 126 | } else { 127 | for (auto frw : vfrw) 128 | queue.push(frw->_fr); 129 | } 130 | } 131 | } 132 | 133 | /** 134 | * @brief Modifies an input pipe (when assigned, such as `gnn.ipipe() = 135 | * tensor`). 136 | * 137 | * @param i the input pipe index. 138 | */ 139 | template 140 | inline NetNode &GNN ::ipipe(size_t i) 141 | { 142 | return *(_ins[i]); 143 | } 144 | 145 | /** 146 | * @brief Modifies an input pipe (when assigned, such as `gnn[0] = tensor`). 147 | * 148 | * @param i the input pipe index. 149 | */ 150 | template 151 | inline NetNode &GNN ::operator[](size_t i) 152 | { 153 | return *(_ins[i]); 154 | } 155 | 156 | /** 157 | * @brief Retrieves an output pipe. 158 | * 159 | * @param i the output pipe index. 160 | */ 161 | template 162 | inline const NetNode &GNN ::opipe(size_t i) const 163 | { 164 | return *(_outs[i]); 165 | } 166 | 167 | /** 168 | * @brief Retrieves an output pipe. 169 | * 170 | * @param i the output pipe index. 171 | */ 172 | template 173 | inline const NetNode &GNN ::operator[](size_t i) const 174 | { 175 | return *(_outs[i]); 176 | } 177 | 178 | // Passing 179 | template 180 | void GNN ::pass(std::vector > &args) const 181 | { 182 | size_t i = 0; 183 | while (!args.empty() && i < _ins.size()) { 184 | _ins[i].pass(args); 185 | 186 | i++; 187 | } 188 | } 189 | 190 | template 191 | void GNN ::pass(std::vector > &&rargs) const 192 | { 193 | std::vector > args = std::move(rargs); 194 | 195 | pass(args); 196 | } 197 | 198 | template 199 | void GNN ::trace() const 200 | { 201 | for (NetNode *nn : _ins) 202 | nn->trace(); 203 | } 204 | 205 | } 206 | 207 | } 208 | 209 | #endif 210 | -------------------------------------------------------------------------------- /include/image.hpp: -------------------------------------------------------------------------------- 1 | #ifndef IMAGE_H_ 2 | #define IMAGE_H_ 3 | 4 | // C++ headers 5 | #include 6 | #include 7 | #include 8 | 9 | // PNG library 10 | #include 11 | 12 | // Engine headers 13 | #include "tensor.hpp" 14 | #include "vector.hpp" 15 | 16 | namespace zhetapi { 17 | 18 | namespace image { 19 | 20 | // Global type aliases 21 | using byte = unsigned char; 22 | 23 | // Global exceptions 24 | class bad_hex_string {}; 25 | 26 | // Color structure 27 | // 28 | // TODO: Derive Color from FixedVector 29 | struct Color { 30 | byte r = 0; 31 | byte g = 0; 32 | byte b = 0; 33 | 34 | Color(); 35 | Color(const char *); // Hex constructor 36 | Color(const std::string &); // Hex constructor 37 | Color(byte = 0, byte = 0, byte = 0); // Value constructor 38 | 39 | uint32_t value() const; 40 | }; 41 | 42 | // Standard colors 43 | extern const Color RED; 44 | extern const Color GREEN; 45 | extern const Color BLUE; 46 | extern const Color YELLOW; 47 | extern const Color ORANGE; 48 | extern const Color CYAN; 49 | extern const Color WHITE; 50 | extern const Color BLACK; 51 | extern const Color GREY; 52 | 53 | /** 54 | * @brief A parametrized gradient class, from color A to B, and operating on a 55 | 
* range a to b. A value c in the range \f$[a, b]\f$ will equate to a color 56 | * appropriately in between A and B. 57 | * 58 | * This class can essentially be thought of as a slider from color A to color B 59 | * (with the slider value ranging from a to b). 60 | * 61 | * The reason we do not restrict a and b to 0 and 1 is to allow for more 62 | * meaningful values. For example, if the gradient is intended to represent 63 | * heat, the Celcius measurements in \f$[0, 100]\f$ are more meaningful to use than 64 | * are the values in \f$[0, 1]\f$. 65 | */ 66 | class Gradient { 67 | Color _base; 68 | 69 | long double _dr = 0; 70 | long double _dg = 0; 71 | long double _db = 0; 72 | 73 | long double _start = 0; 74 | long double _end = 0; 75 | public: 76 | Gradient(const Color &, const Color &, 77 | long double = 0, long double = 1); 78 | Gradient(const std::string &, const std::string &, 79 | long double = 0, long double = 1); 80 | 81 | Color get(long double); 82 | Color operator()(long double); 83 | 84 | /** 85 | * @brief Thrown in the \c get method if the passed value is out of the 86 | * bounds of the starting and ending values of the Gradient's scale. 87 | */ 88 | class bad_value : public std::runtime_error { 89 | public: 90 | bad_value(long double x) : std::runtime_error("Gradient value " 91 | + std::to_string(x) 92 | + " is out of bounds of the Gradient object's scale.") {} 93 | }; 94 | }; 95 | 96 | /** 97 | * @brief Represents an image. 98 | */ 99 | class Image : public Tensor { 100 | public: 101 | // Using declararations 102 | using pixel = std::pair ; 103 | 104 | Image(); // Default 105 | Image(size_t, size_t, size_t, byte = 0); // Value 106 | Image(size_t, size_t, size_t, const Color &); // Color 107 | Image(size_t, size_t, size_t, const std::string &); // Color 108 | Image(byte *, size_t, size_t, size_t = 1); // Contigous array 109 | Image(byte **, size_t, size_t, size_t); // List of rows 110 | Image(png_bytep *, size_t, size_t, size_t, size_t); // (Pretty much the same as above) 111 | 112 | Image(const Vector &, size_t, size_t); // Grayscale from vector 113 | 114 | size_t width() const; 115 | size_t height() const; 116 | size_t channels() const; 117 | 118 | // Pixel value setter 119 | void set(const pixel &, const Color &); // Color 120 | void set(const pixel &, size_t, byte); 121 | void set(const pixel &, const Vector &); 122 | 123 | void set_hex(const pixel &, size_t); 124 | void set_hex(const pixel &, const std::string &); 125 | 126 | // Pixel value getter 127 | uint32_t color(const pixel &) const; 128 | 129 | // Image extractors 130 | Image channel(size_t) const; 131 | Image crop(const pixel &, const pixel &) const; 132 | 133 | const unsigned char *const raw() const; 134 | 135 | unsigned char **row_bytes() const; 136 | 137 | #ifndef ZHP_NO_GUI 138 | 139 | int show() const; 140 | 141 | #endif 142 | 143 | class out_of_bounds {}; 144 | class bad_input_order {}; 145 | 146 | // Friends 147 | template 148 | friend class Convolution; 149 | protected: 150 | bool in_bounds(const pixel &) const; 151 | }; 152 | 153 | // Thrown when the file cannot be accessed (replace with std) 154 | class bad_file {}; 155 | 156 | // Thrown when the file being read is not in PNG format 157 | class bad_png {}; 158 | 159 | // Image loading and saving 160 | Image load_png(std::ifstream &); 161 | 162 | Image load_png(const char *); 163 | Image load_png(const std::string &); 164 | 165 | void save_png(const Image &, const char *); 166 | 167 | } 168 | 169 | // Literal operators 170 | image::Image 
operator""_png(const char *, size_t); 171 | 172 | } 173 | 174 | #endif 175 | -------------------------------------------------------------------------------- /include/io/print.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PRINT_H_ 2 | #define PRINT_H_ 3 | 4 | // Standard headers 5 | #include 6 | #include 7 | 8 | namespace zhetapi { 9 | 10 | namespace io { 11 | 12 | using Args = std::vector ; 13 | 14 | std::string table(const Args &, const std::vector &); 15 | 16 | } 17 | 18 | } 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /include/operand.hpp: -------------------------------------------------------------------------------- 1 | #ifndef OPERAND_H_ 2 | #define OPERAND_H_ 3 | 4 | // C++ headers 5 | #include 6 | 7 | // Engine headers 8 | #include "token.hpp" 9 | #include "core/raw_types.hpp" 10 | #include "core/common.hpp" 11 | 12 | // Macros to taste 13 | #define forward_ids(type) \ 14 | template <> \ 15 | uint8_t Operand ::id() const; 16 | 17 | namespace zhetapi { 18 | 19 | // Operand class 20 | template 21 | class Operand : public Token { 22 | T _val = T(); 23 | public: 24 | Operand(); 25 | Operand(const T &); 26 | Operand(const Operand &); 27 | 28 | Operand &operator=(const Operand &); 29 | 30 | T &get(); 31 | const T &get() const; 32 | 33 | void set(const T &); 34 | 35 | // Virtual functionss 36 | type caller() const override; 37 | std::string dbg_str() const override; 38 | Token *copy() const override; 39 | bool operator==(Token *) const override; 40 | }; 41 | 42 | // Constructors 43 | template 44 | Operand ::Operand () {} 45 | 46 | template 47 | Operand ::Operand(const T &data) : _val(data) {} 48 | 49 | template 50 | Operand ::Operand(const Operand &other) : _val(other._val) {} 51 | 52 | template 53 | Operand &Operand ::operator=(const Operand &other) 54 | { 55 | if (this != &other) 56 | _val = other._val; 57 | 58 | return *this; 59 | } 60 | 61 | // Getters and setters 62 | template 63 | T &Operand ::get() 64 | { 65 | return _val; 66 | } 67 | 68 | template 69 | const T &Operand ::get() const 70 | { 71 | return _val; 72 | } 73 | 74 | template 75 | void Operand ::set(const T &x) 76 | { 77 | _val = x; 78 | } 79 | 80 | // Virtual overrides 81 | template 82 | Token::type Operand ::caller() const 83 | { 84 | return opd; 85 | } 86 | 87 | template 88 | std::string Operand ::dbg_str() const 89 | { 90 | std::ostringstream oss; 91 | 92 | oss << _val; 93 | 94 | return oss.str(); 95 | } 96 | 97 | template 98 | Token *Operand ::copy() const 99 | { 100 | return new Operand(_val); 101 | } 102 | 103 | template 104 | bool Operand ::operator==(Token *tptr) const 105 | { 106 | Operand *opd = dynamic_cast (tptr); 107 | 108 | if (!opd) 109 | return false; 110 | 111 | return (opd->_val == _val); 112 | } 113 | 114 | // Forward declare specializations 115 | template <> 116 | std::string Operand ::dbg_str() const; 117 | 118 | template <> 119 | std::string Operand ::dbg_str() const; 120 | 121 | template <> 122 | std::string Operand > ::dbg_str() const; 123 | 124 | template <> 125 | std::string Operand > ::dbg_str() const; 126 | 127 | } 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /include/optimizer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef OPTIMIZER_H_ 2 | #define OPTIMIZER_H_ 3 | 4 | // Engine headers 5 | #include "dataset.hpp" 6 | #include "gradient.hpp" 7 | 8 | namespace zhetapi { 9 | 10 
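`Operand` is the concrete `Token` that carries raw values, which makes it the usual target of the `zhetapi_cast` helpers from cast.hpp. A small sketch:

\code{.cpp}
zhetapi::Token *t = new zhetapi::Operand <int> (42);

zhetapi::Token *u = t->copy();    // independent Operand <int> holding 42
bool eq = (*t == u);              // true: compares dynamic type and value
std::string s = t->dbg_str();     // "42"

zhetapi::Operand <int> *opd = nullptr;
bool ok = zhetapi::zhetapi_cast({t}, opd);   // recovers the typed pointer
\endcode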
| namespace ml { 11 | 12 | // Optimizer class 13 | template 14 | class Optimizer { 15 | protected: 16 | T _eta = 0; 17 | size_t _size = 0; 18 | bool _switch = false; 19 | 20 | // Functions 21 | Optimizer(T); 22 | public: 23 | void register_size(size_t); 24 | void set_learning_rate(T); 25 | 26 | virtual Matrix *update( 27 | Matrix *, 28 | size_t) = 0; 29 | }; 30 | 31 | template 32 | Optimizer ::Optimizer(T lr) : _eta(lr) {} 33 | 34 | template 35 | void Optimizer ::register_size(size_t size) 36 | { 37 | if (_size != size) { 38 | _size = size; 39 | _switch = true; 40 | } else { 41 | _switch = false; 42 | } 43 | } 44 | 45 | template 46 | void Optimizer ::set_learning_rate(T lr) 47 | { 48 | _eta = lr; 49 | } 50 | 51 | } 52 | 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /include/parametrization.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETRIZATION_H_ 2 | #define PARAMETRIZATION_H_ 3 | 4 | // C++ headers 5 | #include 6 | 7 | // Engine headers 8 | #include "vector.hpp" 9 | 10 | namespace zhetapi { 11 | 12 | /** 13 | * @brief Represents the parametrization of a curve. With parameters T, P and F, 14 | * the value of the parametrization at any value t (of type T) is \f$F(P(t))\f$ 15 | * 16 | * Mathematically, given that T is a set of some quantity representing a scalar, 17 | * then 18 | * 19 | * \f$P: T \to T^m\f$ 20 | * 21 | * and 22 | * 23 | * \f$F: T^m \to T^n.\f$ 24 | * 25 | * The parametrization function is thus the map \f$T \to T^m.\f$ 26 | * 27 | * Terminology: 28 | * 29 | * - The \b time of the parametrization is the value \f$t\f$ that ranges from 30 | * the starting and ending T values. 31 | * - The \b position of the parametrization, \f$P(t)\f$ is the value of the 32 | * secondary transformation at a current time. 33 | * - The \b value of the parametrization, \f$F(P(t))\f$ is the value of the 34 | * primary transformation at the value of the secondary transformation at the 35 | * current time. 36 | * 37 | * @tparam T the basic operating type. 38 | * @tparam P the type of the single variable vector function (the secondary 39 | * transformation). 40 | * @tparam F the type of the multivariable vector function (the primary 41 | * transformation). 42 | */ 43 | template 44 | class Parametrization { 45 | T _start; 46 | T _end; 47 | 48 | T _pos; // Current position 49 | T _inc; // Increment 50 | 51 | P _par; // Single variable vector function 52 | F _ftn; // Multivariable vector function 53 | public: 54 | Parametrization(F, P, T, T); 55 | 56 | Vector value() const; // Value at position 57 | Vector pos() const; // Vector position 58 | T time() const; 59 | 60 | Vector dpos() const; // Derivative of vector position 61 | 62 | bool step(); // Step through the parametrization 63 | 64 | void reset(); // Reset the position 65 | 66 | static const size_t partition_size; 67 | }; 68 | 69 | /** 70 | * @brief The default number of steps that a parametrization will undergo. 71 | * Default value is 1000. 72 | */ 73 | template 74 | const size_t Parametrization ::partition_size = 1000; 75 | 76 | /** 77 | * @brief Constructs a parametrization with given primary and secondary 78 | * transformations, and the bounds of the parametrization. The state of the 79 | * parametrization is set to the start. 80 | * 81 | * @param ftn the primary transformation. 82 | * @param par the secondary transformation. 83 | * @param start the beginning value of the parametrization. 
84 | * @param end the ending value of the parametrization. 85 | */ 86 | template 87 | Parametrization ::Parametrization(F ftn, P par, T start, T end) 88 | : _ftn(ftn), _par(par), 89 | _start(start), _end(end), 90 | _pos(start) 91 | { 92 | _inc = (end - start) / (T) partition_size; 93 | } 94 | 95 | /** 96 | * @return the value of the parametrization at the current time. 97 | */ 98 | template 99 | Vector Parametrization ::value() const 100 | { 101 | return _ftn(_par(_pos)); 102 | } 103 | 104 | /** 105 | * @return the position of the parametrization at the current time. 106 | */ 107 | template 108 | Vector Parametrization ::pos() const 109 | { 110 | return _par(_pos); 111 | } 112 | 113 | /** 114 | * @return the current time. 115 | */ 116 | template 117 | T Parametrization ::time() const 118 | { 119 | return _pos; 120 | } 121 | 122 | /** 123 | * @brief The derivative of the secondary transformation, evaluated using a 124 | * forward difference. 125 | * 126 | * @return the derivative of the secondary transformation at the current time. 127 | */ 128 | template 129 | Vector Parametrization ::dpos() const 130 | { 131 | return (_par(_pos + _inc) - _par(_pos))/_inc; 132 | } 133 | 134 | /** 135 | * @brief Increments the current time of the parametrization according to the 136 | * static variable \p partition_size. 137 | * 138 | * @return \c true if the parametrization has reached the end of the its bounds 139 | * and \c false otherwise. 140 | */ 141 | template 142 | bool Parametrization ::step() 143 | { 144 | // TODO: allow multiple steps at once 145 | _pos += _inc; 146 | 147 | // Cycle back to the start; 148 | if (_pos > _end) { 149 | _pos = _start; 150 | 151 | return true; 152 | } 153 | 154 | return false; 155 | } 156 | 157 | /** 158 | * @brief Resets the current time of the parametrization to the starting time. 159 | */ 160 | template 161 | void Parametrization ::reset() 162 | { 163 | _pos = _start; 164 | } 165 | 166 | } 167 | 168 | #endif 169 | -------------------------------------------------------------------------------- /include/range.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RANGE_H_ 2 | #define RANGE_H_ 3 | 4 | // Standard headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace zhetapi { 13 | 14 | // Arithmetic kernel for comparison and arithmetic operations 15 | // TODO: should this go in a separate file? 
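// Usage sketch (relying on the float/int specializations below): for
// floating-point types eq() compares within an epsilon tolerance and mod()
// goes through std::fmod, while the integral specialization uses exact ==
// and operator%. For example:
//   arithmetic_kernel<double>::eq(0.1 + 0.2, 0.3)  // true despite rounding
//   arithmetic_kernel<int>::mod(7, 2)              // 1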
16 | template ::type> 17 | struct arithmetic_kernel { 18 | // Equality with tolerance 19 | static bool eq(T a, T b, T tolerance = std::numeric_limits ::epsilon()) { 20 | return std::abs(a - b) <= tolerance; 21 | } 22 | 23 | // Modulus 24 | static T mod(T a, T b) { 25 | return std::fmod(a, b); 26 | } 27 | }; 28 | 29 | template 30 | struct arithmetic_kernel { 31 | // Equality with tolerance 32 | static bool eq(T a, T b, T tolerance = std::numeric_limits ::epsilon()) { 33 | return a == b; 34 | } 35 | 36 | // Modulus 37 | static T mod(T a, T b) { 38 | return a % b; 39 | } 40 | }; 41 | 42 | // Range class 43 | template 44 | struct Range { 45 | T start; 46 | T term; 47 | T step; 48 | 49 | // Iterator type 50 | struct iterator { 51 | T value; 52 | T step; 53 | 54 | // Constructor 55 | iterator(T value, T step) : value(value), step(step) {} 56 | 57 | // Pre-increment 58 | iterator& operator++() { 59 | value += step; 60 | return *this; 61 | } 62 | 63 | // Post-increment 64 | iterator operator++(int) { 65 | iterator tmp(*this); 66 | value += step; 67 | return tmp; 68 | } 69 | 70 | // Pre-decrement 71 | iterator& operator--() { 72 | value -= step; 73 | return *this; 74 | } 75 | 76 | // Post-decrement 77 | iterator operator--(int) { 78 | iterator tmp(*this); 79 | value -= step; 80 | return tmp; 81 | } 82 | 83 | // Dereference 84 | T &operator*() { 85 | return value; 86 | } 87 | 88 | // Comparison operators 89 | bool operator==(const iterator &other) const { 90 | static T eps = T(100) * std::numeric_limits::epsilon(); 91 | if (std::is_floating_point ::value) 92 | return std::fabs(value - other.value) < eps; 93 | 94 | return value == other.value; 95 | } 96 | 97 | bool operator!=(const iterator& other) const { 98 | return !(*this == other); 99 | } 100 | 101 | bool operator<(const iterator& other) const { 102 | return value < other.value; 103 | } 104 | 105 | bool operator<=(const iterator& other) const { 106 | return value <= other.value; 107 | } 108 | 109 | bool operator>(const iterator& other) const { 110 | return value > other.value; 111 | } 112 | 113 | bool operator>=(const iterator& other) const { 114 | return value >= other.value; 115 | } 116 | }; 117 | 118 | // Constructors 119 | template ::value> 120 | Range(T pstart, T pterm, T pstep = T(1)) 121 | : start(pstart), term(pterm), step(pstep) {} 122 | 123 | // Default is an empty range 124 | template ::value> 125 | Range(T pterm = T(0)) 126 | : start(T(0)), term(pterm), step(T(1)) {} 127 | 128 | // Compute range at a given number of steps 129 | T compute(size_t nsteps) const { 130 | return start + static_cast (step) * nsteps; 131 | } 132 | 133 | T operator()(size_t nsteps) const { 134 | return compute(nsteps); 135 | } 136 | 137 | // Compose two ranges 138 | Range operator()(const Range &other) const { 139 | T a = start + step * other.start; 140 | T b = step * other.step; 141 | T c = std::min(term, other.term); 142 | 143 | return Range(a, c, b); 144 | } 145 | 146 | // Properties 147 | T length() const { 148 | return (term - start); 149 | } 150 | 151 | size_t size() const { 152 | // T rem = std::modulus {}.((term - start), step); 153 | T rem = arithmetic_kernel ::mod(term - start, step); 154 | return (rem == 0) ? 
length() / step 155 | : length() / step + 1; 156 | } 157 | 158 | // Iterators 159 | iterator begin() const { 160 | return iterator(start, step); 161 | } 162 | 163 | iterator end() const { 164 | return iterator(term, step); 165 | } 166 | 167 | // Boolean operators 168 | // TODO: account for eps in comparison 169 | bool operator==(const Range& other) const { 170 | return (start == other.start) 171 | && (term == other.term) 172 | && (step == other.step); 173 | } 174 | 175 | bool operator!=(const Range& other) const { 176 | return !(*this == other); 177 | } 178 | 179 | // All the elements 180 | static const Range all; 181 | }; 182 | 183 | // All for tensor slicing 184 | extern Range all; 185 | 186 | // Printing 187 | template 188 | std::ostream &operator<<(std::ostream &os, const Range &range) 189 | { 190 | return os << "(" << range.start << ", " << range.term 191 | << ", " << range.step << ")"; 192 | } 193 | 194 | } 195 | 196 | #endif 197 | -------------------------------------------------------------------------------- /include/rational.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RATIONAL_H_ 2 | #define RATIONAL_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | 8 | /** 9 | * @brief Represents the Rational 10 | * number a/b where a and b are 11 | * both of type T. 12 | */ 13 | template 14 | class Rational { 15 | public: 16 | class non_integral_type {}; 17 | private: 18 | T a; 19 | T b; 20 | public: 21 | Rational(T = 0, T = 1); 22 | 23 | operator bool() const; 24 | 25 | explicit operator double() const; 26 | 27 | bool is_inf() const; 28 | 29 | /* Mathematical Operators - Members */ 30 | Rational &operator+=(const Rational &); 31 | Rational &operator-=(const Rational &); 32 | Rational &operator*=(const Rational &); 33 | Rational &operator/=(const Rational &); 34 | 35 | /* Mathematical Operators - Non-Members */ 36 | template 37 | friend Rational operator+(const Rational &, const Rational &); 38 | 39 | template 40 | friend Rational operator-(const Rational &, const Rational &); 41 | 42 | template 43 | friend Rational operator*(const Rational &, const Rational &); 44 | 45 | template 46 | friend Rational operator/(const Rational &, const Rational &); 47 | 48 | /* Boolean Operators - Non Members */ 49 | template 50 | friend bool operator==(const Rational &, const Rational &); 51 | 52 | template 53 | friend bool operator!=(const Rational &, const Rational &); 54 | 55 | template 56 | friend bool operator>(const Rational &, const Rational &); 57 | 58 | template 59 | friend bool operator<(const Rational &, const Rational &); 60 | 61 | template 62 | friend bool operator>=(const Rational &, const Rational &); 63 | 64 | template 65 | friend bool operator<=(const Rational &, const Rational &); 66 | 67 | template 68 | friend Rational abs(const Rational &); 69 | 70 | /* Output Functions */ 71 | template 72 | friend std::ostream &operator<<(std::ostream &, const Rational &); 73 | private: 74 | void simplify(); 75 | 76 | static T gcd(T, T); 77 | }; 78 | 79 | ////////////////////////////////////////// 80 | // Constructors 81 | ////////////////////////////////////////// 82 | 83 | template 84 | Rational ::Rational(T p, T q) : a(p), b(q) 85 | { 86 | if (!std::is_integral ::value) 87 | throw non_integral_type(); 88 | 89 | simplify(); 90 | } 91 | 92 | ////////////////////////////////////////// 93 | // Conversion Operators 94 | ////////////////////////////////////////// 95 | 96 | template 97 | Rational ::operator bool() const 98 | { 99 | return a != 0; 100 
| } 101 | 102 | template 103 | Rational ::operator double() const 104 | { 105 | return (double) a / (double) b; 106 | } 107 | 108 | template 109 | bool Rational ::is_inf() const 110 | { 111 | return b == 0; 112 | } 113 | 114 | ////////////////////////////////////////// 115 | // Arithmetic Operators 116 | ////////////////////////////////////////// 117 | 118 | template 119 | Rational &Rational ::operator+=(const Rational &other) 120 | { 121 | a = a * other.b + b * other.a; 122 | b *= other.b; 123 | 124 | simplify(); 125 | 126 | return *this; 127 | } 128 | 129 | template 130 | Rational &Rational ::operator-=(const Rational &other) 131 | { 132 | a = a * other.b - b * other.a; 133 | b *= other.b; 134 | 135 | simplify(); 136 | 137 | return *this; 138 | } 139 | 140 | template 141 | Rational &Rational ::operator*=(const Rational &other) 142 | { 143 | using namespace std; 144 | 145 | a *= other.a; 146 | b *= other.b; 147 | 148 | simplify(); 149 | 150 | return *this; 151 | } 152 | 153 | template 154 | Rational &Rational ::operator/=(const Rational &other) 155 | { 156 | a *= other.b; 157 | b *= other.a; 158 | 159 | simplify(); 160 | 161 | return *this; 162 | } 163 | 164 | template 165 | Rational operator+(const Rational &a, const Rational &b) 166 | { 167 | Rational out = a; 168 | 169 | out += b; 170 | 171 | return out; 172 | } 173 | 174 | template 175 | Rational operator-(const Rational &a, const Rational &b) 176 | { 177 | Rational out = a; 178 | 179 | out -= b; 180 | 181 | return out; 182 | } 183 | 184 | template 185 | Rational operator*(const Rational &a, const Rational &b) 186 | { 187 | Rational out = a; 188 | 189 | out *= b; 190 | 191 | return out; 192 | } 193 | 194 | template 195 | Rational operator/(const Rational &a, const Rational &b) 196 | { 197 | Rational out = a; 198 | 199 | out /= b; 200 | 201 | return out; 202 | } 203 | 204 | ////////////////////////////////////////// 205 | // Boolean Operators 206 | ////////////////////////////////////////// 207 | 208 | template 209 | bool operator==(const Rational &a, const Rational &b) 210 | { 211 | return (a.a == b.a) && (a.b == b.b); 212 | } 213 | 214 | template 215 | bool operator!=(const Rational &a, const Rational &b) 216 | { 217 | return !(a == b); 218 | } 219 | 220 | template 221 | bool operator>(const Rational &a, const Rational &b) 222 | { 223 | return (a.a * b.b) > (a.b * b.a); 224 | } 225 | 226 | template 227 | bool operator<(const Rational &a, const Rational &b) 228 | { 229 | return (a.a * b.b) < (a.b * b.a); 230 | } 231 | 232 | template 233 | bool operator>=(const Rational &a, const Rational &b) 234 | { 235 | return (a == b) || (a > b); 236 | } 237 | 238 | template 239 | bool operator<=(const Rational &a, const Rational &b) 240 | { 241 | return (a == b) || (a < b); 242 | } 243 | 244 | ////////////////////////////////////////// 245 | // I/O Functions 246 | ////////////////////////////////////////// 247 | 248 | template 249 | std::ostream &operator<<(std::ostream &os, const Rational &rat) 250 | { 251 | if (rat.a == 0) 252 | os << 0; 253 | else if (rat.b == 1) 254 | os << rat.a; 255 | else 256 | os << rat.a << "/" << rat.b; 257 | 258 | return os; 259 | } 260 | 261 | ////////////////////////////////////////// 262 | // Private Methods 263 | ////////////////////////////////////////// 264 | 265 | template 266 | void Rational ::simplify() 267 | { 268 | if (b < 0) { 269 | a *= -1; 270 | b *= -1; 271 | } 272 | 273 | T tmp = gcd(a, b); 274 | 275 | a /= tmp; 276 | b /= tmp; 277 | } 278 | 279 | template 280 | T Rational ::gcd(T a, T b) 281 | { 282 | 
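// Euclidean algorithm: after guarding the zero inputs, repeatedly reduce the
// larger value modulo the smaller; e.g. gcd(12, 18): 18 % 12 = 6, then
// 12 % 6 = 0, leaving 6.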
if (a == 0 || b == 0) 283 | return 1; 284 | 285 | a = std::abs(a); 286 | b = std::abs(b); 287 | 288 | if (a > b) 289 | std::swap(a, b); 290 | 291 | while (b % a != 0) { 292 | b %= a; 293 | 294 | if (a > b) 295 | std::swap(a, b); 296 | } 297 | 298 | return std::min(a, b); 299 | } 300 | 301 | // Extra functions 302 | 303 | template 304 | Rational abs(const Rational &a) 305 | { 306 | if (a < Rational {0, 1}) 307 | return {-a.a, a.b}; 308 | 309 | return a; 310 | } 311 | 312 | #endif 313 | -------------------------------------------------------------------------------- /include/registration.hpp: -------------------------------------------------------------------------------- 1 | #ifndef REGISTRATION_H_ 2 | #define REGISTRATION_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | 9 | // Engine headers 10 | #include "core/functor.hpp" 11 | 12 | // TODO: separate casting from registration 13 | namespace zhetapi { 14 | 15 | class Registrable : public Functor { 16 | public: 17 | using Evaluator = std::function &)>; 18 | private: 19 | Evaluator _ftn; 20 | 21 | std::string _ident; 22 | public: 23 | Registrable(); 24 | Registrable(const Registrable &); 25 | Registrable(const std::string &, Evaluator); 26 | 27 | // TODO: get rid of this 28 | Token *operator()(const std::vector &) const; 29 | 30 | Token *evaluate(Engine *, const std::vector &) override; 31 | 32 | std::string dbg_str() const override; 33 | type caller() const override; 34 | Token *copy() const override; 35 | bool operator==(Token *) const override; 36 | }; 37 | 38 | } 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /include/sparse.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SPARSE_H_ 2 | #define SPARSE_H_ 3 | 4 | // C++ headers 5 | #include 6 | 7 | // Engine headers 8 | #include "matrix.hpp" 9 | 10 | namespace zhetapi { 11 | 12 | template 13 | class SparseMatrix { 14 | struct elem { 15 | elem * _next = nullptr; // Next element 16 | size_t _ci = 0; // Column index 17 | }; 18 | 19 | elem ** _rows = nullptr; 20 | public: 21 | SparseMatrix(); 22 | SparseMatrix(const Matrix &, T); 23 | }; 24 | 25 | template 26 | SparseMatrix ::SparseMatrix() {} 27 | 28 | template 29 | SparseMatrix ::SparseMatrix(const Matrix &mat, T exc) 30 | { 31 | size_t rs = mat.get_rows(); 32 | size_t cs = mat.get_cols(); 33 | 34 | _rows = new elem[rs]; 35 | for (size_t i = 0; i < rs; i++) { 36 | _rows[i] = new elem; 37 | 38 | for (size_t i = 0; i < cs; i++) { 39 | 40 | } 41 | } 42 | } 43 | 44 | } 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /include/std/activation_derivatives.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ACTIVATION_DERIVATIVES_H_ 2 | #define ACTIVATION_DERIVATIVES_H_ 3 | 4 | // TODO: put back in std 5 | namespace zhetapi { 6 | 7 | namespace ml { 8 | 9 | template 10 | class _DLinear : public Activation { 11 | T _alpha; 12 | public: 13 | __cuda_dual__ 14 | explicit _DLinear(const T &alpha = T(1)) : _alpha(alpha) {} 15 | 16 | __cuda_dual__ 17 | Vector compute(const Vector &x) const { 18 | return Vector (x.size(), _alpha); 19 | } 20 | }; 21 | 22 | // ReLU activation class 23 | template 24 | class _DReLU : public Activation { 25 | public: 26 | __cuda_dual__ 27 | Vector compute(const Vector &x) const { 28 | /* T *arr = new T[x.size()]; 29 | for (size_t i = 0; i < x.size(); i++) { 30 | arr[i] = (x[i] > 0) ? 
1 : 0; 31 | } 32 | return Vector (x.size(), arr, false); */ 33 | return Vector (x.size(), 34 | [&](size_t i) { 35 | return (x[i] > 0) ? 1 : 0; 36 | } 37 | ); 38 | } 39 | }; 40 | 41 | // Leaky ReLU activation class 42 | template 43 | class _DLeakyReLU : public Activation { 44 | T _alpha; 45 | public: 46 | _DLeakyReLU(const T &alpha = 1) : _alpha(alpha) {} 47 | 48 | Vector compute(const Vector &x) const { 49 | return Vector (x.size(), 50 | [&](size_t i) { 51 | return (x[i] < 0) ? _alpha : 1; 52 | } 53 | ); 54 | } 55 | }; 56 | 57 | // Sigmoid activation class 58 | template 59 | class _DSigmoid : public Activation { 60 | public: 61 | 62 | #ifndef ZHP_CUDA 63 | 64 | Vector compute(const Vector &x) const { 65 | return Vector (x.size(), 66 | [&](size_t i) { 67 | T tmp = 1.0/(1.0 + exp(-x[i])); 68 | 69 | return tmp * (T (1.0) - tmp); 70 | } 71 | ); 72 | } 73 | 74 | #else 75 | 76 | _host_ _device_ 77 | Vector compute(const Vector &x) const { 78 | return Vector (x.size(), 79 | [x] _host_ _device_ (size_t i) { 80 | T tmp = 1.0/(1.0 + exp(-x[i])); 81 | 82 | return tmp * (T (1.0) - tmp); 83 | } 84 | ); 85 | } 86 | 87 | #endif 88 | 89 | }; 90 | 91 | // Scaled Sigmoid activation class 92 | template 93 | class _DScaledSigmoid : public Activation { 94 | T _alpha; 95 | public: 96 | _DScaledSigmoid(const T &alpha) : _alpha(alpha) {} 97 | 98 | Vector compute(const Vector &x) const { 99 | return Vector (x.size(), [&](size_t i) {return 100 | _d_scaled_sigmoid(x[i], _alpha);}); 101 | } 102 | }; 103 | 104 | // Probability activation class 105 | template 106 | class _DSoftmax : public Activation { 107 | public: 108 | Vector compute(const Vector &x) const { 109 | // Subtract by max for numerical stability 110 | T _max = x[0]; 111 | for (size_t i = 1; i < x.size(); i++) 112 | _max = (_max > x[i]) ? _max : x[i]; 113 | 114 | T _sum = 0; 115 | for (size_t i = 0; i < x.size(); i++) 116 | _sum += exp(x[i] - _max); 117 | 118 | return Vector (x.size(), 119 | [&](size_t i) { 120 | return exp(x[i] - _max) 121 | * (_sum - exp(x[i] - _max)) 122 | / (_sum * _sum); 123 | } 124 | ); 125 | } 126 | }; 127 | 128 | } 129 | 130 | } 131 | 132 | #endif 133 | -------------------------------------------------------------------------------- /include/std/calculus.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CALCULUS_H_ 2 | #define CALCULUS_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // Engine headers 11 | #include "../vector.hpp" 12 | #include "../parametrization.hpp" 13 | 14 | namespace zhetapi { 15 | 16 | namespace utility { 17 | 18 | // Single variable, scalar integration of f over [a, b] 19 | template 20 | T sv_integral(F f, T a, T b, size_t partition_size = 1000) 21 | { 22 | T step = (b - a)/partition_size; 23 | 24 | // Use a more accurate method later (Simpon's rule, etc.) 
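// Left Riemann sum: accumulates f(x) * step over x = a, a + step, ...; e.g.
// sv_integral([](double x) { return x * x; }, 0.0, 1.0) is roughly 1/3,
// with O(step) error.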
25 | T sum = 0; 26 | for (T x = a; x <= b; x += step) 27 | sum += f(x) * step; 28 | 29 | return sum; 30 | } 31 | 32 | // Euler's method for single variable scalar functions 33 | template 34 | T eulers_method(Df df, Vector given, T x, size_t partition_size = 1000) 35 | { 36 | T step = (x - given[0])/partition_size; 37 | 38 | T stop = x; 39 | while (fabs(given[0] - stop) > 1e-10) { 40 | given[1] += df(given[0]) * step; 41 | given[0] += step; 42 | } 43 | 44 | return given[1]; 45 | } 46 | 47 | template 48 | T line_integral(Parametrization prm) 49 | { 50 | // Always start from the beginning 51 | prm.reset(); 52 | 53 | T sum = 0; 54 | 55 | do { 56 | sum += inner(prm.value(), prm.dpos()); 57 | } while (prm.step()); 58 | 59 | return sum; 60 | } 61 | 62 | /** 63 | * @brief Solves the homogenous linear differential 64 | * equation (with constant coefficients) whose coefficients 65 | * are represented by the polynomial that is passed into the 66 | * function. 67 | * 68 | * @tparam T Represents the scalar field; the complex roots of 69 | * the polynomial are AUTOMATICALLY generated from this function. 70 | * 71 | * @return out Represents the basis of functions such that a 72 | * linear combination of the functions is a solution to the homogenous 73 | * linear differential equation with constant coefficients. 74 | 75 | template 76 | std::vector solve_hlde_constant(const polynomial > &p, 77 | size_t rounds = 10000, const Complex &eps = 1E-100L, 78 | const Complex &start = {0.4, 0.9}) 79 | { 80 | std::vector > roots = p.roots(rounds, eps, start); 81 | 82 | std::vector >> out; 83 | 84 | std::vector > inserted; 85 | 86 | table > tbl { 87 | Variable > {"e", false, exp(1)} 88 | }; 89 | 90 | for (auto vl : roots) { 91 | if (vl == Complex {0, 0}) 92 | continue; 93 | 94 | auto itr = ::std::find_if(inserted.begin(), inserted.end(), [&](const Complex &a) { 95 | return pow(norm(vl - a), 10.5) < norm(eps); 96 | }); 97 | 98 | if (itr != inserted.end()) { 99 | size_t deg = ::std::count_if(inserted.begin(), inserted.end(), [&](const Complex &a) { 100 | return pow(norm(vl - a), 10.5) < norm(eps); 101 | }); 102 | 103 | if (vl.is_real()) { 104 | out.push_back({"f", {"x"}, "x^" + ::std::to_string(deg) + " * e^(" 105 | + ::std::to_string(vl.real()) + " * x)", tbl}); 106 | } else { 107 | out.push_back({"f", {"x"}, "x^" + ::std::to_string(deg) + " * e^(" 108 | + ::std::to_string(vl.real()) + " * x)" + " * cos(" 109 | + ::std::to_string(vl.imag()) + " * x)", tbl}); 110 | } 111 | } else { 112 | inserted.push_back(vl); 113 | 114 | if (vl.is_real()) { 115 | out.push_back({"f", {"x"}, "e^(" + ::std::to_string(vl.real()) + " * x)", tbl}); 116 | } else { 117 | out.push_back({"f", {"x"}, "e^(" + ::std::to_string(vl.real()) + " * x)" 118 | + " * cos(" + ::std::to_string(vl.imag()) + " * x)", tbl}); 119 | } 120 | } 121 | } 122 | 123 | size_t deg = ::std::count(roots.begin(), roots.end(), Complex {0, 0}); 124 | 125 | if (deg > 0) 126 | out.push_back({"f", {"x"}, "x^" + ::std::to_string(deg - 1), tbl}); 127 | 128 | return out; 129 | } */ 130 | 131 | } 132 | 133 | } 134 | 135 | #endif 136 | -------------------------------------------------------------------------------- /include/std/combinatorial.hpp: -------------------------------------------------------------------------------- 1 | #ifndef COMBINATORIAL_H_ 2 | #define COMBINATORIAL_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | 8 | // Engine headers 9 | #include "../rational.hpp" 10 | 11 | namespace zhetapi { 12 | 13 | namespace utility { 14 | 15 | 
////////////////////////////////////////// 16 | // Exceptions 17 | ////////////////////////////////////////// 18 | 19 | /** 20 | * @brief Exception for asserting that 21 | * a certain input be non-negative. 22 | */ 23 | class negative_block_exception {}; 24 | 25 | /** 26 | * @brief Exception for asserting that 27 | * a certain input be strictly positive. 28 | */ 29 | class positive_flow_exception {}; 30 | 31 | ////////////////////////////////////////// 32 | // Preliminary Helper Functions 33 | ////////////////////////////////////////// 34 | 35 | /** 36 | * @brief Factorial function for integral 37 | * types, provided to avoid the tgamma 38 | * library function. 39 | */ 40 | template 41 | T integral_factorial(T n) 42 | { 43 | T val = 1; 44 | for (T i = 1; i <= n; i++) 45 | val *= i; 46 | 47 | return val; 48 | } 49 | 50 | /** 51 | * @brief Falling factorial function, available 52 | * for all types that support the arithmetic 53 | * operations. 54 | */ 55 | template 56 | T falling_power(T n, T k) 57 | { 58 | T val = 1; 59 | for (T i = 0; i < k; i++) 60 | val *= (n - i); 61 | 62 | return val; 63 | } 64 | 65 | ////////////////////////////////////////// 66 | // Binomial Coefficients 67 | ////////////////////////////////////////// 68 | 69 | /** 70 | * @brief General binomial, computed using the falling 71 | * factorial and the gamma function. The gamma 72 | * function defaults to the library function 73 | * tgamma. 74 | * 75 | * @param gamma Used gamma function, defaults to ::std::tgamma 76 | */ 77 | template 78 | T binom(T n, T k, T (*gamma)(T) = ::std::tgamma) 79 | { 80 | return falling_power(n, k) / gamma(k + 1); 81 | } 82 | 83 | /** 84 | * @brief Integral binomial, utilizes the falling 85 | * power function as well as the integral factorial 86 | * function. 87 | */ 88 | template 89 | T integral_binom(T n, T k) 90 | { 91 | return falling_power(n, k) / integral_factorial(k); 92 | } 93 | 94 | ////////////////////////////////////////// 95 | // GCD and LCM 96 | ////////////////////////////////////////// 97 | 98 | /** 99 | * @brief The Euclidean algorithm for determining 100 | * the GCD (Greatest Common Divisor) of two 101 | * numbers. Includes overhead from the passed 102 | * function. 103 | */ 104 | template 105 | T gcd(T a, T b, T (*mod)(T, T) = std::fmod, T eps = 0) 106 | { 107 | if (a == 0 || b == 0) 108 | return 1; 109 | 110 | a = std::abs(a); 111 | b = std::abs(b); 112 | 113 | if (a > b) 114 | std::swap(a, b); 115 | 116 | while (std::abs(mod(b, a)) != 0) { 117 | b = mod(b, a); 118 | 119 | if (a > b) 120 | std::swap(a, b); 121 | } 122 | 123 | return std::min(a, b); 124 | } 125 | 126 | /** 127 | * @brief The LCM (Lowest Common Multiple) algorithm, 128 | * which uses the fact that 129 | * (a, b) * [a, b] = ab. Includes overhead 130 | * from the modulus function which is passed. 131 | */ 132 | template 133 | T lcm(T a, T b, T(*mod)(T, T) = std::fmod, T eps = 0) 134 | { 135 | return a * b / gcd(a, b, mod, eps); 136 | } 137 | 138 | /** 139 | * @brief Bernoulli sequence generator: generates 140 | * a list (array, vector) of the first n 141 | * Bernoulli numbers. Uses the general 142 | * binomial function.
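 *
 * A sketch of the recurrence implemented below (B_1 = -1/2, and odd-index
 * numbers beyond B_1 are zero):
 *
 *   B_m = -(1 / (m + 1)) * sum_{j = 0}^{m - 1} C(m + 1, j) * B_j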
143 | */ 144 | template 145 | std::vector bernoulli_sequence_real(T n, T (*gamma)(T) = ::std::tgamma) 146 | { 147 | ::std::vector ibs = {1}; 148 | 149 | T tmp; 150 | for (T i = 1; i <= n; i++) { 151 | tmp = 0; 152 | 153 | if (i == 1) { 154 | ibs.push_back(-0.5); 155 | continue; 156 | } 157 | 158 | if (::std::fmod(i, 2) == 1) { 159 | ibs.push_back(0); 160 | continue; 161 | } 162 | 163 | for (T j = 0; j < i; j++) 164 | tmp += binom(i + 1, j, gamma) * ibs[j]; 165 | 166 | ibs.push_back(-tmp/(i + 1)); 167 | } 168 | 169 | return ibs; 170 | } 171 | 172 | /** 173 | * @brief Returns the specified Bernoulli number, 174 | * using the Bernoulli sequence generator. 175 | */ 176 | template 177 | T bernoulli_number_real(T n, T (*gamma)(T) = ::std::tgamma) 178 | { 179 | if (n <= 0) 180 | throw positive_flow_exception(); 181 | 182 | return bernoulli_sequence_real(n, gamma)[n - 1]; 183 | } 184 | 185 | /** 186 | * @brief Rational equivalent of the real Bernoulli 187 | * sequence generator: a list of Rational 188 | * numbers is returned instead. Use this variant when 189 | * precision must be preserved. Note that if the returned 190 | * sequence appears to be incorrect, the range of 191 | * the template parameter is likely too 192 | * small (overflow). 193 | */ 194 | template 195 | ::std::vector > bernoulli_sequence_rational(T n) 196 | { 197 | ::std::vector > ibs = {{1, 1}}; 198 | 199 | Rational tmp; 200 | for (T i = 1; i <= n; i++) { 201 | tmp = {0, 1}; 202 | 203 | if (i == 1) { 204 | ibs.push_back({-1, 2}); 205 | continue; 206 | } 207 | 208 | if (i % 2 == 1) { 209 | ibs.push_back({0, 1}); 210 | continue; 211 | } 212 | 213 | for (T j = 0; j < i; j++) 214 | tmp += Rational {integral_binom(i + 1, j), 1} * ibs[j]; 215 | 216 | ibs.push_back(Rational {-1, (i + 1)} * tmp); 217 | } 218 | 219 | return ibs; 220 | } 221 | 222 | /** 223 | * @brief Returns the specified Bernoulli number as a 224 | * Rational number using the Rational Bernoulli sequence 225 | * generator.
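 * (The gamma function pointer parameter below is unused by this rational
 * variant; it appears to be kept only for symmetry with
 * bernoulli_number_real.)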
226 | */ 227 | template 228 | T bernoulli_number_rational(T n, T (*gamma)(T) = ::std::tgamma) 229 | { 230 | if (n <= 0) 231 | throw positive_flow_exception(); 232 | 233 | return bernoulli_sequence_rational(n)[n - 1]; 234 | } 235 | 236 | } 237 | 238 | } 239 | 240 | #endif 241 | -------------------------------------------------------------------------------- /include/std/erf_derivatives.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ERF_DERIVATIVES_H_ 2 | #define ERF_DERIVATIVES_H_ 3 | 4 | namespace zhetapi { 5 | 6 | namespace ml { 7 | 8 | // Squared error 9 | template 10 | class _DSE : public Erf { 11 | public: 12 | __cuda_dual__ 13 | Vector operator()(const Vector &comp, const Vector &in) const { 14 | return -T(2) * (comp - in); 15 | } 16 | }; 17 | 18 | // M squared error 19 | template 20 | class _DMSE : public Erf { 21 | public: 22 | __cuda_dual__ 23 | Vector operator()(const Vector &comp, const Vector &in) const { 24 | return -T(2)/T(comp.size()) * (comp - in); 25 | } 26 | }; 27 | 28 | } 29 | 30 | } 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /include/std/erfs.hpp: -------------------------------------------------------------------------------- 1 | #ifndef STD_ERFS_H_ 2 | #define STD_ERFS_H_ 3 | 4 | // Engine headers 5 | #include "../erf.hpp" 6 | #include "../std/erf_derivatives.hpp" 7 | 8 | // Engine CUDA headers 9 | #include "../cuda/essentials.cuh" 10 | 11 | namespace zhetapi { 12 | 13 | namespace ml { 14 | 15 | /* 16 | * All Erf classes have inlined member functions for the same 17 | * reason that the activation classes are inlined. Obscure naming is 18 | * also done for the same reason. 19 | */ 20 | 21 | template 22 | class SE : public Erf { 23 | public: 24 | __cuda_dual__ 25 | SE() { 26 | this->kind = Erf ::OPT_SE; 27 | } 28 | 29 | __cuda_dual__ 30 | Vector operator()(const Vector &comp, const Vector &in) const { 31 | Erf ::assert_size(comp, in); 32 | 33 | T sum = 0; 34 | 35 | for (size_t i = 0; i < comp.size(); i++) 36 | sum += (comp[i] - in[i]) * (comp[i] - in[i]); 37 | 38 | return Vector (1, sum); 39 | } 40 | 41 | __cuda_dual__ 42 | Erf *derivative() const 43 | { 44 | return new _DSE (); 45 | } 46 | }; 47 | 48 | template 49 | class MSE : public Erf { 50 | public: 51 | __cuda_dual__ 52 | MSE() { 53 | this->kind = Erf ::OPT_MSE; 54 | } 55 | 56 | __cuda_dual__ 57 | Vector operator()(const Vector &comp, const Vector &in) const { 58 | Erf ::assert_size(comp, in); 59 | 60 | T sum = 0; 61 | 62 | for (size_t i = 0; i < comp.size(); i++) 63 | sum += (comp[i] - in[i]) * (comp[i] - in[i]); 64 | 65 | return Vector (1, sum / T(comp.size())); 66 | } 67 | 68 | __cuda_dual__ 69 | Erf *derivative() const { 70 | return new _DMSE (); 71 | } 72 | }; 73 | 74 | // Copy base activations 75 | template 76 | __cuda_dual__ 77 | Erf *copy(Erf *opt) 78 | { 79 | switch (opt->kind) { 80 | case Erf ::OPT_Default: 81 | return new Erf (); 82 | case Erf ::OPT_SE: 83 | return new SE (); 84 | case Erf ::OPT_MSE: 85 | return new MSE (); 86 | } 87 | 88 | return nullptr; 89 | } 90 | 91 | } 92 | 93 | } 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /include/std/filters.hpp: -------------------------------------------------------------------------------- 1 | #ifndef STD_FILTERS_H_ 2 | #define STD_FILTERS_H_ 3 | 4 | // Engine headrers 5 | #include "filter.hpp" 6 | #include "matrix.hpp" 7 | #include "vector.hpp" 8 | #include "image.hpp" 9 | 10 | #include 
"std/initializers.hpp" 11 | 12 | namespace zhetapi { 13 | 14 | namespace ml { 15 | 16 | template 17 | class FeedForward : public Filter { 18 | Matrix _weight = Matrix (); 19 | 20 | Activation *_act = nullptr; 21 | Activation *_dact = nullptr; 22 | 23 | long double _dropout = 0; 24 | 25 | Vector _acache = Vector (); 26 | Vector _zcache = Vector (); 27 | 28 | // For batch inputs 29 | // Matrix _Acache; 30 | // Matrix _Zcache; 31 | public: 32 | // Input size, output size 33 | FeedForward(size_t isize, size_t osize, Activation *act, std::function init = RandomInitializer ()) 34 | : _weight(isize, osize + 1), // +1 for bias 35 | _act(act->copy()), 36 | _dact(act->derivative()) 37 | { 38 | _weight.randomize(init); 39 | } 40 | 41 | void propogate(const Pipe &in, Pipe &out) 42 | { 43 | // Slice the input (+1 for bias) 44 | Vector vin = (in[0]->cast_to_vector()).append_above(1); 45 | 46 | _acache = vin; 47 | 48 | Vector mul = _weight * vin; 49 | 50 | _zcache = _dact->compute(mul); 51 | 52 | // Send to output pipe 53 | *out[0] = _act->compute(mul); 54 | } 55 | 56 | void gradient(const Pipe &delin, Pipe &grads) 57 | { 58 | // TODO: Check sizes later 59 | 60 | // Move shur/stable shur to tensor base 61 | *delin[0] = shur(delin[0]->cast_to_vector(), _zcache); 62 | 63 | Matrix J = delin[0]->cast_to_vector() * _acache.transpose(); 64 | 65 | // Use the kernel function here 66 | *grads[0] = J; 67 | } 68 | 69 | void apply_gradient(const Pipe &grads) 70 | { 71 | // TODO: check with gradeint checking 72 | Matrix J = grads[0]->cast_to_matrix( 73 | _weight.get_rows(), 74 | _weight.get_cols()); 75 | 76 | _weight += J; 77 | } 78 | }; 79 | 80 | #define for_img(i, j, w, h) \ 81 | for (int i = 0; i < w; i++) { \ 82 | for (int j = 0; j < h; j++) 83 | 84 | // Assumes that the input tensor is an image 85 | template 86 | class Convolution : public Filter { 87 | Matrix _filter; 88 | size_t _dim; 89 | 90 | // Type aliases 91 | using byte = image::byte; 92 | using mbyte = Matrix ; 93 | using vbyte = Vector ; 94 | using vfilt = Vector ; 95 | public: 96 | Convolution(const Matrix &filter) 97 | : _filter(filter), 98 | _dim(filter.get_rows()) {} 99 | 100 | // Assume equal padding for now 101 | image::Image process(const image::Image &in, int depth = -1) { 102 | image::Image out = in; 103 | 104 | int w = in.width(); 105 | int h = in.height(); 106 | int c = in.channels(); 107 | 108 | // depth = c; 109 | // Choose color channels only 110 | if (depth < 0) 111 | depth = (c > 1) ? c - 1 : c; 112 | 113 | int n = (_dim - 1)/2; 114 | 115 | byte *data = in._array; 116 | 117 | using namespace std; 118 | for_img(x, y, w, h) { 119 | vbyte t(depth, byte(0)); 120 | 121 | int ymin = y - n; 122 | int ymax = y + n; 123 | 124 | int xmin = x - n; 125 | int xmax = x + n; 126 | 127 | Vector tmp(depth, T(0)); 128 | for (int k = 0; k < _dim; k++) { 129 | size_t ti = x + k - n; 130 | 131 | if (xmin + k < 0 || xmin + k >= h) 132 | continue; 133 | 134 | size_t off = ymin; 135 | size_t len = _dim; 136 | 137 | if (ymin < 0) { 138 | off = 0; 139 | len += ymin; 140 | } 141 | 142 | if (ymax >= w) 143 | len -= (ymax - w + 1); 144 | 145 | size_t i = c * ((x + k - n) * w + off); 146 | 147 | byte *img = &(data[i]); 148 | T *flt = &(_filter[k][off - ymin]); 149 | 150 | for (size_t ch = 0; ch < depth; ch++) { 151 | T s = 0; 152 | 153 | for (size_t i = 0; i < len; i++) 154 | s += flt[i] * ((T) img[i * c + ch]); 155 | 156 | tmp[ch] += s; 157 | } 158 | } 159 | 160 | for (size_t i = 0; i < depth; i++) 161 | t[i] = (tmp[i] > 0) ? 
tmp[i] : 0; 162 | 163 | out.set({x, y}, t); 164 | }} 165 | 166 | return out; 167 | } 168 | }; 169 | 170 | } 171 | 172 | } 173 | 174 | #endif 175 | -------------------------------------------------------------------------------- /include/std/functions.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FUNCTIONS_H_ 2 | #define FUNCTIONS_H_ 3 | 4 | // C++ headers 5 | #include 6 | 7 | #define FACTORIAL_BUFFER_SIZE 2000 8 | 9 | namespace zhetapi { 10 | 11 | namespace special { 12 | 13 | double ln_gamma(double); 14 | double ln_factorial(int); 15 | 16 | double poission(double, int); 17 | 18 | } 19 | 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /include/std/initializers.hpp: -------------------------------------------------------------------------------- 1 | #ifndef STD_INITIALIZERS_H_ 2 | #define STD_INITIALIZERS_H_ 3 | 4 | #ifdef __AVR // Does not support AVR 5 | 6 | #include "avr/random.hpp" 7 | 8 | #else 9 | 10 | // C++ headers 11 | #include 12 | 13 | #endif // Does not support AVR 14 | 15 | namespace zhetapi { 16 | 17 | namespace ml { 18 | 19 | #ifdef __AVR 20 | 21 | template 22 | T RandomInitializer() { 23 | static avr::RandomEngine reng(16183); 24 | 25 | return reng.ldouble(); 26 | } 27 | 28 | #else 29 | 30 | template 31 | struct RandomInitializer { 32 | // Use interval later 33 | T operator()() { 34 | return T (0.5 - rand()/((double) RAND_MAX)); 35 | } 36 | }; 37 | 38 | #endif 39 | 40 | #ifndef __AVR // Does not support AVR 41 | 42 | std::random_device _rd; 43 | std::mt19937 _mt(_rd()); 44 | 45 | template 46 | struct LeCun { 47 | std::normal_distribution _dbt; 48 | public: 49 | explicit LeCun(size_t fan_in) 50 | : _dbt(0, sqrt(T(1) / fan_in)) {} 51 | 52 | T operator()() { 53 | return _dbt(_mt); 54 | } 55 | }; 56 | 57 | template 58 | struct He { 59 | std::normal_distribution _dbt; 60 | public: 61 | explicit He(size_t fan_in) 62 | : _dbt(0, sqrt(T(2) / fan_in)) {} 63 | 64 | T operator()() { 65 | return _dbt(_mt); 66 | } 67 | }; 68 | 69 | template 70 | struct Xavier { 71 | std::normal_distribution _dbt; 72 | public: 73 | explicit Xavier(size_t fan_avg) 74 | : _dbt(0, sqrt(T(1) / fan_avg)) {} 75 | 76 | T operator()() { 77 | return _dbt(_mt); 78 | } 79 | }; 80 | 81 | #endif 82 | 83 | } 84 | 85 | } 86 | 87 | #endif 88 | -------------------------------------------------------------------------------- /include/std/interval.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INTERVAL_H_ 2 | #define INTERVAL_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | // Engine headers 12 | #include "../fixed_vector.hpp" 13 | 14 | namespace zhetapi { 15 | 16 | // TODO: inspect the random-ness of the interval class 17 | namespace utility { 18 | 19 | extern std::random_device rd; 20 | 21 | // Typedefs for sanity 22 | using dre = std::mt19937; 23 | using udb = std::uniform_real_distribution ; 24 | 25 | // TODO: extend to long double 26 | 27 | // Keep inlined here for header only purposes 28 | struct disjoint { 29 | static dre gen; 30 | static udb distro; 31 | 32 | using pflt = std::pair ; 33 | 34 | double left = 0; 35 | double right = 0; 36 | bool closed = true; 37 | 38 | // c should represent compact instead of closed? 
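// Example: disjoint(0.0, 1.0) models the closed interval [0, 1], and
// uniform() draws a sample from it; approximate() nudges open endpoints
// inward by a tiny epsilon so the ordering comparisons below stay sound.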
39 | disjoint(double l = 0.0, double r = 0.0, bool c = true) 40 | : left(l), right(r), closed(c) {} 41 | 42 | double length() const 43 | { 44 | return right - left; 45 | } 46 | 47 | pflt approximate() const 48 | { 49 | static double epsilon = 1e-10; 50 | 51 | if (closed) 52 | return {left, right}; 53 | 54 | return {left + epsilon, right - epsilon}; 55 | } 56 | 57 | // Use a real uniform distro later 58 | double uniform() const 59 | { 60 | return left + distro(gen) * (right - left); 61 | } 62 | 63 | // Check disjointed-ness 64 | bool is_disjoint(const disjoint &dj) const 65 | { 66 | // If either interval is greater, 67 | // then it must be disjoint 68 | return (*this > dj) || (*this < dj); 69 | } 70 | 71 | // The interval is completely to the left 72 | bool operator<(const disjoint &dj) const 73 | { 74 | pflt tapp = approximate(); 75 | pflt oapp = dj.approximate(); 76 | 77 | return (tapp.second < oapp.first) && (tapp.first < oapp.first); 78 | } 79 | 80 | // The interval is completely to the right 81 | bool operator>(const disjoint &dj) const 82 | { 83 | pflt tapp = approximate(); 84 | pflt oapp = dj.approximate(); 85 | 86 | return (tapp.second > oapp.second) && (tapp.first > oapp.second); 87 | } 88 | 89 | bool operator==(const disjoint &dj) const 90 | { 91 | return (left == dj.left) && (right == dj.right) && (closed == dj.closed); 92 | } 93 | }; 94 | 95 | // N is the number of dimensions 96 | // NOTE: for now multidim intervals 97 | // can only be one "box", see the TODO 98 | // below 99 | 100 | /** 101 | * @brief Random generator class, that can uniformly generated Vectors (or 102 | * scalars) with elements that are randomly sampled from a distribution 103 | * (currently each element can be sampled only from a uniform distribution 104 | * that is the union of disjoint intervals). 105 | * 106 | * @tparam N the dimension that the corresponding random Vectors should have. 107 | */ 108 | template 109 | class Interval { 110 | // TODO: this will not work, 111 | // we need a disjoint equivalent for N dimensions 112 | // (think about boxes as N-dim intervals) 113 | disjoint *axes = nullptr; 114 | public: 115 | Interval() : Interval(1.0L) {} 116 | 117 | Interval(long double x) { 118 | axes = new disjoint[N]; 119 | 120 | for (size_t i = 0; i < N; i++) 121 | axes[i] = disjoint(0, x, true); 122 | } 123 | 124 | FixedVector operator()() const { 125 | return uniform(); 126 | } 127 | 128 | FixedVector uniform() const { 129 | // First check that the axes are not null 130 | if (axes == nullptr) 131 | throw null_axes(); 132 | 133 | return FixedVector ( 134 | [&](size_t i) -> double { 135 | return axes[i].uniform(); 136 | }, N 137 | ); 138 | } 139 | 140 | template 141 | friend std::ostream &operator<<(std::ostream &, 142 | const Interval &); 143 | 144 | // Exceptions 145 | class null_axes : public std::runtime_error { 146 | public: 147 | null_axes() : std::runtime_error("Axes of Interval " 148 | " are null") {} 149 | }; 150 | }; 151 | 152 | // TODO: Switch from double to long double 153 | /** 154 | * @brief Single dimensional (scalar) random generator. Can sample uniformly 155 | * from a union of intervals. 
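 *
 * For example (a sketch using operator| declared below):
 *
 *   Interval <1> a(0.0, 1.0);
 *   Interval <1> b(2.0, 3.0);
 *   double x = (a | b).uniform(); // drawn from [0, 1] or [2, 3], each
 *                                 // sub-interval weighted by its length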
156 | */ 157 | template <> 158 | class Interval <1> { 159 | // For random generation 160 | static dre gen; 161 | static udb distro; 162 | 163 | // Should always contain disjoint intervals 164 | std::set _union; 165 | 166 | // Assumes that the intervals in un are disjoint 167 | explicit Interval(const std::set &un) : _union(un) {} 168 | 169 | // Checks that the new 'disjoint' interval is indeed disjoint 170 | bool is_disjoint(const disjoint &djx) const { 171 | for (const disjoint &dj : _union) { 172 | if (!dj.is_disjoint(djx)) 173 | return false; 174 | } 175 | 176 | return true; 177 | } 178 | public: 179 | // Defaults to [0, 1] 180 | Interval() : Interval(1.0L) {} 181 | 182 | explicit Interval(unsigned long long int x) 183 | : Interval((long double) x) {} 184 | 185 | explicit Interval(long double x) 186 | : Interval(0, x) {} 187 | 188 | Interval(double left, double right, bool closed = true) { 189 | disjoint dj {left, right, closed}; 190 | 191 | _union.insert(_union.begin(), dj); 192 | } 193 | 194 | // Properties 195 | double size() const { 196 | double len = 0; 197 | 198 | for (disjoint dj : _union) 199 | len += dj.length(); 200 | 201 | return len; 202 | } 203 | 204 | operator bool() const { 205 | return size() > 0; 206 | } 207 | 208 | double operator()() const { 209 | return uniform(); 210 | } 211 | 212 | // Sampling 213 | double uniform() const { 214 | // TODO: Cover case where the interval is not closed 215 | double len = size(); 216 | 217 | double *db = new double[_union.size() + 1]; 218 | 219 | size_t i = 0; 220 | 221 | db[i++] = 0; 222 | for (disjoint dj : _union) { 223 | db[i] = db[i - 1] + dj.length()/len; 224 | 225 | i++; 226 | } 227 | 228 | double rnd = distro(gen); 229 | 230 | for (i = 0; i < _union.size(); i++) { 231 | if ((rnd > db[i]) && (rnd < db[i + 1])) 232 | break; 233 | } 234 | 235 | delete[] db; 236 | 237 | auto itr = _union.begin(); 238 | 239 | std::advance(itr, i); 240 | 241 | return itr->uniform(); 242 | } 243 | 244 | // Operations 245 | Interval &operator|=(const Interval &itv) { 246 | auto iset = itv._union; 247 | 248 | using namespace std; 249 | 250 | // Check for disjointed-ness 251 | for (const disjoint &dj : iset) { 252 | if (is_disjoint(dj)) 253 | _union.insert(_union.begin(), dj); 254 | else 255 | cout << "Adding a non-disjoint interval" << endl; 256 | } 257 | 258 | return *this; 259 | } 260 | 261 | // Binary operations 262 | friend Interval operator|(const Interval &, const Interval &); 263 | friend Interval operator&(const Interval &, const Interval &); 264 | 265 | friend std::ostream &operator<<(std::ostream &, const Interval &); 266 | }; 267 | 268 | Interval <1> operator|(const Interval <1> &, const Interval <1> &); 269 | 270 | std::ostream &operator<<(std::ostream &, const Interval <1> &); 271 | 272 | // Literal constructor 273 | Interval <1> operator""_I(unsigned long long int); 274 | Interval <1> operator""_I(long double); 275 | 276 | } 277 | 278 | } 279 | 280 | #endif 281 | -------------------------------------------------------------------------------- /include/std/loaders.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LOADERS_H_ 2 | #define LOADERS_H_ 3 | 4 | #ifndef __AVR // Does not support AVR 5 | 6 | namespace zhetapi { 7 | 8 | namespace ml { 9 | 10 | // Forward declarations 11 | template 12 | class Linear; 13 | 14 | template 15 | class ReLU; 16 | 17 | template 18 | class Sigmoid; 19 | 20 | template 21 | class Softmax; 22 | 23 | // Loaders 24 | template 25 | Activation *load_linear(const 
std::vector &args) 26 | { 27 | return new Linear (args[0]); 28 | } 29 | 30 | template 31 | Activation *load_relu(const std::vector &args) 32 | { 33 | return new ReLU (); 34 | } 35 | 36 | template 37 | Activation *load_sigmoid(const std::vector &args) 38 | { 39 | return new Sigmoid (); 40 | } 41 | 42 | template 43 | Activation *load_softmax(const std::vector &args) 44 | { 45 | return new Softmax (); 46 | } 47 | 48 | } 49 | 50 | } 51 | 52 | #endif // Does not support AVR 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /include/std/numtheory.hpp: -------------------------------------------------------------------------------- 1 | #ifndef NUMBER_THEORY_H_ 2 | #define NUMBER_THEORY_H_ 3 | 4 | // Standard headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace zhetapi { 11 | 12 | namespace number_theory { 13 | 14 | // TODO: use f convention for gcd/lcm 15 | 16 | /** 17 | * @brief Integral equivalent of the general 18 | * gcd function, preferably used for integer 19 | * types. 20 | */ 21 | template 22 | T integral_gcd(T a, T b) 23 | { 24 | if (a == 0 || b == 0) 25 | return 1; 26 | 27 | a = ::std::abs(a); 28 | b = ::std::abs(b); 29 | 30 | if (a > b) 31 | ::std::swap(a, b); 32 | 33 | while (b % a != 0) { 34 | b %= a; 35 | 36 | if (a > b) 37 | ::std::swap(a, b); 38 | } 39 | 40 | return ::std::min(a, b); 41 | } 42 | 43 | /** 44 | * @brief Integral equivalent of the 45 | * lcm function. 46 | */ 47 | template 48 | T integral_lcm(T a, T b) 49 | { 50 | return a * b / integral_gcd(a, b); 51 | } 52 | 53 | // TODO: add totient function 54 | 55 | template 56 | T modmul(T a, T b, T mod) 57 | { 58 | // Checks 59 | if (a == 0 || b == 0) 60 | return 0; 61 | 62 | if (a == 1) 63 | return (b % mod); 64 | 65 | if (b == 1) 66 | return (a % mod); 67 | 68 | T hmul = modmul(a, b/2, mod); 69 | if ((b & 1) == 0) 70 | return (hmul + hmul) % mod; 71 | else 72 | return ((a % mod) + (hmul + hmul)) % mod; 73 | } 74 | 75 | // Integral only (add f variants) 76 | template 77 | T modexp(T base, T exp, T mod) 78 | { 79 | // Add a string to print on failure (throw actually) 80 | assert(mod > 1); 81 | 82 | if (!exp) 83 | return 1; 84 | else if (exp == 1) 85 | return (base % mod); 86 | 87 | T hexp = exp >> 1; // Halve the exponent (not double it) before recursing 88 | T tmp = modexp(base, hexp, mod); 89 | 90 | tmp = modmul(tmp, tmp, mod); 91 | if (exp & 1) 92 | tmp = modmul(base, tmp, mod); 93 | 94 | return tmp; 95 | } 96 | 97 | /* template 98 | T modexp(T base, T exp, T mod, T totient) 99 | { 100 | // Add a string to print on failure (throw actually) 101 | assert(mod > 1); 102 | 103 | if (!exp) 104 | return 1; 105 | else if (exp == 1) 106 | return (base % mod); 107 | 108 | T hexp = exp >> 1; 109 | T tmp = modexp(base, hexp, mod); 110 | 111 | tmp = (tmp * tmp) % mod; 112 | if (exp & 0x1) 113 | tmp = (tmp * base) % mod; 114 | 115 | return tmp; 116 | } */ 117 | 118 | // TODO: change to only integral types 119 | template 120 | std::vector sieve(T lim) 121 | { 122 | std::vector primes = {2}; 123 | 124 | if (lim <= 2) // lim is exclusive, so there are no primes below it 125 | return {}; 126 | 127 | for (T i = 3; i < lim; i++) { 128 | bool prime = true; 129 | 130 | for (size_t j = 0; primes[j] <= sqrt(i); j++) { 131 | if (i % primes[j] == 0) { 132 | prime = false; 133 | 134 | break; 135 | } 136 | } 137 | 138 | if (prime) 139 | primes.push_back(i); 140 | } 141 | 142 | return primes; 143 | } 144 | 145 | // Prime factorization 146 | template 147 | std::unordered_map factorize(T n) 148 | { 149 | std::unordered_map factors; 150 | 151 | T low = 2; 152 | while (n > 1) { 153 |
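// Trial division: find the smallest remaining factor i <= sqrt(n), divide
// it out fully, and record its multiplicity; e.g. factorize(12) yields
// {2: 2, 3: 1}.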
uint32_t exp = 0; 154 | 155 | T lim = sqrt(n); 156 | T i; 157 | 158 | for (i = low; i <= lim; i++) { 159 | if (n % i == 0) { 160 | do { 161 | n /= i; 162 | exp++; 163 | } while (n % i == 0); 164 | 165 | break; 166 | } 167 | } 168 | 169 | if (exp > 0) { 170 | factors[i] = exp; 171 | } else { 172 | // No factor found up to sqrt(n): n itself is prime, so record it and stop 173 | factors[n] = 1; break; 174 | } 175 | } 176 | 177 | return factors; 178 | } 179 | 180 | } 181 | 182 | } 183 | 184 | #endif 185 | -------------------------------------------------------------------------------- /include/timer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TIMER_H_ 2 | #define TIMER_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | 7 | namespace zhetapi { 8 | 9 | class Timer { 10 | public: 11 | using clk = std::chrono::high_resolution_clock; 12 | using time = clk::time_point; 13 | private: 14 | clk _clk; 15 | time _start; 16 | time _end; 17 | public: 18 | Timer(); 19 | 20 | void start(); 21 | void stop(); 22 | 23 | time now(); 24 | 25 | long double dt(); 26 | long double split(); 27 | }; 28 | 29 | } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /include/token.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TOKEN_H_ 2 | #define TOKEN_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace zhetapi { 15 | 16 | // Forward declarations 17 | class Engine; 18 | class MethodTable; 19 | 20 | // TODO: clean this file and its comments 21 | 22 | /** 23 | * @brief The basic unit of computation for the ZHP scripting language and 24 | * framework. 25 | */ 26 | class Token { 27 | MethodTable *_mtable = nullptr; 28 | public: 29 | // Keep for this release 30 | // Use more concise names (also refactor to Type) 31 | enum type { 32 | undefined, 33 | alg, 34 | opd, 35 | oph, 36 | opn, 37 | var, 38 | vrh, 39 | vcl, 40 | ftn, 41 | ndr, 42 | ndd, 43 | reg, 44 | token_wildcard, 45 | token_lvalue, 46 | token_rvalue, 47 | token_node_list, 48 | token_module, 49 | token_collection, 50 | token_dictionary 51 | }; 52 | public: 53 | Token(); 54 | Token(MethodTable *); 55 | // Token(const std::vector > &); 56 | 57 | virtual ~Token(); 58 | 59 | // TODO: also need to add documentation for methods 60 | virtual Token *attr(const std::string &, Engine *, 61 | const std::vector &, size_t); 62 | virtual void list_attributes(std::ostream & = std::cout) const; 63 | 64 | bool operator!=(Token *) const; 65 | 66 | // Change caller to a public member (static) 67 | 68 | /* 69 | * Inspector function passed on to all derived classes; helps decide 70 | * what to do with Tokens of different types. 71 | */ 72 | virtual type caller() const; 73 | 74 | /* 75 | * Returns a representation of the Token, regardless of its 76 | * type. 77 | */ 78 | virtual std::string dbg_str() const; 79 | 80 | // TODO: Add a virtual display method 81 | 82 | /* 83 | * Compares Tokens and returns whether they are equivalent. Used for node 84 | * matching. 85 | */ 86 | virtual bool operator==(Token *) const; 87 | 88 | // Read and write 89 | virtual void write(std::ostream &) const; 90 | 91 | /** 92 | * @brief Returns a copy of the Token (with the same data: the resulting 93 | * Token should equal the original with ==). Pure virtual because any 94 | * Tokens used will be copied at some point.
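 *
 * The caller assumes ownership of the returned pointer and is responsible
 * for freeing it.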
95 | */ 96 | virtual Token *copy() const = 0; 97 | 98 | /** 99 | * @brief Thrown if the program requests a Token for an attribute or 100 | * method it does not have. 101 | */ 102 | class unknown_attribute : public std::runtime_error { 103 | public: 104 | unknown_attribute(const std::string &msg) 105 | : std::runtime_error(msg) {} 106 | }; 107 | 108 | /** 109 | * @brief Thrown if the Token does not have a write function. 110 | */ 111 | class empty_io : public std::runtime_error { 112 | public: 113 | empty_io() : std::runtime_error("Empty IO functions (write)...") {} 114 | }; 115 | }; 116 | 117 | // Token id macro 118 | #define zhp_token_id(type) \ 119 | size_t type::id() const { \ 120 | return zhp_id (); \ 121 | } 122 | 123 | // Comparing tokens 124 | bool tokcmp(Token *, Token *); 125 | 126 | // Printing a list of tokens 127 | std::ostream &operator<<(std::ostream &, const std::vector &); 128 | 129 | // Macro for defining methods 130 | #define TOKEN_METHOD(name) \ 131 | Token *name(Token *tptr, const Targs &args) 132 | 133 | } 134 | 135 | #endif 136 | -------------------------------------------------------------------------------- /include/training.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TRAINING_H_ 2 | #define TRAINING_H_ 3 | 4 | // Engine headers 5 | #include "dataset.hpp" 6 | #include "display.hpp" 7 | #include "dnn.hpp" 8 | #include "erf.hpp" 9 | #include "optimizer.hpp" 10 | 11 | #include "linalg.hpp" 12 | 13 | namespace zhetapi { 14 | 15 | namespace ml { 16 | 17 | /* 18 | * TODO: Abstract the training methods into a single training program class, 19 | * which stores the dataset(s) for its entire life. This way, training can be 20 | * made much faster in the GPU, by preloading the datasets into the GPU and 21 | * releasing them only when they are to be deconstructed. 
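 *
 * Typical usage of the free functions below (a sketch, assuming a DNN dnn,
 * DataSets inputs and outputs, an Erf erf and an Optimizer opt):
 *
 *   train_dataset(dnn, inputs, outputs, 32);  // mini-batches of 32
 *   PerformanceStatistics stats = train_dataset_perf(dnn, inputs,
 *           outputs, 32, &erf, &opt);         // with cost/accuracy metrics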
22 | */
23 | 
24 | // Diagnosing function for training
25 | template <class T>
26 | using Comparator = bool (*)(const Vector <T> &, const Vector <T> &);
27 | 
28 | // Default diagnoser
29 | template <class T>
30 | bool _def_cmp(const Vector <T> &a, const Vector <T> &e)
31 | {
32 | 	return a == e;
33 | }
34 | 
35 | // Training statistics
36 | template <class T>
37 | struct PerformanceStatistics {
38 | 	T _cost = T(0);
39 | 	size_t _passed = 0;
40 | 	double _kernel_time = 0;
41 | };
42 | 
43 | // Fitting a single I/O pair
44 | template <class T>
45 | void fit(
46 | 		DNN <T> &dnn,
47 | 		const Vector <T> &in,
48 | 		const Vector <T> &out,
49 | 		Erf <T> *erf,
50 | 		Optimizer <T> *opt)
51 | {
52 | 	Erf <T> *derf = erf->derivative();
53 | 
54 | 	// Use cached compute later
55 | 	Vector <T> actual = dnn(in);
56 | 	Vector <T> delta = derf->compute(out, actual);
57 | 
58 | 	// TODO: should be a vector of matrices
59 | 	Matrix <T> *J;
60 | 	/* Matrix <T> *Jp;
61 | 	Matrix <T> *Q; */
62 | 
63 | 	J = dnn.jacobian_delta(in, delta);
64 | 
65 | 	// Checked
66 | 	// Q = dnn.jacobian_check(in, out, erf);
67 | 
68 | 	/* std::cout << "Diff:" << std::endl;
69 | 	for (size_t i = 0; i < dnn.size(); i++)
70 | 		std::cout << "Diff[i] = " << Q[i] - J[i] << std::endl;
71 | 
72 | 	// TODO: gotta free
73 | 	Vector <T> *ta = new Vector <T> [dnn._size + 1];
74 | 	Vector <T> *tz = new Vector <T> [dnn._size];
75 | 	Jp = simple_gradient(
76 | 		dnn._layers,
77 | 		dnn._size,
78 | 		ta, tz,
79 | 		in, out,
80 | 		erf
81 | 	);
82 | 
83 | 	std::cout << "Diff2:" << std::endl;
84 | 	for (size_t i = 0; i < dnn.size(); i++)
85 | 		std::cout << "Diff2[i] = " << Q[i] - Jp[i] << std::endl; */
86 | 
87 | 	// Continue regardless
88 | 	J = opt->update(J, dnn.size());
89 | 	dnn.apply_gradient(J);
90 | 
91 | 	delete[] J;
92 | 	delete derf;
93 | }
94 | 
95 | template <class T>
96 | void fit(
97 | 		DNN <T> &dnn,
98 | 		const DataSet <T> &ins,
99 | 		const DataSet <T> &outs,
100 | 		Erf <T> *erf,
101 | 		Optimizer <T> *opt)
102 | {
103 | 	/* if (ins.size() != outs.size())
104 | 		throw bad_io_dimensions();
105 | 
106 | 	if ((ins[0].size() != _isize) || (outs[0].size() != _osize))
107 | 		throw bad_io_dimensions();
108 | 
109 | 	if (!_opt)
110 | 		throw null_optimizer();
111 | 
112 | 	if (!_cost)
113 | 		throw null_loss_function(); */
114 | 
115 | 	Matrix <T> *J;
116 | 
117 | 	// TODO: put the batch gradient (multithreading etc.) in a DNN method (batch jacobian)
118 | 	J = simple_batch_gradient(dnn.layers(),
119 | 		dnn.size(), dnn.acache(), dnn.zcache(),
120 | 		ins, outs, erf
121 | 	);
122 | 
123 | 	J = opt->update(J, dnn.size());
124 | 
125 | 	dnn.apply_gradient(J);
126 | 
127 | 	delete[] J;
128 | }
129 | 
130 | template <class T>
131 | void multithreaded_fit(
132 | 		DNN <T> &dnn,
133 | 		const DataSet <T> &ins,
134 | 		const DataSet <T> &outs,
135 | 		Erf <T> *erf,
136 | 		Optimizer <T> *opt,
137 | 		size_t threads)
138 | {
139 | 	/* if (ins.size() != outs.size())
140 | 		throw bad_io_dimensions();
141 | 
142 | 	if ((ins[0].size() != _isize) || (outs[0].size() != _osize))
143 | 		throw bad_io_dimensions();
144 | 
145 | 	if (!_opt)
146 | 		throw null_optimizer();
147 | 
148 | 	if (!_cost)
149 | 		throw null_loss_function(); */
150 | 
151 | 	Matrix <T> *J;
152 | 
153 | 	J = simple_multithreaded_batch_gradient(
154 | 		dnn.layers(),
155 | 		dnn.size(),
156 | 		ins,
157 | 		outs,
158 | 		erf,
159 | 		threads);
160 | 	J = opt->update(J, dnn.size());
161 | 
162 | 	dnn.apply_gradient(J);
163 | 
164 | 	delete[] J;
165 | }
166 | 
167 | // Non-statistical methods (without performance statistics)
168 | template <class T>
169 | void train_dataset(
170 | 		DNN <T> &dnn,
171 | 		const DataSet <T> &ins,
172 | 		const DataSet <T> &outs,
173 | 		size_t batch_size,
174 | 		size_t threads = 1)
175 | {
176 | 	assert(ins.size() == outs.size());
177 | 
178 | 	std::vector <DataSet <T>> input_batches = split(ins, batch_size);
179 | 	std::vector <DataSet <T>> output_batches = split(outs, batch_size);
180 | 
181 | 	size_t n;
182 | 
183 | 	n = input_batches.size();
184 | 	for (size_t i = 0; i < n; i++) {
185 | 		if (threads > 1)
186 | 			dnn.multithreaded_fit(input_batches[i], output_batches[i], threads);
187 | 		else
188 | 			dnn.fit(input_batches[i], output_batches[i]);
189 | 	}
190 | }
191 | 
192 | // Statistical counterparts of the above (with performance metrics)
193 | template <class T>
194 | PerformanceStatistics <T> train_mini_batch_perf(
195 | 		DNN <T> &dnn,
196 | 		const DataSet <T> &ins,
197 | 		const DataSet <T> &outs,
198 | 		Erf <T> *erf,
199 | 		Optimizer <T> *opt,
200 | 		Comparator <T> cmp = _def_cmp <T>,
201 | 		Display::type display = 0,
202 | 		size_t threads = 1)
203 | {
204 | 	assert(ins.size() == outs.size());
205 | 
206 | 	PerformanceStatistics <T> ns;
207 | 	Vector <T> to;
208 | 	T perr;
209 | 	size_t n;
210 | 
211 | 	perr = 0;
212 | 	n = ins.size();
213 | 
214 | 	// Performance statistics first
215 | 	for (size_t i = 0; i < n; i++) {
216 | 		to = dnn(ins[i]);
217 | 		ns._cost += erf->compute(to, outs[i]).x();
218 | 		ns._passed += cmp(to, outs[i]);
219 | 
220 | 		perr += fabs((to - outs[i]).norm() / outs[i].norm());
221 | 	}
222 | 
223 | 	if (threads > 1)
224 | 		multithreaded_fit(dnn, ins, outs, erf, opt, threads);
225 | 	else
226 | 		fit(dnn, ins, outs, erf, opt);
227 | 
228 | 	perr /= n;
229 | 	if (display & Display::batch) {
230 | 		std::cout << "Batch done:"
231 | 			<< " %-err = " << 100 * perr << "%"
232 | 			<< " %-passed = " << (100.0 * ns._passed)/n << "%"
233 | 			<< " #passed = " << ns._passed
234 | 			<< std::endl;
235 | 	}
236 | 
237 | 	return ns;
238 | }
239 | 
240 | template <class T>
241 | PerformanceStatistics <T> train_dataset_perf(
242 | 		DNN <T> &dnn,
243 | 		const DataSet <T> &ins,
244 | 		const DataSet <T> &outs,
245 | 		size_t batch_size,
246 | 		Erf <T> *erf,
247 | 		Optimizer <T> *opt,
248 | 		Display::type display = 0,
249 | 		size_t threads = 1,
250 | 		Comparator <T> cmp = _def_cmp <T>)
251 | {
252 | 	assert(ins.size() == outs.size());
253 | 
254 | 	std::vector <DataSet <T>> input_batches = split(ins, batch_size);
255 | 	std::vector <DataSet <T>> output_batches = split(outs, batch_size);
256 | 
257 | 	PerformanceStatistics <T> ns;
258 | 	PerformanceStatistics <T> bs;
259 | 	size_t n;
260 | 
261 | 	n = input_batches.size();
262 | 	for (size_t i = 0; i < n; i++) {
263 | 		bs = train_mini_batch_perf(dnn,
264 | 			input_batches[i],
265 | 			output_batches[i],
266 | 			erf,
267 | 			opt,
268 | 			cmp,
269 | 			display,
270 | 			threads);
271 | 
272 | 		ns._cost += bs._cost;
273 | 		ns._passed += bs._passed;
274 | 	}
275 | 
276 | 	return ns;
277 | }
278 | 
279 | }
280 | 
281 | }
282 | 
283 | #endif
284 | 
--------------------------------------------------------------------------------
/include/vector_type.hpp:
--------------------------------------------------------------------------------
1 | #ifndef VECTOR_TYPE_H_
2 | #define VECTOR_TYPE_H_
3 | 
4 | // Standard headers
5 | #include <cassert>
6 | 
7 | namespace zhetapi {
8 | 
9 | // Vector type interface
10 | template <class T>
11 | class VectorType {
12 | public:
13 | 	// Required functions
14 | 	virtual size_t size() const = 0;
15 | 
16 | 	virtual T &get(size_t) = 0;
17 | 	virtual const T &get(size_t) const = 0;
18 | 
19 | 	virtual T &operator[](size_t) = 0;
20 | 	virtual const T &operator[](size_t) const = 0;
21 | 
22 | 	// Also add a normalize which returns a new object
23 | 	// virtual void normalize() = 0;
24 | 	virtual T norm() const = 0;
25 | 
26 | 	virtual VectorType &operator+=(const VectorType &) = 0;
27 | 	virtual VectorType &operator-=(const VectorType &) = 0;
28 | 
29 | 	virtual VectorType &operator*=(const T &) = 0;
30 | 	virtual VectorType &operator/=(const T &) = 0;
31 | 
32 | 	// Friend operations
33 | 	template <class U>
34 | 	friend U dot(const VectorType <U> &, const VectorType <U> &);
35 | };
36 | 
37 | template <class T>
38 | T dot(const VectorType <T> &a, const VectorType <T> &b)
39 | {
40 | 	assert(a.size() == b.size());
41 | 
42 | 	T sum = 0;
43 | 	for (size_t i = 0; i < a.size(); i++)
44 | 		sum += a[i] * b[i];
45 | 
46 | 	return sum;
47 | }
48 | 
49 | }
50 | 
51 | #endif
52 | 
--------------------------------------------------------------------------------
/source/autograd/ml.cpp:
--------------------------------------------------------------------------------
1 | #include "../../include/autograd/ml.hpp"
2 | 
3 | namespace zhetapi {
4 | 
5 | namespace autograd {
6 | 
7 | namespace ml {
8 | 
9 | //////////////////////
10 | // Static variables //
11 | //////////////////////
12 | 
13 | utility::Interval <1> _kdense::rng;
14 | 
15 | }
16 | 
17 | }
18 | 
19 | }
20 | 
--------------------------------------------------------------------------------
/source/cuda/nvarena.cu:
--------------------------------------------------------------------------------
1 | #include <cuda/nvarena.cuh>
2 | 
3 | #include <cuda/error.cuh>
4 | 
5 | namespace zhetapi {
6 | 
7 | /**
8 |  * @brief Initializes the allocator with a specific amount of memory.
9 |  *
10 |  * @param mb the number of megabytes (not bytes!) that the allocator should hold
11 |  * on to and serve.
12 |  */
13 | NVArena::NVArena(size_t mb)
14 | {
15 | 	size_t bytes = mb << 20;
16 | 
17 | 	cudaMalloc(&_pool, bytes);
18 | 
19 | 	__cuda_check_error();
20 | }
21 | 
22 | /**
23 |  * @brief Destructor. The allocator releases its pool of memory and notifies
24 |  * the user of blocks of memory that are still allocated.
25 |  */
26 | NVArena::~NVArena()
27 | {
28 | 	if (_warn) {
29 | 		for (const auto &pr : _flist) {
30 | 			if (pr.second != 0) {
31 | 				std::cout << "NVArena: untracked block @"
32 | 					<< pr.first << " [size=" << pr.second
33 | 					<< "]" << std::endl;
34 | 			}
35 | 		}
36 | 	}
37 | 
38 | 	cudaFree(_pool);
39 | }
40 | 
41 | /**
42 |  * @brief Allocates a block of memory.
43 |  *
44 |  * @param bytes the number of bytes to allocate.
45 |  *
46 |  * @return the allocated block.
47 |  */
48 | void *NVArena::alloc(size_t bytes)
49 | {
50 | 	// Case where _flist is empty
51 | 	if (_flist.empty()) {
52 | 		// Assign to the free list
53 | 		_flist[_pool] = bytes;
54 | 
55 | 		return _pool;
56 | 	}
57 | 
58 | 	// Get the last block
59 | 	auto last = _flist.rbegin();
60 | 
61 | 	// TODO: throw bad_alloc if there is no more space
62 | 
63 | 	// Allocation strategy: allocate from the end of the arena
64 | 	void *laddr = last->first + last->second;
65 | 
66 | 	// Assign to the free list
67 | 	_flist[laddr] = bytes;
68 | 
69 | 	return laddr;
70 | }
71 | 
72 | /**
73 |  * @brief Frees a block of memory.
74 |  *
75 |  * @param ptr the block of memory to be freed.
76 |  */
77 | void NVArena::free(void *ptr)
78 | {
79 | 	if (_flist.find(ptr) == _flist.end())
80 | 		throw segfault();
81 | 
82 | 	if (_flist[ptr] == 0)
83 | 		throw double_free();
84 | 
85 | 	_flist[ptr] = 0;
86 | }
87 | 
88 | /**
89 |  * @brief Copies a block of memory from host memory to GPU memory, using \c
90 |  * cudaMemcpy. Warns if the number of bytes to copy exceeds the block size on
91 |  * the GPU (assuming the allocator's warning flag is turned on).
92 |  *
93 |  * @param dst the pointer to the destination in GPU memory.
94 |  * @param src the pointer to the block in host memory.
95 |  * @param bytes the number of bytes to copy.
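 *
 * A minimal usage sketch (the sizes here are hypothetical, and assume the
 * arena was constructed with enough memory):
 *
 * @code
 * NVArena arena(4);				// 4 MB pool on the GPU
 * float host[256] = {1, 2, 3};
 * void *dev = arena.alloc(sizeof(host));	// device-side block
 * arena.write(dev, host, sizeof(host));	// host -> device copy
 * @endcode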
96 |  */
97 | void NVArena::write(void *dst, void *src, size_t bytes)
98 | {
99 | 	// Do some checks before copying (the lookup assumes dst is the base address of a block)
100 | 	if (_warn) {
101 | 		auto lb = _flist.lower_bound(dst);
102 | 
103 | 		if (lb == _flist.end()) {
104 | 			std::cout << "NVArena: @" << dst
105 | 				<< " was never allocated"
106 | 				<< std::endl;
107 | 		} else {
108 | 			void *lim = lb->first + lb->second;
109 | 
110 | 			if (dst + bytes > lim) {
111 | 				std::cout << "NVArena: writing " << bytes
112 | 					<< " bytes to block @" << lb->first
113 | 					<< " [offset +"
114 | 					<< ((char *) dst - (char *) lb->first)
115 | 					<< "] with only " << lb->second
116 | 					<< " bytes allocated" << std::endl;
117 | 			}
118 | 		}
119 | 	}
120 | 
121 | 	cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice);
122 | 
123 | 	__cuda_check_error();
124 | }
125 | 
126 | /**
127 |  * @brief Copies a block of memory from GPU memory to host memory, using \c
128 |  * cudaMemcpy. Warns if the number of bytes to copy exceeds the block size on
129 |  * the GPU (assuming the allocator's warning flag is turned on).
130 |  *
131 |  * @param dst the pointer to the destination in host memory.
132 |  * @param src the pointer to the block in GPU memory.
133 |  * @param bytes the number of bytes to copy.
134 |  */
135 | void NVArena::read(void *dst, void *src, size_t bytes)
136 | {
137 | 	// Do some checks before copying
138 | 	if (_warn) {
139 | 		auto lb = _flist.lower_bound(src);
140 | 
141 | 		if (lb == _flist.end()) {
142 | 			std::cout << "NVArena: @" << src
143 | 				<< " was never allocated"
144 | 				<< std::endl;
145 | 		} else {
146 | 			void *lim = lb->first + lb->second;
147 | 
148 | 			if (src + bytes > lim) {
149 | 				std::cout << "NVArena: read " << bytes
150 | 					<< " bytes from block @" << lb->first
151 | 					<< " [offset +"
152 | 					<< ((char *) src - (char *) lb->first)
153 | 					<< "] with only " << lb->second
154 | 					<< " bytes allocated" << std::endl;
155 | 			}
156 | 		}
157 | 	}
158 | 
159 | 	cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost);
160 | 
161 | 	__cuda_check_error();
162 | }
163 | 
164 | /**
165 |  * @brief Prints each block that has been allocated (or freed). Use for
166 |  * debugging purposes.
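 *
 * For example (a sketch; actual addresses vary from run to run):
 *
 * @code
 * NVArena arena(4);
 * void *a = arena.alloc(256);
 * void *b = arena.alloc(512);
 * arena.free(a);
 * arena.show_mem_map();	// first block prints 0 bytes and "[freed]",
 *				// second block prints 512 bytes
 * @endcode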
167 | */ 168 | void NVArena::show_mem_map() const 169 | { 170 | for (const auto &pr : _flist) { 171 | std::cout << "block @" << pr.first << ": " << pr.second 172 | << " bytes"; 173 | 174 | if (pr.second == 0) 175 | std::cout << "\t[freed]"; 176 | 177 | std::cout << std::endl; 178 | } 179 | } 180 | 181 | } 182 | -------------------------------------------------------------------------------- /source/io.cpp: -------------------------------------------------------------------------------- 1 | // Standard headers 2 | #include 3 | #include 4 | #include 5 | 6 | // Library headers 7 | #include "../include/io/print.hpp" 8 | 9 | namespace zhetapi { 10 | 11 | namespace io { 12 | 13 | // TODO: create a separate (single header) library for this 14 | std::string table(const Args &headers, 15 | const std::vector &rows) 16 | { 17 | assert(headers.size() == rows[0].size()); 18 | 19 | // Special characters 20 | std::string vert = "\u2502"; 21 | std::string horiz = "\u2500"; 22 | 23 | // Top corners 24 | std::string tl = "\u250C"; 25 | std::string tr = "\u2510"; 26 | 27 | // Bottom corners 28 | std::string bl = "\u2514"; 29 | std::string br = "\u2518"; 30 | 31 | // Calculate column widths 32 | std::vector widths(headers.size(), 0); 33 | for (int i = 0; i < headers.size(); i++) 34 | widths[i] = headers[i].length(); 35 | 36 | for (const auto &row : rows) { 37 | for (size_t i = 0; i < row.size(); i++) 38 | widths[i] = std::max(widths[i], row[i].size()); 39 | } 40 | 41 | // Stream 42 | std::stringstream ss; 43 | 44 | // Print the top 45 | ss << tl; 46 | for (size_t i = 0; i < headers.size(); i++) { 47 | for (int n = 0; n < widths[i] + 2; n++) 48 | ss << horiz; 49 | 50 | if (i < headers.size() - 1) 51 | ss << "\u252C"; 52 | else 53 | ss << tr; 54 | } 55 | ss << "\n"; 56 | 57 | // Print the header 58 | for (int i = 0; i < headers.size(); i++) { 59 | ss << vert << " " << std::setw(widths[i]) 60 | << headers[i] << " "; 61 | } 62 | ss << vert << "\n"; 63 | 64 | // Post header separator 65 | ss << "\u251C"; 66 | for (size_t i = 0; i < headers.size(); i++) { 67 | for (int n = 0; n < widths[i] + 2; n++) 68 | ss << horiz; 69 | 70 | if (i < headers.size() - 1) 71 | ss << "\u253C"; 72 | else 73 | ss << "\u2524"; 74 | } 75 | ss << "\n"; 76 | 77 | // Print the rows 78 | for (const auto &row : rows) { 79 | for (int i = 0; i < row.size(); i++) { 80 | ss << vert << " " << std::setw(widths[i]) 81 | << row[i] << " "; 82 | } 83 | ss << vert << std::endl; 84 | } 85 | 86 | // Post row separator 87 | ss << bl; 88 | for (size_t i = 0; i < headers.size(); i++) { 89 | for (int n = 0; n < widths[i] + 2; n++) 90 | ss << horiz; 91 | 92 | if (i < headers.size() - 1) 93 | ss << "\u2534"; 94 | else 95 | ss << br; 96 | } 97 | ss << "\n"; 98 | 99 | return ss.str(); 100 | } 101 | 102 | } 103 | 104 | } 105 | -------------------------------------------------------------------------------- /source/linalg.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/linalg.hpp" 2 | 3 | namespace zhetapi { 4 | 5 | namespace linalg { 6 | 7 | const long double GAMMA = 1.15470053837925152901 + 1e-4; 8 | const long double EPSILON = 1e-10; 9 | 10 | static Mat size_reduce(const Mat &H) 11 | { 12 | // Dimensions 13 | size_t n = H.get_rows(); 14 | 15 | // Unimodular matrix 16 | Mat D = Mat::identity(n); 17 | 18 | for (size_t i = 1; i < n; i++) { 19 | for (int j = i - 1; j >= 0; j--) { 20 | long double q = std::floor(0.5 + H[i][j]/H[j][j]); 21 | 22 | for (size_t k = 0; k < n; k++) 23 | D[i][k] -= q * D[j][k]; 
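			// q is the nearest integer to H[i][j]/H[j][j]; subtracting
			// q times row j of D keeps D unimodular while performing
			// the Hermite size-reduction step of PSLQ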
24 | 		}
25 | 	}
26 | 
27 | 	return D;
28 | }
29 | 
30 | static std::pair <size_t, Mat> bergman_swap(const Mat &H, long double gamma)
31 | {
32 | 	// Dimensions
33 | 	size_t n = H.get_rows();
34 | 
35 | 	// Unimodular matrix
36 | 	Mat D = Mat::identity(n);
37 | 
38 | 	long double max = 0;
39 | 
40 | 	size_t r = -1;
41 | 	for (size_t i = 0; i < n - 1; i++) {
42 | 		long double tmp = pow(gamma, i) * std::abs(H[i][i]);
43 | 
44 | 		if (tmp > max) {
45 | 			max = tmp;
46 | 
47 | 			r = i;
48 | 		}
49 | 	}
50 | 
51 | 	D.swap_rows(r, r + 1);
52 | 
53 | 	return {r, D};
54 | }
55 | 
56 | static Mat corner(const Mat &H, size_t r)
57 | {
58 | 	// Dimensions
59 | 	size_t n = H.get_rows();
60 | 
61 | 	// Cached constants
62 | 	long double eta = H[r][r];
63 | 	long double beta = H[r + 1][r];
64 | 	long double lambda = H[r + 1][r + 1];
65 | 	long double delta = sqrt(beta * beta + lambda * lambda);
66 | 
67 | 	// Orthogonal matrix Q
68 | 	return Mat(n - 1, n - 1,
69 | 		[&](size_t i, size_t j) -> long double {
70 | 			if (i == j) {
71 | 				if ((i == r) || (i == r + 1))
72 | 					return beta/delta;
73 | 				else
74 | 					return 1;
75 | 			} else if ((i == r) && (j == r + 1)) {
76 | 				return -lambda/delta;
77 | 			} else if ((i == r + 1) && (j == r)) {
78 | 				return lambda/delta;
79 | 			}
80 | 
81 | 			return 0;
82 | 		}
83 | 	);
84 | }
85 | 
86 | // Using the PSLQe algorithm from https://arxiv.org/abs/1707.05037
87 | Vec pslq(const Vec &a, long double gamma, long double epsilon)
88 | {
89 | 	// Length of a
90 | 	size_t n = a.size();
91 | 
92 | 	// Save a copy of a (take normalized value)
93 | 	Mat alpha = a.normalized().transpose();
94 | 
95 | 	// Partial sums
96 | 	Vec s(n,
97 | 		[&](size_t j) -> long double {
98 | 			long double sum = 0;
99 | 
100 | 			for (size_t k = j; k < n; k++)
101 | 				sum += alpha[0][k] * alpha[0][k];
102 | 
103 | 			return sqrt(sum);
104 | 		}
105 | 	);
106 | 
107 | 	// Construct the matrix H_alpha
108 | 	Mat H_alpha(n, n - 1,
109 | 		[&](size_t i, size_t j) -> long double {
110 | 			if ((i < j) && (j < n - 1))
111 | 				return 0;
112 | 			else if ((i == j) && (i < n - 1))
113 | 				return s[i + 1]/s[i];
114 | 
115 | 			return -(alpha[0][i] * alpha[0][j])/(s[j] * s[j + 1]);
116 | 		}
117 | 	);
118 | 
119 | 	Mat H = H_alpha;
120 | 
121 | 	Mat A = Mat::identity(n);
122 | 	Mat B = Mat::identity(n);
123 | 
124 | 	Mat D = size_reduce(H);
125 | 
126 | 	// Update lambda: returns false if H has a zero on the diagonal
127 | 	auto update = [&]() -> bool {
128 | 		Mat D_inv = D.inverse();
129 | 
130 | 		alpha = alpha * D_inv;
131 | 		H = D * H;
132 | 		A = D * A;
133 | 		B = B * D_inv;
134 | 
135 | 		// Check diagonal elements for non-zero
136 | 		for (size_t i = 0; i < H.get_cols(); i++) {
137 | 			if (std::abs(H[i][i]) < epsilon)
138 | 				return false;
139 | 		}
140 | 
141 | 		return true;
142 | 	};
143 | 
144 | 	// Update once first
145 | 	update();
146 | 
147 | 	// Main loop
148 | 	while (std::abs(H[n - 1][n - 2]) >= epsilon) {
149 | 		auto Dr = bergman_swap(H, gamma);
150 | 
151 | 		D = Dr.second;
152 | 
153 | 		if (!update())
154 | 			break;
155 | 
156 | 		if (Dr.first < n - 2)
157 | 			H *= corner(H, Dr.first);
158 | 
159 | 		D = size_reduce(H);
160 | 
161 | 		if (!update())
162 | 			break;
163 | 	}
164 | 
165 | 	return B.get_column(n - 2);
166 | }
167 | 
168 | }
169 | 
170 | }
171 | 
--------------------------------------------------------------------------------
/source/polynomial.cpp:
--------------------------------------------------------------------------------
1 | #include "../include/polynomial.hpp"
2 | 
3 | namespace zhetapi {
4 | 
5 | // Next power of 2
6 | size_t npow2(size_t k)
7 | {
8 | 	size_t v = k;
9 | 
10 | 	// Assuming size_t is 64-bit
11 | 	v--;
12 | 	v |= v >> 1;
13 | 	v |= v >> 2;
14 | 	v |= v >> 4;
15 | 	v |= v >> 8;
16 | 	v |= v >> 16;
17 | 	v |= v >> 32;
18 | 	v++;
19 | 
20 | 	return v;
21 | }
22 | 
23 | }
24 | 
--------------------------------------------------------------------------------
/source/range.cpp:
--------------------------------------------------------------------------------
1 | #include "../include/range.hpp"
2 | 
3 | namespace zhetapi {
4 | 
5 | Range all = Range (-1, 0, -1);
6 | 
7 | }
8 | 
--------------------------------------------------------------------------------
/source/std/functions.cpp:
--------------------------------------------------------------------------------
1 | #include "../../include/std/functions.hpp"
2 | 
3 | namespace zhetapi {
4 | 
5 | namespace special {
6 | 
7 | double ln_gamma(double x)
8 | {
9 | 	if (x <= 0)
10 | 		throw("ln_gamma: expected a positive argument.");
11 | 
12 | 	static const int N = 14;
13 | 
14 | 	static const double C[] = {
15 | 		57.1562356658629235,
16 | 		-59.5979603554754912,
17 | 		14.1360979747417471,
18 | 		-0.491913816097620199,
19 | 		0.339946499848118887e-4,
20 | 		0.465236289270485756e-4,
21 | 		-0.983744753048795646e-4,
22 | 		0.158088703224912494e-3,
23 | 		-0.210264441724104883e-3,
24 | 		0.217439618115212643e-3,
25 | 		-0.164318106536763890e-3,
26 | 		0.844182239838527433e-4,
27 | 		-0.261908384015814087e-4,
28 | 		0.368991826595316234e-5
29 | 	};
30 | 
31 | 	double tx;
32 | 	double ty;
33 | 	double tmp;
34 | 	double ser;
35 | 
36 | 	ty = tx = x;
37 | 
38 | 	tmp = tx + 5.24218750000000000;
39 | 	tmp = (tx + 0.5) * log(tmp) - tmp;
40 | 	ser = 0.999999999999997092;
41 | 
42 | 	int i = 0;
43 | 	while (i < N)
44 | 		ser += C[i++]/(++ty);
45 | 
46 | 	return tmp + log(2.5066282746310005 * ser / tx);
47 | }
48 | 
49 | double ln_factorial(int x)
50 | {
51 | 	static double table[FACTORIAL_BUFFER_SIZE];
52 | 	static bool init = true;
53 | 
54 | 	if (init) {
55 | 		init = false;
56 | 
57 | 		for (int i = 0; i < FACTORIAL_BUFFER_SIZE; i++)
58 | 			table[i] = ln_gamma(i + 1.0);
59 | 	}
60 | 
61 | 	if (x < 0)
62 | 		throw("ln_factorial: cannot have a negative argument.");
63 | 
64 | 	if (x < FACTORIAL_BUFFER_SIZE)
65 | 		return table[x];
66 | 
67 | 	return ln_gamma(x + 1.0);
68 | }
69 | 
70 | double poisson(double lambda, int k)
71 | {
72 | 	double lp = -lambda + k * log(lambda) - ln_gamma(k + 1);	// log of lambda^k e^(-lambda) / k!
73 | 	return exp(lp);
74 | }
75 | 
76 | }
77 | 
78 | }
79 | 
--------------------------------------------------------------------------------
/source/std/interval.cpp:
--------------------------------------------------------------------------------
1 | #include "../../include/std/interval.hpp"
2 | 
3 | namespace zhetapi {
4 | 
5 | namespace utility {
6 | 
7 | // Static
8 | std::random_device rd;
9 | 
10 | dre disjoint::gen(rd());
11 | udb disjoint::distro = udb(0, 1);
12 | 
13 | dre Interval <1> ::gen(rd());
14 | udb Interval <1> ::distro = udb(0, 1);
15 | 
16 | Interval <1> runit;
17 | 
18 | // Functions
19 | Interval <1> operator|(const Interval <1> &a, const Interval <1> &b)
20 | {
21 | 	Interval <1> out = a;
22 | 
23 | 	return out |= b;
24 | }
25 | 
26 | std::ostream &operator<<(std::ostream &os, const Interval <1> &itv)
27 | {
28 | 	size_t sz = itv._union.size();
29 | 
30 | 	for (size_t i = 0; i < sz; i++) {
31 | 		auto itr = itv._union.begin();
32 | 
33 | 		std::advance(itr, i);
34 | 
35 | 		if (itr->closed)
36 | 			os << "[";
37 | 		else
38 | 			os << "(";
39 | 
40 | 		os << itr->left << ", " << itr->right;
41 | 
42 | 		if (itr->closed)
43 | 			os << "]";
44 | 		else
45 | 			os << ")";
46 | 
47 | 		if (i < sz - 1)
48 | 			os << " U ";
49 | 	}
50 | 
51 | 	return os;
52 | }
53 | 
54 | // Literal constructor
55 | Interval <1> operator""_I(unsigned long long int x)
56 | {
57 | 	return
Interval <1> (x); 58 | } 59 | 60 | Interval <1> operator""_I(long double x) 61 | { 62 | return Interval <1> (x); 63 | } 64 | 65 | /* 66 | template 67 | Interval operator*(const Interval &, const Interval &) 68 | { 69 | } */ 70 | 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /source/timer.cpp: -------------------------------------------------------------------------------- 1 | #include "../include/timer.hpp" 2 | 3 | namespace zhetapi { 4 | 5 | Timer::Timer() {} 6 | 7 | void Timer::start() 8 | { 9 | _start = _clk.now(); 10 | } 11 | 12 | void Timer::stop() 13 | { 14 | _end = _clk.now(); 15 | } 16 | 17 | Timer::time Timer::now() 18 | { 19 | return _clk.now(); 20 | } 21 | 22 | long double Timer::dt() 23 | { 24 | return (std::chrono::duration_cast 25 | (_end - _start)).count(); 26 | } 27 | 28 | long double Timer::split() 29 | { 30 | stop(); 31 | 32 | return dt(); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /testing/activation.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | using namespace zhetapi; 4 | using namespace zhetapi::ml; 5 | 6 | static bool act_general(ostream &oss, 7 | const std::string &prefix, 8 | const vector *> &acts, 9 | const vector > &ins, 10 | const vector >> &outs) 11 | { 12 | static double epsilon = 1e-10; 13 | 14 | oss << "Inputs:" << endl; 15 | for (auto v : ins) 16 | oss << "\t" << v << endl; 17 | 18 | for (size_t i = 0; i < acts.size(); i++) { 19 | oss << endl; 20 | oss << "Next activation:" << endl; 21 | for (size_t j = 0; j < ins.size(); j++) { 22 | oss << prefix << (i + 1) << "(input #" << (j + 1) 23 | << ") = " << acts[i]->compute(ins[j]) << endl; 24 | oss << "should equal " << outs[i][j] << endl; 25 | oss << "diff = " << (acts[i]->compute(ins[j]) - outs[i][j]).norm() << endl; 26 | 27 | if ((acts[i]->compute(ins[j]) - outs[i][j]).norm() > epsilon) 28 | return false; 29 | } 30 | } 31 | 32 | vector *> dacts; 33 | for (auto act : acts) 34 | dacts.push_back(act->derivative()); 35 | 36 | for (size_t i = 0; i < dacts.size(); i++) { 37 | oss << endl; 38 | oss << "Next activation derivative:" << endl; 39 | for (size_t j = 0; j < ins.size(); j++) { 40 | Vector dout(ins[j].size()); 41 | 42 | // Use gradient checking 43 | for (size_t k = 0; k < ins[j].size(); k++) { 44 | Vector back = ins[j]; 45 | Vector forward = ins[j]; 46 | 47 | back[k] -= epsilon; 48 | forward[k] += epsilon; 49 | 50 | dout[k] = (acts[i]->compute(forward) 51 | - acts[i]->compute(back))[k]/(2 * epsilon); 52 | } 53 | 54 | oss << prefix << (i + 1) << "(input #" << (j + 1) 55 | << ") = " << dacts[i]->compute(ins[j]) << endl; 56 | oss << "should equal " << dout << endl; 57 | oss << "diff = " << (dacts[i]->compute(ins[j]) - dout).norm() << endl; 58 | 59 | if ((dacts[i]->compute(ins[j]) - dout).norm() > 1e-5) 60 | return false; 61 | } 62 | } 63 | 64 | for (auto act : acts) 65 | delete act; 66 | 67 | for (auto dact : dacts) 68 | delete dact; 69 | 70 | return true; 71 | } 72 | 73 | TEST(act_linear) 74 | { 75 | return act_general(oss, 76 | "linear", 77 | { 78 | new Linear (), 79 | new Linear (2) 80 | }, 81 | { 82 | Vector {1, 2, 3, 4} 83 | }, 84 | { 85 | {Vector {1, 2, 3, 4}}, 86 | {Vector {2, 4, 6, 8}} 87 | }); 88 | } 89 | 90 | TEST(act_relu) 91 | { 92 | return act_general(oss, 93 | "relu", 94 | { 95 | new ReLU () 96 | }, 97 | { 98 | Vector {1, 2, 3, 4}, 99 | Vector {1, -1, 3, -1} 100 | }, 101 | { 102 | { 103 | Vector {1, 2, 3, 
4}, 104 | Vector {1, 0, 3, 0} 105 | } 106 | }); 107 | } 108 | 109 | TEST(act_leaky_relu) 110 | { 111 | return act_general(oss, 112 | "leaky relu", 113 | { 114 | new LeakyReLU (0.2) 115 | }, 116 | { 117 | Vector {1, 2, 3, 4}, 118 | Vector {1, -1, 3, -2} 119 | }, 120 | { 121 | { 122 | Vector {1, 2, 3, 4}, 123 | Vector {1, -0.2, 3, -0.4} 124 | } 125 | }); 126 | } 127 | 128 | TEST(act_sigmoid) 129 | { 130 | return act_general(oss, 131 | "sigmoid", 132 | { 133 | new Sigmoid () 134 | }, 135 | { 136 | Vector {0.5, 2, 0, 4}, 137 | Vector {1, -1, 3, -2} 138 | }, 139 | { 140 | { 141 | Vector { 142 | 0.622459331202, 143 | 0.880797077978, 144 | 0.5, 145 | 0.982013790038}, 146 | Vector { 147 | 0.73105857863, 148 | 0.26894142137, 149 | 0.952574126822, 150 | 0.119202922022} 151 | } 152 | }); 153 | } 154 | -------------------------------------------------------------------------------- /testing/calculus.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(integration) 4 | { 5 | using namespace zhetapi::utility; 6 | 7 | auto f = [](double x) { 8 | return x * x + x; 9 | }; 10 | 11 | auto df = [](double x) { 12 | return 2 * x + 1; 13 | }; 14 | 15 | oss << "f(4) = " << f(4) << endl; 16 | oss << "f(4) = " << eulers_method(df, {2.0, f(2)}, 4.0) << endl; 17 | 18 | return true; 19 | } 20 | -------------------------------------------------------------------------------- /testing/fourier.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(fourier_series) 4 | { 5 | using namespace zhetapi; 6 | 7 | FourierSeries fsa({1, 2, 3, 7}, {5, 6, 8}); 8 | 9 | oss << "fsa @ 0 = " << fsa(0) << endl; 10 | oss << "fsa @ 10 = " << fsa(10) << endl; 11 | 12 | FourierSeries fsb({1, 2, 5, 3, 6, 7, 8}); 13 | 14 | oss << "fsb @ 0 = " << fsb(0) << endl; 15 | oss << "fsb @ 10 = " << fsb(10) << endl; 16 | 17 | if (fsa(0) != fsb(0) || fsa(10) != fsb(10)) { 18 | oss << "Unequal values..." 
<< endl; 19 | 20 | return false; 21 | } 22 | 23 | return true; 24 | } 25 | -------------------------------------------------------------------------------- /testing/global.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PORT_H_ 2 | #define PORT_H_ 3 | 4 | // C/C++ headers 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | // Engine headers 17 | #include "../zhetapi.hpp" 18 | 19 | // Macros 20 | #define TEST(name) bool name(ostream &oss, int cout) 21 | #define RIG(name) {#name, &name} 22 | 23 | // Namespaces 24 | using namespace std; 25 | 26 | // Typedefs 27 | using tclk = chrono::high_resolution_clock; 28 | using tpoint = chrono::high_resolution_clock::time_point; 29 | 30 | // Timers 31 | extern tclk clk; 32 | extern tpoint tmp; 33 | 34 | // Bench marking structures 35 | struct bench { 36 | tpoint epoch; 37 | 38 | bench() : epoch(clk.now()) {} 39 | bench(const tpoint &t) : epoch(t) {} 40 | }; 41 | 42 | ostream &operator<<(ostream &, const bench &); 43 | 44 | // Coloring 45 | struct term_colors { 46 | string color; 47 | }; 48 | 49 | extern term_colors reset; 50 | 51 | extern term_colors bred; 52 | extern term_colors bgreen; 53 | extern term_colors byellow; 54 | 55 | ostream &operator<<(ostream &, const term_colors &); 56 | 57 | struct term_ok {}; 58 | struct term_err {}; 59 | 60 | extern term_ok ok; 61 | extern term_err err; 62 | 63 | ostream &operator<<(ostream &, const term_ok &); 64 | ostream &operator<<(ostream &, const term_err &); 65 | 66 | // Test functions 67 | TEST(gamma_and_factorial); 68 | 69 | TEST(vector_construction_and_memory); 70 | TEST(vector_operations); 71 | 72 | TEST(matrix_construction_and_memory); 73 | TEST(kernel_apt_and_mult); 74 | TEST(kernel_rmt_and_mult); 75 | TEST(kernel_vvt_mult); 76 | 77 | TEST(tensor_construction_and_memory); 78 | 79 | TEST(integration); 80 | 81 | TEST(interval_construction); 82 | TEST(interval_sampling); 83 | 84 | TEST(diag_matrix); 85 | TEST(qr_decomp); 86 | TEST(lq_decomp); 87 | TEST(qr_alg); 88 | TEST(matrix_props); 89 | 90 | TEST(fourier_series); 91 | 92 | TEST(polynomial_construction); 93 | TEST(polynomial_comparison); 94 | TEST(polynomial_arithmetic); 95 | 96 | TEST(act_linear); 97 | TEST(act_relu); 98 | TEST(act_leaky_relu); 99 | TEST(act_sigmoid); 100 | 101 | #endif 102 | -------------------------------------------------------------------------------- /testing/interval.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(interval_construction) 4 | { 5 | using namespace zhetapi::utility; 6 | 7 | Interval <> i(5, 10); 8 | 9 | oss << i << endl; 10 | 11 | i = 100.0_I; 12 | 13 | oss << i << endl; 14 | 15 | i = 50_I; 16 | 17 | oss << i << endl; 18 | 19 | i = 1_I; 20 | 21 | oss << i << endl; 22 | 23 | i = 0_I; 24 | 25 | oss << i << endl; 26 | 27 | i = Interval <> (); 28 | 29 | oss << i << endl; 30 | 31 | return true; 32 | } 33 | 34 | TEST(interval_sampling) 35 | { 36 | using namespace zhetapi::utility; 37 | 38 | Interval <> i = 100_I; 39 | 40 | for (size_t k = 0; k < 10; k++) { 41 | long double x = i.uniform(); 42 | 43 | oss << "sampled " << x << endl; 44 | 45 | if (x < 0 || x > 100) { 46 | oss << "\tbad value" << endl; 47 | 48 | return false; 49 | } 50 | } 51 | 52 | return true; 53 | } 54 | -------------------------------------------------------------------------------- /testing/linalg.cpp: 
-------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(diag_matrix) 4 | { 5 | using namespace zhetapi; 6 | using namespace zhetapi::linalg; 7 | 8 | Matrix A { 9 | {1, 0, 0, 0}, 10 | {0, 2, 0, 0}, 11 | {0, 0, 3, 0}, 12 | {0, 0, 0, 4} 13 | }; 14 | 15 | vector cs {1, 2, 3, 4}; 16 | 17 | Matrix B = diag(cs); 18 | Matrix C = diag(1, 2, 3, 4); 19 | 20 | oss << "A = " << A << endl; 21 | oss << "B = " << B << endl; 22 | oss << "C = " << C << endl; 23 | 24 | if ((A != B) || (A != C) || (B != C)) { 25 | oss << "Not equal..." << endl; 26 | 27 | return false; 28 | } 29 | 30 | return true; 31 | } 32 | 33 | TEST(qr_decomp) 34 | { 35 | using namespace zhetapi; 36 | using namespace zhetapi::linalg; 37 | 38 | Matrix A = diag(1.0, 4.0, 1.0, 5.0); 39 | 40 | oss << "A = " << A << endl; 41 | 42 | auto qr = qr_decompose(A); 43 | 44 | oss << "\tQ = " << qr.q() << endl; 45 | oss << "\tR = " << qr.r() << endl; 46 | oss << "\tQR = " << qr.product() << endl; 47 | 48 | oss << "Error = " << (qr.product() - A).norm() << endl; 49 | 50 | if ((qr.product() - A).norm() > 1e-10) { 51 | oss << "Failure: A != QR" << endl; 52 | 53 | return false; 54 | } 55 | 56 | return true; 57 | } 58 | 59 | TEST(lq_decomp) 60 | { 61 | using namespace zhetapi; 62 | using namespace zhetapi::linalg; 63 | 64 | Matrix A = diag(1.0, 4.0, 1.0, 5.0); 65 | 66 | oss << "A = " << A << endl; 67 | 68 | auto lq = lq_decompose(A); 69 | 70 | oss << "\tL = " << lq.l() << endl; 71 | oss << "\tQ = " << lq.q() << endl; 72 | oss << "\tLQ = " << lq.product() << endl; 73 | 74 | oss << "Error = " << (lq.product() - A).norm() << endl; 75 | 76 | if ((lq.product() - A).norm() > 1e-10) { 77 | oss << "Failure: A != LQ" << endl; 78 | 79 | return false; 80 | } 81 | 82 | return true; 83 | } 84 | 85 | TEST(qr_alg) 86 | { 87 | using namespace zhetapi; 88 | using namespace zhetapi::linalg; 89 | 90 | // Test on diagonal matrices: 91 | // eigenvalues should be equal to 92 | // the diagonal entries 93 | Matrix A = diag(1.0, 2.0, 3.0, 4.0); 94 | 95 | oss << "A = " << A << endl; 96 | 97 | Vector E = qr_algorithm(A); 98 | 99 | oss << "E = " << E << endl; 100 | 101 | for (size_t i = 0; i < E.size(); i++) { 102 | if (E[i] != A[i][i]) { 103 | oss << "Incorrect eigenvalues..." << endl; 104 | 105 | return false; 106 | } 107 | } 108 | 109 | // Test on Fibonacci matrix 110 | A = {{1, 1}, {1, 0}}; 111 | 112 | E = qr_algorithm(A); 113 | 114 | Vector G { 115 | (double) (1 + sqrt(5.0))/2.0, 116 | (double) (1 - sqrt(5.0))/2.0 117 | }; 118 | 119 | oss << "Fib. matrix = " << A << endl; 120 | oss << "E = " << E << endl; 121 | oss << "G = " << G << endl; 122 | 123 | oss << "\nError = " << (E - G).norm() << endl; 124 | 125 | return true; 126 | } 127 | 128 | TEST(matrix_props) 129 | { 130 | using namespace zhetapi; 131 | using namespace zhetapi::linalg; 132 | 133 | Matrix A = diag(1.0, 2.0, 3.0, 4.0); 134 | Matrix I = Matrix ::identity(4); 135 | 136 | auto qr = qr_decompose(A); 137 | auto lq = lq_decompose(A); 138 | 139 | oss << "Is A diagonal? " << (is_diagonal(A) ? "yes" : "no") << endl; 140 | if (!is_diagonal(A)) { 141 | oss << "\tWrong answer..." << endl; 142 | 143 | return false; 144 | } 145 | 146 | oss << "Is I identity? " << (is_identity(I) ? "yes" : "no") << endl; 147 | if (!is_identity(I)) { 148 | oss << "\tWrong answer..." << endl; 149 | 150 | return false; 151 | } 152 | 153 | oss << "Is Q orthogonal? " << (is_orthogonal(qr.q()) ? "yes" : "no") << endl; 154 | if (!is_orthogonal(qr.q())) { 155 | oss << "\tWrong answer..." 
<< endl; 156 | 157 | return false; 158 | } 159 | 160 | oss << "Is R upper triangular? " << (is_upper_triangular(qr.r()) ? "yes" : "no") << endl; 161 | if (!is_upper_triangular(qr.r())) { 162 | oss << "\tWrong answer..." << endl; 163 | 164 | return false; 165 | } 166 | 167 | oss << "Is L lower triangular? " << (is_lower_triangular(lq.l()) ? "yes" : "no") << endl; 168 | if (!is_lower_triangular(lq.l())) { 169 | oss << "\tWrong answer..." << endl; 170 | 171 | return false; 172 | } 173 | 174 | return true; 175 | } 176 | -------------------------------------------------------------------------------- /testing/main.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | #define THREADS 8 4 | #define DEBUG_EXCEPTION 5 | // #define PASSTHROUGH_EXCEPTION 6 | // #define HANDLE_SEGFAULT 7 | // #define SINGLET kernel_vvt_mult 8 | 9 | // TODO: need a method to conduct a single test 10 | typedef pair singlet; 11 | 12 | // Testing rig 13 | vector rig { 14 | RIG(gamma_and_factorial), 15 | RIG(vector_construction_and_memory), 16 | RIG(matrix_construction_and_memory), 17 | RIG(tensor_construction_and_memory), 18 | RIG(integration), 19 | RIG(vector_operations), 20 | RIG(interval_construction), 21 | RIG(interval_sampling), 22 | RIG(diag_matrix), 23 | RIG(qr_decomp), 24 | RIG(lq_decomp), 25 | RIG(qr_alg), 26 | RIG(matrix_props), 27 | RIG(fourier_series), 28 | RIG(polynomial_construction), 29 | RIG(polynomial_comparison), 30 | RIG(polynomial_arithmetic), 31 | RIG(act_linear), 32 | RIG(act_relu), 33 | RIG(act_leaky_relu), 34 | RIG(act_sigmoid), 35 | RIG(kernel_apt_and_mult), 36 | RIG(kernel_rmt_and_mult), 37 | RIG(kernel_vvt_mult) 38 | }; 39 | 40 | vector failed; 41 | 42 | #ifdef HANDLE_SEGFAULT 43 | 44 | // Segfault handler 45 | void segfault_sigaction(int signal, siginfo_t *si, void *arg) 46 | { 47 | printf("\nCaught segfault at address %p\n", si->si_addr); 48 | exit(-1); 49 | } 50 | 51 | #endif 52 | 53 | // Timers 54 | tclk clk; 55 | 56 | // Main program 57 | int main() 58 | { 59 | 60 | #ifdef HANDLE_SEGFAULT 61 | 62 | // Setup segfault handler 63 | struct sigaction sa; 64 | 65 | memset(&sa, 0, sizeof(struct sigaction)); 66 | 67 | sigemptyset(&sa.sa_mask); 68 | 69 | sa.sa_sigaction = segfault_sigaction; 70 | sa.sa_flags = SA_SIGINFO; 71 | 72 | sigaction(SIGSEGV, &sa, NULL); 73 | 74 | #endif 75 | 76 | #ifdef SINGLET 77 | 78 | // ostringstream oss; 79 | SINGLET(cout, 0); 80 | 81 | // std::cout << "OUTPUT:\n" << oss.str(); 82 | return 0; 83 | 84 | #else 85 | 86 | // Setup times 87 | tpoint epoch = clk.now(); 88 | 89 | bench mark(epoch); 90 | 91 | mutex io_mtx; // I/O mutex 92 | mutex tk_mtx; // Task acquisition mutex 93 | mutex fl_mtx; // Task failure mutex 94 | 95 | int count = 0; 96 | int task = 0; 97 | 98 | size_t size = rig.size(); 99 | auto singleter = [&](singlet s, size_t t) { 100 | ostringstream oss; 101 | 102 | oss << string(100, '=') << endl; 103 | oss << mark << "Running \"" << s.first 104 | << "\" test [" << t << "/" 105 | << size << "]:\n" << endl; 106 | 107 | bool tmp = true; 108 | 109 | #if defined(DEBUG_EXCEPTION) 110 | 111 | oss << string(100, '-') << endl; 112 | tmp = s.second(oss, 0); 113 | oss << string(100, '-') << endl; 114 | 115 | #elif defined(PASSTHROUGH_EXCEPTION) 116 | 117 | try { 118 | oss << string(100, '-') << endl; 119 | tmp = s.second(oss, 0); 120 | oss << string(100, '-') << endl; 121 | } catch (const std::runtime_error &e) { 122 | oss << bred << "CAUGHT RUNTIME EXCEPTION (in test \"" 123 | << s.first << "\"):" << 
endl; 124 | oss << "\t" << e.what() << endl; 125 | oss << "PASSING THROUGH FOR NOW." << reset << endl; 126 | 127 | tmp = false; 128 | } catch (...) { 129 | cout << bred << "CAUGHT UNKNOWN EXCEPTION (in test \"" 130 | << s.first << "\"), PASSING THROUGH FOR NOW." 131 | << reset << endl; 132 | 133 | tmp = false; 134 | } 135 | 136 | #else 137 | 138 | try { 139 | oss << string(100, '-') << endl; 140 | tmp = s.second(oss, 0); 141 | oss << string(100, '-') << endl; 142 | } catch (...) { 143 | cout << bred << "CAUGHT UNKNOWN EXCEPTION (in test \"" 144 | << s.first << "\"), TERMINATING." << reset << endl; 145 | 146 | throw; 147 | } 148 | 149 | #endif 150 | 151 | if (tmp) { 152 | oss << endl << bgreen << "\"" << s.first 153 | << "\" test PASSED." << reset << endl; 154 | } else { 155 | // Add to list of failed tasks 156 | fl_mtx.lock(); 157 | 158 | failed.push_back(s); 159 | 160 | fl_mtx.unlock(); 161 | 162 | oss << endl << bred << "\"" << s.first 163 | << "\" test FAILED." << reset << endl; 164 | } 165 | 166 | oss << string(100, '=') << endl; 167 | 168 | io_mtx.lock(); 169 | 170 | cout << oss.str() << endl; 171 | count += (tmp) ? 1 : 0; 172 | 173 | io_mtx.unlock(); 174 | }; 175 | 176 | auto tasker = [&]() { 177 | while (true) { 178 | int t = -1; 179 | 180 | tk_mtx.lock(); 181 | 182 | if (task < (int) size) { 183 | t = task; 184 | 185 | task++; 186 | } 187 | 188 | tk_mtx.unlock(); 189 | 190 | if (t < 0) 191 | break; 192 | 193 | singleter(rig[t], t + 1); 194 | } 195 | }; 196 | 197 | thread *army = new thread[THREADS]; 198 | for (size_t i = 0; i < THREADS; i++) 199 | army[i] = thread(tasker); 200 | 201 | for (size_t i = 0; i < THREADS; i++) 202 | army[i].join(); 203 | 204 | cout << endl << mark << "Summary: passed " 205 | << count << "/" << rig.size() 206 | << " tests." << endl; 207 | 208 | if (failed.size()) { 209 | cout << endl << string(100, '=') << endl; 210 | 211 | cout << "Failed tests [" << failed.size() 212 | << "/" << rig.size() << "]:" << endl; 213 | 214 | for (auto task : failed) { 215 | cout << "\t" << task.first << endl; 216 | } 217 | 218 | cout << string(100, '=') << endl; 219 | } 220 | 221 | return (failed.size() == 0) ? 0 : 1; 222 | 223 | #endif 224 | 225 | } 226 | -------------------------------------------------------------------------------- /testing/matrix.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(matrix_construction_and_memory) 4 | { 5 | using namespace zhetapi; 6 | 7 | Matrix tmp; 8 | 9 | oss << "Default constructor: " << tmp << endl; 10 | 11 | return true; 12 | } 13 | 14 | TEST(kernel_apt_and_mult) 15 | { 16 | using namespace zhetapi; 17 | using Mat = Matrix ; 18 | using Vec = Vector ; 19 | 20 | static const size_t rounds = 3; 21 | static const long double limit = 5; 22 | static const utility::Interval <> elemd(limit); 23 | 24 | for (size_t i = 0; i < rounds; i++) { 25 | // M is 4 x 5 and V is 4 x 1 26 | Mat M(4, 5, 27 | [](size_t i, size_t j) -> double { 28 | return elemd.uniform(); 29 | } 30 | ); 31 | 32 | Vec V(4, 33 | [](size_t i) -> double { 34 | return elemd.uniform(); 35 | } 36 | ); 37 | 38 | Vec out1 = Vec(M * V.append_above(1)); 39 | Vec out2 = apt_and_mult(M, V); 40 | 41 | oss << "Outputs:" << std::endl; 42 | oss << "\tout1 = " << out1 << std::endl; 43 | oss << "\tout2 = " << out2 << std::endl; 44 | 45 | double error = (out1 - out2).norm(); 46 | 47 | oss << "\terror = " << error << std::endl; 48 | 49 | if (error > 1e-10) { 50 | oss << "\t\tToo high!" 
<< std::endl; 51 | return false; 52 | } 53 | } 54 | 55 | return true; 56 | } 57 | 58 | TEST(kernel_rmt_and_mult) 59 | { 60 | using namespace zhetapi; 61 | using Mat = Matrix ; 62 | using Vec = Vector ; 63 | 64 | static const size_t rounds = 3; 65 | static const long double limit = 5; 66 | static const utility::Interval <> elemd(limit); 67 | 68 | for (size_t i = 0; i < rounds; i++) { 69 | // M is 4 x 5 and V is 4 x 1 70 | Mat M(4, 5, 71 | [](size_t i, size_t j) -> double { 72 | return elemd.uniform(); 73 | } 74 | ); 75 | 76 | Vec V(4, 77 | [](size_t i) -> double { 78 | return elemd.uniform(); 79 | } 80 | ); 81 | 82 | Vec out1 = Vec(M.transpose() * V).remove_top(); 83 | Vec out2 = rmt_and_mult(M, V); 84 | 85 | oss << "Outputs:" << std::endl; 86 | oss << "\tout1 = " << out1 << std::endl; 87 | oss << "\tout2 = " << out2 << std::endl; 88 | 89 | double error = (out1 - out2).norm(); 90 | 91 | oss << "\terror = " << error << std::endl; 92 | 93 | if (error > 1e-10) { 94 | oss << "\t\tToo high!" << std::endl; 95 | return false; 96 | } 97 | } 98 | 99 | return true; 100 | } 101 | 102 | TEST(kernel_vvt_mult) 103 | { 104 | using namespace zhetapi; 105 | using Mat = Matrix ; 106 | using Vec = Vector ; 107 | 108 | static const size_t rounds = 3; 109 | static const long double limit = 5; 110 | static const utility::Interval <> elemd(limit); 111 | 112 | for (size_t i = 0; i < rounds; i++) { 113 | Vec v1(5, 114 | [](size_t i) -> double { 115 | return elemd.uniform(); 116 | } 117 | ); 118 | 119 | Vec v2(4, 120 | [](size_t i) -> double { 121 | return elemd.uniform(); 122 | } 123 | ); 124 | 125 | Mat out1 = v1 * v2.transpose(); 126 | Mat out2 = vvt_mult(v1, v2); 127 | 128 | oss << "Outputs:" << std::endl; 129 | oss << "\tout1 = " << out1 << std::endl; 130 | oss << "\tout1.rows = " << out1.get_rows() << std::endl; 131 | oss << "\tout1.cols = " << out1.get_cols() << std::endl; 132 | oss << "\tout2 = " << out2 << std::endl; 133 | oss << "\tout2.rows = " << out2.get_rows() << std::endl; 134 | oss << "\tout2.cols = " << out2.get_cols() << std::endl; 135 | 136 | double error = (out1 - out2).norm(); 137 | 138 | oss << "\terror = " << error << std::endl; 139 | 140 | if (error > 1e-10) { 141 | oss << "\t\tToo high!" 
<< std::endl; 142 | return false; 143 | } 144 | } 145 | 146 | return true; 147 | } 148 | -------------------------------------------------------------------------------- /testing/polynomial.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(polynomial_construction) 4 | { 5 | using namespace zhetapi; 6 | 7 | // Test-global resources 8 | int coeffs[] {1, 2, 3, 4}; 9 | 10 | // Tests 11 | Polynomial f {1, 2, 3, 4, 5}; 12 | 13 | oss << "f: " << f << endl; 14 | oss << "\tdeg(f) = " << f.degree() << endl; 15 | oss << "\tf(1) = " << f(1) << endl; 16 | oss << "\tf(1) = " << f.evaluate(1) << endl; 17 | 18 | if (f.degree() != 4) { 19 | oss << "INCORRECT DEGREE (for f)" << endl; 20 | 21 | return false; 22 | } 23 | 24 | if (f(1) != 15) { 25 | oss << "INCORRECT VALUE (for f)" << endl; 26 | 27 | return false; 28 | } 29 | 30 | Polynomial g {1, 2, 3, 4}; 31 | 32 | oss << "g: " << g << endl; 33 | oss << "\tdeg(f) = " << g.degree() << endl; 34 | oss << "\tg(1) = " << g(1) << endl; 35 | oss << "\tg(1) = " << g.evaluate(1) << endl; 36 | 37 | if (g.degree() != 3) { 38 | oss << "INCORRECT DEGREE (for g)" << endl; 39 | 40 | return false; 41 | } 42 | 43 | if (g(1) != 10) { 44 | oss << "INCORRECT VALUE (for g)" << endl; 45 | 46 | return false; 47 | } 48 | 49 | Polynomial h(coeffs, 3); 50 | 51 | oss << "h: " << h << endl; 52 | oss << "\tdeg(h) = " << h.degree() << endl; 53 | oss << "\th(1) = " << h(1) << endl; 54 | oss << "\th(1) = " << h.evaluate(1) << endl; 55 | 56 | if (h.degree() != 2) { 57 | oss << "INCORRECT DEGREE (for h)" << endl; 58 | 59 | return false; 60 | } 61 | 62 | if (h(1) != 6) { 63 | oss << "INCORRECT VALUE (for h)" << endl; 64 | 65 | return false; 66 | } 67 | 68 | return true; 69 | } 70 | 71 | TEST(polynomial_comparison) 72 | { 73 | using namespace zhetapi; 74 | 75 | Polynomial f {1, 2, 3, 4, 5}; 76 | Polynomial fp1 {1, 2, 3, 4, 5}; 77 | Polynomial fp2 {1, 2, 3, 4, 6}; 78 | Polynomial fp3 {1, 2, 3, 4}; 79 | 80 | Polynomial fcpy1(f); 81 | Polynomial fcpy2 = f; 82 | 83 | oss << boolalpha; 84 | oss << "f == fp1: " << (f == fp1) << endl; 85 | oss << "f == fp2: " << (f == fp2) << endl; 86 | oss << "f == fp3: " << (f == fp3) << endl; 87 | 88 | // TODO: Add assert tests 89 | if (f != fp1 || f == fp2 || f == fp3) 90 | return false; 91 | 92 | oss << "fcpy1 = " << fcpy1 << endl; 93 | oss << "fcpy2 = " << fcpy2 << endl; 94 | 95 | oss << "f == fcpy1: " << (f == fcpy1) << endl; 96 | oss << "f == fcpy2: " << (f == fcpy2) << endl; 97 | 98 | if (f != fcpy1 || f != fcpy2) 99 | return false; 100 | 101 | return true; 102 | } 103 | 104 | TEST(polynomial_arithmetic) 105 | { 106 | using namespace zhetapi; 107 | 108 | Polynomial f {1, 2, 3, 4, 5}; 109 | Polynomial g {1, 2, 3, 4}; 110 | 111 | oss << "f + g = " << f + g << endl; 112 | oss << "f - g = " << f - g << endl; 113 | oss << "g - f = " << g - f << endl; 114 | 115 | if (f + g != Polynomial {2, 4, 6, 8, 5}) 116 | return false; 117 | 118 | if (f - g != Polynomial {0, 0, 0, 0, 5}) 119 | return false; 120 | 121 | if (g - f != Polynomial {0, 0, 0, 0, -5}) 122 | return false; 123 | 124 | return true; 125 | } 126 | -------------------------------------------------------------------------------- /testing/printing.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | // Terminal objects 4 | term_colors reset {"0"}; 5 | 6 | term_colors bred {"1;31"}; 7 | term_colors bgreen {"1;32"}; 8 | term_colors byellow {"1;33"}; 9 | 10 | term_ok ok; 
11 | term_err err; 12 | 13 | ostream &operator<<(ostream &os, const term_colors &tc) 14 | { 15 | return (os << "\033[" << tc.color << "m"); 16 | } 17 | 18 | ostream &operator<<(ostream &os, const term_ok &tok) 19 | { 20 | return (os << bgreen << "[OK]" << reset); 21 | } 22 | 23 | ostream &operator<<(ostream &os, const term_err &terr) 24 | { 25 | return (os << bred << "[ERR]" << reset); 26 | } -------------------------------------------------------------------------------- /testing/special.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(gamma_and_factorial) 4 | { 5 | using namespace zhetapi::special; 6 | 7 | for (double i = 0; i < 10; i++) 8 | oss << "ln_gamma(" << (i + 1) << ") = " << ln_gamma(i + 1) << endl; 9 | 10 | for (double i = 0; i < 10; i++) 11 | oss << "ln_factorial(" << i << ") = " << ln_factorial(i) << endl; 12 | 13 | try { 14 | ln_gamma(0); 15 | 16 | return false; 17 | } catch (const char *err) { 18 | oss << "\terr: " << err << endl; 19 | } 20 | 21 | try { 22 | ln_factorial(-1); 23 | 24 | return false; 25 | } catch (const char *err) { 26 | oss << "\terr: " << err << endl; 27 | } 28 | 29 | return true; 30 | } 31 | -------------------------------------------------------------------------------- /testing/tensor.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(tensor_construction_and_memory) 4 | { 5 | using namespace zhetapi; 6 | 7 | Tensor t1; 8 | 9 | oss << "Default: " << t1 << endl; 10 | 11 | Tensor t2({4, 5, 6}); 12 | 13 | oss << "Dimension constructor: " << t2 << endl; 14 | 15 | return true; 16 | } 17 | -------------------------------------------------------------------------------- /testing/timers.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | ostream &operator<<(ostream &os, const bench &b) 4 | { 5 | tpoint tmp = clk.now(); 6 | 7 | double mcs = chrono::duration_cast 8 | 9 | (tmp - b.epoch).count(); 10 | 11 | string unit = " µs"; 12 | 13 | if (mcs > 1e6) { 14 | mcs /= 1e6; 15 | unit = " s"; 16 | } else if (mcs > 1e3) { 17 | mcs /= 1e3; 18 | unit = " ms"; 19 | } 20 | 21 | os << byellow << "["; 22 | os << mcs; 23 | os << unit; 24 | os << "]\t" << reset; 25 | 26 | return os; 27 | } 28 | -------------------------------------------------------------------------------- /testing/vector.cpp: -------------------------------------------------------------------------------- 1 | #include "global.hpp" 2 | 3 | TEST(vector_construction_and_memory) 4 | { 5 | using namespace zhetapi; 6 | 7 | Vector tmp; 8 | 9 | oss << "Default constructor: " << tmp << endl; 10 | 11 | tmp = Vector {1, 1, 1, 4, 6}; 12 | 13 | oss << "Initializer list constructor: " << tmp << endl; 14 | 15 | vector nums {1, 6, 3, 8, 1}; 16 | 17 | tmp = Vector (nums); 18 | 19 | oss << "Vector construction and assignment operator: " << tmp << endl; 20 | 21 | tmp = Vector (4, 6); 22 | 23 | oss << "Size and default value constructor: " << tmp << endl; 24 | 25 | /* double *arr = new double[10]; 26 | for (size_t i = 0; i < 10; i++) 27 | arr[i] = pow(i, 3.14); 28 | 29 | tmp = Vector (10, arr); 30 | 31 | oss << "Size and pointer constructor: " << tmp << endl; 32 | 33 | tmp = Vector (5, arr); 34 | 35 | oss << "Cropped size and pointer constructor: " << tmp << endl; 36 | 37 | // Free resources 38 | delete[] arr; */ 39 | 40 | return true; 41 | } 42 | 43 | TEST(vector_operations) 44 | { 45 | using namespace zhetapi; 46 
| 47 | Vector a = {1, 2, 3}; 48 | Vector b = {4, 5, 6}; 49 | Vector c = {1, 2, 3, 4, 5, 6}; 50 | 51 | Vector cc = { 52 | 1, 2, 3, 4, 5, 6, 53 | 1, 2, 3, 4, 5, 6, 54 | }; 55 | 56 | oss << "a = " << a << endl; 57 | oss << "b = " << b << endl; 58 | 59 | oss << concat(a, b) << endl; 60 | oss << c << endl; 61 | 62 | if (c != concat(a, b)) { 63 | oss << "Concatenation is incorrect." << endl; 64 | 65 | return false; 66 | } else { 67 | oss << "Concatenation passed." << endl; 68 | } 69 | 70 | oss << "\na o b = " << concat(a, b) << endl; 71 | oss << "c = " << c << endl; 72 | 73 | oss << cc << endl; 74 | oss << concat(a, b, c) << endl; 75 | 76 | if (cc != concat(a, b, c)) { 77 | oss << "(Variadic) Concatenation is incorrect." << endl; 78 | 79 | return false; 80 | } else { 81 | oss << "(Variadic) Concatenation passed." << endl; 82 | } 83 | 84 | return true; 85 | } 86 | -------------------------------------------------------------------------------- /zhetapi.hpp: -------------------------------------------------------------------------------- 1 | #include "include/fourier.hpp" 2 | #include "include/linalg.hpp" 3 | #include "include/matrix.hpp" 4 | #include "include/polynomial.hpp" 5 | #include "include/tensor.hpp" 6 | #include "include/vector.hpp" 7 | 8 | #include "include/std/calculus.hpp" 9 | #include "include/std/functions.hpp" 10 | #include "include/std/interval.hpp" 11 | #include "include/std/activations.hpp" 12 | #include "include/std/erfs.hpp" 13 | 14 | #include "include/core/kernels.hpp" --------------------------------------------------------------------------------
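A closing usage sketch (illustrative only): the umbrella header above pulls in
the modules exercised by the tests, so a minimal program against this library
might look like the following, assuming the repository root is on the include
path.

	#include <iostream>

	#include "zhetapi.hpp"

	int main()
	{
		using namespace zhetapi;

		// Polynomials: coefficients {1, 2, 3, 4, 5}, so f(1) = 15
		Polynomial <double> f {1, 2, 3, 4, 5};
		std::cout << "f(1) = " << f(1) << std::endl;

		// Linear algebra: eigenvalues of a diagonal matrix via the QR algorithm
		auto A = linalg::diag(1.0, 2.0, 3.0, 4.0);
		std::cout << "E = " << linalg::qr_algorithm(A) << std::endl;
	}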