├── .github ├── CONTRIBUTING.md └── workflows │ └── main.yml ├── .gitignore ├── .vscode └── extensions.json ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake ├── CMakeLists.txt └── cabernet-config.cmake.in ├── examples ├── CMakeLists.txt ├── Makefile ├── cabernet-examples-layers ├── functions.cpp ├── layers.cpp ├── model.cpp └── operations.cpp ├── in-process ├── README.md ├── data │ ├── train-images.idx3-ubyte │ └── train-labels.idx1-ubyte ├── data_reader.cpp ├── data_reader_bin.cpp ├── idea.cpp ├── read_mnist.cpp ├── usless_datareader.hpp ├── usless_dataset.hpp └── usless_normalizers.hpp ├── include ├── CaberNet.h └── CaberNet │ ├── criterions.h │ ├── dataset.h │ ├── functions.h │ ├── initializers.h │ ├── layers.h │ ├── model.h │ ├── optimizers.h │ ├── statistics │ └── distributions.h │ ├── tensor.h │ └── tensor │ ├── tensor_float.h │ └── tensor_int.h ├── src ├── README.md ├── criterions.cpp ├── functions.cpp ├── internals │ ├── config.h │ ├── criterions │ │ ├── internal_criterion_nllloss.cpp │ │ └── internal_criterions.hpp │ ├── functions │ │ ├── internal_function_linear.cpp │ │ ├── internal_function_logsoftmax.cpp │ │ ├── internal_function_relu.cpp │ │ ├── internal_function_softmax.cpp │ │ └── internal_functions.hpp │ ├── internal_array.hpp │ ├── internal_array_map.cpp │ ├── internal_base.hpp │ ├── internal_expression.hpp │ ├── internal_graph.hpp │ ├── internal_tensor.hpp │ └── operations │ │ ├── internal_operation_addition.cpp │ │ ├── internal_operation_matmul.cpp │ │ ├── internal_operation_multiplication.cpp │ │ └── internal_operations.hpp ├── layers.cpp ├── optimizers.cpp ├── tensor.cpp └── tensor │ ├── tensor_float.cpp │ └── tensor_int.cpp └── tests ├── CMakeLists.txt ├── criterions.cpp ├── function_softmax.cpp ├── functions.cpp ├── operations.cpp └── optimizers.cpp /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Any contribution is more than welcome. If you are not sure about your contribution, just push it to the in-process folder and I will see what I can do with it. 4 | 5 | > **Tip:** If you are working from VS Code, you can [install the recommended extensions](https://dev.to/askrishnapravin/recommend-vs-code-extensions-to-your-future-teammates-4gkb) to quickly get set up. 6 | 7 | ## Working Locally 8 | 9 | After forking and cloning your repo, you'll need to install a few dependencies: 10 | 11 | ```sh 12 | sudo apt install libeigen3-dev libgtest-dev libgmock-dev 13 | ``` 14 | 15 | Or you can use your favorite system package manager. 16 | If you prefer C++ package managers, you can do: 17 | 18 | - `vcpkg install eigen3 gtest` 19 | - `conan install --requires="eigen/[>=3 <4]" --requires="gtest/[>=1 <2]" --generator=CMakeDeps --output-folder=build` 20 | 21 | ### Building the library 22 | 23 | ```sh 24 | cmake -E make_directory build 25 | cd build 26 | 27 | cmake .. 28 | cmake --build . 29 | ctest . 30 | ``` 31 | 32 | If you have any questions feel free to reach out on Discord.
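### Sanity check

Once the build and install succeed, a tiny program is a quick way to verify that the library and its Eigen dependency are wired up correctly. This is only a sketch based on the tensor API shown in the README (`net::Tensor<float>`, `fill`, `perform`); adjust the include path if you installed the headers somewhere else.

```cpp
#include <CaberNet.h>
#include <iostream>

int main() {
    // Operations only allocate graph nodes; perform() executes them.
    net::Tensor<float> x({2,2}, false); x.fill({1,2,3,4});
    net::Tensor<float> y({2,2}, false); y.fill(1);
    x = x + y;   // builds a node, computes nothing yet
    x.perform(); // evaluates the graph
    std::cout << x << std::endl; // expect [2, 3, 4, 5, ]
    return 0;
}
```

If this compiles, links against `cabernet`, and prints the tensor, your setup is good.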
33 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | build: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [macos-latest, windows-latest, ubuntu-latest] 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - uses: friendlyanon/setup-vcpkg@v1 # needs to be after checkout or else it's deleted 19 | with: 20 | committish: db0473513e5dc73ec6b6f431ff05d2f398eea042 21 | - if: matrix.os == 'windows-latest' # Because windows is still x86 by default 22 | shell: bash # access regardless of the host operating system 23 | run: $VCPKG_ROOT/vcpkg install eigen3 gtest --triplet=x64-windows 24 | - if: matrix.os != 'windows-latest' 25 | run: $VCPKG_ROOT/vcpkg install eigen3 gtest 26 | 27 | - run: cmake -E make_directory ${{ github.workspace }}/build 28 | 29 | - name: configure 30 | shell: bash 31 | working-directory: ${{ github.workspace }}/build 32 | run: cmake $GITHUB_WORKSPACE -DCMAKE_TOOLCHAIN_FILE=$VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake 33 | 34 | - name: build 35 | shell: bash 36 | working-directory: ${{ github.workspace }}/build 37 | run: cmake --build . 38 | 39 | - name: test 40 | shell: bash 41 | working-directory: ${{ github.workspace }}/build 42 | run: ctest . --build-config Debug --rerun-failed --output-on-failure 43 | 44 | install: 45 | runs-on: ${{ matrix.os }} 46 | env: 47 | CMAKE_BUILD_TYPE: Release 48 | strategy: 49 | matrix: 50 | os: [macos-latest, windows-latest, ubuntu-latest] 51 | 52 | steps: 53 | - uses: actions/checkout@v3 54 | - uses: friendlyanon/setup-vcpkg@v1 # needs to be after checkout or else it's deleted 55 | with: 56 | committish: db0473513e5dc73ec6b6f431ff05d2f398eea042 57 | - if: matrix.os == 'windows-latest' # Because windows is still x86 by default 58 | shell: bash # access regardless of the host operating system 59 | run: $VCPKG_ROOT/vcpkg install eigen3 gtest --triplet=x64-windows 60 | - if: matrix.os != 'windows-latest' 61 | run: $VCPKG_ROOT/vcpkg install eigen3 gtest 62 | 63 | - run: cmake -E make_directory ${{ github.workspace }}/build 64 | 65 | - if: matrix.os != 'windows-latest' 66 | name: setup 67 | shell: bash 68 | working-directory: ${{ github.workspace }}/build 69 | run: | 70 | cmake $GITHUB_WORKSPACE -DCMAKE_TOOLCHAIN_FILE=$VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake -DBUILD_TESTING=OFF 71 | cmake --build . 72 | sudo cmake --install . 73 | 74 | # No sudo and it's multi config 75 | - if: matrix.os == 'windows-latest' 76 | name: setup 77 | shell: bash 78 | working-directory: ${{ github.workspace }}/build 79 | run: | 80 | cmake $GITHUB_WORKSPACE -DCMAKE_TOOLCHAIN_FILE=$VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake -DBUILD_TESTING=OFF 81 | cmake --build . --config Release 82 | cmake --install . --config Release 83 | 84 | - name: examples 85 | shell: bash 86 | working-directory: ${{ github.workspace }}/examples 87 | run: | 88 | cmake . --debug-find -DCMAKE_TOOLCHAIN_FILE=$VCPKG_ROOT/scripts/buildsystems/vcpkg.cmake 89 | cmake --build . 
--config Release 90 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled output 2 | build/ 3 | bin/ 4 | *.out 5 | autogradient_design 6 | 7 | # CMake artifacts 8 | CMakeFiles/ 9 | CMakeCache.txt 10 | CMakeScripts/ 11 | cmake_install.cmake 12 | CMakeOutput.log 13 | CMakeTmp/ 14 | CMakeDirectoryInformation.cmake 15 | 16 | # Compiled libraries 17 | lib/ 18 | 19 | # IDE-specific files (but keep the recommended extensions list) 20 | .vscode/* 21 | !.vscode/extensions.json 22 | .idea/ 23 | 24 | # Ignore any personal or system-specific files 25 | .DS_Store 26 | Thumbs.db 27 | 28 | # Ignore user-specific CMake files 29 | CMakeUserPresets.json 30 | CMakeUserPresets.json.old 31 | 32 | # Ignore compilation scripts (if not needed in the repo) 33 | compilation.sh 34 | 35 | # Ignore generated documentation or other generated files 36 | /doc/ 37 | /docs/ 38 | /coverage/ 39 | /xml/ 40 | 41 | # Ignore any log files 42 | *.log 43 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "ms-vscode.cpptools", 4 | "ms-vscode.cpptools-extension-pack", 5 | "ms-vscode.cmake-tools", 6 | "twxs.cmake" 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.22) 2 | project(cabernet CXX) 3 | 4 | include(GNUInstallDirs) # Reasonable defaults 5 | 6 | option(CABERNET_BUILD_EXAMPLES "Build and run the examples demonstrating important features" OFF) 7 | 8 | # Define source and header file variables 9 | set(INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include) 10 | set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) 11 | 12 | set(HEADERS 13 | ${INCLUDE_DIR}/CaberNet.h 14 | ${INCLUDE_DIR}/CaberNet/tensor.h 15 | ${INCLUDE_DIR}/CaberNet/tensor/tensor_float.h 16 | ${INCLUDE_DIR}/CaberNet/tensor/tensor_int.h 17 | ${INCLUDE_DIR}/CaberNet/functions.h 18 | ${INCLUDE_DIR}/CaberNet/layers.h 19 | ${INCLUDE_DIR}/CaberNet/model.h 20 | ${INCLUDE_DIR}/CaberNet/optimizers.h 21 | ${INCLUDE_DIR}/CaberNet/criterions.h 22 | ${INCLUDE_DIR}/CaberNet/dataset.h 23 | ) 24 | 25 | set(SOURCES 26 | ${SOURCE_DIR}/tensor.cpp 27 | ${SOURCE_DIR}/tensor/tensor_float.cpp 28 | ${SOURCE_DIR}/tensor/tensor_int.cpp 29 | ${SOURCE_DIR}/functions.cpp 30 | ${SOURCE_DIR}/optimizers.cpp 31 | ${SOURCE_DIR}/layers.cpp 32 | ${SOURCE_DIR}/criterions.cpp 33 | ${SOURCE_DIR}/internals/operations/internal_operation_addition.cpp 34 | ${SOURCE_DIR}/internals/operations/internal_operation_multiplication.cpp 35 | ${SOURCE_DIR}/internals/operations/internal_operation_matmul.cpp 36 | ${SOURCE_DIR}/internals/functions/internal_function_linear.cpp 37 | ${SOURCE_DIR}/internals/functions/internal_function_softmax.cpp 38 | ${SOURCE_DIR}/internals/functions/internal_function_logsoftmax.cpp 39 | ${SOURCE_DIR}/internals/functions/internal_function_relu.cpp 40 | ${SOURCE_DIR}/internals/criterions/internal_criterion_nllloss.cpp 41 | ) 42 | 43 | find_package(Eigen3 REQUIRED CONFIG) 44 | 45 | # Create the library target 46 | add_library(${PROJECT_NAME} ${SOURCES} ${HEADERS}) 47 | target_include_directories( 48 | ${PROJECT_NAME} 49 | PUBLIC $<BUILD_INTERFACE:${INCLUDE_DIR}> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> 50 | PRIVATE $<BUILD_INTERFACE:${SOURCE_DIR}>) 51 | target_link_libraries(${PROJECT_NAME} PRIVATE Eigen3::Eigen) 52 |
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_17) 53 | 54 | # Install the library and header files 55 | add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) 56 | add_subdirectory(cmake) 57 | 58 | include(CTest) 59 | if(BUILD_TESTING) 60 | add_subdirectory(tests) 61 | endif() 62 | 63 | if(CABERNET_BUILD_EXAMPLES) 64 | add_subdirectory(examples) 65 | endif() 66 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Eric Cardozo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # C++ Deep Learning Library 2 | 3 | ## Archived 4 | While the idea of having a full deep learning framework with autograd in C++ is nice, it's too ambitious; with limited resources I think we should focus on an inference-only library first. There are too many things to consider aside from the core, like performant dataloaders, data access, tools for training, optimizers, etc. I will be working on inference only for now, for some specific models. 5 | 6 | ## Description 7 | 8 | A full C++ deep learning library inspired by the PyTorch API. It has one notable difference: when you perform an operation, the program doesn't actually execute it immediately. Instead, it allocates a node into a graph and waits for you to call the perform() method on the result (like TensorFlow, except the graph is dynamic). This allows the programmer to perform operations without making new memory allocations. 9 | 10 | There is an example [here](examples/model.cpp) of a simple neural network working on the MNIST digit dataset. Since I'm not a Facebook team, don't expect PyTorch's performance, but it works nicely. 11 | 12 | In the future, I plan to rewrite the backend using static polymorphism to avoid the virtual calls that disable compiler optimizations. 13 | 14 | 15 | ## To build the project 16 | 17 | Please see the [contributing](.github/CONTRIBUTING.md#building-the-library) guide for more information.
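## How deferred execution works

To make the description above concrete, here is a tiny, self-contained sketch of the idea. This is illustrative only, not CaberNet's actual internals (those live under `src/internals/`): every operation allocates a node in a dynamically built graph, and nothing is computed until `perform()` traverses it.

```cpp
#include <iostream>
#include <memory>

// Illustrative sketch: a node either holds a value or an operation over
// two other nodes. Building an expression allocates nodes; evaluation is
// deferred until perform() walks the graph.
struct Node {
    enum class Kind { Value, Add, Mul } kind = Kind::Value;
    float value = 0.0f;
    std::shared_ptr<Node> lhs, rhs;

    float perform() const {
        switch (kind) {
            case Kind::Value: return value;
            case Kind::Add:   return lhs->perform() + rhs->perform();
            case Kind::Mul:   return lhs->perform() * rhs->perform();
        }
        return 0.0f;
    }
};

using Expr = std::shared_ptr<Node>;

Expr constant(float v) { return std::make_shared<Node>(Node{Node::Kind::Value, v}); }
Expr operator+(Expr a, Expr b) { return std::make_shared<Node>(Node{Node::Kind::Add, 0.0f, a, b}); }
Expr operator*(Expr a, Expr b) { return std::make_shared<Node>(Node{Node::Kind::Mul, 0.0f, a, b}); }

int main() {
    Expr x = constant(2.0f);
    Expr y = constant(3.0f);
    Expr z = x + y * x;                 // graph built dynamically, nothing executed yet
    std::cout << z->perform() << '\n';  // traversal evaluates the graph: prints 8
}
```

In the real library the nodes carry tensors instead of scalars and are reused between iterations, which is what avoids the repeated memory allocations mentioned above.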
18 | 19 | Example: 20 | 21 | ```cpp 22 | #include <CaberNet.h> 23 | #include <iostream> 24 | 25 | int main() { 26 | 27 | net::Tensor<float> x({2,3}, net::requires_gradient::False); x.fill({1,2,3,4,5,6}); 28 | net::Tensor<float> w({4,3}, net::requires_gradient::True); w.fill({1,2,-3,4,5,6,7,8,-9,10,11,-12}); 29 | net::Tensor<float> v({2,4}, net::requires_gradient::True); v.fill({1,2,-3,4,5,6,7,8}); 30 | 31 | // Or use just a boolean. 32 | net::Tensor<float> b({1,4}, true); b.fill({1,2,3,4}); 33 | net::Tensor<float> I({2,4}, false); I.fill(1); 34 | 35 | x = net::function::linear(x,w,b); 36 | x = net::function::relu(x); 37 | x = x + net::matmul(v, w) * x; 38 | // builds an internal computational graph. 39 | 40 | x.perform(); 41 | x.backward(I); // traverses the graph. 42 | 43 | std::cout << x << std::endl; 44 | std::cout << w.gradient() << std::endl; 45 | std::cout << b.gradient() << std::endl; 46 | return 0; 47 | } 48 | ``` 49 | 50 | It produces the same results as the equivalent PyTorch code. 51 | You can also build layers like this: 52 | 53 | ```cpp 54 | 55 | #include <CaberNet.h> 56 | 57 | struct Network : public net::Model<Network> { 58 | 59 | // It uses the CRTP pattern: you define the forward method, 60 | // and the () operator dispatches to it when performing operations. 61 | 62 | Network() { 63 | layers.configure_optimizer(optimizer); 64 | } 65 | 66 | net::layer::Sequence layers { 67 | net::layer::Linear(784, 128, net::initializer::He), // default initializer 68 | net::layer::ReLU(), 69 | net::layer::Linear(128, 5), 70 | net::layer::LogSoftmax(1/*axis*/) 71 | }; 72 | 73 | net::Tensor<float> forward(net::Tensor<float> x) { 74 | x = layers(x); 75 | return x; 76 | } 77 | 78 | net::optimizer::SGD optimizer {/*learning rate*/ 0.1}; 79 | }; 80 | 81 | int main() { 82 | Network network; 83 | net::Tensor<float> input({5,784}, true); input.fill(1); // fills with ones 84 | net::Tensor<float> output = network(input); 85 | net::Tensor<int> labels({5}); labels.fill({1,2,3,4,5}); 86 | 87 | net::criterion::NLLLoss loss_function(output, labels); 88 | 89 | output.perform(); 90 | 91 | std::cout << loss_function.loss() << std::endl; 92 | 93 | loss_function.backward(); // backpropagate the gradients 94 | network.optimizer.step(); // triggers the optimizer. 95 | } 96 | 97 | ``` 98 | 99 | The Eigen library is used for performing all operations. The code is also backend-agnostic, meaning you can write your own custom CUDA implementations if needed. 100 | 101 | ## Acknowledgements 102 | 103 | Thanks for all your work! 104 | 105 | * @prince-chrismc. All your work is extremely valuable.
106 | -------------------------------------------------------------------------------- /cmake/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | install( 2 | TARGETS ${PROJECT_NAME} 3 | EXPORT ${PROJECT_NAME}-targets 4 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 5 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 6 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) 7 | install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../include/ 8 | DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) 9 | 10 | # Create and install CMake config files to be used in the system 11 | include(CMakePackageConfigHelpers) 12 | configure_package_config_file( 13 | ${CMAKE_CURRENT_LIST_DIR}/${PROJECT_NAME}-config.cmake.in 14 | ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake 15 | INSTALL_DESTINATION ${CMAKE_INSTALL_PREFIX}/cmake) 16 | write_basic_package_version_file( 17 | ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake 18 | VERSION 0.0.0 19 | COMPATIBILITY ExactVersion) 20 | install( 21 | EXPORT ${PROJECT_NAME}-targets 22 | NAMESPACE ${PROJECT_NAME}:: 23 | FILE ${PROJECT_NAME}-targets.cmake 24 | DESTINATION ${CMAKE_INSTALL_PREFIX}/cmake) 25 | install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake 26 | ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake 27 | DESTINATION ${CMAKE_INSTALL_PREFIX}/cmake) 28 | -------------------------------------------------------------------------------- /cmake/cabernet-config.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | include(CMakeFindDependencyMacro) 4 | find_dependency(Eigen3 CONFIG REQUIRED) 5 | 6 | include("${CMAKE_CURRENT_LIST_DIR}/cabernet-targets.cmake") 7 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.22) 2 | project(cabernet-examples CXX) 3 | 4 | if(PROJECT_IS_TOP_LEVEL) 5 | find_package(cabernet CONFIG REQUIRED) 6 | endif() 7 | 8 | function(add_example example) 9 | add_executable(${PROJECT_NAME}-${example} ${example}.cpp) 10 | target_link_libraries(${PROJECT_NAME}-${example} PRIVATE cabernet::cabernet) 11 | add_custom_command( 12 | TARGET ${PROJECT_NAME}-${example} POST_BUILD 13 | COMMAND $<TARGET_FILE:${PROJECT_NAME}-${example}> 14 | VERBATIM) 15 | target_compile_features(${PROJECT_NAME}-${example} PRIVATE cxx_std_17) 16 | endfunction(add_example) 17 | 18 | add_example(functions) 19 | add_example(layers) 20 | add_example(operations) 21 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.22 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | .PHONY : default_target 7 | 8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism. 9 | .NOTPARALLEL: 10 | 11 | #============================================================================= 12 | # Special targets provided by cmake. 13 | 14 | # Disable implicit rules so canonical targets will work. 15 | .SUFFIXES: 16 | 17 | # Disable VCS-based implicit rules. 18 | % : %,v 19 | 20 | # Disable VCS-based implicit rules. 21 | % : RCS/% 22 | 23 | # Disable VCS-based implicit rules. 24 | % : RCS/%,v 25 | 26 | # Disable VCS-based implicit rules.
27 | % : SCCS/s.% 28 | 29 | # Disable VCS-based implicit rules. 30 | % : s.% 31 | 32 | .SUFFIXES: .hpux_make_needs_suffix_list 33 | 34 | # Command-line flag to silence nested $(MAKE). 35 | $(VERBOSE)MAKESILENT = -s 36 | 37 | #Suppress display of executed commands. 38 | $(VERBOSE).SILENT: 39 | 40 | # A target that is always out of date. 41 | cmake_force: 42 | .PHONY : cmake_force 43 | 44 | #============================================================================= 45 | # Set environment variables for the build. 46 | 47 | # The shell in which to execute make rules. 48 | SHELL = /bin/sh 49 | 50 | # The CMake executable. 51 | CMAKE_COMMAND = /usr/bin/cmake 52 | 53 | # The command to remove a file. 54 | RM = /usr/bin/cmake -E rm -f 55 | 56 | # Escaping for special characters. 57 | EQUALS = = 58 | 59 | # The top-level source directory on which CMake was run. 60 | CMAKE_SOURCE_DIR = /home/eric/tmp/deep-learning-library/examples 61 | 62 | # The top-level build directory on which CMake was run. 63 | CMAKE_BINARY_DIR = /home/eric/tmp/deep-learning-library/examples 64 | 65 | #============================================================================= 66 | # Targets provided globally by CMake. 67 | 68 | # Special rule for the target edit_cache 69 | edit_cache: 70 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..." 71 | /usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. 72 | .PHONY : edit_cache 73 | 74 | # Special rule for the target edit_cache 75 | edit_cache/fast: edit_cache 76 | .PHONY : edit_cache/fast 77 | 78 | # Special rule for the target rebuild_cache 79 | rebuild_cache: 80 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 81 | /usr/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 82 | .PHONY : rebuild_cache 83 | 84 | # Special rule for the target rebuild_cache 85 | rebuild_cache/fast: rebuild_cache 86 | .PHONY : rebuild_cache/fast 87 | 88 | # The main all target 89 | all: cmake_check_build_system 90 | $(CMAKE_COMMAND) -E cmake_progress_start /home/eric/tmp/deep-learning-library/examples/CMakeFiles /home/eric/tmp/deep-learning-library/examples//CMakeFiles/progress.marks 91 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 all 92 | $(CMAKE_COMMAND) -E cmake_progress_start /home/eric/tmp/deep-learning-library/examples/CMakeFiles 0 93 | .PHONY : all 94 | 95 | # The main clean target 96 | clean: 97 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 clean 98 | .PHONY : clean 99 | 100 | # The main clean target 101 | clean/fast: clean 102 | .PHONY : clean/fast 103 | 104 | # Prepare targets for installation. 105 | preinstall: all 106 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 preinstall 107 | .PHONY : preinstall 108 | 109 | # Prepare targets for installation. 110 | preinstall/fast: 111 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 preinstall 112 | .PHONY : preinstall/fast 113 | 114 | # clear depends 115 | depend: 116 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 117 | .PHONY : depend 118 | 119 | #============================================================================= 120 | # Target rules for targets named cabernet-examples-functions 121 | 122 | # Build rule for target. 
123 | cabernet-examples-functions: cmake_check_build_system 124 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 cabernet-examples-functions 125 | .PHONY : cabernet-examples-functions 126 | 127 | # fast build rule for target. 128 | cabernet-examples-functions/fast: 129 | $(MAKE) $(MAKESILENT) -f CMakeFiles/cabernet-examples-functions.dir/build.make CMakeFiles/cabernet-examples-functions.dir/build 130 | .PHONY : cabernet-examples-functions/fast 131 | 132 | #============================================================================= 133 | # Target rules for targets named cabernet-examples-layers 134 | 135 | # Build rule for target. 136 | cabernet-examples-layers: cmake_check_build_system 137 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 cabernet-examples-layers 138 | .PHONY : cabernet-examples-layers 139 | 140 | # fast build rule for target. 141 | cabernet-examples-layers/fast: 142 | $(MAKE) $(MAKESILENT) -f CMakeFiles/cabernet-examples-layers.dir/build.make CMakeFiles/cabernet-examples-layers.dir/build 143 | .PHONY : cabernet-examples-layers/fast 144 | 145 | #============================================================================= 146 | # Target rules for targets named cabernet-examples-operations 147 | 148 | # Build rule for target. 149 | cabernet-examples-operations: cmake_check_build_system 150 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 cabernet-examples-operations 151 | .PHONY : cabernet-examples-operations 152 | 153 | # fast build rule for target. 154 | cabernet-examples-operations/fast: 155 | $(MAKE) $(MAKESILENT) -f CMakeFiles/cabernet-examples-operations.dir/build.make CMakeFiles/cabernet-examples-operations.dir/build 156 | .PHONY : cabernet-examples-operations/fast 157 | 158 | functions.o: functions.cpp.o 159 | .PHONY : functions.o 160 | 161 | # target to build an object file 162 | functions.cpp.o: 163 | $(MAKE) $(MAKESILENT) -f CMakeFiles/cabernet-examples-functions.dir/build.make CMakeFiles/cabernet-examples-functions.dir/functions.cpp.o 164 | .PHONY : functions.cpp.o 165 | 166 | functions.i: functions.cpp.i 167 | .PHONY : functions.i 168 | 169 | # target to preprocess a source file 170 | functions.cpp.i: 171 | $(MAKE) $(MAKESILENT) -f CMakeFiles/cabernet-examples-functions.dir/build.make CMakeFiles/cabernet-examples-functions.dir/functions.cpp.i 172 | .PHONY : functions.cpp.i 173 | 174 | functions.s: functions.cpp.s 175 | .PHONY : functions.s 176 | 177 | # target to generate assembly for a file 178 | functions.cpp.s: 179 | $(MAKE) $(MAKESILENT) -f CMakeFiles/cabernet-examples-functions.dir/build.make CMakeFiles/cabernet-examples-functions.dir/functions.cpp.s 180 | .PHONY : functions.cpp.s 181 | 182 | layers.o: layers.cpp.o 183 | .PHONY : layers.o 184 | 185 | # target to build an object file 186 | layers.cpp.o: 187 | $(MAKE) $(MAKESILENT) -f CMakeFiles/cabernet-examples-layers.dir/build.make CMakeFiles/cabernet-examples-layers.dir/layers.cpp.o 188 | .PHONY : layers.cpp.o 189 | 190 | layers.i: layers.cpp.i 191 | .PHONY : layers.i 192 | 193 | # target to preprocess a source file 194 | layers.cpp.i: 195 | $(MAKE) $(MAKESILENT) -f CMakeFiles/cabernet-examples-layers.dir/build.make CMakeFiles/cabernet-examples-layers.dir/layers.cpp.i 196 | .PHONY : layers.cpp.i 197 | 198 | layers.s: layers.cpp.s 199 | .PHONY : layers.s 200 | 201 | # target to generate assembly for a file 202 | layers.cpp.s: 203 | $(MAKE) $(MAKESILENT) -f CMakeFiles/cabernet-examples-layers.dir/build.make CMakeFiles/cabernet-examples-layers.dir/layers.cpp.s 204 | .PHONY : layers.cpp.s 205 | 206 | 
operations.o: operations.cpp.o 207 | .PHONY : operations.o 208 | 209 | # target to build an object file 210 | operations.cpp.o: 211 | $(MAKE) $(MAKESILENT) -f CMakeFiles/cabernet-examples-operations.dir/build.make CMakeFiles/cabernet-examples-operations.dir/operations.cpp.o 212 | .PHONY : operations.cpp.o 213 | 214 | operations.i: operations.cpp.i 215 | .PHONY : operations.i 216 | 217 | # target to preprocess a source file 218 | operations.cpp.i: 219 | $(MAKE) $(MAKESILENT) -f CMakeFiles/cabernet-examples-operations.dir/build.make CMakeFiles/cabernet-examples-operations.dir/operations.cpp.i 220 | .PHONY : operations.cpp.i 221 | 222 | operations.s: operations.cpp.s 223 | .PHONY : operations.s 224 | 225 | # target to generate assembly for a file 226 | operations.cpp.s: 227 | $(MAKE) $(MAKESILENT) -f CMakeFiles/cabernet-examples-operations.dir/build.make CMakeFiles/cabernet-examples-operations.dir/operations.cpp.s 228 | .PHONY : operations.cpp.s 229 | 230 | # Help Target 231 | help: 232 | @echo "The following are some of the valid targets for this Makefile:" 233 | @echo "... all (the default if no target is provided)" 234 | @echo "... clean" 235 | @echo "... depend" 236 | @echo "... edit_cache" 237 | @echo "... rebuild_cache" 238 | @echo "... cabernet-examples-functions" 239 | @echo "... cabernet-examples-layers" 240 | @echo "... cabernet-examples-operations" 241 | @echo "... functions.o" 242 | @echo "... functions.i" 243 | @echo "... functions.s" 244 | @echo "... layers.o" 245 | @echo "... layers.i" 246 | @echo "... layers.s" 247 | @echo "... operations.o" 248 | @echo "... operations.i" 249 | @echo "... operations.s" 250 | .PHONY : help 251 | 252 | 253 | 254 | #============================================================================= 255 | # Special targets to cleanup operation of make. 256 | 257 | # Special rule to run CMake to check the build system integrity. 258 | # No rule that depends on this can have commands that come from listfiles 259 | # because they might be regenerated. 260 | cmake_check_build_system: 261 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 262 | .PHONY : cmake_check_build_system 263 | 264 | -------------------------------------------------------------------------------- /examples/cabernet-examples-layers: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mr-mapache/deep-learning-library/96a25528b82c53c2276dbfc00bee2850b5a441dd/examples/cabernet-examples-layers -------------------------------------------------------------------------------- /examples/functions.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | To run this code, build the library following the instructions in the .github folder. 4 | 5 | Then compile this file with: 6 | 7 | cmake . -DCABERNET_BUILD_EXAMPLES=ON 8 | cmake --build . --target cabernet-examples-functions 9 | 10 | */ 11 | 12 | #include <CaberNet.h> 13 | 14 | #include <iostream> 15 | 16 | int main() { 17 | // You can use enums to set the gradient requirement: 18 | net::Tensor<float> x({2,3}, net::requires_gradient::False); x.fill({1,2,3,4,5,6}); 19 | net::Tensor<float> w({4,3}, net::requires_gradient::True); w.fill({1,2,-3,4,5,6,7,8,-9,10,11,-12}); 20 | 21 | // Or use just a boolean. Whatever you prefer.
22 | net::Tensor<float> b({1,4}, true); b.fill({1,2,3,4}); 23 | net::Tensor<float> I({2,4}, false); I.fill(1); 24 | 25 | x = net::function::linear(x,w,b); 26 | x = net::function::relu(x); 27 | x.perform(); 28 | x.backward(I); 29 | 30 | std::cout << x << std::endl; 31 | std::cout << w.gradient() << std::endl; 32 | std::cout << b.gradient() << std::endl; 33 | return 0; 34 | } 35 | 36 | /* 37 | Results should be: 38 | [0, 34, 0, 0, 0, 79, 17, 27, ] 39 | [0, 0, 0, 5, 7, 9, 4, 5, 6, 4, 5, 6, ] 40 | [0, 2, 1, 1, ] 41 | 42 | */ 43 | 44 | /* 45 | 46 | Equivalent pytorch code: 47 | import torch 48 | import torch.nn.functional as F 49 | 50 | # Initialize tensors 51 | x = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32, requires_grad=False) 52 | w = torch.tensor([[1, 2, -3], [4, 5, 6], [7, 8, -9], [10, 11, -12]], dtype=torch.float32, requires_grad=True) 53 | b = torch.tensor([1, 2, 3, 4], dtype=torch.float32, requires_grad=True) 54 | I = torch.tensor([[1,1,1,1],[1,1,1,1]], dtype=torch.float32) 55 | 56 | # Perform linear operation using torch.nn.functional.linear 57 | x = F.linear(x, w, b) 58 | x = F.relu(x) 59 | x.backward(I) 60 | 61 | # Print the result 62 | print(x) 63 | print(w.grad) 64 | print(b.grad) 65 | 66 | */ -------------------------------------------------------------------------------- /examples/layers.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | To run this code, build the library following the instructions in the .github folder. 4 | 5 | Then compile this file with: 6 | 7 | cmake . -DCABERNET_BUILD_EXAMPLES=ON 8 | cmake --build . --target cabernet-examples-layers 9 | 10 | */ 11 | 12 | #include <CaberNet.h> 13 | 14 | struct Autoencoder : public net::Model<Autoencoder> { 15 | 16 | Autoencoder() { 17 | encoder_optimizer.add_parameter(encoder.parameters()); 18 | decoder_optimizer.add_parameter(decoder.parameters()); 19 | } 20 | 21 | net::layer::Sequence encoder { 22 | net::layer::Linear(784, 128, net::initializer::He), 23 | net::layer::ReLU(), 24 | net::layer::Linear(128, 64, net::initializer::He), 25 | }; 26 | 27 | net::layer::Sequence decoder { 28 | net::layer::Linear(64, 128, net::initializer::He), 29 | net::layer::ReLU(), 30 | net::layer::Linear(128, 784, net::initializer::He), 31 | net::layer::LogSoftmax(/*axis*/ 1) 32 | }; 33 | 34 | net::Tensor<float> forward(net::Tensor<float> x) { 35 | x = encoder(x); 36 | x = decoder(x); 37 | return x; 38 | } 39 | 40 | void step() { 41 | encoder_optimizer.step(); 42 | decoder_optimizer.step(); 43 | } 44 | /* You can add different optimizers to different layers, 45 | or the same one; it doesn't matter. The optimizer holds a shared pointer 46 | to its implementation, so you can pass instances of it with value 47 | semantics without making deep copies. */ 48 | 49 | net::optimizer::SGD encoder_optimizer{0.01}; 50 | net::optimizer::SGD decoder_optimizer{0.01}; 51 | }; 52 | 53 | int main() { 54 | Autoencoder model; 55 | net::Tensor<float> x({1, 784}); x.fill(net::initializer::He); 56 | net::Tensor<float> y = model(x); 57 | y.perform(); 58 | std::cout << y; 59 | 60 | model.step(); 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /examples/model.cpp: -------------------------------------------------------------------------------- 1 | #include <CaberNet.h> 2 | 3 | struct Network : public net::Model<Network> { 4 | 5 | Network() { 6 | optimizer.add_parameter(layers.parameters()); 7 | } 8 | 9 | net::layer::Sequence layers { 10 | net::layer::Linear(784, 128), 11 | net::layer::ReLU(), 12 | net::layer::Linear(128, 10), 13 | net::layer::LogSoftmax(/*axis*/
1) 14 | }; 15 | 16 | net::Tensor<float> forward(net::Tensor<float> x) { 17 | return layers(x); 18 | } 19 | 20 | net::optimizer::SGD optimizer{0.01}; 21 | }; 22 | 23 | int main() { 24 | net::Dataset dataset(64, false); 25 | 26 | dataset.read_targets("data/train-labels.idx1-ubyte"); 27 | dataset.read_features("data/train-images.idx3-ubyte"); 28 | 29 | net::Tensor<int> targets({64}); 30 | net::Tensor<float> input({64,784}, false, true); 31 | 32 | Network model; 33 | 34 | net::Tensor<float> output({64,10}); 35 | output = model(input); 36 | 37 | net::criterion::NLLLoss criterion(output, targets); 38 | 39 | for(int epoch = 0; epoch < 10; ++epoch) { 40 | 41 | std::cout << "Epoch: " << epoch + 1 << std::endl; 42 | 43 | for(int batch = 0; batch < dataset.length(); ++batch) { 44 | input.copy(dataset.features()[batch].internal()); // I will fix this in the future so it will be prettier and without copies. 45 | targets.copy(dataset.targets()[batch].internal()); 46 | 47 | std::cout << "loss: " << criterion.loss() << std::endl; 48 | criterion.backward(); 49 | model.optimizer.step(); 50 | } 51 | 52 | } 53 | } -------------------------------------------------------------------------------- /examples/operations.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | To run this code, build the library following the instructions in the .github folder. 4 | 5 | Then compile this file with: 6 | 7 | cmake . -DCABERNET_BUILD_EXAMPLES=ON 8 | cmake --build . --target cabernet-examples-operations 9 | 10 | */ 11 | 12 | #include <CaberNet.h> 13 | #include <iostream> 14 | 15 | int main() { 16 | net::Tensor<float> x({2,3}, false); x.fill({1,2,3,4,5,6}); 17 | net::Tensor<float> y({2,3}, true); y.fill({1,1,1,-1,-1,-1}); 18 | net::Tensor<float> z({2,3}, true); z.fill(1); 19 | net::Tensor<float> I({2,3}, false); I.fill(1); 20 | net::Tensor<float> w({3,3}, true); w.fill({1,2,3,4,5,6,7,8,9}); 21 | 22 | x = x + I; 23 | x = net::matmul(x, w); 24 | x = x * z + y * z + z * y; 25 | x.perform(); 26 | x.backward(I); 27 | 28 | std::cout << "x : " << x << std::endl; 29 | std::cout << "Jy: " << y.gradient() << std::endl; 30 | std::cout << "Jz: " << z.gradient() << std::endl; 31 | std::cout << "Jw: " << w.gradient() << std::endl; 32 | return 0; 33 | } 34 | 35 | /* 36 | 37 | Results should be: 38 | x : [44, 53, 62, 76, 94, 112] 39 | Jy: [2, 2, 2, 2, 2, 2] 40 | Jz: [44, 53, 62, 76, 94, 112] 41 | Jw: [7, 7, 7, 9, 9, 9, 11, 11, 11] 42 | 43 | */ 44 | 45 | /* 46 | Equivalent pytorch code: 47 | 48 | import torch 49 | 50 | # Initialize tensors 51 | x = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32, requires_grad=False) 52 | y = torch.tensor([[1, 1, 1], [-1, -1, -1]], dtype=torch.float32, requires_grad=True) 53 | z = torch.tensor([[1, 1, 1], [1, 1, 1]], dtype=torch.float32, requires_grad=True) 54 | I = torch.tensor([[1, 1, 1], [1, 1, 1]], dtype=torch.float32, requires_grad=False) 55 | w = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.float32, requires_grad=True) 56 | 57 | 58 | # Perform operations 59 | 60 | x = x + I 61 | x = torch.matmul(x, w) 62 | x = x * z + y * z + z * y 63 | x.backward(I) 64 | 65 | # Print results 66 | print("x : ", x) 67 | print("Jy: ", y.grad) 68 | print("Jz: ", z.grad) 69 | print("Jw: ", w.grad) 70 | */ -------------------------------------------------------------------------------- /in-process/README.md: -------------------------------------------------------------------------------- 1 | ### Important 2 | 3 | Here I put some old and nasty code, under the prefix usless_, that needs a complete rebuild.
4 | 5 | If you want to build a whole statistical package or a whole IO package, or have some 6 | open source project that you want to integrate into this one, that would be nice. 7 | 8 | If you have more features that you are not sure whether to add or not, place them here. 9 | -------------------------------------------------------------------------------- /in-process/data/train-images.idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mr-mapache/deep-learning-library/96a25528b82c53c2276dbfc00bee2850b5a441dd/in-process/data/train-images.idx3-ubyte -------------------------------------------------------------------------------- /in-process/data/train-labels.idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mr-mapache/deep-learning-library/96a25528b82c53c2276dbfc00bee2850b5a441dd/in-process/data/train-labels.idx1-ubyte -------------------------------------------------------------------------------- /in-process/data_reader.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <fstream> 3 | #include <vector> 4 | #include <string> 5 | #include <algorithm> 6 | #include <cstdint> 7 | #include <cstddef> 8 | 9 | #include <CaberNet.h> 10 | #include "../src/internals/internal_array.hpp" 11 | #include "../src/internals/internal_tensor.hpp" 12 | 13 | namespace net { 14 | 15 | class Dataset { 16 | public: 17 | 18 | Dataset(std::size_t batch_size, bool shuffle = false, std::size_t reserve_size = 60000) { 19 | batch_size_ = batch_size; 20 | shuffle_ = shuffle; 21 | } 22 | 23 | void read_targets(std::string filepath) { 24 | std::ifstream file(filepath, std::ios::binary); 25 | if (file.is_open()) { 26 | uint32_t magic, num_labels; 27 | file.read(reinterpret_cast<char*>(&magic), 4); 28 | file.read(reinterpret_cast<char*>(&num_labels), 4); 29 | 30 | magic = swap_endian(magic); 31 | num_labels = swap_endian(num_labels); 32 | 33 | for(std::size_t batch = 0; batch < num_labels; batch += batch_size_) { 34 | targets_.emplace_back(Tensor<int>({batch_size_})); 35 | for(std::size_t index = 0; index < batch_size_; ++index) { 36 | uint8_t label; 37 | file.read(reinterpret_cast<char*>(&label), 1); 38 | targets_.back().data()[index] = static_cast<int>(label); 39 | }; 40 | std::cout << targets_.back() << std::endl; 41 | } 42 | } 43 | 44 | else { 45 | std::cout << "Error opening file."
<< std::endl; 46 | } 47 | 48 | } 49 | 50 | 51 | void read_features(std::string filepath) { 52 | std::ifstream file(filepath, std::ios::binary); 53 | if (file.is_open()) { 54 | uint32_t magic, num_imgs, rows, cols; 55 | file.read(reinterpret_cast<char*>(&magic), 4); 56 | file.read(reinterpret_cast<char*>(&num_imgs), 4); 57 | file.read(reinterpret_cast<char*>(&rows), 4); 58 | file.read(reinterpret_cast<char*>(&cols), 4); 59 | 60 | magic = swap_endian(magic); 61 | num_imgs = swap_endian(num_imgs); 62 | rows = swap_endian(rows); 63 | cols = swap_endian(cols); 64 | 65 | features_size_ = rows * cols; 66 | 67 | for (std::size_t batch = 0; batch < num_imgs; batch += batch_size_) { 68 | features_.emplace_back(Tensor<float>({batch_size_, features_size_})); 69 | for (std::size_t index = 0; index < batch_size_; ++index) { 70 | std::vector<uint8_t> features(features_size_); 71 | file.read(reinterpret_cast<char*>(&features[0]), features_size_); 72 | std::transform(features.begin(), features.end(), features_.back().data() + index * features_size_, [](uint8_t x) { return static_cast<float>(x) / 255.0f; }); 73 | } 74 | std::cout << features_.back() << std::endl; 75 | } 76 | 77 | } 78 | 79 | else { 80 | std::cout << "Error opening file." << std::endl; 81 | } 82 | } 83 | 84 | void clear() { 85 | features_.clear(); 86 | targets_.clear(); 87 | } 88 | 89 | private: 90 | uint32_t swap_endian(uint32_t val) { 91 | return ((val << 24) & 0xff000000) | 92 | ((val << 8) & 0x00ff0000) | 93 | ((val >> 8) & 0x0000ff00) | 94 | ((val >> 24) & 0x000000ff); 95 | } 96 | 97 | std::vector<Tensor<float>> features_; 98 | std::vector<Tensor<int>> targets_; 99 | 100 | std::size_t features_size_; 101 | std::size_t batch_size_; 102 | bool shuffle_; 103 | }; 104 | 105 | } // namespace net 106 | 107 | 108 | // g++ functions.cpp -Lcabernet/lib -lcabernet -I cabernet/include 109 | 110 | int main() { 111 | net::Dataset dataset(7); 112 | dataset.read_targets("data/train-labels.idx1-ubyte"); 113 | dataset.read_features("data/train-images.idx3-ubyte"); 114 | 115 | 116 | } -------------------------------------------------------------------------------- /in-process/data_reader_bin.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <fstream> 3 | #include <vector> 4 | #include <string> 5 | #include <cstdint> 6 | #include <utility> 7 | 8 | class Dataset { 9 | public: 10 | Dataset(std::size_t reserve_size = 60000) { 11 | features_.reserve(reserve_size); 12 | targets_.reserve(reserve_size); 13 | } 14 | 15 | void read_features(std::string filepath) { 16 | std::ifstream file(filepath, std::ios::binary); 17 | if (file.is_open()) { 18 | uint32_t magic, num_imgs, rows, cols; 19 | file.read(reinterpret_cast<char*>(&magic), 4); 20 | file.read(reinterpret_cast<char*>(&num_imgs), 4); 21 | file.read(reinterpret_cast<char*>(&rows), 4); 22 | file.read(reinterpret_cast<char*>(&cols), 4); 23 | 24 | magic = swap_endian(magic); 25 | num_imgs = swap_endian(num_imgs); 26 | rows = swap_endian(rows); 27 | cols = swap_endian(cols); 28 | 29 | for (int i = 0; i < num_imgs; ++i) { 30 | file.seekg(16 + i * rows * cols, std::ios::beg); 31 | 32 | std::vector<uint8_t> image(rows * cols); 33 | file.read(reinterpret_cast<char*>(&image[0]), rows * cols); 34 | features_.emplace_back(std::move(image)); 35 | } 36 | } 37 | 38 | else { 39 | std::cout << "Error opening file."
<< std::endl; 40 | } 41 | } 42 | 43 | void read_targets(std::string filepath) { 44 | std::ifstream file(filepath, std::ios::binary); 45 | if (file.is_open()) { 46 | uint32_t magic, num_labels; 47 | file.read(reinterpret_cast<char*>(&magic), 4); 48 | file.read(reinterpret_cast<char*>(&num_labels), 4); 49 | 50 | magic = swap_endian(magic); 51 | num_labels = swap_endian(num_labels); 52 | 53 | for (int i = 0; i < num_labels; ++i) { 54 | file.seekg(8 + i, std::ios::beg); 55 | 56 | uint8_t label; 57 | file.read(reinterpret_cast<char*>(&label), 1); 58 | targets_.push_back(label); 59 | } 60 | } 61 | 62 | else { 63 | std::cout << "Error opening file." << std::endl; 64 | } 65 | 66 | } 67 | 68 | void clear() { 69 | features_.clear(); 70 | targets_.clear(); 71 | } 72 | 73 | private: 74 | uint32_t swap_endian(uint32_t val) { 75 | return ((val << 24) & 0xff000000) | 76 | ((val << 8) & 0x00ff0000) | 77 | ((val >> 8) & 0x0000ff00) | 78 | ((val >> 24) & 0x000000ff); 79 | } 80 | 81 | std::vector<std::vector<uint8_t>> features_; 82 | std::vector<uint8_t> targets_; 83 | 84 | std::size_t batch_size_; 85 | }; 86 | 87 | class Loader { 88 | public: 89 | Loader(std::size_t batch_size); 90 | 91 | 92 | private: 93 | std::size_t batch_size_; 94 | }; 95 | 96 | int main() { 97 | Dataset data; 98 | data.read_features("data/train-images.idx3-ubyte"); 99 | data.read_targets("data/train-labels.idx1-ubyte"); 100 | return 0; 101 | } -------------------------------------------------------------------------------- /in-process/idea.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <vector> 3 | #include <memory> 4 | 5 | namespace internal { 6 | 7 | class Base { 8 | public: 9 | virtual ~Base() = default; 10 | }; 11 | 12 | template <typename T> 13 | class Array : Base { 14 | public: 15 | using scalar_type = T; 16 | using pointer = scalar_type*; 17 | using const_pointer = const scalar_type*; 18 | 19 | using storage_type = std::vector<scalar_type>; 20 | using iterator = typename storage_type::iterator; 21 | using const_iterator = typename storage_type::const_iterator; 22 | 23 | Array() = default; 24 | Array(std::size_t size) : data_(size) { 25 | for (std::size_t i = 0; i < size; ++i) data_[i] = 1; 26 | } 27 | 28 | storage_type data_; 29 | }; 30 | 31 | class Tensor : public Array<float> { 32 | public: 33 | Tensor() = default; 34 | Tensor(std::size_t size) : Array<float>(size) {} 35 | }; 36 | 37 | }; 38 | 39 | 40 | namespace net { 41 | 42 | class integer_32 { 43 | public: 44 | using scalar_type = int; 45 | using iterator = std::vector<int>::iterator; 46 | using const_iterator = std::vector<int>::const_iterator; 47 | 48 | integer_32(std::size_t size) { 49 | tensor_ = std::make_shared<internal::Array<int>>(size); 50 | } 51 | 52 | iterator begin() { return tensor_->data_.begin(); } 53 | iterator end() { return tensor_->data_.end(); } 54 | 55 | const_iterator begin() const { return tensor_->data_.cbegin(); } 56 | const_iterator end() const { return tensor_->data_.cend(); } 57 | 58 | private: 59 | std::shared_ptr<internal::Array<int>> tensor_; 60 | }; 61 | 62 | class float_32 { 63 | public: 64 | using scalar_type = float; 65 | using iterator = std::vector<float>::iterator; 66 | using const_iterator = std::vector<float>::const_iterator; 67 | 68 | float_32(std::size_t size) { 69 | tensor_ = std::make_shared<internal::Tensor>(size); 70 | } 71 | 72 | iterator begin() { return tensor_->data_.begin(); } 73 | iterator end() { return tensor_->data_.end(); } 74 | 75 | const_iterator begin() const { return tensor_->data_.cbegin(); } 76 | const_iterator end() const { return tensor_->data_.cend(); } 77 | 78 | private: 79 | std::shared_ptr<internal::Tensor> tensor_; 80 | }; 81 | 82 | template <typename T> 83 |
class Tensor { 84 | public: 85 | using scalar_type = T; 86 | 87 | private: 88 | std::shared_ptr<internal::Array<T>> data_; 89 | }; 90 | 91 | template<> 92 | class Tensor<float> : public float_32 { 93 | public: 94 | Tensor(std::size_t size) : float_32(size) { 95 | std::cout << "i'm a specialization"; 96 | } 97 | }; 98 | 99 | Tensor<float> fn(Tensor<float> x){ 100 | return x; 101 | } 102 | 103 | 104 | } // namespace net 105 | 106 | int main() { 107 | net::Tensor<float> tensor(10); 108 | net::Tensor<float> tensor2 = net::fn(tensor); 109 | for(auto i : tensor2) std::cout << i << std::endl; 110 | } -------------------------------------------------------------------------------- /in-process/read_mnist.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <fstream> 3 | #include <vector> 4 | #include <cstdint> 5 | 6 | uint32_t swap_endian(uint32_t val) { 7 | return ((val << 24) & 0xff000000) | 8 | ((val << 8) & 0x00ff0000) | 9 | ((val >> 8) & 0x0000ff00) | 10 | ((val >> 24) & 0x000000ff); 11 | } 12 | 13 | void read_mnist_images(const std::string& filepath) { 14 | std::ifstream file(filepath, std::ios::binary); 15 | if (file.is_open()) { 16 | uint32_t magic, num_imgs, rows, cols; 17 | file.read(reinterpret_cast<char*>(&magic), 4); 18 | file.read(reinterpret_cast<char*>(&num_imgs), 4); 19 | file.read(reinterpret_cast<char*>(&rows), 4); 20 | file.read(reinterpret_cast<char*>(&cols), 4); 21 | 22 | magic = swap_endian(magic); 23 | num_imgs = swap_endian(num_imgs); 24 | rows = swap_endian(rows); 25 | cols = swap_endian(cols); 26 | 27 | // Navigate to the image at index 5 28 | file.seekg(16 + 5 * rows * cols, std::ios::beg); 29 | 30 | // Read and print image at IDX=5 31 | std::vector<uint8_t> image(rows * cols); 32 | file.read(reinterpret_cast<char*>(&image[0]), rows * cols); 33 | 34 | std::cout << "Image at IDX=5:" << std::endl; 35 | for(int r = 0; r < rows; ++r) { 36 | for(int c = 0; c < cols; ++c) { 37 | std::cout << static_cast<int>(image[r * cols + c]) << ' '; 38 | } 39 | std::cout << std::endl; 40 | } 41 | 42 | } else { 43 | std::cout << "Error opening file." << std::endl; 44 | } 45 | } 46 | 47 | void read_mnist_labels(const std::string& filepath) { 48 | std::ifstream file(filepath, std::ios::binary); 49 | if (file.is_open()) { 50 | uint32_t magic, num_labels; 51 | file.read(reinterpret_cast<char*>(&magic), 4); 52 | file.read(reinterpret_cast<char*>(&num_labels), 4); 53 | 54 | magic = swap_endian(magic); 55 | num_labels = swap_endian(num_labels); 56 | 57 | // Navigate to the label at index 5 58 | file.seekg(8 + 5, std::ios::beg); 59 | 60 | // Read and print label at IDX=5 61 | uint8_t label; 62 | file.read(reinterpret_cast<char*>(&label), 1); 63 | 64 | std::cout << "Label at IDX=5: " << static_cast<int>(label) << std::endl; 65 | 66 | } else { 67 | std::cout << "Error opening file." << std::endl; 68 | } 69 | } 70 | 71 | int main() { 72 | read_mnist_images("train-images.idx3-ubyte"); 73 | read_mnist_labels("train-labels.idx1-ubyte"); 74 | return 0; 75 | } -------------------------------------------------------------------------------- /in-process/usless_datareader.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DATA_READER_HPP 2 | #define DATA_READER_HPP 3 | 4 | #include <iostream> 5 | #include <fstream> 6 | #include <sstream> 7 | #include <string> 8 | #include <vector> 9 | #include <utility> 10 | #include <algorithm> 11 | 12 | /* 13 | #THIS NEEDS A COMPLETE REWRITE 14 | 15 | Reads a dataset from a CSV file. 16 | Returns a pair of vectors, the first one containing the features and the second one the targets. 17 | It's header only, so it can be included in the main file easily.
18 | */ 19 | 20 | std::pair<std::vector<std::vector<float>>, std::vector<int>> read(const std::string& filename) 21 | { 22 | std::ifstream file(filename); 23 | std::string line, header; 24 | std::vector<std::vector<float>> features; 25 | std::vector<int> targets; 26 | 27 | std::getline(file, header); // Skip header 28 | 29 | features.reserve(10000); 30 | targets.reserve(10000); 31 | 32 | while (std::getline(file, line)) { 33 | std::vector<float> image; 34 | std::istringstream iss(line); 35 | std::string field; 36 | std::getline(iss, field, ','); 37 | targets.emplace_back(std::stoi(field)); 38 | while (std::getline(iss, field, ',')) 39 | { 40 | image.emplace_back(std::stof(field)); 41 | } 42 | features.emplace_back(std::move(image)); 43 | } 44 | file.close(); 45 | return std::make_pair(std::move(features), std::move(targets)); 46 | } 47 | 48 | #endif -------------------------------------------------------------------------------- /in-process/usless_dataset.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DATASET_H_ 2 | #define DATASET_H_ 3 | 4 | #include <list> 5 | #include <vector> 6 | #include <memory> 7 | #include <random> 8 | #include <algorithm> 9 | #include <numeric> 10 | #include <cstddef> 11 | 12 | #include "./linear_algebra.h" 13 | #include "./normalizers.h" 14 | 15 | class Dataset 16 | { 17 | public: 18 | struct Batch { 19 | Matrix features; 20 | std::vector<int> targets; 21 | Batch(std::size_t batch_size, std::size_t features_size); 22 | void shuffle(); 23 | }; 24 | 25 | using iterator = typename std::list<Batch>::iterator; 26 | using const_iterator = typename std::list<Batch>::const_iterator; 27 | 28 | iterator begin(); 29 | iterator end(); 30 | const_iterator begin() const; 31 | const_iterator end() const; 32 | 33 | Dataset( 34 | const std::vector<std::vector<float>>& data, 35 | const std::vector<int>& targets, 36 | std::size_t batch_size, 37 | bool normalize = true, 38 | std::unique_ptr<Normalizer> normalizer = std::make_unique<Standard>() 39 | ); 40 | 41 | void shuffle(); 42 | 43 | private: 44 | std::list<Batch> dataset_; 45 | std::size_t batch_size_; 46 | std::size_t features_size_; 47 | std::unique_ptr<Normalizer> normalizer_; 48 | }; 49 | 50 | 51 | Dataset::Batch::Batch(std::size_t batch_size, std::size_t features_size) 52 | : features(batch_size, features_size), 53 | targets(batch_size) 54 | {} 55 | 56 | void Dataset::Batch::shuffle() 57 | { 58 | std::vector<std::size_t> indices(targets.size()); 59 | std::iota(indices.begin(), indices.end(), 0); 60 | std::shuffle(indices.begin(), indices.end(), std::mt19937{std::random_device{}()}); 61 | Matrix shuffled_features(features.rows(), features.cols()); 62 | std::vector<int> shuffled_targets(targets.size()); 63 | for(std::size_t i = 0; i < indices.size(); i++) 64 | { 65 | shuffled_features.row(i) = features.row(indices[i]); 66 | shuffled_targets[i] = targets[indices[i]]; 67 | } 68 | features = shuffled_features; 69 | targets = shuffled_targets; 70 | } 71 | 72 | Dataset::iterator Dataset::begin(){ return dataset_.begin(); } 73 | Dataset::iterator Dataset::end(){ return dataset_.end(); } 74 | 75 | Dataset::const_iterator Dataset::begin() const{ return dataset_.begin(); } 76 | Dataset::const_iterator Dataset::end() const{ return dataset_.end(); } 77 | 78 | 79 | Dataset::Dataset( 80 | const std::vector<std::vector<float>>& features, 81 | const std::vector<int>& targets, 82 | std::size_t batch_size, 83 | bool normalize, 84 | std::unique_ptr<Normalizer> normalizer 85 | ) 86 | : batch_size_(batch_size) 87 | , features_size_(features[0].size()) 88 | { 89 | if(normalize){ 90 | normalizer_ = std::move(normalizer); 91 | } 92 | 93 | for(std::size_t i = 0; i < features.size()/batch_size; i++) 94 | { 95 | Batch batch(batch_size, features[0].size()); 96 |
for(std::size_t j = 0; j < batch_size; j++) 97 | { 98 | batch.features.row(j) = Eigen::Map<Eigen::VectorXf>( 99 | (normalizer_->fit_transform(features[i * batch_size + j])).data(), 100 | features[0].size() 101 | ); 102 | 103 | batch.targets[j] = targets[i * batch_size + j]; 104 | } 105 | dataset_.emplace_back(std::move(batch)); 106 | } 107 | } 108 | 109 | void Dataset::shuffle() 110 | { 111 | for(auto& batch : dataset_) 112 | { 113 | batch.shuffle(); 114 | } 115 | } 116 | 117 | #endif -------------------------------------------------------------------------------- /in-process/usless_normalizers.hpp: -------------------------------------------------------------------------------- 1 | #ifndef NORMALIZERS_H_ 2 | #define NORMALIZERS_H_ 3 | 4 | #include <vector> 5 | #include <algorithm> 6 | #include <numeric> 7 | #include <cmath> 8 | 9 | class Normalizer 10 | { 11 | public: 12 | virtual ~Normalizer() = default; 13 | 14 | virtual void fit(const std::vector<float>& features_vector) = 0; 15 | virtual std::vector<float> transform(const std::vector<float>& features_vector) = 0; 16 | virtual std::vector<float> fit_transform(const std::vector<float>& features_vector) = 0; 17 | virtual std::vector<float> inverse_transform(const std::vector<float>& features_vector) = 0; 18 | }; 19 | 20 | class Standard : public Normalizer 21 | { 22 | public: 23 | Standard() = default; 24 | ~Standard() = default; 25 | 26 | void fit(const std::vector<float>& features_vector) override; 27 | std::vector<float> transform(const std::vector<float>& features_vector) override; 28 | std::vector<float> fit_transform(const std::vector<float>& features_vector) override; 29 | std::vector<float> inverse_transform(const std::vector<float>& features_vector) override; 30 | 31 | private: 32 | float mean_; 33 | float standard_deviation_; 34 | }; 35 | 36 | 37 | void Standard::fit(const std::vector<float>& features_vector) { 38 | mean_ = mean(features_vector); // mean() and standard_deviation() are expected to come from a project statistics header 39 | standard_deviation_ = standard_deviation(features_vector); 40 | } 41 | 42 | std::vector<float> Standard::transform(const std::vector<float>& features_vector) 43 | { 44 | std::vector<float> transformed_features(features_vector.size()); 45 | std::transform( 46 | features_vector.begin(), 47 | features_vector.end(), 48 | transformed_features.begin(), 49 | [this](float feature){ return (feature - mean_) / standard_deviation_; } 50 | ); 51 | return transformed_features; 52 | } 53 | 54 | std::vector<float> Standard::fit_transform(const std::vector<float>& features_vector) { 55 | fit(features_vector); 56 | return transform(features_vector); 57 | } 58 | 59 | std::vector<float> Standard::inverse_transform(const std::vector<float>& features_vector) 60 | { 61 | std::vector<float> inverse_transformed_features(features_vector.size()); 62 | std::transform( 63 | features_vector.begin(), 64 | features_vector.end(), 65 | inverse_transformed_features.begin(), 66 | [this](float feature){return (feature * standard_deviation_) + mean_;} 67 | ); 68 | return inverse_transformed_features; 69 | } 70 | 71 | #endif 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /include/CaberNet.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "CaberNet/tensor.h" 3 | #include "CaberNet/functions.h" 4 | #include "CaberNet/layers.h" 5 | #include "CaberNet/model.h" 6 | #include "CaberNet/criterions.h" 7 | #include "CaberNet/optimizers.h" 8 | #include "CaberNet/dataset.h" -------------------------------------------------------------------------------- /include/CaberNet/criterions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <memory> 4 | #include <vector> 5 | 6 | #include "tensor.h" 7 |
8 | namespace internal { 9 | class Criterion; 10 | } 11 | 12 | namespace net::criterion { 13 | 14 | class NLLLoss { 15 | public: 16 | ~NLLLoss(); 17 | NLLLoss(Tensor<float> output, Tensor<int> targets); 18 | float loss() const; 19 | void backward(); 20 | 21 | private: 22 | std::unique_ptr<internal::Criterion> criterion_; 23 | }; 24 | 25 | } // namespace net::criterion -------------------------------------------------------------------------------- /include/CaberNet/dataset.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <iostream> 3 | #include <fstream> 4 | #include <vector> 5 | #include <string> 6 | #include <algorithm> 7 | #include <cstdint> 8 | 9 | #include "tensor.h" 10 | 11 | namespace net { 12 | 13 | class Dataset { 14 | public: 15 | 16 | Dataset(std::size_t batch_size, bool shuffle = false, std::size_t reserve_size = 60000) { 17 | batch_size_ = batch_size; 18 | shuffle_ = shuffle; 19 | } 20 | 21 | void read_targets(std::string filepath) { 22 | std::ifstream file(filepath, std::ios::binary); 23 | if (file.is_open()) { 24 | uint32_t magic, num_labels; 25 | file.read(reinterpret_cast<char*>(&magic), 4); 26 | file.read(reinterpret_cast<char*>(&num_labels), 4); 27 | 28 | magic = swap_endian(magic); 29 | num_labels = swap_endian(num_labels); 30 | 31 | for(std::size_t batch = 0; batch < num_labels; batch += batch_size_) { 32 | targets_.emplace_back(Tensor<int>({batch_size_})); 33 | for(std::size_t index = 0; index < batch_size_; ++index) { 34 | uint8_t label; 35 | file.read(reinterpret_cast<char*>(&label), 1); 36 | targets_.back().data()[index] = static_cast<int>(label); 37 | }; 38 | 39 | } 40 | 41 | targets_.pop_back(); // the last batch has a lot of zeros, so we pop it. Fix later. 42 | 43 | } 44 | 45 | else { 46 | std::cout << "Error opening file." << std::endl; 47 | } 48 | 49 | } 50 | 51 | 52 | void read_features(std::string filepath) { 53 | std::ifstream file(filepath, std::ios::binary); 54 | if (file.is_open()) { 55 | uint32_t magic, num_imgs, rows, cols; 56 | file.read(reinterpret_cast<char*>(&magic), 4); 57 | file.read(reinterpret_cast<char*>(&num_imgs), 4); 58 | file.read(reinterpret_cast<char*>(&rows), 4); 59 | file.read(reinterpret_cast<char*>(&cols), 4); 60 | 61 | magic = swap_endian(magic); 62 | num_imgs = swap_endian(num_imgs); 63 | rows = swap_endian(rows); 64 | cols = swap_endian(cols); 65 | 66 | features_size_ = rows * cols; 67 | 68 | for (std::size_t batch = 0; batch < num_imgs; batch += batch_size_) { 69 | features_.emplace_back(Tensor<float>({batch_size_, features_size_}, false, true)); 70 | for (std::size_t index = 0; index < batch_size_; ++index) { 71 | std::vector<uint8_t> features(features_size_); 72 | file.read(reinterpret_cast<char*>(&features[0]), features_size_); 73 | std::transform( 74 | features.begin(), 75 | features.end(), features_.back().data() + index * features_size_, 76 | [](uint8_t x) { return static_cast<float>(x) / 255.0f; } 77 | ); 78 | } 79 | 80 | } 81 | 82 | features_.pop_back(); 83 | 84 | } 85 | 86 | else { 87 | std::cout << "Error opening file."
<< std::endl; 88 | } 89 | } 90 | 91 | void clear() { 92 | features_.clear(); 93 | targets_.clear(); 94 | } 95 | 96 | std::size_t length() { 97 | return features_.size(); 98 | } 99 | 100 | std::vector<Tensor<float>>& features() { return features_; } 101 | const std::vector<Tensor<float>>& features() const { return features_; } 102 | 103 | std::vector<Tensor<int>>& targets() { return targets_; } 104 | const std::vector<Tensor<int>>& targets() const { return targets_; } 105 | 106 | private: 107 | uint32_t swap_endian(uint32_t val) { 108 | return ((val << 24) & 0xff000000) | 109 | ((val << 8) & 0x00ff0000) | 110 | ((val >> 8) & 0x0000ff00) | 111 | ((val >> 24) & 0x000000ff); 112 | } 113 | 114 | std::vector<Tensor<float>> features_; 115 | std::vector<Tensor<int>> targets_; 116 | 117 | std::size_t features_size_; 118 | std::size_t batch_size_; 119 | bool shuffle_; 120 | }; 121 | 122 | } // namespace net 123 | -------------------------------------------------------------------------------- /include/CaberNet/functions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "tensor.h" 3 | 4 | namespace net::function { 5 | 6 | Tensor<float> linear(const Tensor<float>& input, const Tensor<float>& weight, const Tensor<float>& bias); 7 | Tensor<float> softmax(Tensor<float>& input, int axis); 8 | Tensor<float> log_softmax(Tensor<float>& input, int axis); 9 | Tensor<float> relu(const Tensor<float>& input); 10 | 11 | } // namespace net::function 12 | -------------------------------------------------------------------------------- /include/CaberNet/initializers.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace net { 4 | 5 | enum class requires_gradient { 6 | True = true, 7 | False = false 8 | }; 9 | 10 | enum class initializer { 11 | He 12 | }; 13 | 14 | } // namespace net -------------------------------------------------------------------------------- /include/CaberNet/layers.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "model.h" 4 | 5 | #include <cstddef> 6 | #include <memory> 7 | #include <variant> 8 | #include <vector> 9 | 10 | namespace internal { 11 | class Tensor; 12 | } 13 | 14 | namespace net::layer { 15 | 16 | class Linear : public Model<Linear> { 17 | public: 18 | ~Linear(); 19 | Linear( 20 | size_type input_features, 21 | size_type output_features, 22 | initializer distribution = initializer::He ); 23 | 24 | Tensor<float> forward(Tensor<float> x); 25 | 26 | void set_optimizer(std::shared_ptr<base::Optimizer> optimizer); 27 | 28 | std::vector<internal::Tensor*> parameters() const { 29 | return { weight_.internal(), bias_.internal() }; 30 | } 31 | 32 | private: 33 | Tensor<float> weight_; 34 | Tensor<float> bias_; 35 | }; 36 | 37 | struct ReLU : public Model<ReLU> { 38 | ReLU() = default; 39 | 40 | std::vector<internal::Tensor*> parameters() const { 41 | return {}; 42 | } 43 | 44 | Tensor<float> forward(Tensor<float> input); 45 | void set_optimizer(std::shared_ptr<base::Optimizer> optimizer) { return; } 46 | }; 47 | 48 | struct Softmax : public Model<Softmax> { 49 | int axis; 50 | Softmax(int axis); 51 | 52 | std::vector<internal::Tensor*> parameters() const { 53 | return {}; 54 | } 55 | 56 | Tensor<float> forward(Tensor<float> input); 57 | void set_optimizer(std::shared_ptr<base::Optimizer> optimizer) { return; } 58 | }; 59 | 60 | struct LogSoftmax : public Model<LogSoftmax> { 61 | int axis; 62 | LogSoftmax(int axis); 63 | 64 | std::vector<internal::Tensor*> parameters() const { 65 | return {}; 66 | } 67 | 68 | Tensor<float> forward(Tensor<float> input); 69 | void set_optimizer(std::shared_ptr<base::Optimizer> optimizer) { return; } 70 | }; 71 | 72 | class Sequence : public Model<Sequence> { 73 | using layer_variant = std::variant< 74 | Linear, 75 | ReLU, 76 | Softmax, 77 | LogSoftmax 78 | >; 79 | 80 | public: 81 | 82 | template<typename ... Layers> 83 |
Sequence(Layers&& ... layers) { 84 | layers_ = { std::forward<Layers>(layers)... }; 85 | } 86 | 87 | Tensor<float> forward(Tensor<float> input) { 88 | for (auto& layer : layers_) { 89 | input = std::visit([input](auto&& argument) { return argument.forward(input); }, layer); 90 | } 91 | return input; 92 | } 93 | 94 | std::vector<internal::Tensor*> parameters() const { 95 | 96 | std::vector<internal::Tensor*> parameter_list; 97 | for (auto& layer : layers_) { 98 | std::visit([&parameter_list](auto&& argument) { 99 | for(auto parameter : argument.parameters()) { 100 | parameter_list.push_back(parameter); 101 | } 102 | 103 | }, layer); 104 | } 105 | 106 | return parameter_list; 107 | } 108 | 109 | void set_optimizer(std::shared_ptr<base::Optimizer> optimizer) { 110 | for (auto& layer : layers_) { 111 | std::visit([optimizer](auto&& argument) { argument.set_optimizer(optimizer); }, layer); 112 | } 113 | } 114 | 115 | private: 116 | std::vector<layer_variant> layers_; 117 | }; 118 | 119 | } // namespace net -------------------------------------------------------------------------------- /include/CaberNet/model.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <memory> 4 | #include <vector> 5 | 6 | #include "tensor.h" 7 | #include "optimizers.h" 8 | 9 | namespace net { 10 | 11 | template<typename Derived> 12 | class Model { 13 | public: 14 | using size_type = std::size_t; 15 | using shape_type = std::vector<size_type>; 16 | 17 | Tensor<float> operator()(Tensor<float> input) { 18 | return static_cast<Derived*>(this)->forward(input); 19 | } 20 | 21 | void configure_optimizer(std::shared_ptr<base::Optimizer> optimizer) { 22 | static_cast<Derived*>(this)->set_optimizer(optimizer); 23 | optimizer_ = optimizer; 24 | } 25 | 26 | private: 27 | std::shared_ptr<base::Optimizer> optimizer_ = std::make_shared<optimizer::NoOptimization>(); 28 | 29 | protected: 30 | Model() = default; 31 | Model(std::shared_ptr<base::Optimizer> optimizer) : optimizer_(optimizer) { 32 | static_cast<Derived*>(this)->set_optimizer(optimizer); 33 | } 34 | }; 35 | 36 | } // namespace net 37 | -------------------------------------------------------------------------------- /include/CaberNet/optimizers.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <cstddef> 4 | #include <memory> 5 | #include <vector> 6 | 7 | 8 | namespace internal { class Tensor; } 9 | 10 | namespace net::base { 11 | 12 | struct Optimizer { 13 | virtual ~Optimizer() = default; 14 | virtual void add_parameter(internal::Tensor* parameter) = 0; 15 | virtual void add_parameter(const std::vector<internal::Tensor*>& parameters) = 0; 16 | virtual void step() = 0; 17 | }; 18 | 19 | template<typename Derived> 20 | class Optimize : public Optimizer { 21 | public: 22 | ~Optimize() override = default; 23 | 24 | void add_parameter(internal::Tensor* parameter) override final { 25 | parameters_.push_back(parameter); 26 | } 27 | 28 | void add_parameter(const std::vector<internal::Tensor*>& parameters) override final { 29 | parameters_.insert(parameters_.end(), parameters.begin(), parameters.end()); 30 | } 31 | 32 | void step() override final { 33 | for(internal::Tensor* parameter : parameters_) { 34 | static_cast<Derived*>(this)->update(parameter); 35 | } 36 | } 37 | 38 | private: 39 | std::vector<internal::Tensor*> parameters_; 40 | }; 41 | 42 | } // namespace net::base 43 | 44 | namespace net::optimizer { 45 | 46 | class NoOptimization : public base::Optimize<NoOptimization> { 47 | public: 48 | ~NoOptimization() = default; 49 | void update(internal::Tensor* parameter) { return; } 50 | }; 51 | 52 | 53 | class SGD : public base::Optimize<SGD> { 54 | public: 55 | SGD(float learning_rate): learning_rate_{learning_rate} {} 56 | ~SGD() = default; 57 | 58 | void update(internal::Tensor* parameter); 59 | 60 | protected: 61 | const float learning_rate_; 62 | }; 63 |
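// A new optimizer only needs to derive from base::Optimize<Derived> (CRTP, as
// reconstructed above) and provide an update(internal::Tensor*) method; step()
// then applies update() to every registered parameter. A minimal sketch,
// hypothetical and not part of the library, of SGD with momentum:
//
//     class Momentum : public base::Optimize<Momentum> {
//     public:
//         Momentum(float learning_rate, float decay)
//         : learning_rate_{learning_rate}, decay_{decay} {}
//         // update() would keep one velocity buffer per parameter and apply:
//         //     velocity = decay * velocity - learning_rate * gradient;
//         //     parameter += velocity;
//         void update(internal::Tensor* parameter);
//     private:
//         const float learning_rate_;
//         const float decay_;
//     };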
64 | } // namespace net::optimizer -------------------------------------------------------------------------------- /include/CaberNet/statistics/distributions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <cmath> 4 | #include <cstddef> 5 | #include <memory> 6 | #include <random> 7 | 8 | namespace net::distribution { 9 | 10 | template<typename T> 11 | class Distribution { 12 | public: 13 | using real_type = T; 14 | virtual ~Distribution() = default; 15 | virtual real_type generate() = 0; 16 | 17 | protected: 18 | Distribution() = default; 19 | }; 20 | 21 | template<typename T> 22 | struct Normal : public Distribution<T> { 23 | public: 24 | using real_type = T; 25 | Normal(real_type mean, real_type standard_deviation) 26 | : distribution_(mean, standard_deviation) 27 | , generator_(std::random_device()()) {} 28 | 29 | real_type generate() final { return distribution_(generator_); } 30 | 31 | std::normal_distribution<real_type> distribution_; 32 | std::mt19937 generator_; 33 | }; 34 | 35 | } // namespace net::distribution -------------------------------------------------------------------------------- /include/CaberNet/tensor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <iostream> 4 | #include <memory> 5 | #include <vector> 6 | 7 | #include "tensor/tensor_float.h" 8 | #include "tensor/tensor_int.h" 9 | 10 | namespace net { 11 | 12 | template<typename T> class Tensor; 13 | 14 | template<> 15 | struct Tensor<float> : public TensorFloat { 16 | using TensorFloat::TensorFloat; 17 | }; 18 | 19 | template<> 20 | struct Tensor<int> : public TensorInt { 21 | using TensorInt::TensorInt; 22 | }; 23 | 24 | std::ostream& operator<<(std::ostream& ostream, const Tensor<float>& tensor); 25 | std::ostream& operator<<(std::ostream& ostream, const Tensor<int>& tensor); 26 | 27 | Tensor<float> matmul(const Tensor<float>& first, const Tensor<float>& second); 28 | Tensor<float> operator + (const Tensor<float>& first, const Tensor<float>& second); 29 | Tensor<float> operator * (const Tensor<float>& first, const Tensor<float>& second); 30 | 31 | } // namespace net -------------------------------------------------------------------------------- /include/CaberNet/tensor/tensor_float.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <cstddef> 3 | #include <memory> 4 | #include <vector> 5 | 6 | #include "../initializers.h" 7 | #include "../statistics/distributions.h" 8 | 9 | namespace internal { class Tensor; } 10 | 11 | namespace net { 12 | 13 | template<typename T> class Tensor; 14 | 15 | class TensorFloat { 16 | public: 17 | using value_type = float; // Needed for GMock's built-in matchers 18 | using pointer = value_type*; 19 | using const_pointer = const value_type*; 20 | 21 | using size_type = std::size_t; 22 | using shape_type = std::vector<size_type>; 23 | using storage_type = std::vector<value_type>; 24 | 25 | using iterator = storage_type::iterator; 26 | using const_iterator = storage_type::const_iterator; 27 | 28 | TensorFloat() = default; 29 | TensorFloat(std::shared_ptr<internal::Tensor> tensor); 30 | TensorFloat(shape_type shape, bool gradient_requirement = false, bool detached = false); 31 | TensorFloat(shape_type shape, requires_gradient gradient_requirement, bool detached = false); 32 | 33 | void reshape(shape_type shape); 34 | 35 | void backward(const Tensor<float>& gradient); 36 | void perform(); 37 | 38 | void fill(initializer distribution); 39 | void fill(value_type value); 40 | void fill(std::vector<value_type> values); 41 | 42 | void copy(internal::Tensor* other); 43 | 44 | internal::Tensor* internal() const; 45 | internal::Tensor* internal(); 46 | 47 | iterator begin(); 48 | iterator end(); 49
| const_iterator begin() const; 50 | const_iterator end() const; 51 | const_iterator cbegin() const; 52 | const_iterator cend() const; 53 | 54 | Tensor<float> gradient() const; 55 | 56 | pointer data(); 57 | const_pointer data() const; 58 | shape_type shape() const; 59 | size_type rank() const; 60 | 61 | private: 62 | std::shared_ptr<internal::Tensor> tensor_; 63 | }; 64 | 65 | 66 | } // namespace net 67 | -------------------------------------------------------------------------------- /include/CaberNet/tensor/tensor_int.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <cstddef> 3 | #include <memory> 4 | #include <vector> 5 | 6 | namespace internal { template<typename T> class Array; } 7 | 8 | namespace net { 9 | 10 | template<typename T> class Tensor; 11 | 12 | class TensorInt { 13 | public: 14 | using value_type = int; 15 | using pointer = value_type*; 16 | using const_pointer = const value_type*; 17 | 18 | using size_type = std::size_t; 19 | using shape_type = std::vector<size_type>; 20 | 21 | using iterator = std::vector<value_type>::iterator; 22 | using const_iterator = std::vector<value_type>::const_iterator; 23 | 24 | TensorInt() = default; 25 | TensorInt(std::shared_ptr<internal::Array<int>> subscripts); 26 | TensorInt(shape_type shape); 27 | 28 | void reshape(shape_type shape); 29 | void fill(value_type value); 30 | void fill(std::vector<value_type> values); 31 | 32 | void copy(internal::Array<int>* other); 33 | 34 | internal::Array<int>* internal() const; 35 | internal::Array<int>* internal(); 36 | 37 | iterator begin(); 38 | iterator end(); 39 | const_iterator begin() const; 40 | const_iterator end() const; 41 | const_iterator cbegin() const; 42 | const_iterator cend() const; 43 | 44 | pointer data(); 45 | const_pointer data() const; 46 | shape_type shape() const; 47 | size_type rank() const; 48 | 49 | private: 50 | std::shared_ptr<internal::Array<int>> data_; 51 | 52 | }; 53 | 54 | } // namespace net -------------------------------------------------------------------------------- /src/README.md: -------------------------------------------------------------------------------- 1 | ## For contributors 2 | ### If you want to add a function or an operation to the library, follow these steps. 3 | * In src/internals/functions, declare the function you want to add in the internal_functions.hpp file. 4 | It needs to inherit from the Function class and override the forward and backward methods, as in the examples provided. 5 | * Create a cpp file following the naming convention of the library, and implement the function there. 6 | * Expose the function you created in the src/functions.cpp file and in the include/CaberNet/functions.h file. A sketch of the full pattern follows.
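For reference, here is what the whole pattern looks like end to end. This is a hedged sketch: the `Square` function below is hypothetical (it does not exist in the library), and the exact Eigen map types are illustrative; the real conventions to copy are in internal_function_relu.cpp.

```cpp
// src/internals/functions/internal_function_square.cpp (hypothetical example)
#include "../config.h"
#include "../internal_tensor.hpp"
#include "internal_functions.hpp"

#if defined(USE_EIGEN_BACKEND)

namespace internal {

// Assumes this declaration was added to internal_functions.hpp:
//
//     class Square : public Function {
//     public:
//         Square(Tensor* input);
//         Tensor* forward() final;
//         void backward(Tensor* gradient) const final;
//     };

Square::Square(Tensor* input) : Function(input) {
    reshape(input->shape());
    requires_gradient(input->requires_gradient());
}

Tensor* Square::forward() {
    this->copy(input()->forward());
    Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> result_map(this->data(), this->size());
    result_map = result_map.square();  // y = x^2, computed in place
    return this;                       // forward returns the "this" pointer
}

void Square::backward(Tensor* gradient) const {
    if (requires_gradient()) {
        Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> gradient_map(gradient->data(), gradient->size());
        Eigen::Map<const Eigen::Array<float, 1, Eigen::Dynamic>> input_map(input()->data(), input()->size());
        gradient_map = gradient_map * 2.0f * input_map;  // dL/dx = dL/dy * 2x
        input()->backward(gradient);                     // pass the gradient to the input
    }
}

} // namespace internal

#endif // USE_EIGEN_BACKEND
```

It would then be wrapped in src/functions.cpp and declared in include/CaberNet/functions.h exactly the way `relu` is.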
7 | -------------------------------------------------------------------------------- /src/criterions.cpp: -------------------------------------------------------------------------------- 1 | #include "CaberNet/tensor.h" 2 | #include "CaberNet/criterions.h" 3 | 4 | #include "internals/internal_tensor.hpp" 5 | #include "internals/criterions/internal_criterions.hpp" 6 | 7 | namespace net::criterion { 8 | 9 | NLLLoss::~NLLLoss() = default; 10 | 11 | NLLLoss::NLLLoss(Tensor<float> output, Tensor<int> targets) { 12 | criterion_ = std::make_unique<internal::NLLLoss>(output.internal(), targets.internal()); 13 | } 14 | 15 | float NLLLoss::loss() const { 16 | return criterion_->loss(); 17 | } 18 | 19 | void NLLLoss::backward() { 20 | criterion_->backward(); 21 | } 22 | 23 | } // namespace net::criterion 24 | 25 | -------------------------------------------------------------------------------- /src/functions.cpp: -------------------------------------------------------------------------------- 1 | #include "CaberNet/tensor.h" 2 | #include "CaberNet/functions.h" 3 | #include "internals/internal_tensor.hpp" 4 | #include "internals/functions/internal_functions.hpp" 5 | 6 | #include <memory> 7 | #include <vector> 8 | 9 | namespace net::function { 10 | 11 | Tensor<float> linear(const Tensor<float>& input, const Tensor<float>& weight, const Tensor<float>& bias) { 12 | return Tensor<float>(std::make_shared<internal::Linear>( input.internal(), weight.internal(), bias.internal() )); 13 | } 14 | 15 | Tensor<float> softmax(Tensor<float>& input, int axis) { 16 | return Tensor<float>(std::make_shared<internal::Softmax>( input.internal(), axis )); 17 | } 18 | 19 | Tensor<float> log_softmax(Tensor<float>& input, int axis) { 20 | return Tensor<float>(std::make_shared<internal::LogSoftmax>( input.internal(), axis )); 21 | } 22 | 23 | Tensor<float> relu(const Tensor<float>& input) { 24 | return Tensor<float>(std::make_shared<internal::ReLU>( input.internal() )); 25 | } 26 | 27 | } // namespace net::function -------------------------------------------------------------------------------- /src/internals/config.h: -------------------------------------------------------------------------------- 1 | /* 2 | Note: This file is used to configure the internal library. 3 | Eigen is used as the default backend for the library, and should be included only 4 | in the cpp files, not in the h or hpp files. 5 | 6 | This is for making the implementation of the operations independent of the backend 7 | library. The internal library should be able to use any backend, and the user should 8 | be able to choose the backend when compiling the library.
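(For instance, a hypothetical USE_BLAS_BACKEND switch would follow the same pattern as the Eigen switch below: one #define plus one guarded #include, with no changes to the function and operation implementations.)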
9 | */ 10 | 11 | #ifndef INTERNAL_CONFIG_H 12 | #define INTERNAL_CONFIG_H 13 | 14 | #define USE_EIGEN_BACKEND true 15 | 16 | #if defined(USE_EIGEN_BACKEND) 17 | #include <Eigen/Dense> 18 | #endif // USE_EIGEN_BACKEND 19 | 20 | #endif // INTERNAL_CONFIG_H 21 | -------------------------------------------------------------------------------- /src/internals/criterions/internal_criterion_nllloss.cpp: -------------------------------------------------------------------------------- 1 | #include "../config.h" 2 | #include "internal_criterions.hpp" 3 | 4 | #if defined(USE_EIGEN_BACKEND) 5 | 6 | namespace internal { 7 | 8 | NLLLoss::scalar_type NLLLoss::loss() const { 9 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> output_map( 10 | output()->forward()->data(), 11 | batch_size(), 12 | number_of_classes() 13 | ); 14 | 15 | scalar_type loss_value = 0; 16 | for (size_type index = 0; index < batch_size(); ++index) { 17 | loss_value -= output_map(index, targets()->data()[index]); 18 | } 19 | 20 | return loss_value / static_cast<scalar_type>(batch_size()); 21 | } 22 | 23 | 24 | void NLLLoss::backward() { 25 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> gradient_map( 26 | gradient()->data(), 27 | batch_size(), 28 | number_of_classes() 29 | ); 30 | 31 | gradient_map.fill(0); 32 | for(size_type index = 0; index < batch_size(); ++index) { 33 | gradient_map(index, targets()->data()[index]) = -1; 34 | } 35 | gradient_map /= static_cast<scalar_type>(batch_size()); 36 | output()->backward(gradient()); 37 | } 38 | 39 | } // namespace internal 40 | 41 | #endif // USE_EIGEN_BACKEND -------------------------------------------------------------------------------- /src/internals/criterions/internal_criterions.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INTERNAL_CRITERIONS_HPP 2 | #define INTERNAL_CRITERIONS_HPP 3 | 4 | #include "../internal_tensor.hpp" 5 | #include "../internal_array.hpp" 6 | 7 | namespace internal { 8 | 9 | // TODO : manage the int type.
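// For a batch of N rows of log-probabilities and integer class targets, the
// negative log-likelihood computed in internal_criterion_nllloss.cpp reduces to
//
//     loss = -(1/N) * sum_i output[i, targets[i]]
//
// and its gradient with respect to the log-probabilities is -1/N at each
// position (i, targets[i]) and 0 elsewhere.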
10 | 11 | class Criterion { 12 | public: 13 | using size_type = Tensor::size_type; 14 | using shape_type = Tensor::shape_type; 15 | using scalar_type = Tensor::scalar_type; 16 | 17 | Criterion(Tensor* output, Array<int>* targets) { 18 | output_ = output; 19 | targets_ = targets; 20 | gradient_ = std::make_unique<Tensor>(output_->shape(), false); 21 | } 22 | 23 | virtual ~Criterion() = default; 24 | virtual scalar_type loss() const = 0; 25 | virtual void backward() = 0; 26 | 27 | Tensor* output() const { return output_; } 28 | Array<int>* targets() const { return targets_; } 29 | 30 | size_type number_of_classes() const { return output()->size() / batch_size(); } 31 | size_type batch_size() const { return output()->shape().front(); } 32 | 33 | Tensor* gradient() const { return gradient_.get(); } 34 | 35 | private: 36 | Tensor* output_; 37 | std::unique_ptr<Tensor> gradient_; 38 | Array<int>* targets_; 39 | }; 40 | 41 | class NLLLoss : public Criterion { 42 | public: 43 | ~NLLLoss() final = default; 44 | NLLLoss(Tensor* output, Array<int>* targets) : Criterion(output, targets) {} 45 | scalar_type loss() const final; 46 | void backward() final; 47 | }; 48 | 49 | } // namespace internal 50 | 51 | #endif // INTERNAL_CRITERIONS_HPP -------------------------------------------------------------------------------- /src/internals/functions/internal_function_linear.cpp: -------------------------------------------------------------------------------- 1 | #include "../config.h" 2 | #include "../internal_array.hpp" 3 | #include "../internal_tensor.hpp" 4 | #include "internal_functions.hpp" 5 | 6 | #if defined(USE_EIGEN_BACKEND) 7 | 8 | // matmul(x, W.T) + b like pytorch 9 | 10 | namespace internal { 11 | 12 | Linear::Linear(Tensor* input, Tensor* weight, Tensor* bias) 13 | : Function(input) 14 | , weight_(weight) 15 | , bias_(bias) { 16 | if (input->rank() != 2 || weight->rank() != 2) throw std::runtime_error("rank mismatch"); 17 | if (input->shape().back() != weight->shape().back()) throw std::runtime_error("shape mismatch between input and weight"); 18 | if (bias->shape().back() != weight->shape().front()) throw std::runtime_error("shape mismatch between bias and weight"); 19 | reshape({input->shape().front(), weight->shape().front()}); 20 | bool gradient_requirement = input->requires_gradient() || weight->requires_gradient() || bias->requires_gradient(); 21 | requires_gradient(gradient_requirement); 22 | 23 | if (bias->requires_gradient()) { 24 | bias_gradient_copy_ = std::make_unique<Tensor>(bias->shape(), false, false); 25 | } 26 | 27 | if (weight->requires_gradient()) { 28 | weight_gradient_copy_ = std::make_unique<Tensor>(weight->shape(), false, false); 29 | } 30 | 31 | } 32 | 33 | Tensor* Linear::forward() { 34 | 35 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> this_map( 36 | this->data(), 37 | rows_dimension(), 38 | columns_dimension()); 39 | 40 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> input_map( 41 | input()->forward()->data(), 42 | rows_dimension(), 43 | inner_dimension() ); 44 | 45 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> weight_map( 46 | weight()->forward()->data(), 47 | columns_dimension(), 48 | inner_dimension() ); 49 | 50 | if (bias()) { 51 | Eigen::Map<Eigen::RowVectorXf> bias_map( 52 | bias()->forward()->data(), 53 | columns_dimension()); 54 | 55 | this_map = (input_map * weight_map.transpose()).rowwise() + bias_map; 56 | return this; 57 | } 58 | 59 | else { 60 | this_map = input_map * weight_map.transpose(); 61 | return this; 62 | } 63 | } 64 | 65 | 66 | void Linear::backward(Tensor* gradient) const { 67 | 68 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> gradient_map( 69 | gradient->data(), 70 | rows_dimension(), 71 | columns_dimension() ); 72 | 73 | if
(input()->requires_gradient()) { 74 | Tensor* input_gradient = new Tensor({rows_dimension(), inner_dimension()}, false, false); 75 | 76 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> weight_map( 77 | weight()->data(), 78 | columns_dimension(), 79 | inner_dimension() ); 80 | 81 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> input_gradient_map( 82 | input_gradient->data(), 83 | rows_dimension(), 84 | inner_dimension() ); 85 | 86 | input_gradient_map = gradient_map * weight_map; 87 | input()->backward(input_gradient); 88 | delete input_gradient; 89 | } 90 | 91 | if (weight()->requires_gradient()) { 92 | 93 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> input_map( 94 | input()->data(), 95 | rows_dimension(), 96 | inner_dimension() ); 97 | 98 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> weight_gradient_map( 99 | weight_gradient_copy_->data(), 100 | columns_dimension(), 101 | inner_dimension() ); 102 | 103 | weight_gradient_map = gradient_map.transpose() * input_map; 104 | weight()->backward(weight_gradient_copy_.get()); 105 | } 106 | 107 | if (bias() && bias()->requires_gradient()) { 108 | Eigen::Map<Eigen::RowVectorXf> bias_gradient_map( 109 | bias_gradient_copy_->data(), 110 | columns_dimension()); 111 | bias_gradient_map = gradient_map.colwise().sum(); 112 | bias()->backward(bias_gradient_copy_.get()); 113 | } 114 | } 115 | 116 | } // namespace internal 117 | 118 | #endif // USE_EIGEN_BACKEND -------------------------------------------------------------------------------- /src/internals/functions/internal_function_logsoftmax.cpp: -------------------------------------------------------------------------------- 1 | #include "../config.h" 2 | #include "../internal_tensor.hpp" 3 | 4 | #include "internal_functions.hpp" 5 | 6 | #if defined(USE_EIGEN_BACKEND) 7 | 8 | namespace internal { 9 | 10 | LogSoftmax::LogSoftmax(Tensor* input, int axis) : Function(input) { 11 | if (axis != 0 && axis != 1) { throw std::runtime_error("axis should be 0 or 1"); } 12 | axis_ = axis; 13 | reshape(input->shape()); 14 | } 15 | 16 | Tensor* LogSoftmax::forward() { 17 | size_type rows = input()->shape().front(); 18 | size_type columns = input()->size() / input()->shape().front(); 19 | 20 | Eigen::Map<Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> input_map( 21 | input()->forward()->data(), 22 | rows, 23 | columns ); 24 | 25 | Eigen::Map<Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> output_map( 26 | this->data(), 27 | rows, 28 | columns ); 29 | 30 | 31 | if (axis_ == 0) { 32 | auto shifted = (input_map.colwise() - input_map.rowwise().maxCoeff()); 33 | output_map = shifted.rowwise() - shifted.exp().colwise().sum().log(); 34 | } 35 | 36 | else if (axis_ == 1) { 37 | auto shifted = (input_map.colwise() - input_map.rowwise().maxCoeff()); 38 | output_map = shifted.colwise() - shifted.exp().rowwise().sum().log(); 39 | } 40 | 41 | else { 42 | throw std::runtime_error("axis should be 0 or 1"); 43 | } 44 | 45 | return this; 46 | } 47 | 48 | void LogSoftmax::backward(Tensor* gradient) const { 49 | size_type rows = input()->shape().front(); 50 | size_type columns = input()->size() / input()->shape().front(); 51 | 52 | if (input()->requires_gradient()) { 53 | Eigen::Map<const Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> output_map( 54 | this->data(), 55 | rows, 56 | columns ); 57 | 58 | Eigen::Map<Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> gradient_map( 59 | gradient->data(), 60 | rows, 61 | columns ); 62 | 63 | if (axis_ == 0) { 64 | gradient_map -= output_map.exp().rowwise() * gradient_map.colwise().sum(); 65 | } 66 | 67 | else if (axis_ == 1) { 68 | gradient_map -= output_map.exp().colwise() * gradient_map.rowwise().sum(); 69 | } 70 | 71 | else { 72 | throw std::runtime_error("axis should be 0 or 1"); 73 | } 74 | 75 | input()->backward(gradient); 76 | } 77 | } 78 | 79 | } // namespace internal 80 | 81 | #endif // USE_EIGEN_BACKEND
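Note: LogSoftmax::backward above is the vector-Jacobian product of log-softmax, not a full Jacobian. A short derivation for the row-wise case (axis = 1), which is what the `gradient_map -= output_map.exp().colwise() * gradient_map.rowwise().sum()` line implements:

```latex
y_j = x_j - \log\sum_k e^{x_k}
\quad\Rightarrow\quad
\frac{\partial y_j}{\partial x_i} = \delta_{ij} - \operatorname{softmax}(x)_i
\quad\Rightarrow\quad
(J^\top g)_i = g_i - e^{y_i}\sum_j g_j
```

Here g is the incoming gradient row and y the stored log-softmax output, so the backward pass only needs the output it already computed.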
-------------------------------------------------------------------------------- /src/internals/functions/internal_function_relu.cpp: -------------------------------------------------------------------------------- 1 | #include "../config.h" 2 | #include "../internal_array.hpp" 3 | #include "../internal_tensor.hpp" 4 | #include "internal_functions.hpp" 5 | 6 | #if defined(USE_EIGEN_BACKEND) 7 | 8 | namespace internal { 9 | 10 | ReLU::ReLU(Tensor* input) : Function(input) { 11 | reshape(input->shape()); 12 | } 13 | 14 | Tensor* ReLU::forward() { 15 | this->copy(input()->forward()); 16 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> result_map( 17 | this->data(), 18 | this->size()); 19 | 20 | result_map = result_map.cwiseMax(0); 21 | return this; 22 | } 23 | 24 | void ReLU::backward(Tensor* gradient) const { 25 | if (requires_gradient()) { 26 | Eigen::Map<const Eigen::Array<float, 1, Eigen::Dynamic>> result_map( 27 | this->data(), 28 | this->size()); 29 | 30 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> gradient_map( 31 | gradient->data(), 32 | gradient->size()); 33 | 34 | gradient_map = gradient_map * (result_map > 0).cast<float>(); 35 | input()->backward(gradient); 36 | } 37 | } 38 | 39 | } // namespace internal 40 | 41 | #endif // USE_EIGEN_BACKEND -------------------------------------------------------------------------------- /src/internals/functions/internal_function_softmax.cpp: -------------------------------------------------------------------------------- 1 | #include "../config.h" 2 | #include "../internal_tensor.hpp" 3 | #include "./internal_functions.hpp" 4 | 5 | #if defined(USE_EIGEN_BACKEND) 6 | 7 | namespace internal { 8 | 9 | Softmax::Softmax(Tensor* input, int axis) : Function(input) { 10 | if (axis != 0 && axis != 1) { throw std::runtime_error("axis should be 0 or 1"); } 11 | axis_ = axis; 12 | reshape(input->shape()); 13 | } 14 | 15 | Tensor* Softmax::forward() { 16 | this->copy(input()->forward()); 17 | 18 | size_type rows = input()->shape().front(); 19 | size_type columns = input()->size() / input()->shape().front(); 20 | 21 | if (axis_ == 0) { 22 | Eigen::Map<Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> input_map( 23 | this->data(), 24 | rows, 25 | columns ); 26 | 27 | 28 | auto shifted_exp = (input_map.colwise() - input_map.rowwise().maxCoeff()).exp(); 29 | input_map = shifted_exp.colwise() / shifted_exp.rowwise().sum(); 30 | } 31 | 32 | else if (axis_ == 1) { 33 | Eigen::Map<Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> input_map( 34 | this->data(), 35 | rows, 36 | columns ); 37 | 38 | 39 | auto shifted_exp = (input_map.colwise() - input_map.rowwise().maxCoeff()).exp(); 40 | input_map = shifted_exp.colwise() / shifted_exp.rowwise().sum(); 41 | } 42 | 43 | return this; 44 | } 45 | 46 | void Softmax::backward(Tensor* gradient) const { 47 | if (requires_gradient()) { 48 | throw std::runtime_error("Not implemented yet, if you want to contribute, you can start from here!"); 49 | input()->backward(gradient); 50 | } 51 | } 52 | 53 | } // namespace internal 54 | 55 | #endif // USE_EIGEN_BACKEND -------------------------------------------------------------------------------- /src/internals/functions/internal_functions.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | These classes are non-leaf nodes representing functions. They are used to build the computational graph. 3 | A function should be implemented by overriding the forward and backward methods of the Tensor base class. 4 | The forward method should return the "this" pointer, and the backward method should pass the gradient 5 | to the inputs. 6 | 7 | Inplace functions are planned to be implemented in the future.
8 | 9 | For implementing operations use the data() method to access the data of the tensor and map it to some 10 | data structure of some library. In this case I used Eigen::Map to avoid any unnecessary copy of the data 11 | when performing the operations, and to be able to use the optimized Eigen library for the operations. 12 | */ 13 | 14 | 15 | #ifndef INTERNAL_FUNCTIONS_HPP 16 | #define INTERNAL_FUNCTIONS_HPP 17 | 18 | #include "../internal_tensor.hpp" 19 | #include "../internal_expression.hpp" 20 | 21 | namespace internal { 22 | 23 | class Function : public Expression { 24 | public: 25 | ~Function() override = default; 26 | Function(Tensor* input) { input_ = input; } 27 | Tensor* input() const { return input_; } 28 | 29 | private: 30 | Tensor* input_; 31 | }; 32 | 33 | class Linear : public Function { 34 | public: 35 | ~Linear() final = default; 36 | Linear(Tensor* input, Tensor* weight, Tensor* bias); 37 | Tensor* forward() final; 38 | void backward(Tensor* gradient) const final; 39 | 40 | Tensor* weight() const { return weight_; } 41 | Tensor* bias() const { return bias_; } 42 | 43 | size_type rows_dimension() const { return input()->shape().front(); } 44 | size_type inner_dimension() const { return input()->shape().back(); } 45 | size_type columns_dimension() const { return weight()->shape().front(); } 46 | 47 | private: 48 | std::unique_ptr<Tensor> weight_gradient_copy_; 49 | std::unique_ptr<Tensor> bias_gradient_copy_; 50 | Tensor* weight_; 51 | Tensor* bias_; 52 | }; 53 | 54 | class ReLU : public Function { 55 | public: 56 | ~ReLU() final = default; 57 | ReLU(Tensor* input); 58 | Tensor* forward() final; 59 | void backward(Tensor* gradient) const final; 60 | }; 61 | 62 | class Softmax : public Function { 63 | public: 64 | ~Softmax() final = default; 65 | Softmax(Tensor* input, int axis); 66 | Tensor* forward() final; 67 | void backward(Tensor* gradient) const final; 68 | 69 | private: 70 | int axis_; 71 | }; 72 | 73 | class LogSoftmax : public Function { 74 | public: 75 | ~LogSoftmax() final = default; 76 | LogSoftmax(Tensor* input, int axis); 77 | Tensor* forward() final; 78 | void backward(Tensor* gradient) const final; 79 | 80 | private: 81 | int axis_; 82 | }; 83 | 84 | } // namespace internal 85 | 86 | #endif // INTERNAL_FUNCTIONS_HPP -------------------------------------------------------------------------------- /src/internals/internal_array.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This is the internal multidimensional array class that is used to store the data of the tensors. 3 | It consists of a wrapper around a contiguous memory block (std::vector in this case, but it could be 4 | changed into another container in the future) and a shape vector that stores the size of each 5 | dimension of the array.
6 | */ 7 | 8 | #ifndef INTERNAL_ARRAY_HPP 9 | #define INTERNAL_ARRAY_HPP 10 | 11 | #include <cstddef> 12 | #include <memory> 13 | #include <vector> 14 | 15 | #include "internal_base.hpp" 16 | 17 | namespace internal { 18 | 19 | template<typename T> 20 | class Array : public Base { 21 | public: 22 | using scalar_type = T; 23 | using pointer = scalar_type*; 24 | using const_pointer = const scalar_type*; 25 | 26 | using storage_type = std::vector<scalar_type>; 27 | using iterator = typename storage_type::iterator; 28 | using const_iterator = typename storage_type::const_iterator; 29 | 30 | Array() = default; 31 | 32 | Array(const Array* other) : Base(other->shape()) { 33 | storage_ = other->storage_; 34 | } 35 | 36 | Array(shape_type shape) : Base(shape) { 37 | storage_.resize(size()); 38 | } 39 | 40 | pointer data() { return storage_.data(); } 41 | const_pointer data() const { return storage_.data(); } 42 | 43 | iterator begin() { return storage_.begin(); } 44 | iterator end() { return storage_.end(); } 45 | const_iterator begin() const { return storage_.cbegin(); } 46 | const_iterator end() const { return storage_.cend(); } 47 | const_iterator cbegin() const { return storage_.cbegin(); } 48 | const_iterator cend() const { return storage_.cend(); } 49 | 50 | void copy(const Array* other) { 51 | reshape(other->shape()); 52 | storage_ = other->storage_; 53 | } 54 | 55 | void move(Array* other) { 56 | reshape(other->shape()); 57 | other->collapse(); 58 | storage_ = std::move(other->storage_); 59 | other->storage_.clear(); 60 | } 61 | 62 | void reshape(const shape_type& shape) { 63 | Base::reshape(shape); 64 | storage_.resize(size()); 65 | } 66 | 67 | void clear() { 68 | storage_.clear(); 69 | collapse(); 70 | } 71 | 72 | private: 73 | storage_type storage_; 74 | }; 75 | 76 | } // namespace internal 77 | 78 | #endif // INTERNAL_ARRAY_HPP -------------------------------------------------------------------------------- /src/internals/internal_array_map.cpp: -------------------------------------------------------------------------------- 1 | #include "internal_array.hpp" 2 | 3 | #include "config.h" 4 | 5 | namespace internal { 6 | 7 | 8 | 9 | 10 | } 11 | -------------------------------------------------------------------------------- /src/internals/internal_base.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INTERNAL_BASE_HPP 2 | #define INTERNAL_BASE_HPP 3 | 4 | #include <cstddef> 5 | #include <vector> 6 | 7 | namespace internal { 8 | 9 | class Base { 10 | public: 11 | using size_type = std::size_t; 12 | using shape_type = std::vector<size_type>; 13 | 14 | Base() = default; 15 | 16 | Base(shape_type shape) : shape_(shape) { 17 | size_ = 1; 18 | for (auto& dimension : shape_) size_ *= dimension; 19 | } 20 | 21 | size_type size() const { return size_; } 22 | shape_type shape() const { return shape_; } 23 | size_type rank() const { return shape_.size(); } 24 | 25 | void reshape(shape_type shape) { 26 | shape_ = shape; 27 | size_ = 1; 28 | for (auto& dimension : shape_) size_ *= dimension; 29 | } 30 | 31 | void melt() { 32 | shape_.clear(); 33 | shape_.push_back(size_); 34 | } 35 | 36 | void collapse() { 37 | size_ = 0; 38 | shape_.clear(); 39 | } 40 | 41 | private: 42 | size_type size_ = 0; 43 | shape_type shape_; 44 | // shape_type strides_; 45 | }; 46 | 47 | } // namespace internal 48 | 49 | #endif // INTERNAL_BASE_HPP -------------------------------------------------------------------------------- /src/internals/internal_expression.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This is just an interface class for the
non-leaf nodes of the computational graph. 3 | */ 4 | 5 | #ifndef INTERNAL_EXPRESSION_HPP 6 | #define INTERNAL_EXPRESSION_HPP 7 | 8 | #include "internal_tensor.hpp" 9 | 10 | namespace internal { 11 | 12 | class Expression : public Tensor { 13 | public: 14 | ~Expression() override = default; 15 | 16 | protected: 17 | Expression() 18 | : Tensor(false) {} 19 | 20 | Expression(shape_type shape, bool gradient_requirement = false) 21 | : Tensor(shape, gradient_requirement, false) {} 22 | }; 23 | 24 | } // namespace internal 25 | 26 | #endif // INTERNAL_EXPRESSION_HPP -------------------------------------------------------------------------------- /src/internals/internal_graph.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | This is the computational graph class. It is a singleton class that stores the tensors that are 3 | created during the forward pass. The tensors are stored in a buffer and are deleted when the buffer 4 | is flushed. This makes it possible to preallocate the memory for the tensors before the forward pass, allowing 5 | lazy evaluation. This is useful when the forward pass is performed multiple times, for example in 6 | the training and testing phases of a neural network. 7 | */ 8 | 9 | #ifndef INTERNAL_GRAPH_HPP 10 | #define INTERNAL_GRAPH_HPP 11 | 12 | #include <cstddef> 13 | #include <memory> 14 | #include <vector> 15 | 16 | #include "internal_tensor.hpp" 17 | 18 | namespace internal { 19 | 20 | class Expression; 21 | 22 | class Graph { 23 | public: 24 | static Graph& instance() { static Graph buffer; return buffer; } 25 | static void add(std::shared_ptr<Tensor> tensor) { instance().buffer_.push_back(tensor); } 26 | static void flush() { instance().buffer_.clear(); } 27 | 28 | private: 29 | Graph() = default; 30 | ~Graph() = default; 31 | Graph(const Graph&) = delete; 32 | Graph(Graph&&) = delete; 33 | Graph& operator=(Graph&&) = delete; 34 | Graph& operator=(const Graph&) = delete; 35 | std::vector<std::shared_ptr<Tensor>> buffer_; 36 | }; 37 | 38 | } // namespace internal 39 | 40 | #endif // INTERNAL_GRAPH_HPP -------------------------------------------------------------------------------- /src/internals/internal_tensor.hpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | This is the main data structure of the library. It acts as a node of the computational graph. It is 4 | provided with virtual forward and backward methods that are used to perform the forward and backward 5 | passes of the data through the graph. Those methods are meant to be overridden when implementing 6 | different operations or functions as nodes of the computational graph. 7 | It is a multidimensional array that stores the metadata of the tensors and the gradients of 8 | the tensors. 9 | The requires_gradient_ flag is used to determine if the tensor needs to store the gradient or not. 10 | The is_leaf_ flag is used to determine if the tensor is a leaf node of the computational graph or 11 | not. The Tensor class is leaf by default. 12 | The gradient of the tensor is stored in the gradient_ pointer. If the tensor is a leaf 13 | node, then the Tensor class owns the gradient_ pointer and is responsible for its deletion. 14 | Since optional ownership cannot be expressed with smart pointers, the 15 | gradient_ pointer is a raw pointer. 16 | The gradient of the tensor is created only when the requires_gradient_ flag is set to true by the 17 | requires_gradient() method. The gradient_ pointer is set to nullptr by default. 18 | This will need a refactor in the future.
I don't like how the Tensor class is coupled to the 19 | Array class, and I would like to design some optional ownership mechanism that emulates Eigen::Map 20 | but with our own data structure, so people can map their own data structures to our Tensors and use them 21 | the way we are using Eigen now. These changes won't really affect the following code, but for now it works okay. 22 | */ 23 | 24 | #ifndef INTERNAL_TENSOR_HPP 25 | #define INTERNAL_TENSOR_HPP 26 | 27 | #include <algorithm> 28 | #include <memory> 29 | #include <vector> 30 | 31 | #include "internal_array.hpp" 32 | 33 | namespace internal { 34 | 35 | class Tensor : public Array<float> { 36 | public: 37 | Tensor(bool leaf_status = true) 38 | : Array<float>() { 39 | is_leaf_ = leaf_status; 40 | requires_gradient_ = false; 41 | } 42 | 43 | Tensor(shape_type shape, bool gradient_requirement = false, bool leaf_status = true) 44 | : Array<float>(shape) { 45 | is_leaf_ = leaf_status; 46 | requires_gradient_ = gradient_requirement; 47 | if (is_leaf_ && requires_gradient_) gradient_ = new Tensor(shape, false, false); 48 | } 49 | 50 | virtual ~Tensor() { if (is_leaf_ && requires_gradient_) delete gradient_; } 51 | Tensor(const Tensor* other) { copy(other); } 52 | Tensor(const Tensor& other) = delete; 53 | Tensor(Tensor&& other) = delete; 54 | Tensor& operator=(const Tensor& other) = delete; 55 | Tensor& operator=(Tensor&& other) = delete; 56 | 57 | 58 | void copy(const Tensor* other) { 59 | reshape(other->shape()); 60 | std::copy(other->begin(), other->end(), this->begin()); 61 | requires_gradient_ = other->requires_gradient_; 62 | 63 | if (requires_gradient_ ) { 64 | if (other->is_leaf_ && is_leaf_) { 65 | if (!gradient_) gradient_ = new Tensor(other->gradient_); 66 | else gradient_->copy(other->gradient_); 67 | } 68 | 69 | else { 70 | if (is_leaf_) delete gradient_; 71 | gradient_ = other->gradient_; 72 | } 73 | 74 | } 75 | 76 | else { 77 | if (is_leaf_) delete gradient_; 78 | gradient_ = nullptr; 79 | } 80 | 81 | is_leaf_ = other->is_leaf_; 82 | } 83 | 84 | void move(Tensor* other) { 85 | reshape(other->shape()); 86 | std::move(other->begin(), other->end(), this->begin()); 87 | other->clear(); 88 | if (is_leaf_) delete gradient_; 89 | is_leaf_ = other->is_leaf_; 90 | requires_gradient_ = other->requires_gradient_; 91 | gradient_ = other->gradient_; 92 | other->gradient_ = nullptr; 93 | } 94 | 95 | Tensor* gradient() const { return gradient_; } 96 | 97 | void requires_gradient(bool status) { 98 | if (requires_gradient_ == false && status == true) { 99 | requires_gradient_ = true; 100 | if (is_leaf_) gradient_ = new Tensor(this->shape(), false, false); 101 | } 102 | 103 | if (requires_gradient_ == true && status == false ) { 104 | requires_gradient_ = false; 105 | if (is_leaf_) delete gradient_; 106 | gradient_ = nullptr; 107 | } 108 | } 109 | 110 | bool is_leaf() const { return is_leaf_; } 111 | bool requires_gradient() const { return requires_gradient_; } 112 | 113 | virtual Tensor* forward() { return this; } 114 | virtual void backward(Tensor* gradient) const { gradient_->add(gradient); } 115 | 116 | void add(const Tensor* other); 117 | void multiply(const Tensor* other); 118 | 119 | protected: 120 | bool is_leaf_ = true; 121 | bool requires_gradient_ = false; 122 | 123 | private: 124 | Tensor* gradient_ = nullptr; 125 | }; 126 | 127 | } // namespace internal 128 | 129 | #endif // INTERNAL_TENSOR_HPP -------------------------------------------------------------------------------- /src/internals/operations/internal_operation_addition.cpp:
-------------------------------------------------------------------------------- 1 | #include "../config.h" 2 | #include "../internal_tensor.hpp" 3 | #include "../internal_array.hpp" 4 | #include "./internal_operations.hpp" 5 | 6 | #if defined(USE_EIGEN_BACKEND) 7 | 8 | namespace internal { 9 | 10 | void Tensor::add(const Tensor* other) { 11 | if(shape() != other->shape()) throw std::runtime_error("shape mismatch"); 12 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> this_map(data(), size()); 13 | Eigen::Map<const Eigen::Array<float, 1, Eigen::Dynamic>> other_map(other->data(), other->size()); 14 | this_map += other_map; 15 | } 16 | 17 | 18 | Addition::Addition(Tensor* first, Tensor* second) 19 | : Operation(first, second) { 20 | if(first->shape() != second->shape()) throw std::runtime_error("shape mismatch"); 21 | reshape(first->shape()); 22 | } 23 | 24 | Tensor* Addition::forward() { 25 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> this_map( 26 | this->data(), 27 | this->size() ); 28 | 29 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> first_operand_map( 30 | first_operand()->forward()->data(), 31 | first_operand()->size() ); 32 | 33 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> second_operand_map( 34 | second_operand()->forward()->data(), 35 | second_operand()->size() ); 36 | 37 | this_map = first_operand_map + second_operand_map; 38 | return this; 39 | } 40 | 41 | void Addition::backward(Tensor* gradient) const { 42 | if (first_operand()->requires_gradient()) { 43 | if (second_operand()->requires_gradient()) { 44 | Tensor* gradient_copy = new Tensor(gradient); 45 | first_operand()->backward(gradient_copy); 46 | delete gradient_copy; 47 | } 48 | 49 | else { 50 | first_operand()->backward(gradient); 51 | } 52 | } 53 | 54 | if (second_operand()->requires_gradient()) { 55 | second_operand()->backward(gradient); 56 | } 57 | } 58 | 59 | } // namespace internal 60 | 61 | #endif // USE_EIGEN_BACKEND -------------------------------------------------------------------------------- /src/internals/operations/internal_operation_matmul.cpp: -------------------------------------------------------------------------------- 1 | #include "../config.h" 2 | #include "../internal_array.hpp" 3 | #include "../internal_tensor.hpp" 4 | #include "internal_operations.hpp" 5 | 6 | #if defined(USE_EIGEN_BACKEND) 7 | 8 | namespace internal { 9 | 10 | Matmul::Matmul(Tensor* first, Tensor* second) 11 | : Operation(first, second) { 12 | if (first_operand()->rank() != 2 || second_operand()->rank() != 2) 13 | throw std::runtime_error("rank mismatch"); 14 | 15 | if (first_operand()->shape().back() != second_operand()->shape().front()) 16 | throw std::runtime_error("shape mismatch"); 17 | 18 | reshape({first_operand()->shape().front(), second_operand()->shape().back()}); 19 | } 20 | 21 | 22 | Tensor* Matmul::forward() { 23 | 24 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> this_map( 25 | this->data(), 26 | rows_dimension(), 27 | columns_dimension() ); 28 | 29 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> first_operand_map( 30 | first_operand()->forward()->data(), 31 | rows_dimension(), 32 | inner_dimension() ); 33 | 34 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> second_operand_map( 35 | second_operand()->forward()->data(), 36 | inner_dimension(), 37 | columns_dimension() ); 38 | 39 | this_map = first_operand_map * second_operand_map; 40 | return this; 41 | } 42 | 43 | 44 | void Matmul::backward(Tensor* gradient) const { 45 | 46 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> gradient_map( 47 | gradient->data(), 48 | rows_dimension(), 49 | columns_dimension() ); 50 | 51 | if (first_operand()->requires_gradient()) { 52 | Tensor* first_gradient = new Tensor({rows_dimension(), inner_dimension()}, false, false); 53 | 54 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> second_map( 55 | second_operand()->data(), 56 | inner_dimension(), 57 | columns_dimension() );
58 | 59 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> first_gradient_map( 60 | first_gradient->data(), 61 | rows_dimension(), 62 | inner_dimension() ); 63 | 64 | first_gradient_map = gradient_map * second_map.transpose(); 65 | first_operand()->backward(first_gradient); 66 | delete first_gradient; 67 | } 68 | 69 | if (second_operand()->requires_gradient()) { 70 | Tensor* second_gradient = new Tensor({inner_dimension(), columns_dimension()}, false, false); 71 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> first_map( 72 | first_operand()->data(), 73 | rows_dimension(), 74 | inner_dimension() ); 75 | 76 | Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> second_gradient_map( 77 | second_gradient->data(), 78 | inner_dimension(), 79 | columns_dimension() ); 80 | 81 | second_gradient_map = first_map.transpose() * gradient_map; 82 | second_operand()->backward(second_gradient); 83 | delete second_gradient; 84 | } 85 | } 86 | 87 | } // namespace internal 88 | 89 | #endif // USE_EIGEN_BACKEND -------------------------------------------------------------------------------- /src/internals/operations/internal_operation_multiplication.cpp: -------------------------------------------------------------------------------- 1 | #include "../config.h" 2 | #include "../internal_tensor.hpp" 3 | #include "../internal_array.hpp" 4 | #include "./internal_operations.hpp" 5 | 6 | #if defined(USE_EIGEN_BACKEND) 7 | 8 | namespace internal { 9 | 10 | void Tensor::multiply(const Tensor* other) { 11 | if(shape() != other->shape()) throw std::runtime_error("shape mismatch"); 12 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> this_map(data(), size()); 13 | Eigen::Map<const Eigen::Array<float, 1, Eigen::Dynamic>> other_map(other->data(), other->size()); 14 | this_map *= other_map; 15 | } 16 | 17 | Multiplication::Multiplication(Tensor* first, Tensor* second) 18 | : Operation(first, second) { 19 | if(first->shape() != second->shape()) throw std::runtime_error("shape mismatch"); 20 | reshape(first->shape()); 21 | } 22 | 23 | Tensor* Multiplication::forward() { 24 | 25 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> this_map( 26 | this->data(), 27 | this->size() ); 28 | 29 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> first_operand_map( 30 | first_operand()->forward()->data(), 31 | first_operand()->size() ); 32 | 33 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> second_operand_map( 34 | second_operand()->forward()->data(), 35 | second_operand()->size() ); 36 | 37 | this_map = first_operand_map * second_operand_map; 38 | return this; 39 | } 40 | 41 | void Multiplication::backward(Tensor* gradient) const { 42 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> gradient_map(gradient->data(), gradient->size()); 43 | 44 | if (first_operand()->requires_gradient()) { 45 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> second_operand_map( 46 | second_operand()->data(), 47 | second_operand()->size() 48 | ); 49 | 50 | if (second_operand()->requires_gradient()) { 51 | Tensor* gradient_copy = new Tensor(gradient); 52 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> gradient_copy_map( 53 | gradient_copy->data(), 54 | gradient_copy->size() 55 | ); 56 | gradient_copy_map *= second_operand_map; 57 | first_operand()->backward(gradient_copy); 58 | delete gradient_copy; 59 | } 60 | 61 | else { 62 | gradient_map *= second_operand_map; 63 | first_operand()->backward(gradient); 64 | } 65 | } 66 | 67 | if (second_operand()->requires_gradient()) { 68 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> first_operand_map( 69 | first_operand()->data(), 70 | first_operand()->size() 71 | ); 72 | 73 | gradient_map *= first_operand_map; 74 | second_operand()->backward(gradient); 75 | } 76 | } 77 | 78 | } // namespace internal 79 | 80 | #endif // USE_EIGEN_BACKEND -------------------------------------------------------------------------------- /src/internals/operations/internal_operations.hpp: -------------------------------------------------------------------------------- 1 |
/* 2 | 3 | These classes are non-leaf nodes representing operations. They are used to build the computational graph. 4 | An operation should be implemented by overriding the forward and backward methods of the Tensor base class. 5 | The forward method should return the "this" pointer, and the backward method should pass the gradient 6 | to the inputs. 7 | 8 | For implementing operations use the data() method to access the data of the tensor and map it to some 9 | data structure of some library. In this case I used Eigen::Map to avoid any unnecessary copy of the data 10 | when performing the operations, and to be able to use the optimized Eigen library for the operations. 11 | 12 | */ 13 | 14 | 15 | #ifndef INTERNAL_OPERATION_HPP 16 | #define INTERNAL_OPERATION_HPP 17 | 18 | #include "../internal_tensor.hpp" 19 | #include "../internal_expression.hpp" 20 | 21 | namespace internal { 22 | 23 | class Operation : public Expression { 24 | public: 25 | Operation(Tensor* first, Tensor* second) { 26 | first_operand_ = first; 27 | second_operand_ = second; 28 | requires_gradient(first->requires_gradient() || second->requires_gradient()); 29 | } 30 | 31 | Tensor* first_operand() const { return first_operand_; } 32 | Tensor* second_operand() const { return second_operand_; } 33 | 34 | private: 35 | Tensor* first_operand_; 36 | Tensor* second_operand_; 37 | }; 38 | 39 | class Addition : public Operation { 40 | public: 41 | Addition(Tensor* first, Tensor* second); 42 | Tensor* forward() final; 43 | void backward(Tensor* gradient) const final; 44 | }; 45 | 46 | 47 | class Multiplication : public Operation { 48 | public: 49 | Multiplication(Tensor* first, Tensor* second); 50 | Tensor* forward() final; 51 | void backward(Tensor* gradient) const final; 52 | }; 53 | 54 | class Matmul : public Operation { 55 | public: 56 | Matmul(Tensor* first, Tensor* second); 57 | 58 | Tensor* forward() final; 59 | void backward(Tensor* gradient) const final; 60 | 61 | size_type rows_dimension() const { return first_operand()->shape().front(); } 62 | size_type inner_dimension() const { return first_operand()->shape().back(); } 63 | size_type columns_dimension() const { return second_operand()->shape().back(); } 64 | }; 65 | 66 | } // namespace internal 67 | 68 | #endif // INTERNAL_OPERATION_HPP -------------------------------------------------------------------------------- /src/layers.cpp: -------------------------------------------------------------------------------- 1 | #include "CaberNet/tensor.h" 2 | #include "CaberNet/layers.h" 3 | 4 | #include "internals/functions/internal_functions.hpp" 5 | 6 | 7 | namespace net::layer { 8 | 9 | /// constructors 10 | 11 | Linear::~Linear() = default; 12 | 13 | Linear::Linear(size_type input_features, size_type output_features, initializer distribution) 14 | : weight_(shape_type{output_features, input_features}, true), 15 | bias_(shape_type{1, output_features}, true) { 16 | weight_.fill(distribution); 17 | bias_.fill(0.0f); 18 | } 19 | 20 | Softmax::Softmax(int axis) : axis(axis) {} 21 | LogSoftmax::LogSoftmax(int axis) : axis(axis) {} 22 | 23 | /// settings 24 | 25 | void Linear::set_optimizer(std::shared_ptr<base::Optimizer> optimizer) { 26 | optimizer->add_parameter(weight_.internal()); 27 | optimizer->add_parameter(bias_.internal()); 28 | } 29 | 30 | /// forward methods 31 | 32 | Tensor<float> Linear::forward(Tensor<float> input) { 33 | return Tensor<float>(std::make_shared<internal::Linear>(input.internal(), weight_.internal(), bias_.internal())); 34 | 35 | } 36 | 37 | Tensor<float> ReLU::forward(Tensor<float> input) { 38 | return
Tensor<float>(std::make_shared<internal::ReLU>(input.internal())); 39 | } 40 | 41 | Tensor<float> Softmax::forward(Tensor<float> input) { 42 | return Tensor<float>(std::make_shared<internal::Softmax>(input.internal(), axis)); 43 | } 44 | 45 | Tensor<float> LogSoftmax::forward(Tensor<float> input) { 46 | return Tensor<float>(std::make_shared<internal::LogSoftmax>(input.internal(), axis)); 47 | } 48 | 49 | } // namespace net::layer 50 | -------------------------------------------------------------------------------- /src/optimizers.cpp: -------------------------------------------------------------------------------- 1 | #include "CaberNet/optimizers.h" 2 | 3 | #include "internals/config.h" 4 | #include "internals/internal_tensor.hpp" 5 | 6 | 7 | #if defined(USE_EIGEN_BACKEND) 8 | 9 | namespace net::optimizer { 10 | 11 | void SGD::update(internal::Tensor* parameter) { 12 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> parameter_map(parameter->data(), parameter->size()); 13 | Eigen::Map<Eigen::Array<float, 1, Eigen::Dynamic>> parameter_gradient_map(parameter->gradient()->data(), parameter->size()); 14 | parameter_map -= learning_rate_ * parameter_gradient_map; 15 | parameter_gradient_map = 0; 16 | } 17 | 18 | } // namespace net::optimizer 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/tensor.cpp: -------------------------------------------------------------------------------- 1 | #include "CaberNet/tensor.h" 2 | 3 | #include "internals/internal_tensor.hpp" 4 | #include "internals/internal_graph.hpp" 5 | #include "internals/operations/internal_operations.hpp" 6 | 7 | namespace net { 8 | 9 | 10 | std::ostream& operator<<(std::ostream& ostream, const Tensor<float>& tensor) { 11 | ostream << "["; 12 | for (auto element : tensor) ostream << element << ", "; 13 | ostream << "]"; 14 | return ostream; 15 | } 16 | 17 | std::ostream& operator<<(std::ostream& ostream, const Tensor<int>& tensor) { 18 | ostream << "["; 19 | for (auto element : tensor) ostream << element << ", "; 20 | ostream << "]"; 21 | return ostream; 22 | } 23 | 24 | Tensor<float> operator + (const Tensor<float>& first, const Tensor<float>& second) { 25 | return Tensor<float>(std::make_shared<internal::Addition>( first.internal(), second.internal() )); 26 | } 27 | 28 | Tensor<float> operator * (const Tensor<float>& first, const Tensor<float>& second) { 29 | return Tensor<float>(std::make_shared<internal::Multiplication>( first.internal(), second.internal() )); 30 | } 31 | 32 | Tensor<float> matmul(const Tensor<float>& first, const Tensor<float>& second) { 33 | return Tensor<float>(std::make_shared<internal::Matmul>( first.internal(), second.internal() )); 34 | } 35 | 36 | } // namespace net -------------------------------------------------------------------------------- /src/tensor/tensor_float.cpp: -------------------------------------------------------------------------------- 1 | #include "CaberNet/tensor/tensor_float.h" 2 | #include "CaberNet/tensor.h" 3 | 4 | #include "../internals/internal_tensor.hpp" 5 | #include "../internals/internal_graph.hpp" 6 | #include "../internals/operations/internal_operations.hpp" 7 | 8 | namespace net { 9 | 10 | TensorFloat::TensorFloat(std::shared_ptr<internal::Tensor> tensor) { 11 | tensor_ = tensor; 12 | internal::Graph::add(tensor_); 13 | } 14 | 15 | TensorFloat::TensorFloat(shape_type shape, bool gradient_requirement, bool detached) { 16 | tensor_ = std::make_shared<internal::Tensor>(shape); 17 | tensor_->requires_gradient(gradient_requirement); 18 | if(!detached) internal::Graph::add(tensor_); 19 | } 20 | 21 | TensorFloat::TensorFloat(shape_type shape, requires_gradient gradient_requirement, bool detached) { 22 | tensor_ = std::make_shared<internal::Tensor>(shape); 23 | tensor_->requires_gradient(static_cast<bool>(gradient_requirement)); 24 | if(!detached) internal::Graph::add(tensor_); 25 | } 26 | 27 | void TensorFloat::reshape(shape_type
shape) { 28 | if(tensor_ == nullptr) tensor_ = std::make_shared<internal::Tensor>(shape, false, false); 29 | tensor_->reshape(shape); 30 | } 31 | 32 | Tensor<float> TensorFloat::gradient() const { 33 | Tensor<float> gradient = std::make_shared<internal::Tensor>(shape(), false); 34 | std::copy(tensor_->gradient()->begin(), tensor_->gradient()->end(), gradient.begin()); 35 | return gradient; 36 | } 37 | 38 | internal::Tensor* TensorFloat::internal() const { return tensor_.get(); } 39 | internal::Tensor* TensorFloat::internal() { return tensor_.get(); } 40 | 41 | void TensorFloat::backward(const Tensor<float>& gradient) { tensor_->backward(gradient.internal()); } 42 | void TensorFloat::perform() { tensor_->forward(); } // TODO : this should have a return type. 43 | 44 | void TensorFloat::copy(internal::Tensor* other) { tensor_->copy(other); } 45 | 46 | TensorFloat::iterator TensorFloat::begin() { return tensor_->begin(); } 47 | TensorFloat::iterator TensorFloat::end() { return tensor_->end(); } 48 | TensorFloat::const_iterator TensorFloat::begin() const { return tensor_->begin(); } 49 | TensorFloat::const_iterator TensorFloat::end() const { return tensor_->end(); } 50 | TensorFloat::const_iterator TensorFloat::cbegin() const { return tensor_->cbegin(); } 51 | TensorFloat::const_iterator TensorFloat::cend() const { return tensor_->cend(); } 52 | 53 | TensorFloat::pointer TensorFloat::data() { return tensor_->data(); } 54 | TensorFloat::const_pointer TensorFloat::data() const { return tensor_->data(); } 55 | TensorFloat::shape_type TensorFloat::shape() const { return tensor_->shape(); } 56 | TensorFloat::size_type TensorFloat::rank() const { return tensor_->rank(); } 57 | 58 | void TensorFloat::fill(initializer distribution) { 59 | distribution::Distribution<float>* filler = nullptr; 60 | switch (distribution) { 61 | case initializer::He : 62 | filler = new distribution::Normal<float>(0, std::sqrt(2.0 / shape().back())); 63 | for (auto& element : *this) element = filler->generate(); 64 | break; 65 | 66 | default : 67 | throw std::runtime_error("Invalid initializer"); 68 | break; 69 | } 70 | 71 | delete filler; 72 | } 73 | 74 | void TensorFloat::fill(value_type value) { 75 | std::fill(tensor_->begin(), tensor_->end(), value); 76 | } 77 | 78 | void TensorFloat::fill(std::vector<value_type> values) { 79 | std::move(values.begin(), values.end(), tensor_->begin()); 80 | } 81 | 82 | } // namespace net 83 | 84 | -------------------------------------------------------------------------------- /src/tensor/tensor_int.cpp: -------------------------------------------------------------------------------- 1 | #include "CaberNet/tensor/tensor_int.h" 2 | #include "CaberNet/tensor.h" 3 | 4 | #include "../internals/internal_array.hpp" 5 | 6 | namespace net { 7 | 8 | TensorInt::TensorInt(std::shared_ptr<internal::Array<int>> subscripts) { 9 | data_ = subscripts; 10 | } 11 | 12 | TensorInt::TensorInt(shape_type shape) { 13 | data_ = std::make_shared<internal::Array<int>>(shape); 14 | } 15 | 16 | void TensorInt::reshape(shape_type shape) { 17 | data_->reshape(shape); 18 | } 19 | 20 | void TensorInt::fill(value_type value) { 21 | std::fill(data_->begin(), data_->end(), value); 22 | } 23 | 24 | void TensorInt::fill(std::vector<value_type> values) { 25 | std::move(values.begin(), values.end(), data_->begin()); 26 | } 27 | 28 | void TensorInt::copy(internal::Array<int>* other) { 29 | data_->copy(other); 30 | } 31 | 32 | internal::Array<int>* TensorInt::internal() const { return data_.get(); } 33 | internal::Array<int>* TensorInt::internal() { return data_.get(); } 34 | 35 | TensorInt::iterator TensorInt::begin() { return data_->begin(); } 36 | TensorInt::iterator
void TensorFloat::fill(initializer distribution) {
    distribution::Distribution* filler = nullptr;
    switch (distribution) {
        case initializer::He :
            filler = new distribution::Normal(0, std::sqrt(2.0 / shape().back()));
            for (auto& element : *this) element = filler->generate();
            break;

        default :
            throw std::runtime_error("Invalid initializer");
    }

    delete filler;
}

void TensorFloat::fill(value_type value) {
    std::fill(tensor_->begin(), tensor_->end(), value);
}

void TensorFloat::fill(std::vector<value_type> values) {
    std::move(values.begin(), values.end(), tensor_->begin());
}

} // namespace net
--------------------------------------------------------------------------------
/src/tensor/tensor_int.cpp:
--------------------------------------------------------------------------------
#include "CaberNet/tensor/tensor_int.h"
#include "CaberNet/tensor.h"

#include "../internals/internal_array.hpp"

namespace net {

TensorInt::TensorInt(std::shared_ptr<internal::Array<int>> subscripts) {
    data_ = subscripts;
}

TensorInt::TensorInt(shape_type shape) {
    data_ = std::make_shared<internal::Array<int>>(shape);
}

void TensorInt::reshape(shape_type shape) {
    data_->reshape(shape);
}

void TensorInt::fill(value_type value) {
    std::fill(data_->begin(), data_->end(), value);
}

void TensorInt::fill(std::vector<value_type> values) {
    std::move(values.begin(), values.end(), data_->begin());
}

void TensorInt::copy(internal::Array<int>* other) {
    data_->copy(other);
}

internal::Array<int>* TensorInt::internal() const { return data_.get(); }
internal::Array<int>* TensorInt::internal() { return data_.get(); }

TensorInt::iterator TensorInt::begin() { return data_->begin(); }
TensorInt::iterator TensorInt::end() { return data_->end(); }
TensorInt::const_iterator TensorInt::begin() const { return data_->cbegin(); }
TensorInt::const_iterator TensorInt::end() const { return data_->cend(); }
TensorInt::const_iterator TensorInt::cbegin() const { return data_->cbegin(); }
TensorInt::const_iterator TensorInt::cend() const { return data_->cend(); }

TensorInt::pointer TensorInt::data() { return data_->data(); }
TensorInt::const_pointer TensorInt::data() const { return data_->data(); }
TensorInt::shape_type TensorInt::shape() const { return data_->shape(); }
TensorInt::size_type TensorInt::rank() const { return data_->rank(); }

} // namespace net
--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
find_package(GTest REQUIRED CONFIG)
include(GoogleTest)

add_executable(${PROJECT_NAME}Tests criterions.cpp functions.cpp operations.cpp optimizers.cpp)
target_link_libraries(${PROJECT_NAME}Tests PRIVATE ${PROJECT_NAME} GTest::gtest GTest::gtest_main GTest::gmock)
target_compile_features(${PROJECT_NAME}Tests PRIVATE cxx_std_17)
gtest_add_tests(TARGET ${PROJECT_NAME}Tests)
--------------------------------------------------------------------------------
/tests/criterions.cpp:
--------------------------------------------------------------------------------
#include "CaberNet.h"
#include <gtest/gtest.h>

TEST(criterion, loss) {
    /* Equivalent pytorch code:

    import torch
    import torch.nn.functional as F

    # Example data: three samples, five classes.
    X = torch.tensor([[-1.0, 2.0, -0.5, 1.0, 3.0], [0.5, 1.0, 2.0, -1.0, -2.0], [2.0, 1.0, -1.0, 0.5, -0.5]])
    y = torch.tensor([1, 3, 0])  # target labels

    # Compute the negative log likelihood (NLL) loss
    X = F.log_softmax(X, 1)
    X = F.nll_loss(X, y)
    print(X)
    */

    net::Tensor<float> X({3, 5}, false); X.fill(
        {
            -1.0, 2.0, -0.5, 1.0, 3.0,
             0.5, 1.0, 2.0, -1.0, -2.0,
             2.0, 1.0, -1.0, 0.5, -0.5
        }
    );

    net::Tensor<int> y({3, 1}); y.fill({1, 3, 0});
    X = net::function::log_softmax(X, 1);

    net::criterion::NLLLoss criterion(X, y);
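    // By hand, NLL loss with mean reduction picks out the log-probability of the
    // target class in each row and averages the negations:
    //   loss = -(log_softmax(X)[0,1] + log_softmax(X)[1,3] + log_softmax(X)[2,0]) / 3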
    // The result should be 1.8298835 (PyTorch prints tensor(1.8299)).
    EXPECT_FLOAT_EQ(1.8298835f, criterion.loss());
}
--------------------------------------------------------------------------------
/tests/function_softmax.cpp:
--------------------------------------------------------------------------------
#include <CaberNet.h>
#include <iostream>

/* Equivalent pytorch code:

x = torch.tensor([[1,2,3],[-1,3,2]], dtype = torch.float32, requires_grad = True)
y = F.log_softmax(x, dim = 0)
I = torch.tensor([[1,3,1],[1,2,1]], dtype = torch.float32)
print(y)

y.backward(I)

print(x.grad)

*/

// Scratch driver with a main() of its own; it is not part of the GTest
// executable defined in tests/CMakeLists.txt.
int main() {
    /*
    net::Tensor<float> x({2, 3}, true); x.fill({1,2,3,-1,3,2});
    net::Tensor<float> I({2, 3}); I.fill({1,3,1,1,2,1});

    net::Tensor<float> y({2,3});

    y = net::function::log_softmax(x, 1);
    y.perform();
    y.backward(I);

    std::cout << "x: " << x << std::endl;
    std::cout << "y: " << y << std::endl;
    std::cout << "x.grad: " << x.gradient() << std::endl;
    */

    net::Tensor<float> x({2,3}, false); x.fill({1,2,3,4,5,6});
    net::Tensor<float> w({4,3}, true); w.fill({1,2,-3,4,5,6,7,8,-9,10,11,-12});
    net::Tensor<float> b({1,4}, true); b.fill({1,2,3,4});
    net::Tensor<float> I({2,4}, false); I.fill(1);

    x = net::function::linear(x, w, b);
    x = net::function::relu(x);
    x = net::function::log_softmax(x, 1);

    x.perform();
    x.backward(I);

    std::cout << "x: " << x << std::endl;
    std::cout << "w.grad: " << w.gradient() << std::endl;
    std::cout << "b.grad: " << b.gradient() << std::endl;
}
--------------------------------------------------------------------------------
/tests/functions.cpp:
--------------------------------------------------------------------------------
#include "CaberNet.h"
#include <gtest/gtest.h>
#include <gmock/gmock.h>

using ::testing::ElementsAre;

TEST(functions, gradient) {
    /* Equivalent pytorch code:

    import torch
    import torch.nn.functional as F

    # Initialize tensors
    x = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32, requires_grad=False)
    w = torch.tensor([[1, 2, -3], [4, 5, 6], [7, 8, -9], [10, 11, -12]], dtype=torch.float32, requires_grad=True)
    b = torch.tensor([1, 2, 3, 4], dtype=torch.float32, requires_grad=True)
    I = torch.tensor([[1,1,1,1],[1,1,1,1]], dtype=torch.float32)

    # Perform linear operation using torch.nn.functional.linear
    x = F.linear(x, w, b)
    x = F.relu(x)
    x.backward(I)

    # Print the result
    print(x)
    print(w.grad)
    print(b.grad)
    */

    net::Tensor<float> x({2,3}, false); x.fill({1,2,3,4,5,6});
    net::Tensor<float> w({4,3}, true); w.fill({1,2,-3,4,5,6,7,8,-9,10,11,-12});
    net::Tensor<float> b({1,4}, true); b.fill({1,2,3,4});
    net::Tensor<float> I({2,4}, false); I.fill(1);

    x = net::function::linear(x, w, b);
    x = net::function::relu(x);
    x.perform();
    x.backward(I);

    /*
    Results should be:
    [0, 34, 0, 0, 0, 79, 17, 27, ]
    [0, 0, 0, 5, 7, 9, 4, 5, 6, 4, 5, 6, ]
    [0, 2, 1, 1, ]
    */
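    // Why: ReLU zeroes every unit whose pre-activation is negative, and the
    // upstream gradient I is all ones, so b.grad counts the batch rows in which
    // each output unit stayed positive ([0, 2, 1, 1]), and row j of w.grad is
    // the sum of the input rows that kept unit j active.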
    EXPECT_THAT(x, ElementsAre(0, 34, 0, 0, 0, 79, 17, 27));
    EXPECT_THAT(w.gradient(), ElementsAre(0, 0, 0, 5, 7, 9, 4, 5, 6, 4, 5, 6));
    EXPECT_THAT(b.gradient(), ElementsAre(0, 2, 1, 1));
}
--------------------------------------------------------------------------------
/tests/operations.cpp:
--------------------------------------------------------------------------------
#include "CaberNet.h"
#include <gtest/gtest.h>
#include <gmock/gmock.h>

using ::testing::ElementsAre;

TEST(operations, matmul) {
    /* Equivalent pytorch code:

    import torch

    # Initialize tensors
    x = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32, requires_grad=False)
    y = torch.tensor([[1, 1, 1], [-1, -1, -1]], dtype=torch.float32, requires_grad=True)
    z = torch.tensor([[1, 1, 1], [1, 1, 1]], dtype=torch.float32, requires_grad=True)
    I = torch.tensor([[1, 1, 1], [1, 1, 1]], dtype=torch.float32, requires_grad=False)
    w = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.float32, requires_grad=True)

    # Perform operations
    x = x + I
    x = torch.matmul(x, w)
    x = x * z + y * z + z * y
    x.backward(I)

    # Print results
    print("x : ", x)
    print("Jy: ", y.grad)
    print("Jz: ", z.grad)
    print("Jw: ", w.grad)
    */

    net::Tensor<float> x({2,3}, false); x.fill({1,2,3,4,5,6});
    net::Tensor<float> y({2,3}, true); y.fill({1,1,1,-1,-1,-1});
    net::Tensor<float> z({2,3}, true); z.fill(1);
    net::Tensor<float> I({2,3}, false); I.fill(1);
    net::Tensor<float> w({3,3}, true); w.fill({1,2,3,4,5,6,7,8,9});

    x = x + I;
    x = net::matmul(x, w);
    x = x * z + y * z + z * y;
    x.perform();
    x.backward(I);

    /* Results should be:
    x : [44, 53, 62, 76, 94, 112]
    Jy: [2, 2, 2, 2, 2, 2]      (y appears twice, each time multiplied by z = 1)
    Jz: [44, 53, 62, 76, 94, 112]
    Jw: [7, 7, 7, 9, 9, 9, 11, 11, 11]
    */

    EXPECT_THAT(x, ElementsAre(44, 53, 62, 76, 94, 112));
    EXPECT_THAT(y.gradient(), ElementsAre(2, 2, 2, 2, 2, 2));
    EXPECT_THAT(z.gradient(), ElementsAre(44, 53, 62, 76, 94, 112));
    EXPECT_THAT(w.gradient(), ElementsAre(7, 7, 7, 9, 9, 9, 11, 11, 11));
}
--------------------------------------------------------------------------------
/tests/optimizers.cpp:
--------------------------------------------------------------------------------
#include "CaberNet.h"
#include <gtest/gtest.h>
#include <gmock/gmock.h>

using ::testing::ElementsAre;

TEST(optimizer, sgd) {

    net::Tensor<float> X({2,2}, true); X.fill(1);
    net::Tensor<float> I({2,2}); I.fill(1);
    X.backward(I);

    // One SGD step with learning rate 0.1 and unit gradients: 1 - 0.1 * 1 = 0.9.
    net::optimizer::SGD optimizer(0.1);
    optimizer.add_parameter(X.internal());
    optimizer.step();

    EXPECT_THAT(X, ElementsAre(0.9, 0.9, 0.9, 0.9));
}
--------------------------------------------------------------------------------