├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── examples ├── CMakeLists.txt ├── autograd.cpp └── xor.cpp ├── include └── af │ ├── autograd.h │ ├── autograd │ ├── Functions.hpp │ └── Variable.hpp │ ├── nn.h │ ├── nn │ ├── Init.hpp │ ├── Modules.hpp │ └── Modules │ │ ├── Activations.hpp │ │ ├── Container.hpp │ │ ├── Dropout.hpp │ │ ├── Linear.hpp │ │ ├── Loss.hpp │ │ └── Module.hpp │ ├── optim.h │ └── optim │ └── Optimizers.hpp └── src ├── autograd ├── Functions.cpp └── Variable.cpp ├── nn ├── Init.cpp └── Modules │ ├── Activations.cpp │ ├── Container.cpp │ ├── Dropout.cpp │ ├── Linear.cpp │ ├── Loss.cpp │ └── Module.cpp └── optim └── Optimizers.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | 30 | build 31 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5.1) 2 | 3 | project(ArrayFireML 4 | VERSION 0.1.0 5 | LANGUAGES C CXX) 6 | 7 | find_package(ArrayFire REQUIRED) 8 | 9 | add_library(afml SHARED "") 10 | 11 | target_sources(afml 12 | PRIVATE 13 | src/autograd/Functions.cpp 14 | src/autograd/Variable.cpp 15 | src/nn/Modules/Activations.cpp 16 | src/nn/Modules/Container.cpp 17 | src/nn/Modules/Linear.cpp 18 | src/nn/Modules/Loss.cpp 19 | src/nn/Modules/Module.cpp 20 | src/nn/Modules/Dropout.cpp 21 | src/nn/Init.cpp 22 | src/optim/Optimizers.cpp 23 | ) 24 | 25 | target_include_directories(afml 26 | PUBLIC 27 | ${CMAKE_CURRENT_SOURCE_DIR}/include) 28 | 29 | target_link_libraries(afml PUBLIC ArrayFire::af) 30 | 31 | set_target_properties(afml 32 | PROPERTIES 33 | VERSION "${ArrayFireML_VERSION}" 34 | SOVERSION "${ArrayFireML_VERSION_MAJOR}" 35 | CXX_STANDARD 11) 36 | 37 | add_subdirectory(examples) 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, ArrayFire 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of arrayfire_ml nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ArrayFire ML 2 | 3 | 4 | ArrayFire ML is a C++ machine learning library built on top of the [ArrayFire library](https://github.com/arrayfire/arrayfire). This library leverages ArrayFire's cross-platform support to provide high-performance machine learning algorithms for multi-core CPUs, NVIDIA and AMD GPUs, and other accelerators. 5 | 6 | [Slack Channel](https://join.slack.com/t/arrayfire-org/shared_invite/enQtMjI4MjIzMDMzMTczLWM4ODIyZjA3YmY3NWEwMjk2N2Q0YTQyNGMwZmU4ZjkxNGU0MjYzYmUzYTg3ZTM0MDQxOTE2OTJjNGVkOGEwN2M) 7 | 8 | This project is currently under active development. Please follow [this issue](https://github.com/arrayfire/arrayfire_ml/issues/3) for tracking our progress. 9 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | function(build_example SRC) 2 | get_filename_component(src_name ${SRC} NAME_WE) 3 | set(target "${src_name}") 4 | add_executable(${target} ${SRC}) 5 | target_link_libraries(${target} 6 | PRIVATE 7 | afml 8 | ) 9 | target_compile_features(${target} 10 | PRIVATE cxx_range_for) 11 | endfunction(build_example) 12 | 13 | # build_example(Activations.cpp) 14 | # build_example(FFNet.cpp) 15 | # build_example(Node.cpp) 16 | build_example(xor.cpp) 17 | # build_example(Weights.cpp) 18 | build_example(autograd.cpp) 19 | -------------------------------------------------------------------------------- /examples/autograd.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #define VERIFY(VAL) do { \ 16 | auto res = af::allTrue(af::abs(VAL) < 1E-5); \ 17 | printf("%s:%d %s\n", __FUNCTION__, __LINE__, \ 18 | res ? 
"PASS" : "FAIL"); \ 19 | } while(0) 20 | 21 | using af::autograd::Variable; 22 | void test_multiply() 23 | { 24 | auto x = Variable(af::randu(5), true); 25 | auto y = x * x; 26 | auto dy = Variable(af::constant(1.0, 5), false); 27 | y.backward(dy); 28 | auto dx = x.grad(); 29 | VERIFY(dx.array() - 2 * x.array()); 30 | } 31 | 32 | void test_multipl_add() 33 | { 34 | auto x = Variable(af::randu(5), true); 35 | auto y = Variable(af::randu(5), true); 36 | auto z = x * x + x * y + y * y; 37 | auto dz = Variable(af::constant(1.0, 5), false); 38 | z.backward(dz); 39 | auto dx = x.grad(); 40 | auto dy = y.grad(); 41 | VERIFY(dx.array() - 2 * x.array() - y.array()); 42 | VERIFY(dy.array() - 2 * y.array() - x.array()); 43 | } 44 | 45 | void test_no_calc_grad() 46 | { 47 | auto x = Variable(af::randu(5), false); 48 | auto y = Variable(af::randu(5), true); 49 | auto z = x * x + x * y + y * y; 50 | auto dz = Variable(af::constant(1.0, 5), false); 51 | z.backward(dz); 52 | auto dy = y.grad(); 53 | VERIFY(dy.array() - 2 * y.array() - x.array()); 54 | try { 55 | auto dx = x.grad(); 56 | } catch(af::exception &ex) { 57 | std::cout << ex.what() << std::endl; 58 | return; 59 | } 60 | printf("%s:%d No Gradient check Failed\n"); 61 | } 62 | 63 | void test_multiply_sub() 64 | { 65 | auto x = Variable(af::randu(5), true); 66 | auto y = Variable(af::randu(5), true); 67 | auto z = x * x - x * y; 68 | auto dz = Variable(af::constant(1.0, 5), false); 69 | z.backward(dz); 70 | auto dx = x.grad(); 71 | auto dy = y.grad(); 72 | VERIFY(dx.array() - (2 * x.array() - y.array())); 73 | VERIFY(dy.array() - (-x.array())); 74 | } 75 | 76 | void test_divide_add() 77 | { 78 | auto x = Variable(af::randu(5), true); 79 | auto y = Variable(af::randu(5), true); 80 | auto z = x + x / y + y; 81 | auto dz = Variable(af::constant(1.0, 5), false); 82 | z.backward(dz); 83 | auto dx = x.grad(); 84 | auto dy = y.grad(); 85 | VERIFY(dx.array() - (1.0 + 1.0 / y.array())); 86 | VERIFY(dy.array() - (1.0 - x.array() / (y.array() * y.array()))); 87 | } 88 | 89 | void test_multiply_add_scalar() 90 | { 91 | auto x = Variable(af::randu(5), true); 92 | auto y = Variable(af::randu(5), true); 93 | auto z = 2 * x + x * y + y; 94 | auto dz = Variable(af::constant(1.0, 5), false); 95 | z.backward(dz); 96 | auto dx = x.grad(); 97 | auto dy = y.grad(); 98 | VERIFY(dx.array() - (2.0 + y.array())); 99 | VERIFY(dy.array() - (1.0 + x.array())); 100 | } 101 | 102 | void test_exp() 103 | { 104 | auto x = Variable(af::randu(5), true); 105 | auto y = exp(x); 106 | auto dy = Variable(af::constant(1.0, 5), false); 107 | y.backward(dy); 108 | auto dx = x.grad(); 109 | VERIFY(dx.array() - (af::exp(x.array()))); 110 | } 111 | 112 | void test_sigmoid() 113 | { 114 | auto x = Variable(af::randu(5), true); 115 | auto y = sigmoid(x); 116 | auto dy = Variable(af::constant(1.0, 5), false); 117 | y.backward(dy); 118 | auto dx = x.grad(); 119 | VERIFY(dx.array() - (y.array() * (1 - y.array()))); 120 | VERIFY(dx.array() - (af::sigmoid(x.array()) * (1 - af::sigmoid(x.array())))); 121 | } 122 | 123 | void test_tanh() 124 | { 125 | auto x = Variable(af::randu(5), true); 126 | auto y = tanh(x); 127 | auto dy = Variable(af::constant(1.0, 5), false); 128 | y.backward(dy); 129 | auto dx = x.grad(); 130 | VERIFY(dx.array() - (1 - y.array() * y.array())); 131 | VERIFY(dx.array() - (1 + af::tanh(x.array())) * (1 - af::tanh(x.array()))); 132 | } 133 | 134 | void test_tile() 135 | { 136 | auto x = Variable(af::randu(5), true); 137 | auto y = Variable(af::randu(5, 2), true); 138 | auto z = 
y * tileAs(x, y); 139 | auto dz = Variable(af::constant(1.0, 5, 2), false); 140 | z.backward(dz); 141 | auto dy = y.grad(); 142 | auto dx = x.grad(); 143 | VERIFY(dy.array() - af::tile(x.array(), 1, 2)); 144 | VERIFY(dx.array() - af::sum(y.array(), 1)); 145 | } 146 | 147 | void test_sum() 148 | { 149 | auto x = Variable(af::randu(5), true); 150 | auto y = Variable(af::randu(5, 2), true); 151 | auto z = x * sumAs(y, x); 152 | auto dz = Variable(af::constant(1.0, 5), false); 153 | z.backward(dz); 154 | auto dy = y.grad(); 155 | auto dx = x.grad(); 156 | VERIFY(dy.array() - af::tile(x.array(), 1, 2)); 157 | VERIFY(dx.array() - af::sum(y.array(), 1)); 158 | } 159 | 160 | void test_mean() 161 | { 162 | auto x = Variable(af::randu(5), true); 163 | auto y = Variable(af::randu(5, 3, 2), true); 164 | auto z = x * mean(y, {1,2}); 165 | auto dz = Variable(af::constant(1.0, 5), false); 166 | z.backward(dz); 167 | auto dy = y.grad(); 168 | auto dx = x.grad(); 169 | VERIFY(dy.array() - 6 * af::tile(x.array(), 1, 3, 2)); 170 | VERIFY(dx.array() - af::mean(af::mean(y.array(), 1), 2)); 171 | } 172 | 173 | int main() 174 | { 175 | af::info(); 176 | test_multiply(); 177 | test_multipl_add(); 178 | test_no_calc_grad(); 179 | test_multiply_sub(); 180 | test_divide_add(); 181 | test_multiply_add_scalar(); 182 | test_exp(); 183 | test_sigmoid(); 184 | test_tanh(); 185 | test_tile(); 186 | test_sum(); 187 | test_mean(); 188 | return 0; 189 | } 190 | -------------------------------------------------------------------------------- /examples/xor.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | using namespace af; 18 | using namespace af::nn; 19 | using namespace af::autograd; 20 | 21 | int main(int argc, const char **args) 22 | { 23 | int optim_mode = 0; 24 | std::string optimizer_arg = std::string(args[1]); 25 | if (optimizer_arg == "--adam") { 26 | optim_mode = 1; 27 | } else if (optimizer_arg == "--rmsprop") { 28 | optim_mode = 2; 29 | } 30 | 31 | const int inputSize = 2; 32 | const int outputSize = 1; 33 | const double lr = 0.01; 34 | const double mu = 0.1; 35 | const int numSamples = 4; 36 | 37 | float hInput[] = {1, 1, 38 | 0, 0, 39 | 1, 0, 40 | 0, 1}; 41 | 42 | float hOutput[] = {1, 43 | 0, 44 | 1, 45 | 1}; 46 | 47 | auto in = af::array(inputSize, numSamples, hInput); 48 | auto out = af::array(outputSize, numSamples, hOutput); 49 | 50 | nn::Sequential model; 51 | 52 | model.add(nn::Linear(inputSize, outputSize)); 53 | model.add(nn::Sigmoid()); 54 | 55 | auto loss = nn::MeanSquaredError(); 56 | 57 | std::unique_ptr optim; 58 | 59 | if (optimizer_arg == "--rmsprop") { 60 | optim = std::unique_ptr(new optim::RMSPropOptimizer(model.parameters(), lr)); 61 | } else if (optimizer_arg == "--adam") { 62 | optim = std::unique_ptr(new optim::AdamOptimizer(model.parameters(), lr)); 63 | } else { 64 | optim = std::unique_ptr(new optim::SGDOptimizer(model.parameters(), lr, mu)); 65 | } 66 | 67 | Variable result, l; 68 | for (int i = 0; i < 1000; i++) { 69 | for (int j = 0; j < numSamples; j++) { 70 | 71 | model.train(); 72 | optim->zeroGrad(); 73 | 74 | af::array in_j = in(af::span, j); 75 | af::array out_j = out(af::span, j); 76 | 77 | // Forward propagation 78 | result = model(nn::input(in_j)); 79 | 80 | // Calculate loss 81 | l = loss(result, nn::noGrad(out_j)); 82 | 83 | // Backward propagation 84 | l.backward(); 85 | 86 | // Update parameters 87 | optim->update(); 88 | } 89 | 90 | if ((i + 1) % 100 == 0) { 91 | model.eval(); 92 | 93 | // Forward propagation 94 | result = model(nn::input(in)); 95 | 96 | // Calculate loss 97 | // TODO: Use loss function 98 | af::array diff = out - result.array(); 99 | printf("Average Error at iteration(%d) : %lf\n", i + 1, af::mean(af::abs(diff))); 100 | printf("Predicted\n"); 101 | af_print(result.array()); 102 | printf("Expected\n"); 103 | af_print(out); 104 | printf("\n\n"); 105 | } 106 | } 107 | return 0; 108 | } 109 | -------------------------------------------------------------------------------- /include/af/autograd.h: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #include 10 | #include 11 | -------------------------------------------------------------------------------- /include/af/autograd/Functions.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace af { 15 | namespace autograd { 16 | 17 | class Variable; 18 | 19 | Variable operator +(const Variable &lhs, const Variable &rhs); 20 | Variable operator *(const Variable &lhs, const Variable &rhs); 21 | Variable operator -(const Variable &lhs, const Variable &rhs); 22 | Variable operator /(const Variable &lhs, const Variable &rhs); 23 | Variable operator >(const Variable &lhs, const Variable &rhs); 24 | Variable operator <(const Variable &lhs, const Variable &rhs); 25 | Variable operator >=(const Variable &lhs, const Variable &rhs); 26 | Variable operator <=(const Variable &lhs, const Variable &rhs); 27 | 28 | Variable operator +(const double &lhs, const Variable &rhs); 29 | Variable operator *(const double &lhs, const Variable &rhs); 30 | Variable operator -(const double &lhs, const Variable &rhs); 31 | Variable operator /(const double &lhs, const Variable &rhs); 32 | Variable operator >(const double &lhs, const Variable &rhs); 33 | Variable operator <(const double &lhs, const Variable &rhs); 34 | Variable operator >=(const double &lhs, const Variable &rhs); 35 | Variable operator <=(const double &lhs, const Variable &rhs); 36 | 37 | Variable operator +(const Variable &lhs, const double &rhs); 38 | Variable operator *(const Variable &lhs, const double &rhs); 39 | Variable operator -(const Variable &lhs, const double &rhs); 40 | Variable operator /(const Variable &lhs, const double &rhs); 41 | Variable operator >(const Variable &lhs, const double &rhs); 42 | Variable operator <(const Variable &lhs, const double &rhs); 43 | Variable operator >=(const Variable &lhs, const double &rhs); 44 | Variable operator <=(const Variable &lhs, const double &rhs); 45 | 46 | Variable operator !(const Variable &input); 47 | 48 | Variable negate(const Variable &input); 49 | Variable reciprocal(const Variable &input); 50 | 51 | Variable exp(const Variable &input); 52 | Variable log(const Variable &input); 53 | Variable sin(const Variable &input); 54 | Variable cos(const Variable &input); 55 | Variable tanh(const Variable &input); 56 | Variable sigmoid(const Variable &input); 57 | 58 | Variable max(const Variable &lhs, const Variable &rhs); 59 | Variable max(const Variable &lhs, const double &rhs); 60 | Variable max(const double &lhs, const Variable &rhs); 61 | 62 | Variable min(const Variable &lhs, const Variable &rhs); 63 | Variable min(const Variable &lhs, const double &rhs); 64 | Variable min(const double &lhs, const Variable &rhs); 65 | 66 | Variable transpose(const Variable &input); 67 | Variable tileAs(const Variable &input, const Variable &reference); 68 | Variable sumAs(const Variable &input, const Variable &reference); 69 | 70 | Variable tile(const Variable &input, const std::vector &repeats); 71 | Variable sum(const Variable &input, const std::vector &axes); 72 | Variable mean(const Variable &input, const std::vector &axes); 73 | 74 | Variable matmul(const Variable &lhs, const Variable &rhs); 75 | Variable matmulTN(const Variable &lhs, const Variable &rhs); 76 | Variable matmulNT(const Variable &lhs, const Variable &rhs); 77 | 78 | Variable abs(const Variable &input); 79 | 80 | Variable flat(const Variable &input); 81 | Variable moddims(const Variable &input, const dim4 &dims); 82 | } 83 | } 84 | 
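The declarations above form a small define-by-run autograd API: every operator and function that takes Variable arguments returns a new Variable that remembers its inputs together with a gradient function, and backward() later replays that recorded graph. Below is a minimal sketch of how these functions are typically combined, in the style of examples/autograd.cpp; the two include lines are an assumption (the include directives in the listings in this dump were stripped), as is building against the afml target from the top-level CMakeLists.txt.

#include <arrayfire.h>
#include <af/autograd.h>

using af::autograd::Variable;

int main()
{
    // y = x * x + 2 * x, so dy/dx should be 2 * x + 2
    auto x = Variable(af::randu(5), true); // true => track gradients for x
    auto y = x * x + 2.0 * x;

    // Seeds the backward pass with ones and walks the recorded graph
    y.backward();

    // Difference should be ~0 elementwise if the gradient is correct
    af_print(x.grad().array() - (2 * x.array() + 2));
    return 0;
}
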
-------------------------------------------------------------------------------- /include/af/autograd/Variable.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #pragma once 11 | 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace af { 21 | namespace autograd { 22 | class Variable 23 | { 24 | public: 25 | typedef std::function &, const Variable &)> GradFunc_t; 26 | typedef std::unordered_map Cache_t; 27 | typedef std::vector DAG_t; 28 | 29 | private: 30 | struct Shared { 31 | Shared(); 32 | Shared(const af::array &data, bool calc_grad); 33 | Shared(const af::array &data, 34 | const std::vector &inputs, 35 | GradFunc_t grad_func, 36 | bool calc_grad); 37 | 38 | bool m_calc_grad; 39 | af::array m_data; 40 | std::vector m_inputs; 41 | std::vector m_grads; 42 | GradFunc_t m_grad_func; 43 | }; 44 | 45 | public: 46 | 47 | Variable(); 48 | Variable(const af::array &data, bool calc_grad); 49 | Variable(const af::array &data, 50 | const std::vector &inputs, 51 | GradFunc_t grad_func); 52 | 53 | af::array& array() const; 54 | 55 | Variable& grad() const; 56 | 57 | std::ptrdiff_t id() const; 58 | 59 | bool isCalcGrad() const; 60 | 61 | bool isGradAvailable() const; 62 | 63 | af::dim4 dims() const; 64 | 65 | af::dtype type() const; 66 | 67 | void zeroGrad(); 68 | 69 | void setCalcGrad(bool calc_grad); 70 | 71 | void addGrad(const Variable &child_grad); 72 | 73 | void calcGradInputs(bool retain_grad_graph = false); 74 | 75 | void backward(const Variable &grad, bool retain_grad_graph = false); 76 | 77 | void backward(bool retain_grad_graph = false); 78 | 79 | private: 80 | void evalGrad(bool retain_grad_graph = false); 81 | 82 | std::vector& getInputs() const; 83 | 84 | static void buildSubGraph(Cache_t &cache, DAG_t &dag, const Variable &var); 85 | 86 | static DAG_t build(const Variable &var); 87 | 88 | std::shared_ptr m_shared; 89 | }; 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /include/af/nn.h: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #pragma once 11 | 12 | #include 13 | #include 14 | -------------------------------------------------------------------------------- /include/af/nn/Init.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace af { 14 | namespace nn { 15 | 16 | autograd::Variable input(const af::array &arr); 17 | 18 | autograd::Variable noGrad(const af::array &arr); 19 | 20 | autograd::Variable parameter(const af::array &arr); 21 | 22 | autograd::Variable uniform(int input_size, int output_size, 23 | double min = 0, double max = 1, 24 | af::dtype type = f32, bool calc_grad=true); 25 | 26 | autograd::Variable uniform(af::dim4 dims, 27 | double min = 0, double max = 1, 28 | af::dtype type = f32, bool calc_grad=true); 29 | 30 | autograd::Variable normal(int input_size, int output_size, 31 | double stdv = 1, double mean = 0, 32 | af::dtype type = f32, bool calc_grad=true); 33 | 34 | autograd::Variable normal(af::dim4 dims, 35 | double stdv = 1, double mean = 0, 36 | af::dtype type = f32, bool calc_grad=true); 37 | 38 | autograd::Variable lecunUniform(int input_size, int output_size, 39 | af::dtype type = f32, bool calc_grad=true); 40 | 41 | autograd::Variable lecunUniform(af::dim4 dims, 42 | af::dtype type = f32, bool calc_grad=true); 43 | 44 | autograd::Variable lecunNormal(int input_size, int output_size, 45 | af::dtype type = f32, bool calc_grad=true); 46 | 47 | autograd::Variable lecunNormal(af::dim4 dims, 48 | af::dtype type = f32, bool calc_grad=true); 49 | 50 | autograd::Variable glorotUniform(int input_size, int output_size, 51 | af::dtype type = f32, bool calc_grad=true); 52 | 53 | autograd::Variable glorotUniform(af::dim4 dims, 54 | af::dtype type = f32, bool calc_grad=true); 55 | 56 | autograd::Variable glorotNormal(int input_size, int output_size, 57 | af::dtype type = f32, bool calc_grad=true); 58 | 59 | autograd::Variable glorotNormal(af::dim4 dims, 60 | af::dtype type = f32, bool calc_grad=true); 61 | 62 | 63 | autograd::Variable constant(double val, int input_size, int output_size, 64 | af::dtype type = f32, bool calc_grad=true); 65 | 66 | autograd::Variable constant(double val, af::dim4 dims, 67 | af::dtype type = f32, bool calc_grad=true); 68 | 69 | autograd::Variable identity(int input_size, int output_size, 70 | af::dtype type = f32, bool calc_grad=true); 71 | 72 | autograd::Variable identity(af::dim4 dims, 73 | af::dtype type = f32, bool calc_grad=true); 74 | 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /include/af/nn/Modules.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | -------------------------------------------------------------------------------- /include/af/nn/Modules/Activations.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace af 15 | { 16 | namespace nn 17 | { 18 | class Sigmoid : public Module 19 | { 20 | public: 21 | Sigmoid(); 22 | 23 | autograd::Variable forward(const autograd::Variable &input); 24 | }; 25 | 26 | class Tanh : public Module 27 | { 28 | public: 29 | Tanh(); 30 | 31 | autograd::Variable forward(const autograd::Variable &input); 32 | }; 33 | 34 | class ReLU : public Module 35 | { 36 | public: 37 | ReLU(); 38 | 39 | autograd::Variable forward(const autograd::Variable &input); 40 | }; 41 | 42 | class LeakyReLU : public Module 43 | { 44 | private: 45 | double m_slope; 46 | public: 47 | LeakyReLU(double slope = 0.0); 48 | 49 | autograd::Variable forward(const autograd::Variable &input); 50 | }; 51 | 52 | class PReLU : public Module 53 | { 54 | public: 55 | PReLU(int size, double value = 1.0); 56 | PReLU(const autograd::Variable &w); 57 | 58 | autograd::Variable forward(const autograd::Variable &input); 59 | }; 60 | 61 | class ELU : public Module 62 | { 63 | private: 64 | double m_alpha; 65 | public: 66 | ELU(double alpha = 1.0); 67 | 68 | autograd::Variable forward(const autograd::Variable &input); 69 | }; 70 | 71 | class ThresholdReLU : public Module 72 | { 73 | private: 74 | double m_threshold; 75 | public: 76 | ThresholdReLU(double threshold = 1.0); 77 | 78 | autograd::Variable forward(const autograd::Variable &input); 79 | }; 80 | 81 | 82 | 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /include/af/nn/Modules/Container.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | namespace af 17 | { 18 | namespace nn 19 | { 20 | 21 | typedef std::shared_ptr ModulePtr; 22 | 23 | class Container : public Module 24 | { 25 | protected: 26 | 27 | std::vector m_modules; 28 | 29 | Container(); 30 | 31 | public: 32 | 33 | template 34 | void add(T module) 35 | { 36 | m_modules.emplace_back(new T(module)); 37 | for (auto param : module.parameters()) { 38 | m_parameters.push_back(param); 39 | } 40 | } 41 | 42 | ModulePtr get(int id); 43 | 44 | std::vector modules(); 45 | }; 46 | 47 | class Sequential : public Container 48 | { 49 | public: 50 | 51 | Sequential(); 52 | 53 | autograd::Variable forward(const autograd::Variable &input); 54 | }; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /include/af/nn/Modules/Dropout.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace af 14 | { 15 | namespace nn 16 | { 17 | class Dropout : public Module 18 | { 19 | private: 20 | double m_ratio; 21 | public: 22 | Dropout(double drop_ratio = 0.5); 23 | 24 | autograd::Variable forward(const autograd::Variable &input); 25 | }; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /include/af/nn/Modules/Linear.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace af 14 | { 15 | namespace nn 16 | { 17 | class Linear : public Module 18 | { 19 | private: 20 | bool m_bias; 21 | public: 22 | Linear(int input_size, int output_size, bool bias = true, float spread = 0.05); 23 | 24 | Linear(const autograd::Variable &w); 25 | 26 | Linear(const autograd::Variable &w, const autograd::Variable &b); 27 | 28 | autograd::Variable forward(const autograd::Variable &input); 29 | }; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /include/af/nn/Modules/Loss.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace af 14 | { 15 | namespace nn 16 | { 17 | class Loss : public Module 18 | { 19 | public: 20 | Loss() {} 21 | 22 | virtual autograd::Variable forward(const autograd::Variable &inputs, 23 | const autograd::Variable &targets) = 0; 24 | 25 | autograd::Variable forward(const autograd::Variable &inputs); 26 | 27 | autograd::Variable operator()(const autograd::Variable &inputs, 28 | const autograd::Variable &targets); 29 | }; 30 | 31 | class MeanSquaredError : public Loss 32 | { 33 | public: 34 | MeanSquaredError() {} 35 | 36 | autograd::Variable forward(const autograd::Variable &inputs, 37 | const autograd::Variable &targets); 38 | }; 39 | 40 | class MeanAbsoluteError : public Loss 41 | { 42 | public: 43 | MeanAbsoluteError() {} 44 | 45 | autograd::Variable forward(const autograd::Variable &inputs, 46 | const autograd::Variable &targets); 47 | }; 48 | 49 | class BinaryCrossEntropyLoss : public Loss 50 | { 51 | public: 52 | BinaryCrossEntropyLoss() {} 53 | 54 | autograd::Variable forward(const autograd::Variable &inputs, 55 | const autograd::Variable &targets); 56 | 57 | autograd::Variable forward(const autograd::Variable &inputs, 58 | const autograd::Variable &targets, 59 | const autograd::Variable &weights); 60 | }; 61 | 62 | typedef MeanSquaredError MSE; 63 | typedef MeanAbsoluteError MAE; 64 | typedef MeanAbsoluteError L1Loss; 65 | typedef BinaryCrossEntropyLoss BCELoss; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /include/af/nn/Modules/Module.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | namespace af 17 | { 18 | namespace nn 19 | { 20 | 21 | class Module 22 | { 23 | protected: 24 | std::vector m_parameters; 25 | 26 | bool m_train; 27 | 28 | Module(); 29 | 30 | Module(const std::vector ¶meters); 31 | 32 | void setParams(const std::vector ¶meters); 33 | 34 | public: 35 | 36 | std::vector parameters(); 37 | 38 | void train(); 39 | 40 | void eval(); 41 | 42 | virtual autograd::Variable forward(const autograd::Variable &input) = 0; 43 | 44 | autograd::Variable operator()(const autograd::Variable &input); 45 | }; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /include/af/optim.h: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #include 10 | -------------------------------------------------------------------------------- /include/af/optim/Optimizers.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #pragma once 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | namespace af 18 | { 19 | namespace optim 20 | { 21 | 22 | class Optimizer 23 | { 24 | protected: 25 | std::vector m_parameters; 26 | public: 27 | 28 | Optimizer(const std::vector ¶meters); 29 | 30 | virtual void update() = 0; 31 | 32 | void zeroGrad(); 33 | }; 34 | 35 | class SGDOptimizer : public Optimizer 36 | { 37 | bool m_use_nesterov; 38 | double m_lr; 39 | double m_mu; 40 | double m_wd; 41 | std::vector m_velocities; 42 | public: 43 | SGDOptimizer(const std::vector ¶meters, 44 | double learning_rate, double momentum = 0, 45 | double weight_decay = 0, 46 | bool use_nesterov = false); 47 | void update(); 48 | }; 49 | 50 | class AdamOptimizer : public Optimizer 51 | { 52 | double m_lr; 53 | double m_beta1; 54 | double m_beta2; 55 | double m_eps; 56 | double m_wd; 57 | int m_count; 58 | std::vector m_biased_first; 59 | std::vector m_biased_second; 60 | public: 61 | AdamOptimizer(const std::vector ¶meters, 62 | double learning_rate, 63 | double beta1 = 0.9, 64 | double beta2 = 0.999, 65 | double epsilon = 1E-8, 66 | double weight_decay = 0); 67 | void update(); 68 | }; 69 | 70 | class RMSPropOptimizer : public Optimizer 71 | { 72 | bool m_use_first; 73 | double m_lr; 74 | double m_rho; 75 | double m_eps; 76 | double m_wd; 77 | std::vector m_first; 78 | std::vector m_second; 79 | public: 80 | RMSPropOptimizer(const std::vector ¶meters, 81 | double learning_rate, 82 | double rho = 0.99, 83 | double epsilon = 1E-8, 84 | double weight_decay = 0, 85 | bool use_first = false); 86 | void update(); 87 | }; 88 | 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/autograd/Functions.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #include 11 | #include 12 | 13 | namespace af { 14 | namespace autograd { 15 | 16 | Variable operator +(const Variable &lhs, const Variable &rhs) 17 | { 18 | auto result = lhs.array() + rhs.array(); 19 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 20 | inputs[0].addGrad(grad_output); 21 | inputs[1].addGrad(grad_output); 22 | }; 23 | return Variable(result, {lhs, rhs}, grad_func); 24 | } 25 | 26 | Variable operator -(const Variable &lhs, const Variable &rhs) 27 | { 28 | auto result = lhs.array() - rhs.array(); 29 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 30 | inputs[0].addGrad(grad_output); 31 | inputs[1].addGrad(negate(grad_output)); 32 | }; 33 | return Variable(result, {lhs, rhs}, grad_func); 34 | } 35 | 36 | Variable operator *(const Variable &lhs, const Variable &rhs) 37 | { 38 | auto result = lhs.array() * rhs.array(); 39 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 40 | inputs[0].addGrad(grad_output * inputs[1]); 41 | inputs[1].addGrad(grad_output * inputs[0]); 42 | }; 43 | return Variable(result, {lhs, rhs}, grad_func); 44 | } 45 | 46 | Variable operator /(const Variable &lhs, const Variable &rhs) 47 | { 48 | auto result = lhs.array() / rhs.array(); 49 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 50 | auto inputs_1_rec = reciprocal(inputs[1]); 51 | auto grad_input_0 = grad_output * inputs_1_rec; 52 | inputs[0].addGrad(grad_input_0); 53 | inputs[1].addGrad(grad_input_0 * negate(inputs[0]) * inputs_1_rec); 54 | }; 55 | return Variable(result, {lhs, rhs}, grad_func); 56 | } 57 | 58 | Variable operator >(const Variable &lhs, const Variable &rhs) 59 | { 60 | auto result = lhs.array() > rhs.array(); 61 | return Variable(result, false); 62 | } 63 | 64 | Variable operator <(const Variable &lhs, const Variable &rhs) 65 | { 66 | auto result = lhs.array() < rhs.array(); 67 | return Variable(result, false); 68 | } 69 | 70 | Variable operator >=(const Variable &lhs, const Variable &rhs) 71 | { 72 | auto result = lhs.array() >= rhs.array(); 73 | return Variable(result, false); 74 | } 75 | 76 | Variable operator <=(const Variable &lhs, const Variable &rhs) 77 | { 78 | auto result = lhs.array() <= rhs.array(); 79 | return Variable(result, false); 80 | } 81 | 82 | 83 | 84 | #define INSTANTIATE_OPERATOR(OP) \ 85 | Variable operator OP(const double &lhs_val, const Variable &rhs) \ 86 | { \ 87 | auto lhs = Variable( \ 88 | af::constant(lhs_val, \ 89 | rhs.array().dims(), \ 90 | rhs.array().type()), \ 91 | false); \ 92 | return lhs OP rhs; \ 93 | } \ 94 | Variable operator OP(const Variable &lhs, const double &rhs_val) \ 95 | { \ 96 | auto rhs = Variable( \ 97 | af::constant(rhs_val, \ 98 | lhs.array().dims(), lhs.array().type()), \ 99 | false); \ 100 | return lhs OP rhs; \ 101 | } \ 102 | 103 | INSTANTIATE_OPERATOR(+) 104 | INSTANTIATE_OPERATOR(-) 105 | INSTANTIATE_OPERATOR(*) 106 | INSTANTIATE_OPERATOR(/) 107 | INSTANTIATE_OPERATOR(>) 108 | INSTANTIATE_OPERATOR(<) 109 | INSTANTIATE_OPERATOR(>=) 110 | INSTANTIATE_OPERATOR(<=) 111 | 112 | #undef INSTANTIATE_OPERATOR 113 | 114 | Variable operator !(const Variable &input) 115 | { 116 | auto result = !input.array(); 117 | return Variable(result, false); 118 | } 119 | 120 | Variable max(const Variable &lhs, const Variable &rhs) 121 | { 122 | auto mask = lhs > rhs; 123 
| auto result = max(lhs.array(), rhs.array()); 124 | 125 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 126 | inputs[0].addGrad( inputs[2] * grad_output); 127 | inputs[1].addGrad(!inputs[2] * grad_output); 128 | }; 129 | return Variable(result, {lhs, rhs, mask}, grad_func); 130 | } 131 | 132 | Variable min(const Variable &lhs, const Variable &rhs) 133 | { 134 | auto mask = lhs < rhs; 135 | auto result = min(lhs.array(), rhs.array()); 136 | 137 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 138 | inputs[0].addGrad( inputs[2] * grad_output); 139 | inputs[1].addGrad(!inputs[2] * grad_output); 140 | }; 141 | return Variable(result, {lhs, rhs, mask}, grad_func); 142 | } 143 | 144 | #define INSTANTIATE_FUNCTION(FN) \ 145 | Variable FN(const double &lhs_val, const Variable &rhs) \ 146 | { \ 147 | auto lhs = Variable( \ 148 | af::constant(lhs_val, \ 149 | rhs.array().dims(), \ 150 | rhs.array().type()), \ 151 | false); \ 152 | return FN(lhs,rhs); \ 153 | } \ 154 | Variable FN(const Variable &lhs, const double &rhs_val) \ 155 | { \ 156 | auto rhs = Variable( \ 157 | af::constant(rhs_val, \ 158 | lhs.array().dims(), lhs.array().type()), \ 159 | false); \ 160 | return FN(lhs, rhs); \ 161 | } 162 | 163 | 164 | INSTANTIATE_FUNCTION(max); 165 | INSTANTIATE_FUNCTION(min); 166 | 167 | #undef INSTANTIATE_FUNCTION 168 | 169 | Variable negate(const Variable &input) 170 | { 171 | auto result = 0.0 - input.array(); 172 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 173 | inputs[0].addGrad(negate(grad_output)); 174 | }; 175 | return Variable(result, {input}, grad_func); 176 | } 177 | 178 | Variable reciprocal(const Variable &input) 179 | { 180 | auto result = 1.0 / input.array(); 181 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 182 | auto res = reciprocal(inputs[0]); 183 | inputs[0].addGrad(negate(grad_output) * res * res); 184 | }; 185 | return Variable(result, {input}, grad_func); 186 | } 187 | 188 | Variable exp(const Variable &input) 189 | { 190 | auto result = exp(input.array()); 191 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 192 | inputs[0].addGrad(grad_output * exp(inputs[0])); 193 | }; 194 | return Variable(result, {input}, grad_func); 195 | } 196 | 197 | Variable log(const Variable &input) 198 | { 199 | auto result = log(input.array()); 200 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 201 | inputs[0].addGrad(grad_output / inputs[0]); 202 | }; 203 | return Variable(result, {input}, grad_func); 204 | } 205 | 206 | Variable sin(const Variable &input) 207 | { 208 | auto result = sin(input.array()); 209 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 210 | inputs[0].addGrad(grad_output * cos(inputs[0])); 211 | }; 212 | return Variable(result, {input}, grad_func); 213 | } 214 | 215 | Variable cos(const Variable &input) 216 | { 217 | auto result = cos(input.array()); 218 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 219 | inputs[0].addGrad(grad_output * negate(sin(inputs[0]))); 220 | }; 221 | return Variable(result, {input}, grad_func); 222 | } 223 | 224 | Variable tanh(const Variable &input) 225 | { 226 | auto result = tanh(input.array()); 227 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 228 | auto tmp = tanh(inputs[0]); 229 | inputs[0].addGrad(grad_output * (1.0 - tmp * tmp)); 230 | }; 231 | return Variable(result, {input}, grad_func); 232 | } 233 | 234 | 
Variable sigmoid(const Variable &input) 235 | { 236 | auto result = sigmoid(input.array()); 237 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 238 | auto tmp = sigmoid(inputs[0]); 239 | inputs[0].addGrad(grad_output * tmp * (1 - tmp)); 240 | }; 241 | return Variable(result, {input}, grad_func); 242 | } 243 | 244 | Variable transpose(const Variable &input) 245 | { 246 | auto result = transpose(input.array()); 247 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 248 | inputs[0].addGrad(transpose(grad_output)); 249 | }; 250 | return Variable(result, {input}, grad_func); 251 | } 252 | 253 | Variable tileAs(const Variable &input, const Variable &reference) 254 | { 255 | dim4 dims(1,1,1,1); 256 | dim4 rdims = reference.dims(); 257 | dim4 idims = input.dims(); 258 | for (int i = 0; i < 4; i++) { 259 | dims[i] = rdims[i] / idims[i]; 260 | } 261 | auto result = tile(input.array(), dims); 262 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 263 | inputs[0].addGrad(sumAs(grad_output, inputs[0])); 264 | }; 265 | return Variable(result, {input}, grad_func); 266 | } 267 | 268 | Variable sumAs(const Variable &input, const Variable &reference) 269 | { 270 | dim4 rdims = reference.dims(); 271 | dim4 idims = input.dims(); 272 | auto result = input.array(); 273 | for (int i = 0; i < 4; i++) { 274 | if (idims[i] != rdims[i]) result = sum(result, i); 275 | } 276 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 277 | inputs[0].addGrad(tileAs(grad_output, inputs[0])); 278 | }; 279 | return Variable(result, {input}, grad_func); 280 | } 281 | 282 | Variable tile(const Variable &input, const std::vector &repeats) 283 | { 284 | dim4 dims; 285 | for (size_t i = 0; i < repeats.size(); i++) { 286 | dims[i] = repeats[i]; 287 | } 288 | auto result = tile(input.array(), dims); 289 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 290 | inputs[0].addGrad(sumAs(grad_output, inputs[0])); 291 | }; 292 | return Variable(result, {input}, grad_func); 293 | } 294 | 295 | Variable sum(const Variable &input, const std::vector &axes) 296 | { 297 | auto result = input.array(); 298 | for (size_t i = 0; i < axes.size(); i++) { 299 | result = sum(result, axes[i]); 300 | } 301 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 302 | inputs[0].addGrad(tileAs(grad_output, inputs[0])); 303 | }; 304 | return Variable(result, {input}, grad_func); 305 | } 306 | 307 | Variable mean(const Variable &input, const std::vector &axes) 308 | { 309 | auto result = input.array(); 310 | for (size_t i = 0; i < axes.size(); i++) { 311 | result = mean(result, axes[i]); 312 | } 313 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 314 | dim4 odims = grad_output.dims(); 315 | dim4 idims = inputs[0].dims(); 316 | dim_t count = 1; 317 | for (int i = 0; i < 4; i++) { 318 | count *= idims[i] / odims[i]; 319 | } 320 | inputs[0].addGrad(count * tileAs(grad_output, inputs[0])); 321 | }; 322 | return Variable(result, {input}, grad_func); 323 | } 324 | 325 | Variable matmul(const Variable &lhs, const Variable &rhs) 326 | { 327 | // lhs:Input[0] -- [M, N] 328 | // rhs:Input[1] -- [N, K] 329 | //matmul(lhs, rhs) 330 | // -- matmul([M, N], [N, K]) -- [M, K] 331 | // result:grad_output -- [M, K] 332 | auto result = matmul(lhs.array(), rhs.array()); 333 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 334 | // matmulNT(grad_output, inputs[1]) 335 | // -- matmulNT([M, K], [N, K]) 
336 | // -- matmul([M, K], [K, N]) -- [M, K] 337 | inputs[0].addGrad(matmulNT(grad_output, inputs[1])); 338 | // matmulTN(inputs[0], grad_output) 339 | // -- matmulTN([M, N], [M, K]) 340 | // -- matmul([N, M], [M, K]) -- [N, K] 341 | inputs[1].addGrad(matmulTN(inputs[0], grad_output)); 342 | }; 343 | return Variable(result, {lhs, rhs}, grad_func); 344 | } 345 | 346 | Variable matmulTN(const Variable &lhs, const Variable &rhs) 347 | { 348 | // lhs:Input[0] -- [N, M] 349 | // rhs:Input[1] -- [N, K] 350 | // matmulTN(lhs, rhs) 351 | // -- matmulTN([N, M], [N, K]) 352 | // -- matmul([M, N], [N, K]) -- [M, K] 353 | // result:grad_output -- [M, K] 354 | auto result = matmulTN(lhs.array(), rhs.array()); 355 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 356 | // matmulNT(inputs[1], grad_output) 357 | // -- matmulNT([N, K], [M, K]) 358 | // -- matmul([N, K], [K, M]) -- [N, M] 359 | inputs[0].addGrad(matmulNT(inputs[1], grad_output)); 360 | // matmul(inputs[0], grad_output) 361 | // -- matmulNT([N, M], [M, K]) -- [N, K] 362 | inputs[1].addGrad(matmul(inputs[0], grad_output)); 363 | }; 364 | return Variable(result, {lhs, rhs}, grad_func); 365 | } 366 | 367 | Variable matmulNT(const Variable &lhs, const Variable &rhs) 368 | { 369 | // lhs:Input[0] -- [M, N] 370 | // rhs:Input[1] -- [K, N] 371 | // matmulNT(lhs, rhs) 372 | // -- matmulNT([M, N], [K, N]) 373 | // -- matmul([M, N], [N, K]) -- [M, K] 374 | // result:grad_output -- [M, K] 375 | auto result = matmulNT(lhs.array(), rhs.array()); 376 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 377 | // matmul(grad_output, inputs[1]) 378 | // -- matmul([M, K], [K, N]) -- [M, N] 379 | inputs[0].addGrad(matmul(grad_output, inputs[1])); 380 | // matmulTN(grad_output, inputs[0]) 381 | // -- matmulTN([M, K], [M, N]) 382 | // -- matmul([K, M], [M, N]) -- [K, N] 383 | inputs[1].addGrad(matmulTN(grad_output, inputs[0])); 384 | }; 385 | return Variable(result, {lhs, rhs}, grad_func); 386 | } 387 | 388 | Variable abs(const Variable &input) 389 | { 390 | auto result = af::abs(input.array()); 391 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 392 | // af::sign returns signbit 393 | // Convert it into -1, 1 394 | auto sign = Variable(1 - 2 * af::sign(inputs[0].array()), false); 395 | inputs[0].addGrad(sign * grad_output); 396 | }; 397 | return Variable(result, {input}, grad_func); 398 | } 399 | 400 | Variable flat(const Variable &input) 401 | { 402 | auto result = af::flat(input.array()); 403 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 404 | inputs[0].addGrad(moddims(grad_output, inputs[0].dims())); 405 | }; 406 | return Variable(result, {input}, grad_func); 407 | } 408 | 409 | Variable moddims(const Variable &input, const dim4 &dims) 410 | { 411 | auto result = af::moddims(input.array(), dims); 412 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 413 | inputs[0].addGrad(moddims(grad_output, inputs[0].dims())); 414 | }; 415 | return Variable(result, {input}, grad_func); 416 | } 417 | } 418 | } 419 | -------------------------------------------------------------------------------- /src/autograd/Variable.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #include 11 | #include 12 | 13 | namespace af { 14 | namespace autograd { 15 | 16 | Variable::Shared::Shared() : 17 | m_calc_grad(true), 18 | m_data(), 19 | m_inputs(), 20 | m_grads(), 21 | m_grad_func(nullptr) 22 | {} 23 | 24 | Variable::Shared::Shared(const af::array &data, bool calc_grad) : 25 | m_calc_grad(calc_grad), 26 | m_data(data), 27 | m_inputs(), 28 | m_grads(), 29 | m_grad_func(nullptr) 30 | {} 31 | 32 | Variable::Shared::Shared(const af::array &data, 33 | const std::vector &inputs, 34 | GradFunc_t grad_func, 35 | bool calc_grad) : 36 | m_calc_grad(calc_grad), 37 | m_data(data), 38 | m_inputs(inputs.begin(), inputs.end()), 39 | m_grads(), 40 | m_grad_func(grad_func) 41 | {} 42 | 43 | Variable::Variable() : 44 | m_shared(new Shared()) 45 | { 46 | } 47 | 48 | Variable::Variable(const af::array &data, bool calc_grad) : 49 | m_shared(new Shared(data, calc_grad)) 50 | {} 51 | 52 | Variable::Variable(const af::array &data, 53 | const std::vector &inputs, 54 | GradFunc_t grad_func) : 55 | m_shared(nullptr) 56 | { 57 | bool calc_grad = false; 58 | for (const auto &input : inputs) { 59 | calc_grad |= input.isCalcGrad(); 60 | } 61 | if (calc_grad) { 62 | m_shared = std::shared_ptr(new Shared(data, inputs, grad_func, true)); 63 | } else { 64 | m_shared = std::shared_ptr(new Shared(data, false)); 65 | } 66 | } 67 | 68 | af::array& Variable::array() const 69 | { 70 | return m_shared->m_data; 71 | } 72 | 73 | Variable& Variable::grad() const 74 | { 75 | if (!m_shared->m_calc_grad) { 76 | throw af::exception("Gradient calclation disabled."); 77 | } 78 | if (m_shared->m_grads.size() == 0) { 79 | throw af::exception("Gradient hasn't been calculated yet."); 80 | } 81 | return m_shared->m_grads[0]; 82 | } 83 | 84 | std::ptrdiff_t Variable::id() const 85 | { 86 | return (std::ptrdiff_t)m_shared.get(); 87 | } 88 | 89 | std::vector& Variable::getInputs() const 90 | { 91 | return m_shared->m_inputs; 92 | } 93 | 94 | bool Variable::isCalcGrad() const 95 | { 96 | return m_shared->m_calc_grad; 97 | } 98 | 99 | bool Variable::isGradAvailable() const 100 | { 101 | if (!m_shared->m_calc_grad) return false; 102 | return m_shared->m_grads.size() >= 1; 103 | } 104 | 105 | af::dim4 Variable::dims() const 106 | { 107 | return m_shared->m_data.dims(); 108 | } 109 | 110 | af::dtype Variable::type() const 111 | { 112 | return m_shared->m_data.type(); 113 | } 114 | 115 | void Variable::zeroGrad() 116 | { 117 | m_shared->m_grads.clear(); 118 | } 119 | 120 | void Variable::setCalcGrad(bool calc_grad) 121 | { 122 | m_shared->m_calc_grad = calc_grad; 123 | if (!calc_grad) { 124 | m_shared->m_grad_func = nullptr; 125 | m_shared->m_inputs.clear(); 126 | m_shared->m_grads.clear(); 127 | } 128 | } 129 | 130 | void Variable::addGrad(const Variable &child_grad) 131 | { 132 | if (m_shared->m_calc_grad) { 133 | m_shared->m_grads.push_back(child_grad); 134 | } 135 | } 136 | 137 | void Variable::evalGrad(bool retain_grad_graph) 138 | { 139 | // Flag asking not to calculate gradients 140 | if (!m_shared->m_calc_grad) return; 141 | 142 | // Best not to evaluate the JIT immediately if theres only a single gradient 143 | Variable grad = m_shared->m_grads[0]; 144 | if (m_shared->m_grads.size() > 1) { 145 | for (unsigned i = 1; i < m_shared->m_grads.size(); i++) { 146 | grad = grad + m_shared->m_grads[i]; 147 | } 148 | grad.array().eval(); 149 | 
m_shared->m_grads.resize(1); 150 | } 151 | 152 | grad.setCalcGrad(retain_grad_graph); 153 | m_shared->m_grads[0] = grad; 154 | } 155 | 156 | void Variable::calcGradInputs(bool retain_grad_graph) 157 | { 158 | evalGrad(); 159 | if (m_shared->m_grad_func) { 160 | m_shared->m_grad_func(m_shared->m_inputs, m_shared->m_grads[0]); 161 | } 162 | } 163 | 164 | void Variable::backward(const Variable &grad, bool retain_grad_graph) 165 | { 166 | this->addGrad(grad); 167 | Variable::DAG_t dag = Variable::build(*this); 168 | for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { 169 | iter->calcGradInputs(retain_grad_graph); 170 | } 171 | } 172 | 173 | void Variable::backward(bool retain_grad_graph) 174 | { 175 | auto ones = Variable(af::constant(1, this->dims()), false); 176 | this->backward(ones, retain_grad_graph); 177 | } 178 | 179 | Variable::DAG_t Variable::build(const Variable &var) 180 | { 181 | Cache_t cache; 182 | Variable::DAG_t dag; 183 | Variable::buildSubGraph(cache, dag, var); 184 | return dag; 185 | } 186 | 187 | void Variable::buildSubGraph(Cache_t &cache, Variable::DAG_t &dag, const Variable &var) 188 | { 189 | std::ptrdiff_t id = var.id(); 190 | if (cache.find(id) != cache.end()) { 191 | return; 192 | } 193 | for (const auto &input : var.getInputs()) { 194 | Variable::buildSubGraph(cache, dag, input); 195 | } 196 | cache[id] = true; 197 | dag.push_back(var); 198 | } 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /src/nn/Init.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #include 11 | 12 | #include 13 | 14 | namespace af { 15 | namespace nn { 16 | 17 | using autograd::Variable; 18 | 19 | Variable input(const af::array &arr) 20 | { 21 | return Variable(arr, false); 22 | } 23 | 24 | Variable noGrad(const af::array &arr) 25 | { 26 | return Variable(arr, false); 27 | } 28 | 29 | Variable parameter(const af::array &arr) 30 | { 31 | return Variable(arr, true); 32 | } 33 | 34 | autograd::Variable uniform(int output_size, int input_size, 35 | double min, double max, 36 | af::dtype type, bool calc_grad) 37 | { 38 | return nn::uniform(af::dim4(output_size, input_size), min, max, type, calc_grad); 39 | } 40 | 41 | autograd::Variable uniform(af::dim4 dims, double min, double max, 42 | af::dtype type, bool calc_grad) 43 | { 44 | af::array result = af::randu(dims, type); 45 | if (min != 0 || max != 1) { 46 | result = (max - min) * result + min; 47 | } 48 | return Variable(result, calc_grad); 49 | } 50 | 51 | autograd::Variable normal(int output_size, int input_size, 52 | double stdv, double mean, 53 | af::dtype type, bool calc_grad) 54 | { 55 | return nn::normal(af::dim4(output_size, input_size), stdv, mean, type, calc_grad); 56 | } 57 | 58 | autograd::Variable normal(af::dim4 dims, double stdv, double mean, 59 | af::dtype type, bool calc_grad) 60 | { 61 | af::array result = af::randn(dims, type); 62 | if (mean != 0 || stdv != 1) { 63 | result = stdv * result + mean; 64 | } 65 | return Variable(result, calc_grad); 66 | } 67 | 68 | autograd::Variable lecunUniform(int output_size, int input_size, 69 | af::dtype type, bool calc_grad) 70 | { 71 | return nn::lecunUniform(af::dim4(output_size, input_size), type, calc_grad); 72 | } 73 | 74 | autograd::Variable lecunUniform(af::dim4 dims, 75 | af::dtype type, bool calc_grad) 76 | { 77 | dim_t elements = dims.elements(); 78 | dim_t fan_in = elements / dims[1]; 79 | double stdv = ::sqrt(1.0/(double)fan_in); 80 | double limit = ::sqrt(3.0) * stdv; 81 | return nn::uniform(dims, -limit, limit, type, calc_grad); 82 | } 83 | 84 | autograd::Variable lecunNormal(int output_size, int input_size, 85 | af::dtype type, bool calc_grad) 86 | { 87 | return nn::lecunNormal(af::dim4(output_size, input_size), type, calc_grad); 88 | } 89 | 90 | autograd::Variable lecunNormal(af::dim4 dims, 91 | af::dtype type, bool calc_grad) 92 | { 93 | dim_t elements = dims.elements(); 94 | dim_t fan_in = elements / dims[1]; 95 | double stdv = ::sqrt(1.0/(double)fan_in); 96 | return nn::normal(dims, 0, stdv, type, calc_grad); 97 | } 98 | 99 | autograd::Variable glorotUniform(int output_size, int input_size, 100 | af::dtype type, bool calc_grad) 101 | { 102 | return nn::glorotUniform(af::dim4(output_size, input_size), type, calc_grad); 103 | } 104 | 105 | autograd::Variable glorotUniform(af::dim4 dims, 106 | af::dtype type, bool calc_grad) 107 | { 108 | dim_t elements = dims.elements(); 109 | dim_t fan_in = elements / dims[1]; 110 | dim_t fan_out = elements / dims[0]; 111 | double stdv = ::sqrt(2.0/(double)(fan_in + fan_out)); 112 | double limit = ::sqrt(3.0) * stdv; 113 | return nn::uniform(dims, -limit, limit, type, calc_grad); 114 | } 115 | 116 | autograd::Variable glorotNormal(int output_size, int input_size, 117 | af::dtype type, bool calc_grad) 118 | { 119 | return nn::glorotNormal(af::dim4(output_size, input_size), type, calc_grad); 120 | } 121 | 122 | autograd::Variable 
/src/nn/Modules/Activations.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | 
10 | #include
11 | #include
12 | #include
13 | namespace af
14 | {
15 | namespace nn
16 | {
17 |     using namespace autograd;
18 | 
19 |     Sigmoid::Sigmoid() {}
20 | 
21 |     Variable Sigmoid::forward(const Variable &input)
22 |     {
23 |         return sigmoid(input);
24 |     }
25 | 
26 |     Tanh::Tanh() {}
27 | 
28 |     Variable Tanh::forward(const Variable &input)
29 |     {
30 |         return tanh(input);
31 |     }
32 | 
33 |     ReLU::ReLU() {}
34 | 
35 |     Variable ReLU::forward(const Variable &input)
36 |     {
37 |         return max(input, 0.0);
38 |     }
39 | 
40 |     LeakyReLU::LeakyReLU(double slope) :
41 |         m_slope(slope)
42 |     {
43 |     }
44 | 
45 |     Variable LeakyReLU::forward(const Variable &input)
46 |     {
47 |         return max(input, m_slope * input);
48 |     }
49 | 
50 |     PReLU::PReLU(int size, double value)
51 |     {
52 |         auto w = nn::constant(value, size, 1);
53 |         setParams({w});
54 |     }
55 | 
56 |     PReLU::PReLU(const Variable &w) :
57 |         Module({w})
58 |     {
59 |     }
60 | 
61 |     Variable PReLU::forward(const Variable &input)
62 |     {
63 |         auto mask = input >= 0.0;
64 |         return (input * mask) + (input * !mask * tileAs(m_parameters[0], input));
65 |     }
66 | 
67 |     ELU::ELU(double alpha) :
68 |         m_alpha(alpha)
69 |     {
70 |     }
71 | 
72 |     Variable ELU::forward(const Variable &input)
73 |     {
74 |         auto mask = input >= 0.0;
75 |         return (mask * input) + (!mask * m_alpha * (exp(input)-1));
76 |     }
77 | 
78 |     ThresholdReLU::ThresholdReLU(double threshold) :
79 |         m_threshold(threshold)
80 |     {
81 |     }
82 | 
83 |     Variable ThresholdReLU::forward(const Variable &input)
84 |     {
85 |         auto mask = input >= m_threshold;
86 |         return input * mask;
87 |     }
88 | }
89 | }
90 | 
--------------------------------------------------------------------------------
/src/nn/Modules/Container.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | 
10 | #include
11 | #include
12 | 
13 | namespace af
14 | {
15 | namespace nn
16 | {
17 |     using namespace autograd;
18 | 
19 |     Container::Container() {}
20 | 
21 |     ModulePtr Container::get(int id)
22 |     {
23 |         return m_modules[id];
24 |     }
25 | 
26 |     std::vector<ModulePtr> Container::modules()
27 |     {
28 |         return m_modules;
29 |     }
30 | 
31 |     Sequential::Sequential() {}
32 | 
33 |     Variable Sequential::forward(const Variable &input)
34 |     {
35 |         Variable output = input;
36 |         for (auto &module : m_modules) {
37 |             output = module->forward(output);
38 |         }
39 |         return output;
40 |     }
41 | }
42 | }
43 | 
--------------------------------------------------------------------------------
/src/nn/Modules/Dropout.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | #include
10 | 
11 | #include
12 | #include
13 | 
14 | namespace af
15 | {
16 | namespace nn
17 | {
18 |     using namespace autograd;
19 | 
20 |     Dropout::Dropout(double drop_ratio) :
21 |         m_ratio(drop_ratio)
22 |     {
23 |     }
24 | 
25 |     Variable Dropout::forward(const Variable &input)
26 |     {
27 |         if (m_train)
28 |             return (uniform(input.dims(), 0.0, 1.0, f32, false) > m_ratio) * input;
29 |         else
30 |             return input;
31 |     }
32 | }
33 | }
34 | 
--------------------------------------------------------------------------------
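Dropout is the one module above whose behaviour depends on the training flag: in train() mode it zeroes roughly a drop_ratio fraction of the activations (note the kept values are not rescaled by 1/(1 - drop_ratio)), while in eval() mode it is the identity. A small sketch, assuming af/nn.h aggregates the module headers:

#include <arrayfire.h>
#include <af/nn.h>

int main()
{
    using namespace af;

    nn::Dropout drop(0.5);
    auto x = nn::input(af::constant(1, 8, 8));

    drop.train();            // m_train = true: roughly half of the entries become 0
    auto masked = drop(x);

    drop.eval();             // m_train = false: the input passes through unchanged
    auto same = drop(x);

    af_print(masked.array());
    af_print(same.array());
    return 0;
}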
/src/nn/Modules/Linear.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | #include
10 | 
11 | #include
12 | #include
13 | 
14 | namespace af
15 | {
16 | namespace nn
17 | {
18 |     using namespace autograd;
19 | 
20 |     Linear::Linear(int input_size, int output_size, bool bias, float spread) :
21 |         m_bias(bias)
22 |     {
23 |         auto w = nn::lecunNormal(output_size, input_size);
24 |         if (bias) {
25 |             auto b = nn::lecunNormal(output_size, 1);
26 |             setParams({w, b});
27 |         } else {
28 |             setParams({w});
29 |         }
30 |     }
31 | 
32 |     Linear::Linear(const Variable &w) :
33 |         Module({w}),
34 |         m_bias(false)
35 |     {
36 |     }
37 | 
38 |     Linear::Linear(const Variable &w, const Variable &b) :
39 |         Module({w, b}),
40 |         m_bias(true)
41 |     {
42 |         if (b.array().dims(0) != w.array().dims(0)) {
43 |             throw af::exception("nn::Linear: Dimension mismatch between weight and bias.");
44 |         }
45 |         if (b.array().dims(1) != 1) {
46 |             throw af::exception("nn::Linear: Bias must be a vector.");
47 |         }
48 |     }
49 | 
50 |     Variable Linear::forward(const Variable &input)
51 |     {
52 |         auto res = matmul(m_parameters[0], input);
53 |         if (m_bias) {
54 |             res = res + tileAs(m_parameters[1], res);
55 |         }
56 |         return res;
57 |     }
58 | }
59 | }
60 | 
--------------------------------------------------------------------------------
/src/nn/Modules/Loss.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | #include
10 | #include
11 | 
12 | 
13 | namespace af
14 | {
15 | namespace nn
16 | {
17 |     using namespace autograd;
18 | 
19 |     autograd::Variable Loss::forward(const autograd::Variable &inputs)
20 |     {
21 |         throw af::exception("Loss module requires both inputs and targets");
22 |     }
23 | 
24 |     autograd::Variable Loss::operator()(const autograd::Variable &inputs,
25 |                                         const autograd::Variable &targets)
26 |     {
27 |         return this->forward(inputs, targets);
28 |     }
29 | 
30 |     autograd::Variable MeanSquaredError::forward(const autograd::Variable &inputs,
31 |                                                  const autograd::Variable &targets)
32 |     {
33 |         auto df = inputs - targets;
34 |         auto res = mean(flat(df * df), {0});
35 |         return res;
36 |     }
37 | 
38 |     autograd::Variable MeanAbsoluteError::forward(const autograd::Variable &inputs,
39 |                                                   const autograd::Variable &targets)
40 |     {
41 |         auto df = inputs - targets;
42 |         return mean(flat(abs(df)), {0});
43 |     }
44 | 
45 |     static autograd::Variable
46 |     binaryCrossEntropy(const autograd::Variable &inputs,
47 |                        const autograd::Variable &targets)
48 |     {
49 |         return -1.0 * (targets * log(inputs) + (1 - targets) * log(1 - inputs));
50 |     }
51 | 
52 |     autograd::Variable BinaryCrossEntropyLoss::forward(const autograd::Variable &inputs,
53 |                                                        const autograd::Variable &targets)
54 |     {
55 |         return mean(flat(binaryCrossEntropy(inputs, targets)), {0});
56 |     }
57 | 
58 |     autograd::Variable BinaryCrossEntropyLoss::forward(const autograd::Variable &inputs,
59 |                                                        const autograd::Variable &targets,
60 |                                                        const autograd::Variable &weights)
61 |     {
62 |         return mean(flat(weights * binaryCrossEntropy(inputs, targets)), {0});
63 |     }
64 | }
65 | }
66 | 
--------------------------------------------------------------------------------
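The loss modules are used through Loss::operator(), which forwards to the two-argument forward() overloads above. A hedged sketch of calling them directly on hand-built Variables (it assumes the loss classes are default-constructible, since only their forward overloads appear in this file, and that BinaryCrossEntropyLoss receives predictions in (0, 1) with 0/1 targets):

#include <arrayfire.h>
#include <af/autograd.h>
#include <af/nn.h>

int main()
{
    using namespace af;

    auto preds   = nn::parameter(af::constant(0.8, 10));        // tracked for gradients
    auto targets = nn::noGrad((af::randu(10) > 0.5).as(f32));   // 0/1 labels

    nn::MeanSquaredError mse;
    nn::BinaryCrossEntropyLoss bce;

    auto l1 = mse(preds, targets);   // mean((p - t)^2)
    auto l2 = bce(preds, targets);   // mean(-t*log(p) - (1 - t)*log(1 - p))

    l2.backward(/*retain_grad_graph=*/false);
    af_print(preds.grad().array());  // d(BCE)/d(preds)
    return 0;
}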
/src/nn/Modules/Module.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | 
10 | #include
11 | 
12 | namespace af
13 | {
14 | namespace nn
15 | {
16 |     using autograd::Variable;
17 |     Module::Module() :
18 |         m_parameters()
19 |     {
20 |         m_train = false;
21 |     }
22 | 
23 |     Module::Module(const std::vector<Variable> &parameters) :
24 |         m_parameters(parameters.begin(), parameters.end())
25 |     {
26 |     }
27 | 
28 |     void Module::setParams(const std::vector<Variable> &parameters)
29 |     {
30 |         m_parameters.clear();
31 |         for (auto parameter : parameters) {
32 |             m_parameters.push_back(parameter);
33 |         }
34 |     }
35 | 
36 |     void Module::train()
37 |     {
38 |         m_train = true;
39 |         for (auto &parameter : m_parameters) {
40 |             parameter.setCalcGrad(true);
41 |         }
42 |     }
43 | 
44 |     void Module::eval()
45 |     {
46 |         m_train = false;
47 |         for (auto &parameter : m_parameters) {
48 |             parameter.setCalcGrad(false);
49 |         }
50 |     }
51 | 
52 |     std::vector<Variable> Module::parameters()
53 |     {
54 |         return m_parameters;
55 |     }
56 | 
57 |     Variable Module::operator()(const Variable &input)
58 |     {
59 |         return this->forward(input);
60 |     }
61 | }
62 | }
63 | 
--------------------------------------------------------------------------------
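One detail worth calling out: parameters() returns Variable handles rather than deep copies, and a copied Variable appears to share its underlying storage (the m_shared block seen in Variable.cpp), so writing through Variable::array() on a returned handle updates the module in place. That is what the optimizers in the next file rely on. A small sketch under that assumption:

#include <arrayfire.h>
#include <af/nn.h>

int main()
{
    using namespace af;

    nn::Linear fc(nn::lecunNormal(2, 4));        // weight-only Linear, no bias

    auto params = fc.parameters();               // handles, not deep copies
    params[0].array() *= 0;                      // zero the weight matrix in place

    auto out = fc(nn::input(af::randu(4, 1)));
    af_print(out.array());                       // all zeros: the module saw the update
    return 0;
}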
/src/optim/Optimizers.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | 
10 | #include
11 | 
12 | #include
13 | 
14 | using af::autograd::Variable;
15 | using std::vector;
16 | 
17 | // References:
18 | // SGD and Momentum: http://cs231n.github.io/neural-networks-3/#sgd
19 | // Adam: https://arxiv.org/pdf/1412.6980.pdf
20 | // RMSProp: https://arxiv.org/pdf/1308.0850v5.pdf
21 | 
22 | // Comparison between various update rules:
23 | // https://www.quora.com/What-are-differences-between-update-rules-like-AdaDelta-RMSProp-AdaGrad-and-AdaM
24 | 
25 | namespace af
26 | {
27 | namespace optim
28 | {
29 |     Optimizer::Optimizer(const vector<Variable> &parameters)
30 |         : m_parameters(parameters.begin(), parameters.end())
31 |     {
32 |     }
33 | 
34 |     void Optimizer::zeroGrad()
35 |     {
36 |         for (auto &parameter : m_parameters) {
37 |             parameter.zeroGrad();
38 |         }
39 |     }
40 | 
41 |     SGDOptimizer::SGDOptimizer(const vector<Variable> &parameters,
42 |                                double learning_rate, double momentum,
43 |                                double weight_decay, bool use_nesterov)
44 |         : Optimizer(parameters),
45 |           m_use_nesterov(use_nesterov),
46 |           m_lr(learning_rate),
47 |           m_mu(momentum),
48 |           m_wd(weight_decay),
49 |           m_velocities()
50 |     {
51 |         if (momentum != 0) {
52 |             m_velocities.reserve(parameters.size());
53 |             for (const auto &parameter : m_parameters) {
54 |                 m_velocities.push_back(af::constant(0, parameter.dims(), parameter.type()));
55 |                 m_velocities.back().eval();
56 |             }
57 |         }
58 |     }
59 | 
60 |     void SGDOptimizer::update()
61 |     {
62 |         for (size_t i = 0; i < m_parameters.size(); i++) {
63 | 
64 |             const af::array &grad = m_parameters[i].grad().array();
65 |             af::array &data = m_parameters[i].array();
66 | 
67 |             if (m_wd != 0) {
68 |                 // Weight decay term
69 |                 data = data - m_wd * data;
70 |             }
71 | 
72 |             if (m_mu != 0) {
73 |                 af::array &velocity = m_velocities[i];
74 | 
75 |                 // Regular momentum
76 |                 velocity = m_mu * velocity - m_lr * grad;
77 |                 if (m_use_nesterov) {
78 |                     // Update for nesterov momentum
79 |                     data = data + velocity * m_mu - m_lr * grad;
80 |                 } else {
81 |                     data = data + velocity;
82 |                 }
83 | 
84 |                 af::eval(velocity, data);
85 |             } else {
86 | 
87 |                 data = data - m_lr * grad;
88 |                 af::eval(data);
89 |             }
90 |         }
91 |     }
92 | 
93 | 
94 |     AdamOptimizer::AdamOptimizer(const vector<Variable> &parameters,
95 |                                  double learning_rate,
96 |                                  double beta1, double beta2,
97 |                                  double epsilon, double weight_decay)
98 |         : Optimizer(parameters),
99 |           m_lr(learning_rate),
100 |           m_beta1(beta1),
101 |           m_beta2(beta2),
102 |           m_eps(epsilon),
103 |           m_wd(weight_decay),
104 |           m_count(0),
105 |           m_biased_first(),
106 |           m_biased_second()
107 |     {
108 |         m_biased_first.reserve(parameters.size());
109 |         m_biased_second.reserve(parameters.size());
110 | 
111 |         for (const auto &parameter : m_parameters) {
112 |             m_biased_first.push_back(af::constant(0, parameter.dims(), parameter.type()));
113 |             m_biased_second.push_back(af::constant(0, parameter.dims(), parameter.type()));
114 | 
115 |             m_biased_first.back().eval();
116 |             m_biased_second.back().eval();
117 |         }
118 |     }
119 | 
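For reference, the update() that follows implements the Adam rule from the paper cited at the top of this file. A scalar transcription of the same arithmetic (an editorial sketch, not part of the library):

#include <cmath>

// One Adam step for a single scalar parameter x with gradient g.
// m and v are the running (biased) first and second moments, t is the
// number of update() calls so far; corrected_lr mirrors the code below.
double adam_step(double x, double g, double &m, double &v, long t,
                 double lr, double beta1, double beta2, double eps)
{
    m = beta1 * m + (1 - beta1) * g;
    v = beta2 * v + (1 - beta2) * g * g;
    double corrected_lr = lr * std::sqrt(1 - std::pow(beta2, t)) / (1 - std::pow(beta1, t));
    return x - corrected_lr * m / (std::sqrt(v) + eps);
}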
120 |     void AdamOptimizer::update()
121 |     {
122 |         m_count++;
123 | 
124 |         for (size_t i = 0; i < m_parameters.size(); i++) {
125 |             const af::array &grad = m_parameters[i].grad().array();
126 |             af::array &data = m_parameters[i].array();
127 | 
128 |             if (m_wd != 0) {
129 |                 // Weight decay term
130 |                 data = data - m_wd * data;
131 |             }
132 | 
133 |             af::array &biased_first = m_biased_first[i];
134 |             af::array &biased_second = m_biased_second[i];
135 | 
136 |             biased_first = m_beta1 * biased_first + (1 - m_beta1) * grad;
137 |             biased_second = m_beta2 * biased_second + (1 - m_beta2) * grad * grad;
138 | 
139 |             double corrected_bias1 = 1 - std::pow(m_beta1, m_count);
140 |             double corrected_bias2 = 1 - std::pow(m_beta2, m_count);
141 |             double corrected_lr = m_lr * std::sqrt(corrected_bias2) / corrected_bias1;
142 | 
143 |             data = data - (corrected_lr * biased_first) / (af::sqrt(biased_second) + m_eps);
144 | 
145 |             af::eval(data, biased_first, biased_second);
146 |         }
147 |     }
148 | 
149 |     RMSPropOptimizer::RMSPropOptimizer(const vector<Variable> &parameters,
150 |                                        double learning_rate,
151 |                                        double rho,
152 |                                        double epsilon,
153 |                                        double weight_decay,
154 |                                        bool use_first)
155 |         : Optimizer(parameters),
156 |           m_use_first(use_first),
157 |           m_lr(learning_rate),
158 |           m_rho(rho),
159 |           m_eps(epsilon),
160 |           m_wd(weight_decay),
161 |           m_first(),
162 |           m_second()
163 |     {
164 |         if (m_use_first) m_first.reserve(parameters.size());
165 |         m_second.reserve(parameters.size());
166 | 
167 |         for (const auto &parameter : m_parameters) {
168 |             if (m_use_first) {
169 |                 m_first.push_back(af::constant(0, parameter.dims(), parameter.type()));
170 |                 m_first.back().eval();
171 |             }
172 | 
173 |             m_second.push_back(af::constant(0, parameter.dims(), parameter.type()));
174 |             m_second.back().eval();
175 |         }
176 |     }
177 | 
178 |     void RMSPropOptimizer::update()
179 |     {
180 |         for (size_t i = 0; i < m_parameters.size(); i++) {
181 |             const af::array &grad = m_parameters[i].grad().array();
182 |             af::array &data = m_parameters[i].array();
183 | 
184 |             if (m_wd != 0) {
185 |                 // Weight decay term
186 |                 data = data - m_wd * data;
187 |             }
188 | 
189 |             af::array &second = m_second[i];
190 |             second = m_rho * second + (1 - m_rho) * grad * grad;
191 | 
192 |             // Create shallow copy of second so that we don't update "second" below
193 |             af::array moments = second;
194 |             if (m_use_first) {
195 |                 af::array &first = m_first[i];
196 |                 first = m_rho * first + (1 - m_rho) * grad;
197 |                 moments = moments - first * first;
198 |             }
199 | 
200 |             data = data - (m_lr * grad) / (af::sqrt(moments) + m_eps);
201 | 
202 |             if (m_use_first) {
203 |                 af::array &first = m_first[i];
204 |                 af::eval(data, first, second);
205 |             } else {
206 |                 af::eval(data, second);
207 |             }
208 |         }
209 |     }
210 | }
211 | }
212 | 
--------------------------------------------------------------------------------
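Putting the pieces together, a minimal training loop in the spirit of examples/xor.cpp: build a module, pick a loss and an optimizer, then iterate zeroGrad / forward / backward / update. The sketch constructs Linear from explicit Variables and passes every SGD hyper-parameter explicitly so that it only relies on the constructors and signatures shown above.

#include <cstdio>

#include <arrayfire.h>
#include <af/autograd.h>
#include <af/nn.h>
#include <af/optim.h>

int main()
{
    using namespace af;

    // 3 -> 1 affine map followed by a sigmoid; fit a constant target of 0.5.
    nn::Linear fc(nn::lecunNormal(1, 3), nn::constant(0.0, 1, 1));
    nn::Sigmoid sig;
    nn::MeanSquaredError mse;

    optim::SGDOptimizer sgd(fc.parameters(), 0.1 /*learning_rate*/, 0.9 /*momentum*/,
                            0.0 /*weight_decay*/, false /*use_nesterov*/);

    auto x = nn::input(af::randu(3, 32));             // 3 features, 32 samples
    auto y = nn::noGrad(af::constant(0.5, 1, 32));

    fc.train();                                       // make the parameters differentiable
    for (int iter = 0; iter < 100; iter++) {
        sgd.zeroGrad();                               // clear last step's gradients
        auto loss = mse(sig(fc(x)), y);
        loss.backward(/*retain_grad_graph=*/false);
        sgd.update();                                 // in-place step on the shared parameter arrays

        if (iter % 25 == 0) {
            printf("iteration %d, loss %g\n", iter, loss.array().scalar<float>());
        }
    }
    return 0;
}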