├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── examples ├── CMakeLists.txt ├── autograd.cpp └── xor.cpp ├── include └── af │ ├── autograd.h │ ├── autograd │ ├── Functions.hpp │ └── Variable.hpp │ ├── nn.h │ ├── nn │ ├── Init.hpp │ ├── Modules.hpp │ └── Modules │ │ ├── Activations.hpp │ │ ├── Container.hpp │ │ ├── Dropout.hpp │ │ ├── Linear.hpp │ │ ├── Loss.hpp │ │ └── Module.hpp │ ├── optim.h │ └── optim │ └── Optimizers.hpp └── src ├── autograd ├── Functions.cpp └── Variable.cpp ├── nn ├── Init.cpp └── Modules │ ├── Activations.cpp │ ├── Container.cpp │ ├── Dropout.cpp │ ├── Linear.cpp │ ├── Loss.cpp │ └── Module.cpp └── optim └── Optimizers.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | 30 | build 31 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5.1) 2 | 3 | project(ArrayFireML 4 | VERSION 0.1.0 5 | LANGUAGES C CXX) 6 | 7 | find_package(ArrayFire REQUIRED) 8 | 9 | add_library(afml SHARED "") 10 | 11 | target_sources(afml 12 | PRIVATE 13 | src/autograd/Functions.cpp 14 | src/autograd/Variable.cpp 15 | src/nn/Modules/Activations.cpp 16 | src/nn/Modules/Container.cpp 17 | src/nn/Modules/Linear.cpp 18 | src/nn/Modules/Loss.cpp 19 | src/nn/Modules/Module.cpp 20 | src/nn/Modules/Dropout.cpp 21 | src/nn/Init.cpp 22 | src/optim/Optimizers.cpp 23 | ) 24 | 25 | target_include_directories(afml 26 | PUBLIC 27 | ${CMAKE_CURRENT_SOURCE_DIR}/include) 28 | 29 | target_link_libraries(afml PUBLIC ArrayFire::af) 30 | 31 | set_target_properties(afml 32 | PROPERTIES 33 | VERSION "${ArrayFireML_VERSION}" 34 | SOVERSION "${ArrayFireML_VERSION_MAJOR}" 35 | CXX_STANDARD 11) 36 | 37 | add_subdirectory(examples) 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, ArrayFire 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of arrayfire_ml nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ArrayFire ML 2 | 3 | 4 | ArrayFire ML is a C++ machine learning library built on top of the [ArrayFire library](https://github.com/arrayfire/arrayfire). This library leverages ArrayFire's cross-platform support to provide high-performance machine learning algorithms for multi-core CPUs, NVIDIA and AMD GPUs, and other accelerators. 5 | 6 | [Slack Channel](https://join.slack.com/t/arrayfire-org/shared_invite/enQtMjI4MjIzMDMzMTczLWM4ODIyZjA3YmY3NWEwMjk2N2Q0YTQyNGMwZmU4ZjkxNGU0MjYzYmUzYTg3ZTM0MDQxOTE2OTJjNGVkOGEwN2M) 7 | 8 | This project is currently under active development. Please follow [this issue](https://github.com/arrayfire/arrayfire_ml/issues/3) for tracking our progress. 9 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | function(build_example SRC) 2 | get_filename_component(src_name ${SRC} NAME_WE) 3 | set(target "${src_name}") 4 | add_executable(${target} ${SRC}) 5 | target_link_libraries(${target} 6 | PRIVATE 7 | afml 8 | ) 9 | target_compile_features(${target} 10 | PRIVATE cxx_range_for) 11 | endfunction(build_example) 12 | 13 | # build_example(Activations.cpp) 14 | # build_example(FFNet.cpp) 15 | # build_example(Node.cpp) 16 | build_example(xor.cpp) 17 | # build_example(Weights.cpp) 18 | build_example(autograd.cpp) 19 | -------------------------------------------------------------------------------- /examples/autograd.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #define VERIFY(VAL) do { \ 16 | auto res = af::allTrue(af::abs(VAL) < 1E-5); \ 17 | printf("%s:%d %s\n", __FUNCTION__, __LINE__, \ 18 | res ? 
"PASS" : "FAIL"); \ 19 | } while(0) 20 | 21 | using af::autograd::Variable; 22 | void test_multiply() 23 | { 24 | auto x = Variable(af::randu(5), true); 25 | auto y = x * x; 26 | auto dy = Variable(af::constant(1.0, 5), false); 27 | y.backward(dy); 28 | auto dx = x.grad(); 29 | VERIFY(dx.array() - 2 * x.array()); 30 | } 31 | 32 | void test_multipl_add() 33 | { 34 | auto x = Variable(af::randu(5), true); 35 | auto y = Variable(af::randu(5), true); 36 | auto z = x * x + x * y + y * y; 37 | auto dz = Variable(af::constant(1.0, 5), false); 38 | z.backward(dz); 39 | auto dx = x.grad(); 40 | auto dy = y.grad(); 41 | VERIFY(dx.array() - 2 * x.array() - y.array()); 42 | VERIFY(dy.array() - 2 * y.array() - x.array()); 43 | } 44 | 45 | void test_no_calc_grad() 46 | { 47 | auto x = Variable(af::randu(5), false); 48 | auto y = Variable(af::randu(5), true); 49 | auto z = x * x + x * y + y * y; 50 | auto dz = Variable(af::constant(1.0, 5), false); 51 | z.backward(dz); 52 | auto dy = y.grad(); 53 | VERIFY(dy.array() - 2 * y.array() - x.array()); 54 | try { 55 | auto dx = x.grad(); 56 | } catch(af::exception &ex) { 57 | std::cout << ex.what() << std::endl; 58 | return; 59 | } 60 | printf("%s:%d No Gradient check Failed\n"); 61 | } 62 | 63 | void test_multiply_sub() 64 | { 65 | auto x = Variable(af::randu(5), true); 66 | auto y = Variable(af::randu(5), true); 67 | auto z = x * x - x * y; 68 | auto dz = Variable(af::constant(1.0, 5), false); 69 | z.backward(dz); 70 | auto dx = x.grad(); 71 | auto dy = y.grad(); 72 | VERIFY(dx.array() - (2 * x.array() - y.array())); 73 | VERIFY(dy.array() - (-x.array())); 74 | } 75 | 76 | void test_divide_add() 77 | { 78 | auto x = Variable(af::randu(5), true); 79 | auto y = Variable(af::randu(5), true); 80 | auto z = x + x / y + y; 81 | auto dz = Variable(af::constant(1.0, 5), false); 82 | z.backward(dz); 83 | auto dx = x.grad(); 84 | auto dy = y.grad(); 85 | VERIFY(dx.array() - (1.0 + 1.0 / y.array())); 86 | VERIFY(dy.array() - (1.0 - x.array() / (y.array() * y.array()))); 87 | } 88 | 89 | void test_multiply_add_scalar() 90 | { 91 | auto x = Variable(af::randu(5), true); 92 | auto y = Variable(af::randu(5), true); 93 | auto z = 2 * x + x * y + y; 94 | auto dz = Variable(af::constant(1.0, 5), false); 95 | z.backward(dz); 96 | auto dx = x.grad(); 97 | auto dy = y.grad(); 98 | VERIFY(dx.array() - (2.0 + y.array())); 99 | VERIFY(dy.array() - (1.0 + x.array())); 100 | } 101 | 102 | void test_exp() 103 | { 104 | auto x = Variable(af::randu(5), true); 105 | auto y = exp(x); 106 | auto dy = Variable(af::constant(1.0, 5), false); 107 | y.backward(dy); 108 | auto dx = x.grad(); 109 | VERIFY(dx.array() - (af::exp(x.array()))); 110 | } 111 | 112 | void test_sigmoid() 113 | { 114 | auto x = Variable(af::randu(5), true); 115 | auto y = sigmoid(x); 116 | auto dy = Variable(af::constant(1.0, 5), false); 117 | y.backward(dy); 118 | auto dx = x.grad(); 119 | VERIFY(dx.array() - (y.array() * (1 - y.array()))); 120 | VERIFY(dx.array() - (af::sigmoid(x.array()) * (1 - af::sigmoid(x.array())))); 121 | } 122 | 123 | void test_tanh() 124 | { 125 | auto x = Variable(af::randu(5), true); 126 | auto y = tanh(x); 127 | auto dy = Variable(af::constant(1.0, 5), false); 128 | y.backward(dy); 129 | auto dx = x.grad(); 130 | VERIFY(dx.array() - (1 - y.array() * y.array())); 131 | VERIFY(dx.array() - (1 + af::tanh(x.array())) * (1 - af::tanh(x.array()))); 132 | } 133 | 134 | void test_tile() 135 | { 136 | auto x = Variable(af::randu(5), true); 137 | auto y = Variable(af::randu(5, 2), true); 138 | auto z = 
y * tileAs(x, y); 139 | auto dz = Variable(af::constant(1.0, 5, 2), false); 140 | z.backward(dz); 141 | auto dy = y.grad(); 142 | auto dx = x.grad(); 143 | VERIFY(dy.array() - af::tile(x.array(), 1, 2)); 144 | VERIFY(dx.array() - af::sum(y.array(), 1)); 145 | } 146 | 147 | void test_sum() 148 | { 149 | auto x = Variable(af::randu(5), true); 150 | auto y = Variable(af::randu(5, 2), true); 151 | auto z = x * sumAs(y, x); 152 | auto dz = Variable(af::constant(1.0, 5), false); 153 | z.backward(dz); 154 | auto dy = y.grad(); 155 | auto dx = x.grad(); 156 | VERIFY(dy.array() - af::tile(x.array(), 1, 2)); 157 | VERIFY(dx.array() - af::sum(y.array(), 1)); 158 | } 159 | 160 | void test_mean() 161 | { 162 | auto x = Variable(af::randu(5), true); 163 | auto y = Variable(af::randu(5, 3, 2), true); 164 | auto z = x * mean(y, {1,2}); 165 | auto dz = Variable(af::constant(1.0, 5), false); 166 | z.backward(dz); 167 | auto dy = y.grad(); 168 | auto dx = x.grad(); 169 | VERIFY(dy.array() - 6 * af::tile(x.array(), 1, 3, 2)); 170 | VERIFY(dx.array() - af::mean(af::mean(y.array(), 1), 2)); 171 | } 172 | 173 | int main() 174 | { 175 | af::info(); 176 | test_multiply(); 177 | test_multipl_add(); 178 | test_no_calc_grad(); 179 | test_multiply_sub(); 180 | test_divide_add(); 181 | test_multiply_add_scalar(); 182 | test_exp(); 183 | test_sigmoid(); 184 | test_tanh(); 185 | test_tile(); 186 | test_sum(); 187 | test_mean(); 188 | return 0; 189 | } 190 | -------------------------------------------------------------------------------- /examples/xor.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | using namespace af; 18 | using namespace af::nn; 19 | using namespace af::autograd; 20 | 21 | int main(int argc, const char **args) 22 | { 23 | int optim_mode = 0; 24 | std::string optimizer_arg = std::string(args[1]); 25 | if (optimizer_arg == "--adam") { 26 | optim_mode = 1; 27 | } else if (optimizer_arg == "--rmsprop") { 28 | optim_mode = 2; 29 | } 30 | 31 | const int inputSize = 2; 32 | const int outputSize = 1; 33 | const double lr = 0.01; 34 | const double mu = 0.1; 35 | const int numSamples = 4; 36 | 37 | float hInput[] = {1, 1, 38 | 0, 0, 39 | 1, 0, 40 | 0, 1}; 41 | 42 | float hOutput[] = {1, 43 | 0, 44 | 1, 45 | 1}; 46 | 47 | auto in = af::array(inputSize, numSamples, hInput); 48 | auto out = af::array(outputSize, numSamples, hOutput); 49 | 50 | nn::Sequential model; 51 | 52 | model.add(nn::Linear(inputSize, outputSize)); 53 | model.add(nn::Sigmoid()); 54 | 55 | auto loss = nn::MeanSquaredError(); 56 | 57 | std::unique_ptr optim; 58 | 59 | if (optimizer_arg == "--rmsprop") { 60 | optim = std::unique_ptr(new optim::RMSPropOptimizer(model.parameters(), lr)); 61 | } else if (optimizer_arg == "--adam") { 62 | optim = std::unique_ptr(new optim::AdamOptimizer(model.parameters(), lr)); 63 | } else { 64 | optim = std::unique_ptr(new optim::SGDOptimizer(model.parameters(), lr, mu)); 65 | } 66 | 67 | Variable result, l; 68 | for (int i = 0; i < 1000; i++) { 69 | for (int j = 0; j < numSamples; j++) { 70 | 71 | model.train(); 72 | optim->zeroGrad(); 73 | 74 | af::array in_j = in(af::span, j); 75 | af::array out_j = out(af::span, j); 76 | 77 | // Forward propagation 78 | result = model(nn::input(in_j)); 79 | 80 | // Calculate loss 81 | l = loss(result, nn::noGrad(out_j)); 82 | 83 | // Backward propagation 84 | l.backward(); 85 | 86 | // Update parameters 87 | optim->update(); 88 | } 89 | 90 | if ((i + 1) % 100 == 0) { 91 | model.eval(); 92 | 93 | // Forward propagation 94 | result = model(nn::input(in)); 95 | 96 | // Calculate loss 97 | // TODO: Use loss function 98 | af::array diff = out - result.array(); 99 | printf("Average Error at iteration(%d) : %lf\n", i + 1, af::mean(af::abs(diff))); 100 | printf("Predicted\n"); 101 | af_print(result.array()); 102 | printf("Expected\n"); 103 | af_print(out); 104 | printf("\n\n"); 105 | } 106 | } 107 | return 0; 108 | } 109 | -------------------------------------------------------------------------------- /include/af/autograd.h: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #include 10 | #include 11 | -------------------------------------------------------------------------------- /include/af/autograd/Functions.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace af { 15 | namespace autograd { 16 | 17 | class Variable; 18 | 19 | Variable operator +(const Variable &lhs, const Variable &rhs); 20 | Variable operator *(const Variable &lhs, const Variable &rhs); 21 | Variable operator -(const Variable &lhs, const Variable &rhs); 22 | Variable operator /(const Variable &lhs, const Variable &rhs); 23 | Variable operator >(const Variable &lhs, const Variable &rhs); 24 | Variable operator <(const Variable &lhs, const Variable &rhs); 25 | Variable operator >=(const Variable &lhs, const Variable &rhs); 26 | Variable operator <=(const Variable &lhs, const Variable &rhs); 27 | 28 | Variable operator +(const double &lhs, const Variable &rhs); 29 | Variable operator *(const double &lhs, const Variable &rhs); 30 | Variable operator -(const double &lhs, const Variable &rhs); 31 | Variable operator /(const double &lhs, const Variable &rhs); 32 | Variable operator >(const double &lhs, const Variable &rhs); 33 | Variable operator <(const double &lhs, const Variable &rhs); 34 | Variable operator >=(const double &lhs, const Variable &rhs); 35 | Variable operator <=(const double &lhs, const Variable &rhs); 36 | 37 | Variable operator +(const Variable &lhs, const double &rhs); 38 | Variable operator *(const Variable &lhs, const double &rhs); 39 | Variable operator -(const Variable &lhs, const double &rhs); 40 | Variable operator /(const Variable &lhs, const double &rhs); 41 | Variable operator >(const Variable &lhs, const double &rhs); 42 | Variable operator <(const Variable &lhs, const double &rhs); 43 | Variable operator >=(const Variable &lhs, const double &rhs); 44 | Variable operator <=(const Variable &lhs, const double &rhs); 45 | 46 | Variable operator !(const Variable &input); 47 | 48 | Variable negate(const Variable &input); 49 | Variable reciprocal(const Variable &input); 50 | 51 | Variable exp(const Variable &input); 52 | Variable log(const Variable &input); 53 | Variable sin(const Variable &input); 54 | Variable cos(const Variable &input); 55 | Variable tanh(const Variable &input); 56 | Variable sigmoid(const Variable &input); 57 | 58 | Variable max(const Variable &lhs, const Variable &rhs); 59 | Variable max(const Variable &lhs, const double &rhs); 60 | Variable max(const double &lhs, const Variable &rhs); 61 | 62 | Variable min(const Variable &lhs, const Variable &rhs); 63 | Variable min(const Variable &lhs, const double &rhs); 64 | Variable min(const double &lhs, const Variable &rhs); 65 | 66 | Variable transpose(const Variable &input); 67 | Variable tileAs(const Variable &input, const Variable &reference); 68 | Variable sumAs(const Variable &input, const Variable &reference); 69 | 70 | Variable tile(const Variable &input, const std::vector &repeats); 71 | Variable sum(const Variable &input, const std::vector &axes); 72 | Variable mean(const Variable &input, const std::vector &axes); 73 | 74 | Variable matmul(const Variable &lhs, const Variable &rhs); 75 | Variable matmulTN(const Variable &lhs, const Variable &rhs); 76 | Variable matmulNT(const Variable &lhs, const Variable &rhs); 77 | 78 | Variable abs(const Variable &input); 79 | 80 | Variable flat(const Variable &input); 81 | Variable moddims(const Variable &input, const dim4 &dims); 82 | } 83 | } 84 | 
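The declarations above form a small define-by-run autograd API: every operator and function that takes Variable arguments returns a new Variable that remembers its inputs together with a gradient function, and backward() later replays that recorded graph. Below is a minimal sketch of how these functions are typically combined, in the style of examples/autograd.cpp; the two include lines are an assumption (the include directives in the listings in this dump were stripped), as is building against the afml target from the top-level CMakeLists.txt.

#include <arrayfire.h>
#include <af/autograd.h>

using af::autograd::Variable;

int main()
{
    // y = x * x + 2 * x, so dy/dx should be 2 * x + 2
    auto x = Variable(af::randu(5), true); // true => track gradients for x
    auto y = x * x + 2.0 * x;

    // Seeds the backward pass with ones and walks the recorded graph
    y.backward();

    // Difference should be ~0 elementwise if the gradient is correct
    af_print(x.grad().array() - (2 * x.array() + 2));
    return 0;
}
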
-------------------------------------------------------------------------------- /include/af/autograd/Variable.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #pragma once 11 | 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace af { 21 | namespace autograd { 22 | class Variable 23 | { 24 | public: 25 | typedef std::function &, const Variable &)> GradFunc_t; 26 | typedef std::unordered_map Cache_t; 27 | typedef std::vector DAG_t; 28 | 29 | private: 30 | struct Shared { 31 | Shared(); 32 | Shared(const af::array &data, bool calc_grad); 33 | Shared(const af::array &data, 34 | const std::vector &inputs, 35 | GradFunc_t grad_func, 36 | bool calc_grad); 37 | 38 | bool m_calc_grad; 39 | af::array m_data; 40 | std::vector m_inputs; 41 | std::vector m_grads; 42 | GradFunc_t m_grad_func; 43 | }; 44 | 45 | public: 46 | 47 | Variable(); 48 | Variable(const af::array &data, bool calc_grad); 49 | Variable(const af::array &data, 50 | const std::vector &inputs, 51 | GradFunc_t grad_func); 52 | 53 | af::array& array() const; 54 | 55 | Variable& grad() const; 56 | 57 | std::ptrdiff_t id() const; 58 | 59 | bool isCalcGrad() const; 60 | 61 | bool isGradAvailable() const; 62 | 63 | af::dim4 dims() const; 64 | 65 | af::dtype type() const; 66 | 67 | void zeroGrad(); 68 | 69 | void setCalcGrad(bool calc_grad); 70 | 71 | void addGrad(const Variable &child_grad); 72 | 73 | void calcGradInputs(bool retain_grad_graph = false); 74 | 75 | void backward(const Variable &grad, bool retain_grad_graph = false); 76 | 77 | void backward(bool retain_grad_graph = false); 78 | 79 | private: 80 | void evalGrad(bool retain_grad_graph = false); 81 | 82 | std::vector& getInputs() const; 83 | 84 | static void buildSubGraph(Cache_t &cache, DAG_t &dag, const Variable &var); 85 | 86 | static DAG_t build(const Variable &var); 87 | 88 | std::shared_ptr m_shared; 89 | }; 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /include/af/nn.h: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #pragma once 11 | 12 | #include 13 | #include 14 | -------------------------------------------------------------------------------- /include/af/nn/Init.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace af { 14 | namespace nn { 15 | 16 | autograd::Variable input(const af::array &arr); 17 | 18 | autograd::Variable noGrad(const af::array &arr); 19 | 20 | autograd::Variable parameter(const af::array &arr); 21 | 22 | autograd::Variable uniform(int input_size, int output_size, 23 | double min = 0, double max = 1, 24 | af::dtype type = f32, bool calc_grad=true); 25 | 26 | autograd::Variable uniform(af::dim4 dims, 27 | double min = 0, double max = 1, 28 | af::dtype type = f32, bool calc_grad=true); 29 | 30 | autograd::Variable normal(int input_size, int output_size, 31 | double stdv = 1, double mean = 0, 32 | af::dtype type = f32, bool calc_grad=true); 33 | 34 | autograd::Variable normal(af::dim4 dims, 35 | double stdv = 1, double mean = 0, 36 | af::dtype type = f32, bool calc_grad=true); 37 | 38 | autograd::Variable lecunUniform(int input_size, int output_size, 39 | af::dtype type = f32, bool calc_grad=true); 40 | 41 | autograd::Variable lecunUniform(af::dim4 dims, 42 | af::dtype type = f32, bool calc_grad=true); 43 | 44 | autograd::Variable lecunNormal(int input_size, int output_size, 45 | af::dtype type = f32, bool calc_grad=true); 46 | 47 | autograd::Variable lecunNormal(af::dim4 dims, 48 | af::dtype type = f32, bool calc_grad=true); 49 | 50 | autograd::Variable glorotUniform(int input_size, int output_size, 51 | af::dtype type = f32, bool calc_grad=true); 52 | 53 | autograd::Variable glorotUniform(af::dim4 dims, 54 | af::dtype type = f32, bool calc_grad=true); 55 | 56 | autograd::Variable glorotNormal(int input_size, int output_size, 57 | af::dtype type = f32, bool calc_grad=true); 58 | 59 | autograd::Variable glorotNormal(af::dim4 dims, 60 | af::dtype type = f32, bool calc_grad=true); 61 | 62 | 63 | autograd::Variable constant(double val, int input_size, int output_size, 64 | af::dtype type = f32, bool calc_grad=true); 65 | 66 | autograd::Variable constant(double val, af::dim4 dims, 67 | af::dtype type = f32, bool calc_grad=true); 68 | 69 | autograd::Variable identity(int input_size, int output_size, 70 | af::dtype type = f32, bool calc_grad=true); 71 | 72 | autograd::Variable identity(af::dim4 dims, 73 | af::dtype type = f32, bool calc_grad=true); 74 | 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /include/af/nn/Modules.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | -------------------------------------------------------------------------------- /include/af/nn/Modules/Activations.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace af 15 | { 16 | namespace nn 17 | { 18 | class Sigmoid : public Module 19 | { 20 | public: 21 | Sigmoid(); 22 | 23 | autograd::Variable forward(const autograd::Variable &input); 24 | }; 25 | 26 | class Tanh : public Module 27 | { 28 | public: 29 | Tanh(); 30 | 31 | autograd::Variable forward(const autograd::Variable &input); 32 | }; 33 | 34 | class ReLU : public Module 35 | { 36 | public: 37 | ReLU(); 38 | 39 | autograd::Variable forward(const autograd::Variable &input); 40 | }; 41 | 42 | class LeakyReLU : public Module 43 | { 44 | private: 45 | double m_slope; 46 | public: 47 | LeakyReLU(double slope = 0.0); 48 | 49 | autograd::Variable forward(const autograd::Variable &input); 50 | }; 51 | 52 | class PReLU : public Module 53 | { 54 | public: 55 | PReLU(int size, double value = 1.0); 56 | PReLU(const autograd::Variable &w); 57 | 58 | autograd::Variable forward(const autograd::Variable &input); 59 | }; 60 | 61 | class ELU : public Module 62 | { 63 | private: 64 | double m_alpha; 65 | public: 66 | ELU(double alpha = 1.0); 67 | 68 | autograd::Variable forward(const autograd::Variable &input); 69 | }; 70 | 71 | class ThresholdReLU : public Module 72 | { 73 | private: 74 | double m_threshold; 75 | public: 76 | ThresholdReLU(double threshold = 1.0); 77 | 78 | autograd::Variable forward(const autograd::Variable &input); 79 | }; 80 | 81 | 82 | 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /include/af/nn/Modules/Container.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | namespace af 17 | { 18 | namespace nn 19 | { 20 | 21 | typedef std::shared_ptr ModulePtr; 22 | 23 | class Container : public Module 24 | { 25 | protected: 26 | 27 | std::vector m_modules; 28 | 29 | Container(); 30 | 31 | public: 32 | 33 | template 34 | void add(T module) 35 | { 36 | m_modules.emplace_back(new T(module)); 37 | for (auto param : module.parameters()) { 38 | m_parameters.push_back(param); 39 | } 40 | } 41 | 42 | ModulePtr get(int id); 43 | 44 | std::vector modules(); 45 | }; 46 | 47 | class Sequential : public Container 48 | { 49 | public: 50 | 51 | Sequential(); 52 | 53 | autograd::Variable forward(const autograd::Variable &input); 54 | }; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /include/af/nn/Modules/Dropout.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace af 14 | { 15 | namespace nn 16 | { 17 | class Dropout : public Module 18 | { 19 | private: 20 | double m_ratio; 21 | public: 22 | Dropout(double drop_ratio = 0.5); 23 | 24 | autograd::Variable forward(const autograd::Variable &input); 25 | }; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /include/af/nn/Modules/Linear.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace af 14 | { 15 | namespace nn 16 | { 17 | class Linear : public Module 18 | { 19 | private: 20 | bool m_bias; 21 | public: 22 | Linear(int input_size, int output_size, bool bias = true, float spread = 0.05); 23 | 24 | Linear(const autograd::Variable &w); 25 | 26 | Linear(const autograd::Variable &w, const autograd::Variable &b); 27 | 28 | autograd::Variable forward(const autograd::Variable &input); 29 | }; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /include/af/nn/Modules/Loss.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace af 14 | { 15 | namespace nn 16 | { 17 | class Loss : public Module 18 | { 19 | public: 20 | Loss() {} 21 | 22 | virtual autograd::Variable forward(const autograd::Variable &inputs, 23 | const autograd::Variable &targets) = 0; 24 | 25 | autograd::Variable forward(const autograd::Variable &inputs); 26 | 27 | autograd::Variable operator()(const autograd::Variable &inputs, 28 | const autograd::Variable &targets); 29 | }; 30 | 31 | class MeanSquaredError : public Loss 32 | { 33 | public: 34 | MeanSquaredError() {} 35 | 36 | autograd::Variable forward(const autograd::Variable &inputs, 37 | const autograd::Variable &targets); 38 | }; 39 | 40 | class MeanAbsoluteError : public Loss 41 | { 42 | public: 43 | MeanAbsoluteError() {} 44 | 45 | autograd::Variable forward(const autograd::Variable &inputs, 46 | const autograd::Variable &targets); 47 | }; 48 | 49 | class BinaryCrossEntropyLoss : public Loss 50 | { 51 | public: 52 | BinaryCrossEntropyLoss() {} 53 | 54 | autograd::Variable forward(const autograd::Variable &inputs, 55 | const autograd::Variable &targets); 56 | 57 | autograd::Variable forward(const autograd::Variable &inputs, 58 | const autograd::Variable &targets, 59 | const autograd::Variable &weights); 60 | }; 61 | 62 | typedef MeanSquaredError MSE; 63 | typedef MeanAbsoluteError MAE; 64 | typedef MeanAbsoluteError L1Loss; 65 | typedef BinaryCrossEntropyLoss BCELoss; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /include/af/nn/Modules/Module.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | namespace af 17 | { 18 | namespace nn 19 | { 20 | 21 | class Module 22 | { 23 | protected: 24 | std::vector m_parameters; 25 | 26 | bool m_train; 27 | 28 | Module(); 29 | 30 | Module(const std::vector ¶meters); 31 | 32 | void setParams(const std::vector ¶meters); 33 | 34 | public: 35 | 36 | std::vector parameters(); 37 | 38 | void train(); 39 | 40 | void eval(); 41 | 42 | virtual autograd::Variable forward(const autograd::Variable &input) = 0; 43 | 44 | autograd::Variable operator()(const autograd::Variable &input); 45 | }; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /include/af/optim.h: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | #include 10 | -------------------------------------------------------------------------------- /include/af/optim/Optimizers.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #pragma once 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | namespace af 18 | { 19 | namespace optim 20 | { 21 | 22 | class Optimizer 23 | { 24 | protected: 25 | std::vector m_parameters; 26 | public: 27 | 28 | Optimizer(const std::vector ¶meters); 29 | 30 | virtual void update() = 0; 31 | 32 | void zeroGrad(); 33 | }; 34 | 35 | class SGDOptimizer : public Optimizer 36 | { 37 | bool m_use_nesterov; 38 | double m_lr; 39 | double m_mu; 40 | double m_wd; 41 | std::vector m_velocities; 42 | public: 43 | SGDOptimizer(const std::vector ¶meters, 44 | double learning_rate, double momentum = 0, 45 | double weight_decay = 0, 46 | bool use_nesterov = false); 47 | void update(); 48 | }; 49 | 50 | class AdamOptimizer : public Optimizer 51 | { 52 | double m_lr; 53 | double m_beta1; 54 | double m_beta2; 55 | double m_eps; 56 | double m_wd; 57 | int m_count; 58 | std::vector m_biased_first; 59 | std::vector m_biased_second; 60 | public: 61 | AdamOptimizer(const std::vector ¶meters, 62 | double learning_rate, 63 | double beta1 = 0.9, 64 | double beta2 = 0.999, 65 | double epsilon = 1E-8, 66 | double weight_decay = 0); 67 | void update(); 68 | }; 69 | 70 | class RMSPropOptimizer : public Optimizer 71 | { 72 | bool m_use_first; 73 | double m_lr; 74 | double m_rho; 75 | double m_eps; 76 | double m_wd; 77 | std::vector m_first; 78 | std::vector m_second; 79 | public: 80 | RMSPropOptimizer(const std::vector ¶meters, 81 | double learning_rate, 82 | double rho = 0.99, 83 | double epsilon = 1E-8, 84 | double weight_decay = 0, 85 | bool use_first = false); 86 | void update(); 87 | }; 88 | 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/autograd/Functions.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #include 11 | #include 12 | 13 | namespace af { 14 | namespace autograd { 15 | 16 | Variable operator +(const Variable &lhs, const Variable &rhs) 17 | { 18 | auto result = lhs.array() + rhs.array(); 19 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 20 | inputs[0].addGrad(grad_output); 21 | inputs[1].addGrad(grad_output); 22 | }; 23 | return Variable(result, {lhs, rhs}, grad_func); 24 | } 25 | 26 | Variable operator -(const Variable &lhs, const Variable &rhs) 27 | { 28 | auto result = lhs.array() - rhs.array(); 29 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 30 | inputs[0].addGrad(grad_output); 31 | inputs[1].addGrad(negate(grad_output)); 32 | }; 33 | return Variable(result, {lhs, rhs}, grad_func); 34 | } 35 | 36 | Variable operator *(const Variable &lhs, const Variable &rhs) 37 | { 38 | auto result = lhs.array() * rhs.array(); 39 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 40 | inputs[0].addGrad(grad_output * inputs[1]); 41 | inputs[1].addGrad(grad_output * inputs[0]); 42 | }; 43 | return Variable(result, {lhs, rhs}, grad_func); 44 | } 45 | 46 | Variable operator /(const Variable &lhs, const Variable &rhs) 47 | { 48 | auto result = lhs.array() / rhs.array(); 49 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 50 | auto inputs_1_rec = reciprocal(inputs[1]); 51 | auto grad_input_0 = grad_output * inputs_1_rec; 52 | inputs[0].addGrad(grad_input_0); 53 | inputs[1].addGrad(grad_input_0 * negate(inputs[0]) * inputs_1_rec); 54 | }; 55 | return Variable(result, {lhs, rhs}, grad_func); 56 | } 57 | 58 | Variable operator >(const Variable &lhs, const Variable &rhs) 59 | { 60 | auto result = lhs.array() > rhs.array(); 61 | return Variable(result, false); 62 | } 63 | 64 | Variable operator <(const Variable &lhs, const Variable &rhs) 65 | { 66 | auto result = lhs.array() < rhs.array(); 67 | return Variable(result, false); 68 | } 69 | 70 | Variable operator >=(const Variable &lhs, const Variable &rhs) 71 | { 72 | auto result = lhs.array() >= rhs.array(); 73 | return Variable(result, false); 74 | } 75 | 76 | Variable operator <=(const Variable &lhs, const Variable &rhs) 77 | { 78 | auto result = lhs.array() <= rhs.array(); 79 | return Variable(result, false); 80 | } 81 | 82 | 83 | 84 | #define INSTANTIATE_OPERATOR(OP) \ 85 | Variable operator OP(const double &lhs_val, const Variable &rhs) \ 86 | { \ 87 | auto lhs = Variable( \ 88 | af::constant(lhs_val, \ 89 | rhs.array().dims(), \ 90 | rhs.array().type()), \ 91 | false); \ 92 | return lhs OP rhs; \ 93 | } \ 94 | Variable operator OP(const Variable &lhs, const double &rhs_val) \ 95 | { \ 96 | auto rhs = Variable( \ 97 | af::constant(rhs_val, \ 98 | lhs.array().dims(), lhs.array().type()), \ 99 | false); \ 100 | return lhs OP rhs; \ 101 | } \ 102 | 103 | INSTANTIATE_OPERATOR(+) 104 | INSTANTIATE_OPERATOR(-) 105 | INSTANTIATE_OPERATOR(*) 106 | INSTANTIATE_OPERATOR(/) 107 | INSTANTIATE_OPERATOR(>) 108 | INSTANTIATE_OPERATOR(<) 109 | INSTANTIATE_OPERATOR(>=) 110 | INSTANTIATE_OPERATOR(<=) 111 | 112 | #undef INSTANTIATE_OPERATOR 113 | 114 | Variable operator !(const Variable &input) 115 | { 116 | auto result = !input.array(); 117 | return Variable(result, false); 118 | } 119 | 120 | Variable max(const Variable &lhs, const Variable &rhs) 121 | { 122 | auto mask = lhs > rhs; 123 
| auto result = max(lhs.array(), rhs.array()); 124 | 125 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 126 | inputs[0].addGrad( inputs[2] * grad_output); 127 | inputs[1].addGrad(!inputs[2] * grad_output); 128 | }; 129 | return Variable(result, {lhs, rhs, mask}, grad_func); 130 | } 131 | 132 | Variable min(const Variable &lhs, const Variable &rhs) 133 | { 134 | auto mask = lhs < rhs; 135 | auto result = min(lhs.array(), rhs.array()); 136 | 137 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 138 | inputs[0].addGrad( inputs[2] * grad_output); 139 | inputs[1].addGrad(!inputs[2] * grad_output); 140 | }; 141 | return Variable(result, {lhs, rhs, mask}, grad_func); 142 | } 143 | 144 | #define INSTANTIATE_FUNCTION(FN) \ 145 | Variable FN(const double &lhs_val, const Variable &rhs) \ 146 | { \ 147 | auto lhs = Variable( \ 148 | af::constant(lhs_val, \ 149 | rhs.array().dims(), \ 150 | rhs.array().type()), \ 151 | false); \ 152 | return FN(lhs,rhs); \ 153 | } \ 154 | Variable FN(const Variable &lhs, const double &rhs_val) \ 155 | { \ 156 | auto rhs = Variable( \ 157 | af::constant(rhs_val, \ 158 | lhs.array().dims(), lhs.array().type()), \ 159 | false); \ 160 | return FN(lhs, rhs); \ 161 | } 162 | 163 | 164 | INSTANTIATE_FUNCTION(max); 165 | INSTANTIATE_FUNCTION(min); 166 | 167 | #undef INSTANTIATE_FUNCTION 168 | 169 | Variable negate(const Variable &input) 170 | { 171 | auto result = 0.0 - input.array(); 172 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 173 | inputs[0].addGrad(negate(grad_output)); 174 | }; 175 | return Variable(result, {input}, grad_func); 176 | } 177 | 178 | Variable reciprocal(const Variable &input) 179 | { 180 | auto result = 1.0 / input.array(); 181 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 182 | auto res = reciprocal(inputs[0]); 183 | inputs[0].addGrad(negate(grad_output) * res * res); 184 | }; 185 | return Variable(result, {input}, grad_func); 186 | } 187 | 188 | Variable exp(const Variable &input) 189 | { 190 | auto result = exp(input.array()); 191 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 192 | inputs[0].addGrad(grad_output * exp(inputs[0])); 193 | }; 194 | return Variable(result, {input}, grad_func); 195 | } 196 | 197 | Variable log(const Variable &input) 198 | { 199 | auto result = log(input.array()); 200 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 201 | inputs[0].addGrad(grad_output / inputs[0]); 202 | }; 203 | return Variable(result, {input}, grad_func); 204 | } 205 | 206 | Variable sin(const Variable &input) 207 | { 208 | auto result = sin(input.array()); 209 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 210 | inputs[0].addGrad(grad_output * cos(inputs[0])); 211 | }; 212 | return Variable(result, {input}, grad_func); 213 | } 214 | 215 | Variable cos(const Variable &input) 216 | { 217 | auto result = cos(input.array()); 218 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 219 | inputs[0].addGrad(grad_output * negate(sin(inputs[0]))); 220 | }; 221 | return Variable(result, {input}, grad_func); 222 | } 223 | 224 | Variable tanh(const Variable &input) 225 | { 226 | auto result = tanh(input.array()); 227 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 228 | auto tmp = tanh(inputs[0]); 229 | inputs[0].addGrad(grad_output * (1.0 - tmp * tmp)); 230 | }; 231 | return Variable(result, {input}, grad_func); 232 | } 233 | 234 | 
Variable sigmoid(const Variable &input) 235 | { 236 | auto result = sigmoid(input.array()); 237 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 238 | auto tmp = sigmoid(inputs[0]); 239 | inputs[0].addGrad(grad_output * tmp * (1 - tmp)); 240 | }; 241 | return Variable(result, {input}, grad_func); 242 | } 243 | 244 | Variable transpose(const Variable &input) 245 | { 246 | auto result = transpose(input.array()); 247 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 248 | inputs[0].addGrad(transpose(grad_output)); 249 | }; 250 | return Variable(result, {input}, grad_func); 251 | } 252 | 253 | Variable tileAs(const Variable &input, const Variable &reference) 254 | { 255 | dim4 dims(1,1,1,1); 256 | dim4 rdims = reference.dims(); 257 | dim4 idims = input.dims(); 258 | for (int i = 0; i < 4; i++) { 259 | dims[i] = rdims[i] / idims[i]; 260 | } 261 | auto result = tile(input.array(), dims); 262 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 263 | inputs[0].addGrad(sumAs(grad_output, inputs[0])); 264 | }; 265 | return Variable(result, {input}, grad_func); 266 | } 267 | 268 | Variable sumAs(const Variable &input, const Variable &reference) 269 | { 270 | dim4 rdims = reference.dims(); 271 | dim4 idims = input.dims(); 272 | auto result = input.array(); 273 | for (int i = 0; i < 4; i++) { 274 | if (idims[i] != rdims[i]) result = sum(result, i); 275 | } 276 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 277 | inputs[0].addGrad(tileAs(grad_output, inputs[0])); 278 | }; 279 | return Variable(result, {input}, grad_func); 280 | } 281 | 282 | Variable tile(const Variable &input, const std::vector &repeats) 283 | { 284 | dim4 dims; 285 | for (size_t i = 0; i < repeats.size(); i++) { 286 | dims[i] = repeats[i]; 287 | } 288 | auto result = tile(input.array(), dims); 289 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 290 | inputs[0].addGrad(sumAs(grad_output, inputs[0])); 291 | }; 292 | return Variable(result, {input}, grad_func); 293 | } 294 | 295 | Variable sum(const Variable &input, const std::vector &axes) 296 | { 297 | auto result = input.array(); 298 | for (size_t i = 0; i < axes.size(); i++) { 299 | result = sum(result, axes[i]); 300 | } 301 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 302 | inputs[0].addGrad(tileAs(grad_output, inputs[0])); 303 | }; 304 | return Variable(result, {input}, grad_func); 305 | } 306 | 307 | Variable mean(const Variable &input, const std::vector &axes) 308 | { 309 | auto result = input.array(); 310 | for (size_t i = 0; i < axes.size(); i++) { 311 | result = mean(result, axes[i]); 312 | } 313 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 314 | dim4 odims = grad_output.dims(); 315 | dim4 idims = inputs[0].dims(); 316 | dim_t count = 1; 317 | for (int i = 0; i < 4; i++) { 318 | count *= idims[i] / odims[i]; 319 | } 320 | inputs[0].addGrad(count * tileAs(grad_output, inputs[0])); 321 | }; 322 | return Variable(result, {input}, grad_func); 323 | } 324 | 325 | Variable matmul(const Variable &lhs, const Variable &rhs) 326 | { 327 | // lhs:Input[0] -- [M, N] 328 | // rhs:Input[1] -- [N, K] 329 | //matmul(lhs, rhs) 330 | // -- matmul([M, N], [N, K]) -- [M, K] 331 | // result:grad_output -- [M, K] 332 | auto result = matmul(lhs.array(), rhs.array()); 333 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 334 | // matmulNT(grad_output, inputs[1]) 335 | // -- matmulNT([M, K], [N, K]) 
336 | // -- matmul([M, K], [K, N]) -- [M, K] 337 | inputs[0].addGrad(matmulNT(grad_output, inputs[1])); 338 | // matmulTN(inputs[0], grad_output) 339 | // -- matmulTN([M, N], [M, K]) 340 | // -- matmul([N, M], [M, K]) -- [N, K] 341 | inputs[1].addGrad(matmulTN(inputs[0], grad_output)); 342 | }; 343 | return Variable(result, {lhs, rhs}, grad_func); 344 | } 345 | 346 | Variable matmulTN(const Variable &lhs, const Variable &rhs) 347 | { 348 | // lhs:Input[0] -- [N, M] 349 | // rhs:Input[1] -- [N, K] 350 | // matmulTN(lhs, rhs) 351 | // -- matmulTN([N, M], [N, K]) 352 | // -- matmul([M, N], [N, K]) -- [M, K] 353 | // result:grad_output -- [M, K] 354 | auto result = matmulTN(lhs.array(), rhs.array()); 355 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 356 | // matmulNT(inputs[1], grad_output) 357 | // -- matmulNT([N, K], [M, K]) 358 | // -- matmul([N, K], [K, M]) -- [N, M] 359 | inputs[0].addGrad(matmulNT(inputs[1], grad_output)); 360 | // matmul(inputs[0], grad_output) 361 | // -- matmulNT([N, M], [M, K]) -- [N, K] 362 | inputs[1].addGrad(matmul(inputs[0], grad_output)); 363 | }; 364 | return Variable(result, {lhs, rhs}, grad_func); 365 | } 366 | 367 | Variable matmulNT(const Variable &lhs, const Variable &rhs) 368 | { 369 | // lhs:Input[0] -- [M, N] 370 | // rhs:Input[1] -- [K, N] 371 | // matmulNT(lhs, rhs) 372 | // -- matmulNT([M, N], [K, N]) 373 | // -- matmul([M, N], [N, K]) -- [M, K] 374 | // result:grad_output -- [M, K] 375 | auto result = matmulNT(lhs.array(), rhs.array()); 376 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 377 | // matmul(grad_output, inputs[1]) 378 | // -- matmul([M, K], [K, N]) -- [M, N] 379 | inputs[0].addGrad(matmul(grad_output, inputs[1])); 380 | // matmulTN(grad_output, inputs[0]) 381 | // -- matmulTN([M, K], [M, N]) 382 | // -- matmul([K, M], [M, N]) -- [K, N] 383 | inputs[1].addGrad(matmulTN(grad_output, inputs[0])); 384 | }; 385 | return Variable(result, {lhs, rhs}, grad_func); 386 | } 387 | 388 | Variable abs(const Variable &input) 389 | { 390 | auto result = af::abs(input.array()); 391 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 392 | // af::sign returns signbit 393 | // Convert it into -1, 1 394 | auto sign = Variable(1 - 2 * af::sign(inputs[0].array()), false); 395 | inputs[0].addGrad(sign * grad_output); 396 | }; 397 | return Variable(result, {input}, grad_func); 398 | } 399 | 400 | Variable flat(const Variable &input) 401 | { 402 | auto result = af::flat(input.array()); 403 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 404 | inputs[0].addGrad(moddims(grad_output, inputs[0].dims())); 405 | }; 406 | return Variable(result, {input}, grad_func); 407 | } 408 | 409 | Variable moddims(const Variable &input, const dim4 &dims) 410 | { 411 | auto result = af::moddims(input.array(), dims); 412 | auto grad_func = [](std::vector &inputs, const Variable &grad_output) { 413 | inputs[0].addGrad(moddims(grad_output, inputs[0].dims())); 414 | }; 415 | return Variable(result, {input}, grad_func); 416 | } 417 | } 418 | } 419 | -------------------------------------------------------------------------------- /src/autograd/Variable.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #include 11 | #include 12 | 13 | namespace af { 14 | namespace autograd { 15 | 16 | Variable::Shared::Shared() : 17 | m_calc_grad(true), 18 | m_data(), 19 | m_inputs(), 20 | m_grads(), 21 | m_grad_func(nullptr) 22 | {} 23 | 24 | Variable::Shared::Shared(const af::array &data, bool calc_grad) : 25 | m_calc_grad(calc_grad), 26 | m_data(data), 27 | m_inputs(), 28 | m_grads(), 29 | m_grad_func(nullptr) 30 | {} 31 | 32 | Variable::Shared::Shared(const af::array &data, 33 | const std::vector &inputs, 34 | GradFunc_t grad_func, 35 | bool calc_grad) : 36 | m_calc_grad(calc_grad), 37 | m_data(data), 38 | m_inputs(inputs.begin(), inputs.end()), 39 | m_grads(), 40 | m_grad_func(grad_func) 41 | {} 42 | 43 | Variable::Variable() : 44 | m_shared(new Shared()) 45 | { 46 | } 47 | 48 | Variable::Variable(const af::array &data, bool calc_grad) : 49 | m_shared(new Shared(data, calc_grad)) 50 | {} 51 | 52 | Variable::Variable(const af::array &data, 53 | const std::vector &inputs, 54 | GradFunc_t grad_func) : 55 | m_shared(nullptr) 56 | { 57 | bool calc_grad = false; 58 | for (const auto &input : inputs) { 59 | calc_grad |= input.isCalcGrad(); 60 | } 61 | if (calc_grad) { 62 | m_shared = std::shared_ptr(new Shared(data, inputs, grad_func, true)); 63 | } else { 64 | m_shared = std::shared_ptr(new Shared(data, false)); 65 | } 66 | } 67 | 68 | af::array& Variable::array() const 69 | { 70 | return m_shared->m_data; 71 | } 72 | 73 | Variable& Variable::grad() const 74 | { 75 | if (!m_shared->m_calc_grad) { 76 | throw af::exception("Gradient calclation disabled."); 77 | } 78 | if (m_shared->m_grads.size() == 0) { 79 | throw af::exception("Gradient hasn't been calculated yet."); 80 | } 81 | return m_shared->m_grads[0]; 82 | } 83 | 84 | std::ptrdiff_t Variable::id() const 85 | { 86 | return (std::ptrdiff_t)m_shared.get(); 87 | } 88 | 89 | std::vector& Variable::getInputs() const 90 | { 91 | return m_shared->m_inputs; 92 | } 93 | 94 | bool Variable::isCalcGrad() const 95 | { 96 | return m_shared->m_calc_grad; 97 | } 98 | 99 | bool Variable::isGradAvailable() const 100 | { 101 | if (!m_shared->m_calc_grad) return false; 102 | return m_shared->m_grads.size() >= 1; 103 | } 104 | 105 | af::dim4 Variable::dims() const 106 | { 107 | return m_shared->m_data.dims(); 108 | } 109 | 110 | af::dtype Variable::type() const 111 | { 112 | return m_shared->m_data.type(); 113 | } 114 | 115 | void Variable::zeroGrad() 116 | { 117 | m_shared->m_grads.clear(); 118 | } 119 | 120 | void Variable::setCalcGrad(bool calc_grad) 121 | { 122 | m_shared->m_calc_grad = calc_grad; 123 | if (!calc_grad) { 124 | m_shared->m_grad_func = nullptr; 125 | m_shared->m_inputs.clear(); 126 | m_shared->m_grads.clear(); 127 | } 128 | } 129 | 130 | void Variable::addGrad(const Variable &child_grad) 131 | { 132 | if (m_shared->m_calc_grad) { 133 | m_shared->m_grads.push_back(child_grad); 134 | } 135 | } 136 | 137 | void Variable::evalGrad(bool retain_grad_graph) 138 | { 139 | // Flag asking not to calculate gradients 140 | if (!m_shared->m_calc_grad) return; 141 | 142 | // Best not to evaluate the JIT immediately if theres only a single gradient 143 | Variable grad = m_shared->m_grads[0]; 144 | if (m_shared->m_grads.size() > 1) { 145 | for (unsigned i = 1; i < m_shared->m_grads.size(); i++) { 146 | grad = grad + m_shared->m_grads[i]; 147 | } 148 | grad.array().eval(); 149 | 
m_shared->m_grads.resize(1); 150 | } 151 | 152 | grad.setCalcGrad(retain_grad_graph); 153 | m_shared->m_grads[0] = grad; 154 | } 155 | 156 | void Variable::calcGradInputs(bool retain_grad_graph) 157 | { 158 | evalGrad(); 159 | if (m_shared->m_grad_func) { 160 | m_shared->m_grad_func(m_shared->m_inputs, m_shared->m_grads[0]); 161 | } 162 | } 163 | 164 | void Variable::backward(const Variable &grad, bool retain_grad_graph) 165 | { 166 | this->addGrad(grad); 167 | Variable::DAG_t dag = Variable::build(*this); 168 | for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { 169 | iter->calcGradInputs(retain_grad_graph); 170 | } 171 | } 172 | 173 | void Variable::backward(bool retain_grad_graph) 174 | { 175 | auto ones = Variable(af::constant(1, this->dims()), false); 176 | this->backward(ones, retain_grad_graph); 177 | } 178 | 179 | Variable::DAG_t Variable::build(const Variable &var) 180 | { 181 | Cache_t cache; 182 | Variable::DAG_t dag; 183 | Variable::buildSubGraph(cache, dag, var); 184 | return dag; 185 | } 186 | 187 | void Variable::buildSubGraph(Cache_t &cache, Variable::DAG_t &dag, const Variable &var) 188 | { 189 | std::ptrdiff_t id = var.id(); 190 | if (cache.find(id) != cache.end()) { 191 | return; 192 | } 193 | for (const auto &input : var.getInputs()) { 194 | Variable::buildSubGraph(cache, dag, input); 195 | } 196 | cache[id] = true; 197 | dag.push_back(var); 198 | } 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /src/nn/Init.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************* 2 | * Copyright (c) 2017, ArrayFire 3 | * All rights reserved. 4 | * 5 | * This file is distributed under 3-clause BSD license. 
6 | * The complete license agreement can be obtained at: 7 | * http://arrayfire.com/licenses/BSD-3-Clause 8 | ********************************************************/ 9 | 10 | #include 11 | 12 | #include 13 | 14 | namespace af { 15 | namespace nn { 16 | 17 | using autograd::Variable; 18 | 19 | Variable input(const af::array &arr) 20 | { 21 | return Variable(arr, false); 22 | } 23 | 24 | Variable noGrad(const af::array &arr) 25 | { 26 | return Variable(arr, false); 27 | } 28 | 29 | Variable parameter(const af::array &arr) 30 | { 31 | return Variable(arr, true); 32 | } 33 | 34 | autograd::Variable uniform(int output_size, int input_size, 35 | double min, double max, 36 | af::dtype type, bool calc_grad) 37 | { 38 | return nn::uniform(af::dim4(output_size, input_size), min, max, type, calc_grad); 39 | } 40 | 41 | autograd::Variable uniform(af::dim4 dims, double min, double max, 42 | af::dtype type, bool calc_grad) 43 | { 44 | af::array result = af::randu(dims, type); 45 | if (min != 0 || max != 1) { 46 | result = (max - min) * result + min; 47 | } 48 | return Variable(result, calc_grad); 49 | } 50 | 51 | autograd::Variable normal(int output_size, int input_size, 52 | double stdv, double mean, 53 | af::dtype type, bool calc_grad) 54 | { 55 | return nn::normal(af::dim4(output_size, input_size), stdv, mean, type, calc_grad); 56 | } 57 | 58 | autograd::Variable normal(af::dim4 dims, double stdv, double mean, 59 | af::dtype type, bool calc_grad) 60 | { 61 | af::array result = af::randn(dims, type); 62 | if (mean != 0 || stdv != 1) { 63 | result = stdv * result + mean; 64 | } 65 | return Variable(result, calc_grad); 66 | } 67 | 68 | autograd::Variable lecunUniform(int output_size, int input_size, 69 | af::dtype type, bool calc_grad) 70 | { 71 | return nn::lecunUniform(af::dim4(output_size, input_size), type, calc_grad); 72 | } 73 | 74 | autograd::Variable lecunUniform(af::dim4 dims, 75 | af::dtype type, bool calc_grad) 76 | { 77 | dim_t elements = dims.elements(); 78 | dim_t fan_in = elements / dims[1]; 79 | double stdv = ::sqrt(1.0/(double)fan_in); 80 | double limit = ::sqrt(3.0) * stdv; 81 | return nn::uniform(dims, -limit, limit, type, calc_grad); 82 | } 83 | 84 | autograd::Variable lecunNormal(int output_size, int input_size, 85 | af::dtype type, bool calc_grad) 86 | { 87 | return nn::lecunNormal(af::dim4(output_size, input_size), type, calc_grad); 88 | } 89 | 90 | autograd::Variable lecunNormal(af::dim4 dims, 91 | af::dtype type, bool calc_grad) 92 | { 93 | dim_t elements = dims.elements(); 94 | dim_t fan_in = elements / dims[1]; 95 | double stdv = ::sqrt(1.0/(double)fan_in); 96 | return nn::normal(dims, 0, stdv, type, calc_grad); 97 | } 98 | 99 | autograd::Variable glorotUniform(int output_size, int input_size, 100 | af::dtype type, bool calc_grad) 101 | { 102 | return nn::glorotUniform(af::dim4(output_size, input_size), type, calc_grad); 103 | } 104 | 105 | autograd::Variable glorotUniform(af::dim4 dims, 106 | af::dtype type, bool calc_grad) 107 | { 108 | dim_t elements = dims.elements(); 109 | dim_t fan_in = elements / dims[1]; 110 | dim_t fan_out = elements / dims[0]; 111 | double stdv = ::sqrt(2.0/(double)(fan_in + fan_out)); 112 | double limit = ::sqrt(3.0) * stdv; 113 | return nn::uniform(dims, -limit, limit, type, calc_grad); 114 | } 115 | 116 | autograd::Variable glorotNormal(int output_size, int input_size, 117 | af::dtype type, bool calc_grad) 118 | { 119 | return nn::glorotNormal(af::dim4(output_size, input_size), type, calc_grad); 120 | } 121 | 122 | autograd::Variable 
/src/nn/Modules/Activations.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | 
10 | #include
11 | #include
12 | #include
13 | namespace af
14 | {
15 | namespace nn
16 | {
17 |     using namespace autograd;
18 | 
19 |     Sigmoid::Sigmoid() {}
20 | 
21 |     Variable Sigmoid::forward(const Variable &input)
22 |     {
23 |         return sigmoid(input);
24 |     }
25 | 
26 |     Tanh::Tanh() {}
27 | 
28 |     Variable Tanh::forward(const Variable &input)
29 |     {
30 |         return tanh(input);
31 |     }
32 | 
33 |     ReLU::ReLU() {}
34 | 
35 |     Variable ReLU::forward(const Variable &input)
36 |     {
37 |         return max(input, 0.0);
38 |     }
39 | 
40 |     LeakyReLU::LeakyReLU(double slope) :
41 |         m_slope(slope)
42 |     {
43 |     }
44 | 
45 |     Variable LeakyReLU::forward(const Variable &input)
46 |     {
47 |         return max(input, m_slope * input);
48 |     }
49 | 
50 |     PReLU::PReLU(int size, double value)
51 |     {
52 |         auto w = nn::constant(value, size, 1);
53 |         setParams({w});
54 |     }
55 | 
56 |     PReLU::PReLU(const Variable &w) :
57 |         Module({w})
58 |     {
59 |     }
60 | 
61 |     Variable PReLU::forward(const Variable &input)
62 |     {
63 |         auto mask = input >= 0.0;
64 |         return (input * mask) + (input * !mask * tileAs(m_parameters[0], input));
65 |     }
66 | 
67 |     ELU::ELU(double alpha) :
68 |         m_alpha(alpha)
69 |     {
70 |     }
71 | 
72 |     Variable ELU::forward(const Variable &input)
73 |     {
74 |         auto mask = input >= 0.0;
75 |         return (mask * input) + (!mask * m_alpha * (exp(input)-1));
76 |     }
77 | 
78 |     ThresholdReLU::ThresholdReLU(double threshold) :
79 |         m_threshold(threshold)
80 |     {
81 |     }
82 | 
83 |     Variable ThresholdReLU::forward(const Variable &input)
84 |     {
85 |         auto mask = input >= m_threshold;
86 |         return input * mask;
87 |     }
88 | }
89 | }
90 | 
--------------------------------------------------------------------------------
/src/nn/Modules/Container.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | 
10 | #include
11 | #include
12 | 
13 | namespace af
14 | {
15 | namespace nn
16 | {
17 |     using namespace autograd;
18 | 
19 |     Container::Container() {}
20 | 
21 |     ModulePtr Container::get(int id)
22 |     {
23 |         return m_modules[id];
24 |     }
25 | 
26 |     std::vector<ModulePtr> Container::modules()
27 |     {
28 |         return m_modules;
29 |     }
30 | 
31 |     Sequential::Sequential() {}
32 | 
33 |     Variable Sequential::forward(const Variable &input)
34 |     {
35 |         Variable output = input;
36 |         for (auto &module : m_modules) {
37 |             output = module->forward(output);
38 |         }
39 |         return output;
40 |     }
41 | }
42 | }
43 | 
--------------------------------------------------------------------------------
/src/nn/Modules/Dropout.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | #include
10 | 
11 | #include
12 | #include
13 | 
14 | namespace af
15 | {
16 | namespace nn
17 | {
18 |     using namespace autograd;
19 | 
20 |     Dropout::Dropout(double drop_ratio) :
21 |         m_ratio(drop_ratio)
22 |     {
23 |     }
24 | 
25 |     Variable Dropout::forward(const Variable &input)
26 |     {
27 |         if (m_train)
28 |             return (uniform(input.dims(), 0.0, 1.0, f32, false) > m_ratio) * input;
29 |         else
30 |             return input;
31 |     }
32 | }
33 | }
34 | 
--------------------------------------------------------------------------------
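Dropout is the one module above whose behaviour depends on the training flag: in train() mode it zeroes roughly a drop_ratio fraction of the activations (note the kept values are not rescaled by 1/(1 - drop_ratio)), while in eval() mode it is the identity. A small sketch, assuming af/nn.h aggregates the module headers:

#include <arrayfire.h>
#include <af/nn.h>

int main()
{
    using namespace af;

    nn::Dropout drop(0.5);
    auto x = nn::input(af::constant(1, 8, 8));

    drop.train();            // m_train = true: roughly half of the entries become 0
    auto masked = drop(x);

    drop.eval();             // m_train = false: the input passes through unchanged
    auto same = drop(x);

    af_print(masked.array());
    af_print(same.array());
    return 0;
}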
/src/nn/Modules/Linear.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | #include
10 | 
11 | #include
12 | #include
13 | 
14 | namespace af
15 | {
16 | namespace nn
17 | {
18 |     using namespace autograd;
19 | 
20 |     Linear::Linear(int input_size, int output_size, bool bias, float spread) :
21 |         m_bias(bias)
22 |     {
23 |         auto w = nn::lecunNormal(output_size, input_size);
24 |         if (bias) {
25 |             auto b = nn::lecunNormal(output_size, 1);
26 |             setParams({w, b});
27 |         } else {
28 |             setParams({w});
29 |         }
30 |     }
31 | 
32 |     Linear::Linear(const Variable &w) :
33 |         Module({w}),
34 |         m_bias(false)
35 |     {
36 |     }
37 | 
38 |     Linear::Linear(const Variable &w, const Variable &b) :
39 |         Module({w, b}),
40 |         m_bias(true)
41 |     {
42 |         if (b.array().dims(0) != w.array().dims(0)) {
43 |             throw af::exception("nn::Linear: Dimension mismatch between weight and bias.");
44 |         }
45 |         if (b.array().dims(1) != 1) {
46 |             throw af::exception("nn::Linear: Bias must be a vector.");
47 |         }
48 |     }
49 | 
50 |     Variable Linear::forward(const Variable &input)
51 |     {
52 |         auto res = matmul(m_parameters[0], input);
53 |         if (m_bias) {
54 |             res = res + tileAs(m_parameters[1], res);
55 |         }
56 |         return res;
57 |     }
58 | }
59 | }
60 | 
--------------------------------------------------------------------------------
/src/nn/Modules/Loss.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | #include
10 | #include
11 | 
12 | 
13 | namespace af
14 | {
15 | namespace nn
16 | {
17 |     using namespace autograd;
18 | 
19 |     autograd::Variable Loss::forward(const autograd::Variable &inputs)
20 |     {
21 |         throw af::exception("Loss module requires both inputs and targets");
22 |     }
23 | 
24 |     autograd::Variable Loss::operator()(const autograd::Variable &inputs,
25 |                                         const autograd::Variable &targets)
26 |     {
27 |         return this->forward(inputs, targets);
28 |     }
29 | 
30 |     autograd::Variable MeanSquaredError::forward(const autograd::Variable &inputs,
31 |                                                  const autograd::Variable &targets)
32 |     {
33 |         auto df = inputs - targets;
34 |         auto res = mean(flat(df * df), {0});
35 |         return res;
36 |     }
37 | 
38 |     autograd::Variable MeanAbsoluteError::forward(const autograd::Variable &inputs,
39 |                                                   const autograd::Variable &targets)
40 |     {
41 |         auto df = inputs - targets;
42 |         return mean(flat(abs(df)), {0});
43 |     }
44 | 
45 |     static autograd::Variable
46 |     binaryCrossEntropy(const autograd::Variable &inputs,
47 |                        const autograd::Variable &targets)
48 |     {
49 |         return -1.0 * (targets * log(inputs) + (1 - targets) * log(1 - inputs));
50 |     }
51 | 
52 |     autograd::Variable BinaryCrossEntropyLoss::forward(const autograd::Variable &inputs,
53 |                                                        const autograd::Variable &targets)
54 |     {
55 |         return mean(flat(binaryCrossEntropy(inputs, targets)), {0});
56 |     }
57 | 
58 |     autograd::Variable BinaryCrossEntropyLoss::forward(const autograd::Variable &inputs,
59 |                                                        const autograd::Variable &targets,
60 |                                                        const autograd::Variable &weights)
61 |     {
62 |         return mean(flat(weights * binaryCrossEntropy(inputs, targets)), {0});
63 |     }
64 | }
65 | }
66 | 
--------------------------------------------------------------------------------
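The loss modules are used through Loss::operator(), which forwards to the two-argument forward() overloads above. A hedged sketch of calling them directly on hand-built Variables (it assumes the loss classes are default-constructible, since only their forward overloads appear in this file, and that BinaryCrossEntropyLoss receives predictions in (0, 1) with 0/1 targets):

#include <arrayfire.h>
#include <af/autograd.h>
#include <af/nn.h>

int main()
{
    using namespace af;

    auto preds   = nn::parameter(af::constant(0.8, 10));        // tracked for gradients
    auto targets = nn::noGrad((af::randu(10) > 0.5).as(f32));   // 0/1 labels

    nn::MeanSquaredError mse;
    nn::BinaryCrossEntropyLoss bce;

    auto l1 = mse(preds, targets);   // mean((p - t)^2)
    auto l2 = bce(preds, targets);   // mean(-t*log(p) - (1 - t)*log(1 - p))

    l2.backward(/*retain_grad_graph=*/false);
    af_print(preds.grad().array());  // d(BCE)/d(preds)
    return 0;
}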
/src/nn/Modules/Module.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | 
10 | #include
11 | 
12 | namespace af
13 | {
14 | namespace nn
15 | {
16 |     using autograd::Variable;
17 |     Module::Module() :
18 |         m_parameters()
19 |     {
20 |         m_train = false;
21 |     }
22 | 
23 |     Module::Module(const std::vector<Variable> &parameters) :
24 |         m_parameters(parameters.begin(), parameters.end())
25 |     {
26 |     }
27 | 
28 |     void Module::setParams(const std::vector<Variable> &parameters)
29 |     {
30 |         m_parameters.clear();
31 |         for (auto parameter : parameters) {
32 |             m_parameters.push_back(parameter);
33 |         }
34 |     }
35 | 
36 |     void Module::train()
37 |     {
38 |         m_train = true;
39 |         for (auto &parameter : m_parameters) {
40 |             parameter.setCalcGrad(true);
41 |         }
42 |     }
43 | 
44 |     void Module::eval()
45 |     {
46 |         m_train = false;
47 |         for (auto &parameter : m_parameters) {
48 |             parameter.setCalcGrad(false);
49 |         }
50 |     }
51 | 
52 |     std::vector<Variable> Module::parameters()
53 |     {
54 |         return m_parameters;
55 |     }
56 | 
57 |     Variable Module::operator()(const Variable &input)
58 |     {
59 |         return this->forward(input);
60 |     }
61 | }
62 | }
63 | 
--------------------------------------------------------------------------------
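One detail worth calling out: parameters() returns Variable handles rather than deep copies, and a copied Variable appears to share its underlying storage (the m_shared block seen in Variable.cpp), so writing through Variable::array() on a returned handle updates the module in place. That is what the optimizers in the next file rely on. A small sketch under that assumption:

#include <arrayfire.h>
#include <af/nn.h>

int main()
{
    using namespace af;

    nn::Linear fc(nn::lecunNormal(2, 4));        // weight-only Linear, no bias

    auto params = fc.parameters();               // handles, not deep copies
    params[0].array() *= 0;                      // zero the weight matrix in place

    auto out = fc(nn::input(af::randu(4, 1)));
    af_print(out.array());                       // all zeros: the module saw the update
    return 0;
}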
/src/optim/Optimizers.cpp:
--------------------------------------------------------------------------------
1 | /*******************************************************
2 |  * Copyright (c) 2017, ArrayFire
3 |  * All rights reserved.
4 |  *
5 |  * This file is distributed under 3-clause BSD license.
6 |  * The complete license agreement can be obtained at:
7 |  * http://arrayfire.com/licenses/BSD-3-Clause
8 |  ********************************************************/
9 | 
10 | #include
11 | 
12 | #include
13 | 
14 | using af::autograd::Variable;
15 | using std::vector;
16 | 
17 | // References:
18 | // SGD and Momentum: http://cs231n.github.io/neural-networks-3/#sgd
19 | // Adam: https://arxiv.org/pdf/1412.6980.pdf
20 | // RMSProp: https://arxiv.org/pdf/1308.0850v5.pdf
21 | 
22 | // Comparison between various update rules:
23 | // https://www.quora.com/What-are-differences-between-update-rules-like-AdaDelta-RMSProp-AdaGrad-and-AdaM
24 | 
25 | namespace af
26 | {
27 | namespace optim
28 | {
29 |     Optimizer::Optimizer(const vector<Variable> &parameters)
30 |         : m_parameters(parameters.begin(), parameters.end())
31 |     {
32 |     }
33 | 
34 |     void Optimizer::zeroGrad()
35 |     {
36 |         for (auto &parameter : m_parameters) {
37 |             parameter.zeroGrad();
38 |         }
39 |     }
40 | 
41 |     SGDOptimizer::SGDOptimizer(const vector<Variable> &parameters,
42 |                                double learning_rate, double momentum,
43 |                                double weight_decay, bool use_nesterov)
44 |         : Optimizer(parameters),
45 |           m_use_nesterov(use_nesterov),
46 |           m_lr(learning_rate),
47 |           m_mu(momentum),
48 |           m_wd(weight_decay),
49 |           m_velocities()
50 |     {
51 |         if (momentum != 0) {
52 |             m_velocities.reserve(parameters.size());
53 |             for (const auto &parameter : m_parameters) {
54 |                 m_velocities.push_back(af::constant(0, parameter.dims(), parameter.type()));
55 |                 m_velocities.back().eval();
56 |             }
57 |         }
58 |     }
59 | 
60 |     void SGDOptimizer::update()
61 |     {
62 |         for (size_t i = 0; i < m_parameters.size(); i++) {
63 | 
64 |             const af::array &grad = m_parameters[i].grad().array();
65 |             af::array &data = m_parameters[i].array();
66 | 
67 |             if (m_wd != 0) {
68 |                 // Weight decay term
69 |                 data = data - m_wd * data;
70 |             }
71 | 
72 |             if (m_mu != 0) {
73 |                 af::array &velocity = m_velocities[i];
74 | 
75 |                 // Regular momentum
76 |                 velocity = m_mu * velocity - m_lr * grad;
77 |                 if (m_use_nesterov) {
78 |                     // Update for nesterov momentum
79 |                     data = data + velocity * m_mu - m_lr * grad;
80 |                 } else {
81 |                     data = data + velocity;
82 |                 }
83 | 
84 |                 af::eval(velocity, data);
85 |             } else {
86 | 
87 |                 data = data - m_lr * grad;
88 |                 af::eval(data);
89 |             }
90 |         }
91 |     }
92 | 
93 | 
94 |     AdamOptimizer::AdamOptimizer(const vector<Variable> &parameters,
95 |                                  double learning_rate,
96 |                                  double beta1, double beta2,
97 |                                  double epsilon, double weight_decay)
98 |         : Optimizer(parameters),
99 |           m_lr(learning_rate),
100 |           m_beta1(beta1),
101 |           m_beta2(beta2),
102 |           m_eps(epsilon),
103 |           m_wd(weight_decay),
104 |           m_count(0),
105 |           m_biased_first(),
106 |           m_biased_second()
107 |     {
108 |         m_biased_first.reserve(parameters.size());
109 |         m_biased_second.reserve(parameters.size());
110 | 
111 |         for (const auto &parameter : m_parameters) {
112 |             m_biased_first.push_back(af::constant(0, parameter.dims(), parameter.type()));
113 |             m_biased_second.push_back(af::constant(0, parameter.dims(), parameter.type()));
114 | 
115 |             m_biased_first.back().eval();
116 |             m_biased_second.back().eval();
117 |         }
118 |     }
119 | 
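For reference, the update() that follows implements the Adam rule from the paper cited at the top of this file. A scalar transcription of the same arithmetic (an editorial sketch, not part of the library):

#include <cmath>

// One Adam step for a single scalar parameter x with gradient g.
// m and v are the running (biased) first and second moments, t is the
// number of update() calls so far; corrected_lr mirrors the code below.
double adam_step(double x, double g, double &m, double &v, long t,
                 double lr, double beta1, double beta2, double eps)
{
    m = beta1 * m + (1 - beta1) * g;
    v = beta2 * v + (1 - beta2) * g * g;
    double corrected_lr = lr * std::sqrt(1 - std::pow(beta2, t)) / (1 - std::pow(beta1, t));
    return x - corrected_lr * m / (std::sqrt(v) + eps);
}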
120 |     void AdamOptimizer::update()
121 |     {
122 |         m_count++;
123 | 
124 |         for (size_t i = 0; i < m_parameters.size(); i++) {
125 |             const af::array &grad = m_parameters[i].grad().array();
126 |             af::array &data = m_parameters[i].array();
127 | 
128 |             if (m_wd != 0) {
129 |                 // Weight decay term
130 |                 data = data - m_wd * data;
131 |             }
132 | 
133 |             af::array &biased_first = m_biased_first[i];
134 |             af::array &biased_second = m_biased_second[i];
135 | 
136 |             biased_first = m_beta1 * biased_first + (1 - m_beta1) * grad;
137 |             biased_second = m_beta2 * biased_second + (1 - m_beta2) * grad * grad;
138 | 
139 |             double corrected_bias1 = 1 - std::pow(m_beta1, m_count);
140 |             double corrected_bias2 = 1 - std::pow(m_beta2, m_count);
141 |             double corrected_lr = m_lr * std::sqrt(corrected_bias2) / corrected_bias1;
142 | 
143 |             data = data - (corrected_lr * biased_first) / (af::sqrt(biased_second) + m_eps);
144 | 
145 |             af::eval(data, biased_first, biased_second);
146 |         }
147 |     }
148 | 
149 |     RMSPropOptimizer::RMSPropOptimizer(const vector<Variable> &parameters,
150 |                                        double learning_rate,
151 |                                        double rho,
152 |                                        double epsilon,
153 |                                        double weight_decay,
154 |                                        bool use_first)
155 |         : Optimizer(parameters),
156 |           m_use_first(use_first),
157 |           m_lr(learning_rate),
158 |           m_rho(rho),
159 |           m_eps(epsilon),
160 |           m_wd(weight_decay),
161 |           m_first(),
162 |           m_second()
163 |     {
164 |         if (m_use_first) m_first.reserve(parameters.size());
165 |         m_second.reserve(parameters.size());
166 | 
167 |         for (const auto &parameter : m_parameters) {
168 |             if (m_use_first) {
169 |                 m_first.push_back(af::constant(0, parameter.dims(), parameter.type()));
170 |                 m_first.back().eval();
171 |             }
172 | 
173 |             m_second.push_back(af::constant(0, parameter.dims(), parameter.type()));
174 |             m_second.back().eval();
175 |         }
176 |     }
177 | 
178 |     void RMSPropOptimizer::update()
179 |     {
180 |         for (size_t i = 0; i < m_parameters.size(); i++) {
181 |             const af::array &grad = m_parameters[i].grad().array();
182 |             af::array &data = m_parameters[i].array();
183 | 
184 |             if (m_wd != 0) {
185 |                 // Weight decay term
186 |                 data = data - m_wd * data;
187 |             }
188 | 
189 |             af::array &second = m_second[i];
190 |             second = m_rho * second + (1 - m_rho) * grad * grad;
191 | 
192 |             // Create shallow copy of second so that we don't update "second" below
193 |             af::array moments = second;
194 |             if (m_use_first) {
195 |                 af::array &first = m_first[i];
196 |                 first = m_rho * first + (1 - m_rho) * grad;
197 |                 moments = moments - first * first;
198 |             }
199 | 
200 |             data = data - (m_lr * grad) / (af::sqrt(moments) + m_eps);
201 | 
202 |             if (m_use_first) {
203 |                 af::array &first = m_first[i];
204 |                 af::eval(data, first, second);
205 |             } else {
206 |                 af::eval(data, second);
207 |             }
208 |         }
209 |     }
210 | }
211 | }
212 | 
--------------------------------------------------------------------------------
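Putting the pieces together, a minimal training loop in the spirit of examples/xor.cpp: build a module, pick a loss and an optimizer, then iterate zeroGrad / forward / backward / update. The sketch constructs Linear from explicit Variables and passes every SGD hyper-parameter explicitly so that it only relies on the constructors and signatures shown above.

#include <cstdio>

#include <arrayfire.h>
#include <af/autograd.h>
#include <af/nn.h>
#include <af/optim.h>

int main()
{
    using namespace af;

    // 3 -> 1 affine map followed by a sigmoid; fit a constant target of 0.5.
    nn::Linear fc(nn::lecunNormal(1, 3), nn::constant(0.0, 1, 1));
    nn::Sigmoid sig;
    nn::MeanSquaredError mse;

    optim::SGDOptimizer sgd(fc.parameters(), 0.1 /*learning_rate*/, 0.9 /*momentum*/,
                            0.0 /*weight_decay*/, false /*use_nesterov*/);

    auto x = nn::input(af::randu(3, 32));             // 3 features, 32 samples
    auto y = nn::noGrad(af::constant(0.5, 1, 32));

    fc.train();                                       // make the parameters differentiable
    for (int iter = 0; iter < 100; iter++) {
        sgd.zeroGrad();                               // clear last step's gradients
        auto loss = mse(sig(fc(x)), y);
        loss.backward(/*retain_grad_graph=*/false);
        sgd.update();                                 // in-place step on the shared parameter arrays

        if (iter % 25 == 0) {
            printf("iteration %d, loss %g\n", iter, loss.array().scalar<float>());
        }
    }
    return 0;
}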