├── .github
│   └── workflows
│       └── cmake.yml
├── .gitignore
├── CMakeLists.txt
├── README.md
├── main.cpp
└── model
    ├── attention.h
    ├── decoder.h
    ├── decoder_layer.h
    ├── dropout.h
    ├── encoder.h
    ├── encoder_layer.h
    ├── feedforward.h
    ├── function.h
    ├── linear.h
    ├── norm.h
    ├── relu.h
    ├── softmax.h
    └── transformer.h

--------------------------------------------------------------------------------
/.github/workflows/cmake.yml:
--------------------------------------------------------------------------------
name: CMake

on: [push]

env:
  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: Release

jobs:
  build:
    # The CMake configure and build commands are platform agnostic and should work equally
    # well on Windows or Mac. You can convert this to a matrix build if you need
    # cross-platform coverage.
    # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2

      - name: Create Build Environment
        # Some projects don't allow in-source building, so create a separate build directory.
        # We'll use this as our working directory for all subsequent commands.
        run: cmake -E make_directory ${{github.workspace}}/build

      - name: Configure CMake
        # Use a bash shell so we can use the same syntax for environment variable
        # access regardless of the host operating system.
        shell: bash
        working-directory: ${{github.workspace}}/build
        # Note: the current convention is to use the -S and -B options here to specify source
        # and build directories, but this is only available with CMake 3.13 and higher.
        # The CMake binaries on the GitHub Actions machines are (as of this writing) 3.12.
        run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE

      - name: Build
        working-directory: ${{github.workspace}}/build
        shell: bash
        # Execute the build. You can specify a specific target with "--target <target>".
        run: cmake --build . --config $BUILD_TYPE

      - name: Test
        working-directory: ${{github.workspace}}/build
        shell: bash
        # Execute tests defined by the CMake configuration.
        # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail.
        run: echo ......
        # ctest -C $BUILD_TYPE

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
build/
output/
cmake-build-debug/
.idea/
cmake-build-debug-mingw/

--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.16)

project(Transformer)
set(CMAKE_CXX_STANDARD 20)

include_directories("model")
include(CTest)
add_executable(transformer main.cpp)
add_test(transformer_test transformer)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# transformer-cpp-cpu

-------------------------------------
A simple Transformer implemented in C++ (**work in progress**)

### Parameters referenced from other models
| key | value | description |
| --- | --- | --- |
| DIM | `512` | Embedding dimension |
| DEP | `20` | Sentence length |
| DIM_HID | `2048` | Hidden-layer width of the FeedForwardNetwork |
| HEAD_SIZE | `8` | Number of heads in MultiHeadAttention |
| ENC_LAYER_CNT | `6` | Number of encoder layers |
| DEC_LAYER_CNT | `6` | Number of decoder layers |

#### Final statistics:
| key | value |
| --- | --- |
| Parameter count | `176454656` |
| CPU_TIME | `127.080 s` |
| REAL_TIME | `127.243 s` |
| Memory | `711688192 bytes` |

--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
#include <iostream>

#include "transformer.h"

typedef float T;
// Embedding dimension
#define DIM 16
// Sentence length
#define DEP 20
// Hidden-layer width of the FeedForwardNetwork
#define DIM_HID 32
// Number of heads in MultiHeadAttention
#define HEAD_SIZE 8
// Number of encoder layers
#define ENC_LAYER_CNT 6
// Number of decoder layers
#define DEC_LAYER_CNT 6

/**
    Parameters referenced from other models: DIM: 512, DEP: 20, DIM_HID: 2048, HEAD_SIZE: 8, ENC_LAYER_CNT: 6, DEC_LAYER_CNT: 6
    Final statistics: parameter count: 176454656
                      CPU_TIME: 127080 ms
                      REAL_TIME: 127243 ms
                      Memory: 711688192 bytes
**/

int main() {
    // Parameters and activations are large std::array aggregates, so keep them on the heap.
    auto *param = new transformer::TransformerParameter<T, DIM, DEP, DIM_HID, HEAD_SIZE, ENC_LAYER_CNT, DEC_LAYER_CNT>();
    std::cout << "parameters count: " << param->count() << std::endl;
    auto *input = new std::array<std::array<T, DIM>, DEP>{};
    auto *output = new std::array<std::array<T, DIM>, DEP>{};
    transformer::Transformer<T, DIM, DEP, DIM_HID, HEAD_SIZE, ENC_LAYER_CNT, DEC_LAYER_CNT>::forward(*input, *output, *param);
    for (int i = 0; i < DEP; ++i) {
        for (int j = 0; j < DIM; ++j) {
            std::cout << (*output)[i][j] << " ";
        }
        std::cout << std::endl;
    }
    return 0;
}
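
The README's parameter count of 176454656 can be re-derived from the count() formulas in the model headers that follow. The snippet below is a standalone sanity-check sketch, not a repository file; it assumes each head's Q/K/V projection maps DIM to DIM and the output projection maps the concatenated DIM * HEAD_SIZE features back to DIM, with the reference configuration DIM = 512, DIM_HID = 2048, HEAD_SIZE = 8 and 6 encoder plus 6 decoder layers.

#include <iostream>

// Recomputes the README's parameter count from the count() formulas in the model headers.
int main() {
    const long long DIM = 512, DIM_HID = 2048, HEAD_SIZE = 8, LAYERS = 6;
    auto linear = [](long long in, long long out) { return in * out + out; };

    long long norm = 2 * DIM;                                      // LayerNormParameter::count()
    long long attn = linear(DIM, DIM) * HEAD_SIZE * 3              // per-head Q/K/V projections
                     + linear(DIM * HEAD_SIZE, DIM);               // output projection after concatenation
    long long ffn = linear(DIM, DIM_HID) + linear(DIM_HID, DIM);   // FeedForwardNetworkParameter::count()
    long long enc_layer = norm + attn + norm + ffn;
    long long dec_layer = 3 * norm + 2 * attn + ffn;
    long long total = (enc_layer * LAYERS + norm) + (dec_layer * LAYERS + norm);
    std::cout << total << std::endl;                               // prints 176454656
    return 0;
}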
--------------------------------------------------------------------------------
/model/attention.h:
--------------------------------------------------------------------------------
//
// Created by dianhsu on 2021/03/10.
//

#ifndef TRANSFORMER_ATTENTION_H
#define TRANSFORMER_ATTENTION_H

#include <array>
#include <cmath>

#include "linear.h"
#include "dropout.h"
#include "softmax.h"

namespace transformer {
    template<typename T, int DIM, int HEAD_SIZE>
    struct MultiHeadAttentionParameter {
        // Per-head Q/K/V projections (DIM -> DIM) and the output projection
        // applied to the concatenated heads (DIM * HEAD_SIZE -> DIM).
        LinearParameter<T, DIM, DIM> linear_q_p[HEAD_SIZE], linear_k_p[HEAD_SIZE], linear_v_p[HEAD_SIZE];
        LinearParameter<T, DIM * HEAD_SIZE, DIM> linear_p;
        T dr;

        MultiHeadAttentionParameter() {
            dr = 0.1;
        }

        long long count() {
            return linear_k_p[0].count() * HEAD_SIZE * 3 + linear_p.count();
        }
    };

    template<typename T, int DIM, int DEP, int HEAD_SIZE>
    class MultiHeadAttention {
    public:
        static void forward(std::array<std::array<T, DIM>, DEP> &q_in,
                            std::array<std::array<T, DIM>, DEP> &k_in,
                            std::array<std::array<T, DIM>, DEP> &v_in,
                            std::array<std::array<T, DIM>, DEP> &output,
                            MultiHeadAttentionParameter<T, DIM, HEAD_SIZE> &p) {
            T scale = 1.0 / std::sqrt((T) DIM * 1.0 / HEAD_SIZE);

            auto q_tmp = std::array<std::array<std::array<T, DIM>, DEP>, HEAD_SIZE>{};
            auto k_tmp = std::array<std::array<std::array<T, DIM>, DEP>, HEAD_SIZE>{};
            auto v_tmp = std::array<std::array<std::array<T, DIM>, DEP>, HEAD_SIZE>{};
            auto q_tmp_2 = std::array<std::array<std::array<T, DIM>, DEP>, HEAD_SIZE>{};

            for (int i = 0; i < HEAD_SIZE; ++i) {
                MultiLinear<T, DIM, DIM, DEP>::forward(q_in, q_tmp[i], p.linear_q_p[i]);
                MultiLinear<T, DIM, DIM, DEP>::forward(k_in, k_tmp[i], p.linear_k_p[i]);
                MultiLinear<T, DIM, DIM, DEP>::forward(v_in, v_tmp[i], p.linear_v_p[i]);

                // Dropout on the projected queries, then pre-scale by 1/sqrt(d_k).
                for (int j = 0; j < DEP; ++j) {
                    Dropout<T, DIM>::forward(q_tmp[i][j], q_tmp_2[i][j], p.dr);
                    for (int k = 0; k < DIM; ++k) {
                        q_tmp_2[i][j][k] *= scale;
                    }
                }
            }

            // Attention(Q, K, V) = softmax(QK^T / sqrt(d_k)) V, followed by concatenation of the heads.
            auto nex_tmp = std::array<std::array<std::array<std::array<T, DEP>, DEP>, HEAD_SIZE>, 2>{};
            for (int h = 0; h < HEAD_SIZE; ++h) {
                for (int i = 0; i < DEP; ++i) {
                    for (int j = 0; j < DEP; ++j) {
                        nex_tmp[0][h][i][j] = 0;
                        for (int k = 0; k < DIM; ++k) {
                            nex_tmp[0][h][i][j] += q_tmp_2[h][i][k] * k_tmp[h][j][k];
                        }
                    }
                }
                Softmax<T, DEP, DEP>::forward(nex_tmp[0][h], nex_tmp[1][h]);
            }
            // Concatenate the per-head results into a DEP x (DIM * HEAD_SIZE) buffer.
            auto f_nex_tmp = std::array<std::array<T, DIM * HEAD_SIZE>, DEP>{};
            for (int h = 0; h < HEAD_SIZE; ++h) {
                for (int i = 0; i < DEP; ++i) {
                    for (int j = 0; j < DIM; ++j) {
                        f_nex_tmp[i][h * DIM + j] = 0;
                        for (int k = 0; k < DEP; ++k) {
                            f_nex_tmp[i][h * DIM + j] += nex_tmp[1][h][i][k] * v_tmp[h][k][j];
                        }
                    }
                }
            }

            MultiLinear<T, DIM * HEAD_SIZE, DIM, DEP>::forward(f_nex_tmp, output, p.linear_p);
        }
    };
}
#endif //TRANSFORMER_ATTENTION_H
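
A minimal usage sketch for the header above (not a repository file, and assuming the template parameter layout shown there): it runs one MultiHeadAttention forward pass on a tiny configuration. With zero-initialized parameters every projection outputs zeros, so the result is all zeros; the point is only to show the expected shapes and call signature.

#include <array>
#include <iostream>

#include "attention.h"

int main() {
    constexpr int DIM = 4, DEP = 3, HEADS = 2;
    auto *p = new transformer::MultiHeadAttentionParameter<float, DIM, HEADS>();
    std::array<std::array<float, DIM>, DEP> q{}, k{}, v{}, out{};
    q[0][0] = 1.0f;  // one non-zero feature in the first token
    transformer::MultiHeadAttention<float, DIM, DEP, HEADS>::forward(q, k, v, out, *p);
    std::cout << out[0][0] << std::endl;  // prints 0, since weights and biases are zero-initialized
    delete p;
    return 0;
}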
--------------------------------------------------------------------------------
/model/decoder.h:
--------------------------------------------------------------------------------
//
// Created by dianhsu on 2021/03/11.
//

#ifndef TRANSFORMER_DECODER_H
#define TRANSFORMER_DECODER_H

#include <array>
#include "decoder_layer.h"

namespace transformer {
    template<typename T, int DIM, int DEP, int DIM_HID, int HEAD_SIZE, int LAYER_CNT>
    struct DecoderParameter {
        DecoderLayerParameter<T, DIM, DEP, DIM_HID, HEAD_SIZE> layers_p[LAYER_CNT];
        LayerNormParameter<T, DIM> norm_p;

        long long count() {
            return layers_p[0].count() * LAYER_CNT + norm_p.count();
        }
    };

    template<typename T, int DIM, int DEP, int DIM_HID, int HEAD_SIZE, int LAYER_CNT>
    class Decoder {
    public:
        static void forward(std::array<std::array<T, DIM>, DEP> &input,
                            std::array<std::array<T, DIM>, DEP> &enc_output,
                            std::array<std::array<T, DIM>, DEP> &output,
                            DecoderParameter<T, DIM, DEP, DIM_HID, HEAD_SIZE, LAYER_CNT> &p) {
            auto tmp = std::array<std::array<std::array<T, DIM>, DEP>, LAYER_CNT>{};
            for (int i = 0; i < LAYER_CNT; ++i) {
                if (i == 0) {
                    DecoderLayer<T, DIM, DEP, DIM_HID, HEAD_SIZE>::forward(input, enc_output, tmp[0], p.layers_p[i]);
                } else {
                    // Each layer consumes the previous layer's output and writes its own slot.
                    DecoderLayer<T, DIM, DEP, DIM_HID, HEAD_SIZE>::forward(tmp[i - 1], enc_output, tmp[i], p.layers_p[i]);
                }
            }
            for (int i = 0; i < DEP; ++i) {
                LayerNorm<T, DIM>::forward(tmp[LAYER_CNT - 1][i], output[i], p.norm_p);
            }
        }
    };
}
#endif //TRANSFORMER_DECODER_H

--------------------------------------------------------------------------------
/model/decoder_layer.h:
--------------------------------------------------------------------------------
//
// Created by dianhsu on 2021/03/11.
//

#ifndef TRANSFORMER_DECODER_LAYER_H
#define TRANSFORMER_DECODER_LAYER_H

#include <array>

#include "norm.h"
#include "attention.h"
#include "dropout.h"
#include "feedforward.h"

namespace transformer {
    template<typename T, int DIM, int DEP, int DIM_HID, int HEAD_SIZE>
    struct DecoderLayerParameter {
        LayerNormParameter<T, DIM> norm1_p;
        MultiHeadAttentionParameter<T, DIM, HEAD_SIZE> attention1_p;
        T dr1;
        LayerNormParameter<T, DIM> norm2_p;
        MultiHeadAttentionParameter<T, DIM, HEAD_SIZE> attention2_p;
        LayerNormParameter<T, DIM> norm3_p;
        FeedForwardNetworkParameter<T, DIM, DIM_HID> ff_p;
        T dr2;

        DecoderLayerParameter() {
            dr1 = 0.1;
            dr2 = 0.1;
        }

        long long count() {
            return norm1_p.count() + attention1_p.count() + norm2_p.count() + attention2_p.count() + norm3_p.count() +
                   ff_p.count();
        }
    };

    template<typename T, int DIM, int DEP, int DIM_HID, int HEAD_SIZE>
    class DecoderLayer {
    public:
        static void forward(std::array<std::array<T, DIM>, DEP> &input,
                            std::array<std::array<T, DIM>, DEP> &enc_output,
                            std::array<std::array<T, DIM>, DEP> &output,
                            DecoderLayerParameter<T, DIM, DEP, DIM_HID, HEAD_SIZE> &p) {
            auto tmp = std::array<std::array<std::array<T, DIM>, DEP>, 7>{};
            // Self-attention block: LayerNorm -> MultiHeadAttention -> Dropout.
            for (int i = 0; i < DEP; ++i) {
                LayerNorm<T, DIM>::forward(input[i], tmp[0][i], p.norm1_p);
            }
            MultiHeadAttention<T, DIM, DEP, HEAD_SIZE>::forward(tmp[0], tmp[0], tmp[0], tmp[1], p.attention1_p);
            for (int i = 0; i < DEP; ++i) {
                Dropout<T, DIM>::forward(tmp[1][i], tmp[2][i], p.dr1);
            }
            // Cross-attention block: queries come from the decoder, keys/values from the encoder output.
            for (int i = 0; i < DEP; ++i) {
                LayerNorm<T, DIM>::forward(tmp[2][i], tmp[3][i], p.norm2_p);
            }
            MultiHeadAttention<T, DIM, DEP, HEAD_SIZE>::forward(tmp[3], enc_output, enc_output, tmp[4], p.attention2_p);
            // Feed-forward block: LayerNorm -> FeedForwardNetwork -> Dropout.
            for (int i = 0; i < DEP; ++i) {
                LayerNorm<T, DIM>::forward(tmp[4][i], tmp[5][i], p.norm3_p);
            }
            for (int i = 0; i < DEP; ++i) {
                FeedForwardNetwork<T, DIM, DIM_HID>::forward(tmp[5][i], tmp[6][i], p.ff_p);
            }
            for (int i = 0; i < DEP; ++i) {
                Dropout<T, DIM>::forward(tmp[6][i], output[i], p.dr2);
            }
        }
    };
}
#endif //TRANSFORMER_DECODER_LAYER_H
--------------------------------------------------------------------------------
/model/dropout.h:
--------------------------------------------------------------------------------
//
// Created by dianhsu on 2021/03/10.
//

#ifndef TRANSFORMER_DROPOUT_H
#define TRANSFORMER_DROPOUT_H

#include <array>

namespace transformer {
    template<typename T, int DIM>
    class Dropout {
    public:
        // Note: this zeroes entries whose value is below dropout_rate rather than
        // dropping entries at random, so it is a deterministic stand-in for dropout.
        static void forward(std::array<T, DIM> &input, std::array<T, DIM> &output, T dropout_rate) {
            for (int i = 0; i < DIM; ++i) {
                if (input[i] < dropout_rate) {
                    output[i] = 0;
                } else {
                    output[i] = input[i];
                }
            }
        }
    };
}
#endif //TRANSFORMER_DROPOUT_H

--------------------------------------------------------------------------------
/model/encoder.h:
--------------------------------------------------------------------------------
//
// Created by dianhsu on 2021/03/10.
//

#ifndef TRANSFORMER_ENCODER_H
#define TRANSFORMER_ENCODER_H

#include <array>
#include "encoder_layer.h"

namespace transformer {
    template<typename T, int DIM, int DEP, int DIM_HID, int HEAD_SIZE, int LAYER_CNT>
    struct EncoderParameter {
        std::array<EncoderLayerParameter<T, DIM, DEP, DIM_HID, HEAD_SIZE>, LAYER_CNT> layers_p;
        LayerNormParameter<T, DIM> norm_p;

        long long count() {
            return layers_p[0].count() * LAYER_CNT + norm_p.count();
        }
    };

    template<typename T, int DIM, int DEP, int DIM_HID, int HEAD_SIZE, int LAYER_CNT>
    class Encoder {
    public:
        static void forward(std::array<std::array<T, DIM>, DEP> &input,
                            std::array<std::array<T, DIM>, DEP> &output,
                            EncoderParameter<T, DIM, DEP, DIM_HID, HEAD_SIZE, LAYER_CNT> &p) {
            auto tmp = std::array<std::array<std::array<T, DIM>, DEP>, LAYER_CNT>{};
            for (int i = 0; i < LAYER_CNT; ++i) {
                if (i == 0) {
                    EncoderLayer<T, DIM, DEP, DIM_HID, HEAD_SIZE>::forward(input, tmp[0], p.layers_p[i]);
                } else {
                    EncoderLayer<T, DIM, DEP, DIM_HID, HEAD_SIZE>::forward(tmp[i - 1], tmp[i], p.layers_p[i]);
                }
            }
            for (int i = 0; i < DEP; ++i) {
                LayerNorm<T, DIM>::forward(tmp[LAYER_CNT - 1][i], output[i], p.norm_p);
            }
        }
    };
}
#endif //TRANSFORMER_ENCODER_H
--------------------------------------------------------------------------------
/model/encoder_layer.h:
--------------------------------------------------------------------------------
//
// Created by dianhsu on 2021/03/10.
//

#ifndef TRANSFORMER_ENCODER_LAYER_H
#define TRANSFORMER_ENCODER_LAYER_H

#include <array>
#include "linear.h"
#include "feedforward.h"
#include "dropout.h"
#include "norm.h"
#include "attention.h"

namespace transformer {
    template<typename T, int DIM, int DEP, int DIM_HID, int HEAD_SIZE>
    struct EncoderLayerParameter {
        LayerNormParameter<T, DIM> norm1_p;
        MultiHeadAttentionParameter<T, DIM, HEAD_SIZE> attn_p;
        T dr1;
        LayerNormParameter<T, DIM> norm2_p;
        FeedForwardNetworkParameter<T, DIM, DIM_HID> ff_p;
        T dr2;

        EncoderLayerParameter() {
            dr1 = 0.1;
            dr2 = 0.1;
        }

        long long count() {
            return norm1_p.count() + attn_p.count() + norm2_p.count() + ff_p.count();
        }
    };

    template<typename T, int DIM, int DEP, int DIM_HID, int HEAD_SIZE>
    class EncoderLayer {
    public:
        static void forward(std::array<std::array<T, DIM>, DEP> &input,
                            std::array<std::array<T, DIM>, DEP> &output,
                            EncoderLayerParameter<T, DIM, DEP, DIM_HID, HEAD_SIZE> &p) {
            auto tmp = std::array<std::array<std::array<T, DIM>, DEP>, 6>{};
            // Attention block: LayerNorm -> MultiHeadAttention -> Dropout, plus a residual connection.
            for (int i = 0; i < DEP; ++i) {
                LayerNorm<T, DIM>::forward(input[i], tmp[0][i], p.norm1_p);
            }
            MultiHeadAttention<T, DIM, DEP, HEAD_SIZE>::forward(tmp[0], tmp[0], tmp[0], tmp[1], p.attn_p);
            for (int i = 0; i < DEP; ++i) {
                Dropout<T, DIM>::forward(tmp[1][i], tmp[2][i], p.dr1);
            }
            for (int i = 0; i < DEP; ++i) {
                for (int j = 0; j < DIM; ++j) {
                    tmp[2][i][j] += input[i][j];
                }
            }
            // Feed-forward block: LayerNorm -> FeedForwardNetwork -> Dropout, plus a residual connection.
            for (int i = 0; i < DEP; ++i) {
                LayerNorm<T, DIM>::forward(tmp[2][i], tmp[3][i], p.norm2_p);
            }
            for (int i = 0; i < DEP; ++i) {
                FeedForwardNetwork<T, DIM, DIM_HID>::forward(tmp[3][i], tmp[4][i], p.ff_p);
            }
            for (int i = 0; i < DEP; ++i) {
                Dropout<T, DIM>::forward(tmp[4][i], tmp[5][i], p.dr2);
            }
            for (int i = 0; i < DEP; ++i) {
                for (int j = 0; j < DIM; ++j) {
                    output[i][j] = input[i][j] + tmp[5][i][j];
                }
            }
        }
    };
}
#endif //TRANSFORMER_ENCODER_LAYER_H

--------------------------------------------------------------------------------
/model/feedforward.h:
--------------------------------------------------------------------------------
//
// Created by dianhsu on 2021/03/10.
//

#ifndef TRANSFORMER_FEEDFORWARD_H
#define TRANSFORMER_FEEDFORWARD_H

#include <array>

#include "linear.h"
#include "dropout.h"
#include "relu.h"

namespace transformer {
    template<typename T, int DIM, int DIM_HID>
    struct FeedForwardNetworkParameter {
        LinearParameter<T, DIM, DIM_HID> linear_p1;
        LinearParameter<T, DIM_HID, DIM> linear_p2;
        T dr;

        FeedForwardNetworkParameter() {
            this->dr = 0.1;
        }

        long long count() {
            return linear_p1.count() + linear_p2.count();
        }
    };

    template<typename T, int DIM, int DIM_HID>
    class FeedForwardNetwork {
    public:
        // FFN(x) = Linear2(Dropout(ReLU(Linear1(x))))
        static void forward(std::array<T, DIM> &input,
                            std::array<T, DIM> &output,
                            FeedForwardNetworkParameter<T, DIM, DIM_HID> &ff_p) {
            auto tmp = std::array<std::array<T, DIM_HID>, 3>{};
            Linear<T, DIM, DIM_HID>::forward(input, tmp[0], ff_p.linear_p1);
            Relu<T, DIM_HID>::forward(tmp[0], tmp[1]);
            Dropout<T, DIM_HID>::forward(tmp[1], tmp[2], ff_p.dr);
            Linear<T, DIM_HID, DIM>::forward(tmp[2], output, ff_p.linear_p2);
        }
    };
}
#endif //TRANSFORMER_FEEDFORWARD_H
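
Another standalone sketch (not a repository file, with the same caveat that the template parameter layout is the one shown in the headers above): a single FeedForwardNetwork forward pass, i.e. Linear -> ReLU -> Dropout -> Linear on one DIM-dimensional vector.

#include <array>
#include <iostream>

#include "feedforward.h"

int main() {
    constexpr int DIM = 4, DIM_HID = 8;
    auto *p = new transformer::FeedForwardNetworkParameter<float, DIM, DIM_HID>();
    std::array<float, DIM> in{0.5f, -0.5f, 1.0f, 0.0f}, out{};
    transformer::FeedForwardNetwork<float, DIM, DIM_HID>::forward(in, out, *p);
    for (float v : out) {
        std::cout << v << " ";  // all zeros here, because the weights and biases start at zero
    }
    std::cout << std::endl;
    delete p;
    return 0;
}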
--------------------------------------------------------------------------------
/model/function.h:
--------------------------------------------------------------------------------
//
// Created by dianhsu on 2021/03/10.
//

#ifndef TRANSFORMER_FUNCTION_H
#define TRANSFORMER_FUNCTION_H

#endif //TRANSFORMER_FUNCTION_H

--------------------------------------------------------------------------------
/model/linear.h:
--------------------------------------------------------------------------------
//
// Created by dianhsu on 2021/03/10.
//

#ifndef TRANSFORMER_LINEAR_H
#define TRANSFORMER_LINEAR_H

#include <array>

namespace transformer {
    template<typename T, int DIM_IN, int DIM_OUT>
    struct LinearParameter {
        std::array<std::array<T, DIM_OUT>, DIM_IN> weights;
        std::array<T, DIM_OUT> bias;

        long long count() {
            long long ret = 0;
            ret += DIM_OUT * DIM_IN + DIM_OUT;
            return ret;
        }
    };

    template<typename T, int DIM_IN, int DIM_OUT>
    class Linear {
    public:
        // output = input * weights + bias
        static void forward(std::array<T, DIM_IN> &input,
                            std::array<T, DIM_OUT> &output,
                            LinearParameter<T, DIM_IN, DIM_OUT> &param) {
            for (int i = 0; i < DIM_OUT; ++i) {
                output[i] = param.bias[i];
                for (int j = 0; j < DIM_IN; ++j) {
                    output[i] += input[j] * param.weights[j][i];
                }
            }
        }
    };

    template<typename T, int DIM_IN, int DIM_OUT, int DEP>
    class MultiLinear {
    public:
        // Applies the same Linear layer to each of the DEP rows of the input.
        static void forward(std::array<std::array<T, DIM_IN>, DEP> &input,
                            std::array<std::array<T, DIM_OUT>, DEP> &output,
                            LinearParameter<T, DIM_IN, DIM_OUT> &param) {
            for (int k = 0; k < DEP; ++k) {
                for (int i = 0; i < DIM_OUT; ++i) {
                    output[k][i] = param.bias[i];
                    for (int j = 0; j < DIM_IN; ++j) {
                        output[k][i] += input[k][j] * param.weights[j][i];
                    }
                }
            }
        }
    };
}

#endif //TRANSFORMER_LINEAR_H

--------------------------------------------------------------------------------
/model/norm.h:
--------------------------------------------------------------------------------
//
// Created by dianhsu on 2021/03/10.
//

#ifndef TRANSFORMER_NORM_H
#define TRANSFORMER_NORM_H

#include <array>
#include <cmath>

template<typename T, int DIM>
struct LayerNormParameter {
    std::array<T, DIM> weights;
    std::array<T, DIM> bias;

    long long count() {
        return DIM * 2;
    }
};

template<typename T, int DIM>
class LayerNorm {
public:
    // output = (input - mean) / sqrt(var + eps) * weights + bias
    static void forward(std::array<T, DIM> &input, std::array<T, DIM> &output, LayerNormParameter<T, DIM> &p) {
        T sum = 0;
        T sum2 = 0;
        for (int i = 0; i < DIM; ++i) {
            sum += input[i];
            sum2 += input[i] * input[i];
        }
        T avg = sum / DIM;
        T avg2 = sum2 / DIM;
        T var = avg2 - avg * avg;
        T sq_var = std::sqrt(var + 1e-5);
        for (int i = 0; i < DIM; ++i) {
            output[i] = (input[i] - avg) / sq_var * p.weights[i] + p.bias[i];
        }
    }
};

#endif //TRANSFORMER_NORM_H

--------------------------------------------------------------------------------
/model/relu.h:
--------------------------------------------------------------------------------
//
// Created by dianhsu on 2021/03/10.
//

#ifndef TRANSFORMER_RELU_H
#define TRANSFORMER_RELU_H

#include <array>

namespace transformer {
    template<typename T, int DIM>
    class Relu {
    public:
        static void forward(std::array<T, DIM> &input, std::array<T, DIM> &output) {
            for (int i = 0; i < DIM; ++i) {
                if (input[i] < 0) {
                    output[i] = 0;
                } else {
                    output[i] = input[i];
                }
            }
        }
    };
}
#endif //TRANSFORMER_RELU_H
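
As a quick numerical check of LayerNorm (again a standalone sketch, not a repository file): with unit weights and zero bias, the 4-dimensional input {1, 2, 3, 4} has mean 2.5 and variance 1.25, so the normalized output is roughly {-1.34, -0.45, 0.45, 1.34}.

#include <array>
#include <iostream>

#include "norm.h"

int main() {
    constexpr int DIM = 4;
    LayerNormParameter<float, DIM> p{};
    p.weights.fill(1.0f);  // gamma = 1, beta stays 0
    std::array<float, DIM> in{1.0f, 2.0f, 3.0f, 4.0f}, out{};
    LayerNorm<float, DIM>::forward(in, out, p);
    for (float v : out) {
        std::cout << v << " ";  // approximately -1.34 -0.45 0.45 1.34
    }
    std::cout << std::endl;
    return 0;
}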
--------------------------------------------------------------------------------
/model/softmax.h:
--------------------------------------------------------------------------------
//
// Created by dianhsu on 2021/03/10.
//

#ifndef TRANSFORMER_SOFTMAX_H
#define TRANSFORMER_SOFTMAX_H

#include <array>
#include <cmath>

template<typename T, int DIM, int DEP>
class Softmax {
public:
    // Numerically stable softmax applied independently to each of the DEP rows.
    static void forward(std::array<std::array<T, DIM>, DEP> &input, std::array<std::array<T, DIM>, DEP> &output) {
        for (int i = 0; i < DEP; ++i) {
            T mx = input[i][0];
            for (int j = 1; j < DIM; ++j) {
                if (input[i][j] > mx) {
                    mx = input[i][j];
                }
            }
            T sum = 0;
            for (int j = 0; j < DIM; ++j) {
                output[i][j] = std::exp(input[i][j] - mx);
                sum += output[i][j];
            }
            for (int j = 0; j < DIM; ++j) {
                output[i][j] /= sum;
            }
        }
    }
};

#endif //TRANSFORMER_SOFTMAX_H

--------------------------------------------------------------------------------
/model/transformer.h:
--------------------------------------------------------------------------------
//
// Created by dianhsu on 2021/03/11.
//

#ifndef TRANSFORMER_TRANSFORMER_H
#define TRANSFORMER_TRANSFORMER_H

#include <array>
#include "encoder.h"
#include "decoder.h"

namespace transformer {
    template<typename T, int DIM, int DEP, int DIM_HID, int HEAD_SIZE, int ENC_LAYER_CNT, int DEC_LAYER_CNT>
    struct TransformerParameter {
        EncoderParameter<T, DIM, DEP, DIM_HID, HEAD_SIZE, ENC_LAYER_CNT> encoder_p;
        DecoderParameter<T, DIM, DEP, DIM_HID, HEAD_SIZE, DEC_LAYER_CNT> decoder_p;

        long long count() {
            return encoder_p.count() + decoder_p.count();
        }
    };

    template<typename T, int DIM, int DEP, int DIM_HID, int HEAD_SIZE, int ENC_LAYER_CNT, int DEC_LAYER_CNT>
    class Transformer {
    public:
        static void forward(std::array<std::array<T, DIM>, DEP> &input,
                            std::array<std::array<T, DIM>, DEP> &output,
                            TransformerParameter<T, DIM, DEP, DIM_HID, HEAD_SIZE, ENC_LAYER_CNT, DEC_LAYER_CNT> &p) {
            auto tmp = std::array<std::array<T, DIM>, DEP>{};
            Encoder<T, DIM, DEP, DIM_HID, HEAD_SIZE, ENC_LAYER_CNT>::forward(input, tmp, p.encoder_p);
            // The encoder output is fed to the decoder both as its input and as the encoder memory.
            Decoder<T, DIM, DEP, DIM_HID, HEAD_SIZE, DEC_LAYER_CNT>::forward(tmp, tmp, output, p.decoder_p);
        }
    };
}
#endif //TRANSFORMER_TRANSFORMER_H

--------------------------------------------------------------------------------