├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── app └── src │ ├── compare-eisner.cpp │ ├── compare-phrase.cpp │ ├── test-dynet.cpp │ └── test-mtt.cpp ├── lib ├── CMakeLists.txt ├── include │ └── diffdp │ │ ├── algorithm │ │ ├── binary_phrase.h │ │ └── eisner.h │ │ ├── builder │ │ ├── binary-phrase.h │ │ └── dependency.h │ │ ├── chart.h │ │ ├── deduction_operations.h │ │ ├── dynet │ │ ├── args.h │ │ ├── binary_phrase.h │ │ ├── eisner.h │ │ └── matrix_tree_theorem.h │ │ └── math.h └── src │ ├── algorithm │ ├── binary_phrase.cpp │ └── eisner.cpp │ ├── builder │ ├── binary-phrase.cpp │ └── dependency.cpp │ ├── chart.cpp │ └── dynet │ ├── binary_phrase.cpp │ ├── eisner.cpp │ └── matrix_tree_theorem.cpp └── test ├── test-binary-phrase-algdiff.cpp ├── test-binary-phrase-ereg.cpp ├── test-dynet-eisner.cpp ├── test-dynet-phrase.cpp ├── test-eisner-algdiff.cpp ├── test-eisner-ereg.cpp ├── test-eisner-ereg.dSYM └── Contents │ ├── Info.plist │ └── Resources │ └── DWARF │ └── test-eisner-ereg └── test-math.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | cmake-build-debug/* 3 | 4 | # Prerequisites 5 | *.d 6 | 7 | # Compiled Object files 8 | *.slo 9 | *.lo 10 | *.o 11 | *.obj 12 | 13 | # Precompiled Headers 14 | *.gch 15 | *.pch 16 | 17 | # Compiled Dynamic libraries 18 | *.so 19 | *.dylib 20 | *.dll 21 | 22 | # Fortran module files 23 | *.mod 24 | *.smod 25 | 26 | # Compiled Static libraries 27 | *.lai 28 | *.la 29 | *.a 30 | *.lib 31 | 32 | # Executables 33 | *.exe 34 | *.out 35 | *.app 36 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.12) 2 | set(CMAKE_CXX_STANDARD 11) 3 | project(diffdp) 4 | enable_testing() 5 | 6 | find_package(Boost COMPONENTS unit_test_framework regex serialization filesystem REQUIRED) 7 | include_directories("/Users/filippo/repos/dynet") 8 | 9 | add_subdirectory(lib) 10 | 11 | 12 | add_executable(compare-phrase app/src/compare-phrase.cpp) 13 | target_link_libraries(compare-phrase lib-diffdp) 14 | 15 | add_executable(compare-eisner app/src/compare-eisner.cpp) 16 | target_link_libraries(compare-eisner lib-diffdp) 17 | 18 | add_executable(test-mtt app/src/test-mtt.cpp) 19 | target_link_libraries(test-mtt lib-diffdp) 20 | 21 | add_executable(test-dynet app/src/test-dynet.cpp) 22 | target_link_libraries(test-dynet lib-diffdp) 23 | 24 | 25 | file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} test/test-*.cpp) 26 | foreach(testSrc ${TEST_SRCS}) 27 | get_filename_component(testName ${testSrc} NAME_WE) 28 | 29 | add_executable(${testName} ${testSrc}) 30 | target_link_libraries(${testName} ${Boost_LIBRARIES} lib-diffdp dynet) 31 | 32 | add_test(NAME ${testName} COMMAND ${testName} ) 33 | endforeach(testSrc) 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Caio Filippo Corro 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 
| furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Differentiable Perturb-and-Parse operator
2 | 
3 | This repository contains the code for the continuous relaxation of the Eisner algorithm presented in:
4 | "Differentiable Perturb-and-Parse: Semi-Supervised Parsing with a Structured Variational Autoencoder",
5 | Caio Corro, Ivan Titov
6 | 
7 | 
8 | See: https://openreview.net/forum?id=BJlgNh0qKQ
9 | 
10 | 
11 | To cite:
12 | ```
13 | @InProceedings{perturb-and-parse,
14 | author = "Corro, Caio and Titov, Ivan",
15 | title = "Differentiable Perturb-and-Parse: Semi-Supervised Parsing with a Structured Variational Autoencoder",
16 | booktitle = "Proceedings of the Seventh International Conference on Learning Representations",
17 | year = "2019"
18 | }
19 | ```
20 | 
21 | The full VAE code and model will be released after the official proceedings release.
22 | If you have any questions, please contact me at the following email address: c.f.corro@uva.nl
23 | 
24 | 
25 | ## Usage
26 | 
27 | ```
28 | #include "diffdp/dynet/eisner.h"
29 | 
30 | auto arcs = dynet::algorithmic_differentiable_eisner(
31 | weights, // input: matrix of arc weights
32 | diffdp::DiscreteMode::ForwardRegularized, // relaxation mode
33 | diffdp::DependencyGraphMode::Adjacency, diffdp::DependencyGraphMode::Adjacency, // input/output format
34 | true // set to false to remove root arcs
35 | );
36 | ```
37 | 
38 | 
39 | ## Arguments
40 | 
41 | The following arguments must be provided:
42 | 1. the arc-factored weights of dependencies
43 | 2. the relaxation mode: diffdp::DiscreteMode::BackwardRegularized outputs the discrete structure and uses
44 | the relaxation only in the backward pass, while diffdp::DiscreteMode::ForwardRegularized uses the relaxation during the forward pass
45 | 3. the input format: diffdp::DependencyGraphMode::Adjacency uses an adjacency matrix as the input format, i.e. the main diagonal
46 | represents self-connections and is never used; diffdp::DependencyGraphMode::Compact uses the main diagonal to represent the weights
47 | of root dependencies
48 | 4. the output format
49 | 5. set to false to remove root arcs from the output
50 | 
51 | 
52 | ## Batching
53 | 
54 | This computational node can be used with mini-batches.
55 | However, it does not implement the auto-batch functionality of Dynet, so mini-batches should be constructed manually.
56 | 
57 | If sentences are of different sizes, a pointer of type "std::vector<unsigned>*" can be given as the last argument.
58 | This is compatible with static graphs (i.e. each forward call will check the sentence sizes stored in the vector).
59 | 
60 | WARNING: the sizes of the batched inputs *must not* include the root node.
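For instance, a minimal sketch of a batched call (`batched_weights` stands for a hypothetical batched expression of arc weights, and the `unsigned` element type of the size vector is an assumption based on the rest of the API):

```
// two sentences with 3 and 2 words respectively (root node excluded)
std::vector<unsigned> sizes{3, 2};

auto arcs = dynet::algorithmic_differentiable_eisner(
    batched_weights, // one weight matrix per batch element
    diffdp::DiscreteMode::ForwardRegularized,
    diffdp::DependencyGraphMode::Adjacency, diffdp::DependencyGraphMode::Adjacency,
    true,
    &sizes // pointer to the per-sentence sizes
);
```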
61 | 
62 | 
63 | ## TODO
64 | 
65 | - The memory usage could be divided by 2
66 | - Clean duplicate code
67 | - Static batch size (this could drastically reduce memory usage)
-------------------------------------------------------------------------------- /app/src/compare-eisner.cpp: --------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <random>
3 | #include <vector>
4 | 
5 | #include "diffdp/chart.h"
6 | 
7 | #include "diffdp/math.h"
8 | #include "diffdp/deduction_operations.h"
9 | #include "diffdp/algorithm/eisner.h"
10 | 
11 | int main(int argc, char* argv[])
12 | {
13 | std::default_random_engine generator;
14 | std::uniform_real_distribution<float> distribution(0.0, 1.0);
15 | 
16 | const unsigned size = 5;
17 | std::vector<float> weights(size * size);
18 | // fill the whole weight matrix, as done in compare-phrase.cpp
19 | for (unsigned i = 0 ; i < weights.size() ; ++i)
20 | weights.at(i) = distribution(generator);
21 | 
22 | std::cerr << "Entropy reg / Alg diff\n";
23 | diffdp::EntropyRegularizedEisner entrop_reg_eisner(size);
24 | entrop_reg_eisner.forward(
25 | [&] (unsigned head, unsigned mod) -> float
26 | {
27 | return weights.at(head + mod * size);
28 | }
29 | );
30 | diffdp::AlgorithmicDifferentiableEisner algo_diff_eisner(size);
31 | algo_diff_eisner.forward(
32 | [&] (unsigned head, unsigned mod) -> float
33 | {
34 | return weights.at(head + mod * size);
35 | });
36 | 
37 | for (unsigned head = 0 ; head < size ; ++head)
38 | {
39 | for (unsigned mod = 1 ; mod < size ; ++mod)
40 | {
41 | if (head == mod)
42 | continue;
43 | 
44 | std::cerr
45 | << entrop_reg_eisner.output(head, mod)
46 | << "\t"
47 | << algo_diff_eisner.output(head, mod)
48 | << "\n";
49 | }
50 | }
51 | }
-------------------------------------------------------------------------------- /app/src/compare-phrase.cpp: --------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <random>
3 | #include <vector>
4 | 
5 | #include "diffdp/chart.h"
6 | 
7 | #include "diffdp/math.h"
8 | #include "diffdp/deduction_operations.h"
9 | #include "diffdp/algorithm/binary_phrase.h"
10 | 
11 | int main(int argc, char* argv[])
12 | {
13 | std::default_random_engine generator;
14 | std::uniform_real_distribution<float> distribution(0.0, 1.0);
15 | 
16 | const unsigned size = 5;
17 | std::vector<float> weights(size * size);
18 | for (unsigned i = 0 ; i < weights.size() ; ++i)
19 | weights.at(i) = distribution(generator);
20 | 
21 | diffdp::AlgorithmicDifferentiableBinaryPhraseStructure algo_diff(size);
22 | algo_diff.forward(
23 | [&] (unsigned left, unsigned right) -> float
24 | {
25 | return weights.at(left + right * size);
26 | });
27 | 
28 | for (unsigned left = 0 ; left < size ; ++left)
29 | {
30 | for (unsigned right = left+1 ; right < size ; ++right)
31 | {
32 | std::cerr
33 | << left << "," << right
34 | << "\t"
35 | << algo_diff.output(left, right)
36 | << "\n";
37 | }
38 | }
39 | }
-------------------------------------------------------------------------------- /app/src/test-dynet.cpp: --------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <vector>
3 | 
4 | #include "dynet/init.h"
5 | #include "dynet/expr.h"
6 | #include "dynet/nodes-def-macros.h"
7 | #include "dynet/nodes-impl-macros.h"
8 | #include "dynet/tensor-eigen.h"
9 | #include "diffdp/dynet/matrix_tree_theorem.h"
10 | #include "dytools/functions/rooted_arborescence_marginals.h"
11 | 
12 | int main(int argc, char* argv[])
13 | {
14 | const auto size = 3;
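// note: dynet stores tensors in column-major order, so cell (i, j) of the
// weight matrix below lives at index i + j * size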
15 | dynet::initialize(argc, argv);
16 | 
17 | std::vector<float> v_weights(size * size, 0.f);
18 | v_weights.at(0 + 1 * size) = 1.f;
19 | v_weights.at(0 + 2 * size) = 4.f;
20 | v_weights.at(1 + 2 * size) = 1.f;
21 | v_weights.at(2 + 1 * size) = 1.f;
22 | 
23 | dynet::ComputationGraph cg;
24 | 
25 | const auto e_weights = dynet::input(cg, {size, size}, v_weights);
26 | 
27 | std::vector<unsigned> sizes{2};
28 | const auto e_marginals = dytools::rooted_arborescence_marginals(cg, e_weights, sizes);
29 | 
30 | const auto v_output = as_vector(cg.forward(e_marginals));
31 | for (unsigned i = 0 ; i < size ; ++i)
32 | {
33 | for (unsigned j = 0 ; j < size ; ++j)
34 | std::cerr << v_output.at(i + j * size) << "\t";
35 | std::cerr << "\n";
36 | }
37 | }
38 | 
-------------------------------------------------------------------------------- /app/src/test-mtt.cpp: --------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <vector>
3 | 
4 | #include "dynet/init.h"
5 | #include "dynet/expr.h"
6 | #include "diffdp/dynet/matrix_tree_theorem.h"
7 | 
8 | int main(int argc, char* argv[])
9 | {
10 | dynet::initialize(argc, argv);
11 | 
12 | const unsigned size = 3;
13 | std::vector<float> v_input(size * size, 1.f);
14 | 
15 | dynet::ComputationGraph cg;
16 | auto e_input = dynet::input(cg, {size, size}, v_input);
17 | auto e_output = dynet::matrix_tree_theorem(e_input);
18 | 
19 | auto v_output = as_vector(cg.forward(e_output));
20 | 
21 | for (unsigned i = 0 ; i < size ; ++i)
22 | {
23 | for (unsigned j = 0 ; j < size ; ++j)
24 | {
25 | std::cout << v_output.at(i + j * size) << "\t";
26 | }
27 | std::cout << "\n";
28 | }
29 | }
-------------------------------------------------------------------------------- /lib/CMakeLists.txt: --------------------------------------------------------------------------------
1 | add_library(
2 | lib-diffdp
3 | 
4 | src/chart.cpp
5 | 
6 | src/algorithm/eisner.cpp
7 | src/algorithm/binary_phrase.cpp
8 | 
9 | src/dynet/eisner.cpp
10 | src/dynet/binary_phrase.cpp
11 | #src/dynet/matrix_tree_theorem.cpp
12 | 
13 | src/builder/dependency.cpp
14 | src/builder/binary-phrase.cpp
15 | )
16 | 
17 | # Define headers for this library. PUBLIC headers are used for
18 | # compiling the library, and will be added to consumers' build
19 | # paths.
20 | target_include_directories( 21 | lib-diffdp 22 | 23 | PUBLIC 24 | $ 25 | $ 26 | 27 | PRIVATE 28 | src 29 | ) 30 | add_subdirectory("/Users/filippo/repos/dynet-tools" dytools) 31 | 32 | target_link_libraries(lib-diffdp ${Boost_LIBRARIES}) 33 | target_link_libraries(lib-diffdp dynet) 34 | target_link_libraries(lib-diffdp libdytools) -------------------------------------------------------------------------------- /lib/include/diffdp/algorithm/binary_phrase.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "diffdp/chart.h" 8 | #include "diffdp/deduction_operations.h" 9 | 10 | namespace diffdp 11 | { 12 | 13 | 14 | struct BinaryPhraseStructureChart 15 | { 16 | const unsigned size; 17 | const unsigned size_3d; 18 | const unsigned size_2d; 19 | float* _memory = nullptr; 20 | const bool _erase_memory; 21 | 22 | Tensor3D split_weights, backptr; 23 | Matrix weight, soft_selection; 24 | 25 | BinaryPhraseStructureChart(unsigned size); 26 | BinaryPhraseStructureChart(unsigned size, float* mem); 27 | ~BinaryPhraseStructureChart(); 28 | 29 | void zeros(); 30 | 31 | static std::size_t required_memory(const unsigned size); 32 | static unsigned required_cells(const unsigned size); 33 | }; 34 | 35 | 36 | 37 | struct AlgorithmicDifferentiableBinaryPhraseStructure 38 | { 39 | unsigned _size; 40 | 41 | std::shared_ptr chart_forward; 42 | std::shared_ptr chart_backward; 43 | 44 | explicit AlgorithmicDifferentiableBinaryPhraseStructure(const unsigned t_size); 45 | AlgorithmicDifferentiableBinaryPhraseStructure(std::shared_ptr chart_forward, std::shared_ptr chart_backward); 46 | 47 | template 48 | void forward(Functor&& weight_callback); 49 | 50 | template 51 | void backward(Functor&& gradient_callback); 52 | 53 | static void forward_maximize(std::shared_ptr& chart_forward); 54 | static void forward_backtracking(std::shared_ptr& chart_forward); 55 | 56 | static void backward_maximize(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 57 | static void backward_backtracking(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 58 | 59 | float output(const unsigned head, const unsigned mod) const; 60 | float gradient(const unsigned left, const unsigned right) const; 61 | 62 | unsigned size() const; 63 | }; 64 | 65 | 66 | struct EntropyRegularizedBinaryPhraseStructure 67 | { 68 | unsigned _size; 69 | 70 | std::shared_ptr chart_forward; 71 | std::shared_ptr chart_backward; 72 | 73 | explicit EntropyRegularizedBinaryPhraseStructure(const unsigned t_size); 74 | EntropyRegularizedBinaryPhraseStructure(std::shared_ptr chart_forward, std::shared_ptr chart_backward); 75 | 76 | 77 | template 78 | void forward(Functor&& weight_callback); 79 | 80 | template 81 | void backward(Functor&& gradient_callback); 82 | 83 | static void forward_maximize(std::shared_ptr& chart_forward); 84 | static void forward_backtracking(std::shared_ptr& chart_forward); 85 | 86 | static void backward_maximize(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 87 | static void backward_backtracking(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 88 | 89 | float output(const unsigned head, const unsigned mod) const; 90 | float gradient(const unsigned head, const unsigned mod) const; 91 | 92 | unsigned size() const; 93 | }; 94 | 95 | 96 | // templates implementations 97 | 98 | template 99 | void AlgorithmicDifferentiableBinaryPhraseStructure::forward(Functor&& weight_callback) 100 | 
{ 101 | const unsigned size = chart_forward->size; 102 | 103 | chart_forward->zeros(); // we could skip some zeros here 104 | for (unsigned i = 0; i < size; ++i) 105 | { 106 | for (unsigned j = i + 1; j < size; ++j) 107 | { 108 | chart_forward->weight(i, j) = weight_callback(i, j); 109 | } 110 | } 111 | 112 | AlgorithmicDifferentiableBinaryPhraseStructure::forward_maximize(chart_forward); 113 | AlgorithmicDifferentiableBinaryPhraseStructure::forward_backtracking(chart_forward); 114 | } 115 | 116 | template 117 | void AlgorithmicDifferentiableBinaryPhraseStructure::backward(Functor&& gradient_callback) 118 | { 119 | const unsigned size = chart_forward->size; 120 | 121 | chart_backward->zeros(); 122 | // init gradient here 123 | for (unsigned i = 0; i < size; ++i) 124 | { 125 | for (unsigned j = i + 1; j < size; ++j) 126 | { 127 | chart_backward->soft_selection(i, j) = gradient_callback(i, j); 128 | } 129 | } 130 | 131 | AlgorithmicDifferentiableBinaryPhraseStructure::backward_backtracking(chart_forward, chart_backward); 132 | AlgorithmicDifferentiableBinaryPhraseStructure::backward_maximize(chart_forward, chart_backward); 133 | } 134 | 135 | template 136 | void EntropyRegularizedBinaryPhraseStructure::forward(Functor&& weight_callback) 137 | { 138 | const unsigned size = chart_forward->size; 139 | 140 | chart_forward->zeros(); // we could skip some zeros here 141 | for (unsigned i = 0; i < size; ++i) 142 | { 143 | for (unsigned j = i + 1; j < size; ++j) 144 | { 145 | chart_forward->weight(i, j) = weight_callback(i, j); 146 | } 147 | } 148 | 149 | EntropyRegularizedBinaryPhraseStructure::forward_maximize(chart_forward); 150 | EntropyRegularizedBinaryPhraseStructure::forward_backtracking(chart_forward); 151 | } 152 | 153 | template 154 | void EntropyRegularizedBinaryPhraseStructure::backward(Functor&& gradient_callback) 155 | { 156 | const unsigned size = chart_forward->size; 157 | 158 | chart_backward->zeros(); 159 | // init gradient here 160 | for (unsigned i = 0; i < size; ++i) 161 | { 162 | for (unsigned j = i + 1; j < size; ++j) 163 | { 164 | chart_backward->soft_selection(i, j) = gradient_callback(i, j); 165 | } 166 | } 167 | 168 | EntropyRegularizedBinaryPhraseStructure::backward_backtracking(chart_forward, chart_backward); 169 | EntropyRegularizedBinaryPhraseStructure::backward_maximize(chart_forward, chart_backward); 170 | } 171 | 172 | 173 | } -------------------------------------------------------------------------------- /lib/include/diffdp/algorithm/eisner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "diffdp/chart.h" 8 | #include "diffdp/deduction_operations.h" 9 | 10 | namespace diffdp 11 | { 12 | 13 | struct EisnerChart 14 | { 15 | const unsigned size; 16 | const unsigned size_3d; 17 | const unsigned size_2d; 18 | float* _memory = nullptr; 19 | const bool _erase_memory; 20 | 21 | Tensor3D 22 | a_cleft, a_cright, a_uleft, a_uright, 23 | b_cleft, b_cright, b_uleft, b_uright; 24 | 25 | Matrix 26 | c_cleft, c_cright, c_uleft, c_uright, 27 | soft_c_cleft, soft_c_cright, soft_c_uleft, soft_c_uright 28 | ; 29 | 30 | EisnerChart(unsigned size); 31 | EisnerChart(unsigned size, float* mem); 32 | ~EisnerChart(); 33 | 34 | void zeros(); 35 | 36 | static std::size_t required_memory(const unsigned size); 37 | static unsigned required_cells(const unsigned size); 38 | }; 39 | 40 | /* 41 | * Continuous relaxation of "Differentiable Perturb-and-Parse: Semi-Supervised Parsing with a 
Structured Variational Autoencoder, Corro & Titov" 42 | */ 43 | struct AlgorithmicDifferentiableEisner 44 | { 45 | unsigned _size; 46 | 47 | std::shared_ptr chart_forward; 48 | std::shared_ptr chart_backward; 49 | 50 | explicit AlgorithmicDifferentiableEisner(const unsigned t_size); 51 | AlgorithmicDifferentiableEisner(std::shared_ptr chart_forward, std::shared_ptr chart_backward); 52 | 53 | template 54 | void forward(Functor&& weight_callback); 55 | 56 | template 57 | void backward(Functor&& gradient_callback); 58 | 59 | static void forward_maximize(std::shared_ptr& chart_forward); 60 | static void forward_backtracking(std::shared_ptr& chart_forward); 61 | 62 | static void backward_maximize(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 63 | static void backward_backtracking(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 64 | 65 | float output(const unsigned head, const unsigned mod) const; 66 | float gradient(const unsigned head, const unsigned mod) const; 67 | 68 | unsigned size() const; 69 | }; 70 | 71 | 72 | 73 | /* 74 | * Continuous relaxation of "Differentiable Dynamic Programming for Structured Prediction and Attention, Mensch & Blondel" 75 | * This is equivalent to structured attention (i.e. marginalization), 76 | * but it has better numerically stability in practice (i.e. no underflow/overflow issue) 77 | */ 78 | struct EntropyRegularizedEisner 79 | { 80 | unsigned _size; 81 | 82 | std::shared_ptr chart_forward; 83 | std::shared_ptr chart_backward; 84 | 85 | explicit EntropyRegularizedEisner(const unsigned t_size); 86 | EntropyRegularizedEisner(std::shared_ptr chart_forward, std::shared_ptr chart_backward); 87 | 88 | 89 | template 90 | void forward(Functor&& weight_callback); 91 | 92 | template 93 | void backward(Functor&& gradient_callback); 94 | 95 | static void forward_maximize(std::shared_ptr& chart_forward); 96 | static void forward_backtracking(std::shared_ptr& chart_forward); 97 | 98 | //static void backward_maximize(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 99 | //static void backward_backtracking(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 100 | 101 | float output(const unsigned head, const unsigned mod) const; 102 | float gradient(const unsigned head, const unsigned mod) const; 103 | 104 | unsigned size() const; 105 | }; 106 | 107 | 108 | // templates implementations 109 | 110 | template 111 | void AlgorithmicDifferentiableEisner::forward(Functor&& weight_callback) 112 | { 113 | const unsigned size = chart_forward->size; 114 | 115 | chart_forward->zeros(); // we could skip some zeros here 116 | for (unsigned i = 0; i < size; ++i) 117 | { 118 | for (unsigned j = 1; j < size; ++j) 119 | { 120 | if (i < j) 121 | chart_forward->c_uright(i, j) = weight_callback(i, j); 122 | else if (j < i) 123 | chart_forward->c_uleft(j, i) = weight_callback(i, j); 124 | } 125 | } 126 | 127 | AlgorithmicDifferentiableEisner::forward_maximize(chart_forward); 128 | AlgorithmicDifferentiableEisner::forward_backtracking(chart_forward); 129 | } 130 | 131 | template 132 | void AlgorithmicDifferentiableEisner::backward(Functor&& gradient_callback) 133 | { 134 | const unsigned size = chart_forward->size; 135 | 136 | chart_backward->zeros(); 137 | for (unsigned i = 0; i < size; ++i) 138 | { 139 | for (unsigned j = 1; j < size; ++j) 140 | { 141 | if (i < j) 142 | chart_backward->soft_c_uright(i, j) = gradient_callback(i, j); 143 | else if (j < i) 144 | chart_backward->soft_c_uleft(j, i) = gradient_callback(i, j); 145 | 
} 146 | } 147 | 148 | AlgorithmicDifferentiableEisner::backward_backtracking(chart_forward, chart_backward); 149 | AlgorithmicDifferentiableEisner::backward_maximize(chart_forward, chart_backward); 150 | } 151 | 152 | template 153 | void EntropyRegularizedEisner::forward(Functor&& weight_callback) 154 | { 155 | const unsigned size = chart_forward->size; 156 | 157 | // this initialization seems ok, but check why it works! 158 | chart_forward->zeros(); // we could skip some zeros here 159 | for (unsigned i = 0; i < size; ++i) 160 | { 161 | for (unsigned j = 1; j < size; ++j) 162 | { 163 | if (i < j) 164 | chart_forward->c_uright(i, j) = weight_callback(i, j); 165 | else if (j < i) 166 | chart_forward->c_uleft(j, i) = weight_callback(i, j); 167 | } 168 | } 169 | 170 | EntropyRegularizedEisner::forward_maximize(chart_forward); 171 | EntropyRegularizedEisner::forward_backtracking(chart_forward); 172 | } 173 | 174 | template 175 | void EntropyRegularizedEisner::backward(Functor&& gradient_callback) 176 | { 177 | const unsigned size = chart_forward->size; 178 | 179 | // check if this init is correct 180 | chart_backward->zeros(); 181 | for (unsigned i = 0; i < size; ++i) 182 | { 183 | for (unsigned j = 1; j < size; ++j) 184 | { 185 | if (i < j) 186 | chart_backward->soft_c_uright(i, j) = gradient_callback(i, j); 187 | else if (j < i) 188 | chart_backward->soft_c_uleft(j, i) = gradient_callback(i, j); 189 | } 190 | } 191 | 192 | // backpropagate throught backtracking 193 | for (unsigned l = 1; l < size ; ++l) 194 | { 195 | for (unsigned i = 0; i < size - l; ++i) 196 | { 197 | unsigned j = i + l; 198 | 199 | if (i > 0u) 200 | { 201 | diffdp::backward_backtracking( 202 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 203 | chart_forward->soft_c_uleft(i, j), 204 | chart_forward->b_uleft.iter3(i, j, i), 205 | 206 | chart_backward->soft_c_cright.iter2(i, i), chart_backward->soft_c_cleft.iter1(i + 1, j), 207 | &chart_backward->soft_c_uleft(i, j), 208 | chart_backward->b_uleft.iter3(i, j, i), 209 | 210 | l 211 | ); 212 | } 213 | 214 | diffdp::backward_backtracking( 215 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 216 | chart_forward->soft_c_uright(i, j), 217 | chart_forward->b_uright.iter3(i, j, i), 218 | 219 | chart_backward->soft_c_cright.iter2(i, i), chart_backward->soft_c_cleft.iter1(i + 1, j), 220 | &chart_backward->soft_c_uright(i, j), 221 | chart_backward->b_uright.iter3(i, j, i), 222 | 223 | l 224 | ); 225 | 226 | if (i > 0u) 227 | { 228 | diffdp::backward_backtracking( 229 | chart_forward->soft_c_cleft.iter2(i, i), chart_forward->soft_c_uleft.iter1(i, j), 230 | chart_forward->soft_c_cleft(i, j), 231 | chart_forward->b_cleft.iter3(i, j, i), 232 | 233 | chart_backward->soft_c_cleft.iter2(i, i), chart_backward->soft_c_uleft.iter1(i, j), 234 | &chart_backward->soft_c_cleft(i, j), 235 | chart_backward->b_cleft.iter3(i, j, i), 236 | 237 | l 238 | ); 239 | } 240 | 241 | diffdp::backward_backtracking( 242 | chart_forward->soft_c_uright.iter2(i, i+1), chart_forward->soft_c_cright.iter1(i+1, j), 243 | chart_forward->soft_c_cright(i, j), 244 | chart_forward->b_cright.iter3(i, j, i + 1), 245 | 246 | chart_backward->soft_c_uright.iter2(i, i+1), chart_backward->soft_c_cright.iter1(i+1, j), 247 | &chart_backward->soft_c_cright(i, j), 248 | chart_backward->b_cright.iter3(i, j, i + 1), 249 | 250 | l 251 | ); 252 | } 253 | } 254 | for (unsigned l = size - 1; l >= 1; --l) 255 | { 256 | for (unsigned i = 0; i < size - l; ++i) 257 | 
{ 258 | unsigned j = i + l; 259 | 260 | if (i > 0u) 261 | { 262 | backward_entropy_reg( 263 | chart_forward->c_cleft.iter2(i, i), chart_forward->c_uleft.iter1(i, j), 264 | chart_forward->a_cleft.iter3(i, j, i), 265 | chart_forward->b_cleft.iter3(i, j, i), 266 | 267 | chart_backward->c_cleft.iter2(i, i), chart_backward->c_uleft.iter1(i, j), 268 | chart_backward->c_cleft(i, j), 269 | chart_backward->a_cleft.iter3(i, j, i), 270 | chart_backward->b_cleft.iter3(i, j, i), 271 | 272 | l 273 | ); 274 | } 275 | 276 | backward_entropy_reg( 277 | chart_forward->c_uright.iter2(i, i + 1), chart_forward->c_cright.iter1(i + 1, j), 278 | chart_forward->a_cright.iter3(i, j, i + 1), 279 | chart_forward->b_cright.iter3(i, j, i + 1), 280 | 281 | chart_backward->c_uright.iter2(i, i + 1), chart_backward->c_cright.iter1(i + 1, j), 282 | chart_backward->c_cright(i, j), 283 | chart_backward->a_cright.iter3(i, j, i + 1), 284 | chart_backward->b_cright.iter3(i, j, i + 1), 285 | 286 | l 287 | ); 288 | 289 | if (i > 0u) 290 | { 291 | backward_entropy_reg( 292 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 293 | chart_forward->a_uleft.iter3(i, j, i), 294 | chart_forward->b_uleft.iter3(i, j, i), 295 | 296 | chart_backward->c_cright.iter2(i, i), chart_backward->c_cleft.iter1(i + 1, j), 297 | chart_backward->c_uleft(i, j), 298 | chart_backward->a_uleft.iter3(i, j, i), 299 | chart_backward->b_uleft.iter3(i, j, i), 300 | 301 | l 302 | ); 303 | } 304 | 305 | backward_entropy_reg( 306 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 307 | chart_forward->a_uright.iter3(i, j, i), 308 | chart_forward->b_uright.iter3(i, j, i), 309 | 310 | chart_backward->c_cright.iter2(i, i), chart_backward->c_cleft.iter1(i + 1, j), 311 | chart_backward->c_uright(i, j), 312 | chart_backward->a_uright.iter3(i, j, i), 313 | chart_backward->b_uright.iter3(i, j, i), 314 | 315 | l 316 | ); 317 | } 318 | } 319 | } 320 | 321 | 322 | 323 | 324 | } 325 | 326 | -------------------------------------------------------------------------------- /lib/include/diffdp/builder/binary-phrase.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "dynet/expr.h" 4 | 5 | namespace diffdp 6 | { 7 | 8 | enum struct BinaryPhraseType 9 | { 10 | AlgDiff, 11 | EntropyReg 12 | }; 13 | 14 | struct BinaryPhraseSettings 15 | { 16 | BinaryPhraseType type = BinaryPhraseType::AlgDiff; 17 | bool perturb = false; 18 | }; 19 | 20 | struct BinaryPhraseBuilder 21 | { 22 | const BinaryPhraseSettings settings; 23 | dynet::ComputationGraph* _cg; 24 | bool _training = true; 25 | 26 | BinaryPhraseBuilder(const BinaryPhraseSettings& settings); 27 | 28 | void new_graph(dynet::ComputationGraph& cg, bool training); 29 | dynet::Expression relaxed(const dynet::Expression& weights); 30 | dynet::Expression argmax(const dynet::Expression& weights); 31 | 32 | dynet::Expression relaxed_alg_diff(const dynet::Expression& weights); 33 | dynet::Expression relaxed_entropy_Reg(const dynet::Expression& weights); 34 | protected: 35 | /** 36 | * Perturb arc if training mode and setting.perturb == true 37 | */ 38 | dynet::Expression perturb(const dynet::Expression& arc_weights); 39 | }; 40 | 41 | } -------------------------------------------------------------------------------- /lib/include/diffdp/builder/dependency.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "dynet/expr.h" 4 | 5 | namespace diffdp 6 | { 7 | 8 | enum 
struct DependencyType 9 | { 10 | Head, 11 | NonProjective, 12 | ProjectiveAlgDiff, 13 | ProjectiveEntropyReg 14 | }; 15 | 16 | struct DependencySettings 17 | { 18 | DependencyType type = DependencyType::Head; 19 | bool perturb = false; 20 | }; 21 | 22 | struct DependencyBuilder 23 | { 24 | const DependencySettings settings; 25 | dynet::ComputationGraph* _cg; 26 | bool _training = true; 27 | 28 | DependencyBuilder(const DependencySettings& settings); 29 | 30 | void new_graph(dynet::ComputationGraph& cg, bool training); 31 | dynet::Expression relaxed(const dynet::Expression& arc_weights, std::vector* sizes = nullptr, dynet::Expression* e_mask = nullptr); 32 | 33 | dynet::Expression relaxed_head(const dynet::Expression& arc_weights, dynet::Expression* e_mask = nullptr); 34 | dynet::Expression relaxed_nonprojective(const dynet::Expression& arc_weights, std::vector* sizes = nullptr); 35 | dynet::Expression relaxed_projective_alg_diff(const dynet::Expression& arc_weights, std::vector* sizes = nullptr); 36 | dynet::Expression relaxed_projective_entropy_reg(const dynet::Expression& arc_weights, std::vector* sizes = nullptr); 37 | 38 | dynet::Expression argmax(const dynet::Expression& arc_weights, std::vector* sizes = nullptr, dynet::Expression* e_mask = nullptr); 39 | dynet::Expression argmax_head(const dynet::Expression& arc_weights, dynet::Expression* e_mask = nullptr); 40 | dynet::Expression argmax_nonprojective(const dynet::Expression& arc_weights, std::vector* sizes = nullptr); 41 | dynet::Expression argmax_projective_alg_diff(const dynet::Expression& arc_weights, std::vector* sizes = nullptr); 42 | dynet::Expression argmax_projective_entropy_reg(const dynet::Expression& arc_weights, std::vector* sizes = nullptr); 43 | 44 | protected: 45 | /** 46 | * Perturb arc if training mode and setting.perturb == true 47 | */ 48 | dynet::Expression perturb(const dynet::Expression& arc_weights); 49 | }; 50 | 51 | } -------------------------------------------------------------------------------- /lib/include/diffdp/chart.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace diffdp 6 | { 7 | 8 | template 9 | struct Tensor3D 10 | { 11 | unsigned _size; 12 | bool _free_data; 13 | T* _data; 14 | 15 | Tensor3D(const unsigned size); 16 | Tensor3D(const unsigned size, T* _data); 17 | ~Tensor3D(); 18 | 19 | static std::size_t required_memory(const unsigned size); 20 | static unsigned required_cells(const unsigned size); 21 | 22 | inline T& operator()(const unsigned i, const unsigned j, const unsigned k) noexcept; 23 | inline T operator()(const unsigned i, const unsigned j, const unsigned k) const noexcept; 24 | 25 | inline 26 | T* iter3(const unsigned i, const unsigned j, const unsigned k) noexcept; 27 | }; 28 | 29 | template 30 | struct Matrix; 31 | 32 | template 33 | struct MatrixRowIterator 34 | { 35 | Matrix* chart; 36 | T* current; 37 | 38 | MatrixRowIterator(Matrix* chart, T* current); 39 | MatrixRowIterator(const MatrixRowIterator& o); 40 | 41 | T& operator*(); 42 | MatrixRowIterator& operator++(); 43 | bool operator!=(const MatrixRowIterator& o) const; 44 | }; 45 | 46 | template 47 | struct Matrix 48 | { 49 | unsigned _size; 50 | bool _free_data; 51 | T* _data; 52 | 53 | Matrix(const unsigned size); 54 | Matrix(const unsigned size, T* _data); 55 | ~Matrix(); 56 | 57 | inline static std::size_t required_memory(const unsigned size); 58 | inline static unsigned required_cells(const unsigned size); 59 | 60 | inline T& 
operator()(const unsigned i, const unsigned j) noexcept; 61 | inline T operator()(const unsigned i, const unsigned j) const noexcept; 62 | 63 | inline MatrixRowIterator iter1(const unsigned i, const unsigned j) noexcept; 64 | inline T* iter2(const unsigned i, const unsigned j) noexcept; 65 | }; 66 | 67 | 68 | // Template implementations 69 | template 70 | Tensor3D::Tensor3D(const unsigned size) : 71 | _size(size), 72 | _free_data(true) 73 | { 74 | _data = new T[required_cells(size)]; 75 | } 76 | 77 | template 78 | Tensor3D::Tensor3D(const unsigned size, T* _data) : 79 | _size(size), 80 | _free_data(false), 81 | _data(_data) 82 | {} 83 | 84 | template 85 | Tensor3D::~Tensor3D() 86 | { 87 | if (_free_data) 88 | delete[] _data; 89 | } 90 | 91 | template 92 | std::size_t Tensor3D::required_memory(const unsigned size) 93 | { 94 | return required_cells(size) * sizeof(T); 95 | } 96 | 97 | template 98 | unsigned Tensor3D::required_cells(const unsigned size) 99 | { 100 | return size * size * size; 101 | } 102 | 103 | 104 | template 105 | T& Tensor3D::operator()(const unsigned i, const unsigned j, const unsigned k) noexcept 106 | { 107 | return _data[i * _size * _size + j * _size + k]; 108 | } 109 | 110 | 111 | template 112 | T Tensor3D::operator()(const unsigned i, const unsigned j, const unsigned k) const noexcept 113 | { 114 | return _data[i * _size * _size + j * _size + k]; 115 | } 116 | 117 | 118 | template 119 | T* Tensor3D::iter3(const unsigned i, const unsigned j, const unsigned k) noexcept 120 | { 121 | return _data + i * _size *_size + j * _size + k; 122 | } 123 | 124 | 125 | template 126 | MatrixRowIterator::MatrixRowIterator(Matrix* chart, T* current) : 127 | chart(chart), 128 | current(current) 129 | {} 130 | 131 | template 132 | MatrixRowIterator::MatrixRowIterator(const MatrixRowIterator& o) : 133 | chart(o.chart), 134 | current(o.current) 135 | {} 136 | 137 | template 138 | T& MatrixRowIterator::operator*() 139 | { 140 | return *current; 141 | } 142 | 143 | template 144 | MatrixRowIterator& MatrixRowIterator::operator++() 145 | { 146 | current += chart->_size; 147 | return *this; 148 | } 149 | 150 | template 151 | bool MatrixRowIterator::operator!=(const MatrixRowIterator& o) const 152 | { 153 | return !(chart == o.chart && current == o.current); 154 | } 155 | 156 | 157 | 158 | template 159 | Matrix::Matrix(const unsigned size) : 160 | _size(size), 161 | _free_data(true), 162 | _data(new T[required_cells(size)]) 163 | {} 164 | 165 | template 166 | Matrix::Matrix(const unsigned size, T* _data) : 167 | _size(size), 168 | _free_data(false), 169 | _data(_data) 170 | {} 171 | 172 | template 173 | std::size_t Matrix::required_memory(const unsigned size) 174 | { 175 | return required_cells(size) * sizeof(T); 176 | } 177 | 178 | template 179 | unsigned Matrix::required_cells(const unsigned size) 180 | { 181 | return size * size; 182 | } 183 | 184 | template 185 | Matrix::~Matrix() 186 | { 187 | if (_free_data) 188 | delete[] _data; 189 | } 190 | 191 | template 192 | T& Matrix::operator()(const unsigned i, const unsigned j) noexcept 193 | { 194 | return _data[i * _size + j]; 195 | } 196 | 197 | template 198 | T Matrix::operator()(const unsigned i, const unsigned j) const noexcept 199 | { 200 | return _data[i * _size + j]; 201 | } 202 | 203 | template 204 | MatrixRowIterator Matrix::iter1(const unsigned i, const unsigned j) noexcept 205 | { 206 | return {this, _data + i * _size + j}; 207 | } 208 | 209 | template 210 | T* Matrix::iter2(const unsigned i, const unsigned j) noexcept 211 | { 212 | 
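// iter2 returns a raw pointer into row i starting at column j: incrementing it
// moves along the row, whereas the MatrixRowIterator returned by iter1 advances
// by _size at each step, i.e. moves down a column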
return _data + i * _size + j; 213 | } 214 | 215 | 216 | } 217 | -------------------------------------------------------------------------------- /lib/include/diffdp/deduction_operations.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /** 4 | * Header-only implementation of operations on deduction rules 5 | * 6 | * Inputs/outputs are iterator-like objects that must be: 7 | * - copy-constructible 8 | * - dereferenceable 9 | * - incrementable 10 | * 11 | * Author: Caio Corro 12 | */ 13 | 14 | #include 15 | #include "diffdp/math.h" 16 | 17 | namespace diffdp 18 | { 19 | 20 | template 21 | float forward_algorithmic_softmax( 22 | T left_antecedent, U right_antecedent, 23 | V split_weights, 24 | W backptr, 25 | unsigned size 26 | ) 27 | { 28 | cwise_add(split_weights, left_antecedent, right_antecedent, size); 29 | softmax(backptr, split_weights, size); 30 | return dot(split_weights, backptr, size); 31 | } 32 | 33 | template 34 | void forward_backtracking( 35 | T contrib_left_antecedent, U contrib_right_antecedent, 36 | const float contrib_consequent, 37 | V backptr, 38 | const unsigned size 39 | ) 40 | { 41 | add_cwise_mult(contrib_left_antecedent, backptr, contrib_consequent, size); 42 | add_cwise_mult(contrib_right_antecedent, backptr, contrib_consequent, size); 43 | } 44 | 45 | template 46 | void backward_backtracking( 47 | T contrib_left_antecedent, U contrib_right_antecedent, 48 | const float contrib_consequent, 49 | V backptr, 50 | 51 | A gradient_contrib_left_antecedent, B gradient_contrib_right_antecedent, 52 | float *gradient_contrib_consequent, 53 | C gradient_backptr, 54 | 55 | const unsigned size 56 | ) 57 | { 58 | *gradient_contrib_consequent += dot(backptr, gradient_contrib_left_antecedent, size); 59 | *gradient_contrib_consequent += dot(backptr, gradient_contrib_right_antecedent, size); 60 | add_cwise_mult(gradient_backptr, gradient_contrib_left_antecedent, contrib_consequent, size); 61 | add_cwise_mult(gradient_backptr, gradient_contrib_right_antecedent, contrib_consequent, size); 62 | } 63 | 64 | 65 | template 66 | void backward_algorithmic_softmax( 67 | T left_antecedent, U right_antecedent, 68 | V split_weights, 69 | W backptr, 70 | 71 | A gradient_left_antecedent, B gradient_right_antecedent, 72 | const float gradient_consequent, 73 | C gradient_split_weights, 74 | D gradient_backptr, 75 | 76 | unsigned size 77 | ) 78 | { 79 | add_cwise_mult(gradient_backptr, split_weights, gradient_consequent, size); 80 | add_cwise_mult(gradient_split_weights, backptr, gradient_consequent, size); 81 | 82 | backprop_softmax(gradient_split_weights, gradient_backptr, split_weights, backptr, size); 83 | 84 | add(gradient_left_antecedent, gradient_split_weights, size); 85 | add(gradient_right_antecedent, gradient_split_weights, size); 86 | } 87 | 88 | 89 | template 90 | float forward_entropy_reg( 91 | T left_antecedent, U right_antecedent, 92 | V split_weights, 93 | W backptr, 94 | unsigned size 95 | ) 96 | { 97 | cwise_add(split_weights, left_antecedent, right_antecedent, size); 98 | softmax(backptr, split_weights, size); 99 | float m = max(split_weights, size); 100 | float s = 0; 101 | for (unsigned i = 0 ; i < size ; ++i, ++split_weights) 102 | s += std::exp(*split_weights-m); 103 | return m + std::log(s); 104 | } 105 | 106 | 107 | template 108 | void backward_entropy_reg( 109 | T left_antecedent, U right_antecedent, 110 | V split_weights, 111 | W backptr, 112 | 113 | A gradient_left_antecedent, B gradient_right_antecedent, 114 | 
const float gradient_consequent, 115 | C gradient_split_weights, 116 | D gradient_backptr, 117 | 118 | unsigned size 119 | ) 120 | { 121 | add_cwise_mult(gradient_split_weights, backptr, gradient_consequent, size); 122 | 123 | backprop_softmax(gradient_split_weights, gradient_backptr, split_weights, backptr, size); 124 | 125 | add(gradient_left_antecedent, gradient_split_weights, size); 126 | add(gradient_right_antecedent, gradient_split_weights, size); 127 | } 128 | 129 | } -------------------------------------------------------------------------------- /lib/include/diffdp/dynet/args.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace diffdp 4 | { 5 | 6 | enum struct DiscreteMode 7 | { 8 | Null, // do not backpropagate 9 | StraightThrough, // discrete output, copy input gradient 10 | ForwardRegularized, // differentiable surrogate 11 | BackwardRegularized // forward: discrete, backward: differentiable surrogate 12 | }; 13 | 14 | } -------------------------------------------------------------------------------- /lib/include/diffdp/dynet/binary_phrase.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "dynet/expr.h" 10 | #include "dynet/tensor-eigen.h" 11 | #include "dynet/nodes-impl-macros.h" 12 | #include "dynet/nodes-def-macros.h" 13 | 14 | #include "diffdp/dynet/args.h" 15 | #include "diffdp/algorithm/binary_phrase.h" 16 | 17 | namespace dynet 18 | { 19 | 20 | Expression algorithmic_differentiable_binary_phrase_structure( 21 | const Expression &x, 22 | diffdp::DiscreteMode mode, 23 | std::vector *batch_sizes = nullptr 24 | ); 25 | 26 | Expression entropy_regularized_binary_phrase_structure( 27 | const Expression &x, 28 | diffdp::DiscreteMode mode, 29 | std::vector *batch_sizes = nullptr 30 | ); 31 | 32 | struct AlgorithmicDifferentiableBinaryPhraseStructure : 33 | public dynet::Node 34 | { 35 | const diffdp::DiscreteMode mode; 36 | std::vector* batch_sizes = nullptr; 37 | 38 | std::vector _ce_ptr; 39 | 40 | explicit AlgorithmicDifferentiableBinaryPhraseStructure( 41 | const std::initializer_list& a, 42 | diffdp::DiscreteMode mode, 43 | std::vector* batch_sizes 44 | ); 45 | 46 | DYNET_NODE_DEFINE_DEV_IMPL() 47 | 48 | virtual bool supports_multibatch() const override; 49 | size_t aux_storage_size() const override; 50 | 51 | virtual ~AlgorithmicDifferentiableBinaryPhraseStructure(); 52 | }; 53 | 54 | struct EntropyRegularizedBinaryPhraseStructure : 55 | public dynet::Node 56 | { 57 | const diffdp::DiscreteMode mode; 58 | std::vector* batch_sizes = nullptr; 59 | 60 | std::vector _ce_ptr; 61 | 62 | explicit EntropyRegularizedBinaryPhraseStructure( 63 | const std::initializer_list& a, 64 | diffdp::DiscreteMode mode, 65 | std::vector* batch_sizes 66 | ); 67 | 68 | DYNET_NODE_DEFINE_DEV_IMPL() 69 | 70 | virtual bool supports_multibatch() const override; 71 | size_t aux_storage_size() const override; 72 | 73 | virtual ~EntropyRegularizedBinaryPhraseStructure(); 74 | }; 75 | 76 | } 77 | -------------------------------------------------------------------------------- /lib/include/diffdp/dynet/eisner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "dynet/expr.h" 10 | #include "dynet/tensor-eigen.h" 11 | #include "dynet/nodes-impl-macros.h" 12 | #include 
"dynet/nodes-def-macros.h" 13 | 14 | #include "diffdp/dynet/args.h" 15 | #include "diffdp/algorithm/eisner.h" 16 | 17 | namespace diffdp 18 | { 19 | 20 | enum struct DependencyGraphMode 21 | { 22 | Adjacency, // adjacency matrix 23 | Compact 24 | }; 25 | 26 | std::pair from_adjacency(const std::pair dep, const diffdp::DependencyGraphMode mode); 27 | std::pair from_compact(const std::pair dep, const diffdp::DependencyGraphMode mode); 28 | 29 | } 30 | 31 | namespace dynet 32 | { 33 | 34 | Expression algorithmic_differentiable_eisner( 35 | const Expression &x, 36 | diffdp::DiscreteMode mode, 37 | diffdp::DependencyGraphMode input_graph = diffdp::DependencyGraphMode::Compact, 38 | diffdp::DependencyGraphMode output_graph = diffdp::DependencyGraphMode::Compact, 39 | bool with_root_arcs = true, 40 | std::vector *batch_sizes = nullptr 41 | ); 42 | 43 | Expression entropy_regularized_eisner( 44 | const Expression &x, 45 | diffdp::DiscreteMode mode, 46 | diffdp::DependencyGraphMode input_graph = diffdp::DependencyGraphMode::Compact, 47 | diffdp::DependencyGraphMode output_graph = diffdp::DependencyGraphMode::Compact, 48 | bool with_root_arcs = true, 49 | std::vector *batch_sizes = nullptr 50 | ); 51 | 52 | struct AlgorithmicDifferentiableEisner : 53 | public dynet::Node 54 | { 55 | const diffdp::DiscreteMode mode; 56 | const diffdp::DependencyGraphMode input_graph; 57 | const diffdp::DependencyGraphMode output_graph; 58 | bool with_root_arcs; 59 | std::vector* batch_sizes = nullptr; 60 | 61 | std::vector _ce_ptr; 62 | 63 | explicit AlgorithmicDifferentiableEisner( 64 | const std::initializer_list& a, 65 | diffdp::DiscreteMode mode, 66 | diffdp::DependencyGraphMode input_graph, 67 | diffdp::DependencyGraphMode output_graph, 68 | bool with_root_arcs, 69 | std::vector* batch_sizes 70 | ); 71 | 72 | DYNET_NODE_DEFINE_DEV_IMPL() 73 | 74 | virtual bool supports_multibatch() const override; 75 | size_t aux_storage_size() const override; 76 | 77 | virtual ~AlgorithmicDifferentiableEisner(); 78 | }; 79 | 80 | struct EntropyRegularizedEisner : 81 | public dynet::Node 82 | { 83 | const diffdp::DiscreteMode mode; 84 | const diffdp::DependencyGraphMode input_graph; 85 | const diffdp::DependencyGraphMode output_graph; 86 | bool with_root_arcs; 87 | std::vector* batch_sizes = nullptr; 88 | 89 | std::vector _ce_ptr; 90 | 91 | explicit EntropyRegularizedEisner( 92 | const std::initializer_list& a, 93 | diffdp::DiscreteMode mode, 94 | diffdp::DependencyGraphMode input_graph, 95 | diffdp::DependencyGraphMode output_graph, 96 | bool with_root_arcs, 97 | std::vector* batch_sizes 98 | ); 99 | 100 | DYNET_NODE_DEFINE_DEV_IMPL() 101 | 102 | virtual bool supports_multibatch() const override; 103 | size_t aux_storage_size() const override; 104 | 105 | virtual ~EntropyRegularizedEisner(); 106 | }; 107 | 108 | 109 | } 110 | -------------------------------------------------------------------------------- /lib/include/diffdp/dynet/matrix_tree_theorem.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "dynet/expr.h" 5 | #include "dynet/nodes-def-macros.h" 6 | 7 | namespace dynet 8 | { 9 | 10 | Expression matrix_tree_theorem(const Expression &weights); 11 | 12 | 13 | struct MatrixTreeTheorem : 14 | public dynet::Node 15 | { 16 | explicit MatrixTreeTheorem( 17 | const std::initializer_list& a 18 | ); 19 | 20 | DYNET_NODE_DEFINE_DEV_IMPL() 21 | 22 | virtual bool supports_multibatch() const override; 23 | size_t aux_storage_size() const override; 24 | }; 
25 | 
26 | 
27 | }
-------------------------------------------------------------------------------- /lib/include/diffdp/math.h: --------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | /**
4 | * Very small header-only math library used to simplify
5 | * the implementation of continuous relaxations of dynamic programming algorithms.
6 | * 
7 | * Inputs/outputs are iterator-like objects that must be:
8 | * - copy-constructible
9 | * - dereferenceable
10 | * - incrementable
11 | * 
12 | * Author: Caio Corro
13 | */
14 | 
15 | #include <cmath>
16 | #include <limits>
17 | 
18 | namespace diffdp
19 | {
20 | 
21 | /**
22 | * Performs an element-wise sum of vectors input1 and input2.
23 | * The result is stored in output.
24 | * 
25 | * @param output Vector where the result will be stored
26 | * @param input1 First input vector
27 | * @param input2 Second input vector
28 | * @param size Size of the input vectors
29 | */
30 | template<class T, class U, class V>
31 | void cwise_add(T output, U input1, V input2, const unsigned size)
32 | {
33 | for (unsigned i = 0u; i < size; ++i, ++input1, ++input2, ++output)
34 | *output = *input1 + *input2;
35 | }
36 | 
37 | /**
38 | * Return the maximum element stored in a vector.
39 | * 
40 | * @param input Input vector
41 | * @param size Size of the input vector
42 | * @return The maximum element stored in the input vector
43 | */
44 | template<class T>
45 | float max(T input, const unsigned size)
46 | {
47 | float value = -std::numeric_limits<float>::infinity();
48 | for (unsigned i = 0u; i < size; ++i, ++input)
49 | value = std::max(value, *input);
50 | return value;
51 | }
52 | 
53 | /**
54 | * Divide each element of a vector by a given value.
55 | * The result is stored in place.
56 | * 
57 | * @param input Input/output vector
58 | * @param v Value to divide by
59 | * @param size Size of the input/output vector
60 | */
61 | template<class T>
62 | void inplace_cwise_div(T input, const float v, const unsigned size)
63 | {
64 | for (unsigned i = 0u; i < size; ++i, ++input)
65 | *input = *input / v;
66 | }
67 | 
68 | /**
69 | * Component-wise addition between two vectors.
70 | * The result is accumulated in the first argument.
71 | * 
72 | * @param output Output vector (accumulator)
73 | * @param input Input vector
74 | * @param size Size of the input vector
75 | */
76 | template<class T, class U>
77 | void add(T output, U input, const unsigned size)
78 | {
79 | for (unsigned i = 0u; i < size; ++i, ++input, ++output)
80 | *output += *input;
81 | }
82 | 
83 | /**
84 | * Perform a component-wise multiplication of the input vector with a scalar
85 | * and accumulate the result component-wise into the output.
86 | * 
87 | * @param output Output vector (accumulator)
88 | * @param input Input vector
89 | * @param v Scalar used for the multiplication
90 | * @param size Size of the input vector
91 | */
92 | template<class T, class U>
93 | void add_cwise_mult(T output, U input, const float v, const unsigned size)
94 | {
95 | for (unsigned i = 0u; i < size; ++i, ++input, ++output)
96 | *output += (*input) * v;
97 | }
98 | 
99 | /**
100 | * Return the dot product between the two input vectors.
101 | * 
102 | * @param input1 First input vector
103 | * @param input2 Second input vector
104 | * @param size Size of the input vectors
105 | * @return The dot product between the two input vectors
106 | */
107 | template<class T, class U>
108 | float dot(T input1, U input2, const unsigned size)
109 | {
110 | float ret = 0.f;
111 | for (unsigned i = 0u; i < size; ++i, ++input1, ++input2)
112 | ret += (*input1) * (*input2);
113 | return ret;
114 | }
115 | 
116 | /**
117 | * Exponentiate each element of a vector after first subtracting a scalar.
118 | * 
119 | * @param output Output vector
120 | * @param input Input vector
121 | * @param m Scalar to subtract
122 | * @param size Size of the input
123 | * @return The sum of the elements of the output vector (i.e. the partition function)
124 | */
125 | template<class T, class U>
126 | float exp_minus_cst(T output, U input, const float m, const unsigned size)
127 | {
128 | float ret = 0.f;
129 | for (unsigned i = 0u; i < size; ++i, ++input, ++output)
130 | {
131 | const float v = std::exp(*input - m);
132 | *output = v;
133 | ret += v;
134 | }
135 | return ret;
136 | }
137 | 
138 | /**
139 | * Compute the softmax of the input.
140 | * 
141 | * @param output Output vector
142 | * @param input Input vector
143 | * @param size Size of the input vector
144 | */
145 | template<class T, class U>
146 | void softmax(T output, U input, unsigned size) noexcept
147 | {
148 | // max-shifted exponentiation for numerical stability
149 | float m = max(input, size);
150 | float z = exp_minus_cst(output, input, m, size);
151 | inplace_cwise_div(output, z, size);
152 | }
153 | 
154 | /**
155 | * Backpropagate through a softmax function.
156 | * 
157 | * @param gradient_input Gradient w.r.t. the softmax input (accumulated)
158 | * @param gradient_output Gradient w.r.t. the softmax output
159 | * @param input Input of the softmax
160 | * @param output Output of the softmax (i.e.
it should be computed beforehand)
161 | * @param size Size of the input
162 | */
163 | template<class A, class B, class T, class U>
164 | void backprop_softmax(A gradient_input, B gradient_output, T input, U output, const unsigned size)
165 | {
166 | const float s = dot(gradient_output, output, size);
167 | for (unsigned i = 0; i < size; ++i, ++gradient_input, ++gradient_output, ++output)
168 | *gradient_input += (*output) * ((*gradient_output) - s);
169 | }
170 | 
171 | }
-------------------------------------------------------------------------------- /lib/src/algorithm/binary_phrase.cpp: --------------------------------------------------------------------------------
1 | #include "diffdp/algorithm/binary_phrase.h"
2 | 
3 | namespace diffdp
4 | {
5 | 
6 | BinaryPhraseStructureChart::BinaryPhraseStructureChart(unsigned size) :
7 | size(size),
8 | size_3d(size*size*size),
9 | size_2d(size*size),
10 | _memory(new float[size_3d * 2 + size_2d * 2]),
11 | _erase_memory(true),
12 | split_weights(size, _memory),
13 | backptr(size, _memory + 1u*size_3d),
14 | weight(size, _memory + 2u*size_3d),
15 | soft_selection(size, _memory + 2u*size_3d + 1u*size_2d)
16 | {}
17 | 
18 | BinaryPhraseStructureChart::BinaryPhraseStructureChart(unsigned size, float* mem) :
19 | size(size),
20 | size_3d(size*size*size),
21 | size_2d(size*size),
22 | _memory(mem),
23 | _erase_memory(false),
24 | split_weights(size, _memory),
25 | backptr(size, _memory + 1u*size_3d),
26 | weight(size, _memory + 2u*size_3d),
27 | soft_selection(size, _memory + 2u*size_3d + 1u*size_2d)
28 | {}
29 | 
30 | BinaryPhraseStructureChart::~BinaryPhraseStructureChart()
31 | {
32 | if (_erase_memory)
33 | delete[] _memory;
34 | }
35 | 
36 | void BinaryPhraseStructureChart::zeros()
37 | {
38 | std::fill(_memory, _memory + required_cells(size), float{});
39 | }
40 | 
41 | std::size_t BinaryPhraseStructureChart::required_memory(const unsigned size)
42 | {
43 | return
44 | 2 * Tensor3D<float>::required_memory(size)
45 | + 2 * Matrix<float>::required_memory(size)
46 | ;
47 | }
48 | 
49 | unsigned BinaryPhraseStructureChart::required_cells(const unsigned size)
50 | {
51 | return
52 | 2 * Tensor3D<float>::required_cells(size)
53 | + 2 * Matrix<float>::required_cells(size)
54 | ;
55 | }
56 | 
57 | 
58 | AlgorithmicDifferentiableBinaryPhraseStructure::AlgorithmicDifferentiableBinaryPhraseStructure(const unsigned t_size) :
59 | _size(t_size),
60 | chart_forward(std::make_shared<BinaryPhraseStructureChart>(_size)),
61 | chart_backward(std::make_shared<BinaryPhraseStructureChart>(_size))
62 | {}
63 | 
64 | AlgorithmicDifferentiableBinaryPhraseStructure::AlgorithmicDifferentiableBinaryPhraseStructure(std::shared_ptr<BinaryPhraseStructureChart> chart_forward, std::shared_ptr<BinaryPhraseStructureChart> chart_backward) :
65 | _size(chart_forward->size),
66 | chart_forward(chart_forward),
67 | chart_backward(chart_backward)
68 | {}
69 | 
70 | void AlgorithmicDifferentiableBinaryPhraseStructure::forward_maximize(std::shared_ptr<BinaryPhraseStructureChart>& chart_forward)
71 | {
72 | const unsigned size = chart_forward->size;
73 | for (unsigned l = 1u; l < size; ++l)
74 | {
75 | for (unsigned i = 0u; i < size - l; ++i)
76 | {
77 | unsigned j = i + l;
78 | 
79 | // use += because we initialized them with arc weights
80 | chart_forward->weight(i, j) += forward_algorithmic_softmax(
81 | chart_forward->weight.iter2(i, i), chart_forward->weight.iter1(i + 1, j),
82 | chart_forward->split_weights.iter3(i, j, i),
83 | chart_forward->backptr.iter3(i, j, i),
84 | l
85 | );
86 | }
87 | }
88 | }
89 | 
90 | void AlgorithmicDifferentiableBinaryPhraseStructure::forward_backtracking(std::shared_ptr<BinaryPhraseStructureChart>& chart_forward)
91 | {
92 | const unsigned size = chart_forward->size;
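// the span covering the whole sentence is always part of the parse, so
// backtracking starts from a soft-selection weight of 1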
93 | chart_forward->soft_selection(0, size - 1) = 1.0f;
94 | 
95 | for (unsigned l = size - 1; l >= 1; --l)
96 | {
97 | for (unsigned i = 0u; i < size - l; ++i)
98 | {
99 | unsigned j = i + l;
100 | diffdp::forward_backtracking(
101 | chart_forward->soft_selection.iter2(i, i), chart_forward->soft_selection.iter1(i + 1, j),
102 | chart_forward->soft_selection(i, j),
103 | chart_forward->backptr.iter3(i, j, i),
104 | l
105 | );
106 | }
107 | }
108 | }
109 | 
110 | void AlgorithmicDifferentiableBinaryPhraseStructure::backward_backtracking(std::shared_ptr<BinaryPhraseStructureChart>& chart_forward, std::shared_ptr<BinaryPhraseStructureChart>& chart_backward)
111 | {
112 | const unsigned size = chart_forward->size;
113 | 
114 | for (unsigned l = 1; l < size ; ++l)
115 | {
116 | for (unsigned i = 0; i < size - l; ++i)
117 | {
118 | unsigned j = i + l;
119 | 
120 | diffdp::backward_backtracking(
121 | chart_forward->soft_selection.iter2(i, i), chart_forward->soft_selection.iter1(i + 1, j),
122 | chart_forward->soft_selection(i, j),
123 | chart_forward->backptr.iter3(i, j, i),
124 | 
125 | chart_backward->soft_selection.iter2(i, i), chart_backward->soft_selection.iter1(i + 1, j),
126 | &chart_backward->soft_selection(i, j),
127 | chart_backward->backptr.iter3(i, j, i),
128 | 
129 | l
130 | );
131 | }
132 | }
133 | 
134 | }
135 | 
136 | void AlgorithmicDifferentiableBinaryPhraseStructure::backward_maximize(std::shared_ptr<BinaryPhraseStructureChart>& chart_forward, std::shared_ptr<BinaryPhraseStructureChart>& chart_backward)
137 | {
138 | const unsigned size = chart_forward->size;
139 | 
140 | for (unsigned l = size - 1; l >= 1; --l)
141 | {
142 | for (unsigned i = 0; i < size - l; ++i)
143 | {
144 | unsigned j = i + l;
145 | 
146 | backward_algorithmic_softmax(
147 | chart_forward->weight.iter2(i, i), chart_forward->weight.iter1(i + 1, j),
148 | chart_forward->split_weights.iter3(i, j, i),
149 | chart_forward->backptr.iter3(i, j, i),
150 | 
151 | chart_backward->weight.iter2(i, i), chart_backward->weight.iter1(i + 1, j),
152 | chart_backward->weight(i, j),
153 | chart_backward->split_weights.iter3(i, j, i),
154 | chart_backward->backptr.iter3(i, j, i),
155 | 
156 | l
157 | );
158 | }
159 | }
160 | }
161 | 
162 | unsigned AlgorithmicDifferentiableBinaryPhraseStructure::size() const
163 | {
164 | return _size;
165 | }
166 | 
167 | float AlgorithmicDifferentiableBinaryPhraseStructure::output(const unsigned left, const unsigned right) const
168 | {
169 | return chart_forward->soft_selection(left, right);
170 | }
171 | 
172 | float AlgorithmicDifferentiableBinaryPhraseStructure::gradient(const unsigned left, const unsigned right) const
173 | {
174 | return chart_backward->weight(left, right);
175 | }
176 | 
177 | 
178 | 
179 | EntropyRegularizedBinaryPhraseStructure::EntropyRegularizedBinaryPhraseStructure(const unsigned t_size) :
180 | _size(t_size),
181 | chart_forward(std::make_shared<BinaryPhraseStructureChart>(_size)),
182 | chart_backward(std::make_shared<BinaryPhraseStructureChart>(_size))
183 | {}
184 | 
185 | EntropyRegularizedBinaryPhraseStructure::EntropyRegularizedBinaryPhraseStructure(std::shared_ptr<BinaryPhraseStructureChart> chart_forward, std::shared_ptr<BinaryPhraseStructureChart> chart_backward) :
186 | _size(chart_forward->size),
187 | chart_forward(chart_forward),
188 | chart_backward(chart_backward)
189 | {}
190 | 
191 | 
192 | void EntropyRegularizedBinaryPhraseStructure::forward_maximize(std::shared_ptr<BinaryPhraseStructureChart>& chart_forward)
193 | {
194 | const unsigned size = chart_forward->size;
195 | for (unsigned l = 1u; l < size; ++l)
196 | {
197 | for (unsigned i = 0u; i < size - l; ++i)
198 | {
199 | unsigned j = i + l;
200 | 
201 | // use += because we initialized them with arc weights
202 | chart_forward->weight(i, j) +=
forward_entropy_reg( 203 | chart_forward->weight.iter2(i, i), chart_forward->weight.iter1(i + 1, j), 204 | chart_forward->split_weights.iter3(i, j, i), 205 | chart_forward->backptr.iter3(i, j, i), 206 | l 207 | ); 208 | } 209 | } 210 | } 211 | 212 | void EntropyRegularizedBinaryPhraseStructure::forward_backtracking(std::shared_ptr& chart_forward) 213 | { 214 | const unsigned size = chart_forward->size; 215 | chart_forward->soft_selection(0, size - 1) = 1.0f; 216 | 217 | for (unsigned l = size - 1; l >= 1; --l) 218 | { 219 | for (unsigned i = 0u; i < size - l; ++i) 220 | { 221 | unsigned j = i + l; 222 | diffdp::forward_backtracking( 223 | chart_forward->soft_selection.iter2(i, i), chart_forward->soft_selection.iter1(i + 1, j), 224 | chart_forward->soft_selection(i, j), 225 | chart_forward->backptr.iter3(i, j, i), 226 | l 227 | ); 228 | } 229 | } 230 | } 231 | 232 | 233 | unsigned EntropyRegularizedBinaryPhraseStructure::size() const 234 | { 235 | return _size; 236 | } 237 | 238 | float EntropyRegularizedBinaryPhraseStructure::output(const unsigned left, const unsigned right) const 239 | { 240 | return chart_forward->soft_selection(left, right); 241 | } 242 | 243 | float EntropyRegularizedBinaryPhraseStructure::gradient(const unsigned left, const unsigned right) const 244 | { 245 | return chart_backward->weight(left, right); 246 | } 247 | 248 | void EntropyRegularizedBinaryPhraseStructure::backward_backtracking(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward) 249 | { 250 | const unsigned size = chart_forward->size; 251 | 252 | for (unsigned l = 1; l < size ; ++l) 253 | { 254 | for (unsigned i = 0; i < size - l; ++i) 255 | { 256 | unsigned j = i + l; 257 | 258 | diffdp::backward_backtracking( 259 | chart_forward->soft_selection.iter2(i, i), chart_forward->soft_selection.iter1(i + 1, j), 260 | chart_forward->soft_selection(i, j), 261 | chart_forward->backptr.iter3(i, j, i), 262 | 263 | chart_backward->soft_selection.iter2(i, i), chart_backward->soft_selection.iter1(i + 1, j), 264 | &chart_backward->soft_selection(i, j), 265 | chart_backward->backptr.iter3(i, j, i), 266 | 267 | l 268 | ); 269 | } 270 | } 271 | } 272 | void EntropyRegularizedBinaryPhraseStructure::backward_maximize(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward) 273 | { 274 | const unsigned size = chart_forward->size; 275 | 276 | for (unsigned l = size - 1; l >= 1; --l) 277 | { 278 | for (unsigned i = 0; i < size - l; ++i) 279 | { 280 | unsigned j = i + l; 281 | 282 | backward_entropy_reg( 283 | chart_forward->weight.iter2(i, i), chart_forward->weight.iter1(i + 1, j), 284 | chart_forward->split_weights.iter3(i, j, i), 285 | chart_forward->backptr.iter3(i, j, i), 286 | 287 | chart_backward->weight.iter2(i, i), chart_backward->weight.iter1(i + 1, j), 288 | chart_backward->weight(i, j), 289 | chart_backward->split_weights.iter3(i, j, i), 290 | chart_backward->backptr.iter3(i, j, i), 291 | 292 | l 293 | ); 294 | } 295 | } 296 | } 297 | } -------------------------------------------------------------------------------- /lib/src/algorithm/eisner.cpp: -------------------------------------------------------------------------------- 1 | #include "diffdp/algorithm/eisner.h" 2 | 3 | namespace diffdp 4 | { 5 | 6 | EisnerChart::EisnerChart(unsigned size) : 7 | size(size), 8 | size_3d(size*size*size), 9 | size_2d(size*size), 10 | _memory(new float[size_3d * 8 + size_2d * 8]), 11 | _erase_memory(true), 12 | a_cleft(size, _memory), 13 | a_cright(size, _memory + 1u*size_3d), 14 | a_uleft(size, _memory + 
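// Layout note (single allocation): the eight size^3 charts (a_*/b_* for the
// four item types) are packed first, followed by the eight size^2 matrices
// (c_* item values and soft_c_* selections); the offsets below advance in
// multiples of size_3d = size^3 and size_2d = size^2.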
2u*size_3d), 15 | a_uright(size, _memory + 3u*size_3d), 16 | b_cleft(size, _memory + 4u*size_3d), 17 | b_cright(size, _memory + 5u*size_3d), 18 | b_uleft(size, _memory + 6u*size_3d), 19 | b_uright(size, _memory + 7u*size_3d), 20 | c_cleft(size, _memory + 8u*size_3d), 21 | c_cright(size, _memory + 8u*size_3d + 1u*size_2d), 22 | c_uleft(size, _memory + 8u*size_3d + 2u*size_2d), 23 | c_uright(size, _memory + 8u*size_3d + 3u*size_2d), 24 | soft_c_cleft(size, _memory + 8u*size_3d + 4u*size_2d), 25 | soft_c_cright(size, _memory + 8u*size_3d + 5u*size_2d), 26 | soft_c_uleft(size, _memory + 8u*size_3d + 6u*size_2d), 27 | soft_c_uright(size, _memory + 8u*size_3d + 7u*size_2d) 28 | {} 29 | 30 | EisnerChart::EisnerChart(unsigned size, float* mem) : 31 | size(size), 32 | size_3d(size*size*size), 33 | size_2d(size*size), 34 | _memory(mem), 35 | _erase_memory(false), 36 | a_cleft(size, mem), 37 | a_cright(size, mem + 1u*size_3d), 38 | a_uleft(size, mem + 2u*size_3d), 39 | a_uright(size, mem + 3u*size_3d), 40 | b_cleft(size, mem + 4u*size_3d), 41 | b_cright(size, mem + 5u*size_3d), 42 | b_uleft(size, mem + 6u*size_3d), 43 | b_uright(size, mem + 7u*size_3d), 44 | c_cleft(size, mem + 8u*size_3d), 45 | c_cright(size, mem + 8u*size_3d + 1u*size_2d), 46 | c_uleft(size, mem + 8u*size_3d + 2u*size_2d), 47 | c_uright(size, mem + 8u*size_3d + 3u*size_2d), 48 | soft_c_cleft(size, mem + 8u*size_3d + 4u*size_2d), 49 | soft_c_cright(size, mem + 8u*size_3d + 5u*size_2d), 50 | soft_c_uleft(size, mem + 8u*size_3d + 6u*size_2d), 51 | soft_c_uright(size, mem + 8u*size_3d + 7u*size_2d) 52 | {} 53 | 54 | EisnerChart::~EisnerChart() 55 | { 56 | if (_erase_memory) 57 | delete[] _memory; 58 | } 59 | 60 | void EisnerChart::zeros() 61 | { 62 | std::fill(_memory, _memory + size_3d * 8 + size_2d * 8, float{}); 63 | } 64 | 65 | std::size_t EisnerChart::required_memory(const unsigned size) 66 | { 67 | return 68 | 8 * Tensor3D::required_memory(size) 69 | + 8 * Matrix::required_memory(size) 70 | ; 71 | } 72 | 73 | unsigned EisnerChart::required_cells(const unsigned size) 74 | { 75 | return 76 | 8 * Tensor3D::required_cells(size) 77 | + 8 * Matrix::required_cells(size) 78 | ; 79 | } 80 | 81 | 82 | AlgorithmicDifferentiableEisner::AlgorithmicDifferentiableEisner(const unsigned t_size) : 83 | _size(t_size), 84 | chart_forward(std::make_shared(_size)), 85 | chart_backward(std::make_shared(_size)) 86 | {} 87 | 88 | AlgorithmicDifferentiableEisner::AlgorithmicDifferentiableEisner(std::shared_ptr chart_forward, std::shared_ptr chart_backward) : 89 | _size(chart_forward->size), 90 | chart_forward(chart_forward), 91 | chart_backward(chart_backward) 92 | {} 93 | 94 | void AlgorithmicDifferentiableEisner::forward_maximize(std::shared_ptr& chart_forward) 95 | { 96 | const unsigned size = chart_forward->size; 97 | for (unsigned l = 1u; l < size; ++l) 98 | { 99 | for (unsigned i = 0u; i < size - l; ++i) 100 | { 101 | unsigned j = i + l; 102 | 103 | // use += because we initialized them with arc weights 104 | chart_forward->c_uright(i, j) += forward_algorithmic_softmax( 105 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 106 | chart_forward->a_uright.iter3(i, j, i), 107 | chart_forward->b_uright.iter3(i, j, i), 108 | l 109 | ); 110 | 111 | if (i > 0u) // because the root cannot be the modifier 112 | { 113 | chart_forward->c_uleft(i, j) += forward_algorithmic_softmax( 114 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 115 | chart_forward->a_uleft.iter3(i, j, i), 116 | 
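// Reminder on Eisner items: c_uright / c_uleft are "incomplete" spans built
// from two adjacent complete spans (they commit to the arc i -> j or j -> i),
// while c_cright / c_cleft are "complete" spans built from an incomplete
// item extended by a complete one. The i > 0u guards skip items in which
// the artificial root (position 0) would act as a modifier.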
chart_forward->b_uleft.iter3(i, j, i), 117 | l 118 | ); 119 | } 120 | 121 | chart_forward->c_cright(i, j) = forward_algorithmic_softmax( 122 | chart_forward->c_uright.iter2(i, i + 1), chart_forward->c_cright.iter1(i + 1, j), 123 | chart_forward->a_cright.iter3(i, j, i + 1), 124 | chart_forward->b_cright.iter3(i, j, i + 1), 125 | l 126 | ); 127 | 128 | if (i > 0u) 129 | { 130 | chart_forward->c_cleft(i, j) = forward_algorithmic_softmax( 131 | chart_forward->c_cleft.iter2(i, i), chart_forward->c_uleft.iter1(i, j), 132 | chart_forward->a_cleft.iter3(i, j, i), 133 | chart_forward->b_cleft.iter3(i, j, i), 134 | l 135 | ); 136 | } 137 | } 138 | } 139 | } 140 | 141 | void AlgorithmicDifferentiableEisner::forward_backtracking(std::shared_ptr& chart_forward) 142 | { 143 | const unsigned size = chart_forward->size; 144 | chart_forward->soft_c_cright(0, size - 1) = 1.0f; 145 | 146 | for (unsigned l = size - 1; l >= 1; --l) 147 | { 148 | for (unsigned i = 0u; i < size - l; ++i) 149 | { 150 | unsigned j = i + l; 151 | 152 | diffdp::forward_backtracking( 153 | chart_forward->soft_c_uright.iter2(i, i + 1), chart_forward->soft_c_cright.iter1(i + 1, j), 154 | chart_forward->soft_c_cright(i, j), 155 | chart_forward->b_cright.iter3(i, j, i + 1), 156 | l 157 | ); 158 | 159 | if (i > 0u) 160 | { 161 | diffdp::forward_backtracking( 162 | chart_forward->soft_c_cleft.iter2(i, i), chart_forward->soft_c_uleft.iter1(i, j), 163 | chart_forward->soft_c_cleft(i, j), 164 | chart_forward->b_cleft.iter3(i, j, i), 165 | l 166 | ); 167 | } 168 | 169 | diffdp::forward_backtracking( 170 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 171 | chart_forward->soft_c_uright(i, j), 172 | chart_forward->b_uright.iter3(i, j, i), 173 | l 174 | ); 175 | 176 | 177 | if (i > 0u) 178 | { 179 | diffdp::forward_backtracking( 180 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 181 | chart_forward->soft_c_uleft(i, j), 182 | chart_forward->b_uleft.iter3(i, j, i), 183 | l 184 | ); 185 | } 186 | } 187 | } 188 | } 189 | 190 | void AlgorithmicDifferentiableEisner::backward_backtracking(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward) 191 | { 192 | const unsigned size = chart_forward->size; 193 | 194 | for (unsigned l = 1; l < size ; ++l) 195 | { 196 | for (unsigned i = 0; i < size - l; ++i) 197 | { 198 | unsigned j = i + l; 199 | 200 | if (i > 0u) 201 | { 202 | diffdp::backward_backtracking( 203 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 204 | chart_forward->soft_c_uleft(i, j), 205 | chart_forward->b_uleft.iter3(i, j, i), 206 | 207 | chart_backward->soft_c_cright.iter2(i, i), chart_backward->soft_c_cleft.iter1(i + 1, j), 208 | &chart_backward->soft_c_uleft(i, j), 209 | chart_backward->b_uleft.iter3(i, j, i), 210 | 211 | l 212 | ); 213 | } 214 | 215 | diffdp::backward_backtracking( 216 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 217 | chart_forward->soft_c_uright(i, j), 218 | chart_forward->b_uright.iter3(i, j, i), 219 | 220 | chart_backward->soft_c_cright.iter2(i, i), chart_backward->soft_c_cleft.iter1(i + 1, j), 221 | &chart_backward->soft_c_uright(i, j), 222 | chart_backward->b_uright.iter3(i, j, i), 223 | 224 | l 225 | ); 226 | 227 | if (i > 0u) 228 | { 229 | diffdp::backward_backtracking( 230 | chart_forward->soft_c_cleft.iter2(i, i), chart_forward->soft_c_uleft.iter1(i, j), 231 | chart_forward->soft_c_cleft(i, j), 232 | chart_forward->b_cleft.iter3(i, j, i), 
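// Reverse-mode note: backward_backtracking revisits the items of
// forward_backtracking in the opposite order (span length l increasing
// rather than decreasing), accumulating adjoints of the soft selections
// into the backward chart; backward_maximize below then differentiates
// through the softmax recursion itself.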
233 | 234 | chart_backward->soft_c_cleft.iter2(i, i), chart_backward->soft_c_uleft.iter1(i, j), 235 | &chart_backward->soft_c_cleft(i, j), 236 | chart_backward->b_cleft.iter3(i, j, i), 237 | 238 | l 239 | ); 240 | } 241 | 242 | diffdp::backward_backtracking( 243 | chart_forward->soft_c_uright.iter2(i, i+1), chart_forward->soft_c_cright.iter1(i+1, j), 244 | chart_forward->soft_c_cright(i, j), 245 | chart_forward->b_cright.iter3(i, j, i + 1), 246 | 247 | chart_backward->soft_c_uright.iter2(i, i+1), chart_backward->soft_c_cright.iter1(i+1, j), 248 | &chart_backward->soft_c_cright(i, j), 249 | chart_backward->b_cright.iter3(i, j, i + 1), 250 | 251 | l 252 | ); 253 | } 254 | } 255 | 256 | } 257 | 258 | void AlgorithmicDifferentiableEisner::backward_maximize(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward) 259 | { 260 | const unsigned size = chart_forward->size; 261 | 262 | for (unsigned l = size - 1; l >= 1; --l) 263 | { 264 | for (unsigned i = 0; i < size - l; ++i) 265 | { 266 | unsigned j = i + l; 267 | 268 | if (i > 0u) 269 | { 270 | backward_algorithmic_softmax( 271 | chart_forward->c_cleft.iter2(i, i), chart_forward->c_uleft.iter1(i, j), 272 | chart_forward->a_cleft.iter3(i, j, i), 273 | chart_forward->b_cleft.iter3(i, j, i), 274 | 275 | chart_backward->c_cleft.iter2(i, i), chart_backward->c_uleft.iter1(i, j), 276 | chart_backward->c_cleft(i, j), 277 | chart_backward->a_cleft.iter3(i, j, i), 278 | chart_backward->b_cleft.iter3(i, j, i), 279 | 280 | l 281 | ); 282 | } 283 | 284 | backward_algorithmic_softmax( 285 | chart_forward->c_uright.iter2(i, i + 1), chart_forward->c_cright.iter1(i + 1, j), 286 | chart_forward->a_cright.iter3(i, j, i + 1), 287 | chart_forward->b_cright.iter3(i, j, i + 1), 288 | 289 | chart_backward->c_uright.iter2(i, i + 1), chart_backward->c_cright.iter1(i + 1, j), 290 | chart_backward->c_cright(i, j), 291 | chart_backward->a_cright.iter3(i, j, i + 1), 292 | chart_backward->b_cright.iter3(i, j, i + 1), 293 | 294 | l 295 | ); 296 | 297 | if (i > 0u) 298 | { 299 | backward_algorithmic_softmax( 300 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 301 | chart_forward->a_uleft.iter3(i, j, i), 302 | chart_forward->b_uleft.iter3(i, j, i), 303 | 304 | chart_backward->c_cright.iter2(i, i), chart_backward->c_cleft.iter1(i + 1, j), 305 | chart_backward->c_uleft(i, j), 306 | chart_backward->a_uleft.iter3(i, j, i), 307 | chart_backward->b_uleft.iter3(i, j, i), 308 | 309 | l 310 | ); 311 | } 312 | 313 | backward_algorithmic_softmax( 314 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 315 | chart_forward->a_uright.iter3(i, j, i), 316 | chart_forward->b_uright.iter3(i, j, i), 317 | 318 | chart_backward->c_cright.iter2(i, i), chart_backward->c_cleft.iter1(i + 1, j), 319 | chart_backward->c_uright(i, j), 320 | chart_backward->a_uright.iter3(i, j, i), 321 | chart_backward->b_uright.iter3(i, j, i), 322 | 323 | l 324 | ); 325 | } 326 | } 327 | } 328 | 329 | unsigned AlgorithmicDifferentiableEisner::size() const 330 | { 331 | return _size; 332 | } 333 | 334 | float AlgorithmicDifferentiableEisner::output(const unsigned head, const unsigned mod) const 335 | { 336 | if (head < mod) 337 | return chart_forward->soft_c_uright(head, mod); 338 | else if (mod < head) 339 | return chart_forward->soft_c_uleft(mod, head); 340 | else 341 | return std::nanf(""); 342 | } 343 | 344 | float AlgorithmicDifferentiableEisner::gradient(const unsigned head, const unsigned mod) const 345 | { 346 | if (head < mod) 347 | return 
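// The gradient w.r.t. an arc is read from the backward chart of the
// corresponding incomplete item, mirroring how output() above reads the
// soft selections from the forward chart.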
chart_backward->c_uright(head, mod); 348 | else if (mod < head) 349 | return chart_backward->c_uleft(mod, head); 350 | else 351 | return std::nanf(""); 352 | } 353 | 354 | 355 | 356 | 357 | EntropyRegularizedEisner::EntropyRegularizedEisner(const unsigned t_size) : 358 | _size(t_size), 359 | chart_forward(std::make_shared(_size)), 360 | chart_backward(std::make_shared(_size)) 361 | {} 362 | 363 | EntropyRegularizedEisner::EntropyRegularizedEisner(std::shared_ptr chart_forward, std::shared_ptr chart_backward) : 364 | _size(chart_forward->size), 365 | chart_forward(chart_forward), 366 | chart_backward(chart_backward) 367 | {} 368 | 369 | 370 | void EntropyRegularizedEisner::forward_maximize(std::shared_ptr& chart_forward) 371 | { 372 | const unsigned size = chart_forward->size; 373 | 374 | for (unsigned l = 1u; l < size; ++l) 375 | { 376 | for (unsigned i = 0u; i < size - l; ++i) 377 | { 378 | unsigned j = i + l; 379 | 380 | chart_forward->c_uright(i, j) += forward_entropy_reg( 381 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 382 | chart_forward->a_uright.iter3(i, j, i), 383 | chart_forward->b_uright.iter3(i, j, i), 384 | l 385 | ); 386 | 387 | if (i > 0u) // because the root cannot be the modifier 388 | { 389 | chart_forward->c_uleft(i, j) += forward_entropy_reg( 390 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 391 | chart_forward->a_uleft.iter3(i, j, i), 392 | chart_forward->b_uleft.iter3(i, j, i), 393 | l 394 | ); 395 | } 396 | 397 | chart_forward->c_cright(i, j) = forward_entropy_reg( 398 | chart_forward->c_uright.iter2(i, i + 1), chart_forward->c_cright.iter1(i + 1, j), 399 | chart_forward->a_cright.iter3(i, j, i + 1), 400 | chart_forward->b_cright.iter3(i, j, i + 1), 401 | l 402 | ); 403 | 404 | if (i > 0u) 405 | { 406 | chart_forward->c_cleft(i, j) = forward_entropy_reg( 407 | chart_forward->c_cleft.iter2(i, i), chart_forward->c_uleft.iter1(i, j), 408 | chart_forward->a_cleft.iter3(i, j, i), 409 | chart_forward->b_cleft.iter3(i, j, i), 410 | l 411 | ); 412 | } 413 | } 414 | } 415 | } 416 | 417 | void EntropyRegularizedEisner::forward_backtracking(std::shared_ptr& chart_forward) 418 | { 419 | const unsigned size = chart_forward->size; 420 | 421 | chart_forward->soft_c_cright(0, size - 1) = 1.0f; 422 | 423 | for (unsigned l = size - 1; l >= 1; --l) 424 | { 425 | for (unsigned i = 0u; i < size - l; ++i) 426 | { 427 | unsigned j = i + l; 428 | 429 | diffdp::forward_backtracking( 430 | chart_forward->soft_c_uright.iter2(i, i + 1), chart_forward->soft_c_cright.iter1(i + 1, j), 431 | chart_forward->soft_c_cright(i, j), 432 | chart_forward->b_cright.iter3(i, j, i + 1), 433 | l 434 | ); 435 | 436 | if (i > 0u) 437 | { 438 | diffdp::forward_backtracking( 439 | chart_forward->soft_c_cleft.iter2(i, i), chart_forward->soft_c_uleft.iter1(i, j), 440 | chart_forward->soft_c_cleft(i, j), 441 | chart_forward->b_cleft.iter3(i, j, i), 442 | l 443 | ); 444 | } 445 | 446 | diffdp::forward_backtracking( 447 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 448 | chart_forward->soft_c_uright(i, j), 449 | chart_forward->b_uright.iter3(i, j, i), 450 | l 451 | ); 452 | 453 | 454 | if (i > 0u) 455 | { 456 | diffdp::forward_backtracking( 457 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 458 | chart_forward->soft_c_uleft(i, j), 459 | chart_forward->b_uleft.iter3(i, j, i), 460 | l 461 | ); 462 | } 463 | } 464 | } 465 | } 466 | 467 | 468 | unsigned 
EntropyRegularizedEisner::size() const 469 | { 470 | return _size; 471 | } 472 | 473 | float EntropyRegularizedEisner::output(const unsigned head, const unsigned mod) const 474 | { 475 | if (head < mod) 476 | return chart_forward->soft_c_uright(head, mod); 477 | else if (mod < head) 478 | return chart_forward->soft_c_uleft(mod, head); 479 | else 480 | return std::nanf(""); 481 | } 482 | 483 | float EntropyRegularizedEisner::gradient(const unsigned head, const unsigned mod) const 484 | { 485 | if (head < mod) 486 | return chart_backward->c_uright(head, mod); 487 | else if (mod < head) 488 | return chart_backward->c_uleft(mod, head); 489 | else 490 | return std::nanf(""); 491 | } 492 | 493 | } -------------------------------------------------------------------------------- /lib/src/builder/binary-phrase.cpp: -------------------------------------------------------------------------------- 1 | #include "diffdp/builder/binary-phrase.h" 2 | 3 | #include "diffdp/dynet/binary_phrase.h" 4 | #include "dytools/algorithms/span-parser.h" 5 | #include "dytools/utils.h" 6 | 7 | namespace diffdp 8 | { 9 | 10 | BinaryPhraseBuilder::BinaryPhraseBuilder(const BinaryPhraseSettings& settings) : 11 | settings(settings) 12 | {} 13 | 14 | void BinaryPhraseBuilder::new_graph(dynet::ComputationGraph& cg, bool training) 15 | { 16 | _cg = &cg; 17 | _training = training; 18 | } 19 | 20 | dynet::Expression BinaryPhraseBuilder::relaxed(const dynet::Expression& weights) 21 | { 22 | if (settings.type == BinaryPhraseType::AlgDiff) 23 | return relaxed_alg_diff(weights); 24 | else 25 | return relaxed_entropy_Reg(weights); 26 | } 27 | 28 | dynet::Expression BinaryPhraseBuilder::argmax(const dynet::Expression& weights) 29 | { 30 | const auto size = weights.dim().rows(); 31 | 32 | const auto p_weights = perturb(weights); 33 | const auto v_weights = as_vector(_cg->incremental_forward(p_weights)); 34 | 35 | const auto tree = dytools::binary_span_parser(size, v_weights); 36 | 37 | std::vector indices; 38 | for (const auto& span : tree) 39 | indices.push_back(span.first + span.second * size); 40 | std::vector values(indices.size(), 1.f); 41 | 42 | const auto output = dynet::input(*_cg, {size, size}, indices, values); 43 | return output; 44 | } 45 | 46 | 47 | dynet::Expression BinaryPhraseBuilder::relaxed_alg_diff(const dynet::Expression& weights) 48 | { 49 | const auto p_weights = perturb(weights); 50 | return dytools::force_cpu(dynet::algorithmic_differentiable_binary_phrase_structure, p_weights, DiscreteMode::ForwardRegularized, nullptr); 51 | } 52 | 53 | dynet::Expression BinaryPhraseBuilder::relaxed_entropy_Reg(const dynet::Expression& weights) 54 | { 55 | const auto p_weights = perturb(weights); 56 | return dytools::force_cpu(dynet::entropy_regularized_binary_phrase_structure, p_weights, DiscreteMode::ForwardRegularized, nullptr); 57 | } 58 | 59 | 60 | dynet::Expression BinaryPhraseBuilder::perturb(const dynet::Expression& arc_weights) 61 | { 62 | if (settings.perturb and _training) 63 | return arc_weights + dynet::random_gumbel(*_cg, arc_weights.dim()); 64 | else 65 | return arc_weights; 66 | } 67 | 68 | 69 | } -------------------------------------------------------------------------------- /lib/src/builder/dependency.cpp: -------------------------------------------------------------------------------- 1 | #include "diffdp/builder/dependency.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "dytools/functions/rooted_arborescence_marginals.h" 7 | #include "dytools/functions/masking.h" 8 | #include "diffdp/dynet/eisner.h" 9 | 
#include "dytools/utils.h" 10 | 11 | namespace diffdp 12 | { 13 | 14 | DependencyBuilder::DependencyBuilder(const DependencySettings& settings) : 15 | settings(settings) 16 | {} 17 | 18 | void DependencyBuilder::new_graph(dynet::ComputationGraph& cg, bool training) 19 | { 20 | _cg = &cg; 21 | _training = training; 22 | } 23 | 24 | dynet::Expression DependencyBuilder::relaxed(const dynet::Expression& arc_weights, std::vector* sizes, dynet::Expression* e_mask) 25 | { 26 | if (settings.type == DependencyType::Head) 27 | return relaxed_head(arc_weights, e_mask); 28 | else if (settings.type == DependencyType::NonProjective) 29 | return relaxed_nonprojective(arc_weights, sizes); 30 | else if (settings.type == DependencyType::ProjectiveAlgDiff) 31 | return relaxed_projective_alg_diff(arc_weights, sizes); 32 | else 33 | return relaxed_projective_entropy_reg(arc_weights, sizes); 34 | } 35 | 36 | dynet::Expression DependencyBuilder::relaxed_head(const dynet::Expression& arc_weights, dynet::Expression* e_mask) 37 | { 38 | if (e_mask != nullptr) 39 | if (e_mask->dim().rows() != 1 || e_mask->dim().cols() != arc_weights.dim().cols()) 40 | throw std::runtime_error("Relaxed Head: mask has the wrong dimension"); 41 | const auto p_arc_weights = perturb(arc_weights); 42 | 43 | // mask the diagonal 44 | const unsigned n_max_vertices = arc_weights.dim().rows(); 45 | const auto e_inf_mask = dytools::main_diagonal_mask(*_cg, {n_max_vertices, n_max_vertices}, -std::numeric_limits::infinity()); 46 | 47 | auto heads = dynet::softmax(p_arc_weights + e_inf_mask); 48 | 49 | if (e_mask != nullptr) 50 | heads = dynet::cmult(heads, *e_mask); 51 | 52 | // first column should be empty (the root word has no head) 53 | std::vector values(arc_weights.dim().cols(), 1.f); 54 | values[0] = 0.f; 55 | const auto mask = dynet::input(*_cg, {1, n_max_vertices}, values); 56 | heads = dynet::cmult(heads, mask); 57 | 58 | return heads; 59 | } 60 | 61 | dynet::Expression DependencyBuilder::relaxed_nonprojective(const dynet::Expression& arc_weights, std::vector* sizes) 62 | { 63 | const auto p_arc_weights = perturb(arc_weights); 64 | return dytools::rooted_arborescence_marginals(*_cg, p_arc_weights, sizes); 65 | } 66 | 67 | dynet::Expression DependencyBuilder::relaxed_projective_alg_diff(const dynet::Expression& arc_weights, std::vector* sizes) 68 | { 69 | const auto p_arc_weights = perturb(arc_weights); 70 | return dytools::force_cpu(dynet::algorithmic_differentiable_eisner, 71 | p_arc_weights, 72 | DiscreteMode::ForwardRegularized, 73 | DependencyGraphMode::Adjacency, 74 | DependencyGraphMode::Adjacency, 75 | true, 76 | sizes 77 | ); 78 | } 79 | 80 | dynet::Expression DependencyBuilder::relaxed_projective_entropy_reg(const dynet::Expression& arc_weights, std::vector* sizes) 81 | { 82 | const auto p_arc_weights = perturb(arc_weights); 83 | return dytools::force_cpu(dynet::entropy_regularized_eisner, 84 | p_arc_weights, 85 | DiscreteMode::ForwardRegularized, 86 | DependencyGraphMode::Adjacency, 87 | DependencyGraphMode::Adjacency, 88 | true, 89 | sizes 90 | ); 91 | } 92 | 93 | dynet::Expression DependencyBuilder::argmax(const dynet::Expression& arc_weights, std::vector* sizes, dynet::Expression* e_mask) 94 | { 95 | if (settings.type == DependencyType::Head) 96 | return argmax_head(arc_weights, e_mask); 97 | else if (settings.type == DependencyType::NonProjective) 98 | return argmax_nonprojective(arc_weights, sizes); 99 | else if (settings.type == DependencyType::ProjectiveAlgDiff) 100 | return argmax_projective_alg_diff(arc_weights, 
sizes); 101 | else 102 | return argmax_projective_entropy_reg(arc_weights, sizes); 103 | } 104 | 105 | dynet::Expression DependencyBuilder::argmax_head(const dynet::Expression&, dynet::Expression*) 106 | { 107 | throw std::runtime_error("Not implemented yet."); 108 | } 109 | 110 | dynet::Expression DependencyBuilder::argmax_nonprojective(const dynet::Expression&, std::vector*) 111 | { 112 | throw std::runtime_error("Not implemented yet."); 113 | } 114 | 115 | dynet::Expression DependencyBuilder::argmax_projective_alg_diff(const dynet::Expression&, std::vector*) 116 | { 117 | throw std::runtime_error("Not implemented yet."); 118 | } 119 | 120 | dynet::Expression DependencyBuilder::argmax_projective_entropy_reg(const dynet::Expression&, std::vector*) 121 | { 122 | throw std::runtime_error("Not implemented yet."); 123 | } 124 | 125 | 126 | dynet::Expression DependencyBuilder::perturb(const dynet::Expression& arc_weights) 127 | { 128 | if (settings.perturb and _training) 129 | return arc_weights + dynet::random_gumbel(*_cg, arc_weights.dim()); 130 | else 131 | return arc_weights; 132 | } 133 | 134 | 135 | } -------------------------------------------------------------------------------- /lib/src/chart.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "diffdp/chart.h" 4 | 5 | namespace diffdp 6 | { 7 | 8 | 9 | 10 | 11 | 12 | } 13 | -------------------------------------------------------------------------------- /lib/src/dynet/binary_phrase.cpp: -------------------------------------------------------------------------------- 1 | #include "diffdp/dynet/binary_phrase.h" 2 | #include "dynet/tensor-eigen.h" 3 | 4 | namespace dynet 5 | { 6 | 7 | Expression algorithmic_differentiable_binary_phrase_structure(const Expression& x, diffdp::DiscreteMode mode, std::vector* batch_sizes) 8 | { 9 | return Expression(x.pg, x.pg->add_function({x.i}, mode, batch_sizes)); 10 | } 11 | 12 | Expression entropy_regularized_binary_phrase_structure(const Expression& x, diffdp::DiscreteMode mode, std::vector* batch_sizes) 13 | { 14 | return Expression(x.pg, x.pg->add_function({x.i}, mode, batch_sizes)); 15 | } 16 | 17 | AlgorithmicDifferentiableBinaryPhraseStructure::AlgorithmicDifferentiableBinaryPhraseStructure( 18 | const std::initializer_list& a, 19 | diffdp::DiscreteMode mode, 20 | std::vector* batch_sizes 21 | ) : 22 | Node(a), 23 | mode(mode), 24 | batch_sizes(batch_sizes) 25 | { 26 | this->has_cuda_implemented = false; 27 | } 28 | 29 | bool AlgorithmicDifferentiableBinaryPhraseStructure::supports_multibatch() const 30 | { 31 | return true; 32 | } 33 | 34 | AlgorithmicDifferentiableBinaryPhraseStructure::~AlgorithmicDifferentiableBinaryPhraseStructure() 35 | { 36 | for (auto*& ptr : _ce_ptr) 37 | if (ptr != nullptr) 38 | { 39 | delete ptr; 40 | ptr = nullptr; 41 | } 42 | } 43 | 44 | std::string AlgorithmicDifferentiableBinaryPhraseStructure::as_string(const std::vector& arg_names) const { 45 | std::ostringstream s; 46 | s << "algorithmic_differentiable_eisner(" << arg_names[0] << ")"; 47 | return s.str(); 48 | } 49 | 50 | Dim AlgorithmicDifferentiableBinaryPhraseStructure::dim_forward(const std::vector& xs) const { 51 | DYNET_ARG_CHECK( 52 | xs.size() == 1 && xs[0].nd == 2 && xs[0].rows() == xs[0].cols(), 53 | "Bad input dimensions in AlgorithmicDifferentiableEisner: " << xs 54 | ); 55 | 56 | return dynet::Dim(xs[0]); 57 | } 58 | 59 | size_t AlgorithmicDifferentiableBinaryPhraseStructure::aux_storage_size() const { 60 | // 2 times because 
we have a forward and a backward chart 61 | const size_t dp_mem = 2 * diffdp::BinaryPhraseStructureChart::required_memory(dim.rows()); 62 | return dim.batch_elems() * dp_mem; 63 | } 64 | 65 | template 66 | void AlgorithmicDifferentiableBinaryPhraseStructure::forward_dev_impl( 67 | const MyDevice&, 68 | const std::vector& xs, 69 | Tensor& fx 70 | ) const { 71 | #ifdef __CUDACC__ 72 | DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::forward"); 73 | #else 74 | // TODO call zero only when necessary 75 | TensorTools::zero(fx); 76 | 77 | std::vector& _ce_ptr2 = 78 | const_cast&>(_ce_ptr); 79 | 80 | for (auto*& ptr : _ce_ptr2) 81 | if (ptr != nullptr) 82 | { 83 | delete ptr; 84 | ptr = nullptr; 85 | } 86 | 87 | if (_ce_ptr2.size() != xs[0]->d.batch_elems()) 88 | _ce_ptr2.resize(xs[0]->d.batch_elems(), nullptr); 89 | 90 | const unsigned max_input_dim = xs[0]->d.rows(); 91 | float* aux_fmem = static_cast(aux_mem); 92 | 93 | //#pragma omp parallel for 94 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 95 | { 96 | const unsigned eisner_dim = ( 97 | batch_sizes == nullptr 98 | ? max_input_dim 99 | : batch_sizes->at(batch) 100 | ); 101 | 102 | auto input = batch_matrix(*(xs[0]), batch); 103 | 104 | if (mode == diffdp::DiscreteMode::ForwardRegularized) 105 | { 106 | float* fmem = aux_fmem + batch * 2 * diffdp::BinaryPhraseStructureChart::required_cells(max_input_dim); 107 | //auto forward_chart = std::make_shared(eisner_dim, fmem); 108 | //auto backward_chart = std::make_shared(eisner_dim, fmem + diffdp::EisnerChart::required_cells(max_eisner_dim)); 109 | auto forward_chart = std::make_shared(eisner_dim, fmem); 110 | auto backward_chart = std::make_shared(eisner_dim); 111 | 112 | _ce_ptr2.at(batch) = new diffdp::AlgorithmicDifferentiableBinaryPhraseStructure(forward_chart, backward_chart); 113 | 114 | 115 | _ce_ptr2.at(batch)->forward( 116 | [&] (const unsigned left, const unsigned right) 117 | { 118 | return input(left, right); 119 | } 120 | ); 121 | 122 | auto output = batch_matrix(fx, batch); 123 | 124 | for (unsigned left = 0u ; left < eisner_dim ; ++left) 125 | { 126 | for (unsigned right = left+1; right < eisner_dim ; ++right) 127 | { 128 | const float a = _ce_ptr2[batch]->output(left, right); 129 | output(left, right) = a; 130 | } 131 | } 132 | } 133 | else 134 | { 135 | throw std::runtime_error("Not implemented: only ForwardRegularized can be used at the moment"); 136 | } 137 | } 138 | #endif 139 | } 140 | 141 | template 142 | void AlgorithmicDifferentiableBinaryPhraseStructure::backward_dev_impl( 143 | const MyDevice &, 144 | const std::vector& xs, 145 | const Tensor&, 146 | const Tensor& dEdf, 147 | unsigned, 148 | Tensor& dEdxi 149 | ) const { 150 | #ifdef __CUDACC__ 151 | DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::backward"); 152 | #else 153 | //#pragma omp parallel for 154 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 155 | { 156 | auto output_grad = batch_matrix(dEdxi, batch); 157 | auto input_grad = batch_matrix(dEdf, batch); 158 | 159 | auto& dp = *(_ce_ptr.at(batch)); 160 | 161 | dp.backward( 162 | [&] (unsigned left, unsigned right) -> float 163 | { 164 | return input_grad(left, right); 165 | } 166 | ); 167 | 168 | for (unsigned left = 0u ; left < dp.size() ; ++left) 169 | for (unsigned right = left + 1u; right < dp.size(); ++right) 170 | output_grad(left, right) += dp.gradient(left, right); 171 | } 172 | #endif 173 | } 174 | 175 | DYNET_NODE_INST_DEV_IMPL(AlgorithmicDifferentiableBinaryPhraseStructure) 176 | 177 
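// Usage sketch (illustrative only; this block assumes an n x n weight
// matrix filled elsewhere and standard DyNet calls, it is not part of the
// library API):
//
//   dynet::ComputationGraph cg;
//   std::vector<float> weights(n * n);  // span scores, column-major
//   auto x = dynet::input(cg, {n, n}, weights);
//   auto y = dynet::algorithmic_differentiable_binary_phrase_structure(
//       x, diffdp::DiscreteMode::ForwardRegularized, nullptr);
//   auto z = dynet::sum_elems(y);  // any scalar loss built from y works
//   cg.forward(z);
//   cg.backward(z);  // gradients flow through the relaxed parser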
| 178 | 179 | 180 | // ENTROPY Regularized 181 | 182 | EntropyRegularizedBinaryPhraseStructure::EntropyRegularizedBinaryPhraseStructure( 183 | const std::initializer_list& a, 184 | diffdp::DiscreteMode mode, 185 | std::vector* batch_sizes 186 | ) : 187 | Node(a), 188 | mode(mode), 189 | batch_sizes(batch_sizes) 190 | { 191 | this->has_cuda_implemented = false; 192 | } 193 | 194 | bool EntropyRegularizedBinaryPhraseStructure::supports_multibatch() const 195 | { 196 | return true; 197 | } 198 | 199 | EntropyRegularizedBinaryPhraseStructure::~EntropyRegularizedBinaryPhraseStructure() 200 | { 201 | for (auto*& ptr : _ce_ptr) 202 | if (ptr != nullptr) 203 | { 204 | delete ptr; 205 | ptr = nullptr; 206 | } 207 | } 208 | 209 | std::string EntropyRegularizedBinaryPhraseStructure::as_string(const std::vector& arg_names) const { 210 | std::ostringstream s; 211 | s << "algorithmic_differentiable_eisner(" << arg_names[0] << ")"; 212 | return s.str(); 213 | } 214 | 215 | Dim EntropyRegularizedBinaryPhraseStructure::dim_forward(const std::vector& xs) const { 216 | DYNET_ARG_CHECK( 217 | xs.size() == 1 && xs[0].nd == 2 && xs[0].rows() == xs[0].cols(), 218 | "Bad input dimensions in AlgorithmicDifferentiableEisner: " << xs 219 | ); 220 | 221 | return dynet::Dim(xs[0]); 222 | } 223 | 224 | size_t EntropyRegularizedBinaryPhraseStructure::aux_storage_size() const { 225 | // 2 times because we have a forward and a backward chart 226 | const size_t dp_mem = 2 * diffdp::BinaryPhraseStructureChart::required_memory(dim.rows()); 227 | return dim.batch_elems() * dp_mem; 228 | } 229 | 230 | template 231 | void EntropyRegularizedBinaryPhraseStructure::forward_dev_impl( 232 | const MyDevice&, 233 | const std::vector& xs, 234 | Tensor& fx 235 | ) const { 236 | #ifdef __CUDACC__ 237 | DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::forward"); 238 | #else 239 | // TODO call zero only when necessary 240 | TensorTools::zero(fx); 241 | 242 | std::vector& _ce_ptr2 = 243 | const_cast&>(_ce_ptr); 244 | 245 | for (auto*& ptr : _ce_ptr2) 246 | if (ptr != nullptr) 247 | { 248 | delete ptr; 249 | ptr = nullptr; 250 | } 251 | 252 | if (_ce_ptr2.size() != xs[0]->d.batch_elems()) 253 | _ce_ptr2.resize(xs[0]->d.batch_elems(), nullptr); 254 | 255 | const unsigned max_input_dim = xs[0]->d.rows(); 256 | float* aux_fmem = static_cast(aux_mem); 257 | 258 | //#pragma omp parallel for 259 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 260 | { 261 | const unsigned eisner_dim = ( 262 | batch_sizes == nullptr 263 | ? 
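// When batch_sizes is provided it holds the true length of each batch
// element, so the chart is built on the top-left block of the padded
// input only; otherwise the full padded dimension is used: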
max_input_dim 264 | : batch_sizes->at(batch) 265 | ); 266 | 267 | auto input = batch_matrix(*(xs[0]), batch); 268 | 269 | if (mode == diffdp::DiscreteMode::ForwardRegularized) 270 | { 271 | float* fmem = aux_fmem + batch * 2 * diffdp::BinaryPhraseStructureChart::required_cells(max_input_dim); 272 | //auto forward_chart = std::make_shared(eisner_dim, fmem); 273 | //auto backward_chart = std::make_shared(eisner_dim, fmem + diffdp::EisnerChart::required_cells(max_eisner_dim)); 274 | auto forward_chart = std::make_shared(eisner_dim, fmem); 275 | auto backward_chart = std::make_shared(eisner_dim, fmem + diffdp::BinaryPhraseStructureChart::required_cells(max_input_dim)); 276 | 277 | _ce_ptr2.at(batch) = new diffdp::EntropyRegularizedBinaryPhraseStructure(forward_chart, backward_chart); 278 | 279 | 280 | _ce_ptr2.at(batch)->forward( 281 | [&] (const unsigned left, const unsigned right) 282 | { 283 | return input(left, right); 284 | } 285 | ); 286 | 287 | auto output = batch_matrix(fx, batch); 288 | 289 | for (unsigned left = 0u ; left < eisner_dim ; ++left) 290 | { 291 | for (unsigned right = left+1; right < eisner_dim ; ++right) 292 | { 293 | const float a = _ce_ptr2[batch]->output(left, right); 294 | output(left, right) = a; 295 | } 296 | } 297 | } 298 | else 299 | { 300 | throw std::runtime_error("Not implemented: only ForwardRegularized can be used at the moment"); 301 | } 302 | } 303 | #endif 304 | } 305 | 306 | template 307 | void EntropyRegularizedBinaryPhraseStructure::backward_dev_impl( 308 | const MyDevice &, 309 | const std::vector& xs, 310 | const Tensor&, 311 | const Tensor& dEdf, 312 | unsigned, 313 | Tensor& dEdxi 314 | ) const { 315 | #ifdef __CUDACC__ 316 | DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::backward"); 317 | #else 318 | //#pragma omp parallel for 319 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 320 | { 321 | auto output_grad = batch_matrix(dEdxi, batch); 322 | auto input_grad = batch_matrix(dEdf, batch); 323 | 324 | auto& dp = *(_ce_ptr.at(batch)); 325 | 326 | dp.backward( 327 | [&] (unsigned left, unsigned right) -> float 328 | { 329 | return input_grad(left, right); 330 | } 331 | ); 332 | 333 | for (unsigned left = 0u ; left < dp.size() ; ++left) 334 | for (unsigned right = left + 1u; right < dp.size(); ++right) 335 | { 336 | output_grad(left, right) += dp.gradient(left, right); 337 | } 338 | } 339 | #endif 340 | } 341 | 342 | DYNET_NODE_INST_DEV_IMPL(EntropyRegularizedBinaryPhraseStructure) 343 | 344 | 345 | 346 | } -------------------------------------------------------------------------------- /lib/src/dynet/eisner.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * TODO: there is a lot a duplicate code between the two nodes (almost identical) 3 | */ 4 | #include "diffdp/dynet/eisner.h" 5 | #include "dynet/tensor-eigen.h" 6 | 7 | namespace diffdp 8 | { 9 | 10 | std::pair from_adjacency(const std::pair dep, const diffdp::DependencyGraphMode mode) 11 | { 12 | unsigned head = dep.first; 13 | unsigned mod = dep.second; 14 | if (mode == diffdp::DependencyGraphMode::Compact) 15 | { 16 | mod -= 1u; 17 | if (head == 0u) 18 | head = mod; 19 | else 20 | head -= 1u; 21 | } 22 | 23 | return {head, mod}; 24 | } 25 | 26 | std::pair from_compact(const std::pair dep, const diffdp::DependencyGraphMode mode) 27 | { 28 | unsigned head = dep.first; 29 | unsigned mod = dep.second; 30 | if (mode == diffdp::DependencyGraphMode::Adjacency) 31 | { 32 | if (head == mod) 33 | head = 0u; 34 | else 35 | 
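// Compact coordinates store root attachment on the diagonal: a word that
// is its own head in compact format is headed by the root in adjacency
// format. E.g. compact (0, 0) maps to adjacency (0, 1) (root arc), while
// compact (0, 1) maps to adjacency (1, 2) (ordinary arc).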
head += 1u; 36 | mod += 1u; 37 | } 38 | 39 | return {head, mod}; 40 | } 41 | } 42 | 43 | namespace dynet 44 | { 45 | 46 | Expression algorithmic_differentiable_eisner(const Expression& x, diffdp::DiscreteMode mode, diffdp::DependencyGraphMode input_graph, diffdp::DependencyGraphMode output_graph, bool with_root_arcs, std::vector* batch_sizes) 47 | { 48 | return Expression(x.pg, x.pg->add_function({x.i}, mode, input_graph, output_graph, with_root_arcs, batch_sizes)); 49 | } 50 | 51 | Expression entropy_regularized_eisner(const Expression& x, diffdp::DiscreteMode mode, diffdp::DependencyGraphMode input_graph, diffdp::DependencyGraphMode output_graph, bool with_root_arcs, std::vector* batch_sizes) 52 | { 53 | return Expression(x.pg, x.pg->add_function({x.i}, mode, input_graph, output_graph, with_root_arcs, batch_sizes)); 54 | } 55 | 56 | AlgorithmicDifferentiableEisner::AlgorithmicDifferentiableEisner( 57 | const std::initializer_list& a, 58 | diffdp::DiscreteMode mode, 59 | diffdp::DependencyGraphMode input_graph, 60 | diffdp::DependencyGraphMode output_graph, 61 | bool with_root_arcs, 62 | std::vector* batch_sizes 63 | ) : 64 | Node(a), 65 | mode(mode), 66 | input_graph(input_graph), 67 | output_graph(output_graph), 68 | with_root_arcs(with_root_arcs), 69 | batch_sizes(batch_sizes) 70 | { 71 | this->has_cuda_implemented = false; 72 | } 73 | 74 | bool AlgorithmicDifferentiableEisner::supports_multibatch() const 75 | { 76 | return true; 77 | } 78 | 79 | AlgorithmicDifferentiableEisner::~AlgorithmicDifferentiableEisner() 80 | { 81 | for (auto*& ptr : _ce_ptr) 82 | if (ptr != nullptr) 83 | { 84 | delete ptr; 85 | ptr = nullptr; 86 | } 87 | } 88 | 89 | std::string AlgorithmicDifferentiableEisner::as_string(const std::vector& arg_names) const { 90 | std::ostringstream s; 91 | s << "algorithmic_differentiable_eisner(" << arg_names[0] << ")"; 92 | return s.str(); 93 | } 94 | 95 | Dim AlgorithmicDifferentiableEisner::dim_forward(const std::vector& xs) const { 96 | DYNET_ARG_CHECK( 97 | xs.size() == 1 && xs[0].nd == 2 && xs[0].rows() == xs[0].cols(), 98 | "Bad input dimensions in AlgorithmicDifferentiableEisner: " << xs 99 | ); 100 | if (input_graph == diffdp::DependencyGraphMode::Compact) 101 | DYNET_ARG_CHECK( 102 | xs[0].rows() >= 1, 103 | "Bad input dimensions in AlgorithmicDifferentiableEisner: " << xs 104 | ) 105 | else 106 | DYNET_ARG_CHECK( 107 | xs[0].rows() >= 2, 108 | "Bad input dimensions in AlgorithmicDifferentiableEisner: " << xs 109 | ) 110 | 111 | unsigned dim; 112 | if (input_graph == output_graph) 113 | dim = xs[0].rows(); 114 | else if (input_graph == diffdp::DependencyGraphMode::Compact) 115 | dim = xs[0].rows() + 1; // from compact to adj 116 | else 117 | dim = xs[0].rows() - 1; // from adj to compact 118 | 119 | return dynet::Dim({dim, dim}, xs[0].batch_elems()); 120 | } 121 | 122 | size_t AlgorithmicDifferentiableEisner::aux_storage_size() const { 123 | const unsigned eisner_dim = dim.rows() + (output_graph == diffdp::DependencyGraphMode::Compact ? 
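// Compact graphs drop the explicit root row/column, so the underlying
// Eisner chart needs one extra position: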
1 : 0); 124 | // 2 times because we have a forward and a backward chart 125 | const size_t eisner_mem = 2 * diffdp::EisnerChart::required_memory(eisner_dim); 126 | return dim.batch_elems() * eisner_mem; 127 | } 128 | 129 | 130 | 131 | 132 | template 133 | void AlgorithmicDifferentiableEisner::forward_dev_impl( 134 | const MyDevice&, 135 | const std::vector& xs, 136 | Tensor& fx 137 | ) const { 138 | #ifdef __CUDACC__ 139 | DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::forward"); 140 | #else 141 | // TODO call zero only when necessary 142 | TensorTools::zero(fx); 143 | 144 | std::vector& _ce_ptr2 = 145 | const_cast&>(_ce_ptr); 146 | 147 | for (auto*& ptr : _ce_ptr2) 148 | if (ptr != nullptr) 149 | { 150 | delete ptr; 151 | ptr = nullptr; 152 | } 153 | 154 | if (_ce_ptr2.size() != xs[0]->d.batch_elems()) 155 | _ce_ptr2.resize(xs[0]->d.batch_elems(), nullptr); 156 | 157 | const unsigned max_eisner_dim = xs[0]->d.rows() + (input_graph == diffdp::DependencyGraphMode::Compact ? 1 : 0); 158 | float* aux_fmem = static_cast(aux_mem); 159 | 160 | //#pragma omp parallel for 161 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 162 | { 163 | const unsigned eisner_dim = ( 164 | batch_sizes == nullptr 165 | ? max_eisner_dim 166 | : batch_sizes->at(batch) + 1 167 | ); 168 | 169 | auto input = batch_matrix(*(xs[0]), batch); 170 | 171 | if (mode == diffdp::DiscreteMode::ForwardRegularized) 172 | { 173 | float* fmem = aux_fmem + batch * 2 * diffdp::EisnerChart::required_cells(max_eisner_dim); 174 | //auto forward_chart = std::make_shared(eisner_dim, fmem); 175 | //auto backward_chart = std::make_shared(eisner_dim, fmem + diffdp::EisnerChart::required_cells(max_eisner_dim)); 176 | auto forward_chart = std::make_shared(eisner_dim, fmem); 177 | auto backward_chart = std::make_shared(eisner_dim); 178 | 179 | _ce_ptr2.at(batch) = new diffdp::AlgorithmicDifferentiableEisner(forward_chart, backward_chart); 180 | 181 | 182 | _ce_ptr2.at(batch)->forward( 183 | [&] (const unsigned head, const unsigned mod) 184 | { 185 | if (mod == 0u) 186 | throw std::runtime_error("Illegal arc"); 187 | if (head == 0u && !with_root_arcs) 188 | { 189 | return 0.f; 190 | } 191 | else 192 | { 193 | const auto arc = diffdp::from_adjacency({head, mod}, input_graph); 194 | const float v = input(arc.first, arc.second); 195 | return v; 196 | } 197 | } 198 | ); 199 | 200 | auto output = batch_matrix(fx, batch); 201 | 202 | for (unsigned head = 0u ; head < eisner_dim ; ++head) 203 | { 204 | for (unsigned mod = 1u; mod < eisner_dim ; ++mod) 205 | { 206 | const auto arc = diffdp::from_adjacency({head, mod}, output_graph); 207 | if (head == mod) 208 | { 209 | output(arc.first, arc.second) = 0.f; 210 | continue; 211 | } 212 | 213 | if (head == 0u && !with_root_arcs) 214 | { 215 | output(arc.first, arc.second) = 0.f; 216 | continue; 217 | } 218 | 219 | const float a = _ce_ptr2[batch]->output(head, mod); 220 | 221 | if (!std::isfinite(a)) 222 | throw std::runtime_error("BAD eisner output"); 223 | 224 | output(arc.first, arc.second) = a; 225 | } 226 | } 227 | } 228 | else 229 | { 230 | throw std::runtime_error("Not implemented: only ForwardRegularized can be used at the moment"); 231 | } 232 | } 233 | #endif 234 | } 235 | 236 | template 237 | void AlgorithmicDifferentiableEisner::backward_dev_impl( 238 | const MyDevice &, 239 | const std::vector& xs, 240 | const Tensor&, 241 | const Tensor& dEdf, 242 | unsigned, 243 | Tensor& dEdxi 244 | ) const { 245 | #ifdef __CUDACC__ 246 | 
DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::backward"); 247 | #else 248 | //#pragma omp parallel for 249 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 250 | { 251 | auto output_grad = batch_matrix(dEdxi, batch); 252 | auto input_grad = batch_matrix(dEdf, batch); 253 | 254 | auto& eisner = *(_ce_ptr.at(batch)); 255 | 256 | eisner.backward( 257 | [&] (unsigned head, unsigned mod) -> float 258 | { 259 | if (head == 0u && !with_root_arcs) 260 | return 0.f; 261 | auto arc = diffdp::from_adjacency({head, mod}, output_graph); 262 | const float v = input_grad(arc.first, arc.second); 263 | if (!std::isfinite(v)) 264 | throw std::runtime_error("BAD eisner input grad"); 265 | return v; 266 | } 267 | ); 268 | 269 | for (unsigned head = 0u ; head < eisner.size() ; ++head) 270 | { 271 | for (unsigned mod = 1u; mod < eisner.size(); ++mod) 272 | { 273 | if (head == mod) 274 | continue; 275 | 276 | if (head == 0u && !with_root_arcs) 277 | return; 278 | 279 | auto const v = eisner.gradient(head, mod); 280 | if (!std::isfinite(v)) 281 | throw std::runtime_error("BAD eisner output grad"); 282 | 283 | auto arc = diffdp::from_adjacency({head, mod}, input_graph); 284 | output_grad(arc.first, arc.second) += v; 285 | } 286 | } 287 | } 288 | #endif 289 | } 290 | 291 | 292 | DYNET_NODE_INST_DEV_IMPL(AlgorithmicDifferentiableEisner) 293 | 294 | 295 | 296 | 297 | // ENTROPY Regularized 298 | 299 | 300 | EntropyRegularizedEisner::EntropyRegularizedEisner( 301 | const std::initializer_list& a, 302 | diffdp::DiscreteMode mode, 303 | diffdp::DependencyGraphMode input_graph, 304 | diffdp::DependencyGraphMode output_graph, 305 | bool with_root_arcs, 306 | std::vector* batch_sizes 307 | ) : 308 | Node(a), 309 | mode(mode), 310 | input_graph(input_graph), 311 | output_graph(output_graph), 312 | with_root_arcs(with_root_arcs), 313 | batch_sizes(batch_sizes) 314 | { 315 | this->has_cuda_implemented = false; 316 | } 317 | 318 | bool EntropyRegularizedEisner::supports_multibatch() const 319 | { 320 | return true; 321 | } 322 | 323 | EntropyRegularizedEisner::~EntropyRegularizedEisner() 324 | { 325 | for (auto*& ptr : _ce_ptr) 326 | if (ptr != nullptr) 327 | { 328 | delete ptr; 329 | ptr = nullptr; 330 | } 331 | } 332 | 333 | std::string EntropyRegularizedEisner::as_string(const std::vector& arg_names) const { 334 | std::ostringstream s; 335 | s << "entropy_regularized_eisner(" << arg_names[0] << ")"; 336 | return s.str(); 337 | } 338 | 339 | Dim EntropyRegularizedEisner::dim_forward(const std::vector& xs) const { 340 | DYNET_ARG_CHECK( 341 | xs.size() == 1 && xs[0].nd == 2 && xs[0].rows() == xs[0].cols(), 342 | "Bad input dimensions in EntropyRegularizedEisner: " << xs 343 | ); 344 | if (input_graph == diffdp::DependencyGraphMode::Compact) 345 | DYNET_ARG_CHECK( 346 | xs[0].rows() >= 1, 347 | "Bad input dimensions in EntropyRegularizedEisner: " << xs 348 | ) 349 | else 350 | DYNET_ARG_CHECK( 351 | xs[0].rows() >= 2, 352 | "Bad input dimensions in EntropyRegularizedEisner: " << xs 353 | ) 354 | 355 | unsigned dim; 356 | if (input_graph == output_graph) 357 | dim = xs[0].rows(); 358 | else if (input_graph == diffdp::DependencyGraphMode::Compact) 359 | dim = xs[0].rows() + 1; // from compact to adj 360 | else 361 | dim = xs[0].rows() - 1; // from adj to compact 362 | 363 | return dynet::Dim({dim, dim}, xs[0].batch_elems()); 364 | } 365 | 366 | size_t EntropyRegularizedEisner::aux_storage_size() const { 367 | const unsigned eisner_dim = dim.rows() + (output_graph == 
diffdp::DependencyGraphMode::Compact ? 1 : 0); 368 | // 2 times because we have a forward and a backward chart 369 | const size_t eisner_mem = 2 * diffdp::EisnerChart::required_memory(eisner_dim); 370 | return dim.batch_elems() * eisner_mem; 371 | } 372 | 373 | template 374 | void EntropyRegularizedEisner::forward_dev_impl( 375 | const MyDevice&, 376 | const std::vector& xs, 377 | Tensor& fx 378 | ) const { 379 | #ifdef __CUDACC__ 380 | DYNET_NO_CUDA_IMPL_ERROR("EntropyRegularizedEisner::forward"); 381 | #else 382 | // TODO call zero only when necessary 383 | TensorTools::zero(fx); 384 | 385 | std::vector& _ce_ptr2 = 386 | const_cast&>(_ce_ptr); 387 | 388 | for (auto*& ptr : _ce_ptr2) 389 | if (ptr != nullptr) 390 | { 391 | delete ptr; 392 | ptr = nullptr; 393 | } 394 | 395 | if (_ce_ptr2.size() != xs[0]->d.batch_elems()) 396 | _ce_ptr2.resize(xs[0]->d.batch_elems(), nullptr); 397 | 398 | const unsigned max_eisner_dim = xs[0]->d.rows() + (input_graph == diffdp::DependencyGraphMode::Compact ? 1 : 0); 399 | float* aux_fmem = static_cast(aux_mem); 400 | 401 | //#pragma omp parallel for 402 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 403 | { 404 | const unsigned eisner_dim = ( 405 | batch_sizes == nullptr 406 | ? max_eisner_dim 407 | : batch_sizes->at(batch) + 1 408 | ); 409 | 410 | auto input = batch_matrix(*(xs[0]), batch); 411 | 412 | if (mode == diffdp::DiscreteMode::ForwardRegularized) 413 | { 414 | float* fmem = aux_fmem + batch * 2 * diffdp::EisnerChart::required_cells(max_eisner_dim); 415 | //auto forward_chart = std::make_shared(eisner_dim, fmem); 416 | //auto backward_chart = std::make_shared(eisner_dim, fmem + diffdp::EisnerChart::required_cells(max_eisner_dim)); 417 | auto forward_chart = std::make_shared(eisner_dim, fmem); 418 | auto backward_chart = std::make_shared(eisner_dim); 419 | 420 | _ce_ptr2.at(batch) = new diffdp::EntropyRegularizedEisner(forward_chart, backward_chart); 421 | 422 | 423 | _ce_ptr2.at(batch)->forward( 424 | [&] (const unsigned head, const unsigned mod) 425 | { 426 | if (mod == 0u) 427 | throw std::runtime_error("Illegal arc"); 428 | if (head == 0u && !with_root_arcs) 429 | { 430 | return 0.f; 431 | } 432 | else 433 | { 434 | const auto arc = diffdp::from_adjacency({head, mod}, input_graph); 435 | const float v = input(arc.first, arc.second); 436 | return v; 437 | } 438 | } 439 | ); 440 | 441 | auto output = batch_matrix(fx, batch); 442 | 443 | for (unsigned head = 0u ; head < eisner_dim ; ++head) 444 | { 445 | for (unsigned mod = 1u; mod < eisner_dim ; ++mod) 446 | { 447 | const auto arc = diffdp::from_adjacency({head, mod}, output_graph); 448 | if (head == mod) 449 | { 450 | output(arc.first, arc.second) = 0.f; 451 | continue; 452 | } 453 | 454 | if (head == 0u && !with_root_arcs) 455 | { 456 | output(arc.first, arc.second) = 0.f; 457 | continue; 458 | } 459 | 460 | const float a = _ce_ptr2[batch]->output(head, mod); 461 | 462 | if (!std::isfinite(a)) 463 | throw std::runtime_error("BAD eisner output"); 464 | 465 | output(arc.first, arc.second) = a; 466 | } 467 | } 468 | } 469 | else 470 | { 471 | throw std::runtime_error("Not implemented: only ForwardRegularized can be used at the moment"); 472 | } 473 | } 474 | #endif 475 | } 476 | 477 | template 478 | void EntropyRegularizedEisner::backward_dev_impl( 479 | const MyDevice &, 480 | const std::vector& xs, 481 | const Tensor&, 482 | const Tensor& dEdf, 483 | unsigned, 484 | Tensor& dEdxi 485 | ) const { 486 | #ifdef __CUDACC__ 487 | 
DYNET_NO_CUDA_IMPL_ERROR("EntropyRegularizedEisner::backward"); 488 | #else 489 | //#pragma omp parallel for 490 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 491 | { 492 | auto output_grad = batch_matrix(dEdxi, batch); 493 | auto input_grad = batch_matrix(dEdf, batch); 494 | 495 | auto& eisner = *(_ce_ptr.at(batch)); 496 | 497 | eisner.backward( 498 | [&] (unsigned head, unsigned mod) -> float 499 | { 500 | if (head == 0u && !with_root_arcs) 501 | return 0.f; 502 | auto arc = diffdp::from_adjacency({head, mod}, output_graph); 503 | const float v = input_grad(arc.first, arc.second); 504 | if (!std::isfinite(v)) 505 | throw std::runtime_error("BAD eisner input grad"); 506 | return v; 507 | } 508 | ); 509 | 510 | for (unsigned head = 0u ; head < eisner.size() ; ++head) 511 | { 512 | for (unsigned mod = 1u; mod < eisner.size(); ++mod) 513 | { 514 | if (head == mod) 515 | continue; 516 | 517 | if (head == 0u && !with_root_arcs) 518 | return; 519 | 520 | auto const v = eisner.gradient(head, mod); 521 | if (!std::isfinite(v)) 522 | throw std::runtime_error("BAD eisner output grad"); 523 | 524 | auto arc = diffdp::from_adjacency({head, mod}, input_graph); 525 | output_grad(arc.first, arc.second) += v; 526 | } 527 | } 528 | } 529 | #endif 530 | } 531 | 532 | DYNET_NODE_INST_DEV_IMPL(EntropyRegularizedEisner) 533 | 534 | } -------------------------------------------------------------------------------- /lib/src/dynet/matrix_tree_theorem.cpp: -------------------------------------------------------------------------------- 1 | #include "diffdp/dynet/matrix_tree_theorem.h" 2 | 3 | #include "dynet/tensor-eigen.h" 4 | #include "dynet/nodes-impl-macros.h" 5 | 6 | namespace dynet 7 | { 8 | 9 | Expression matrix_tree_theorem(const Expression &weights) 10 | { 11 | return Expression(weights.pg, weights.pg->add_function({weights.i})); 12 | } 13 | 14 | 15 | MatrixTreeTheorem::MatrixTreeTheorem( 16 | const std::initializer_list& a 17 | ) : 18 | Node(a) 19 | { 20 | this->has_cuda_implemented = false; 21 | } 22 | 23 | bool MatrixTreeTheorem::supports_multibatch() const 24 | { 25 | return false; 26 | } 27 | 28 | 29 | std::string MatrixTreeTheorem::as_string(const std::vector& arg_names) const { 30 | std::ostringstream s; 31 | s << "matrix_tree_theorem(" << arg_names[0] << ")"; 32 | return s.str(); 33 | } 34 | 35 | Dim MatrixTreeTheorem::dim_forward(const std::vector& xs) const { 36 | return xs[0]; 37 | } 38 | 39 | size_t MatrixTreeTheorem::aux_storage_size() const 40 | { 41 | const auto matrix_size = dim.rows() * dim.cols(); 42 | // 1. exp weights 43 | // 2. laplacian inverse 44 | // 3. output1 45 | // 4. 
output2
46 | return sizeof(float) * matrix_size * 4;
47 | }
48 |
49 | template<class MyDevice>
50 | void MatrixTreeTheorem::forward_dev_impl(
51 | const MyDevice& dev,
52 | const std::vector<const Tensor*>& xs,
53 | Tensor& fx
54 | ) const {
55 | #ifdef __CUDACC__
56 | DYNET_NO_CUDA_IMPL_ERROR("MatrixTreeTheorem::forward");
57 | #else
58 | // aux mem
59 | const Dim matrix_dim({fx.d.cols(), fx.d.rows()});
60 | const unsigned matrix_size = fx.d.cols() * fx.d.rows();
61 |
62 | float* f_aux_mem = (float*) aux_mem;
63 | Tensor tensor_exp_weights(matrix_dim, f_aux_mem, fx.device, DeviceMempool::FXS);
64 | Tensor tensor_laplacian(matrix_dim, f_aux_mem + matrix_size, fx.device, DeviceMempool::FXS);
65 | Tensor tensor_output1(matrix_dim, f_aux_mem + 2*matrix_size, fx.device, DeviceMempool::FXS);
66 | Tensor tensor_output2(matrix_dim, f_aux_mem + 3*matrix_size, fx.device, DeviceMempool::FXS);
67 |
68 | // temp mem
69 | AlignedMemoryPool* scratch_allocator = fx.device->pools[(int)DeviceMempool::SCS];
70 | Tensor tensor_col_sum(Dim({fx.d.cols()}), nullptr, fx.device, DeviceMempool::FXS);
71 | tensor_col_sum.v = static_cast<float*>(scratch_allocator->allocate(tensor_col_sum.d.size() * sizeof(float)));
72 |
73 | auto weights = mat(*xs[0]);
74 | auto exp_weights = mat(tensor_exp_weights);
75 | auto col_sum = vec(tensor_col_sum);
76 | auto laplacian = mat(tensor_laplacian);
77 | auto output1 = mat(tensor_output1);
78 | auto output2 = mat(tensor_output2);
79 | auto marginals = mat(fx);
80 |
81 | exp_weights = weights.array().exp();
82 |
83 | // sum over columns
84 | col_sum = exp_weights.colwise().sum();
85 |
86 | // set fx = laplacian
87 | laplacian = -exp_weights;
88 | laplacian.diagonal() += col_sum;
89 |
90 | laplacian.row(0).setZero();
91 | laplacian(0, 0) = 1.f; // anything > 0 will work here
92 |
93 | // Debug print of the Laplacian's top-left 3x3 block, disabled because it
94 | // ran unconditionally and reads out of bounds for inputs smaller than 3x3:
95 | // for (unsigned i = 0 ; i < 3 ; ++i) {
96 | //     for (unsigned j = 0 ; j < 3 ; ++j)
97 | //         std::cerr << laplacian(i, j) << "\t";
98 | //     std::cerr << std::endl; }
99 | // inverse
100 | laplacian = laplacian.inverse();
101 |
102 | // on gpu it may be faster to use masked matrix?
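// Marginals via the Matrix-Tree Theorem (cf. Koo et al., 2007): with L the
// root-adjusted Laplacian inverted above, the marginal of arc h -> m is
//   mu(h, m) = exp(w(h, m)) * (L^-1(m, m) - L^-1(m, h)),
// which is exactly output1 - output2 below: output1 uses the diagonal of
// the inverse, output2 the transposed inverse, and row/column 0 are masked
// since the root has no incoming arc.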
103 | output1.col(0).setZero(); 104 | for (unsigned i = 1 ; i < fx.d.rows() ; ++i) 105 | output1.col(i) = exp_weights.col(i) * laplacian(i, i); 106 | 107 | // array because it a cwise product, not a matrix product 108 | output2 = exp_weights.array() * laplacian.transpose().array(); 109 | output2.row(0).setZero(); 110 | 111 | marginals = output1 - output2; 112 | 113 | scratch_allocator->free(); 114 | #endif 115 | } 116 | 117 | template 118 | void MatrixTreeTheorem::backward_dev_impl( 119 | const MyDevice &, 120 | const std::vector& xs, 121 | const Tensor& fx, 122 | const Tensor& dEdf, 123 | unsigned, 124 | Tensor& dEdxi 125 | ) const { 126 | #ifdef __CUDACC__ 127 | DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::backward"); 128 | #else 129 | 130 | const Dim matrix_dim({fx.d.cols(), fx.d.rows()}); 131 | const unsigned matrix_size = fx.d.cols() * fx.d.rows(); 132 | 133 | float* f_aux_mem = (float*) aux_mem; 134 | Tensor tensor_exp_weights(matrix_dim, f_aux_mem, fx.device, DeviceMempool::FXS); 135 | Tensor tensor_laplacian(matrix_dim, f_aux_mem + matrix_size, fx.device, DeviceMempool::FXS); 136 | Tensor tensor_output1(matrix_dim, f_aux_mem + 2*matrix_size, fx.device, DeviceMempool::FXS); 137 | Tensor tensor_output2(matrix_dim, f_aux_mem + 3*matrix_size, fx.device, DeviceMempool::FXS); 138 | 139 | auto weights = mat(*xs[0]); 140 | auto exp_weights = mat(tensor_exp_weights); 141 | auto laplacian = mat(tensor_laplacian); 142 | auto output1 = mat(tensor_output1); 143 | auto output2 = mat(tensor_output2); 144 | auto marginals = mat(fx); 145 | 146 | auto d_marginals = mat(dEdf); 147 | auto d_weights = mat(dEdxi); 148 | 149 | 150 | #endif 151 | } 152 | 153 | 154 | DYNET_NODE_INST_DEV_IMPL(MatrixTreeTheorem) 155 | 156 | } -------------------------------------------------------------------------------- /test/test-binary-phrase-algdiff.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MODULE "AlgorithmicDifferentiableEisner" 3 | 4 | #include 5 | namespace utf = boost::unit_test; 6 | 7 | #include "diffdp/algorithm/binary_phrase.h" 8 | 9 | // using boost test with intolerance fails (too precise), 10 | // so let's just use the same test as in Dynet. 
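// check_grad compares an analytic gradient g against a finite-difference
// estimate g_act, using an absolute error for small values and a relative
// error (f / max(|g|, |g_act|)) otherwise, with a 1e-2 threshold; the
// estimates themselves come from central differences,
//   g_act = (f(x + h) - f(x - h)) / (2h).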
11 | bool check_grad(float g, float g_act) 12 | { 13 | float f = std::fabs(g - g_act); 14 | float m = std::max(std::fabs(g), std::fabs(g_act)); 15 | if (f > 0.01 && m > 0.f) 16 | f /= m; 17 | 18 | if (f > 0.01 || std::isnan(f)) 19 | return false; 20 | else 21 | return true; 22 | } 23 | 24 | BOOST_AUTO_TEST_CASE(gradient) 25 | { 26 | const unsigned size = 10; 27 | const float sensitivity = 1e-3; 28 | 29 | diffdp::AlgorithmicDifferentiableBinaryPhraseStructure alg_diff(size); 30 | 31 | // check gradient 32 | std::vector<float> weights(size * size); 33 | for (unsigned output_left = 0 ; output_left < size ; ++output_left) 34 | { 35 | for (unsigned output_right = output_left + 1 ; output_right < size ; ++output_right) 36 | { 37 | for (unsigned input_left = 0 ; input_left < size ; ++input_left) 38 | { 39 | for (unsigned input_right = input_left + 1; input_right < size; ++input_right) 40 | { 41 | // compute gradient using the algorithm 42 | alg_diff.forward( 43 | [&] (const unsigned left, const unsigned right) -> float 44 | { 45 | return weights.at(left + right * size); 46 | } 47 | ); 48 | alg_diff.backward( 49 | [&] (const unsigned left, const unsigned right) -> float 50 | { 51 | if (left == output_left && right == output_right) 52 | return 1.f; 53 | else 54 | return 0.f; 55 | } 56 | ); 57 | 58 | const double computed_gradient = alg_diff.gradient(input_left, input_right); 59 | 60 | // estimate the gradient by central differences 61 | 62 | const double original_weights = weights.at(input_left + input_right * size); 63 | 64 | weights.at(input_left + input_right * size) = original_weights + sensitivity; 65 | alg_diff.forward( 66 | [&] (const unsigned left, const unsigned right) -> float 67 | { 68 | return weights.at(left + right * size); 69 | } 70 | ); 71 | 72 | const double output_a = alg_diff.output(output_left, output_right); 73 | 74 | weights.at(input_left + input_right * size) = original_weights - sensitivity; 75 | alg_diff.forward( 76 | [&] (const unsigned left, const unsigned right) -> float 77 | { 78 | return weights.at(left + right * size); 79 | } 80 | ); 81 | const double output_b = alg_diff.output(output_left, output_right); 82 | 83 | // restore 84 | weights.at(input_left + input_right * size) = original_weights; 85 | 86 | const double estimated_gradient = (output_a - output_b) / (2.f * sensitivity); 87 | 88 | BOOST_CHECK(check_grad(computed_gradient, estimated_gradient)); 89 | } 90 | } 91 | } 92 | } 93 | } -------------------------------------------------------------------------------- /test/test-binary-phrase-ereg.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MODULE "EntropyRegularizedBinaryPhrase" 3 | 4 | #include <boost/test/unit_test.hpp> 5 | #include <cmath> 6 | #include <vector> 7 | namespace utf = boost::unit_test; 8 | 9 | #include "diffdp/algorithm/binary_phrase.h" 10 | 11 | // using boost test with tolerance fails (too precise), 12 | // so let's just use the same check as in Dynet.
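// The tests in this file estimate derivatives by central finite differences:
// a single input weight w is perturbed to w + eps and w - eps, the quantity
// of interest f is recomputed for both values, and the derivative is
// estimated as (f(w + eps) - f(w - eps)) / (2 * eps), whose truncation error
// is O(eps^2).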
13 | bool check_grad(float g, float g_act) 14 | { 15 | float f = std::fabs(g - g_act); 16 | float m = std::max(std::fabs(g), std::fabs(g_act)); 17 | if (f > 0.01 && m > 0.f) 18 | f /= m; 19 | 20 | if (f > 0.01 || std::isnan(f)) 21 | return false; 22 | else 23 | return true; 24 | } 25 | 26 | BOOST_AUTO_TEST_CASE(first_order_gradient, * utf::tolerance(1e-2f)) 27 | { 28 | const unsigned size = 10; 29 | const float sensitivity = 1e-2; 30 | 31 | std::vector<float> weights(size * size); 32 | for (unsigned i = 0 ; i < size ; ++i) 33 | weights.at(i) = size; 34 | 35 | diffdp::EntropyRegularizedBinaryPhraseStructure parser(size); 36 | parser.forward( 37 | [&] (unsigned left, unsigned right) -> float 38 | { 39 | return weights.at(left + right * size); 40 | } 41 | ); 42 | 43 | for (unsigned left = 0 ; left < size ; ++left) 44 | { 45 | for (unsigned right = left + 1; right < size ; ++right) 46 | { 47 | const float computed_arc = parser.output(left, right); 48 | 49 | // estimate the gradient 50 | const float original_weights = weights.at(left + right * size); 51 | 52 | weights.at(left + right * size) = original_weights + sensitivity; 53 | diffdp::EntropyRegularizedBinaryPhraseStructure parser2(size); 54 | parser2.forward( 55 | [&] (const unsigned left, const unsigned right) -> float 56 | { 57 | return weights.at(left + right * size); 58 | } 59 | ); 60 | const float output_a = parser2.chart_forward->weight(0, size-1); 61 | 62 | weights.at(left + right * size) = original_weights - sensitivity; 63 | parser2.forward( 64 | [&] (const unsigned left, const unsigned right) -> float 65 | { 66 | return weights.at(left + right * size); 67 | } 68 | ); 69 | const float output_b = parser2.chart_forward->weight(0, size-1); 70 | 71 | // restore 72 | weights.at(left + right * size) = original_weights; 73 | 74 | const float estimated_arc = (output_a - output_b) / (2.f * sensitivity); 75 | 76 | BOOST_CHECK(check_grad(computed_arc, estimated_arc)); 77 | } 78 | } 79 | } 80 | 81 | 82 | BOOST_AUTO_TEST_CASE(second_order_gradient, * utf::tolerance(1e-2f)) 83 | { 84 | const unsigned size = 10; 85 | const float sensitivity = 1e-3; 86 | 87 | std::vector<float> weights(size * size); 88 | for (unsigned i = 0 ; i < size ; ++i) 89 | weights.at(i) = size; 90 | 91 | diffdp::EntropyRegularizedBinaryPhraseStructure parser(size); 92 | parser.forward( 93 | [&] (unsigned left, unsigned right) -> float 94 | { 95 | return weights.at(left + right * size); 96 | } 97 | ); 98 | 99 | for (unsigned input_left = 0 ; input_left < size ; ++input_left) 100 | { 101 | for (unsigned input_right = input_left + 1; input_right < size ; ++input_right) 102 | { 103 | for (unsigned output_left = 0 ; output_left < size ; ++output_left) 104 | { 105 | for (unsigned output_right = output_left + 1; output_right < size; ++output_right) 106 | { 107 | parser.backward( 108 | [&](const unsigned left, const unsigned right) 109 | { 110 | if (left == output_left && right == output_right) 111 | return 1.f; 112 | else 113 | return 0.f; 114 | } 115 | ); 116 | const float computed_gradient = parser.gradient(input_left, input_right); 117 | 118 | // estimate the gradient by central differences 119 | 120 | const float original_weights = weights.at(input_left + input_right * size); 121 | 122 | weights.at(input_left + input_right * size) = original_weights + sensitivity; 123 | diffdp::EntropyRegularizedBinaryPhraseStructure parser2(size); 124 | parser2.forward( 125 | [&](const unsigned left, const unsigned right) -> float 126 | { 127 | return weights.at(left + right * size); 128 | } 129 |
); 130 | const float output_a = parser2.output(output_left, output_right); 131 | 132 | weights.at(input_left + input_right * size) = original_weights - sensitivity; 133 | parser2.forward( 134 | [&](const unsigned left, const unsigned right) -> float 135 | { 136 | return weights.at(left + right * size); 137 | } 138 | ); 139 | const float output_b = parser2.output(output_left, output_right); 140 | 141 | // restore 142 | weights.at(input_left + input_right * size) = original_weights; 143 | 144 | const double estimated_gradient = (output_a - output_b) / (2.f * sensitivity); 145 | BOOST_CHECK(check_grad(computed_gradient, estimated_gradient)); 146 | } 147 | } 148 | } 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /test/test-dynet-eisner.cpp: -------------------------------------------------------------------------------- 1 | /** 2 |  * TODO split in two different test cases 3 |  */ 4 | #define BOOST_TEST_DYN_LINK 5 | #define BOOST_TEST_MODULE "DynetEisner" 6 | 7 | #include <boost/test/unit_test.hpp> 8 | namespace utf = boost::unit_test; 9 | 10 | #include <vector> 11 | 12 | #include "dynet/expr.h" 13 | #include "dynet/param-init.h" 14 | #include "dynet/grad-check.h" 15 | 16 | #include "diffdp/dynet/eisner.h" 17 | 18 | BOOST_AUTO_TEST_CASE(test_dynet_eisner_algdiff) 19 | { 20 | const unsigned size = 10u; 21 | 22 | int argc = 1; 23 | char arg0[] = "test"; char* args[] = { arg0 }; char** argv = args; // argv must point to valid storage 24 | dynet::initialize(argc, argv); 25 | 26 | dynet::ParameterCollection pc; 27 | 28 | std::vector<float> weights(size * size); 29 | for (unsigned i = 0 ; i < weights.size() ; ++i) 30 | weights.at(i) = (float) i; 31 | auto p_weights = pc.add_parameters(dynet::Dim({size, size}), dynet::ParameterInitFromVector(weights)); 32 | 33 | dynet::ComputationGraph cg; 34 | cg.set_immediate_compute(true); 35 | cg.set_check_validity(true); 36 | 37 | auto e_weights = dynet::parameter(cg, p_weights); 38 | 39 | { 40 | auto e_arcs = dynet::algorithmic_differentiable_eisner( 41 | e_weights, 42 | diffdp::DiscreteMode::ForwardRegularized, 43 | diffdp::DependencyGraphMode::Adjacency, 44 | diffdp::DependencyGraphMode::Adjacency 45 | ); 46 | 47 | for (unsigned head = 0u; head < size; ++head) 48 | { 49 | for (unsigned mod = 0u; mod < size; ++mod) 50 | { 51 | auto e_output = dynet::strided_select( 52 | e_arcs, 53 | {(int) 1u, (int) 1u}, 54 | {(int) head, (int) mod}, 55 | {(int) head + 1, (int) mod + 1} // not included 56 | ); 57 | 58 | BOOST_CHECK(check_grad(pc, e_output, 0)); 59 | } 60 | } 61 | } 62 | { 63 | auto e_arcs = dynet::entropy_regularized_eisner( 64 | e_weights, 65 | diffdp::DiscreteMode::ForwardRegularized, 66 | diffdp::DependencyGraphMode::Adjacency, 67 | diffdp::DependencyGraphMode::Adjacency 68 | ); 69 | 70 | for (unsigned head = 0u; head < size; ++head) 71 | { 72 | for (unsigned mod = 0u; mod < size; ++mod) 73 | { 74 | auto e_output = dynet::strided_select( 75 | e_arcs, 76 | {(int) 1u, (int) 1u}, 77 | {(int) head, (int) mod}, 78 | {(int) head + 1, (int) mod + 1} // not included 79 | ); 80 | BOOST_CHECK(check_grad(pc, e_output, 0)); 81 | } 82 | } 83 | } 84 | } -------------------------------------------------------------------------------- /test/test-dynet-phrase.cpp: -------------------------------------------------------------------------------- 1 | /** 2 |  * TODO split in two different test cases 3 |  */ 4 | #define BOOST_TEST_DYN_LINK 5 | #define BOOST_TEST_MODULE "DynetPhrase" 6 | 7 | #include <boost/test/unit_test.hpp> 8 | namespace utf = boost::unit_test; 9 | 10 | #include <vector> 11 | 12 | #include "dynet/expr.h" 13 | #include "dynet/param-init.h" 14 | #include
"dynet/grad-check.h" 15 | 16 | #include "diffdp/dynet/binary_phrase.h" 17 | 18 | BOOST_AUTO_TEST_CASE(test_dynet_phrase) 19 | { 20 | const unsigned size = 10u; 21 | 22 | int argc = 1; 23 | char **argv; 24 | dynet::initialize(argc, argv); 25 | 26 | dynet::ParameterCollection pc; 27 | 28 | std::vector weights(size * size); 29 | //for (unsigned i = 0 ; i < weights.size() ; ++i) 30 | // weights.at(i) = (float) i; 31 | auto p_weights = pc.add_parameters(dynet::Dim({size, size}), dynet::ParameterInitFromVector(weights)); 32 | 33 | { 34 | for (unsigned head = 0u; head < size; ++head) 35 | { 36 | for (unsigned mod = 0u; mod < size; ++mod) 37 | { 38 | dynet::ComputationGraph cg; 39 | auto e_weights = dynet::parameter(cg, p_weights); 40 | auto e_arcs = dynet::algorithmic_differentiable_binary_phrase_structure( 41 | e_weights, 42 | diffdp::DiscreteMode::ForwardRegularized 43 | ); 44 | auto e_output = dynet::strided_select( 45 | e_arcs, 46 | {(int) 1u, (int) 1u}, 47 | {(int) head, (int) mod}, 48 | {(int) head + 1, (int) mod + 1} // not included 49 | ); 50 | 51 | BOOST_CHECK(check_grad(pc, e_output, 0)); 52 | } 53 | } 54 | } 55 | { 56 | 57 | for (unsigned head = 0u; head < size; ++head) 58 | { 59 | for (unsigned mod = 0u; mod < size; ++mod) 60 | { 61 | dynet::ComputationGraph cg; 62 | auto e_weights = dynet::parameter(cg, p_weights); 63 | 64 | auto e_arcs = dynet::entropy_regularized_binary_phrase_structure( 65 | e_weights, 66 | diffdp::DiscreteMode::ForwardRegularized 67 | ); 68 | auto e_output = dynet::strided_select( 69 | e_arcs, 70 | {(int) 1u, (int) 1u}, 71 | {(int) head, (int) mod}, 72 | {(int) head + 1, (int) mod + 1} // not included 73 | ); 74 | 75 | BOOST_CHECK(check_grad(pc, e_output, 0)); 76 | } 77 | } 78 | } 79 | } -------------------------------------------------------------------------------- /test/test-eisner-algdiff.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MODULE "EntropyRegularizedEisner" 3 | 4 | #include 5 | namespace utf = boost::unit_test; 6 | 7 | #include "diffdp/algorithm/eisner.h" 8 | 9 | // using boost test with intolerance fails (too precise), 10 | // so let's just use the same test as in Dynet. 
11 | bool check_grad(float g, float g_act) 12 | { 13 | float f = std::fabs(g - g_act); 14 | float m = std::max(std::fabs(g), std::fabs(g_act)); 15 | if (f > 0.01 && m > 0.f) 16 | f /= m; 17 | 18 | if (f > 0.01 || std::isnan(f)) 19 | return false; 20 | else 21 | return true; 22 | } 23 | 24 | BOOST_AUTO_TEST_CASE(gradient) 25 | { 26 | const unsigned size = 10; 27 | const float sensitivity = 1e-3; 28 | 29 | diffdp::AlgorithmicDifferentiableEisner alg_diff_eisner(size); 30 | 31 | // check gradient 32 | std::vector<float> weights(size * size); 33 | for (unsigned output_head = 0 ; output_head < size ; ++output_head) 34 | { 35 | for (unsigned output_mod = 1 ; output_mod < size ; ++output_mod) 36 | { 37 | if (output_head == output_mod) 38 | continue; 39 | 40 | for (unsigned input_head = 0 ; input_head < size ; ++input_head) 41 | { 42 | for (unsigned input_mod = 1; input_mod < size; ++input_mod) 43 | { 44 | if (input_head == input_mod) 45 | continue; 46 | 47 | // compute gradient using the algorithm 48 | alg_diff_eisner.forward( 49 | [&] (const unsigned head, const unsigned mod) -> float 50 | { 51 | return weights.at(head + mod * size); 52 | } 53 | ); 54 | alg_diff_eisner.backward( 55 | [&] (const unsigned head, const unsigned mod) -> float 56 | { 57 | if (head == output_head && mod == output_mod) 58 | return 1.f; 59 | else 60 | return 0.f; 61 | } 62 | ); 63 | 64 | const double computed_gradient = alg_diff_eisner.gradient(input_head, input_mod); 65 | 66 | // estimate the gradient by central differences 67 | 68 | const double original_weights = weights.at(input_head + input_mod * size); 69 | 70 | weights.at(input_head + input_mod * size) = original_weights + sensitivity; 71 | alg_diff_eisner.forward( 72 | [&] (const unsigned head, const unsigned mod) -> float 73 | { 74 | return weights.at(head + mod * size); 75 | } 76 | ); 77 | 78 | const double output_a = alg_diff_eisner.output(output_head, output_mod); 79 | 80 | weights.at(input_head + input_mod * size) = original_weights - sensitivity; 81 | alg_diff_eisner.forward( 82 | [&] (const unsigned head, const unsigned mod) -> float 83 | { 84 | return weights.at(head + mod * size); 85 | } 86 | ); 87 | const double output_b = alg_diff_eisner.output(output_head, output_mod); 88 | 89 | // restore 90 | weights.at(input_head + input_mod * size) = original_weights; 91 | 92 | const double estimated_gradient = (output_a - output_b) / (2.f * sensitivity); 93 | 94 | BOOST_CHECK(check_grad(computed_gradient, estimated_gradient)); 95 | 96 | } 97 | } 98 | } 99 | } 100 | } -------------------------------------------------------------------------------- /test/test-eisner-ereg.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MODULE "EntropyRegularizedEisner" 3 | 4 | #include <boost/test/unit_test.hpp> 5 | namespace utf = boost::unit_test; 6 | 7 | #include "diffdp/algorithm/eisner.h" 8 | 9 | // using boost test with tolerance fails (too precise), 10 | // so let's just use the same check as in Dynet.
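// first_order_gradient seems to rely on the defining property of the
// entropy-regularized parser: each marginal output(head, mod) should equal
// the derivative of the root chart value chart_forward->c_cright(0, size-1)
// (a log-partition-like quantity) with respect to the corresponding arc
// weight, which is verified by central differences. second_order_gradient
// then differentiates the marginals themselves through backward().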
11 | bool check_grad(float g, float g_act) 12 | { 13 | float f = std::fabs(g - g_act); 14 | float m = std::max(std::fabs(g), std::fabs(g_act)); 15 | if (f > 0.01 && m > 0.f) 16 | f /= m; 17 | 18 | if (f > 0.01 || std::isnan(f)) 19 | return false; 20 | else 21 | return true; 22 | } 23 | 24 | BOOST_AUTO_TEST_CASE(first_order_gradient, * utf::tolerance(1e-2f)) 25 | { 26 | const unsigned size = 10; 27 | const float sensitivity = 1e-2; 28 | 29 | std::vector<float> weights(size * size); 30 | for (unsigned i = 0 ; i < size ; ++i) 31 | weights.at(i) = size; 32 | 33 | diffdp::EntropyRegularizedEisner parser(size); 34 | parser.forward( 35 | [&] (unsigned head, unsigned mod) -> float 36 | { 37 | return weights.at(head + mod * size); 38 | } 39 | ); 40 | 41 | for (unsigned head = 0 ; head < size ; ++head) 42 | { 43 | for (unsigned mod = 1; mod < size ; ++mod) 44 | { 45 | if (head == mod) 46 | continue; 47 | 48 | const float computed_arc = parser.output(head, mod); 49 | 50 | // estimate the gradient 51 | const float original_weights = weights.at(head + mod * size); 52 | 53 | weights.at(head + mod * size) = original_weights + sensitivity; 54 | diffdp::EntropyRegularizedEisner parser2(size); 55 | parser2.forward( 56 | [&] (const unsigned head, const unsigned mod) -> float 57 | { 58 | return weights.at(head + mod * size); 59 | } 60 | ); 61 | const float output_a = parser2.chart_forward->c_cright(0, size-1); 62 | 63 | weights.at(head + mod * size) = original_weights - sensitivity; 64 | parser2.forward( 65 | [&] (const unsigned head, const unsigned mod) -> float 66 | { 67 | return weights.at(head + mod * size); 68 | } 69 | ); 70 | const float output_b = parser2.chart_forward->c_cright(0, size-1); 71 | 72 | // restore 73 | weights.at(head + mod * size) = original_weights; 74 | 75 | const float estimated_arc = (output_a - output_b) / (2.f * sensitivity); 76 | 77 | BOOST_CHECK(check_grad(computed_arc, estimated_arc)); 78 | } 79 | } 80 | } 81 | 82 | BOOST_AUTO_TEST_CASE(second_order_gradient, * utf::tolerance(1e-2f)) 83 | { 84 | const unsigned size = 10; 85 | const float sensitivity = 1e-3; 86 | 87 | std::vector<float> weights(size * size); 88 | for (unsigned i = 0 ; i < size ; ++i) 89 | weights.at(i) = size; 90 | 91 | diffdp::EntropyRegularizedEisner parser(size); 92 | parser.forward( 93 | [&] (unsigned head, unsigned mod) -> float 94 | { 95 | return weights.at(head + mod * size); 96 | } 97 | ); 98 | 99 | for (unsigned input_head = 0 ; input_head < size ; ++input_head) 100 | { 101 | for (unsigned input_mod = 1; input_mod < size ; ++input_mod) 102 | { 103 | if (input_head == input_mod) 104 | continue; 105 | 106 | for (unsigned output_head = 0 ; output_head < size ; ++output_head) 107 | { 108 | for (unsigned output_mod = 1; output_mod < size; ++output_mod) 109 | { 110 | if (output_head == output_mod) 111 | continue; 112 | 113 | parser.backward( 114 | [&](const unsigned head, const unsigned mod) 115 | { 116 | if (head == output_head && mod == output_mod) 117 | return 1.f; 118 | else 119 | return 0.f; 120 | } 121 | ); 122 | const float computed_gradient = parser.gradient(input_head, input_mod); 123 | 124 | // estimate the gradient by central differences 125 | 126 | const float original_weights = weights.at(input_head + input_mod * size); 127 | 128 | weights.at(input_head + input_mod * size) = original_weights + sensitivity; 129 | diffdp::EntropyRegularizedEisner parser2(size); 130 | parser2.forward( 131 | [&](const unsigned head, const unsigned mod) -> float 132 | { 133 | return weights.at(head + mod * size); 134 | }
135 | ); 136 | const float output_a = parser2.output(output_head, output_mod); 137 | 138 | weights.at(input_head + input_mod * size) = original_weights - sensitivity; 139 | parser2.forward( 140 | [&](const unsigned head, const unsigned mod) -> float 141 | { 142 | return weights.at(head + mod * size); 143 | } 144 | ); 145 | const float output_b = parser2.output(output_head, output_mod); 146 | 147 | // restore 148 | weights.at(input_head + input_mod * size) = original_weights; 149 | 150 | const double estimated_gradient = (output_a - output_b) / (2.f * sensitivity); 151 | 152 | BOOST_CHECK(check_grad(computed_gradient, estimated_gradient)); 153 | } 154 | } 155 | } 156 | } 157 | } -------------------------------------------------------------------------------- /test/test-eisner-ereg.dSYM/Contents/Info.plist: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 3 | <plist version="1.0"> 4 | <dict> 5 | <key>CFBundleDevelopmentRegion</key> 6 | <string>English</string> 7 | <key>CFBundleIdentifier</key> 8 | <string>com.apple.xcode.dsym.test-eisner-ereg</string> 9 | <key>CFBundleInfoDictionaryVersion</key> 10 | <string>6.0</string> 11 | <key>CFBundlePackageType</key> 12 | <string>dSYM</string> 13 | <key>CFBundleSignature</key> 14 | <string>????</string> 15 | <key>CFBundleShortVersionString</key> 16 | <string>1.0</string> 17 | <key>CFBundleVersion</key> 18 | <string>1</string> 19 | </dict> 20 | </plist> 21 | -------------------------------------------------------------------------------- /test/test-eisner-ereg.dSYM/Contents/Resources/DWARF/test-eisner-ereg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoC/diffdp/58ae35b171ddd54b778790bc64838890c0f8956f/test/test-eisner-ereg.dSYM/Contents/Resources/DWARF/test-eisner-ereg -------------------------------------------------------------------------------- /test/test-math.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MODULE "Math" 3 | 4 | #include <boost/test/unit_test.hpp> 5 | namespace utf = boost::unit_test; 6 | 7 | #include <vector> 8 | #include <algorithm> 9 | #include "diffdp/algorithm/eisner.h" 10 | #include "diffdp/math.h" 11 | #include "dynet/expr.h" 12 | 13 | // using boost test with tolerance fails (too precise), 14 | // so let's just use the same check as in Dynet.
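// test_softmax compares diffdp's softmax and its backward pass against
// dynet's autodiff. For s = softmax(x) the Jacobian is
//     ds_i / dx_j = s_i * (delta_ij - s_j),
// so backprop_softmax presumably computes
//     dx_j = s_j * (dy_j - sum_i dy_i * s_i);
// the nested loops below check every (input_id, output_id) Jacobian entry.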
15 | bool check_grad(float g, float g_act) 16 | { 17 | float f = std::fabs(g - g_act); 18 | float m = std::max(std::fabs(g), std::fabs(g_act)); 19 | if (f > 0.01 && m > 0.f) 20 | f /= m; 21 | 22 | if (f > 0.01 || std::isnan(f)) 23 | return false; 24 | else 25 | return true; 26 | } 27 | 28 | BOOST_AUTO_TEST_CASE(test_softmax) 29 | { 30 | int argc = 1; 31 | char arg0[] = "test"; char* args[] = { arg0 }; char** argv = args; // argv must point to valid storage 32 | dynet::initialize(argc, argv); 33 | 34 | std::vector<float> input(10); 35 | std::vector<float> output(10); 36 | std::vector<float> input_grad(10); 37 | std::vector<float> output_grad(10); 38 | for (unsigned i = 0 ; i < input.size() ; ++i) 39 | input.at(i) = i; 40 | 41 | { 42 | diffdp::softmax(output.begin(), input.begin(), input.size()); 43 | 44 | dynet::ComputationGraph cg; 45 | auto e_output = dynet::softmax(dynet::input(cg, {10}, input)); 46 | auto dynet_output = as_vector(cg.forward(e_output)); 47 | 48 | for (unsigned i = 0 ; i < 10 ; ++i) 49 | BOOST_CHECK(check_grad(output.at(i), dynet_output.at(i))); 50 | } 51 | 52 | for (unsigned input_id = 0 ; input_id < input.size() ; ++input_id) 53 | { 54 | for (unsigned output_id = 0 ; output_id < input.size() ; ++output_id) 55 | { 56 | 57 | // compute gradient 58 | 59 | std::fill(input_grad.begin(), input_grad.end(), 0.f); 60 | std::fill(output_grad.begin(), output_grad.end(), 0.f); 61 | //std::fill(output.begin(), output.end(), 0.f); 62 | 63 | diffdp::softmax(output.begin(), input.begin(), input.size()); 64 | output_grad.at(output_id) = 1.f; 65 | diffdp::backprop_softmax( 66 | input_grad.begin(), output_grad.begin(), 67 | input.begin(), output.begin(), 68 | input.size() 69 | ); 70 | const float computed_gradient = input_grad.at(input_id); 71 | 72 | // dynet gradient 73 | dynet::ComputationGraph cg; 74 | 75 | auto e_input = dynet::input(cg, {10}, input); 76 | auto e_softmax = dynet::softmax(e_input); 77 | auto e_output = dynet::pick(e_softmax, output_id); 78 | cg.forward(e_output); 79 | cg.backward(e_output, true); 80 | 81 | auto dynet_gradient_all = as_vector(e_input.gradient()); 82 | float dynet_gradient = dynet_gradient_all.at(input_id); 83 | 84 | BOOST_CHECK(check_grad(computed_gradient, dynet_gradient)); 85 | } 86 | 87 | } 88 | } --------------------------------------------------------------------------------