├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── app └── src │ ├── compare-eisner.cpp │ ├── compare-phrase.cpp │ ├── test-dynet.cpp │ └── test-mtt.cpp ├── lib ├── CMakeLists.txt ├── include │ └── diffdp │ │ ├── algorithm │ │ ├── binary_phrase.h │ │ └── eisner.h │ │ ├── builder │ │ ├── binary-phrase.h │ │ └── dependency.h │ │ ├── chart.h │ │ ├── deduction_operations.h │ │ ├── dynet │ │ ├── args.h │ │ ├── binary_phrase.h │ │ ├── eisner.h │ │ └── matrix_tree_theorem.h │ │ └── math.h └── src │ ├── algorithm │ ├── binary_phrase.cpp │ └── eisner.cpp │ ├── builder │ ├── binary-phrase.cpp │ └── dependency.cpp │ ├── chart.cpp │ └── dynet │ ├── binary_phrase.cpp │ ├── eisner.cpp │ └── matrix_tree_theorem.cpp └── test ├── test-binary-phrase-algdiff.cpp ├── test-binary-phrase-ereg.cpp ├── test-dynet-eisner.cpp ├── test-dynet-phrase.cpp ├── test-eisner-algdiff.cpp ├── test-eisner-ereg.cpp ├── test-eisner-ereg.dSYM └── Contents │ ├── Info.plist │ └── Resources │ └── DWARF │ └── test-eisner-ereg └── test-math.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | cmake-build-debug/* 3 | 4 | # Prerequisites 5 | *.d 6 | 7 | # Compiled Object files 8 | *.slo 9 | *.lo 10 | *.o 11 | *.obj 12 | 13 | # Precompiled Headers 14 | *.gch 15 | *.pch 16 | 17 | # Compiled Dynamic libraries 18 | *.so 19 | *.dylib 20 | *.dll 21 | 22 | # Fortran module files 23 | *.mod 24 | *.smod 25 | 26 | # Compiled Static libraries 27 | *.lai 28 | *.la 29 | *.a 30 | *.lib 31 | 32 | # Executables 33 | *.exe 34 | *.out 35 | *.app 36 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.12) 2 | set(CMAKE_CXX_STANDARD 11) 3 | project(diffdp) 4 | enable_testing() 5 | 6 | find_package(Boost COMPONENTS unit_test_framework regex serialization filesystem REQUIRED) 7 | include_directories("/Users/filippo/repos/dynet") 8 | 9 | add_subdirectory(lib) 10 | 11 | 12 | add_executable(compare-phrase app/src/compare-phrase.cpp) 13 | target_link_libraries(compare-phrase lib-diffdp) 14 | 15 | add_executable(compare-eisner app/src/compare-eisner.cpp) 16 | target_link_libraries(compare-eisner lib-diffdp) 17 | 18 | add_executable(test-mtt app/src/test-mtt.cpp) 19 | target_link_libraries(test-mtt lib-diffdp) 20 | 21 | add_executable(test-dynet app/src/test-dynet.cpp) 22 | target_link_libraries(test-dynet lib-diffdp) 23 | 24 | 25 | file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} test/test-*.cpp) 26 | foreach(testSrc ${TEST_SRCS}) 27 | get_filename_component(testName ${testSrc} NAME_WE) 28 | 29 | add_executable(${testName} ${testSrc}) 30 | target_link_libraries(${testName} ${Boost_LIBRARIES} lib-diffdp dynet) 31 | 32 | add_test(NAME ${testName} COMMAND ${testName} ) 33 | endforeach(testSrc) 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Caio Filippo Corro 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 
| furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Differentiable Perturb-and-Parse operator
2 | 
3 | This repository contains the code for the continuous relaxation of the Eisner algorithm presented in:
4 | "Differentiable Perturb-and-Parse: Semi-Supervised Parsing with a Structured Variational Autoencoder",
5 | Caio Corro, Ivan Titov
6 | 
7 | 
8 | See: https://openreview.net/forum?id=BJlgNh0qKQ
9 | 
10 | 
11 | To cite:
12 | ```
13 | @InProceedings{perturb-and-parse,
14 | author = "Corro, Caio and Titov, Ivan",
15 | title = "Differentiable Perturb-and-Parse: Semi-Supervised Parsing with a Structured Variational Autoencoder",
16 | booktitle = "Proceedings of the Seventh International Conference on Learning Representations",
17 | year = "2019"
18 | }
19 | ```
20 | 
21 | The full VAE code and model will be released after the official proceedings release.
22 | If you have any questions, please contact me at the following email address: c.f.corro@uva.nl
23 | 
24 | 
25 | ## Usage
26 | 
27 | ```
28 | #include "diffdp/dynet/eisner.h"
29 | 
30 | auto arcs = dynet::algorithmic_differentiable_eisner(
31 | weights, // input: matrix of arc weights
32 | diffdp::DiscreteMode::ForwardRegularized, // relaxation mode
33 | diffdp::DependencyGraphMode::Adjacency, diffdp::DependencyGraphMode::Adjacency, // input/output format
34 | true // set to false to remove root arcs
35 | );
36 | ```
37 | 
38 | 
39 | ## Arguments
40 | 
41 | The following arguments must be provided:
42 | 1. the arc-factored weights of dependencies
43 | 2. the relaxation mode: diffdp::DiscreteMode::BackwardRegularized outputs the discrete structure and uses
44 | the relaxation only in the backward pass, while diffdp::DiscreteMode::ForwardRegularized uses the relaxation during the forward pass
45 | 3. the input format: diffdp::DependencyGraphMode::Adjacency uses an adjacency matrix as the input format, i.e. the main diagonal
46 | represents self-connections and is never used; diffdp::DependencyGraphMode::Compact uses the main diagonal to represent the weights
47 | of root dependencies
48 | 4. the output format
49 | 5. set to false to remove root arcs from the output
50 | 
51 | 
52 | ## Batching
53 | 
54 | This computational node can be used with mini-batches.
55 | However, it does not implement the auto-batch functionality of Dynet, so mini-batches should be constructed manually.
56 | 
57 | If sentences are of different sizes, a pointer of type "std::vector<unsigned>*" can be given as the last argument.
58 | This is compatible with static graphs (i.e. each forward call will check the sentence sizes stored in the vector).
59 | 
60 | WARNING: the sizes of the batched inputs *must not* include the root node.
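For instance, a minimal sketch of a batched call (`batched_weights` stands for a hypothetical batched expression of arc weights, and the `unsigned` element type of the size vector is an assumption based on the rest of the API):

```
// two sentences with 3 and 2 words respectively (root node excluded)
std::vector<unsigned> sizes{3, 2};

auto arcs = dynet::algorithmic_differentiable_eisner(
    batched_weights, // one weight matrix per batch element
    diffdp::DiscreteMode::ForwardRegularized,
    diffdp::DependencyGraphMode::Adjacency, diffdp::DependencyGraphMode::Adjacency,
    true,
    &sizes // pointer to the per-sentence sizes
);
```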
61 | 
62 | 
63 | ## TODO
64 | 
65 | - The memory usage could be divided by 2
66 | - Clean duplicate code
67 | - Static batch size (this could drastically reduce memory usage)
-------------------------------------------------------------------------------- /app/src/compare-eisner.cpp: --------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <random>
3 | #include <vector>
4 | 
5 | #include "diffdp/chart.h"
6 | 
7 | #include "diffdp/math.h"
8 | #include "diffdp/deduction_operations.h"
9 | #include "diffdp/algorithm/eisner.h"
10 | 
11 | int main(int argc, char* argv[])
12 | {
13 | std::default_random_engine generator;
14 | std::uniform_real_distribution<float> distribution(0.0, 1.0);
15 | 
16 | const unsigned size = 5;
17 | std::vector<float> weights(size * size);
18 | // fill the whole weight matrix, as done in compare-phrase.cpp
19 | for (unsigned i = 0 ; i < weights.size() ; ++i)
20 | weights.at(i) = distribution(generator);
21 | 
22 | std::cerr << "Entropy reg / Alg diff\n";
23 | diffdp::EntropyRegularizedEisner entrop_reg_eisner(size);
24 | entrop_reg_eisner.forward(
25 | [&] (unsigned head, unsigned mod) -> float
26 | {
27 | return weights.at(head + mod * size);
28 | }
29 | );
30 | diffdp::AlgorithmicDifferentiableEisner algo_diff_eisner(size);
31 | algo_diff_eisner.forward(
32 | [&] (unsigned head, unsigned mod) -> float
33 | {
34 | return weights.at(head + mod * size);
35 | });
36 | 
37 | for (unsigned head = 0 ; head < size ; ++head)
38 | {
39 | for (unsigned mod = 1 ; mod < size ; ++mod)
40 | {
41 | if (head == mod)
42 | continue;
43 | 
44 | std::cerr
45 | << entrop_reg_eisner.output(head, mod)
46 | << "\t"
47 | << algo_diff_eisner.output(head, mod)
48 | << "\n";
49 | }
50 | }
51 | }
-------------------------------------------------------------------------------- /app/src/compare-phrase.cpp: --------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <random>
3 | #include <vector>
4 | 
5 | #include "diffdp/chart.h"
6 | 
7 | #include "diffdp/math.h"
8 | #include "diffdp/deduction_operations.h"
9 | #include "diffdp/algorithm/binary_phrase.h"
10 | 
11 | int main(int argc, char* argv[])
12 | {
13 | std::default_random_engine generator;
14 | std::uniform_real_distribution<float> distribution(0.0, 1.0);
15 | 
16 | const unsigned size = 5;
17 | std::vector<float> weights(size * size);
18 | for (unsigned i = 0 ; i < weights.size() ; ++i)
19 | weights.at(i) = distribution(generator);
20 | 
21 | diffdp::AlgorithmicDifferentiableBinaryPhraseStructure algo_diff(size);
22 | algo_diff.forward(
23 | [&] (unsigned left, unsigned right) -> float
24 | {
25 | return weights.at(left + right * size);
26 | });
27 | 
28 | for (unsigned left = 0 ; left < size ; ++left)
29 | {
30 | for (unsigned right = left+1 ; right < size ; ++right)
31 | {
32 | std::cerr
33 | << left << "," << right
34 | << "\t"
35 | << algo_diff.output(left, right)
36 | << "\n";
37 | }
38 | }
39 | }
-------------------------------------------------------------------------------- /app/src/test-dynet.cpp: --------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <vector>
3 | 
4 | #include "dynet/init.h"
5 | #include "dynet/expr.h"
6 | #include "dynet/nodes-def-macros.h"
7 | #include "dynet/nodes-impl-macros.h"
8 | #include "dynet/tensor-eigen.h"
9 | #include "diffdp/dynet/matrix_tree_theorem.h"
10 | #include "dytools/functions/rooted_arborescence_marginals.h"
11 | 
12 | int main(int argc, char* argv[])
13 | {
14 | const auto size = 3;
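// note: dynet stores tensors in column-major order, so cell (i, j) of the
// weight matrix below lives at index i + j * size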
15 | dynet::initialize(argc, argv);
16 | 
17 | std::vector<float> v_weights(size * size, 0.f);
18 | v_weights.at(0 + 1 * size) = 1.f;
19 | v_weights.at(0 + 2 * size) = 4.f;
20 | v_weights.at(1 + 2 * size) = 1.f;
21 | v_weights.at(2 + 1 * size) = 1.f;
22 | 
23 | dynet::ComputationGraph cg;
24 | 
25 | const auto e_weights = dynet::input(cg, {size, size}, v_weights);
26 | 
27 | std::vector<unsigned> sizes{2};
28 | const auto e_marginals = dytools::rooted_arborescence_marginals(cg, e_weights, sizes);
29 | 
30 | const auto v_output = as_vector(cg.forward(e_marginals));
31 | for (unsigned i = 0 ; i < size ; ++i)
32 | {
33 | for (unsigned j = 0 ; j < size ; ++j)
34 | std::cerr << v_output.at(i + j * size) << "\t";
35 | std::cerr << "\n";
36 | }
37 | }
38 | 
-------------------------------------------------------------------------------- /app/src/test-mtt.cpp: --------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <vector>
3 | 
4 | #include "dynet/init.h"
5 | #include "dynet/expr.h"
6 | #include "diffdp/dynet/matrix_tree_theorem.h"
7 | 
8 | int main(int argc, char* argv[])
9 | {
10 | dynet::initialize(argc, argv);
11 | 
12 | const unsigned size = 3;
13 | std::vector<float> v_input(size * size, 1.f);
14 | 
15 | dynet::ComputationGraph cg;
16 | auto e_input = dynet::input(cg, {size, size}, v_input);
17 | auto e_output = dynet::matrix_tree_theorem(e_input);
18 | 
19 | auto v_output = as_vector(cg.forward(e_output));
20 | 
21 | for (unsigned i = 0 ; i < size ; ++i)
22 | {
23 | for (unsigned j = 0 ; j < size ; ++j)
24 | {
25 | std::cout << v_output.at(i + j * size) << "\t";
26 | }
27 | std::cout << "\n";
28 | }
29 | }
-------------------------------------------------------------------------------- /lib/CMakeLists.txt: --------------------------------------------------------------------------------
1 | add_library(
2 | lib-diffdp
3 | 
4 | src/chart.cpp
5 | 
6 | src/algorithm/eisner.cpp
7 | src/algorithm/binary_phrase.cpp
8 | 
9 | src/dynet/eisner.cpp
10 | src/dynet/binary_phrase.cpp
11 | #src/dynet/matrix_tree_theorem.cpp
12 | 
13 | src/builder/dependency.cpp
14 | src/builder/binary-phrase.cpp
15 | )
16 | 
17 | # Define headers for this library. PUBLIC headers are used for
18 | # compiling the library, and will be added to consumers' build
19 | # paths.
20 | target_include_directories( 21 | lib-diffdp 22 | 23 | PUBLIC 24 | $ 25 | $ 26 | 27 | PRIVATE 28 | src 29 | ) 30 | add_subdirectory("/Users/filippo/repos/dynet-tools" dytools) 31 | 32 | target_link_libraries(lib-diffdp ${Boost_LIBRARIES}) 33 | target_link_libraries(lib-diffdp dynet) 34 | target_link_libraries(lib-diffdp libdytools) -------------------------------------------------------------------------------- /lib/include/diffdp/algorithm/binary_phrase.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "diffdp/chart.h" 8 | #include "diffdp/deduction_operations.h" 9 | 10 | namespace diffdp 11 | { 12 | 13 | 14 | struct BinaryPhraseStructureChart 15 | { 16 | const unsigned size; 17 | const unsigned size_3d; 18 | const unsigned size_2d; 19 | float* _memory = nullptr; 20 | const bool _erase_memory; 21 | 22 | Tensor3D split_weights, backptr; 23 | Matrix weight, soft_selection; 24 | 25 | BinaryPhraseStructureChart(unsigned size); 26 | BinaryPhraseStructureChart(unsigned size, float* mem); 27 | ~BinaryPhraseStructureChart(); 28 | 29 | void zeros(); 30 | 31 | static std::size_t required_memory(const unsigned size); 32 | static unsigned required_cells(const unsigned size); 33 | }; 34 | 35 | 36 | 37 | struct AlgorithmicDifferentiableBinaryPhraseStructure 38 | { 39 | unsigned _size; 40 | 41 | std::shared_ptr chart_forward; 42 | std::shared_ptr chart_backward; 43 | 44 | explicit AlgorithmicDifferentiableBinaryPhraseStructure(const unsigned t_size); 45 | AlgorithmicDifferentiableBinaryPhraseStructure(std::shared_ptr chart_forward, std::shared_ptr chart_backward); 46 | 47 | template 48 | void forward(Functor&& weight_callback); 49 | 50 | template 51 | void backward(Functor&& gradient_callback); 52 | 53 | static void forward_maximize(std::shared_ptr& chart_forward); 54 | static void forward_backtracking(std::shared_ptr& chart_forward); 55 | 56 | static void backward_maximize(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 57 | static void backward_backtracking(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 58 | 59 | float output(const unsigned head, const unsigned mod) const; 60 | float gradient(const unsigned left, const unsigned right) const; 61 | 62 | unsigned size() const; 63 | }; 64 | 65 | 66 | struct EntropyRegularizedBinaryPhraseStructure 67 | { 68 | unsigned _size; 69 | 70 | std::shared_ptr chart_forward; 71 | std::shared_ptr chart_backward; 72 | 73 | explicit EntropyRegularizedBinaryPhraseStructure(const unsigned t_size); 74 | EntropyRegularizedBinaryPhraseStructure(std::shared_ptr chart_forward, std::shared_ptr chart_backward); 75 | 76 | 77 | template 78 | void forward(Functor&& weight_callback); 79 | 80 | template 81 | void backward(Functor&& gradient_callback); 82 | 83 | static void forward_maximize(std::shared_ptr& chart_forward); 84 | static void forward_backtracking(std::shared_ptr& chart_forward); 85 | 86 | static void backward_maximize(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 87 | static void backward_backtracking(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 88 | 89 | float output(const unsigned head, const unsigned mod) const; 90 | float gradient(const unsigned head, const unsigned mod) const; 91 | 92 | unsigned size() const; 93 | }; 94 | 95 | 96 | // templates implementations 97 | 98 | template 99 | void AlgorithmicDifferentiableBinaryPhraseStructure::forward(Functor&& weight_callback) 100 | 
{ 101 | const unsigned size = chart_forward->size; 102 | 103 | chart_forward->zeros(); // we could skip some zeros here 104 | for (unsigned i = 0; i < size; ++i) 105 | { 106 | for (unsigned j = i + 1; j < size; ++j) 107 | { 108 | chart_forward->weight(i, j) = weight_callback(i, j); 109 | } 110 | } 111 | 112 | AlgorithmicDifferentiableBinaryPhraseStructure::forward_maximize(chart_forward); 113 | AlgorithmicDifferentiableBinaryPhraseStructure::forward_backtracking(chart_forward); 114 | } 115 | 116 | template 117 | void AlgorithmicDifferentiableBinaryPhraseStructure::backward(Functor&& gradient_callback) 118 | { 119 | const unsigned size = chart_forward->size; 120 | 121 | chart_backward->zeros(); 122 | // init gradient here 123 | for (unsigned i = 0; i < size; ++i) 124 | { 125 | for (unsigned j = i + 1; j < size; ++j) 126 | { 127 | chart_backward->soft_selection(i, j) = gradient_callback(i, j); 128 | } 129 | } 130 | 131 | AlgorithmicDifferentiableBinaryPhraseStructure::backward_backtracking(chart_forward, chart_backward); 132 | AlgorithmicDifferentiableBinaryPhraseStructure::backward_maximize(chart_forward, chart_backward); 133 | } 134 | 135 | template 136 | void EntropyRegularizedBinaryPhraseStructure::forward(Functor&& weight_callback) 137 | { 138 | const unsigned size = chart_forward->size; 139 | 140 | chart_forward->zeros(); // we could skip some zeros here 141 | for (unsigned i = 0; i < size; ++i) 142 | { 143 | for (unsigned j = i + 1; j < size; ++j) 144 | { 145 | chart_forward->weight(i, j) = weight_callback(i, j); 146 | } 147 | } 148 | 149 | EntropyRegularizedBinaryPhraseStructure::forward_maximize(chart_forward); 150 | EntropyRegularizedBinaryPhraseStructure::forward_backtracking(chart_forward); 151 | } 152 | 153 | template 154 | void EntropyRegularizedBinaryPhraseStructure::backward(Functor&& gradient_callback) 155 | { 156 | const unsigned size = chart_forward->size; 157 | 158 | chart_backward->zeros(); 159 | // init gradient here 160 | for (unsigned i = 0; i < size; ++i) 161 | { 162 | for (unsigned j = i + 1; j < size; ++j) 163 | { 164 | chart_backward->soft_selection(i, j) = gradient_callback(i, j); 165 | } 166 | } 167 | 168 | EntropyRegularizedBinaryPhraseStructure::backward_backtracking(chart_forward, chart_backward); 169 | EntropyRegularizedBinaryPhraseStructure::backward_maximize(chart_forward, chart_backward); 170 | } 171 | 172 | 173 | } -------------------------------------------------------------------------------- /lib/include/diffdp/algorithm/eisner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "diffdp/chart.h" 8 | #include "diffdp/deduction_operations.h" 9 | 10 | namespace diffdp 11 | { 12 | 13 | struct EisnerChart 14 | { 15 | const unsigned size; 16 | const unsigned size_3d; 17 | const unsigned size_2d; 18 | float* _memory = nullptr; 19 | const bool _erase_memory; 20 | 21 | Tensor3D 22 | a_cleft, a_cright, a_uleft, a_uright, 23 | b_cleft, b_cright, b_uleft, b_uright; 24 | 25 | Matrix 26 | c_cleft, c_cright, c_uleft, c_uright, 27 | soft_c_cleft, soft_c_cright, soft_c_uleft, soft_c_uright 28 | ; 29 | 30 | EisnerChart(unsigned size); 31 | EisnerChart(unsigned size, float* mem); 32 | ~EisnerChart(); 33 | 34 | void zeros(); 35 | 36 | static std::size_t required_memory(const unsigned size); 37 | static unsigned required_cells(const unsigned size); 38 | }; 39 | 40 | /* 41 | * Continuous relaxation of "Differentiable Perturb-and-Parse: Semi-Supervised Parsing with a 
Structured Variational Autoencoder, Corro & Titov" 42 | */ 43 | struct AlgorithmicDifferentiableEisner 44 | { 45 | unsigned _size; 46 | 47 | std::shared_ptr chart_forward; 48 | std::shared_ptr chart_backward; 49 | 50 | explicit AlgorithmicDifferentiableEisner(const unsigned t_size); 51 | AlgorithmicDifferentiableEisner(std::shared_ptr chart_forward, std::shared_ptr chart_backward); 52 | 53 | template 54 | void forward(Functor&& weight_callback); 55 | 56 | template 57 | void backward(Functor&& gradient_callback); 58 | 59 | static void forward_maximize(std::shared_ptr& chart_forward); 60 | static void forward_backtracking(std::shared_ptr& chart_forward); 61 | 62 | static void backward_maximize(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 63 | static void backward_backtracking(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 64 | 65 | float output(const unsigned head, const unsigned mod) const; 66 | float gradient(const unsigned head, const unsigned mod) const; 67 | 68 | unsigned size() const; 69 | }; 70 | 71 | 72 | 73 | /* 74 | * Continuous relaxation of "Differentiable Dynamic Programming for Structured Prediction and Attention, Mensch & Blondel" 75 | * This is equivalent to structured attention (i.e. marginalization), 76 | * but it has better numerically stability in practice (i.e. no underflow/overflow issue) 77 | */ 78 | struct EntropyRegularizedEisner 79 | { 80 | unsigned _size; 81 | 82 | std::shared_ptr chart_forward; 83 | std::shared_ptr chart_backward; 84 | 85 | explicit EntropyRegularizedEisner(const unsigned t_size); 86 | EntropyRegularizedEisner(std::shared_ptr chart_forward, std::shared_ptr chart_backward); 87 | 88 | 89 | template 90 | void forward(Functor&& weight_callback); 91 | 92 | template 93 | void backward(Functor&& gradient_callback); 94 | 95 | static void forward_maximize(std::shared_ptr& chart_forward); 96 | static void forward_backtracking(std::shared_ptr& chart_forward); 97 | 98 | //static void backward_maximize(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 99 | //static void backward_backtracking(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward); 100 | 101 | float output(const unsigned head, const unsigned mod) const; 102 | float gradient(const unsigned head, const unsigned mod) const; 103 | 104 | unsigned size() const; 105 | }; 106 | 107 | 108 | // templates implementations 109 | 110 | template 111 | void AlgorithmicDifferentiableEisner::forward(Functor&& weight_callback) 112 | { 113 | const unsigned size = chart_forward->size; 114 | 115 | chart_forward->zeros(); // we could skip some zeros here 116 | for (unsigned i = 0; i < size; ++i) 117 | { 118 | for (unsigned j = 1; j < size; ++j) 119 | { 120 | if (i < j) 121 | chart_forward->c_uright(i, j) = weight_callback(i, j); 122 | else if (j < i) 123 | chart_forward->c_uleft(j, i) = weight_callback(i, j); 124 | } 125 | } 126 | 127 | AlgorithmicDifferentiableEisner::forward_maximize(chart_forward); 128 | AlgorithmicDifferentiableEisner::forward_backtracking(chart_forward); 129 | } 130 | 131 | template 132 | void AlgorithmicDifferentiableEisner::backward(Functor&& gradient_callback) 133 | { 134 | const unsigned size = chart_forward->size; 135 | 136 | chart_backward->zeros(); 137 | for (unsigned i = 0; i < size; ++i) 138 | { 139 | for (unsigned j = 1; j < size; ++j) 140 | { 141 | if (i < j) 142 | chart_backward->soft_c_uright(i, j) = gradient_callback(i, j); 143 | else if (j < i) 144 | chart_backward->soft_c_uleft(j, i) = gradient_callback(i, j); 145 | 
} 146 | } 147 | 148 | AlgorithmicDifferentiableEisner::backward_backtracking(chart_forward, chart_backward); 149 | AlgorithmicDifferentiableEisner::backward_maximize(chart_forward, chart_backward); 150 | } 151 | 152 | template 153 | void EntropyRegularizedEisner::forward(Functor&& weight_callback) 154 | { 155 | const unsigned size = chart_forward->size; 156 | 157 | // this initialization seems ok, but check why it works! 158 | chart_forward->zeros(); // we could skip some zeros here 159 | for (unsigned i = 0; i < size; ++i) 160 | { 161 | for (unsigned j = 1; j < size; ++j) 162 | { 163 | if (i < j) 164 | chart_forward->c_uright(i, j) = weight_callback(i, j); 165 | else if (j < i) 166 | chart_forward->c_uleft(j, i) = weight_callback(i, j); 167 | } 168 | } 169 | 170 | EntropyRegularizedEisner::forward_maximize(chart_forward); 171 | EntropyRegularizedEisner::forward_backtracking(chart_forward); 172 | } 173 | 174 | template 175 | void EntropyRegularizedEisner::backward(Functor&& gradient_callback) 176 | { 177 | const unsigned size = chart_forward->size; 178 | 179 | // check if this init is correct 180 | chart_backward->zeros(); 181 | for (unsigned i = 0; i < size; ++i) 182 | { 183 | for (unsigned j = 1; j < size; ++j) 184 | { 185 | if (i < j) 186 | chart_backward->soft_c_uright(i, j) = gradient_callback(i, j); 187 | else if (j < i) 188 | chart_backward->soft_c_uleft(j, i) = gradient_callback(i, j); 189 | } 190 | } 191 | 192 | // backpropagate throught backtracking 193 | for (unsigned l = 1; l < size ; ++l) 194 | { 195 | for (unsigned i = 0; i < size - l; ++i) 196 | { 197 | unsigned j = i + l; 198 | 199 | if (i > 0u) 200 | { 201 | diffdp::backward_backtracking( 202 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 203 | chart_forward->soft_c_uleft(i, j), 204 | chart_forward->b_uleft.iter3(i, j, i), 205 | 206 | chart_backward->soft_c_cright.iter2(i, i), chart_backward->soft_c_cleft.iter1(i + 1, j), 207 | &chart_backward->soft_c_uleft(i, j), 208 | chart_backward->b_uleft.iter3(i, j, i), 209 | 210 | l 211 | ); 212 | } 213 | 214 | diffdp::backward_backtracking( 215 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 216 | chart_forward->soft_c_uright(i, j), 217 | chart_forward->b_uright.iter3(i, j, i), 218 | 219 | chart_backward->soft_c_cright.iter2(i, i), chart_backward->soft_c_cleft.iter1(i + 1, j), 220 | &chart_backward->soft_c_uright(i, j), 221 | chart_backward->b_uright.iter3(i, j, i), 222 | 223 | l 224 | ); 225 | 226 | if (i > 0u) 227 | { 228 | diffdp::backward_backtracking( 229 | chart_forward->soft_c_cleft.iter2(i, i), chart_forward->soft_c_uleft.iter1(i, j), 230 | chart_forward->soft_c_cleft(i, j), 231 | chart_forward->b_cleft.iter3(i, j, i), 232 | 233 | chart_backward->soft_c_cleft.iter2(i, i), chart_backward->soft_c_uleft.iter1(i, j), 234 | &chart_backward->soft_c_cleft(i, j), 235 | chart_backward->b_cleft.iter3(i, j, i), 236 | 237 | l 238 | ); 239 | } 240 | 241 | diffdp::backward_backtracking( 242 | chart_forward->soft_c_uright.iter2(i, i+1), chart_forward->soft_c_cright.iter1(i+1, j), 243 | chart_forward->soft_c_cright(i, j), 244 | chart_forward->b_cright.iter3(i, j, i + 1), 245 | 246 | chart_backward->soft_c_uright.iter2(i, i+1), chart_backward->soft_c_cright.iter1(i+1, j), 247 | &chart_backward->soft_c_cright(i, j), 248 | chart_backward->b_cright.iter3(i, j, i + 1), 249 | 250 | l 251 | ); 252 | } 253 | } 254 | for (unsigned l = size - 1; l >= 1; --l) 255 | { 256 | for (unsigned i = 0; i < size - l; ++i) 257 | 
{ 258 | unsigned j = i + l; 259 | 260 | if (i > 0u) 261 | { 262 | backward_entropy_reg( 263 | chart_forward->c_cleft.iter2(i, i), chart_forward->c_uleft.iter1(i, j), 264 | chart_forward->a_cleft.iter3(i, j, i), 265 | chart_forward->b_cleft.iter3(i, j, i), 266 | 267 | chart_backward->c_cleft.iter2(i, i), chart_backward->c_uleft.iter1(i, j), 268 | chart_backward->c_cleft(i, j), 269 | chart_backward->a_cleft.iter3(i, j, i), 270 | chart_backward->b_cleft.iter3(i, j, i), 271 | 272 | l 273 | ); 274 | } 275 | 276 | backward_entropy_reg( 277 | chart_forward->c_uright.iter2(i, i + 1), chart_forward->c_cright.iter1(i + 1, j), 278 | chart_forward->a_cright.iter3(i, j, i + 1), 279 | chart_forward->b_cright.iter3(i, j, i + 1), 280 | 281 | chart_backward->c_uright.iter2(i, i + 1), chart_backward->c_cright.iter1(i + 1, j), 282 | chart_backward->c_cright(i, j), 283 | chart_backward->a_cright.iter3(i, j, i + 1), 284 | chart_backward->b_cright.iter3(i, j, i + 1), 285 | 286 | l 287 | ); 288 | 289 | if (i > 0u) 290 | { 291 | backward_entropy_reg( 292 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 293 | chart_forward->a_uleft.iter3(i, j, i), 294 | chart_forward->b_uleft.iter3(i, j, i), 295 | 296 | chart_backward->c_cright.iter2(i, i), chart_backward->c_cleft.iter1(i + 1, j), 297 | chart_backward->c_uleft(i, j), 298 | chart_backward->a_uleft.iter3(i, j, i), 299 | chart_backward->b_uleft.iter3(i, j, i), 300 | 301 | l 302 | ); 303 | } 304 | 305 | backward_entropy_reg( 306 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 307 | chart_forward->a_uright.iter3(i, j, i), 308 | chart_forward->b_uright.iter3(i, j, i), 309 | 310 | chart_backward->c_cright.iter2(i, i), chart_backward->c_cleft.iter1(i + 1, j), 311 | chart_backward->c_uright(i, j), 312 | chart_backward->a_uright.iter3(i, j, i), 313 | chart_backward->b_uright.iter3(i, j, i), 314 | 315 | l 316 | ); 317 | } 318 | } 319 | } 320 | 321 | 322 | 323 | 324 | } 325 | 326 | -------------------------------------------------------------------------------- /lib/include/diffdp/builder/binary-phrase.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "dynet/expr.h" 4 | 5 | namespace diffdp 6 | { 7 | 8 | enum struct BinaryPhraseType 9 | { 10 | AlgDiff, 11 | EntropyReg 12 | }; 13 | 14 | struct BinaryPhraseSettings 15 | { 16 | BinaryPhraseType type = BinaryPhraseType::AlgDiff; 17 | bool perturb = false; 18 | }; 19 | 20 | struct BinaryPhraseBuilder 21 | { 22 | const BinaryPhraseSettings settings; 23 | dynet::ComputationGraph* _cg; 24 | bool _training = true; 25 | 26 | BinaryPhraseBuilder(const BinaryPhraseSettings& settings); 27 | 28 | void new_graph(dynet::ComputationGraph& cg, bool training); 29 | dynet::Expression relaxed(const dynet::Expression& weights); 30 | dynet::Expression argmax(const dynet::Expression& weights); 31 | 32 | dynet::Expression relaxed_alg_diff(const dynet::Expression& weights); 33 | dynet::Expression relaxed_entropy_Reg(const dynet::Expression& weights); 34 | protected: 35 | /** 36 | * Perturb arc if training mode and setting.perturb == true 37 | */ 38 | dynet::Expression perturb(const dynet::Expression& arc_weights); 39 | }; 40 | 41 | } -------------------------------------------------------------------------------- /lib/include/diffdp/builder/dependency.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "dynet/expr.h" 4 | 5 | namespace diffdp 6 | { 7 | 8 | enum 
struct DependencyType 9 | { 10 | Head, 11 | NonProjective, 12 | ProjectiveAlgDiff, 13 | ProjectiveEntropyReg 14 | }; 15 | 16 | struct DependencySettings 17 | { 18 | DependencyType type = DependencyType::Head; 19 | bool perturb = false; 20 | }; 21 | 22 | struct DependencyBuilder 23 | { 24 | const DependencySettings settings; 25 | dynet::ComputationGraph* _cg; 26 | bool _training = true; 27 | 28 | DependencyBuilder(const DependencySettings& settings); 29 | 30 | void new_graph(dynet::ComputationGraph& cg, bool training); 31 | dynet::Expression relaxed(const dynet::Expression& arc_weights, std::vector* sizes = nullptr, dynet::Expression* e_mask = nullptr); 32 | 33 | dynet::Expression relaxed_head(const dynet::Expression& arc_weights, dynet::Expression* e_mask = nullptr); 34 | dynet::Expression relaxed_nonprojective(const dynet::Expression& arc_weights, std::vector* sizes = nullptr); 35 | dynet::Expression relaxed_projective_alg_diff(const dynet::Expression& arc_weights, std::vector* sizes = nullptr); 36 | dynet::Expression relaxed_projective_entropy_reg(const dynet::Expression& arc_weights, std::vector* sizes = nullptr); 37 | 38 | dynet::Expression argmax(const dynet::Expression& arc_weights, std::vector* sizes = nullptr, dynet::Expression* e_mask = nullptr); 39 | dynet::Expression argmax_head(const dynet::Expression& arc_weights, dynet::Expression* e_mask = nullptr); 40 | dynet::Expression argmax_nonprojective(const dynet::Expression& arc_weights, std::vector* sizes = nullptr); 41 | dynet::Expression argmax_projective_alg_diff(const dynet::Expression& arc_weights, std::vector* sizes = nullptr); 42 | dynet::Expression argmax_projective_entropy_reg(const dynet::Expression& arc_weights, std::vector* sizes = nullptr); 43 | 44 | protected: 45 | /** 46 | * Perturb arc if training mode and setting.perturb == true 47 | */ 48 | dynet::Expression perturb(const dynet::Expression& arc_weights); 49 | }; 50 | 51 | } -------------------------------------------------------------------------------- /lib/include/diffdp/chart.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace diffdp 6 | { 7 | 8 | template 9 | struct Tensor3D 10 | { 11 | unsigned _size; 12 | bool _free_data; 13 | T* _data; 14 | 15 | Tensor3D(const unsigned size); 16 | Tensor3D(const unsigned size, T* _data); 17 | ~Tensor3D(); 18 | 19 | static std::size_t required_memory(const unsigned size); 20 | static unsigned required_cells(const unsigned size); 21 | 22 | inline T& operator()(const unsigned i, const unsigned j, const unsigned k) noexcept; 23 | inline T operator()(const unsigned i, const unsigned j, const unsigned k) const noexcept; 24 | 25 | inline 26 | T* iter3(const unsigned i, const unsigned j, const unsigned k) noexcept; 27 | }; 28 | 29 | template 30 | struct Matrix; 31 | 32 | template 33 | struct MatrixRowIterator 34 | { 35 | Matrix* chart; 36 | T* current; 37 | 38 | MatrixRowIterator(Matrix* chart, T* current); 39 | MatrixRowIterator(const MatrixRowIterator& o); 40 | 41 | T& operator*(); 42 | MatrixRowIterator& operator++(); 43 | bool operator!=(const MatrixRowIterator& o) const; 44 | }; 45 | 46 | template 47 | struct Matrix 48 | { 49 | unsigned _size; 50 | bool _free_data; 51 | T* _data; 52 | 53 | Matrix(const unsigned size); 54 | Matrix(const unsigned size, T* _data); 55 | ~Matrix(); 56 | 57 | inline static std::size_t required_memory(const unsigned size); 58 | inline static unsigned required_cells(const unsigned size); 59 | 60 | inline T& 
operator()(const unsigned i, const unsigned j) noexcept; 61 | inline T operator()(const unsigned i, const unsigned j) const noexcept; 62 | 63 | inline MatrixRowIterator iter1(const unsigned i, const unsigned j) noexcept; 64 | inline T* iter2(const unsigned i, const unsigned j) noexcept; 65 | }; 66 | 67 | 68 | // Template implementations 69 | template 70 | Tensor3D::Tensor3D(const unsigned size) : 71 | _size(size), 72 | _free_data(true) 73 | { 74 | _data = new T[required_cells(size)]; 75 | } 76 | 77 | template 78 | Tensor3D::Tensor3D(const unsigned size, T* _data) : 79 | _size(size), 80 | _free_data(false), 81 | _data(_data) 82 | {} 83 | 84 | template 85 | Tensor3D::~Tensor3D() 86 | { 87 | if (_free_data) 88 | delete[] _data; 89 | } 90 | 91 | template 92 | std::size_t Tensor3D::required_memory(const unsigned size) 93 | { 94 | return required_cells(size) * sizeof(T); 95 | } 96 | 97 | template 98 | unsigned Tensor3D::required_cells(const unsigned size) 99 | { 100 | return size * size * size; 101 | } 102 | 103 | 104 | template 105 | T& Tensor3D::operator()(const unsigned i, const unsigned j, const unsigned k) noexcept 106 | { 107 | return _data[i * _size * _size + j * _size + k]; 108 | } 109 | 110 | 111 | template 112 | T Tensor3D::operator()(const unsigned i, const unsigned j, const unsigned k) const noexcept 113 | { 114 | return _data[i * _size * _size + j * _size + k]; 115 | } 116 | 117 | 118 | template 119 | T* Tensor3D::iter3(const unsigned i, const unsigned j, const unsigned k) noexcept 120 | { 121 | return _data + i * _size *_size + j * _size + k; 122 | } 123 | 124 | 125 | template 126 | MatrixRowIterator::MatrixRowIterator(Matrix* chart, T* current) : 127 | chart(chart), 128 | current(current) 129 | {} 130 | 131 | template 132 | MatrixRowIterator::MatrixRowIterator(const MatrixRowIterator& o) : 133 | chart(o.chart), 134 | current(o.current) 135 | {} 136 | 137 | template 138 | T& MatrixRowIterator::operator*() 139 | { 140 | return *current; 141 | } 142 | 143 | template 144 | MatrixRowIterator& MatrixRowIterator::operator++() 145 | { 146 | current += chart->_size; 147 | return *this; 148 | } 149 | 150 | template 151 | bool MatrixRowIterator::operator!=(const MatrixRowIterator& o) const 152 | { 153 | return !(chart == o.chart && current == o.current); 154 | } 155 | 156 | 157 | 158 | template 159 | Matrix::Matrix(const unsigned size) : 160 | _size(size), 161 | _free_data(true), 162 | _data(new T[required_cells(size)]) 163 | {} 164 | 165 | template 166 | Matrix::Matrix(const unsigned size, T* _data) : 167 | _size(size), 168 | _free_data(false), 169 | _data(_data) 170 | {} 171 | 172 | template 173 | std::size_t Matrix::required_memory(const unsigned size) 174 | { 175 | return required_cells(size) * sizeof(T); 176 | } 177 | 178 | template 179 | unsigned Matrix::required_cells(const unsigned size) 180 | { 181 | return size * size; 182 | } 183 | 184 | template 185 | Matrix::~Matrix() 186 | { 187 | if (_free_data) 188 | delete[] _data; 189 | } 190 | 191 | template 192 | T& Matrix::operator()(const unsigned i, const unsigned j) noexcept 193 | { 194 | return _data[i * _size + j]; 195 | } 196 | 197 | template 198 | T Matrix::operator()(const unsigned i, const unsigned j) const noexcept 199 | { 200 | return _data[i * _size + j]; 201 | } 202 | 203 | template 204 | MatrixRowIterator Matrix::iter1(const unsigned i, const unsigned j) noexcept 205 | { 206 | return {this, _data + i * _size + j}; 207 | } 208 | 209 | template 210 | T* Matrix::iter2(const unsigned i, const unsigned j) noexcept 211 | { 212 | 
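// iter2 returns a raw pointer into row i starting at column j: incrementing it
// moves along the row, whereas the MatrixRowIterator returned by iter1 advances
// by _size at each step, i.e. moves down a column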
return _data + i * _size + j; 213 | } 214 | 215 | 216 | } 217 | -------------------------------------------------------------------------------- /lib/include/diffdp/deduction_operations.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /** 4 | * Header-only implementation of operations on deduction rules 5 | * 6 | * Inputs/outputs are iterator-like objects that must be: 7 | * - copy-constructible 8 | * - dereferenceable 9 | * - incrementable 10 | * 11 | * Author: Caio Corro 12 | */ 13 | 14 | #include 15 | #include "diffdp/math.h" 16 | 17 | namespace diffdp 18 | { 19 | 20 | template 21 | float forward_algorithmic_softmax( 22 | T left_antecedent, U right_antecedent, 23 | V split_weights, 24 | W backptr, 25 | unsigned size 26 | ) 27 | { 28 | cwise_add(split_weights, left_antecedent, right_antecedent, size); 29 | softmax(backptr, split_weights, size); 30 | return dot(split_weights, backptr, size); 31 | } 32 | 33 | template 34 | void forward_backtracking( 35 | T contrib_left_antecedent, U contrib_right_antecedent, 36 | const float contrib_consequent, 37 | V backptr, 38 | const unsigned size 39 | ) 40 | { 41 | add_cwise_mult(contrib_left_antecedent, backptr, contrib_consequent, size); 42 | add_cwise_mult(contrib_right_antecedent, backptr, contrib_consequent, size); 43 | } 44 | 45 | template 46 | void backward_backtracking( 47 | T contrib_left_antecedent, U contrib_right_antecedent, 48 | const float contrib_consequent, 49 | V backptr, 50 | 51 | A gradient_contrib_left_antecedent, B gradient_contrib_right_antecedent, 52 | float *gradient_contrib_consequent, 53 | C gradient_backptr, 54 | 55 | const unsigned size 56 | ) 57 | { 58 | *gradient_contrib_consequent += dot(backptr, gradient_contrib_left_antecedent, size); 59 | *gradient_contrib_consequent += dot(backptr, gradient_contrib_right_antecedent, size); 60 | add_cwise_mult(gradient_backptr, gradient_contrib_left_antecedent, contrib_consequent, size); 61 | add_cwise_mult(gradient_backptr, gradient_contrib_right_antecedent, contrib_consequent, size); 62 | } 63 | 64 | 65 | template 66 | void backward_algorithmic_softmax( 67 | T left_antecedent, U right_antecedent, 68 | V split_weights, 69 | W backptr, 70 | 71 | A gradient_left_antecedent, B gradient_right_antecedent, 72 | const float gradient_consequent, 73 | C gradient_split_weights, 74 | D gradient_backptr, 75 | 76 | unsigned size 77 | ) 78 | { 79 | add_cwise_mult(gradient_backptr, split_weights, gradient_consequent, size); 80 | add_cwise_mult(gradient_split_weights, backptr, gradient_consequent, size); 81 | 82 | backprop_softmax(gradient_split_weights, gradient_backptr, split_weights, backptr, size); 83 | 84 | add(gradient_left_antecedent, gradient_split_weights, size); 85 | add(gradient_right_antecedent, gradient_split_weights, size); 86 | } 87 | 88 | 89 | template 90 | float forward_entropy_reg( 91 | T left_antecedent, U right_antecedent, 92 | V split_weights, 93 | W backptr, 94 | unsigned size 95 | ) 96 | { 97 | cwise_add(split_weights, left_antecedent, right_antecedent, size); 98 | softmax(backptr, split_weights, size); 99 | float m = max(split_weights, size); 100 | float s = 0; 101 | for (unsigned i = 0 ; i < size ; ++i, ++split_weights) 102 | s += std::exp(*split_weights-m); 103 | return m + std::log(s); 104 | } 105 | 106 | 107 | template 108 | void backward_entropy_reg( 109 | T left_antecedent, U right_antecedent, 110 | V split_weights, 111 | W backptr, 112 | 113 | A gradient_left_antecedent, B gradient_right_antecedent, 114 | 
const float gradient_consequent, 115 | C gradient_split_weights, 116 | D gradient_backptr, 117 | 118 | unsigned size 119 | ) 120 | { 121 | add_cwise_mult(gradient_split_weights, backptr, gradient_consequent, size); 122 | 123 | backprop_softmax(gradient_split_weights, gradient_backptr, split_weights, backptr, size); 124 | 125 | add(gradient_left_antecedent, gradient_split_weights, size); 126 | add(gradient_right_antecedent, gradient_split_weights, size); 127 | } 128 | 129 | } -------------------------------------------------------------------------------- /lib/include/diffdp/dynet/args.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace diffdp 4 | { 5 | 6 | enum struct DiscreteMode 7 | { 8 | Null, // do not backpropagate 9 | StraightThrough, // discrete output, copy input gradient 10 | ForwardRegularized, // differentiable surrogate 11 | BackwardRegularized // forward: discrete, backward: differentiable surrogate 12 | }; 13 | 14 | } -------------------------------------------------------------------------------- /lib/include/diffdp/dynet/binary_phrase.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "dynet/expr.h" 10 | #include "dynet/tensor-eigen.h" 11 | #include "dynet/nodes-impl-macros.h" 12 | #include "dynet/nodes-def-macros.h" 13 | 14 | #include "diffdp/dynet/args.h" 15 | #include "diffdp/algorithm/binary_phrase.h" 16 | 17 | namespace dynet 18 | { 19 | 20 | Expression algorithmic_differentiable_binary_phrase_structure( 21 | const Expression &x, 22 | diffdp::DiscreteMode mode, 23 | std::vector *batch_sizes = nullptr 24 | ); 25 | 26 | Expression entropy_regularized_binary_phrase_structure( 27 | const Expression &x, 28 | diffdp::DiscreteMode mode, 29 | std::vector *batch_sizes = nullptr 30 | ); 31 | 32 | struct AlgorithmicDifferentiableBinaryPhraseStructure : 33 | public dynet::Node 34 | { 35 | const diffdp::DiscreteMode mode; 36 | std::vector* batch_sizes = nullptr; 37 | 38 | std::vector _ce_ptr; 39 | 40 | explicit AlgorithmicDifferentiableBinaryPhraseStructure( 41 | const std::initializer_list& a, 42 | diffdp::DiscreteMode mode, 43 | std::vector* batch_sizes 44 | ); 45 | 46 | DYNET_NODE_DEFINE_DEV_IMPL() 47 | 48 | virtual bool supports_multibatch() const override; 49 | size_t aux_storage_size() const override; 50 | 51 | virtual ~AlgorithmicDifferentiableBinaryPhraseStructure(); 52 | }; 53 | 54 | struct EntropyRegularizedBinaryPhraseStructure : 55 | public dynet::Node 56 | { 57 | const diffdp::DiscreteMode mode; 58 | std::vector* batch_sizes = nullptr; 59 | 60 | std::vector _ce_ptr; 61 | 62 | explicit EntropyRegularizedBinaryPhraseStructure( 63 | const std::initializer_list& a, 64 | diffdp::DiscreteMode mode, 65 | std::vector* batch_sizes 66 | ); 67 | 68 | DYNET_NODE_DEFINE_DEV_IMPL() 69 | 70 | virtual bool supports_multibatch() const override; 71 | size_t aux_storage_size() const override; 72 | 73 | virtual ~EntropyRegularizedBinaryPhraseStructure(); 74 | }; 75 | 76 | } 77 | -------------------------------------------------------------------------------- /lib/include/diffdp/dynet/eisner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "dynet/expr.h" 10 | #include "dynet/tensor-eigen.h" 11 | #include "dynet/nodes-impl-macros.h" 12 | #include 
"dynet/nodes-def-macros.h" 13 | 14 | #include "diffdp/dynet/args.h" 15 | #include "diffdp/algorithm/eisner.h" 16 | 17 | namespace diffdp 18 | { 19 | 20 | enum struct DependencyGraphMode 21 | { 22 | Adjacency, // adjacency matrix 23 | Compact 24 | }; 25 | 26 | std::pair from_adjacency(const std::pair dep, const diffdp::DependencyGraphMode mode); 27 | std::pair from_compact(const std::pair dep, const diffdp::DependencyGraphMode mode); 28 | 29 | } 30 | 31 | namespace dynet 32 | { 33 | 34 | Expression algorithmic_differentiable_eisner( 35 | const Expression &x, 36 | diffdp::DiscreteMode mode, 37 | diffdp::DependencyGraphMode input_graph = diffdp::DependencyGraphMode::Compact, 38 | diffdp::DependencyGraphMode output_graph = diffdp::DependencyGraphMode::Compact, 39 | bool with_root_arcs = true, 40 | std::vector *batch_sizes = nullptr 41 | ); 42 | 43 | Expression entropy_regularized_eisner( 44 | const Expression &x, 45 | diffdp::DiscreteMode mode, 46 | diffdp::DependencyGraphMode input_graph = diffdp::DependencyGraphMode::Compact, 47 | diffdp::DependencyGraphMode output_graph = diffdp::DependencyGraphMode::Compact, 48 | bool with_root_arcs = true, 49 | std::vector *batch_sizes = nullptr 50 | ); 51 | 52 | struct AlgorithmicDifferentiableEisner : 53 | public dynet::Node 54 | { 55 | const diffdp::DiscreteMode mode; 56 | const diffdp::DependencyGraphMode input_graph; 57 | const diffdp::DependencyGraphMode output_graph; 58 | bool with_root_arcs; 59 | std::vector* batch_sizes = nullptr; 60 | 61 | std::vector _ce_ptr; 62 | 63 | explicit AlgorithmicDifferentiableEisner( 64 | const std::initializer_list& a, 65 | diffdp::DiscreteMode mode, 66 | diffdp::DependencyGraphMode input_graph, 67 | diffdp::DependencyGraphMode output_graph, 68 | bool with_root_arcs, 69 | std::vector* batch_sizes 70 | ); 71 | 72 | DYNET_NODE_DEFINE_DEV_IMPL() 73 | 74 | virtual bool supports_multibatch() const override; 75 | size_t aux_storage_size() const override; 76 | 77 | virtual ~AlgorithmicDifferentiableEisner(); 78 | }; 79 | 80 | struct EntropyRegularizedEisner : 81 | public dynet::Node 82 | { 83 | const diffdp::DiscreteMode mode; 84 | const diffdp::DependencyGraphMode input_graph; 85 | const diffdp::DependencyGraphMode output_graph; 86 | bool with_root_arcs; 87 | std::vector* batch_sizes = nullptr; 88 | 89 | std::vector _ce_ptr; 90 | 91 | explicit EntropyRegularizedEisner( 92 | const std::initializer_list& a, 93 | diffdp::DiscreteMode mode, 94 | diffdp::DependencyGraphMode input_graph, 95 | diffdp::DependencyGraphMode output_graph, 96 | bool with_root_arcs, 97 | std::vector* batch_sizes 98 | ); 99 | 100 | DYNET_NODE_DEFINE_DEV_IMPL() 101 | 102 | virtual bool supports_multibatch() const override; 103 | size_t aux_storage_size() const override; 104 | 105 | virtual ~EntropyRegularizedEisner(); 106 | }; 107 | 108 | 109 | } 110 | -------------------------------------------------------------------------------- /lib/include/diffdp/dynet/matrix_tree_theorem.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "dynet/expr.h" 5 | #include "dynet/nodes-def-macros.h" 6 | 7 | namespace dynet 8 | { 9 | 10 | Expression matrix_tree_theorem(const Expression &weights); 11 | 12 | 13 | struct MatrixTreeTheorem : 14 | public dynet::Node 15 | { 16 | explicit MatrixTreeTheorem( 17 | const std::initializer_list& a 18 | ); 19 | 20 | DYNET_NODE_DEFINE_DEV_IMPL() 21 | 22 | virtual bool supports_multibatch() const override; 23 | size_t aux_storage_size() const override; 24 | }; 
25 | 
26 | 
27 | }
-------------------------------------------------------------------------------- /lib/include/diffdp/math.h: --------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | /**
4 | * Very small header-only math library used to simplify
5 | * the implementation of continuous relaxations of dynamic programming algorithms.
6 | * 
7 | * Inputs/outputs are iterator-like objects that must be:
8 | * - copy-constructible
9 | * - dereferenceable
10 | * - incrementable
11 | * 
12 | * Author: Caio Corro
13 | */
14 | 
15 | #include <cmath>
16 | #include <limits>
17 | 
18 | namespace diffdp
19 | {
20 | 
21 | /**
22 | * Performs an element-wise sum of vectors input1 and input2.
23 | * The result is stored in output.
24 | * 
25 | * @param output Vector where the result will be stored
26 | * @param input1 First input vector
27 | * @param input2 Second input vector
28 | * @param size Size of the input vectors
29 | */
30 | template<class T, class U, class V>
31 | void cwise_add(T output, U input1, V input2, const unsigned size)
32 | {
33 | for (unsigned i = 0u; i < size; ++i, ++input1, ++input2, ++output)
34 | *output = *input1 + *input2;
35 | }
36 | 
37 | /**
38 | * Return the maximum element stored in a vector.
39 | * 
40 | * @param input Input vector
41 | * @param size Size of the input vector
42 | * @return The maximum element stored in the input vector
43 | */
44 | template<class T>
45 | float max(T input, const unsigned size)
46 | {
47 | float value = -std::numeric_limits<float>::infinity();
48 | for (unsigned i = 0u; i < size; ++i, ++input)
49 | value = std::max(value, *input);
50 | return value;
51 | }
52 | 
53 | /**
54 | * Divide each element of a vector by a given value.
55 | * The result is stored in place.
56 | * 
57 | * @param input Input/output vector
58 | * @param v Value to divide by
59 | * @param size Size of the input/output vector
60 | */
61 | template<class T>
62 | void inplace_cwise_div(T input, const float v, const unsigned size)
63 | {
64 | for (unsigned i = 0u; i < size; ++i, ++input)
65 | *input = *input / v;
66 | }
67 | 
68 | /**
69 | * Component-wise addition between two vectors.
70 | * The result is accumulated in the first argument.
71 | * 
72 | * @param output Output vector (accumulator)
73 | * @param input Input vector
74 | * @param size Size of the input vector
75 | */
76 | template<class T, class U>
77 | void add(T output, U input, const unsigned size)
78 | {
79 | for (unsigned i = 0u; i < size; ++i, ++input, ++output)
80 | *output += *input;
81 | }
82 | 
83 | /**
84 | * Perform a component-wise multiplication of the input vector with a scalar
85 | * and accumulate the result component-wise into the output.
86 | * 
87 | * @param output Output vector (accumulator)
88 | * @param input Input vector
89 | * @param v Scalar used for the multiplication
90 | * @param size Size of the input vector
91 | */
92 | template<class T, class U>
93 | void add_cwise_mult(T output, U input, const float v, const unsigned size)
94 | {
95 | for (unsigned i = 0u; i < size; ++i, ++input, ++output)
96 | *output += (*input) * v;
97 | }
98 | 
99 | /**
100 | * Return the dot product between the two input vectors.
101 | * 
102 | * @param input1 First input vector
103 | * @param input2 Second input vector
104 | * @param size Size of the input vectors
105 | * @return The dot product between the two input vectors
106 | */
107 | template<class T, class U>
108 | float dot(T input1, U input2, const unsigned size)
109 | {
110 | float ret = 0.f;
111 | for (unsigned i = 0u; i < size; ++i, ++input1, ++input2)
112 | ret += (*input1) * (*input2);
113 | return ret;
114 | }
115 | 
116 | /**
117 | * Exponentiate each element of a vector after first subtracting a scalar.
118 | * 
119 | * @param output Output vector
120 | * @param input Input vector
121 | * @param m Scalar to subtract
122 | * @param size Size of the input
123 | * @return The sum of the elements of the output vector (i.e. the partition function)
124 | */
125 | template<class T, class U>
126 | float exp_minus_cst(T output, U input, const float m, const unsigned size)
127 | {
128 | float ret = 0.f;
129 | for (unsigned i = 0u; i < size; ++i, ++input, ++output)
130 | {
131 | const float v = std::exp(*input - m);
132 | *output = v;
133 | ret += v;
134 | }
135 | return ret;
136 | }
137 | 
138 | /**
139 | * Compute the softmax of the input.
140 | * 
141 | * @param output Output vector
142 | * @param input Input vector
143 | * @param size Size of the input vector
144 | */
145 | template<class T, class U>
146 | void softmax(T output, U input, unsigned size) noexcept
147 | {
148 | // max-shifted exponentiation for numerical stability
149 | float m = max(input, size);
150 | float z = exp_minus_cst(output, input, m, size);
151 | inplace_cwise_div(output, z, size);
152 | }
153 | 
154 | /**
155 | * Backpropagate through a softmax function.
156 | * 
157 | * @param gradient_input Gradient w.r.t. the softmax input (accumulated)
158 | * @param gradient_output Gradient w.r.t. the softmax output
159 | * @param input Input of the softmax
160 | * @param output Output of the softmax (i.e.
it should be computed beforehand)
161 | * @param size Size of the input
162 | */
163 | template<class A, class B, class T, class U>
164 | void backprop_softmax(A gradient_input, B gradient_output, T input, U output, const unsigned size)
165 | {
166 | const float s = dot(gradient_output, output, size);
167 | for (unsigned i = 0; i < size; ++i, ++gradient_input, ++gradient_output, ++output)
168 | *gradient_input += (*output) * ((*gradient_output) - s);
169 | }
170 | 
171 | }
-------------------------------------------------------------------------------- /lib/src/algorithm/binary_phrase.cpp: --------------------------------------------------------------------------------
1 | #include "diffdp/algorithm/binary_phrase.h"
2 | 
3 | namespace diffdp
4 | {
5 | 
6 | BinaryPhraseStructureChart::BinaryPhraseStructureChart(unsigned size) :
7 | size(size),
8 | size_3d(size*size*size),
9 | size_2d(size*size),
10 | _memory(new float[size_3d * 2 + size_2d * 2]),
11 | _erase_memory(true),
12 | split_weights(size, _memory),
13 | backptr(size, _memory + 1u*size_3d),
14 | weight(size, _memory + 2u*size_3d),
15 | soft_selection(size, _memory + 2u*size_3d + 1u*size_2d)
16 | {}
17 | 
18 | BinaryPhraseStructureChart::BinaryPhraseStructureChart(unsigned size, float* mem) :
19 | size(size),
20 | size_3d(size*size*size),
21 | size_2d(size*size),
22 | _memory(mem),
23 | _erase_memory(false),
24 | split_weights(size, _memory),
25 | backptr(size, _memory + 1u*size_3d),
26 | weight(size, _memory + 2u*size_3d),
27 | soft_selection(size, _memory + 2u*size_3d + 1u*size_2d)
28 | {}
29 | 
30 | BinaryPhraseStructureChart::~BinaryPhraseStructureChart()
31 | {
32 | if (_erase_memory)
33 | delete[] _memory;
34 | }
35 | 
36 | void BinaryPhraseStructureChart::zeros()
37 | {
38 | std::fill(_memory, _memory + required_cells(size), float{});
39 | }
40 | 
41 | std::size_t BinaryPhraseStructureChart::required_memory(const unsigned size)
42 | {
43 | return
44 | 2 * Tensor3D<float>::required_memory(size)
45 | + 2 * Matrix<float>::required_memory(size)
46 | ;
47 | }
48 | 
49 | unsigned BinaryPhraseStructureChart::required_cells(const unsigned size)
50 | {
51 | return
52 | 2 * Tensor3D<float>::required_cells(size)
53 | + 2 * Matrix<float>::required_cells(size)
54 | ;
55 | }
56 | 
57 | 
58 | AlgorithmicDifferentiableBinaryPhraseStructure::AlgorithmicDifferentiableBinaryPhraseStructure(const unsigned t_size) :
59 | _size(t_size),
60 | chart_forward(std::make_shared<BinaryPhraseStructureChart>(_size)),
61 | chart_backward(std::make_shared<BinaryPhraseStructureChart>(_size))
62 | {}
63 | 
64 | AlgorithmicDifferentiableBinaryPhraseStructure::AlgorithmicDifferentiableBinaryPhraseStructure(std::shared_ptr<BinaryPhraseStructureChart> chart_forward, std::shared_ptr<BinaryPhraseStructureChart> chart_backward) :
65 | _size(chart_forward->size),
66 | chart_forward(chart_forward),
67 | chart_backward(chart_backward)
68 | {}
69 | 
70 | void AlgorithmicDifferentiableBinaryPhraseStructure::forward_maximize(std::shared_ptr<BinaryPhraseStructureChart>& chart_forward)
71 | {
72 | const unsigned size = chart_forward->size;
73 | for (unsigned l = 1u; l < size; ++l)
74 | {
75 | for (unsigned i = 0u; i < size - l; ++i)
76 | {
77 | unsigned j = i + l;
78 | 
79 | // use += because we initialized them with arc weights
80 | chart_forward->weight(i, j) += forward_algorithmic_softmax(
81 | chart_forward->weight.iter2(i, i), chart_forward->weight.iter1(i + 1, j),
82 | chart_forward->split_weights.iter3(i, j, i),
83 | chart_forward->backptr.iter3(i, j, i),
84 | l
85 | );
86 | }
87 | }
88 | }
89 | 
90 | void AlgorithmicDifferentiableBinaryPhraseStructure::forward_backtracking(std::shared_ptr<BinaryPhraseStructureChart>& chart_forward)
91 | {
92 | const unsigned size = chart_forward->size;
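// the span covering the whole sentence is always part of the parse, so
// backtracking starts from a soft-selection weight of 1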
93 | chart_forward->soft_selection(0, size - 1) = 1.0f;
94 | 
95 | for (unsigned l = size - 1; l >= 1; --l)
96 | {
97 | for (unsigned i = 0u; i < size - l; ++i)
98 | {
99 | unsigned j = i + l;
100 | diffdp::forward_backtracking(
101 | chart_forward->soft_selection.iter2(i, i), chart_forward->soft_selection.iter1(i + 1, j),
102 | chart_forward->soft_selection(i, j),
103 | chart_forward->backptr.iter3(i, j, i),
104 | l
105 | );
106 | }
107 | }
108 | }
109 | 
110 | void AlgorithmicDifferentiableBinaryPhraseStructure::backward_backtracking(std::shared_ptr<BinaryPhraseStructureChart>& chart_forward, std::shared_ptr<BinaryPhraseStructureChart>& chart_backward)
111 | {
112 | const unsigned size = chart_forward->size;
113 | 
114 | for (unsigned l = 1; l < size ; ++l)
115 | {
116 | for (unsigned i = 0; i < size - l; ++i)
117 | {
118 | unsigned j = i + l;
119 | 
120 | diffdp::backward_backtracking(
121 | chart_forward->soft_selection.iter2(i, i), chart_forward->soft_selection.iter1(i + 1, j),
122 | chart_forward->soft_selection(i, j),
123 | chart_forward->backptr.iter3(i, j, i),
124 | 
125 | chart_backward->soft_selection.iter2(i, i), chart_backward->soft_selection.iter1(i + 1, j),
126 | &chart_backward->soft_selection(i, j),
127 | chart_backward->backptr.iter3(i, j, i),
128 | 
129 | l
130 | );
131 | }
132 | }
133 | 
134 | }
135 | 
136 | void AlgorithmicDifferentiableBinaryPhraseStructure::backward_maximize(std::shared_ptr<BinaryPhraseStructureChart>& chart_forward, std::shared_ptr<BinaryPhraseStructureChart>& chart_backward)
137 | {
138 | const unsigned size = chart_forward->size;
139 | 
140 | for (unsigned l = size - 1; l >= 1; --l)
141 | {
142 | for (unsigned i = 0; i < size - l; ++i)
143 | {
144 | unsigned j = i + l;
145 | 
146 | backward_algorithmic_softmax(
147 | chart_forward->weight.iter2(i, i), chart_forward->weight.iter1(i + 1, j),
148 | chart_forward->split_weights.iter3(i, j, i),
149 | chart_forward->backptr.iter3(i, j, i),
150 | 
151 | chart_backward->weight.iter2(i, i), chart_backward->weight.iter1(i + 1, j),
152 | chart_backward->weight(i, j),
153 | chart_backward->split_weights.iter3(i, j, i),
154 | chart_backward->backptr.iter3(i, j, i),
155 | 
156 | l
157 | );
158 | }
159 | }
160 | }
161 | 
162 | unsigned AlgorithmicDifferentiableBinaryPhraseStructure::size() const
163 | {
164 | return _size;
165 | }
166 | 
167 | float AlgorithmicDifferentiableBinaryPhraseStructure::output(const unsigned left, const unsigned right) const
168 | {
169 | return chart_forward->soft_selection(left, right);
170 | }
171 | 
172 | float AlgorithmicDifferentiableBinaryPhraseStructure::gradient(const unsigned left, const unsigned right) const
173 | {
174 | return chart_backward->weight(left, right);
175 | }
176 | 
177 | 
178 | 
179 | EntropyRegularizedBinaryPhraseStructure::EntropyRegularizedBinaryPhraseStructure(const unsigned t_size) :
180 | _size(t_size),
181 | chart_forward(std::make_shared<BinaryPhraseStructureChart>(_size)),
182 | chart_backward(std::make_shared<BinaryPhraseStructureChart>(_size))
183 | {}
184 | 
185 | EntropyRegularizedBinaryPhraseStructure::EntropyRegularizedBinaryPhraseStructure(std::shared_ptr<BinaryPhraseStructureChart> chart_forward, std::shared_ptr<BinaryPhraseStructureChart> chart_backward) :
186 | _size(chart_forward->size),
187 | chart_forward(chart_forward),
188 | chart_backward(chart_backward)
189 | {}
190 | 
191 | 
192 | void EntropyRegularizedBinaryPhraseStructure::forward_maximize(std::shared_ptr<BinaryPhraseStructureChart>& chart_forward)
193 | {
194 | const unsigned size = chart_forward->size;
195 | for (unsigned l = 1u; l < size; ++l)
196 | {
197 | for (unsigned i = 0u; i < size - l; ++i)
198 | {
199 | unsigned j = i + l;
200 | 
201 | // use += because we initialized them with arc weights
202 | chart_forward->weight(i, j) +=
forward_entropy_reg( 203 | chart_forward->weight.iter2(i, i), chart_forward->weight.iter1(i + 1, j), 204 | chart_forward->split_weights.iter3(i, j, i), 205 | chart_forward->backptr.iter3(i, j, i), 206 | l 207 | ); 208 | } 209 | } 210 | } 211 | 212 | void EntropyRegularizedBinaryPhraseStructure::forward_backtracking(std::shared_ptr& chart_forward) 213 | { 214 | const unsigned size = chart_forward->size; 215 | chart_forward->soft_selection(0, size - 1) = 1.0f; 216 | 217 | for (unsigned l = size - 1; l >= 1; --l) 218 | { 219 | for (unsigned i = 0u; i < size - l; ++i) 220 | { 221 | unsigned j = i + l; 222 | diffdp::forward_backtracking( 223 | chart_forward->soft_selection.iter2(i, i), chart_forward->soft_selection.iter1(i + 1, j), 224 | chart_forward->soft_selection(i, j), 225 | chart_forward->backptr.iter3(i, j, i), 226 | l 227 | ); 228 | } 229 | } 230 | } 231 | 232 | 233 | unsigned EntropyRegularizedBinaryPhraseStructure::size() const 234 | { 235 | return _size; 236 | } 237 | 238 | float EntropyRegularizedBinaryPhraseStructure::output(const unsigned left, const unsigned right) const 239 | { 240 | return chart_forward->soft_selection(left, right); 241 | } 242 | 243 | float EntropyRegularizedBinaryPhraseStructure::gradient(const unsigned left, const unsigned right) const 244 | { 245 | return chart_backward->weight(left, right); 246 | } 247 | 248 | void EntropyRegularizedBinaryPhraseStructure::backward_backtracking(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward) 249 | { 250 | const unsigned size = chart_forward->size; 251 | 252 | for (unsigned l = 1; l < size ; ++l) 253 | { 254 | for (unsigned i = 0; i < size - l; ++i) 255 | { 256 | unsigned j = i + l; 257 | 258 | diffdp::backward_backtracking( 259 | chart_forward->soft_selection.iter2(i, i), chart_forward->soft_selection.iter1(i + 1, j), 260 | chart_forward->soft_selection(i, j), 261 | chart_forward->backptr.iter3(i, j, i), 262 | 263 | chart_backward->soft_selection.iter2(i, i), chart_backward->soft_selection.iter1(i + 1, j), 264 | &chart_backward->soft_selection(i, j), 265 | chart_backward->backptr.iter3(i, j, i), 266 | 267 | l 268 | ); 269 | } 270 | } 271 | } 272 | void EntropyRegularizedBinaryPhraseStructure::backward_maximize(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward) 273 | { 274 | const unsigned size = chart_forward->size; 275 | 276 | for (unsigned l = size - 1; l >= 1; --l) 277 | { 278 | for (unsigned i = 0; i < size - l; ++i) 279 | { 280 | unsigned j = i + l; 281 | 282 | backward_entropy_reg( 283 | chart_forward->weight.iter2(i, i), chart_forward->weight.iter1(i + 1, j), 284 | chart_forward->split_weights.iter3(i, j, i), 285 | chart_forward->backptr.iter3(i, j, i), 286 | 287 | chart_backward->weight.iter2(i, i), chart_backward->weight.iter1(i + 1, j), 288 | chart_backward->weight(i, j), 289 | chart_backward->split_weights.iter3(i, j, i), 290 | chart_backward->backptr.iter3(i, j, i), 291 | 292 | l 293 | ); 294 | } 295 | } 296 | } 297 | } -------------------------------------------------------------------------------- /lib/src/algorithm/eisner.cpp: -------------------------------------------------------------------------------- 1 | #include "diffdp/algorithm/eisner.h" 2 | 3 | namespace diffdp 4 | { 5 | 6 | EisnerChart::EisnerChart(unsigned size) : 7 | size(size), 8 | size_3d(size*size*size), 9 | size_2d(size*size), 10 | _memory(new float[size_3d * 8 + size_2d * 8]), 11 | _erase_memory(true), 12 | a_cleft(size, _memory), 13 | a_cright(size, _memory + 1u*size_3d), 14 | a_uleft(size, _memory + 
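// Layout note (single allocation): the eight size^3 charts (a_*/b_* for the
// four item types) are packed first, followed by the eight size^2 matrices
// (c_* item values and soft_c_* selections); the offsets below advance in
// multiples of size_3d = size^3 and size_2d = size^2.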
2u*size_3d), 15 | a_uright(size, _memory + 3u*size_3d), 16 | b_cleft(size, _memory + 4u*size_3d), 17 | b_cright(size, _memory + 5u*size_3d), 18 | b_uleft(size, _memory + 6u*size_3d), 19 | b_uright(size, _memory + 7u*size_3d), 20 | c_cleft(size, _memory + 8u*size_3d), 21 | c_cright(size, _memory + 8u*size_3d + 1u*size_2d), 22 | c_uleft(size, _memory + 8u*size_3d + 2u*size_2d), 23 | c_uright(size, _memory + 8u*size_3d + 3u*size_2d), 24 | soft_c_cleft(size, _memory + 8u*size_3d + 4u*size_2d), 25 | soft_c_cright(size, _memory + 8u*size_3d + 5u*size_2d), 26 | soft_c_uleft(size, _memory + 8u*size_3d + 6u*size_2d), 27 | soft_c_uright(size, _memory + 8u*size_3d + 7u*size_2d) 28 | {} 29 | 30 | EisnerChart::EisnerChart(unsigned size, float* mem) : 31 | size(size), 32 | size_3d(size*size*size), 33 | size_2d(size*size), 34 | _memory(mem), 35 | _erase_memory(false), 36 | a_cleft(size, mem), 37 | a_cright(size, mem + 1u*size_3d), 38 | a_uleft(size, mem + 2u*size_3d), 39 | a_uright(size, mem + 3u*size_3d), 40 | b_cleft(size, mem + 4u*size_3d), 41 | b_cright(size, mem + 5u*size_3d), 42 | b_uleft(size, mem + 6u*size_3d), 43 | b_uright(size, mem + 7u*size_3d), 44 | c_cleft(size, mem + 8u*size_3d), 45 | c_cright(size, mem + 8u*size_3d + 1u*size_2d), 46 | c_uleft(size, mem + 8u*size_3d + 2u*size_2d), 47 | c_uright(size, mem + 8u*size_3d + 3u*size_2d), 48 | soft_c_cleft(size, mem + 8u*size_3d + 4u*size_2d), 49 | soft_c_cright(size, mem + 8u*size_3d + 5u*size_2d), 50 | soft_c_uleft(size, mem + 8u*size_3d + 6u*size_2d), 51 | soft_c_uright(size, mem + 8u*size_3d + 7u*size_2d) 52 | {} 53 | 54 | EisnerChart::~EisnerChart() 55 | { 56 | if (_erase_memory) 57 | delete[] _memory; 58 | } 59 | 60 | void EisnerChart::zeros() 61 | { 62 | std::fill(_memory, _memory + size_3d * 8 + size_2d * 8, float{}); 63 | } 64 | 65 | std::size_t EisnerChart::required_memory(const unsigned size) 66 | { 67 | return 68 | 8 * Tensor3D::required_memory(size) 69 | + 8 * Matrix::required_memory(size) 70 | ; 71 | } 72 | 73 | unsigned EisnerChart::required_cells(const unsigned size) 74 | { 75 | return 76 | 8 * Tensor3D::required_cells(size) 77 | + 8 * Matrix::required_cells(size) 78 | ; 79 | } 80 | 81 | 82 | AlgorithmicDifferentiableEisner::AlgorithmicDifferentiableEisner(const unsigned t_size) : 83 | _size(t_size), 84 | chart_forward(std::make_shared(_size)), 85 | chart_backward(std::make_shared(_size)) 86 | {} 87 | 88 | AlgorithmicDifferentiableEisner::AlgorithmicDifferentiableEisner(std::shared_ptr chart_forward, std::shared_ptr chart_backward) : 89 | _size(chart_forward->size), 90 | chart_forward(chart_forward), 91 | chart_backward(chart_backward) 92 | {} 93 | 94 | void AlgorithmicDifferentiableEisner::forward_maximize(std::shared_ptr& chart_forward) 95 | { 96 | const unsigned size = chart_forward->size; 97 | for (unsigned l = 1u; l < size; ++l) 98 | { 99 | for (unsigned i = 0u; i < size - l; ++i) 100 | { 101 | unsigned j = i + l; 102 | 103 | // use += because we initialized them with arc weights 104 | chart_forward->c_uright(i, j) += forward_algorithmic_softmax( 105 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 106 | chart_forward->a_uright.iter3(i, j, i), 107 | chart_forward->b_uright.iter3(i, j, i), 108 | l 109 | ); 110 | 111 | if (i > 0u) // because the root cannot be the modifier 112 | { 113 | chart_forward->c_uleft(i, j) += forward_algorithmic_softmax( 114 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 115 | chart_forward->a_uleft.iter3(i, j, i), 116 | 
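// Reminder on Eisner items: c_uright / c_uleft are "incomplete" spans built
// from two adjacent complete spans (they commit to the arc i -> j or j -> i),
// while c_cright / c_cleft are "complete" spans built from an incomplete
// item extended by a complete one. The i > 0u guards skip items in which
// the artificial root (position 0) would act as a modifier.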
chart_forward->b_uleft.iter3(i, j, i), 117 | l 118 | ); 119 | } 120 | 121 | chart_forward->c_cright(i, j) = forward_algorithmic_softmax( 122 | chart_forward->c_uright.iter2(i, i + 1), chart_forward->c_cright.iter1(i + 1, j), 123 | chart_forward->a_cright.iter3(i, j, i + 1), 124 | chart_forward->b_cright.iter3(i, j, i + 1), 125 | l 126 | ); 127 | 128 | if (i > 0u) 129 | { 130 | chart_forward->c_cleft(i, j) = forward_algorithmic_softmax( 131 | chart_forward->c_cleft.iter2(i, i), chart_forward->c_uleft.iter1(i, j), 132 | chart_forward->a_cleft.iter3(i, j, i), 133 | chart_forward->b_cleft.iter3(i, j, i), 134 | l 135 | ); 136 | } 137 | } 138 | } 139 | } 140 | 141 | void AlgorithmicDifferentiableEisner::forward_backtracking(std::shared_ptr& chart_forward) 142 | { 143 | const unsigned size = chart_forward->size; 144 | chart_forward->soft_c_cright(0, size - 1) = 1.0f; 145 | 146 | for (unsigned l = size - 1; l >= 1; --l) 147 | { 148 | for (unsigned i = 0u; i < size - l; ++i) 149 | { 150 | unsigned j = i + l; 151 | 152 | diffdp::forward_backtracking( 153 | chart_forward->soft_c_uright.iter2(i, i + 1), chart_forward->soft_c_cright.iter1(i + 1, j), 154 | chart_forward->soft_c_cright(i, j), 155 | chart_forward->b_cright.iter3(i, j, i + 1), 156 | l 157 | ); 158 | 159 | if (i > 0u) 160 | { 161 | diffdp::forward_backtracking( 162 | chart_forward->soft_c_cleft.iter2(i, i), chart_forward->soft_c_uleft.iter1(i, j), 163 | chart_forward->soft_c_cleft(i, j), 164 | chart_forward->b_cleft.iter3(i, j, i), 165 | l 166 | ); 167 | } 168 | 169 | diffdp::forward_backtracking( 170 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 171 | chart_forward->soft_c_uright(i, j), 172 | chart_forward->b_uright.iter3(i, j, i), 173 | l 174 | ); 175 | 176 | 177 | if (i > 0u) 178 | { 179 | diffdp::forward_backtracking( 180 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 181 | chart_forward->soft_c_uleft(i, j), 182 | chart_forward->b_uleft.iter3(i, j, i), 183 | l 184 | ); 185 | } 186 | } 187 | } 188 | } 189 | 190 | void AlgorithmicDifferentiableEisner::backward_backtracking(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward) 191 | { 192 | const unsigned size = chart_forward->size; 193 | 194 | for (unsigned l = 1; l < size ; ++l) 195 | { 196 | for (unsigned i = 0; i < size - l; ++i) 197 | { 198 | unsigned j = i + l; 199 | 200 | if (i > 0u) 201 | { 202 | diffdp::backward_backtracking( 203 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 204 | chart_forward->soft_c_uleft(i, j), 205 | chart_forward->b_uleft.iter3(i, j, i), 206 | 207 | chart_backward->soft_c_cright.iter2(i, i), chart_backward->soft_c_cleft.iter1(i + 1, j), 208 | &chart_backward->soft_c_uleft(i, j), 209 | chart_backward->b_uleft.iter3(i, j, i), 210 | 211 | l 212 | ); 213 | } 214 | 215 | diffdp::backward_backtracking( 216 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 217 | chart_forward->soft_c_uright(i, j), 218 | chart_forward->b_uright.iter3(i, j, i), 219 | 220 | chart_backward->soft_c_cright.iter2(i, i), chart_backward->soft_c_cleft.iter1(i + 1, j), 221 | &chart_backward->soft_c_uright(i, j), 222 | chart_backward->b_uright.iter3(i, j, i), 223 | 224 | l 225 | ); 226 | 227 | if (i > 0u) 228 | { 229 | diffdp::backward_backtracking( 230 | chart_forward->soft_c_cleft.iter2(i, i), chart_forward->soft_c_uleft.iter1(i, j), 231 | chart_forward->soft_c_cleft(i, j), 232 | chart_forward->b_cleft.iter3(i, j, i), 
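// Reverse-mode note: backward_backtracking revisits the items of
// forward_backtracking in the opposite order (span length l increasing
// rather than decreasing), accumulating adjoints of the soft selections
// into the backward chart; backward_maximize below then differentiates
// through the softmax recursion itself.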
233 | 234 | chart_backward->soft_c_cleft.iter2(i, i), chart_backward->soft_c_uleft.iter1(i, j), 235 | &chart_backward->soft_c_cleft(i, j), 236 | chart_backward->b_cleft.iter3(i, j, i), 237 | 238 | l 239 | ); 240 | } 241 | 242 | diffdp::backward_backtracking( 243 | chart_forward->soft_c_uright.iter2(i, i+1), chart_forward->soft_c_cright.iter1(i+1, j), 244 | chart_forward->soft_c_cright(i, j), 245 | chart_forward->b_cright.iter3(i, j, i + 1), 246 | 247 | chart_backward->soft_c_uright.iter2(i, i+1), chart_backward->soft_c_cright.iter1(i+1, j), 248 | &chart_backward->soft_c_cright(i, j), 249 | chart_backward->b_cright.iter3(i, j, i + 1), 250 | 251 | l 252 | ); 253 | } 254 | } 255 | 256 | } 257 | 258 | void AlgorithmicDifferentiableEisner::backward_maximize(std::shared_ptr& chart_forward, std::shared_ptr& chart_backward) 259 | { 260 | const unsigned size = chart_forward->size; 261 | 262 | for (unsigned l = size - 1; l >= 1; --l) 263 | { 264 | for (unsigned i = 0; i < size - l; ++i) 265 | { 266 | unsigned j = i + l; 267 | 268 | if (i > 0u) 269 | { 270 | backward_algorithmic_softmax( 271 | chart_forward->c_cleft.iter2(i, i), chart_forward->c_uleft.iter1(i, j), 272 | chart_forward->a_cleft.iter3(i, j, i), 273 | chart_forward->b_cleft.iter3(i, j, i), 274 | 275 | chart_backward->c_cleft.iter2(i, i), chart_backward->c_uleft.iter1(i, j), 276 | chart_backward->c_cleft(i, j), 277 | chart_backward->a_cleft.iter3(i, j, i), 278 | chart_backward->b_cleft.iter3(i, j, i), 279 | 280 | l 281 | ); 282 | } 283 | 284 | backward_algorithmic_softmax( 285 | chart_forward->c_uright.iter2(i, i + 1), chart_forward->c_cright.iter1(i + 1, j), 286 | chart_forward->a_cright.iter3(i, j, i + 1), 287 | chart_forward->b_cright.iter3(i, j, i + 1), 288 | 289 | chart_backward->c_uright.iter2(i, i + 1), chart_backward->c_cright.iter1(i + 1, j), 290 | chart_backward->c_cright(i, j), 291 | chart_backward->a_cright.iter3(i, j, i + 1), 292 | chart_backward->b_cright.iter3(i, j, i + 1), 293 | 294 | l 295 | ); 296 | 297 | if (i > 0u) 298 | { 299 | backward_algorithmic_softmax( 300 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 301 | chart_forward->a_uleft.iter3(i, j, i), 302 | chart_forward->b_uleft.iter3(i, j, i), 303 | 304 | chart_backward->c_cright.iter2(i, i), chart_backward->c_cleft.iter1(i + 1, j), 305 | chart_backward->c_uleft(i, j), 306 | chart_backward->a_uleft.iter3(i, j, i), 307 | chart_backward->b_uleft.iter3(i, j, i), 308 | 309 | l 310 | ); 311 | } 312 | 313 | backward_algorithmic_softmax( 314 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 315 | chart_forward->a_uright.iter3(i, j, i), 316 | chart_forward->b_uright.iter3(i, j, i), 317 | 318 | chart_backward->c_cright.iter2(i, i), chart_backward->c_cleft.iter1(i + 1, j), 319 | chart_backward->c_uright(i, j), 320 | chart_backward->a_uright.iter3(i, j, i), 321 | chart_backward->b_uright.iter3(i, j, i), 322 | 323 | l 324 | ); 325 | } 326 | } 327 | } 328 | 329 | unsigned AlgorithmicDifferentiableEisner::size() const 330 | { 331 | return _size; 332 | } 333 | 334 | float AlgorithmicDifferentiableEisner::output(const unsigned head, const unsigned mod) const 335 | { 336 | if (head < mod) 337 | return chart_forward->soft_c_uright(head, mod); 338 | else if (mod < head) 339 | return chart_forward->soft_c_uleft(mod, head); 340 | else 341 | return std::nanf(""); 342 | } 343 | 344 | float AlgorithmicDifferentiableEisner::gradient(const unsigned head, const unsigned mod) const 345 | { 346 | if (head < mod) 347 | return 
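// The gradient w.r.t. an arc is read from the backward chart of the
// corresponding incomplete item, mirroring how output() above reads the
// soft selections from the forward chart.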
chart_backward->c_uright(head, mod); 348 | else if (mod < head) 349 | return chart_backward->c_uleft(mod, head); 350 | else 351 | return std::nanf(""); 352 | } 353 | 354 | 355 | 356 | 357 | EntropyRegularizedEisner::EntropyRegularizedEisner(const unsigned t_size) : 358 | _size(t_size), 359 | chart_forward(std::make_shared(_size)), 360 | chart_backward(std::make_shared(_size)) 361 | {} 362 | 363 | EntropyRegularizedEisner::EntropyRegularizedEisner(std::shared_ptr chart_forward, std::shared_ptr chart_backward) : 364 | _size(chart_forward->size), 365 | chart_forward(chart_forward), 366 | chart_backward(chart_backward) 367 | {} 368 | 369 | 370 | void EntropyRegularizedEisner::forward_maximize(std::shared_ptr& chart_forward) 371 | { 372 | const unsigned size = chart_forward->size; 373 | 374 | for (unsigned l = 1u; l < size; ++l) 375 | { 376 | for (unsigned i = 0u; i < size - l; ++i) 377 | { 378 | unsigned j = i + l; 379 | 380 | chart_forward->c_uright(i, j) += forward_entropy_reg( 381 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 382 | chart_forward->a_uright.iter3(i, j, i), 383 | chart_forward->b_uright.iter3(i, j, i), 384 | l 385 | ); 386 | 387 | if (i > 0u) // because the root cannot be the modifier 388 | { 389 | chart_forward->c_uleft(i, j) += forward_entropy_reg( 390 | chart_forward->c_cright.iter2(i, i), chart_forward->c_cleft.iter1(i + 1, j), 391 | chart_forward->a_uleft.iter3(i, j, i), 392 | chart_forward->b_uleft.iter3(i, j, i), 393 | l 394 | ); 395 | } 396 | 397 | chart_forward->c_cright(i, j) = forward_entropy_reg( 398 | chart_forward->c_uright.iter2(i, i + 1), chart_forward->c_cright.iter1(i + 1, j), 399 | chart_forward->a_cright.iter3(i, j, i + 1), 400 | chart_forward->b_cright.iter3(i, j, i + 1), 401 | l 402 | ); 403 | 404 | if (i > 0u) 405 | { 406 | chart_forward->c_cleft(i, j) = forward_entropy_reg( 407 | chart_forward->c_cleft.iter2(i, i), chart_forward->c_uleft.iter1(i, j), 408 | chart_forward->a_cleft.iter3(i, j, i), 409 | chart_forward->b_cleft.iter3(i, j, i), 410 | l 411 | ); 412 | } 413 | } 414 | } 415 | } 416 | 417 | void EntropyRegularizedEisner::forward_backtracking(std::shared_ptr& chart_forward) 418 | { 419 | const unsigned size = chart_forward->size; 420 | 421 | chart_forward->soft_c_cright(0, size - 1) = 1.0f; 422 | 423 | for (unsigned l = size - 1; l >= 1; --l) 424 | { 425 | for (unsigned i = 0u; i < size - l; ++i) 426 | { 427 | unsigned j = i + l; 428 | 429 | diffdp::forward_backtracking( 430 | chart_forward->soft_c_uright.iter2(i, i + 1), chart_forward->soft_c_cright.iter1(i + 1, j), 431 | chart_forward->soft_c_cright(i, j), 432 | chart_forward->b_cright.iter3(i, j, i + 1), 433 | l 434 | ); 435 | 436 | if (i > 0u) 437 | { 438 | diffdp::forward_backtracking( 439 | chart_forward->soft_c_cleft.iter2(i, i), chart_forward->soft_c_uleft.iter1(i, j), 440 | chart_forward->soft_c_cleft(i, j), 441 | chart_forward->b_cleft.iter3(i, j, i), 442 | l 443 | ); 444 | } 445 | 446 | diffdp::forward_backtracking( 447 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 448 | chart_forward->soft_c_uright(i, j), 449 | chart_forward->b_uright.iter3(i, j, i), 450 | l 451 | ); 452 | 453 | 454 | if (i > 0u) 455 | { 456 | diffdp::forward_backtracking( 457 | chart_forward->soft_c_cright.iter2(i, i), chart_forward->soft_c_cleft.iter1(i + 1, j), 458 | chart_forward->soft_c_uleft(i, j), 459 | chart_forward->b_uleft.iter3(i, j, i), 460 | l 461 | ); 462 | } 463 | } 464 | } 465 | } 466 | 467 | 468 | unsigned 
EntropyRegularizedEisner::size() const 469 | { 470 | return _size; 471 | } 472 | 473 | float EntropyRegularizedEisner::output(const unsigned head, const unsigned mod) const 474 | { 475 | if (head < mod) 476 | return chart_forward->soft_c_uright(head, mod); 477 | else if (mod < head) 478 | return chart_forward->soft_c_uleft(mod, head); 479 | else 480 | return std::nanf(""); 481 | } 482 | 483 | float EntropyRegularizedEisner::gradient(const unsigned head, const unsigned mod) const 484 | { 485 | if (head < mod) 486 | return chart_backward->c_uright(head, mod); 487 | else if (mod < head) 488 | return chart_backward->c_uleft(mod, head); 489 | else 490 | return std::nanf(""); 491 | } 492 | 493 | } -------------------------------------------------------------------------------- /lib/src/builder/binary-phrase.cpp: -------------------------------------------------------------------------------- 1 | #include "diffdp/builder/binary-phrase.h" 2 | 3 | #include "diffdp/dynet/binary_phrase.h" 4 | #include "dytools/algorithms/span-parser.h" 5 | #include "dytools/utils.h" 6 | 7 | namespace diffdp 8 | { 9 | 10 | BinaryPhraseBuilder::BinaryPhraseBuilder(const BinaryPhraseSettings& settings) : 11 | settings(settings) 12 | {} 13 | 14 | void BinaryPhraseBuilder::new_graph(dynet::ComputationGraph& cg, bool training) 15 | { 16 | _cg = &cg; 17 | _training = training; 18 | } 19 | 20 | dynet::Expression BinaryPhraseBuilder::relaxed(const dynet::Expression& weights) 21 | { 22 | if (settings.type == BinaryPhraseType::AlgDiff) 23 | return relaxed_alg_diff(weights); 24 | else 25 | return relaxed_entropy_Reg(weights); 26 | } 27 | 28 | dynet::Expression BinaryPhraseBuilder::argmax(const dynet::Expression& weights) 29 | { 30 | const auto size = weights.dim().rows(); 31 | 32 | const auto p_weights = perturb(weights); 33 | const auto v_weights = as_vector(_cg->incremental_forward(p_weights)); 34 | 35 | const auto tree = dytools::binary_span_parser(size, v_weights); 36 | 37 | std::vector indices; 38 | for (const auto& span : tree) 39 | indices.push_back(span.first + span.second * size); 40 | std::vector values(indices.size(), 1.f); 41 | 42 | const auto output = dynet::input(*_cg, {size, size}, indices, values); 43 | return output; 44 | } 45 | 46 | 47 | dynet::Expression BinaryPhraseBuilder::relaxed_alg_diff(const dynet::Expression& weights) 48 | { 49 | const auto p_weights = perturb(weights); 50 | return dytools::force_cpu(dynet::algorithmic_differentiable_binary_phrase_structure, p_weights, DiscreteMode::ForwardRegularized, nullptr); 51 | } 52 | 53 | dynet::Expression BinaryPhraseBuilder::relaxed_entropy_Reg(const dynet::Expression& weights) 54 | { 55 | const auto p_weights = perturb(weights); 56 | return dytools::force_cpu(dynet::entropy_regularized_binary_phrase_structure, p_weights, DiscreteMode::ForwardRegularized, nullptr); 57 | } 58 | 59 | 60 | dynet::Expression BinaryPhraseBuilder::perturb(const dynet::Expression& arc_weights) 61 | { 62 | if (settings.perturb and _training) 63 | return arc_weights + dynet::random_gumbel(*_cg, arc_weights.dim()); 64 | else 65 | return arc_weights; 66 | } 67 | 68 | 69 | } -------------------------------------------------------------------------------- /lib/src/builder/dependency.cpp: -------------------------------------------------------------------------------- 1 | #include "diffdp/builder/dependency.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "dytools/functions/rooted_arborescence_marginals.h" 7 | #include "dytools/functions/masking.h" 8 | #include "diffdp/dynet/eisner.h" 9 | 
#include "dytools/utils.h" 10 | 11 | namespace diffdp 12 | { 13 | 14 | DependencyBuilder::DependencyBuilder(const DependencySettings& settings) : 15 | settings(settings) 16 | {} 17 | 18 | void DependencyBuilder::new_graph(dynet::ComputationGraph& cg, bool training) 19 | { 20 | _cg = &cg; 21 | _training = training; 22 | } 23 | 24 | dynet::Expression DependencyBuilder::relaxed(const dynet::Expression& arc_weights, std::vector* sizes, dynet::Expression* e_mask) 25 | { 26 | if (settings.type == DependencyType::Head) 27 | return relaxed_head(arc_weights, e_mask); 28 | else if (settings.type == DependencyType::NonProjective) 29 | return relaxed_nonprojective(arc_weights, sizes); 30 | else if (settings.type == DependencyType::ProjectiveAlgDiff) 31 | return relaxed_projective_alg_diff(arc_weights, sizes); 32 | else 33 | return relaxed_projective_entropy_reg(arc_weights, sizes); 34 | } 35 | 36 | dynet::Expression DependencyBuilder::relaxed_head(const dynet::Expression& arc_weights, dynet::Expression* e_mask) 37 | { 38 | if (e_mask != nullptr) 39 | if (e_mask->dim().rows() != 1 || e_mask->dim().cols() != arc_weights.dim().cols()) 40 | throw std::runtime_error("Relaxed Head: mask has the wrong dimension"); 41 | const auto p_arc_weights = perturb(arc_weights); 42 | 43 | // mask the diagonal 44 | const unsigned n_max_vertices = arc_weights.dim().rows(); 45 | const auto e_inf_mask = dytools::main_diagonal_mask(*_cg, {n_max_vertices, n_max_vertices}, -std::numeric_limits::infinity()); 46 | 47 | auto heads = dynet::softmax(p_arc_weights + e_inf_mask); 48 | 49 | if (e_mask != nullptr) 50 | heads = dynet::cmult(heads, *e_mask); 51 | 52 | // first column should be empty (the root word has no head) 53 | std::vector values(arc_weights.dim().cols(), 1.f); 54 | values[0] = 0.f; 55 | const auto mask = dynet::input(*_cg, {1, n_max_vertices}, values); 56 | heads = dynet::cmult(heads, mask); 57 | 58 | return heads; 59 | } 60 | 61 | dynet::Expression DependencyBuilder::relaxed_nonprojective(const dynet::Expression& arc_weights, std::vector* sizes) 62 | { 63 | const auto p_arc_weights = perturb(arc_weights); 64 | return dytools::rooted_arborescence_marginals(*_cg, p_arc_weights, sizes); 65 | } 66 | 67 | dynet::Expression DependencyBuilder::relaxed_projective_alg_diff(const dynet::Expression& arc_weights, std::vector* sizes) 68 | { 69 | const auto p_arc_weights = perturb(arc_weights); 70 | return dytools::force_cpu(dynet::algorithmic_differentiable_eisner, 71 | p_arc_weights, 72 | DiscreteMode::ForwardRegularized, 73 | DependencyGraphMode::Adjacency, 74 | DependencyGraphMode::Adjacency, 75 | true, 76 | sizes 77 | ); 78 | } 79 | 80 | dynet::Expression DependencyBuilder::relaxed_projective_entropy_reg(const dynet::Expression& arc_weights, std::vector* sizes) 81 | { 82 | const auto p_arc_weights = perturb(arc_weights); 83 | return dytools::force_cpu(dynet::entropy_regularized_eisner, 84 | p_arc_weights, 85 | DiscreteMode::ForwardRegularized, 86 | DependencyGraphMode::Adjacency, 87 | DependencyGraphMode::Adjacency, 88 | true, 89 | sizes 90 | ); 91 | } 92 | 93 | dynet::Expression DependencyBuilder::argmax(const dynet::Expression& arc_weights, std::vector* sizes, dynet::Expression* e_mask) 94 | { 95 | if (settings.type == DependencyType::Head) 96 | return argmax_head(arc_weights, e_mask); 97 | else if (settings.type == DependencyType::NonProjective) 98 | return argmax_nonprojective(arc_weights, sizes); 99 | else if (settings.type == DependencyType::ProjectiveAlgDiff) 100 | return argmax_projective_alg_diff(arc_weights, 
sizes); 101 | else 102 | return argmax_projective_entropy_reg(arc_weights, sizes); 103 | } 104 | 105 | dynet::Expression DependencyBuilder::argmax_head(const dynet::Expression&, dynet::Expression*) 106 | { 107 | throw std::runtime_error("Not implemented yet."); 108 | } 109 | 110 | dynet::Expression DependencyBuilder::argmax_nonprojective(const dynet::Expression&, std::vector*) 111 | { 112 | throw std::runtime_error("Not implemented yet."); 113 | } 114 | 115 | dynet::Expression DependencyBuilder::argmax_projective_alg_diff(const dynet::Expression&, std::vector*) 116 | { 117 | throw std::runtime_error("Not implemented yet."); 118 | } 119 | 120 | dynet::Expression DependencyBuilder::argmax_projective_entropy_reg(const dynet::Expression&, std::vector*) 121 | { 122 | throw std::runtime_error("Not implemented yet."); 123 | } 124 | 125 | 126 | dynet::Expression DependencyBuilder::perturb(const dynet::Expression& arc_weights) 127 | { 128 | if (settings.perturb and _training) 129 | return arc_weights + dynet::random_gumbel(*_cg, arc_weights.dim()); 130 | else 131 | return arc_weights; 132 | } 133 | 134 | 135 | } -------------------------------------------------------------------------------- /lib/src/chart.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "diffdp/chart.h" 4 | 5 | namespace diffdp 6 | { 7 | 8 | 9 | 10 | 11 | 12 | } 13 | -------------------------------------------------------------------------------- /lib/src/dynet/binary_phrase.cpp: -------------------------------------------------------------------------------- 1 | #include "diffdp/dynet/binary_phrase.h" 2 | #include "dynet/tensor-eigen.h" 3 | 4 | namespace dynet 5 | { 6 | 7 | Expression algorithmic_differentiable_binary_phrase_structure(const Expression& x, diffdp::DiscreteMode mode, std::vector* batch_sizes) 8 | { 9 | return Expression(x.pg, x.pg->add_function({x.i}, mode, batch_sizes)); 10 | } 11 | 12 | Expression entropy_regularized_binary_phrase_structure(const Expression& x, diffdp::DiscreteMode mode, std::vector* batch_sizes) 13 | { 14 | return Expression(x.pg, x.pg->add_function({x.i}, mode, batch_sizes)); 15 | } 16 | 17 | AlgorithmicDifferentiableBinaryPhraseStructure::AlgorithmicDifferentiableBinaryPhraseStructure( 18 | const std::initializer_list& a, 19 | diffdp::DiscreteMode mode, 20 | std::vector* batch_sizes 21 | ) : 22 | Node(a), 23 | mode(mode), 24 | batch_sizes(batch_sizes) 25 | { 26 | this->has_cuda_implemented = false; 27 | } 28 | 29 | bool AlgorithmicDifferentiableBinaryPhraseStructure::supports_multibatch() const 30 | { 31 | return true; 32 | } 33 | 34 | AlgorithmicDifferentiableBinaryPhraseStructure::~AlgorithmicDifferentiableBinaryPhraseStructure() 35 | { 36 | for (auto*& ptr : _ce_ptr) 37 | if (ptr != nullptr) 38 | { 39 | delete ptr; 40 | ptr = nullptr; 41 | } 42 | } 43 | 44 | std::string AlgorithmicDifferentiableBinaryPhraseStructure::as_string(const std::vector& arg_names) const { 45 | std::ostringstream s; 46 | s << "algorithmic_differentiable_eisner(" << arg_names[0] << ")"; 47 | return s.str(); 48 | } 49 | 50 | Dim AlgorithmicDifferentiableBinaryPhraseStructure::dim_forward(const std::vector& xs) const { 51 | DYNET_ARG_CHECK( 52 | xs.size() == 1 && xs[0].nd == 2 && xs[0].rows() == xs[0].cols(), 53 | "Bad input dimensions in AlgorithmicDifferentiableEisner: " << xs 54 | ); 55 | 56 | return dynet::Dim(xs[0]); 57 | } 58 | 59 | size_t AlgorithmicDifferentiableBinaryPhraseStructure::aux_storage_size() const { 60 | // 2 times because 
we have a forward and a backward chart 61 | const size_t dp_mem = 2 * diffdp::BinaryPhraseStructureChart::required_memory(dim.rows()); 62 | return dim.batch_elems() * dp_mem; 63 | } 64 | 65 | template 66 | void AlgorithmicDifferentiableBinaryPhraseStructure::forward_dev_impl( 67 | const MyDevice&, 68 | const std::vector& xs, 69 | Tensor& fx 70 | ) const { 71 | #ifdef __CUDACC__ 72 | DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::forward"); 73 | #else 74 | // TODO call zero only when necessary 75 | TensorTools::zero(fx); 76 | 77 | std::vector& _ce_ptr2 = 78 | const_cast&>(_ce_ptr); 79 | 80 | for (auto*& ptr : _ce_ptr2) 81 | if (ptr != nullptr) 82 | { 83 | delete ptr; 84 | ptr = nullptr; 85 | } 86 | 87 | if (_ce_ptr2.size() != xs[0]->d.batch_elems()) 88 | _ce_ptr2.resize(xs[0]->d.batch_elems(), nullptr); 89 | 90 | const unsigned max_input_dim = xs[0]->d.rows(); 91 | float* aux_fmem = static_cast(aux_mem); 92 | 93 | //#pragma omp parallel for 94 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 95 | { 96 | const unsigned eisner_dim = ( 97 | batch_sizes == nullptr 98 | ? max_input_dim 99 | : batch_sizes->at(batch) 100 | ); 101 | 102 | auto input = batch_matrix(*(xs[0]), batch); 103 | 104 | if (mode == diffdp::DiscreteMode::ForwardRegularized) 105 | { 106 | float* fmem = aux_fmem + batch * 2 * diffdp::BinaryPhraseStructureChart::required_cells(max_input_dim); 107 | //auto forward_chart = std::make_shared(eisner_dim, fmem); 108 | //auto backward_chart = std::make_shared(eisner_dim, fmem + diffdp::EisnerChart::required_cells(max_eisner_dim)); 109 | auto forward_chart = std::make_shared(eisner_dim, fmem); 110 | auto backward_chart = std::make_shared(eisner_dim); 111 | 112 | _ce_ptr2.at(batch) = new diffdp::AlgorithmicDifferentiableBinaryPhraseStructure(forward_chart, backward_chart); 113 | 114 | 115 | _ce_ptr2.at(batch)->forward( 116 | [&] (const unsigned left, const unsigned right) 117 | { 118 | return input(left, right); 119 | } 120 | ); 121 | 122 | auto output = batch_matrix(fx, batch); 123 | 124 | for (unsigned left = 0u ; left < eisner_dim ; ++left) 125 | { 126 | for (unsigned right = left+1; right < eisner_dim ; ++right) 127 | { 128 | const float a = _ce_ptr2[batch]->output(left, right); 129 | output(left, right) = a; 130 | } 131 | } 132 | } 133 | else 134 | { 135 | throw std::runtime_error("Not implemented: only ForwardRegularized can be used at the moment"); 136 | } 137 | } 138 | #endif 139 | } 140 | 141 | template 142 | void AlgorithmicDifferentiableBinaryPhraseStructure::backward_dev_impl( 143 | const MyDevice &, 144 | const std::vector& xs, 145 | const Tensor&, 146 | const Tensor& dEdf, 147 | unsigned, 148 | Tensor& dEdxi 149 | ) const { 150 | #ifdef __CUDACC__ 151 | DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::backward"); 152 | #else 153 | //#pragma omp parallel for 154 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 155 | { 156 | auto output_grad = batch_matrix(dEdxi, batch); 157 | auto input_grad = batch_matrix(dEdf, batch); 158 | 159 | auto& dp = *(_ce_ptr.at(batch)); 160 | 161 | dp.backward( 162 | [&] (unsigned left, unsigned right) -> float 163 | { 164 | return input_grad(left, right); 165 | } 166 | ); 167 | 168 | for (unsigned left = 0u ; left < dp.size() ; ++left) 169 | for (unsigned right = left + 1u; right < dp.size(); ++right) 170 | output_grad(left, right) += dp.gradient(left, right); 171 | } 172 | #endif 173 | } 174 | 175 | DYNET_NODE_INST_DEV_IMPL(AlgorithmicDifferentiableBinaryPhraseStructure) 176 | 177 
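// Usage sketch (illustrative only; this block assumes an n x n weight
// matrix filled elsewhere and standard DyNet calls, it is not part of the
// library API):
//
//   dynet::ComputationGraph cg;
//   std::vector<float> weights(n * n);  // span scores, column-major
//   auto x = dynet::input(cg, {n, n}, weights);
//   auto y = dynet::algorithmic_differentiable_binary_phrase_structure(
//       x, diffdp::DiscreteMode::ForwardRegularized, nullptr);
//   auto z = dynet::sum_elems(y);  // any scalar loss built from y works
//   cg.forward(z);
//   cg.backward(z);  // gradients flow through the relaxed parser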
| 178 | 179 | 180 | // ENTROPY Regularized 181 | 182 | EntropyRegularizedBinaryPhraseStructure::EntropyRegularizedBinaryPhraseStructure( 183 | const std::initializer_list& a, 184 | diffdp::DiscreteMode mode, 185 | std::vector* batch_sizes 186 | ) : 187 | Node(a), 188 | mode(mode), 189 | batch_sizes(batch_sizes) 190 | { 191 | this->has_cuda_implemented = false; 192 | } 193 | 194 | bool EntropyRegularizedBinaryPhraseStructure::supports_multibatch() const 195 | { 196 | return true; 197 | } 198 | 199 | EntropyRegularizedBinaryPhraseStructure::~EntropyRegularizedBinaryPhraseStructure() 200 | { 201 | for (auto*& ptr : _ce_ptr) 202 | if (ptr != nullptr) 203 | { 204 | delete ptr; 205 | ptr = nullptr; 206 | } 207 | } 208 | 209 | std::string EntropyRegularizedBinaryPhraseStructure::as_string(const std::vector& arg_names) const { 210 | std::ostringstream s; 211 | s << "algorithmic_differentiable_eisner(" << arg_names[0] << ")"; 212 | return s.str(); 213 | } 214 | 215 | Dim EntropyRegularizedBinaryPhraseStructure::dim_forward(const std::vector& xs) const { 216 | DYNET_ARG_CHECK( 217 | xs.size() == 1 && xs[0].nd == 2 && xs[0].rows() == xs[0].cols(), 218 | "Bad input dimensions in AlgorithmicDifferentiableEisner: " << xs 219 | ); 220 | 221 | return dynet::Dim(xs[0]); 222 | } 223 | 224 | size_t EntropyRegularizedBinaryPhraseStructure::aux_storage_size() const { 225 | // 2 times because we have a forward and a backward chart 226 | const size_t dp_mem = 2 * diffdp::BinaryPhraseStructureChart::required_memory(dim.rows()); 227 | return dim.batch_elems() * dp_mem; 228 | } 229 | 230 | template 231 | void EntropyRegularizedBinaryPhraseStructure::forward_dev_impl( 232 | const MyDevice&, 233 | const std::vector& xs, 234 | Tensor& fx 235 | ) const { 236 | #ifdef __CUDACC__ 237 | DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::forward"); 238 | #else 239 | // TODO call zero only when necessary 240 | TensorTools::zero(fx); 241 | 242 | std::vector& _ce_ptr2 = 243 | const_cast&>(_ce_ptr); 244 | 245 | for (auto*& ptr : _ce_ptr2) 246 | if (ptr != nullptr) 247 | { 248 | delete ptr; 249 | ptr = nullptr; 250 | } 251 | 252 | if (_ce_ptr2.size() != xs[0]->d.batch_elems()) 253 | _ce_ptr2.resize(xs[0]->d.batch_elems(), nullptr); 254 | 255 | const unsigned max_input_dim = xs[0]->d.rows(); 256 | float* aux_fmem = static_cast(aux_mem); 257 | 258 | //#pragma omp parallel for 259 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 260 | { 261 | const unsigned eisner_dim = ( 262 | batch_sizes == nullptr 263 | ? 
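// When batch_sizes is provided it holds the true length of each batch
// element, so the chart is built on the top-left block of the padded
// input only; otherwise the full padded dimension is used: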
max_input_dim 264 | : batch_sizes->at(batch) 265 | ); 266 | 267 | auto input = batch_matrix(*(xs[0]), batch); 268 | 269 | if (mode == diffdp::DiscreteMode::ForwardRegularized) 270 | { 271 | float* fmem = aux_fmem + batch * 2 * diffdp::BinaryPhraseStructureChart::required_cells(max_input_dim); 272 | //auto forward_chart = std::make_shared(eisner_dim, fmem); 273 | //auto backward_chart = std::make_shared(eisner_dim, fmem + diffdp::EisnerChart::required_cells(max_eisner_dim)); 274 | auto forward_chart = std::make_shared(eisner_dim, fmem); 275 | auto backward_chart = std::make_shared(eisner_dim, fmem + diffdp::BinaryPhraseStructureChart::required_cells(max_input_dim)); 276 | 277 | _ce_ptr2.at(batch) = new diffdp::EntropyRegularizedBinaryPhraseStructure(forward_chart, backward_chart); 278 | 279 | 280 | _ce_ptr2.at(batch)->forward( 281 | [&] (const unsigned left, const unsigned right) 282 | { 283 | return input(left, right); 284 | } 285 | ); 286 | 287 | auto output = batch_matrix(fx, batch); 288 | 289 | for (unsigned left = 0u ; left < eisner_dim ; ++left) 290 | { 291 | for (unsigned right = left+1; right < eisner_dim ; ++right) 292 | { 293 | const float a = _ce_ptr2[batch]->output(left, right); 294 | output(left, right) = a; 295 | } 296 | } 297 | } 298 | else 299 | { 300 | throw std::runtime_error("Not implemented: only ForwardRegularized can be used at the moment"); 301 | } 302 | } 303 | #endif 304 | } 305 | 306 | template 307 | void EntropyRegularizedBinaryPhraseStructure::backward_dev_impl( 308 | const MyDevice &, 309 | const std::vector& xs, 310 | const Tensor&, 311 | const Tensor& dEdf, 312 | unsigned, 313 | Tensor& dEdxi 314 | ) const { 315 | #ifdef __CUDACC__ 316 | DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::backward"); 317 | #else 318 | //#pragma omp parallel for 319 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 320 | { 321 | auto output_grad = batch_matrix(dEdxi, batch); 322 | auto input_grad = batch_matrix(dEdf, batch); 323 | 324 | auto& dp = *(_ce_ptr.at(batch)); 325 | 326 | dp.backward( 327 | [&] (unsigned left, unsigned right) -> float 328 | { 329 | return input_grad(left, right); 330 | } 331 | ); 332 | 333 | for (unsigned left = 0u ; left < dp.size() ; ++left) 334 | for (unsigned right = left + 1u; right < dp.size(); ++right) 335 | { 336 | output_grad(left, right) += dp.gradient(left, right); 337 | } 338 | } 339 | #endif 340 | } 341 | 342 | DYNET_NODE_INST_DEV_IMPL(EntropyRegularizedBinaryPhraseStructure) 343 | 344 | 345 | 346 | } -------------------------------------------------------------------------------- /lib/src/dynet/eisner.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * TODO: there is a lot a duplicate code between the two nodes (almost identical) 3 | */ 4 | #include "diffdp/dynet/eisner.h" 5 | #include "dynet/tensor-eigen.h" 6 | 7 | namespace diffdp 8 | { 9 | 10 | std::pair from_adjacency(const std::pair dep, const diffdp::DependencyGraphMode mode) 11 | { 12 | unsigned head = dep.first; 13 | unsigned mod = dep.second; 14 | if (mode == diffdp::DependencyGraphMode::Compact) 15 | { 16 | mod -= 1u; 17 | if (head == 0u) 18 | head = mod; 19 | else 20 | head -= 1u; 21 | } 22 | 23 | return {head, mod}; 24 | } 25 | 26 | std::pair from_compact(const std::pair dep, const diffdp::DependencyGraphMode mode) 27 | { 28 | unsigned head = dep.first; 29 | unsigned mod = dep.second; 30 | if (mode == diffdp::DependencyGraphMode::Adjacency) 31 | { 32 | if (head == mod) 33 | head = 0u; 34 | else 35 | 
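// Compact coordinates store root attachment on the diagonal: a word that
// is its own head in compact format is headed by the root in adjacency
// format. E.g. compact (0, 0) maps to adjacency (0, 1) (root arc), while
// compact (0, 1) maps to adjacency (1, 2) (ordinary arc).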
head += 1u; 36 | mod += 1u; 37 | } 38 | 39 | return {head, mod}; 40 | } 41 | } 42 | 43 | namespace dynet 44 | { 45 | 46 | Expression algorithmic_differentiable_eisner(const Expression& x, diffdp::DiscreteMode mode, diffdp::DependencyGraphMode input_graph, diffdp::DependencyGraphMode output_graph, bool with_root_arcs, std::vector* batch_sizes) 47 | { 48 | return Expression(x.pg, x.pg->add_function({x.i}, mode, input_graph, output_graph, with_root_arcs, batch_sizes)); 49 | } 50 | 51 | Expression entropy_regularized_eisner(const Expression& x, diffdp::DiscreteMode mode, diffdp::DependencyGraphMode input_graph, diffdp::DependencyGraphMode output_graph, bool with_root_arcs, std::vector* batch_sizes) 52 | { 53 | return Expression(x.pg, x.pg->add_function({x.i}, mode, input_graph, output_graph, with_root_arcs, batch_sizes)); 54 | } 55 | 56 | AlgorithmicDifferentiableEisner::AlgorithmicDifferentiableEisner( 57 | const std::initializer_list& a, 58 | diffdp::DiscreteMode mode, 59 | diffdp::DependencyGraphMode input_graph, 60 | diffdp::DependencyGraphMode output_graph, 61 | bool with_root_arcs, 62 | std::vector* batch_sizes 63 | ) : 64 | Node(a), 65 | mode(mode), 66 | input_graph(input_graph), 67 | output_graph(output_graph), 68 | with_root_arcs(with_root_arcs), 69 | batch_sizes(batch_sizes) 70 | { 71 | this->has_cuda_implemented = false; 72 | } 73 | 74 | bool AlgorithmicDifferentiableEisner::supports_multibatch() const 75 | { 76 | return true; 77 | } 78 | 79 | AlgorithmicDifferentiableEisner::~AlgorithmicDifferentiableEisner() 80 | { 81 | for (auto*& ptr : _ce_ptr) 82 | if (ptr != nullptr) 83 | { 84 | delete ptr; 85 | ptr = nullptr; 86 | } 87 | } 88 | 89 | std::string AlgorithmicDifferentiableEisner::as_string(const std::vector& arg_names) const { 90 | std::ostringstream s; 91 | s << "algorithmic_differentiable_eisner(" << arg_names[0] << ")"; 92 | return s.str(); 93 | } 94 | 95 | Dim AlgorithmicDifferentiableEisner::dim_forward(const std::vector& xs) const { 96 | DYNET_ARG_CHECK( 97 | xs.size() == 1 && xs[0].nd == 2 && xs[0].rows() == xs[0].cols(), 98 | "Bad input dimensions in AlgorithmicDifferentiableEisner: " << xs 99 | ); 100 | if (input_graph == diffdp::DependencyGraphMode::Compact) 101 | DYNET_ARG_CHECK( 102 | xs[0].rows() >= 1, 103 | "Bad input dimensions in AlgorithmicDifferentiableEisner: " << xs 104 | ) 105 | else 106 | DYNET_ARG_CHECK( 107 | xs[0].rows() >= 2, 108 | "Bad input dimensions in AlgorithmicDifferentiableEisner: " << xs 109 | ) 110 | 111 | unsigned dim; 112 | if (input_graph == output_graph) 113 | dim = xs[0].rows(); 114 | else if (input_graph == diffdp::DependencyGraphMode::Compact) 115 | dim = xs[0].rows() + 1; // from compact to adj 116 | else 117 | dim = xs[0].rows() - 1; // from adj to compact 118 | 119 | return dynet::Dim({dim, dim}, xs[0].batch_elems()); 120 | } 121 | 122 | size_t AlgorithmicDifferentiableEisner::aux_storage_size() const { 123 | const unsigned eisner_dim = dim.rows() + (output_graph == diffdp::DependencyGraphMode::Compact ? 
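// Compact graphs drop the explicit root row/column, so the underlying
// Eisner chart needs one extra position: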
1 : 0); 124 | // 2 times because we have a forward and a backward chart 125 | const size_t eisner_mem = 2 * diffdp::EisnerChart::required_memory(eisner_dim); 126 | return dim.batch_elems() * eisner_mem; 127 | } 128 | 129 | 130 | 131 | 132 | template 133 | void AlgorithmicDifferentiableEisner::forward_dev_impl( 134 | const MyDevice&, 135 | const std::vector& xs, 136 | Tensor& fx 137 | ) const { 138 | #ifdef __CUDACC__ 139 | DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::forward"); 140 | #else 141 | // TODO call zero only when necessary 142 | TensorTools::zero(fx); 143 | 144 | std::vector& _ce_ptr2 = 145 | const_cast&>(_ce_ptr); 146 | 147 | for (auto*& ptr : _ce_ptr2) 148 | if (ptr != nullptr) 149 | { 150 | delete ptr; 151 | ptr = nullptr; 152 | } 153 | 154 | if (_ce_ptr2.size() != xs[0]->d.batch_elems()) 155 | _ce_ptr2.resize(xs[0]->d.batch_elems(), nullptr); 156 | 157 | const unsigned max_eisner_dim = xs[0]->d.rows() + (input_graph == diffdp::DependencyGraphMode::Compact ? 1 : 0); 158 | float* aux_fmem = static_cast(aux_mem); 159 | 160 | //#pragma omp parallel for 161 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 162 | { 163 | const unsigned eisner_dim = ( 164 | batch_sizes == nullptr 165 | ? max_eisner_dim 166 | : batch_sizes->at(batch) + 1 167 | ); 168 | 169 | auto input = batch_matrix(*(xs[0]), batch); 170 | 171 | if (mode == diffdp::DiscreteMode::ForwardRegularized) 172 | { 173 | float* fmem = aux_fmem + batch * 2 * diffdp::EisnerChart::required_cells(max_eisner_dim); 174 | //auto forward_chart = std::make_shared(eisner_dim, fmem); 175 | //auto backward_chart = std::make_shared(eisner_dim, fmem + diffdp::EisnerChart::required_cells(max_eisner_dim)); 176 | auto forward_chart = std::make_shared(eisner_dim, fmem); 177 | auto backward_chart = std::make_shared(eisner_dim); 178 | 179 | _ce_ptr2.at(batch) = new diffdp::AlgorithmicDifferentiableEisner(forward_chart, backward_chart); 180 | 181 | 182 | _ce_ptr2.at(batch)->forward( 183 | [&] (const unsigned head, const unsigned mod) 184 | { 185 | if (mod == 0u) 186 | throw std::runtime_error("Illegal arc"); 187 | if (head == 0u && !with_root_arcs) 188 | { 189 | return 0.f; 190 | } 191 | else 192 | { 193 | const auto arc = diffdp::from_adjacency({head, mod}, input_graph); 194 | const float v = input(arc.first, arc.second); 195 | return v; 196 | } 197 | } 198 | ); 199 | 200 | auto output = batch_matrix(fx, batch); 201 | 202 | for (unsigned head = 0u ; head < eisner_dim ; ++head) 203 | { 204 | for (unsigned mod = 1u; mod < eisner_dim ; ++mod) 205 | { 206 | const auto arc = diffdp::from_adjacency({head, mod}, output_graph); 207 | if (head == mod) 208 | { 209 | output(arc.first, arc.second) = 0.f; 210 | continue; 211 | } 212 | 213 | if (head == 0u && !with_root_arcs) 214 | { 215 | output(arc.first, arc.second) = 0.f; 216 | continue; 217 | } 218 | 219 | const float a = _ce_ptr2[batch]->output(head, mod); 220 | 221 | if (!std::isfinite(a)) 222 | throw std::runtime_error("BAD eisner output"); 223 | 224 | output(arc.first, arc.second) = a; 225 | } 226 | } 227 | } 228 | else 229 | { 230 | throw std::runtime_error("Not implemented: only ForwardRegularized can be used at the moment"); 231 | } 232 | } 233 | #endif 234 | } 235 | 236 | template 237 | void AlgorithmicDifferentiableEisner::backward_dev_impl( 238 | const MyDevice &, 239 | const std::vector& xs, 240 | const Tensor&, 241 | const Tensor& dEdf, 242 | unsigned, 243 | Tensor& dEdxi 244 | ) const { 245 | #ifdef __CUDACC__ 246 | 
DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::backward"); 247 | #else 248 | //#pragma omp parallel for 249 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 250 | { 251 | auto output_grad = batch_matrix(dEdxi, batch); 252 | auto input_grad = batch_matrix(dEdf, batch); 253 | 254 | auto& eisner = *(_ce_ptr.at(batch)); 255 | 256 | eisner.backward( 257 | [&] (unsigned head, unsigned mod) -> float 258 | { 259 | if (head == 0u && !with_root_arcs) 260 | return 0.f; 261 | auto arc = diffdp::from_adjacency({head, mod}, output_graph); 262 | const float v = input_grad(arc.first, arc.second); 263 | if (!std::isfinite(v)) 264 | throw std::runtime_error("BAD eisner input grad"); 265 | return v; 266 | } 267 | ); 268 | 269 | for (unsigned head = 0u ; head < eisner.size() ; ++head) 270 | { 271 | for (unsigned mod = 1u; mod < eisner.size(); ++mod) 272 | { 273 | if (head == mod) 274 | continue; 275 | 276 | if (head == 0u && !with_root_arcs) 277 | return; 278 | 279 | auto const v = eisner.gradient(head, mod); 280 | if (!std::isfinite(v)) 281 | throw std::runtime_error("BAD eisner output grad"); 282 | 283 | auto arc = diffdp::from_adjacency({head, mod}, input_graph); 284 | output_grad(arc.first, arc.second) += v; 285 | } 286 | } 287 | } 288 | #endif 289 | } 290 | 291 | 292 | DYNET_NODE_INST_DEV_IMPL(AlgorithmicDifferentiableEisner) 293 | 294 | 295 | 296 | 297 | // ENTROPY Regularized 298 | 299 | 300 | EntropyRegularizedEisner::EntropyRegularizedEisner( 301 | const std::initializer_list& a, 302 | diffdp::DiscreteMode mode, 303 | diffdp::DependencyGraphMode input_graph, 304 | diffdp::DependencyGraphMode output_graph, 305 | bool with_root_arcs, 306 | std::vector* batch_sizes 307 | ) : 308 | Node(a), 309 | mode(mode), 310 | input_graph(input_graph), 311 | output_graph(output_graph), 312 | with_root_arcs(with_root_arcs), 313 | batch_sizes(batch_sizes) 314 | { 315 | this->has_cuda_implemented = false; 316 | } 317 | 318 | bool EntropyRegularizedEisner::supports_multibatch() const 319 | { 320 | return true; 321 | } 322 | 323 | EntropyRegularizedEisner::~EntropyRegularizedEisner() 324 | { 325 | for (auto*& ptr : _ce_ptr) 326 | if (ptr != nullptr) 327 | { 328 | delete ptr; 329 | ptr = nullptr; 330 | } 331 | } 332 | 333 | std::string EntropyRegularizedEisner::as_string(const std::vector& arg_names) const { 334 | std::ostringstream s; 335 | s << "entropy_regularized_eisner(" << arg_names[0] << ")"; 336 | return s.str(); 337 | } 338 | 339 | Dim EntropyRegularizedEisner::dim_forward(const std::vector& xs) const { 340 | DYNET_ARG_CHECK( 341 | xs.size() == 1 && xs[0].nd == 2 && xs[0].rows() == xs[0].cols(), 342 | "Bad input dimensions in EntropyRegularizedEisner: " << xs 343 | ); 344 | if (input_graph == diffdp::DependencyGraphMode::Compact) 345 | DYNET_ARG_CHECK( 346 | xs[0].rows() >= 1, 347 | "Bad input dimensions in EntropyRegularizedEisner: " << xs 348 | ) 349 | else 350 | DYNET_ARG_CHECK( 351 | xs[0].rows() >= 2, 352 | "Bad input dimensions in EntropyRegularizedEisner: " << xs 353 | ) 354 | 355 | unsigned dim; 356 | if (input_graph == output_graph) 357 | dim = xs[0].rows(); 358 | else if (input_graph == diffdp::DependencyGraphMode::Compact) 359 | dim = xs[0].rows() + 1; // from compact to adj 360 | else 361 | dim = xs[0].rows() - 1; // from adj to compact 362 | 363 | return dynet::Dim({dim, dim}, xs[0].batch_elems()); 364 | } 365 | 366 | size_t EntropyRegularizedEisner::aux_storage_size() const { 367 | const unsigned eisner_dim = dim.rows() + (output_graph == 
diffdp::DependencyGraphMode::Compact ? 1 : 0); 368 | // 2 times because we have a forward and a backward chart 369 | const size_t eisner_mem = 2 * diffdp::EisnerChart::required_memory(eisner_dim); 370 | return dim.batch_elems() * eisner_mem; 371 | } 372 | 373 | template 374 | void EntropyRegularizedEisner::forward_dev_impl( 375 | const MyDevice&, 376 | const std::vector& xs, 377 | Tensor& fx 378 | ) const { 379 | #ifdef __CUDACC__ 380 | DYNET_NO_CUDA_IMPL_ERROR("EntropyRegularizedEisner::forward"); 381 | #else 382 | // TODO call zero only when necessary 383 | TensorTools::zero(fx); 384 | 385 | std::vector& _ce_ptr2 = 386 | const_cast&>(_ce_ptr); 387 | 388 | for (auto*& ptr : _ce_ptr2) 389 | if (ptr != nullptr) 390 | { 391 | delete ptr; 392 | ptr = nullptr; 393 | } 394 | 395 | if (_ce_ptr2.size() != xs[0]->d.batch_elems()) 396 | _ce_ptr2.resize(xs[0]->d.batch_elems(), nullptr); 397 | 398 | const unsigned max_eisner_dim = xs[0]->d.rows() + (input_graph == diffdp::DependencyGraphMode::Compact ? 1 : 0); 399 | float* aux_fmem = static_cast(aux_mem); 400 | 401 | //#pragma omp parallel for 402 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 403 | { 404 | const unsigned eisner_dim = ( 405 | batch_sizes == nullptr 406 | ? max_eisner_dim 407 | : batch_sizes->at(batch) + 1 408 | ); 409 | 410 | auto input = batch_matrix(*(xs[0]), batch); 411 | 412 | if (mode == diffdp::DiscreteMode::ForwardRegularized) 413 | { 414 | float* fmem = aux_fmem + batch * 2 * diffdp::EisnerChart::required_cells(max_eisner_dim); 415 | //auto forward_chart = std::make_shared(eisner_dim, fmem); 416 | //auto backward_chart = std::make_shared(eisner_dim, fmem + diffdp::EisnerChart::required_cells(max_eisner_dim)); 417 | auto forward_chart = std::make_shared(eisner_dim, fmem); 418 | auto backward_chart = std::make_shared(eisner_dim); 419 | 420 | _ce_ptr2.at(batch) = new diffdp::EntropyRegularizedEisner(forward_chart, backward_chart); 421 | 422 | 423 | _ce_ptr2.at(batch)->forward( 424 | [&] (const unsigned head, const unsigned mod) 425 | { 426 | if (mod == 0u) 427 | throw std::runtime_error("Illegal arc"); 428 | if (head == 0u && !with_root_arcs) 429 | { 430 | return 0.f; 431 | } 432 | else 433 | { 434 | const auto arc = diffdp::from_adjacency({head, mod}, input_graph); 435 | const float v = input(arc.first, arc.second); 436 | return v; 437 | } 438 | } 439 | ); 440 | 441 | auto output = batch_matrix(fx, batch); 442 | 443 | for (unsigned head = 0u ; head < eisner_dim ; ++head) 444 | { 445 | for (unsigned mod = 1u; mod < eisner_dim ; ++mod) 446 | { 447 | const auto arc = diffdp::from_adjacency({head, mod}, output_graph); 448 | if (head == mod) 449 | { 450 | output(arc.first, arc.second) = 0.f; 451 | continue; 452 | } 453 | 454 | if (head == 0u && !with_root_arcs) 455 | { 456 | output(arc.first, arc.second) = 0.f; 457 | continue; 458 | } 459 | 460 | const float a = _ce_ptr2[batch]->output(head, mod); 461 | 462 | if (!std::isfinite(a)) 463 | throw std::runtime_error("BAD eisner output"); 464 | 465 | output(arc.first, arc.second) = a; 466 | } 467 | } 468 | } 469 | else 470 | { 471 | throw std::runtime_error("Not implemented: only ForwardRegularized can be used at the moment"); 472 | } 473 | } 474 | #endif 475 | } 476 | 477 | template 478 | void EntropyRegularizedEisner::backward_dev_impl( 479 | const MyDevice &, 480 | const std::vector& xs, 481 | const Tensor&, 482 | const Tensor& dEdf, 483 | unsigned, 484 | Tensor& dEdxi 485 | ) const { 486 | #ifdef __CUDACC__ 487 | 
DYNET_NO_CUDA_IMPL_ERROR("EntropyRegularizedEisner::backward"); 488 | #else 489 | //#pragma omp parallel for 490 | for (unsigned batch = 0u ; batch < xs[0]->d.batch_elems() ; ++batch) 491 | { 492 | auto output_grad = batch_matrix(dEdxi, batch); 493 | auto input_grad = batch_matrix(dEdf, batch); 494 | 495 | auto& eisner = *(_ce_ptr.at(batch)); 496 | 497 | eisner.backward( 498 | [&] (unsigned head, unsigned mod) -> float 499 | { 500 | if (head == 0u && !with_root_arcs) 501 | return 0.f; 502 | auto arc = diffdp::from_adjacency({head, mod}, output_graph); 503 | const float v = input_grad(arc.first, arc.second); 504 | if (!std::isfinite(v)) 505 | throw std::runtime_error("BAD eisner input grad"); 506 | return v; 507 | } 508 | ); 509 | 510 | for (unsigned head = 0u ; head < eisner.size() ; ++head) 511 | { 512 | for (unsigned mod = 1u; mod < eisner.size(); ++mod) 513 | { 514 | if (head == mod) 515 | continue; 516 | 517 | if (head == 0u && !with_root_arcs) 518 | return; 519 | 520 | auto const v = eisner.gradient(head, mod); 521 | if (!std::isfinite(v)) 522 | throw std::runtime_error("BAD eisner output grad"); 523 | 524 | auto arc = diffdp::from_adjacency({head, mod}, input_graph); 525 | output_grad(arc.first, arc.second) += v; 526 | } 527 | } 528 | } 529 | #endif 530 | } 531 | 532 | DYNET_NODE_INST_DEV_IMPL(EntropyRegularizedEisner) 533 | 534 | } -------------------------------------------------------------------------------- /lib/src/dynet/matrix_tree_theorem.cpp: -------------------------------------------------------------------------------- 1 | #include "diffdp/dynet/matrix_tree_theorem.h" 2 | 3 | #include "dynet/tensor-eigen.h" 4 | #include "dynet/nodes-impl-macros.h" 5 | 6 | namespace dynet 7 | { 8 | 9 | Expression matrix_tree_theorem(const Expression &weights) 10 | { 11 | return Expression(weights.pg, weights.pg->add_function({weights.i})); 12 | } 13 | 14 | 15 | MatrixTreeTheorem::MatrixTreeTheorem( 16 | const std::initializer_list& a 17 | ) : 18 | Node(a) 19 | { 20 | this->has_cuda_implemented = false; 21 | } 22 | 23 | bool MatrixTreeTheorem::supports_multibatch() const 24 | { 25 | return false; 26 | } 27 | 28 | 29 | std::string MatrixTreeTheorem::as_string(const std::vector& arg_names) const { 30 | std::ostringstream s; 31 | s << "matrix_tree_theorem(" << arg_names[0] << ")"; 32 | return s.str(); 33 | } 34 | 35 | Dim MatrixTreeTheorem::dim_forward(const std::vector& xs) const { 36 | return xs[0]; 37 | } 38 | 39 | size_t MatrixTreeTheorem::aux_storage_size() const 40 | { 41 | const auto matrix_size = dim.rows() * dim.cols(); 42 | // 1. exp weights 43 | // 2. laplacian inverse 44 | // 3. output1 45 | // 4. 
output2
46 | return sizeof(float) * matrix_size * 4;
47 | }
48 |
49 | template<class MyDevice>
50 | void MatrixTreeTheorem::forward_dev_impl(
51 | const MyDevice& dev,
52 | const std::vector<const Tensor*>& xs,
53 | Tensor& fx
54 | ) const {
55 | #ifdef __CUDACC__
56 | DYNET_NO_CUDA_IMPL_ERROR("MatrixTreeTheorem::forward");
57 | #else
58 | // aux mem
59 | const Dim matrix_dim({fx.d.cols(), fx.d.rows()});
60 | const unsigned matrix_size = fx.d.cols() * fx.d.rows();
61 |
62 | float* f_aux_mem = (float*) aux_mem;
63 | Tensor tensor_exp_weights(matrix_dim, f_aux_mem, fx.device, DeviceMempool::FXS);
64 | Tensor tensor_laplacian(matrix_dim, f_aux_mem + matrix_size, fx.device, DeviceMempool::FXS);
65 | Tensor tensor_output1(matrix_dim, f_aux_mem + 2*matrix_size, fx.device, DeviceMempool::FXS);
66 | Tensor tensor_output2(matrix_dim, f_aux_mem + 3*matrix_size, fx.device, DeviceMempool::FXS);
67 |
68 | // temp mem
69 | AlignedMemoryPool* scratch_allocator = fx.device->pools[(int)DeviceMempool::SCS];
70 | Tensor tensor_col_sum(Dim({fx.d.cols()}), nullptr, fx.device, DeviceMempool::FXS);
71 | tensor_col_sum.v = static_cast<float*>(scratch_allocator->allocate(tensor_col_sum.d.size() * sizeof(float)));
72 |
73 | auto weights = mat(*xs[0]);
74 | auto exp_weights = mat(tensor_exp_weights);
75 | auto col_sum = vec(tensor_col_sum);
76 | auto laplacian = mat(tensor_laplacian);
77 | auto output1 = mat(tensor_output1);
78 | auto output2 = mat(tensor_output2);
79 | auto marginals = mat(fx);
80 |
81 | exp_weights = weights.array().exp();
82 |
83 | // sum over columns
84 | col_sum = exp_weights.colwise().sum();
85 |
86 | // set fx = laplacian
87 | laplacian = -exp_weights;
88 | laplacian.diagonal() += col_sum;
89 |
90 | laplacian.row(0).setZero();
91 | laplacian(0, 0) = 1.f; // anything > 0 will work here
92 |
93 | // Debug print of the Laplacian's top-left 3x3 block, disabled because it
94 | // ran unconditionally and reads out of bounds for inputs smaller than 3x3:
95 | // for (unsigned i = 0 ; i < 3 ; ++i) {
96 | //     for (unsigned j = 0 ; j < 3 ; ++j)
97 | //         std::cerr << laplacian(i, j) << "\t";
98 | //     std::cerr << std::endl; }
99 | // inverse
100 | laplacian = laplacian.inverse();
101 |
102 | // on gpu it may be faster to use masked matrix?
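// Marginals via the Matrix-Tree Theorem (cf. Koo et al., 2007): with L the
// root-adjusted Laplacian inverted above, the marginal of arc h -> m is
//   mu(h, m) = exp(w(h, m)) * (L^-1(m, m) - L^-1(m, h)),
// which is exactly output1 - output2 below: output1 uses the diagonal of
// the inverse, output2 the transposed inverse, and row/column 0 are masked
// since the root has no incoming arc.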
103 | output1.col(0).setZero(); 104 | for (unsigned i = 1 ; i < fx.d.rows() ; ++i) 105 | output1.col(i) = exp_weights.col(i) * laplacian(i, i); 106 | 107 | // array because it a cwise product, not a matrix product 108 | output2 = exp_weights.array() * laplacian.transpose().array(); 109 | output2.row(0).setZero(); 110 | 111 | marginals = output1 - output2; 112 | 113 | scratch_allocator->free(); 114 | #endif 115 | } 116 | 117 | template 118 | void MatrixTreeTheorem::backward_dev_impl( 119 | const MyDevice &, 120 | const std::vector& xs, 121 | const Tensor& fx, 122 | const Tensor& dEdf, 123 | unsigned, 124 | Tensor& dEdxi 125 | ) const { 126 | #ifdef __CUDACC__ 127 | DYNET_NO_CUDA_IMPL_ERROR("AlgorithmicDifferentiableEisner::backward"); 128 | #else 129 | 130 | const Dim matrix_dim({fx.d.cols(), fx.d.rows()}); 131 | const unsigned matrix_size = fx.d.cols() * fx.d.rows(); 132 | 133 | float* f_aux_mem = (float*) aux_mem; 134 | Tensor tensor_exp_weights(matrix_dim, f_aux_mem, fx.device, DeviceMempool::FXS); 135 | Tensor tensor_laplacian(matrix_dim, f_aux_mem + matrix_size, fx.device, DeviceMempool::FXS); 136 | Tensor tensor_output1(matrix_dim, f_aux_mem + 2*matrix_size, fx.device, DeviceMempool::FXS); 137 | Tensor tensor_output2(matrix_dim, f_aux_mem + 3*matrix_size, fx.device, DeviceMempool::FXS); 138 | 139 | auto weights = mat(*xs[0]); 140 | auto exp_weights = mat(tensor_exp_weights); 141 | auto laplacian = mat(tensor_laplacian); 142 | auto output1 = mat(tensor_output1); 143 | auto output2 = mat(tensor_output2); 144 | auto marginals = mat(fx); 145 | 146 | auto d_marginals = mat(dEdf); 147 | auto d_weights = mat(dEdxi); 148 | 149 | 150 | #endif 151 | } 152 | 153 | 154 | DYNET_NODE_INST_DEV_IMPL(MatrixTreeTheorem) 155 | 156 | } -------------------------------------------------------------------------------- /test/test-binary-phrase-algdiff.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MODULE "AlgorithmicDifferentiableEisner" 3 | 4 | #include 5 | namespace utf = boost::unit_test; 6 | 7 | #include "diffdp/algorithm/binary_phrase.h" 8 | 9 | // using boost test with intolerance fails (too precise), 10 | // so let's just use the same test as in Dynet. 
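// check_grad compares an analytic gradient g against a finite-difference
// estimate g_act, using an absolute error for small values and a relative
// error (f / max(|g|, |g_act|)) otherwise, with a 1e-2 threshold; the
// estimates themselves come from central differences,
//   g_act = (f(x + h) - f(x - h)) / (2h).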
11 | bool check_grad(float g, float g_act) 12 | { 13 | float f = std::fabs(g - g_act); 14 | float m = std::max(std::fabs(g), std::fabs(g_act)); 15 | if (f > 0.01 && m > 0.f) 16 | f /= m; 17 | 18 | if (f > 0.01 || std::isnan(f)) 19 | return false; 20 | else 21 | return true; 22 | } 23 | 24 | BOOST_AUTO_TEST_CASE(gradient) 25 | { 26 | const unsigned size = 10; 27 | const float sensitivity = 1e-3; 28 | 29 | diffdp::AlgorithmicDifferentiableBinaryPhraseStructure alg_diff(size); 30 | 31 | // check gradient 32 | std::vector<float> weights(size * size); 33 | for (unsigned output_left = 0 ; output_left < size ; ++output_left) 34 | { 35 | for (unsigned output_right = output_left + 1 ; output_right < size ; ++output_right) 36 | { 37 | for (unsigned input_left = 0 ; input_left < size ; ++input_left) 38 | { 39 | for (unsigned input_right = input_left + 1; input_right < size; ++input_right) 40 | { 41 | // compute gradient using the algorithm 42 | alg_diff.forward( 43 | [&] (const unsigned left, const unsigned right) -> float 44 | { 45 | return weights.at(left + right * size); 46 | } 47 | ); 48 | alg_diff.backward( 49 | [&] (const unsigned left, const unsigned right) -> float 50 | { 51 | if (left == output_left && right == output_right) 52 | return 1.f; 53 | else 54 | return 0.f; 55 | } 56 | ); 57 | 58 | const double computed_gradient = alg_diff.gradient(input_left, input_right); 59 | 60 | // estimate the gradient by central differences 61 | 62 | const double original_weights = weights.at(input_left + input_right * size); 63 | 64 | weights.at(input_left + input_right * size) = original_weights + sensitivity; 65 | alg_diff.forward( 66 | [&] (const unsigned left, const unsigned right) -> float 67 | { 68 | return weights.at(left + right * size); 69 | } 70 | ); 71 | 72 | const double output_a = alg_diff.output(output_left, output_right); 73 | 74 | weights.at(input_left + input_right * size) = original_weights - sensitivity; 75 | alg_diff.forward( 76 | [&] (const unsigned left, const unsigned right) -> float 77 | { 78 | return weights.at(left + right * size); 79 | } 80 | ); 81 | const double output_b = alg_diff.output(output_left, output_right); 82 | 83 | // restore 84 | weights.at(input_left + input_right * size) = original_weights; 85 | 86 | const double estimated_gradient = (output_a - output_b) / (2.f * sensitivity); 87 | 88 | BOOST_CHECK(check_grad(computed_gradient, estimated_gradient)); 89 | } 90 | } 91 | } 92 | } 93 | } -------------------------------------------------------------------------------- /test/test-binary-phrase-ereg.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MODULE "EntropyRegularizedBinaryPhrase" 3 | 4 | #include <boost/test/unit_test.hpp> 5 | #include <cmath> 6 | #include <vector> 7 | namespace utf = boost::unit_test; 8 | 9 | #include "diffdp/algorithm/binary_phrase.h" 10 | 11 | // using boost test with tolerance fails (too precise), 12 | // so let's just use the same check as in Dynet.
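// The tests in this file estimate derivatives by central finite differences:
// a single input weight w is perturbed to w + eps and w - eps, the quantity
// of interest f is recomputed for both values, and the derivative is
// estimated as (f(w + eps) - f(w - eps)) / (2 * eps), whose truncation error
// is O(eps^2).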
13 | bool check_grad(float g, float g_act) 14 | { 15 | float f = std::fabs(g - g_act); 16 | float m = std::max(std::fabs(g), std::fabs(g_act)); 17 | if (f > 0.01 && m > 0.f) 18 | f /= m; 19 | 20 | if (f > 0.01 || std::isnan(f)) 21 | return false; 22 | else 23 | return true; 24 | } 25 | 26 | BOOST_AUTO_TEST_CASE(first_order_gradient, * utf::tolerance(1e-2f)) 27 | { 28 | const unsigned size = 10; 29 | const float sensitivity = 1e-2; 30 | 31 | std::vector<float> weights(size * size); 32 | for (unsigned i = 0 ; i < size ; ++i) 33 | weights.at(i) = size; 34 | 35 | diffdp::EntropyRegularizedBinaryPhraseStructure parser(size); 36 | parser.forward( 37 | [&] (unsigned left, unsigned right) -> float 38 | { 39 | return weights.at(left + right * size); 40 | } 41 | ); 42 | 43 | for (unsigned left = 0 ; left < size ; ++left) 44 | { 45 | for (unsigned right = left + 1; right < size ; ++right) 46 | { 47 | const float computed_arc = parser.output(left, right); 48 | 49 | // estimate the gradient 50 | const float original_weights = weights.at(left + right * size); 51 | 52 | weights.at(left + right * size) = original_weights + sensitivity; 53 | diffdp::EntropyRegularizedBinaryPhraseStructure parser2(size); 54 | parser2.forward( 55 | [&] (const unsigned left, const unsigned right) -> float 56 | { 57 | return weights.at(left + right * size); 58 | } 59 | ); 60 | const float output_a = parser2.chart_forward->weight(0, size-1); 61 | 62 | weights.at(left + right * size) = original_weights - sensitivity; 63 | parser2.forward( 64 | [&] (const unsigned left, const unsigned right) -> float 65 | { 66 | return weights.at(left + right * size); 67 | } 68 | ); 69 | const float output_b = parser2.chart_forward->weight(0, size-1); 70 | 71 | // restore 72 | weights.at(left + right * size) = original_weights; 73 | 74 | const float estimated_arc = (output_a - output_b) / (2.f * sensitivity); 75 | 76 | BOOST_CHECK(check_grad(computed_arc, estimated_arc)); 77 | } 78 | } 79 | } 80 | 81 | 82 | BOOST_AUTO_TEST_CASE(second_order_gradient, * utf::tolerance(1e-2f)) 83 | { 84 | const unsigned size = 10; 85 | const float sensitivity = 1e-3; 86 | 87 | std::vector<float> weights(size * size); 88 | for (unsigned i = 0 ; i < size ; ++i) 89 | weights.at(i) = size; 90 | 91 | diffdp::EntropyRegularizedBinaryPhraseStructure parser(size); 92 | parser.forward( 93 | [&] (unsigned left, unsigned right) -> float 94 | { 95 | return weights.at(left + right * size); 96 | } 97 | ); 98 | 99 | for (unsigned input_left = 0 ; input_left < size ; ++input_left) 100 | { 101 | for (unsigned input_right = input_left + 1; input_right < size ; ++input_right) 102 | { 103 | for (unsigned output_left = 0 ; output_left < size ; ++output_left) 104 | { 105 | for (unsigned output_right = output_left + 1; output_right < size; ++output_right) 106 | { 107 | parser.backward( 108 | [&](const unsigned left, const unsigned right) 109 | { 110 | if (left == output_left && right == output_right) 111 | return 1.f; 112 | else 113 | return 0.f; 114 | } 115 | ); 116 | const float computed_gradient = parser.gradient(input_left, input_right); 117 | 118 | // estimate the gradient by central differences 119 | 120 | const float original_weights = weights.at(input_left + input_right * size); 121 | 122 | weights.at(input_left + input_right * size) = original_weights + sensitivity; 123 | diffdp::EntropyRegularizedBinaryPhraseStructure parser2(size); 124 | parser2.forward( 125 | [&](const unsigned left, const unsigned right) -> float 126 | { 127 | return weights.at(left + right * size); 128 | } 129 |
); 130 | const float output_a = parser2.output(output_left, output_right); 131 | 132 | weights.at(input_left + input_right * size) = original_weights - sensitivity; 133 | parser2.forward( 134 | [&](const unsigned left, const unsigned right) -> float 135 | { 136 | return weights.at(left + right * size); 137 | } 138 | ); 139 | const float output_b = parser2.output(output_left, output_right); 140 | 141 | // restore 142 | weights.at(input_left + input_right * size) = original_weights; 143 | 144 | const double estimated_gradient = (output_a - output_b) / (2.f * sensitivity); 145 | BOOST_CHECK(check_grad(computed_gradient, estimated_gradient)); 146 | } 147 | } 148 | } 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /test/test-dynet-eisner.cpp: -------------------------------------------------------------------------------- 1 | /** 2 |  * TODO split in two different test cases 3 |  */ 4 | #define BOOST_TEST_DYN_LINK 5 | #define BOOST_TEST_MODULE "DynetEisner" 6 | 7 | #include <boost/test/unit_test.hpp> 8 | namespace utf = boost::unit_test; 9 | 10 | #include <vector> 11 | 12 | #include "dynet/expr.h" 13 | #include "dynet/param-init.h" 14 | #include "dynet/grad-check.h" 15 | 16 | #include "diffdp/dynet/eisner.h" 17 | 18 | BOOST_AUTO_TEST_CASE(test_dynet_eisner_algdiff) 19 | { 20 | const unsigned size = 10u; 21 | 22 | int argc = 1; 23 | char arg0[] = "test"; char* args[] = { arg0 }; char** argv = args; // argv must point to valid storage 24 | dynet::initialize(argc, argv); 25 | 26 | dynet::ParameterCollection pc; 27 | 28 | std::vector<float> weights(size * size); 29 | for (unsigned i = 0 ; i < weights.size() ; ++i) 30 | weights.at(i) = (float) i; 31 | auto p_weights = pc.add_parameters(dynet::Dim({size, size}), dynet::ParameterInitFromVector(weights)); 32 | 33 | dynet::ComputationGraph cg; 34 | cg.set_immediate_compute(true); 35 | cg.set_check_validity(true); 36 | 37 | auto e_weights = dynet::parameter(cg, p_weights); 38 | 39 | { 40 | auto e_arcs = dynet::algorithmic_differentiable_eisner( 41 | e_weights, 42 | diffdp::DiscreteMode::ForwardRegularized, 43 | diffdp::DependencyGraphMode::Adjacency, 44 | diffdp::DependencyGraphMode::Adjacency 45 | ); 46 | 47 | for (unsigned head = 0u; head < size; ++head) 48 | { 49 | for (unsigned mod = 0u; mod < size; ++mod) 50 | { 51 | auto e_output = dynet::strided_select( 52 | e_arcs, 53 | {(int) 1u, (int) 1u}, 54 | {(int) head, (int) mod}, 55 | {(int) head + 1, (int) mod + 1} // not included 56 | ); 57 | 58 | BOOST_CHECK(check_grad(pc, e_output, 0)); 59 | } 60 | } 61 | } 62 | { 63 | auto e_arcs = dynet::entropy_regularized_eisner( 64 | e_weights, 65 | diffdp::DiscreteMode::ForwardRegularized, 66 | diffdp::DependencyGraphMode::Adjacency, 67 | diffdp::DependencyGraphMode::Adjacency 68 | ); 69 | 70 | for (unsigned head = 0u; head < size; ++head) 71 | { 72 | for (unsigned mod = 0u; mod < size; ++mod) 73 | { 74 | auto e_output = dynet::strided_select( 75 | e_arcs, 76 | {(int) 1u, (int) 1u}, 77 | {(int) head, (int) mod}, 78 | {(int) head + 1, (int) mod + 1} // not included 79 | ); 80 | BOOST_CHECK(check_grad(pc, e_output, 0)); 81 | } 82 | } 83 | } 84 | } -------------------------------------------------------------------------------- /test/test-dynet-phrase.cpp: -------------------------------------------------------------------------------- 1 | /** 2 |  * TODO split in two different test cases 3 |  */ 4 | #define BOOST_TEST_DYN_LINK 5 | #define BOOST_TEST_MODULE "DynetPhrase" 6 | 7 | #include <boost/test/unit_test.hpp> 8 | namespace utf = boost::unit_test; 9 | 10 | #include <vector> 11 | 12 | #include "dynet/expr.h" 13 | #include "dynet/param-init.h" 14 | #include
"dynet/grad-check.h" 15 | 16 | #include "diffdp/dynet/binary_phrase.h" 17 | 18 | BOOST_AUTO_TEST_CASE(test_dynet_phrase) 19 | { 20 | const unsigned size = 10u; 21 | 22 | int argc = 1; 23 | char **argv; 24 | dynet::initialize(argc, argv); 25 | 26 | dynet::ParameterCollection pc; 27 | 28 | std::vector weights(size * size); 29 | //for (unsigned i = 0 ; i < weights.size() ; ++i) 30 | // weights.at(i) = (float) i; 31 | auto p_weights = pc.add_parameters(dynet::Dim({size, size}), dynet::ParameterInitFromVector(weights)); 32 | 33 | { 34 | for (unsigned head = 0u; head < size; ++head) 35 | { 36 | for (unsigned mod = 0u; mod < size; ++mod) 37 | { 38 | dynet::ComputationGraph cg; 39 | auto e_weights = dynet::parameter(cg, p_weights); 40 | auto e_arcs = dynet::algorithmic_differentiable_binary_phrase_structure( 41 | e_weights, 42 | diffdp::DiscreteMode::ForwardRegularized 43 | ); 44 | auto e_output = dynet::strided_select( 45 | e_arcs, 46 | {(int) 1u, (int) 1u}, 47 | {(int) head, (int) mod}, 48 | {(int) head + 1, (int) mod + 1} // not included 49 | ); 50 | 51 | BOOST_CHECK(check_grad(pc, e_output, 0)); 52 | } 53 | } 54 | } 55 | { 56 | 57 | for (unsigned head = 0u; head < size; ++head) 58 | { 59 | for (unsigned mod = 0u; mod < size; ++mod) 60 | { 61 | dynet::ComputationGraph cg; 62 | auto e_weights = dynet::parameter(cg, p_weights); 63 | 64 | auto e_arcs = dynet::entropy_regularized_binary_phrase_structure( 65 | e_weights, 66 | diffdp::DiscreteMode::ForwardRegularized 67 | ); 68 | auto e_output = dynet::strided_select( 69 | e_arcs, 70 | {(int) 1u, (int) 1u}, 71 | {(int) head, (int) mod}, 72 | {(int) head + 1, (int) mod + 1} // not included 73 | ); 74 | 75 | BOOST_CHECK(check_grad(pc, e_output, 0)); 76 | } 77 | } 78 | } 79 | } -------------------------------------------------------------------------------- /test/test-eisner-algdiff.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MODULE "EntropyRegularizedEisner" 3 | 4 | #include 5 | namespace utf = boost::unit_test; 6 | 7 | #include "diffdp/algorithm/eisner.h" 8 | 9 | // using boost test with intolerance fails (too precise), 10 | // so let's just use the same test as in Dynet. 
11 | bool check_grad(float g, float g_act) 12 | { 13 | float f = std::fabs(g - g_act); 14 | float m = std::max(std::fabs(g), std::fabs(g_act)); 15 | if (f > 0.01 && m > 0.f) 16 | f /= m; 17 | 18 | if (f > 0.01 || std::isnan(f)) 19 | return false; 20 | else 21 | return true; 22 | } 23 | 24 | BOOST_AUTO_TEST_CASE(gradient) 25 | { 26 | const unsigned size = 10; 27 | const float sensitivity = 1e-3; 28 | 29 | diffdp::AlgorithmicDifferentiableEisner alg_diff_eisner(size); 30 | 31 | // check gradient 32 | std::vector<float> weights(size * size); 33 | for (unsigned output_head = 0 ; output_head < size ; ++output_head) 34 | { 35 | for (unsigned output_mod = 1 ; output_mod < size ; ++output_mod) 36 | { 37 | if (output_head == output_mod) 38 | continue; 39 | 40 | for (unsigned input_head = 0 ; input_head < size ; ++input_head) 41 | { 42 | for (unsigned input_mod = 1; input_mod < size; ++input_mod) 43 | { 44 | if (input_head == input_mod) 45 | continue; 46 | 47 | // compute gradient using the algorithm 48 | alg_diff_eisner.forward( 49 | [&] (const unsigned head, const unsigned mod) -> float 50 | { 51 | return weights.at(head + mod * size); 52 | } 53 | ); 54 | alg_diff_eisner.backward( 55 | [&] (const unsigned head, const unsigned mod) -> float 56 | { 57 | if (head == output_head && mod == output_mod) 58 | return 1.f; 59 | else 60 | return 0.f; 61 | } 62 | ); 63 | 64 | const double computed_gradient = alg_diff_eisner.gradient(input_head, input_mod); 65 | 66 | // estimate the gradient by central differences 67 | 68 | const double original_weights = weights.at(input_head + input_mod * size); 69 | 70 | weights.at(input_head + input_mod * size) = original_weights + sensitivity; 71 | alg_diff_eisner.forward( 72 | [&] (const unsigned head, const unsigned mod) -> float 73 | { 74 | return weights.at(head + mod * size); 75 | } 76 | ); 77 | 78 | const double output_a = alg_diff_eisner.output(output_head, output_mod); 79 | 80 | weights.at(input_head + input_mod * size) = original_weights - sensitivity; 81 | alg_diff_eisner.forward( 82 | [&] (const unsigned head, const unsigned mod) -> float 83 | { 84 | return weights.at(head + mod * size); 85 | } 86 | ); 87 | const double output_b = alg_diff_eisner.output(output_head, output_mod); 88 | 89 | // restore 90 | weights.at(input_head + input_mod * size) = original_weights; 91 | 92 | const double estimated_gradient = (output_a - output_b) / (2.f * sensitivity); 93 | 94 | BOOST_CHECK(check_grad(computed_gradient, estimated_gradient)); 95 | 96 | } 97 | } 98 | } 99 | } 100 | } -------------------------------------------------------------------------------- /test/test-eisner-ereg.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MODULE "EntropyRegularizedEisner" 3 | 4 | #include <boost/test/unit_test.hpp> 5 | namespace utf = boost::unit_test; 6 | 7 | #include "diffdp/algorithm/eisner.h" 8 | 9 | // using boost test with tolerance fails (too precise), 10 | // so let's just use the same check as in Dynet.
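// first_order_gradient seems to rely on the defining property of the
// entropy-regularized parser: each marginal output(head, mod) should equal
// the derivative of the root chart value chart_forward->c_cright(0, size-1)
// (a log-partition-like quantity) with respect to the corresponding arc
// weight, which is verified by central differences. second_order_gradient
// then differentiates the marginals themselves through backward().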
11 | bool check_grad(float g, float g_act) 12 | { 13 | float f = std::fabs(g - g_act); 14 | float m = std::max(std::fabs(g), std::fabs(g_act)); 15 | if (f > 0.01 && m > 0.f) 16 | f /= m; 17 | 18 | if (f > 0.01 || std::isnan(f)) 19 | return false; 20 | else 21 | return true; 22 | } 23 | 24 | BOOST_AUTO_TEST_CASE(first_order_gradient, * utf::tolerance(1e-2f)) 25 | { 26 | const unsigned size = 10; 27 | const float sensitivity = 1e-2; 28 | 29 | std::vector<float> weights(size * size); 30 | for (unsigned i = 0 ; i < size ; ++i) 31 | weights.at(i) = size; 32 | 33 | diffdp::EntropyRegularizedEisner parser(size); 34 | parser.forward( 35 | [&] (unsigned head, unsigned mod) -> float 36 | { 37 | return weights.at(head + mod * size); 38 | } 39 | ); 40 | 41 | for (unsigned head = 0 ; head < size ; ++head) 42 | { 43 | for (unsigned mod = 1; mod < size ; ++mod) 44 | { 45 | if (head == mod) 46 | continue; 47 | 48 | const float computed_arc = parser.output(head, mod); 49 | 50 | // estimate the gradient 51 | const float original_weights = weights.at(head + mod * size); 52 | 53 | weights.at(head + mod * size) = original_weights + sensitivity; 54 | diffdp::EntropyRegularizedEisner parser2(size); 55 | parser2.forward( 56 | [&] (const unsigned head, const unsigned mod) -> float 57 | { 58 | return weights.at(head + mod * size); 59 | } 60 | ); 61 | const float output_a = parser2.chart_forward->c_cright(0, size-1); 62 | 63 | weights.at(head + mod * size) = original_weights - sensitivity; 64 | parser2.forward( 65 | [&] (const unsigned head, const unsigned mod) -> float 66 | { 67 | return weights.at(head + mod * size); 68 | } 69 | ); 70 | const float output_b = parser2.chart_forward->c_cright(0, size-1); 71 | 72 | // restore 73 | weights.at(head + mod * size) = original_weights; 74 | 75 | const float estimated_arc = (output_a - output_b) / (2.f * sensitivity); 76 | 77 | BOOST_CHECK(check_grad(computed_arc, estimated_arc)); 78 | } 79 | } 80 | } 81 | 82 | BOOST_AUTO_TEST_CASE(second_order_gradient, * utf::tolerance(1e-2f)) 83 | { 84 | const unsigned size = 10; 85 | const float sensitivity = 1e-3; 86 | 87 | std::vector<float> weights(size * size); 88 | for (unsigned i = 0 ; i < size ; ++i) 89 | weights.at(i) = size; 90 | 91 | diffdp::EntropyRegularizedEisner parser(size); 92 | parser.forward( 93 | [&] (unsigned head, unsigned mod) -> float 94 | { 95 | return weights.at(head + mod * size); 96 | } 97 | ); 98 | 99 | for (unsigned input_head = 0 ; input_head < size ; ++input_head) 100 | { 101 | for (unsigned input_mod = 1; input_mod < size ; ++input_mod) 102 | { 103 | if (input_head == input_mod) 104 | continue; 105 | 106 | for (unsigned output_head = 0 ; output_head < size ; ++output_head) 107 | { 108 | for (unsigned output_mod = 1; output_mod < size; ++output_mod) 109 | { 110 | if (output_head == output_mod) 111 | continue; 112 | 113 | parser.backward( 114 | [&](const unsigned head, const unsigned mod) 115 | { 116 | if (head == output_head && mod == output_mod) 117 | return 1.f; 118 | else 119 | return 0.f; 120 | } 121 | ); 122 | const float computed_gradient = parser.gradient(input_head, input_mod); 123 | 124 | // estimate the gradient by central differences 125 | 126 | const float original_weights = weights.at(input_head + input_mod * size); 127 | 128 | weights.at(input_head + input_mod * size) = original_weights + sensitivity; 129 | diffdp::EntropyRegularizedEisner parser2(size); 130 | parser2.forward( 131 | [&](const unsigned head, const unsigned mod) -> float 132 | { 133 | return weights.at(head + mod * size); 134 | }
135 | ); 136 | const float output_a = parser2.output(output_head, output_mod); 137 | 138 | weights.at(input_head + input_mod * size) = original_weights - sensitivity; 139 | parser2.forward( 140 | [&](const unsigned head, const unsigned mod) -> float 141 | { 142 | return weights.at(head + mod * size); 143 | } 144 | ); 145 | const float output_b = parser2.output(output_head, output_mod); 146 | 147 | // restore 148 | weights.at(input_head + input_mod * size) = original_weights; 149 | 150 | const double estimated_gradient = (output_a - output_b) / (2.f * sensitivity); 151 | 152 | BOOST_CHECK(check_grad(computed_gradient, estimated_gradient)); 153 | } 154 | } 155 | } 156 | } 157 | } -------------------------------------------------------------------------------- /test/test-eisner-ereg.dSYM/Contents/Info.plist: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 3 | <plist version="1.0"> 4 | <dict> 5 | <key>CFBundleDevelopmentRegion</key> 6 | <string>English</string> 7 | <key>CFBundleIdentifier</key> 8 | <string>com.apple.xcode.dsym.test-eisner-ereg</string> 9 | <key>CFBundleInfoDictionaryVersion</key> 10 | <string>6.0</string> 11 | <key>CFBundlePackageType</key> 12 | <string>dSYM</string> 13 | <key>CFBundleSignature</key> 14 | <string>????</string> 15 | <key>CFBundleShortVersionString</key> 16 | <string>1.0</string> 17 | <key>CFBundleVersion</key> 18 | <string>1</string> 19 | </dict> 20 | </plist> 21 | -------------------------------------------------------------------------------- /test/test-eisner-ereg.dSYM/Contents/Resources/DWARF/test-eisner-ereg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoC/diffdp/58ae35b171ddd54b778790bc64838890c0f8956f/test/test-eisner-ereg.dSYM/Contents/Resources/DWARF/test-eisner-ereg -------------------------------------------------------------------------------- /test/test-math.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_DYN_LINK 2 | #define BOOST_TEST_MODULE "Math" 3 | 4 | #include <boost/test/unit_test.hpp> 5 | namespace utf = boost::unit_test; 6 | 7 | #include <vector> 8 | #include <algorithm> 9 | #include "diffdp/algorithm/eisner.h" 10 | #include "diffdp/math.h" 11 | #include "dynet/expr.h" 12 | 13 | // using boost test with tolerance fails (too precise), 14 | // so let's just use the same check as in Dynet.
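// test_softmax compares diffdp's softmax and its backward pass against
// dynet's autodiff. For s = softmax(x) the Jacobian is
//     ds_i / dx_j = s_i * (delta_ij - s_j),
// so backprop_softmax presumably computes
//     dx_j = s_j * (dy_j - sum_i dy_i * s_i);
// the nested loops below check every (input_id, output_id) Jacobian entry.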
15 | bool check_grad(float g, float g_act) 16 | { 17 | float f = std::fabs(g - g_act); 18 | float m = std::max(std::fabs(g), std::fabs(g_act)); 19 | if (f > 0.01 && m > 0.f) 20 | f /= m; 21 | 22 | if (f > 0.01 || std::isnan(f)) 23 | return false; 24 | else 25 | return true; 26 | } 27 | 28 | BOOST_AUTO_TEST_CASE(test_softmax) 29 | { 30 | int argc = 1; 31 | char arg0[] = "test"; char* args[] = { arg0 }; char** argv = args; // argv must point to valid storage 32 | dynet::initialize(argc, argv); 33 | 34 | std::vector<float> input(10); 35 | std::vector<float> output(10); 36 | std::vector<float> input_grad(10); 37 | std::vector<float> output_grad(10); 38 | for (unsigned i = 0 ; i < input.size() ; ++i) 39 | input.at(i) = i; 40 | 41 | { 42 | diffdp::softmax(output.begin(), input.begin(), input.size()); 43 | 44 | dynet::ComputationGraph cg; 45 | auto e_output = dynet::softmax(dynet::input(cg, {10}, input)); 46 | auto dynet_output = as_vector(cg.forward(e_output)); 47 | 48 | for (unsigned i = 0 ; i < 10 ; ++i) 49 | BOOST_CHECK(check_grad(output.at(i), dynet_output.at(i))); 50 | } 51 | 52 | for (unsigned input_id = 0 ; input_id < input.size() ; ++input_id) 53 | { 54 | for (unsigned output_id = 0 ; output_id < input.size() ; ++output_id) 55 | { 56 | 57 | // compute gradient 58 | 59 | std::fill(input_grad.begin(), input_grad.end(), 0.f); 60 | std::fill(output_grad.begin(), output_grad.end(), 0.f); 61 | //std::fill(output.begin(), output.end(), 0.f); 62 | 63 | diffdp::softmax(output.begin(), input.begin(), input.size()); 64 | output_grad.at(output_id) = 1.f; 65 | diffdp::backprop_softmax( 66 | input_grad.begin(), output_grad.begin(), 67 | input.begin(), output.begin(), 68 | input.size() 69 | ); 70 | const float computed_gradient = input_grad.at(input_id); 71 | 72 | // dynet gradient 73 | dynet::ComputationGraph cg; 74 | 75 | auto e_input = dynet::input(cg, {10}, input); 76 | auto e_softmax = dynet::softmax(e_input); 77 | auto e_output = dynet::pick(e_softmax, output_id); 78 | cg.forward(e_output); 79 | cg.backward(e_output, true); 80 | 81 | auto dynet_gradient_all = as_vector(e_input.gradient()); 82 | float dynet_gradient = dynet_gradient_all.at(input_id); 83 | 84 | BOOST_CHECK(check_grad(computed_gradient, dynet_gradient)); 85 | } 86 | 87 | } 88 | } --------------------------------------------------------------------------------