├── .gitignore ├── .gitmodules ├── README.md ├── amr_aligner ├── amr │ ├── __init__.py │ └── aligned.py ├── eager_actions_evaluator.py ├── eager_oracle.py ├── refresh_alignments.py ├── resources │ ├── morphosemantic_links │ │ ├── README.md │ │ ├── extract_morphosemantic.py │ │ └── morphosemantic-links.dic.bz2 │ └── word2vec │ │ └── README.md ├── rule_base_align.py ├── rule_based_aligner │ ├── __init__.py │ ├── aligned_results.py │ ├── match_result.py │ ├── matcher.py │ ├── morphosemantic-links.dic │ ├── stemmer.py │ └── updater.py ├── smatch │ ├── README.md │ ├── __init__.py │ ├── _gain.cc │ ├── _gain.h │ ├── _smatch.cpp │ ├── _smatch.pyx │ ├── amr.py │ ├── api.py │ ├── fast_smatch.py │ ├── setup.py │ ├── smatch-table.py │ └── smatch.py └── system │ ├── __init__.py │ ├── eager │ ├── __init__.py │ ├── oracle.py │ └── state.py │ ├── edge.py │ ├── misc.py │ └── node.py ├── amr_parser ├── CMakeLists.txt ├── cmake │ └── FindEigen3.cmake ├── scripts │ └── eval_eager.sh └── src │ ├── CMakeLists.txt │ ├── corpus.cc │ ├── corpus.h │ ├── ds.cc │ ├── ds.h │ ├── left_to_right │ ├── CMakeLists.txt │ ├── decode │ │ ├── CMakeLists.txt │ │ ├── testing.cc │ │ └── testing.h │ ├── ensemble.cc │ ├── evaluate │ │ ├── CMakeLists.txt │ │ ├── evaluate.cc │ │ └── evaluate.h │ ├── main.cc │ ├── parser │ │ ├── CMakeLists.txt │ │ ├── parser.cc │ │ ├── parser.h │ │ ├── parser_builder.cc │ │ ├── parser_builder.h │ │ ├── parser_eager.cc │ │ ├── parser_eager.h │ │ ├── parser_swap.cc │ │ └── parser_swap.h │ ├── system │ │ ├── CMakeLists.txt │ │ ├── eager.cc │ │ ├── eager.h │ │ ├── state.cc │ │ ├── state.h │ │ ├── swap.cc │ │ ├── swap.h │ │ ├── system.cc │ │ └── system.h │ └── train │ │ ├── CMakeLists.txt │ │ ├── algorithm.h │ │ ├── train.cc │ │ ├── train.h │ │ ├── train_supervised.cc │ │ └── train_supervised.h │ ├── logging.cc │ ├── logging.h │ ├── lstm.cc │ ├── lstm.h │ ├── math_utils.cc │ ├── math_utils.h │ ├── sys_utils.cc │ ├── sys_utils.h │ ├── trainer_utils.cc │ └── trainer_utils.h ├── awesome.md ├── pipeline.sh └── release ├── ldc2014t12 ├── README.md ├── amr-release-1.0-training_fix.patch ├── amr-release-1.0-training_fix.txt.cdec_tok.tamr_alignment.bz2 └── amr-release-1.0-training_fix.txt.sd_tok.tamr_alignment.bz2 └── ldc2017t10 ├── README.md ├── amr-release-2.0-amrs-training-bolt.txt.cdec_tok.tamr_alignment.bz2 ├── amr-release-2.0-amrs-training-cctv.txt.no_wiki.cdec_tok.tamr_alignment.bz2 ├── amr-release-2.0-amrs-training-dfa_fix.patch ├── amr-release-2.0-amrs-training-dfa_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2 ├── amr-release-2.0-amrs-training-dfb_fix.patch ├── amr-release-2.0-amrs-training-dfb_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2 ├── amr-release-2.0-amrs-training-guidelines_fix.patch ├── amr-release-2.0-amrs-training-guidelines_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2 ├── amr-release-2.0-amrs-training-mt09sdl_fix.patch ├── amr-release-2.0-amrs-training-mt09sdl_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2 ├── amr-release-2.0-amrs-training-proxy_fix.patch ├── amr-release-2.0-amrs-training-proxy_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2 ├── amr-release-2.0-amrs-training-wb_fix.patch ├── amr-release-2.0-amrs-training-wb_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2 └── amr-release-2.0-amrs-training-xinhua.txt.no_wiki.cdec_tok.tamr_alignment.bz2 /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / 
packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | .static_storage/ 58 | .media/ 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | ## Core latex/pdflatex auxiliary files: 108 | *.aux 109 | *.lof 110 | *.log 111 | *.lot 112 | *.fls 113 | *.out 114 | *.toc 115 | *.fmt 116 | *.fot 117 | *.cb 118 | *.cb2 119 | .*.lb 120 | 121 | ## Intermediate documents: 122 | *.dvi 123 | *.xdv 124 | *-converted-to.* 125 | # these rules might exclude image files for figures etc. 
126 | # *.ps 127 | # *.eps 128 | # *.pdf 129 | 130 | ## Generated if empty string is given at "Please type another file name for output:" 131 | .pdf 132 | 133 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 134 | *.bbl 135 | *.bcf 136 | *.blg 137 | *-blx.aux 138 | *-blx.bib 139 | *.run.xml 140 | 141 | ## Build tool auxiliary files: 142 | *.fdb_latexmk 143 | *.synctex 144 | *.synctex(busy) 145 | *.synctex.gz 146 | *.synctex.gz(busy) 147 | *.pdfsync 148 | 149 | ## Auxiliary and intermediate files from other packages: 150 | # algorithms 151 | *.alg 152 | *.loa 153 | 154 | # achemso 155 | acs-*.bib 156 | 157 | # amsthm 158 | *.thm 159 | 160 | # beamer 161 | *.nav 162 | *.pre 163 | *.snm 164 | *.vrb 165 | 166 | # changes 167 | *.soc 168 | 169 | # cprotect 170 | *.cpt 171 | 172 | # elsarticle (documentclass of Elsevier journals) 173 | *.spl 174 | 175 | # endnotes 176 | *.ent 177 | 178 | # fixme 179 | *.lox 180 | 181 | # feynmf/feynmp 182 | *.mf 183 | *.mp 184 | *.t[1-9] 185 | *.t[1-9][0-9] 186 | *.tfm 187 | 188 | #(r)(e)ledmac/(r)(e)ledpar 189 | *.end 190 | *.?end 191 | *.[1-9] 192 | *.[1-9][0-9] 193 | *.[1-9][0-9][0-9] 194 | *.[1-9]R 195 | *.[1-9][0-9]R 196 | *.[1-9][0-9][0-9]R 197 | *.eledsec[1-9] 198 | *.eledsec[1-9]R 199 | *.eledsec[1-9][0-9] 200 | *.eledsec[1-9][0-9]R 201 | *.eledsec[1-9][0-9][0-9] 202 | *.eledsec[1-9][0-9][0-9]R 203 | 204 | # glossaries 205 | *.acn 206 | *.acr 207 | *.glg 208 | *.glo 209 | *.gls 210 | *.glsdefs 211 | 212 | # gnuplottex 213 | *-gnuplottex-* 214 | 215 | # gregoriotex 216 | *.gaux 217 | *.gtex 218 | 219 | # hyperref 220 | *.brf 221 | 222 | # knitr 223 | *-concordance.tex 224 | # TODO Comment the next line if you want to keep your tikz graphics files 225 | *.tikz 226 | *-tikzDictionary 227 | 228 | # listings 229 | *.lol 230 | 231 | # makeidx 232 | *.idx 233 | *.ilg 234 | *.ind 235 | *.ist 236 | 237 | # minitoc 238 | *.maf 239 | *.mlf 240 | *.mlt 241 | *.mtc[0-9]* 242 | *.slf[0-9]* 243 | *.slt[0-9]* 244 | *.stc[0-9]* 245 | 246 | # minted 247 | _minted* 248 | *.pyg 249 | 250 | # morewrites 251 | *.mw 252 | 253 | # nomencl 254 | *.nlo 255 | 256 | # pax 257 | *.pax 258 | 259 | # pdfpcnotes 260 | *.pdfpc 261 | 262 | # sagetex 263 | *.sagetex.sage 264 | *.sagetex.py 265 | *.sagetex.scmd 266 | 267 | # scrwfile 268 | *.wrt 269 | 270 | # sympy 271 | *.sout 272 | *.sympy 273 | sympy-plots-for-*.tex/ 274 | 275 | # pdfcomment 276 | *.upa 277 | *.upb 278 | 279 | # pythontex 280 | *.pytxcode 281 | pythontex-files-*/ 282 | 283 | # thmtools 284 | *.loe 285 | 286 | # TikZ & PGF 287 | *.dpth 288 | *.md5 289 | *.auxlock 290 | 291 | # todonotes 292 | *.tdo 293 | 294 | # easy-todo 295 | *.lod 296 | 297 | # xindy 298 | *.xdy 299 | 300 | # xypic precompiled matrices 301 | *.xyc 302 | 303 | # endfloat 304 | *.ttt 305 | *.fff 306 | 307 | # Latexian 308 | TSWLatexianTemp* 309 | 310 | ## Editors: 311 | # WinEdt 312 | *.bak 313 | *.sav 314 | 315 | # Texpad 316 | .texpadtmp 317 | 318 | # Kile 319 | *.backup 320 | 321 | # KBibTeX 322 | *~[0-9]* 323 | 324 | # auto folder when using emacs and auctex 325 | ./auto/* 326 | *.el 327 | 328 | # expex forward references with \gathertags 329 | *-tags.tex 330 | 331 | # standalone packages 332 | *.sta 333 | 334 | data/ 335 | align_results/ 336 | reports/ 337 | analysis/ 338 | output/ 339 | parser_l2r* 340 | *.pdf 341 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "amr_parser/dynet"] 2 | path = 
amr_parser/dynet
3 | url = https://github.com/clab/dynet.git
4 | [submodule "amr_parser/dynet_layer"]
5 | path = amr_parser/dynet_layer
6 | url = https://github.com/Oneplus/dynet_layer.git
7 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | tamr
2 | ====
3 | 
4 | A transition-based AMR parser along with an aligner tuned by the parser.
5 | Used in our EMNLP 2018 paper [An AMR Aligner Tuned by Transition-based Parser](https://arxiv.org/pdf/1810.03541.pdf).
6 | 
7 | 
8 | ## Notation
9 | 
10 | In the following sections, we use the following notation:
11 | 
12 | - `${TAMR_HOME}`: the root directory of the project
13 | - `${TAMR_ALIGNER}`: the directory of the AMR aligner, which equals
14 | to `${TAMR_HOME}/amr_aligner`
15 | - `${TAMR_PARSER}`: the directory of the transition-based parser, which equals
16 | to `${TAMR_HOME}/amr_parser`
17 | 
18 | ## Aligner
19 | 
20 | The code for the AMR aligner is under `${TAMR_ALIGNER}`.
21 | 
22 | ### Pre-requisites
23 | 
24 | - python2.7
25 | - JAMR
26 | - nltk
27 | - gensim
28 | - penman
29 | - Cython (optional, for fast_smatch.py)
30 | 
31 | ### Prepare resource
32 | We use `word2vec` for semantic matching. See the [README.md](https://github.com/Oneplus/tamr/tree/master/amr_aligner/resources/word2vec)
33 | for more information about filtering the word vectors.
34 | 
35 | ### Prepare data
36 | Our alignment is built on the JAMR alignment results.
37 | You can get the input data with the following commands:
38 | ```
39 | pushd "$JAMR_HOME" > /dev/null
40 | . scripts/config.sh
41 | scripts/ALIGN.sh < /path/to/your/input/data > /path/to/your/baseline/data
42 | ```
43 | 
44 | ### Run the Aligner
45 | Go into `${TAMR_ALIGNER}` and run the following commands:
46 | 
47 | ```
48 | python rule_base_align.py \
49 |     -verbose \
50 |     -data \
51 |     /path/to/your/baseline/data \
52 |     -output \
53 |     /path/to/your/alignment/data \
54 |     -wordvec \
55 |     /path/to/your/wordvec/data \
56 |     -trials \
57 |     10000 \
58 |     -improve_perfect \
59 |     -morpho_match \
60 |     -semantic_match
61 | ```
62 | 
63 | The quality of an alignment is evaluated by the smatch
64 | score of the graph
65 | it leads to. With `-improve_perfect`, the aligner will
66 | keep updating the alignment even when the baseline alignment
67 | already achieves a smatch score of 1.0.
68 | 
69 | The output alignment is shown as blocks of results in the following format:
70 | ```
71 | id
72 | # ::alignment:
73 | ```
74 | 
75 | **[2018/12/20 update]** the old `replace_comments.py`
76 | did not update the alignment in the `# ::node` fields,
77 | which is used by `eager_oracle.py`. Please use the
78 | `refresh_alignments.py` script to generate new alignment data.
79 | Thanks @jcyk for spotting the bug!
80 | 
81 | After getting the alignment, use the following commands to generate
82 | the new alignment data:
83 | ```
84 | python refresh_alignments.py \
85 |     -lexicon \
86 |     /path/to/your/alignment/data \
87 |     -data \
88 |     /path/to/your/baseline/data \
89 |     > /path/to/your/new/alignment/data
90 | ```
91 | 
92 | You can also use `refresh_alignments.py` to produce an aligned AMR file
93 | for LDC2014T12 from the alignment we release.
94 | 
95 | ## Parser
96 | 
97 | ### Pre-requisites
98 | 
99 | - cmake
100 | - a C++ compiler supporting C++11
101 | - Eigen
102 | 
103 | ### Build
104 | 
105 | Before compiling, you need to fetch the `dynet` and `dynet_layer` submodules with
106 | ```
107 | git submodule init
108 | git submodule update
109 | ```
110 | under `${TAMR_HOME}`.
111 | 
112 | After fetching the submodules, run the following commands.
113 | 
114 | ```
115 | cd amr_parser
116 | mkdir build
117 | cd build
118 | cmake .. -DEIGEN3_INCLUDE_DIR=/path/to/your/eigen/
119 | make
120 | ```
121 | 
122 | The compilation will generate an executable under `${TAMR_PARSER}/bin/`.
123 | 
124 | ### Prepare data
125 | 
126 | After getting your aligned data,
127 | run `${TAMR_ALIGNER}/eager_oracle.py`
128 | to generate the training action file from the alignment:
129 | ```
130 | python eager_oracle.py \
131 |     -mod \
132 |     dump \
133 |     -aligned \
134 |     /path/to/your/new/alignment/data \
135 |     > /path/to/your/actions
136 | ```
137 | 
138 | ### Training the Parser
139 | Train the parser with the following command under `${TAMR_PARSER}`:
140 | ```
141 | ./amr_parser/bin/parser_l2r \
142 |     --dynet-seed \
143 |     1 \
144 |     --train \
145 |     --training_data \
146 |     /path/to/your/new/actions/training/data \
147 |     --devel_data \
148 |     /path/to/your/new/actions/dev/data \
149 |     --test_data \
150 |     /path/to/your/new/actions/test/data \
151 |     --pretrained \
152 |     /path/to/your/embedding/file \
153 |     --model \
154 |     data/little_prince/model \
155 |     --optimizer_enable_eta_decay \
156 |     true \
157 |     --optimizer_enable_clipping \
158 |     true \
159 |     --external_eval \
160 |     ./amr_parser/scripts/eval_eager.sh \
161 |     --devel_gold \
162 |     /path/to/your/new/alignment/dev/data \
163 |     --test_gold \
164 |     /path/to/your/new/alignment/test/data \
165 |     --max_iter \
166 |     1
167 | ```
168 | 
169 | ## Released Alignments
170 | 
171 | ### [LDC2014T12](https://catalog.ldc.upenn.edu/LDC2014T12)
172 | 
173 | You can find our alignment for LDC2014T12 under `${TAMR_HOME}/release/ldc2014t12`.
174 | Since JAMR and CAMR use different tokenization, our release includes
175 | the alignment processed with cdec tokenization and with Stanford tokenization.
176 | 
177 | ### [LDC2017T10](https://catalog.ldc.upenn.edu/LDC2017T10)
178 | 
179 | You can find our alignment for LDC2017T10 under `${TAMR_HOME}/release/ldc2017t10`.
180 | Our release only contains the alignment processed with cdec tokenization.
181 | 
182 | ## Pipeline Script
183 | 
184 | We demonstrate the full process in the `pipeline.sh` script.
185 | 
186 | ## Awesome AMR
187 | 
188 | Our alignment helps other AMR parsers achieve better performance.
189 | We show how to hack several open-source AMR parsers and replace
190 | their alignment with ours in [awesome.md](https://github.com/Oneplus/tamr/blob/master/awesome.md).
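
## Scoring with the Smatch API

For quick sanity checks outside of the full pipeline, you can call the bundled
smatch wrapper (`${TAMR_ALIGNER}/smatch/api.py`) yourself. A minimal sketch,
run from `${TAMR_ALIGNER}` so that `smatch` is importable; the two single-line
AMR strings below are illustrative placeholders, not data from our releases:

```
from smatch.api import smatch, SmatchScorer

gold = '(w / want-01 :ARG0 (b / boy) :ARG1 (g / go-01 :ARG0 b))'
pred = '(w / want-01 :ARG0 (b / boy))'

print(smatch(pred, gold))   # f-score for a single pair

scorer = SmatchScorer()     # corpus-level: accumulate counts pair by pair
scorer.update(pred, gold)
print(scorer.f_score())     # aggregate f-score over all pairs seen so far
```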
191 | 
192 | ## Contact
193 | 
194 | Yijia Liu <>
195 | 
--------------------------------------------------------------------------------
/amr_aligner/amr/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/amr_aligner/amr/__init__.py
--------------------------------------------------------------------------------
/amr_aligner/eager_actions_evaluator.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 | from __future__ import unicode_literals
4 | import argparse
5 | import codecs
6 | from system.node import TokenNode, EntityNode, ConceptNode
7 | from system.eager.state import State
8 | from amr.aligned import AlignmentReader, Alignment
9 | from smatch.api import SmatchScorer
10 | 
11 | 
12 | class Generator(object):
13 |     def __init__(self, verbose=False):
14 |         self.verbose = verbose
15 | 
16 |     def parse(self, align, actions):
17 |         state = State(align)
18 | 
19 |         for action in actions:
20 |             if action[0] == 'SHIFT':
21 |                 state.shift()
22 |             elif action[0] == 'DROP':
23 |                 state.drop()
24 |             elif action[0] == 'REDUCE':
25 |                 state.reduce()
26 |             elif action[0] == 'CACHE':
27 |                 state.cache()
28 |             elif action[0] == 'MERGE':
29 |                 state.merge()
30 |             elif action[0] == 'CONFIRM':
31 |                 if action[2] == '_UNK_':
32 |                     action[2] = state.buffer_[0].name
33 |                 state.confirm(action[2])
34 |             elif action[0] == 'ENTITY':
35 |                 state.entity(action[1], None)
36 |             elif action[0] == 'LEFT':
37 |                 state.left(action[1])
38 |             elif action[0] == 'RIGHT':
39 |                 state.right(action[1])
40 |             elif action[0] == 'NEWNODE':
41 |                 state.add_newnode(ConceptNode(action[1], None, None))
42 |                 state.newnode()
43 |             else:
44 |                 assert False, 'unknown action: {0}'.format(action[0])
45 | 
46 |         return state
47 | 
48 | 
49 | def main():
50 |     cmd = argparse.ArgumentParser(usage='the evaluation script.')
51 |     cmd.add_argument('-gold', help='the path to the gold amr graph.')
52 |     cmd.add_argument('-pred_actions', help='the path to the predicted actions.')
53 |     opt = cmd.parse_args()
54 | 
55 |     reader = AlignmentReader(opt.gold)
56 |     generator = Generator()
57 |     scorer = SmatchScorer()
58 | 
59 |     predict_dataset = codecs.open(opt.pred_actions, 'r', encoding='utf-8').read().strip().split('\n\n')
60 |     for block, predict_data in zip(reader, predict_dataset):
61 |         graph = Alignment(block)
62 |         actions = [line.replace('# ::action\t', '').split('\t')
63 |                    for line in predict_data.splitlines() if line.startswith('# ::action')]
64 |         try:
65 |             state = generator.parse(graph, actions)
66 |             predict_amr_graph = str(state.arcs_).encode('utf-8')
67 |         except:
68 |             # print('{0}'.format(graph.n))
69 |             # print('Failed to parse actions:')
70 |             # for action in actions:
71 |             #     print(' - {0}'.format('\t'.join(action).encode('utf-8')))
72 |             predict_amr_graph = '(a / amr-empty)'
73 |         scorer.update(graph.amr_graph, predict_amr_graph)
74 |     print(scorer.f_score())
75 | 
76 | 
77 | if __name__ == "__main__":
78 |     main()
79 | 
--------------------------------------------------------------------------------
/amr_aligner/eager_oracle.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 | from __future__ import unicode_literals
4 | import sys
5 | import traceback
6 | import argparse
7 | import time
8 | from amr.aligned import Alignment, AlignmentReader
9 | from system.eager.oracle import Oracle
10 | from
smatch.api import smatch 11 | 12 | 13 | def main(): 14 | cmd = argparse.ArgumentParser('Test the program.') 15 | cmd.add_argument('-mod', default='evaluate', choices=('parse', 'evaluate', 'dump'), 16 | help='the running mode. -parse: evaluate the best AMR graph achieved by the alignment ' 17 | '(specified in ::alignment field) and use the resulted graph to replace the original' 18 | 'AMR graph; -evaluate: same as parser without replacement; -dump: dump action file.') 19 | cmd.add_argument('-aligned', help='the path to the filename.') 20 | cmd.add_argument('-verbose', default=False, action='store_true', help='verbose the actions.') 21 | opt = cmd.parse_args() 22 | 23 | align_handler = AlignmentReader(opt.aligned) 24 | parser = Oracle(verbose=opt.verbose) 25 | 26 | for align_block in align_handler: 27 | graph = Alignment(align_block) 28 | try: 29 | actions, state = parser.parse(graph) 30 | 31 | if opt.mod in ('parse', 'evaluate'): 32 | predicted_amr_graph = str(state.arcs_) 33 | f_score = smatch(predicted_amr_graph, graph.amr_graph) 34 | for line in align_block: 35 | if line.startswith('# ::alignments'): 36 | line = line + ' ::parser eager_oracle.py' \ 37 | ' ::smatch {0} ::n_actions {1}'.format(f_score, len(actions)) 38 | if line.startswith('('): 39 | break 40 | print(line.encode('utf-8')) 41 | if opt.mod == 'parse': 42 | print(str(state.arcs_)) 43 | else: 44 | print(graph.amr_graph) 45 | else: 46 | print('# ::id {0}'.format(graph.n)) 47 | for line in align_block: 48 | if line.startswith('# ::tok') or line.startswith('# ::pos'): 49 | print(line.encode('utf-8')) 50 | print('\n'.join(['# ::action {0}'.format(action) for action in actions])) 51 | print() 52 | 53 | if opt.verbose: 54 | print(graph.n, file=sys.stderr) 55 | print('\n'.join(actions), file=sys.stderr, end='\n\n') 56 | except Exception: 57 | print(graph.n, file=sys.stderr) 58 | traceback.print_exc(file=sys.stderr) 59 | 60 | 61 | if __name__ == "__main__": 62 | main() 63 | -------------------------------------------------------------------------------- /amr_aligner/refresh_alignments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | from __future__ import unicode_literals 4 | import argparse 5 | from amr.aligned import AlignmentReader, Alignment 6 | 7 | 8 | def main(): 9 | cmd = argparse.ArgumentParser('Get the block that contains certain amr graph.') 10 | cmd.add_argument('-lexicon', help='the path to the alignment file.') 11 | cmd.add_argument('-data', help='the path to the alignment file.') 12 | cmd.add_argument('-keep_alignment_in_node', default=False, action='store_true', help='') 13 | opt = cmd.parse_args() 14 | 15 | lexicon = {} 16 | for data in open(opt.lexicon, 'r').read().strip().split('\n\n'): 17 | lines = data.splitlines() 18 | assert len(lines) == 2 19 | lexicon[lines[0].strip()] = lines[1].strip() 20 | 21 | handler = AlignmentReader(opt.data) 22 | for block in handler: 23 | graph = Alignment(block) 24 | new_alignment = lexicon[graph.n] 25 | 26 | graph.alignments = Alignment._parse_alignment([new_alignment]) 27 | graph.refill_alignment() 28 | 29 | for line in block: 30 | if line.startswith('#'): 31 | if line.startswith('# ::alignments'): 32 | print(new_alignment) 33 | else: 34 | if not opt.keep_alignment_in_node and line.startswith('# ::node'): 35 | tokens = line.split() 36 | level = tokens[2] 37 | alignment = graph.get_node_by_level(level).alignment 38 | print('# ::node\t{0}\t{1}\t{2}'.format( 39 | tokens[2], 
tokens[3], '{0}-{1}'.format(alignment[0], alignment[1]) if alignment else ''))
40 |                 else:
41 |                     print(line.encode('utf-8'))
42 | 
43 |         print(graph.amr_graph.encode('utf-8'), end='\n\n')
44 | 
45 | 
46 | if __name__ == "__main__":
47 |     main()
48 | 
--------------------------------------------------------------------------------
/amr_aligner/resources/morphosemantic_links/README.md:
--------------------------------------------------------------------------------
1 | Processed morphosemantic links
2 | ==============================
3 | 
4 | The original [Morphosemantic Links Database](https://wordnet.princeton.edu/download/standoff-files)
5 | contains derivational links connecting noun and verb senses.
6 | In this project, we provide a processed mapping
7 | between a verb and its noun form in `morphosemantic-links.dic`.
8 | We use `extract_morphosemantic.py` to
9 | extract the dictionary from the CSV export of
10 | [morphosemantic-links.xls](http://wordnetcode.princeton.edu/standoff-files/morphosemantic-links.xls).
11 | 
12 | Use `bzip2 -d` to extract the raw text from the compressed `.dic.bz2` file.
13 | 
14 | ### LICENSE
15 | 
16 | ```
17 | WordNet Release 3.0 This software and database is being provided to you, the LICENSEE,
18 | by Princeton University under the following license. By obtaining, using and/or copying
19 | this software and database, you agree that you have read, understood, and will comply
20 | with these terms and conditions.: Permission to use, copy, modify and distribute this
21 | software and database and its documentation for any purpose and without fee or royalty
22 | is hereby granted, provided that you agree to comply with the following copyright notice
23 | and statements, including the disclaimer, and that the same appear on ALL copies of the
24 | software, database and documentation, including modifications that you make for internal
25 | use or for distribution. WordNet 3.0 Copyright 2006 by Princeton University. All rights
26 | reserved. THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON UNIVERSITY MAKES
27 | NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT
28 | LIMITATION, PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF
29 | MERCHANT- ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED
30 | SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS,
31 | COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. The name of Princeton University or Princeton
32 | may not be used in advertising or publicity pertaining to distribution of the software
33 | and/or database. Title to copyright in this software, database and any associated
34 | documentation shall at all times remain with Princeton University and LICENSEE agrees
35 | to preserve same.
36 | ```
37 | 
38 | 
--------------------------------------------------------------------------------
/amr_aligner/resources/morphosemantic_links/extract_morphosemantic.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 | import sys
4 | 
5 | 
6 | def main():
7 |     lexicon = set()
8 |     is_header = True
9 |     for line in open(sys.argv[1], 'r'):
10 |         if is_header:  # skip the CSV header row
11 |             is_header = False
12 |             continue
13 |         tokens = line.strip().split(',')
14 |         verb, noun = tokens[0], tokens[3]
15 |         verb = verb.split('%')[0]  # strip the WordNet sense key, e.g. abduct%2:35:00:: -> abduct
16 |         noun = noun.split('%')[0]
17 |         lexicon.add((verb, noun))
18 |     for verb, noun in lexicon:
19 |         print('{0},{1}'.format(verb, noun))
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     main()
24 | 
--------------------------------------------------------------------------------
/amr_aligner/resources/morphosemantic_links/morphosemantic-links.dic.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/amr_aligner/resources/morphosemantic_links/morphosemantic-links.dic.bz2
--------------------------------------------------------------------------------
/amr_aligner/resources/word2vec/README.md:
--------------------------------------------------------------------------------
1 | Word2vec for our aligner
2 | ========================
3 | 
4 | We use the [glove.840B.300d](http://nlp.stanford.edu/data/glove.840B.300d.zip) embeddings.
5 | We suggest filtering the embeddings to the words and concepts
6 | (with the word-sense suffix trimmed) that appear in the data.
--------------------------------------------------------------------------------
/amr_aligner/rule_based_aligner/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/amr_aligner/rule_based_aligner/__init__.py
--------------------------------------------------------------------------------
/amr_aligner/rule_based_aligner/aligned_results.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | 
4 | class AlignedResults(object):
5 |     def __init__(self, multiple=True):
6 |         self.spans_to_levels = {}
7 |         self.levels_to_spans = {}
8 |         self.multiple = multiple
9 | 
10 |     def add(self, start, end, level, dependent):
11 |         if self.multiple:
12 |             return self._mutualisticly_add(start, end, level, dependent)
13 |         else:
14 |             return self._exclusively_add(start, end, level, dependent)
15 | 
16 |     def _mutualisticly_add(self, start, end, level, dependent):
17 |         added = False
18 |         if (start, end) not in self.spans_to_levels:
19 |             self.spans_to_levels[start, end] = set()
20 |         if (level, dependent) not in self.spans_to_levels[start, end]:
21 |             added = True
22 |             self.spans_to_levels[start, end].add((level, dependent))
23 | 
24 |         if level not in self.levels_to_spans:
25 |             self.levels_to_spans[level] = set()
26 |         self.levels_to_spans[level].add((start, end, dependent))
27 |         return added
28 | 
29 |     def _exclusively_add(self, start, end, level, dependent):
30 |         # first check if the concept is aligned.
31 | if level in self.levels_to_spans: 32 | return False 33 | self.levels_to_spans[level] = {(start, end, dependent)} 34 | added = False 35 | if dependent is not None: 36 | if (start, end) not in self.spans_to_levels: 37 | self.spans_to_levels[start, end] = set() 38 | if (level, dependent) not in self.spans_to_levels[start, end]: 39 | added = True 40 | self.spans_to_levels[start, end].add((level, dependent)) 41 | else: 42 | overlap = False 43 | for new_start, new_end in self.spans_to_levels: 44 | if (start < new_start < end) or (start < new_end < end): 45 | overlap = True 46 | break 47 | if not overlap: 48 | if (start, end) not in self.spans_to_levels: 49 | self.spans_to_levels[start, end] = set() 50 | if (level, dependent) not in self.spans_to_levels[start, end]: 51 | added = True 52 | self.spans_to_levels[start, end].add((level, dependent)) 53 | return added 54 | 55 | def contains(self, level): 56 | return level in self.levels_to_spans 57 | 58 | def get_spans_by_level(self, level): 59 | return set([(start, end) for start, end, _ in self.levels_to_spans.get(level, set())]) 60 | 61 | def get_levels_by_span(self, start, end): 62 | return set([level for level, _ in self.spans_to_levels.get((start, end), set())]) 63 | -------------------------------------------------------------------------------- /amr_aligner/rule_based_aligner/match_result.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals 3 | 4 | 5 | class MatchResult(object): 6 | def __init__(self, level, signature): 7 | self.level = level 8 | self.signature = signature 9 | 10 | def __eq__(self, other): 11 | if isinstance(other, MatchResult): 12 | return self.level == other.level 13 | return False 14 | 15 | def __str__(self): 16 | return '{0}={1}'.format(self.signature, self.level) 17 | 18 | def __repr__(self): 19 | return self.__str__() 20 | 21 | def __hash__(self): 22 | return self.level.__hash__() 23 | 24 | 25 | class EntityMatchResult(MatchResult): 26 | def __init__(self, level, children_levels, signature): 27 | super(EntityMatchResult, self).__init__(level, signature) 28 | self.children_levels = children_levels 29 | 30 | def __str__(self): 31 | return '{0}=({1}, {2})'.format(self.signature, self.level, self.children_levels) 32 | 33 | 34 | class WordMatchResult(MatchResult): 35 | def __init__(self, level, signature='word'): 36 | super(WordMatchResult, self).__init__(level, signature) 37 | 38 | def __eq__(self, other): 39 | if isinstance(other, WordMatchResult) or \ 40 | isinstance(other, FuzzyWordMatchResult) or \ 41 | isinstance(other, SemanticWordMatchResult): 42 | return self.level == other.level 43 | return False 44 | 45 | 46 | class FuzzyWordMatchResult(WordMatchResult): 47 | def __init__(self, level): 48 | super(FuzzyWordMatchResult, self).__init__(level, '(fuzzy)word') 49 | 50 | 51 | class MorphosemanticLinkMatchResult(WordMatchResult): 52 | def __init__(self, level): 53 | super(MorphosemanticLinkMatchResult, self).__init__(level, '(morph)word') 54 | 55 | 56 | class SemanticWordMatchResult(WordMatchResult): 57 | def __init__(self, level): 58 | super(SemanticWordMatchResult, self).__init__(level, '(sem)word') 59 | 60 | 61 | class FuzzySpanMatchResult(MatchResult): 62 | def __init__(self, level): 63 | super(FuzzySpanMatchResult, self).__init__(level, '(fuzzy)span') 64 | 65 | 66 | class NamedEntityMatchResult(EntityMatchResult): 67 | def __init__(self, level, children_levels, signature='entity'): 68 | 
super(NamedEntityMatchResult, self).__init__(level, children_levels, signature) 69 | 70 | def __eq__(self, other): 71 | if isinstance(other, FuzzyNamedEntityMatchResult) or \ 72 | isinstance(other, NamedEntityMatchResult) or \ 73 | isinstance(other, SemanticNamedEntityMatchResult): 74 | return self.level == other.level 75 | return False 76 | 77 | 78 | class FuzzyNamedEntityMatchResult(NamedEntityMatchResult): 79 | def __init__(self, level, children_levels): 80 | super(FuzzyNamedEntityMatchResult, self).__init__(level, children_levels, '(fuzzy)entity') 81 | 82 | 83 | class SemanticNamedEntityMatchResult(NamedEntityMatchResult): 84 | def __init__(self, level, children_levels): 85 | super(SemanticNamedEntityMatchResult, self).__init__(level, children_levels, '(sem)entity') 86 | 87 | 88 | class URLEntityMatchResult(EntityMatchResult): 89 | def __init__(self, level, children_levels): 90 | super(URLEntityMatchResult, self).__init__(level, children_levels, 'url-entity') 91 | 92 | 93 | class OrdinalEntityMatchResult(EntityMatchResult): 94 | def __init__(self, level, children_levels): 95 | super(OrdinalEntityMatchResult, self).__init__(level, children_levels, 'ordinal-entity') 96 | 97 | 98 | class DateEntityMatchResult(MatchResult): 99 | def __init__(self, level, children_levels): 100 | super(DateEntityMatchResult, self).__init__(level, 'date-entity') 101 | self.children_levels = children_levels 102 | 103 | def __str__(self): 104 | return 'date-entity=({0}, {1})'.format(self.level, self.children_levels) 105 | 106 | 107 | class MinusPolarityMatchResult(MatchResult): 108 | def __init__(self, level): 109 | super(MinusPolarityMatchResult, self).__init__(level, 'minus') 110 | 111 | 112 | class EntityTypeMatchResult(MatchResult): 113 | def __init__(self, level): 114 | super(EntityTypeMatchResult, self).__init__(level, 'entity_type') 115 | 116 | 117 | class QuantityMatchResult(MatchResult): 118 | def __init__(self, level): 119 | super(QuantityMatchResult, self).__init__(level, 'quantity') 120 | 121 | 122 | class PersonOfUpdateResult(MatchResult): 123 | def __init__(self, level): 124 | super(PersonOfUpdateResult, self).__init__(level, 'person_of') 125 | 126 | 127 | class PersonUpdateResult(MatchResult): 128 | def __init__(self, level): 129 | super(PersonUpdateResult, self).__init__(level, 'person') 130 | 131 | 132 | class GovernmentOrganizationUpdateResult(MatchResult): 133 | def __init__(self, level): 134 | super(GovernmentOrganizationUpdateResult, self).__init__(level, 'gov_org') 135 | 136 | 137 | class MinusPolarityPrefixesUpdateResult(MatchResult): 138 | def __init__(self, level): 139 | super(MinusPolarityPrefixesUpdateResult, self).__init__(level, 'minus_prefix') 140 | 141 | 142 | class DegreeUpdateResult(MatchResult): 143 | def __init__(self, level): 144 | super(DegreeUpdateResult, self).__init__(level, 'degree') 145 | 146 | 147 | class RelativePositionUpdateResult(MatchResult): 148 | def __init__(self, level): 149 | super(RelativePositionUpdateResult, self).__init__(level, 'relative_position') 150 | 151 | 152 | class HaveOrgRoleUpdateResult(MatchResult): 153 | def __init__(self, level): 154 | super(HaveOrgRoleUpdateResult, self).__init__(level, 'have-org-role-91') 155 | 156 | 157 | class CauseUpdateResult(MatchResult): 158 | def __init__(self, level): 159 | super(CauseUpdateResult, self).__init__(level, 'cause01') 160 | 161 | 162 | class BelocatedAtMatchResult(MatchResult): 163 | def __init__(self, level): 164 | super(BelocatedAtMatchResult, self).__init__(level, 'be-located-91') 165 | 166 | 167 | 
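# NOTE (editorial, illustrative): `MatchResult` defines `__eq__`/`__hash__` on
# `level` alone, so results for the same node produced by different matchers
# collapse when collected into a set, e.g.:
#
#   r1 = WordMatchResult('1.1')
#   r2 = FuzzyWordMatchResult('1.1')
#   assert r1 == r2 and len({r1, r2}) == 1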
class ImperativeUpdateResult(MatchResult): 168 | def __init__(self, level): 169 | super(ImperativeUpdateResult, self).__init__(level, 'imperative') 170 | 171 | 172 | class PossibleUpdateResult(MatchResult): 173 | def __init__(self, level): 174 | super(PossibleUpdateResult, self).__init__(level, 'possible') 175 | 176 | 177 | __all__ = [ 178 | 'EntityTypeMatchResult', 179 | 'QuantityMatchResult', 180 | 'DateEntityMatchResult', 181 | 'URLEntityMatchResult', 182 | 'OrdinalEntityMatchResult', 183 | 'MinusPolarityMatchResult', 184 | 'BelocatedAtMatchResult', 185 | 'FuzzyWordMatchResult', 186 | 'FuzzySpanMatchResult', 187 | 'MorphosemanticLinkMatchResult', 188 | 'SemanticWordMatchResult', 189 | 'NamedEntityMatchResult', 190 | 'FuzzyNamedEntityMatchResult', 191 | 'SemanticNamedEntityMatchResult', 192 | 193 | 'PersonOfUpdateResult', 194 | 'PersonUpdateResult', 195 | 'RelativePositionUpdateResult', 196 | 'GovernmentOrganizationUpdateResult', 197 | 'MinusPolarityPrefixesUpdateResult', 198 | 'DegreeUpdateResult', 199 | 'HaveOrgRoleUpdateResult', 200 | 'CauseUpdateResult', 201 | 'ImperativeUpdateResult', 202 | 'WordMatchResult', 203 | 'PossibleUpdateResult', 204 | ] 205 | -------------------------------------------------------------------------------- /amr_aligner/rule_based_aligner/stemmer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals 3 | from nltk.stem.wordnet import WordNetLemmatizer 4 | lemmatizer = WordNetLemmatizer() 5 | 6 | 7 | class Stemmer(object): 8 | kMinusPrefix2 = ('un', 'in', 'il', 'im', 'ir', 'il', 'Un', 'In', 'Il', 'Im', 'Ir', 'Il') 9 | kMinusPrefix3 = ('non', 'Non') 10 | 11 | kMonths = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 12 | 'September', 'October', 'November', 'December'] 13 | 14 | kNumbers = ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine'] 15 | 16 | kExceptions = {'.': ('multi-sentence', ), 17 | ';': ('and', 'multi-sentence', ), 18 | ':': ('mean', ), 19 | '!': ('expressive', ), 20 | '..': ('expressive', ), 21 | '...': ('expressive', ), 22 | '....': ('expressive', ), 23 | '?': ('interrogative', ), 24 | '%': ('percentage-entity', ), 25 | '$': ('dollar', ), 26 | 'also': ('include',), 27 | 'anti': ('oppose', 'counter'), 28 | 'but': ('contrast', 'have-concession'), 29 | 'while': ('contrast', ), 30 | 'because': ('cause',), 31 | 'whereby': ('cause', ), 32 | 'if': ('cause', 'interrogative'), 33 | 'by': ('cause', ), 34 | 'for': ('cause', ), 35 | 'so': ('infer', 'cause'), 36 | 'since': ('cause', ), 37 | 'on': ('cause', ), 38 | 'in': ('cause', ), 39 | 'against': ('-', ), 40 | 'no': ('-',), 41 | 'non': ('-', ), 42 | 'not': ('-', ), 43 | 'n\'t': ('-', ), 44 | 'never': ('-', ), 45 | 'yet': ('-', ), 46 | 'neither': ('-', ), 47 | 'of': ('include', 'have-manner', ), 48 | 'might': ('possible', ), 49 | 'may': ('possible', ), 50 | 'maybe': ('possible', ), 51 | 'could': ('possible', ), 52 | 'can': ('possible', ), 53 | 'cant': ('possible', ), 54 | 'cannot': ('possible', ), 55 | 'can\'t': ('possible', ), 56 | 'should': ('recommend', ), 57 | 'who': ('amr-unknown', ), 58 | 'what': ('amr-unknown', ), 59 | 'how': ('amr-unknown', ), 60 | 'as': ('and', 'same', 'contrast',), 61 | 'with': ('and', ), 62 | 'plus': ('and', ), 63 | '-': ('and', ), 64 | 'without': ('-', ), 65 | 'me': ('i', ), 66 | 'my': ('i', ), 67 | 'her': ('she', ), 68 | 'his': ('he', ), 69 | 'him': ('he', ), 70 | 'us': ('we', ), 71 | 'our': ('we', ), 72 | 'ours': ('we', ), 73 | 
'your': ('you', ),
74 |                    'yourself': ('you', ),
75 |                    'these': ('this', ),
76 |                    'those': ('that', ),
77 |                    'o.k.': ('okay', ),
78 |                    'death': ('die',),
79 |                    'deaths': ('die', ),
80 |                    'like': ('resemble', ),
81 |                    'similar': ('resemble', ),
82 |                    'right': ('entitle', ),
83 |                    'rights': ('entitle',),
84 |                    'must': ('obligate',),
85 |                    'etc': ('et-cetera',),
86 |                    'according': ('say', ),}
87 | 
88 |     def __init__(self):
89 |         pass
90 | 
91 |     def stem(self, word, postag):
92 |         ret = set()
93 |         ret.add(word)
94 |         ret.add(word.lower())
95 | 
96 |         # lemmatize
97 |         if postag is not None:
98 |             ret.add(lemmatizer.lemmatize(word.lower(), postag))
99 |         else:
100 |             ret.add(lemmatizer.lemmatize(word.lower(), 'n'))
101 |             ret.add(lemmatizer.lemmatize(word.lower(), 'v'))
102 |             ret.add(lemmatizer.lemmatize(word.lower(), 'a'))
103 |             ret.add(lemmatizer.lemmatize(word.lower(), 's'))
104 | 
105 |         # normalize month
106 |         month_normalized_word = self._normalize_month(word)
107 |         if month_normalized_word is not None:
108 |             ret.add(month_normalized_word)
109 | 
110 |         # normalize number
111 |         number_normalized_word = self._normalize_number(word)
112 |         if number_normalized_word is not None:
113 |             ret.add(number_normalized_word)
114 | 
115 |         # normalize exceptions
116 |         exception_normalized_words = self._normalize_exceptions(word)
117 |         if exception_normalized_words is not None:
118 |             for exception_normalized_word in exception_normalized_words:
119 |                 ret.add(exception_normalized_word)
120 | 
121 |         other_normalized_word = self._normalize_others(word)
122 |         if other_normalized_word is not None:
123 |             ret.add(other_normalized_word)
124 |         return ret
125 | 
126 |     def _normalize_number(self, word):
127 |         if word.lower() in self.kNumbers:
128 |             return str(self.kNumbers.index(word.lower()) + 1)
129 |         elif ',' in word and word.replace(',', '').isdigit():
130 |             return word.replace(',', '')
131 |         return None
132 | 
133 |     def _normalize_month(self, word):
134 |         if word in self.kMonths:
135 |             return str(self.kMonths.index(word) + 1)
136 |         return None
137 | 
138 |     def _normalize_exceptions(self, word):
139 |         if word.lower() in self.kExceptions:
140 |             return self.kExceptions[word.lower()]
141 |         return None
142 | 
143 |     def _normalize_others(self, word):
144 |         if word[:3] in self.kMinusPrefix3:
145 |             return word[3:]
146 |         elif word[:2] in self.kMinusPrefix2:
147 |             return word[2:]
148 |         elif word.endswith('er'):
149 |             return word[:-2]
150 |         elif word.endswith('ers'):
151 |             return word[:-3]
152 |         return None
153 | 
154 | 
--------------------------------------------------------------------------------
/amr_aligner/rule_based_aligner/updater.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import unicode_literals
3 | from __future__ import absolute_import
4 | from .stemmer import Stemmer
5 | 
6 | 
7 | class Updater(object):
8 |     def __init__(self):
9 |         pass
10 | 
11 |     def update(self, words, graph, align_results):
12 |         """
13 |         Apply this rule to extend `align_results`; return True if any new alignment was added.
14 |         :param words: list[str]
15 |         :param graph: Alignment
16 |         :param align_results: AlignedResults
17 |         :return:
18 |         """
19 |         raise NotImplementedError
20 | 
21 | 
22 | class EntityTypeUpdater(Updater):
23 |     def __init__(self):
24 |         super(EntityTypeUpdater, self).__init__()
25 | 
26 |     def update(self, words, graph, align_results):
27 |         updated = False
28 |         for node in graph.true_nodes():
29 |             if graph.is_entity(node, consider_alignment=False):
30 |                 # get the :name node
31 |                 edges = filter(lambda e: e.relation == 'name', graph.edges_by_parents[node.level])
32 |                 if
len(edges) > 0: 33 | for start, end in align_results.get_spans_by_level(edges[0].tgt_level): 34 | updated = updated or align_results.add(start, end, node.level, edges[0].tgt_level) 35 | return updated 36 | 37 | 38 | class QuantityUpdater(Updater): 39 | def __init__(self): 40 | super(QuantityUpdater, self).__init__() 41 | 42 | def update(self, words, graph, align_results): 43 | updated = False 44 | for node in graph.true_nodes(): 45 | if not node.name.endswith('-quantity') or node.level not in graph.edges_by_parents: 46 | continue 47 | edges = filter(lambda e: e.relation == 'unit', graph.edges_by_parents[node.level]) 48 | if len(edges) > 0: 49 | for start, end in align_results.get_spans_by_level(edges[0].tgt_level): 50 | updated = updated or align_results.add(start, end, node.level, edges[0].tgt_level) 51 | return updated 52 | 53 | 54 | class PersonOfUpdater(Updater): 55 | def __init__(self): 56 | super(PersonOfUpdater, self).__init__() 57 | 58 | def update(self, words, graph, align_results): 59 | updated = False 60 | for node in graph.true_nodes(): 61 | if node.name not in ('person', 'thing') or node.level not in graph.edges_by_parents: 62 | continue 63 | edges = filter(lambda e: e.relation.endswith('-of'), graph.edges_by_parents[node.level]) 64 | if len(edges) > 0: 65 | for start, end in align_results.get_spans_by_level(edges[0].tgt_level): 66 | updated = updated or align_results.add(start, end, node.level, edges[0].tgt_level) 67 | return updated 68 | 69 | 70 | class PersonUpdater(Updater): 71 | def __init__(self): 72 | super(PersonUpdater, self).__init__() 73 | 74 | def update(self, words, graph, align_results): 75 | updated = False 76 | for node in graph.true_nodes(): 77 | if node.name != 'person' or node.level not in graph.edges_by_parents: 78 | continue 79 | edges = graph.edges_by_parents[node.level] 80 | if len(edges) == 1: 81 | for start, end in align_results.get_spans_by_level(edges[0].tgt_level): 82 | updated = updated or align_results.add(start, end, node.level, edges[0].tgt_level) 83 | return updated 84 | 85 | 86 | class GovernmentOrganizationUpdater(Updater): 87 | def __init__(self): 88 | super(GovernmentOrganizationUpdater, self).__init__() 89 | 90 | def update(self, words, graph, align_results): 91 | updated = False 92 | for edge in graph.edges: 93 | if not edge.relation.endswith('-of') or \ 94 | not edge.relation.startswith('ARG') or \ 95 | edge.src_name != 'government-organization': 96 | continue 97 | for start, end in align_results.get_spans_by_level(edge.tgt_level): 98 | updated = updated or align_results.add(start, end, edge.src_level, edge.tgt_level) 99 | return updated 100 | 101 | 102 | class RelativePositionUpdater(Updater): 103 | def __init__(self): 104 | super(RelativePositionUpdater, self).__init__() 105 | 106 | def update(self, words, graph, align_results): 107 | updated = False 108 | for edge in graph.edges: 109 | if edge.src_name != 'relative-position': 110 | continue 111 | for start, end in align_results.get_spans_by_level(edge.tgt_level): 112 | updated = updated or align_results.add(start, end, edge.src_level, edge.tgt_level) 113 | return updated 114 | 115 | 116 | class MinusPolarityPrefixUpdater(Updater): 117 | def __init__(self): 118 | super(MinusPolarityPrefixUpdater, self).__init__() 119 | 120 | def update(self, words, graph, align_results): 121 | updated = False 122 | for node in graph.true_nodes(): 123 | if node.name != '-': 124 | continue 125 | edges = graph.edges_by_children[node.level] 126 | if len(edges) == 1 and edges[0].relation == 'polarity': 127 
| for start, end in align_results.get_spans_by_level(edges[0].src_level):
128 |                 if start + 1 == end and (words[start][:2] in Stemmer.kMinusPrefix2 or
129 |                                          words[start][:3] in Stemmer.kMinusPrefix3 or
130 |                                          words[start].endswith('less') or
131 |                                          words[start].endswith('nt') or
132 |                                          words[start].endswith('n\'t')):
133 |                     updated = updated or align_results.add(start, end, node.level, edges[0].src_level)
134 |         return updated
135 | 
136 | 
137 | class DegreeUpdater(Updater):
138 |     def __init__(self):
139 |         super(DegreeUpdater, self).__init__()
140 | 
141 |     def update(self, words, graph, align_results):
142 |         updated = False
143 |         for edge in graph.edges:
144 |             if edge.relation != 'degree':
145 |                 continue
146 |             for start, end in align_results.get_spans_by_level(edge.src_level):
147 |                 if start + 1 == end and (words[start].endswith('est') or words[start].endswith('er')):
148 |                     updated = updated or align_results.add(start, end, edge.tgt_level, edge.src_level)
149 |         return updated
150 | 
151 | 
152 | class HaveOrgRoleUpdater(Updater):
153 |     def __init__(self):
154 |         super(HaveOrgRoleUpdater, self).__init__()
155 | 
156 |     def update(self, words, graph, align_results):
157 |         updated = False
158 |         for node in graph.true_nodes():
159 |             if node.name not in ('have-org-role-91', 'have-rel-role-91') or node.level not in graph.edges_by_parents:
160 |                 continue
161 |             edges = [edge for edge in graph.edges_by_parents[node.level] if edge.relation in ('ARG1', 'ARG2')]
162 |             if not edges:
163 |                 continue  # guard: `edge` would otherwise be unbound below
164 |             # prefer the ARG2 child when both ARG1 and ARG2 are present
165 |             edge = edges[1] if (len(edges) == 2 and edges[0].relation != 'ARG2') else edges[0]
166 |             for start, end in align_results.get_spans_by_level(edge.tgt_level):
167 |                 updated = updated or align_results.add(start, end, edge.src_level, edge.tgt_level)
168 |         return updated
169 | 
170 | 
171 | class CauseUpdater(Updater):
172 |     def __init__(self):
173 |         super(CauseUpdater, self).__init__()
174 | 
175 |     def update(self, words, graph, align_results):
176 |         updated = False
177 |         for edge in graph.edges:
178 |             if edge.tgt_name != 'cause-01' or not edge.relation.startswith('ARG') or not edge.relation.endswith('-of'):
179 |                 continue
180 |             for start, end in align_results.get_spans_by_level(edge.src_level):
181 |                 if start + 1 == end:
182 |                     updated = updated or align_results.add(start, end, edge.tgt_level, edge.src_level)
183 |         return updated
184 | 
185 | 
186 | class ImperativeUpdater(Updater):
187 |     def __init__(self):
188 |         super(ImperativeUpdater, self).__init__()
189 | 
190 |     def update(self, words, graph, align_results):
191 |         updated = False
192 |         for edge in graph.edges:
193 |             if edge.tgt_name != 'imperative' or edge.relation != 'mode':
194 |                 continue
195 |             you_level = [e.tgt_level for e in graph.edges_by_parents[edge.src_level] if e.tgt_name == 'you']
196 |             for start, end in align_results.get_spans_by_level(edge.src_level):
197 |                 if start + 1 == end:
198 |                     updated = updated or align_results.add(start, end, edge.tgt_level, edge.src_level)
199 |                     if len(you_level) == 1:
200 |                         updated = updated or align_results.add(start, end, you_level[0], edge.src_level)
201 |         return updated
202 | 
203 | 
204 | class PossibleUpdater(Updater):
205 |     def __init__(self):
206 |         super(PossibleUpdater, self).__init__()
207 | 
208 |     def update(self, words, graph, align_results):
209 |         updated = False
210 |         for edge in graph.edges:
211 |             if edge.src_name == 'possible' and edge.relation == 'domain':
212 |                 # operable => (p / possible :domain (o / operate))
213 |                 for start, end in align_results.get_spans_by_level(edge.tgt_level):
214 |                     if
start + 1 == end and words[start].endswith('ble'): 215 | updated = updated or align_results.add(start, end, edge.src_level, edge.tgt_level) 216 | elif edge.tgt_name == 'possible' and edge.relation == 'mod': 217 | for start, end in align_results.get_spans_by_level(edge.src_level): 218 | if start + 1 == end and words[start].endswith('ble'): 219 | updated = updated or align_results.add(start, end, edge.tgt_level, edge.src_level) 220 | return updated 221 | 222 | 223 | __all__ = [ 224 | 'EntityTypeUpdater', 225 | 'QuantityUpdater', 226 | 'PersonOfUpdater', 227 | 'PersonUpdater', 228 | 'RelativePositionUpdater', 229 | 'MinusPolarityPrefixUpdater', 230 | 'DegreeUpdater', 231 | 'HaveOrgRoleUpdater', 232 | 'GovernmentOrganizationUpdater', 233 | 'CauseUpdater', 234 | 'ImperativeUpdater', 235 | 'PossibleUpdater' 236 | ] -------------------------------------------------------------------------------- /amr_aligner/smatch/README.md: -------------------------------------------------------------------------------- 1 | C++ Implementation of Fast Smatch 2 | ================================= 3 | 4 | We use the *oracle smatch score* 5 | to evaluate each generated alignment, 6 | and we found the original smatch script 7 | greatly slowed down our program. 8 | So we use `Cython` to re-implement the smatch 9 | script. 10 | 11 | ## Compilation 12 | 13 | run `python setup.py build` in the `amr_aligner/smatch` 14 | folder. It will generate a dynamic library `_smatch.so` 15 | under the `build/lib.${arch}-2.7/` folder. 16 | Move the dynamic library into `amr_aligner/smatch` 17 | and it will do the work. 18 | 19 | ## Smatch Version 20 | 21 | 2.0.2 -------------------------------------------------------------------------------- /amr_aligner/smatch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/amr_aligner/smatch/__init__.py -------------------------------------------------------------------------------- /amr_aligner/smatch/_gain.cc: -------------------------------------------------------------------------------- 1 | #include "_gain.h" 2 | 3 | int _hash_pair(int x, int y) { 4 | return _HASH_PAIR(x, y); 5 | } 6 | 7 | int _get_0(int x) { 8 | return _GET_0(x); 9 | } 10 | 11 | int _get_1(int x) { 12 | return _GET_1(x); 13 | } 14 | 15 | 16 | int move_gain(MappingType & mapping, 17 | int node_id, 18 | int old_id, 19 | int new_id, 20 | WeightDictType & weight_dict, 21 | int match_num) { 22 | int new_mapping = _HASH_PAIR(node_id, new_id); 23 | int old_mapping = _HASH_PAIR(node_id, old_id); 24 | int saved_id = mapping[node_id]; 25 | 26 | mapping[node_id] = new_id; 27 | int gain = 0; 28 | WeightDictType::const_iterator entry = weight_dict.find(new_mapping); 29 | if (entry != weight_dict.end()) { 30 | for (std::unordered_map::const_iterator key = entry->second.begin(); 31 | key != entry->second.end(); 32 | key ++) { 33 | if (key->first == -1) { 34 | gain += key->second; 35 | } else if (mapping[_GET_0(key->first)] == _GET_1(key->first)) { 36 | gain += key->second; 37 | } 38 | } 39 | } 40 | 41 | mapping[node_id] = saved_id; 42 | entry = weight_dict.find(old_mapping); 43 | if (entry != weight_dict.end()) { 44 | for (std::unordered_map::const_iterator key = entry->second.begin(); 45 | key != entry->second.end(); 46 | key ++) { 47 | if (key->first == -1) { 48 | gain -= key->second; 49 | } else if (mapping[_GET_0(key->first)] == _GET_1(key->first)) { 50 | gain -= key->second; 51 | } 52 | } 53 | } 54 | 
return gain; 55 | } 56 | 57 | int swap_gain(MappingType & mapping, 58 | int node_id1, 59 | int mapping_id1, 60 | int node_id2, 61 | int mapping_id2, 62 | WeightDictType & weight_dict, 63 | int match_num) { 64 | int saved_id1 = mapping[node_id1]; 65 | int saved_id2 = mapping[node_id2]; 66 | int gain = 0; 67 | 68 | int new_mapping1 = _HASH_PAIR(node_id1, mapping_id2); 69 | int new_mapping2 = _HASH_PAIR(node_id2, mapping_id1); 70 | int old_mapping1 = _HASH_PAIR(node_id1, mapping_id1); 71 | int old_mapping2 = _HASH_PAIR(node_id2, mapping_id2); 72 | 73 | if (node_id1 > node_id2) { 74 | std::swap(new_mapping1, new_mapping2); 75 | std::swap(old_mapping1, old_mapping2); 76 | } 77 | 78 | mapping[node_id1] = mapping_id2; 79 | mapping[node_id2] = mapping_id1; 80 | 81 | WeightDictType::const_iterator entry = weight_dict.find(new_mapping1); 82 | if (entry != weight_dict.end()) { 83 | for (std::unordered_map::const_iterator key = entry->second.begin(); 84 | key != entry->second.end(); 85 | key ++) { 86 | if (key->first == -1) { 87 | gain += key->second; 88 | } else if (mapping[_GET_0(key->first)] == _GET_1(key->first)) { 89 | gain += key->second; 90 | } 91 | } 92 | } 93 | 94 | entry = weight_dict.find(new_mapping2); 95 | if (entry != weight_dict.end()) { 96 | for (std::unordered_map::const_iterator key = entry->second.begin(); 97 | key != entry->second.end(); 98 | key ++) { 99 | if (key->first == -1) { 100 | gain += key->second; 101 | continue; 102 | } 103 | int first = _GET_0(key->first); 104 | if (first != node_id1 && mapping[first] == _GET_1(key->first)) { 105 | gain += key->second; 106 | } 107 | } 108 | } 109 | 110 | mapping[node_id1] = saved_id1; 111 | mapping[node_id2] = saved_id2; 112 | 113 | entry = weight_dict.find(old_mapping1); 114 | if (entry != weight_dict.end()) { 115 | for (std::unordered_map::const_iterator key = entry->second.begin(); 116 | key != entry->second.end(); 117 | key ++) { 118 | if (key->first == -1) { 119 | gain -= key->second; 120 | } else if (mapping[_GET_0(key->first)] == _GET_1(key->first)) { 121 | gain -= key->second; 122 | } 123 | } 124 | } 125 | 126 | entry = weight_dict.find(old_mapping2); 127 | if (entry != weight_dict.end()) { 128 | for (std::unordered_map::const_iterator key = entry->second.begin(); 129 | key != entry->second.end(); 130 | key ++) { 131 | if (key->first == -1) { 132 | gain -= key->second; 133 | continue; 134 | } 135 | int first = _GET_0(key->first); 136 | if (first != node_id1 && mapping[first] == _GET_1(key->first)) { 137 | gain -= key->second; 138 | } 139 | } 140 | } 141 | 142 | return gain; 143 | } 144 | -------------------------------------------------------------------------------- /amr_aligner/smatch/_gain.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define _HASH_PAIR(x, y) (((x) << 14) | (y)) 6 | #define _GET_0(x) ((x) >> 14) 7 | #define _GET_1(x) ((x) & ((1 << 14) - 1)) 8 | 9 | typedef std::unordered_map > WeightDictType; 10 | typedef std::vector MappingType; 11 | 12 | int _hash_pair(int x, int y); 13 | 14 | int _get_0(int x); 15 | 16 | int _get_1(int x); 17 | 18 | int move_gain(MappingType & mapping, 19 | int node_id, 20 | int old_id, 21 | int new_id, 22 | WeightDictType & weight_dict, 23 | int match_num); 24 | 25 | int swap_gain(MappingType & mapping, 26 | int node_id1, 27 | int mapping_id1, 28 | int node_id2, 29 | int mapping_id2, 30 | WeightDictType & weight_dict, 31 | int match_num); 32 | 33 | 
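// NOTE (editorial, illustrative): _HASH_PAIR packs a (node, candidate) pair
// into one int by shifting the first id 14 bits to the left, so it is only
// safe while both ids stay below 2^14 = 16384. A worked example:
//   _HASH_PAIR(3, 5) == (3 << 14) | 5 == 49157
//   _GET_0(49157) == 3, _GET_1(49157) == 5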
-------------------------------------------------------------------------------- /amr_aligner/smatch/api.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | from amr import AMR 4 | try: 5 | from _smatch import get_best_match, compute_f, clear_match_triple_dict 6 | except: 7 | import sys 8 | print('WARN: use slow version of smatch api.', file=sys.stderr) 9 | from smatch import get_best_match, compute_f, clear_match_triple_dict 10 | 11 | 12 | def _smatch(cur_amr1, cur_amr2, n_iter): 13 | clear_match_triple_dict() 14 | 15 | amr1 = AMR.parse_AMR_line(cur_amr1) 16 | amr2 = AMR.parse_AMR_line(cur_amr2) 17 | prefix1 = "a" 18 | prefix2 = "b" 19 | 20 | amr1.rename_node(prefix1) 21 | amr2.rename_node(prefix2) 22 | instance1, attributes1, relation1 = amr1.get_triples() 23 | instance2, attributes2, relation2 = amr2.get_triples() 24 | 25 | best_mapping, best_match_num = get_best_match(instance1, attributes1, relation1, 26 | instance2, attributes2, relation2, 27 | prefix1, prefix2) 28 | 29 | test_triple_num = len(instance1) + len(attributes1) + len(relation1) 30 | gold_triple_num = len(instance2) + len(attributes2) + len(relation2) 31 | return best_match_num, test_triple_num, gold_triple_num 32 | 33 | 34 | def smatch(cur_amr1, cur_amr2, n_iter=5): 35 | best_match_num, test_triple_num, gold_triple_num = _smatch(cur_amr1, cur_amr2, n_iter) 36 | precision, recall, best_f_score = compute_f(best_match_num, test_triple_num, gold_triple_num) 37 | return best_f_score 38 | 39 | 40 | class SmatchScorer(object): 41 | def __init__(self, n_iter=5): 42 | self.total_match_num = 0 43 | self.total_test_num = 0 44 | self.total_gold_num = 0 45 | self.last_match_num = 0 46 | self.last_test_num = 0 47 | self.last_gold_num = 0 48 | self.n_iter = n_iter 49 | 50 | def update(self, cur_amr1, cur_amr2): 51 | best_match_num, test_triple_num, gold_triple_num = _smatch(cur_amr1, cur_amr2, self.n_iter) 52 | self.last_match_num = best_match_num 53 | self.last_test_num = test_triple_num 54 | self.last_gold_num = gold_triple_num 55 | 56 | self.total_match_num += best_match_num 57 | self.total_test_num += test_triple_num 58 | self.total_gold_num += gold_triple_num 59 | 60 | def f_score(self): 61 | return compute_f(self.total_match_num, self.total_test_num, self.total_gold_num)[2] 62 | 63 | def last_f_score(self): 64 | return compute_f(self.last_match_num, self.last_test_num, self.last_gold_num)[2] 65 | 66 | def reset(self): 67 | self.total_match_num = 0 68 | self.total_test_num = 0 69 | self.total_gold_num = 0 70 | -------------------------------------------------------------------------------- /amr_aligner/smatch/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from distutils.core import setup, Extension 3 | from Cython.Build import cythonize 4 | 5 | setup(ext_modules=cythonize(Extension("_smatch", sources=["_smatch.pyx", "_gain.cc"], language="c++"))) 6 | -------------------------------------------------------------------------------- /amr_aligner/system/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/amr_aligner/system/__init__.py -------------------------------------------------------------------------------- /amr_aligner/system/eager/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/amr_aligner/system/eager/__init__.py -------------------------------------------------------------------------------- /amr_aligner/system/edge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals 3 | from system.node import TokenNode, AttributeNode 4 | 5 | 6 | class Edge(object): 7 | def __init__(self, source_node, relation, target_node): 8 | self.source_node = source_node 9 | self.relation = relation 10 | self.target_node = target_node 11 | 12 | 13 | class EdgeSet(set): 14 | def __init__(self, top): 15 | super(EdgeSet, self).__init__() 16 | self.top = top 17 | 18 | def _traverse_print(self, root, variables, shown, in_const_edge): 19 | children = [] 20 | for edge in self.__iter__(): 21 | if edge.source_node == root: 22 | children.append((edge.relation, edge.target_node)) 23 | children.sort(key=lambda x: (x[0], x[1].name)) 24 | 25 | if len(children) == 0: 26 | if not shown[root]: 27 | shown[root] = True 28 | if isinstance(root, TokenNode): 29 | ret = '"{0}"'.format(self._normalize_entity_token(root.name)) 30 | elif isinstance(root, AttributeNode) or (in_const_edge and self._is_attribute(root.name)): 31 | ret = root.name 32 | else: 33 | ret = '({0} / {1})'.format(variables[root], root.name) 34 | else: 35 | ret = variables[root] 36 | else: 37 | if shown[root]: 38 | ret = '{0}'.format(variables[root]) 39 | else: 40 | shown[root] = True 41 | unnamed_concept = in_const_edge and self._is_attribute(root.name) 42 | if unnamed_concept: 43 | ret = root.name 44 | else: 45 | ret = '({0} / {1}'.format(variables[root], root.name) 46 | for relation, child in children: 47 | ret += ' :{0} {1}'.format(relation, self._traverse_print(child, variables, shown, 48 | self._is_const_relation(relation))) 49 | if not unnamed_concept: 50 | ret += ')' 51 | return ret 52 | 53 | def _get_size(self, root, visited, covered=set()): 54 | if root in visited or root in covered: 55 | return 1 56 | visited.add(root) 57 | tree_size = 0 58 | children = [] 59 | for edge in self.__iter__(): 60 | if edge.source_node == root: 61 | children.append(edge.target_node) 62 | for child in children: 63 | tree_size += self._get_size(child, visited, covered) + 1 64 | return tree_size + 1 65 | 66 | def _print(self): 67 | roots = self._get_roots() 68 | variables = self._get_variables 69 | shown = {node: False for node in variables} 70 | if len(roots) == 1: 71 | return self._traverse_print(roots[0], variables, shown, False) 72 | elif len(roots) > 1: 73 | # return self._traverse_print(self.top, variables, shown) 74 | new_root = roots[0] 75 | for i, root in enumerate(roots[1:]): 76 | self.add(Edge(new_root, 'TOP{0}'.format(i), root)) 77 | return self._traverse_print(new_root, variables, shown, False) 78 | else: 79 | return '(a / amr-empty)' 80 | 81 | def __str__(self): 82 | return self._print() 83 | 84 | def _get_roots(self): 85 | covered = set() 86 | for node in self._get_variables: 87 | if node.name == '_ROOT_': 88 | covered.add(node) 89 | 90 | ret = [] 91 | for edge in self.__iter__(): 92 | if edge.source_node == self.top: 93 | ret.append(edge.target_node) 94 | self._get_size(edge.target_node, covered) 95 | 96 | while True: 97 | max_sz = 0 98 | max_node = None 99 | for node in self._get_variables: 100 | if node not in covered: 101 | visited = set() 102 | sz = self._get_size(node, visited, covered) 103 | if sz > max_sz: 104 | max_node = node 105 | max_sz = sz 
106 |             if max_node is None:
107 |                 break
108 |             ret.append(max_node)
109 |             # mark the whole subtree under max_node as covered before searching for the next root
110 |             self._get_size(max_node, covered)
111 | 
112 |         assert len(covered) == len(self._get_variables)
113 |         return ret
114 | 
115 |     @property
116 |     def _get_variables(self):
117 |         nodes = set()
118 |         for edge in self.__iter__():
119 |             nodes.add(edge.source_node)
120 |             nodes.add(edge.target_node)
121 |         nodes = list(nodes)
122 |         nodes.sort(key=lambda x: x.name)
123 | 
124 |         variables = {}
125 |         variable_name_counts = {}
126 |         for node in nodes:
127 |             shortname = self._shortname(node.name)
128 |             if shortname not in variable_name_counts:
129 |                 variable_name_counts[shortname] = 0
130 |             variable_name_counts[shortname] += 1
131 |             count = variable_name_counts[shortname]
132 |             variables[node] = shortname if count == 1 else (shortname + str(count))
133 |         return variables
134 | 
135 |     @staticmethod
136 |     def _normalize_entity_token(token):
137 |         if token == '"':
138 |             return '_QUOTE_'
139 |         return token
140 | 
141 |     @staticmethod
142 |     def _shortname(token):
143 |         if token[0] == '"':
144 |             return token[1] if len(token) > 1 else 'q'
145 |         return token[0]
146 | 
147 |     @staticmethod
148 |     def _is_attribute(name):
149 |         if name in ('-', 'imperative'):  # polarity
150 |             return True
151 |         return name.isdigit()
152 | 
153 |     @staticmethod
154 |     def _is_const_relation(relation):
155 |         if relation.startswith('op') or \
156 |                 relation in ('month', 'decade', 'polarity', 'day', 'quarter', 'year', 'era', 'century',
157 |                              'timezone', 'polite', 'mode', 'value', 'quant', 'unit', 'range', 'scale'):
158 |             return True
159 |         return False
160 | 
--------------------------------------------------------------------------------
/amr_aligner/system/misc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 | from __future__ import unicode_literals
4 | import sys
5 | from datetime import datetime
6 | _DATE_FORMATS = {
7 |     '%y0000': (True, False, False),
8 |     '%y%m00': (True, True, False),
9 |     '%y%m%d': (True, True, True),
10 |     '%Y0000': (True, False, False),
11 |     '%Y%m00': (True, True, False),
12 |     '%d %B %Y': (True, True, True),
13 |     '%d %B': (True, True, False),
14 |     '%d %Y': (True, False, True),
15 |     '%Y%m%d': (True, True, True),
16 |     '%Y-%m-%d': (True, True, True),
17 |     '%m/%d': (False, True, True),
18 |     '%m/%d/%Y': (True, True, True),
19 |     '%m - %d - %Y': (True, True, True),
20 |     '%B %Y': (True, True, False),
21 |     '%B , %Y': (True, True, False),
22 |     '%B %d %Y': (True, True, True),
23 |     '%B %d , %Y': (True, True, True),
24 |     '%B %d': (False, True, True),
25 |     '%B %dst': (False, True, True),
26 |     '%B %dnd': (False, True, True),
27 |     '%B %drd': (False, True, True),
28 |     '%B %dth': (False, True, True),
29 |     '%B': (False, True, False),
30 |     '%Y': (True, False, False),
31 |     '%y': (True, False, False),
32 | }
33 | 
34 | 
35 | def parse_date(expression):
36 |     results = []
37 |     for format_ in _DATE_FORMATS:
38 |         try:
39 |             result = datetime.strptime(expression, format_)
40 |             results.append((result, _DATE_FORMATS[format_]))
41 |         except ValueError:
42 |             continue
43 |     results = list(filter(lambda result: 1900 <= result[0].year < 2100, results))
44 |     if len(results) > 1:  # more than one format matched; arbitrarily keep the first
45 |         return results[0]
46 |     elif len(results) == 1:
47 |         return results[0]
48 |     else:
49 |         return None, (False, False, False)
50 | 
51 | 
52 | def parse_all_dates(expression):
53 |     results = []
54 |     for format_ in _DATE_FORMATS:
55 |         try:
56 |             result = datetime.strptime(expression, format_)
57 |             results.append((result, _DATE_FORMATS[format_]))
58 |         except ValueError:
59 |             continue
60 |     results = list(filter(lambda r: 1900 <= r[0].year < 2100, results))
61 |     return results
62 | 
63 | 
64 | def test():
65 |     for line in open(sys.argv[1], 'r'):
66 |         expression, fields = line.strip().split('|||')
67 |         expression = expression.strip()
68 |         result = parse_date(expression)
69 |         slots = result[1]
70 |         for field in fields.split():
71 |             if field == 'year':
72 |                 assert slots[0]
73 |             if field == 'month':
74 |                 assert slots[1]
75 |             if field == 'day':
76 |                 assert slots[2]
77 |         print('{0} ||| {1} ||| {2}'.format(expression, slots, fields), file=sys.stderr)
78 | 
79 | 
80 | if __name__ == "__main__":
81 |     test()
82 | 
--------------------------------------------------------------------------------
/amr_aligner/system/node.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import unicode_literals
3 | 
4 | 
5 | class Node(object):
6 |     def __init__(self, name, type_name, coverage):
7 |         self.name = name
8 |         self.type_name = type_name
9 |         self.coverage = coverage
10 | 
11 |     def get_coverage(self):
12 |         return self.coverage
13 | 
14 |     def get_name(self):
15 |         return self.name
16 | 
17 |     def get_type(self):
18 |         return self.type_name
19 | 
20 | 
21 | class TokenNode(Node):
22 |     def __init__(self, name, coverage):
23 |         super(TokenNode, self).__init__(name, 'token', coverage)
24 | 
25 |     def __str__(self):
26 |         return '"{0}"'.format(self.name)
27 | 
28 | 
29 | class EntityNode(Node):
30 |     def __init__(self, node1, node2):
31 |         self.nodes = [node1, node2]
32 |         coverage = node1.get_coverage() + node2.get_coverage()
33 |         name = '_'.join([node1.name, node2.name])
34 |         super(EntityNode, self).__init__(name, 'entity', coverage)
35 | 
36 |     def add(self, node):
37 |         self.nodes.append(node)
38 |         self.coverage = self.coverage + node.get_coverage()
39 |         self.name = self.name + '_{0}'.format(node.name)
40 | 
41 |     def __str__(self):
42 |         return '"{0}"'.format(self.name)
43 | 
44 | 
45 | class ConceptNode(Node):
46 |     def __init__(self, name, coverage, level=None):
47 |         super(ConceptNode, self).__init__(name, 'concept', coverage)
48 |         self.level = level
49 | 
50 |     def get_level(self):
51 |         return self.level
52 | 
53 |     def __str__(self):
54 |         return self.name
55 | 
56 | 
57 | class AttributeNode(Node):
58 |     def __init__(self, value):
59 |         super(AttributeNode, self).__init__(value, 'attribute', None)
60 | 
61 |     def __str__(self):
62 |         return '={0}'.format(self.name)
63 | 
64 | 
65 | def coverage_match_alignment(coverage, align):
66 |     assert isinstance(coverage, list)
67 |     if len(coverage) == 1:
68 |         return align[0] == coverage[0] and align[1] == align[0] + 1
69 |     else:
70 |         return align[0] == coverage[0] and align[1] == coverage[-1] + 1
71 | 
--------------------------------------------------------------------------------
/amr_parser/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | project(amr_parser)
2 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
3 | 
4 | set(CMAKE_MACOSX_RPATH 1)
5 | set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
6 | set (EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin)
7 | 
8 | # DYNET uses Eigen which exploits modern CPU architectures. To get the
9 | # best possible performance, the following are recommended:
10 | # 1. use very recent versions of gcc or Clang to build
11 | # 2. use very recent versions of Eigen (ideally the dev version)
12 | # 3. 
try compiler options like -march=native or other architecture 13 | # flags (the compiler does not always make the best configuration 14 | # decisions without help) 15 | 16 | function(find_mkl) 17 | set(MKL_ARCH intel64) 18 | find_path(MKL_INCLUDE_DIR mkl.h 19 | PATHS ${MKL_ROOT} ${MKL_ROOT}/include) 20 | find_library(MKL_CORE_LIB NAMES mkl_intel_lp64 mkl_intel_thread mkl_core 21 | PATHS ${MKL_ROOT} ${MKL_ROOT}/lib/${MKL_ARCH} 22 | DOC "MKL core library path") 23 | 24 | find_library(MKL_COMPILER_LIB NAMES iomp5 libiomp5md 25 | PATHS ${MKL_ROOT} ${MKL_ROOT}/../compiler/lib/${MKL_ARCH} #Windows 26 | ${MKL_ROOT}/../compilers_and_libraries/linux/lib/${MKL_ARCH}_lin #Linux 27 | DOC "MKL compiler lib (for threaded MKL)") 28 | 29 | if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_COMPILER_LIB) 30 | get_filename_component(MKL_CORE_LIB_DIR ${MKL_CORE_LIB} DIRECTORY) 31 | get_filename_component(MKL_COMPILER_LIB_DIR ${MKL_COMPILER_LIB} DIRECTORY) 32 | get_filename_component(MKL_COMPILER_LIB_FILE ${MKL_COMPILER_LIB} NAME) 33 | message(STATUS "Found MKL\n * include: ${MKL_INCLUDE_DIR},\n * core library dir: ${MKL_CORE_LIB_DIR},\n * compiler library: ${MKL_COMPILER_LIB}") 34 | 35 | # Due to a conflict with /MT and /MD, MSVC needs mkl_intel_lp64 linked last, or we can change individual 36 | # projects to use /MT (mkl_intel_lp64 linked with /MT, default MSVC projects use /MD), or we can instead 37 | # link to the DLL versions. For now I'm opting for this solution which seems to work with projects still 38 | # at their default /MD. Linux build requires the mkl_intel_lp64 to be linked first. So...: 39 | if(MSVC) 40 | set(LIBS ${LIBS} mkl_intel_thread mkl_core mkl_intel_lp64 ${MKL_COMPILER_LIB_FILE} PARENT_SCOPE) 41 | else() 42 | set(LIBS ${LIBS} mkl_intel_lp64 mkl_intel_thread mkl_core ${MKL_COMPILER_LIB_FILE} PARENT_SCOPE) 43 | endif() 44 | include_directories(${MKL_INCLUDE_DIR}) 45 | link_directories(${MKL_CORE_LIB_DIR} ${MKL_COMPILER_LIB_DIR}) 46 | set(MKL_LINK_DIRS ${MKL_CORE_LIB_DIR} ${MKL_COMPILER_LIB_DIR} PARENT_SCOPE) # Keeping this for python build 47 | else() 48 | message(FATAL_ERROR "Failed to find MKL in path: ${MKL_ROOT} (Did you set MKL_ROOT properly?)") 49 | endif() 50 | endfunction() 51 | 52 | ######## Cross-compiler, cross-platform options 53 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_FAST_MATH") 54 | if (MKL OR MKL_ROOT) 55 | find_mkl() # sets include/lib directories and sets ${LIBS} needed for linking 56 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_USE_MKL_ALL") 57 | endif() 58 | 59 | 60 | ######## Platform-specific options 61 | if(WIN32) 62 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX") # Disable min/max macros in windef.h 63 | endif() 64 | 65 | ######## Compiler-specific options 66 | if(MSVC) 67 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W1 -DEIGEN_HAS_C99_MATH /MP") # -Wall produces 20k warnings 68 | else() 69 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops -Wall -Wno-missing-braces -std=c++11 -Ofast -g -march=native") 70 | endif() 71 | 72 | enable_testing() 73 | 74 | function(find_cudnn) 75 | set(CUDNN_ROOT "" CACHE PATH "CUDNN root path") 76 | find_path(CUDNN_INCLUDE_DIRS cudnn.h 77 | PATHS ${CUDNN_ROOT} 78 | ${CUDNN_ROOT}/include 79 | DOC "CUDNN include path") 80 | find_library(CUDNN_LIBRARIES NAMES libcudnn.so 81 | PATHS ${CUDNN_ROOT} 82 | ${CUDNN_ROOT}/lib 83 | ${CUDNN_ROOT}/lib64 84 | DOC "CUDNN library path") 85 | if(CUDNN_INCLUDE_DIRS AND CUDNN_LIBRARIES) 86 | set(CUDNN_FOUND TRUE PARENT_SCOPE) 87 | message(STATUS "Found CUDNN (include: 
${CUDNN_INCLUDE_DIRS}, library: ${CUDNN_LIBRARIES})") 88 | mark_as_advanced(CUDNN_INCLUDE_DIRS CUDNN_LIBRARIES) 89 | else() 90 | MESSAGE(FATAL_ERROR "Failed to find CUDNN in path: ${CUDNN_ROOT} (Did you set CUDNN_ROOT properly?)") 91 | endif() 92 | endfunction() 93 | 94 | # look for Boost 95 | if(DEFINED ENV{BOOST_ROOT}) 96 | set(Boost_NO_SYSTEM_PATHS ON) 97 | get_filename_component(Boost_INCLUDE_DIR "${Boost_INCLUDE_DIR}" REALPATH BASE_DIR "${CMAKE_BINARY_DIR}") 98 | endif() 99 | set(Boost_REALPATH ON) 100 | message("-- Boost dir is " ${Boost_INCLUDE_DIR}) 101 | if (MSVC) 102 | find_package(Boost COMPONENTS program_options regex serialization REQUIRED) 103 | else() 104 | add_definitions (-DBOOST_LOG_DYN_LINK) 105 | find_package(Boost COMPONENTS program_options regex serialization log_setup log thread system REQUIRED) 106 | endif() 107 | include_directories(${Boost_INCLUDE_DIR}) 108 | if(MSVC) 109 | # Boost does auto-linking when using a compiler like Microsoft Visual C++, we just need to help it find the libraries 110 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LIBPATH:${Boost_LIBRARY_DIRS}") 111 | set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /LIBPATH:${Boost_LIBRARY_DIRS}") 112 | else() 113 | set(LIBS ${LIBS} ${Boost_LIBRARIES}) 114 | endif() 115 | # trouble shooting: 116 | # if boost library cannot be found, in addition to install boost library 117 | # check if environment variables are set 118 | # 119 | # to set boost root and its library root in environment variable, use 120 | # for example 121 | # echo "export BOOST_LIBRARYDIR=/usr/local/lib" >> ~/.bashrc 122 | # echo "export BOOST_ROOT=/cygdrive/d/tools/boost_1_58_0/boost_1_58_0" >> ~/.bashrc 123 | # then run source ~/.bashrc to have those environment variable effective immediately 124 | if (NOT DEFINED DYNET_DEBUG_LEVEL) 125 | set(DYNET_DEBUG_LEVEL 1) 126 | endif() 127 | add_definitions(-DDYNET_DEBUG_LEVEL=${DYNET_DEBUG_LEVEL}) 128 | 129 | if(BACKEND) 130 | message("-- BACKEND: ${BACKEND}") 131 | else() 132 | message("-- BACKEND not specified, defaulting to eigen.") 133 | set(BACKEND "eigen") 134 | endif() 135 | 136 | if(BACKEND MATCHES "^eigen$") 137 | set(WITH_EIGEN_BACKEND 1) 138 | elseif(BACKEND MATCHES "^cuda$") 139 | set(WITH_CUDA_BACKEND 1) 140 | else() 141 | message(SEND_ERROR "BACKEND must be eigen or cuda") 142 | endif() 143 | 144 | if (WITH_CUDA_BACKEND) 145 | find_package(CUDA REQUIRED) 146 | set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_ROOT}) 147 | include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) 148 | #list(APPEND CUDA_LIBRARIES /usr/lib64/libpthread.so) 149 | MESSAGE("CUDA_LIBRARIES: ${CUDA_LIBRARIES}") 150 | list(REMOVE_ITEM CUDA_LIBRARIES -lpthread) 151 | set(LIBS ${LIBS} ${CUDA_LIBRARIES}) 152 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_HAS_CUDA_FP16 -DEIGEN_USE_GPU") 153 | #find_cudnn() 154 | #include_directories(SYSTEM ${CUDNN_INCLUDE_DIRS}) 155 | endif() 156 | 157 | # look for Eigen 158 | find_package(Eigen3 REQUIRED) 159 | get_filename_component(EIGEN3_INCLUDE_DIR "${EIGEN3_INCLUDE_DIR}" REALPATH BASE_DIR "${CMAKE_BINARY_DIR}") 160 | message("-- Eigen dir is " ${EIGEN3_INCLUDE_DIR}) 161 | include_directories(${EIGEN3_INCLUDE_DIR}) 162 | 163 | FIND_PACKAGE(Threads REQUIRED) 164 | set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT}) 165 | 166 | #configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h) 167 | #set(source_directory ${PROJECT_SOURCE_DIR}/src) 168 | 169 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/dynet) 170 | 
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/dynet_layer) 171 | 172 | add_subdirectory(dynet/dynet) 173 | set (DYNET_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}}/dynet) 174 | add_subdirectory(dynet_layer/dynet_layer) 175 | add_subdirectory(src) 176 | -------------------------------------------------------------------------------- /amr_parser/cmake/FindEigen3.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find Eigen3 lib 2 | # 3 | # This module supports requiring a minimum version, e.g. you can do 4 | # find_package(Eigen3 3.1.2) 5 | # to require version 3.1.2 or newer of Eigen3. 6 | # 7 | # Once done this will define 8 | # 9 | # EIGEN3_FOUND - system has eigen lib with correct version 10 | # EIGEN3_INCLUDE_DIR - the eigen include directory 11 | # EIGEN3_VERSION - eigen version 12 | 13 | # Copyright (c) 2006, 2007 Montel Laurent, 14 | # Copyright (c) 2008, 2009 Gael Guennebaud, 15 | # Copyright (c) 2009 Benoit Jacob 16 | # Redistribution and use is allowed according to the terms of the 2-clause BSD license. 17 | 18 | if(NOT Eigen3_FIND_VERSION) 19 | if(NOT Eigen3_FIND_VERSION_MAJOR) 20 | set(Eigen3_FIND_VERSION_MAJOR 2) 21 | endif(NOT Eigen3_FIND_VERSION_MAJOR) 22 | if(NOT Eigen3_FIND_VERSION_MINOR) 23 | set(Eigen3_FIND_VERSION_MINOR 91) 24 | endif(NOT Eigen3_FIND_VERSION_MINOR) 25 | if(NOT Eigen3_FIND_VERSION_PATCH) 26 | set(Eigen3_FIND_VERSION_PATCH 0) 27 | endif(NOT Eigen3_FIND_VERSION_PATCH) 28 | 29 | set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}") 30 | endif(NOT Eigen3_FIND_VERSION) 31 | 32 | macro(_eigen3_check_version) 33 | file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header) 34 | 35 | string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}") 36 | set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}") 37 | string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}") 38 | set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}") 39 | string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}") 40 | set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}") 41 | 42 | set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}) 43 | if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 44 | set(EIGEN3_VERSION_OK FALSE) 45 | else(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 46 | set(EIGEN3_VERSION_OK TRUE) 47 | endif(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 48 | 49 | if(NOT EIGEN3_VERSION_OK) 50 | 51 | message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, " 52 | "but at least version ${Eigen3_FIND_VERSION} is required") 53 | endif(NOT EIGEN3_VERSION_OK) 54 | endmacro(_eigen3_check_version) 55 | 56 | if (EIGEN3_INCLUDE_DIR) 57 | 58 | # in cache already 59 | _eigen3_check_version() 60 | set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) 61 | 62 | else (EIGEN3_INCLUDE_DIR) 63 | 64 | find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library 65 | PATHS 66 | ${CMAKE_INSTALL_PREFIX}/include 67 | ${KDE4_INCLUDE_DIR} 68 | PATH_SUFFIXES eigen3 eigen 69 | ) 70 | 71 | if(EIGEN3_INCLUDE_DIR) 72 | _eigen3_check_version() 73 | endif(EIGEN3_INCLUDE_DIR) 74 | 75 | include(FindPackageHandleStandardArgs) 76 | find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK) 77 | 78 | 
mark_as_advanced(EIGEN3_INCLUDE_DIR)
79 | 
80 | endif(EIGEN3_INCLUDE_DIR)
81 | 
82 | 
--------------------------------------------------------------------------------
/amr_parser/scripts/eval_eager.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Evaluation for the eager system
4 | #
5 | # It converts the predicted actions into an AMR graph and calls smatch
6 | # to evaluate its score
7 | #
8 | # Usage:
9 | #
10 | # bash eval_eager.sh predict-action gold-AMR
11 | #
12 | BASEDIR=$(dirname "$0")/../../amr_aligner
13 | python ${BASEDIR}/eager_actions_evaluator.py -pred_actions $1 -gold $2
14 | 
--------------------------------------------------------------------------------
/amr_parser/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library (common
2 |   corpus.cc
3 |   corpus.h
4 |   ds.cc
5 |   ds.h
6 |   logging.cc
7 |   logging.h
8 |   math_utils.cc
9 |   math_utils.h
10 |   sys_utils.cc
11 |   sys_utils.h
12 |   trainer_utils.cc
13 |   trainer_utils.h
14 |   lstm.h
15 |   lstm.cc)
16 | 
17 | target_link_libraries (common ${Boost_LIBRARIES})
18 | add_subdirectory (left_to_right)
--------------------------------------------------------------------------------
/amr_parser/src/corpus.cc:
--------------------------------------------------------------------------------
1 | #include "corpus.h"
2 | #include 
3 | #include 
4 | #include 
5 | #include 
6 | #include "logging.h"
7 | #include 
8 | #include 
9 | #include 
10 | 
11 | const char* Corpus::UNK = "_UNK_";
12 | const char* Corpus::SPAN = "_SPAN_";
13 | const char* Corpus::BAD0 = "_BAD0_";
14 | const char* Corpus::ROOT = "_ROOT_";
15 | 
16 | Corpus::Corpus() : n_train(0), n_devel(0), n_test(0) {
17 | 
18 | }
19 | 
20 | void Corpus::load_training_data(const std::string& filename) {
21 |   _INFO << "Corpus:: reading training data from: " << filename;
22 | 
23 |   word_map.insert(Corpus::ROOT);
24 |   word_map.insert(Corpus::UNK);
25 |   // word_map.insert(Corpus::SPAN);
26 |   pos_map.insert(Corpus::ROOT);
27 |   pos_map.insert(Corpus::UNK);
28 |   char_map.insert(Corpus::UNK);
29 |   action_map.insert("CONFIRM");
30 |   action_map.insert(Corpus::UNK);
31 | 
32 |   confirm_map[word_map.get(Corpus::UNK)] = Alphabet();
33 |   confirm_map[word_map.get(Corpus::UNK)].insert(Corpus::UNK);
34 | 
35 |   std::ifstream in(filename);
36 |   BOOST_ASSERT_MSG(in, "Corpus:: failed to open the training file.");
37 | 
38 |   n_train = 0;
39 |   std::string data;
40 |   std::string line;
41 |   while (std::getline(in, line)) {
42 |     boost::algorithm::trim(line);
43 |     if (line.empty()) {
44 |       parse_data(data, training_inputs[n_train], training_actions[n_train], true);
45 |       data = "";
46 |       ++n_train;
47 |     } else {
48 |       data += (line + "\n");
49 |     }
50 |   }
51 |   if (!data.empty()) {
52 |     parse_data(data, training_inputs[n_train], training_actions[n_train], true);
53 |     ++n_train;
54 |   }
55 |   _INFO << "Corpus:: loaded " << n_train << " training sentences.";
56 | }
57 | 
58 | void Corpus::load_devel_data(const std::string& filename) {
59 |   _INFO << "Corpus:: reading development data from: " << filename;
60 |   BOOST_ASSERT_MSG(word_map.size() > 1,
61 |                    "Corpus:: ROOT and UNK should be inserted before loading devel data.");
62 | 
63 |   std::ifstream in(filename);
64 |   BOOST_ASSERT_MSG(in, "Corpus:: failed to open the devel file.");
65 | 
66 |   n_devel = 0;
67 |   std::string data;
68 |   std::string line;
69 |   while (std::getline(in, line)) {
70 |     boost::algorithm::trim(line);
71 |     if (line.empty()) {
72 |       parse_data(data, devel_inputs[n_devel], devel_actions[n_devel], false);
73 |       data = "";
74 |       ++n_devel;
75 |     } else {
76 |       data += (line + "\n");
77 |     }
78 |   }
79 |   if (!data.empty()) {
80 |     parse_data(data, devel_inputs[n_devel], devel_actions[n_devel], false);
81 |     ++n_devel;
82 |   }
83 |   _INFO << "Corpus:: loaded " << n_devel << " development sentences.";
84 | }
85 | 
86 | void Corpus::load_test_data(const std::string & filename) {
87 |   _INFO << "Corpus:: reading test data from: " << filename;
88 |   BOOST_ASSERT_MSG(word_map.size() > 1,
89 |                    "Corpus:: ROOT and UNK should be inserted before loading test data.");
90 | 
91 |   std::ifstream in(filename);
92 |   BOOST_ASSERT_MSG(in, "Corpus:: failed to open the test file.");
93 | 
94 |   n_test = 0;
95 |   std::string data;
96 |   std::string line;
97 |   while (std::getline(in, line)) {
98 |     boost::algorithm::trim(line);
99 |     if (line.empty()) {
100 |       parse_data(data, test_inputs[n_test], test_actions[n_test], false);
101 |       data = "";
102 |       ++n_test;
103 |     } else {
104 |       data += (line + "\n");
105 |     }
106 |   }
107 |   if (!data.empty()) {
108 |     parse_data(data, test_inputs[n_test], test_actions[n_test], false);
109 |     ++n_test;
110 |   }
111 |   _INFO << "Corpus:: loaded " << n_test << " test sentences.";
112 | }
113 | 
114 | void Corpus::parse_data(const std::string& data,
115 |                         InputUnits& input_units,
116 |                         ActionUnits& action_units,
117 |                         bool train) {
118 |   std::stringstream S(data);
119 |   std::string line;
120 | 
121 |   input_units.clear();
122 |   action_units.clear();
123 | 
124 |   while (std::getline(S, line)) {
125 |     std::vector<std::string> tokens;
126 |     boost::algorithm::trim(line);
127 |     boost::algorithm::split(tokens, line, boost::is_any_of(" \t"), boost::token_compress_on);
128 | 
129 |     if (tokens[1] == "::tok") {
130 |       for (int i = 2; i < tokens.size(); i++) {
131 |         input_units.push_back(InputUnit());
132 |         if (train) {
133 |           unsigned wid = word_map.insert(tokens[i]);
134 |           input_units[i - 2].wid = wid;
135 |           input_units[i - 2].aux_wid = wid;
136 |           input_units[i - 2].w_str = tokens[i];
137 |           for (int j = 0; j < tokens[i].size(); ++j) {
138 |             unsigned c_id = char_map.insert(std::string(1, tokens[i][j]));
139 |             input_units[i - 2].c_id.push_back(c_id);
140 |           }
141 |         } else {
142 |           unsigned wid = (word_map.contains(tokens[i])) ? word_map.get(tokens[i]) : word_map.get(UNK);
143 |           input_units[i - 2].wid = wid;
144 |           input_units[i - 2].aux_wid = wid;
145 |           input_units[i - 2].w_str = tokens[i];
146 |           for (int j = 0; j < tokens[i].size(); ++j) {
147 |             unsigned c_id = (char_map.contains(std::string(1, tokens[i][j]))) ? char_map.get(std::string(1, tokens[i][j])) : char_map.get(UNK);
148 |             input_units[i - 2].c_id.push_back(c_id);
149 |           }
150 |         }
151 |       }
152 |     } else if (tokens[1] == "::pos") {
153 |       for (int i = 2; i < tokens.size(); i++) {
154 |         if (train) {
155 |           unsigned pid = pos_map.insert(tokens[i]);
156 |           input_units[i - 2].pid = pid;
157 |         } else {
158 |           unsigned pid = (pos_map.contains(tokens[i])) ? 
pos_map.get(tokens[i]) : pos_map.get(UNK); 159 | input_units[i - 2].pid = pid; 160 | } 161 | } 162 | } else if (tokens[1] == "::action") { 163 | std::string action = tokens[2]; 164 | for (int i = 3; i < tokens.size(); i++) { 165 | action += "\t" + tokens[i]; 166 | } 167 | ActionUnit action_unit = ActionUnit(action, tokens[2]); 168 | if (tokens[2] == "CONFIRM") { 169 | action_unit.action_name = "CONFIRM"; 170 | } else { 171 | action_unit.action_name = action; 172 | } 173 | 174 | if (train) { 175 | std::vector terms; 176 | boost::algorithm::split(terms, action, boost::is_any_of(" \t"), boost::token_compress_on); 177 | if (terms[0] == "CONFIRM") { 178 | unsigned wid = (word_map.contains(terms[1])) ? word_map.get(terms[1]) : word_map.get(UNK); 179 | if (wid == word_map.get(UNK)) { 180 | action_unit.idx = 0; 181 | } else { 182 | if (confirm_map.find(wid) == confirm_map.end()) { 183 | confirm_map[wid] = Alphabet(); 184 | confirm_map[wid].insert(word_map.get(wid)); 185 | } 186 | action_unit.idx = confirm_map[wid].insert(terms[2]); 187 | } 188 | } else if (terms[0] == "NEWNODE") { 189 | unsigned nid = node_map.insert(terms[1]); 190 | action_unit.idx = nid; 191 | } else if (terms[0] == "LEFT" || terms[0] == "RIGHT") { 192 | unsigned rid = rel_map.insert(terms[1]); 193 | action_unit.idx = rid; 194 | } else if (terms[0] == "ENTITY") { 195 | unsigned eid = entity_map.insert(terms[1]); 196 | action_unit.idx = eid; 197 | } 198 | unsigned aid = action_map.insert(action_unit.action_name); 199 | action_unit.aid = aid; 200 | } else { 201 | unsigned aid = (action_map.contains(action_unit.action_name)) ? action_map.get(action_unit.action_name) : action_map.get(UNK); 202 | action_unit.aid = aid; 203 | } 204 | action_units.push_back(action_unit); 205 | } 206 | } 207 | InputUnit input_unit; 208 | input_unit.wid = word_map.get(ROOT); 209 | input_unit.pid = pos_map.get(ROOT); 210 | input_unit.aux_wid = word_map.get(ROOT); 211 | input_unit.w_str = ROOT; 212 | input_units.push_back(input_unit); 213 | } 214 | 215 | unsigned Corpus::get_or_add_word(const std::string& word) { 216 | return word_map.insert(word); 217 | } 218 | 219 | void Corpus::stat() { 220 | _INFO << "Corpus:: # of words = " << word_map.size(); 221 | _INFO << "Corpus:: # of pos = " << pos_map.size(); 222 | } 223 | 224 | void Corpus::get_vocabulary_and_singletons() { 225 | std::map counter; 226 | for (auto& payload : training_inputs) { 227 | for (auto& item : payload.second) { 228 | vocab.insert(item.wid); 229 | ++counter[item.wid]; 230 | } 231 | } 232 | for (auto& payload : counter) { 233 | if (payload.second == 1) { singleton.insert(payload.first); } 234 | } 235 | } 236 | 237 | void load_pretrained_word_embedding(const std::string& embedding_file, 238 | unsigned pretrained_dim, 239 | std::unordered_map >& pretrained, 240 | Corpus& corpus) { 241 | pretrained[corpus.get_or_add_word(Corpus::BAD0)] = std::vector(pretrained_dim, 0.f); 242 | pretrained[corpus.get_or_add_word(Corpus::UNK)] = std::vector(pretrained_dim, 0.f); 243 | _INFO << "Main:: Loading from " << embedding_file << " with " << pretrained_dim << " dimensions."; 244 | std::ifstream ifs(embedding_file); 245 | BOOST_ASSERT_MSG(ifs, "Failed to load embedding file."); 246 | std::string line; 247 | // get the header in word2vec styled embedding. 
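// For reference, a word2vec-style text embedding file starts with a
// header line "<vocab_size> <dim>" and then holds one word plus <dim>
// float values per line, e.g. (illustrative values only):
//   400000 100
//   the 0.418 0.24968 -0.41242 ...
// The header line is consumed once below and discarded.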
248 |   std::getline(ifs, line);
249 |   std::vector<float> v(pretrained_dim, 0.f);
250 |   std::string word;
251 |   while (std::getline(ifs, line)) {
252 |     std::istringstream iss(line);
253 |     iss >> word;
254 |     // ideally, we should check that each row actually carries pretrained_dim values.
255 |     for (unsigned i = 0; i < pretrained_dim; ++i) { iss >> v[i]; }
256 |     unsigned id = corpus.get_or_add_word(word);
257 |     pretrained[id] = v;
258 |   }
259 | }
--------------------------------------------------------------------------------
/amr_parser/src/corpus.h:
--------------------------------------------------------------------------------
1 | #ifndef RLPARSER_CORPUS_H
2 | #define RLPARSER_CORPUS_H
3 | 
4 | #include 
5 | #include 
6 | #include 
7 | #include "ds.h"
8 | #include 
9 | 
10 | struct InputUnit {
11 |   unsigned wid;
12 |   unsigned aux_wid;
13 |   unsigned pid;
14 |   std::vector<unsigned> c_id;
15 | 
16 |   std::string w_str;
17 | 
18 |   friend class boost::serialization::access;
19 |   template <class Archive>
20 |   void serialize(Archive& ar, const unsigned version) {
21 |     ar & wid;
22 |     ar & pid;
23 |     ar & c_id;
24 |     ar & aux_wid;
25 |   }
26 | };
27 | 
28 | struct ActionUnit {
29 |   std::string a_str;
30 |   std::string action_name;
31 |   unsigned aid;
32 |   unsigned idx;  // payload index for the CONFIRM, NEWNODE, LEFT/RIGHT and ENTITY ops
33 | 
34 |   ActionUnit(std::string a_str, std::string action_name) : a_str(a_str), action_name(action_name) {}
35 |   friend class boost::serialization::access;
36 |   template <class Archive>
37 |   void serialize(Archive& ar, const unsigned version) {
38 |     ar & aid;
39 |     ar & idx;
40 |   }
41 | };
42 | 
43 | typedef std::vector<InputUnit> InputUnits;
44 | typedef std::vector<ActionUnit> ActionUnits;
45 | 
46 | struct Corpus {
47 |   const static char* UNK;
48 |   const static char* SPAN;
49 |   const static char* BAD0;
50 |   const static char* ROOT;
51 | 
52 |   unsigned n_train;
53 |   unsigned n_devel;
54 |   unsigned n_test;
55 | 
56 |   Alphabet word_map;
57 |   Alphabet pos_map;
58 |   Alphabet action_map;
59 |   Alphabet char_map;
60 |   Alphabet node_map;
61 |   Alphabet rel_map;
62 |   Alphabet entity_map;
63 | 
64 |   std::unordered_map<unsigned, Alphabet> confirm_map;
65 | 
66 |   std::unordered_map<unsigned, InputUnits> training_inputs;
67 |   std::unordered_map<unsigned, ActionUnits> training_actions;
68 |   std::unordered_map<unsigned, InputUnits> devel_inputs;
69 |   std::unordered_map<unsigned, ActionUnits> devel_actions;
70 |   std::unordered_map<unsigned, InputUnits> test_inputs;
71 |   std::unordered_map<unsigned, ActionUnits> test_actions;
72 | 
73 |   std::set<unsigned> vocab;
74 |   std::set<unsigned> singleton;
75 | 
76 |   Corpus();
77 | 
78 |   void load_training_data(const std::string& filename);
79 | 
80 |   void load_devel_data(const std::string& filename);
81 | 
82 |   void load_test_data(const std::string& filename);
83 | 
84 |   void parse_data(const std::string& data,
85 |                   InputUnits& input_units,
86 |                   ActionUnits& action_units,
87 |                   bool train);
88 | 
89 |   void get_vocabulary_and_singletons();
90 | 
91 |   unsigned get_or_add_word(const std::string& word);
92 |   void stat();
93 | };
94 | 
95 | void load_pretrained_word_embedding(const std::string& embedding_file,
96 |                                     unsigned pretrained_dim,
97 |                                     std::unordered_map<unsigned, std::vector<float> >& pretrained,
98 |                                     Corpus& corpus);
99 | 
100 | #endif  // end for RLPARSER_CORPUS_H
101 | 
--------------------------------------------------------------------------------
/amr_parser/src/ds.cc:
--------------------------------------------------------------------------------
1 | #include "ds.h"
2 | #include "logging.h"
3 | #include 
4 | #include 
5 | 
6 | Alphabet::Alphabet() : max_id(0), freezed(false), in_order(true) {
7 | 
8 | }
9 | 
10 | void Alphabet::freeze() {
11 |   freezed = true;  // once frozen, insert() refuses to add new entries
12 | }
13 | 
14 | unsigned Alphabet::size() const {
15 |   return max_id;
16 | }
17 | 
18 | unsigned Alphabet::get(const 
std::string& str) const { 19 | const auto found = str_to_id.find(str); 20 | if (found == str_to_id.end()) { 21 | _ERROR << "Alphabet :: str[\"" << str << "\"] not found!"; 22 | abort(); 23 | } 24 | return found->second; 25 | } 26 | 27 | std::string Alphabet::get(unsigned id) const { 28 | const auto found = id_to_str.find(id); 29 | if (found == id_to_str.end()) { 30 | _ERROR << "Alphabet :: id[" << id << "] not found!"; 31 | abort(); 32 | } 33 | return found->second; 34 | } 35 | 36 | bool Alphabet::contains(const std::string& str) const { 37 | const auto found = str_to_id.find(str); 38 | return (found != str_to_id.end()); 39 | } 40 | 41 | bool Alphabet::contains(unsigned id) const { 42 | const auto found = id_to_str.find(id); 43 | return (found != id_to_str.end()); 44 | } 45 | 46 | unsigned Alphabet::insert(const std::string& str) { 47 | BOOST_ASSERT_MSG(freezed == false, "Corpus::Insert should not insert into freezed alphabet."); 48 | if (contains(str)) { 49 | return get(str); 50 | } 51 | 52 | str_to_id[str] = max_id; 53 | id_to_str[max_id] = str; 54 | max_id++; 55 | return max_id - 1; 56 | } 57 | 58 | 59 | unsigned Alphabet::insert(const std::string& str, unsigned id) { 60 | _ERROR << "not implemented!"; 61 | abort(); 62 | } 63 | -------------------------------------------------------------------------------- /amr_parser/src/ds.h: -------------------------------------------------------------------------------- 1 | #ifndef RLPARSER_DS_H 2 | #define RLPARSER_DS_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | struct Alphabet { 9 | typedef std::unordered_map StringToIdMap; 10 | typedef std::unordered_map IdToStringMap; 11 | 12 | unsigned max_id; 13 | StringToIdMap str_to_id; 14 | IdToStringMap id_to_str; 15 | bool freezed; 16 | bool in_order; 17 | 18 | Alphabet(); 19 | 20 | void freeze(); 21 | unsigned size() const; 22 | unsigned get(const std::string& str) const; 23 | std::string get(unsigned id) const; 24 | bool contains(const std::string& str) const; 25 | bool contains(unsigned id) const; 26 | unsigned insert(const std::string& str); 27 | unsigned insert(const std::string& str, unsigned id); 28 | }; 29 | 30 | struct HashVector : public std::vector { 31 | bool operator == (const HashVector& other) const { 32 | if (size() != other.size()) { return false; } 33 | for (unsigned i = 0; i < size(); ++i) { 34 | if (at(i) != other.at(i)) { return false; } 35 | } 36 | return true; 37 | } 38 | }; 39 | 40 | 41 | namespace std { 42 | template<> 43 | struct hash { 44 | std::size_t operator()(const HashVector& values) const { 45 | size_t seed = 0; 46 | boost::hash_range(seed, values.begin(), values.end()); 47 | return seed; 48 | } 49 | }; 50 | } 51 | 52 | #endif // end for RLPARSER_DS_H -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories (${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/src/left_to_right/ ${PROJECT_SOURCE_DIR}/src/left_to_right/s2a/) 2 | 3 | add_subdirectory (parser) 4 | add_subdirectory (train) 5 | add_subdirectory (decode) 6 | add_subdirectory (evaluate) 7 | add_subdirectory (system) 8 | 9 | add_executable (parser_l2r main.cc) 10 | 11 | target_link_libraries (parser_l2r 12 | parser_l2r_system 13 | parser_l2r_parser 14 | parser_l2r_train 15 | parser_l2r_decode 16 | parser_l2r_evaluate 17 | dynet 18 | dynet_layer 19 | common 20 | ${LIBS}) 21 | 22 | add_executable (ensemble ensemble.cc) 23 | 24 | 
target_link_libraries (ensemble 25 | parser_l2r_system 26 | parser_l2r_parser 27 | parser_l2r_train 28 | parser_l2r_decode 29 | parser_l2r_evaluate 30 | dynet 31 | dynet_layer 32 | common 33 | ${LIBS}) 34 | 35 | if(UNIX AND NOT APPLE) 36 | target_link_libraries (parser_l2r rt) 37 | endif() 38 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/decode/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories (${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/src/left_to_right/) 2 | 3 | add_library (parser_l2r_decode 4 | testing.cc 5 | testing.h) 6 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/decode/testing.cc: -------------------------------------------------------------------------------- 1 | #include "testing.h" 2 | #include "logging.h" 3 | #include 4 | 5 | 6 | po::options_description Tester::get_options() { 7 | po::options_description cmd("Testing model options"); 8 | cmd.add_options() 9 | ("test_model_path", po::value(), "The path to the model") 10 | ("test_target", po::value()->default_value("train"), "The evaluation target.") 11 | ("test_mode", po::value()->default_value("decision_acc"), "The mode of testing [decision_acc, pred_detail].") 12 | ("test_num_tests", po::value()->default_value(1), "The number of tests run on each instance.") 13 | ; 14 | return cmd; 15 | } -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/decode/testing.h: -------------------------------------------------------------------------------- 1 | #ifndef TESTING_H 2 | #define TESTING_H 3 | 4 | #include "parser/parser.h" 5 | 6 | struct Tester { 7 | /*enum TEST_TARGET { kTrain, kDevelopment }; 8 | TEST_TARGET test_target; 9 | bool enable_decision_acc_test; 10 | bool enable_pred_detail_test; 11 | 12 | Parser* parser; 13 | unsigned n_tests;*/ 14 | 15 | static po::options_description get_options(); 16 | 17 | /*Tester(const po::variables_map& conf, Parser* parser_); 18 | 19 | void test(const po::variables_map& conf, 20 | Corpus& corpus, 21 | const std::string& model_name);*/ 22 | }; 23 | 24 | #endif // end for TESTING_H -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/evaluate/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories (${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/src/left_to_right/) 2 | 3 | add_library (parser_l2r_evaluate evaluate.cc evaluate.h) 4 | 5 | target_link_libraries (parser_l2r_evaluate parser_l2r_parser) 6 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/evaluate/evaluate.cc: -------------------------------------------------------------------------------- 1 | #include "evaluate.h" 2 | #include "logging.h" 3 | #include "sys_utils.h" 4 | #include 5 | #include 6 | 7 | float evaluate(const po::variables_map & conf, 8 | Corpus & corpus, 9 | Parser & parser, 10 | const std::string & output, 11 | bool devel) { 12 | auto t_start = std::chrono::high_resolution_clock::now(); 13 | unsigned kUNK = corpus.get_or_add_word(Corpus::UNK); 14 | 15 | std::ofstream ofs(output); 16 | parser.inactivate_training(); 17 | 18 | unsigned n = (devel ? corpus.n_devel : corpus.n_test); 19 | std::unordered_map & inputs = (devel ? 
corpus.devel_inputs : corpus.test_inputs); 20 | 21 | for (unsigned sid = 0; sid < n; ++sid) { 22 | 23 | ofs << "# ::tok"; 24 | for (unsigned i = 0; i < inputs[sid].size() - 1; ++i) { //except for _ROOT_ 25 | ofs << " " << inputs[sid][i].w_str; 26 | } 27 | ofs << std::endl; 28 | 29 | InputUnits& input_units = inputs[sid]; 30 | 31 | for (InputUnit& u : input_units) { 32 | if (!corpus.vocab.count(u.wid)) { u.wid = kUNK; } 33 | } 34 | dynet::ComputationGraph cg; 35 | ActionUnits output; 36 | 37 | unsigned len = input_units.size(); 38 | State state(len); 39 | 40 | parser.new_graph(cg); 41 | 42 | parser.initialize(cg, input_units, state); 43 | unsigned n_actions = 0; 44 | while (!state.terminated() && n_actions++ < 500) { 45 | // collect all valid actions. 46 | std::vector valid_actions; 47 | parser.sys.get_valid_actions(state, valid_actions); 48 | //std::cerr << valid_actions.size() << std::endl; 49 | 50 | std::vector scores = dynet::as_vector(cg.get_value(parser.get_scores())); 51 | 52 | auto payload = Parser::get_best_action(scores, valid_actions); 53 | unsigned best_a = payload.first; 54 | unsigned best_c = 0; 55 | //if CONFIRM 56 | if (best_a == 0) { 57 | unsigned wid = 0; 58 | if (conf["system"].as() == "swap") { 59 | wid = state.stack.back().first; 60 | } else if (conf["system"].as() == "eager") { 61 | wid = state.buffer.back().first; 62 | } else { 63 | BOOST_ASSERT_MSG(false, "Illegal System"); 64 | } 65 | 66 | std::vector confirm_scores = dynet::as_vector(cg.get_value(parser.get_confirm_values(wid))); 67 | float best_score = -1e9f; 68 | for (unsigned i = 0; i < confirm_scores.size(); i++) { 69 | if (confirm_scores[i] > best_score) { 70 | best_score = confirm_scores[i]; 71 | best_c = i; 72 | } 73 | } 74 | //std::cerr << "# ::action\t" << "CONFIRM\t" << 75 | // corpus.word_map.get(wid) << "\t"; 76 | ofs << "# ::action\t" 77 | << "CONFIRM\t" 78 | << (corpus.word_map.contains(wid) ? corpus.word_map.get(wid) : std::string("_UNK_")) 79 | << "\t"; 80 | if (corpus.confirm_map.find(wid) == corpus.confirm_map.end()) { 81 | //std::cerr << corpus.word_map.get(wid) << std::endl; 82 | ofs << (corpus.word_map.contains(wid) ? corpus.word_map.get(wid) : std::string("_UNK_")) << std::endl; 83 | } else { 84 | //std::cerr << corpus.confirm_map[wid].get(best_c) << std::endl; 85 | ofs << corpus.confirm_map[wid].get(best_c) << std::endl; 86 | } 87 | 88 | 89 | } else { 90 | //std::cerr << "# ::action\t" << parser.sys.action_map.get(best_a) << std::endl; 91 | ofs << "# ::action\t" << parser.sys.action_map.get(best_a) << std::endl; 92 | } 93 | parser.perform_action(best_a, cg, state); 94 | } 95 | 96 | for (InputUnit& u : input_units) { u.wid = u.aux_wid; } 97 | 98 | ofs << std::endl; 99 | 100 | //ofs && confirm 101 | } 102 | ofs.close(); 103 | auto t_end = std::chrono::high_resolution_clock::now(); 104 | float f_score = execute_and_get_result(conf["external_eval"].as() + 105 | " " + 106 | (devel ? 
107 | conf["devel_gold"].as() : conf["test_gold"].as()) + 108 | " " + 109 | output); 110 | _INFO << "Evaluate:: Smatch " << f_score << " [" << corpus.n_devel << 111 | " sents in " << std::chrono::duration(t_end - t_start).count() << " ms]"; 112 | return f_score; 113 | } 114 | 115 | float evaluate_oracle(const po::variables_map & conf, 116 | Corpus & corpus, 117 | Parser & parser, 118 | const std::string & output, 119 | bool devel) { 120 | auto t_start = std::chrono::high_resolution_clock::now(); 121 | unsigned kUNK = corpus.get_or_add_word(Corpus::UNK); 122 | 123 | std::ofstream ofs(output); 124 | parser.inactivate_training(); 125 | 126 | unsigned n = (devel ? corpus.n_devel : corpus.n_test); 127 | std::unordered_map & inputs = (devel ? corpus.devel_inputs : corpus.test_inputs); 128 | std::unordered_map & actions = (devel ? corpus.devel_actions : corpus.test_actions); 129 | 130 | for (unsigned sid = 0; sid < n; ++sid) { 131 | 132 | ofs << "# ::tok"; 133 | for (unsigned i = 0; i < inputs[sid].size() - 1; ++i) { //except for _ROOT_ 134 | ofs << " " << inputs[sid][i].w_str; 135 | } 136 | ofs << std::endl; 137 | 138 | InputUnits& input_units = inputs[sid]; 139 | ActionUnits & parse_units = actions[sid]; 140 | 141 | for (InputUnit& u : input_units) { 142 | if (!corpus.vocab.count(u.wid)) { u.wid = kUNK; } 143 | } 144 | dynet::ComputationGraph cg; 145 | ActionUnits output; 146 | 147 | unsigned len = input_units.size(); 148 | State state(len); 149 | 150 | parser.new_graph(cg); 151 | 152 | parser.initialize(cg, input_units, state); 153 | unsigned n_actions = 0; 154 | while (!state.terminated() && n_actions++ < 500) { 155 | // collect all valid actions. 156 | std::vector valid_actions; 157 | parser.sys.get_valid_actions(state, valid_actions); 158 | //std::cerr << valid_actions.size() << std::endl; 159 | 160 | std::vector scores = dynet::as_vector(cg.get_value(parser.get_scores())); 161 | 162 | auto payload = Parser::get_best_action(scores, valid_actions); 163 | unsigned best_a = payload.first; 164 | unsigned best_c = 0; 165 | //if CONFIRM 166 | if (best_a == 0) { 167 | unsigned wid = 0; 168 | if (conf["system"].as() == "swap") { 169 | wid = state.stack.back().first; 170 | } else if (conf["system"].as() == "eager") { 171 | wid = state.buffer.back().first; 172 | } else { 173 | BOOST_ASSERT_MSG(false, "Illegal System"); 174 | } 175 | 176 | std::vector confirm_scores = dynet::as_vector(cg.get_value(parser.get_confirm_values(wid))); 177 | float best_score = -1e9f; 178 | for (unsigned i = 0; i < confirm_scores.size(); i++) { 179 | if (confirm_scores[i] > best_score) { 180 | best_score = confirm_scores[i]; 181 | best_c = i; 182 | } 183 | } 184 | //std::cerr << "# ::action\t" << "CONFIRM\t" << 185 | // corpus.word_map.get(wid) << "\t"; 186 | ofs << "# ::action\t" 187 | << "CONFIRM\t" 188 | << (corpus.word_map.contains(wid) ? corpus.word_map.get(wid) : std::string("_UNK_")) 189 | << "\t"; 190 | if (corpus.confirm_map.find(wid) == corpus.confirm_map.end()) { 191 | //std::cerr << corpus.word_map.get(wid) << std::endl; 192 | ofs << (corpus.word_map.contains(wid) ? 
corpus.word_map.get(wid) : std::string("_UNK_")) << std::endl; 193 | } else { 194 | //std::cerr << corpus.confirm_map[wid].get(best_c) << std::endl; 195 | ofs << corpus.confirm_map[wid].get(best_c) << std::endl; 196 | } 197 | 198 | 199 | } else { 200 | //std::cerr << "# ::action\t" << parser.sys.action_map.get(best_a) << std::endl; 201 | ofs << "# ::action\t" << parser.sys.action_map.get(best_a) << std::endl; 202 | } 203 | best_a = parse_units[n_actions].aid; 204 | parser.perform_action(best_a, cg, state); 205 | } 206 | 207 | for (InputUnit& u : input_units) { u.wid = u.aux_wid; } 208 | 209 | ofs << std::endl; 210 | 211 | //ofs && confirm 212 | } 213 | ofs.close(); 214 | auto t_end = std::chrono::high_resolution_clock::now(); 215 | float f_score = execute_and_get_result(conf["external_eval"].as() + 216 | " " + 217 | (devel ? 218 | conf["devel_gold"].as() : conf["test_gold"].as()) + 219 | " " + 220 | output); 221 | _INFO << "Evaluate:: Smatch " << f_score << " [" << corpus.n_devel << 222 | " sents in " << std::chrono::duration(t_end - t_start).count() << " ms]"; 223 | return f_score; 224 | } 225 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/evaluate/evaluate.h: -------------------------------------------------------------------------------- 1 | #ifndef EVALUATE_H 2 | #define EVALUATE_H 3 | 4 | #include 5 | #include 6 | #include "corpus.h" 7 | #include "parser/parser.h" 8 | #include 9 | 10 | namespace po = boost::program_options; 11 | 12 | float evaluate(const po::variables_map & conf, 13 | Corpus & corpus, 14 | Parser & parser, 15 | const std::string& output, 16 | bool devel); 17 | 18 | float evaluate_oracle(const po::variables_map & conf, 19 | Corpus & corpus, 20 | Parser & parser, 21 | const std::string& output, 22 | bool devel); 23 | 24 | 25 | #endif // end for EVALUATE_H -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/main.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "dynet/init.h" 5 | #include "corpus.h" 6 | #include "logging.h" 7 | #include "sys_utils.h" 8 | #include "trainer_utils.h" 9 | #include "parser/parser_builder.h" 10 | #include "system/swap.h" 11 | #include "system/eager.h" 12 | #include "train/algorithm.h" 13 | #include "evaluate/evaluate.h" 14 | #include "decode/testing.h" 15 | #include 16 | #include 17 | 18 | namespace po = boost::program_options; 19 | 20 | void init_command_line(int argc, char* argv[], po::variables_map& conf) { 21 | po::options_description general("Transition-based AMR parser."); 22 | general.add_options() 23 | ("train,t", "Use to specify to perform training.") 24 | ("architecture", po::value()->default_value("eager"), "The architecture [swap, eager].") 25 | ("algorithm", po::value()->default_value("supervised"), 26 | "The choice of reinforcement learning algorithm [supervised]") 27 | ("training_data,T", po::value(), "The path to the training data.") 28 | ("devel_data,d", po::value(), "The path to the development data.") 29 | ("test_data,e", po::value(), "The path to the test data.") 30 | ("pretrained,w", po::value(), "The path to the word embedding.") 31 | ("devel_gold", po::value(), "The path to the development data.") 32 | ("test_gold", po::value(), "The path to the test data.") 33 | ("model,m", po::value(), "The path to the model.") 34 | ("system", po::value()->default_value("eager"), "The transition system [swap, eager].") 35 | 
("unk_strategy,o", po::value()->default_value(1), "The unknown word strategy.") 36 | ("unk_prob,u", po::value()->default_value(0.2f), "The probability for replacing the training word.") 37 | ("layers", po::value()->default_value(2), "The number of layers in LSTM.") 38 | ("word_dim", po::value()->default_value(100), "Word dim") 39 | ("pos_dim", po::value()->default_value(20), "POS dim, set it as 0 to disable POS.") 40 | ("pretrained_dim", po::value()->default_value(100), "Pretrained input dimension.") 41 | ("char_dim", po::value()->default_value(50), "Character input dimension.") 42 | ("newnode_dim", po::value()->default_value(100), "Newnode embedding dimension.") 43 | ("action_dim", po::value()->default_value(20), "The dimension for action.") 44 | ("relation_dim", po::value()->default_value(32), "The dimension for relation.") 45 | ("entity_dim", po::value()->default_value(32), "The dimension for entity.") 46 | ("label_dim", po::value()->default_value(20), "The dimension for label.") 47 | ("lstm_input_dim", po::value()->default_value(100), "The dimension for lstm input.") 48 | ("hidden_dim", po::value()->default_value(100), "The dimension for hidden unit.") 49 | ("dropout", po::value()->default_value(0.f), "The dropout rate.") 50 | ("reward_type", po::value()->default_value("local"), 51 | "The type of reward [local, local0p10, local00n1, global, global_norm, global_maxout].") 52 | ("batch_size", po::value()->default_value(1), "The size of batch.") 53 | ("gamma", po::value()->default_value(1.f), "The gamma, reward discount factor.") 54 | ("max_iter", po::value()->default_value(10), "The maximum number of iteration.") 55 | ("report_stops", po::value()->default_value(100), "The reporting stops") 56 | ("report_reward", "Use to specify to report reward and q-value in evaluation.") 57 | ("evaluate_oracle", "Use to specify use oracle.") 58 | ("evaluate_stops", po::value()->default_value(2500), "The evaluation stops") 59 | ("evaluate_skips", po::value()->default_value(0), "skip evaluation on the first n round.") 60 | ("external_eval", po::value()->default_value("python -u ../scripts/eval.py"), "config the path for evaluation script") 61 | ("lambda", po::value()->default_value(0.f), "The L2 regularizer, should not set in --dynet-l2.") 62 | ("output", po::value(), "The path to the output file.") 63 | ("beam_size", po::value(), "The beam size.") 64 | ("random_seed", po::value()->default_value(7743), "The value of random seed.") 65 | ("verbose,v", "Details logging.") 66 | ("help,h", "show help information") 67 | ; 68 | 69 | po::options_description optimizer = get_optimizer_options(); 70 | po::options_description supervise = SupervisedTrainer::get_options(); 71 | po::options_description test = Tester::get_options(); 72 | 73 | po::options_description cmd("Allowed options"); 74 | cmd.add(general) 75 | .add(optimizer) 76 | .add(supervise) 77 | .add(test) 78 | ; 79 | 80 | po::store(po::parse_command_line(argc, argv, cmd), conf); 81 | if (conf.count("help")) { 82 | std::cerr << cmd << std::endl; 83 | exit(1); 84 | } 85 | init_boost_log(conf.count("verbose") > 0); 86 | if (!conf.count("training_data")) { 87 | std::cerr << "Please specify --training_data (-T), even in test" << std::endl; 88 | exit(1); 89 | } 90 | } 91 | 92 | int main(int argc, char** argv) { 93 | dynet::initialize(argc, argv, false); 94 | std::cerr << "command:"; 95 | for (int i = 0; i < argc; ++i) { std::cerr << ' ' << argv[i]; } 96 | std::cerr << std::endl; 97 | 98 | po::variables_map conf; 99 | init_command_line(argc, argv, conf); 100 | 101 
| dynet::rndeng = new std::mt19937(conf["random_seed"].as()); 102 | 103 | std::string model_name; 104 | if (conf.count("train")) { 105 | if (conf.count("model")) { 106 | model_name = conf["model"].as(); 107 | _INFO << "Main:: write parameters to: " << model_name; 108 | } else { 109 | std::string prefix("parser_l2r"); 110 | prefix = prefix + "." + conf["algorithm"].as(); 111 | model_name = get_model_name(conf, prefix); 112 | _INFO << "Main:: write parameters to: " << model_name; 113 | } 114 | } else { 115 | model_name = conf["model"].as(); 116 | _INFO << "Main:: evaluating model from: " << model_name; 117 | } 118 | 119 | Corpus corpus; 120 | corpus.load_training_data(conf["training_data"].as()); 121 | corpus.stat(); 122 | 123 | corpus.get_vocabulary_and_singletons(); 124 | 125 | std::unordered_map> pretrained; 126 | if (conf.count("pretrained")) { 127 | load_pretrained_word_embedding(conf["pretrained"].as(), 128 | conf["pretrained_dim"].as(), 129 | pretrained, corpus); 130 | } 131 | _INFO << "Main:: after loading pretrained embedding, size(vocabulary)=" << corpus.word_map.size(); 132 | 133 | dynet::ParameterCollection model; 134 | TransitionSystem* sys = nullptr; 135 | 136 | std::string system_name = conf["system"].as(); 137 | if (system_name == "swap") { 138 | sys = new Swap(corpus.action_map, corpus.node_map, corpus.rel_map, corpus.entity_map); 139 | } else if (system_name == "eager") { 140 | sys = new Eager(corpus.action_map, corpus.node_map, corpus.rel_map, corpus.entity_map); 141 | } else { 142 | _ERROR << "Main:: Unknown transition system: " << system_name; 143 | exit(1); 144 | } 145 | _INFO << "Main:: transition system: " << system_name; 146 | 147 | Parser* parser = ParserBuilder().build(conf, model, (*sys), corpus, pretrained); 148 | 149 | _INFO << "Main:: char_map unk id: " << corpus.char_map.get(corpus.UNK); 150 | 151 | corpus.load_devel_data(conf["devel_data"].as()); 152 | _INFO << "Main:: after loading development data, size(vocabulary)=" << corpus.word_map.size(); 153 | 154 | if (conf.count("test_data")) { 155 | corpus.load_test_data(conf["test_data"].as()); 156 | _INFO << "Main:: after loading test data, size(vocabulary)=" << corpus.word_map.size(); 157 | } 158 | 159 | std::string output; 160 | if (conf.count("output")) { 161 | output = conf["output"].as(); 162 | } else { 163 | int pid = portable_getpid(); 164 | #ifdef _MSC_VER 165 | output = "parser_l2r.evaluator." + boost::lexical_cast(pid); 166 | #else 167 | output = "/tmp/parser_l2r.evaluator." 
+ boost::lexical_cast(pid); 168 | #endif 169 | } 170 | _INFO << "Main:: write tmp file to: " << output; 171 | 172 | if (conf.count("train")) { 173 | const std::string algorithm = conf["algorithm"].as(); 174 | _INFO << "Main:: algorithm: " << algorithm; 175 | if (algorithm == "supervised" || algorithm == "sup") { 176 | SupervisedTrainer trainer(conf, parser); 177 | trainer.train(conf, corpus, model_name, output); 178 | }/* else if (algorithm == "testing") { 179 | Tester tester(conf, parser); 180 | tester.test(conf, corpus, model_name); 181 | } else { 182 | _ERROR << "Main:: Unknown RL algorithm."; 183 | }*/ 184 | } 185 | 186 | dynet::load_dynet_model(model_name, (&model)); 187 | float dev_f, test_f; 188 | if (conf.count("evaluate_oracle")) { 189 | dev_f = evaluate_oracle(conf, corpus, (*parser), output, true); 190 | test_f = evaluate_oracle(conf, corpus, (*parser), output, false); 191 | } else { 192 | dev_f = evaluate(conf, corpus, (*parser), output, true); 193 | test_f = evaluate(conf, corpus, (*parser), output, false); 194 | } 195 | _INFO << "Final score: dev: " << dev_f << ", test: " << test_f; 196 | 197 | return 0; 198 | } 199 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/parser/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories (${PROJECT_SOURCE_DIR}/src/ ${PROJECT_SOURCE_DIR}/src/left_to_right/) 2 | 3 | add_library (parser_l2r_parser 4 | parser.cc 5 | parser.h 6 | parser_swap.cc 7 | parser_swap.h 8 | parser_eager.cc 9 | parser_eager.h 10 | parser_builder.cc 11 | parser_builder.h) 12 | 13 | target_link_libraries (parser_l2r_parser parser_l2r_system) 14 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/parser/parser.cc: -------------------------------------------------------------------------------- 1 | #include "parser.h" 2 | #include "dynet/expr.h" 3 | #include "corpus.h" 4 | #include "logging.h" 5 | #include 6 | #include 7 | 8 | std::pair Parser::get_best_action(const std::vector& scores, 9 | const std::vector& valid_actions) { 10 | unsigned best_a = valid_actions[0]; 11 | float best_score = scores[best_a]; 12 | //! should use next valid action. 
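//  `scores` holds one unnormalized value per action id, but only the ids listed
//  in `valid_actions` may be chosen, so the argmax below scans that subset
//  (index 0 seeds best_a/best_score, hence the loop starts at 1).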
13 | for (unsigned i = 1; i < valid_actions.size(); ++i) { 14 | unsigned a = valid_actions[i]; 15 | if (best_score < scores[a]) { 16 | best_a = a; 17 | best_score = scores[a]; 18 | } 19 | } 20 | return std::make_pair(best_a, best_score); 21 | } 22 | 23 | dynet::Expression Parser::get_scores() { 24 | return get_a_values(); 25 | } 26 | 27 | void Parser::initialize(dynet::ComputationGraph & cg, 28 | const InputUnits & input, 29 | State & state) { 30 | initialize_state(input, state); 31 | initialize_parser(cg, input); 32 | } 33 | 34 | void Parser::initialize_state(const InputUnits & input, State & state) { 35 | unsigned len = input.size(); 36 | state.buffer.resize(len); 37 | for (unsigned i = 0; i < len; ++i) { state.buffer[len - i - 1] = std::make_pair(input[i].wid, 0); } 38 | state.buffer[0].second = 2; //Corpus::ROOT; 39 | } -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/parser/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef RLPARSER_LEFT_TO_RIGHT_S2A_PARSER_H 2 | #define RLPARSER_LEFT_TO_RIGHT_S2A_PARSER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "corpus.h" 8 | #include "system/state.h" 9 | #include "system/system.h" 10 | #include "dynet/expr.h" 11 | 12 | namespace po = boost::program_options; 13 | 14 | struct Parser { 15 | dynet::ParameterCollection& model; 16 | TransitionSystem& sys; 17 | std::string system_name; 18 | 19 | Parser(dynet::ParameterCollection & m, 20 | TransitionSystem& s, 21 | const std::string & sys_name) : 22 | model(m), sys(s), system_name(sys_name){} 23 | 24 | virtual Parser* copy_architecture(dynet::Model& new_model) = 0; 25 | virtual void activate_training() = 0; 26 | virtual void inactivate_training() = 0; 27 | virtual void new_graph(dynet::ComputationGraph& cg) = 0; 28 | virtual std::vector get_params() = 0; 29 | 30 | void initialize(dynet::ComputationGraph& cg, 31 | const InputUnits& input, 32 | State& state); 33 | 34 | void initialize_state(const InputUnits& input, 35 | State& state); 36 | 37 | virtual void initialize_parser(dynet::ComputationGraph& cg, 38 | const InputUnits& input) = 0; 39 | 40 | virtual void perform_action(const unsigned& action, 41 | dynet::ComputationGraph& cg, 42 | State& state) = 0; 43 | 44 | static std::pair get_best_action(const std::vector& scores, 45 | const std::vector& valid_actions); 46 | 47 | /// Get the un-softmaxed scores from the LSTM-parser. 
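/// This simply forwards to the subclass's get_a_values() and returns one raw
/// logit per transition action; callers normalize themselves (e.g. with
/// dynet::pickneglogsoftmax during training, or a plain softmax at test time).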
48 | dynet::Expression get_scores(); 49 | 50 | virtual dynet::Expression get_confirm_values(unsigned wid) = 0; 51 | virtual dynet::Expression get_a_values() = 0; 52 | }; 53 | 54 | #endif // end for RLPARSER_LEFT_TO_RIGHT_S2A_PARSER_H 55 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/parser/parser_builder.cc: -------------------------------------------------------------------------------- 1 | #include "parser_swap.h" 2 | #include "parser_eager.h" 3 | #include "parser_builder.h" 4 | #include "logging.h" 5 | 6 | po::options_description ParserBuilder::get_options() { 7 | po::options_description cmd("Parser settings."); 8 | return cmd; 9 | } 10 | 11 | Parser * ParserBuilder::build(const po::variables_map& conf, 12 | dynet::ParameterCollection & model, 13 | TransitionSystem& sys, 14 | const Corpus& corpus, 15 | const std::unordered_map>& pretrained) { 16 | std::string system_name = conf["system"].as(); 17 | Parser* parser = nullptr; 18 | std::string arch_name = conf["architecture"].as(); 19 | if (arch_name == "swap") { 20 | parser = new ParserSwap(model, 21 | corpus.vocab.size() + 10, 22 | conf["word_dim"].as(), 23 | corpus.pos_map.size() + 10, 24 | conf["pos_dim"].as(), 25 | corpus.word_map.size() + 1, 26 | conf["pretrained_dim"].as(), 27 | corpus.char_map.size() + 1, 28 | conf["char_dim"].as(), 29 | sys.num_actions(), 30 | conf["action_dim"].as(), 31 | sys.node_map.size(), 32 | conf["lstm_input_dim"].as(), 33 | sys.rel_map.size(), 34 | conf["relation_dim"].as(), 35 | sys.entity_map.size(), 36 | conf["entity_dim"].as(), 37 | conf["layers"].as(), 38 | conf["lstm_input_dim"].as(), 39 | conf["hidden_dim"].as(), 40 | system_name, 41 | sys, 42 | pretrained, 43 | corpus.confirm_map, 44 | corpus.char_map); 45 | } else if (arch_name == "eager") { 46 | parser = new ParserEager(model, 47 | corpus.vocab.size() + 10, 48 | conf["word_dim"].as(), 49 | corpus.pos_map.size() + 10, 50 | conf["pos_dim"].as(), 51 | corpus.word_map.size() + 1, 52 | conf["pretrained_dim"].as(), 53 | corpus.char_map.size() + 1, 54 | conf["char_dim"].as(), 55 | sys.num_actions(), 56 | conf["action_dim"].as(), 57 | sys.node_map.size(), 58 | conf["lstm_input_dim"].as(), 59 | sys.rel_map.size(), 60 | conf["relation_dim"].as(), 61 | sys.entity_map.size(), 62 | conf["entity_dim"].as(), 63 | conf["layers"].as(), 64 | conf["lstm_input_dim"].as(), 65 | conf["hidden_dim"].as(), 66 | system_name, 67 | sys, 68 | pretrained, 69 | corpus.confirm_map, 70 | corpus.char_map); 71 | } else { 72 | _ERROR << "Main:: Unknown architecture name: " << arch_name; 73 | } 74 | _INFO << "Main:: architecture: " << arch_name; 75 | return parser; 76 | } 77 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/parser/parser_builder.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_BUILDER_H 2 | #define PARSER_BUILDER_H 3 | 4 | #include 5 | #include "parser.h" 6 | #include "dynet/model.h" 7 | #include 8 | 9 | namespace po = boost::program_options; 10 | 11 | struct ParserBuilder { 12 | static po::options_description get_options(); 13 | static Parser* build(const po::variables_map& conf, 14 | dynet::ParameterCollection& model, 15 | TransitionSystem& sys, 16 | const Corpus& corpus, 17 | const std::unordered_map>& pretrained); 18 | }; 19 | #endif // end for PARSER_BUILDER_H -------------------------------------------------------------------------------- 
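
Both architectures are wired up through the same call; a minimal sketch of the intended use (assuming `conf`, `corpus`, `sys`, and `pretrained` are already populated as in main.cc above):

    dynet::ParameterCollection model;
    Parser* parser = ParserBuilder::build(conf, model, *sys, corpus, pretrained);
    // conf["architecture"] selects ParserSwap or ParserEager; any other name only
    // logs an error, so build() returns nullptr and the caller should check for it.
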
/amr_parser/src/left_to_right/parser/parser_eager.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_EAGER_H 2 | #define PARSER_EAGER_H 3 | 4 | #include "parser.h" 5 | #include "lstm.h" 6 | #include "dynet_layer/layer.h" 7 | #include 8 | #include 9 | #include 10 | 11 | namespace po = boost::program_options; 12 | 13 | struct ParserEager : public Parser { 14 | struct TransitionSystemFunction { 15 | virtual void perform_action(const unsigned& action, 16 | dynet::ComputationGraph& cg, 17 | std::vector& stack, 18 | std::vector& buffer, 19 | std::vector& deque, 20 | dynet::RNNBuilder& a_lstm, dynet::RNNPointer& a_pointer, 21 | dynet::RNNBuilder& s_lstm, dynet::RNNPointer& s_pointer, 22 | dynet::RNNBuilder& q_lstm, dynet::RNNPointer& q_pointer, 23 | dynet::RNNBuilder& d_lstm, dynet::RNNPointer& d_pointer, 24 | dynet::Expression& act_expr, 25 | const Alphabet & action_map, 26 | const Alphabet & node_map, 27 | SymbolEmbedding & node_emb, 28 | const Alphabet & rel_map, 29 | SymbolEmbedding & rel_emb, 30 | const Alphabet & entity_map, 31 | SymbolEmbedding & entity_emb, 32 | DenseLayer & confirm_layer, 33 | Merge3Layer & merge_parent, 34 | Merge3Layer & merge_child, 35 | Merge2Layer & merge_token, 36 | Merge2Layer & merge_entity) = 0; 37 | dynet::Expression get_arg_emb(const std::string & a_str, const Alphabet & arg_map, SymbolEmbedding & arg_emb); 38 | }; 39 | 40 | struct EagerFunction : public TransitionSystemFunction { 41 | void perform_action(const unsigned& action, 42 | dynet::ComputationGraph& cg, 43 | std::vector& stack, 44 | std::vector& buffer, 45 | std::vector& deque, 46 | dynet::RNNBuilder& a_lstm, dynet::RNNPointer& a_pointer, 47 | dynet::RNNBuilder& s_lstm, dynet::RNNPointer& s_pointer, 48 | dynet::RNNBuilder& q_lstm, dynet::RNNPointer& q_pointer, 49 | dynet::RNNBuilder& d_lstm, dynet::RNNPointer& d_pointer, 50 | dynet::Expression& act_expr, 51 | const Alphabet & action_map, 52 | const Alphabet & node_map, 53 | SymbolEmbedding & node_emb, 54 | const Alphabet & rel_map, 55 | SymbolEmbedding & rel_emb, 56 | const Alphabet & entity_map, 57 | SymbolEmbedding & entity_emb, 58 | DenseLayer & confirm_layer, 59 | Merge3Layer & merge_parent, 60 | Merge3Layer & merge_child, 61 | Merge2Layer & merge_token, 62 | Merge2Layer & merge_entity) override; 63 | }; 64 | 65 | LSTMBuilder s_lstm; 66 | LSTMBuilder q_lstm; 67 | LSTMBuilder a_lstm; 68 | LSTMBuilder d_lstm; 69 | 70 | BiLSTMBuilder c_lstm; 71 | 72 | SymbolEmbedding word_emb; 73 | SymbolEmbedding pos_emb; 74 | SymbolEmbedding preword_emb; 75 | SymbolEmbedding act_emb; 76 | SymbolEmbedding char_emb; 77 | SymbolEmbedding node_emb; 78 | SymbolEmbedding rel_emb; 79 | SymbolEmbedding entity_emb; 80 | 81 | Merge3Layer merge_input; // merge (pos, pretained, char_emb) 82 | Merge4Layer merge; // merge (s_lstm, q_lstm, a_lstm, d_lstm) 83 | Merge3Layer merge_parent; // merge (parent, rel, child) -> parent 84 | Merge3Layer merge_child; // merge (parent, rel, child) -> child 85 | Merge2Layer merge_token; // merge (A, B) -> AB 86 | Merge2Layer merge_entity; // merge (AB, entity_label) -> X 87 | DenseLayer scorer; // Q / A value scorer. 88 | DenseLayer confirm_layer; 89 | 90 | 91 | Alphabet char_map; 92 | 93 | std::unordered_map confirm_scorer; //confirm scorer. 
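// Keyed by word id: confirm_map appears to give each word its own inventory of
// candidate concepts, and confirm_scorer the matching per-word output layer that
// get_confirm_values(wid) uses to score a CONFIRM decision for that word.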
94 | std::unordered_map confirm_map; 95 | 96 | dynet::Expression confirm_to_one; 97 | 98 | dynet::Parameter p_action_start; // start of action 99 | dynet::Parameter p_buffer_guard; // end of buffer 100 | dynet::Parameter p_stack_guard; // end of stack 101 | dynet::Parameter p_deque_guard; // end of deque 102 | dynet::Expression action_start; 103 | dynet::Expression buffer_guard; 104 | dynet::Expression stack_guard; 105 | dynet::Expression deque_guard; 106 | 107 | /// state machine 108 | dynet::RNNPointer s_pointer; 109 | dynet::RNNPointer q_pointer; 110 | dynet::RNNPointer a_pointer; 111 | dynet::RNNPointer d_pointer; 112 | std::vector stack; 113 | std::vector buffer; 114 | std::vector deque; 115 | 116 | bool trainable; 117 | /// The reference 118 | TransitionSystemFunction* sys_func; 119 | const std::unordered_map>& pretrained; 120 | 121 | /// The Configurations: useful for other models. 122 | unsigned size_w, dim_w, size_p, dim_p, size_t, dim_t, size_c, dim_c, size_a, dim_a, size_n, dim_n, size_r, dim_r, size_e, dim_e; 123 | unsigned n_layers, dim_lstm_in, dim_hidden; 124 | 125 | explicit ParserEager(dynet::ParameterCollection & m, 126 | unsigned size_w, // 127 | unsigned dim_w, // word size, word dim 128 | unsigned size_p, // 129 | unsigned dim_p, // pos size, pos dim 130 | unsigned size_t, // 131 | unsigned dim_t, // pword size, pword dim 132 | unsigned size_c, // 133 | unsigned dim_c, // char size, char dim 134 | unsigned size_a, // 135 | unsigned dim_a, // act size, act dim 136 | unsigned size_n, // 137 | unsigned dim_n, // newnode size, newnode dim 138 | unsigned size_r, 139 | unsigned dim_r, // rel size, rel dim 140 | unsigned size_e, 141 | unsigned dim_e, // entity size, entity dim 142 | unsigned n_layers, 143 | unsigned dim_lstm_in, 144 | unsigned dim_hidden, 145 | const std::string& system_name, 146 | TransitionSystem& system, 147 | const std::unordered_map>& pretrained, 148 | const std::unordered_map & confirm_map, 149 | const Alphabet & char_map); 150 | 151 | Parser* copy_architecture(dynet::Model& new_model) override; 152 | void activate_training() override; 153 | void inactivate_training() override; 154 | void new_graph(dynet::ComputationGraph& cg) override; 155 | std::vector get_params() override; 156 | 157 | void initialize_parser(dynet::ComputationGraph& cg, 158 | const InputUnits& input) override; 159 | 160 | void perform_action(const unsigned& action, 161 | dynet::ComputationGraph& cg, 162 | State& state) override; 163 | 164 | /// Get the un-softmaxed scores from the LSTM-parser. 
165 | dynet::Expression get_confirm_values(unsigned wid) override; 166 | dynet::Expression get_a_values() override; 167 | }; 168 | 169 | #endif // end for PARSER_H 170 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/parser/parser_swap.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_FULL_H 2 | #define PARSER_FULL_H 3 | 4 | #include "parser.h" 5 | #include "lstm.h" 6 | #include "dynet_layer/layer.h" 7 | #include 8 | #include 9 | #include 10 | 11 | namespace po = boost::program_options; 12 | 13 | struct ParserSwap : public Parser { 14 | struct TransitionSystemFunction { 15 | virtual void perform_action(const unsigned& action, 16 | dynet::ComputationGraph& cg, 17 | std::vector& stack, 18 | std::vector& buffer, 19 | dynet::RNNBuilder& a_lstm, dynet::RNNPointer& a_pointer, 20 | dynet::RNNBuilder& s_lstm, dynet::RNNPointer& s_pointer, 21 | dynet::RNNBuilder& q_lstm, dynet::RNNPointer& q_pointer, 22 | dynet::Expression& act_expr, 23 | const Alphabet & action_map, 24 | const Alphabet & node_map, 25 | SymbolEmbedding & node_emb, 26 | const Alphabet & rel_map, 27 | SymbolEmbedding & rel_emb, 28 | const Alphabet & entity_map, 29 | SymbolEmbedding & entity_emb, 30 | DenseLayer & confirm_layer, 31 | Merge3Layer & merge_parent, 32 | Merge3Layer & merge_child, 33 | Merge2Layer & merge_token, 34 | Merge2Layer & merge_entity) = 0; 35 | dynet::Expression get_arg_emb(const std::string & a_str, const Alphabet & arg_map, SymbolEmbedding & arg_emb); 36 | }; 37 | 38 | struct SwapFunction : public TransitionSystemFunction { 39 | void perform_action(const unsigned& action, 40 | dynet::ComputationGraph& cg, 41 | std::vector& stack, 42 | std::vector& buffer, 43 | dynet::RNNBuilder& a_lstm, dynet::RNNPointer& a_pointer, 44 | dynet::RNNBuilder& s_lstm, dynet::RNNPointer& s_pointer, 45 | dynet::RNNBuilder& q_lstm, dynet::RNNPointer& q_pointer, 46 | dynet::Expression& act_expr, 47 | const Alphabet & action_map, 48 | const Alphabet & node_map, 49 | SymbolEmbedding & node_emb, 50 | const Alphabet & rel_map, 51 | SymbolEmbedding & rel_emb, 52 | const Alphabet & entity_map, 53 | SymbolEmbedding & entity_emb, 54 | DenseLayer & confirm_layer, 55 | Merge3Layer & merge_parent, 56 | Merge3Layer & merge_child, 57 | Merge2Layer & merge_token, 58 | Merge2Layer & merge_entity) override; 59 | }; 60 | 61 | LSTMBuilder s_lstm; 62 | LSTMBuilder q_lstm; 63 | LSTMBuilder a_lstm; 64 | BiLSTMBuilder c_lstm; 65 | 66 | SymbolEmbedding word_emb; 67 | SymbolEmbedding pos_emb; 68 | SymbolEmbedding preword_emb; 69 | SymbolEmbedding act_emb; 70 | SymbolEmbedding char_emb; 71 | SymbolEmbedding node_emb; 72 | SymbolEmbedding rel_emb; 73 | SymbolEmbedding entity_emb; 74 | 75 | Merge3Layer merge_input; // merge (pos, pretained, char_emb) 76 | Merge3Layer merge; // merge (s_lstm, q_lstm, a_lstm) 77 | Merge3Layer merge_parent; // merge (parent, rel, child) -> parent 78 | Merge3Layer merge_child; // merge (parent, rel, child) -> child 79 | Merge2Layer merge_token; // merge (A, B) -> AB 80 | Merge2Layer merge_entity; // merge (AB, entity_label) -> X 81 | DenseLayer scorer; // Q / A value scorer. 82 | DenseLayer confirm_layer; 83 | 84 | Alphabet char_map; 85 | std::unordered_map confirm_scorer; //confirm scorer. 
86 | std::unordered_map confirm_map; 87 | 88 | dynet::Expression confirm_to_one; 89 | 90 | dynet::Parameter p_action_start; // start of action 91 | dynet::Parameter p_buffer_guard; // end of buffer 92 | dynet::Parameter p_stack_guard; // end of stack 93 | dynet::Expression action_start; 94 | dynet::Expression buffer_guard; 95 | dynet::Expression stack_guard; 96 | 97 | /// state machine 98 | dynet::RNNPointer s_pointer; 99 | dynet::RNNPointer q_pointer; 100 | dynet::RNNPointer a_pointer; 101 | std::vector stack; 102 | std::vector buffer; 103 | 104 | bool trainable; 105 | /// The reference 106 | TransitionSystemFunction* sys_func; 107 | const std::unordered_map>& pretrained; 108 | 109 | /// The Configurations: useful for other models. 110 | unsigned size_w, dim_w, size_p, dim_p, size_t, dim_t, size_c, dim_c, size_a, dim_a, size_n, dim_n, size_r, dim_r, size_e, dim_e; 111 | unsigned n_layers, dim_lstm_in, dim_hidden; 112 | 113 | explicit ParserSwap(dynet::ParameterCollection& m, 114 | unsigned size_w, // 115 | unsigned dim_w, // word size, word dim 116 | unsigned size_p, // 117 | unsigned dim_p, // pos size, pos dim 118 | unsigned size_t, // 119 | unsigned dim_t, // pword size, pword dim 120 | unsigned size_c, // 121 | unsigned dim_c, // char size, char dim 122 | unsigned size_a, // 123 | unsigned dim_a, // act size, act dim 124 | unsigned size_n, // 125 | unsigned dim_n, // newnode size, newnode dim 126 | unsigned size_r, 127 | unsigned dim_r, // rel size, rel dim 128 | unsigned size_e, 129 | unsigned dim_e, // entity size, entity dim 130 | unsigned n_layers, 131 | unsigned dim_lstm_in, 132 | unsigned dim_hidden, 133 | const std::string& system_name, 134 | TransitionSystem& system, 135 | const std::unordered_map>& pretrained, 136 | const std::unordered_map & confirm_map, 137 | const Alphabet & char_map); 138 | 139 | Parser* copy_architecture(dynet::Model& new_model) override; 140 | void activate_training() override; 141 | void inactivate_training() override; 142 | void new_graph(dynet::ComputationGraph& cg) override; 143 | std::vector get_params() override; 144 | 145 | void initialize_parser(dynet::ComputationGraph& cg, 146 | const InputUnits& input) override; 147 | 148 | void perform_action(const unsigned& action, 149 | dynet::ComputationGraph& cg, 150 | State& state) override; 151 | 152 | /// Get the un-softmaxed scores from the LSTM-parser. 
153 | dynet::Expression get_confirm_values(unsigned wid) override; 154 | dynet::Expression get_a_values() override; 155 | }; 156 | 157 | #endif // end for PARSER_H 158 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/system/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories (${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/src/left_to_right/) 2 | 3 | add_library (parser_l2r_system 4 | swap.cc 5 | swap.h 6 | eager.cc 7 | eager.h 8 | system.cc 9 | system.h 10 | state.h 11 | state.cc) 12 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/system/eager.cc: -------------------------------------------------------------------------------- 1 | #include "eager.h" 2 | #include "logging.h" 3 | #include "corpus.h" 4 | #include 5 | #include 6 | 7 | Eager::Eager(const Alphabet & action_map, const Alphabet & node_map, const Alphabet & rel_map, const Alphabet & entity_map) : 8 | TransitionSystem(action_map, node_map, rel_map, entity_map) { 9 | n_actions = action_map.size(); 10 | _INFO << "TransitionSystem:: show action names:"; 11 | for (const auto& x : action_map.str_to_id) { 12 | _INFO << "- " << x.first; 13 | } 14 | } 15 | 16 | std::string Eager::name(unsigned id) const { 17 | BOOST_ASSERT_MSG(id < action_map.size(), "id in illegal range"); 18 | return action_map.get(id); 19 | } 20 | 21 | unsigned Eager::num_actions() const { return n_actions; } 22 | 23 | void Eager::perform_action(State & state, const unsigned & action) { 24 | std::string action_type = get_action_type(action, action_map); 25 | if (action_type == "SHIFT") { 26 | shift_unsafe(state); 27 | } else if (action_type == "CONFIRM") { 28 | confirm_unsafe(state); 29 | } else if (action_type == "MERGE") { 30 | merge_unsafe(state); 31 | } else if (action_type == "ENTITY") { 32 | entity_unsafe(state); 33 | } else if (action_type == "NEWNODE") { 34 | unsigned nid = get_action_arg1(node_map, action); 35 | newnode_unsafe(state, nid); 36 | } else if (action_type == "REDUCE") { 37 | reduce_unsafe(state); 38 | } else if (action_type == "DROP") { 39 | drop_unsafe(state); 40 | } else if (action_type == "CACHE") { 41 | cache_unsafe(state); 42 | } else if (action_type == "LEFT") { 43 | unsigned rid = get_action_arg1(rel_map, action); 44 | la_unsafe(state, rid); 45 | } else if (action_type == "RIGHT") { 46 | unsigned rid = get_action_arg1(rel_map, action); 47 | ra_unsafe(state, rid); 48 | } else { 49 | BOOST_ASSERT_MSG(false, "Illegal Action"); 50 | } 51 | } 52 | 53 | void Eager::get_valid_actions(const State & state, 54 | std::vector& valid_actions) { 55 | valid_actions.clear(); 56 | for (unsigned a = 0; a < n_actions; ++a) { 57 | //if (!is_valid_action(state, action_names[a])) { continue; } 58 | if (!is_valid_action(state, a)) { continue; } 59 | valid_actions.push_back(a); 60 | } 61 | BOOST_ASSERT_MSG(valid_actions.size() > 0, "There should be one or more valid action."); 62 | } 63 | 64 | void Eager::shift_unsafe(State & state) const { 65 | while (state.deque.size() > 0) { 66 | state.stack.push_back(state.deque.back()); 67 | state.deque.pop_back(); 68 | } 69 | state.stack.push_back(state.buffer.back()); 70 | state.buffer.pop_back(); 71 | } 72 | 73 | void Eager::confirm_unsafe(State & state) const { 74 | state.buffer[state.buffer.size() - 1] = std::make_pair(state.new_amr_node(), 2); 75 | } 76 | 77 | void Eager::reduce_unsafe(State & state) const { 78 | 
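// REDUCE discards the (confirmed) stack top; validity requires stack.back().second > 1.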
state.stack.pop_back(); 79 | } 80 | 81 | void Eager::merge_unsafe(State & state) const { 82 | state.buffer.pop_back(); 83 | state.buffer[state.buffer.size() - 1].second = 1; 84 | state.buffer[state.buffer.size() - 1].first = 1; 85 | // state.buffer[state.buffer.size() - 1].first = -1; 86 | } 87 | 88 | void Eager::entity_unsafe(State & state) const { 89 | state.buffer[state.buffer.size() - 1] = std::make_pair(state.new_amr_node(), 2); 90 | } 91 | 92 | void Eager::newnode_unsafe(State & state, const unsigned & node) const { 93 | state.buffer.push_back(std::make_pair(state.new_amr_node(), state.buffer.back().second + 1)); 94 | state.buffer[state.buffer.size() - 2].second += 5; 95 | } 96 | 97 | void Eager::drop_unsafe(State & state) const { 98 | state.buffer.pop_back(); 99 | } 100 | 101 | void Eager::cache_unsafe(State & state) const { 102 | state.deque.push_back(state.stack.back()); 103 | state.stack.pop_back(); 104 | } 105 | 106 | void Eager::la_unsafe(State & state, const unsigned & rel) const { 107 | unsigned u = state.buffer.back().first; 108 | unsigned v = state.stack.back().first; 109 | state.existing_edges.insert({ u, rel }); 110 | } 111 | 112 | void Eager::ra_unsafe(State& state, const unsigned & rel) const { 113 | unsigned u = state.stack.back().first; 114 | unsigned v = state.buffer.back().first; 115 | state.existing_edges.insert({ u, rel }); 116 | } 117 | 118 | 119 | std::string Eager::get_action_type(const unsigned & idx, const Alphabet & action_map) { 120 | std::string action = action_map.get(idx); 121 | std::vector terms; 122 | boost::algorithm::split(terms, action, boost::is_any_of(" \t"), boost::token_compress_on); 123 | return terms[0]; 124 | } 125 | 126 | bool Eager::is_valid_action(const State& state, const unsigned& action) const { 127 | std::string action_type = get_action_type(action, action_map); 128 | if (action_type == "_UNK_") { 129 | return false; 130 | } else if (action_type == "SHIFT") { 131 | return state.buffer.size() > 0 && state.buffer.back().second > 1; 132 | } else if (action_type == "CONFIRM") { 133 | return state.buffer.size() > 0 && state.buffer.back().second < 2; 134 | } else if (action_type == "MERGE") { 135 | return state.buffer.size() > 1 && state.buffer.back().second < 2 && state.buffer[state.buffer.size() - 2].second == 0; 136 | } else if (action_type == "ENTITY") { 137 | return state.buffer.size() > 0 && state.buffer.back().second < 2; 138 | } else if (action_type == "REDUCE") { 139 | return state.stack.size() > 0 && state.stack.back().second > 1; 140 | } else if (action_type == "DROP") { 141 | return state.buffer.size() > 0 && state.buffer.back().second == 0; 142 | } else if (action_type == "CACHE") { 143 | return state.buffer.size() > 0 && state.stack.size() > 0; 144 | } else if (action_type == "NEWNODE") { 145 | return state.buffer.size() > 0 && state.buffer.back().second > 1 && state.buffer.back().second <= 5; 146 | } else if (action_type == "LEFT" || action_type == "RIGHT") { 147 | if (state.stack.size() < 1 || state.stack.back().second < 2 || state.buffer.size() < 1 || state.buffer.back().second < 2) { 148 | return false; 149 | } 150 | unsigned u = state.stack.back().first; 151 | unsigned v = state.buffer.back().first; 152 | if (action_type == "LEFT") { 153 | std::swap(u, v); 154 | } 155 | std::vector terms; 156 | std::string a_str = action_map.get(action); 157 | boost::algorithm::split(terms, a_str, boost::is_any_of(" \t"), boost::token_compress_on); 158 | unsigned rid = rel_map.get(terms[1]); 159 | return state.existing_edges.find({ u, rid 
}) == state.existing_edges.end(); 160 | } else { 161 | BOOST_ASSERT_MSG(false, "Illegal Action"); 162 | } 163 | return true; 164 | } 165 |
-------------------------------------------------------------------------------- /amr_parser/src/left_to_right/system/eager.h: -------------------------------------------------------------------------------- 1 | #ifndef RLPARSER_LEFT_TO_RIGHT_EAGER_H 2 | #define RLPARSER_LEFT_TO_RIGHT_EAGER_H 3 | 4 | #include "system.h" 5 | 6 | struct Eager : public TransitionSystem { 7 | unsigned n_actions; 8 | 9 | Eager(const Alphabet & action_map, 10 | const Alphabet & node_map, 11 | const Alphabet & rel_map, 12 | const Alphabet & entity_map); 13 | 14 | std::string name(unsigned id) const override; 15 | 16 | unsigned num_actions() const override; 17 | 18 | void perform_action(State& state, const unsigned& action) override; 19 | 20 | void get_valid_actions(const State& state, 21 | std::vector<unsigned>& valid_actions) override; 22 | 23 | bool is_valid_action(const State& state, const unsigned& act) const override; 24 | 25 | void shift_unsafe(State& state) const; 26 | 27 | void confirm_unsafe(State & state) const; 28 | 29 | void merge_unsafe(State& state) const; 30 | 31 | void entity_unsafe(State & state) const; 32 | 33 | void reduce_unsafe(State& state) const; 34 | 35 | void drop_unsafe(State& state) const; 36 | 37 | void cache_unsafe(State& state) const; 38 | 39 | void la_unsafe(State & state, const unsigned & rel) const; 40 | 41 | void ra_unsafe(State& state, const unsigned & rel) const; 42 | 43 | void newnode_unsafe(State& state, const unsigned & node) const; 44 | 45 | static std::string get_action_type(const unsigned& action, const Alphabet & action_map); 46 | 47 | }; 48 | 49 | #endif // end for RLPARSER_LEFT_TO_RIGHT_EAGER_H
-------------------------------------------------------------------------------- /amr_parser/src/left_to_right/system/state.cc: -------------------------------------------------------------------------------- 1 | #include "state.h" 2 | 3 | 4 | State::State(unsigned n) : num_nodes(0) { 5 | } 6 | 7 | unsigned State::new_amr_node() { 8 | return num_nodes++; 9 | } 10 | 11 | bool State::terminated() { 12 | return stack.empty() && buffer.empty(); 13 | } 14 |
-------------------------------------------------------------------------------- /amr_parser/src/left_to_right/system/state.h: -------------------------------------------------------------------------------- 1 | #ifndef RLPARSER_LEFT_TO_RIGHT_STATE_H 2 | #define RLPARSER_LEFT_TO_RIGHT_STATE_H 3 | 4 | #include <set> 5 | #include <vector> 6 | 7 | struct State { 8 | static const unsigned MAX_N_WORDS = 1024; 9 | 10 | std::vector<std::pair<unsigned, unsigned>> stack; // each item is (word-or-node id, status flag); as used by the transition code: 0 = raw token, 1 = merged token, >= 2 = concept node 11 | std::vector<std::pair<unsigned, unsigned>> buffer; 12 | std::vector<std::pair<unsigned, unsigned>> deque; 13 | std::vector<unsigned> aux; 14 | 15 | std::set<std::vector<unsigned>> existing_edges; // {head node, relation id} keys, used to block duplicate (head, relation) edges 16 | 17 | unsigned num_nodes; 18 | 19 | State(unsigned n); 20 | 21 | unsigned new_amr_node(); 22 | 23 | bool terminated(); 24 | }; 25 | 26 | 27 | #endif // end for RLPARSER_LEFT_TO_RIGHT_STATE_H
-------------------------------------------------------------------------------- /amr_parser/src/left_to_right/system/swap.cc: -------------------------------------------------------------------------------- 1 | #include "swap.h" 2 | #include "logging.h" 3 | #include "corpus.h" 4 | #include <boost/algorithm/string.hpp> 5 | 6 | Swap::Swap(const Alphabet & action_map, const Alphabet & node_map, const Alphabet & rel_map, const Alphabet & entity_map) : 7 | TransitionSystem(action_map, node_map, rel_map, entity_map) { 8 | n_actions = action_map.size(); 9 | _INFO << "TransitionSystem:: show action names:"; 10 |
for (const auto& x : action_map.str_to_id) { 11 | _INFO << "- " << x.first; 12 | } 13 | } 14 | 15 | std::string Swap::name(unsigned id) const { 16 | BOOST_ASSERT_MSG(id < action_map.size(), "id in illegal range"); 17 | return action_map.get(id); 18 | } 19 | 20 | unsigned Swap::num_actions() const { return n_actions; } 21 | 22 | void Swap::perform_action(State & state, const unsigned & action) { 23 | std::string action_type = get_action_type(action, action_map); 24 | if (action_type == "SHIFT") { 25 | shift_unsafe(state); 26 | } else if (action_type == "CONFIRM") { 27 | confirm_unsafe(state); 28 | } else if (action_type == "REDUCE") { 29 | reduce_unsafe(state); 30 | } else if (action_type == "MERGE") { 31 | merge_unsafe(state); 32 | } else if (action_type == "ENTITY") { 33 | entity_unsafe(state); 34 | } else if (action_type == "NEWNODE") { 35 | unsigned nid = get_action_arg1(node_map, action); 36 | newnode_unsafe(state, nid); 37 | } else if (action_type == "SWAP") { 38 | swap_unsafe(state); 39 | } else if (action_type == "LEFT") { 40 | unsigned rid = get_action_arg1(rel_map, action); 41 | la_unsafe(state, rid); 42 | } else if (action_type == "RIGHT") { 43 | unsigned rid = get_action_arg1(rel_map, action); 44 | ra_unsafe(state, rid); 45 | } else { 46 | BOOST_ASSERT_MSG(false, "Illegal Action"); 47 | } 48 | } 49 | 50 | void Swap::get_valid_actions(const State & state, 51 | std::vector& valid_actions) { 52 | valid_actions.clear(); 53 | for (unsigned a = 0; a < n_actions; ++a) { 54 | //if (!is_valid_action(state, action_names[a])) { continue; } 55 | if (!is_valid_action(state, a)) { continue; } 56 | valid_actions.push_back(a); 57 | } 58 | BOOST_ASSERT_MSG(valid_actions.size() > 0, "There should be one or more valid action."); 59 | } 60 | 61 | void Swap::shift_unsafe(State & state) const { 62 | state.stack.push_back(state.buffer.back()); 63 | state.buffer.pop_back(); 64 | } 65 | 66 | void Swap::confirm_unsafe(State & state) const { 67 | state.stack[state.stack.size() - 1] = std::make_pair(state.new_amr_node(), 2); 68 | } 69 | 70 | void Swap::reduce_unsafe(State & state) const { 71 | state.stack.pop_back(); 72 | } 73 | 74 | void Swap::merge_unsafe(State & state) const { 75 | state.stack.pop_back(); 76 | state.stack[state.stack.size() - 1].second = 1; 77 | } 78 | 79 | void Swap::entity_unsafe(State & state) const { 80 | state.stack[state.stack.size() - 1] = std::make_pair(state.new_amr_node(), 2); 81 | } 82 | 83 | void Swap::newnode_unsafe(State & state, const unsigned & node) const { 84 | state.stack.push_back(std::make_pair(state.new_amr_node(), state.stack.back().second + 1)); 85 | state.stack[state.stack.size() - 2].second += 5; 86 | } 87 | 88 | void Swap::swap_unsafe(State & state) const { 89 | auto j = state.stack.back(); state.stack.pop_back(); 90 | auto i = state.stack.back(); state.stack.pop_back(); 91 | state.stack.push_back(j); 92 | state.buffer.push_back(i); 93 | } 94 | 95 | void Swap::la_unsafe(State & state, const unsigned & rel) const { 96 | unsigned u = state.stack[state.stack.size() - 2].first; 97 | unsigned v = state.stack.back().first; 98 | state.existing_edges.insert({ u, rel }); 99 | } 100 | 101 | void Swap::ra_unsafe(State& state, const unsigned & rel) const { 102 | unsigned u = state.stack.back().first; 103 | unsigned v = state.stack[state.stack.size() - 2].first; 104 | state.existing_edges.insert({ u, rel }); 105 | } 106 | 107 | 108 | std::string Swap::get_action_type(const unsigned & idx, const Alphabet & action_map) { 109 | std::string action = action_map.get(idx); 110 | 
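// Action names are stored as whitespace-separated strings ("TYPE" or "TYPE arg",
// cf. TransitionSystem::get_action_arg1), so the type is the first token after splitting.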
std::vector terms; 111 | boost::algorithm::split(terms, action, boost::is_any_of(" \t"), boost::token_compress_on); 112 | return terms[0]; 113 | } 114 | 115 | bool Swap::is_valid_action(const State& state, const unsigned& action) const { 116 | std::string action_type = get_action_type(action, action_map); 117 | if (action_type == "_UNK_") { 118 | return false; 119 | } else if (action_type == "SHIFT") { 120 | return state.buffer.size() > 0; 121 | } else if (action_type == "CONFIRM") { 122 | return state.stack.size() > 0 && state.stack.back().second == 0; 123 | } else if (action_type == "REDUCE") { 124 | return state.stack.size() > 0; 125 | } else if (action_type == "MERGE") { 126 | return state.stack.size() > 1 && state.stack.back().second < 2 && state.stack[state.stack.size() - 2].second == 0; 127 | } else if (action_type == "ENTITY") { 128 | return state.stack.size() > 0 && state.stack.back().second < 2; 129 | } else if (action_type == "NEWNODE") { 130 | return state.stack.size() > 0 && state.stack.back().second > 1 && state.stack.back().second <= 5; 131 | } else if (action_type == "SWAP") { 132 | return state.stack.size() > 1 && state.stack.back().second > 1 && state.stack[state.stack.size() - 2].second > 1; 133 | } else if (action_type == "LEFT" || action_type == "RIGHT") { 134 | if (state.stack.size() <= 1 || state.stack.back().second < 2 || state.stack[state.stack.size() - 2].second < 2) { 135 | return false; 136 | } 137 | unsigned u = state.stack.back().first; 138 | unsigned v = state.stack[state.stack.size() - 2].first; 139 | if (action_type == "LEFT") { 140 | std::swap(u, v); 141 | } 142 | std::vector terms; 143 | std::string a_str = action_map.get(action); 144 | boost::algorithm::split(terms, a_str, boost::is_any_of(" \t"), boost::token_compress_on); 145 | unsigned rid = rel_map.get(terms[1]); 146 | return state.existing_edges.find({ u, rid }) == state.existing_edges.end(); 147 | } else { 148 | BOOST_ASSERT_MSG(false, "Illegal Action"); 149 | } 150 | return true; 151 | } 152 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/system/swap.h: -------------------------------------------------------------------------------- 1 | #ifndef RLPARSER_LEFT_TO_RIGHT_SWAP_H 2 | #define RLPARSER_LEFT_TO_RIGHT_SWAP_H 3 | 4 | #include "system.h" 5 | 6 | struct Swap : public TransitionSystem { 7 | unsigned n_actions; 8 | 9 | Swap(const Alphabet & action_map, const Alphabet & node_map, const Alphabet & rel_map, const Alphabet & entity_map); 10 | 11 | std::string name(unsigned id) const override; 12 | 13 | unsigned num_actions() const override; 14 | 15 | void perform_action(State& state, const unsigned& action) override; 16 | 17 | void get_valid_actions(const State& state, 18 | std::vector& valid_actions) override; 19 | 20 | bool is_valid_action(const State& state, const unsigned& act) const override; 21 | 22 | void shift_unsafe(State& state) const; 23 | void confirm_unsafe(State & state) const; 24 | void reduce_unsafe(State& state) const; 25 | void merge_unsafe(State& state) const; 26 | void entity_unsafe(State & state) const; 27 | void newnode_unsafe(State& state, const unsigned & node) const; 28 | void swap_unsafe(State& state) const; 29 | void la_unsafe(State & state, const unsigned & rel) const; 30 | void ra_unsafe(State& state, const unsigned & rel) const; 31 | 32 | static std::string get_action_type(const unsigned& action, const Alphabet & action_map); 33 | 34 | }; 35 | 36 | #endif // end for RLPARSER_LEFT_TO_RIGHT_SWAP_H 
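
For orientation, a transition system is driven by a loop like the sketch below (hypothetical setup: the Alphabets are assumed to be populated from a Corpus, and a real run scores the candidates with the parser instead of taking the first one):

    State state(len);
    // fill state.buffer right to left from the input, as Parser::initialize_state does
    Swap sys(action_map, node_map, rel_map, entity_map);
    std::vector<unsigned> valid;
    while (!state.terminated()) {
      sys.get_valid_actions(state, valid);   // legal subset of [0, num_actions())
      unsigned a = valid.front();            // placeholder policy; see Parser::get_best_action
      sys.perform_action(state, a);
    }
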
-------------------------------------------------------------------------------- /amr_parser/src/left_to_right/system/system.cc: -------------------------------------------------------------------------------- 1 | #include "system.h" 2 | #include "logging.h" 3 | #include 4 | #include 5 | 6 | 7 | unsigned TransitionSystem::get_action_arg1(const Alphabet & map, const unsigned &action) { 8 | std::vector terms; 9 | std::string a_str = action_map.get(action); 10 | boost::algorithm::split(terms, a_str, boost::is_any_of(" \t"), boost::token_compress_on); 11 | return map.get(terms[1]); 12 | } -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/system/system.h: -------------------------------------------------------------------------------- 1 | #ifndef RLPARSER_LEFT_TO_RIGHT_SYSTEM_H 2 | #define RLPARSER_LEFT_TO_RIGHT_SYSTEM_H 3 | 4 | #include 5 | #include "state.h" 6 | #include "corpus.h" 7 | 8 | struct TransitionSystem { 9 | enum REWARD { kLocal, kGlobal, kGlobalMaxout }; 10 | REWARD reward_type; 11 | 12 | Alphabet action_map; 13 | Alphabet node_map; 14 | Alphabet rel_map; 15 | Alphabet entity_map; 16 | 17 | TransitionSystem(const Alphabet & action_map, 18 | const Alphabet & node_map, 19 | const Alphabet & rel_map, 20 | const Alphabet & entity_map) : 21 | action_map(action_map), node_map(node_map), rel_map(rel_map), entity_map(entity_map) {} 22 | 23 | unsigned get_action_arg1(const Alphabet & map, const unsigned & action); 24 | 25 | virtual std::string name(unsigned id) const = 0; 26 | 27 | virtual unsigned num_actions() const = 0; 28 | 29 | virtual void perform_action(State& state, const unsigned& action) = 0; 30 | 31 | virtual bool is_valid_action(const State& state, const unsigned& act) const = 0; 32 | 33 | virtual void get_valid_actions(const State& state, std::vector& valid_actions) = 0; 34 | }; 35 | 36 | #endif // end for RLPARSER_LEFT_TO_RIGHT_SYSTEM_H 37 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/train/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories (${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/src/left_to_right/) 2 | 3 | add_library (parser_l2r_train 4 | train.cc 5 | train.h 6 | train_supervised.cc 7 | train_supervised.h 8 | algorithm.h) 9 | 10 | target_link_libraries (parser_l2r_train parser_l2r_parser) 11 | -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/train/algorithm.h: -------------------------------------------------------------------------------- 1 | #ifndef TRAIN_ALGORITHM_H 2 | #define TRAIN_ALGORITHM_H 3 | 4 | #include "train/train_supervised.h" 5 | 6 | #endif // end for TRAIN_ALGORITHM -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/train/train.cc: -------------------------------------------------------------------------------- 1 | #include "train.h" 2 | #include "logging.h" 3 | #include "evaluate/evaluate.h" 4 | 5 | Trainer::Trainer(const po::variables_map & conf) { 6 | gamma = conf["gamma"].as(); 7 | _INFO << "RL:: gamma = " << gamma; 8 | 9 | lambda_ = conf["lambda"].as(); 10 | _INFO << "RL:: lambda = " << lambda_; 11 | } 12 | 13 | void Trainer::eval(const po::variables_map& conf, 14 | const std::string & output, 15 | const std::string & model_name, 16 | float & current_best, 17 | Corpus & corpus, 18 | Parser & parser, 19 | bool update_and_save) { 
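// Evaluate on the development set; when a new best is reached, save the model
// and also report the corresponding test-set score.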
20 | float f = evaluate(conf, corpus, parser, output, true); 21 | if (update_and_save && f > current_best) { 22 | current_best = f; 23 | dynet::save_dynet_model(model_name, (&(parser.model))); 24 | f = evaluate(conf, corpus, parser, output, false); 25 | _INFO << "Trainer:: new best record achieved " << current_best << ", test: " << f; 26 | } 27 | } 28 | 29 | dynet::Expression Trainer::l2(Parser & parser, unsigned n) { 30 | std::vector<dynet::Expression> reg; 31 | for (auto e : parser.get_params()) { reg.push_back(dynet::squared_norm(e)); } 32 | return (0.5 * n) * lambda_ * dynet::sum(reg); 33 | } 34 |
-------------------------------------------------------------------------------- /amr_parser/src/left_to_right/train/train.h: -------------------------------------------------------------------------------- 1 | #ifndef TRAIN_H 2 | #define TRAIN_H 3 | 4 | #include <boost/program_options.hpp> 5 | #include "parser/parser.h" 6 | #include "corpus.h" 7 | namespace po = boost::program_options; 8 | 9 | struct Trainer { 10 | float gamma; 11 | float lambda_; 12 | 13 | Trainer(const po::variables_map& conf); 14 | 15 | void eval(const po::variables_map& conf, 16 | const std::string & output, 17 | const std::string & model_name, 18 | float & current_best, 19 | Corpus & corpus, 20 | Parser & parser, 21 | bool update_and_save = true); 22 | 23 | void eval(const po::variables_map& conf, 24 | const std::string & output, 25 | const std::string & model_name, 26 | float & current_best, 27 | Corpus & corpus, 28 | Parser & parser, 29 | Parser & parser2, 30 | bool update_and_save = true); 31 | 32 | dynet::Expression l2(Parser & parser, unsigned n); 33 | }; 34 | 35 | #endif // end for TRAIN_H
-------------------------------------------------------------------------------- /amr_parser/src/left_to_right/train/train_supervised.cc: -------------------------------------------------------------------------------- 1 | #include "trainer_utils.h" 2 | #include "train_supervised.h" 3 | #include "logging.h" 4 | #include "evaluate/evaluate.h" 5 | 6 | po::options_description SupervisedTrainer::get_options() { 7 | po::options_description cmd("Supervised options"); 8 | cmd.add_options() 9 | ("supervised_oracle", po::value<std::string>()->default_value("static"), "The type of oracle in supervised learning [static|dynamic|pseudo_dynamic].") 10 | ("supervised_objective", po::value<std::string>()->default_value("crossentropy"), "The learning objective [crossentropy|rank|bipartite_rank].") 11 | ("supervised_do_pretrain_iter", po::value<unsigned>()->default_value(1), "The number of pretrain iterations on the dynamic oracle.") 12 | ("supervised_do_explore_prob", po::value<float>()->default_value(0.9), "The probability of exploration.") 13 | ("supervised_pseudo_oracle_model", po::value<std::string>(), "The path to the pseudo dynamic oracle model; required in pseudo_dynamic mode.") 14 | ; 15 | return cmd; 16 | } 17 | 18 | SupervisedTrainer::SupervisedTrainer(const po::variables_map& conf, Parser * p) : 19 | Trainer(conf), 20 | parser(p), 21 | pseudo_dynamic_oracle(nullptr), 22 | pseudo_dynamic_oracle_model(nullptr) { 23 | if (conf["supervised_oracle"].as<std::string>() == "static") { 24 | oracle_type = kStatic; 25 | } else { 26 | _ERROR << "Unknown oracle: " << conf["supervised_oracle"].as<std::string>(); 27 | } 28 | 29 | if (conf["supervised_objective"].as<std::string>() == "crossentropy") { 30 | objective_type = kCrossEntropy; 31 | } else if (conf["supervised_objective"].as<std::string>() == "rank") { 32 | objective_type = kRank; 33 | } else { 34 | objective_type = kBipartieRank; 35 | } 36 | lambda_ = conf["lambda"].as<float>(); 37 | _INFO << "SUP:: learning objective " << conf["supervised_objective"].as<std::string>();
38 | 39 | system = conf["system"].as(); 40 | } 41 | 42 | void SupervisedTrainer::train(const po::variables_map& conf, 43 | Corpus& corpus, 44 | const std::string& name, 45 | const std::string& output) { 46 | dynet::ParameterCollection& model = parser->model; 47 | _INFO << "SUP:: start lstm-parser supervised training."; 48 | 49 | dynet::Trainer* trainer = get_trainer(conf, model); 50 | // unsigned kUNK = corpus.get_or_add_word(Corpus::UNK); 51 | unsigned max_iter = conf["max_iter"].as(); 52 | 53 | float llh = 0.f; 54 | float llh_in_batch = 0.f; 55 | float best_f = 0.f; 56 | 57 | std::vector order; 58 | get_orders(corpus, order); 59 | float n_train = order.size(); 60 | 61 | unsigned logc = 0; 62 | // unsigned unk_strategy = conf["unk_strategy"].as(); 63 | // float unk_prob = conf["unk_prob"].as(); 64 | unsigned report_stops = conf["report_stops"].as(); 65 | unsigned evaluate_stops = conf["evaluate_stops"].as(); 66 | unsigned evaluate_skips = conf["evaluate_skips"].as(); 67 | float eta0 = trainer->learning_rate; 68 | 69 | _INFO << "SUP:: will stop after " << max_iter << " iterations."; 70 | for (unsigned iter = 0; iter < max_iter; ++iter) { 71 | llh = 0; 72 | _INFO << "SUP:: start training iteration #" << iter << ", shuffled."; 73 | std::shuffle(order.begin(), order.end(), (*dynet::rndeng)); 74 | 75 | for (unsigned sid : order) { 76 | _TRACE << "sid=" << sid; 77 | InputUnits& input_units = corpus.training_inputs[sid]; 78 | const ActionUnits& parse_units = corpus.training_actions[sid]; 79 | //random_replace_singletons(unk_strategy, unk_prob, corpus.singleton, kUNK, input_units); 80 | 81 | float lp; 82 | 83 | lp = train_on_one_full_tree(input_units, parse_units, trainer, iter); 84 | 85 | llh += lp; 86 | llh_in_batch += lp; 87 | //restore_singletons(unk_strategy, input_units); 88 | 89 | ++logc; 90 | if (logc % report_stops == 0) { 91 | float epoch = (float(logc) / n_train); 92 | _INFO << "SUP:: iter #" << iter << " (epoch " << epoch << ") loss " << llh_in_batch; 93 | llh_in_batch = 0.f; 94 | } 95 | if (iter >= evaluate_skips && logc % evaluate_stops == 0) { 96 | eval(conf, output, name, best_f, corpus, *parser); 97 | } 98 | } 99 | 100 | _INFO << "SUP:: end of iter #" << iter << " loss " << llh; 101 | eval(conf, output, name, best_f, corpus, *parser); 102 | 103 | update_trainer(conf, eta0, float(iter), trainer); 104 | trainer->status(); 105 | } 106 | 107 | delete trainer; 108 | } 109 | 110 | float SupervisedTrainer::train_on_one_full_tree(const InputUnits& input_units, 111 | const ActionUnits& action_units, 112 | dynet::Trainer* trainer, 113 | unsigned iter) { 114 | dynet::ComputationGraph cg; 115 | parser->activate_training(); 116 | parser->new_graph(cg); 117 | 118 | std::vector loss; 119 | 120 | unsigned len = input_units.size(); 121 | //for (int i = 0; i < len; i++) { 122 | // std::cerr << input_units[i].w_str << " "; 123 | //} 124 | //std::cerr << std::endl; 125 | State state(len); 126 | parser->initialize(cg, input_units, state); 127 | 128 | unsigned illegal_action = parser->sys.num_actions(); 129 | unsigned n_actions = 0; 130 | while (!state.terminated()) { 131 | // collect all valid actions. 
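// With the static oracle the gold action is read directly from action_units; the
// model's scores are still computed so the chosen objective can turn them into a
// loss. Note that worst_gold_action is initialized but never reassigned below, so
// the bipartite-rank branch never contributes a loss term here (with a single
// gold action per step it would coincide with best_gold_action anyway).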
132 | std::vector valid_actions; 133 | parser->sys.get_valid_actions(state, valid_actions); 134 | 135 | dynet::Expression score_exprs = parser->get_scores(); 136 | std::vector scores = dynet::as_vector(cg.get_value(score_exprs)); 137 | unsigned action = 0; 138 | 139 | unsigned best_gold_action = illegal_action; 140 | unsigned worst_gold_action = illegal_action; 141 | unsigned best_non_gold_action = illegal_action; 142 | 143 | best_gold_action = action_units[n_actions].aid; 144 | //std::cerr << action_units[n_actions].a_str << std::endl; 145 | action = action_units[n_actions].aid; 146 | 147 | if (objective_type == kRank || objective_type == kBipartieRank) { 148 | float best_non_gold_action_score = -1e10; 149 | for (unsigned i = 0; i < valid_actions.size(); ++i) { 150 | unsigned act = valid_actions[i]; 151 | if (act != best_gold_action && (scores[act] > best_non_gold_action_score)) { 152 | best_non_gold_action = act; 153 | best_non_gold_action_score = scores[act]; 154 | } 155 | } 156 | } 157 | 158 | if (objective_type == kCrossEntropy) { 159 | loss.push_back(dynet::pickneglogsoftmax(score_exprs, best_gold_action)); 160 | } else if (objective_type == kRank) { 161 | if (best_gold_action != illegal_action && best_non_gold_action != illegal_action) { 162 | loss.push_back(dynet::pairwise_rank_loss( 163 | dynet::pick(score_exprs, best_gold_action), 164 | dynet::pick(score_exprs, best_non_gold_action) 165 | )); 166 | } 167 | } else { 168 | if (worst_gold_action != illegal_action && best_non_gold_action != illegal_action) { 169 | loss.push_back(dynet::pairwise_rank_loss( 170 | dynet::pick(score_exprs, worst_gold_action), 171 | dynet::pick(score_exprs, best_non_gold_action) 172 | )); 173 | } 174 | } 175 | 176 | //CONFIRM 177 | if (action == 0 && best_gold_action == 0) { 178 | dynet::Expression confirm_scores_expr; 179 | if (system == "eager") { 180 | confirm_scores_expr = parser->get_confirm_values(state.buffer.back().first); 181 | } else if (system == "swap") { 182 | confirm_scores_expr = parser->get_confirm_values(state.stack.back().first); 183 | } else { 184 | BOOST_ASSERT_MSG(false, "Illegal System"); 185 | } 186 | //std::cerr << confirm_scores_expr.dim()[0] << " " << confirm_scores_expr.dim()[1] << std::endl; 187 | //std::cerr << "~" << action_units[n_actions].idx << " " << state.stack.back().first << " " << state.stack.back().second << std::endl; 188 | //std::cerr << action_units[n_actions].idx << std::endl; 189 | loss.push_back(dynet::pickneglogsoftmax(confirm_scores_expr, action_units[n_actions].idx)); 190 | //std::cerr << action_units[n_actions].idx << std::endl; 191 | } 192 | 193 | parser->perform_action(action, cg, state); 194 | n_actions++; 195 | } 196 | float ret = 0.f; 197 | if (loss.size() > 0) { 198 | std::vector all_params = parser->get_params(); 199 | std::vector reg; 200 | for (auto e : all_params) { reg.push_back(dynet::squared_norm(e)); } 201 | dynet::Expression l = dynet::sum(loss) + 0.5 * loss.size() * lambda_ * dynet::sum(reg); 202 | ret = dynet::as_scalar(cg.incremental_forward(l)); 203 | cg.backward(l); 204 | trainer->update(); 205 | } 206 | return ret; 207 | } -------------------------------------------------------------------------------- /amr_parser/src/left_to_right/train/train_supervised.h: -------------------------------------------------------------------------------- 1 | #ifndef TRAIN_SUPERVISED_H 2 | #define TRAIN_SUPERVISED_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "parser/parser.h" 8 | #include "dynet/training.h" 9 | #include "train.h" 10 | 11 | 
namespace po = boost::program_options; 12 | 13 | struct SupervisedTrainer : public Trainer { 14 | enum ORACLE_TYPE { kStatic, kDynamic, kPseudoDynamic }; 15 | enum OBJECTIVE_TYPE { kCrossEntropy, kRank, kBipartieRank }; 16 | ORACLE_TYPE oracle_type; 17 | OBJECTIVE_TYPE objective_type; 18 | Parser* parser; 19 | Parser* pseudo_dynamic_oracle; 20 | dynet::Model* pseudo_dynamic_oracle_model; 21 | float do_pretrain_iter; 22 | float do_explore_prob; 23 | std::string system; 24 | 25 | 26 | static po::options_description get_options(); 27 | 28 | SupervisedTrainer(const po::variables_map& conf, Parser* parser); 29 | 30 | /* Code for supervised pretraining. */ 31 | void train(const po::variables_map& conf, 32 | Corpus& corpus, 33 | const std::string& name, 34 | const std::string& output); 35 | 36 | float train_on_one_full_tree(const InputUnits& input_units, 37 | const ActionUnits& action_units, 38 | dynet::Trainer* trainer, 39 | unsigned iter); 40 | }; 41 | 42 | #endif // end for TRAIN_SUPERVISED_H -------------------------------------------------------------------------------- /amr_parser/src/logging.cc: -------------------------------------------------------------------------------- 1 | #include "logging.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | void init_boost_log(bool verbose) { 11 | namespace logging = boost::log; 12 | namespace src = boost::log::sources; 13 | namespace expr = boost::log::expressions; 14 | namespace keywords = boost::log::keywords; 15 | 16 | logging::add_console_log( 17 | std::clog, 18 | keywords::format = ( 19 | expr::stream 20 | << expr::format_date_time< boost::posix_time::ptime >( 21 | "TimeStamp", 22 | "%Y-%m-%d %H:%M:%S") 23 | << " [" << logging::trivial::severity << "] " 24 | << expr::smessage 25 | ) 26 | ); 27 | 28 | if (verbose) { 29 | logging::core::get()->set_filter(logging::trivial::severity >= logging::trivial::trace); 30 | } else { 31 | logging::core::get()->set_filter(logging::trivial::severity > logging::trivial::trace); 32 | } 33 | 34 | logging::add_common_attributes(); 35 | } 36 | -------------------------------------------------------------------------------- /amr_parser/src/logging.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGGING_UTILS_H 2 | #define LOGGING_UTILS_H 3 | 4 | #include 5 | #define _TRACE BOOST_LOG_TRIVIAL(trace) 6 | #define _DEBUG BOOST_LOG_TRIVIAL(debug) 7 | #define _INFO BOOST_LOG_TRIVIAL(info) 8 | #define _WARN BOOST_LOG_TRIVIAL(warning) 9 | #define _ERROR BOOST_LOG_TRIVIAL(error) 10 | 11 | 12 | void init_boost_log(bool verbose); 13 | 14 | 15 | #endif // end for LOGGING_UTILS_H 16 | -------------------------------------------------------------------------------- /amr_parser/src/lstm.cc: -------------------------------------------------------------------------------- 1 | #include "lstm.h" 2 | 3 | enum { X2I, H2I, C2I, BI, X2O, H2O, C2O, BO, X2C, H2C, BC }; 4 | 5 | LSTMBuilder::LSTMBuilder(unsigned layers, 6 | unsigned input_dim, 7 | unsigned hidden_dim, 8 | dynet::ParameterCollection& model, 9 | bool trainable) : 10 | dynet::CoupledLSTMBuilder(layers, input_dim, hidden_dim, model), 11 | trainable(trainable) { 12 | } 13 | 14 | void LSTMBuilder::new_graph(dynet::ComputationGraph& cg) { 15 | if (trainable) { 16 | dynet::CoupledLSTMBuilder::new_graph(cg); 17 | } else { 18 | // cannot call sm.transition directly. this will waste some nodes 19 | // in computation graph. 
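// Workaround: let the base class build the graph as usual, then overwrite
// param_vars with dynet::const_parameter copies so no gradient flows into the
// LSTM weights; the X2I..BC order below must match CoupledLSTMBuilder's layout.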
20 | dynet::CoupledLSTMBuilder::new_graph(cg); 21 | param_vars.clear(); 22 | for (unsigned i = 0; i < layers; ++i) { 23 | auto& p = params[i]; 24 | 25 | //i 26 | dynet::Expression i_x2i = dynet::const_parameter(cg, p[X2I]); 27 | dynet::Expression i_h2i = dynet::const_parameter(cg, p[H2I]); 28 | dynet::Expression i_c2i = dynet::const_parameter(cg, p[C2I]); 29 | dynet::Expression i_bi = dynet::const_parameter(cg, p[BI]); 30 | //o 31 | dynet::Expression i_x2o = dynet::const_parameter(cg, p[X2O]); 32 | dynet::Expression i_h2o = dynet::const_parameter(cg, p[H2O]); 33 | dynet::Expression i_c2o = dynet::const_parameter(cg, p[C2O]); 34 | dynet::Expression i_bo = dynet::const_parameter(cg, p[BO]); 35 | //c 36 | dynet::Expression i_x2c = dynet::const_parameter(cg, p[X2C]); 37 | dynet::Expression i_h2c = dynet::const_parameter(cg, p[H2C]); 38 | dynet::Expression i_bc = dynet::const_parameter(cg, p[BC]); 39 | 40 | std::vector vars = { 41 | i_x2i, i_h2i, i_c2i, i_bi, 42 | i_x2o, i_h2o, i_c2o, i_bo, 43 | i_x2c, i_h2c, i_bc 44 | }; 45 | param_vars.push_back(vars); 46 | } // layers 47 | } 48 | } 49 | 50 | 51 | BiLSTMBuilder::BiLSTMBuilder(unsigned layers, 52 | unsigned input_dim, 53 | unsigned hidden_dim, 54 | dynet::ParameterCollection& model, 55 | bool trainable): 56 | trainable(trainable), 57 | fw_lstm(layers, input_dim, hidden_dim, model, trainable), 58 | bw_lstm(layers, input_dim, hidden_dim, model, trainable), 59 | p_fw_guard(model.add_parameters({ input_dim, 1 })), 60 | p_bw_guard(model.add_parameters({ input_dim, 1 })) { 61 | } 62 | 63 | void BiLSTMBuilder::new_graph(dynet::ComputationGraph &cg) { 64 | fw_lstm.new_graph(cg); 65 | bw_lstm.new_graph(cg); 66 | if (trainable) { 67 | fw_guard = dynet::parameter(cg, p_fw_guard); 68 | bw_guard = dynet::parameter(cg, p_bw_guard); 69 | } 70 | else { 71 | fw_guard = dynet::const_parameter(cg, p_fw_guard); 72 | bw_guard = dynet::const_parameter(cg, p_bw_guard); 73 | } 74 | } 75 | 76 | dynet::Expression BiLSTMBuilder::get_h(SymbolEmbedding &char_emb, const std::vector & c_id) { 77 | fw_lstm.start_new_sequence(); 78 | bw_lstm.start_new_sequence(); 79 | fw_lstm.add_input(fw_guard); 80 | bw_lstm.add_input(bw_guard); 81 | 82 | std::vector inputs(c_id.size()); 83 | for (int i = 0; i < c_id.size(); i++) { 84 | inputs[i] = char_emb.embed(c_id[i]); 85 | } 86 | for (int i = 0; i < inputs.size(); i++) { 87 | fw_lstm.add_input(inputs[i]); 88 | bw_lstm.add_input(inputs[inputs.size() - i - 1]); 89 | } 90 | return dynet::concatenate({ fw_lstm.get_h(inputs.size()).back(), bw_lstm.get_h(inputs.size()).back() }); 91 | } 92 | 93 | -------------------------------------------------------------------------------- /amr_parser/src/lstm.h: -------------------------------------------------------------------------------- 1 | #ifndef LSTM_CONST_NEW_GRAPH_H 2 | #define LSTM_CONST_NEW_GRAPH_H 3 | 4 | #include "dynet/lstm.h" 5 | #include "dynet/expr.h" 6 | #include "dynet/model.h" 7 | #include "dynet_layer/layer.h" 8 | #include "ds.h" 9 | 10 | struct LSTMBuilder : public dynet::CoupledLSTMBuilder { 11 | bool trainable; 12 | explicit LSTMBuilder(unsigned layers, 13 | unsigned input_dim, 14 | unsigned hidden_dim, 15 | dynet::ParameterCollection& model, 16 | bool trainable=true); 17 | void active_training() { trainable = true; } 18 | void inactive_training() { trainable = false; } 19 | void new_graph(dynet::ComputationGraph& cg); 20 | }; 21 | 22 | struct BiLSTMBuilder { 23 | bool trainable; 24 | LSTMBuilder fw_lstm; 25 | LSTMBuilder bw_lstm; 26 | dynet::Parameter p_fw_guard; 27 | 
dynet::Parameter p_bw_guard; 28 | 29 | dynet::Expression fw_guard; 30 | dynet::Expression bw_guard; 31 | BiLSTMBuilder(unsigned layers, 32 | unsigned input_dim, 33 | unsigned hidden_dim, 34 | dynet::ParameterCollection& model, 35 | bool trainable = true); 36 | 37 | void active_training() { fw_lstm.active_training(); bw_lstm.active_training(); } 38 | void inactive_training() { fw_lstm.inactive_training(); bw_lstm.inactive_training(); } 39 | void new_graph(dynet::ComputationGraph &cg); 40 | dynet::Expression get_h(SymbolEmbedding &char_emb, const std::vector<unsigned>& c_id); 41 | 42 | }; 43 | 44 | 45 | 46 | #endif // end for LSTM_CONST_NEW_GRAPH_H -------------------------------------------------------------------------------- /amr_parser/src/math_utils.cc: -------------------------------------------------------------------------------- 1 | #include "math_utils.h" 2 | #include <cmath> 3 | #include <boost/assert.hpp> 4 | void MeanStdevStreamer::clear() { n = 0; } 5 | 6 | void MeanStdevStreamer::push(double x) { 7 | n++; 8 | if (n == 1) { 9 | old_m = new_m = x; 10 | old_s = 0.; 11 | } else { 12 | new_m = old_m + (x - old_m) / n; 13 | new_s = old_s + (x - old_m) * (x - new_m); 14 | 15 | old_m = new_m; 16 | old_s = new_s; 17 | } 18 | } 19 | 20 | int MeanStdevStreamer::num_data_values() const { return n; } 21 | double MeanStdevStreamer::mean() const { return ((n > 0) ? new_m : 0.0); } 22 | double MeanStdevStreamer::variance() const { return ((n > 1) ? new_s / (n - 1) : 0.0); } 23 | double MeanStdevStreamer::stdev() const { return sqrt(variance()); } 24 | 25 | void mean_and_stddev(const std::deque<float>& data, 26 | float& mean, float& stddev) { 27 | float n = 0.; 28 | float sum1 = 0., sum2 = 0.; 29 | for (auto x : data) { sum1 += x; n += 1.; } 30 | mean = sum1 / n; 31 | for (auto x : data) { sum2 += (x - mean) * (x - mean); } 32 | stddev = sqrt(sum2 / (n - 1)); 33 | } 34 | 35 | void softmax_copy(const std::vector<float>& input, std::vector<float>& output) { 36 | BOOST_ASSERT_MSG(input.size() > 0, "input should have one or more element."); 37 | float m = input[0]; 38 | output.resize(input.size()); 39 | for (unsigned i = 1; i < input.size(); ++i) { m = (input[i] > m ? input[i] : m); } 40 | float s = 0.; 41 | for (unsigned i = 0; i < input.size(); ++i) { 42 | output[i] = exp(input[i] - m); 43 | s += output[i]; 44 | } 45 | for (unsigned i = 0; i < output.size(); ++i) { output[i] /= s; } 46 | } 47 | 48 | void softmax_inplace(std::vector<float>& x) { 49 | BOOST_ASSERT_MSG(x.size() > 0, "input should have one or more element."); 50 | float m = x[0]; 51 | for (const float& _x : x) { m = (_x > m ? _x : m); } 52 | float s = 0.; 53 | for (unsigned i = 0; i < x.size(); ++i) { 54 | x[i] = exp(x[i] - m); 55 | s += x[i]; 56 | } 57 | for (unsigned i = 0; i < x.size(); ++i) { x[i] /= s; } 58 | } 59 | 60 | void softmax_inplace_on_valid_indicies(std::vector<float>& x, 61 | const std::vector<unsigned>& valid_indices) { 62 | BOOST_ASSERT_MSG(x.size() > 0, "input should have one or more element."); 63 | BOOST_ASSERT_MSG(valid_indices.size() > 0, "input should have one or more indices."); 64 | float m = x[valid_indices[0]]; 65 | for (unsigned id : valid_indices) { m = (x[id] > m ? x[id] : m); } 66 | float s = 0.; 67 | for (unsigned id : valid_indices) { 68 | x[id] = exp(x[id] - m); 69 | s += x[id]; 70 | } 71 | for (unsigned id : valid_indices) { x[id] /= s; } 72 | } 73 | 74 | void unnormalized_softmax_inplace(std::vector<float>& x) { 75 | BOOST_ASSERT_MSG(x.size() > 0, "input should have one or more element."); 76 | float m = x[0]; 77 | for (const float& _x : x) { m = (_x > m ?
_x : m); } 78 | for (unsigned i = 0; i < x.size(); ++i) { x[i] = exp(x[i] - m); } 79 | } 80 | 81 | std::vector<unsigned> fisher_yates_shuffle(unsigned size, 82 | unsigned max_size, 83 | std::mt19937& gen) { 84 | BOOST_ASSERT(size < max_size); 85 | std::vector<unsigned> b(size); 86 | 87 | for (unsigned i = 0; i < max_size; ++i) { 88 | std::uniform_int_distribution<> dis(0, i); 89 | unsigned j = dis(gen); 90 | if (j < b.size()) { 91 | if (i < b.size()) { // was `i < j`, which never holds since j <= i 92 | b[i] = b[j]; 93 | } 94 | b[j] = i; 95 | } 96 | } 97 | return b; 98 | } 99 | 100 | unsigned distribution_sample(const std::vector<float>& prob, 101 | std::mt19937& gen) { 102 | // http://en.cppreference.com/w/cpp/numeric/random/discrete_distribution 103 | // std::discrete_distribution produces random integers on the interval [0, n) 104 | // std::discrete_distribution<> d({40, 10, 10, 40}); 105 | std::discrete_distribution<unsigned> distrib(prob.begin(), prob.end()); 106 | return distrib(gen); 107 | } 108 | 109 | void reservoir_sample_n(const std::vector<unsigned>& S, unsigned N, 110 | std::vector<unsigned>& R, unsigned K, 111 | std::mt19937& gen) { 112 | for (unsigned i = 0; i < K; ++i) { R[i] = S[i]; } 113 | for (unsigned i = K; i < N; ++i) { 114 | std::uniform_int_distribution<> dis(0, i - 1); 115 | unsigned j = dis(gen); 116 | if (j < K) { R[j] = S[i]; } 117 | } 118 | } 119 | 120 | void fast_reservoir_sample_n(const std::vector<unsigned>& S, unsigned N, 121 | std::vector<unsigned>& R, unsigned K, 122 | std::mt19937& gen) { 123 | for (unsigned i = 0; i < K; ++i) { R[i] = S[i]; } 124 | unsigned t = 4 * K; 125 | unsigned j = 1 + K; 126 | while (j < N && j <= t) { 127 | std::uniform_int_distribution<> dis(0, j - 1); 128 | unsigned k = dis(gen); 129 | if (k < K) { R[k] = S[j]; } 130 | j++; 131 | } 132 | while (j < N) { 133 | float p = static_cast<float>(K) / j; 134 | std::uniform_real_distribution<> dis(0, 1); 135 | float u = dis(gen); 136 | unsigned g = static_cast<unsigned>(floor(log(u) / log(1 - p))); 137 | j = j + g; 138 | if (j < N) { 139 | std::uniform_int_distribution<> dis(0, K - 1); 140 | unsigned k = dis(gen); 141 | if (k < K) { R[k] = S[j]; } 142 | } 143 | j++; 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /amr_parser/src/math_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef MATH_UTILS_H 2 | #define MATH_UTILS_H 3 | 4 | #include <vector> 5 | #include <deque> 6 | #include <random> 7 | 8 | struct MeanStdevStreamer { 9 | int n; 10 | double old_m, new_m, old_s, new_s; 11 | 12 | void clear(); 13 | void push(double x); 14 | int num_data_values() const; 15 | double mean() const; 16 | double variance() const; 17 | double stdev() const; 18 | }; 19 | 20 | void mean_and_stddev(const std::deque<float>& data, 21 | float& mean, 22 | float& stddev); 23 | 24 | void softmax_copy(const std::vector<float>& input, 25 | std::vector<float>& out); 26 | 27 | void softmax_inplace(std::vector<float>& x); 28 | 29 | void softmax_inplace_on_valid_indicies(std::vector<float>& x, 30 | const std::vector<unsigned>& valid_indices); 31 | 32 | void unnormalized_softmax_inplace(std::vector<float>& x); 33 | 34 | // Shuffle 35 | std::vector<unsigned> fisher_yates_shuffle(unsigned size, 36 | unsigned max_size, 37 | std::mt19937& gen); 38 | 39 | // Sample one 40 | unsigned distribution_sample(const std::vector<float>& prob, std::mt19937& gen); 41 | 42 | // Sample n 43 | void reservoir_sample_n(const std::vector<unsigned>& S, unsigned N, 44 | std::vector<unsigned>& R, unsigned K, 45 | std::mt19937& gen); 46 | 47 | void fast_reservoir_sample_n(const std::vector<unsigned>& S, unsigned N, 48 | std::vector<unsigned>& R, unsigned K, 49 | std::mt19937& gen); 50 | 51 | #endif // end for MATH_UTILS_H 52 |
-------------------------------------------------------------------------------- /amr_parser/src/sys_utils.cc: -------------------------------------------------------------------------------- 1 | #include "sys_utils.h" 2 | #include "logging.h" 3 | #include <cstdio> 4 | #include <sstream> 5 | #include <boost/algorithm/string.hpp> 6 | #include <boost/lexical_cast.hpp> 7 | #if _MSC_VER 8 | #include <process.h> 9 | #else 10 | #include <unistd.h> 11 | #endif 12 | int portable_getpid() { 13 | #ifdef _MSC_VER 14 | return _getpid(); 15 | #else 16 | return getpid(); 17 | #endif 18 | } 19 | 20 | float execute_and_get_result(const std::string& cmd) { 21 | _TRACE << "Running: " << cmd; 22 | // note: popen below already executes the command; an extra system() call here ran it twice. 23 | 24 | #ifndef _MSC_VER 25 | FILE* pipe = popen(cmd.c_str(), "r"); 26 | #else 27 | FILE* pipe = _popen(cmd.c_str(), "r"); 28 | #endif 29 | if (!pipe) { 30 | return 0.f; 31 | } 32 | char buffer[128]; 33 | std::string result = ""; 34 | while (!feof(pipe)) { 35 | if (fgets(buffer, 128, pipe) != NULL) { result += buffer; } 36 | } 37 | #ifndef _MSC_VER 38 | pclose(pipe); 39 | #else 40 | _pclose(pipe); 41 | #endif 42 | 43 | std::stringstream S(result); 44 | std::string token; 45 | while (S >> token) { 46 | boost::algorithm::trim(token); 47 | return boost::lexical_cast<float>(token); 48 | } 49 | return 0.f; 50 | } 51 | -------------------------------------------------------------------------------- /amr_parser/src/sys_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef SYS_UTILS_H 2 | #define SYS_UTILS_H 3 | 4 | #include <string> 5 | 6 | int portable_getpid(); 7 | 8 | float execute_and_get_result(const std::string& cmd); 9 | 10 | #endif // end for SYS_UTILS_H -------------------------------------------------------------------------------- /amr_parser/src/trainer_utils.cc: -------------------------------------------------------------------------------- 1 | #include "trainer_utils.h" 2 | #include "sys_utils.h" 3 | #include "logging.h" 4 | #include <sstream> 5 | #include "dynet/globals.h" 6 | 7 | void random_replace_singletons(const unsigned & unk_strategy, 8 | const float & unk_prob, 9 | const std::set<unsigned>& singletons, 10 | const unsigned& kUNK, 11 | InputUnits & input_units) { 12 | if (unk_strategy != 1) { return; } 13 | for (auto& u : input_units) { 14 | if (singletons.count(u.wid) && dynet::rand01() < unk_prob) { u.wid = kUNK; } 15 | } 16 | } 17 | 18 | void restore_singletons(const unsigned & unk_strategy, 19 | InputUnits & input_units) { 20 | if (unk_strategy != 1) { return; } 21 | for (auto& u : input_units) { u.wid = u.aux_wid; } 22 | } 23 | 24 | void get_orders(Corpus& corpus, 25 | std::vector<unsigned>& order) { 26 | order.clear(); 27 | for (unsigned i = 0; i < corpus.training_inputs.size(); ++i) { 28 | order.push_back(i); 29 | } 30 | } 31 | 32 | std::string get_model_name(const po::variables_map& conf, 33 | const std::string& prefix) { 34 | std::ostringstream os; 35 | os << prefix << "."
<< portable_getpid(); 36 | return os.str(); 37 | } 38 | 39 | po::options_description get_optimizer_options() { 40 | po::options_description cmd("Optimizer options"); 41 | cmd.add_options() 42 | ("optimizer", po::value<std::string>()->default_value("simple_sgd"), "The choice of optimizer [simple_sgd, momentum_sgd, adagrad, adadelta, rmsprop, adam].") 43 | ("optimizer_eta", po::value<float>(), "The initial value of the learning rate (eta).") 44 | ("optimizer_final_eta", po::value<float>()->default_value(0.f), "The final value of eta.") 45 | ("optimizer_enable_eta_decay", po::value<bool>()->required(), "Specify to update eta at the end of each epoch.") 46 | ("optimizer_eta_decay", po::value<float>(), "The decay rate of eta.") 47 | ("optimizer_enable_clipping", po::value<bool>()->required(), "Enable gradient clipping.") 48 | ("optimizer_adam_beta1", po::value<float>()->default_value(0.9f), "The beta1 hyper-parameter of adam.") 49 | ("optimizer_adam_beta2", po::value<float>()->default_value(0.999f), "The beta2 hyper-parameter of adam.") 50 | ("optimizer_rmsprop_rho", po::value<float>()->default_value(0.99f), "The rho hyper-parameter of rmsprop.") 51 | ; 52 | 53 | return cmd; 54 | } 55 | 56 | dynet::Trainer* get_trainer(const po::variables_map& conf, dynet::ParameterCollection& model) { 57 | dynet::Trainer* trainer = nullptr; 58 | if (!conf.count("optimizer") || conf["optimizer"].as<std::string>() == "simple_sgd") { 59 | float eta0 = (conf.count("optimizer_eta") ? conf["optimizer_eta"].as<float>() : 0.1f); 60 | trainer = new dynet::SimpleSGDTrainer(model, eta0); 61 | // trainer->eta_decay = 0.08f; 62 | } else if (conf["optimizer"].as<std::string>() == "momentum_sgd") { 63 | trainer = new dynet::MomentumSGDTrainer(model); 64 | // trainer->eta_decay = 0.08f; 65 | } else if (conf["optimizer"].as<std::string>() == "adagrad") { 66 | trainer = new dynet::AdagradTrainer(model); 67 | } else if (conf["optimizer"].as<std::string>() == "adadelta") { 68 | trainer = new dynet::AdadeltaTrainer(model); 69 | } else if (conf["optimizer"].as<std::string>() == "rmsprop") { 70 | float eta0 = (conf.count("optimizer_eta") ? conf["optimizer_eta"].as<float>() : 0.001f); 71 | float rho = (conf.count("optimizer_rmsprop_rho") ? conf["optimizer_rmsprop_rho"].as<float>() : 0.99f); 72 | trainer = new dynet::RMSPropTrainer(model, eta0, 1e-8, rho); 73 | } else if (conf["optimizer"].as<std::string>() == "adam") { 74 | // the default setting is the same as in Kingma and Ba (2015). 75 | float eta0 = (conf.count("optimizer_eta") ?
conf["optimizer_eta"].as<float>() : 0.001f); 76 | float beta1 = conf["optimizer_adam_beta1"].as<float>(); 77 | float beta2 = conf["optimizer_adam_beta2"].as<float>(); 78 | trainer = new dynet::AdamTrainer(model, eta0, beta1, beta2); 79 | } else { 80 | _ERROR << "Trainer:: unknown optimizer: " << conf["optimizer"].as<std::string>(); 81 | exit(1); 82 | } 83 | _INFO << "Trainer:: using " << conf["optimizer"].as<std::string>() << " optimizer"; 84 | _INFO << "Trainer:: eta = " << trainer->learning_rate; 85 | 86 | if (conf["optimizer_enable_clipping"].as<bool>()) { 87 | trainer->clipping_enabled = true; 88 | _INFO << "Trainer:: gradient clipping = enabled"; 89 | } else { 90 | trainer->clipping_enabled = false; 91 | _INFO << "Trainer:: gradient clipping = disabled"; 92 | } 93 | 94 | if (conf["optimizer_enable_eta_decay"].as<bool>()) { 95 | _INFO << "Trainer:: eta decay = enabled"; 96 | if (conf.count("optimizer_eta_decay")) { 97 | // trainer->eta_decay = conf["optimizer_eta_decay"].as<float>(); 98 | _INFO << "Trainer:: eta decay rate = " << conf["optimizer_eta_decay"].as<float>(); 99 | } else { 100 | _INFO << "Trainer:: eta decay rate not set, use default = " << 0.08f; 101 | } 102 | } else { 103 | _INFO << "Trainer:: eta decay = disabled"; 104 | } 105 | return trainer; 106 | } 107 | 108 | void update_trainer(const po::variables_map& conf, const float & eta0, const float & iter, dynet::Trainer* trainer) { 109 | if (conf.count("optimizer_enable_eta_decay")) { 110 | float final_eta = conf["optimizer_final_eta"].as<float>(); 111 | float eta_decay = (conf.count("optimizer_eta_decay") ? conf["optimizer_eta_decay"].as<float>() : 0.08f); 112 | if (trainer->learning_rate > final_eta) { 113 | // trainer->update_epoch(); 114 | // trainer->status(); 115 | trainer->learning_rate = eta0 / (1.f + eta_decay * iter); 116 | _INFO << "Trainer:: trainer updated."; 117 | } else { 118 | trainer->learning_rate = final_eta; 119 | _INFO << "Trainer:: eta reached the final value " << final_eta; 120 | } 121 | } 122 | } 123 | 124 | 125 | -------------------------------------------------------------------------------- /amr_parser/src/trainer_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef TRAIN_UTILS_H 2 | #define TRAIN_UTILS_H 3 | 4 | #include <set> 5 | #include <vector> 6 | #include <boost/program_options.hpp> 7 | #include "corpus.h" 8 | #include "dynet/model.h" 9 | #include "dynet/training.h" 10 | 11 | namespace po = boost::program_options; 12 | 13 | void random_replace_singletons(const unsigned& unk_strategy, 14 | const float& unk_prob, 15 | const std::set<unsigned>& singletons, 16 | const unsigned& kUNK, 17 | InputUnits& units); 18 | 19 | void restore_singletons(const unsigned& unk_strategy, 20 | InputUnits& units); 21 | 22 | void get_orders(Corpus& corpus, 23 | std::vector<unsigned>& order); 24 | 25 | po::options_description get_optimizer_options(); 26 | 27 | dynet::Trainer* get_trainer(const po::variables_map& conf, 28 | dynet::ParameterCollection& model); 29 | 30 | void update_trainer(const po::variables_map& conf, 31 | const float & eta0, 32 | const float & iter, 33 | dynet::Trainer* trainer); 34 | 35 | std::string get_model_name(const po::variables_map& conf, 36 | const std::string& prefix); 37 | 38 | #endif // end for TRAIN_UTILS_H -------------------------------------------------------------------------------- /awesome.md: -------------------------------------------------------------------------------- 1 | Awesome AMR Parsers 2 | =================== 3 | 4 | As you may know, there are several open-source AMR parsers and our 5 | aligner improves these parsers.
I would like to share 6 | some experiences with how to plug our alignments into the existing 7 | AMR parsers, although running most of these parsers requires 8 | a certain amount of hacking. 9 | 10 | ## [JAMR](https://github.com/jflanigan/jamr) 11 | 12 | "A Discriminative Graph-Based Parser for the Abstract Meaning Representation", 13 | Jeffrey Flanigan, Sam Thomson, Jaime Carbonell, Chris Dyer, and Noah A. Smith. 14 | 15 | ### Alignment Hacking 16 | The JAMR experiments are carried out with a pipeline of shell scripts. 17 | This made plugging in our alignments very easy and saved a lot of time. 18 | The hook for replacing the alignment is in the preprocessing script: 19 | 20 | ``` 21 | jamr/scripts/preprocessing/cmd.aligned 22 | ``` 23 | 24 | It takes an input AMR file with a `# ::tok` header for each graph and adds 25 | an additional `# ::alignments` header to each graph. 26 | 27 | To replace the alignment, you can use the `replace_comments.py` script. 28 | 29 | ### Results on LDC2014T12 30 | 31 | | JAMR parser | Smatch | 32 | |-----------------|--------| 33 | | +JAMR alignment | 65.9 | 34 | | +Our alignment | 67.6 | 35 | 36 | ### Note 37 | - JAMR uses the `cdec` tokenizer, and our released alignments 38 | include a version preprocessed with `cdec`. 39 | 40 | ## [CAMR](https://github.com/c-amr/camr) 41 | "A Transition-based Algorithm for AMR Parsing", Chuan Wang, Nianwen Xue, and Sameer Pradhan 42 | 43 | ### Alignment Hacking 44 | CAMR uses a single program entry point, `amr_parsing.py`, in their project. 45 | You can replace the JAMR-aligner-generated training file with ours, 46 | using the same `replace_comments.py` script. 47 | 48 | ### Results on LDC2014T12 49 | 50 | | CAMR parser | Smatch | 51 | |-----------------|--------| 52 | | +JAMR alignment | 64.6 | 53 | | +Our alignment | 65.1 | 54 | 55 | ### Note 56 | - CAMR uses Stanford CoreNLP as its tokenizer. In our release, 57 | we include the alignment results using this tokenization (marked as `sd`). 58 | 59 | ## [CCG-AMR](https://github.com/clic-lab/amr) 60 | "Broad-coverage CCG Semantic Parsing with AMR", Yoav Artzi, Kenton Lee, and Luke Zettlemoyer. 61 | 62 | [TBD] 63 | 64 | ## [amr-eager](https://github.com/mdtux89/amr-eager) 65 | 66 | [TBD] 67 | 68 | ## [CacheTransition-Seq2Seq](https://github.com/xiaochang13/CacheTransition-Seq2seq) 69 | 70 | [TBD] 71 | 72 | 73 | -------------------------------------------------------------------------------- /pipeline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ueo pipefail 3 | #=================================== 4 | # Config directory 5 | #=================================== 6 | # Please specify the JAMR home here. 7 | JAMR_HOME=/Users/yijialiu/work/projects/jamr/ 8 | # Please specify the TAMR home here. 9 | TAMR_HOME=/Users/yijialiu/work/projects/tamr/ 10 | 11 | if [ -z "$TAMR_HOME" ]; then 12 | echo 'Error: please specify $TAMR_HOME' 13 | exit 1 14 | fi 15 | 16 | if [ -z "$JAMR_HOME" ]; then 17 | echo 'Error: please specify $JAMR_HOME' 18 | exit 1 19 | fi 20 | 21 | TAMR_DATA=${TAMR_HOME}/data 22 | TAMR_LP_DATA=${TAMR_DATA}/little_prince 23 | TAMR_ALIGNER=${TAMR_HOME}/amr_aligner 24 | TAMR_PARSER=${TAMR_HOME}/amr_parser 25 | 26 | #=================================== 27 | # Download data 28 | #=================================== 29 | echo 'Downloading dataset (little prince) ...'
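# The three splits of the Little Prince AMR bank (v1.6) are fetched from amr.isi.edu below.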
30 | mkdir -p ${TAMR_LP_DATA} 31 | wget -O ${TAMR_LP_DATA}/training.txt https://amr.isi.edu/download/amr-bank-struct-v1.6-training.txt 32 | wget -O ${TAMR_LP_DATA}/dev.txt https://amr.isi.edu/download/amr-bank-struct-v1.6-dev.txt 33 | wget -O ${TAMR_LP_DATA}/test.txt https://amr.isi.edu/download/amr-bank-struct-v1.6-test.txt 34 | 35 | pushd "$JAMR_HOME" > /dev/null 36 | set -x 37 | 38 | #================================== 39 | # Run JAMR baseline aligner 40 | #================================== 41 | . scripts/config.sh 42 | for split in training dev test; 43 | do 44 | echo 'Running JAMR aligner on '${split}; 45 | scripts/ALIGN.sh < ${TAMR_LP_DATA}/${split}.txt > ${TAMR_LP_DATA}/${split}.txt.aligned 46 | done 47 | 48 | pushd "$TAMR_ALIGNER" > /dev/null 49 | #================================== 50 | # Run TAMR aligner 51 | #================================== 52 | for split in training dev test; 53 | do 54 | echo 'Running TAMR aligner on '${split}; 55 | python rule_base_align.py \ 56 | -verbose \ 57 | -data \ 58 | ${TAMR_LP_DATA}/${split}.txt.aligned \ 59 | -output \ 60 | ${TAMR_LP_DATA}/${split}.txt.alignment \ 61 | -wordvec \ 62 | ${TAMR_ALIGNER}/resources/word2vec/glove.840B.300d.w2v.ldc2014t12_filtered \ 63 | -trials \ 64 | 10000 \ 65 | -improve_perfect \ 66 | -morpho_match \ 67 | -semantic_match 68 | done 69 | 70 | #================================== 71 | # Replace the alignments 72 | #================================== 73 | for split in training dev test; 74 | do 75 | echo 'Replacing the alignments on '${split}; 76 | python replace_comments.py \ 77 | -key \ 78 | alignments \ 79 | -lexicon \ 80 | ${TAMR_LP_DATA}/${split}.txt.alignment \ 81 | -data \ 82 | ${TAMR_LP_DATA}/${split}.txt.aligned \ 83 | > ${TAMR_LP_DATA}/${split}.txt.new_aligned 84 | done 85 | 86 | #================================= 87 | # Generate actions 88 | #================================= 89 | for split in training dev test; 90 | do 91 | echo 'Generating actions on '${split}; 92 | python eager_oracle.py \ 93 | -mod \ 94 | dump \ 95 | -aligned \ 96 | ${TAMR_LP_DATA}/${split}.txt.new_aligned \ 97 | > ${TAMR_LP_DATA}/${split}.txt.actions 98 | done 99 | 100 | #================================ 101 | # Training and testing the parser 102 | #================================ 103 | ./amr_parser/bin/parser_l2r \ 104 | --dynet-seed \ 105 | 1 \ 106 | --train \ 107 | --training_data \ 108 | ./data/little_prince/training.txt.actions \ 109 | --devel_data \ 110 | ./data/little_prince/dev.txt.actions \ 111 | --test_data \ 112 | ./data/little_prince/test.txt.actions \ 113 | --pretrained \ 114 | ./amr_aligner/resources/word2vec/glove.840B.300d.w2v.ldc2014t12_filtered \ 115 | --model \ 116 | data/little_prince/model \ 117 | --optimizer_enable_eta_decay \ 118 | true \ 119 | --optimizer_enable_clipping \ 120 | true \ 121 | --external_eval \ 122 | ./amr_parser/scripts/eval_eager.sh \ 123 | --devel_gold \ 124 | ./data/little_prince/dev.txt.new_aligned \ 125 | --test_gold \ 126 | ./data/little_prince/test.txt.new_aligned 127 | -------------------------------------------------------------------------------- /release/ldc2014t12/README.md: -------------------------------------------------------------------------------- 1 | TAMR alignment for LDC2014T12 2 | ============================= 3 | 4 | We release the alignment file (the output of `rule_base_align.py`). 5 | It is compressed with bzip2; decompress it before use (see the example command below).
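For example, to decompress the cdec-tokenized release in this folder (`-d` decompresses, `-k` keeps the original archive):

```
bzip2 -dk amr-release-1.0-training_fix.txt.cdec_tok.tamr_alignment.bz2
# writes amr-release-1.0-training_fix.txt.cdec_tok.tamr_alignment
```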
6 | 7 | You can replace the JAMR alignment with ours using the following 8 | commands: 9 | ``` 10 | python replace_comments.py \ 11 | -key \ 12 | alignments \ 13 | -lexicon \ 14 | /path/to/your/alignment/data \ 15 | -data \ 16 | /path/to/your/baseline/data \ 17 | > /path/to/your/new/alignment/data 18 | ``` 19 | 20 | Since JAMR and CAMR use different tokenizers, we provide 21 | alignments for the cdec tokenizer (used by JAMR) and the Stanford tokenizer 22 | (used by CAMR). 23 | 24 | - for the cdec tokenizer: see `amr-release-1.0-training_fix.txt.cdec_tok.tamr_alignment.bz2` 25 | - for the Stanford tokenizer: see `amr-release-1.0-training_fix.txt.sd_tok.tamr_alignment.bz2` 26 | 27 | To reproduce the alignment, you need to patch the original LDC2014T12 release, 28 | because the original data contains illegal AMR graphs (e.g., two concepts 29 | using the same variable). You can get the patched LDC2014T12 with the following 30 | steps: 31 | 32 | ### Merge the Training Data 33 | 34 | Go into the `amr_anno_1.0/data/split/training` folder of the original release of `ldc2014t12`, 35 | and get a concatenated training file with the following commands: 36 | ``` 37 | cat amr-release-1.0-training-proxy.txt \ 38 | amr-release-1.0-training-bolt.txt \ 39 | amr-release-1.0-training-dfa.txt \ 40 | amr-release-1.0-training-mt09sdl.txt \ 41 | amr-release-1.0-training-xinhua.txt > amr-release-1.0-training.txt 42 | ``` 43 | 44 | ### Patching 45 | Do the patching with the following commands: 46 | ``` 47 | patch amr-release-1.0-training.txt \ 48 | -i amr-release-1.0-training_fix.patch \ 49 | -o amr-release-1.0-training_fix.txt 50 | ``` 51 | 52 | It's done! -------------------------------------------------------------------------------- /release/ldc2014t12/amr-release-1.0-training_fix.patch: -------------------------------------------------------------------------------- 1 | 9945c9945 2 | < :name (n / name :op1 "Pakistan")))) 3 | --- 4 | > :name (n4/ name :op1 "Pakistan")))) 5 | 41213c41213 6 | < :time (a / after 7 | --- 8 | > :time (a2 / after 9 | 58814c58814 10 | < :time (b / before 11 | --- 12 | > :time (b3 / before 13 | 81657c81657 14 | < # ::snt A paper prepared for the talks said the expansion of the narcotics industry represents the single greatest threat to afghanistan’s stability and is increasingly linked to insecurity and terrorist activities. 15 | --- 16 | > # ::snt A paper prepared for the talks said the expansion of the narcotics industry represents the single greatest threat to afghanistanâ's stability and is increasingly linked to insecurity and terrorist activities. 17 | 140733c140733 18 | < # ::snt Bjørn Lomborg - Wikipedia, the free encyclopedia 19 | --- 20 | > # ::snt Bjorn Lomborg - Wikipedia, the free encyclopedia 21 | 140735c140735 22 | < (p / person :name (n / name :op1 "Bjørn" :op2 "Lomborg") 23 | --- 24 | > (p / person :name (n / name :op1 "Bjorn" :op2 "Lomborg") 25 | 164536c164536 26 | < # ::snt Rockström said that when rich countries increase their consumption , they also accelerate the exploitation of the world 's national resources , with the result that they emit more greenhouse gases . 27 | --- 28 | > # ::snt Rockstrom said that when rich countries increase their consumption , they also accelerate the exploitation of the world 's national resources , with the result that they emit more greenhouse gases .
29 | 164539c164539 30 | < :ARG0 (p / person :name (n / name :op1 "Rockström")) 31 | --- 32 | > :ARG0 (p / person :name (n / name :op1 "Rockstrom")) 33 | -------------------------------------------------------------------------------- /release/ldc2014t12/amr-release-1.0-training_fix.txt.cdec_tok.tamr_alignment.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/release/ldc2014t12/amr-release-1.0-training_fix.txt.cdec_tok.tamr_alignment.bz2 -------------------------------------------------------------------------------- /release/ldc2014t12/amr-release-1.0-training_fix.txt.sd_tok.tamr_alignment.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/release/ldc2014t12/amr-release-1.0-training_fix.txt.sd_tok.tamr_alignment.bz2 -------------------------------------------------------------------------------- /release/ldc2017t10/README.md: -------------------------------------------------------------------------------- 1 | TAMR alignment for LDC2017T10 2 | ============================= 3 | 4 | You can replace the JAMR alignment with ours using the following commands: 5 | 6 | ``` 7 | python replace_comments.py \ 8 | -key \ 9 | alignments \ 10 | -lexicon \ 11 | /path/to/your/alignment/data \ 12 | -data \ 13 | /path/to/your/baseline/data \ 14 | > /path/to/your/new/alignment/data 15 | ``` 16 | 17 | Similar to LDC2014T12, you need to do a little patching on the original data 18 | to use this alignment. The patch files are under this folder with the `.patch` suffix (an example of applying one is shown below). 19 | 20 | In addition to the patching, you will also need to remove the entity linking (`:wiki`).
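For example, to apply the `dfa` patch (a sketch only — the name of the unpatched input file is an assumption based on the original LDC2017T10 split naming):

```
# hypothetical name of the unpatched LDC2017T10 training split
patch amr-release-2.0-amrs-training-dfa.txt \
    -i amr-release-2.0-amrs-training-dfa_fix.patch \
    -o amr-release-2.0-amrs-training-dfa_fix.txt
```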
21 | We provide a Python script, `remove_wiki.py`, which you can use as follows: 22 | ``` 23 | python remove_wiki.py /path/to/your/input > /path/to/your/output 24 | ``` -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-bolt.txt.cdec_tok.tamr_alignment.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/release/ldc2017t10/amr-release-2.0-amrs-training-bolt.txt.cdec_tok.tamr_alignment.bz2 -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-cctv.txt.no_wiki.cdec_tok.tamr_alignment.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/release/ldc2017t10/amr-release-2.0-amrs-training-cctv.txt.no_wiki.cdec_tok.tamr_alignment.bz2 -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-dfa_fix.patch: -------------------------------------------------------------------------------- 1 | 6324c6324 2 | < # ::snt Bjorn Lomborg - Wikipedia, the free encyclopedia 3 | --- 4 | > # ::snt Bjørn Lomborg - Wikipedia, the free encyclopedia 5 | 6326c6326 6 | < (p / person :wiki "Bjorn_Lomborg" 7 | --- 8 | > (p / person :wiki "Bjørn_Lomborg" 9 | 6331c6331 10 | < :name (n / name :op1 "Bjorn" :op2 "Lomborg")) 11 | --- 12 | > :name (n / name :op1 "Bjørn" :op2 "Lomborg")) 13 | -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-dfa_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/release/ldc2017t10/amr-release-2.0-amrs-training-dfa_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2 -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-dfb_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/release/ldc2017t10/amr-release-2.0-amrs-training-dfb_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2 -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-guidelines_fix.patch: -------------------------------------------------------------------------------- 1 | 5201c5201 2 | < # ::snt Albert Einstein: Zur Elektrodynamik bewegter Korper (1905) Annalen der Physik, 322 (10): 891-921 (in German) 3 | --- 4 | > # ::snt Albert Einstein: Zur Elektrodynamik bewegter Körper (1905) Annalen der Physik, 322 (10): 891-921 (in German) 5 | 5207c5207 6 | < :name (n2 / name :op1 "Zur" :op2 "Elektrodynamik" :op3 "bewegter" :op4 "Korper")) 7 | --- 8 | > :name (n2 / name :op1 "Zur" :op2 "Elektrodynamik" :op3 "bewegter" :op4 "Körper")) 9 | -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-guidelines_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/release/ldc2017t10/amr-release-2.0-amrs-training-guidelines_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2 -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-mt09sdl_fix.patch: -------------------------------------------------------------------------------- 1 | 4c4 2 | < # ::snt Rockstrom said that when rich countries increase their consumption , they also accelerate the exploitation of the world 's national resources , with the result that they emit more greenhouse gases . 3 | --- 4 | > # ::snt Rockström said that when rich countries increase their consumption , they also accelerate the exploitation of the world 's national resources , with the result that they emit more greenhouse gases . 5 | 7,8c7,8 6 | < :ARG0 (p / person :wiki "Johan_Rockstrom" 7 | < :name (n / name :op1 "Rockstrom")) 8 | --- 9 | > :ARG0 (p / person :wiki "Johan_Rockström" 10 | > :name (n / name :op1 "Rockström")) 11 | -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-mt09sdl_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/release/ldc2017t10/amr-release-2.0-amrs-training-mt09sdl_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2 -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-proxy_fix.patch: -------------------------------------------------------------------------------- 1 | 81140c81140 2 | < # ::snt A paper prepared for the talks said the expansion of the narcotics industry represents the single greatest threat to afghanistan's stability and is increasingly linked to insecurity and terrorist activities. 3 | --- 4 | > # ::snt A paper prepared for the talks said the expansion of the narcotics industry represents the single greatest threat to afghanistan’s stability and is increasingly linked to insecurity and terrorist activities. 
5 | -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-proxy_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/release/ldc2017t10/amr-release-2.0-amrs-training-proxy_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2 -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-wb_fix.patch: -------------------------------------------------------------------------------- 1 | 3438a3439,3445 2 | > # ::id wb.eng_0002.163 ::date 2012-11-25T17:27:11 ::annotator SDL-AMR-09 ::preferred 3 | > # ::snt posted by <$BlogBacklinkAuthor$> @ <$BlogBacklinkDateTime$> 4 | > # ::save-date Wed Jul 29, 2015 ::file wb_eng_0002_163.txt 5 | > (p / post-01 6 | > :ARG0 (p2 / person) 7 | > :time (d / date-entity)) 8 | > 9 | -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-wb_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/release/ldc2017t10/amr-release-2.0-amrs-training-wb_fix.txt.no_wiki.cdec_tok.tamr_alignment.bz2 -------------------------------------------------------------------------------- /release/ldc2017t10/amr-release-2.0-amrs-training-xinhua.txt.no_wiki.cdec_tok.tamr_alignment.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Oneplus/tamr/c7a480a019d1d765f0ce3d04a37e31709af47f4a/release/ldc2017t10/amr-release-2.0-amrs-training-xinhua.txt.no_wiki.cdec_tok.tamr_alignment.bz2 --------------------------------------------------------------------------------