├── .gitignore ├── COPYING ├── COPYING.LESSER ├── Jamroot ├── LICENSE ├── MEMT ├── Alignment │ ├── CherryPick.cc │ ├── Everything.cc │ ├── Jamfile │ ├── MMBRMatcherMEMT.java │ ├── MatcherMEMT.java │ ├── NBest.cc │ ├── NBest.hh │ ├── Stemmer.cc │ ├── Stemmer.hh │ ├── Summarize.cc │ ├── compile.sh │ └── match.sh ├── Controller │ ├── CommandLine.cc │ ├── CommandLine.hh │ ├── Config.hh │ ├── Connection.hh │ ├── ConnectionHandler.hh │ ├── CoordWrite.hh │ ├── DecoderHandler.hh │ ├── Jamfile │ ├── Main.cc │ ├── OutputHandler.hh │ ├── Sentence.hh │ ├── Sentence │ │ ├── Config.hh │ │ ├── Jamfile │ │ ├── Options.cc │ │ └── Options.hh │ ├── Server.hh │ └── Worker.hh ├── Decoder │ ├── Completed.hh │ ├── Config.cc │ ├── Config.hh │ ├── EndBeam.cc │ ├── EndBeam.hh │ ├── History.hh │ ├── HistoryBeam.cc │ ├── HistoryBeam.hh │ ├── Hypothesis.hh │ ├── Implementation.hh │ ├── InternalBeam.hh │ ├── Jamfile │ ├── Options.cc │ ├── Options.hh │ ├── Score.cc │ └── Score.hh ├── Feature │ ├── Base │ │ ├── Process.hh │ │ └── Sign.hh │ ├── LM │ │ ├── Config.cc │ │ ├── Config.hh │ │ ├── Hypothesis.hh │ │ ├── Jamfile │ │ ├── Options.cc │ │ ├── Options.hh │ │ ├── Process.hh │ │ ├── Sentence.cc │ │ └── Sentence.hh │ ├── Length │ │ ├── Config.hh │ │ ├── Hypothesis.hh │ │ ├── Process.hh │ │ └── Sentence.hh │ ├── Scorer │ │ ├── Config.cc │ │ ├── Config.hh │ │ ├── Fuzz.cc │ │ ├── Fuzz.hh │ │ ├── Hypothesis.hh │ │ ├── Jamfile │ │ ├── Options.cc │ │ ├── Options.hh │ │ └── Sentence.hh │ └── Verbatim │ │ ├── Config.hh │ │ ├── Hypothesis.hh │ │ ├── Jamfile │ │ ├── Options.cc │ │ ├── Options.hh │ │ ├── Process.hh │ │ ├── Sentence.cc │ │ └── Sentence.hh ├── Input │ ├── AlignType.cc │ ├── AlignType.hh │ ├── Alignment.cc │ ├── Alignment.hh │ ├── Capitalization.cc │ ├── Capitalization.hh │ ├── Config.hh │ ├── Dump.cc │ ├── Factory.hh │ ├── Format.cc │ ├── Format.hh │ ├── Input.hh │ ├── Jamfile │ ├── Location.hh │ ├── Options.cc │ ├── Options.hh │ ├── Read.cc │ ├── Read.hh │ ├── ReadDispatcher.cc │ ├── 
ReadDispatcher.hh │ ├── ReadFromJava.cc │ ├── ReadFromJava.hh │ ├── Same.cc │ ├── Same.hh │ ├── Text.cc │ ├── Text.hh │ ├── Transitive.cc │ ├── Transitive.hh │ └── Word.hh ├── Jamfile ├── Output │ ├── Config.hh │ ├── Jamfile │ ├── NBest.cc │ ├── NBest.hh │ ├── NullBeamDumper.hh │ ├── Options.cc │ ├── Options.hh │ ├── StderrBeamDumper.hh │ ├── ToString.cc │ └── ToString.hh ├── README ├── Strategy │ ├── Graph │ │ ├── Config.hh │ │ ├── Coverage │ │ │ ├── Config.hh │ │ │ ├── Hypothesis.hh │ │ │ └── Sentence.hh │ │ ├── Hypothesis.hh │ │ ├── Jamfile │ │ ├── Options.cc │ │ ├── Options.hh │ │ └── Sentence.hh │ ├── Horizon │ │ ├── Config.hh │ │ ├── Horizon.cc │ │ ├── Horizon.hh │ │ ├── Hypothesis.cc │ │ ├── Hypothesis.hh │ │ ├── Jamfile │ │ ├── Options.cc │ │ └── Options.hh │ ├── Legacy │ │ ├── Config.hh │ │ ├── Hypothesis.hh │ │ ├── Jamfile │ │ ├── Options.cc │ │ ├── Options.hh │ │ └── Sentence.hh │ └── Phrase │ │ ├── Aligned.cc │ │ ├── Aligned.hh │ │ ├── Hypothesis.hh │ │ ├── Jamfile │ │ ├── Phrase.cc │ │ ├── Phrase.hh │ │ ├── Punctuation.cc │ │ ├── Punctuation.hh │ │ ├── Type.cc │ │ └── Type.hh └── scripts │ ├── decode.rb │ ├── experiment │ ├── decode_subgenre.sh │ ├── en.sh │ ├── match.rb │ ├── preprocess.sh │ ├── qsub.sh │ ├── run.rb │ ├── status.rb │ └── stripsgml.rb │ ├── make_filter_vocab.rb │ ├── match.rb │ ├── nbest_first.rb │ ├── server.sh │ ├── shell_escape.rb │ ├── simple_decode.rb │ ├── util.rb │ └── zmert │ ├── decoder.rb │ ├── format.rb │ ├── fuzz.rb │ ├── run.rb │ └── zmert.rb ├── README ├── Utilities ├── Input │ ├── select_gale_docs.rb │ ├── select_nist_docs.rb │ └── unescape.rb ├── Output │ ├── Jamfile │ ├── generateSGMLfromText.perl │ ├── nist_rewrap.rb │ ├── postprocess-rem-nonascii.pl │ ├── postprocess.pl │ ├── remove_nonlatin.cc │ └── replace-oovs-from-giza.pl ├── Tokenization │ └── Moses │ │ ├── README │ │ └── detokenizer.perl ├── Tuning │ └── zmert.jar ├── queue.rb └── scoring │ ├── INSTALL │ ├── LICENSE │ ├── README │ ├── interlace.rb │ ├── lib │ 
├── length.rb │ ├── metaify.rb │ ├── meteorify.rb │ ├── nistify.rb │ ├── postprocess.pl │ ├── shell_escape.rb │ └── terrify.rb │ ├── mteval-v13.pl │ ├── score.rb │ └── setup.sh ├── bjam ├── install ├── README ├── ant.sh ├── apache-ant-1.7.1-bin.tar.bz2.md5 ├── apache-ant-1.7.1-bin.tar.bz2.sha1 ├── boost.sh ├── boost_1_49_0.tar.bz2.md5 ├── boost_1_49_0.tar.bz2.sha1 ├── checksum.sh ├── environment.bash ├── environment.tcsh ├── icu.sh ├── icu4c-4_6_1-src.tgz.md5 ├── icu4c-4_6_1-src.tgz.sha1 ├── install.sh ├── lib.sh ├── ruby-1.9.1-p376.tar.gz.md5 ├── ruby-1.9.1-p376.tar.gz.sha1 ├── ruby-1.9.2-p0.tar.gz.md5 ├── ruby-1.9.2-p0.tar.gz.sha1 ├── ruby.sh ├── tercom-0.7.25.tgz.md5 ├── tercom-0.7.25.tgz.sha1 ├── tercom.sh └── zmert.sh ├── jam-files ├── LICENSE_1_0.txt ├── boost-build │ ├── boost-build.jam │ ├── bootstrap.jam │ ├── build-system.jam │ ├── build │ │ ├── ac.jam │ │ ├── alias.jam │ │ ├── build-request.jam │ │ ├── config-cache.jam │ │ ├── configure.jam │ │ ├── feature.jam │ │ ├── generators.jam │ │ ├── project.jam │ │ ├── property-set.jam │ │ ├── property.jam │ │ ├── readme.txt │ │ ├── scanner.jam │ │ ├── targets.jam │ │ ├── toolset.jam │ │ ├── type.jam │ │ ├── version.jam │ │ └── virtual-target.jam │ ├── kernel │ │ ├── boost-build.jam │ │ ├── bootstrap.jam │ │ ├── class.jam │ │ ├── errors.jam │ │ └── modules.jam │ ├── options │ │ └── help.jam │ ├── site-config.jam │ ├── tools │ │ ├── acc.jam │ │ ├── auto-index.jam │ │ ├── bison.jam │ │ ├── boostbook-config.jam │ │ ├── boostbook.jam │ │ ├── borland.jam │ │ ├── builtin.jam │ │ ├── cast.jam │ │ ├── clang-darwin.jam │ │ ├── clang-linux.jam │ │ ├── clang.jam │ │ ├── common.jam │ │ ├── como-linux.jam │ │ ├── como-win.jam │ │ ├── como.jam │ │ ├── convert.jam │ │ ├── cray.jam │ │ ├── cw-config.jam │ │ ├── cw.jam │ │ ├── darwin.jam │ │ ├── dmc.jam │ │ ├── docutils.jam │ │ ├── doxygen-config.jam │ │ ├── doxygen.jam │ │ ├── doxygen │ │ │ ├── windows-paths-check.doxyfile │ │ │ └── windows-paths-check.hpp │ │ ├── fop.jam │ │ 
├── fortran.jam │ │ ├── gcc.jam │ │ ├── generate.jam │ │ ├── gettext.jam │ │ ├── gfortran.jam │ │ ├── hp_cxx.jam │ │ ├── hpfortran.jam │ │ ├── ifort.jam │ │ ├── intel-darwin.jam │ │ ├── intel-linux.jam │ │ ├── intel-win.jam │ │ ├── intel.jam │ │ ├── jpeg.jam │ │ ├── lex.jam │ │ ├── make.jam │ │ ├── mc.jam │ │ ├── message.jam │ │ ├── midl.jam │ │ ├── mipspro.jam │ │ ├── mpi.jam │ │ ├── msvc-config.jam │ │ ├── msvc.jam │ │ ├── notfile.jam │ │ ├── package.jam │ │ ├── pathscale.jam │ │ ├── pch.jam │ │ ├── pgi.jam │ │ ├── png.jam │ │ ├── python-config.jam │ │ ├── python.jam │ │ ├── qcc.jam │ │ ├── qt.jam │ │ ├── qt3.jam │ │ ├── qt4.jam │ │ ├── qt5.jam │ │ ├── quickbook-config.jam │ │ ├── quickbook.jam │ │ ├── rc.jam │ │ ├── stage.jam │ │ ├── stlport.jam │ │ ├── sun.jam │ │ ├── symlink.jam │ │ ├── testing-aux.jam │ │ ├── testing.jam │ │ ├── tiff.jam │ │ ├── types │ │ │ ├── asm.jam │ │ │ ├── cpp.jam │ │ │ ├── exe.jam │ │ │ ├── html.jam │ │ │ ├── lib.jam │ │ │ ├── obj.jam │ │ │ ├── objc.jam │ │ │ ├── preprocessed.jam │ │ │ ├── qt.jam │ │ │ ├── register.jam │ │ │ └── rsp.jam │ │ ├── unix.jam │ │ ├── vacpp.jam │ │ ├── whale.jam │ │ ├── xlf.jam │ │ ├── xsltproc-config.jam │ │ ├── xsltproc.jam │ │ ├── xsltproc │ │ │ ├── included.xsl │ │ │ ├── test.xml │ │ │ └── test.xsl │ │ └── zlib.jam │ ├── user-config.jam │ └── util │ │ ├── assert.jam │ │ ├── container.jam │ │ ├── doc.jam │ │ ├── indirect.jam │ │ ├── numbers.jam │ │ ├── option.jam │ │ ├── order.jam │ │ ├── os.jam │ │ ├── path.jam │ │ ├── print.jam │ │ ├── regex.jam │ │ ├── sequence.jam │ │ ├── set.jam │ │ ├── string.jam │ │ └── utility.jam ├── engine │ ├── Jambase │ ├── boost-jam.spec │ ├── boost-no-inspect │ ├── build.bat │ ├── build.jam │ ├── build.sh │ ├── builtins.c │ ├── builtins.h │ ├── bump_version.py │ ├── class.c │ ├── class.h │ ├── command.c │ ├── command.h │ ├── compile.c │ ├── compile.h │ ├── constants.c │ ├── constants.h │ ├── cwd.c │ ├── cwd.h │ ├── debian │ │ ├── changelog │ │ ├── control │ │ ├── copyright │ 
│ ├── jam.man.sgml │ │ └── rules │ ├── debug.c │ ├── debug.h │ ├── execcmd.c │ ├── execcmd.h │ ├── execnt.c │ ├── execunix.c │ ├── filent.c │ ├── filesys.c │ ├── filesys.h │ ├── fileunix.c │ ├── frames.c │ ├── frames.h │ ├── function.c │ ├── function.h │ ├── glob.c │ ├── hash.c │ ├── hash.h │ ├── hcache.c │ ├── hcache.h │ ├── hdrmacro.c │ ├── hdrmacro.h │ ├── headers.c │ ├── headers.h │ ├── jam.c │ ├── jam.h │ ├── jambase.c │ ├── jambase.h │ ├── jamgram.c │ ├── jamgram.h │ ├── jamgram.y │ ├── jamgram.yy │ ├── jamgramtab.h │ ├── lists.c │ ├── lists.h │ ├── make.c │ ├── make.h │ ├── make1.c │ ├── md5.c │ ├── md5.h │ ├── mem.c │ ├── mem.h │ ├── mkjambase.c │ ├── modules.c │ ├── modules.h │ ├── modules │ │ ├── order.c │ │ ├── path.c │ │ ├── property-set.c │ │ ├── readme.txt │ │ ├── regex.c │ │ ├── sequence.c │ │ └── set.c │ ├── native.c │ ├── native.h │ ├── object.c │ ├── object.h │ ├── option.c │ ├── option.h │ ├── output.c │ ├── output.h │ ├── parse.c │ ├── parse.h │ ├── patchlevel.h │ ├── pathnt.c │ ├── pathsys.c │ ├── pathsys.h │ ├── pathunix.c │ ├── regexp.c │ ├── regexp.h │ ├── rules.c │ ├── rules.h │ ├── scan.c │ ├── scan.h │ ├── search.c │ ├── search.h │ ├── strings.c │ ├── strings.h │ ├── subst.c │ ├── subst.h │ ├── timestamp.c │ ├── timestamp.h │ ├── variable.c │ ├── variable.h │ ├── w32_getreg.c │ └── yyacc.c ├── fail │ └── Jamroot └── sanity.jam ├── lm ├── Jamfile ├── bhiksha.cc ├── bhiksha.hh ├── binary_format.cc ├── binary_format.hh ├── blank.hh ├── build_binary_main.cc ├── builder │ ├── Jamfile │ ├── README.md │ ├── TODO │ ├── adjust_counts.cc │ ├── adjust_counts.hh │ ├── adjust_counts_test.cc │ ├── corpus_count.cc │ ├── corpus_count.hh │ ├── corpus_count_test.cc │ ├── discount.hh │ ├── dump_counts_main.cc │ ├── hash_gamma.hh │ ├── header_info.hh │ ├── initial_probabilities.cc │ ├── initial_probabilities.hh │ ├── interpolate.cc │ ├── interpolate.hh │ ├── joint_order.hh │ ├── lmplz_main.cc │ ├── ngram.hh │ ├── ngram_stream.hh │ ├── output.cc │ ├── 
output.hh │ ├── pipeline.cc │ ├── pipeline.hh │ ├── print.cc │ ├── print.hh │ └── sort.hh ├── config.cc ├── config.hh ├── enumerate_vocab.hh ├── facade.hh ├── filter │ ├── Jamfile │ ├── arpa_io.cc │ ├── arpa_io.hh │ ├── count_io.hh │ ├── filter_main.cc │ ├── format.hh │ ├── phrase.cc │ ├── phrase.hh │ ├── phrase_table_vocab_main.cc │ ├── thread.hh │ ├── vocab.cc │ ├── vocab.hh │ └── wrapper.hh ├── fragment_main.cc ├── kenlm_benchmark_main.cc ├── left.hh ├── left_test.cc ├── lm_exception.cc ├── lm_exception.hh ├── max_order.hh ├── model.cc ├── model.hh ├── model_test.cc ├── model_type.hh ├── ngram_query.hh ├── partial.hh ├── partial_test.cc ├── quantize.cc ├── quantize.hh ├── query_main.cc ├── read_arpa.cc ├── read_arpa.hh ├── return.hh ├── search_hashed.cc ├── search_hashed.hh ├── search_trie.cc ├── search_trie.hh ├── sizes.cc ├── sizes.hh ├── state.hh ├── test.arpa ├── test_nounk.arpa ├── trie.cc ├── trie.hh ├── trie_sort.cc ├── trie_sort.hh ├── value.hh ├── value_build.cc ├── value_build.hh ├── virtual_interface.cc ├── virtual_interface.hh ├── vocab.cc ├── vocab.hh ├── weights.hh └── word_index.hh └── util ├── Jamfile ├── barrier.hh ├── bit_packing.cc ├── bit_packing.hh ├── bit_packing_test.cc ├── bounded_i_stream.hh ├── cat_compressed_main.cc ├── cat_range_main.cc ├── debug.hh ├── double-conversion ├── Jamfile ├── LICENSE ├── bignum-dtoa.cc ├── bignum-dtoa.h ├── bignum.cc ├── bignum.h ├── cached-powers.cc ├── cached-powers.h ├── diy-fp.cc ├── diy-fp.h ├── double-conversion.cc ├── double-conversion.h ├── fast-dtoa.cc ├── fast-dtoa.h ├── fixed-dtoa.cc ├── fixed-dtoa.h ├── ieee.h ├── strtod.cc ├── strtod.h └── utils.h ├── ersatz_progress.cc ├── ersatz_progress.hh ├── exception.cc ├── exception.hh ├── fake_ofstream.hh ├── file.cc ├── file.hh ├── file_piece.cc ├── file_piece.hh ├── file_piece_test.cc ├── fixed_array.hh ├── getopt.c ├── getopt.hh ├── hash_fusion.hh ├── hash_fusion_test.cc ├── hash_output.hh ├── hash_output_test.cc ├── have.hh ├── joint_sort.hh ├── 
joint_sort_test.cc ├── latex_escape.cc ├── latex_escape.hh ├── log_num.hh ├── log_num_test.cc ├── lower_main.cc ├── mmap.cc ├── mmap.hh ├── multi_intersection.hh ├── multi_intersection_test.cc ├── murmur_hash.cc ├── murmur_hash.hh ├── n_best.hh ├── n_best_test.cc ├── numbers.hh ├── options.cc ├── options.hh ├── parallel_read.cc ├── parallel_read.hh ├── pcqueue.hh ├── pcqueue_test.cc ├── pool.cc ├── pool.hh ├── print_concurrency_main.cc ├── probing_hash_table.hh ├── probing_hash_table_test.cc ├── proxy_iterator.hh ├── read_compressed.cc ├── read_compressed.hh ├── read_compressed_test.cc ├── scoped.cc ├── scoped.hh ├── sized_iterator.hh ├── sized_iterator_test.cc ├── socket_concurrent_iostream.hh ├── sorted_uniform.hh ├── sorted_uniform_test.cc ├── stream ├── Jamfile ├── block.hh ├── chain.cc ├── chain.hh ├── config.hh ├── io.cc ├── io.hh ├── io_test.cc ├── line_input.cc ├── line_input.hh ├── multi_progress.cc ├── multi_progress.hh ├── multi_stream.hh ├── sort.hh ├── sort_test.cc ├── stream.hh ├── stream_test.cc └── timer.hh ├── string_piece.cc ├── string_piece.hh ├── string_piece_hash.hh ├── thread_pool.hh ├── tokenize_piece.hh ├── tokenize_piece_test.cc ├── usage.cc ├── usage.hh ├── utf8.cc ├── utf8.hh ├── utf8_test.cc └── vocab_main.cc /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | bin 3 | *.so 4 | lm/build_binary 5 | lm/query 6 | previous.sh 7 | -------------------------------------------------------------------------------- /Jamroot: -------------------------------------------------------------------------------- 1 | import option ; 2 | import modules ; 3 | import path ; 4 | path-constant TOP : . ; 5 | include $(TOP)/jam-files/sanity.jam ; 6 | 7 | boost 104200 ; 8 | external-lib z ; 9 | 10 | project : requirements multi 64 on $(requirements) . 
; 11 | project : default-build release ; 12 | 13 | use-project /util : util ; 14 | use-project /lm : lm ; 15 | 16 | build-project lm ; 17 | build-project util ; 18 | build-project MEMT ; 19 | 20 | install-bin-libs MEMT/Controller//MEMT MEMT/Input//Dump MEMT/Alignment//SummarizeAlignment lm//query lm//build_binary lm/filter//filter util//programs ; 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Avenue code is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU Lesser General Public License as published 3 | by the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 5 | 6 | Avenue code is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU Lesser General Public License for more details. 10 | 11 | You should have received a copy of the GNU Lesser General Public License 12 | along with Avenue code. If not, see <http://www.gnu.org/licenses/>. 13 | 14 | Most of the code here is licensed under the LGPL. There are exceptions which have their own licenses, listed below. You may not have been provided with some of these directories or files. 15 | 16 | jam-files contains Boost Jam. See the Boost license in that directory. 17 | 18 | install contains scripts to download and install software, but not the software itself. Downloaded software has its own license. 19 | 20 | Utilities/scoring contains scripts that download and install metrics. This license covers METEOR, but not the other metrics. 21 | 22 | Utilities/Tuning/zmert comes from Joshua. Joshua is LGPL. 23 | 24 | Utilities/Tokenization comes from Moses and Moses is LGPL. 25 | 26 | util/string_piece.hh , util/string_piece.cc , and util/google-sparsehash is Google code and contains its own license. 
27 | -------------------------------------------------------------------------------- /MEMT/Alignment/CherryPick.cc: -------------------------------------------------------------------------------- 1 | /* Pick a short sentence pair containing all alignment types. */ 2 | #include "MEMT/Input/AlignType.hh" 3 | #include "MEMT/Input/Input.hh" 4 | #include "MEMT/Input/Format.hh" 5 | #include "MEMT/Input/ReadFromJava.hh" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | int main() { 12 | const input::AlignType kWanted = input::AL_EXACT | input::AL_WN_SYNONYMY | input::AL_SNOWBALL_STEM | input::AL_PARAPHRASE; 13 | const unsigned int kMaxLength = 12; 14 | input::Input input; 15 | for (unsigned int v = 0; ; ++v) { 16 | try { 17 | input::ReadFromJava(std::cin, input); 18 | } 19 | catch (std::ios_base::failure &f) { 20 | if (std::cin.eof()) break; 21 | throw; 22 | } 23 | for (unsigned int e = 0; e < input.engines.size(); ++e) { 24 | for (unsigned int f = e + 1; f < input.engines.size(); ++f) { 25 | input::AlignType got = 0; 26 | for (unsigned int w = 0; w < input.engines[e].words.size(); ++w) { 27 | got |= input.GetWord(e, w).alignments.Ask(f).type; 28 | } 29 | if (((got & kWanted) == kWanted) && (input.engines[e].words.size() < kMaxLength) && (input.engines[f].words.size() < kMaxLength)) { 30 | LaTeXAlignment(std::cout, "System 1", input.engines[e], "System 2", input.engines[f]); 31 | } 32 | } 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /MEMT/Alignment/Jamfile: -------------------------------------------------------------------------------- 1 | exe SummarizeAlignment : Summarize.cc ../Input//input ; 2 | exe CherryPick : CherryPick.cc ../Input//input ; 3 | -------------------------------------------------------------------------------- /MEMT/Alignment/NBest.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Alignment/NBest.hh" 2 | 3 | #include 4 | #include 5 
| 6 | namespace input { 7 | 8 | NBestException::NBestException() throw() {} 9 | NBestException::~NBestException() throw() {} 10 | 11 | NBestReader::NBestReader(const char *file) : file_(file) { 12 | ReadSegmentID(); 13 | } 14 | 15 | namespace { 16 | void SkipSpaces(StringPiece &str) { 17 | while (str.size() && isspace(*str.data())) 18 | str.set(str.data() + 1, str.size() - 1); 19 | } 20 | } // namespace 21 | 22 | bool NBestReader::ReadEntry(unsigned int segment, StringPiece &out) { 23 | if (segment < next_segment_) return false; 24 | assert(segment == next_segment_); 25 | out = file_.ReadLine(); 26 | SkipSpaces(out); 27 | if (out.size() < 3 || out.substr(0, 3) != "|||") 28 | UTIL_THROW(NBestException, "Three pipes missing in " << out); 29 | SkipSpaces(out); 30 | // Argh no portable strnstr or find 31 | for (const char *i = out.data(); i < out.data() + out.size() - 2; ++i) { 32 | if (*i == '|' && *(i+1) == '|' && *(i+2) == '|') { 33 | out.set(out.data(), i - out.data()); 34 | break; 35 | } 36 | } 37 | ReadSegmentID(); 38 | return true; 39 | } 40 | 41 | void NBestReader::ReadSegmentID() { 42 | try { 43 | next_segment_ = file_.ReadULong(); 44 | } catch(const util::EndOfFileException &e) { 45 | next_segment_ = std::numeric_limits::max(); 46 | } 47 | } 48 | 49 | } // namespace input 50 | -------------------------------------------------------------------------------- /MEMT/Alignment/NBest.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Alignment_NBest_h 2 | #define _MEMT_Alignment_NBest_h 3 | 4 | #include "util/exception.hh" 5 | #include "util/file_piece.hh" 6 | 7 | #include 8 | 9 | namespace input { 10 | 11 | class NBestException : public util::Exception { 12 | public: 13 | NBestException() throw(); 14 | ~NBestException() throw(); 15 | }; 16 | 17 | class NBestReader { 18 | public: 19 | explicit NBestReader(const char *file); 20 | 21 | bool ReadEntry(unsigned int segment, StringPiece &out); 22 | 23 | bool Ended() 
const { 24 | return next_segment_ == std::numeric_limits::max(); 25 | } 26 | 27 | private: 28 | void ReadSegmentID(); 29 | 30 | util::FilePiece file_; 31 | 32 | unsigned int next_segment_; 33 | }; 34 | 35 | } // namespace input 36 | 37 | #endif // _MEMT_Alignment_NBest_h 38 | -------------------------------------------------------------------------------- /MEMT/Alignment/Stemmer.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Alignment/Stemmer.hh" 2 | #include "util/exception.hh" 3 | 4 | #include "MEMT/Alignment/libstemmer_c/include/libstemmer.h" 5 | 6 | namespace input { 7 | 8 | SnowballWrap::SnowballWrap(const char *language) : stemmer_(sb_stemmer_new(language, NULL)) { 9 | if (!stemmer_) UTIL_THROW(util::Exception, "Failed to create stemmer for " << language); 10 | } 11 | 12 | SnowballWrap::~SnowballWrap() { 13 | if (stemmer_) sb_stemmer_delete(stemmer_); 14 | } 15 | 16 | StringPiece SnowballWrap::Stem(const StringPiece &word) { 17 | // Snowball likes const unsigned char. StringPiece likes const char. 18 | const char *data = reinterpret_cast(sb_stemmer_stem(stemmer_, reinterpret_cast(word.data()), word.size())); 19 | if (!data) UTIL_THROW(util::Exception, "Stemming " << word << " returned NULL."); 20 | return StringPiece(data, sb_stemmer_length(stemmer_)); 21 | } 22 | 23 | } // namespace input 24 | -------------------------------------------------------------------------------- /MEMT/Alignment/Stemmer.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Alignment_Stemmer_h 2 | #define _MEMT_Alignment_Stemmer_h 3 | 4 | #include "util/string_piece.hh" 5 | 6 | struct sb_stemmer; 7 | 8 | namespace input { 9 | 10 | class SnowballWrap { 11 | public: 12 | explicit SnowballWrap(const char *language); 13 | 14 | ~SnowballWrap(); 15 | 16 | // The returned StringPiece is invalidated after each call. Sadly non-const. 
17 | StringPiece Stem(const StringPiece &word); 18 | 19 | private: 20 | sb_stemmer *stemmer_; 21 | }; 22 | 23 | } // namespace input 24 | 25 | #endif // _MEMT_Alignment_Stemmer_h 26 | -------------------------------------------------------------------------------- /MEMT/Alignment/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | loc="$(dirname $0)" 3 | if [ ! -f "$loc"/../../Utilities/scoring/meteor-1.0/dist/meteor-1.0/meteor.jar ]; then 4 | pushd "$loc"/../../Utilities/scoring || exit 1 5 | ./setup.sh || exit 1 6 | popd || exit 1 7 | fi 8 | pushd "$loc" || exit 1 9 | exec javac -cp ../../Utilities/scoring/meteor-1.0/dist/meteor-1.0/meteor.jar MatcherMEMT.java 10 | popd || exit 1 11 | -------------------------------------------------------------------------------- /MEMT/Alignment/match.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | loc="$(dirname $0)" 3 | if [ ! -f "$loc"/MatcherMEMT.class ]; then 4 | "$loc"/compile.sh 1>&2 || exit 1 5 | fi 6 | exec java -Dfile.encoding=UTF8 -cp "$loc":"$loc"/../../Utilities/scoring/meteor-1.0/dist/meteor-1.0/meteor.jar MatcherMEMT "$@" 7 | -------------------------------------------------------------------------------- /MEMT/Controller/Config.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Controller_Config_h 2 | #define _MEMT_Controller_Config_h 3 | 4 | #include 5 | #include 6 | 7 | namespace controller { 8 | struct SentenceTransitionConfig { 9 | size_t requests, decoder_workers; 10 | }; 11 | 12 | struct ConnectionTransitionConfig { 13 | SentenceTransitionConfig sentence; 14 | size_t connections; 15 | size_t output_queue_size; 16 | }; 17 | 18 | struct LMConfig { 19 | std::vector file; 20 | }; 21 | 22 | struct ProcessConfig { 23 | bool daemonize; 24 | bool keep_open; 25 | bool call_setsid; 26 | std::string pidfile, portfile; 27 | }; 28 | 29 | struct 
ServiceConfig { 30 | ConnectionTransitionConfig connection; 31 | LMConfig lm; 32 | ProcessConfig process; 33 | unsigned short int port; 34 | }; 35 | 36 | } // namespace controller 37 | #endif // _MEMT_Controller_Config_h 38 | -------------------------------------------------------------------------------- /MEMT/Controller/CoordWrite.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Controller_CoordWrite_h 2 | #define _MEMT_Controller_CoordWrite_h 3 | 4 | #include 5 | 6 | #include 7 | 8 | // Coordinate writes so output is returned in blocks that the reader understands. 9 | namespace controller { 10 | 11 | class CoordStream { 12 | public: 13 | explicit CoordStream(std::ostream &stream) : stream_(stream) {} 14 | 15 | private: 16 | friend class CoordWrite; 17 | 18 | std::ostream &stream_; 19 | 20 | boost::mutex mutex_; 21 | }; 22 | 23 | class CoordWrite { 24 | public: 25 | explicit CoordWrite(CoordStream &coord) 26 | : stream_(coord.stream_), lock_(coord.mutex_) { 27 | } 28 | 29 | std::ostream &Get() { return stream_; } 30 | 31 | std::ostream &operator*() { return stream_; } 32 | std::ostream *operator->() { return &stream_; } 33 | 34 | operator std::ostream &() { 35 | return stream_; 36 | } 37 | 38 | private: 39 | std::ostream &stream_; 40 | boost::unique_lock lock_; 41 | }; 42 | 43 | } // namespace controller 44 | 45 | #endif // _MEMT_Controller_CoordWrite_h 46 | -------------------------------------------------------------------------------- /MEMT/Controller/Jamfile: -------------------------------------------------------------------------------- 1 | alias decoder_handler : ../Output//null_beam_dumper ../Decoder//decoder ../Strategy/Legacy//legacy ..//..//boost_thread ; 2 | alias output_handler : ../Output//output ..//..//boost_thread ; 3 | alias sentence : decoder_handler output_handler ../Decoder//completed ../Decoder//decoder_config ../Input//input /util//kenutil ; 4 | 5 | alias connection : sentence 
Sentence//controller_sentence_options /util//kenutil ..//..//boost_system ; 6 | 7 | fakelib command_line : CommandLine.cc Sentence//controller_sentence_options ..//..//boost_thread ..//..//boost_program_options ; 8 | 9 | exe MEMT : Main.cc connection command_line ../Feature/Verbatim//verbatim_sentence ../Feature/LM//feature_lm_sentence /util//kenutil /lm//kenlm ; 10 | -------------------------------------------------------------------------------- /MEMT/Controller/Sentence/Config.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Controller_Sentence_Config_h 2 | #define _MEMT_Controller_Sentence_Config_h 3 | 4 | #include "MEMT/Input/Config.hh" 5 | #include "MEMT/Decoder/Config.hh" 6 | #include "MEMT/Strategy/Legacy/Config.hh" 7 | #include "MEMT/Output/Config.hh" 8 | 9 | namespace controller { 10 | namespace sentence { 11 | 12 | struct Config { 13 | size_t num_systems; 14 | input::Config input; 15 | decoder::Config decoder; 16 | strategy::legacy::Config legacy; 17 | output::Config output; 18 | }; 19 | 20 | } // namespace sentence 21 | } // namespace controller 22 | 23 | #endif // _MEMT_Controller_Sentence_Config_h 24 | -------------------------------------------------------------------------------- /MEMT/Controller/Sentence/Jamfile: -------------------------------------------------------------------------------- 1 | fakelib controller_sentence_options : Options.cc /util//kenutil ../../Decoder//decoder_options ../../Input//input_options ../../Strategy/Legacy//strategy_legacy_options ../../Output//output_options ../../..//boost_program_options ; 2 | -------------------------------------------------------------------------------- /MEMT/Controller/Sentence/Options.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Controller_Sentence_Options_h 2 | #define _MEMT_Controller_Sentence_Options_h 3 | 4 | #include "MEMT/Decoder/Options.hh" 5 | #include 
"MEMT/Input/Options.hh" 6 | #include "MEMT/Strategy/Legacy/Options.hh" 7 | #include "MEMT/Output/Options.hh" 8 | 9 | #include "util/options.hh" 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | namespace controller { 19 | namespace sentence { 20 | 21 | class Config; 22 | 23 | class ConfigOptions { 24 | public: 25 | explicit ConfigOptions(Config &config, const std::vector &lm_orders); 26 | 27 | void SetDefaults(); 28 | 29 | const boost::program_options::options_description &Options() const { return options_; } 30 | 31 | void Finish(const boost::program_options::variables_map &vm); 32 | 33 | private: 34 | input::ConfigOptions input_; 35 | decoder::ConfigOptions decoder_; 36 | strategy::legacy::ConfigOptions legacy_; 37 | output::ConfigOptions output_; 38 | 39 | Config &config_; 40 | 41 | boost::program_options::options_description options_; 42 | 43 | bool incremental_; 44 | 45 | std::vector lm_orders_; 46 | }; 47 | 48 | void ConfigCommand(std::istream &stream, ConfigOptions &options); 49 | 50 | } // namespace sentence 51 | } // namespace controller 52 | 53 | #endif // _MEMT_Controller_Sentence_Options_h 54 | -------------------------------------------------------------------------------- /MEMT/Controller/Server.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Controller_Server_h 2 | #define _MEMT_Controller_Server_h 3 | 4 | #include 5 | 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace controller { 13 | 14 | template void RunServer(ConnTransition &transition, boost::asio::ip::tcp::acceptor &acceptor) { 15 | using namespace boost::asio::ip; 16 | 17 | while (1) { 18 | ConnectionRequest *req = NULL; 19 | try { 20 | // Get a socket and make sure it's clear. 21 | req = &transition.GetFree(); 22 | tcp::socket &socket = req->GetSocket(); 23 | socket.close(); 24 | 25 | acceptor.accept(socket); 26 | std::cerr << "Got connection." 
<< std::endl; 27 | transition.Opened(*req); 28 | } 29 | catch (std::exception &e) { 30 | std::cerr << e.what() << std::endl; 31 | if (req) transition.Failed(*req); 32 | } 33 | catch (...) { 34 | std::cerr << "Some server exception" << std::endl; 35 | if (req) transition.Failed(*req); 36 | } 37 | } 38 | } 39 | 40 | } // namespace controller 41 | 42 | #endif // _MEMT_Controller_Server_h 43 | -------------------------------------------------------------------------------- /MEMT/Decoder/Completed.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Decoder_Completed_hh 2 | #define _MEMT_Decoder_Completed_hh 3 | 4 | #include "MEMT/Decoder/Score.hh" 5 | #include "MEMT/Input/Location.hh" 6 | 7 | #include 8 | 9 | namespace decoder { 10 | 11 | // Final hypothesis produced by the decoder. This is the only one that should be used outside. 12 | class CompletedHypothesis { 13 | public: 14 | CompletedHypothesis() {} 15 | 16 | void Reset(const Score &score, const std::vector &end_features) { 17 | score_ = score; 18 | end_features_ = end_features; 19 | words_.clear(); 20 | } 21 | 22 | void AppendWord(const input::Location &source) { 23 | words_.push_back(source); 24 | } 25 | 26 | const std::vector &Words() const { return words_; } 27 | 28 | // If length_normalize is set, this is normalized. 
29 | const Score &GetScore() const { return score_; } 30 | 31 | const std::vector &EndFeatures() const { return end_features_; } 32 | 33 | private: 34 | std::vector words_; 35 | 36 | Score score_; 37 | 38 | std::vector end_features_; 39 | }; 40 | 41 | } // namespace decoder 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /MEMT/Decoder/Config.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Decoder/Config.hh" 2 | 3 | #include "MEMT/Strategy/Phrase/Type.hh" 4 | 5 | namespace decoder { 6 | 7 | std::ostream &operator<<(std::ostream &str, const Config &config) { 8 | return str 9 | << "beam_size = " << config.internal_beam_size << '\n' 10 | << "output.nbest = " << config.end_beam_size << '\n' 11 | << "length_normalize = " << config.length_normalize << '\n'; 12 | } 13 | 14 | } // namespace decoder 15 | -------------------------------------------------------------------------------- /MEMT/Decoder/Config.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Decoder_Config_h 2 | #define _MEMT_Decoder_Config_h 3 | 4 | #include "MEMT/Strategy/Phrase/Type.hh" 5 | #include "MEMT/Strategy/Horizon/Config.hh" 6 | 7 | #include "util/debug.hh" 8 | #include "util/numbers.hh" 9 | 10 | #include 11 | #include 12 | 13 | namespace decoder { 14 | 15 | struct Config { 16 | unsigned int internal_beam_size, end_beam_size; 17 | bool length_normalize; 18 | }; 19 | 20 | std::ostream &operator<<(std::ostream &str, const Config &config); 21 | 22 | } // namespace decoder 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /MEMT/Decoder/HistoryBeam.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Decoder/HistoryBeam.hh" 2 | 3 | #include 4 | 5 | namespace decoder { 6 | 7 | void MergeSizeOneHistoryBeam(HistoryBeam &into, const HistoryBeam 
&with) { 8 | assert(with.size() == 1); 9 | const boost::shared_ptr &hist = *with.unordered_begin(); 10 | if (into.MayMakeIt(hist)) { 11 | into.Available() = hist; 12 | into.InsertAvailable(); 13 | } 14 | } 15 | 16 | void DumpBeamToHypHistory(HistoryBeam &in, HypHistory &out) { 17 | HypHistory::Previous &previous = out.MutablePrevious(); 18 | previous.clear(); 19 | previous.reserve(in.size()); 20 | 21 | in.destructive_ordered_make(); 22 | for (HistoryBeam::decreasing_iterator i = in.destructive_decreasing_begin(); i != in.destructive_decreasing_end(); ++i) { 23 | previous.push_back(*i); 24 | } 25 | out.MakeHash(); 26 | } 27 | 28 | } // namespace decoder 29 | -------------------------------------------------------------------------------- /MEMT/Decoder/HistoryBeam.hh: -------------------------------------------------------------------------------- 1 | #include "MEMT/Decoder/History.hh" 2 | #include "util/n_best.hh" 3 | 4 | #include 5 | 6 | #include 7 | 8 | /* When hypotheses are LM dupe detected, they get passed to this beam, which 9 | * does a secondary full equality dupe removal and packs the hypotheses. 
10 | */ 11 | 12 | namespace decoder { 13 | 14 | namespace detail { 15 | 16 | struct HistoryLess : public std::binary_function &, const boost::shared_ptr &, bool> { 17 | HistoryLess() : less_() {} 18 | 19 | bool operator()(const boost::shared_ptr &left, const boost::shared_ptr &right) const { 20 | return less_(left->Entry(), right->Entry()); 21 | } 22 | private: 23 | const HistoryEntry::LessByScore less_; 24 | }; 25 | 26 | } // namespace detail 27 | 28 | typedef nbest::NBest< 29 | boost::shared_ptr, 30 | detail::HistoryLess, 31 | nbest::HashDupe, HypHistory::ReturnHash, HypHistory::EqualsHashOnly>, 32 | nbest::OneBestMerge, detail::HistoryLess> > HistoryBeam; 33 | 34 | void MergeSizeOneHistoryBeam(HistoryBeam &into, const HistoryBeam &with); 35 | 36 | void DumpBeamToHypHistory(HistoryBeam &in, HypHistory &out); 37 | 38 | } // namespace decoder 39 | -------------------------------------------------------------------------------- /MEMT/Decoder/InternalBeam.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Decoder_InternalBeam_h 2 | #define _MEMT_Decoder_InternalBeam_h 3 | 4 | #include "util/n_best.hh" 5 | 6 | #include 7 | 8 | namespace decoder { 9 | 10 | namespace detail { 11 | 12 | template struct CallMerge : public std::binary_function { 13 | bool operator()(InternalHypothesisT &to, const InternalHypothesisT &with) const { 14 | return to.Merge(with); 15 | } 16 | }; 17 | 18 | } // namespace detail 19 | 20 | template struct InternalBeam { 21 | typedef nbest::NBest< 22 | Hyp, 23 | typename Hyp::LessByOverall, 24 | nbest::HashDupe, 25 | detail::CallMerge 26 | > T; 27 | }; 28 | 29 | } // namespace decoder 30 | 31 | #endif 32 | 33 | -------------------------------------------------------------------------------- /MEMT/Decoder/Jamfile: -------------------------------------------------------------------------------- 1 | alias completed : /util//kenutil ; 2 | 3 | fakelib decoder_config : Config.cc /util//kenutil ; 4 | 5 | 
fakelib decoder_options : Options.cc ../Strategy/Horizon//strategy_horizon_options /util//kenutil ../..//boost_program_options ;
6 |
7 | fakelib decoder
8 |   : decoder_config EndBeam.cc HistoryBeam.cc Score.cc ../Input//input /util//kenutil ;
9 |
--------------------------------------------------------------------------------
/MEMT/Decoder/Options.cc:
--------------------------------------------------------------------------------
1 | #include "MEMT/Decoder/Options.hh"
2 |
3 | #include "MEMT/Decoder/Config.hh"
4 |
5 | #include "util/options.hh"
6 |
7 | #include
8 |
9 | namespace decoder {
10 | // Register "beam_size" and "length_normalize" so parsed values land directly in `config`, then apply the defaults.
11 | ConfigOptions::ConfigOptions(Config &config)
12 |   : config_(config), options_("Decoding"), incremental_(false) {
13 |   namespace po = boost::program_options;
14 |
15 |   options_.add_options()
16 |     ("beam_size",
17 |      po::value(&config_.internal_beam_size),
18 |      "Size of the decoder's internal search beam")
19 |
20 |     ("length_normalize",
21 |      po::value(&config_.length_normalize),
22 |      "Length normalize before comparing sentence end scores?");
23 |
24 |   SetDefaults();
25 | }
26 | // Reset to the built-in defaults: internal beam 500, end beam 1, length normalization on.
27 | void ConfigOptions::SetDefaults() {
28 |   // Defaults are set here because configuration messages may be updates.
29 |   config_.internal_beam_size = 500;
30 |   config_.end_beam_size = 1;
31 |   config_.length_normalize = true;
32 |   incremental_ = false;
33 | }
34 | // Record the caller-supplied end beam size; `vm` is not read by this function.
35 | void ConfigOptions::Finish(const boost::program_options::variables_map &vm, unsigned int end_beam_size) {
36 |   config_.end_beam_size = end_beam_size;
37 | }
38 |
39 | } // namespace decoder
40 |
--------------------------------------------------------------------------------
/MEMT/Decoder/Options.hh:
--------------------------------------------------------------------------------
1 | #ifndef _MEMT_Decoder_Options_h
2 | #define _MEMT_Decoder_Options_h
3 |
4 | #include "MEMT/Strategy/Horizon/Options.hh"
5 |
6 | #include
7 | #include
8 |
9 | namespace decoder {
10 |
11 | class Config;
12 | // Binds the decoder's command-line/config options to a Config owned by the caller (held by reference).
13 | class ConfigOptions {
14 |   public:
15 |     explicit ConfigOptions(Config &config);
16 |     // Restore the built-in default values in the bound Config.
17 |     void SetDefaults();
18 |     // The option descriptions registered by the constructor.
19 |     const boost::program_options::options_description &Options() const { return options_; }
20 |     // Called after parsing; stores end_beam_size into the bound Config.
21 |     void Finish(const boost::program_options::variables_map &vm, unsigned int end_beam_size);
22 |
23 |   private:
24 |     Config &config_;
25 |
26 |     boost::program_options::options_description options_;
27 |     // Written by the constructor and SetDefaults(); never read in the code shown here.
28 |     bool incremental_;
29 | };
30 |
31 | } // namespace decoder
32 |
33 | #endif // _MEMT_Decoder_Options_h
34 |
--------------------------------------------------------------------------------
/MEMT/Decoder/Score.cc:
--------------------------------------------------------------------------------
1 | #include "MEMT/Decoder/Score.hh"
2 |
3 | namespace decoder {
4 | // Writes overall=<log>, features = "<log> <log> ..." with the feature logs separated by single spaces.
5 | std::ostream &operator<<(std::ostream &s, const Score &score) {
6 |   s << "overall=" << score.Overall().Log();
7 |   s << ", features = \"";
8 |   for (std::vector::const_iterator i = score.Features().begin(); i != score.Features().end(); ++i) {
9 |     if (i != score.Features().begin()) s << ' ';  // separator before every feature except the first
10 |     s << i->Log();
11 |   }
12 |   s << '"';
13 |   return s;
14 | }
15 |
16 | } // namespace decoder
17 |
--------------------------------------------------------------------------------
/MEMT/Feature/Base/Process.hh:
--------------------------------------------------------------------------------
1 | #ifndef _MEMT_Feature_Base_Process_h
2 | #define _MEMT_Feature_Base_Process_h
3 |
4 | namespace feature {
5 | namespace base {
6 |
7 | // process-level class for features that don't keep process-level state
8 | template class NullProcess {
9 |   public:
10 |     typedef SentenceT Sentence;
11 |     struct Config {};
12 |     // Nothing to set up: the class has no data members.
13 |     NullProcess() {}
14 |     // Hand back a default-constructed Sentence on every call.
15 |     Sentence GetSentence() const {
16 |       return Sentence();
17 |     }
18 | };
19 |
20 | } // namespace base
21 | } // namespace feature
22 | #endif // _MEMT_Feature_Base_Process_h
23 |
--------------------------------------------------------------------------------
/MEMT/Feature/Base/Sign.hh:
--------------------------------------------------------------------------------
1 | #ifndef _MEMT_Feature_Base_Sign_h
2 | #define _MEMT_Feature_Base_Sign_h
3 |
4 | namespace feature {
5 | namespace base {
6 |
7 | // Optimizer sign constraints. These are not enforced by MEMT but passed to the client if requested. That way the client doesn't have to know about positional features.
8 | typedef enum {ANY_WEIGHT=0, POSITIVE_WEIGHT=1, NEGATIVE_WEIGHT=-1} WeightSign; 9 | 10 | } // namespace base 11 | } // namesapce feature 12 | 13 | #endif // _MEMT_Feature_Base_Sign_H 14 | -------------------------------------------------------------------------------- /MEMT/Feature/LM/Config.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Feature/LM/Config.hh" 2 | 3 | namespace feature { 4 | namespace lm { 5 | 6 | void Config::WeightHint(base::WeightSign *out) const { 7 | for (std::vector::const_iterator l = orders.begin(); l != orders.end(); ++l) { 8 | // probability 9 | *(out++) = base::POSITIVE_WEIGHT; 10 | // 11 | *(out++) = base::ANY_WEIGHT; 12 | if (by_length) { 13 | for (unsigned char i = 1; i < *l; ++i) 14 | // length count 15 | *(out++) = base::ANY_WEIGHT; 16 | } 17 | } 18 | } 19 | 20 | } // namespace lm 21 | } // namespace feature 22 | -------------------------------------------------------------------------------- /MEMT/Feature/LM/Config.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_LM_Config_h 2 | #define _MEMT_Feature_LM_Config_h 3 | 4 | #include "MEMT/Feature/Base/Sign.hh" 5 | 6 | #include "util/numbers.hh" 7 | 8 | #include 9 | 10 | namespace feature { 11 | namespace lm { 12 | 13 | struct Config { 14 | // Include counts for each n-gram length? 
15 | bool by_length; 16 | std::vector orders; 17 | size_t count; 18 | size_t FeatureCount() const { return count; } 19 | void WeightHint(base::WeightSign *out) const; 20 | }; 21 | 22 | } // namespace lm 23 | } // namespace feature 24 | 25 | #endif // _MEMT_Feature_LM_Config_h 26 | -------------------------------------------------------------------------------- /MEMT/Feature/LM/Hypothesis.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_LM_Hypothesis_h 2 | #define _MEMT_Feature_LM_Hypothesis_h 3 | 4 | namespace feature { 5 | namespace lm { 6 | 7 | template struct Hypothesis { 8 | typedef typename LanguageModel::State T; 9 | }; 10 | 11 | } // namespace lm 12 | } // namespace feature 13 | 14 | #endif // _MEMT_Feature_LM_Hypothesis_h 15 | -------------------------------------------------------------------------------- /MEMT/Feature/LM/Jamfile: -------------------------------------------------------------------------------- 1 | fakelib feature_lm_config : Config.cc ; 2 | 3 | fakelib feature_lm_sentence : Sentence.cc feature_lm_config /util//kenutil ; 4 | 5 | fakelib feature_lm_options : Options.cc feature_lm_config ../../..//boost_program_options ; 6 | -------------------------------------------------------------------------------- /MEMT/Feature/LM/Options.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Feature/LM/Options.hh" 2 | 3 | #include "MEMT/Feature/LM/Config.hh" 4 | 5 | namespace feature { 6 | namespace lm { 7 | 8 | ConfigOptions::ConfigOptions(Config &config) : config_(config), options_("LM feature") { 9 | options_.add_options() 10 | ("score.lm.by_length", boost::program_options::value(&config_.by_length), "Report counts for each n-gram length as a feature?"); 11 | } 12 | 13 | void ConfigOptions::SetDefaults() { 14 | config_.by_length = false; 15 | } 16 | 17 | void ConfigOptions::Finish(const boost::program_options::variables_map &vm, size_t 
num_systems, const std::vector &lm_order) { 18 | config_.orders = lm_order; 19 | config_.count = 2 * lm_order.size(); 20 | if (config_.by_length) { 21 | // Add a feature for all but longest order 22 | for (std::vector::const_iterator i = lm_order.begin(); i != lm_order.end(); ++i) { 23 | config_.count += *i; 24 | } 25 | config_.count -= lm_order.size(); 26 | } 27 | } 28 | 29 | } // namespace lm 30 | } // namespace feature 31 | -------------------------------------------------------------------------------- /MEMT/Feature/LM/Options.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_LM_Options_h 2 | #define _MEMT_Feature_LM_Options_h 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | namespace feature { 10 | namespace lm { 11 | 12 | class Config; 13 | 14 | class ConfigOptions { 15 | public: 16 | explicit ConfigOptions(Config &config); 17 | 18 | void SetDefaults(); 19 | 20 | const boost::program_options::options_description &Options() const { return options_; } 21 | 22 | void Finish(const boost::program_options::variables_map &vm, size_t num_systems, const std::vector &lm_order); 23 | 24 | private: 25 | Config &config_; 26 | 27 | boost::program_options::options_description options_; 28 | }; 29 | 30 | } // namespace lm 31 | } // namespace feature 32 | 33 | #endif // _MEMT_Feature_LM_Options_h 34 | -------------------------------------------------------------------------------- /MEMT/Feature/LM/Process.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_LM_Process_h 2 | #define _MEMT_Feature_LM_Process_h 3 | 4 | #include "MEMT/Feature/LM/Sentence.hh" 5 | 6 | #include 7 | 8 | namespace feature { 9 | namespace lm { 10 | 11 | /* TODO: move more lm configuration here. 
*/ 12 | template class Process { 13 | public: 14 | typedef LanguageModelT LanguageModel; 15 | typedef lm::Sentence Sentence; 16 | 17 | struct Config {}; 18 | 19 | // Workaround for constructing vector of process objects then configuring them 20 | Process() {} 21 | void SetLM(const std::vector &models) { models_ = models; } 22 | 23 | Sentence GetSentence() const { return Sentence(models_); } 24 | 25 | std::vector Orders() const { 26 | std::vector ret; 27 | for (size_t i = 0; i < models_.size(); ++i) { 28 | ret.push_back(models_[i]->Order()); 29 | } 30 | return ret; 31 | } 32 | 33 | private: 34 | std::vector models_; 35 | }; 36 | 37 | } // namespace lm 38 | } // namespace feature 39 | 40 | #endif // _MEMT_Feature_LM_Process_h 41 | -------------------------------------------------------------------------------- /MEMT/Feature/LM/Sentence.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Feature/LM/Sentence.hh" 2 | 3 | #include "MEMT/Input/Input.hh" 4 | 5 | namespace feature { 6 | namespace lm { 7 | 8 | void LookupVocab(const input::Input &in, const ::lm::base::Vocabulary &vocab, std::vector > &indices) { 9 | indices.resize(in.engines.size()); 10 | for (unsigned int e = 0; e < in.engines.size(); ++e) { 11 | indices[e].resize(in.engines[e].words.size()); 12 | for (unsigned int o = 0; o < in.engines[e].words.size(); ++o) { 13 | indices[e][o] = vocab.Index(in.engines[e].words[o].text.Canonical()); 14 | } 15 | } 16 | } 17 | 18 | } // namespace lm 19 | } // namespace feature 20 | -------------------------------------------------------------------------------- /MEMT/Feature/Length/Config.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_Length_Config_h 2 | #define _MEMT_Feature_Length_Config_h 3 | 4 | #include "MEMT/Feature/Base/Sign.hh" 5 | 6 | namespace feature { 7 | namespace length { 8 | 9 | struct Config { 10 | unsigned FeatureCount() const { return 1; } 11 
| void WeightHint(base::WeightSign *out) const { *out = base::ANY_WEIGHT; } 12 | }; 13 | 14 | } // namespace length 15 | } // namespace feature 16 | 17 | #endif // _MEMT_Feature_Length_Config_h 18 | -------------------------------------------------------------------------------- /MEMT/Feature/Length/Hypothesis.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_Length_Hypothesis_h 2 | #define _MEMT_Feature_Length_Hypothesis_h 3 | 4 | namespace feature { 5 | namespace length { 6 | 7 | struct Hypothesis {}; 8 | 9 | bool operator==(const Hypothesis left, const Hypothesis right) { 10 | return true; 11 | } 12 | 13 | size_t hash_value(const Hypothesis value) { 14 | // Mashing on keyboard. 15 | return 415648974; 16 | } 17 | 18 | } // namespace length 19 | } // namespace feature 20 | 21 | #endif // _MEMT_Feature_Length_Hypothesis_h 22 | -------------------------------------------------------------------------------- /MEMT/Feature/Length/Process.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_Length_h 2 | #define _MEMT_Feature_Length_h 3 | 4 | #include "MEMT/Feature/Base/Process.hh" 5 | #include "MEMT/Feature/Length/Sentence.hh" 6 | 7 | namespace feature { namespace length { 8 | typedef base::NullProcess Process; 9 | } } // namespace length feature 10 | 11 | #endif // _MEMT_Feature_Length_h 12 | -------------------------------------------------------------------------------- /MEMT/Feature/Length/Sentence.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_Length_Sentence_h 2 | #define _MEMT_Feature_Length_Sentence_h 3 | 4 | #include "MEMT/Feature/Length/Config.hh" 5 | #include "MEMT/Feature/Length/Hypothesis.hh" 6 | 7 | namespace feature { 8 | namespace length { 9 | 10 | class Sentence { 11 | public: 12 | typedef length::Hypothesis Hypothesis; 13 | typedef length::Config Config; 14 | 15 | 
Sentence() {} 16 | 17 | void Reset(const Config &config, const input::Input &input) {} 18 | 19 | size_t BothFeatures() const { return 0; } 20 | size_t EndFeatures() const { return 1; } 21 | 22 | void Begin(Hypothesis &start_state, LogScore *start_scores) const {} 23 | 24 | void Extend( 25 | const input::Input &input, 26 | const decoder::HypHistory *history, 27 | const input::Location &append, 28 | const Hypothesis &from_state, 29 | const LogScore *from_scores, 30 | Hypothesis &to_state, 31 | LogScore *to_scores) const {} 32 | 33 | void End(size_t length, LogScore *out) const { 34 | out->MutableLog() = static_cast(length); 35 | } 36 | }; 37 | 38 | } // namespace length 39 | } // namespace feature 40 | 41 | #endif // _MEMT_Feature_Length_Sentence_h 42 | -------------------------------------------------------------------------------- /MEMT/Feature/Scorer/Config.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Feature/Scorer/Config.hh" 2 | 3 | #include 4 | 5 | namespace feature { 6 | namespace scorer { 7 | 8 | namespace { 9 | struct GatherWeightHints { 10 | typedef base::WeightSign *result_type; 11 | 12 | template result_type operator()(const result_type previous, const Feature &feature) const { 13 | feature.WeightHint(previous); 14 | return previous + feature.FeatureCount(); 15 | } 16 | }; 17 | 18 | struct FeatureCountFold { 19 | typedef unsigned result_type; 20 | template result_type operator()(const unsigned previous, const Feature &feature) const { 21 | return previous + feature.FeatureCount(); 22 | } 23 | }; 24 | } // namespace 25 | 26 | void Config::WeightHint(base::WeightSign *out) const { 27 | fold(features, out, GatherWeightHints()); 28 | } 29 | 30 | unsigned Config::FeatureCount() const { 31 | return fold(features, 0, FeatureCountFold()); 32 | } 33 | 34 | } // namespace scorer 35 | } // namespace feature 36 | -------------------------------------------------------------------------------- 
/MEMT/Feature/Scorer/Config.hh:
--------------------------------------------------------------------------------
1 | #ifndef _MEMT_Feature_Scorer_Config_h
2 | #define _MEMT_Feature_Scorer_Config_h
3 |
4 | #include "MEMT/Feature/Base/Sign.hh"
5 | #include "MEMT/Feature/Length/Config.hh"
6 | #include "MEMT/Feature/LM/Config.hh"
7 | #include "MEMT/Feature/Verbatim/Config.hh"
8 |
9 | #include
10 |
11 | namespace feature {
12 | namespace scorer {
13 | // Aggregate scorer settings: the per-feature configs, the weight vector and the fuzz ratio.
14 | struct Config {
15 |   // TODO: this should be tied to main's idea of the features.
16 |   typedef boost::fusion::vector Features;
17 |   Features features;
18 |   std::vector weights;
19 |   LinearScore fuzz_ratio;
20 |   // Sum of FeatureCount() over `features`; defined in Config.cc.
21 |   unsigned FeatureCount() const;
22 |   // Writes FeatureCount() sign hints into `out`; defined in Config.cc.
23 |   void WeightHint(base::WeightSign *out) const;
24 | };
25 |
26 | } // namespace scorer
27 | } // namespace feature
28 |
29 | #endif // _MEMT_Feature_Scorer_Config_h
30 |
--------------------------------------------------------------------------------
/MEMT/Feature/Scorer/Fuzz.cc:
--------------------------------------------------------------------------------
1 | #include "MEMT/Feature/Scorer/Fuzz.hh"
2 |
3 | #include "util/numbers.hh"
4 |
5 | #include
6 | #include
7 | #include
8 |
9 | #include
10 |
11 | namespace feature {
12 | namespace scorer {
13 | // Copy in_weights to out_weights, scaling each weight by an independent sample from [1 - ratio_, 1 + ratio_].
14 | void Fuzz::Apply(const std::vector &in_weights, std::vector &out_weights) {
15 |   if (ratio_ <= 0.0) {  // non-positive ratio: plain copy, no sampling
16 |     out_weights = in_weights;
17 |     return;
18 |   }
19 |   boost::uniform_real dist(1.0 - ratio_, 1.0 + ratio_);
20 |   boost::variate_generator > sample(rng_, dist);
21 |
22 |   out_weights.clear();
23 |   for (std::vector::const_iterator i = in_weights.begin(); i != in_weights.end(); ++i) {
24 |     out_weights.push_back(*i * sample());
25 |   }
26 | }
27 |
28 | } // namespace scorer
29 | } // namespace feature
30 |
--------------------------------------------------------------------------------
/MEMT/Feature/Scorer/Fuzz.hh:
--------------------------------------------------------------------------------
1 | #ifndef _MEMT_Feature_Scorer_Fuzz_h
2 | #define _MEMT_Feature_Scorer_Fuzz_h
3 |
4 | // Randomly modifies weights based on their existing values. This is used in
5 | // a basic simulated annealing for tuning.
6 |
7 | #include "util/numbers.hh"
8 |
9 | #include
10 | #include
11 |
12 | #include
13 |
14 | namespace feature {
15 | namespace scorer {
16 | // NOTE(review): ratio_ has no initializer in this class and Apply() reads it, so Reset() presumably must be called first - confirm callers.
17 | class Fuzz {
18 |   public:
19 |     void Reset(LinearScore ratio) { ratio_ = ratio; }
20 |     // Produce a perturbed copy of in_weights in out_weights; defined in Fuzz.cc.
21 |     void Apply(const std::vector &in_weights, std::vector &out_weights);
22 |
23 |   private:
24 |     LinearScore ratio_;
25 |     // Default-constructed generator; this header never reseeds it.
26 |     boost::mt19937 rng_;
27 | };
28 |
29 | } // namespace scorer
30 | } // namespace feature
31 |
32 | #endif // _MEMT_Feature_Scorer_Fuzz_h
33 |
--------------------------------------------------------------------------------
/MEMT/Feature/Scorer/Hypothesis.hh:
--------------------------------------------------------------------------------
1 | #ifndef _MEMT_Feature_Scorer_Hypothesis_h
2 | #define _MEMT_Feature_Scorer_Hypothesis_h
3 |
4 | #include "MEMT/Feature/LM/Hypothesis.hh"
5 | #include "MEMT/Feature/Verbatim/Hypothesis.hh"
6 |
7 | // Not used directly here, but clients expect hash_value.
8 | #include "util/hash_fusion.hh" 9 | 10 | #include 11 | // Not used directly here, but clients expect == 12 | #include 13 | #include 14 | 15 | namespace feature { 16 | namespace scorer { 17 | 18 | namespace detail { 19 | template struct HypothesisOp { 20 | typedef typename T::Hypothesis type; 21 | }; 22 | } // namespace detail 23 | 24 | template struct Hypothesis { 25 | // Convert a vector of Sentence objects into their ::Hypothesis objects 26 | typedef typename boost::mpl::transform >::type type; 27 | }; 28 | 29 | } // namespace scorer 30 | } // namespace feature 31 | 32 | #endif // _MEMT_Feature_Scorer_Hypothesis_h 33 | -------------------------------------------------------------------------------- /MEMT/Feature/Scorer/Jamfile: -------------------------------------------------------------------------------- 1 | fakelib feature_scorer_options : Options.cc ../LM//feature_lm_options ../Verbatim//feature_verbatim_options /util//kenutil ; 2 | 3 | fakelib scorer : Config.cc Fuzz.cc ../LM//feature_lm_config /util//kenutil ; 4 | -------------------------------------------------------------------------------- /MEMT/Feature/Scorer/Options.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_Scorer_Options_h 2 | #define _MEMT_Feature_Scorer_Options_h 3 | 4 | #include "MEMT/Feature/LM/Options.hh" 5 | #include "MEMT/Feature/Verbatim/Options.hh" 6 | 7 | #include "util/numbers.hh" 8 | #include "util/options.hh" 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | namespace feature { 16 | namespace scorer { 17 | 18 | class Config; 19 | 20 | class WeightCountMismatch : public util::ArgumentParseError { 21 | public: 22 | WeightCountMismatch(size_t expected, size_t provided); 23 | virtual ~WeightCountMismatch() throw() {} 24 | }; 25 | 26 | class ConfigOptions { 27 | public: 28 | explicit ConfigOptions(Config &config); 29 | 30 | void SetDefaults(); 31 | 32 | const boost::program_options::options_description 
&Options() const { return options_; } 33 | 34 | void Finish(const boost::program_options::variables_map &vm, size_t num_systems, const std::vector &lm_order); 35 | 36 | private: 37 | lm::ConfigOptions lm_; 38 | verbatim::ConfigOptions verbatim0_, verbatim1_; 39 | 40 | Config &config_; 41 | 42 | boost::program_options::options_description options_; 43 | 44 | bool incremental_; 45 | 46 | std::string weight_string_; 47 | }; 48 | 49 | } // namespace scorer 50 | } // namespace feature 51 | 52 | #endif // _MEMT_Feature_Scorer_Options_h 53 | -------------------------------------------------------------------------------- /MEMT/Feature/Verbatim/Config.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_Verbatim_Config_h 2 | #define _MEMT_Feature_Verbatim_Config_h 3 | 4 | #include "MEMT/Feature/Base/Sign.hh" 5 | #include "MEMT/Input/Alignment.hh" 6 | 7 | #include "util/numbers.hh" 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | namespace feature { 15 | namespace verbatim { 16 | 17 | struct Config { 18 | std::size_t num_systems; 19 | // ngram length for individual (per-system) scores 20 | std::size_t individual; 21 | // max ngram length for equal weight scores. This should be >= individual. 22 | std::size_t collective; 23 | 24 | // Mask of alignments that count as supporting. 
25 | input::AlignType mask; 26 | 27 | size_t FeatureCount() const { 28 | assert(collective >= individual); 29 | assert(num_systems != 0); 30 | return num_systems * individual + collective - individual; 31 | } 32 | 33 | void WeightHint(base::WeightSign *out) const { 34 | base::WeightSign *end = out + FeatureCount(); 35 | for (; out != end; ++out) *out = base::POSITIVE_WEIGHT; 36 | } 37 | }; 38 | 39 | } // namespace verbatim 40 | } // namespace feature 41 | 42 | #endif // _MEMT_Feature_Verbatim_Config_h 43 | -------------------------------------------------------------------------------- /MEMT/Feature/Verbatim/Hypothesis.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_Verbatim_Hypothesis_h 2 | #define _MEMT_Feature_Verbatim_Hypothesis_h 3 | 4 | namespace feature { 5 | namespace verbatim { 6 | 7 | typedef std::vector Hypothesis; 8 | 9 | } // namespace verbatim 10 | } // namespace feature 11 | 12 | #endif // _MEMT_Feature_Verbatim_Hypothesis_h 13 | -------------------------------------------------------------------------------- /MEMT/Feature/Verbatim/Jamfile: -------------------------------------------------------------------------------- 1 | fakelib feature_verbatim_options : Options.cc ../../Input//align_type /util//kenutil ; 2 | fakelib verbatim_sentence : Sentence.cc ../../Input//input ; 3 | -------------------------------------------------------------------------------- /MEMT/Feature/Verbatim/Options.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_Verbatim_Options_h 2 | #define _MEMT_Feature_Verbatim_Options_h 3 | 4 | #include "util/options.hh" 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | namespace feature { 12 | namespace verbatim { 13 | 14 | class Config; 15 | 16 | class VerbatimIndividualExceedsCollective : public util::ArgumentParseError { 17 | public: 18 | VerbatimIndividualExceedsCollective(size_t individual, size_t 
collective); 19 | virtual ~VerbatimIndividualExceedsCollective() throw() {} 20 | }; 21 | 22 | class ConfigOptions { 23 | public: 24 | explicit ConfigOptions(Config &config, const char *prefix = "score.verbatim"); 25 | 26 | void SetDefaults(); 27 | 28 | const boost::program_options::options_description &Options() const { return options_; } 29 | 30 | void Finish(const boost::program_options::variables_map &vm, size_t num_systems); 31 | 32 | private: 33 | Config &config_; 34 | 35 | boost::program_options::options_description options_; 36 | }; 37 | 38 | } // namespace verbatim 39 | } // namespace feature 40 | 41 | #endif // _MEMT_Feature_Verbatim_Options_h 42 | -------------------------------------------------------------------------------- /MEMT/Feature/Verbatim/Process.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_Verbatim_h 2 | #define _MEMT_Feature_Verbatim_h 3 | 4 | #include "MEMT/Feature/Base/Process.hh" 5 | #include "MEMT/Feature/Verbatim/Sentence.hh" 6 | 7 | namespace feature { namespace verbatim { 8 | typedef base::NullProcess Process; 9 | } } // namespace verbatim feature 10 | 11 | #endif // _MEMT_Feature_Verbatim_h 12 | -------------------------------------------------------------------------------- /MEMT/Feature/Verbatim/Sentence.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Feature_Verbatim_Sentence_h 2 | #define _MEMT_Feature_Verbatim_Sentence_h 3 | 4 | #include "MEMT/Feature/Verbatim/Config.hh" 5 | #include "MEMT/Feature/Verbatim/Hypothesis.hh" 6 | 7 | #include "util/numbers.hh" 8 | 9 | #include 10 | #include 11 | 12 | namespace input { class Input; class Location; } 13 | namespace decoder { class HypHistory; } 14 | 15 | namespace feature { 16 | namespace verbatim { 17 | 18 | class Sentence { 19 | public: 20 | typedef ::feature::verbatim::Config Config; 21 | typedef ::feature::verbatim::Hypothesis Hypothesis; 22 | 23 | Sentence() {} 
24 | 25 | void Reset(const Config &config, const input::Input &input) { 26 | config_ = config; 27 | } 28 | 29 | size_t BothFeatures() const { 30 | return config_.FeatureCount(); 31 | } 32 | size_t EndFeatures() const { return 0; } 33 | 34 | void Begin(Hypothesis &start_state, LogScore *start_scores) const; 35 | 36 | void Extend( 37 | const input::Input &input, 38 | const decoder::HypHistory *history, 39 | const input::Location &append, 40 | const Hypothesis &from_state, 41 | const LogScore *from_scores, 42 | Hypothesis &to_state, 43 | LogScore *to_scores) const; 44 | 45 | void End(size_t length, LogScore *out) const {} 46 | 47 | private: 48 | Config config_; 49 | }; 50 | 51 | } // namespace verbatim 52 | } // namespace feature 53 | 54 | #endif // _MEMT_Feature_Verbatim_Sentence_h 55 | -------------------------------------------------------------------------------- /MEMT/Input/AlignType.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Input/AlignType.hh" 2 | #include "util/string_piece_hash.hh" 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | namespace input { 10 | 11 | const char *kAlignTypeNames[AL_COUNT] = {"unknown", "exact", "snowball_stem", "wn_stem", "wn_synonymy", "paraphrase", "artificial", "self", "transitive", "boundary"}; 12 | 13 | NotAlignmentTypeName::NotAlignmentTypeName(const StringPiece &name) throw() { 14 | what_ = "Not an alignment type: "; 15 | what_.append(name.data(), name.length()); 16 | } 17 | 18 | namespace { 19 | std::auto_ptr > strings_to_types; 20 | 21 | void InitializeAlign() { 22 | strings_to_types.reset(new boost::unordered_map()); 23 | for (AlignType i = 0; i < AL_COUNT; ++i) { 24 | (*strings_to_types)[kAlignTypeNames[i]] = 1 << i; 25 | } 26 | } 27 | 28 | boost::once_flag strings_to_types_flag = BOOST_ONCE_INIT; 29 | 30 | } // namespace 31 | 32 | AlignType TypeFromName(const StringPiece &name) { 33 | call_once(strings_to_types_flag, InitializeAlign); 34 | 
boost::unordered_map::const_iterator i(strings_to_types->find(name)); 35 | if (i == strings_to_types->end()) throw NotAlignmentTypeName(name); 36 | return i->second; 37 | } 38 | 39 | } // namespace input 40 | -------------------------------------------------------------------------------- /MEMT/Input/Alignment.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Input/Alignment.hh" 2 | 3 | using namespace std; 4 | 5 | namespace input { 6 | 7 | const AlignType AL_IGNORE_SCORE = AL_ARTIFICIAL | AL_TRANSITIVE; 8 | 9 | inline bool ListenToScore(AlignType type) { 10 | return type & ~AL_IGNORE_SCORE; 11 | } 12 | 13 | void WordAlignments::Add(unsigned int engine, unsigned int offset, AlignType type) { 14 | assert(alignments_[engine].IsNone() || (alignments_[engine].offset == offset)); 15 | alignments_[engine].type |= type; 16 | alignments_[engine].offset = offset; 17 | } 18 | 19 | } // namespace input 20 | -------------------------------------------------------------------------------- /MEMT/Input/Capitalization.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Input_Capitalization_h 2 | #define _MEMT_Input_Capitalization_h 3 | 4 | namespace input { 5 | class Input; 6 | 7 | void ApplyCapitalization(Input &input); 8 | 9 | } // namespace input 10 | #endif // _MEMT_Input_Capitalization_h 11 | -------------------------------------------------------------------------------- /MEMT/Input/Config.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Input_Config_h 2 | #define _MEMT_Input_Config_h 3 | 4 | #include "util/numbers.hh" 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace input { 11 | 12 | struct Config { 13 | bool transitive; 14 | 15 | // TODO: this really belongs with the LM feature. 
16 | bool lowercase_before_lm; 17 | }; 18 | 19 | } // namespace input 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /MEMT/Input/Dump.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "MEMT/Input/Config.hh" 4 | #include "MEMT/Input/Format.hh" 5 | #include "MEMT/Input/Input.hh" 6 | #include "MEMT/Input/ReadFromJava.hh" 7 | #include "util/numbers.hh" 8 | 9 | using namespace std; 10 | 11 | int main() { 12 | input::Input input; 13 | input::Config config; 14 | config.lowercase_before_lm = false; 15 | config.transitive = false; 16 | unsigned int sentence_num = 0; 17 | while (1) { 18 | // TODO: command line option for number of systems. 19 | try { 20 | input::ReadFromJava(config, cin, input, 0); 21 | } 22 | catch (std::ios_base::failure &f) { 23 | break; 24 | } 25 | if (input.engines.size() < 2) { 26 | std::cerr << "Not enough engines." << std::endl; 27 | continue; 28 | } 29 | LaTeXAlignment(std::cout, "First", input.engines[0], "Second", input.engines[1]); 30 | ++sentence_num; 31 | } 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /MEMT/Input/Factory.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Input_Factory_h 2 | #define _MEMT_Input_Factory_h 3 | 4 | #include "MEMT/Input/Capitalization.hh" 5 | #include "MEMT/Input/Config.hh" 6 | #include "MEMT/Input/Transitive.hh" 7 | 8 | namespace input { 9 | 10 | class Input; 11 | 12 | void ProcessAligned( 13 | const Config &config, 14 | Input &input) { 15 | if (config.transitive) MakeAlignmentsTransitive(input); 16 | ApplyCapitalization(input); 17 | } 18 | 19 | } // namespace input 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /MEMT/Input/Format.hh: -------------------------------------------------------------------------------- 1 | #ifndef 
_MEMT_Input_Format_h 2 | #define _MEMT_Input_Format_h 3 | 4 | #include 5 | 6 | #include "MEMT/Input/Alignment.hh" 7 | 8 | namespace input { 9 | 10 | class WordText; 11 | struct Word; 12 | struct Engine; 13 | struct Input; 14 | 15 | std::ostream &operator<<(std::ostream &str, const WordText &text); 16 | 17 | std::ostream &AlignTypeOut(std::ostream &str, const AlignType &type, char delim = ' '); 18 | std::ostream &operator<<(std::ostream &str, const WordAlignments &align); 19 | 20 | std::ostream &operator<<(std::ostream &str, const Word &word); 21 | std::ostream &operator<<(std::ostream &str, const Engine &engine); 22 | std::ostream &operator<<(std::ostream &str, const Input &input); 23 | 24 | std::ostream &LaTeXAlignment(std::ostream &str, const std::string &top_title, const Engine &top, const std::string &bottom_title, const Engine &bottom, bool exclude_bounds = true); 25 | 26 | } // namespace input 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /MEMT/Input/Input.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Input_Input_h 2 | #define _MEMT_Input_Input_h 3 | 4 | #include "MEMT/Strategy/Horizon/Config.hh" 5 | #include "MEMT/Input/Location.hh" 6 | #include "MEMT/Input/Word.hh" 7 | 8 | #include "util/numbers.hh" 9 | 10 | #include 11 | 12 | namespace input { 13 | 14 | // This doesn't really do much, but it's useful to pass around an object for 15 | // an engine instead of the entire input and the engine number. 
16 | struct Engine { 17 | Engine() {} 18 | 19 | // Length of sentence, i.e. the number of entries in words (the original comment is truncated after "including"; presumably the begin- and end-of-sentence markers are counted -- TODO confirm). 20 | unsigned int Length() const { return words.size(); } 21 | 22 | unsigned int number; 23 | std::vector words; 24 | }; 25 | 26 | struct Input { 27 | Input() {} 28 | 29 | const Word &GetWord(unsigned int engine, unsigned int offset) const { 30 | return engines[engine].words[offset]; 31 | } 32 | 33 | const Word &GetWord(const Location &l) const { 34 | return engines[l.engine].words[l.offset]; 35 | } 36 | 37 | unsigned int NumEngines() const { 38 | return engines.size(); 39 | } 40 | 41 | void SetupEngines(unsigned int count) { 42 | engines.resize(count); 43 | for (unsigned int i = 0; i < engines.size(); ++i) { 44 | engines[i].number = i; 45 | } 46 | } 47 | 48 | std::vector engines; 49 | }; 50 | 51 | } // namespace input 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /MEMT/Input/Jamfile: -------------------------------------------------------------------------------- 1 | alias horizon_config : /util//kenutil ; 2 | 3 | lib input_options : Options.cc ../Strategy/Horizon//strategy_horizon_options /util//kenutil ../..//boost_program_options ; 4 | 5 | lib align_type : AlignType.cc ../..//boost_thread ; 6 | 7 | lib input_alignment : Alignment.cc align_type /util//kenutil ; 8 | 9 | lib input 10 | : Capitalization.cc Format.cc Text.cc Transitive.cc ReadDispatcher.cc ReadFromJava.cc Read.cc align_type input_options input_alignment /util//kenutil ; 11 | 12 | exe Dump : Dump.cc input /util//kenutil ; 13 | -------------------------------------------------------------------------------- /MEMT/Input/Location.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Input_Location_h 2 | #define _MEMT_Input_Location_h 3 | 4 | namespace input { 5 | 6 | struct Location { 7 | Location() {} 8 | Location(unsigned int in_engine, unsigned int in_offset) : engine(in_engine), offset(in_offset) {} 9 | unsigned int
engine; 10 | unsigned int offset; 11 | }; 12 | 13 | // For sets. 14 | inline bool operator<(const Location &left, const Location &right) { 15 | if (left.engine < right.engine) return true; 16 | if (left.engine > right.engine) return false; 17 | return left.offset < right.offset; 18 | } 19 | 20 | } // namespace input 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /MEMT/Input/Options.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Input/Options.hh" 2 | #include "util/options.hh" 3 | 4 | namespace input { 5 | 6 | ConfigOptions::ConfigOptions(Config &config) 7 | : config_(config), options_("Input processing"), incremental_(false) { 8 | namespace po = boost::program_options; 9 | options_.add_options() 10 | ("input.lowercase_before_lm", 11 | po::value(&config_.lowercase_before_lm), 12 | "Lowercase input before looking up in LM?") 13 | 14 | ("align.transitive", 15 | po::value(&config_.transitive), 16 | "Make alignments transitive?"); 17 | 18 | SetDefaults(); 19 | } 20 | 21 | void ConfigOptions::SetDefaults() { 22 | config_.lowercase_before_lm = true; 23 | config_.transitive = false; 24 | 25 | incremental_ = false; 26 | } 27 | 28 | void ConfigOptions::Finish(const boost::program_options::variables_map &vm) { 29 | if (!incremental_) { 30 | incremental_ = true; 31 | } 32 | } 33 | 34 | } // namespace input 35 | -------------------------------------------------------------------------------- /MEMT/Input/Options.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Input_Options_h 2 | #define _MEMT_Input_Options_h 3 | 4 | #include "MEMT/Input/Config.hh" 5 | #include "MEMT/Input/Options.hh" 6 | 7 | #include "util/options.hh" 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | namespace input { 15 | 16 | class ConfigOptions { 17 | public: 18 | explicit ConfigOptions(Config &config); 19 | 20 | void SetDefaults(); 
21 | 22 | const boost::program_options::options_description &Options() const { return options_; } 23 | 24 | void Finish(const boost::program_options::variables_map &vm); 25 | 26 | private: 27 | Config &config_; 28 | 29 | boost::program_options::options_description options_; 30 | 31 | bool incremental_; 32 | }; 33 | 34 | } // namespace input 35 | 36 | #endif // _MEMT_Input_Options_h 37 | -------------------------------------------------------------------------------- /MEMT/Input/Read.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Input_Read_h 2 | #define _MEMT_Input_Read_h 3 | 4 | #include 5 | #include 6 | 7 | namespace input { 8 | 9 | class Engine; 10 | 11 | void ReadEngine(const Config &config, const std::string &line, unsigned int num_engines, Engine &engine); 12 | 13 | void ReadAllEngines(const Config &config, std::istream &in, Input &input); 14 | 15 | void AddSelfAlignments(Input &input); 16 | void AddBoundaryAlignments(Input &input); 17 | 18 | } // namespace input 19 | 20 | #endif // _MEMT_Input_Read_h 21 | -------------------------------------------------------------------------------- /MEMT/Input/ReadDispatcher.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Input/ReadDispatcher.hh" 2 | 3 | #include "MEMT/Input/ReadFromJava.hh" 4 | 5 | namespace input { 6 | 7 | BadFormatName::BadFormatName(const std::string &provided) throw() { 8 | what_ = "Bad format name "; 9 | what_ += provided; 10 | } 11 | 12 | void ReadDispatcher(const Config &config, std::istream &in, Input &input, size_t expected) { 13 | std::string format; 14 | in >> format; 15 | if (format == "java") { 16 | ReadFromJava(config, in, input, expected); 17 | } else { 18 | throw BadFormatName(format); 19 | } 20 | } 21 | 22 | } // namespace input 23 | -------------------------------------------------------------------------------- /MEMT/Input/ReadDispatcher.hh: 
-------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Input_ReadDispatcher_h 2 | #define _MEMT_Input_ReadDispatcher_h 3 | 4 | #include 5 | #include 6 | 7 | namespace input { 8 | 9 | class Config; 10 | 11 | class FactoryException : public std::exception { 12 | public: 13 | FactoryException() throw() {} 14 | ~FactoryException() throw() {} 15 | 16 | const char *what() const throw() { 17 | return "Reading from matcher failed"; 18 | } 19 | }; 20 | 21 | class BadFormatName : public std::exception { 22 | public: 23 | explicit BadFormatName(const std::string &provided) throw(); 24 | 25 | ~BadFormatName() throw() {} 26 | 27 | const char *what() const throw() { return what_.c_str(); } 28 | 29 | private: 30 | std::string what_; 31 | }; 32 | 33 | class Input; 34 | 35 | // Dispatch reading according to the format name read from the start of the stream. Only "java" (ReadFromJava) is currently handled; any other name throws BadFormatName. 36 | void ReadDispatcher(const Config &config, std::istream &in, Input &input, size_t expected = 0); 37 | 38 | } // namespace input 39 | 40 | #endif // _MEMT_Input_ReadDispatcher_h 41 | -------------------------------------------------------------------------------- /MEMT/Input/Same.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Input_Same_h 2 | #define _MEMT_Input_Same_h 3 | 4 | namespace input { 5 | 6 | class Input; 7 | 8 | /* Find sets of words that, when on the frontier at the same time, would 9 | * produce equal hypotheses. 10 | * The words must: 11 | * Be aligned via AL_EXACT 12 | * Have equal alignments up to type. 13 | * Have equal phrase lengths.
14 | * Have all words in its phrases the same (recursively according to this definition) 15 | * 16 | * Therefore precondition: 17 | * Alignments and phrases completed 18 | */ 19 | void FindSame(Input &text); 20 | 21 | } // namespace input 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /MEMT/Input/Text.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Input/Text.hh" 2 | #include "util/utf8.hh" 3 | #include "util/murmur_hash.hh" 4 | 5 | #include 6 | 7 | using namespace std; 8 | 9 | namespace input { 10 | 11 | void WordText::ResetBOS() { 12 | original_ = ""; 13 | canonical_ = ""; 14 | is_punctuation_ = false; 15 | is_end_ = false; 16 | canonical_hash_ = util::MurmurHashNative(canonical_.c_str(), canonical_.size()); 17 | } 18 | 19 | void WordText::ResetEOS() { 20 | original_ = ""; 21 | canonical_ = ""; 22 | is_punctuation_ = false; 23 | is_end_ = true; 24 | canonical_hash_ = util::MurmurHashNative(canonical_.c_str(), canonical_.size()); 25 | } 26 | 27 | void WordText::RereadOriginal(bool lowercase_canonical) { 28 | if (lowercase_canonical) { 29 | utf8::ToLower(original_, canonical_); 30 | } else { 31 | canonical_ = original_; 32 | } 33 | is_punctuation_ = utf8::IsPunctuation(canonical_); 34 | is_end_ = false; 35 | canonical_hash_ = util::MurmurHashNative(canonical_.c_str(), canonical_.size()); 36 | } 37 | 38 | } // namespace input 39 | -------------------------------------------------------------------------------- /MEMT/Input/Text.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Input_Text_h 2 | #define _MEMT_Input_Text_h 3 | 4 | #include "util/string_piece.hh" 5 | 6 | #include 7 | 8 | #include 9 | 10 | namespace input { 11 | 12 | class WordText { 13 | public: 14 | WordText() {} 15 | 16 | // Stores the original text and recomputes every other member (canonical form, punctuation flag, end flag, canonical hash) from it.
17 | void Reset(bool lowercase_canonical, const StringPiece &original) { 18 | original_.assign(original.data(), original.size()); 19 | RereadOriginal(lowercase_canonical); 20 | } 21 | 22 | void ResetBOS(); 23 | 24 | void ResetEOS(); 25 | 26 | const std::string &Original() const { return original_; } 27 | const std::string &Canonical() const { return canonical_; } 28 | bool IsPunctuation() const { return is_punctuation_; } 29 | 30 | bool IsEnd() const { return is_end_; } 31 | 32 | std::string &MutableOriginalForCase() { return original_; } 33 | 34 | uint64_t CanonicalHash() const { return canonical_hash_; } 35 | 36 | private: 37 | void RereadOriginal(bool lowercase_canonical); 38 | 39 | // UTF8 input string. 40 | std::string original_; 41 | // UTF8 canonical form: original_ lowercased if lowercase_canonical was set, otherwise a plain copy of original_. 42 | std::string canonical_; 43 | bool is_punctuation_; 44 | bool is_end_; 45 | 46 | uint64_t canonical_hash_; 47 | }; 48 | 49 | } 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /MEMT/Input/Transitive.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Input_Transitive_h 2 | #define _MEMT_Input_Transitive_h 3 | 4 | namespace input { 5 | 6 | class Input; 7 | 8 | // Returns false if there is a conflict. 9 | bool MakeAlignmentsTransitive(Input &text); 10 | 11 | } // namespace input 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /MEMT/Input/Word.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Input_Word_h 2 | #define _MEMT_Input_Word_h 3 | 4 | #include "MEMT/Input/Alignment.hh" 5 | #include "MEMT/Input/Text.hh" 6 | 7 | #include "util/numbers.hh" 8 | #include "util/string_piece.hh" 9 | 10 | namespace input { 11 | 12 | // Precomputed information about a word. 13 | // This class is scheduled for partitioning into alignment and precompute pieces.
14 | struct Word { 15 | Word() {} 16 | 17 | void ResetBOS(unsigned int engines, unsigned int offset) { 18 | text.ResetBOS(); 19 | alignments.Reset(engines); 20 | } 21 | 22 | void ResetEOS(unsigned int engines, unsigned int offset) { 23 | text.ResetEOS(); 24 | alignments.Reset(engines); 25 | } 26 | 27 | void ResetWord(unsigned int engines, unsigned int offset, bool lowercase_canonical, const StringPiece &word) { 28 | alignments.Reset(engines); 29 | text.Reset(lowercase_canonical, word); 30 | } 31 | 32 | // Actual input. 33 | WordText text; 34 | WordAlignments alignments; 35 | }; 36 | 37 | } // namespace 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /MEMT/Jamfile: -------------------------------------------------------------------------------- 1 | import symlink ; 2 | 3 | project : default-build release ; 4 | 5 | build-project Decoder ; 6 | build-project Input ; 7 | build-project Output ; 8 | build-project Controller ; 9 | 10 | install dist : Controller//MEMT Input//Dump Alignment//SummarizeAlignment ../lm/filter//filter ../lm/filter//FilterLM 11 | : on EXE LIB $(TOP)/MEMT/dist 12 | true 13 | ; 14 | 15 | alias all : dist ../util//programs ../Utilities/Output//dist ; 16 | -------------------------------------------------------------------------------- /MEMT/Output/Config.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Output_Config_h 2 | #define _MEMT_Output_Config_h 3 | 4 | namespace output { 5 | 6 | struct Config { 7 | // Size of n-best list. 8 | unsigned int nbest; 9 | 10 | // Lowercase all output? 11 | bool lowercase; 12 | 13 | // Capitalize initial word in sentence? Only effective if lowercase is false. 14 | bool initial_cap; 15 | 16 | // Include scores? 17 | bool scores; 18 | 19 | // Include alignment back to original system and offset?
20 | bool alignment; 21 | 22 | bool flush_nbest; 23 | }; 24 | 25 | } // namespace output 26 | #endif // _MEMT_Output_Config_h 27 | -------------------------------------------------------------------------------- /MEMT/Output/Jamfile: -------------------------------------------------------------------------------- 1 | alias null_beam_dumper ; 2 | 3 | fakelib output_options : Options.cc ../..//boost_program_options ; 4 | 5 | fakelib output 6 | : NBest.cc ToString.cc ../Decoder//completed /util//kenutil ; 7 | -------------------------------------------------------------------------------- /MEMT/Output/NBest.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Output_NBest_h 2 | #define _MEMT_Output_NBest_h 3 | 4 | #include "MEMT/Decoder/Completed.hh" 5 | 6 | #include 7 | #include 8 | 9 | namespace input { class Input; } 10 | 11 | namespace output { 12 | 13 | class Config; 14 | 15 | void NBest(std::ostream &out, const Config &config, const std::vector &nbest, const input::Input &text, unsigned int sent_id); 16 | 17 | } // namespace output 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /MEMT/Output/NullBeamDumper.hh: -------------------------------------------------------------------------------- 1 | /* The decoder's Run function accepts a beam dumper, to which it passes the 2 | * internal beam of partial hypotheses after each advance. 3 | * This one does nothing. 
4 | */ 5 | 6 | namespace output { 7 | 8 | struct NullBeamDumper { 9 | template void DumpBeam(unsigned int length, const BeamT &beam) {} 10 | }; 11 | 12 | } // namespace output 13 | -------------------------------------------------------------------------------- /MEMT/Output/Options.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Output/Options.hh" 2 | 3 | #include "MEMT/Output/Config.hh" 4 | 5 | namespace output { 6 | 7 | ConfigOptions::ConfigOptions(Config &config) 8 | : config_(config), options_("Output"), incremental_(false) { 9 | 10 | namespace po = boost::program_options; 11 | options_.add_options() 12 | ("output.nbest", 13 | po::value(&config_.nbest), 14 | "Number of n-best hypotheses") 15 | 16 | ("output.lowercase", 17 | po::value(&config.lowercase), 18 | "Lowercase all output?") 19 | 20 | ("output.initial_cap", 21 | po::value(&config.initial_cap), 22 | "Capitalize the first word of each output? No effect if lowercase is true.") 23 | 24 | ("output.scores", 25 | po::value(&config.scores), 26 | "Include scores in output?") 27 | 28 | ("output.alignment", 29 | po::value(&config.alignment), 30 | "Include alignment back to a source hypothesis?") 31 | 32 | ("output.flush_nbest", 33 | po::value(&config.flush_nbest), 34 | "Flush after each nbest output?"); 35 | 36 | SetDefaults(); 37 | } 38 | 39 | void ConfigOptions::SetDefaults() { 40 | config_.nbest = 1; 41 | config_.lowercase = false; 42 | config_.initial_cap = true; 43 | config_.scores = true; 44 | config_.alignment = false; 45 | config_.flush_nbest = false; 46 | } 47 | 48 | void ConfigOptions::Finish(const boost::program_options::variables_map &vm) {} 49 | 50 | } // namespace output 51 | -------------------------------------------------------------------------------- /MEMT/Output/Options.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Output_Options_h 2 | #define _MEMT_Output_Options_h 3 | 4 | #include 
5 | #include 6 | 7 | namespace output { 8 | 9 | class Config; 10 | 11 | class ConfigOptions { 12 | public: 13 | explicit ConfigOptions(Config &config); 14 | 15 | void SetDefaults(); 16 | 17 | const boost::program_options::options_description &Options() const { return options_; } 18 | 19 | void Finish(const boost::program_options::variables_map &vm); 20 | 21 | private: 22 | Config &config_; 23 | 24 | boost::program_options::options_description options_; 25 | 26 | bool incremental_; 27 | }; 28 | 29 | } // namespace output 30 | 31 | #endif // _MEMT_Output_Options_h 32 | -------------------------------------------------------------------------------- /MEMT/Output/StderrBeamDumper.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Output_StderrBeamDumper_h 2 | #define _MEMT_Output_StderrBeamDumper_h 3 | 4 | #include 5 | 6 | namespace output { 7 | 8 | class StderrBeamDumper { 9 | public: 10 | template void DumpBeam(unsigned int length, const Beam &beam) { 11 | std::cerr << "Length " << length << '\n'; 12 | std::vector values; 13 | for (typename Beam::unordered_iterator i = beam.unordered_begin(); i != beam.unordered_end(); ++i) { 14 | values.push_back(&*i); 15 | } 16 | std::sort(values.begin(), values.end(), boost::indirect_fun()); 17 | for (typename std::vector::const_iterator i = values.begin(); i != values.end(); ++i) { 18 | std::cerr << (*i)->History()->Entry().score; 19 | for (const decoder::HypHistory *hist = (*i)->History().get(); hist; hist = hist->BestPrevious()) { 20 | std::cerr << ' ' << hist->Entry().engine << ' ' << hist->Entry().offset; 21 | } 22 | std::cerr << '\n'; 23 | } 24 | std::cerr << '\n'; 25 | } 26 | }; 27 | } // namespace output 28 | 29 | #endif // _MEMT_Output_StderrBeamDumper_h 30 | -------------------------------------------------------------------------------- /MEMT/Output/ToString.hh: -------------------------------------------------------------------------------- 1 | #ifndef 
_MEMT_Output_ToString_h 2 | #define _MEMT_Output_ToString_h 3 | 4 | #include 5 | 6 | namespace decoder { class CompletedHypothesis; } 7 | 8 | namespace input { class Input; } 9 | 10 | namespace output { 11 | class Config; 12 | void CompletedHypothesisString(const Config &config, const decoder::CompletedHypothesis &hyp, const input::Input &text, std::string *out); 13 | } // namespace output 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /MEMT/Strategy/Graph/Config.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Strategy_Graph_Config_h 2 | #define _MEMT_Strategy_Graph_Config_h 3 | 4 | #include "MEMT/Strategy/Scorer/Config.hh" 5 | #include "MEMT/Strategy/Graph/Coverage/Config.hh" 6 | 7 | namespace strategy { 8 | namespace graph { 9 | 10 | struct Config { 11 | coverage::Config coverage; 12 | scorer::Config scorer; 13 | }; 14 | 15 | } // namespace graph 16 | } // namespace strategy 17 | 18 | #endif // _MEMT_Strategy_Graph_Config_h 19 | -------------------------------------------------------------------------------- /MEMT/Strategy/Graph/Coverage/Config.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Strategy_Graph_Coverage_Config_h 2 | #define _MEMT_Strategy_Graph_Coverage_Config_h 3 | 4 | namespace strategy { 5 | namespace graph { 6 | namespace coverage { 7 | 8 | struct Config {}; 9 | 10 | } // namespace coverage 11 | } // namespace graph 12 | } // namespace strategy 13 | 14 | #endif // _MEMT_Strategy_Graph_Coverage_Config_h 15 | -------------------------------------------------------------------------------- /MEMT/Strategy/Graph/Coverage/Hypothesis.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Strategy_Graph_Coverage_Hypothesis_h 2 | #define _MEMT_Strategy_Graph_Coverage_Hypothesis_h 3 | 4 | #include "util/hash_output.hh" 5 | 6 | #include 7 | 8 | 
namespace strategy { 9 | namespace graph { 10 | namespace coverage { 11 | 12 | class Hypothesis { 13 | public: 14 | Hypothesis() {} 15 | private: 16 | friend class Sentence; 17 | friend size_t hash_value(const Hypothesis &hyp); 18 | friend bool operator==(const Hypothesis &left, const Hypothesis &right); 19 | 20 | boost::dynamic_bitset bits_; 21 | }; 22 | 23 | inline size_t hash_value(const Hypothesis &hyp) { 24 | size_t ret = 0; 25 | to_block_range(hyp.bits_, util::HashOutput(ret)); 26 | return ret; 27 | } 28 | 29 | inline bool operator==(const Hypothesis &left, const Hypothesis &right) { 30 | return left.bits_ == right.bits_; 31 | } 32 | 33 | } // namespace coverage 34 | } // namespace graph 35 | } // namespace strategy 36 | #endif // _MEMT_Strategy_Graph_Coverage_Hypothesis_h 37 | -------------------------------------------------------------------------------- /MEMT/Strategy/Graph/Jamfile: -------------------------------------------------------------------------------- 1 | fakelib strategy_graph_options : Options.cc ../Scorer//strategy_scorer_options /util//kenutil ; 2 | 3 | alias graph : ../../Input//input ../Scorer//scorer /util//kenutil ; 4 | -------------------------------------------------------------------------------- /MEMT/Strategy/Graph/Options.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Strategy/Graph/Options.hh" 2 | 3 | #include "MEMT/Strategy/Graph/Config.hh" 4 | 5 | #include "util/options.hh" 6 | 7 | #include 8 | 9 | namespace strategy { 10 | namespace graph { 11 | 12 | ConfigOptions::ConfigOptions(Config &config) 13 | : config_(config), scorer_(config.scorer), options_("Graph strategy"), incremental_(false) { 14 | namespace po = boost::program_options; 15 | 16 | options_.add(scorer_.Options()); 17 | 18 | SetDefaults(); 19 | } 20 | 21 | void ConfigOptions::SetDefaults() { 22 | scorer_.SetDefaults(); 23 | incremental_ = false; 24 | } 25 | 26 | void ConfigOptions::Finish(const 
boost::program_options::variables_map &vm, size_t num_systems) { 27 | scorer_.Finish(vm, num_systems); 28 | } 29 | 30 | } // namespace graph 31 | } // namespace strategy 32 | -------------------------------------------------------------------------------- /MEMT/Strategy/Graph/Options.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Strategy_Graph_Options_h 2 | #define _MEMT_Strategy_Graph_Options_h 3 | 4 | #include "MEMT/Strategy/Scorer/Options.hh" 5 | 6 | #include 7 | #include 8 | 9 | namespace strategy { 10 | namespace graph { 11 | 12 | class Config; 13 | 14 | class ConfigOptions { 15 | public: 16 | explicit ConfigOptions(Config &config); 17 | 18 | void SetDefaults(); 19 | 20 | const boost::program_options::options_description &Options() const { return options_; } 21 | 22 | void Finish(const boost::program_options::variables_map &vm, size_t num_systems); 23 | 24 | private: 25 | Config &config_; 26 | 27 | scorer::ConfigOptions scorer_; 28 | 29 | boost::program_options::options_description options_; 30 | 31 | bool incremental_; 32 | }; 33 | 34 | } // namespace graph 35 | } // namespace strategy 36 | 37 | #endif // _MEMT_Strategy_Graph_Options_h 38 | -------------------------------------------------------------------------------- /MEMT/Strategy/Horizon/Config.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Strategy_Horizon_Config_h 2 | #define _MEMT_Strategy_Horizon_Config_h 3 | 4 | #include "util/numbers.hh" 5 | 6 | #include 7 | 8 | // Horizon configuration is part of Input configuration and passed to the 9 | // decoder. This is a separate file because it's also part of Input. 10 | 11 | namespace strategy { 12 | namespace horizon { 13 | 14 | struct Config { 15 | typedef enum {HORIZON_LENGTH, HORIZON_ALIGNMENT} Method; 16 | Method method; 17 | 18 | unsigned int radius; 19 | 20 | // Weights for stay_threshold. Defaults to uniform.
21 | std::vector stay_weights; 22 | // Applies only for method = HORIZON_ALIGNMENT. 23 | LinearScore stay_threshold; 24 | }; 25 | 26 | } // namespace horizon 27 | } // namespace strategy 28 | 29 | #endif // _MEMT_Strategy_Horizon_Config_h 30 | -------------------------------------------------------------------------------- /MEMT/Strategy/Horizon/Jamfile: -------------------------------------------------------------------------------- 1 | fakelib hypothesis : Hypothesis.cc /util//kenutil ../Phrase//phrase ../../Input//input ; 2 | fakelib strategy_horizon_options : Options.cc /util//kenutil ; 3 | fakelib horizon : Horizon.cc ../../Input//input ; 4 | -------------------------------------------------------------------------------- /MEMT/Strategy/Horizon/Options.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Strategy_Horizon_Options_h 2 | #define _MEMT_Strategy_Horizon_Options_h 3 | 4 | #include "util/options.hh" 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | namespace strategy { 12 | namespace horizon { 13 | 14 | class Config; 15 | 16 | class BadHorizonMethod : public util::ArgumentParseError { 17 | public: 18 | explicit BadHorizonMethod(const std::string &provided); 19 | 20 | ~BadHorizonMethod() throw() {} 21 | 22 | private: 23 | std::string provided_; 24 | }; 25 | 26 | class ConfigOptions { 27 | public: 28 | explicit ConfigOptions(Config &config); 29 | 30 | void SetDefaults(); 31 | 32 | const boost::program_options::options_description &Options() const { return options_; } 33 | 34 | void Finish(const boost::program_options::variables_map &vm); 35 | 36 | private: 37 | Config &config_; 38 | 39 | boost::program_options::options_description options_; 40 | 41 | bool incremental_; 42 | }; 43 | 44 | } // namespace horizon 45 | } // namespace strategy 46 | 47 | #endif // _MEMT_Strategy_Horizon_Options_h 48 | -------------------------------------------------------------------------------- 
/MEMT/Strategy/Legacy/Config.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Strategy_Legacy_Config_h 2 | #define _MEMT_Strategy_Legacy_Config_h 3 | 4 | #include "MEMT/Strategy/Horizon/Config.hh" 5 | #include "MEMT/Strategy/Phrase/Type.hh" 6 | #include "MEMT/Feature/Scorer/Config.hh" 7 | 8 | namespace strategy { 9 | namespace legacy { 10 | 11 | struct LegacyOnlyConfig { 12 | bool continue_recent; 13 | bool extend_aligned; 14 | }; 15 | 16 | struct Config { 17 | phrase::Type phrase; 18 | horizon::Config horizon; 19 | feature::scorer::Config scorer; 20 | LegacyOnlyConfig legacy; 21 | }; 22 | 23 | } // namespace legacy 24 | } // namespace strategy 25 | 26 | #endif // _MEMT_Strategy_Legacy_Config_h 27 | -------------------------------------------------------------------------------- /MEMT/Strategy/Legacy/Jamfile: -------------------------------------------------------------------------------- 1 | fakelib strategy_legacy_options : Options.cc ../Horizon//strategy_horizon_options ../../Feature/Scorer//feature_scorer_options ; 2 | 3 | alias legacy : ../Horizon//horizon ../Horizon//hypothesis ../Phrase//phrase ../../Feature/Scorer//scorer ; 4 | -------------------------------------------------------------------------------- /MEMT/Strategy/Legacy/Options.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Strategy_Legacy_Options_h 2 | #define _MEMT_Strategy_Legacy_Options_h 3 | 4 | #include "MEMT/Strategy/Horizon/Options.hh" 5 | #include "MEMT/Feature/Scorer/Options.hh" 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | namespace strategy { 13 | namespace legacy { 14 | 15 | class Config; 16 | 17 | class ConfigOptions { 18 | public: 19 | explicit ConfigOptions(Config &config); 20 | 21 | void SetDefaults(); 22 | 23 | const boost::program_options::options_description &Options() const { return options_; } 24 | 25 | void Finish(const 
boost::program_options::variables_map &vm, size_t num_systems, const std::vector &lm_orders); 26 | 27 | private: 28 | Config &config_; 29 | 30 | horizon::ConfigOptions horizon_; 31 | feature::scorer::ConfigOptions scorer_; 32 | 33 | boost::program_options::options_description options_; 34 | 35 | bool incremental_; 36 | }; 37 | 38 | } // namespace legacy 39 | } // namespace strategy 40 | 41 | #endif // _MEMT_Strategy_Legacy_Options_h 42 | -------------------------------------------------------------------------------- /MEMT/Strategy/Phrase/Aligned.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Strategy_Phrase_Aligned_h 2 | #define _MEMT_Strategy_Phrase_Aligned_h 3 | 4 | namespace input { class Engine; } 5 | 6 | namespace strategy { 7 | namespace phrase { 8 | 9 | class System; 10 | 11 | void DetectAligned(const input::Engine &engine, System &system); 12 | 13 | } // namespace phrase 14 | } // namespace strategy 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /MEMT/Strategy/Phrase/Jamfile: -------------------------------------------------------------------------------- 1 | fakelib type : Type.cc ; 2 | fakelib phrase : Aligned.cc Punctuation.cc Phrase.cc type ../../Input//input ; 3 | -------------------------------------------------------------------------------- /MEMT/Strategy/Phrase/Punctuation.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Strategy/Phrase/Punctuation.hh" 2 | 3 | #include "MEMT/Input/Input.hh" 4 | #include "MEMT/Strategy/Phrase/Phrase.hh" 5 | 6 | #include 7 | 8 | namespace strategy { 9 | namespace phrase { 10 | 11 | void DetectPunctuation(const input::Engine &engine, System &system) { 12 | //A punctuation phrase is the punctuation mark itself and the word 13 | //before it, except when it's the first word of the sentence: 14 | size_t start = 0; 15 | bool open = false; 16 | 17 | // is not
punctuation. Therefore we will always close. 18 | for (unsigned i = 1; i < engine.Length(); ++i) { 19 | if (engine.words[i].text.IsPunctuation()) { 20 | if (!open) { 21 | open = true; 22 | start = i - 1; 23 | } 24 | } else if (open) { 25 | system[start].AddEnd(i - 1, PHRASE_PUNCTUATION); 26 | open = false; 27 | } 28 | } 29 | assert(!open); 30 | } 31 | 32 | } // namespace phrase 33 | } // namespace strategy 34 | -------------------------------------------------------------------------------- /MEMT/Strategy/Phrase/Punctuation.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Strategy_Phrase_Punctuation_h 2 | #define _MEMT_Strategy_Phrase_Punctuation_h 3 | 4 | namespace input { class Engine; } 5 | 6 | namespace strategy { 7 | namespace phrase { 8 | 9 | class System; 10 | void DetectPunctuation(const input::Engine &engine, System &system); 11 | 12 | } // namespace phrase 13 | } // namespace strategy 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /MEMT/Strategy/Phrase/Type.cc: -------------------------------------------------------------------------------- 1 | #include "MEMT/Strategy/Phrase/Type.hh" 2 | 3 | namespace strategy { 4 | namespace phrase { 5 | 6 | std::ostream &TypeOut(std::ostream &str, const Type type) { 7 | if (type & PHRASE_PUNCTUATION) str << "punctuation "; 8 | if (type & PHRASE_ALIGNMENT) str << "alignment "; 9 | if (type & PHRASE_SOURCE_CHUNK) str << "source_chunk "; 10 | return str; 11 | } 12 | 13 | } // namespace phrase 14 | } // namespace strategy 15 | -------------------------------------------------------------------------------- /MEMT/Strategy/Phrase/Type.hh: -------------------------------------------------------------------------------- 1 | #ifndef _MEMT_Strategy_Phrase_Type_h 2 | #define _MEMT_Strategy_Phrase_Type_h 3 | 4 | #include 5 | 6 | namespace strategy { 7 | namespace phrase { 8 | 9 | typedef unsigned int Type; 10 | 11 | const 
Type PHRASE_PUNCTUATION = 1 << 0; 12 | const Type PHRASE_ALIGNMENT = 1 << 1; 13 | const Type PHRASE_SOURCE_CHUNK = 1 << 2; 14 | 15 | std::ostream &TypeOut(std::ostream &str, const Type type); 16 | 17 | } // namespace phrase 18 | } // namespace strategy 19 | 20 | #endif // _MEMT_Strategy_Phrase_Type_h 21 | -------------------------------------------------------------------------------- /MEMT/scripts/experiment/decode_subgenre.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | port=$1 4 | run=$2 5 | shift 6 | shift 7 | for i; do 8 | MATCHED="$run/matched/$i/matched" 9 | if [ ! -f "$MATCHED" ]; then 10 | echo "No file $MATCHED" 1>&2 11 | exit 1 #a bare exit would return the status of the echo above (0) and hide the failure 12 | fi 13 | mkdir -p "$run/$i" 14 | ~/avenue/MEMT/scripts/simple_decode.rb "$port" "$run/decoder_config" "$MATCHED" "$run/$i/output" 15 | done 16 | -------------------------------------------------------------------------------- /MEMT/scripts/experiment/en.sh: -------------------------------------------------------------------------------- 1 | #PBS -N mert-gale-all 2 | #PBS -S /bin/bash 3 | #PBS -l nodes=1:ppn=8 4 | #PBS -e localhost:$HOME/mert.err 5 | #PBS -o localhost:$HOME/mert.out 6 | #PBS -l mem=12gb 7 | #PBS -l walltime=48:30:00 8 | 9 | JOBDIR=$HOME/jobs/$PBS_JOBID 10 | mkdir -p $JOBDIR 11 | exec 1>$JOBDIR/stdout 2>$JOBDIR/stderr 12 | echo Running on host `hostname` 13 | echo Time is `date` 14 | echo Directory is `pwd` 15 | 16 | l=en 17 | LM=corpus/gale/lm/filtered.arpa 18 | 19 | cd /home/kheafiel/memt/expt 20 | ../../avenue/MEMT/scripts/server.sh --lm.file $LM --daemonize --pidfile $JOBDIR/decoder.pid --portfile $JOBDIR/decoder.port --keep-stdio-open --no-setsid || exit 1 21 | port=$(cat $JOBDIR/decoder.port) 22 | #scripts/run.rb $port corpus/mt09/{ur/match/top7,ar/match/top9} config/exact2,2-all3,3-length-5-msft additional/all 23 | scripts/run.rb $port corpus/gale/nw/match/top{9,{5,6,7,8}-lemans} corpus/gale/wb/match/top{4,5,6,7,8}
corpus/gale/audio/match/top{5,6,7,8,9,10} config/exact2,2-all2,2-length-5{,-terbleu} config/exact2,2-all3,3-length-{5,4} additional/all 24 | ret=$? 25 | kill $(cat $JOBDIR/decoder.pid) 26 | exit $ret #propagate the status of run.rb saved above, not the status of kill 27 | -------------------------------------------------------------------------------- /MEMT/scripts/experiment/preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | AVENUE_DIR=$(dirname $0)/../../.. 3 | dir="$1" 4 | if ! [ -f "$dir"/1best.sgm ]; then 5 | echo no "$dir"/1best.sgm 1>&2 6 | exit 1 7 | fi 8 | TAG="${2:-seg}" 9 | $AVENUE_DIR/MEMT/scripts/experiment/stripsgml.rb "$TAG" <"$dir"/1best.sgm >"$dir"/txt 10 | $AVENUE_DIR/Utilities/Tokenization/PTB/tokenizer.perl <"$dir"/txt |sed 's/^ *//; s/ *$//' >"$dir"/tok 11 | -------------------------------------------------------------------------------- /MEMT/scripts/experiment/qsub.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ ! -f "$1" ]; then 3 | echo expected a language model as the first argument 1>&2 4 | exit 1 #usage error: report failure instead of the echo's status 5 | fi 6 | LM="$1" 7 | LM_BASE="$(basename $(basename "$LM" .probing) .arpa)" 8 | shift 9 | qsub <(.*)<\/seg>/ do |m| 5 | $stdout.puts CGI::unescapeHTML(m[0]).gsub(/'/, "'").gsub(/& AMP;/, '&').gsub(/\n/, ' ').strip 6 | end 7 | 8 | -------------------------------------------------------------------------------- /MEMT/scripts/make_filter_vocab.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'set' 3 | files = ARGV.map { |f| File.new(f) } 4 | loop do 5 | lines = files.map { |f| f.gets }.compact 6 | break if lines.empty?
7 | throw "Unequal number of lines" unless lines.size == files.size 8 | vocab = Set.new 9 | lines.each do |l| 10 | vocab.merge(l.split) 11 | end 12 | vocab.each do |w| 13 | $stdout.write w 14 | $stdout.write " " 15 | end 16 | $stdout.write "\n" 17 | end 18 | -------------------------------------------------------------------------------- /MEMT/scripts/match.rb: -------------------------------------------------------------------------------- 1 | class JavaMatched 2 | attr_reader :sys_count 3 | def initialize(file) 4 | @file = file 5 | @file.seek(0) 6 | @sys_count = @file.gets.to_i 7 | throw "Bad count" unless @sys_count > 0 8 | @file.seek(0) 9 | end 10 | def get_match 11 | count = @file.gets 12 | return nil unless count 13 | ret = count 14 | 15 | count.to_i.times do 16 | ret += @file.gets 17 | end 18 | ((count.to_i * (count.to_i - 1))/2).times do 19 | while true do 20 | line = @file.gets 21 | ret += line 22 | break if line == "\n" 23 | end 24 | end 25 | 26 | ret 27 | end 28 | 29 | def packet 30 | str = get_match 31 | return nil unless str 32 | "matched 0\njava\n" + str 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /MEMT/scripts/nbest_first.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | pre = nil 3 | while l = gets 4 | splt = l.split("|||") 5 | num = splt[0].to_i 6 | if num != pre 7 | pre = num 8 | puts splt[1].strip 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /MEMT/scripts/server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DIR="$(dirname "$0")/../../bin" 3 | export LD_LIBRARY_PATH="$DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" 4 | exec "$DIR"/MEMT "$@" #quoted so each argument reaches MEMT intact even if it contains whitespace 5 | -------------------------------------------------------------------------------- /MEMT/scripts/shell_escape.rb:
-------------------------------------------------------------------------------- 1 | ../../Utilities/scoring/lib/shell_escape.rb -------------------------------------------------------------------------------- /MEMT/scripts/simple_decode.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'pathname' 3 | AVENUE_DIR = File.dirname(Pathname.new(File.expand_path(__FILE__) + '/../..').realpath) 4 | require AVENUE_DIR + '/MEMT/scripts/decode' 5 | require AVENUE_DIR + '/MEMT/scripts/zmert/format' 6 | 7 | unless ARGV[2] 8 | $stderr.puts "Usage: connection config_file matched_file [output_prefix] [language for detokenization]" 9 | exit 1 10 | end 11 | 12 | CONN=ARGV[0] 13 | CONFIG=ARGV[1] 14 | MATCHED=ARGV[2] 15 | OUT_BASE=(ARGV[3] ? ARGV[3] : MATCHED) 16 | decode( 17 | File.new(CONFIG).read, 18 | CONN, 19 | File.new(MATCHED, 'r'), 20 | OUT_BASE, 21 | ARGV[4]) 22 | -------------------------------------------------------------------------------- /MEMT/scripts/util.rb: -------------------------------------------------------------------------------- 1 | require 'pathname' 2 | SCRIPT_DIR = File.dirname(Pathname.new(File.expand_path(__FILE__)).realpath) 3 | 4 | class Message 5 | attr_accessor :prefix 6 | def initialize(prefix, file) 7 | @prefix = prefix 8 | @last = Time.now 9 | @file = file 10 | end 11 | def measure 12 | tick = Time.now 13 | ret = tick - @last 14 | @last = tick 15 | ret 16 | end 17 | def tell(func, event) 18 | message = "#{@prefix}#{event} #{func} at #{Time.now} (#{measure}s)" 19 | $stderr.puts message 20 | @file.puts message 21 | @file.sync 22 | end 23 | 24 | def wrap(func) 25 | tell(func, :start) 26 | ret = yield 27 | tell(func, :finish) 28 | ret 29 | end 30 | end 31 | 32 | def write_close(name, content) 33 | f = File.new(name, 'w') 34 | f.write(content) 35 | f.close 36 | end 37 | 38 | #not the most efficient, but it works for references. 
39 | def count_lines(file) 40 | count = 0 41 | file.each_line do |l| 42 | count += 1 43 | end 44 | count 45 | end 46 | -------------------------------------------------------------------------------- /MEMT/scripts/zmert/decoder.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | #Decoder process run by zmert that converts the necessary file formats. 3 | require 'pathname' 4 | AVENUE_DIR = File.dirname(Pathname.new(File.expand_path(__FILE__) + '/../../..').realpath) 5 | 6 | require AVENUE_DIR + '/MEMT/scripts/decode' 7 | require AVENUE_DIR + '/MEMT/scripts/zmert/fuzz' 8 | require AVENUE_DIR + '/MEMT/scripts/zmert/format' 9 | 10 | iter = ARGV[0].to_i 11 | $stderr.puts "Guessing iteration #{iter}" 12 | config = make_config("dec_cfg.txt", "decoder_config_base") + "\noutput.lowercase = true\n" + Fuzz.new.string_amount(iter) 13 | 14 | language = File.new("language").read.strip 15 | 16 | decode(config, File.new("connection").read, File.new("dev.matched"), "output", language) 17 | -------------------------------------------------------------------------------- /MEMT/scripts/zmert/fuzz.rb: -------------------------------------------------------------------------------- 1 | FUZZ_SLIDE_AMOUNT=11 2 | class Fuzz 3 | def self.slide_amount 4 | FUZZ_SLIDE_AMOUNT 5 | end 6 | attr_reader :slide 7 | def initialize 8 | @mult = 1.1 9 | @slide = FUZZ_SLIDE_AMOUNT 10 | end 11 | def amount(iter) 12 | return 0.0 if iter >= @slide 13 | return (@slide - iter).to_f / @slide.to_f * @mult 14 | end 15 | def string_amount(iter) 16 | "score.fuzz.ratio = #{amount(iter)}\n" 17 | end 18 | end 19 | 20 | -------------------------------------------------------------------------------- /MEMT/scripts/zmert/run.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'pathname' 3 | AVENUE_DIR = File.dirname(Pathname.new(File.expand_path(__FILE__) + '/../../..').realpath) 4 | require 
AVENUE_DIR + '/MEMT/scripts/zmert/zmert' 5 | 6 | throw "Tunes MEMT. Pass a working directory, connection, and language" unless ARGV[2] 7 | directory=ARGV[0] 8 | connection=ARGV[1] 9 | language=ARGV[2] 10 | 11 | full_zmert(directory, connection, language) 12 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | MEMT/README -------------------------------------------------------------------------------- /Utilities/Input/unescape.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'cgi' 3 | 4 | $stdin.each_line do |l| 5 | $stdout.write CGI::unescapeHTML(l).gsub(/'/, "'").gsub(/& AMP;/, '&') 6 | end 7 | -------------------------------------------------------------------------------- /Utilities/Output/Jamfile: -------------------------------------------------------------------------------- 1 | exe remove_nonlatin 2 | : remove_nonlatin.cc /util//kenutil /util//icu ; 3 | 4 | install dist : remove_nonlatin 5 | : on EXE 6 | LIB 7 | dist 8 | true 9 | ; 10 | -------------------------------------------------------------------------------- /Utilities/Output/remove_nonlatin.cc: -------------------------------------------------------------------------------- 1 | #include "util/tokenize_piece.hh" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | bool RemoveWord(const StringPiece &str) { 10 | int32_t size_as_int32 = static_cast(str.size()); 11 | UChar32 character = -1; 12 | for (int32_t offset = 0; offset < size_as_int32;) { 13 | U8_NEXT(str.data(), offset, size_as_int32, character); 14 | if (character < 0) { 15 | std::cerr << "Bad UTF8 " << str.data()[offset] << " in " << str << std::endl; 16 | return true; 17 | } 18 | UErrorCode err = UErrorCode(); 19 | UScriptCode code = uscript_getScript(character, &err); 20 | if (err) { 21 | std::cerr << u_errorName(err) << std::endl; 22 
| exit(1); 23 | } 24 | if (code == USCRIPT_LATIN || code == USCRIPT_COMMON) { 25 | return false; 26 | } 27 | } 28 | return true; 29 | } 30 | 31 | int main() { 32 | std::string line; 33 | while (std::getline(std::cin, line)) { 34 | bool rest = false; 35 | for (util::TokenIter i(line, ' '); i; ++i) { 36 | if (!RemoveWord(*i)) { 37 | if (rest) std::cout << ' '; 38 | rest = true; 39 | std::cout << *i; 40 | } 41 | } 42 | std::cout << '\n'; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /Utilities/Tokenization/Moses/README: -------------------------------------------------------------------------------- 1 | Copied from Moses, LGPL license 2 | -------------------------------------------------------------------------------- /Utilities/Tuning/zmert.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kpu/MEMT/cfd150b33c33320ee74d643a23e8e909f77a2994/Utilities/Tuning/zmert.jar -------------------------------------------------------------------------------- /Utilities/queue.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'thread' 3 | 4 | queue = Queue.new 5 | number = ARGV[0] ? ARGV[0].to_i : 8 6 | 7 | threads = [] 8 | number.times do |i| 9 | threads << Thread.new do 10 | while (command = queue.pop) 11 | puts "Thread #{i} running #{command.chomp}. #{queue.size} remaining." 12 | system(command) 13 | end 14 | end 15 | end 16 | 17 | while (str = $stdin.gets) do 18 | queue << str 19 | puts "#{queue.size} remaining." 20 | end 21 | 22 | threads.each { queue << nil } 23 | threads.each { |t| t.join } 24 | -------------------------------------------------------------------------------- /Utilities/scoring/INSTALL: -------------------------------------------------------------------------------- 1 | Run ./setup.sh . When that finishes, run ./score.rb for usage instructions. 
2 | 3 | Due to licensing issues, the actual metrics are not distributed in the same tarball. ./setup.sh will download, untar, and compile them in the case of METEOR. 4 | 5 | Run time dependencies are Ruby (scoring script), Perl (detokenizer and BLEU), and Java (METEOR and TER). 6 | 7 | At build time you also need Python (METEOR phrase table extraction) and Ant (to build METEOR). 8 | -------------------------------------------------------------------------------- /Utilities/scoring/LICENSE: -------------------------------------------------------------------------------- 1 | Everything except lib/shell.rb is LGPL provided in COPYING.LESSER. lib/shell.rb includes its own license at the top. 2 | -------------------------------------------------------------------------------- /Utilities/scoring/README: -------------------------------------------------------------------------------- 1 | This package makes it easier to score machine translation output using multiple metrics. Currently, it supports BLEU, NIST, TER, and METEOR. 2 | 3 | Installation is simple: run ./setup.sh which installs all the metrics. Then run ./score.rb . See INSTALL for more detailed directions. 4 | 5 | Contact heafield+scoring at cs.cmu.edu 6 | -------------------------------------------------------------------------------- /Utilities/scoring/interlace.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | #Standalone program to interlace multiple files. Useful to make files for --refs-laced 3 | files = ARGV.map { |f| File.new(f) } 4 | loop do 5 | lines = files.map { |f| f.gets }.compact 6 | break if lines.empty? 
7 | throw "Unequal number of lines" unless lines.size == files.size 8 | lines.each do |l| 9 | $stdout.puts l 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /Utilities/scoring/lib/length.rb: -------------------------------------------------------------------------------- 1 | # Carnegie Mellon University 2 | # Copyright (c) 2009 3 | # All Rights Reserved. 4 | # 5 | # Any use of this software must follow the terms 6 | # outlined in the included LICENSE file. 7 | # 8 | 9 | #This computes the average over hypotheses of the ratio of hypothesis length to average corresponding reference length. 10 | 11 | def count_words(str) 12 | str.split(' ').size 13 | end 14 | 15 | def score_length(request) 16 | sum = 0.0 17 | ref_count = request.ref.number.to_f 18 | request.hyp.lines.each_index do |i| 19 | hyp_length = count_words(request.hyp.lines[i]) 20 | ref_sum = 0 21 | request.ref.range(i).each do |r| 22 | ref_sum += count_words(request.ref.laced_lines[r]) 23 | end 24 | sum += (hyp_length.to_f / (ref_sum.to_f / ref_count)) 25 | end 26 | sum / request.hyp.lines.size.to_f 27 | end 28 | -------------------------------------------------------------------------------- /Utilities/scoring/lib/meteorify.rb: -------------------------------------------------------------------------------- 1 | # Carnegie Mellon University 2 | # Copyright (c) 2009 3 | # All Rights Reserved. 4 | # 5 | # Any use of this software must follow the terms 6 | # outlined in the included LICENSE file. 7 | # 8 | 9 | require SCORE_DIR + '/lib/shell_escape' 10 | 11 | def parse_meteor(line, expression) 12 | matched = line.match(expression) 13 | throw "Meteor line #{line.inspect} does not match #{expression}." 
unless matched 14 | matched[1].to_f 15 | end 16 | 17 | def score_meteor(request) 18 | output_file = request.output.perm("meteor_out") 19 | system_with_redirect(["java", "-jar", SCORE_DIR + "/meteor-1.0/dist/meteor-1.0/meteor.jar", request.hyp.file_name, request.ref.laced_name, "-r", request.ref.number.to_s, "-normalize", "-l", request.language, "-t", request.task], nil, output_file) 20 | score_lines = File.new(output_file).readlines 21 | throw "Meteor output should be at least 7 lines" unless score_lines.size > 7 22 | [parse_meteor(score_lines[-1], /^Final score:\t\t([0-9]*\.[0-9]*)$/), parse_meteor(score_lines[-7], /^Precision:\t\t([0-9]*\.[0-9]*)$/), parse_meteor(score_lines[-6], /^Recall:\t\t\t([0-9]*\.[0-9]*)$/)].map do |n| 23 | n.to_f 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /Utilities/scoring/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Carnegie Mellon University 4 | # Copyright (c) 2009 5 | # All Rights Reserved. 6 | # 7 | # Any use of this software must follow the terms 8 | # outlined in the included LICENSE file. 9 | # 10 | function error() { echo Setup failed.; exit 1; } 11 | [ -f mteval-v13.pl ] || wget ftp://jaguar.ncsl.nist.gov/mt/resources/mteval-v13.pl || error 12 | [ -f meteor-1.0.tgz ] || wget http://www.cs.cmu.edu/~alavie/METEOR/download/meteor-1.0.tgz || error 13 | [ -f tercom-0.7.25.tgz ] || wget http://www.cs.umd.edu/~snover/tercom/tercom-0.7.25.tgz || error 14 | [ -f terp-pt.v1.tgz ] || wget http://web.archive.org/web/20120608122411/http://www.umiacs.umd.edu/~snover/terp/downloads/terp-pt.v1.tgz || error 15 | trap error ERR 16 | chmod +x mteval-v13.pl 17 | tar xzf meteor-1.0.tgz 18 | tar xzf tercom-0.7.25.tgz 19 | tar xzf terp-pt.v1.tgz 20 | pushd meteor-1.0 21 | ./scripts/create_paraphrase_file.py . 
../terp-pt.v1/unfiltered_phrasetable.txt 22 | ant 23 | popd 24 | 25 | chmod +x score.rb 26 | 27 | echo All setup. Licenses for the various metrics you just downloaded are in mteval-v13.pl, tercom-0.7.25/LICENSE.txt, terp-pt.v1/LICENSE.txt, and meteor-1.0/files/LICENSE . 28 | -------------------------------------------------------------------------------- /bjam: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | if 4 | bjam="$(which bjam 2>/dev/null)" && #exists 5 | [ ${#bjam} != 0 ] && #paranoia about which printing nothing then returning true 6 | ! grep UFIHGUFIHBDJKNCFZXAEVA "${bjam}" /dev/null && #bjam in path isn't this script 7 | "${bjam}" --help >/dev/null 2>/dev/null && #bjam in path isn't broken (i.e. has boost-build) 8 | "${bjam}" --version |grep "Boost.Build 201" >/dev/null 2>/dev/null #It's recent enough. 9 | then 10 | #Delegate to system bjam 11 | exec "${bjam}" "$@" 12 | fi 13 | 14 | top="$(dirname "$0")" 15 | if [ ! -x "$top"/jam-files/bjam ]; then 16 | pushd "$top/jam-files/engine" 17 | ./build.sh 18 | cp -f bin.*/bjam ../bjam 19 | popd 20 | fi 21 | 22 | export BOOST_BUILD_PATH="$top"/jam-files/boost-build 23 | exec "$top"/jam-files/bjam "$@" 24 | -------------------------------------------------------------------------------- /install/README: -------------------------------------------------------------------------------- 1 | This automates installation of packages from source into a prefix directory. 2 | Command line is ./install.sh /some/prefix/directory "list of packages in order" -jparallelism where parallelism is the number of processors to use. 3 | You probably want to set the environment variables CFLAGS and CXXFLAGS to something efficient. See http://en.gentoo-wiki.com/wiki/Safe_Cflags for guidance. 
4 | 5 | Once installed, you can run either 6 | source /some/prefix/directory/environment.bash 7 | or 8 | source /some/prefix/directory/environment.tcsh 9 | depending on your shell to setup the environment to use the installed packages. 10 | 11 | For MEMT's dependencies, run ./install.sh /some/prefix/directory "icu boost ruby" -jparallelism 12 | 13 | Dependencies: 14 | MEMT decoder source code (provided in memt.tar.gz) -> icu boost 15 | MEMT tuning scripts (provided in memt.tar.gz) -> ruby 16 | boost -> icu 17 | 18 | Note that there is no separate make and make install as some packages' make depends on other packages' make install, so there would be several iterations. 19 | -------------------------------------------------------------------------------- /install/ant.sh: -------------------------------------------------------------------------------- 1 | . lib.sh 2 | VERSION_ANT=1.7.1 3 | 4 | download_ant() { 5 | download http://archive.apache.org/dist/ant/binaries/apache-ant-${VERSION_ANT}-bin.tar.bz2 6 | } 7 | 8 | compile_ant() { 9 | [ -d apache-ant-${VERSION_ANT} ] && rm -rf apache-ant-${VERSION_ANT} 10 | chk tar xjf apache-ant-${VERSION_ANT}-bin.tar.bz2 11 | chk pushd apache-ant-${VERSION_ANT} 12 | chk popd 13 | } 14 | 15 | install_ant() { 16 | chk pushd apache-ant-${VERSION_ANT} 17 | chk cp -a bin/* $PREFIX/bin 18 | chk cp -a lib/* $PREFIX/lib 19 | chk popd 20 | } 21 | -------------------------------------------------------------------------------- /install/apache-ant-1.7.1-bin.tar.bz2.md5: -------------------------------------------------------------------------------- 1 | 9330447f3763b87570dd1118c49a8efd apache-ant-1.7.1-bin.tar.bz2 2 | -------------------------------------------------------------------------------- /install/apache-ant-1.7.1-bin.tar.bz2.sha1: -------------------------------------------------------------------------------- 1 | b078ba89301687662f100da7b059105f32004f7c apache-ant-1.7.1-bin.tar.bz2 2 | 
-------------------------------------------------------------------------------- /install/boost.sh: -------------------------------------------------------------------------------- 1 | . lib.sh 2 | BOOST_DOT_VERSION=1.49.0 3 | BOOST_JUST_VERSION=1_49 4 | BOOST_SHORT_VERSION=boost_${BOOST_JUST_VERSION} 5 | BOOST_VERSION=boost_1_49_0 6 | download_boost() { 7 | download http://downloads.sourceforge.net/project/boost/boost/$BOOST_DOT_VERSION/${BOOST_VERSION}.tar.bz2 8 | } 9 | 10 | #Depends on ICU installed 11 | compile_boost() { 12 | tar xjf $BOOST_VERSION.tar.bz2 || fatal "Extract boost tarball" 13 | pushd $BOOST_VERSION || fatal "cd to boost directory" 14 | 15 | #Boost C++ libraries 16 | ./bootstrap.sh --prefix=$PREFIX --libdir=$PREFIX/lib --with-icu=$PREFIX || fatal "Failed to configure boost. Is ICU installed properly?" 17 | chk ./b2 --prefix=$PREFIX --libdir=$PREFIX/lib --layout=tagged link=static,shared threading=single,multi $PARALLEL 18 | 19 | popd 20 | } 21 | 22 | install_boost() { 23 | chk pushd $BOOST_VERSION 24 | 25 | #Boost C++ libraries 26 | chk ./b2 --prefix=$PREFIX --libdir=$PREFIX/lib --layout=tagged link=static,shared threading=single,multi $PARALLEL install || fatal "install boost" 27 | popd 28 | } 29 | -------------------------------------------------------------------------------- /install/boost_1_49_0.tar.bz2.md5: -------------------------------------------------------------------------------- 1 | 0d202cb811f934282dea64856a175698 boost_1_49_0.tar.bz2 2 | -------------------------------------------------------------------------------- /install/boost_1_49_0.tar.bz2.sha1: -------------------------------------------------------------------------------- 1 | 26a52840e9d12f829e3008589abf0a925ce88524 boost_1_49_0.tar.bz2 2 | -------------------------------------------------------------------------------- /install/checksum.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | md5sum "$1" >"$1".md5 3 | 
sha1sum "$1" >"$1".sha1 4 | -------------------------------------------------------------------------------- /install/environment.bash: -------------------------------------------------------------------------------- 1 | #Prepend to path variables, avoiding a trailing colon if initially empty 2 | #This used to be a function, but people thought that implementation was too complicated 3 | export PATH=$PREFIX/bin${PATH:+:$PATH} 4 | export LD_LIBRARY_PATH=$PREFIX/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} 5 | export LIBRARY_PATH=$PREFIX/lib${LIBRARY_PATH:+:$LIBRARY_PATH} 6 | export CPATH=$PREFIX/include${CPATH:+:$CPATH} 7 | export BOOST_BUILD_PATH=$PREFIX/share/boost-build 8 | export CLASSPATH=$PREFIX/classpath/zmert.jar${CLASSPATH:+:$CLASSPATH} #same pattern as the lines above: no trailing or doubled colon 9 | export ANT_HOME=$PREFIX 10 | -------------------------------------------------------------------------------- /install/environment.tcsh: -------------------------------------------------------------------------------- 1 | if ( $?PATH == 0 ) then 2 | setenv PATH $PREFIX/bin 3 | else 4 | setenv PATH $PREFIX/bin:$PATH 5 | endif 6 | if ( $?path == 0 ) then 7 | set path = $PREFIX/bin 8 | else 9 | set path = ($PREFIX/bin $path) 10 | endif 11 | if ( $?LD_LIBRARY_PATH == 0 ) then 12 | setenv LD_LIBRARY_PATH $PREFIX/lib 13 | else 14 | setenv LD_LIBRARY_PATH $PREFIX/lib:$LD_LIBRARY_PATH 15 | endif 16 | if ( $?LIBRARY_PATH == 0 ) then 17 | setenv LIBRARY_PATH $PREFIX/lib 18 | else 19 | setenv LIBRARY_PATH $PREFIX/lib:$LIBRARY_PATH 20 | endif 21 | if ( $?CPATH == 0 ) then 22 | setenv CPATH $PREFIX/include 23 | else 24 | setenv CPATH $PREFIX/include:$CPATH 25 | endif 26 | setenv BOOST_BUILD_PATH $PREFIX/share/boost-build 27 | setenv ANT_HOME $PREFIX 28 | -------------------------------------------------------------------------------- /install/icu.sh: -------------------------------------------------------------------------------- 1 | .
lib.sh 2 | 3 | download_icu() { 4 | download http://download.icu-project.org/files/icu4c/4.6.1/icu4c-4_6_1-src.tgz 5 | } 6 | 7 | compile_icu() { 8 | [ -d icu ] && chk rm -rf icu 9 | chk tar xzvf icu4c-4_6_1-src.tgz 10 | [ -d icu ] || fatal "ICU did not extract to icu directory" 11 | chk pushd icu/source 12 | ./configure --prefix=$PREFIX || fatal "Configuring ICU failed" 13 | make || fatal "Making ICU failed" #No PARALLEL because I've seen them break ICU 14 | chk popd 15 | } 16 | 17 | install_icu() { 18 | chk pushd icu/source 19 | chk make install 20 | popd 21 | } 22 | -------------------------------------------------------------------------------- /install/icu4c-4_6_1-src.tgz.md5: -------------------------------------------------------------------------------- 1 | da64675d85f0c2191cef93a8cb5eea88 icu4c-4_6_1-src.tgz 2 | -------------------------------------------------------------------------------- /install/icu4c-4_6_1-src.tgz.sha1: -------------------------------------------------------------------------------- 1 | b8bbf80dff1727a7528f9601b0502db1633658c3 icu4c-4_6_1-src.tgz 2 | -------------------------------------------------------------------------------- /install/lib.sh: -------------------------------------------------------------------------------- 1 | fatal() { 2 | echo Error: $1 1>&2 3 | exit 1 4 | } 5 | 6 | chk() { 7 | "$@" || fatal "cd \"$PWD\" && $*" 8 | } 9 | 10 | checksum() { 11 | [ -f "$1.md5" ] && chk md5sum -c "$1.md5" 12 | [ -f "$1.sha1" ] && chk sha1sum -c "$1.sha1" 13 | } 14 | 15 | download() { 16 | [ -f $(basename $1) ] || chk wget $1 17 | checksum $(basename $1) 18 | } 19 | 20 | -------------------------------------------------------------------------------- /install/ruby-1.9.1-p376.tar.gz.md5: -------------------------------------------------------------------------------- 1 | ebb20550a11e7f1a2fbd6fdec2a3e0a3 ruby-1.9.1-p376.tar.gz 2 | -------------------------------------------------------------------------------- 
/install/ruby-1.9.1-p376.tar.gz.sha1: -------------------------------------------------------------------------------- 1 | 05a520c97a4528951139a1efe9f4933dd4661adb ruby-1.9.1-p376.tar.gz 2 | -------------------------------------------------------------------------------- /install/ruby-1.9.2-p0.tar.gz.md5: -------------------------------------------------------------------------------- 1 | 755aba44607c580fddc25e7c89260460 ruby-1.9.2-p0.tar.gz 2 | -------------------------------------------------------------------------------- /install/ruby-1.9.2-p0.tar.gz.sha1: -------------------------------------------------------------------------------- 1 | 9d79ebbf929e2f6c251fe7a9614b96a3d2427b1c ruby-1.9.2-p0.tar.gz 2 | -------------------------------------------------------------------------------- /install/ruby.sh: -------------------------------------------------------------------------------- 1 | . lib.sh 2 | 3 | VERSION_RUBY=1.9.2-p0 4 | download_ruby() { 5 | download ftp://ftp.ruby-lang.org/pub/ruby/1.9/ruby-${VERSION_RUBY}.tar.gz 6 | } 7 | 8 | compile_ruby() { 9 | [ -d ruby-${VERSION_RUBY} ] && chk rm -rf ruby-${VERSION_RUBY} 10 | chk tar xzvf ruby-${VERSION_RUBY}.tar.gz 11 | chk pushd ruby-${VERSION_RUBY} 12 | chk ./configure --prefix=$PREFIX 13 | chk make -j4 14 | chk popd 15 | } 16 | 17 | install_ruby() { 18 | chk pushd ruby-${VERSION_RUBY} 19 | chk make install 20 | chk popd 21 | } 22 | -------------------------------------------------------------------------------- /install/tercom-0.7.25.tgz.md5: -------------------------------------------------------------------------------- 1 | be1c818e48a782764f2c884d7ee6431c tercom-0.7.25.tgz 2 | -------------------------------------------------------------------------------- /install/tercom-0.7.25.tgz.sha1: -------------------------------------------------------------------------------- 1 | d1545e873c86ea0fb3ce2e3744ec3d48508f5495 tercom-0.7.25.tgz 2 | 
-------------------------------------------------------------------------------- /install/tercom.sh: -------------------------------------------------------------------------------- 1 | . lib.sh 2 | 3 | VERSION_TERCOM=0.7.25 4 | 5 | download_tercom() { 6 | download http://www.cs.umd.edu/~snover/tercom/tercom-$VERSION_TERCOM.tgz 7 | } 8 | 9 | compile_tercom() { 10 | [ -d tercom-$VERSION_TERCOM ] && chk rm -rf tercom-$VERSION_TERCOM 11 | chk tar xzf tercom-$VERSION_TERCOM.tgz 12 | } 13 | 14 | install_tercom() { 15 | chk pushd tercom-$VERSION_TERCOM 16 | chk mkdir -p $PREFIX/classpath 17 | chk cp tercom.7.25.jar $PREFIX/classpath 18 | chk popd 19 | } 20 | -------------------------------------------------------------------------------- /install/zmert.sh: -------------------------------------------------------------------------------- 1 | . lib.sh 2 | 3 | ZMERT_VERSION=zmert_v1.41 4 | download_zmert() { 5 | download http://www.cs.jhu.edu/~ozaidan/zmert/$ZMERT_VERSION.zip 6 | } 7 | 8 | compile_zmert() { 9 | [ -d $ZMERT_VERSION ] && chk rm -rf $ZMERT_VERSION 10 | chk unzip $ZMERT_VERSION 11 | } 12 | 13 | install_zmert() { 14 | chk mkdir -p $PREFIX/classpath #same as install_tercom; also creates a missing $PREFIX 15 | chk cp $ZMERT_VERSION/lib/zmert.jar $PREFIX/classpath/ 16 | } 17 | -------------------------------------------------------------------------------- /jam-files/LICENSE_1_0.txt: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software
and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the form of machine-executable object code generated by 15 | a source language processor. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /jam-files/boost-build/boost-build.jam: -------------------------------------------------------------------------------- 1 | # Copyright 2001, 2002 Dave Abrahams 2 | # Copyright 2002 Rene Rivera 3 | # Copyright 2003 Vladimir Prus 4 | # Distributed under the Boost Software License, Version 1.0. 5 | # (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | 8 | boost-build kernel ; 9 | -------------------------------------------------------------------------------- /jam-files/boost-build/bootstrap.jam: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2003 Vladimir Prus. 2 | # 3 | # Use, modification and distribution is subject to the Boost Software 4 | # License Version 1.0. (See accompanying file LICENSE_1_0.txt or 5 | # http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | # This file handles initial phase of Boost.Build loading. 
8 | # Boost.Jam has already figured out where Boost.Build is 9 | # and loads this file, which is responsible for initialization 10 | # of basic facilities such a module system and loading the 11 | # main Boost.Build module, build-system.jam. 12 | # 13 | # Exact operation of this module is not interesting, it makes 14 | # sense to look at build-system.jam right away. 15 | 16 | # Load the kernel/bootstrap.jam, which does all the work. 17 | .bootstrap-file = $(.bootstrap-file:D)/kernel/bootstrap.jam ; 18 | include $(.bootstrap-file) ; -------------------------------------------------------------------------------- /jam-files/boost-build/build/readme.txt: -------------------------------------------------------------------------------- 1 | Copyright 2001, 2002 Dave Abrahams 2 | Copyright 2002 Vladimir Prus 3 | Distributed under the Boost Software License, Version 1.0. 4 | (See accompanying file LICENSE_1_0.txt or copy at 5 | http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | Development code for new build system. To run unit tests for jam code, execute: 8 | 9 | bjam --debug --build-system=test 10 | 11 | Comprehensive tests require Python. See ../test/readme.txt 12 | -------------------------------------------------------------------------------- /jam-files/boost-build/kernel/boost-build.jam: -------------------------------------------------------------------------------- 1 | # Copyright 2003 Dave Abrahams 2 | # Distributed under the Boost Software License, Version 1.0. 3 | # (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | boost-build . ; 6 | -------------------------------------------------------------------------------- /jam-files/boost-build/site-config.jam: -------------------------------------------------------------------------------- 1 | # Copyright 1999-2012 Gentoo Foundation 2 | # Distributed under the Boost Software License, Version 1.0. 
3 | # (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | # Define two new variants to be used when building boost (or separate boost-libs) 6 | # on Gentoo. The two variants make use of Gentoo-specific optimization and debug-symbols 7 | # values "none" which are not part of the official boost distribution. 8 | # DO NOT RELY ON THE FOLLOWING VARIANTS TO BE PRESENT ON OTHER OS! 9 | variant gentoorelease : release : none none shared ; 10 | variant gentoodebug : debug : none on shared ; 11 | 12 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/bison.jam: -------------------------------------------------------------------------------- 1 | # Copyright 2003 Vladimir Prus 2 | # Distributed under the Boost Software License, Version 1.0. 3 | # (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | import generators ; 6 | import feature ; 7 | import type ; 8 | import property ; 9 | 10 | feature.feature bison.prefix : : free ; 11 | type.register Y : y ; 12 | type.register YY : yy ; 13 | generators.register-standard bison.bison : Y : C H ; 14 | generators.register-standard bison.bison : YY : CPP HPP ; 15 | 16 | rule init ( ) 17 | { 18 | } 19 | 20 | rule bison ( dst dst_header : src : properties * ) 21 | { 22 | local r = [ property.select bison.prefix : $(properties) ] ; 23 | if $(r) 24 | { 25 | PREFIX_OPT on $(<) = -p $(r:G=) ; 26 | } 27 | } 28 | 29 | actions bison 30 | { 31 | bison $(PREFIX_OPT) -d -o $(<[1]) $(>) 32 | } 33 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/boostbook-config.jam: -------------------------------------------------------------------------------- 1 | #~ Copyright 2005 Rene Rivera. 2 | #~ Distributed under the Boost Software License, Version 1.0. 
3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | # Automatic configuration for BoostBook tools. To use, just import this module. 6 | # 7 | # This module is deprecated. 8 | # using boostbook ; 9 | # with no arguments now suffices. 10 | 11 | import toolset : using ; 12 | 13 | using boostbook ; 14 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/clang.jam: -------------------------------------------------------------------------------- 1 | # Distributed under the Boost Software License, Version 1.0. 2 | # (See accompanying file LICENSE_1_0.txt 3 | # or copy at http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | # This is a generic 'clang' toolset. Depending on the current system, it 6 | # forwards either to 'clang-unix' or 'clang-darwin' modules. 7 | 8 | import feature ; 9 | import os ; 10 | import toolset ; 11 | 12 | feature.extend toolset : clang ; 13 | feature.subfeature toolset clang : platform : : propagated link-incompatible ; 14 | 15 | rule init ( * : * ) 16 | { 17 | if [ os.name ] = MACOSX 18 | { 19 | toolset.using clang-darwin : 20 | $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; 21 | } 22 | else 23 | { 24 | toolset.using clang-linux : 25 | $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/como.jam: -------------------------------------------------------------------------------- 1 | # Copyright Vladimir Prus 2004. 2 | # Distributed under the Boost Software License, Version 1.0. 3 | # (See accompanying file LICENSE_1_0.txt 4 | # or copy at http://www.boost.org/LICENSE_1_0.txt) 5 | 6 | # This is a generic 'como' toolset. Depending on the current system, it 7 | # forwards either to 'como-linux' or 'como-win' modules. 
8 | 9 | import feature ; 10 | import os ; 11 | import toolset ; 12 | 13 | feature.extend toolset : como ; 14 | feature.subfeature toolset como : platform : : propagated link-incompatible ; 15 | 16 | rule init ( * : * ) 17 | { 18 | if [ os.name ] = LINUX 19 | { 20 | toolset.using como-linux : 21 | $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; 22 | } 23 | else 24 | { 25 | toolset.using como-win : 26 | $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; 27 | 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/cw-config.jam: -------------------------------------------------------------------------------- 1 | #~ Copyright 2005 Rene Rivera. 2 | #~ Distributed under the Boost Software License, Version 1.0. 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | # Automatic configuration for CodeWarrior toolset. To use, just import this module. 6 | 7 | import os ; 8 | import toolset : using ; 9 | 10 | if [ os.name ] = NT 11 | { 12 | for local R in 9 8 7 13 | { 14 | local cw-path = [ W32_GETREG 15 | "HKEY_LOCAL_MACHINE\\SOFTWARE\\Metrowerks\\CodeWarrior\\Product Versions\\CodeWarrior for Windows R$(R)" 16 | : "PATH" ] ; 17 | local cw-version = [ W32_GETREG 18 | "HKEY_LOCAL_MACHINE\\SOFTWARE\\Metrowerks\\CodeWarrior\\Product Versions\\CodeWarrior for Windows R$(R)" 19 | : "VERSION" ] ; 20 | cw-path ?= [ W32_GETREG 21 | "HKEY_LOCAL_MACHINE\\SOFTWARE\\Metrowerks\\CodeWarrior for Windows\\$(R).0" 22 | : "PATH" ] ; 23 | cw-version ?= $(R).0 ; 24 | 25 | if $(cw-path) 26 | { 27 | if --debug-configuration in [ modules.peek : ARGV ] 28 | { 29 | ECHO "notice:" using cw ":" $(cw-version) ":" "$(cw-path)\\Other Metrowerks Tools\\Command Line Tools\\mwcc.exe" ; 30 | } 31 | using cw : $(cw-version) : "$(cw-path)\\Other Metrowerks Tools\\Command Line Tools\\mwcc.exe" ; 32 | } 33 | } 34 | } 35 | 
-------------------------------------------------------------------------------- /jam-files/boost-build/tools/doxygen-config.jam: -------------------------------------------------------------------------------- 1 | #~ Copyright 2005, 2006 Rene Rivera. 2 | #~ Distributed under the Boost Software License, Version 1.0. 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | # Automatic configuration for Doxygen tools. To use, just import this module. 6 | 7 | import toolset : using ; 8 | 9 | ECHO "warning: doxygen-config.jam is deprecated. Use 'using doxygen ;' instead." ; 10 | 11 | using doxygen ; 12 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/doxygen/windows-paths-check.doxyfile: -------------------------------------------------------------------------------- 1 | INPUT = windows-paths-check.hpp 2 | GENERATE_HTML = NO 3 | GENERATE_LATEX = NO 4 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/doxygen/windows-paths-check.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kpu/MEMT/cfd150b33c33320ee74d643a23e8e909f77a2994/jam-files/boost-build/tools/doxygen/windows-paths-check.hpp -------------------------------------------------------------------------------- /jam-files/boost-build/tools/gfortran.jam: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2004 Toon Knapen 2 | # 3 | # Use, modification and distribution is subject to the Boost Software 4 | # License Version 1.0. (See accompanying file LICENSE_1_0.txt or 5 | # http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | import toolset : flags ; 8 | import feature ; 9 | import fortran ; 10 | 11 | rule init ( version ? 
: command * : options * ) 12 | { 13 | } 14 | 15 | # Declare flags and action for compilation 16 | flags gfortran OPTIONS <fflags> ; 17 | 18 | flags gfortran OPTIONS <optimization>off : -O0 ; 19 | flags gfortran OPTIONS <optimization>speed : -O3 ; 20 | flags gfortran OPTIONS <optimization>space : -Os ; 21 | 22 | flags gfortran OPTIONS <debug-symbols>on : -g ; 23 | flags gfortran OPTIONS <profiling>on : -pg ; 24 | 25 | flags gfortran OPTIONS <link>shared/<main-target-type>LIB : -fPIC ; 26 | 27 | flags gfortran DEFINES <define> ; 28 | flags gfortran INCLUDES <include> ; 29 | 30 | rule compile.fortran 31 | { 32 | } 33 | 34 | actions compile.fortran 35 | { 36 | gcc -Wall $(OPTIONS) -D$(DEFINES) -I$(INCLUDES) -c -o "$(<)" "$(>)" 37 | } 38 | 39 | generators.register-fortran-compiler gfortran.compile.fortran : FORTRAN FORTRAN90 : OBJ ; 40 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/hpfortran.jam: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2004 Toon Knapen 2 | # 3 | # Use, modification and distribution is subject to the Boost Software 4 | # License Version 1.0. (See accompanying file LICENSE_1_0.txt or 5 | # http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | import toolset : flags ; 8 | import feature ; 9 | import fortran ; 10 | 11 | rule init ( version ?
: command * : options * ) 12 | { 13 | } 14 | 15 | # Declare flags and action for compilation 16 | flags hpfortran OPTIONS <optimization>off : -O0 ; 17 | flags hpfortran OPTIONS <optimization>speed : -O3 ; 18 | flags hpfortran OPTIONS <optimization>space : -O1 ; 19 | 20 | flags hpfortran OPTIONS <debug-symbols>on : -g ; 21 | flags hpfortran OPTIONS <profiling>on : -pg ; 22 | 23 | flags hpfortran DEFINES <define> ; 24 | flags hpfortran INCLUDES <include> ; 25 | 26 | rule compile.fortran 27 | { 28 | } 29 | 30 | actions compile.fortran 31 | { 32 | f77 +DD64 $(OPTIONS) -D$(DEFINES) -I$(INCLUDES) -c -o "$(<)" "$(>)" 33 | } 34 | 35 | generators.register-fortran-compiler hpfortran.compile.fortran : FORTRAN : OBJ ; 36 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/intel.jam: -------------------------------------------------------------------------------- 1 | # Copyright Vladimir Prus 2004. 2 | # Distributed under the Boost Software License, Version 1.0. 3 | # (See accompanying file LICENSE_1_0.txt 4 | # or copy at http://www.boost.org/LICENSE_1_0.txt) 5 | 6 | # This is a generic 'intel' toolset. Depending on the current 7 | # system, it forwards to 'intel-linux', 'intel-darwin' or 'intel-win' 8 | # modules.
9 | 10 | import feature ; 11 | import os ; 12 | import toolset ; 13 | 14 | feature.extend toolset : intel ; 15 | feature.subfeature toolset intel : platform : : propagated link-incompatible ; 16 | 17 | rule init ( * : * ) 18 | { 19 | if [ os.name ] = LINUX 20 | { 21 | toolset.using intel-linux : 22 | $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; 23 | } 24 | else if [ os.name ] = MACOSX 25 | { 26 | toolset.using intel-darwin : 27 | $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; 28 | } 29 | else 30 | { 31 | toolset.using intel-win : 32 | $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/lex.jam: -------------------------------------------------------------------------------- 1 | # Copyright 2003 Vladimir Prus 2 | # Distributed under the Boost Software License, Version 1.0. 3 | # (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | import type ; 6 | import generators ; 7 | import feature ; 8 | import property ; 9 | 10 | 11 | feature.feature flex.prefix : : free ; 12 | type.register LEX : l ; 13 | type.register LEX++ : ll ; 14 | generators.register-standard lex.lex : LEX : C ; 15 | generators.register-standard lex.lex : LEX++ : CPP ; 16 | 17 | rule init ( ) 18 | { 19 | } 20 | 21 | rule lex ( target : source : properties * ) 22 | { 23 | local r = [ property.select flex.prefix : $(properties) ] ; 24 | if $(r) 25 | { 26 | PREFIX on $(<) = $(r:G=) ; 27 | } 28 | } 29 | 30 | actions lex 31 | { 32 | flex -P$(PREFIX) -o$(<) $(>) 33 | } 34 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/mc.jam: -------------------------------------------------------------------------------- 1 | #~ Copyright 2005 Alexey Pakhunov. 2 | #~ Distributed under the Boost Software License, Version 1.0. 
3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | # Support for Microsoft message compiler tool. 6 | # Notes: 7 | # - there's just message compiler tool, there's no tool for 8 | # extracting message strings from sources 9 | # - This file allows to use Microsoft message compiler 10 | # with any toolset. In msvc.jam, there's more specific 11 | # message compiling action. 12 | 13 | import common ; 14 | import generators ; 15 | import feature : feature get-values ; 16 | import toolset : flags ; 17 | import type ; 18 | import rc ; 19 | 20 | rule init ( ) 21 | { 22 | } 23 | 24 | type.register MC : mc ; 25 | 26 | 27 | # Command line options 28 | feature mc-input-encoding : ansi unicode : free ; 29 | feature mc-output-encoding : unicode ansi : free ; 30 | feature mc-set-customer-bit : no yes : free ; 31 | 32 | flags mc.compile MCFLAGS <mc-input-encoding>ansi : -a ; 33 | flags mc.compile MCFLAGS <mc-input-encoding>unicode : -u ; 34 | flags mc.compile MCFLAGS <mc-output-encoding>ansi : -A ; 35 | flags mc.compile MCFLAGS <mc-output-encoding>unicode : -U ; 36 | flags mc.compile MCFLAGS <mc-set-customer-bit>no : ; 37 | flags mc.compile MCFLAGS <mc-set-customer-bit>yes : -c ; 38 | 39 | generators.register-standard mc.compile : MC : H RC ; 40 | 41 | actions compile 42 | { 43 | mc $(MCFLAGS) -h "$(<[1]:DW)" -r "$(<[2]:DW)" "$(>:W)" 44 | } 45 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/msvc-config.jam: -------------------------------------------------------------------------------- 1 | #~ Copyright 2005 Rene Rivera. 2 | #~ Distributed under the Boost Software License, Version 1.0. 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | # Automatic configuration for VisualStudio toolset. To use, just import this module. 6 | 7 | import toolset : using ; 8 | 9 | ECHO "warning: msvc-config.jam is deprecated. Use 'using msvc : all ;' instead."
; 10 | 11 | using msvc : all ; 12 | 13 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/python-config.jam: -------------------------------------------------------------------------------- 1 | #~ Copyright 2005 Rene Rivera. 2 | #~ Distributed under the Boost Software License, Version 1.0. 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | # Automatic configuration for Python tools and libraries. To use, just import this module. 6 | 7 | import os ; 8 | import toolset : using ; 9 | 10 | if [ os.name ] = NT 11 | { 12 | for local R in 2.4 2.3 2.2 13 | { 14 | local python-path = [ W32_GETREG 15 | "HKEY_LOCAL_MACHINE\\SOFTWARE\\Python\\PythonCore\\$(R)\\InstallPath" ] ; 16 | local python-version = $(R) ; 17 | 18 | if $(python-path) 19 | { 20 | if --debug-configuration in [ modules.peek : ARGV ] 21 | { 22 | ECHO "notice:" using python ":" $(python-version) ":" $(python-path) ; 23 | } 24 | using python : $(python-version) : $(python-path) ; 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/qt.jam: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2006 Vladimir Prus. 2 | # 3 | # Use, modification and distribution is subject to the Boost Software 4 | # License Version 1.0. (See accompanying file LICENSE_1_0.txt or 5 | # http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | # Forwarding toolset file to Qt GUI library. Forwards to the toolset file 8 | # for the current version of Qt. 9 | 10 | import qt4 ; 11 | 12 | rule init ( prefix : full_bin ? : full_inc ? : full_lib ? : version ?
: condition * ) 13 | { 14 | qt4.init $(prefix) : $(full_bin) : $(full_inc) : $(full_lib) : $(version) : $(condition) ; 15 | } 16 | 17 | 18 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/quickbook-config.jam: -------------------------------------------------------------------------------- 1 | #~ Copyright 2005 Rene Rivera. 2 | #~ Distributed under the Boost Software License, Version 1.0. 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | # Automatic configuration for the QuickBook tool. To use, just import this module. 6 | 7 | import os ; 8 | import toolset : using ; 9 | 10 | if [ os.name ] = NT 11 | { 12 | local boost-dir = ; 13 | for local R in snapshot cvs 1.33.0 14 | { 15 | boost-dir += [ W32_GETREG 16 | "HKEY_LOCAL_MACHINE\\SOFTWARE\\Boost.org\\$(R)" 17 | : "InstallRoot" ] ; 18 | } 19 | local quickbook-path = [ GLOB "$(boost-dir)\\bin" "\\Boost\\bin" : quickbook.exe ] ; 20 | quickbook-path = $(quickbook-path[1]) ; 21 | 22 | if $(quickbook-path) 23 | { 24 | if --debug-configuration in [ modules.peek : ARGV ] 25 | { 26 | ECHO "notice:" using quickbook ":" $(quickbook-path) ; 27 | } 28 | using quickbook : $(quickbook-path) ; 29 | } 30 | } 31 | else 32 | { 33 | local quickbook-path = [ GLOB "/usr/local/bin" "/usr/bin" "/opt/bin" : quickbook ] ; 34 | quickbook-path = $(quickbook-path[1]) ; 35 | 36 | if $(quickbook-path) 37 | { 38 | if --debug-configuration in [ modules.peek : ARGV ] 39 | { 40 | ECHO "notice:" using quickbook ":" $(quickbook-path) ; 41 | } 42 | using quickbook : $(quickbook-path) ; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/quickbook.jam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kpu/MEMT/cfd150b33c33320ee74d643a23e8e909f77a2994/jam-files/boost-build/tools/quickbook.jam
-------------------------------------------------------------------------------- /jam-files/boost-build/tools/types/asm.jam: -------------------------------------------------------------------------------- 1 | # Copyright Craig Rodrigues 2005. Distributed under the Boost 2 | # Software License, Version 1.0. (See accompanying 3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 4 | type ASM : s S asm ; 5 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/types/exe.jam: -------------------------------------------------------------------------------- 1 | # Copyright David Abrahams 2004. Distributed under the Boost 2 | # Software License, Version 1.0. (See accompanying 3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | import type ; 6 | 7 | type.register EXE ; 8 | type.set-generated-target-suffix EXE : windows : "exe" ; 9 | type.set-generated-target-suffix EXE : cygwin : "exe" ; 10 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/types/html.jam: -------------------------------------------------------------------------------- 1 | # Copyright David Abrahams 2004. Distributed under the Boost 2 | # Software License, Version 1.0. (See accompanying 3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 4 | type HTML : html ; 5 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/types/obj.jam: -------------------------------------------------------------------------------- 1 | # Copyright David Abrahams 2004. Distributed under the Boost 2 | # Software License, Version 1.0. 
(See accompanying 3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | import type ; 6 | 7 | type.register OBJ : o obj ; 8 | type.set-generated-target-suffix OBJ : windows : obj ; 9 | type.set-generated-target-suffix OBJ : cygwin : obj ; 10 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/types/objc.jam: -------------------------------------------------------------------------------- 1 | # Copyright Rene Rivera 2008, 2010. 2 | # Distributed under the Boost Software License, Version 1.0. (See accompanying 3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 4 | import type ; 5 | import scanner ; 6 | import types/cpp ; 7 | 8 | class objc-scanner : c-scanner 9 | { 10 | rule __init__ ( includes * ) 11 | { 12 | c-scanner.__init__ $(includes) ; 13 | } 14 | 15 | rule pattern ( ) 16 | { 17 | return "#[ \t]*include|import[ ]*(<(.*)>|\"(.*)\")" ; 18 | } 19 | } 20 | 21 | scanner.register objc-scanner : include ; 22 | 23 | type.register OBJECTIVE_C : m ; 24 | type.register OBJECTIVE_CPP : mm ; 25 | type.set-scanner OBJECTIVE_C : objc-scanner ; 26 | type.set-scanner OBJECTIVE_CPP : objc-scanner ; 27 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/types/preprocessed.jam: -------------------------------------------------------------------------------- 1 | # Copyright Steven Watanabe 2011 2 | # Distributed under the Boost Software License Version 1.0. 
(See 3 | # accompanying file LICENSE_1_0.txt or copy at 4 | # http://www.boost.org/LICENSE_1_0.txt) 5 | 6 | import type ; 7 | 8 | type.register PREPROCESSED_C : i : C ; 9 | type.register PREPROCESSED_CPP : ii : CPP ; 10 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/types/qt.jam: -------------------------------------------------------------------------------- 1 | # Copyright Vladimir Prus 2005. Distributed under the Boost 2 | # Software License, Version 1.0. (See accompanying 3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | type UI : ui ; 6 | type QRC : qrc ; 7 | type MOCCABLE_CPP ; 8 | type MOCCABLE_H ; 9 | type MOCCABLE5_CPP ; 10 | type MOCCABLE5_H ; 11 | # Result of running moc. 12 | type MOC : moc : H ; 13 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/types/register.jam: -------------------------------------------------------------------------------- 1 | # Copyright David Abrahams 2004. Distributed under the Boost 2 | # Software License, Version 1.0. (See accompanying 3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | # This module's job is to automatically import all the type 6 | # registration modules in its directory. 7 | import type os path modules ; 8 | 9 | # Register the given type on the specified OSes, or on remaining OSes 10 | # if os is not specified. This rule is injected into each of the type 11 | # modules for the sake of convenience. 12 | local rule type ( type : suffixes * : base-type ? : os * ) 13 | { 14 | if ! [ type.registered $(type) ] 15 | { 16 | if ( ! 
$(os) ) || [ os.name ] in $(os) 17 | { 18 | type.register $(type) : $(suffixes) : $(base-type) ; 19 | } 20 | } 21 | } 22 | 23 | .this-module's-file = [ modules.binding $(__name__) ] ; 24 | .this-module's-dir = [ path.parent $(.this-module's-file) ] ; 25 | .sibling-jamfiles = [ path.glob $(.this-module's-dir) : *.jam ] ; 26 | .sibling-modules = [ MATCH ^(.*)\.jam$ : $(.sibling-jamfiles) ] ; 27 | 28 | # A loop over all modules in this directory 29 | for m in $(.sibling-modules) 30 | { 31 | m = [ path.basename $(m) ] ; 32 | m = types/$(m) ; 33 | 34 | # Inject the type rule into the new module 35 | IMPORT $(__name__) : type : $(m) : type ; 36 | import $(m) ; 37 | } 38 | 39 | 40 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/types/rsp.jam: -------------------------------------------------------------------------------- 1 | # Copyright David Abrahams 2004. Distributed under the Boost 2 | # Software License, Version 1.0. (See accompanying 3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 4 | type RSP : rsp ; 5 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/xlf.jam: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2004 Toon Knapen 2 | # 3 | # Use, modification and distribution is subject to the Boost Software 4 | # License Version 1.0. (See accompanying file LICENSE_1_0.txt or 5 | # http://www.boost.org/LICENSE_1_0.txt) 6 | 7 | # 8 | # toolset configuration for the IBM Fortran compiler (xlf) 9 | # 10 | 11 | import toolset : flags ; 12 | import feature ; 13 | import fortran ; 14 | 15 | rule init ( version ? 
: command * : options * ) 16 | { 17 | } 18 | 19 | # Declare flags and action for compilation 20 | flags xlf OPTIONS off : -O0 ; 21 | flags xlf OPTIONS speed : -O3 ; 22 | flags xlf OPTIONS space : -Os ; 23 | 24 | flags xlf OPTIONS on : -g ; 25 | flags xlf OPTIONS on : -pg ; 26 | 27 | flags xlf DEFINES ; 28 | flags xlf INCLUDES ; 29 | 30 | rule compile-fortran 31 | { 32 | } 33 | 34 | actions compile-fortran 35 | { 36 | xlf $(OPTIONS) -I$(INCLUDES) -c -o "$(<)" "$(>)" 37 | } 38 | 39 | generators.register-fortran-compiler xlf.compile-fortran : FORTRAN : OBJ ; 40 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/xsltproc-config.jam: -------------------------------------------------------------------------------- 1 | #~ Copyright 2005 Rene Rivera. 2 | #~ Distributed under the Boost Software License, Version 1.0. 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 4 | 5 | # Automatic configuration for the xsltproc toolset. To use, just import this 6 | # module. 
7 | 8 | import os ; 9 | import toolset : using ; 10 | 11 | 12 | local rule locate-executable ( name ) 13 | { 14 | local path = [ modules.peek : PATH ] ; 15 | local exe ; 16 | if [ os.name ] = NT 17 | { 18 | exe = [ GLOB $(path) "C:\\Boost\\bin" : $(name)\.exe ] ; 19 | } 20 | else 21 | { 22 | exe = [ GLOB $(path) : $(name) ] ; 23 | } 24 | return $(exe[1]) ; 25 | } 26 | 27 | 28 | local xsltproc-exe = [ locate-executable xsltproc ] ; 29 | if $(xsltproc-exe) 30 | { 31 | if --debug-configuration in [ modules.peek : ARGV ] 32 | { 33 | ECHO notice: using xsltproc ":" $(xsltproc-exe) ; 34 | } 35 | using xsltproc : $(xsltproc-exe) ; 36 | } 37 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/xsltproc/included.xsl: -------------------------------------------------------------------------------- 1 | 2 | 9 | 11 | 12 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/xsltproc/test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /jam-files/boost-build/tools/xsltproc/test.xsl: -------------------------------------------------------------------------------- 1 | 2 | 9 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /jam-files/engine/boost-no-inspect: -------------------------------------------------------------------------------- 1 | this really out of our hands, so tell inspect to ignore directory -------------------------------------------------------------------------------- /jam-files/engine/class.h: -------------------------------------------------------------------------------- 1 | /* Copyright Vladimir Prus 2003. Distributed under the Boost */ 2 | /* Software License, Version 1.0. 
(See accompanying */ 3 | /* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */ 4 | 5 | #ifndef CLASS_H_VP_2003_08_01 6 | #define CLASS_H_VP_2003_08_01 7 | 8 | #include "lists.h" 9 | #include "frames.h" 10 | 11 | OBJECT * make_class_module( LIST * xname, LIST * bases, FRAME * frame ); 12 | void class_done( void ); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /jam-files/engine/cwd.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2002. Vladimir Prus 3 | * Distributed under the Boost Software License, Version 1.0. 4 | * (See accompanying file LICENSE_1_0.txt or copy at 5 | * http://www.boost.org/LICENSE_1_0.txt) 6 | */ 7 | 8 | /* 9 | * cwd.h - manages the current working folder information 10 | */ 11 | 12 | #ifndef CWD_H 13 | #define CWD_H 14 | 15 | #include "object.h" 16 | 17 | 18 | /* cwd() - returns the current working folder */ 19 | OBJECT * cwd( void ); 20 | 21 | /* cwd_init() - initialize the cwd module functionality 22 | * 23 | * The current working folder can not change in Boost Jam so this function 24 | * gets the current working folder information from the OS and stores it 25 | * internally. 
26 | * 27 | * Expected to be called at program startup before the program's current 28 | * working folder has been changed 29 | */ 30 | void cwd_init( void ); 31 | 32 | /* cwd_done() - cleans up the cwd module functionality */ 33 | void cwd_done( void ); 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /jam-files/engine/debian/control: -------------------------------------------------------------------------------- 1 | Source: bjam 2 | Section: devel 3 | Priority: optional 4 | Maintainer: Vladimir Prus 5 | Build-Depends: debhelper (>> 3.0.0), docbook-to-man, bison 6 | Standards-Version: 3.5.2 7 | 8 | Package: bjam 9 | Architecture: any 10 | Depends: ${shlibs:Depends} 11 | Description: Build tool 12 | Boost.Jam is a portable build tool with its own interpreted language, which 13 | allows to implement rather complex logic in a readable way and without 14 | resorting to external programs. It is a descendant of Jam/MR tool modified to 15 | suit the needs of Boost.Build. In particular, modules and rule parameters 16 | were added, as well as several new builtins. 17 | -------------------------------------------------------------------------------- /jam-files/engine/debian/copyright: -------------------------------------------------------------------------------- 1 | This package was debianized by Vladimir Prus on 2 | Wed, 17 July 2002, 19:27:00 +0400. 3 | 4 | Copyright: 5 | 6 | /+\ 7 | +\ Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc. 8 | \+/ 9 | 10 | This is Release 2.4 of Jam/MR, a make-like program. 11 | 12 | License is hereby granted to use this software and distribute it 13 | freely, as long as this copyright notice is retained and modifications 14 | are clearly marked. 15 | 16 | ALL WARRANTIES ARE HEREBY DISCLAIMED. 17 | 18 | Some portions are also: 19 | 20 | Copyright 2001-2006 David Abrahams. 21 | Copyright 2002-2006 Rene Rivera. 22 | Copyright 2003-2006 Vladimir Prus. 
23 | 24 | Distributed under the Boost Software License, Version 1.0. 25 | (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 26 | -------------------------------------------------------------------------------- /jam-files/engine/frames.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2001-2004 David Abrahams. 3 | * Distributed under the Boost Software License, Version 1.0. 4 | * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 5 | */ 6 | 7 | #include "jam.h" 8 | #include "frames.h" 9 | 10 | 11 | FRAME * frame_before_python_call; 12 | 13 | 14 | void frame_init( FRAME * frame ) 15 | { 16 | frame->prev = 0; 17 | frame->prev_user = 0; 18 | lol_init( frame->args ); 19 | frame->module = root_module(); 20 | frame->rulename = "module scope"; 21 | frame->file = 0; 22 | frame->line = -1; 23 | } 24 | 25 | 26 | void frame_free( FRAME * frame ) 27 | { 28 | lol_free( frame->args ); 29 | } 30 | -------------------------------------------------------------------------------- /jam-files/engine/frames.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2001-2004 David Abrahams. 3 | * Distributed under the Boost Software License, Version 1.0. 4 | * (See accompanying file LICENSE_1_0.txt or copy at 5 | * http://www.boost.org/LICENSE_1_0.txt) 6 | */ 7 | 8 | #ifndef FRAMES_DWA20011021_H 9 | #define FRAMES_DWA20011021_H 10 | 11 | #include "lists.h" 12 | #include "modules.h" 13 | #include "object.h" 14 | 15 | 16 | typedef struct frame FRAME; 17 | 18 | struct frame 19 | { 20 | FRAME * prev; 21 | FRAME * prev_user; /* The nearest enclosing frame for which 22 | module->user_module is true. 
*/ 23 | LOL args[ 1 ]; 24 | module_t * module; 25 | OBJECT * file; 26 | int line; 27 | char const * rulename; 28 | }; 29 | 30 | 31 | /* When a call into Python is in progress, this variable points to the bjam 32 | * frame that was current at the moment of the call. When the call completes, 33 | * the variable is not defined. Furthermore, if Jam calls Python which calls Jam 34 | * and so on, this variable only keeps the most recent Jam frame. 35 | */ 36 | extern FRAME * frame_before_python_call; 37 | 38 | 39 | void frame_init( FRAME * ); 40 | void frame_free( FRAME * ); 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /jam-files/engine/hcache.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is not part of Jam 3 | */ 4 | 5 | /* 6 | * hcache.h - handle #includes in source files 7 | */ 8 | #ifndef HCACHE_H 9 | #define HCACHE_H 10 | 11 | #include "lists.h" 12 | #include "regexp.h" 13 | #include "rules.h" 14 | 15 | void hcache_init( void ); 16 | void hcache_done( void ); 17 | LIST * hcache( TARGET * t, int rec, regexp * re[], LIST * hdrscan ); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /jam-files/engine/hdrmacro.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993, 1995 Christopher Seiwald. 3 | * 4 | * This file is part of Jam - see jam.c for Copyright information. 
5 | */ 6 | 7 | /* 8 | * hdrmacro.h - parses header files for #define MACRO or 9 | * #define MACRO "filename" definitions 10 | */ 11 | 12 | #ifndef HDRMACRO_SW20111118_H 13 | #define HDRMACRO_SW20111118_H 14 | 15 | #include "object.h" 16 | #include "rules.h" 17 | 18 | void macro_headers( TARGET * ); 19 | OBJECT * macro_header_get( OBJECT * macro_name ); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /jam-files/engine/headers.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993, 1995 Christopher Seiwald. 3 | * 4 | * This file is part of Jam - see jam.c for Copyright information. 5 | */ 6 | 7 | /* 8 | * headers.h - handle #includes in source files 9 | */ 10 | 11 | #ifndef HEADERS_SW20111118_H 12 | #define HEADERS_SW20111118_H 13 | 14 | #include "object.h" 15 | #include "rules.h" 16 | #include "regexp.h" 17 | 18 | void headers( TARGET * t ); 19 | 20 | #ifdef OPT_HEADER_CACHE_EXT 21 | struct regexp; 22 | LIST * headers1( LIST *l, OBJECT * file, int rec, struct regexp *re[] ); 23 | #endif 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /jam-files/engine/jambase.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993, 1995 Christopher Seiwald. 3 | * 4 | * This file is part of Jam - see jam.c for Copyright information. 5 | */ 6 | 7 | /* 8 | * jambase.h - declaration for the internal jambase 9 | * 10 | * The file Jambase is turned into a C array of strings in jambase.c 11 | * so that it can be built in to the executable. This is the 12 | * declaration for that array. 
13 | */ 14 | 15 | extern char *jambase[]; 16 | -------------------------------------------------------------------------------- /jam-files/engine/jamgramtab.h: -------------------------------------------------------------------------------- 1 | { "!", _BANG_t }, 2 | { "!=", _BANG_EQUALS_t }, 3 | { "&", _AMPER_t }, 4 | { "&&", _AMPERAMPER_t }, 5 | { "(", _LPAREN_t }, 6 | { ")", _RPAREN_t }, 7 | { "+=", _PLUS_EQUALS_t }, 8 | { ":", _COLON_t }, 9 | { ";", _SEMIC_t }, 10 | { "<", _LANGLE_t }, 11 | { "<=", _LANGLE_EQUALS_t }, 12 | { "=", _EQUALS_t }, 13 | { ">", _RANGLE_t }, 14 | { ">=", _RANGLE_EQUALS_t }, 15 | { "?=", _QUESTION_EQUALS_t }, 16 | { "[", _LBRACKET_t }, 17 | { "]", _RBRACKET_t }, 18 | { "actions", ACTIONS_t }, 19 | { "bind", BIND_t }, 20 | { "case", CASE_t }, 21 | { "class", CLASS_t }, 22 | { "default", DEFAULT_t }, 23 | { "else", ELSE_t }, 24 | { "existing", EXISTING_t }, 25 | { "for", FOR_t }, 26 | { "if", IF_t }, 27 | { "ignore", IGNORE_t }, 28 | { "in", IN_t }, 29 | { "include", INCLUDE_t }, 30 | { "local", LOCAL_t }, 31 | { "module", MODULE_t }, 32 | { "on", ON_t }, 33 | { "piecemeal", PIECEMEAL_t }, 34 | { "quietly", QUIETLY_t }, 35 | { "return", RETURN_t }, 36 | { "rule", RULE_t }, 37 | { "switch", SWITCH_t }, 38 | { "together", TOGETHER_t }, 39 | { "updated", UPDATED_t }, 40 | { "while", WHILE_t }, 41 | { "{", _LBRACE_t }, 42 | { "|", _BAR_t }, 43 | { "||", _BARBAR_t }, 44 | { "}", _RBRACE_t }, 45 | -------------------------------------------------------------------------------- /jam-files/engine/make.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993, 1995 Christopher Seiwald. 3 | * 4 | * This file is part of Jam - see jam.c for Copyright information. 
5 | */ 6 | 7 | /* 8 | * make.h - bring a target up to date, once rules are in place 9 | */ 10 | 11 | #ifndef MAKE_SW20111118_H 12 | #define MAKE_SW20111118_H 13 | 14 | #include "lists.h" 15 | #include "object.h" 16 | #include "rules.h" 17 | 18 | int make( LIST * targets, int anyhow ); 19 | int make1( LIST * t ); 20 | 21 | typedef struct { 22 | int temp; 23 | int updating; 24 | int cantfind; 25 | int cantmake; 26 | int targets; 27 | int made; 28 | } COUNTS ; 29 | 30 | 31 | void make0( TARGET * t, TARGET * p, int depth, COUNTS * counts, int anyhow, 32 | TARGET * rescanning ); 33 | 34 | 35 | /* Specifies that the target should be updated. */ 36 | void mark_target_for_updating( OBJECT * target ); 37 | 38 | /* Returns targets previously passed to mark_target_for_updating(). */ 39 | LIST * targets_to_update(); 40 | 41 | /* Clears/unmarks all targets currently marked for update. */ 42 | void clear_targets_to_update(); 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /jam-files/engine/modules/path.c: -------------------------------------------------------------------------------- 1 | /* Copyright Vladimir Prus 2003. 2 | * Distributed under the Boost Software License, Version 1.0. 3 | * (See accompanying file LICENSE_1_0.txt or copy at 4 | * http://www.boost.org/LICENSE_1_0.txt) 5 | */ 6 | 7 | #include "../constants.h" 8 | #include "../frames.h" 9 | #include "../lists.h" 10 | #include "../native.h" 11 | #include "../timestamp.h" 12 | 13 | 14 | LIST * path_exists( FRAME * frame, int flags ) 15 | { 16 | return file_query( list_front( lol_get( frame->args, 0 ) ) ) ? 
17 | list_new( object_copy( constant_true ) ) : L0; 18 | } 19 | 20 | 21 | void init_path() 22 | { 23 | char const * args[] = { "location", 0 }; 24 | declare_native_rule( "path", "exists", args, path_exists, 1 ); 25 | } 26 | -------------------------------------------------------------------------------- /jam-files/engine/modules/readme.txt: -------------------------------------------------------------------------------- 1 | 2 | This directory contains sources which declare native 3 | rules for Boost.Build modules. -------------------------------------------------------------------------------- /jam-files/engine/modules/set.c: -------------------------------------------------------------------------------- 1 | /* Copyright Vladimir Prus 2003. Distributed under the Boost */ 2 | /* Software License, Version 1.0. (See accompanying */ 3 | /* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */ 4 | 5 | #include "../native.h" 6 | #include "../object.h" 7 | 8 | /* 9 | local result = ; 10 | local element ; 11 | for element in $(B) 12 | { 13 | if ! 
( $(element) in $(A) ) 14 | { 15 | result += $(element) ; 16 | } 17 | } 18 | return $(result) ; 19 | */ 20 | LIST *set_difference( FRAME *frame, int flags ) 21 | { 22 | 23 | LIST* b = lol_get( frame->args, 0 ); 24 | LIST* a = lol_get( frame->args, 1 ); 25 | 26 | LIST* result = L0; 27 | LISTITER iter = list_begin( b ), end = list_end( b ); 28 | for( ; iter != end; iter = list_next( iter ) ) 29 | { 30 | if (!list_in(a, list_item(iter))) 31 | result = list_push_back(result, object_copy(list_item(iter))); 32 | } 33 | return result; 34 | } 35 | 36 | void init_set() 37 | { 38 | { 39 | const char* args[] = { "B", "*", ":", "A", "*", 0 }; 40 | declare_native_rule("set", "difference", args, set_difference, 1); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /jam-files/engine/native.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2003. Vladimir Prus 2 | * Distributed under the Boost Software License, Version 1.0. 3 | * (See accompanying file LICENSE_1_0.txt or copy at 4 | * http://www.boost.org/LICENSE_1_0.txt) 5 | */ 6 | 7 | #include "native.h" 8 | 9 | #include "hash.h" 10 | 11 | #include 12 | 13 | 14 | void declare_native_rule( char const * module, char const * rule, 15 | char const * * args, LIST * (*f)( FRAME *, int ), int version ) 16 | { 17 | OBJECT * const module_obj = module ? 
object_new( module ) : 0 ; 18 | module_t * m = bindmodule( module_obj ); 19 | if ( module_obj ) 20 | object_free( module_obj ); 21 | if ( !m->native_rules ) 22 | m->native_rules = hashinit( sizeof( native_rule_t ), "native rules" ); 23 | 24 | { 25 | OBJECT * const name = object_new( rule ); 26 | int found; 27 | native_rule_t * const np = (native_rule_t *)hash_insert( 28 | m->native_rules, name, &found ); 29 | np->name = name; 30 | assert( !found ); 31 | np->procedure = function_builtin( f, 0, args ); 32 | np->version = version; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /jam-files/engine/native.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2003. David Abrahams 2 | * Distributed under the Boost Software License, Version 1.0. 3 | * (See accompanying file LICENSE_1_0.txt or copy at 4 | * http://www.boost.org/LICENSE_1_0.txt) 5 | */ 6 | 7 | #ifndef NATIVE_H_VP_2003_12_09 8 | #define NATIVE_H_VP_2003_12_09 9 | 10 | #include "function.h" 11 | #include "frames.h" 12 | #include "lists.h" 13 | #include "object.h" 14 | 15 | typedef struct native_rule_t 16 | { 17 | OBJECT * name; 18 | FUNCTION * procedure; 19 | 20 | /* Version of the interface that the native rule provides. It is possible 21 | * that we want to change the set parameter for existing native rule. In 22 | * that case, version number should be incremented so Boost.Build can check 23 | * for the version it relies on. 24 | * 25 | * Versions are numbered from 1. 26 | */ 27 | int version; 28 | } native_rule_t; 29 | /* MSVC debugger gets confused unless the native_rule_t typedef is provided. 
*/ 30 | 31 | void declare_native_rule( char const * module, char const * rule, 32 | char const * * args, LIST * (*f)( FRAME *, int ), int version ); 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /jam-files/engine/object.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Steven Watanabe 3 | * 4 | * This file is part of Jam - see jam.c for Copyright information. 5 | */ 6 | 7 | /* 8 | * object.h - object manipulation routines 9 | */ 10 | 11 | #ifndef BOOST_JAM_OBJECT_H 12 | #define BOOST_JAM_OBJECT_H 13 | 14 | typedef struct _object OBJECT; 15 | 16 | OBJECT * object_new( char const * const ); 17 | OBJECT * object_new_range( char const * const, int const size ); 18 | void object_done( void ); 19 | 20 | #if defined(NDEBUG) && !defined(BJAM_NO_MEM_CACHE) 21 | 22 | struct hash_header 23 | { 24 | unsigned int hash; 25 | struct hash_item * next; 26 | }; 27 | 28 | #define object_str( obj ) ((char const *)(obj)) 29 | #define object_copy( obj ) (obj) 30 | #define object_free( obj ) ((void)0) 31 | #define object_equal( lhs, rhs ) ((lhs) == (rhs)) 32 | #define object_hash( obj ) (((struct hash_header *)((char *)(obj) - sizeof(struct hash_header)))->hash) 33 | 34 | #else 35 | 36 | char const * object_str ( OBJECT * ); 37 | OBJECT * object_copy ( OBJECT * ); 38 | void object_free ( OBJECT * ); 39 | int object_equal( OBJECT *, OBJECT * ); 40 | unsigned int object_hash ( OBJECT * ); 41 | 42 | #endif 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /jam-files/engine/option.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993, 1995 Christopher Seiwald. 3 | * 4 | * This file is part of Jam - see jam.c for Copyright information. 5 | */ 6 | 7 | /* 8 | * option.h - command line option processing 9 | * 10 | * {o >o 11 | * \ -) "Command line option." 
12 | */ 13 | 14 | typedef struct bjam_option 15 | { 16 | char flag; /* filled in by getoption() */ 17 | char * val; /* set to random address if true */ 18 | } bjam_option; 19 | 20 | #define N_OPTS 256 21 | 22 | int getoptions( int argc, char * * argv, char * opts, bjam_option * optv ); 23 | char * getoptval( bjam_option * optv, char opt, int subopt ); 24 | -------------------------------------------------------------------------------- /jam-files/engine/output.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2007 Rene Rivera 3 | Distributed under the Boost Software License, Version 1.0. 4 | (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 5 | */ 6 | 7 | #ifndef BJAM_OUTPUT_H 8 | #define BJAM_OUTPUT_H 9 | 10 | #include "object.h" 11 | #include "timestamp.h" 12 | 13 | #define EXIT_OK 0 14 | #define EXIT_FAIL 1 15 | #define EXIT_TIMEOUT 2 16 | 17 | void out_action( 18 | char const * const action, 19 | char const * const target, 20 | char const * const command, 21 | char const * const out_data, 22 | char const * const err_data, 23 | int const exit_reason 24 | ); 25 | 26 | OBJECT * outf_int( int const value ); 27 | OBJECT * outf_double( double const value ); 28 | OBJECT * outf_time( timestamp const * const value ); 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /jam-files/engine/patchlevel.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc. 3 | * 4 | * This file is part of Jam - see jam.c for Copyright information. 5 | */ 6 | 7 | /* Keep JAMVERSYM in sync with VERSION. */ 8 | /* It can be accessed as $(JAMVERSION) in the Jamfile. 
*/ 9 | 10 | #define VERSION_MAJOR 2011 11 | #define VERSION_MINOR 12 12 | #define VERSION_PATCH 1 13 | #define VERSION_MAJOR_SYM "2011" 14 | #define VERSION_MINOR_SYM "12" 15 | #define VERSION_PATCH_SYM "01" 16 | #define VERSION "2011.12.1" 17 | #define JAMVERSYM "JAMVERSION=2011.12" 18 | -------------------------------------------------------------------------------- /jam-files/engine/pathunix.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc. 3 | * 4 | * This file is part of Jam - see jam.c for Copyright information. 5 | */ 6 | 7 | /* This file is ALSO: 8 | * Copyright 2001-2004 David Abrahams. 9 | * Copyright 2005 Rene Rivera. 10 | * Distributed under the Boost Software License, Version 1.0. 11 | * (See accompanying file LICENSE_1_0.txt or copy at 12 | * http://www.boost.org/LICENSE_1_0.txt) 13 | */ 14 | 15 | /* 16 | * pathunix.c - UNIX specific path manipulation support 17 | */ 18 | 19 | #include "pathsys.h" 20 | 21 | #include 22 | #include /* needed for getpid() */ 23 | 24 | 25 | /* 26 | * path_get_process_id_() 27 | */ 28 | 29 | unsigned long path_get_process_id_( void ) 30 | { 31 | return getpid(); 32 | } 33 | 34 | 35 | /* 36 | * path_get_temp_path_() 37 | */ 38 | 39 | void path_get_temp_path_( string * buffer ) 40 | { 41 | char const * t = getenv( "TMPDIR" ); 42 | string_append( buffer, t ? 
t : "/tmp" ); 43 | } 44 | 45 | 46 | /* 47 | * path_register_key() 48 | */ 49 | 50 | void path_register_key( OBJECT * path ) 51 | { 52 | } 53 | 54 | 55 | /* 56 | * path_as_key() 57 | */ 58 | 59 | OBJECT * path_as_key( OBJECT * path ) 60 | { 61 | return object_copy( path ); 62 | } 63 | 64 | 65 | /* 66 | * path_done() 67 | */ 68 | 69 | void path_done( void ) 70 | { 71 | } 72 | -------------------------------------------------------------------------------- /jam-files/engine/regexp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Definitions etc. for regexp(3) routines. 3 | * 4 | * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof], 5 | * not the System V one. 6 | */ 7 | #ifndef REGEXP_DWA20011023_H 8 | #define REGEXP_DWA20011023_H 9 | 10 | #define NSUBEXP 10 11 | typedef struct regexp { 12 | char const * startp[ NSUBEXP ]; 13 | char const * endp[ NSUBEXP ]; 14 | char regstart; /* Internal use only. */ 15 | char reganch; /* Internal use only. */ 16 | char * regmust; /* Internal use only. */ 17 | int regmlen; /* Internal use only. */ 18 | char program[ 1 ]; /* Unwarranted chumminess with compiler. */ 19 | } regexp; 20 | 21 | 22 | regexp * regcomp( char const * exp ); 23 | int regexec( regexp * prog, char const * string ); 24 | void regerror( char const * s ); 25 | 26 | 27 | /* 28 | * The first byte of the regexp internal "program" is actually this magic 29 | * number; the start node begins in the second byte. 30 | */ 31 | #define MAGIC 0234 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /jam-files/engine/search.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993, 1995 Christopher Seiwald. 3 | * 4 | * This file is part of Jam - see jam.c for Copyright information. 
5 | */ 6 | 7 | /* 8 | * search.h - find a target along $(SEARCH) or $(LOCATE) 9 | */ 10 | 11 | #ifndef SEARCH_SW20111118_H 12 | #define SEARCH_SW20111118_H 13 | 14 | #include "object.h" 15 | #include "timestamp.h" 16 | 17 | void set_explicit_binding( OBJECT * target, OBJECT * locate ); 18 | OBJECT * search( OBJECT * target, timestamp * const time, 19 | OBJECT * * another_target, int const file ); 20 | void search_done( void ); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /jam-files/engine/strings.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2004. David Abrahams 3 | * Distributed under the Boost Software License, Version 1.0. 4 | * (See accompanying file LICENSE_1_0.txt or copy at 5 | * http://www.boost.org/LICENSE_1_0.txt) 6 | */ 7 | 8 | #ifndef STRINGS_DWA20011024_H 9 | #define STRINGS_DWA20011024_H 10 | 11 | #include 12 | 13 | typedef struct string 14 | { 15 | char * value; 16 | unsigned long size; 17 | unsigned long capacity; 18 | char opt[ 32 ]; 19 | #ifndef NDEBUG 20 | char magic[ 4 ]; 21 | #endif 22 | } string; 23 | 24 | void string_new( string * ); 25 | void string_copy( string *, char const * ); 26 | void string_free( string * ); 27 | void string_append( string *, char const * ); 28 | void string_append_range( string *, char const *, char const * ); 29 | void string_push_back( string * s, char x ); 30 | void string_reserve( string *, size_t ); 31 | void string_truncate( string *, size_t ); 32 | void string_pop_back( string * ); 33 | char string_back( string * ); 34 | void string_unit_test(); 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /jam-files/engine/subst.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2001-2004 David Abrahams. 2 | * Distributed under the Boost Software License, Version 1.0. 
3 | * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 4 | */ 5 | 6 | #ifndef SUBST_JG20120722_H 7 | #define SUBST_JG20120722_H 8 | 9 | #include "object.h" 10 | #include "regexp.h" 11 | 12 | regexp * regex_compile( OBJECT * pattern ); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /jam-files/engine/timestamp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993, 1995 Christopher Seiwald. 3 | * 4 | * This file is part of Jam - see jam.c for Copyright information. 5 | */ 6 | 7 | /* 8 | * timestamp.h - get the timestamp of a file or archive member 9 | */ 10 | 11 | #ifndef TIMESTAMP_H_SW_2011_11_18 12 | #define TIMESTAMP_H_SW_2011_11_18 13 | 14 | #include "object.h" 15 | 16 | #ifdef OS_NT 17 | # define WIN32_LEAN_AND_MEAN 18 | # include 19 | #endif 20 | 21 | #include 22 | 23 | typedef struct timestamp 24 | { 25 | time_t secs; 26 | int nsecs; 27 | } timestamp; 28 | 29 | void timestamp_clear( timestamp * const ); 30 | int timestamp_cmp( timestamp const * const lhs, timestamp const * const rhs ); 31 | void timestamp_copy( timestamp * const target, timestamp const * const source ); 32 | void timestamp_current( timestamp * const ); 33 | int timestamp_empty( timestamp const * const ); 34 | void timestamp_from_path( timestamp * const, OBJECT * const path ); 35 | void timestamp_init( timestamp * const, time_t const secs, int const nsecs ); 36 | void timestamp_max( timestamp * const max, timestamp const * const lhs, 37 | timestamp const * const rhs ); 38 | char const * timestamp_str( timestamp const * const ); 39 | char const * timestamp_timestr( timestamp const * const ); 40 | 41 | #ifdef OS_NT 42 | void timestamp_from_filetime( timestamp * const, FILETIME const * const ); 43 | #endif 44 | 45 | void timestamp_done(); 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- 
/jam-files/engine/variable.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993, 2000 Christopher Seiwald. 3 | * 4 | * This file is part of Jam - see jam.c for Copyright information. 5 | */ 6 | 7 | /* 8 | * variable.h - handle jam multi-element variables 9 | */ 10 | 11 | #ifndef VARIABLE_SW20111119_H 12 | #define VARIABLE_SW20111119_H 13 | 14 | #include "lists.h" 15 | #include "object.h" 16 | 17 | 18 | struct module_t; 19 | 20 | void var_defines( struct module_t *, char * const * e, int preprocess ); 21 | LIST * var_get( struct module_t *, OBJECT * symbol ); 22 | void var_set( struct module_t *, OBJECT * symbol, LIST * value, int flag ); 23 | LIST * var_swap( struct module_t *, OBJECT * symbol, LIST * value ); 24 | void var_done( struct module_t * ); 25 | 26 | /* 27 | * Defines for var_set(). 28 | */ 29 | 30 | #define VAR_SET 0 /* override previous value */ 31 | #define VAR_APPEND 1 /* append to previous value */ 32 | #define VAR_DEFAULT 2 /* set only if no previous value */ 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /jam-files/fail/Jamroot: -------------------------------------------------------------------------------- 1 | actions fail { 2 | false 3 | } 4 | make fail : : fail ; 5 | -------------------------------------------------------------------------------- /lm/blank.hh: -------------------------------------------------------------------------------- 1 | #ifndef LM_BLANK_H 2 | #define LM_BLANK_H 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | namespace lm { 10 | namespace ngram { 11 | 12 | /* Suppose "foo bar" appears with zero backoff but there is no trigram 13 | * beginning with these words. Then, when scoring "foo bar", the model could 14 | * return out_state containing "bar" or even null context if "bar" also has no 15 | * backoff and is never followed by another word. Then the backoff is set to 16 | * kNoExtensionBackoff. 
If the n-gram might be extended, then out_state must 17 | * contain the full n-gram, in which case kExtensionBackoff is set. In any 18 | * case, if an n-gram has non-zero backoff, the full state is returned so 19 | * backoff can be properly charged. 20 | * These differ only in sign bit because the backoff is in fact zero in either 21 | * case. 22 | */ 23 | const float kNoExtensionBackoff = -0.0; 24 | const float kExtensionBackoff = 0.0; 25 | const uint64_t kNoExtensionQuant = 0; 26 | const uint64_t kExtensionQuant = 1; 27 | 28 | inline void SetExtension(float &backoff) { 29 | if (backoff == kNoExtensionBackoff) backoff = kExtensionBackoff; 30 | } 31 | 32 | // This compiles down nicely. 33 | inline bool HasExtension(const float &backoff) { 34 | typedef union { float f; uint32_t i; } UnionValue; 35 | UnionValue compare, interpret; 36 | compare.f = kNoExtensionBackoff; 37 | interpret.f = backoff; 38 | return compare.i != interpret.i; 39 | } 40 | 41 | } // namespace ngram 42 | } // namespace lm 43 | #endif // LM_BLANK_H 44 | -------------------------------------------------------------------------------- /lm/builder/Jamfile: -------------------------------------------------------------------------------- 1 | fakelib builder : [ glob *.cc : *test.cc *main.cc ] 2 | ../../util//kenutil ../../util/stream//stream ../../util/double-conversion//double-conversion ..//kenlm 3 | : : : /top//boost_thread $(timer-link) ; 4 | 5 | exe lmplz : lmplz_main.cc builder /top//boost_program_options ; 6 | 7 | exe dump_counts : dump_counts_main.cc builder ; 8 | 9 | alias programs : lmplz dump_counts ; 10 | 11 | import testing ; 12 | unit-test corpus_count_test : corpus_count_test.cc builder /top//boost_unit_test_framework ; 13 | unit-test adjust_counts_test : adjust_counts_test.cc builder /top//boost_unit_test_framework ; 14 | -------------------------------------------------------------------------------- /lm/builder/README.md: 
-------------------------------------------------------------------------------- 1 | Dependencies 2 | ============ 3 | 4 | Boost >= 1.42.0 is required. 5 | 6 | For Ubuntu, 7 | ```bash 8 | sudo apt-get install libboost1.48-all-dev 9 | ``` 10 | 11 | Alternatively, you can download, compile, and install it yourself: 12 | 13 | ```bash 14 | wget http://sourceforge.net/projects/boost/files/boost/1.52.0/boost_1_52_0.tar.gz/download -O boost_1_52_0.tar.gz 15 | tar -xvzf boost_1_52_0.tar.gz 16 | cd boost_1_52_0 17 | ./bootstrap.sh 18 | ./b2 19 | sudo ./b2 install 20 | ``` 21 | 22 | Local install options (in a user-space prefix directory) are also possible. See http://www.boost.org/doc/libs/1_52_0/doc/html/bbv2/installation.html. 23 | 24 | 25 | Building 26 | ======== 27 | 28 | ```bash 29 | bjam 30 | ``` 31 | Your distribution might package bjam and boost-build separately from Boost. Both are required. 32 | 33 | Usage 34 | ===== 35 | 36 | Run 37 | ```bash 38 | $ bin/lmplz 39 | ``` 40 | to see the command line arguments. 41 | 42 | Running 43 | ======= 44 | 45 | ```bash 46 | bin/lmplz -o 5 <text >text.arpa 47 | ``` 48 | -------------------------------------------------------------------------------- /lm/builder/TODO: -------------------------------------------------------------------------------- 1 | More tests! 2 | Sharding. 3 | Some way to manage all the crazy config options. 4 | Option to build the binary file directly. 5 | Interpolation of different orders.
// Per-count discounts for one n-gram order.  amount[c] is the discount for an
// adjusted count of c when c is 0..3; every count of 3 or more shares
// amount[3].
struct Discount {
  float amount[4];

  // Discount that applies to `count`.  The index is clamped to 3 so large
  // counts never read past the end of amount[].  The explicit template
  // argument is required: `count` is uint64_t and the literal is int, so
  // std::min cannot deduce a common type on its own.
  float Get(uint64_t count) const {
    return amount[std::min<uint64_t>(count, 3)];
  }

  // `count` with its discount subtracted, as a float.
  float Apply(uint64_t count) const {
    return static_cast<float>(count) - Get(count);
  }
};
{} 19 | 20 | // TODO: Add smoothing type 21 | // TODO: More info if multiple models were interpolated 22 | }; 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /lm/builder/interpolate.hh: -------------------------------------------------------------------------------- 1 | #ifndef LM_BUILDER_INTERPOLATE_H 2 | #define LM_BUILDER_INTERPOLATE_H 3 | 4 | #include "util/stream/multi_stream.hh" 5 | 6 | #include 7 | 8 | #include 9 | 10 | namespace lm { namespace builder { 11 | 12 | /* Interpolate step. 13 | * Input: suffix sorted n-grams with (p_uninterpolated, gamma) from 14 | * InitialProbabilities. 15 | * Output: suffix sorted n-grams with complete probability 16 | */ 17 | class Interpolate { 18 | public: 19 | // Normally vocab_size is the unigram count-1 (since p() = 0) but might 20 | // be larger when the user specifies a consistent vocabulary size. 21 | explicit Interpolate(uint64_t vocab_size, const util::stream::ChainPositions &backoffs, const std::vector &prune_thresholds, bool prune_vocab, bool output_q_); 22 | 23 | void Run(const util::stream::ChainPositions &positions); 24 | 25 | private: 26 | float uniform_prob_; 27 | util::stream::ChainPositions backoffs_; 28 | const std::vector prune_thresholds_; 29 | bool prune_vocab_; 30 | bool output_q_; 31 | }; 32 | 33 | }} // namespaces 34 | #endif // LM_BUILDER_INTERPOLATE_H 35 | -------------------------------------------------------------------------------- /lm/builder/output.cc: -------------------------------------------------------------------------------- 1 | #include "lm/builder/output.hh" 2 | #include "util/stream/multi_stream.hh" 3 | 4 | #include 5 | 6 | namespace lm { namespace builder { 7 | 8 | OutputHook::~OutputHook() {} 9 | 10 | void OutputHook::Apply(util::stream::Chains &chains) { 11 | chains >> boost::ref(*this); 12 | } 13 | 14 | }} // namespaces 15 | -------------------------------------------------------------------------------- /lm/config.cc: 
-------------------------------------------------------------------------------- 1 | #include "lm/config.hh" 2 | 3 | #include 4 | 5 | namespace lm { 6 | namespace ngram { 7 | 8 | Config::Config() : 9 | show_progress(true), 10 | messages(&std::cerr), 11 | enumerate_vocab(NULL), 12 | unknown_missing(COMPLAIN), 13 | sentence_marker_missing(THROW_UP), 14 | positive_log_probability(THROW_UP), 15 | unknown_missing_logprob(-100.0), 16 | probing_multiplier(1.5), 17 | building_memory(1073741824ULL), // 1 GB 18 | temporary_directory_prefix(""), 19 | arpa_complain(ALL), 20 | write_mmap(NULL), 21 | write_method(WRITE_AFTER), 22 | include_vocab(true), 23 | rest_function(REST_MAX), 24 | prob_bits(8), 25 | backoff_bits(8), 26 | pointer_bhiksha_bits(22), 27 | load_method(util::POPULATE_OR_READ) {} 28 | 29 | } // namespace ngram 30 | } // namespace lm 31 | -------------------------------------------------------------------------------- /lm/enumerate_vocab.hh: -------------------------------------------------------------------------------- 1 | #ifndef LM_ENUMERATE_VOCAB_H 2 | #define LM_ENUMERATE_VOCAB_H 3 | 4 | #include "lm/word_index.hh" 5 | #include "util/string_piece.hh" 6 | 7 | namespace lm { 8 | 9 | /* If you need the actual strings in the vocabulary, inherit from this class 10 | * and implement Add. Then put a pointer in Config.enumerate_vocab; it does 11 | * not take ownership. Add is called once per vocab word. index starts at 0 12 | * and increases by 1 each time. This is only used by the Model constructor; 13 | * the pointer is not retained by the class. 
// Consume whitespace following a word and report whether the line is over.
// Returns true when a '\n' is consumed or the stream stops yielding
// characters.  Otherwise the first non-whitespace character is pushed back
// onto the stream and false is returned.
bool IsLineEnd(std::istream &in) {
  for (;;) {
    const int c = in.get();
    if (!in || c == '\n') return true;
    if (!isspace(c)) break;
  }
  in.unget();
  return false;
}
34 | unsigned int ReadMultiple(std::istream &in, boost::unordered_map > &out) { 35 | in.exceptions(std::istream::badbit); 36 | unsigned int sentence = 0; 37 | bool used_id = false; 38 | std::string word; 39 | while (in >> word) { 40 | used_id = true; 41 | std::vector &posting = out[word]; 42 | if (posting.empty() || (posting.back() != sentence)) 43 | posting.push_back(sentence); 44 | if (IsLineEnd(in)) { 45 | ++sentence; 46 | used_id = false; 47 | } 48 | } 49 | return sentence + used_id; 50 | } 51 | 52 | } // namespace vocab 53 | } // namespace lm 54 | -------------------------------------------------------------------------------- /lm/fragment_main.cc: -------------------------------------------------------------------------------- 1 | #include "lm/binary_format.hh" 2 | #include "lm/model.hh" 3 | #include "lm/left.hh" 4 | #include "util/tokenize_piece.hh" 5 | 6 | template void Query(const char *name) { 7 | Model model(name); 8 | std::string line; 9 | lm::ngram::ChartState ignored; 10 | while (getline(std::cin, line)) { 11 | lm::ngram::RuleScore scorer(model, ignored); 12 | for (util::TokenIter i(line, ' '); i; ++i) { 13 | scorer.Terminal(model.GetVocabulary().Index(*i)); 14 | } 15 | std::cout << scorer.Finish() << '\n'; 16 | } 17 | } 18 | 19 | int main(int argc, char *argv[]) { 20 | if (argc != 2) { 21 | std::cerr << "Expected model file name." << std::endl; 22 | return 1; 23 | } 24 | const char *name = argv[1]; 25 | lm::ngram::ModelType model_type = lm::ngram::PROBING; 26 | lm::ngram::RecognizeBinary(name, model_type); 27 | switch (model_type) { 28 | case lm::ngram::PROBING: 29 | Query(name); 30 | break; 31 | case lm::ngram::REST_PROBING: 32 | Query(name); 33 | break; 34 | default: 35 | std::cerr << "Model type not supported yet." 
<< std::endl; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /lm/lm_exception.cc: -------------------------------------------------------------------------------- 1 | #include "lm/lm_exception.hh" 2 | 3 | #include 4 | #include 5 | 6 | namespace lm { 7 | 8 | ConfigException::ConfigException() throw() {} 9 | ConfigException::~ConfigException() throw() {} 10 | 11 | LoadException::LoadException() throw() {} 12 | LoadException::~LoadException() throw() {} 13 | 14 | FormatLoadException::FormatLoadException() throw() {} 15 | FormatLoadException::~FormatLoadException() throw() {} 16 | 17 | VocabLoadException::VocabLoadException() throw() {} 18 | VocabLoadException::~VocabLoadException() throw() {} 19 | 20 | SpecialWordMissingException::SpecialWordMissingException() throw() {} 21 | SpecialWordMissingException::~SpecialWordMissingException() throw() {} 22 | 23 | } // namespace lm 24 | -------------------------------------------------------------------------------- /lm/lm_exception.hh: -------------------------------------------------------------------------------- 1 | #ifndef LM_LM_EXCEPTION_H 2 | #define LM_LM_EXCEPTION_H 3 | 4 | // Named to avoid conflict with util/exception.hh. 
5 | 6 | #include "util/exception.hh" 7 | #include "util/string_piece.hh" 8 | 9 | #include 10 | #include 11 | 12 | namespace lm { 13 | 14 | typedef enum {THROW_UP, COMPLAIN, SILENT} WarningAction; 15 | 16 | class ConfigException : public util::Exception { 17 | public: 18 | ConfigException() throw(); 19 | ~ConfigException() throw(); 20 | }; 21 | 22 | class LoadException : public util::Exception { 23 | public: 24 | virtual ~LoadException() throw(); 25 | 26 | protected: 27 | LoadException() throw(); 28 | }; 29 | 30 | class FormatLoadException : public LoadException { 31 | public: 32 | FormatLoadException() throw(); 33 | ~FormatLoadException() throw(); 34 | }; 35 | 36 | class VocabLoadException : public LoadException { 37 | public: 38 | virtual ~VocabLoadException() throw(); 39 | VocabLoadException() throw(); 40 | }; 41 | 42 | class SpecialWordMissingException : public VocabLoadException { 43 | public: 44 | explicit SpecialWordMissingException() throw(); 45 | ~SpecialWordMissingException() throw(); 46 | }; 47 | 48 | } // namespace lm 49 | 50 | #endif // LM_LM_EXCEPTION 51 | -------------------------------------------------------------------------------- /lm/max_order.hh: -------------------------------------------------------------------------------- 1 | #ifndef LM_MAX_ORDER_H 2 | #define LM_MAX_ORDER_H 3 | /* IF YOUR BUILD SYSTEM PASSES -DKENLM_MAX_ORDER, THEN CHANGE THE BUILD SYSTEM. 4 | * If not, this is the default maximum order. 5 | * Having this limit means that State can be 6 | * (kMaxOrder - 1) * sizeof(float) bytes instead of 7 | * sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead 8 | */ 9 | #ifndef KENLM_ORDER_MESSAGE 10 | #define KENLM_ORDER_MESSAGE "If your build system supports changing KENLM_MAX_ORDER, change it there and recompile. In the KenLM tarball or Moses, use e.g. `bjam --max-kenlm-order=6 -a'. Otherwise, edit lm/max_order.hh." 
/* Not the best numbering system, but it grew this way for historical reasons
 * and I want to preserve existing binary files. */
typedef enum {PROBING=0, REST_PROBING=1, TRIE=2, QUANT_TRIE=3, ARRAY_TRIE=4, QUANT_ARRAY_TRIE=5} ModelType;

// Historical names.
const ModelType HASH_PROBING = PROBING;
const ModelType TRIE_SORTED = TRIE;
const ModelType QUANT_TRIE_SORTED = QUANT_TRIE;
const ModelType ARRAY_TRIE_SORTED = ARRAY_TRIE;
const ModelType QUANT_ARRAY_TRIE_SORTED = QUANT_ARRAY_TRIE;

// Offsets between the trie variants: TRIE + kQuantAdd == QUANT_TRIE,
// TRIE + kArrayAdd == ARRAY_TRIE, and both together give QUANT_ARRAY_TRIE.
// The difference of two enumerators is an int, so it has to be cast back to
// ModelType explicitly.
const static ModelType kQuantAdd = static_cast<ModelType>(QUANT_TRIE - TRIE);
const static ModelType kArrayAdd = static_cast<ModelType>(ARRAY_TRIE - TRIE);
24 | * 25 | * Ideally, keep output states around and compare them. Failing that, 26 | * get out_state.ValidLength() and use that length for recombination. 27 | */ 28 | unsigned char ngram_length; 29 | 30 | /* Left extension information. If independent_left is set, then prob is 31 | * independent of words to the left (up to additional backoff). Otherwise, 32 | * extend_left indicates how to efficiently extend further to the left. 33 | */ 34 | bool independent_left; 35 | uint64_t extend_left; // Defined only if !independent_left 36 | 37 | // Rest cost for extension to the left. 38 | float rest; 39 | }; 40 | 41 | } // namespace lm 42 | #endif // LM_RETURN_H 43 | -------------------------------------------------------------------------------- /lm/sizes.hh: -------------------------------------------------------------------------------- 1 | #ifndef LM_SIZES_H 2 | #define LM_SIZES_H 3 | 4 | #include 5 | 6 | #include 7 | 8 | namespace lm { namespace ngram { 9 | 10 | struct Config; 11 | 12 | void ShowSizes(const std::vector &counts, const lm::ngram::Config &config); 13 | void ShowSizes(const std::vector &counts); 14 | void ShowSizes(const char *file, const lm::ngram::Config &config); 15 | 16 | }} // namespaces 17 | #endif // LM_SIZES_H 18 | -------------------------------------------------------------------------------- /lm/virtual_interface.cc: -------------------------------------------------------------------------------- 1 | #include "lm/virtual_interface.hh" 2 | 3 | #include "lm/lm_exception.hh" 4 | 5 | namespace lm { 6 | namespace base { 7 | 8 | Vocabulary::~Vocabulary() {} 9 | 10 | void Vocabulary::SetSpecial(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found) { 11 | begin_sentence_ = begin_sentence; 12 | end_sentence_ = end_sentence; 13 | not_found_ = not_found; 14 | } 15 | 16 | Model::~Model() {} 17 | 18 | } // namespace base 19 | } // namespace lm 20 | --------------------------------------------------------------------------------
/lm/weights.hh: -------------------------------------------------------------------------------- 1 | #ifndef LM_WEIGHTS_H 2 | #define LM_WEIGHTS_H 3 | 4 | // Weights for n-grams. Probability and possibly a backoff. 5 | 6 | namespace lm { 7 | struct Prob { 8 | float prob; 9 | }; 10 | // No inheritance so this will be a POD. 11 | struct ProbBackoff { 12 | float prob; 13 | float backoff; 14 | }; 15 | struct RestWeights { 16 | float prob; 17 | float backoff; 18 | float rest; 19 | }; 20 | 21 | } // namespace lm 22 | #endif // LM_WEIGHTS_H 23 | -------------------------------------------------------------------------------- /lm/word_index.hh: -------------------------------------------------------------------------------- 1 | // Separate header because this is used often. 2 | #ifndef LM_WORD_INDEX_H 3 | #define LM_WORD_INDEX_H 4 | 5 | #include 6 | 7 | namespace lm { 8 | typedef unsigned int WordIndex; 9 | const WordIndex kMaxWordIndex = UINT_MAX; 10 | } // namespace lm 11 | 12 | typedef lm::WordIndex LMWordIndex; 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /util/barrier.hh: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_BARRIER__ 2 | #define UTIL_BARRIER__ 3 | 4 | #include 5 | #include 6 | 7 | namespace util { 8 | 9 | class Barrier { 10 | public: 11 | explicit Barrier(size_t n) : n_(n) { 12 | assert(n); 13 | } 14 | 15 | bool Decrement() { 16 | // There are faster ways to do this hidden in boost/detail/sp_counted_base_*, but they're poorly factored for such. 
// Number of bits needed to represent max_value: 0 for 0, otherwise the
// position of the highest set bit counted from 1 (so 1 -> 1, 255 -> 8,
// 256 -> 9).
uint8_t RequiredBits(uint64_t max_value) {
  uint8_t bits = 0;
  for (uint64_t remaining = max_value; remaining != 0; remaining >>= 1) {
    ++bits;
  }
  return bits;
}
38 | } 39 | 40 | } // namespace util 41 | -------------------------------------------------------------------------------- /util/bounded_i_stream.hh: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_BOUNDED_I_STREAM__ 2 | #define UTIL_BOUNDED_I_STREAM__ 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | namespace util { 10 | 11 | namespace detail { 12 | class BoundedIStreamDevice : public boost::iostreams::source { 13 | public: 14 | BoundedIStreamDevice(std::istream &backend, std::streamsize bound) 15 | : backend_(backend), bound_(bound) {} 16 | 17 | std::streamsize read(char *out, std::streamsize n) { 18 | backend_.read(out, std::min(n, bound_)); 19 | if (backend_.eof()) return -1; 20 | if (backend_.fail()) throw std::ios_base::failure("Backend stream failbit without eof"); 21 | bound_ -= backend_.gcount(); 22 | return backend_.gcount(); 23 | } 24 | 25 | bool Completed() const { 26 | return bound_ == 0; 27 | } 28 | 29 | private: 30 | std::istream &backend_; 31 | std::streamsize bound_; 32 | }; 33 | } // namespace detail 34 | 35 | typedef boost::iostreams::stream BoundedIStream; 36 | 37 | } // namespace util 38 | 39 | #endif // UTIL_BOUNDED_I_STREAM__ 40 | -------------------------------------------------------------------------------- /util/cat_compressed_main.cc: -------------------------------------------------------------------------------- 1 | // Like cat but interprets compressed files. 
2 | #include "util/file.hh" 3 | #include "util/read_compressed.hh" 4 | 5 | #include 6 | #include 7 | 8 | namespace { 9 | const std::size_t kBufSize = 16384; 10 | void Copy(util::ReadCompressed &from, int to) { 11 | util::scoped_malloc buffer(util::MallocOrThrow(kBufSize)); 12 | while (std::size_t amount = from.Read(buffer.get(), kBufSize)) { 13 | util::WriteOrThrow(to, buffer.get(), amount); 14 | } 15 | } 16 | } // namespace 17 | 18 | int main(int argc, char *argv[]) { 19 | // Lane Schwartz likes -h and --help 20 | for (int i = 1; i < argc; ++i) { 21 | char *arg = argv[i]; 22 | if (!strcmp(arg, "--")) break; 23 | if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) { 24 | std::cerr << 25 | "A cat implementation that interprets compressed files.\n" 26 | "Usage: " << argv[0] << " [file1] [file2] ...\n" 27 | "If no file is provided, then stdin is read.\n"; 28 | return 1; 29 | } 30 | } 31 | 32 | try { 33 | if (argc == 1) { 34 | util::ReadCompressed in(0); 35 | Copy(in, 1); 36 | } else { 37 | for (int i = 1; i < argc; ++i) { 38 | util::ReadCompressed in(util::OpenReadOrThrow(argv[i])); 39 | Copy(in, 1); 40 | } 41 | } 42 | } catch (const std::exception &e) { 43 | std::cerr << e.what() << std::endl; 44 | return 2; 45 | } 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /util/debug.hh: -------------------------------------------------------------------------------- 1 | // Like assert.h, allow multiple inclusion with different NDEBUG. 
// DEBUG_ONLY(inside) keeps `inside` when NDEBUG is not defined and expands to
// nothing when it is.  The macro is #undef'd before every definition so the
// header can be included again after NDEBUG has changed.
#ifndef NDEBUG
#undef DEBUG_ONLY
#define DEBUG_ONLY(inside) inside
#else
#undef DEBUG_ONLY
#define DEBUG_ONLY(inside)
#endif

// DEBUG_ONLY_ASSERT(inside) is assert(inside) wrapped in DEBUG_ONLY, so it
// disappears entirely when NDEBUG is defined.  This header does not include
// anything itself; the including file has to make assert available.
#undef DEBUG_ONLY_ASSERT
#define DEBUG_ONLY_ASSERT(inside) DEBUG_ONLY(assert(inside))
3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace util { 11 | namespace detail { 12 | 13 | struct HashCombine { 14 | template size_t operator()(size_t value, const T &t) const { 15 | boost::hash_combine(value, t); 16 | return value; 17 | } 18 | typedef size_t result_type; 19 | }; 20 | 21 | } // namespace detail 22 | } // namespace util 23 | 24 | namespace boost { 25 | namespace fusion { 26 | 27 | template inline typename enable_if, size_t>::type hash_value(const T &t) { 28 | return boost::fusion::accumulate(t, static_cast(0), util::detail::HashCombine()); 29 | } 30 | 31 | } // namespace fusion 32 | } // namespace boost 33 | -------------------------------------------------------------------------------- /util/hash_fusion_test.cc: -------------------------------------------------------------------------------- 1 | #include "util/hash_fusion.hh" 2 | 3 | #define BOOST_TEST_MODULE HashFusionTest 4 | #include 5 | #include 6 | 7 | namespace { 8 | 9 | BOOST_AUTO_TEST_CASE(Empty) { 10 | boost::fusion::vector<> vec; 11 | BOOST_CHECK_EQUAL(static_cast(0), hash_value(vec)); 12 | } 13 | 14 | BOOST_AUTO_TEST_CASE(Single) { 15 | boost::fusion::vector vec(1); 16 | size_t hash_accum = 0; 17 | boost::hash_combine(hash_accum, (int)1); 18 | BOOST_CHECK_EQUAL(hash_accum, hash_value(vec)); 19 | } 20 | 21 | } // namespace 22 | -------------------------------------------------------------------------------- /util/hash_output_test.cc: -------------------------------------------------------------------------------- 1 | #include "util/hash_output.hh" 2 | 3 | #define BOOST_TEST_MODULE HashOutputTest 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | namespace util { 10 | namespace { 11 | 12 | BOOST_AUTO_TEST_CASE(vector_copy) { 13 | std::vector vec; 14 | vec.push_back(1); 15 | vec.push_back(14008); 16 | vec.push_back(783712947); 17 | 18 | size_t hash; 19 | HashOutput hasher(hash); 20 | std::copy(vec.begin(), vec.end(), hasher); 21 | 22 | size_t compare = 0; 23 
// Lookup table from a byte to the text that represents it in LaTeX.
// Bytes without special meaning map to a one-character string holding the
// byte itself; LaTeX special characters map to their escape sequences.
class Replace {
  public:
    Replace() {
      // Default: every byte maps to itself, stored as a NUL-terminated
      // one-character string inside null_separated_bytes_.
      for (unsigned int i = 0; i < 256; ++i) {
        null_separated_bytes_[i * 2] = static_cast<char>(i);
        null_separated_bytes_[i * 2 + 1] = 0;
        map_[i] = &null_separated_bytes_[i * 2];
      }
      map_[(unsigned char)'<'] = "\\textless ";
      map_[(unsigned char)'>'] = "\\textgreater ";
      map_[(unsigned char)'$'] = "\\$";
      map_[(unsigned char)'_'] = "\\textunderscore ";
      map_[(unsigned char)'{'] = "\\{";
      map_[(unsigned char)'}'] = "\\}";
      // The LaTeX command is \textbackslash; the previous spelling
      // "\textbacklash" is an undefined control sequence.
      map_[(unsigned char)'\\'] = "\\textbackslash ";
      map_[(unsigned char)'%'] = "\\%";
      map_[(unsigned char)'#'] = "\\#";
      map_[(unsigned char)'&'] = "\\&";
    }

    // Escape text for one byte.  Callers pass plain char values; where char
    // is signed, a byte >= 0x80 arrives here sign-extended to a huge size_t.
    // Reducing the index to its unsigned char value recovers the original
    // byte and keeps the lookup inside map_.
    const char *operator[](size_t value) const {
      return map_[static_cast<unsigned char>(value)];
    }

  private:
    // 256 one-character strings, each followed by its terminating NUL.
    char null_separated_bytes_[512];
    const char *map_[256];
};

Replace replace;
char *i = in.data(); i != in.data() + in.size(); ++i) { 42 | out.append(replace[*i]); 43 | } 44 | } 45 | 46 | } // namespace util 47 | -------------------------------------------------------------------------------- /util/latex_escape.hh: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_LATEX_ESCAPE__ 2 | #define UTIL_LATEX_ESCAPE__ 3 | 4 | #include "util/string_piece.hh" 5 | 6 | #include 7 | 8 | namespace util { 9 | 10 | // Escape characters for LaTeX. This isn't a formally formal escape, but does list what I encounter. 11 | void LatexEscape(const StringPiece &in, std::string &out); 12 | 13 | } // namespace util 14 | #endif // UTIL_LATEX_ESCAPE__ 15 | -------------------------------------------------------------------------------- /util/lower_main.cc: -------------------------------------------------------------------------------- 1 | #include "util/utf8.hh" 2 | 3 | #include 4 | 5 | int main() { 6 | std::string line, lower; 7 | while (getline(std::cin, line)) { 8 | utf8::ToLower(line, lower); 9 | std::cout << lower << '\n'; 10 | } 11 | if (!std::cin.eof()) { 12 | std::cerr << "Some error other than EOF" << std::endl; 13 | return 1; 14 | } 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /util/murmur_hash.hh: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_MURMUR_HASH_H 2 | #define UTIL_MURMUR_HASH_H 3 | #include 4 | #include 5 | 6 | namespace util { 7 | 8 | // 64-bit machine version 9 | uint64_t MurmurHash64A(const void * key, std::size_t len, uint64_t seed = 0); 10 | // 32-bit machine version (not the same function as above) 11 | uint64_t MurmurHash64B(const void * key, std::size_t len, uint64_t seed = 0); 12 | // Use the version for this arch. Because the values differ across 13 | // architectures, really only use it for in-memory structures. 
14 | uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t seed = 0); 15 | 16 | } // namespace util 17 | 18 | #endif // UTIL_MURMUR_HASH_H 19 | -------------------------------------------------------------------------------- /util/numbers.hh: -------------------------------------------------------------------------------- 1 | // Numeric types used everywhere 2 | #ifndef UTIL_NUMBERS__ 3 | #define UTIL_NUMBERS__ 4 | 5 | #include "util/log_num.hh" 6 | 7 | typedef double LinearScore; 8 | typedef LogNum LogScore; 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /util/options.cc: -------------------------------------------------------------------------------- 1 | #include "util/options.hh" 2 | 3 | #include 4 | 5 | namespace util { 6 | 7 | ArgumentCountException::ArgumentCountException(const char *key, size_t expected, size_t times) throw() 8 | : ArgumentParseError("Expected "), key_(key), expected_(expected), times_(times) { 9 | what_ += key_; 10 | what_ += " "; 11 | what_ += boost::lexical_cast(expected); 12 | what_ += " times, got it "; 13 | what_ += boost::lexical_cast(times); 14 | what_ += "."; 15 | } 16 | 17 | void CheckCountRange(const boost::program_options::variables_map &vm, const char **key_begin, const char **key_end, size_t expected) { 18 | for (const char **key = key_begin; key != key_end; ++key) { 19 | if (vm.count(*key) != expected) 20 | throw ArgumentCountException(*key, expected, vm.count(*key)); 21 | } 22 | } 23 | 24 | } // namespace util 25 | -------------------------------------------------------------------------------- /util/parallel_read.hh: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_PARALLEL_READ__ 2 | #define UTIL_PARALLEL_READ__ 3 | 4 | /* Read pieces of a file in parallel. This has a very specific use case: 5 | * reading files from Lustre is CPU bound so multiple threads actually 6 | * increases throughput. 
Speed matters when an LM takes a terabyte. 7 | */ 8 | 9 | #include 10 | #include 11 | 12 | namespace util { 13 | void ParallelRead(int fd, void *to, std::size_t amount, uint64_t offset); 14 | } // namespace util 15 | 16 | #endif // UTIL_PARALLEL_READ__ 17 | -------------------------------------------------------------------------------- /util/pcqueue_test.cc: -------------------------------------------------------------------------------- 1 | #include "util/pcqueue.hh" 2 | 3 | #define BOOST_TEST_MODULE PCQueueTest 4 | #include 5 | 6 | namespace util { 7 | namespace { 8 | 9 | BOOST_AUTO_TEST_CASE(SingleThread) { 10 | PCQueue queue(10); 11 | for (int i = 0; i < 10; ++i) { 12 | queue.Produce(i); 13 | } 14 | for (int i = 0; i < 10; ++i) { 15 | BOOST_CHECK_EQUAL(i, queue.Consume()); 16 | } 17 | } 18 | 19 | } 20 | } // namespace util 21 | -------------------------------------------------------------------------------- /util/pool.cc: -------------------------------------------------------------------------------- 1 | #include "util/pool.hh" 2 | 3 | #include "util/scoped.hh" 4 | 5 | #include 6 | 7 | namespace util { 8 | 9 | Pool::Pool() { 10 | current_ = NULL; 11 | current_end_ = NULL; 12 | } 13 | 14 | Pool::~Pool() { 15 | FreeAll(); 16 | } 17 | 18 | void Pool::FreeAll() { 19 | for (std::vector::const_iterator i(free_list_.begin()); i != free_list_.end(); ++i) { 20 | free(*i); 21 | } 22 | free_list_.clear(); 23 | current_ = NULL; 24 | current_end_ = NULL; 25 | } 26 | 27 | void *Pool::More(std::size_t size) { 28 | std::size_t amount = std::max(static_cast(32) << free_list_.size(), size); 29 | uint8_t *ret = static_cast(MallocOrThrow(amount)); 30 | free_list_.push_back(ret); 31 | current_ = ret + size; 32 | current_end_ = ret + amount; 33 | return ret; 34 | } 35 | 36 | } // namespace util 37 | -------------------------------------------------------------------------------- /util/pool.hh: -------------------------------------------------------------------------------- 1 | // 
Very simple pool. It can only allocate memory. And all of the memory it 2 | // allocates must be freed at the same time. 3 | 4 | #ifndef UTIL_POOL_H 5 | #define UTIL_POOL_H 6 | 7 | #include 8 | 9 | #include 10 | 11 | namespace util { 12 | 13 | class Pool { 14 | public: 15 | Pool(); 16 | 17 | ~Pool(); 18 | // Hand out size bytes from the current block; when the request does not fit, More() supplies the memory instead. 19 | void *Allocate(std::size_t size) { 20 | // Compare byte counts instead of advancing current_ first: current_ + size could leave the block or wrap around. 21 | if (size < static_cast<std::size_t>(current_end_ - current_)) { 22 | void *ret = current_; 23 | current_ += size; 24 | return ret; 25 | } 26 | return More(size); 27 | } 28 | 29 | void FreeAll(); 30 | 31 | private: 32 | void *More(std::size_t size); 33 | 34 | std::vector free_list_; 35 | 36 | uint8_t *current_, *current_end_; 37 | 38 | // no copying 39 | Pool(const Pool &); 40 | Pool &operator=(const Pool &); 41 | }; 42 | 43 | } // namespace util 44 | 45 | #endif // UTIL_POOL_H 46 | -------------------------------------------------------------------------------- /util/print_concurrency_main.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | int main() { 6 | std::cout << boost::thread::hardware_concurrency() << std::endl; 7 | } 8 | -------------------------------------------------------------------------------- /util/scoped.cc: -------------------------------------------------------------------------------- 1 | #include "util/scoped.hh" 2 | 3 | #include 4 | #if !defined(_WIN32) && !defined(_WIN64) 5 | #include 6 | #endif 7 | 8 | namespace util { 9 | 10 | MallocException::MallocException(std::size_t requested) throw() { 11 | *this << "for " << requested << " bytes "; 12 | } 13 | 14 | MallocException::~MallocException() throw() {} 15 | 16 | namespace { 17 | void *InspectAddr(void *addr, std::size_t requested, const char *func_name) { 18 | UTIL_THROW_IF_ARG(!addr && requested, MallocException, (requested), "in " << func_name); 19 | // These routines are often used for large chunks of memory where huge pages help.
20 | #if MADV_HUGEPAGE 21 | madvise(addr, requested, MADV_HUGEPAGE); 22 | #endif 23 | return addr; 24 | } 25 | } // namespace 26 | 27 | void *MallocOrThrow(std::size_t requested) { 28 | return InspectAddr(std::malloc(requested), requested, "malloc"); 29 | } 30 | 31 | void *CallocOrThrow(std::size_t requested) { 32 | return InspectAddr(std::calloc(1, requested), requested, "calloc"); 33 | } 34 | 35 | void scoped_malloc::call_realloc(std::size_t requested) { 36 | p_ = InspectAddr(std::realloc(p_, requested), requested, "realloc"); 37 | } 38 | 39 | } // namespace util 40 | -------------------------------------------------------------------------------- /util/sized_iterator_test.cc: -------------------------------------------------------------------------------- 1 | #include "util/sized_iterator.hh" 2 | 3 | #define BOOST_TEST_MODULE SizedIteratorTest 4 | #include 5 | 6 | namespace util { namespace { 7 | 8 | BOOST_AUTO_TEST_CASE(swap_works) { 9 | char str[2] = { 0, 1 }; 10 | SizedProxy first(str, 1), second(str + 1, 1); 11 | swap(first, second); 12 | BOOST_CHECK_EQUAL(1, str[0]); 13 | BOOST_CHECK_EQUAL(0, str[1]); 14 | } 15 | 16 | }} // namespace anonymous util 17 | -------------------------------------------------------------------------------- /util/stream/Jamfile: -------------------------------------------------------------------------------- 1 | #if $(BOOST-VERSION) >= 104800 { 2 | # timer-link = /top//boost_timer ; 3 | #} else { 4 | # timer-link = ; 5 | #} 6 | 7 | fakelib stream : chain.cc io.cc line_input.cc multi_progress.cc ..//kenutil /top//boost_thread : : : /top//boost_thread ; 8 | 9 | import testing ; 10 | unit-test io_test : io_test.cc stream /top//boost_unit_test_framework ; 11 | unit-test stream_test : stream_test.cc stream /top//boost_unit_test_framework ; 12 | unit-test sort_test : sort_test.cc stream /top//boost_unit_test_framework ; 13 | -------------------------------------------------------------------------------- /util/stream/io_test.cc: 
-------------------------------------------------------------------------------- 1 | #include "util/stream/io.hh" 2 | 3 | #include "util/stream/chain.hh" 4 | #include "util/file.hh" 5 | 6 | #define BOOST_TEST_MODULE IOTest 7 | #include 8 | 9 | #include 10 | 11 | namespace util { namespace stream { namespace { 12 | 13 | BOOST_AUTO_TEST_CASE(CopyFile) { 14 | std::string temps("io_test_temp"); 15 | 16 | scoped_fd in(MakeTemp(temps)); 17 | for (uint64_t i = 0; i < 100000; ++i) { 18 | WriteOrThrow(in.get(), &i, sizeof(uint64_t)); 19 | } 20 | SeekOrThrow(in.get(), 0); 21 | scoped_fd out(MakeTemp(temps)); 22 | 23 | ChainConfig config; 24 | config.entry_size = 8; 25 | config.total_memory = 1024; 26 | config.block_count = 10; 27 | 28 | Chain(config) >> PRead(in.get()) >> Write(out.get()); 29 | 30 | SeekOrThrow(out.get(), 0); 31 | for (uint64_t i = 0; i < 100000; ++i) { 32 | uint64_t got; 33 | ReadOrThrow(out.get(), &got, sizeof(uint64_t)); 34 | BOOST_CHECK_EQUAL(i, got); 35 | } 36 | } 37 | 38 | }}} // namespaces 39 | -------------------------------------------------------------------------------- /util/stream/line_input.hh: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_STREAM_LINE_INPUT_H 2 | #define UTIL_STREAM_LINE_INPUT_H 3 | namespace util {namespace stream { 4 | 5 | class ChainPosition; 6 | 7 | /* Worker that reads input into blocks, ensuring that blocks contain whole 8 | * lines. Assumes that the maximum size of a line is less than the block size 9 | */ 10 | class LineInput { 11 | public: 12 | // Takes ownership upon thread execution. 
13 | explicit LineInput(int fd); 14 | 15 | void Run(const ChainPosition &position); 16 | 17 | private: 18 | int fd_; 19 | }; 20 | 21 | }} // namespaces 22 | #endif // UTIL_STREAM_LINE_INPUT_H 23 | -------------------------------------------------------------------------------- /util/stream/stream_test.cc: -------------------------------------------------------------------------------- 1 | #include "util/stream/io.hh" 2 | 3 | #include "util/stream/stream.hh" 4 | #include "util/file.hh" 5 | 6 | #define BOOST_TEST_MODULE StreamTest 7 | #include 8 | 9 | #include 10 | 11 | namespace util { namespace stream { namespace { 12 | 13 | BOOST_AUTO_TEST_CASE(StreamTest) { 14 | scoped_fd in(MakeTemp("io_test_temp")); 15 | for (uint64_t i = 0; i < 100000; ++i) { 16 | WriteOrThrow(in.get(), &i, sizeof(uint64_t)); 17 | } 18 | SeekOrThrow(in.get(), 0); 19 | 20 | ChainConfig config; 21 | config.entry_size = 8; 22 | config.total_memory = 100; 23 | config.block_count = 12; 24 | 25 | Stream s; 26 | Chain chain(config); 27 | chain >> Read(in.get()) >> s >> kRecycle; 28 | uint64_t i = 0; 29 | for (; s; ++s, ++i) { 30 | BOOST_CHECK_EQUAL(i, *static_cast(s.Get())); 31 | } 32 | BOOST_CHECK_EQUAL(100000ULL, i); 33 | } 34 | 35 | }}} // namespaces 36 | -------------------------------------------------------------------------------- /util/stream/timer.hh: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_STREAM_TIMER_H 2 | #define UTIL_STREAM_TIMER_H 3 | 4 | // Sorry Jon, this was adding library dependencies in Moses and people complained. 5 | 6 | /*#include 7 | 8 | #if BOOST_VERSION >= 104800 9 | #include 10 | #define UTIL_TIMER(str) boost::timer::auto_cpu_timer timer(std::cerr, 1, (str)) 11 | #else 12 | //#warning Using Boost older than 1.48. 
Timing information will not be available.*/ 13 | #define UTIL_TIMER(str) 14 | //#endif 15 | 16 | #endif // UTIL_STREAM_TIMER_H 17 | -------------------------------------------------------------------------------- /util/tokenize_piece_test.cc: -------------------------------------------------------------------------------- 1 | #include "util/tokenize_piece.hh" 2 | #include "util/string_piece.hh" 3 | 4 | #define BOOST_TEST_MODULE TokenIteratorTest 5 | #include 6 | 7 | #include 8 | 9 | namespace util { 10 | namespace { 11 | 12 | BOOST_AUTO_TEST_CASE(pipe_pipe_none) { 13 | const char str[] = "nodelimit at all"; 14 | TokenIter it(str, MultiCharacter("|||")); 15 | BOOST_REQUIRE(it); 16 | BOOST_CHECK_EQUAL(StringPiece(str), *it); 17 | ++it; 18 | BOOST_CHECK(!it); 19 | } 20 | BOOST_AUTO_TEST_CASE(pipe_pipe_two) { 21 | const char str[] = "|||"; 22 | TokenIter it(str, MultiCharacter("|||")); 23 | BOOST_REQUIRE(it); 24 | BOOST_CHECK_EQUAL(StringPiece(), *it); 25 | ++it; 26 | BOOST_REQUIRE(it); 27 | BOOST_CHECK_EQUAL(StringPiece(), *it); 28 | ++it; 29 | BOOST_CHECK(!it); 30 | } 31 | 32 | BOOST_AUTO_TEST_CASE(remove_empty) { 33 | const char str[] = "|||"; 34 | TokenIter it(str, MultiCharacter("|||")); 35 | BOOST_CHECK(!it); 36 | } 37 | 38 | BOOST_AUTO_TEST_CASE(remove_empty_keep) { 39 | const char str[] = " |||"; 40 | TokenIter it(str, MultiCharacter("|||")); 41 | BOOST_REQUIRE(it); 42 | BOOST_CHECK_EQUAL(StringPiece(" "), *it); 43 | ++it; 44 | BOOST_CHECK(!it); 45 | } 46 | 47 | } // namespace 48 | } // namespace util 49 | -------------------------------------------------------------------------------- /util/usage.hh: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_USAGE_H 2 | #define UTIL_USAGE_H 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | namespace util { 10 | // Time in seconds since process started. Zero on unsupported platforms. 
11 | double WallTime(); 12 | 13 | void PrintUsage(std::ostream &to); 14 | 15 | // Determine how much physical memory there is. Return 0 on failure. 16 | uint64_t GuessPhysicalMemory(); 17 | 18 | // Parse a size like unix sort. Sadly, this means the default multiplier is K. 19 | uint64_t ParseSize(const std::string &arg); 20 | } // namespace util 21 | #endif // UTIL_USAGE_H 22 | -------------------------------------------------------------------------------- /util/vocab_main.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int main() { 7 | boost::unordered_set vocab; 8 | std::string word; 9 | while (std::cin >> word) { 10 | if (vocab.insert(word).second) std::cout << word << '\n'; 11 | } 12 | if (!std::cin.eof()) { 13 | std::cerr << "Error reading" << std::endl; 14 | return 1; 15 | } 16 | return 0; 17 | } 18 | --------------------------------------------------------------------------------