├── .gitignore
├── COPYING
├── COPYING.LESSER
├── Jamroot
├── LICENSE
├── MEMT
    ├── Alignment
    │   ├── CherryPick.cc
    │   ├── Everything.cc
    │   ├── Jamfile
    │   ├── MMBRMatcherMEMT.java
    │   ├── MatcherMEMT.java
    │   ├── NBest.cc
    │   ├── NBest.hh
    │   ├── Stemmer.cc
    │   ├── Stemmer.hh
    │   ├── Summarize.cc
    │   ├── compile.sh
    │   └── match.sh
    ├── Controller
    │   ├── CommandLine.cc
    │   ├── CommandLine.hh
    │   ├── Config.hh
    │   ├── Connection.hh
    │   ├── ConnectionHandler.hh
    │   ├── CoordWrite.hh
    │   ├── DecoderHandler.hh
    │   ├── Jamfile
    │   ├── Main.cc
    │   ├── OutputHandler.hh
    │   ├── Sentence.hh
    │   ├── Sentence
    │   │   ├── Config.hh
    │   │   ├── Jamfile
    │   │   ├── Options.cc
    │   │   └── Options.hh
    │   ├── Server.hh
    │   └── Worker.hh
    ├── Decoder
    │   ├── Completed.hh
    │   ├── Config.cc
    │   ├── Config.hh
    │   ├── EndBeam.cc
    │   ├── EndBeam.hh
    │   ├── History.hh
    │   ├── HistoryBeam.cc
    │   ├── HistoryBeam.hh
    │   ├── Hypothesis.hh
    │   ├── Implementation.hh
    │   ├── InternalBeam.hh
    │   ├── Jamfile
    │   ├── Options.cc
    │   ├── Options.hh
    │   ├── Score.cc
    │   └── Score.hh
    ├── Feature
    │   ├── Base
    │   │   ├── Process.hh
    │   │   └── Sign.hh
    │   ├── LM
    │   │   ├── Config.cc
    │   │   ├── Config.hh
    │   │   ├── Hypothesis.hh
    │   │   ├── Jamfile
    │   │   ├── Options.cc
    │   │   ├── Options.hh
    │   │   ├── Process.hh
    │   │   ├── Sentence.cc
    │   │   └── Sentence.hh
    │   ├── Length
    │   │   ├── Config.hh
    │   │   ├── Hypothesis.hh
    │   │   ├── Process.hh
    │   │   └── Sentence.hh
    │   ├── Scorer
    │   │   ├── Config.cc
    │   │   ├── Config.hh
    │   │   ├── Fuzz.cc
    │   │   ├── Fuzz.hh
    │   │   ├── Hypothesis.hh
    │   │   ├── Jamfile
    │   │   ├── Options.cc
    │   │   ├── Options.hh
    │   │   └── Sentence.hh
    │   └── Verbatim
    │   │   ├── Config.hh
    │   │   ├── Hypothesis.hh
    │   │   ├── Jamfile
    │   │   ├── Options.cc
    │   │   ├── Options.hh
    │   │   ├── Process.hh
    │   │   ├── Sentence.cc
    │   │   └── Sentence.hh
    ├── Input
    │   ├── AlignType.cc
    │   ├── AlignType.hh
    │   ├── Alignment.cc
    │   ├── Alignment.hh
    │   ├── Capitalization.cc
    │   ├── Capitalization.hh
    │   ├── Config.hh
    │   ├── Dump.cc
    │   ├── Factory.hh
    │   ├── Format.cc
    │   ├── Format.hh
    │   ├── Input.hh
    │   ├── Jamfile
    │   ├── Location.hh
    │   ├── Options.cc
    │   ├── Options.hh
    │   ├── Read.cc
    │   ├── Read.hh
    │   ├── ReadDispatcher.cc
    │   ├── ReadDispatcher.hh
    │   ├── ReadFromJava.cc
    │   ├── ReadFromJava.hh
    │   ├── Same.cc
    │   ├── Same.hh
    │   ├── Text.cc
    │   ├── Text.hh
    │   ├── Transitive.cc
    │   ├── Transitive.hh
    │   └── Word.hh
    ├── Jamfile
    ├── Output
    │   ├── Config.hh
    │   ├── Jamfile
    │   ├── NBest.cc
    │   ├── NBest.hh
    │   ├── NullBeamDumper.hh
    │   ├── Options.cc
    │   ├── Options.hh
    │   ├── StderrBeamDumper.hh
    │   ├── ToString.cc
    │   └── ToString.hh
    ├── README
    ├── Strategy
    │   ├── Graph
    │   │   ├── Config.hh
    │   │   ├── Coverage
    │   │   │   ├── Config.hh
    │   │   │   ├── Hypothesis.hh
    │   │   │   └── Sentence.hh
    │   │   ├── Hypothesis.hh
    │   │   ├── Jamfile
    │   │   ├── Options.cc
    │   │   ├── Options.hh
    │   │   └── Sentence.hh
    │   ├── Horizon
    │   │   ├── Config.hh
    │   │   ├── Horizon.cc
    │   │   ├── Horizon.hh
    │   │   ├── Hypothesis.cc
    │   │   ├── Hypothesis.hh
    │   │   ├── Jamfile
    │   │   ├── Options.cc
    │   │   └── Options.hh
    │   ├── Legacy
    │   │   ├── Config.hh
    │   │   ├── Hypothesis.hh
    │   │   ├── Jamfile
    │   │   ├── Options.cc
    │   │   ├── Options.hh
    │   │   └── Sentence.hh
    │   └── Phrase
    │   │   ├── Aligned.cc
    │   │   ├── Aligned.hh
    │   │   ├── Hypothesis.hh
    │   │   ├── Jamfile
    │   │   ├── Phrase.cc
    │   │   ├── Phrase.hh
    │   │   ├── Punctuation.cc
    │   │   ├── Punctuation.hh
    │   │   ├── Type.cc
    │   │   └── Type.hh
    └── scripts
    │   ├── decode.rb
    │   ├── experiment
    │       ├── decode_subgenre.sh
    │       ├── en.sh
    │       ├── match.rb
    │       ├── preprocess.sh
    │       ├── qsub.sh
    │       ├── run.rb
    │       ├── status.rb
    │       └── stripsgml.rb
    │   ├── make_filter_vocab.rb
    │   ├── match.rb
    │   ├── nbest_first.rb
    │   ├── server.sh
    │   ├── shell_escape.rb
    │   ├── simple_decode.rb
    │   ├── util.rb
    │   └── zmert
    │       ├── decoder.rb
    │       ├── format.rb
    │       ├── fuzz.rb
    │       ├── run.rb
    │       └── zmert.rb
├── README
├── Utilities
    ├── Input
    │   ├── select_gale_docs.rb
    │   ├── select_nist_docs.rb
    │   └── unescape.rb
    ├── Output
    │   ├── Jamfile
    │   ├── generateSGMLfromText.perl
    │   ├── nist_rewrap.rb
    │   ├── postprocess-rem-nonascii.pl
    │   ├── postprocess.pl
    │   ├── remove_nonlatin.cc
    │   └── replace-oovs-from-giza.pl
    ├── Tokenization
    │   └── Moses
    │   │   ├── README
    │   │   └── detokenizer.perl
    ├── Tuning
    │   └── zmert.jar
    ├── queue.rb
    └── scoring
    │   ├── INSTALL
    │   ├── LICENSE
    │   ├── README
    │   ├── interlace.rb
    │   ├── lib
    │       ├── length.rb
    │       ├── metaify.rb
    │       ├── meteorify.rb
    │       ├── nistify.rb
    │       ├── postprocess.pl
    │       ├── shell_escape.rb
    │       └── terrify.rb
    │   ├── mteval-v13.pl
    │   ├── score.rb
    │   └── setup.sh
├── bjam
├── install
    ├── README
    ├── ant.sh
    ├── apache-ant-1.7.1-bin.tar.bz2.md5
    ├── apache-ant-1.7.1-bin.tar.bz2.sha1
    ├── boost.sh
    ├── boost_1_49_0.tar.bz2.md5
    ├── boost_1_49_0.tar.bz2.sha1
    ├── checksum.sh
    ├── environment.bash
    ├── environment.tcsh
    ├── icu.sh
    ├── icu4c-4_6_1-src.tgz.md5
    ├── icu4c-4_6_1-src.tgz.sha1
    ├── install.sh
    ├── lib.sh
    ├── ruby-1.9.1-p376.tar.gz.md5
    ├── ruby-1.9.1-p376.tar.gz.sha1
    ├── ruby-1.9.2-p0.tar.gz.md5
    ├── ruby-1.9.2-p0.tar.gz.sha1
    ├── ruby.sh
    ├── tercom-0.7.25.tgz.md5
    ├── tercom-0.7.25.tgz.sha1
    ├── tercom.sh
    └── zmert.sh
├── jam-files
    ├── LICENSE_1_0.txt
    ├── boost-build
    │   ├── boost-build.jam
    │   ├── bootstrap.jam
    │   ├── build-system.jam
    │   ├── build
    │   │   ├── ac.jam
    │   │   ├── alias.jam
    │   │   ├── build-request.jam
    │   │   ├── config-cache.jam
    │   │   ├── configure.jam
    │   │   ├── feature.jam
    │   │   ├── generators.jam
    │   │   ├── project.jam
    │   │   ├── property-set.jam
    │   │   ├── property.jam
    │   │   ├── readme.txt
    │   │   ├── scanner.jam
    │   │   ├── targets.jam
    │   │   ├── toolset.jam
    │   │   ├── type.jam
    │   │   ├── version.jam
    │   │   └── virtual-target.jam
    │   ├── kernel
    │   │   ├── boost-build.jam
    │   │   ├── bootstrap.jam
    │   │   ├── class.jam
    │   │   ├── errors.jam
    │   │   └── modules.jam
    │   ├── options
    │   │   └── help.jam
    │   ├── site-config.jam
    │   ├── tools
    │   │   ├── acc.jam
    │   │   ├── auto-index.jam
    │   │   ├── bison.jam
    │   │   ├── boostbook-config.jam
    │   │   ├── boostbook.jam
    │   │   ├── borland.jam
    │   │   ├── builtin.jam
    │   │   ├── cast.jam
    │   │   ├── clang-darwin.jam
    │   │   ├── clang-linux.jam
    │   │   ├── clang.jam
    │   │   ├── common.jam
    │   │   ├── como-linux.jam
    │   │   ├── como-win.jam
    │   │   ├── como.jam
    │   │   ├── convert.jam
    │   │   ├── cray.jam
    │   │   ├── cw-config.jam
    │   │   ├── cw.jam
    │   │   ├── darwin.jam
    │   │   ├── dmc.jam
    │   │   ├── docutils.jam
    │   │   ├── doxygen-config.jam
    │   │   ├── doxygen.jam
    │   │   ├── doxygen
    │   │   │   ├── windows-paths-check.doxyfile
    │   │   │   └── windows-paths-check.hpp
    │   │   ├── fop.jam
    │   │   ├── fortran.jam
    │   │   ├── gcc.jam
    │   │   ├── generate.jam
    │   │   ├── gettext.jam
    │   │   ├── gfortran.jam
    │   │   ├── hp_cxx.jam
    │   │   ├── hpfortran.jam
    │   │   ├── ifort.jam
    │   │   ├── intel-darwin.jam
    │   │   ├── intel-linux.jam
    │   │   ├── intel-win.jam
    │   │   ├── intel.jam
    │   │   ├── jpeg.jam
    │   │   ├── lex.jam
    │   │   ├── make.jam
    │   │   ├── mc.jam
    │   │   ├── message.jam
    │   │   ├── midl.jam
    │   │   ├── mipspro.jam
    │   │   ├── mpi.jam
    │   │   ├── msvc-config.jam
    │   │   ├── msvc.jam
    │   │   ├── notfile.jam
    │   │   ├── package.jam
    │   │   ├── pathscale.jam
    │   │   ├── pch.jam
    │   │   ├── pgi.jam
    │   │   ├── png.jam
    │   │   ├── python-config.jam
    │   │   ├── python.jam
    │   │   ├── qcc.jam
    │   │   ├── qt.jam
    │   │   ├── qt3.jam
    │   │   ├── qt4.jam
    │   │   ├── qt5.jam
    │   │   ├── quickbook-config.jam
    │   │   ├── quickbook.jam
    │   │   ├── rc.jam
    │   │   ├── stage.jam
    │   │   ├── stlport.jam
    │   │   ├── sun.jam
    │   │   ├── symlink.jam
    │   │   ├── testing-aux.jam
    │   │   ├── testing.jam
    │   │   ├── tiff.jam
    │   │   ├── types
    │   │   │   ├── asm.jam
    │   │   │   ├── cpp.jam
    │   │   │   ├── exe.jam
    │   │   │   ├── html.jam
    │   │   │   ├── lib.jam
    │   │   │   ├── obj.jam
    │   │   │   ├── objc.jam
    │   │   │   ├── preprocessed.jam
    │   │   │   ├── qt.jam
    │   │   │   ├── register.jam
    │   │   │   └── rsp.jam
    │   │   ├── unix.jam
    │   │   ├── vacpp.jam
    │   │   ├── whale.jam
    │   │   ├── xlf.jam
    │   │   ├── xsltproc-config.jam
    │   │   ├── xsltproc.jam
    │   │   ├── xsltproc
    │   │   │   ├── included.xsl
    │   │   │   ├── test.xml
    │   │   │   └── test.xsl
    │   │   └── zlib.jam
    │   ├── user-config.jam
    │   └── util
    │   │   ├── assert.jam
    │   │   ├── container.jam
    │   │   ├── doc.jam
    │   │   ├── indirect.jam
    │   │   ├── numbers.jam
    │   │   ├── option.jam
    │   │   ├── order.jam
    │   │   ├── os.jam
    │   │   ├── path.jam
    │   │   ├── print.jam
    │   │   ├── regex.jam
    │   │   ├── sequence.jam
    │   │   ├── set.jam
    │   │   ├── string.jam
    │   │   └── utility.jam
    ├── engine
    │   ├── Jambase
    │   ├── boost-jam.spec
    │   ├── boost-no-inspect
    │   ├── build.bat
    │   ├── build.jam
    │   ├── build.sh
    │   ├── builtins.c
    │   ├── builtins.h
    │   ├── bump_version.py
    │   ├── class.c
    │   ├── class.h
    │   ├── command.c
    │   ├── command.h
    │   ├── compile.c
    │   ├── compile.h
    │   ├── constants.c
    │   ├── constants.h
    │   ├── cwd.c
    │   ├── cwd.h
    │   ├── debian
    │   │   ├── changelog
    │   │   ├── control
    │   │   ├── copyright
    │   │   ├── jam.man.sgml
    │   │   └── rules
    │   ├── debug.c
    │   ├── debug.h
    │   ├── execcmd.c
    │   ├── execcmd.h
    │   ├── execnt.c
    │   ├── execunix.c
    │   ├── filent.c
    │   ├── filesys.c
    │   ├── filesys.h
    │   ├── fileunix.c
    │   ├── frames.c
    │   ├── frames.h
    │   ├── function.c
    │   ├── function.h
    │   ├── glob.c
    │   ├── hash.c
    │   ├── hash.h
    │   ├── hcache.c
    │   ├── hcache.h
    │   ├── hdrmacro.c
    │   ├── hdrmacro.h
    │   ├── headers.c
    │   ├── headers.h
    │   ├── jam.c
    │   ├── jam.h
    │   ├── jambase.c
    │   ├── jambase.h
    │   ├── jamgram.c
    │   ├── jamgram.h
    │   ├── jamgram.y
    │   ├── jamgram.yy
    │   ├── jamgramtab.h
    │   ├── lists.c
    │   ├── lists.h
    │   ├── make.c
    │   ├── make.h
    │   ├── make1.c
    │   ├── md5.c
    │   ├── md5.h
    │   ├── mem.c
    │   ├── mem.h
    │   ├── mkjambase.c
    │   ├── modules.c
    │   ├── modules.h
    │   ├── modules
    │   │   ├── order.c
    │   │   ├── path.c
    │   │   ├── property-set.c
    │   │   ├── readme.txt
    │   │   ├── regex.c
    │   │   ├── sequence.c
    │   │   └── set.c
    │   ├── native.c
    │   ├── native.h
    │   ├── object.c
    │   ├── object.h
    │   ├── option.c
    │   ├── option.h
    │   ├── output.c
    │   ├── output.h
    │   ├── parse.c
    │   ├── parse.h
    │   ├── patchlevel.h
    │   ├── pathnt.c
    │   ├── pathsys.c
    │   ├── pathsys.h
    │   ├── pathunix.c
    │   ├── regexp.c
    │   ├── regexp.h
    │   ├── rules.c
    │   ├── rules.h
    │   ├── scan.c
    │   ├── scan.h
    │   ├── search.c
    │   ├── search.h
    │   ├── strings.c
    │   ├── strings.h
    │   ├── subst.c
    │   ├── subst.h
    │   ├── timestamp.c
    │   ├── timestamp.h
    │   ├── variable.c
    │   ├── variable.h
    │   ├── w32_getreg.c
    │   └── yyacc.c
    ├── fail
    │   └── Jamroot
    └── sanity.jam
├── lm
    ├── Jamfile
    ├── bhiksha.cc
    ├── bhiksha.hh
    ├── binary_format.cc
    ├── binary_format.hh
    ├── blank.hh
    ├── build_binary_main.cc
    ├── builder
    │   ├── Jamfile
    │   ├── README.md
    │   ├── TODO
    │   ├── adjust_counts.cc
    │   ├── adjust_counts.hh
    │   ├── adjust_counts_test.cc
    │   ├── corpus_count.cc
    │   ├── corpus_count.hh
    │   ├── corpus_count_test.cc
    │   ├── discount.hh
    │   ├── dump_counts_main.cc
    │   ├── hash_gamma.hh
    │   ├── header_info.hh
    │   ├── initial_probabilities.cc
    │   ├── initial_probabilities.hh
    │   ├── interpolate.cc
    │   ├── interpolate.hh
    │   ├── joint_order.hh
    │   ├── lmplz_main.cc
    │   ├── ngram.hh
    │   ├── ngram_stream.hh
    │   ├── output.cc
    │   ├── output.hh
    │   ├── pipeline.cc
    │   ├── pipeline.hh
    │   ├── print.cc
    │   ├── print.hh
    │   └── sort.hh
    ├── config.cc
    ├── config.hh
    ├── enumerate_vocab.hh
    ├── facade.hh
    ├── filter
    │   ├── Jamfile
    │   ├── arpa_io.cc
    │   ├── arpa_io.hh
    │   ├── count_io.hh
    │   ├── filter_main.cc
    │   ├── format.hh
    │   ├── phrase.cc
    │   ├── phrase.hh
    │   ├── phrase_table_vocab_main.cc
    │   ├── thread.hh
    │   ├── vocab.cc
    │   ├── vocab.hh
    │   └── wrapper.hh
    ├── fragment_main.cc
    ├── kenlm_benchmark_main.cc
    ├── left.hh
    ├── left_test.cc
    ├── lm_exception.cc
    ├── lm_exception.hh
    ├── max_order.hh
    ├── model.cc
    ├── model.hh
    ├── model_test.cc
    ├── model_type.hh
    ├── ngram_query.hh
    ├── partial.hh
    ├── partial_test.cc
    ├── quantize.cc
    ├── quantize.hh
    ├── query_main.cc
    ├── read_arpa.cc
    ├── read_arpa.hh
    ├── return.hh
    ├── search_hashed.cc
    ├── search_hashed.hh
    ├── search_trie.cc
    ├── search_trie.hh
    ├── sizes.cc
    ├── sizes.hh
    ├── state.hh
    ├── test.arpa
    ├── test_nounk.arpa
    ├── trie.cc
    ├── trie.hh
    ├── trie_sort.cc
    ├── trie_sort.hh
    ├── value.hh
    ├── value_build.cc
    ├── value_build.hh
    ├── virtual_interface.cc
    ├── virtual_interface.hh
    ├── vocab.cc
    ├── vocab.hh
    ├── weights.hh
    └── word_index.hh
└── util
    ├── Jamfile
    ├── barrier.hh
    ├── bit_packing.cc
    ├── bit_packing.hh
    ├── bit_packing_test.cc
    ├── bounded_i_stream.hh
    ├── cat_compressed_main.cc
    ├── cat_range_main.cc
    ├── debug.hh
    ├── double-conversion
        ├── Jamfile
        ├── LICENSE
        ├── bignum-dtoa.cc
        ├── bignum-dtoa.h
        ├── bignum.cc
        ├── bignum.h
        ├── cached-powers.cc
        ├── cached-powers.h
        ├── diy-fp.cc
        ├── diy-fp.h
        ├── double-conversion.cc
        ├── double-conversion.h
        ├── fast-dtoa.cc
        ├── fast-dtoa.h
        ├── fixed-dtoa.cc
        ├── fixed-dtoa.h
        ├── ieee.h
        ├── strtod.cc
        ├── strtod.h
        └── utils.h
    ├── ersatz_progress.cc
    ├── ersatz_progress.hh
    ├── exception.cc
    ├── exception.hh
    ├── fake_ofstream.hh
    ├── file.cc
    ├── file.hh
    ├── file_piece.cc
    ├── file_piece.hh
    ├── file_piece_test.cc
    ├── fixed_array.hh
    ├── getopt.c
    ├── getopt.hh
    ├── hash_fusion.hh
    ├── hash_fusion_test.cc
    ├── hash_output.hh
    ├── hash_output_test.cc
    ├── have.hh
    ├── joint_sort.hh
    ├── joint_sort_test.cc
    ├── latex_escape.cc
    ├── latex_escape.hh
    ├── log_num.hh
    ├── log_num_test.cc
    ├── lower_main.cc
    ├── mmap.cc
    ├── mmap.hh
    ├── multi_intersection.hh
    ├── multi_intersection_test.cc
    ├── murmur_hash.cc
    ├── murmur_hash.hh
    ├── n_best.hh
    ├── n_best_test.cc
    ├── numbers.hh
    ├── options.cc
    ├── options.hh
    ├── parallel_read.cc
    ├── parallel_read.hh
    ├── pcqueue.hh
    ├── pcqueue_test.cc
    ├── pool.cc
    ├── pool.hh
    ├── print_concurrency_main.cc
    ├── probing_hash_table.hh
    ├── probing_hash_table_test.cc
    ├── proxy_iterator.hh
    ├── read_compressed.cc
    ├── read_compressed.hh
    ├── read_compressed_test.cc
    ├── scoped.cc
    ├── scoped.hh
    ├── sized_iterator.hh
    ├── sized_iterator_test.cc
    ├── socket_concurrent_iostream.hh
    ├── sorted_uniform.hh
    ├── sorted_uniform_test.cc
    ├── stream
        ├── Jamfile
        ├── block.hh
        ├── chain.cc
        ├── chain.hh
        ├── config.hh
        ├── io.cc
        ├── io.hh
        ├── io_test.cc
        ├── line_input.cc
        ├── line_input.hh
        ├── multi_progress.cc
        ├── multi_progress.hh
        ├── multi_stream.hh
        ├── sort.hh
        ├── sort_test.cc
        ├── stream.hh
        ├── stream_test.cc
        └── timer.hh
    ├── string_piece.cc
    ├── string_piece.hh
    ├── string_piece_hash.hh
    ├── thread_pool.hh
    ├── tokenize_piece.hh
    ├── tokenize_piece_test.cc
    ├── usage.cc
    ├── usage.hh
    ├── utf8.cc
    ├── utf8.hh
    ├── utf8_test.cc
    └── vocab_main.cc


/.gitignore:
--------------------------------------------------------------------------------
1 | dist
2 | bin
3 | *.so
4 | lm/build_binary
5 | lm/query
6 | previous.sh
7 | 


--------------------------------------------------------------------------------
/Jamroot:
--------------------------------------------------------------------------------
 1 | import option ;
 2 | import modules ;
 3 | import path ;
 4 | path-constant TOP : . ;
 5 | include $(TOP)/jam-files/sanity.jam ;
 6 | 
 7 | boost 104200 ;
 8 | external-lib z ;
 9 | 
10 | project : requirements <threading>multi <address-model>64 <warnings>on $(requirements) <include>. ;
11 | project : default-build release ;
12 | 
13 | use-project /util : util ;
14 | use-project /lm : lm ;
15 | 
16 | build-project lm ;
17 | build-project util ;
18 | build-project MEMT ;
19 | 
20 | install-bin-libs MEMT/Controller//MEMT MEMT/Input//Dump MEMT/Alignment//SummarizeAlignment lm//query lm//build_binary lm/filter//filter util//programs ;
21 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 |     Avenue code is free software: you can redistribute it and/or modify
 2 |     it under the terms of the GNU Lesser General Public License as published
 3 |     by the Free Software Foundation, either version 3 of the License, or
 4 |     (at your option) any later version.
 5 | 
 6 |     Avenue code is distributed in the hope that it will be useful,
 7 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 9 |     GNU Lesser General Public License for more details.
10 | 
11 |     You should have received a copy of the GNU Lesser General Public License
12 |     along with Avenue code.  If not, see <http://www.gnu.org/licenses/>.
13 | 
14 | Most of the code here is licensed under the LGPL.  There are exceptions which have their own licenses, listed below.  You may not have been provided with some of these directories or files.  
15 | 
16 | jam-files contains Boost Jam.  See the Boost license in that directory.  
17 | 
18 | install contains scripts to download and install software, but not the software itself.  Downloaded software has its own license.  
19 | 
20 | Utilities/scoring contains scripts that download and install metrics.  This license covers METEOR, but not the other metrics.  
21 | 
22 | Utilities/Tuning/zmert comes from Joshua.  Joshua is LGPL.  
23 | 
24 | Utilities/Tokenization comes from Moses and Moses is LGPL.  
25 | 
26 | util/string_piece.hh, util/string_piece.cc, and util/google-sparsehash are Google code and contain their own licenses.  
27 | 


--------------------------------------------------------------------------------
/MEMT/Alignment/CherryPick.cc:
--------------------------------------------------------------------------------
 1 | /* Pick a short sentence pair containing all alignment types. */
 2 | #include "MEMT/Input/AlignType.hh"
 3 | #include "MEMT/Input/Input.hh"
 4 | #include "MEMT/Input/Format.hh"
 5 | #include "MEMT/Input/ReadFromJava.hh"
 6 | 
 7 | #include <iostream>
 8 | #include <string>
 9 | #include <vector>
10 | 
11 | int main() {  // Reads sentences from stdin via input::ReadFromJava and prints a LaTeX alignment for each qualifying engine pair.
12 |   const input::AlignType kWanted = input::AL_EXACT | input::AL_WN_SYNONYMY | input::AL_SNOWBALL_STEM | input::AL_PARAPHRASE;
13 |   const unsigned int kMaxLength = 12;  // Both sentences of a pair must have fewer words than this.
14 |   input::Input input;
15 |   for (;;) {  // One sentence per iteration; left only through the break or the rethrow below.
16 |     try {
17 |       input::ReadFromJava(std::cin, input);
18 |     }
19 |     catch (const std::ios_base::failure &) {
20 |       if (std::cin.eof()) break;  // Failure with eof set is treated as the normal end of input.
21 |       throw;  // Any other stream failure propagates to the caller.
22 |     }
23 |     for (unsigned int e = 0; e < input.engines.size(); ++e) {
24 |       for (unsigned int f = e + 1; f < input.engines.size(); ++f) {
25 |         input::AlignType got = 0;  // Union of the alignment types that e's words report toward engine f.
26 |         for (unsigned int w = 0; w < input.engines[e].words.size(); ++w) {
27 |           got |= input.GetWord(e, w).alignments.Ask(f).type;
28 |         }
29 |         if (((got & kWanted) == kWanted) && (input.engines[e].words.size() < kMaxLength) && (input.engines[f].words.size() < kMaxLength)) {
30 |           LaTeXAlignment(std::cout, "System 1", input.engines[e], "System 2", input.engines[f]);
31 |         }
32 |       }
33 |     }
34 |   }
35 | }
36 | 


--------------------------------------------------------------------------------
/MEMT/Alignment/Jamfile:
--------------------------------------------------------------------------------
1 | exe SummarizeAlignment : Summarize.cc ../Input//input ;
2 | exe CherryPick : CherryPick.cc ../Input//input ;
3 | 


--------------------------------------------------------------------------------
/MEMT/Alignment/NBest.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Alignment/NBest.hh"
 2 | 
 3 | #include <assert.h>
 4 | #include <ctype.h>
 5 | 
 6 | namespace input {
 7 | 
 8 | NBestException::NBestException() throw() {}
 9 | NBestException::~NBestException() throw() {}
10 | 
11 | NBestReader::NBestReader(const char *file) : file_(file) {
12 |   ReadSegmentID();  // Prime next_segment_ with the id that starts the first line.
13 | }
14 | 
15 | namespace {
16 | void SkipSpaces(StringPiece &str) {  // Advances str past any leading whitespace.
17 |   while (str.size() && isspace(static_cast<unsigned char>(*str.data())))  // Cast first: isspace on a negative char is undefined.
18 |     str.set(str.data() + 1, str.size() - 1);
19 | }
20 | } // namespace
21 | 
22 | bool NBestReader::ReadEntry(unsigned int segment, StringPiece &out) {  // Returns false, reading nothing, when the pending line belongs to a later segment.
23 |   if (segment < next_segment_) return false;
24 |   assert(segment == next_segment_);
25 |   out = file_.ReadLine();  // NOTE(review): out views file_'s data; confirm it stays valid across the ReadSegmentID() call below.
26 |   SkipSpaces(out);
27 |   if (out.size() < 3 || out.substr(0, 3) != "|||")
28 |     UTIL_THROW(NBestException, "Three pipes missing in " << out);
29 |   out.set(out.data() + 3, out.size() - 3);  // Consume the leading "|||"; otherwise the scan below matches it at offset 0 and out ends up empty.
30 |   SkipSpaces(out);
31 |   for (size_t i = 0; i + 2 < out.size(); ++i) {  // No portable strnstr: hand-scan for the "|||" that ends the text.
32 |     if (out.data()[i] == '|' && out.data()[i + 1] == '|' && out.data()[i + 2] == '|') {
33 |       out.set(out.data(), i);
34 |       break;
35 |     }
36 |   }
37 |   ReadSegmentID();
38 |   return true;
39 | }
40 | 
41 | void NBestReader::ReadSegmentID() {  // Loads the segment id of the next line; the max value marks end of file (see Ended()).
42 |   try {
43 |     next_segment_ = file_.ReadULong();
44 |   } catch(const util::EndOfFileException &) {
45 |     next_segment_ = std::numeric_limits<unsigned int>::max();
46 |   }
47 | }
48 | 
49 | } // namespace input
50 | 


--------------------------------------------------------------------------------
/MEMT/Alignment/NBest.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Alignment_NBest_h
 2 | #define _MEMT_Alignment_NBest_h
 3 | 
 4 | #include "util/exception.hh"
 5 | #include "util/file_piece.hh"
 6 | 
 7 | #include <limits>
 8 | 
 9 | namespace input {
10 | 
11 | class NBestException : public util::Exception {  // Thrown by NBestReader::ReadEntry when a line lacks the "|||" separator.
12 |   public:
13 |     NBestException() throw();
14 |     ~NBestException() throw();
15 | };
16 | 
17 | class NBestReader {  // Walks a file in which every line begins with a numeric segment id.
18 |   public:
19 |     explicit NBestReader(const char *file);  // Constructs file_ from the path and reads the first segment id.
20 | 
21 |     bool ReadEntry(unsigned int segment, StringPiece &out);  // Fills out from the next line; returns false without reading when segment is below the pending id.
22 | 
23 |     bool Ended() const {  // True once the read-ahead for a segment id hit end of file.
24 |       return next_segment_ == std::numeric_limits<unsigned int>::max();
25 |     }
26 | 
27 |   private:
28 |     void ReadSegmentID();  // Refreshes next_segment_ from the file.
29 | 
30 |     util::FilePiece file_;
31 | 
32 |     unsigned int next_segment_;  // Segment id of the next unread line; max() doubles as the end-of-file sentinel.
33 | };
34 | 
35 | } // namespace input
36 | 
37 | #endif // _MEMT_Alignment_NBest_h
38 | 


--------------------------------------------------------------------------------
/MEMT/Alignment/Stemmer.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Alignment/Stemmer.hh"
 2 | #include "util/exception.hh"
 3 | 
 4 | #include "MEMT/Alignment/libstemmer_c/include/libstemmer.h"
 5 | 
 6 | namespace input {
 7 | 
 8 | SnowballWrap::SnowballWrap(const char *language) : stemmer_(sb_stemmer_new(language, NULL)) {  // NULL charenc: libstemmer assumes UTF-8.
 9 |   if (stemmer_ == NULL) UTIL_THROW(util::Exception, "Failed to create stemmer for " << language);
10 | }
11 | 
12 | SnowballWrap::~SnowballWrap() {
13 |   if (stemmer_ != NULL) sb_stemmer_delete(stemmer_);
14 | }
15 | 
16 | StringPiece SnowballWrap::Stem(const StringPiece &word) {
17 |   const unsigned char *raw = reinterpret_cast<const unsigned char*>(word.data());  // Snowball takes unsigned char; StringPiece holds plain char.
18 |   const char *stemmed = reinterpret_cast<const char*>(sb_stemmer_stem(stemmer_, raw, word.size()));
19 |   if (stemmed == NULL) UTIL_THROW(util::Exception, "Stemming " << word << " returned NULL.");
20 |   return StringPiece(stemmed, sb_stemmer_length(stemmer_));  // The view is only good until the next Stem() call.
21 | }
22 | 
23 | } // namespace input
24 | 


--------------------------------------------------------------------------------
/MEMT/Alignment/Stemmer.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Alignment_Stemmer_h
 2 | #define _MEMT_Alignment_Stemmer_h
 3 | 
 4 | #include "util/string_piece.hh"
 5 | 
 6 | struct sb_stemmer;
 7 | 
 8 | namespace input {
 9 | 
10 | class SnowballWrap {  // Owns one Snowball stemmer handle; the destructor releases it.
11 |   public:
12 |     explicit SnowballWrap(const char *language);  // Throws util::Exception if no stemmer can be created for language.
13 |     ~SnowballWrap();
14 |     // The returned StringPiece is invalidated after each call.  Sadly non-const.  
15 |     StringPiece Stem(const StringPiece &word);
16 | 
17 |   private:
18 |     SnowballWrap(const SnowballWrap &);  // Not copyable: a copy would make two destructors delete the same handle.
19 |     SnowballWrap &operator=(const SnowballWrap &);  // Declared private and intentionally never defined.
20 |     sb_stemmer *stemmer_;
21 | };
22 | 
23 | } // namespace input
24 | 
25 | #endif // _MEMT_Alignment_Stemmer_h
26 | 


--------------------------------------------------------------------------------
/MEMT/Alignment/compile.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | loc="$(dirname "$0")"  # Directory holding this script; $0 is quoted so paths with spaces survive.
 3 | if [ ! -f "$loc"/../../Utilities/scoring/meteor-1.0/dist/meteor-1.0/meteor.jar ]; then  # meteor.jar absent: run the scoring setup first.
 4 |   pushd "$loc"/../../Utilities/scoring || exit 1
 5 |   ./setup.sh || exit 1
 6 |   popd || exit 1
 7 | fi
 8 | pushd "$loc" || exit 1
 9 | javac -cp ../../Utilities/scoring/meteor-1.0/dist/meteor-1.0/meteor.jar MatcherMEMT.java || exit 1  # Not exec'd: exec would replace the shell and skip the popd below.
10 | popd || exit 1
11 | 


--------------------------------------------------------------------------------
/MEMT/Alignment/match.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | loc="$(dirname "$0")"  # Directory holding this script; $0 is quoted so paths with spaces survive.
3 | if [ ! -f "$loc"/MatcherMEMT.class ]; then  # Class file missing: compile before running.
4 |   "$loc"/compile.sh 1>&2 || exit 1  # Send the compile step's stdout to stderr, away from the matcher's stdout.
5 | fi
6 | exec java -Dfile.encoding=UTF8 -cp "$loc":"$loc"/../../Utilities/scoring/meteor-1.0/dist/meteor-1.0/meteor.jar MatcherMEMT "$@"
7 | 


--------------------------------------------------------------------------------
/MEMT/Controller/Config.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Controller_Config_h
 2 | #define _MEMT_Controller_Config_h
 3 | 
 4 | #include <string>
 5 | #include <vector>
 6 | 
 7 | namespace controller {  // None of the structs below declares a constructor, so default-initialized instances start with indeterminate scalar members.
 8 | struct SentenceTransitionConfig {  // NOTE(review): names suggest request and decoder-worker counts — confirm against the code that reads them.
 9 |   size_t requests, decoder_workers;
10 | };
11 | 
12 | struct ConnectionTransitionConfig {  // Connection-level sizes; embeds the sentence-level settings.
13 |   SentenceTransitionConfig sentence;
14 |   size_t connections;
15 |   size_t output_queue_size;
16 | };
17 | 
18 | struct LMConfig {
19 |   std::vector<std::string> file;  // A list, so several entries can be supplied; presumably language model paths — confirm.
20 | };
21 | 
22 | struct ProcessConfig {  // Process-level switches and file names.
23 |   bool daemonize;
24 |   bool keep_open;
25 |   bool call_setsid;
26 |   std::string pidfile, portfile;
27 | };
28 | 
29 | struct ServiceConfig {  // Top-level aggregate: connection, LM and process settings plus the port.
30 |   ConnectionTransitionConfig connection;
31 |   LMConfig lm;
32 |   ProcessConfig process;
33 |   unsigned short int port;
34 | };
35 | 
36 | } // namespace controller
37 | #endif // _MEMT_Controller_Config_h
38 | 


--------------------------------------------------------------------------------
/MEMT/Controller/CoordWrite.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Controller_CoordWrite_h
 2 | #define _MEMT_Controller_CoordWrite_h
 3 | 
 4 | #include <boost/thread/mutex.hpp>
 5 | 
 6 | #include <ostream>
 7 | 
 8 | // Coordinate writes so output is returned in blocks that the reader understands.
 9 | namespace controller {
10 | 
11 | class CoordStream {  // Pairs an output stream with the mutex that CoordWrite locks.
12 |   public:
13 |     explicit CoordStream(std::ostream &stream) : stream_(stream) {}  // Stores a reference only, so stream must outlive this object.
14 | 
15 |   private:
16 |     friend class CoordWrite;  // CoordWrite is the only way to reach the stream and mutex.
17 | 
18 |     std::ostream &stream_;
19 | 
20 |     boost::mutex mutex_;
21 | };
22 | 
23 | class CoordWrite {  // Scoped access to a CoordStream: the mutex is held from construction until destruction.
24 |   public:
25 |     explicit CoordWrite(CoordStream &coord)
26 |       : stream_(coord.stream_), lock_(coord.mutex_) {
27 |     }
28 | 
29 |     std::ostream &Get() {  return stream_;  }
30 | 
31 |     std::ostream &operator*() { return stream_; }  // Get(), operator*, operator-> and the conversion below all expose the same stream.
32 |     std::ostream *operator->() { return &stream_; }
33 | 
34 |     operator std::ostream &() {
35 |       return stream_;
36 |     }
37 | 
38 |   private:
39 |     std::ostream &stream_;
40 |     boost::unique_lock<boost::mutex> lock_;  // Constructed from the CoordStream's mutex in the initializer list.
41 | };
42 | 
43 | } // namespace controller
44 | 
45 | #endif // _MEMT_Controller_CoordWrite_h
46 | 


--------------------------------------------------------------------------------
/MEMT/Controller/Jamfile:
--------------------------------------------------------------------------------
 1 | alias decoder_handler : ../Output//null_beam_dumper ../Decoder//decoder ../Strategy/Legacy//legacy ..//..//boost_thread ;
 2 | alias output_handler : ../Output//output ..//..//boost_thread ;
 3 | alias sentence : decoder_handler output_handler ../Decoder//completed ../Decoder//decoder_config ../Input//input /util//kenutil ;
 4 | 
 5 | alias connection : sentence Sentence//controller_sentence_options /util//kenutil ..//..//boost_system ;
 6 | 
 7 | fakelib command_line : CommandLine.cc Sentence//controller_sentence_options ..//..//boost_thread ..//..//boost_program_options ;
 8 | 
 9 | exe MEMT : Main.cc connection command_line ../Feature/Verbatim//verbatim_sentence ../Feature/LM//feature_lm_sentence /util//kenutil /lm//kenlm ;
10 | 


--------------------------------------------------------------------------------
/MEMT/Controller/Sentence/Config.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Controller_Sentence_Config_h
 2 | #define _MEMT_Controller_Sentence_Config_h
 3 | 
 4 | #include "MEMT/Input/Config.hh"
 5 | #include "MEMT/Decoder/Config.hh"
 6 | #include "MEMT/Strategy/Legacy/Config.hh"
 7 | #include "MEMT/Output/Config.hh"
 8 | 
 9 | namespace controller {
10 | namespace sentence {
11 | 
// Bundle of the per-component configuration structs (input, decoder, legacy
// strategy, output) together with a system count.
struct Config {
  // NOTE(review): presumably the number of systems being combined -- confirm against the code that fills it in.
  size_t num_systems;
  input::Config input;
  decoder::Config decoder;
  strategy::legacy::Config legacy;
  output::Config output;
};
19 | 
20 | } // namespace sentence
21 | } // namespace controller
22 | 
23 | #endif // _MEMT_Controller_Sentence_Config_h
24 | 


--------------------------------------------------------------------------------
/MEMT/Controller/Sentence/Jamfile:
--------------------------------------------------------------------------------
# Options.cc built through the fakelib rule, depending on the option targets of Decoder, Input, Strategy/Legacy and Output.
fakelib controller_sentence_options : Options.cc /util//kenutil ../../Decoder//decoder_options ../../Input//input_options ../../Strategy/Legacy//strategy_legacy_options ../../Output//output_options ../../..//boost_program_options ;
2 | 


--------------------------------------------------------------------------------
/MEMT/Controller/Sentence/Options.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Controller_Sentence_Options_h
 2 | #define _MEMT_Controller_Sentence_Options_h
 3 | 
 4 | #include "MEMT/Decoder/Options.hh"
 5 | #include "MEMT/Input/Options.hh"
 6 | #include "MEMT/Strategy/Legacy/Options.hh"
 7 | #include "MEMT/Output/Options.hh"
 8 | 
 9 | #include "util/options.hh"
10 | 
11 | #include <boost/program_options/options_description.hpp>
12 | #include <boost/program_options/variables_map.hpp>
13 | 
14 | #include <istream>
15 | #include <string>
16 | #include <vector>
17 | 
18 | namespace controller {
19 | namespace sentence {
20 | 
// Forward declaration.  Config is defined with the struct keyword in
// MEMT/Controller/Sentence/Config.hh, so the same keyword is used here to
// avoid mismatched-tag warnings.
struct Config;
22 | 
// Option handling for a controller::sentence::Config.  Holds one
// ConfigOptions object for each of the input, decoder, legacy strategy and
// output components, plus a reference to the Config being described.
// The member function bodies live outside this header.
class ConfigOptions {
  public:
    // config is kept by reference; lm_orders is kept as a copy (see lm_orders_).
    explicit ConfigOptions(Config &config, const std::vector<unsigned char> &lm_orders);

    void SetDefaults();

    // The option descriptions owned by this object.
    const boost::program_options::options_description &Options() const { return options_; }

    // NOTE(review): presumably called once the options in vm have been parsed -- confirm in Options.cc.
    void Finish(const boost::program_options::variables_map &vm);

  private:
    input::ConfigOptions input_;
    decoder::ConfigOptions decoder_;
    strategy::legacy::ConfigOptions legacy_;
    output::ConfigOptions output_;

    Config &config_;

    boost::program_options::options_description options_;

    bool incremental_;

    std::vector<unsigned char> lm_orders_;
};
47 | 
// Defined outside this header.
// NOTE(review): presumably reads configuration from stream and applies it through options -- confirm in Options.cc.
void ConfigCommand(std::istream &stream, ConfigOptions &options);
49 | 
50 | } // namespace sentence
51 | } // namespace controller
52 | 
53 | #endif // _MEMT_Controller_Sentence_Options_h
54 | 


--------------------------------------------------------------------------------
/MEMT/Controller/Server.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Controller_Server_h
 2 | #define _MEMT_Controller_Server_h
 3 | 
 4 | #include <boost/asio/ip/tcp.hpp>
 5 | 
 6 | #include <fstream>
 7 | 
 8 | #include <err.h>
 9 | #include <sys/types.h>
10 | #include <unistd.h>
11 | 
12 | namespace controller {
13 | 
14 | template <class ConnTransition> void RunServer(ConnTransition &transition, boost::asio::ip::tcp::acceptor &acceptor) {
15 |   using namespace boost::asio::ip;
16 | 
17 |   while (1) {
18 |     ConnectionRequest *req = NULL;
19 |     try {
20 |       // Get a socket and make sure it's clear.
21 |       req = &transition.GetFree();
22 |       tcp::socket &socket = req->GetSocket();
23 |       socket.close();
24 | 
25 |       acceptor.accept(socket);
26 |       std::cerr << "Got connection." << std::endl;
27 |       transition.Opened(*req);
28 |     }
29 |     catch (std::exception &e) {
30 |       std::cerr << e.what() << std::endl;
31 |       if (req) transition.Failed(*req);
32 |     }
33 |     catch (...) {
34 |       std::cerr << "Some server exception" << std::endl;
35 |       if (req) transition.Failed(*req);
36 |     }
37 |   }
38 | }
39 | 
40 | } // namespace controller
41 | 
42 | #endif // _MEMT_Controller_Server_h
43 | 


--------------------------------------------------------------------------------
/MEMT/Decoder/Completed.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Decoder_Completed_hh
 2 | #define _MEMT_Decoder_Completed_hh
 3 | 
 4 | #include "MEMT/Decoder/Score.hh"
 5 | #include "MEMT/Input/Location.hh"
 6 | 
 7 | #include <vector>
 8 | 
 9 | namespace decoder {
10 | 
11 | // Final hypothesis produced by the decoder.  This is the only one that should be used outside.
12 | class CompletedHypothesis {
13 |   public:
14 |     CompletedHypothesis() {}
15 | 
16 |     void Reset(const Score &score, const std::vector<LogScore> &end_features) {
17 |       score_ = score; 
18 |       end_features_ = end_features;
19 |       words_.clear();
20 |     }
21 | 
22 |     void AppendWord(const input::Location &source) {
23 |       words_.push_back(source);
24 |     }
25 | 
26 |     const std::vector<input::Location> &Words() const { return words_; }
27 | 
28 |     // If length_normalize is set, this is normalized.
29 |     const Score &GetScore() const { return score_; }
30 | 
31 |     const std::vector<LogScore> &EndFeatures() const { return end_features_; }
32 | 
33 |   private:
34 |     std::vector<input::Location> words_;
35 | 
36 |     Score score_;
37 | 
38 |     std::vector<LogScore> end_features_;
39 | };
40 | 
41 | } // namespace decoder
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/MEMT/Decoder/Config.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Decoder/Config.hh"
 2 | 
 3 | #include "MEMT/Strategy/Phrase/Type.hh"
 4 | 
 5 | namespace decoder {
 6 | 
 7 | std::ostream &operator<<(std::ostream &str, const Config &config) {
 8 |   return str
 9 |     << "beam_size = " << config.internal_beam_size << '\n'
10 |     << "output.nbest = " << config.end_beam_size << '\n'
11 |     << "length_normalize = " << config.length_normalize << '\n';
12 | }
13 | 
14 | } // namespace decoder
15 | 


--------------------------------------------------------------------------------
/MEMT/Decoder/Config.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Decoder_Config_h
 2 | #define _MEMT_Decoder_Config_h
 3 | 
 4 | #include "MEMT/Strategy/Phrase/Type.hh"
 5 | #include "MEMT/Strategy/Horizon/Config.hh"
 6 | 
 7 | #include "util/debug.hh"
 8 | #include "util/numbers.hh"
 9 | 
10 | #include <ostream>
11 | #include <vector>
12 | 
13 | namespace decoder {
14 | 
// Decoder settings.
struct Config {
  // Size of the decoder's internal search beam (option "beam_size").
  unsigned int internal_beam_size;
  // Size of the end beam; printed as "output.nbest".
  unsigned int end_beam_size;
  // Length normalize before comparing sentence end scores?
  bool length_normalize;
};
19 | 
// Prints the settings as "key = value" lines; defined in Config.cc.
std::ostream &operator<<(std::ostream &str, const Config &config);
21 | 
22 | } // namespace decoder
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/MEMT/Decoder/HistoryBeam.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Decoder/HistoryBeam.hh"
 2 | 
 3 | #include <algorithm>
 4 | 
 5 | namespace decoder {
 6 | 
 7 | void MergeSizeOneHistoryBeam(HistoryBeam &into, const HistoryBeam &with) {
 8 |   assert(with.size() == 1);
 9 |   const boost::shared_ptr<HypHistory> &hist = *with.unordered_begin();
10 |   if (into.MayMakeIt(hist)) {
11 |     into.Available() = hist;
12 |     into.InsertAvailable();
13 |   }
14 | }
15 | 
16 | void DumpBeamToHypHistory(HistoryBeam &in, HypHistory &out) {
17 |   HypHistory::Previous &previous = out.MutablePrevious();
18 |   previous.clear();
19 |   previous.reserve(in.size());
20 |   
21 |   in.destructive_ordered_make();
22 |   for (HistoryBeam::decreasing_iterator i = in.destructive_decreasing_begin(); i != in.destructive_decreasing_end(); ++i) {
23 |     previous.push_back(*i);
24 |   }
25 |   out.MakeHash();
26 | }
27 | 
28 | } // namespace decoder
29 | 


--------------------------------------------------------------------------------
/MEMT/Decoder/HistoryBeam.hh:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Decoder/History.hh"
 2 | #include "util/n_best.hh"
 3 | 
 4 | #include <functional>
 5 | 
 6 | #include <boost/shared_ptr.hpp>
 7 | 
 8 | /* When hypotheses are LM dupe detected, they get passed to this beam, which 
 9 |  * does a secondary full equality dupe removal and packs the hypotheses.
10 |  */
11 | 
12 | namespace decoder {
13 | 
14 | namespace detail {
15 | 
16 | struct HistoryLess : public std::binary_function<const boost::shared_ptr<HypHistory> &, const boost::shared_ptr<HypHistory> &, bool> {
17 |   HistoryLess() : less_() {}
18 | 
19 |   bool operator()(const boost::shared_ptr<HypHistory> &left, const boost::shared_ptr<HypHistory> &right) const {
20 |     return less_(left->Entry(), right->Entry());
21 |   }
22 |   private:
23 |     const HistoryEntry::LessByScore less_;
24 | };
25 | 
26 | } // namespace detail
27 | 
28 | typedef nbest::NBest<
29 |   boost::shared_ptr<HypHistory>,
30 |   detail::HistoryLess,
31 |   nbest::HashDupe<boost::shared_ptr<HypHistory>, HypHistory::ReturnHash, HypHistory::EqualsHashOnly>,
32 |   nbest::OneBestMerge<boost::shared_ptr<HypHistory>, detail::HistoryLess> > HistoryBeam;
33 | 
34 | void MergeSizeOneHistoryBeam(HistoryBeam &into, const HistoryBeam &with);
35 | 
36 | void DumpBeamToHypHistory(HistoryBeam &in, HypHistory &out);
37 | 
38 | } // namespace decoder
39 | 


--------------------------------------------------------------------------------
/MEMT/Decoder/InternalBeam.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Decoder_InternalBeam_h
 2 | #define _MEMT_Decoder_InternalBeam_h
 3 | 
 4 | #include "util/n_best.hh"
 5 | 
 6 | #include <functional>
 7 | 
 8 | namespace decoder {
 9 | 
10 | namespace detail {
11 | 
// Binary functor that forwards to the hypothesis' own Merge member and
// returns whatever Merge returns.
template <class InternalHypothesisT> struct CallMerge : public std::binary_function<InternalHypothesisT &, const InternalHypothesisT &, bool> {
  bool operator()(InternalHypothesisT &to, const InternalHypothesisT &with) const {
    const bool merge_result = to.Merge(with);
    return merge_result;
  }
};
17 | 
18 | }  // namespace detail
19 | 
// Type function: InternalBeam<Hyp>::T is the nbest::NBest instantiation used
// for hypotheses of type Hyp.  Hyp must provide the nested types
// LessByOverall, ReturnCachedHash and EqualsUpToLM, and a Merge member
// (called through detail::CallMerge).
template <class Hyp> struct InternalBeam {
  typedef nbest::NBest<
    Hyp,
    typename Hyp::LessByOverall,
    nbest::HashDupe<Hyp, typename Hyp::ReturnCachedHash, typename Hyp::EqualsUpToLM>,
    detail::CallMerge<Hyp>
  > T;
};
28 | 
29 | } // namespace decoder
30 | 
31 | #endif
32 | 
33 | 


--------------------------------------------------------------------------------
/MEMT/Decoder/Jamfile:
--------------------------------------------------------------------------------
# Alias: completed only pulls in kenutil.
alias completed : /util//kenutil ;

# Config.cc built through the fakelib rule (the rule itself is defined outside this Jamfile).
fakelib decoder_config : Config.cc /util//kenutil ;

# Options.cc plus the horizon strategy options and boost_program_options.
fakelib decoder_options : Options.cc ../Strategy/Horizon//strategy_horizon_options /util//kenutil ../..//boost_program_options ;

# The decoder sources together with decoder_config, input and kenutil.
fakelib decoder
	: decoder_config EndBeam.cc HistoryBeam.cc Score.cc ../Input//input /util//kenutil ;
9 | 


--------------------------------------------------------------------------------
/MEMT/Decoder/Options.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Decoder/Options.hh"
 2 | 
 3 | #include "MEMT/Decoder/Config.hh"
 4 | 
 5 | #include "util/options.hh"
 6 | 
 7 | #include <boost/lexical_cast.hpp>
 8 | 
 9 | namespace decoder {
10 | 
11 | ConfigOptions::ConfigOptions(Config &config)
12 |   : config_(config), options_("Decoding"), incremental_(false) {
13 |   namespace po = boost::program_options;
14 | 
15 |   options_.add_options()
16 |     ("beam_size",
17 |      po::value(&config_.internal_beam_size),
18 |      "Size of the decoder's internal search beam")
19 | 
20 |     ("length_normalize",
21 |      po::value(&config_.length_normalize),
22 |      "Length normalize before comparing sentence end scores?");
23 | 
24 |   SetDefaults();
25 | }
26 | 
27 | void ConfigOptions::SetDefaults() {
28 |   // Defaults are set here because configuration messages may be updates.
29 |   config_.internal_beam_size = 500;
30 |   config_.end_beam_size = 1;
31 |   config_.length_normalize = true;
32 |   incremental_ = false;
33 | }
34 | 
// Stores the caller-supplied end beam size in the config.  vm is not used here.
void ConfigOptions::Finish(const boost::program_options::variables_map &vm, unsigned int end_beam_size) {
  config_.end_beam_size = end_beam_size;
}
38 | 
39 | } // namespace decoder
40 | 


--------------------------------------------------------------------------------
/MEMT/Decoder/Options.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Decoder_Options_h
 2 | #define _MEMT_Decoder_Options_h
 3 | 
 4 | #include "MEMT/Strategy/Horizon/Options.hh"
 5 | 
 6 | #include <boost/program_options/options_description.hpp>
 7 | #include <boost/program_options/variables_map.hpp>
 8 | 
 9 | namespace decoder {
10 | 
// Forward declaration.  Config is defined with the struct keyword in
// MEMT/Decoder/Config.hh, so the same keyword is used here to avoid
// mismatched-tag warnings.
struct Config;
12 | 
// Option handling for a decoder::Config.  Keeps a reference to the Config and
// registers options that write straight into it (see Options.cc).
class ConfigOptions {
  public:
    // Registers the options and applies the defaults.
    explicit ConfigOptions(Config &config);

    // Resets the config fields and the incremental flag to their defaults.
    void SetDefaults();

    // The option descriptions owned by this object.
    const boost::program_options::options_description &Options() const { return options_; }

    // Stores end_beam_size in the config.
    void Finish(const boost::program_options::variables_map &vm, unsigned int end_beam_size);

  private:
    Config &config_;

    boost::program_options::options_description options_;

    bool incremental_;
};
30 | 
31 | } // namespace decoder
32 | 
33 | #endif // _MEMT_Decoder_Options_h
34 | 


--------------------------------------------------------------------------------
/MEMT/Decoder/Score.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Decoder/Score.hh"
 2 | 
 3 | namespace decoder {
 4 | 
 5 | std::ostream &operator<<(std::ostream &s, const Score &score) {
 6 |   s << "overall=" << score.Overall().Log();
 7 |   s << ", features = \"";
 8 |   for (std::vector<LogScore>::const_iterator i = score.Features().begin(); i != score.Features().end(); ++i) {
 9 |     if (i != score.Features().begin()) s << ' ';
10 |     s << i->Log();
11 |   }
12 |   s << '"';
13 |   return s;
14 | }
15 | 
16 | } // namespace decoder
17 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Base/Process.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Base_Process_h
 2 | #define _MEMT_Feature_Base_Process_h
 3 | 
 4 | namespace feature {
 5 | namespace base {
 6 | 
 7 | // process-level class for features that don't keep process-level state
 8 | template <class SentenceT> class NullProcess {
 9 |   public:
10 |     typedef SentenceT Sentence;
11 |     struct Config {};
12 | 
13 |     NullProcess() {}
14 | 
15 |     Sentence GetSentence() const {
16 |       return Sentence();
17 |     }
18 | };
19 | 
20 | } // namespace base
21 | } // namespace feature
#endif // _MEMT_Feature_Base_Process_h
23 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Base/Sign.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Base_Sign_h
 2 | #define _MEMT_Feature_Base_Sign_h
 3 | 
 4 | namespace feature {
 5 | namespace base {
 6 | 
 7 | // Optimizer sign constraints.  These are not enforced by MEMT but passed to the client if requested.  That way the client doesn't have to know about positional features.  
 8 | typedef enum {ANY_WEIGHT=0, POSITIVE_WEIGHT=1, NEGATIVE_WEIGHT=-1} WeightSign;
 9 | 
10 | } // namespace base
} // namespace feature
12 | 
#endif // _MEMT_Feature_Base_Sign_h
14 | 


--------------------------------------------------------------------------------
/MEMT/Feature/LM/Config.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Feature/LM/Config.hh"
 2 | 
 3 | namespace feature {
 4 | namespace lm {
 5 | 
 6 | void Config::WeightHint(base::WeightSign *out) const {
 7 |   for (std::vector<unsigned char>::const_iterator l = orders.begin(); l != orders.end(); ++l) {
 8 |     // probability
 9 |     *(out++) = base::POSITIVE_WEIGHT;
10 |     // <unk>
11 |     *(out++) = base::ANY_WEIGHT;
12 |     if (by_length) {
13 |       for (unsigned char i = 1; i < *l; ++i)
14 |         // length count
15 |         *(out++) = base::ANY_WEIGHT;
16 |     }
17 |   }
18 | }
19 | 
20 | } // namespace lm
21 | } // namespace feature
22 | 


--------------------------------------------------------------------------------
/MEMT/Feature/LM/Config.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_LM_Config_h
 2 | #define _MEMT_Feature_LM_Config_h
 3 | 
 4 | #include "MEMT/Feature/Base/Sign.hh"
 5 | 
 6 | #include "util/numbers.hh"
 7 | 
 8 | #include <vector>
 9 | 
10 | namespace feature {
11 | namespace lm {
12 | 
13 | struct Config {
14 |   // Include counts for each n-gram length?
15 |   bool by_length;
16 |   std::vector<unsigned char> orders;
17 |   size_t count;
18 |   size_t FeatureCount() const { return count; }
19 |   void WeightHint(base::WeightSign *out) const;
20 | };
21 | 
22 | } // namespace lm
23 | } // namespace feature
24 | 
25 | #endif // _MEMT_Feature_LM_Config_h
26 | 


--------------------------------------------------------------------------------
/MEMT/Feature/LM/Hypothesis.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_LM_Hypothesis_h
 2 | #define _MEMT_Feature_LM_Hypothesis_h
 3 | 
 4 | namespace feature {
 5 | namespace lm {
 6 | 
 7 | template <class LanguageModel> struct Hypothesis {
 8 |   typedef typename LanguageModel::State T;
 9 | };
10 | 
11 | } // namespace lm
12 | } // namespace feature
13 | 
14 | #endif // _MEMT_Feature_LM_Hypothesis_h
15 | 


--------------------------------------------------------------------------------
/MEMT/Feature/LM/Jamfile:
--------------------------------------------------------------------------------
# Config.cc built through the fakelib rule (the rule itself is defined outside this Jamfile).
fakelib feature_lm_config : Config.cc ;

# Sentence.cc plus the config target and kenutil.
fakelib feature_lm_sentence : Sentence.cc feature_lm_config /util//kenutil ;

# Options.cc plus the config target and boost_program_options.
fakelib feature_lm_options : Options.cc feature_lm_config ../../..//boost_program_options ; 
6 | 


--------------------------------------------------------------------------------
/MEMT/Feature/LM/Options.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Feature/LM/Options.hh"
 2 | 
 3 | #include "MEMT/Feature/LM/Config.hh"
 4 | 
 5 | namespace feature {
 6 | namespace lm {
 7 | 
 8 | ConfigOptions::ConfigOptions(Config &config) : config_(config), options_("LM feature") {
 9 |   options_.add_options()
10 |     ("score.lm.by_length", boost::program_options::value(&config_.by_length), "Report counts for each n-gram length as a feature?");
11 | }
12 | 
// Default: do not report per-length counts.
void ConfigOptions::SetDefaults() {
  config_.by_length = false;
}
16 | 
17 | void ConfigOptions::Finish(const boost::program_options::variables_map &vm, size_t num_systems, const std::vector<unsigned char> &lm_order) {
18 |   config_.orders = lm_order;
19 |   config_.count = 2 * lm_order.size();
20 |   if (config_.by_length) {
21 |     // Add a feature for all but longest order
22 |     for (std::vector<unsigned char>::const_iterator i = lm_order.begin(); i != lm_order.end(); ++i) {
23 |       config_.count += *i;
24 |     }
25 |     config_.count -= lm_order.size();
26 |   }
27 | }
28 | 
29 | } // namespace lm
30 | } // namespace feature
31 | 


--------------------------------------------------------------------------------
/MEMT/Feature/LM/Options.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_LM_Options_h
 2 | #define _MEMT_Feature_LM_Options_h
 3 | 
 4 | #include <boost/program_options/options_description.hpp>
 5 | #include <boost/program_options/variables_map.hpp>
 6 | 
 7 | #include <vector>
 8 | 
 9 | namespace feature {
10 | namespace lm {
11 | 
// Forward declaration.  Config is defined with the struct keyword in
// MEMT/Feature/LM/Config.hh, so the same keyword is used here to avoid
// mismatched-tag warnings.
struct Config;
13 | 
// Option handling for the LM feature's Config.  Keeps a reference to the
// Config and registers an option that writes straight into it (see Options.cc).
class ConfigOptions {
  public:
    // Registers the options.  Does not call SetDefaults().
    explicit ConfigOptions(Config &config);

    // Sets by_length to false.
    void SetDefaults();

    // The option descriptions owned by this object.
    const boost::program_options::options_description &Options() const { return options_; }

    // Copies lm_order into the config and computes the feature count.
    void Finish(const boost::program_options::variables_map &vm, size_t num_systems, const std::vector<unsigned char> &lm_order);

  private:
    Config &config_;

    boost::program_options::options_description options_;
};
29 | 
30 | } // namespace lm
31 | } // namespace feature
32 | 
33 | #endif // _MEMT_Feature_LM_Options_h
34 | 


--------------------------------------------------------------------------------
/MEMT/Feature/LM/Process.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_LM_Process_h
 2 | #define _MEMT_Feature_LM_Process_h
 3 | 
 4 | #include "MEMT/Feature/LM/Sentence.hh"
 5 | 
 6 | #include <vector>
 7 | 
 8 | namespace feature {
 9 | namespace lm {
10 | 
11 | /* TODO: move more lm configuration here. */
12 | template <class LanguageModelT> class Process {
13 |   public:
14 |     typedef LanguageModelT LanguageModel;
15 |     typedef lm::Sentence<LanguageModel> Sentence;
16 | 
17 |     struct Config {};
18 | 
19 |     // Workaround for constructing vector of process objects then configuring them
20 |     Process() {}
21 |     void SetLM(const std::vector<const LanguageModel*> &models) { models_ = models; }
22 | 
23 |     Sentence GetSentence() const { return Sentence(models_); }
24 | 
25 |     std::vector<unsigned char> Orders() const {
26 |       std::vector<unsigned char> ret;
27 |       for (size_t i = 0; i < models_.size(); ++i) {
28 |         ret.push_back(models_[i]->Order());
29 |       }
30 |       return ret;
31 |     }
32 | 
33 |   private:
34 |     std::vector<const LanguageModel*> models_;
35 | };
36 | 
37 | } // namespace lm
38 | } // namespace feature
39 | 
40 | #endif // _MEMT_Feature_LM_Process_h
41 | 


--------------------------------------------------------------------------------
/MEMT/Feature/LM/Sentence.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Feature/LM/Sentence.hh"
 2 | 
 3 | #include "MEMT/Input/Input.hh"
 4 | 
 5 | namespace feature {
 6 | namespace lm {
 7 | 
 8 | void LookupVocab(const input::Input &in, const ::lm::base::Vocabulary &vocab, std::vector<std::vector< ::lm::WordIndex> > &indices) {
 9 |   indices.resize(in.engines.size());
10 |   for (unsigned int e = 0; e < in.engines.size(); ++e) {
11 |     indices[e].resize(in.engines[e].words.size());
12 |     for (unsigned int o = 0; o < in.engines[e].words.size(); ++o) {
13 |       indices[e][o] = vocab.Index(in.engines[e].words[o].text.Canonical());
14 |     }
15 |   }
16 | }
17 | 
18 | } // namespace lm
19 | } // namespace feature
20 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Length/Config.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Length_Config_h
 2 | #define _MEMT_Feature_Length_Config_h
 3 | 
 4 | #include "MEMT/Feature/Base/Sign.hh"
 5 | 
 6 | namespace feature {
 7 | namespace length {
 8 | 
 9 | struct Config {
10 |   unsigned FeatureCount() const { return 1; }
11 |   void WeightHint(base::WeightSign *out) const { *out = base::ANY_WEIGHT; }
12 | };
13 | 
14 | } // namespace length
15 | } // namespace feature
16 | 
17 | #endif // _MEMT_Feature_Length_Config_h
18 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Length/Hypothesis.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Length_Hypothesis_h
 2 | #define _MEMT_Feature_Length_Hypothesis_h
 3 | 
 4 | namespace feature {
 5 | namespace length {
 6 | 
 7 | struct Hypothesis {};
 8 | 
 9 | bool operator==(const Hypothesis left, const Hypothesis right) {
10 |   return true;
11 | }
12 | 
13 | size_t hash_value(const Hypothesis value) {
14 |   // Mashing on keyboard.
15 |   return 415648974;
16 | }
17 | 
18 | } // namespace length
19 | } // namespace feature
20 | 
21 | #endif // _MEMT_Feature_Length_Hypothesis_h
22 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Length/Process.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Length_h
 2 | #define _MEMT_Feature_Length_h
 3 | 
 4 | #include "MEMT/Feature/Base/Process.hh"
 5 | #include "MEMT/Feature/Length/Sentence.hh"
 6 | 
// The length feature has no process-level state, so its Process is the
// generic NullProcess instantiated with the length Sentence.
namespace feature { namespace length {
typedef base::NullProcess<Sentence> Process;
} } // namespace length feature
10 | 
11 | #endif // _MEMT_Feature_Length_h
12 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Length/Sentence.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Length_Sentence_h
 2 | #define _MEMT_Feature_Length_Sentence_h
 3 | 
 4 | #include "MEMT/Feature/Length/Config.hh"
 5 | #include "MEMT/Feature/Length/Hypothesis.hh"
 6 | 
 7 | namespace feature {
 8 | namespace length {
 9 | 
10 | class Sentence {
11 |   public:
12 |     typedef length::Hypothesis Hypothesis;
13 |     typedef length::Config Config;
14 | 
15 |     Sentence() {}
16 | 
17 |     void Reset(const Config &config, const input::Input &input) {}
18 | 
19 |     size_t BothFeatures() const { return 0; }
20 |     size_t EndFeatures() const { return 1; }
21 | 
22 |     void Begin(Hypothesis &start_state, LogScore *start_scores) const {}
23 | 
24 |     void Extend(
25 |         const input::Input &input,
26 |         const decoder::HypHistory *history,
27 |         const input::Location &append,
28 |         const Hypothesis &from_state,
29 |         const LogScore *from_scores,
30 |         Hypothesis &to_state,
31 |         LogScore *to_scores) const {}
32 | 
33 |     void End(size_t length, LogScore *out) const {
34 |       out->MutableLog() = static_cast<LinearScore>(length);
35 |     }
36 | };
37 | 
38 | } // namespace length
39 | } // namespace feature
40 | 
41 | #endif // _MEMT_Feature_Length_Sentence_h
42 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Scorer/Config.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Feature/Scorer/Config.hh"
 2 | 
 3 | #include <boost/fusion/algorithm.hpp>
 4 | 
 5 | namespace feature {
 6 | namespace scorer {
 7 | 
 8 | namespace {
 9 | struct GatherWeightHints {
10 |   typedef base::WeightSign *result_type;
11 | 
12 |   template <class Feature> result_type operator()(const result_type previous, const Feature &feature) const {
13 |     feature.WeightHint(previous);
14 |     return previous + feature.FeatureCount();
15 |   }
16 | };
17 | 
// Fold step: adds one feature config's FeatureCount() to the running total.
struct FeatureCountFold {
  typedef unsigned result_type;

  template <class Feature> result_type operator()(const unsigned previous, const Feature &feature) const {
    const result_type total = previous + feature.FeatureCount();
    return total;
  }
};
24 | } // namespace
25 | 
26 | void Config::WeightHint(base::WeightSign *out) const {
27 |   fold(features, out, GatherWeightHints());
28 | }
29 | 
30 | unsigned Config::FeatureCount() const {
31 |   return fold(features, 0, FeatureCountFold());
32 | }
33 | 
34 | } // namespace scorer
35 | } // namespace feature
36 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Scorer/Config.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Scorer_Config_h
 2 | #define _MEMT_Feature_Scorer_Config_h
 3 | 
 4 | #include "MEMT/Feature/Base/Sign.hh"
 5 | #include "MEMT/Feature/Length/Config.hh"
 6 | #include "MEMT/Feature/LM/Config.hh"
 7 | #include "MEMT/Feature/Verbatim/Config.hh"
 8 | 
 9 | #include <boost/fusion/container/vector.hpp>
10 | 
11 | namespace feature {
12 | namespace scorer {
13 | 
// Scorer configuration: one config object per feature, the weights and the
// fuzz ratio.
struct Config {
  // TODO: this should be tied to main's idea of the features.
  // Fixed feature list: LM, two verbatim features, then length.
  typedef boost::fusion::vector<lm::Config, verbatim::Config, verbatim::Config, length::Config> Features;
  Features features;
  std::vector<LinearScore> weights;
  // NOTE(review): presumably the ratio handed to Fuzz::Reset -- confirm at the call site.
  LinearScore fuzz_ratio;

  // Sum of the feature counts of all entries in features (see Config.cc).
  unsigned FeatureCount() const;

  // Writes FeatureCount() sign hints starting at out (see Config.cc).
  void WeightHint(base::WeightSign *out) const;
};
25 | 
26 | } // namespace scorer
27 | } // namespace feature
28 | 
29 | #endif // _MEMT_Feature_Scorer_Config_h
30 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Scorer/Fuzz.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Feature/Scorer/Fuzz.hh"
 2 | 
 3 | #include "util/numbers.hh"
 4 | 
 5 | #include <boost/random/mersenne_twister.hpp>
 6 | #include <boost/random/uniform_real.hpp>
 7 | #include <boost/random/variate_generator.hpp>
 8 | 
 9 | #include <vector>
10 | 
11 | namespace feature {
12 | namespace scorer {
13 | 
14 | void Fuzz::Apply(const std::vector<LinearScore> &in_weights, std::vector<LinearScore> &out_weights) {
15 |   if (ratio_ <= 0.0) {
16 |     out_weights = in_weights;
17 |     return;
18 |   }
19 |   boost::uniform_real<LinearScore> dist(1.0 - ratio_, 1.0 + ratio_);
20 |   boost::variate_generator<boost::mt19937 &, boost::uniform_real<LinearScore> > sample(rng_, dist);
21 | 
22 |   out_weights.clear();
23 |   for (std::vector<LinearScore>::const_iterator i = in_weights.begin(); i != in_weights.end(); ++i) {
24 |     out_weights.push_back(*i * sample());
25 |   }
26 | }
27 | 
28 | } // namespace scorer
29 | } // namespace feature
30 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Scorer/Fuzz.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Scorer_Fuzz_h
 2 | #define _MEMT_Feature_Scorer_Fuzz_h
 3 | 
 4 | // Randomly modifies weights based on their existing values.  This is used in
 5 | // a basic simulated annealing for tuning.
 6 | 
 7 | #include "util/numbers.hh"
 8 | 
 9 | #include <boost/scoped_ptr.hpp>
10 | #include <boost/random/mersenne_twister.hpp>
11 | 
12 | #include <vector>
13 | 
14 | namespace feature {
15 | namespace scorer {
16 | 
17 | class Fuzz {
18 |   public:
19 |     void Reset(LinearScore ratio) { ratio_ = ratio; }
20 | 
21 |     void Apply(const std::vector<LinearScore> &in_weights, std::vector<LinearScore> &out_weights);
22 | 
23 |   private:
24 |     LinearScore ratio_;
25 | 
26 |     boost::mt19937 rng_;
27 | };
28 | 
29 | } // namespace scorer
30 | } // namespace feature
31 | 
32 | #endif // _MEMT_Feature_Scorer_Fuzz_h
33 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Scorer/Hypothesis.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Scorer_Hypothesis_h
 2 | #define _MEMT_Feature_Scorer_Hypothesis_h
 3 | 
 4 | #include "MEMT/Feature/LM/Hypothesis.hh"
 5 | #include "MEMT/Feature/Verbatim/Hypothesis.hh"
 6 | 
 7 | // Not used directly here, but clients expect hash_value.
 8 | #include "util/hash_fusion.hh"
 9 | 
10 | #include <boost/fusion/algorithm.hpp>
11 | // Not used directly here, but clients expect ==
12 | #include <boost/fusion/sequence/comparison.hpp>
13 | #include <boost/mpl/transform.hpp>
14 | 
15 | namespace feature {
16 | namespace scorer {
17 | 
18 | namespace detail {
// Metafunction: maps a type T to its nested T::Hypothesis.
template <class T>
struct HypothesisOp {
  typedef typename T::Hypothesis type;
};
22 | } // namespace detail
23 | 
// Metafunction over a type sequence: ::type is the sequence obtained by
// replacing every element S of Sentences with S::Hypothesis.
template <class Sentences> struct Hypothesis {
  // Convert a vector of Sentence objects into their ::Hypothesis objects
  typedef typename boost::mpl::transform<Sentences, detail::HypothesisOp<boost::mpl::_1> >::type type;
};
28 | 
29 | } // namespace scorer
30 | } // namespace feature
31 | 
32 | #endif // _MEMT_Feature_Scorer_Hypothesis_h
33 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Scorer/Jamfile:
--------------------------------------------------------------------------------
# Scorer option parsing: Options.cc plus the LM and Verbatim option targets and kenutil.
# (fakelib is a rule defined outside this Jamfile.)
fakelib feature_scorer_options : Options.cc ../LM//feature_lm_options ../Verbatim//feature_verbatim_options /util//kenutil ;

# Scorer proper: Config.cc and Fuzz.cc plus the LM config target and kenutil.
fakelib scorer : Config.cc Fuzz.cc ../LM//feature_lm_config /util//kenutil ;
4 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Scorer/Options.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Scorer_Options_h
 2 | #define _MEMT_Feature_Scorer_Options_h
 3 | 
 4 | #include "MEMT/Feature/LM/Options.hh"
 5 | #include "MEMT/Feature/Verbatim/Options.hh"
 6 | 
 7 | #include "util/numbers.hh"
 8 | #include "util/options.hh"
 9 | 
10 | #include <boost/program_options/options_description.hpp>
11 | #include <boost/program_options/variables_map.hpp>
12 | 
13 | #include <string>
14 | 
15 | namespace feature {
16 | namespace scorer {
17 | 
18 | class Config;
19 | 
// Argument-parse error carrying an expected and a provided count.
// NOTE(review): presumably thrown when the number of weights supplied does
// not match the number of features -- confirm in Options.cc.
class WeightCountMismatch : public util::ArgumentParseError {
  public:
    WeightCountMismatch(size_t expected, size_t provided);
    virtual ~WeightCountMismatch() throw() {}
};
25 | 
// Program-options front end for the scorer Config.  Owns option handlers for
// the LM feature and for two verbatim features, plus its own option group.
// NOTE(review): std::vector and size_t are used below, but this header does
// not include <vector> or <cstddef> itself; it relies on transitive includes.
class ConfigOptions {
  public:
    // Constructor is defined outside this header; config_ is a reference
    // member, so presumably it is bound to config.
    explicit ConfigOptions(Config &config);

    void SetDefaults();

    // Read-only access to the option descriptions held in options_.
    const boost::program_options::options_description &Options() const { return options_; }

    // Called with the parsed variables map, the number of systems and the LM
    // orders.  NOTE(review): exact validation performed is in Options.cc.
    void Finish(const boost::program_options::variables_map &vm, size_t num_systems, const std::vector<unsigned char> &lm_order);

  private:
    // Sub-handlers for the component features.
    lm::ConfigOptions lm_;
    verbatim::ConfigOptions verbatim0_, verbatim1_;

    Config &config_;

    boost::program_options::options_description options_;

    bool incremental_;

    // NOTE(review): presumably the raw weights option text, parsed in Finish.
    std::string weight_string_;
};
48 | 
49 | } // namespace scorer
50 | } // namespace feature
51 | 
52 | #endif // _MEMT_Feature_Scorer_Options_h
53 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Verbatim/Config.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Verbatim_Config_h
 2 | #define _MEMT_Feature_Verbatim_Config_h
 3 | 
 4 | #include "MEMT/Feature/Base/Sign.hh"
 5 | #include "MEMT/Input/Alignment.hh"
 6 | 
 7 | #include "util/numbers.hh"
 8 | 
 9 | #include <cstddef>
10 | #include <vector>
11 | 
12 | #include <assert.h>
13 | 
14 | namespace feature {
15 | namespace verbatim {
16 | 
17 | struct Config {
18 |   std::size_t num_systems;
19 |   // ngram length for individual (per-system) scores
20 |   std::size_t individual;
21 |   // max ngram length for equal weight scores.  This should be >= individual.
22 |   std::size_t collective;
23 | 
24 |   // Mask of alignments that count as supporting.
25 |   input::AlignType mask;
26 | 
27 |   size_t FeatureCount() const {
28 |     assert(collective >= individual);
29 |     assert(num_systems != 0);
30 |     return num_systems * individual + collective - individual;
31 |   }
32 | 
33 |   void WeightHint(base::WeightSign *out) const {
34 |     base::WeightSign *end = out + FeatureCount();
35 |     for (; out != end; ++out) *out = base::POSITIVE_WEIGHT;
36 |   }
37 | };
38 | 
39 | } // namespace verbatim
40 | } // namespace feature
41 | 
42 | #endif // _MEMT_Feature_Verbatim_Config_h
43 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Verbatim/Hypothesis.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Verbatim_Hypothesis_h
 2 | #define _MEMT_Feature_Verbatim_Hypothesis_h
 3 | 
 4 | namespace feature {
 5 | namespace verbatim {
 6 | 
 7 | typedef std::vector<std::size_t> Hypothesis;
 8 | 
 9 | } // namespace verbatim
10 | } // namespace feature
11 | 
12 | #endif // _MEMT_Feature_Verbatim_Hypothesis_h
13 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Verbatim/Jamfile:
--------------------------------------------------------------------------------
# Verbatim option parsing: Options.cc plus the Input align_type target and kenutil.
fakelib feature_verbatim_options : Options.cc ../../Input//align_type /util//kenutil ;
# Verbatim sentence-level code: Sentence.cc plus the Input library target.
fakelib verbatim_sentence : Sentence.cc ../../Input//input ;
3 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Verbatim/Options.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Verbatim_Options_h
 2 | #define _MEMT_Feature_Verbatim_Options_h
 3 | 
 4 | #include "util/options.hh"
 5 | 
 6 | #include <boost/program_options/options_description.hpp>
 7 | #include <boost/program_options/variables_map.hpp>
 8 | 
 9 | #include <string>
10 | 
11 | namespace feature {
12 | namespace verbatim {
13 | 
14 | class Config;
15 | 
// Argument-parse error built from the individual and collective settings.
// NOTE(review): presumably thrown when individual > collective -- confirm in
// Options.cc.
class VerbatimIndividualExceedsCollective : public util::ArgumentParseError {
  public:
    VerbatimIndividualExceedsCollective(size_t individual, size_t collective);
    virtual ~VerbatimIndividualExceedsCollective() throw() {}
};
21 | 
// Program-options front end for a verbatim Config.
class ConfigOptions {
  public:
    // prefix defaults to "score.verbatim".  NOTE(review): presumably it is
    // prepended to this instance's option names -- confirm in Options.cc.
    explicit ConfigOptions(Config &config, const char *prefix = "score.verbatim");

    void SetDefaults();

    // Read-only access to the option descriptions held in options_.
    const boost::program_options::options_description &Options() const { return options_; }

    // Called with the parsed variables map and the number of systems.
    void Finish(const boost::program_options::variables_map &vm, size_t num_systems);

  private:
    Config &config_;

    boost::program_options::options_description options_;
};
37 | 
38 | } // namespace verbatim
39 | } // namespace feature
40 | 
41 | #endif // _MEMT_Feature_Verbatim_Options_h
42 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Verbatim/Process.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Verbatim_h
 2 | #define _MEMT_Feature_Verbatim_h
 3 | 
 4 | #include "MEMT/Feature/Base/Process.hh"
 5 | #include "MEMT/Feature/Verbatim/Sentence.hh"
 6 | 
 7 | namespace feature { namespace verbatim {
 8 | typedef base::NullProcess<Sentence> Process;
 9 | } } // namespace verbatim feature
10 | 
11 | #endif // _MEMT_Feature_Verbatim_h
12 | 


--------------------------------------------------------------------------------
/MEMT/Feature/Verbatim/Sentence.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Feature_Verbatim_Sentence_h
 2 | #define _MEMT_Feature_Verbatim_Sentence_h
 3 | 
 4 | #include "MEMT/Feature/Verbatim/Config.hh"
 5 | #include "MEMT/Feature/Verbatim/Hypothesis.hh"
 6 | 
 7 | #include "util/numbers.hh"
 8 | 
 9 | #include <algorithm>
10 | #include <vector>
11 | 
12 | namespace input { class Input; class Location; }
13 | namespace decoder { class HypHistory; }
14 | 
15 | namespace feature {
16 | namespace verbatim {
17 | 
// Per-sentence object of the verbatim feature.  Begin and Extend are defined
// outside this header.
class Sentence {
  public:
    typedef ::feature::verbatim::Config Config;
    typedef ::feature::verbatim::Hypothesis Hypothesis;
    
    Sentence() {}

    // Copies config into this object; input is not used here.
    void Reset(const Config &config, const input::Input &input) {
      config_ = config;
    }

    // Feature count comes straight from the stored Config.
    size_t BothFeatures() const {
      return config_.FeatureCount();
    }
    // This feature reports no end-only features.
    size_t EndFeatures() const { return 0; }

    void Begin(Hypothesis &start_state, LogScore *start_scores) const;

    // Takes the state/scores being extended (from_*) and fills the state/scores
    // of the extended hypothesis (to_*); see the definition for details.
    void Extend(
        const input::Input &input,
        const decoder::HypHistory *history,
        const input::Location &append,
        const Hypothesis &from_state,
        const LogScore *from_scores,
        Hypothesis &to_state,
        LogScore *to_scores) const;

    // Intentionally empty: nothing is written to out.
    void End(size_t length, LogScore *out) const {}

  private:
    // Copy of the Config passed to the last Reset.
    Config config_;
};
50 | 
51 | } // namespace verbatim
52 | } // namespace feature
53 | 
54 | #endif // _MEMT_Feature_Verbatim_Sentence_h
55 | 


--------------------------------------------------------------------------------
/MEMT/Input/AlignType.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Input/AlignType.hh"
 2 | #include "util/string_piece_hash.hh"
 3 | 
 4 | #include <boost/thread/once.hpp>
 5 | #include <boost/unordered_map.hpp>
 6 | 
 7 | #include <memory>
 8 | 
 9 | namespace input {
10 | 
// Alignment type names ordered by bit position: InitializeAlign maps entry i
// to the mask 1 << i.
const char *kAlignTypeNames[AL_COUNT] = {"unknown", "exact", "snowball_stem", "wn_stem", "wn_synonymy", "paraphrase", "artificial", "self", "transitive", "boundary"};
12 | 
13 | NotAlignmentTypeName::NotAlignmentTypeName(const StringPiece &name) throw() {
14 |   what_ = "Not an alignment type: ";
15 |   what_.append(name.data(), name.length());
16 | }
17 | 
namespace {
// Lookup table from alignment type name to its bit mask; empty until
// InitializeAlign has run.
// NOTE(review): std::auto_ptr was deprecated in C++11 and removed in C++17.
std::auto_ptr<boost::unordered_map<StringPiece, AlignType> > strings_to_types;

// Allocates the table and maps kAlignTypeNames[i] to the single-bit mask
// 1 << i for every i below AL_COUNT.  Invoked through call_once by
// TypeFromName.
void InitializeAlign() {
  strings_to_types.reset(new boost::unordered_map<StringPiece, AlignType>());
  for (AlignType i = 0; i < AL_COUNT; ++i) {
    (*strings_to_types)[kAlignTypeNames[i]] = 1 << i;
  }
}

// once_flag passed to call_once together with InitializeAlign.
boost::once_flag strings_to_types_flag = BOOST_ONCE_INIT;

} // namespace
31 | 
32 | AlignType TypeFromName(const StringPiece &name) {
33 |   call_once(strings_to_types_flag, InitializeAlign);
34 |   boost::unordered_map<StringPiece, AlignType>::const_iterator i(strings_to_types->find(name));
35 |   if (i == strings_to_types->end()) throw NotAlignmentTypeName(name);
36 |   return i->second;
37 | }
38 | 
39 | } // namespace input
40 | 


--------------------------------------------------------------------------------
/MEMT/Input/Alignment.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Input/Alignment.hh"
 2 | 
 3 | using namespace std;
 4 | 
 5 | namespace input {
 6 | 
 7 | const AlignType AL_IGNORE_SCORE = AL_ARTIFICIAL | AL_TRANSITIVE;
 8 | 
 9 | inline bool ListenToScore(AlignType type) {
10 |   return type & ~AL_IGNORE_SCORE;
11 | }
12 | 
// Records an alignment of this word to position offset in the given engine.
// The type bits are OR-ed into any already stored for that engine; the assert
// requires that an existing alignment to the engine points at the same offset.
void WordAlignments::Add(unsigned int engine, unsigned int offset, AlignType type) {
  assert(alignments_[engine].IsNone() || (alignments_[engine].offset == offset));
  alignments_[engine].type |= type;
  alignments_[engine].offset = offset;
}
18 | 
19 | }  // namespace input
20 | 


--------------------------------------------------------------------------------
/MEMT/Input/Capitalization.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Input_Capitalization_h
 2 | #define _MEMT_Input_Capitalization_h
 3 | 
 4 | namespace input {
 5 | class Input;
 6 | 
 7 | void ApplyCapitalization(Input &input);
 8 | 
 9 | } // namespace input
10 | #endif // _MEMT_Input_Capitalization_h
11 | 


--------------------------------------------------------------------------------
/MEMT/Input/Config.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Input_Config_h
 2 | #define _MEMT_Input_Config_h
 3 | 
 4 | #include "util/numbers.hh"
 5 | 
 6 | #include <ostream>
 7 | #include <string>
 8 | #include <vector>
 9 | 
10 | namespace input {
11 | 
// Input processing switches; both are exposed as program options by
// input::ConfigOptions.
struct Config {
  // Make alignments transitive?  (option "align.transitive")
  bool transitive;

  // Lowercase input before looking up in LM?  (option "input.lowercase_before_lm")
  // TODO: this really belongs with the LM feature.
  bool lowercase_before_lm;
};
18 | 
19 | } // namespace input
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/MEMT/Input/Dump.cc:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include "MEMT/Input/Config.hh"
 4 | #include "MEMT/Input/Format.hh"
 5 | #include "MEMT/Input/Input.hh"
 6 | #include "MEMT/Input/ReadFromJava.hh"
 7 | #include "util/numbers.hh"
 8 | 
 9 | using namespace std;
10 | 
11 | int main() {
12 |   input::Input input;
13 |   input::Config config;
14 |   config.lowercase_before_lm = false;
15 |   config.transitive = false;
16 |   unsigned int sentence_num = 0;
17 |   while (1) {
18 |     // TODO: command line option for number of systems.
19 |     try {
20 |       input::ReadFromJava(config, cin, input, 0);
21 |     }
22 |     catch (std::ios_base::failure &f) {
23 |       break;
24 |     }
25 |     if (input.engines.size() < 2) {
26 |       std::cerr << "Not enough engines." << std::endl;
27 |       continue;
28 |     }
29 |     LaTeXAlignment(std::cout, "First", input.engines[0], "Second", input.engines[1]);
30 |     ++sentence_num;
31 |   }
32 |   return 0;
33 | }
34 | 


--------------------------------------------------------------------------------
/MEMT/Input/Factory.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Input_Factory_h
 2 | #define _MEMT_Input_Factory_h
 3 | 
 4 | #include "MEMT/Input/Capitalization.hh"
 5 | #include "MEMT/Input/Config.hh"
 6 | #include "MEMT/Input/Transitive.hh"
 7 | 
 8 | namespace input {
 9 | 
10 | class Input;
11 | 
12 | void ProcessAligned(
13 |     const Config &config,
14 |     Input &input) {
15 |   if (config.transitive) MakeAlignmentsTransitive(input);
16 |   ApplyCapitalization(input);
17 | }
18 | 
19 | }  // namespace input
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/MEMT/Input/Format.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Input_Format_h
 2 | #define _MEMT_Input_Format_h
 3 | 
 4 | #include <ostream>
 5 | 
 6 | #include "MEMT/Input/Alignment.hh"
 7 | 
// Stream formatting for the input data structures.  All functions are defined
// outside this header.
// NOTE(review): std::string is used below but <string> is not included here
// directly; it is presumably reached through another include.
namespace input {

class WordText;
struct Word;
struct Engine;
struct Input;

std::ostream &operator<<(std::ostream &str, const WordText &text);

// Writes an alignment type to str; delim defaults to a single space.
std::ostream &AlignTypeOut(std::ostream &str, const AlignType &type, char delim = ' ');
std::ostream &operator<<(std::ostream &str, const WordAlignments &align);

std::ostream &operator<<(std::ostream &str, const Word &word);
std::ostream &operator<<(std::ostream &str, const Engine &engine);
std::ostream &operator<<(std::ostream &str, const Input &input);

// Writes a LaTeX rendering of the alignment between two engines, each given
// with a title.  exclude_bounds defaults to true.
std::ostream &LaTeXAlignment(std::ostream &str, const std::string &top_title, const Engine &top, const std::string &bottom_title, const Engine &bottom, bool exclude_bounds = true);

}  // namespace input
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/MEMT/Input/Input.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Input_Input_h
 2 | #define _MEMT_Input_Input_h
 3 | 
 4 | #include "MEMT/Strategy/Horizon/Config.hh"
 5 | #include "MEMT/Input/Location.hh"
 6 | #include "MEMT/Input/Word.hh"
 7 | 
 8 | #include "util/numbers.hh"
 9 | 
10 | #include <vector>
11 | 
12 | namespace input {
13 | 
14 | // This doesn't really do much, but it's useful to pass around an object for
15 | // an engine instead of the entire input and the engine number.
16 | struct Engine {
17 |   Engine() {}
18 | 
19 |   // Length of sentence, including <EOS>
20 |   unsigned int Length() const { return words.size(); }
21 | 
22 |   unsigned int number;
23 |   std::vector<Word> words;
24 | };
25 | 
26 | struct Input {
27 |   Input() {}
28 | 
29 |   const Word &GetWord(unsigned int engine, unsigned int offset) const {
30 |     return engines[engine].words[offset];
31 |   }
32 | 
33 |   const Word &GetWord(const Location &l) const {
34 |     return engines[l.engine].words[l.offset];
35 |   }
36 | 
37 |   unsigned int NumEngines() const {
38 |     return engines.size();
39 |   }
40 | 
41 |   void SetupEngines(unsigned int count) {
42 |     engines.resize(count);
43 |     for (unsigned int i = 0; i < engines.size(); ++i) {
44 |       engines[i].number = i;
45 |     }
46 |   }
47 | 
48 |   std::vector<Engine> engines;
49 | };
50 | 
51 | } // namespace input
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/MEMT/Input/Jamfile:
--------------------------------------------------------------------------------
# horizon_config is an alias for kenutil.
alias horizon_config : /util//kenutil ;

# Input option parsing.
lib input_options : Options.cc ../Strategy/Horizon//strategy_horizon_options /util//kenutil ../..//boost_program_options ;

# Alignment type names; uses boost_thread.
lib align_type : AlignType.cc ../..//boost_thread ;

lib input_alignment : Alignment.cc align_type /util//kenutil ;

# Main input library, built on the three targets above.
lib input
	: Capitalization.cc Format.cc Text.cc Transitive.cc ReadDispatcher.cc ReadFromJava.cc Read.cc align_type input_options input_alignment /util//kenutil ;

# Dump executable, built from Dump.cc against the input library.
exe Dump : Dump.cc input /util//kenutil ;
13 | 


--------------------------------------------------------------------------------
/MEMT/Input/Location.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Input_Location_h
 2 | #define _MEMT_Input_Location_h
 3 | 
 4 | namespace input {
 5 | 
 6 | struct Location {
 7 |   Location() {}
 8 |   Location(unsigned int in_engine, unsigned int in_offset) : engine(in_engine), offset(in_offset) {}
 9 |   unsigned int engine;
10 |   unsigned int offset;
11 | };
12 | 
13 | // For sets.
14 | inline bool operator<(const Location &left, const Location &right) {
15 |   if (left.engine < right.engine) return true;
16 |   if (left.engine > right.engine) return false;
17 |   return left.offset < right.offset;
18 | }
19 | 
20 | } // namespace input
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/MEMT/Input/Options.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Input/Options.hh"
 2 | #include "util/options.hh"
 3 | 
 4 | namespace input {
 5 | 
 6 | ConfigOptions::ConfigOptions(Config &config)
 7 |   : config_(config), options_("Input processing"), incremental_(false) {
 8 |   namespace po = boost::program_options;
 9 |   options_.add_options()
10 |     ("input.lowercase_before_lm",
11 |      po::value(&config_.lowercase_before_lm),
12 |      "Lowercase input before looking up in LM?")
13 | 
14 |     ("align.transitive",
15 |      po::value(&config_.transitive),
16 |      "Make alignments transitive?");
17 | 
18 |   SetDefaults();
19 | }
20 | 
21 | void ConfigOptions::SetDefaults() {
22 |   config_.lowercase_before_lm = true;
23 |   config_.transitive = false;
24 | 
25 |   incremental_ = false;
26 | }
27 | 
28 | void ConfigOptions::Finish(const boost::program_options::variables_map &vm) {
29 |   if (!incremental_) {
30 |     incremental_ = true;
31 |   }
32 | }
33 | 
34 | } // namespace input
35 | 


--------------------------------------------------------------------------------
/MEMT/Input/Options.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Input_Options_h
 2 | #define _MEMT_Input_Options_h
 3 | 
 4 | #include "MEMT/Input/Config.hh"
 5 | #include "MEMT/Input/Options.hh"
 6 | 
 7 | #include "util/options.hh"
 8 | 
 9 | #include <boost/program_options/options_description.hpp>
10 | #include <boost/program_options/variables_map.hpp>
11 | 
12 | #include <string>
13 | 
14 | namespace input {
15 | 
// Program-options front end for input::Config.
class ConfigOptions {
  public:
    // Binds to config, registers the options and applies the defaults.
    explicit ConfigOptions(Config &config);

    // Sets lowercase_before_lm to true and transitive to false, and clears
    // incremental_.
    void SetDefaults();

    // Read-only access to the option descriptions held in options_.
    const boost::program_options::options_description &Options() const { return options_; }

    // Sets incremental_ to true; vm is not inspected.
    void Finish(const boost::program_options::variables_map &vm);

  private:
    Config &config_;

    boost::program_options::options_description options_;

    // False after SetDefaults, true once Finish has been called.
    bool incremental_;
};
33 | 
34 | } // namespace input
35 | 
36 | #endif // _MEMT_Input_Options_h
37 | 


--------------------------------------------------------------------------------
/MEMT/Input/Read.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Input_Read_h
 2 | #define _MEMT_Input_Read_h
 3 | 
 4 | #include <istream>
 5 | #include <string>
 6 | 
 7 | namespace input {
 8 | 
 9 | class Engine;
10 | 
11 | void ReadEngine(const Config &config, const std::string &line, unsigned int num_engines, Engine &engine);
12 | 
13 | void ReadAllEngines(const Config &config, std::istream &in, Input &input);
14 | 
15 | void AddSelfAlignments(Input &input);
16 | void AddBoundaryAlignments(Input &input);
17 | 
18 | } // namespace input
19 | 
20 | #endif // _MEMT_Input_Read_h
21 | 


--------------------------------------------------------------------------------
/MEMT/Input/ReadDispatcher.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Input/ReadDispatcher.hh"
 2 | 
 3 | #include "MEMT/Input/ReadFromJava.hh"
 4 | 
 5 | namespace input {
 6 | 
 7 | BadFormatName::BadFormatName(const std::string &provided) throw() {
 8 |   what_ = "Bad format name ";
 9 |   what_ += provided;
10 | }
11 | 
12 | void ReadDispatcher(const Config &config, std::istream &in, Input &input, size_t expected) {
13 |   std::string format;
14 |   in >> format;
15 |   if (format == "java") {
16 |     ReadFromJava(config, in, input, expected);
17 |   } else {
18 |     throw BadFormatName(format);
19 |   }
20 | }
21 | 
22 | } // namespace input
23 | 


--------------------------------------------------------------------------------
/MEMT/Input/ReadDispatcher.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Input_ReadDispatcher_h
 2 | #define _MEMT_Input_ReadDispatcher_h
 3 | 
 4 | #include <istream>
 5 | #include <string>
 6 | 
 7 | namespace input {
 8 | 
 9 | class Config;
10 | 
// Exception with a fixed message, "Reading from matcher failed".
class FactoryException : public std::exception {
  public:
    FactoryException() throw() {}
    virtual ~FactoryException() throw() {}

    virtual const char *what() const throw() {
      static const char kMessage[] = "Reading from matcher failed";
      return kMessage;
    }
};
20 | 
// Exception whose message is built from the rejected format name; the
// constructor is defined outside this header.
class BadFormatName : public std::exception {
  public:
    explicit BadFormatName(const std::string &provided) throw();

    virtual ~BadFormatName() throw() {}

    // Pointer stays valid for as long as this object is alive.
    virtual const char *what() const throw() {
      return what_.c_str();
    }

  private:
    std::string what_;
};
32 | 
33 | class Input;
34 | 
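// Reads a format name from the stream and dispatches to the matching reader.
// Only "java" is currently handled; any other name throws BadFormatName.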
36 | void ReadDispatcher(const Config &config, std::istream &in, Input &input, size_t expected = 0);
37 | 
38 | } // namespace input
39 | 
40 | #endif // _MEMT_Input_ReadDispatcher_h
41 | 


--------------------------------------------------------------------------------
/MEMT/Input/Same.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Input_Same_h
 2 | #define _MEMT_Input_Same_h
 3 | 
 4 | namespace input {
 5 | 
 6 | class Input;
 7 | 
 8 | /* Find sets of words that, when on the frontier at the same time, would
 9 |  * produce equal hypotheses.  
10 |  * The words must:
11 |  *   Be aligned via AL_EXACT
12 |  *   Have equal alignments up to type.
13 |  *   Have equal phrase lengths.
14 |  *   Have all words in its phrases the same (recursively according to this definition)
15 |  *
16 |  * Therefore precondition:
17 |  *   Alignments and phrases completed
18 |  */
19 | void FindSame(Input &text);
20 | 
21 | } // namespace input
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/MEMT/Input/Text.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Input/Text.hh"
 2 | #include "util/utf8.hh"
 3 | #include "util/murmur_hash.hh"
 4 | 
 5 | #include <string>
 6 | 
 7 | using namespace std;
 8 | 
 9 | namespace input {
10 | 
11 | void WordText::ResetBOS() {
12 |   original_ = "<s>";
13 |   canonical_ = "<s>";
14 |   is_punctuation_ = false;
15 |   is_end_ = false;
16 |   canonical_hash_ = util::MurmurHashNative(canonical_.c_str(), canonical_.size());
17 | }
18 | 
19 | void WordText::ResetEOS() {
20 |   original_ = "</s>";
21 |   canonical_ = "</s>";
22 |   is_punctuation_ = false;
23 |   is_end_ = true;
24 |   canonical_hash_ = util::MurmurHashNative(canonical_.c_str(), canonical_.size());
25 | }
26 | 
27 | void WordText::RereadOriginal(bool lowercase_canonical) {
28 |   if (lowercase_canonical) {
29 |     utf8::ToLower(original_, canonical_);
30 |   } else {
31 |     canonical_ = original_;
32 |   }
33 |   is_punctuation_ = utf8::IsPunctuation(canonical_);
34 |   is_end_ = false;
35 |   canonical_hash_ = util::MurmurHashNative(canonical_.c_str(), canonical_.size());
36 | }
37 | 
38 | } // namespace input
39 | 


--------------------------------------------------------------------------------
/MEMT/Input/Text.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Input_Text_h
 2 | #define _MEMT_Input_Text_h
 3 | 
 4 | #include "util/string_piece.hh"
 5 | 
 6 | #include <string>
 7 | 
 8 | #include <inttypes.h>
 9 | 
10 | namespace input {
11 | 
12 | class WordText {
13 |   public:
14 |     WordText() {}
15 | 
16 |     // Sets everything but vocab_index_, which must be set later.
17 |     void Reset(bool lowercase_canonical, const StringPiece &original) {
18 |       original_.assign(original.data(), original.size());
19 |       RereadOriginal(lowercase_canonical);
20 |     }
21 | 
22 |     void ResetBOS();
23 | 
24 |     void ResetEOS();
25 | 
26 |     const std::string &Original() const { return original_; }
27 |     const std::string &Canonical() const { return canonical_; }
28 |     bool IsPunctuation() const { return is_punctuation_; }
29 | 
30 |     bool IsEnd() const { return is_end_; }
31 | 
32 |     std::string &MutableOriginalForCase() { return original_; }
33 | 
34 |     uint64_t CanonicalHash() const { return canonical_hash_; }
35 | 
36 |   private:
37 |     void RereadOriginal(bool lowercase_canonical);
38 | 
39 |     // UTF8 input string.
40 |     std::string original_;
41 |     // UTF8 lowercased.
42 |     std::string canonical_;
43 |     bool is_punctuation_;
44 |     bool is_end_;
45 | 
46 |     uint64_t canonical_hash_;
47 | };
48 | 
49 | }
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/MEMT/Input/Transitive.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Input_Transitive_h
 2 | #define _MEMT_Input_Transitive_h
 3 | 
 4 | namespace input {
 5 | 
 6 | class Input;
 7 |   
 8 | // Returns false if there is a conflict.
 9 | bool MakeAlignmentsTransitive(Input &text);
10 | 
11 | }  // namespace input
12 | 
13 | #endif
14 | 


--------------------------------------------------------------------------------
/MEMT/Input/Word.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Input_Word_h
 2 | #define _MEMT_Input_Word_h
 3 | 
 4 | #include "MEMT/Input/Alignment.hh"
 5 | #include "MEMT/Input/Text.hh"
 6 | 
 7 | #include "util/numbers.hh"
 8 | #include "util/string_piece.hh"
 9 | 
10 | namespace input {
11 | 
12 |   // Precomputed information about a word.
13 |   // This class is scheduled for partitioning into alignment and precompute pieces.
  struct Word {
    Word() {}

    // Makes this word the <s> marker and resets its alignments for the given
    // number of engines.  offset is accepted but not used.
    void ResetBOS(unsigned int engines, unsigned int offset) {
      text.ResetBOS();
      alignments.Reset(engines);
    }

    // Makes this word the </s> marker and resets its alignments for the given
    // number of engines.  offset is accepted but not used.
    void ResetEOS(unsigned int engines, unsigned int offset) {
      text.ResetEOS();
      alignments.Reset(engines);
    }

    // Resets the alignments, then loads the text from word; lowercase_canonical
    // is forwarded to WordText::Reset.  offset is accepted but not used.
    void ResetWord(unsigned int engines, unsigned int offset, bool lowercase_canonical, const StringPiece &word) {
      alignments.Reset(engines);
      text.Reset(lowercase_canonical, word);
    }

    // Actual input.
    WordText text;
    WordAlignments alignments;
  };
36 | 
37 | }  // namespace
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/MEMT/Jamfile:
--------------------------------------------------------------------------------
import symlink ;

# Release is the default build variant for this project.
project : default-build release ;

# Subprojects built as part of MEMT.
build-project Decoder ;
build-project Input ;
build-project Output ;
build-project Controller ;

# Install target: the listed programs together with their dependencies
# (executables and libraries), with the library search path hardcoded to
# $(TOP)/MEMT/dist.
install dist : Controller//MEMT Input//Dump Alignment//SummarizeAlignment ../lm/filter//filter ../lm/filter//FilterLM
           : <install-dependencies>on <install-type>EXE <install-type>LIB <dll-path>$(TOP)/MEMT/dist
             <hardcode-dll-paths>true
           ;

# `all` = dist plus the util programs and the Utilities/Output dist target.
alias all : dist ../util//programs ../Utilities/Output//dist ;
16 | 


--------------------------------------------------------------------------------
/MEMT/Output/Config.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Output_Config_h
 2 | #define _MEMT_Output_Config_h
 3 | 
 4 | namespace output {
 5 | 
// Output settings; each field is exposed as an "output.*" program option by
// output::ConfigOptions.
struct Config {
  // Size of n-best list.
  unsigned int nbest;

  // Lowercase all output?
  bool lowercase;

  // Capitalize the first word of each output?  No effect if lowercase is true.
  bool initial_cap;

  // Include scores?
  bool scores;

  // Include alignment back to original system and offset?
  bool alignment;

  // Flush after each nbest output?
  bool flush_nbest;
};
24 | 
25 | } // namespace output
26 | #endif // _MEMT_Output_Config_h
27 | 


--------------------------------------------------------------------------------
/MEMT/Output/Jamfile:
--------------------------------------------------------------------------------
# Empty alias: null_beam_dumper has no sources.
alias null_beam_dumper ;

# Output option parsing.
fakelib output_options : Options.cc ../..//boost_program_options ;

# Output formatting: NBest.cc and ToString.cc plus the Decoder completed target and kenutil.
fakelib output
	: NBest.cc ToString.cc ../Decoder//completed /util//kenutil ;
7 | 


--------------------------------------------------------------------------------
/MEMT/Output/NBest.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Output_NBest_h
 2 | #define _MEMT_Output_NBest_h
 3 | 
 4 | #include "MEMT/Decoder/Completed.hh"
 5 | 
 6 | #include <ostream>
 7 | #include <vector>
 8 | 
 9 | namespace input { class Input; }
10 | 
namespace output {

// NOTE(review): output::Config is defined as a struct in
// MEMT/Output/Config.hh; this forward declaration uses `class`.
class Config;

// Writes the hypotheses in nbest to out according to config.  text is the
// sentence's input and sent_id its identifier.  Defined outside this header.
void NBest(std::ostream &out, const Config &config, const std::vector<decoder::CompletedHypothesis> &nbest, const input::Input &text, unsigned int sent_id);

} // namespace output
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/MEMT/Output/NullBeamDumper.hh:
--------------------------------------------------------------------------------
 1 | /* The decoder's Run function accepts a beam dumper, to which it passes the
 2 |  * internal beam of partial hypotheses after each advance. 
 3 |  * This one does nothing.
 4 |  */
 5 | 
 6 | namespace output {
 7 |   
 8 | struct NullBeamDumper {
 9 |   template <class BeamT> void DumpBeam(unsigned int length, const BeamT &beam) {}
10 | };
11 | 
12 | } // namespace output
13 | 


--------------------------------------------------------------------------------
/MEMT/Output/Options.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Output/Options.hh"
 2 | 
 3 | #include "MEMT/Output/Config.hh"
 4 | 
 5 | namespace output {
 6 | 
 7 | // Declares the "output.*" options, each bound directly to a field of the
 8 | // supplied Config, and then resets those fields to their defaults.
 9 | ConfigOptions::ConfigOptions(Config &config)
10 |   : config_(config), options_("Output"), incremental_(false) {
11 | 
12 |   namespace po = boost::program_options;
13 |   // Every option binds through config_ (the same object as the parameter) so
14 |   // the constructor and SetDefaults() visibly refer to the same storage.
15 |   options_.add_options()
16 |     ("output.nbest",
17 |      po::value(&config_.nbest),
18 |      "Number of n-best hypotheses")
19 | 
20 |     ("output.lowercase",
21 |      po::value(&config_.lowercase),
22 |      "Lowercase all output?")
23 | 
24 |     ("output.initial_cap",
25 |      po::value(&config_.initial_cap),
26 |      "Capitalize the first word of each output?  No effect if lowercase is true.")
27 | 
28 |     ("output.scores",
29 |      po::value(&config_.scores),
30 |      "Include scores in output?")
31 | 
32 |     ("output.alignment",
33 |      po::value(&config_.alignment),
34 |      "Include alignment back to a source hypothesis?")
35 | 
36 |     ("output.flush_nbest",
37 |      po::value(&config_.flush_nbest),
38 |      "Flush after each nbest output?");
39 | 
40 |   SetDefaults();
41 | }
42 | 
43 | // Overwrites every output setting with its default value.
44 | void ConfigOptions::SetDefaults() {
45 |   config_.nbest = 1;
46 |   config_.lowercase = false;
47 |   config_.initial_cap = true;
48 |   config_.scores = true;
49 |   config_.alignment = false;
50 |   config_.flush_nbest = false;
51 | }
52 | 
53 | // Post-parse hook.  Nothing to do here: the options write straight into the
54 | // Config fields they were bound to.
55 | void ConfigOptions::Finish(const boost::program_options::variables_map &vm) {}
56 | 
57 | } // namespace output
58 | 


--------------------------------------------------------------------------------
/MEMT/Output/Options.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Output_Options_h
 2 | #define _MEMT_Output_Options_h
 3 | 
 4 | #include <boost/program_options/options_description.hpp>
 5 | #include <boost/program_options/variables_map.hpp>
 6 | 
 7 | namespace output {
 8 | 
 9 | class Config;
10 | 
11 | class ConfigOptions {
12 |   public:
13 |     explicit ConfigOptions(Config &config);
14 | 
15 |     void SetDefaults();
16 | 
17 |     const boost::program_options::options_description &Options() const { return options_; }
18 | 
19 |     void Finish(const boost::program_options::variables_map &vm);
20 | 
21 |   private:
22 |     Config &config_;
23 | 
24 |     boost::program_options::options_description options_;
25 | 
26 |     bool incremental_;
27 | };
28 | 
29 | } // namespace output
30 | 
31 | #endif // _MEMT_Output_Options_h
32 | 


--------------------------------------------------------------------------------
/MEMT/Output/StderrBeamDumper.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Output_StderrBeamDumper_h
 2 | #define _MEMT_Output_StderrBeamDumper_h
 3 | 
 4 | // decoder::HypHistory is named directly inside the template below, so it must
 5 | // be declared before this point.  NOTE(review): assumed to live in History.hh.
 6 | #include "MEMT/Decoder/History.hh"
 7 | 
 8 | #include <boost/ptr_container/indirect_fun.hpp>
 9 | 
10 | #include <algorithm>
11 | #include <iostream>
12 | #include <vector>
13 | 
14 | namespace output {
15 | 
16 | // Beam dumper that prints every hypothesis in the beam to stderr.
17 | class StderrBeamDumper {
18 |   public:
19 |     // Prints "Length <length>", then one line per beam entry in the order
20 |     // given by Beam::Value::LessByOverall.  Each line holds the entry's score
21 |     // followed by the (engine, offset) pair of every history node reached by
22 |     // following BestPrevious() from the entry's history.
23 |     template <class Beam> void DumpBeam(unsigned int length, const Beam &beam) {
24 |       std::cerr << "Length " << length << '\n';
25 |       // Sort pointers rather than values so the beam itself is left untouched.
26 |       std::vector<typename Beam::Value*> values;
27 |       for (typename Beam::unordered_iterator i = beam.unordered_begin(); i != beam.unordered_end(); ++i) {
28 |         values.push_back(&*i);
29 |       }
30 |       std::sort(values.begin(), values.end(), boost::indirect_fun<typename Beam::Value::LessByOverall>());
31 |       for (typename std::vector<typename Beam::Value*>::const_iterator i = values.begin(); i != values.end(); ++i) {
32 |         std::cerr << (*i)->History()->Entry().score;
33 |         for (const decoder::HypHistory *hist = (*i)->History().get(); hist; hist = hist->BestPrevious()) {
34 |           std::cerr << ' ' << hist->Entry().engine << ' ' << hist->Entry().offset;
35 |         }
36 |         std::cerr << '\n';
37 |       }
38 |       std::cerr << '\n';
39 |     }
40 | };
41 | } // namespace output
42 | 
43 | #endif // _MEMT_Output_StderrBeamDumper_h
44 | 


--------------------------------------------------------------------------------
/MEMT/Output/ToString.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Output_ToString_h
 2 | #define _MEMT_Output_ToString_h
 3 | 
 4 | #include <string>
 5 | 
 6 | namespace decoder { class CompletedHypothesis; }
 7 | 
 8 | namespace input { class Input; }
 9 | 
10 | namespace output {
11 | class Config;
12 | void CompletedHypothesisString(const Config &config, const decoder::CompletedHypothesis &hyp, const input::Input &text, std::string *out);
13 | } // namespace output
14 | 
15 | #endif
16 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Graph/Config.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Strategy_Graph_Config_h
 2 | #define _MEMT_Strategy_Graph_Config_h
 3 | 
 4 | #include "MEMT/Strategy/Scorer/Config.hh"
 5 | #include "MEMT/Strategy/Graph/Coverage/Config.hh"
 6 | 
 7 | namespace strategy {
 8 | namespace graph {
 9 | 
10 | struct Config {
11 |   coverage::Config coverage;
12 |   scorer::Config scorer;
13 | };
14 | 
15 | } // namespace graph
16 | } // namespace strategy
17 | 
18 | #endif // _MEMT_Strategy_Graph_Config_h
19 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Graph/Coverage/Config.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Strategy_Graph_Coverage_Config_h
 2 | #define _MEMT_Strategy_Graph_Coverage_Config_h
 3 | 
 4 | namespace strategy {
 5 | namespace graph {
 6 | namespace coverage {
 7 | 
 8 | struct Config {};
 9 | 
10 | } // namespace coverage
11 | } // namespace graph
12 | } // namespace strategy
13 | 
14 | #endif // _MEMT_Strategy_Graph_Coverage_Config_h
15 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Graph/Coverage/Hypothesis.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Strategy_Graph_Coverage_Hypothesis_h
 2 | #define _MEMT_Strategy_Graph_Coverage_Hypothesis_h
 3 | 
 4 | #include "util/hash_output.hh"
 5 | 
 6 | #include <boost/dynamic_bitset.hpp>
 7 | 
 8 | namespace strategy {
 9 | namespace graph {
10 | namespace coverage {
11 | 
12 | class Hypothesis {
13 |   public:
14 |     Hypothesis() {}
15 |   private:
16 |     friend class Sentence;
17 |     friend size_t hash_value(const Hypothesis &hyp);
18 |     friend bool operator==(const Hypothesis &left, const Hypothesis &right);
19 | 
20 |     boost::dynamic_bitset<unsigned int> bits_;
21 | };
22 | 
23 | inline size_t hash_value(const Hypothesis &hyp) {
24 |   size_t ret = 0;
25 |   to_block_range(hyp.bits_, util::HashOutput(ret));
26 |   return ret;
27 | }
28 | 
29 | inline bool operator==(const Hypothesis &left, const Hypothesis &right) {
30 |   return left.bits_ == right.bits_;
31 | }
32 | 
33 | } // namespace coverage
34 | } // namespace graph
35 | } // namespace strategy
36 | #endif // _MEMT_Strategy_Graph_Coverage_Hypothesis_h
37 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Graph/Jamfile:
--------------------------------------------------------------------------------
1 | fakelib strategy_graph_options : Options.cc ../Scorer//strategy_scorer_options /util//kenutil ;
2 | 
3 | alias graph : ../../Input//input ../Scorer//scorer /util//kenutil ; 
4 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Graph/Options.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Strategy/Graph/Options.hh"
 2 | 
 3 | #include "MEMT/Strategy/Graph/Config.hh"
 4 | 
 5 | #include "util/options.hh"
 6 | 
 7 | #include <boost/lexical_cast.hpp>
 8 | 
 9 | namespace strategy {
10 | namespace graph {
11 | 
12 | ConfigOptions::ConfigOptions(Config &config)
13 |   : config_(config), scorer_(config.scorer), options_("Graph strategy"), incremental_(false) {
14 |   namespace po = boost::program_options;
15 | 
16 |   options_.add(scorer_.Options());
17 | 
18 |   SetDefaults();
19 | }
20 | 
21 | void ConfigOptions::SetDefaults() {
22 |   scorer_.SetDefaults();
23 |   incremental_ = false;
24 | }
25 | 
26 | void ConfigOptions::Finish(const boost::program_options::variables_map &vm, size_t num_systems) {
27 |   scorer_.Finish(vm, num_systems);
28 | }
29 | 
30 | } // namespace graph
31 | } // namespace strategy
32 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Graph/Options.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Strategy_Graph_Options_h
 2 | #define _MEMT_Strategy_Graph_Options_h
 3 | 
 4 | #include "MEMT/Strategy/Scorer/Options.hh"
 5 | 
 6 | #include <boost/program_options/options_description.hpp>
 7 | #include <boost/program_options/variables_map.hpp>
 8 | 
 9 | namespace strategy {
10 | namespace graph {
11 | 
12 | struct Config;  // Config.hh defines this as a struct; the forward declaration uses the same class-key.
13 | // Owns the "Graph strategy" option group, which wraps the scorer's options.
14 | class ConfigOptions {
15 |   public:
16 |     explicit ConfigOptions(Config &config);
17 |     // Resets the scorer options to their defaults.
18 |     void SetDefaults();
19 |     // Option descriptions to hand to the boost::program_options parser.
20 |     const boost::program_options::options_description &Options() const { return options_; }
21 |     // Called after parsing; forwards vm and num_systems to the scorer options.
22 |     void Finish(const boost::program_options::variables_map &vm, size_t num_systems);
23 | 
24 |   private:
25 |     Config &config_;
26 | 
27 |     scorer::ConfigOptions scorer_;
28 | 
29 |     boost::program_options::options_description options_;
30 | 
31 |     bool incremental_;
32 | };
33 | 
34 | } // namespace graph
35 | } // namespace strategy
36 | 
37 | #endif // _MEMT_Strategy_Graph_Options_h
38 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Horizon/Config.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Strategy_Horizon_Config_h
 2 | #define _MEMT_Strategy_Horizon_Config_h
 3 | 
 4 | #include "util/numbers.hh"
 5 | 
 6 | #include <vector>
 7 | 
 8 | // Horizon configuration is part of Input configuration and passed to the
 9 | // decoder.  This is a separate file because it's also part of Input.
10 | 
11 | namespace strategy {
12 | namespace horizon {
13 | 
14 | struct Config {
15 |   typedef enum {HORIZON_LENGTH, HORIZON_ALIGNMENT} Method;
16 |   Method method;
17 | 
18 |   unsigned int radius;
19 | 
20 |   // Weights for stay_threshold.  Defaults to uniform.
21 |   std::vector<LinearScore> stay_weights;
22 |   // Applies only for method = HORIZON_ALIGNMENT.
23 |   LinearScore stay_threshold;
24 | };
25 | 
26 | } // namespace horizon
27 | } // namespace strategy
28 | 
29 | #endif // _MEMT_Strategy_Horizon_Config_h
30 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Horizon/Jamfile:
--------------------------------------------------------------------------------
1 | fakelib hypothesis : Hypothesis.cc /util//kenutil ../Phrase//phrase ../../Input//input ;
2 | fakelib strategy_horizon_options : Options.cc /util//kenutil ;
3 | fakelib horizon : Horizon.cc ../../Input//input ;
4 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Horizon/Options.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Strategy_Horizon_Options_h
 2 | #define _MEMT_Strategy_Horizon_Options_h
 3 | 
 4 | #include "util/options.hh"
 5 | 
 6 | #include <boost/program_options/options_description.hpp>
 7 | #include <boost/program_options/variables_map.hpp>
 8 | 
 9 | #include <string>
10 | 
11 | namespace strategy {
12 | namespace horizon {
13 | 
14 | class Config;
15 | 
16 | class BadHorizonMethod : public util::ArgumentParseError {
17 |   public:
18 |     explicit BadHorizonMethod(const std::string &provided);
19 | 
20 |     ~BadHorizonMethod() throw() {}
21 | 
22 |   private:
23 |     std::string provided_;
24 | };
25 | 
26 | class ConfigOptions {
27 |   public:
28 |     explicit ConfigOptions(Config &config);
29 | 
30 |     void SetDefaults();
31 | 
32 |     const boost::program_options::options_description &Options() const { return options_; }
33 | 
34 |     void Finish(const boost::program_options::variables_map &vm);
35 | 
36 |   private:
37 |     Config &config_;
38 | 
39 |     boost::program_options::options_description options_;
40 | 
41 |     bool incremental_;
42 | };
43 | 
44 | } // namespace horizon
45 | } // namespace strategy
46 | 
47 | #endif // _MEMT_Strategy_Horizon_Options_h
48 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Legacy/Config.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Strategy_Legacy_Config_h
 2 | #define _MEMT_Strategy_Legacy_Config_h
 3 | 
 4 | #include "MEMT/Strategy/Horizon/Config.hh"
 5 | #include "MEMT/Strategy/Phrase/Type.hh"
 6 | #include "MEMT/Feature/Scorer/Config.hh"
 7 | 
 8 | namespace strategy {
 9 | namespace legacy {
10 | 
11 | struct LegacyOnlyConfig {
12 |   bool continue_recent;
13 |   bool extend_aligned;
14 | };
15 | 
16 | struct Config {
17 |   phrase::Type phrase;
18 |   horizon::Config horizon;
19 |   feature::scorer::Config scorer;
20 |   LegacyOnlyConfig legacy;
21 | };
22 | 
23 | } // namespace legacy
24 | } // namespace strategy
25 | 
26 | #endif // _MEMT_Strategy_Legacy_Config_h
27 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Legacy/Jamfile:
--------------------------------------------------------------------------------
1 | fakelib strategy_legacy_options : Options.cc ../Horizon//strategy_horizon_options ../../Feature/Scorer//feature_scorer_options ;
2 | 
3 | alias legacy : ../Horizon//horizon ../Horizon//hypothesis ../Phrase//phrase ../../Feature/Scorer//scorer ;
4 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Legacy/Options.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Strategy_Legacy_Options_h
 2 | #define _MEMT_Strategy_Legacy_Options_h
 3 | 
 4 | #include "MEMT/Strategy/Horizon/Options.hh"
 5 | #include "MEMT/Feature/Scorer/Options.hh"
 6 | 
 7 | #include <boost/program_options/options_description.hpp>
 8 | #include <boost/program_options/variables_map.hpp>
 9 | 
10 | #include <vector>
11 | 
12 | namespace strategy {
13 | namespace legacy {
14 | 
15 | struct Config;  // Config.hh defines this as a struct; the forward declaration uses the same class-key.
16 | // Option handling for the legacy strategy; holds horizon and scorer option objects.
17 | class ConfigOptions {
18 |   public:
19 |     explicit ConfigOptions(Config &config);
20 |     // NOTE(review): presumably restores defaults; the implementation is in Options.cc.
21 |     void SetDefaults();
22 |     // Option descriptions to hand to the boost::program_options parser.
23 |     const boost::program_options::options_description &Options() const { return options_; }
24 |     // Post-parse hook taking the parsed values, the system count and the LM orders.
25 |     void Finish(const boost::program_options::variables_map &vm, size_t num_systems, const std::vector<unsigned char> &lm_orders);
26 | 
27 |   private:
28 |     Config &config_;
29 | 
30 |     horizon::ConfigOptions horizon_;
31 |     feature::scorer::ConfigOptions scorer_;
32 | 
33 |     boost::program_options::options_description options_;
34 | 
35 |     bool incremental_;
36 | };
37 | 
38 | } // namespace legacy
39 | } // namespace strategy
40 | 
41 | #endif // _MEMT_Strategy_Legacy_Options_h
42 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Phrase/Aligned.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Strategy_Phrase_Aligned_h
 2 | #define _MEMT_Strategy_Phrase_Aligned_h
 3 | 
 4 | namespace input { class Engine; }
 5 | 
 6 | namespace strategy {
 7 | namespace phrase {
 8 | 
 9 | class System;
10 | 
11 | void DetectAligned(const input::Engine &engine, System &system);
12 | 
13 | }  // namespace phrase
14 | }  // namespace strategy
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Phrase/Jamfile:
--------------------------------------------------------------------------------
1 | fakelib type : Type.cc ;
2 | fakelib phrase : Aligned.cc Punctuation.cc Phrase.cc type ../../Input//input ;
3 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Phrase/Punctuation.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Strategy/Phrase/Punctuation.hh"
 2 | 
 3 | #include "MEMT/Input/Input.hh"
 4 | #include "MEMT/Strategy/Phrase/Phrase.hh"
 5 | 
 6 | #include <assert.h>
 7 | 
 8 | namespace strategy {
 9 | namespace phrase {
10 | 
11 | void DetectPunctuation(const input::Engine &engine, System &system) {
12 |   //A punctuation phrase is the punctuation mark itself and the word
13 |   //before it, except when it's the first word of the sentence:
14 |   size_t start = 0;  // Index of the word just before the current run of punctuation.
15 |   bool open = false;  // True while the scan is inside a run of punctuation.
16 | 
17 |   // <EOS> is not punctuation.  Therefore we will always close.
18 |   for (unsigned i = 1; i < engine.Length(); ++i) {  // Starts at 1: word 0 is never itself tested for punctuation.
19 |     if (engine.words[i].text.IsPunctuation()) {
20 |       if (!open) {
21 |         open = true;
22 |         start = i - 1;
23 |       }
24 |     } else if (open) {
25 |       system[start].AddEnd(i - 1, PHRASE_PUNCTUATION);  // Run ended at i - 1; presumably records a phrase spanning start..i-1.
26 |       open = false;
27 |     }
28 |   }
29 |   assert(!open);  // Holds only if the last word is not punctuation; compiled out when NDEBUG is defined.
30 | }
31 | 
32 | }  // namespace phrase
33 | }  // namespace strategy
34 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Phrase/Punctuation.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Strategy_Phrase_Punctuation_h
 2 | #define _MEMT_Strategy_Phrase_Punctuation_h
 3 | 
 4 | namespace input { class Engine; }
 5 | 
 6 | namespace strategy {
 7 | namespace phrase {
 8 | 
 9 | class System;
10 | void DetectPunctuation(const input::Engine &engine, System &system);
11 | 
12 | }  // namespace phrase
13 | }  // namespace strategy
14 | 
15 | #endif
16 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Phrase/Type.cc:
--------------------------------------------------------------------------------
 1 | #include "MEMT/Strategy/Phrase/Type.hh"
 2 | 
 3 | namespace strategy {
 4 | namespace phrase {
 5 | 
 6 | // Writes the name of every phrase flag set in type, each followed by a single
 7 | // space, in the fixed order punctuation, alignment, source_chunk.  Returns str.
 8 | std::ostream &TypeOut(std::ostream &str, const Type type) {
 9 |   static const struct {
10 |     Type flag;
11 |     const char *label;
12 |   } kLabels[] = {
13 |     {PHRASE_PUNCTUATION, "punctuation "},
14 |     {PHRASE_ALIGNMENT, "alignment "},
15 |     {PHRASE_SOURCE_CHUNK, "source_chunk "},
16 |   };
17 |   for (unsigned int k = 0; k < sizeof(kLabels) / sizeof(kLabels[0]); ++k) {
18 |     if ((type & kLabels[k].flag) != 0) str << kLabels[k].label;
19 |   }
20 |   return str;
21 | }
22 | 
23 | } // namespace phrase
24 | } // namespace strategy
25 | 


--------------------------------------------------------------------------------
/MEMT/Strategy/Phrase/Type.hh:
--------------------------------------------------------------------------------
 1 | #ifndef _MEMT_Strategy_Phrase_Type_h
 2 | #define _MEMT_Strategy_Phrase_Type_h
 3 | 
 4 | #include <ostream>
 5 | 
 6 | namespace strategy {
 7 | namespace phrase {
 8 | 
 9 | typedef unsigned int Type;
10 | 
11 | const Type PHRASE_PUNCTUATION = 1 << 0;
12 | const Type PHRASE_ALIGNMENT = 1 << 1;
13 | const Type PHRASE_SOURCE_CHUNK = 1 << 2;
14 | 
15 | std::ostream &TypeOut(std::ostream &str, const Type type);
16 | 
17 | } // namespace phrase
18 | } // namespace strategy
19 | 
20 | #endif // _MEMT_Strategy_Phrase_Type_h
21 | 


--------------------------------------------------------------------------------
/MEMT/scripts/experiment/decode_subgenre.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Usage: decode_subgenre.sh port run_dir name...
 3 | # Decodes $run/matched/<name>/matched into $run/<name>/output for every name given.
 4 | set -e
 5 | port=$1
 6 | run=$2
 7 | shift 2
 8 | for i; do
 9 |   MATCHED="$run/matched/$i/matched"
10 |   if [ ! -f "$MATCHED" ]; then
11 |     echo "No file $MATCHED" 1>&2
12 |     exit 1  # A bare exit would return echo's status (0) and hide the failure.
13 |   fi
14 |   mkdir -p "$run/$i"
15 |   ~/avenue/MEMT/scripts/simple_decode.rb "$port" "$run/decoder_config" "$MATCHED" "$run/$i/output"
16 | done
17 | 


--------------------------------------------------------------------------------
/MEMT/scripts/experiment/en.sh:
--------------------------------------------------------------------------------
 1 | #PBS -N mert-gale-all
 2 | #PBS -S /bin/bash
 3 | #PBS -l nodes=1:ppn=8
 4 | #PBS -e localhost:$HOME/mert.err
 5 | #PBS -o localhost:$HOME/mert.out
 6 | #PBS -l mem=12gb
 7 | #PBS -l walltime=48:30:00
 8 | 
 9 | JOBDIR=$HOME/jobs/$PBS_JOBID
10 | mkdir -p $JOBDIR
11 | exec 1>$JOBDIR/stdout 2>$JOBDIR/stderr
12 | echo Running on host `hostname`
13 | echo Time is `date`
14 | echo Directory is `pwd`
15 | 
16 | l=en
17 | LM=corpus/gale/lm/filtered.arpa
18 | 
19 | cd /home/kheafiel/memt/expt
20 | ../../avenue/MEMT/scripts/server.sh --lm.file $LM --daemonize --pidfile $JOBDIR/decoder.pid --portfile $JOBDIR/decoder.port --keep-stdio-open --no-setsid || exit 1
21 | port=$(cat $JOBDIR/decoder.port)
22 | #scripts/run.rb $port corpus/mt09/{ur/match/top7,ar/match/top9} config/exact2,2-all3,3-length-5-msft additional/all
23 | scripts/run.rb $port corpus/gale/nw/match/top{9,{5,6,7,8}-lemans} corpus/gale/wb/match/top{4,5,6,7,8} corpus/gale/audio/match/top{5,6,7,8,9,10} config/exact2,2-all2,2-length-5{,-terbleu} config/exact2,2-all3,3-length-{5,4} additional/all
24 | ret=$?  # Save run.rb's status now; the kill below overwrites $?.
25 | kill $(cat $JOBDIR/decoder.pid)
26 | exit $ret
27 | 


--------------------------------------------------------------------------------
/MEMT/scripts/experiment/preprocess.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Usage: preprocess.sh dir [tag]
 3 | # Strips SGML from dir/1best.sgm into dir/txt, then tokenizes dir/txt into dir/tok.
 4 | AVENUE_DIR="$(dirname "$0")/../../.."
 5 | dir="$1"
 6 | if ! [ -f "$dir"/1best.sgm ]; then
 7 |   echo no "$dir"/1best.sgm 1>&2
 8 |   exit 1
 9 | fi
10 | TAG="${2:-seg}"
11 | # Stop if extraction fails rather than tokenizing a partial or empty txt file.
12 | "$AVENUE_DIR"/MEMT/scripts/experiment/stripsgml.rb "$TAG" <"$dir"/1best.sgm >"$dir"/txt || exit 1
13 | "$AVENUE_DIR"/Utilities/Tokenization/PTB/tokenizer.perl <"$dir"/txt |sed 's/^ *//; s/ *$//' >"$dir"/tok
14 | 


--------------------------------------------------------------------------------
/MEMT/scripts/experiment/qsub.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Usage: qsub.sh language_model [run.rb arguments...]
 3 | # Submits a job that starts the decoder server on the language model and runs scripts/run.rb against it.
 4 | if [ ! -f "$1" ]; then
 5 |   echo expected a language model as the first argument 1>&2
 6 |   exit 1  # A bare exit would return echo's status (0) and report success.
 7 | fi
 8 | LM="$1"
 9 | LM_BASE="$(basename "$(basename "$LM" .probing)" .arpa)"
10 | shift
11 | qsub <<EOF
12 | #$ -N mert-$LM_BASE
13 | #$ -S /bin/bash
14 | #$ -e mert.err
15 | #$ -o mert.out
16 | #$ -l h_vmem=300m
17 | #$ -l h_rt=12:00:00
18 | #$ -pe smp 7
19 | 
20 | declare -x LANG="en_US.UTF-8"
21 | 
22 | source $HOME/job_prefix.sh
23 | echo $@
24 | 
25 | cd /home/kheafiel/memt/expt
26 | ../../avenue/MEMT/scripts/server.sh --lm.file "$LM" --daemonize --pidfile \$JOBDIR/decoder.pid --portfile \$JOBDIR/decoder.port --keep-stdio-open --no-setsid || exit 1
27 | port=\$(cat \$JOBDIR/decoder.port)
28 | scripts/run.rb \$port additional/"$LM_BASE" $@
29 | ret=\$?
30 | kill \$(cat \$JOBDIR/decoder.pid)
31 | exit \$ret
32 | EOF
33 | 


--------------------------------------------------------------------------------
/MEMT/scripts/experiment/stripsgml.rb:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | require 'cgi'
3 | open = false
4 | $stdin.read.scan /<seg id="?[^>]*"?>(.*)<\/seg>/ do |m|
5 |   $stdout.puts CGI::unescapeHTML(m[0]).gsub(/&apos;/, "'").gsub(/& AMP;/, '&').gsub(/\n/, ' ').strip
6 | end
7 | 
8 | 


--------------------------------------------------------------------------------
/MEMT/scripts/make_filter_vocab.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'set'
 3 | files = ARGV.map { |f| File.new(f) }
 4 | loop do
 5 |   lines = files.map { |f| f.gets }.compact
 6 |   break if lines.empty?
 7 |   throw "Unequal number of lines" unless lines.size == files.size
 8 |   vocab = Set.new
 9 |   lines.each do |l|
10 |     vocab.merge(l.split)
11 |   end
12 |   vocab.each do |w|
13 |     $stdout.write w
14 |     $stdout.write " "
15 |   end
16 |   $stdout.write "\n"
17 | end
18 | 


--------------------------------------------------------------------------------
/MEMT/scripts/match.rb:
--------------------------------------------------------------------------------
 1 | class JavaMatched
 2 |   attr_reader :sys_count
 3 |   def initialize(file)
 4 |     @file = file
 5 |     @file.seek(0)
 6 |     @sys_count = @file.gets.to_i
 7 |     throw "Bad count" unless @sys_count > 0
 8 |     @file.seek(0)
 9 |   end
10 |   def get_match
11 |     count = @file.gets
12 |     return nil unless count
13 |     ret = count
14 |   
15 |     count.to_i.times do
16 |       ret += @file.gets
17 |     end
18 |     ((count.to_i * (count.to_i - 1))/2).times do
19 |       while true do
20 |         line = @file.gets
21 |         ret += line
22 |         break if line == "\n"
23 |       end
24 |     end
25 |   
26 |     ret
27 |   end
28 | 
29 |   def packet
30 |     str = get_match
31 |     return nil unless str
32 |     "matched 0\njava\n" + str
33 |   end
34 | end
35 | 


--------------------------------------------------------------------------------
/MEMT/scripts/nbest_first.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | pre = nil
 3 | while l = gets
 4 |   splt = l.split("|||")
 5 |   num = splt[0].to_i
 6 |   if num != pre
 7 |     pre = num
 8 |     puts splt[1].strip
 9 |   end
10 | end
11 | 


--------------------------------------------------------------------------------
/MEMT/scripts/server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Runs the MEMT binary from ../../bin relative to this script, with that directory added to the library path.
3 | DIR="$(dirname "$0")/../../bin"
4 | export LD_LIBRARY_PATH="$DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
5 | exec "$DIR"/MEMT "$@"  # Quoted so arguments containing spaces reach MEMT intact.
6 | 


--------------------------------------------------------------------------------
/MEMT/scripts/shell_escape.rb:
--------------------------------------------------------------------------------
1 | ../../Utilities/scoring/lib/shell_escape.rb


--------------------------------------------------------------------------------
/MEMT/scripts/simple_decode.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'pathname'
 3 | AVENUE_DIR = File.dirname(Pathname.new(File.expand_path(__FILE__) + '/../..').realpath)
 4 | require AVENUE_DIR + '/MEMT/scripts/decode'
 5 | require AVENUE_DIR + '/MEMT/scripts/zmert/format'
 6 | 
 7 | unless ARGV[2]
 8 |   $stderr.puts "Usage: connection config_file matched_file [output_prefix] [language for detokenization]"
 9 |   exit 1
10 | end
11 | 
12 | CONN=ARGV[0]
13 | CONFIG=ARGV[1]
14 | MATCHED=ARGV[2]
15 | OUT_BASE=(ARGV[3] ? ARGV[3] : MATCHED)
16 | decode(
17 | 	File.new(CONFIG).read,
18 | 	CONN,
19 | 	File.new(MATCHED, 'r'),
20 | 	OUT_BASE,
21 |   ARGV[4])
22 | 


--------------------------------------------------------------------------------
/MEMT/scripts/util.rb:
--------------------------------------------------------------------------------
 1 | require 'pathname'
 2 | SCRIPT_DIR = File.dirname(Pathname.new(File.expand_path(__FILE__)).realpath)
 3 | 
 4 | class Message
 5 |   attr_accessor :prefix
 6 | 	def initialize(prefix, file)
 7 | 		@prefix = prefix
 8 | 	  @last = Time.now
 9 | 	  @file = file
10 | 	end
11 | 	def measure
12 | 		tick = Time.now
13 | 		ret = tick - @last
14 | 		@last = tick
15 | 	  ret
16 | 	end
17 |   def tell(func, event)
18 | 		message = "#{@prefix}#{event} #{func} at #{Time.now} (#{measure}s)"
19 | 		$stderr.puts message
20 | 		@file.puts message
21 | 		@file.sync
22 | 	end
23 | 
24 | 	def wrap(func)
25 | 	  tell(func, :start)
26 | 	  ret = yield
27 | 	  tell(func, :finish)
28 | 	  ret
29 | 	end
30 | end
31 | 
32 | def write_close(name, content)
33 |   f = File.new(name, 'w')
34 |   f.write(content)
35 |   f.close
36 | end
37 | 
38 | #not the most efficient, but it works for references.
39 | def count_lines(file)
40 | 	count = 0
41 | 	file.each_line do |l|
42 | 	  count += 1
43 | 	end
44 | 	count
45 | end
46 | 


--------------------------------------------------------------------------------
/MEMT/scripts/zmert/decoder.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | #Decoder process run by zmert that converts the necessary file formats.
 3 | require 'pathname'
 4 | AVENUE_DIR = File.dirname(Pathname.new(File.expand_path(__FILE__) + '/../../..').realpath)
 5 | 
 6 | require AVENUE_DIR + '/MEMT/scripts/decode'
 7 | require AVENUE_DIR + '/MEMT/scripts/zmert/fuzz'
 8 | require AVENUE_DIR + '/MEMT/scripts/zmert/format'
 9 | 
10 | iter = ARGV[0].to_i
11 | $stderr.puts "Guessing iteration #{iter}"
12 | config = make_config("dec_cfg.txt", "decoder_config_base") + "\noutput.lowercase = true\n" + Fuzz.new.string_amount(iter)
13 | 
14 | language = File.new("language").read.strip
15 | 
16 | decode(config, File.new("connection").read, File.new("dev.matched"), "output", language)
17 | 


--------------------------------------------------------------------------------
/MEMT/scripts/zmert/fuzz.rb:
--------------------------------------------------------------------------------
 1 | FUZZ_SLIDE_AMOUNT=11
 2 | class Fuzz
 3 |   def self.slide_amount
 4 |     FUZZ_SLIDE_AMOUNT
 5 |   end
 6 |   attr_reader :slide
 7 |   def initialize
 8 |     @mult = 1.1
 9 |     @slide = FUZZ_SLIDE_AMOUNT
10 |   end
11 |   def amount(iter)
12 |     return 0.0 if iter >= @slide
13 |     return (@slide - iter).to_f / @slide.to_f * @mult
14 |   end
15 |   def string_amount(iter)
16 |     "score.fuzz.ratio = #{amount(iter)}\n"
17 |   end
18 | end
19 | 
20 | 


--------------------------------------------------------------------------------
/MEMT/scripts/zmert/run.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'pathname'
 3 | AVENUE_DIR = File.dirname(Pathname.new(File.expand_path(__FILE__) + '/../../..').realpath)
 4 | require AVENUE_DIR + '/MEMT/scripts/zmert/zmert'
 5 | 
 6 | throw "Tunes MEMT.  Pass a working directory, connection, and language" unless ARGV[2]
 7 | directory=ARGV[0]
 8 | connection=ARGV[1]
 9 | language=ARGV[2]
10 | 
11 | full_zmert(directory, connection, language)
12 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | MEMT/README


--------------------------------------------------------------------------------
/Utilities/Input/unescape.rb:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | require 'cgi'
3 | 
4 | $stdin.each_line do |l|
5 |   $stdout.write CGI::unescapeHTML(l).gsub(/&apos;/, "'").gsub(/& AMP;/, '&')
6 | end
7 | 


--------------------------------------------------------------------------------
/Utilities/Output/Jamfile:
--------------------------------------------------------------------------------
 1 | exe remove_nonlatin
 2 |   : remove_nonlatin.cc /util//kenutil /util//icu ;
 3 | 
 4 | install dist : remove_nonlatin
 5 |            : <install-dependencies>on <install-type>EXE
 6 |              <install-type>LIB
 7 |              <dll-path>dist
 8 |              <hardcode-dll-paths>true
 9 |            ;
10 | 


--------------------------------------------------------------------------------
/Utilities/Output/remove_nonlatin.cc:
--------------------------------------------------------------------------------
 1 | #include "util/tokenize_piece.hh"
 2 | 
 3 | #include <unicode/uscript.h>
 4 | #include <unicode/utf8.h>
 5 | 
 6 | #include <string>
 7 | #include <iostream>
 8 | // Returns true when the word should be dropped: it contains malformed UTF-8, or none of its code points is in the Latin or Common script.
 9 | bool RemoveWord(const StringPiece &str) {
10 |   const int32_t size_as_int32 = static_cast<int32_t>(str.size());
11 |   UChar32 character = -1;
12 |   for (int32_t offset = 0, start = 0; offset < size_as_int32; start = offset) {  // U8_NEXT advances offset; start keeps where this sequence began.
13 |     U8_NEXT(str.data(), offset, size_as_int32, character);
14 |     if (character < 0) {
15 |       std::cerr << "Bad UTF8 " << str.data()[start] << " in " << str << std::endl;  // Index with start: offset is already past the bad byte and may equal the size.
16 |       return true;
17 |     }
18 |     UErrorCode err = U_ZERO_ERROR;
19 |     UScriptCode code = uscript_getScript(character, &err);
20 |     if (U_FAILURE(err)) {  // ICU warnings are nonzero too, so test for failure rather than nonzero.
21 |       std::cerr << u_errorName(err) << std::endl;
22 |       exit(1);
23 |     }
24 |     if (code == USCRIPT_LATIN || code == USCRIPT_COMMON) {
25 |       return false;
26 |     }
27 |   }
28 |   return true;
29 | }
30 | 
31 | int main() {
32 |   std::string line;
33 |   while (std::getline(std::cin, line)) {
34 |     bool rest = false;
35 |     for (util::TokenIter<util::SingleCharacter, true> i(line, ' '); i; ++i) {
36 |       if (!RemoveWord(*i)) {
37 |         if (rest) std::cout << ' ';
38 |         rest = true;
39 |         std::cout << *i;
40 |       }
41 |     }
42 |     std::cout << '\n';
43 |   }
44 | }
45 | 


--------------------------------------------------------------------------------
/Utilities/Tokenization/Moses/README:
--------------------------------------------------------------------------------
1 | Copied from Moses, LGPL license
2 | 


--------------------------------------------------------------------------------
/Utilities/Tuning/zmert.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kpu/MEMT/cfd150b33c33320ee74d643a23e8e909f77a2994/Utilities/Tuning/zmert.jar


--------------------------------------------------------------------------------
/Utilities/queue.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'thread'
 3 | 
 4 | queue = Queue.new
 5 | number = ARGV[0] ? ARGV[0].to_i : 8
 6 | 
 7 | threads = []
 8 | number.times do |i|
 9 | 	threads << Thread.new do
10 | 		while (command = queue.pop)
11 | 			puts "Thread #{i} running #{command.chomp}.  #{queue.size} remaining."
12 | 			system(command)
13 | 		end
14 | 	end
15 | end
16 | 
17 | while (str = $stdin.gets) do
18 | 	queue << str
19 | 	puts "#{queue.size} remaining."
20 | end
21 | 
22 | threads.each { queue << nil }
23 | threads.each { |t| t.join }
24 | 


--------------------------------------------------------------------------------
/Utilities/scoring/INSTALL:
--------------------------------------------------------------------------------
1 | Run ./setup.sh .  When that finishes, run ./score.rb for usage instructions.  
2 | 
3 | Due to licensing issues, the actual metrics are not distributed in the same tarball.  ./setup.sh will download, untar, and compile them in the case of METEOR.  
4 | 
5 | Run time dependencies are Ruby (scoring script), Perl (detokenizer and BLEU), and Java (METEOR and TER).
6 | 
7 | At build time you also need Python (METEOR phrase table extraction) and Ant (to build METEOR).
8 | 


--------------------------------------------------------------------------------
/Utilities/scoring/LICENSE:
--------------------------------------------------------------------------------
1 | Everything except lib/shell.rb is LGPL provided in COPYING.LESSER.  lib/shell.rb includes its own license at the top.  
2 | 


--------------------------------------------------------------------------------
/Utilities/scoring/README:
--------------------------------------------------------------------------------
1 | This package makes it easier to score machine translation output using multiple metrics.  Currently, it supports BLEU, NIST, TER, and METEOR.  
2 | 
3 | Installation is simple: run ./setup.sh which installs all the metrics.  Then run ./score.rb .  See INSTALL for more detailed directions.  
4 | 
5 | Contact heafield+scoring at cs.cmu.edu
6 | 


--------------------------------------------------------------------------------
/Utilities/scoring/interlace.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | #Standalone program to interlace multiple files.  Useful to make files for --refs-laced
 3 | files = ARGV.map { |f| File.new(f) }
 4 | loop do
 5 |   lines = files.map { |f| f.gets }.compact
 6 |   break if lines.empty?
 7 |   throw "Unequal number of lines" unless lines.size == files.size
 8 |   lines.each do |l|
 9 |     $stdout.puts l
10 |   end
11 | end
12 | 


--------------------------------------------------------------------------------
/Utilities/scoring/lib/length.rb:
--------------------------------------------------------------------------------
 1 | # Carnegie Mellon University
 2 | # Copyright (c) 2009
 3 | # All Rights Reserved.
 4 | #
 5 | # Any use of this software must follow the terms
 6 | # outlined in the included LICENSE file.
 7 | #
 8 | 
 9 | #This computes the average over hypotheses of the ratio of hypothesis length to average corresponding reference length.
10 | 
# Number of whitespace-separated tokens in str (0 for an empty or
# all-whitespace string).
def count_words(str)
  tokens = str.split(' ')
  tokens.length
end
14 | 
# Average, over all hypothesis lines, of
#   hypothesis word count / mean word count of that line's references.
# request.ref.range(i) supplies the indices into request.ref.laced_lines that
# belong to hypothesis i; request.ref.number is the divisor used for the mean.
def score_length(request)
  refs_per_segment = request.ref.number.to_f
  hypotheses = request.hyp.lines
  total = 0.0
  hypotheses.each_with_index do |hyp, i|
    ref_words = request.ref.range(i).inject(0) do |acc, r|
      acc + count_words(request.ref.laced_lines[r])
    end
    total += count_words(hyp).to_f / (ref_words.to_f / refs_per_segment)
  end
  total / hypotheses.size.to_f
end
28 | 


--------------------------------------------------------------------------------
/Utilities/scoring/lib/meteorify.rb:
--------------------------------------------------------------------------------
 1 | # Carnegie Mellon University
 2 | # Copyright (c) 2009
 3 | # All Rights Reserved.
 4 | #
 5 | # Any use of this software must follow the terms
 6 | # outlined in the included LICENSE file.
 7 | #
 8 | 
 9 | require SCORE_DIR + '/lib/shell_escape'
10 | 
# Extract a number from one line of METEOR output.
#
# line:       the output line to inspect.
# expression: a Regexp whose first capture group holds the number.
# Returns the first capture converted with to_f.
# Raises RuntimeError when the line does not match the expression.
def parse_meteor(line, expression)
  matched = line.match(expression)
  # raise (not throw) so the message is the error, not an "uncaught throw".
  raise "Meteor line #{line.inspect} does not match #{expression}." unless matched
  matched[1].to_f
end
16 | 
# Run METEOR 1.0 on the request's hypothesis and laced reference files and
# parse its report.
#
# The jar's standard output is redirected to the file returned by
# request.output.perm("meteor_out").
# Returns [final score, precision, recall] as Floats, read from fixed
# positions counted from the end of that report.
# Raises RuntimeError when the report is too short or a line does not have
# the expected form.
def score_meteor(request)
  output_file = request.output.perm("meteor_out")
  system_with_redirect(["java", "-jar", SCORE_DIR + "/meteor-1.0/dist/meteor-1.0/meteor.jar", request.hyp.file_name, request.ref.laced_name, "-r", request.ref.number.to_s, "-normalize", "-l", request.language, "-t", request.task], nil, output_file)
  # File.readlines closes the file; File.new(...).readlines left it open.
  score_lines = File.readlines(output_file)
  # The earliest line read below is [-7], so exactly seven lines suffice.
  raise "Meteor output should be at least 7 lines" unless score_lines.size >= 7
  # parse_meteor already returns Floats, so no further conversion is needed.
  [
    parse_meteor(score_lines[-1], /^Final score:\t\t([0-9]*\.[0-9]*)$/),
    parse_meteor(score_lines[-7], /^Precision:\t\t([0-9]*\.[0-9]*)$/),
    parse_meteor(score_lines[-6], /^Recall:\t\t\t([0-9]*\.[0-9]*)$/)
  ]
end
26 | 


--------------------------------------------------------------------------------
/Utilities/scoring/setup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Carnegie Mellon University
 4 | # Copyright (c) 2009
 5 | # All Rights Reserved.
 6 | #
 7 | # Any use of this software must follow the terms
 8 | # outlined in the included LICENSE file.
 9 | #
10 | function error() { echo Setup failed.; exit 1; }
11 | [ -f mteval-v13.pl ] || wget ftp://jaguar.ncsl.nist.gov/mt/resources/mteval-v13.pl || error
12 | [ -f meteor-1.0.tgz ] || wget http://www.cs.cmu.edu/~alavie/METEOR/download/meteor-1.0.tgz || error
13 | [ -f tercom-0.7.25.tgz ] || wget http://www.cs.umd.edu/~snover/tercom/tercom-0.7.25.tgz || error
14 | [ -f terp-pt.v1.tgz ] || wget http://web.archive.org/web/20120608122411/http://www.umiacs.umd.edu/~snover/terp/downloads/terp-pt.v1.tgz || error
15 | trap error ERR
16 | chmod +x mteval-v13.pl
17 | tar xzf meteor-1.0.tgz
18 | tar xzf tercom-0.7.25.tgz
19 | tar xzf terp-pt.v1.tgz
20 | pushd meteor-1.0
21 | ./scripts/create_paraphrase_file.py . ../terp-pt.v1/unfiltered_phrasetable.txt
22 | ant
23 | popd
24 | 
25 | chmod +x score.rb
26 | 
27 | echo All setup.  Licenses for the various metrics you just downloaded are in mteval-v13.pl, tercom-0.7.25/LICENSE.txt, terp-pt.v1/LICENSE.txt, and meteor-1.0/files/LICENSE .  
28 | 


--------------------------------------------------------------------------------
/bjam:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
# Wrapper around bjam: delegate to a usable bjam found in PATH, otherwise
# build (if necessary) and run the copy bundled under jam-files.
set -e
if
  bjam="$(which bjam 2>/dev/null)" && #exists
  [ ${#bjam} != 0 ] && #paranoia about which printing nothing then returning true
  ! grep UFIHGUFIHBDJKNCFZXAEVA "${bjam}" </dev/null >/dev/null && #bjam in path isn't this script
  "${bjam}" --help >/dev/null 2>/dev/null && #bjam in path isn't broken (i.e. has boost-build)
  "${bjam}" --version |grep "Boost.Build 201" >/dev/null 2>/dev/null #It's recent enough.  
then
  #Delegate to system bjam
  exec "${bjam}" "$@"
fi

# No suitable system bjam: fall back to the bundled engine, compiling it the
# first time (when jam-files/bjam is not yet an executable file).
top="$(dirname "$0")"
if [ ! -x "$top"/jam-files/bjam ]; then
  pushd "$top/jam-files/engine"
  ./build.sh
  cp -f bin.*/bjam ../bjam
  popd
fi

# Point the bundled engine at the bundled Boost.Build and hand over control.
export BOOST_BUILD_PATH="$top"/jam-files/boost-build 
exec "$top"/jam-files/bjam "$@"
24 | 


--------------------------------------------------------------------------------
/install/README:
--------------------------------------------------------------------------------
 1 | This automates installation of packages from source into a prefix directory.  
 2 | Command line is ./install.sh /some/prefix/directory "list of packages in order" -jparallelism where parallelism is the number of processors to use.
 3 | You probably want to set the environment variables CFLAGS and CXXFLAGS to something efficient.  See http://en.gentoo-wiki.com/wiki/Safe_Cflags for guidance.  
 4 | 
 5 | Once installed, you can run either
 6 | source /some/prefix/directory/environment.bash
 7 | or
 8 | source /some/prefix/directory/environment.tcsh
 9 | depending on your shell to setup the environment to use the installed packages.  
10 | 
11 | For MEMT's dependencies, run ./install.sh /some/prefix/directory "icu boost ruby" -jparallelism
12 | 
13 | Dependencies:
14 | MEMT decoder source code (provided in memt.tar.gz) -> icu boost
15 | MEMT tuning scripts (provided in memt.tar.gz) -> ruby
16 | boost -> icu
17 | 
18 | Note that there is no separate make and make install as some packages' make depends on other packages' make install, so there would be several iterations.  
19 | 


--------------------------------------------------------------------------------
/install/ant.sh:
--------------------------------------------------------------------------------
 1 | . lib.sh
 2 | VERSION_ANT=1.7.1
 3 | 
 4 | download_ant() {
 5 | 	download http://archive.apache.org/dist/ant/binaries/apache-ant-${VERSION_ANT}-bin.tar.bz2
 6 | }
 7 | 
 8 | compile_ant() {
 9 | 	[ -d apache-ant-${VERSION_ANT} ] && rm -rf apache-ant-${VERSION_ANT}
10 | 	chk tar xjf apache-ant-${VERSION_ANT}-bin.tar.bz2
11 | 	chk pushd apache-ant-${VERSION_ANT}
12 | 	chk popd
13 | }
14 | 
# Copy Ant's bin and lib contents into the matching directories under PREFIX.
install_ant() {
	chk pushd apache-ant-${VERSION_ANT}
	local sub
	for sub in bin lib; do
		chk cp -a $sub/* $PREFIX/$sub
	done
	chk popd
}
21 | 


--------------------------------------------------------------------------------
/install/apache-ant-1.7.1-bin.tar.bz2.md5:
--------------------------------------------------------------------------------
1 | 9330447f3763b87570dd1118c49a8efd  apache-ant-1.7.1-bin.tar.bz2
2 | 


--------------------------------------------------------------------------------
/install/apache-ant-1.7.1-bin.tar.bz2.sha1:
--------------------------------------------------------------------------------
1 | b078ba89301687662f100da7b059105f32004f7c  apache-ant-1.7.1-bin.tar.bz2
2 | 


--------------------------------------------------------------------------------
/install/boost.sh:
--------------------------------------------------------------------------------
 1 | . lib.sh
 2 | BOOST_DOT_VERSION=1.49.0
 3 | BOOST_JUST_VERSION=1_49
 4 | BOOST_SHORT_VERSION=boost_${BOOST_JUST_VERSION}
 5 | BOOST_VERSION=boost_1_49_0
 6 | download_boost() {
 7 | 	download http://downloads.sourceforge.net/project/boost/boost/$BOOST_DOT_VERSION/${BOOST_VERSION}.tar.bz2
 8 | }
 9 | 
10 | #Depends on ICU installed
# Unpack Boost, bootstrap it against the ICU found under PREFIX, and build
# the static and shared, single- and multi-threaded library variants.
compile_boost() {
	if ! tar xjf $BOOST_VERSION.tar.bz2; then
		fatal "Extract boost tarball"
	fi
	if ! pushd $BOOST_VERSION; then
		fatal "cd to boost directory"
	fi

	#Boost C++ libraries
	if ! ./bootstrap.sh --prefix=$PREFIX --libdir=$PREFIX/lib --with-icu=$PREFIX; then
		fatal "Failed to configure boost.  Is ICU installed properly?"
	fi
	chk ./b2 --prefix=$PREFIX --libdir=$PREFIX/lib --layout=tagged link=static,shared threading=single,multi $PARALLEL

	popd
}
21 | 
# Install the Boost libraries built by compile_boost into PREFIX, using the
# same b2 options as the build so no variant is rebuilt differently.
install_boost() {
	chk pushd $BOOST_VERSION

	#Boost C++ libraries
	# chk already aborts with a message on failure, so no extra "|| fatal".
	chk ./b2 --prefix=$PREFIX --libdir=$PREFIX/lib --layout=tagged link=static,shared threading=single,multi $PARALLEL install
	chk popd
}
29 | 


--------------------------------------------------------------------------------
/install/boost_1_49_0.tar.bz2.md5:
--------------------------------------------------------------------------------
1 | 0d202cb811f934282dea64856a175698  boost_1_49_0.tar.bz2
2 | 


--------------------------------------------------------------------------------
/install/boost_1_49_0.tar.bz2.sha1:
--------------------------------------------------------------------------------
1 | 26a52840e9d12f829e3008589abf0a925ce88524  boost_1_49_0.tar.bz2
2 | 


--------------------------------------------------------------------------------
/install/checksum.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
# Write <file>.md5 and <file>.sha1 next to the file named by the first argument.
for algo in md5 sha1; do
  ${algo}sum "$1" >"$1".$algo
done
4 | 


--------------------------------------------------------------------------------
/install/environment.bash:
--------------------------------------------------------------------------------
 1 | #Prepend to path variables, avoiding a trailing colon if initially empty
 2 | #This used to be a function, but people thought that implementation was too complicated
 3 | export PATH=$PREFIX/bin${PATH:+:$PATH}
 4 | export LD_LIBRARY_PATH=$PREFIX/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
 5 | export LIBRARY_PATH=$PREFIX/lib${LIBRARY_PATH:+:$LIBRARY_PATH}
 6 | export CPATH=$PREFIX/include${CPATH:+:$CPATH}
 7 | export BOOST_BUILD_PATH=$PREFIX/share/boost-build
 8 | export CLASSPATH=$PREFIX/classpath/zmert.jar:${CLASSPATH:+:$CLASSPATH}
 9 | export ANT_HOME=$PREFIX
10 | 


--------------------------------------------------------------------------------
/install/environment.tcsh:
--------------------------------------------------------------------------------
# csh/tcsh counterpart of environment.bash: prepend the PREFIX directories to
# the search-path variables.  Each variable is tested with $?NAME first so
# that an unset variable is assigned directly instead of being expanded.
# NOTE(review): environment.bash also exports CLASSPATH; this file does not.
# Confirm whether that omission is intentional.
if ( $?PATH == 0 ) then
        setenv PATH $PREFIX/bin
else
        setenv PATH $PREFIX/bin:$PATH
endif
# The shell variable "path" (word list) is updated as well as the PATH
# environment variable.
if ( $?path == 0 ) then
        set path = $PREFIX/bin
else
        set path = ($PREFIX/bin $path)
endif
if ( $?LD_LIBRARY_PATH == 0 ) then
        setenv LD_LIBRARY_PATH $PREFIX/lib
else
        setenv LD_LIBRARY_PATH $PREFIX/lib:$LD_LIBRARY_PATH
endif
if ( $?LIBRARY_PATH == 0 ) then
        setenv LIBRARY_PATH $PREFIX/lib
else
        setenv LIBRARY_PATH $PREFIX/lib:$LIBRARY_PATH
endif
if ( $?CPATH == 0 ) then
        setenv CPATH $PREFIX/include
else
        setenv CPATH $PREFIX/include:$CPATH
endif
# These two are overwritten unconditionally rather than prepended to.
setenv BOOST_BUILD_PATH $PREFIX/share/boost-build
setenv ANT_HOME $PREFIX
28 | 


--------------------------------------------------------------------------------
/install/icu.sh:
--------------------------------------------------------------------------------
 1 | . lib.sh
 2 | 
 3 | download_icu() {
 4 |         download http://download.icu-project.org/files/icu4c/4.6.1/icu4c-4_6_1-src.tgz
 5 | }
 6 | 
 7 | compile_icu() {
 8 |         [ -d icu ] && chk rm -rf icu
 9 |         chk tar xzvf icu4c-4_6_1-src.tgz
10 |         [ -d icu ] || fatal "ICU did not extract to icu directory"
11 |         chk pushd icu/source
12 |         ./configure --prefix=$PREFIX || fatal "Configuring ICU failed"
13 |         make || fatal "Making ICU failed" #No PARALLEL because I've seen them break ICU
14 |         chk popd
15 | }
16 | 
# Run "make install" in the ICU source tree prepared by compile_icu.
# Entering the directory and installing abort on failure via chk; the final
# popd is not checked.
install_icu() {
        chk pushd icu/source
        chk make install
        popd
}
22 | 


--------------------------------------------------------------------------------
/install/icu4c-4_6_1-src.tgz.md5:
--------------------------------------------------------------------------------
1 | da64675d85f0c2191cef93a8cb5eea88  icu4c-4_6_1-src.tgz
2 | 


--------------------------------------------------------------------------------
/install/icu4c-4_6_1-src.tgz.sha1:
--------------------------------------------------------------------------------
1 | b8bbf80dff1727a7528f9601b0502db1633658c3  icu4c-4_6_1-src.tgz
2 | 


--------------------------------------------------------------------------------
/install/lib.sh:
--------------------------------------------------------------------------------
 1 | fatal() {
 2 | 	echo Error: $1 1>&2
 3 | 	exit 1
 4 | }
 5 | 
 6 | chk() {
 7 | 	"$@" || fatal "cd \"$PWD\" && $*"
 8 | }
 9 | 
# Verify file $1 against $1.md5 and $1.sha1 when those checksum files exist.
# A failed verification aborts through chk.  A missing checksum file is not
# an error: the function then returns 0 instead of leaking the status of the
# failed "[ -f ... ]" test to the caller.
checksum() {
  if [ -f "$1.md5" ]; then
    chk md5sum -c "$1.md5"
  fi
  if [ -f "$1.sha1" ]; then
    chk sha1sum -c "$1.sha1"
  fi
  return 0
}
14 | 
# Download URL $1 into the current directory unless a file with the URL's
# base name is already there, then verify it with checksum.
# The URL and file name are quoted so that unusual characters cannot split
# the argument or turn the existence test into "[ -f ]".
download() {
	local file
	file=$(basename "$1")
	[ -f "$file" ] || chk wget "$1"
	checksum "$file"
}
19 | 
20 | 


--------------------------------------------------------------------------------
/install/ruby-1.9.1-p376.tar.gz.md5:
--------------------------------------------------------------------------------
1 | ebb20550a11e7f1a2fbd6fdec2a3e0a3  ruby-1.9.1-p376.tar.gz
2 | 


--------------------------------------------------------------------------------
/install/ruby-1.9.1-p376.tar.gz.sha1:
--------------------------------------------------------------------------------
1 | 05a520c97a4528951139a1efe9f4933dd4661adb  ruby-1.9.1-p376.tar.gz
2 | 


--------------------------------------------------------------------------------
/install/ruby-1.9.2-p0.tar.gz.md5:
--------------------------------------------------------------------------------
1 | 755aba44607c580fddc25e7c89260460  ruby-1.9.2-p0.tar.gz
2 | 


--------------------------------------------------------------------------------
/install/ruby-1.9.2-p0.tar.gz.sha1:
--------------------------------------------------------------------------------
1 | 9d79ebbf929e2f6c251fe7a9614b96a3d2427b1c  ruby-1.9.2-p0.tar.gz
2 | 


--------------------------------------------------------------------------------
/install/ruby.sh:
--------------------------------------------------------------------------------
 1 | . lib.sh
 2 | 
 3 | VERSION_RUBY=1.9.2-p0
 4 | download_ruby() {
 5 | 	download ftp://ftp.ruby-lang.org/pub/ruby/1.9/ruby-${VERSION_RUBY}.tar.gz
 6 | }
 7 | 
 8 | compile_ruby() {
 9 | 	[ -d ruby-${VERSION_RUBY} ] && chk rm -rf ruby-${VERSION_RUBY}
10 | 	chk tar xzvf ruby-${VERSION_RUBY}.tar.gz
11 | 	chk pushd ruby-${VERSION_RUBY}
12 | 	chk ./configure --prefix=$PREFIX
13 | 	chk make -j4
14 | 	chk popd
15 | }
16 | 
# Run "make install" in the Ruby source tree prepared by compile_ruby.
# Every step aborts on failure via chk.
install_ruby() {
	chk pushd ruby-${VERSION_RUBY}
	chk make install
	chk popd
}
22 | 


--------------------------------------------------------------------------------
/install/tercom-0.7.25.tgz.md5:
--------------------------------------------------------------------------------
1 | be1c818e48a782764f2c884d7ee6431c  tercom-0.7.25.tgz
2 | 


--------------------------------------------------------------------------------
/install/tercom-0.7.25.tgz.sha1:
--------------------------------------------------------------------------------
1 | d1545e873c86ea0fb3ce2e3744ec3d48508f5495  tercom-0.7.25.tgz
2 | 


--------------------------------------------------------------------------------
/install/tercom.sh:
--------------------------------------------------------------------------------
 1 | . lib.sh
 2 | 
 3 | VERSION_TERCOM=0.7.25
 4 | 
 5 | download_tercom() {
 6 | 	download http://www.cs.umd.edu/~snover/tercom/tercom-$VERSION_TERCOM.tgz
 7 | }
 8 | 
 9 | compile_tercom() {
10 | 	[ -d tercom-$VERSION_TERCOM ] && chk rm -rf tercom-$VERSION_TERCOM
11 | 	chk tar xzf tercom-$VERSION_TERCOM.tgz
12 | }
13 | 
# Copy the tercom jar into PREFIX/classpath, creating that directory if
# necessary.
install_tercom() {
	local dest=$PREFIX/classpath
	chk pushd tercom-$VERSION_TERCOM
	chk mkdir -p $dest
	chk cp tercom.7.25.jar $dest
	chk popd
}
20 | 


--------------------------------------------------------------------------------
/install/zmert.sh:
--------------------------------------------------------------------------------
 1 | . lib.sh
 2 | 
 3 | ZMERT_VERSION=zmert_v1.41
 4 | download_zmert() {
 5 | 	download http://www.cs.jhu.edu/~ozaidan/zmert/$ZMERT_VERSION.zip
 6 | }
 7 | 
 8 | compile_zmert() {
 9 | 	[ -d $ZMERT_VERSION ] && chk rm -rf $ZMERT_VERSION
10 | 	chk unzip $ZMERT_VERSION
11 | }
12 | 
# Copy zmert.jar into PREFIX/classpath, creating that directory if necessary.
# mkdir -p (as tercom.sh uses) also creates missing parent directories and
# succeeds when the directory already exists, so no separate test is needed.
install_zmert() {
	chk mkdir -p $PREFIX/classpath
	chk cp $ZMERT_VERSION/lib/zmert.jar $PREFIX/classpath/
}
17 | 


--------------------------------------------------------------------------------
/jam-files/LICENSE_1_0.txt:
--------------------------------------------------------------------------------
 1 | Boost Software License - Version 1.0 - August 17th, 2003
 2 | 
 3 | Permission is hereby granted, free of charge, to any person or organization
 4 | obtaining a copy of the software and accompanying documentation covered by
 5 | this license (the "Software") to use, reproduce, display, distribute,
 6 | execute, and transmit the Software, and to prepare derivative works of the
 7 | Software, and to permit third-parties to whom the Software is furnished to
 8 | do so, all subject to the following:
 9 | 
10 | The copyright notices in the Software and this entire statement, including
11 | the above license grant, this restriction and the following disclaimer,
12 | must be included in all copies of the Software, in whole or in part, and
13 | all derivative works of the Software, unless such copies or derivative
14 | works are solely in the form of machine-executable object code generated by
15 | a source language processor.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.
24 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/boost-build.jam:
--------------------------------------------------------------------------------
1 | # Copyright 2001, 2002 Dave Abrahams
2 | # Copyright 2002 Rene Rivera
3 | # Copyright 2003 Vladimir Prus
4 | # Distributed under the Boost Software License, Version 1.0.
5 | # (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
6 | 
7 | 
8 | boost-build kernel ;
9 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/bootstrap.jam:
--------------------------------------------------------------------------------
 1 | #  Copyright (c) 2003 Vladimir Prus.
 2 | #
 3 | #  Use, modification and distribution is subject to the Boost Software
 4 | #  License Version 1.0. (See accompanying file LICENSE_1_0.txt or
 5 | #  http://www.boost.org/LICENSE_1_0.txt)
 6 | 
# This file handles the initial phase of Boost.Build loading.
# Boost.Jam has already figured out where Boost.Build is
# and loads this file, which is responsible for initialization
# of basic facilities such as a module system and loading the
# main Boost.Build module, build-system.jam.
#
# Exact operation of this module is not interesting, it makes
# sense to look at build-system.jam right away.

# Load the kernel/bootstrap.jam, which does all the work.
# The path is taken relative to the directory of the current value of
# .bootstrap-file (the :D modifier selects the directory part).
.bootstrap-file = $(.bootstrap-file:D)/kernel/bootstrap.jam ;
include $(.bootstrap-file) ;


--------------------------------------------------------------------------------
/jam-files/boost-build/build/readme.txt:
--------------------------------------------------------------------------------
 1 | Copyright 2001, 2002 Dave Abrahams
 2 | Copyright 2002 Vladimir Prus
 3 | Distributed under the Boost Software License, Version 1.0.
 4 | (See accompanying file LICENSE_1_0.txt or copy at
 5 | http://www.boost.org/LICENSE_1_0.txt)
 6 | 
 7 | Development code for new build system. To run unit tests for jam code, execute:
 8 | 
 9 |   bjam --debug --build-system=test
10 | 
11 | Comprehensive tests require Python. See ../test/readme.txt
12 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/kernel/boost-build.jam:
--------------------------------------------------------------------------------
1 | # Copyright 2003 Dave Abrahams 
2 | # Distributed under the Boost Software License, Version 1.0. 
3 | # (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 
4 | 
5 | boost-build . ;
6 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/site-config.jam:
--------------------------------------------------------------------------------
 1 | # Copyright 1999-2012 Gentoo Foundation
 2 | # Distributed under the Boost Software License, Version 1.0. 
 3 | # (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 
 4 | 
 5 | # Define two new variants to be used when building boost (or separate boost-libs)
 6 | # on Gentoo. The two variants make use of Gentoo-specific optimization and debug-symbols
 7 | # values "none" which are not part of the official boost distribution.
 8 | # DO NOT RELY ON THE FOLLOWING VARIANTS TO BE PRESENT ON OTHER OS!
 9 | variant gentoorelease : release : <optimization>none <debug-symbols>none <runtime-link>shared ;
10 | variant gentoodebug   : debug   : <optimization>none <debug-symbols>on   <runtime-link>shared ;
11 | 
12 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/bison.jam:
--------------------------------------------------------------------------------
 1 | # Copyright 2003 Vladimir Prus 
 2 | # Distributed under the Boost Software License, Version 1.0. 
 3 | # (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 
 4 | 
 5 | import generators ;
 6 | import feature ;
 7 | import type ;
 8 | import property ;
 9 | 
10 | feature.feature bison.prefix : : free ;
11 | type.register Y : y ;
12 | type.register YY : yy ;
13 | generators.register-standard bison.bison : Y : C H ;
14 | generators.register-standard bison.bison : YY : CPP HPP ;
15 | 
# Toolset initialization hook.  Takes no arguments and does nothing.
rule init ( )
{
}
19 | 
# Set up per-target variables for the bison action.
#   dst, dst_header : the generated source and header targets.
#   src             : the grammar file.
#   properties      : build properties of the target.
# When a <bison.prefix> property is present, its value (with the grist
# removed by :G=) is stored on the targets as PREFIX_OPT = "-p <value>".
rule bison ( dst dst_header : src : properties * )
{
    local r = [ property.select bison.prefix : $(properties) ] ;
    if $(r)
    {
        PREFIX_OPT on $(<) = -p $(r:G=) ;
    }
}
28 | 
# Run bison on the source(s) $(>), writing to the first target $(<[1]) with
# -d given, plus the optional PREFIX_OPT set by the rule of the same name.
actions bison 
{
    bison $(PREFIX_OPT) -d -o $(<[1]) $(>)
}
33 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/boostbook-config.jam:
--------------------------------------------------------------------------------
 1 | #~ Copyright 2005 Rene Rivera.
 2 | #~ Distributed under the Boost Software License, Version 1.0.
 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 4 | 
 5 | # Automatic configuration for BoostBook tools. To use, just import this module.
 6 | #
 7 | # This module is deprecated.
 8 | #   using boostbook ;
 9 | # with no arguments now suffices.
10 | 
11 | import toolset : using ;
12 | 
13 | using boostbook ;
14 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/clang.jam:
--------------------------------------------------------------------------------
 1 | # Distributed under the Boost Software License, Version 1.0.
 2 | # (See accompanying file LICENSE_1_0.txt
 3 | # or copy at http://www.boost.org/LICENSE_1_0.txt)
 4 | 
# This is a generic 'clang' toolset. Depending on the current system, it
# forwards either to 'clang-linux' or 'clang-darwin' modules.

import feature ;
import os ;
import toolset ;

feature.extend toolset : clang ;
feature.subfeature toolset clang : platform : : propagated link-incompatible ;

# Forward all nine argument lists unchanged to the platform toolset:
# clang-darwin when os.name reports MACOSX, clang-linux on every other system.
rule init ( * : * )
{
    if [ os.name ] = MACOSX
    {
        toolset.using clang-darwin : 
          $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
    }
    else
    {
        toolset.using clang-linux : 
          $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
    }
}
28 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/como.jam:
--------------------------------------------------------------------------------
 1 | # Copyright Vladimir Prus 2004.
 2 | # Distributed under the Boost Software License, Version 1.0.
 3 | # (See accompanying file LICENSE_1_0.txt
 4 | # or copy at http://www.boost.org/LICENSE_1_0.txt)
 5 | 
 6 | # This is a generic 'como' toolset. Depending on the current system, it
 7 | # forwards either to 'como-linux' or 'como-win' modules.
 8 | 
 9 | import feature ;
10 | import os ;
11 | import toolset ;
12 | 
13 | feature.extend toolset : como ;
14 | feature.subfeature toolset como : platform : : propagated link-incompatible ;
15 | 
# Forward all nine argument lists unchanged to the platform toolset:
# como-linux when os.name reports LINUX, como-win on every other system.
rule init ( * : * )
{
    if [ os.name ] = LINUX
    {
        toolset.using como-linux : 
          $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
    }
    else
    {
        toolset.using como-win :
          $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;

    }        
}
30 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/cw-config.jam:
--------------------------------------------------------------------------------
 1 | #~ Copyright 2005 Rene Rivera.
 2 | #~ Distributed under the Boost Software License, Version 1.0.
 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 4 | 
 5 | # Automatic configuration for CodeWarrior toolset. To use, just import this module.
 6 | 
 7 | import os ;
 8 | import toolset : using ;
 9 | 
# Windows only: look in the registry for CodeWarrior releases 9, 8 and 7 and
# register a 'cw' toolset for every release whose install path is found.
if [ os.name ] = NT
{
    for local R in 9 8 7
    {
        # Primary registry location: the "Product Versions" key.
        local cw-path = [ W32_GETREG
            "HKEY_LOCAL_MACHINE\\SOFTWARE\\Metrowerks\\CodeWarrior\\Product Versions\\CodeWarrior for Windows R$(R)"
            : "PATH" ] ;
        local cw-version = [ W32_GETREG
            "HKEY_LOCAL_MACHINE\\SOFTWARE\\Metrowerks\\CodeWarrior\\Product Versions\\CodeWarrior for Windows R$(R)"
            : "VERSION" ] ;
        # Fallbacks (?= assigns only when still unset): an alternative key
        # for the path, and "<R>.0" for the version.
        cw-path ?= [ W32_GETREG
            "HKEY_LOCAL_MACHINE\\SOFTWARE\\Metrowerks\\CodeWarrior for Windows\\$(R).0"
            : "PATH" ] ;
        cw-version ?= $(R).0 ;
        
        if $(cw-path)
        {
            # With --debug-configuration on the command line, echo the
            # toolset declaration that is about to be made.
            if --debug-configuration in [ modules.peek : ARGV ]
            {
                ECHO "notice:" using cw ":" $(cw-version) ":" "$(cw-path)\\Other Metrowerks Tools\\Command Line Tools\\mwcc.exe" ;
            }
            using cw : $(cw-version) : "$(cw-path)\\Other Metrowerks Tools\\Command Line Tools\\mwcc.exe" ;
        }
    }
}
35 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/doxygen-config.jam:
--------------------------------------------------------------------------------
 1 | #~ Copyright 2005, 2006 Rene Rivera.
 2 | #~ Distributed under the Boost Software License, Version 1.0.
 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 4 | 
 5 | # Automatic configuration for Doxygen tools. To use, just import this module.
 6 | 
 7 | import toolset : using ;
 8 | 
 9 | ECHO "warning: doxygen-config.jam is deprecated. Use 'using doxygen ;' instead." ;
10 | 
11 | using doxygen ;
12 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/doxygen/windows-paths-check.doxyfile:
--------------------------------------------------------------------------------
1 | INPUT = windows-paths-check.hpp
2 | GENERATE_HTML = NO
3 | GENERATE_LATEX = NO
4 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/doxygen/windows-paths-check.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kpu/MEMT/cfd150b33c33320ee74d643a23e8e909f77a2994/jam-files/boost-build/tools/doxygen/windows-paths-check.hpp


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/gfortran.jam:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2004 Toon Knapen
 2 | #
 3 | #  Use, modification and distribution is subject to the Boost Software
 4 | #  License Version 1.0. (See accompanying file LICENSE_1_0.txt or
 5 | #  http://www.boost.org/LICENSE_1_0.txt)
 6 | 
 7 | import toolset : flags ;
 8 | import feature ;
 9 | import fortran ;
10 | 
11 | rule init ( version ? : command * : options * )
12 | {
13 | }
14 | 
15 | # Declare flags and action for compilation
16 | flags gfortran OPTIONS <fflags> ;
17 | 
18 | flags gfortran OPTIONS <optimization>off : -O0 ;
19 | flags gfortran OPTIONS <optimization>speed : -O3 ;
20 | flags gfortran OPTIONS <optimization>space : -Os ;
21 | 
22 | flags gfortran OPTIONS <debug-symbols>on : -g ;
23 | flags gfortran OPTIONS <profiling>on : -pg ;
24 | 
25 | flags gfortran OPTIONS <link>shared/<main-target-type>LIB : -fPIC ;
26 | 
27 | flags gfortran DEFINES <define> ;
28 | flags gfortran INCLUDES <include> ;
29 | 
30 | rule compile.fortran
31 | {
32 | }
33 | 
34 | actions compile.fortran
35 | {
36 |   gcc -Wall $(OPTIONS) -D$(DEFINES) -I$(INCLUDES) -c -o "$(<)" "$(>)" 
37 | }
38 | 
39 | generators.register-fortran-compiler gfortran.compile.fortran : FORTRAN FORTRAN90 : OBJ ; 
40 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/hpfortran.jam:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2004 Toon Knapen
 2 | #
 3 | #  Use, modification and distribution is subject to the Boost Software
 4 | #  License Version 1.0. (See accompanying file LICENSE_1_0.txt or
 5 | #  http://www.boost.org/LICENSE_1_0.txt)
 6 | 
 7 | import toolset : flags ;
 8 | import feature ;
 9 | import fortran ;
10 | 
11 | rule init ( version ? : command * : options * )
12 | {
13 | }
14 | 
15 | # Declare flags and action for compilation
16 | flags hpfortran OPTIONS <optimization>off : -O0 ;
17 | flags hpfortran OPTIONS <optimization>speed : -O3 ;
18 | flags hpfortran OPTIONS <optimization>space : -O1 ;
19 | 
20 | flags hpfortran OPTIONS <debug-symbols>on : -g ;
21 | flags hpfortran OPTIONS <profiling>on : -pg ;
22 | 
23 | flags hpfortran DEFINES <define> ;
24 | flags hpfortran INCLUDES <include> ;
25 | 
26 | rule compile.fortran
27 | {
28 | }
29 | 
30 | actions compile.fortran
31 | {
32 |   f77 +DD64 $(OPTIONS) -D$(DEFINES) -I$(INCLUDES) -c -o "$(<)" "$(>)" 
33 | }
34 | 
35 | generators.register-fortran-compiler hpfortran.compile.fortran : FORTRAN : OBJ ; 
36 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/intel.jam:
--------------------------------------------------------------------------------
 1 | # Copyright Vladimir Prus 2004.
 2 | # Distributed under the Boost Software License, Version 1.0.
 3 | # (See accompanying file LICENSE_1_0.txt
 4 | # or copy at http://www.boost.org/LICENSE_1_0.txt)
 5 | 
 6 | # This is a generic 'intel' toolset. Depending on the current
 7 | # system, it forwards either to 'intel-linux' or 'intel-win'
 8 | # modules.
 9 | 
10 | import feature ;
11 | import os ;
12 | import toolset ;
13 | 
14 | feature.extend toolset : intel ;
15 | feature.subfeature toolset intel : platform : : propagated link-incompatible ;
16 | 
17 | rule init ( * : * )
18 | {
19 |     # Pick the platform-specific Intel toolset module for the host OS and
20 |     # hand argument lists 1 through 9 over to it unchanged.
21 |     local backend ;
22 |     switch [ os.name ]
23 |     {
24 |         case LINUX :
25 |             backend = intel-linux ;
26 |         case MACOSX :
27 |             backend = intel-darwin ;
28 |         case * :
29 |             # Every other host falls back to the Windows variant.
30 |             backend = intel-win ;
31 |     }
32 |     toolset.using $(backend) :
33 |       $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ;
34 | }
35 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/lex.jam:
--------------------------------------------------------------------------------
 1 | # Copyright 2003 Vladimir Prus 
 2 | # Distributed under the Boost Software License, Version 1.0. 
 3 | # (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 
 4 | 
 5 | import type ;
 6 | import generators ;
 7 | import feature ;
 8 | import property ;
 9 | 
10 | 
11 | feature.feature flex.prefix : : free ;
12 | type.register LEX : l ;
13 | type.register LEX++ : ll ;
14 | generators.register-standard lex.lex : LEX : C ;
15 | generators.register-standard lex.lex : LEX++ : CPP ;
16 | 
17 | rule init ( )
18 | {
19 | }
20 | # Rule half of lex.lex: records the requested flex prefix for the action below.
21 | rule lex ( target : source : properties * )
22 | {
23 |     local prefix-property = [ property.select flex.prefix : $(properties) ] ;
24 |     if $(prefix-property)
25 |     {
26 |         PREFIX on $(target) = $(prefix-property:G=) ;
27 |     }
28 | }
29 | 
30 | actions lex 
31 | {
32 |     flex -P$(PREFIX) -o$(<) $(>)    
33 | }
34 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/mc.jam:
--------------------------------------------------------------------------------
 1 | #~ Copyright 2005 Alexey Pakhunov.
 2 | #~ Distributed under the Boost Software License, Version 1.0.
 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 4 | 
 5 | #  Support for Microsoft message compiler tool.
 6 | #  Notes:
 7 | #  - there is only the message compiler tool; there is no tool for
 8 | #    extracting message strings from sources
 9 | #  - This file allows the Microsoft message compiler to be used
10 | #    with any toolset. In msvc.jam, there is a more specific
11 | #    message compiling action.
12 | 
13 | import common ;
14 | import generators ;
15 | import feature : feature get-values ;
16 | import toolset : flags ;
17 | import type ;
18 | import rc ;
19 | 
20 | rule init ( )
21 | {
22 | }
23 | 
24 | type.register MC : mc ;
25 | 
26 | 
27 | # Command line options
28 | feature mc-input-encoding : ansi unicode : free ;
29 | feature mc-output-encoding : unicode ansi : free ;
30 | feature mc-set-customer-bit : no yes : free ;
31 | 
32 | flags mc.compile MCFLAGS <mc-input-encoding>ansi : -a ;
33 | flags mc.compile MCFLAGS <mc-input-encoding>unicode : -u ;
34 | flags mc.compile MCFLAGS <mc-output-encoding>ansi : -A ;
35 | flags mc.compile MCFLAGS <mc-output-encoding>unicode : -U ;
36 | flags mc.compile MCFLAGS <mc-set-customer-bit>no : ;
37 | flags mc.compile MCFLAGS <mc-set-customer-bit>yes : -c ;
38 | 
39 | generators.register-standard mc.compile : MC : H RC ;
40 | 
41 | actions compile
42 | {
43 |     mc $(MCFLAGS) -h "$(<[1]:DW)" -r "$(<[2]:DW)" "$(>:W)"
44 | }
45 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/msvc-config.jam:
--------------------------------------------------------------------------------
 1 | #~ Copyright 2005 Rene Rivera.
 2 | #~ Distributed under the Boost Software License, Version 1.0.
 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 4 | 
 5 | # Automatic configuration for VisualStudio toolset. To use, just import this module.
 6 | 
 7 | import toolset : using ;
 8 | 
 9 | ECHO "warning: msvc-config.jam is deprecated. Use 'using msvc : all ;' instead." ;
10 | 
11 | using msvc : all ;
12 | 
13 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/python-config.jam:
--------------------------------------------------------------------------------
 1 | #~ Copyright 2005 Rene Rivera.
 2 | #~ Distributed under the Boost Software License, Version 1.0.
 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 4 | 
 5 | # Automatic configuration for Python tools and libraries. To use, just import this module.
 6 | 
 7 | import os ;
 8 | import toolset : using ;
 9 | 
10 | if [ os.name ] = NT
11 | {
12 |     for local R in 2.4 2.3 2.2
13 |     {
14 |         local python-path = [ W32_GETREG
15 |             "HKEY_LOCAL_MACHINE\\SOFTWARE\\Python\\PythonCore\\$(R)\\InstallPath" ] ;
16 |         local python-version = $(R) ;
17 |         
18 |         if $(python-path)
19 |         {
20 |             if --debug-configuration in [ modules.peek : ARGV ]
21 |             {
22 |                 ECHO "notice:" using python ":" $(python-version) ":" $(python-path) ;
23 |             }
24 |             using python : $(python-version) : $(python-path) ;
25 |         }
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/qt.jam:
--------------------------------------------------------------------------------
 1 | #  Copyright (c) 2006 Vladimir Prus.
 2 | #
 3 | #  Use, modification and distribution is subject to the Boost Software
 4 | #  License Version 1.0. (See accompanying file LICENSE_1_0.txt or
 5 | #  http://www.boost.org/LICENSE_1_0.txt)
 6 | 
 7 | #  Forwarding toolset file for the Qt GUI library. Forwards to the toolset file
 8 | #  for the current version of Qt.
 9 | 
10 | import qt4 ;
11 | # Passes every initialization argument through, unchanged, to qt4.init.
12 | rule init ( prefix : full_bin ? : full_inc ? : full_lib ? : version ? : condition * )
13 | {
14 |     qt4.init $(prefix) : $(full_bin)  : $(full_inc) : $(full_lib) : $(version) : $(condition) ;
15 | }
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/quickbook-config.jam:
--------------------------------------------------------------------------------
 1 | #~ Copyright 2005 Rene Rivera.
 2 | #~ Distributed under the Boost Software License, Version 1.0.
 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 4 | 
 5 | # Automatic configuration for the QuickBook tool. To use, just import this module.
 6 | 
 7 | import os ;
 8 | import toolset : using ;
 9 | 
10 | if [ os.name ] = NT
11 | {
12 |     local boost-dir = ;
13 |     for local R in snapshot cvs 1.33.0
14 |     {
15 |         boost-dir += [ W32_GETREG
16 |             "HKEY_LOCAL_MACHINE\\SOFTWARE\\Boost.org\\$(R)"
17 |             : "InstallRoot" ] ;
18 |     }
19 |     local quickbook-path = [ GLOB "$(boost-dir)\\bin" "\\Boost\\bin" : quickbook.exe ] ;
20 |     quickbook-path = $(quickbook-path[1]) ;
21 |     
22 |     if $(quickbook-path)
23 |     {
24 |         if --debug-configuration in [ modules.peek : ARGV ]
25 |         {
26 |             ECHO "notice:" using quickbook ":" $(quickbook-path) ;
27 |         }
28 |         using quickbook : $(quickbook-path) ;
29 |     }
30 | }
31 | else
32 | {
33 |     local quickbook-path = [ GLOB "/usr/local/bin" "/usr/bin" "/opt/bin" : quickbook ] ;
34 |     quickbook-path = $(quickbook-path[1]) ;
35 |     
36 |     if $(quickbook-path)
37 |     {
38 |         if --debug-configuration in [ modules.peek : ARGV ]
39 |         {
40 |             ECHO "notice:" using quickbook ":" $(quickbook-path) ;
41 |         }
42 |         using quickbook : $(quickbook-path) ;
43 |     }
44 | }
45 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/quickbook.jam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kpu/MEMT/cfd150b33c33320ee74d643a23e8e909f77a2994/jam-files/boost-build/tools/quickbook.jam


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/types/asm.jam:
--------------------------------------------------------------------------------
1 | # Copyright Craig Rodrigues 2005. Distributed under the Boost
2 | # Software License, Version 1.0. (See accompanying
3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
4 | type ASM : s S asm ;
5 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/types/exe.jam:
--------------------------------------------------------------------------------
 1 | # Copyright David Abrahams 2004. Distributed under the Boost
 2 | # Software License, Version 1.0. (See accompanying
 3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 4 | 
 5 | import type ;
 6 | 
 7 | type.register EXE ;
 8 | type.set-generated-target-suffix EXE : <target-os>windows : "exe" ;
 9 | type.set-generated-target-suffix EXE : <target-os>cygwin : "exe" ;
10 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/types/html.jam:
--------------------------------------------------------------------------------
1 | # Copyright David Abrahams 2004. Distributed under the Boost
2 | # Software License, Version 1.0. (See accompanying
3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
4 | type HTML : html ;
5 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/types/obj.jam:
--------------------------------------------------------------------------------
 1 | # Copyright David Abrahams 2004. Distributed under the Boost
 2 | # Software License, Version 1.0. (See accompanying
 3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 4 | 
 5 | import type ;
 6 | 
 7 | type.register OBJ : o obj ;
 8 | type.set-generated-target-suffix OBJ : <target-os>windows : obj ;
 9 | type.set-generated-target-suffix OBJ : <target-os>cygwin : obj ;
10 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/types/objc.jam:
--------------------------------------------------------------------------------
 1 | # Copyright Rene Rivera 2008, 2010.
 2 | # Distributed under the Boost Software License, Version 1.0. (See accompanying
 3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 4 | import type ;
 5 | import scanner ;
 6 | import types/cpp ;
 7 | 
 8 | class objc-scanner : c-scanner
 9 | {
10 |     rule __init__ ( includes * )
11 |     {
12 |         c-scanner.__init__ $(includes) ;
13 |     }
14 |     # Returns the scanner's match pattern. NOTE(review): the top-level '|' appears to split this into '#[ \t]*include' alone or 'import[ ]*(...)' -- confirm the grouping is intended.
15 |     rule pattern ( )
16 |     {
17 |         return "#[ \t]*include|import[ ]*(<(.*)>|\"(.*)\")" ;
18 |     }
19 | }
20 | 
21 | scanner.register objc-scanner : include ;
22 | 
23 | type.register OBJECTIVE_C : m ;
24 | type.register OBJECTIVE_CPP : mm ;
25 | type.set-scanner OBJECTIVE_C : objc-scanner ;
26 | type.set-scanner OBJECTIVE_CPP : objc-scanner ;
27 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/types/preprocessed.jam:
--------------------------------------------------------------------------------
 1 | # Copyright Steven Watanabe 2011
 2 | # Distributed under the Boost Software License Version 1.0. (See
 3 | # accompanying file LICENSE_1_0.txt or copy at
 4 | # http://www.boost.org/LICENSE_1_0.txt)
 5 | 
 6 | import type ;
 7 | 
 8 | type.register PREPROCESSED_C : i : C ;
 9 | type.register PREPROCESSED_CPP : ii : CPP ;
10 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/types/qt.jam:
--------------------------------------------------------------------------------
 1 | # Copyright Vladimir Prus 2005. Distributed under the Boost
 2 | # Software License, Version 1.0. (See accompanying
 3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 4 | 
 5 | type UI : ui ;
 6 | type QRC : qrc ;
 7 | type MOCCABLE_CPP ;
 8 | type MOCCABLE_H ;
 9 | type MOCCABLE5_CPP ;
10 | type MOCCABLE5_H ;
11 | # Result of running moc.
12 | type MOC : moc : H ;
13 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/types/register.jam:
--------------------------------------------------------------------------------
 1 | # Copyright David Abrahams 2004. Distributed under the Boost
 2 | # Software License, Version 1.0. (See accompanying
 3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 4 | 
 5 | # This module's job is to automatically import all the type
 6 | # registration modules in its directory.
 7 | import type os path modules ;
 8 | 
 9 | # Register the given type unless it is already registered. When 'os' is
10 | # given, register only if the current [ os.name ] is listed in it. This rule
11 | # is injected into each of the type modules for the sake of convenience.
12 | local rule type ( type : suffixes * : base-type ? : os * )
13 | {
14 |     if ! [ type.registered $(type) ]
15 |     {
16 |         if ( ! $(os) ) || [ os.name ] in $(os)
17 |         {
18 |             type.register $(type) : $(suffixes) : $(base-type) ;
19 |         }
20 |     }
21 | }
22 | 
23 | .this-module's-file = [ modules.binding $(__name__) ] ;
24 | .this-module's-dir = [ path.parent $(.this-module's-file) ] ;
25 | .sibling-jamfiles =  [ path.glob $(.this-module's-dir) : *.jam ] ;
26 | .sibling-modules = [ MATCH ^(.*)\.jam$ : $(.sibling-jamfiles) ] ;
27 | 
28 | # A loop over all modules in this directory
29 | for m in $(.sibling-modules)
30 | {
31 |     m = [ path.basename $(m) ] ;
32 |     m = types/$(m) ;
33 |     
34 |     # Inject the type rule into the new module
35 |     IMPORT $(__name__) : type : $(m) : type ;
36 |     import $(m) ;
37 | }
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/types/rsp.jam:
--------------------------------------------------------------------------------
1 | # Copyright David Abrahams 2004. Distributed under the Boost
2 | # Software License, Version 1.0. (See accompanying
3 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
4 | type RSP : rsp ;
5 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/xlf.jam:
--------------------------------------------------------------------------------
 1 | # Copyright (C) 2004 Toon Knapen
 2 | #
 3 | #  Use, modification and distribution is subject to the Boost Software
 4 | #  License Version 1.0. (See accompanying file LICENSE_1_0.txt or
 5 | #  http://www.boost.org/LICENSE_1_0.txt)
 6 | 
 7 | #
 8 | # toolset configuration for the IBM Fortran compiler (xlf)
 9 | # 
10 | 
11 | import toolset : flags ;
12 | import feature ;
13 | import fortran ;
14 | 
15 | rule init ( version ? : command * : options * )
16 | {
17 | }
18 | 
19 | # Declare flags and action for compilation
20 | flags xlf OPTIONS <optimization>off : -O0 ;
21 | flags xlf OPTIONS <optimization>speed : -O3 ;
22 | flags xlf OPTIONS <optimization>space : -Os ;
23 | 
24 | flags xlf OPTIONS <debug-symbols>on : -g ;
25 | flags xlf OPTIONS <profiling>on : -pg ;
26 | 
27 | flags xlf DEFINES <define> ;
28 | flags xlf INCLUDES <include> ;
29 | 
30 | rule compile-fortran
31 | {
32 | }
33 | 
34 | actions compile-fortran
35 | {
36 |   xlf $(OPTIONS) -I$(INCLUDES) -c -o "$(<)" "$(>)" 
37 | }
38 | 
39 | generators.register-fortran-compiler xlf.compile-fortran : FORTRAN : OBJ ; 
40 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/xsltproc-config.jam:
--------------------------------------------------------------------------------
 1 | #~ Copyright 2005 Rene Rivera.
 2 | #~ Distributed under the Boost Software License, Version 1.0.
 3 | #~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 4 | 
 5 | # Automatic configuration for the xsltproc toolset. To use, just import this
 6 | # module.
 7 | 
 8 | import os ;
 9 | import toolset : using ;
10 | 
11 | 
12 | local rule locate-executable ( name )
13 | {
14 |     # Look for 'name' in the directories of the global PATH variable. On NT,
15 |     # C:\Boost\bin is searched as well and the .exe file name is used.
16 |     local dirs = [ modules.peek : PATH ] ;
17 |     local pattern = $(name) ;
18 |     if [ os.name ] = NT
19 |     {
20 |         dirs += "C:\\Boost\\bin" ;
21 |         pattern = $(name)\.exe ;
22 |     }
23 |     local found = [ GLOB $(dirs) : $(pattern) ] ;
24 |     return $(found[1]) ;
25 | }
26 | 
27 | 
28 | local xsltproc-exe = [ locate-executable xsltproc ] ;
29 | if $(xsltproc-exe)
30 | {
31 |     if --debug-configuration in [ modules.peek : ARGV ]
32 |     {
33 |         ECHO notice: using xsltproc ":" $(xsltproc-exe) ;
34 |     }
35 |     using xsltproc : $(xsltproc-exe) ;
36 | }
37 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/xsltproc/included.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!--
 3 |    Copyright (c) 2010 Steven Watanabe
 4 |   
 5 |    Distributed under the Boost Software License, Version 1.0.
 6 |    (See accompanying file LICENSE_1_0.txt or copy at
 7 |    http://www.boost.org/LICENSE_1_0.txt)
 8 |   -->
 9 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
10 |                 version="1.0">
11 | </xsl:stylesheet>
12 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/xsltproc/test.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <root/>
3 | 


--------------------------------------------------------------------------------
/jam-files/boost-build/tools/xsltproc/test.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!--
 3 |    Copyright (c) 2010 Steven Watanabe
 4 |   
 5 |    Distributed under the Boost Software License, Version 1.0.
 6 |    (See accompanying file LICENSE_1_0.txt or copy at
 7 |    http://www.boost.org/LICENSE_1_0.txt)
 8 |   -->
 9 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
10 |                 version="1.0">
11 |   <xsl:include href="included.xsl"/>
12 | </xsl:stylesheet>
13 | 


--------------------------------------------------------------------------------
/jam-files/engine/boost-no-inspect:
--------------------------------------------------------------------------------
1 | this really out of our hands, so tell inspect to ignore directory


--------------------------------------------------------------------------------
/jam-files/engine/class.h:
--------------------------------------------------------------------------------
 1 | /* Copyright Vladimir Prus 2003. Distributed under the Boost */
 2 | /* Software License, Version 1.0. (See accompanying */
 3 | /* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */
 4 | 
 5 | #ifndef CLASS_H_VP_2003_08_01
 6 | #define CLASS_H_VP_2003_08_01
 7 | 
 8 | #include "lists.h"
 9 | #include "frames.h"
10 | 
11 | OBJECT * make_class_module( LIST * xname, LIST * bases, FRAME * frame );
12 | void class_done( void );
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/jam-files/engine/cwd.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2002. Vladimir Prus
 3 |  * Distributed under the Boost Software License, Version 1.0.
 4 |  * (See accompanying file LICENSE_1_0.txt or copy at
 5 |  * http://www.boost.org/LICENSE_1_0.txt)
 6 |  */
 7 | 
 8 | /*
 9 |  * cwd.h - manages the current working folder information
10 |  */
11 | 
12 | #ifndef CWD_H
13 | #define CWD_H
14 | 
15 | #include "object.h"
16 | 
17 | 
18 | /* cwd() - returns the current working folder */
19 | OBJECT * cwd( void );
20 | 
21 | /* cwd_init() - initialize the cwd module functionality
22 |  *
23 |  *   The current working folder can not change in Boost Jam so this function
24 |  * gets the current working folder information from the OS and stores it
25 |  * internally.
26 |  *
27 |  *   Expected to be called at program startup before the program's current
28 |  * working folder has been changed
29 |  */
30 | void cwd_init( void );
31 | 
32 | /* cwd_done() - cleans up the cwd module functionality */
33 | void cwd_done( void );
34 | 
35 | #endif
36 | 


--------------------------------------------------------------------------------
/jam-files/engine/debian/control:
--------------------------------------------------------------------------------
 1 | Source: bjam
 2 | Section: devel
 3 | Priority: optional
 4 | Maintainer: Vladimir Prus <ghost@cs.msu.su>
 5 | Build-Depends: debhelper (>> 3.0.0), docbook-to-man, bison
 6 | Standards-Version: 3.5.2
 7 | 
 8 | Package: bjam
 9 | Architecture: any
10 | Depends: ${shlibs:Depends}
11 | Description: Build tool 
12 |  Boost.Jam is a portable build tool with its own interpreted language, which 
13 |  allows implementing rather complex logic in a readable way and without 
14 |  resorting to external programs. It is a descendant of Jam/MR tool modified to 
15 |  suit the needs of Boost.Build. In particular, modules and rule parameters
16 |  were added, as well as several new builtins.
17 | 


--------------------------------------------------------------------------------
/jam-files/engine/debian/copyright:
--------------------------------------------------------------------------------
 1 | This package was debianized by Vladimir Prus <ghost@cs.msu.su> on
 2 | Wed, 17 July 2002, 19:27:00 +0400.
 3 | 
 4 | Copyright:
 5 | 
 6 |     /+\
 7 |     +\	Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
 8 |     \+/
 9 | 
10 |     This is Release 2.4 of Jam/MR, a make-like program.
11 | 
12 |     License is hereby granted to use this software and distribute it
13 |     freely, as long as this copyright notice is retained and modifications 
14 |     are clearly marked.
15 | 
16 |     ALL WARRANTIES ARE HEREBY DISCLAIMED.
17 | 
18 | Some portions are also:
19 | 
20 |     Copyright 2001-2006 David Abrahams.
21 |     Copyright 2002-2006 Rene Rivera.
22 |     Copyright 2003-2006 Vladimir Prus.
23 |     
24 |     Distributed under the Boost Software License, Version 1.0.
25 |     (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
26 | 


--------------------------------------------------------------------------------
/jam-files/engine/frames.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright 2001-2004 David Abrahams.
 3 |  *  Distributed under the Boost Software License, Version 1.0.
 4 |  *  (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 5 |  */
 6 | 
 7 | #include "jam.h"
 8 | #include "frames.h"
 9 | 
10 | 
11 | FRAME * frame_before_python_call;
12 | 
13 | 
14 | void frame_init( FRAME * frame )
15 | {
16 |     frame->prev = 0;
17 |     frame->prev_user = 0;
18 |     lol_init( frame->args );
19 |     frame->module = root_module();
20 |     frame->rulename = "module scope";
21 |     frame->file = 0;
22 |     frame->line = -1;
23 | }
24 | 
 25 | /* frame_free() - hand the frame's args to lol_free(); no other field is touched. */
 26 | void frame_free( FRAME * frame )
 27 | {
 28 |     lol_free( frame->args );
 29 | }
30 | 


--------------------------------------------------------------------------------
/jam-files/engine/frames.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2001-2004 David Abrahams.
 3 |  * Distributed under the Boost Software License, Version 1.0.
 4 |  * (See accompanying file LICENSE_1_0.txt or copy at
 5 |  * http://www.boost.org/LICENSE_1_0.txt)
 6 |  */
 7 | 
 8 | #ifndef FRAMES_DWA20011021_H
 9 | #define FRAMES_DWA20011021_H
10 | 
11 | #include "lists.h"
12 | #include "modules.h"
13 | #include "object.h"
14 | 
15 | 
16 | typedef struct frame FRAME;
17 | /* A bjam frame. NOTE(review): presumably one call-stack entry linked via prev -- confirm. */
18 | struct frame
19 | {
20 |     FRAME      * prev;       /* Presumably the previous frame; 0 after frame_init(). */
21 |     FRAME      * prev_user;  /* The nearest enclosing frame for which
22 |                                 module->user_module is true. */
23 |     LOL          args[ 1 ];  /* Set up by lol_init() in frame_init(); passed to lol_free() by frame_free(). */
24 |     module_t   * module;     /* frame_init() defaults this to root_module(). */
25 |     OBJECT     * file;       /* 0 after frame_init(). */
26 |     int          line;       /* -1 after frame_init(). */
27 |     char const * rulename;   /* "module scope" after frame_init(). */
28 | };
29 | 
30 | 
31 | /* When a call into Python is in progress, this variable points to the bjam
32 |  * frame that was current at the moment of the call. When the call completes,
33 |  * the variable is not defined. Furthermore, if Jam calls Python which calls Jam
34 |  * and so on, this variable only keeps the most recent Jam frame.
35 |  */
36 | extern FRAME * frame_before_python_call;
37 | 
38 | 
39 | void frame_init( FRAME * );
40 | void frame_free( FRAME * );
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/jam-files/engine/hcache.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is not part of Jam
 3 |  */
 4 | 
 5 | /*
 6 |  * hcache.h - handle caching of #includes in source files
 7 |  */
 8 | #ifndef HCACHE_H
 9 | #define HCACHE_H
10 | 
11 | #include "lists.h"
12 | #include "regexp.h"
13 | #include "rules.h"
14 | 
15 | void hcache_init( void );
16 | void hcache_done( void );
17 | LIST * hcache( TARGET * t, int rec, regexp * re[], LIST * hdrscan );
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/jam-files/engine/hdrmacro.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 1993, 1995 Christopher Seiwald.
 3 |  *
 4 |  * This file is part of Jam - see jam.c for Copyright information.
 5 |  */
 6 | 
 7 | /*
 8 |  * hdrmacro.h - parses header files for #define MACRO  <filename> or
 9 |  *              #define MACRO  "filename" definitions
10 |  */
11 | 
12 | #ifndef HDRMACRO_SW20111118_H
13 | #define HDRMACRO_SW20111118_H
14 | 
15 | #include "object.h"
16 | #include "rules.h"
17 | 
18 | void macro_headers( TARGET * );
19 | OBJECT * macro_header_get( OBJECT * macro_name );
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/jam-files/engine/headers.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 1993, 1995 Christopher Seiwald.
 3 |  *
 4 |  * This file is part of Jam - see jam.c for Copyright information.
 5 |  */
 6 | 
 7 | /*
 8 |  * headers.h - handle #includes in source files
 9 |  */
10 | 
11 | #ifndef HEADERS_SW20111118_H
12 | #define HEADERS_SW20111118_H
13 | 
14 | #include "object.h"
15 | #include "rules.h"
16 | #include "regexp.h"
17 | 
18 | void headers( TARGET * t );
19 | 
20 | #ifdef OPT_HEADER_CACHE_EXT
21 | struct regexp;
22 | LIST * headers1( LIST *l, OBJECT * file, int rec, struct regexp *re[] );
23 | #endif
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/jam-files/engine/jambase.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 1993, 1995 Christopher Seiwald.
 3 |  *
 4 |  * This file is part of Jam - see jam.c for Copyright information.
 5 |  */
 6 | 
 7 | /*
 8 |  * jambase.h - declaration for the internal jambase
 9 |  *
10 |  * The file Jambase is turned into a C array of strings in jambase.c
11 |  * so that it can be built in to the executable.  This is the
12 |  * declaration for that array.
13 |  */
14 | 
15 | extern char *jambase[];
16 | 


--------------------------------------------------------------------------------
/jam-files/engine/jamgramtab.h:
--------------------------------------------------------------------------------
 1 |     { "!", _BANG_t },
 2 |     { "!=", _BANG_EQUALS_t },
 3 |     { "&", _AMPER_t },
 4 |     { "&&", _AMPERAMPER_t },
 5 |     { "(", _LPAREN_t },
 6 |     { ")", _RPAREN_t },
 7 |     { "+=", _PLUS_EQUALS_t },
 8 |     { ":", _COLON_t },
 9 |     { ";", _SEMIC_t },
10 |     { "<", _LANGLE_t },
11 |     { "<=", _LANGLE_EQUALS_t },
12 |     { "=", _EQUALS_t },
13 |     { ">", _RANGLE_t },
14 |     { ">=", _RANGLE_EQUALS_t },
15 |     { "?=", _QUESTION_EQUALS_t },
16 |     { "[", _LBRACKET_t },
17 |     { "]", _RBRACKET_t },
18 |     { "actions", ACTIONS_t },
19 |     { "bind", BIND_t },
20 |     { "case", CASE_t },
21 |     { "class", CLASS_t },
22 |     { "default", DEFAULT_t },
23 |     { "else", ELSE_t },
24 |     { "existing", EXISTING_t },
25 |     { "for", FOR_t },
26 |     { "if", IF_t },
27 |     { "ignore", IGNORE_t },
28 |     { "in", IN_t },
29 |     { "include", INCLUDE_t },
30 |     { "local", LOCAL_t },
31 |     { "module", MODULE_t },
32 |     { "on", ON_t },
33 |     { "piecemeal", PIECEMEAL_t },
34 |     { "quietly", QUIETLY_t },
35 |     { "return", RETURN_t },
36 |     { "rule", RULE_t },
37 |     { "switch", SWITCH_t },
38 |     { "together", TOGETHER_t },
39 |     { "updated", UPDATED_t },
40 |     { "while", WHILE_t },
41 |     { "{", _LBRACE_t },
42 |     { "|", _BAR_t },
43 |     { "||", _BARBAR_t },
44 |     { "}", _RBRACE_t },
45 | 


--------------------------------------------------------------------------------
/jam-files/engine/make.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 1993, 1995 Christopher Seiwald.
 3 |  *
 4 |  * This file is part of Jam - see jam.c for Copyright information.
 5 |  */
 6 | 
 7 | /*
 8 |  * make.h - bring a target up to date, once rules are in place
 9 |  */
10 | 
11 | #ifndef MAKE_SW20111118_H
12 | #define MAKE_SW20111118_H
13 | 
14 | #include "lists.h"
15 | #include "object.h"
16 | #include "rules.h"
17 | 
18 | int make( LIST * targets, int anyhow );
19 | int make1( LIST * t );
20 | 
21 | typedef struct {
22 |     int temp;
23 |     int updating;
24 |     int cantfind;
25 |     int cantmake;
26 |     int targets;
27 |     int made;
28 | } COUNTS ;
29 | 
30 | 
31 | void make0( TARGET * t, TARGET * p, int depth, COUNTS * counts, int anyhow,
32 |     TARGET * rescanning );
33 | 
34 | 
35 | /* Specifies that the target should be updated. */
36 | void mark_target_for_updating( OBJECT * target );
37 | 
38 | /* Returns targets previously passed to mark_target_for_updating(). */
39 | LIST * targets_to_update();
40 | 
41 | /* Clears/unmarks all targets currently marked for update. */
42 | void clear_targets_to_update();
43 | 
44 | #endif
45 | 


--------------------------------------------------------------------------------
/jam-files/engine/modules/path.c:
--------------------------------------------------------------------------------
 1 | /* Copyright Vladimir Prus 2003.
 2 |  * Distributed under the Boost Software License, Version 1.0.
 3 |  * (See accompanying file LICENSE_1_0.txt or copy at
 4 |  * http://www.boost.org/LICENSE_1_0.txt)
 5 |  */
 6 | 
 7 | #include "../constants.h"
 8 | #include "../frames.h"
 9 | #include "../lists.h"
10 | #include "../native.h"
11 | #include "../timestamp.h"
12 | 
13 | 
14 | LIST * path_exists( FRAME * frame, int flags )
15 | {
16 |     return file_query( list_front( lol_get( frame->args, 0 ) ) ) ?
17 |         list_new( object_copy( constant_true ) ) : L0;
18 | }
19 | 
20 | 
21 | void init_path()
22 | {
23 |     char const * args[] = { "location", 0 };
24 |     declare_native_rule( "path", "exists", args, path_exists, 1 );
25 | }
26 | 


--------------------------------------------------------------------------------
/jam-files/engine/modules/readme.txt:
--------------------------------------------------------------------------------
1 | 
2 | This directory contains sources which declare native
3 | rules for Boost.Build modules.


--------------------------------------------------------------------------------
/jam-files/engine/modules/set.c:
--------------------------------------------------------------------------------
 1 | /* Copyright Vladimir Prus 2003. Distributed under the Boost */
 2 | /* Software License, Version 1.0. (See accompanying */
 3 | /* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */
 4 | 
 5 | #include "../native.h"
 6 | #include "../object.h"
 7 | 
 8 | /*
 9 |     local result = ;
10 |     local element ;
11 |     for element in $(B)
12 |     {
13 |         if ! ( $(element) in $(A) )
14 |         {
15 |             result += $(element) ;
16 |         }
17 |     }
18 |     return $(result) ;
19 | */
20 | LIST *set_difference( FRAME *frame, int flags )
21 | {
22 | 
23 |     LIST* b = lol_get( frame->args, 0 );    
24 |     LIST* a = lol_get( frame->args, 1 );    
25 | 
26 |     LIST* result = L0;
27 |     LISTITER iter = list_begin( b ), end = list_end( b );
28 |     for( ; iter != end; iter = list_next( iter ) )
29 |     {
30 |         if (!list_in(a, list_item(iter)))
31 |             result = list_push_back(result, object_copy(list_item(iter)));
32 |     }
33 |     return result;
34 | }
35 | 
36 | void init_set()
37 | {
38 |     {
39 |         const char* args[] = { "B", "*", ":", "A", "*", 0 };
40 |         declare_native_rule("set", "difference", args, set_difference, 1);
41 |     }
42 | 
43 | }
44 | 


--------------------------------------------------------------------------------
/jam-files/engine/native.c:
--------------------------------------------------------------------------------
 1 | /* Copyright 2003. Vladimir Prus
 2 |  * Distributed under the Boost Software License, Version 1.0.
 3 |  * (See accompanying file LICENSE_1_0.txt or copy at
 4 |  * http://www.boost.org/LICENSE_1_0.txt)
 5 |  */
 6 | 
 7 | #include "native.h"
 8 | 
 9 | #include "hash.h"
10 | 
11 | #include <assert.h>
12 | 
13 | 
14 | void declare_native_rule( char const * module, char const * rule,
15 |     char const * * args, LIST * (*f)( FRAME *, int ), int version )
16 | {
17 |     OBJECT * const module_obj = module ? object_new( module ) : 0 ;
18 |     module_t * m = bindmodule( module_obj );
19 |     if ( module_obj )
20 |         object_free( module_obj );
21 |     if ( !m->native_rules )
22 |         m->native_rules = hashinit( sizeof( native_rule_t ), "native rules" );
23 | 
24 |     {
25 |         OBJECT * const name = object_new( rule );
26 |         int found;
27 |         native_rule_t * const np = (native_rule_t *)hash_insert(
28 |             m->native_rules, name, &found );
29 |         np->name = name;
30 |         assert( !found );
31 |         np->procedure = function_builtin( f, 0, args );
32 |         np->version = version;
33 |     }
34 | }
35 | 


--------------------------------------------------------------------------------
/jam-files/engine/native.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2003. David Abrahams
 2 |  * Distributed under the Boost Software License, Version 1.0.
 3 |  * (See accompanying file LICENSE_1_0.txt or copy at
 4 |  * http://www.boost.org/LICENSE_1_0.txt)
 5 |  */
 6 | 
 7 | #ifndef NATIVE_H_VP_2003_12_09
 8 | #define NATIVE_H_VP_2003_12_09
 9 | 
10 | #include "function.h"
11 | #include "frames.h"
12 | #include "lists.h"
13 | #include "object.h"
14 | 
15 | typedef struct native_rule_t
16 | {
17 |     OBJECT * name;
18 |     FUNCTION * procedure;
19 | 
20 |     /* Version of the interface that the native rule provides. It is possible
21 |      * that we want to change the set of parameters for an existing native rule.
22 |      * In that case, the version number should be incremented so Boost.Build can
23 |      * check for the version it relies on.
24 |      *
25 |      * Versions are numbered from 1.
26 |     */
27 |     int version;
28 | } native_rule_t;
29 | /* MSVC debugger gets confused unless the native_rule_t typedef is provided. */
30 | 
31 | void declare_native_rule( char const * module, char const * rule,
32 |     char const * * args, LIST * (*f)( FRAME *, int ), int version );
33 | 
34 | #endif
35 | 


--------------------------------------------------------------------------------
/jam-files/engine/object.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2011 Steven Watanabe
 3 |  *
 4 |  * This file is part of Jam - see jam.c for Copyright information.
 5 |  */
 6 | 
 7 | /*
 8 |  * object.h - object manipulation routines
 9 |  */
10 | 
11 | #ifndef BOOST_JAM_OBJECT_H
12 | #define BOOST_JAM_OBJECT_H
13 | 
14 | typedef struct _object OBJECT;
15 | 
16 | OBJECT * object_new( char const * const );
17 | OBJECT * object_new_range( char const * const, int const size );
18 | void object_done( void );
19 | 
20 | #if defined(NDEBUG) && !defined(BJAM_NO_MEM_CACHE)
21 | 
22 | struct hash_header
23 | {
24 |     unsigned int hash;
25 |     struct hash_item * next;
26 | };
27 | 
28 | #define object_str( obj ) ((char const *)(obj))
29 | #define object_copy( obj ) (obj)
30 | #define object_free( obj ) ((void)0)
31 | #define object_equal( lhs, rhs ) ((lhs) == (rhs))
32 | #define object_hash( obj ) (((struct hash_header *)((char *)(obj) - sizeof(struct hash_header)))->hash)
33 | 
34 | #else
35 | 
36 | char const * object_str  ( OBJECT * );
37 | OBJECT *     object_copy ( OBJECT * );
38 | void         object_free ( OBJECT * );
39 | int          object_equal( OBJECT *, OBJECT * );
40 | unsigned int object_hash ( OBJECT * );
41 | 
42 | #endif
43 | 
44 | #endif
45 | 


--------------------------------------------------------------------------------
/jam-files/engine/option.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 1993, 1995 Christopher Seiwald.
 3 |  *
 4 |  * This file is part of Jam - see jam.c for Copyright information.
 5 |  */
 6 | 
 7 | /*
 8 |  * option.h - command line option processing
 9 |  *
10 |  * {o >o
11 |  *  \ -) "Command line option."
12 |  */
13 | 
14 | typedef struct bjam_option
15 | {
16 |     char flag;   /* filled in by getoption() */
17 |     char * val;  /* set to random address if true */
18 | } bjam_option;
19 | 
20 | #define N_OPTS 256
21 | 
22 | int    getoptions( int argc, char * * argv, char * opts, bjam_option * optv );
23 | char * getoptval( bjam_option * optv, char opt, int subopt );
24 | 


--------------------------------------------------------------------------------
/jam-files/engine/output.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |     Copyright 2007 Rene Rivera
 3 |     Distributed under the Boost Software License, Version 1.0.
 4 |     (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 5 | */
 6 | 
 7 | #ifndef BJAM_OUTPUT_H
 8 | #define BJAM_OUTPUT_H
 9 | 
10 | #include "object.h"
11 | #include "timestamp.h"
12 | 
13 | #define EXIT_OK 0
14 | #define EXIT_FAIL 1
15 | #define EXIT_TIMEOUT 2
16 | 
17 | void out_action(
18 |     char const * const action,
19 |     char const * const target,
20 |     char const * const command,
21 |     char const * const out_data,
22 |     char const * const err_data,
23 |     int const exit_reason
24 | );
25 | 
26 | OBJECT * outf_int( int const value );
27 | OBJECT * outf_double( double const value );
28 | OBJECT * outf_time( timestamp const * const value );
29 | 
30 | #endif
31 | 


--------------------------------------------------------------------------------
/jam-files/engine/patchlevel.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
 3 |  *
 4 |  * This file is part of Jam - see jam.c for Copyright information.
 5 |  */
 6 | 
 7 | /* Keep JAMVERSYM in sync with VERSION. */
 8 | /* It can be accessed as $(JAMVERSION) in the Jamfile. */
 9 | 
10 | #define VERSION_MAJOR 2011
11 | #define VERSION_MINOR 12
12 | #define VERSION_PATCH 1
13 | #define VERSION_MAJOR_SYM "2011"
14 | #define VERSION_MINOR_SYM "12"
15 | #define VERSION_PATCH_SYM "01"
16 | #define VERSION "2011.12.1"
17 | #define JAMVERSYM "JAMVERSION=2011.12"
18 | 


--------------------------------------------------------------------------------
/jam-files/engine/pathunix.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
 3 |  *
 4 |  * This file is part of Jam - see jam.c for Copyright information.
 5 |  */
 6 | 
 7 | /* This file is ALSO:
 8 |  * Copyright 2001-2004 David Abrahams.
 9 |  * Copyright 2005 Rene Rivera.
10 |  * Distributed under the Boost Software License, Version 1.0.
11 |  * (See accompanying file LICENSE_1_0.txt or copy at
12 |  * http://www.boost.org/LICENSE_1_0.txt)
13 |  */
14 | 
15 | /*
16 |  * pathunix.c - UNIX specific path manipulation support
17 |  */
18 | 
19 | #include "pathsys.h"
20 | 
21 | #include <stdlib.h>
22 | #include <unistd.h>  /* needed for getpid() */
23 | 
24 | 
25 | /*
26 |  * path_get_process_id_()
27 |  */
28 | 
29 | unsigned long path_get_process_id_( void )
30 | {
31 |     return getpid();
32 | }
33 | 
34 | 
35 | /*
36 |  * path_get_temp_path_()
37 |  */
38 | 
39 | void path_get_temp_path_( string * buffer )
40 | {
41 |     char const * t = getenv( "TMPDIR" );
42 |     string_append( buffer, t ? t : "/tmp" );
43 | }
44 | 
45 | 
46 | /*
47 |  * path_register_key()
48 |  */
49 | 
50 | void path_register_key( OBJECT * path )
51 | {
52 | }
53 | 
54 | 
55 | /*
56 |  * path_as_key()
57 |  */
58 | 
59 | OBJECT * path_as_key( OBJECT * path )
60 | {
61 |     return object_copy( path );
62 | }
63 | 
64 | 
65 | /*
66 |  * path_done()
67 |  */
68 | 
/* No-op in this UNIX implementation: the body is intentionally empty. */
void path_done( void )
{
}
72 | 


--------------------------------------------------------------------------------
/jam-files/engine/regexp.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Definitions etc. for regexp(3) routines.
 3 |  *
 4 |  * Caveat:  this is V8 regexp(3) [actually, a reimplementation thereof],
 5 |  * not the System V one.
 6 |  */
 7 | #ifndef REGEXP_DWA20011023_H
 8 | #define REGEXP_DWA20011023_H
 9 | 
10 | #define NSUBEXP  10
11 | typedef struct regexp {
12 |     char const * startp[ NSUBEXP ];
13 |     char const * endp[ NSUBEXP ];
14 |     char regstart;      /* Internal use only. */
15 |     char reganch;       /* Internal use only. */
16 |     char * regmust;     /* Internal use only. */
17 |     int regmlen;        /* Internal use only. */
18 |     char program[ 1 ];  /* Unwarranted chumminess with compiler. */
19 | } regexp;
20 | 
21 | 
22 | regexp * regcomp( char const * exp );
23 | int regexec( regexp * prog, char const * string );
24 | void regerror( char const * s );
25 | 
26 | 
27 | /*
28 |  * The first byte of the regexp internal "program" is actually this magic
29 |  * number; the start node begins in the second byte.
30 |  */
31 | #define MAGIC  0234
32 | 
33 | #endif
34 | 
35 | 


--------------------------------------------------------------------------------
/jam-files/engine/search.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 1993, 1995 Christopher Seiwald.
 3 |  *
 4 |  * This file is part of Jam - see jam.c for Copyright information.
 5 |  */
 6 | 
 7 | /*
 8 |  * search.h - find a target along $(SEARCH) or $(LOCATE)
 9 |  */
10 | 
11 | #ifndef SEARCH_SW20111118_H
12 | #define SEARCH_SW20111118_H
13 | 
14 | #include "object.h"
15 | #include "timestamp.h"
16 | 
17 | void set_explicit_binding( OBJECT * target, OBJECT * locate );
18 | OBJECT * search( OBJECT * target, timestamp * const time,
19 |     OBJECT * * another_target, int const file );
20 | void search_done( void );
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/jam-files/engine/strings.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2004. David Abrahams
 3 |  * Distributed under the Boost Software License, Version 1.0.
 4 |  * (See accompanying file LICENSE_1_0.txt or copy at
 5 |  * http://www.boost.org/LICENSE_1_0.txt)
 6 |  */
 7 | 
 8 | #ifndef STRINGS_DWA20011024_H
 9 | #define STRINGS_DWA20011024_H
10 | 
11 | #include <stddef.h>
12 | 
13 | typedef struct string
14 | {
15 |     char * value;
16 |     unsigned long size;
17 |     unsigned long capacity;
18 |     char opt[ 32 ];
19 | #ifndef NDEBUG
20 |     char magic[ 4 ];
21 | #endif
22 | } string;
23 | 
24 | void string_new( string * );
25 | void string_copy( string *, char const * );
26 | void string_free( string * );
27 | void string_append( string *, char const * );
28 | void string_append_range( string *, char const *, char const * );
29 | void string_push_back( string * s, char x );
30 | void string_reserve( string *, size_t );
31 | void string_truncate( string *, size_t );
32 | void string_pop_back( string * );
33 | char string_back( string * );
34 | void string_unit_test();
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/jam-files/engine/subst.h:
--------------------------------------------------------------------------------
 1 | /*  Copyright 2001-2004 David Abrahams.
 2 |  *  Distributed under the Boost Software License, Version 1.0.
 3 |  *  (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 4 |  */
 5 | 
 6 | #ifndef SUBST_JG20120722_H
 7 | #define SUBST_JG20120722_H
 8 | 
 9 | #include "object.h"
10 | #include "regexp.h"
11 | 
12 | regexp * regex_compile( OBJECT * pattern );
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/jam-files/engine/timestamp.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 1993, 1995 Christopher Seiwald.
 3 |  *
 4 |  * This file is part of Jam - see jam.c for Copyright information.
 5 |  */
 6 | 
 7 | /*
 8 |  * timestamp.h - get the timestamp of a file or archive member
 9 |  */
10 | 
11 | #ifndef TIMESTAMP_H_SW_2011_11_18
12 | #define TIMESTAMP_H_SW_2011_11_18
13 | 
14 | #include "object.h"
15 | 
16 | #ifdef OS_NT
17 | # define WIN32_LEAN_AND_MEAN
18 | # include <windows.h>
19 | #endif
20 | 
21 | #include <time.h>
22 | 
23 | typedef struct timestamp
24 | {
25 |     time_t secs;
26 |     int nsecs;
27 | } timestamp;
28 | 
29 | void timestamp_clear( timestamp * const );
30 | int timestamp_cmp( timestamp const * const lhs, timestamp const * const rhs );
31 | void timestamp_copy( timestamp * const target, timestamp const * const source );
32 | void timestamp_current( timestamp * const );
33 | int timestamp_empty( timestamp const * const );
34 | void timestamp_from_path( timestamp * const, OBJECT * const path );
35 | void timestamp_init( timestamp * const, time_t const secs, int const nsecs );
36 | void timestamp_max( timestamp * const max, timestamp const * const lhs,
37 |     timestamp const * const rhs );
38 | char const * timestamp_str( timestamp const * const );
39 | char const * timestamp_timestr( timestamp const * const );
40 | 
41 | #ifdef OS_NT
42 | void timestamp_from_filetime( timestamp * const, FILETIME const * const );
43 | #endif
44 | 
45 | void timestamp_done();
46 | 
47 | #endif
48 | 


--------------------------------------------------------------------------------
/jam-files/engine/variable.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 1993, 2000 Christopher Seiwald.
 3 |  *
 4 |  * This file is part of Jam - see jam.c for Copyright information.
 5 |  */
 6 | 
 7 | /*
 8 |  * variable.h - handle jam multi-element variables
 9 |  */
10 | 
11 | #ifndef VARIABLE_SW20111119_H
12 | #define VARIABLE_SW20111119_H
13 | 
14 | #include "lists.h"
15 | #include "object.h"
16 | 
17 | 
18 | struct module_t;
19 | 
20 | void   var_defines( struct module_t *, char * const * e, int preprocess );
21 | LIST * var_get( struct module_t *, OBJECT * symbol );
22 | void   var_set( struct module_t *, OBJECT * symbol, LIST * value, int flag );
23 | LIST * var_swap( struct module_t *, OBJECT * symbol, LIST * value );
24 | void   var_done( struct module_t * );
25 | 
26 | /*
27 |  * Defines for var_set().
28 |  */
29 | 
30 | #define VAR_SET      0   /* override previous value */
31 | #define VAR_APPEND   1   /* append to previous value */
32 | #define VAR_DEFAULT  2   /* set only if no previous value */
33 | 
34 | #endif
35 | 


--------------------------------------------------------------------------------
/jam-files/fail/Jamroot:
--------------------------------------------------------------------------------
1 | actions fail {
2 |   false
3 | }
4 | make fail : : fail ;
5 | 


--------------------------------------------------------------------------------
/lm/blank.hh:
--------------------------------------------------------------------------------
 1 | #ifndef LM_BLANK_H
 2 | #define LM_BLANK_H
 3 | 
#include <limits>

#include <math.h>
#include <stdint.h>
#include <string.h>
 8 | 
 9 | namespace lm {
10 | namespace ngram {
11 | 
12 | /* Suppose "foo bar" appears with zero backoff but there is no trigram
13 |  * beginning with these words.  Then, when scoring "foo bar", the model could
14 |  * return out_state containing "bar" or even null context if "bar" also has no
15 |  * backoff and is never followed by another word.  Then the backoff is set to
16 |  * kNoExtensionBackoff.  If the n-gram might be extended, then out_state must
17 |  * contain the full n-gram, in which case kExtensionBackoff is set.  In any
18 |  * case, if an n-gram has non-zero backoff, the full state is returned so
19 |  * backoff can be properly charged.  
20 |  * These differ only in sign bit because the backoff is in fact zero in either
21 |  * case.   
22 |  */
23 | const float kNoExtensionBackoff = -0.0;
24 | const float kExtensionBackoff = 0.0;
25 | const uint64_t kNoExtensionQuant = 0;
26 | const uint64_t kExtensionQuant = 1;
27 | 
28 | inline void SetExtension(float &backoff) {
29 |   if (backoff == kNoExtensionBackoff) backoff = kExtensionBackoff;
30 | }
31 | 
32 | // This compiles down nicely.  
33 | inline bool HasExtension(const float &backoff) {
34 |   typedef union { float f; uint32_t i; } UnionValue;
35 |   UnionValue compare, interpret;
36 |   compare.f = kNoExtensionBackoff;
37 |   interpret.f = backoff;
38 |   return compare.i != interpret.i;
39 | }
40 | 
41 | } // namespace ngram
42 | } // namespace lm
43 | #endif // LM_BLANK_H
44 | 


--------------------------------------------------------------------------------
/lm/builder/Jamfile:
--------------------------------------------------------------------------------
 1 | fakelib builder : [ glob *.cc : *test.cc *main.cc ] 
 2 |   ../../util//kenutil ../../util/stream//stream ../../util/double-conversion//double-conversion ..//kenlm
 3 |   : : : <library>/top//boost_thread $(timer-link) ;
 4 | 
 5 | exe lmplz : lmplz_main.cc builder /top//boost_program_options ;
 6 | 
 7 | exe dump_counts : dump_counts_main.cc builder ;
 8 | 
 9 | alias programs : lmplz dump_counts ;
10 | 
11 | import testing ;
12 | unit-test corpus_count_test : corpus_count_test.cc builder /top//boost_unit_test_framework ;
13 | unit-test adjust_counts_test : adjust_counts_test.cc builder /top//boost_unit_test_framework ;
14 | 


--------------------------------------------------------------------------------
/lm/builder/README.md:
--------------------------------------------------------------------------------
 1 | Dependencies
 2 | ============
 3 | 
 4 | Boost >= 1.42.0 is required.  
 5 | 
 6 | For Ubuntu,
 7 | ```bash
 8 | sudo apt-get install libboost1.48-all-dev
 9 | ```
10 | 
11 | Alternatively, you can download, compile, and install it yourself:
12 | 
13 | ```bash
14 | wget http://sourceforge.net/projects/boost/files/boost/1.52.0/boost_1_52_0.tar.gz/download -O boost_1_52_0.tar.gz
15 | tar -xvzf boost_1_52_0.tar.gz
16 | cd boost_1_52_0
17 | ./bootstrap.sh
18 | ./b2
19 | sudo ./b2 install
20 | ```
21 | 
22 | Local install options (in a user-space prefix directory) are also possible. See http://www.boost.org/doc/libs/1_52_0/doc/html/bbv2/installation.html.
23 | 
24 | 
25 | Building
26 | ========
27 | 
28 | ```bash
29 | bjam
30 | ```
31 | Your distribution might package bjam and boost-build separately from Boost.  Both are required.   
32 | 
33 | Usage
34 | =====
35 | 
36 | Run
37 | ```bash
38 | $ bin/lmplz
39 | ```
40 | to see the command-line arguments.
41 | 
42 | Running
43 | =======
44 | 
45 | ```bash
46 | bin/lmplz -o 5 <text >text.arpa
47 | ```
48 | 


--------------------------------------------------------------------------------
/lm/builder/TODO:
--------------------------------------------------------------------------------
1 | More tests!
2 | Sharding.
3 | Some way to manage all the crazy config options.
4 | Option to build the binary file directly.  
5 | Interpolation of different orders.  
6 | 


--------------------------------------------------------------------------------
/lm/builder/discount.hh:
--------------------------------------------------------------------------------
 1 | #ifndef LM_BUILDER_DISCOUNT_H
 2 | #define LM_BUILDER_DISCOUNT_H
 3 | 
 4 | #include <algorithm>
 5 | 
 6 | #include <stdint.h>
 7 | 
 8 | namespace lm {
 9 | namespace builder {
10 | 
// Discount amounts indexed by count; every count of 3 or more shares the
// last slot.
struct Discount {
  float amount[4];

  // Discount for the given count: amount[count] for counts 0..2, amount[3]
  // for anything larger.
  float Get(uint64_t count) const {
    const uint64_t kLastSlot = 3;
    return (count < kLastSlot) ? amount[count] : amount[kLastSlot];
  }

  // The count, as a float, with its discount subtracted.
  float Apply(uint64_t count) const {
    const float raw = static_cast<float>(count);
    return raw - Get(count);
  }
};
22 | 
23 | } // namespace builder
24 | } // namespace lm
25 | 
26 | #endif // LM_BUILDER_DISCOUNT_H
27 | 


--------------------------------------------------------------------------------
/lm/builder/hash_gamma.hh:
--------------------------------------------------------------------------------
 1 | #ifndef LM_BUILDER_HASH_GAMMA__
 2 | #define LM_BUILDER_HASH_GAMMA__
 3 | 
 4 | #include <stdint.h>
 5 | 
 6 | namespace lm { namespace builder {
 7 | 
 8 | #pragma pack(push)
 9 | #pragma pack(4)
10 | 
11 | struct HashGamma {
12 |     uint64_t hash_value;
13 |     float gamma;
14 | };
15 | 
16 | #pragma pack(pop)
17 | 
18 | }} // namespaces
19 | #endif // LM_BUILDER_HASH_GAMMA__
20 | 


--------------------------------------------------------------------------------
/lm/builder/header_info.hh:
--------------------------------------------------------------------------------
 1 | #ifndef LM_BUILDER_HEADER_INFO_H
 2 | #define LM_BUILDER_HEADER_INFO_H
 3 | 
 4 | #include <string>
 5 | #include <vector>
 6 | #include <stdint.h>
 7 | 
 8 | // Some configuration info that is used to add
 9 | // comments to the beginning of an ARPA file
struct HeaderInfo {
  std::string input_file;
  uint64_t token_count;
  std::vector<uint64_t> counts_pruned;

  // Empty header: no input file, zero tokens, no pruned counts.
  // token_count is initialised explicitly; a built-in integer member would
  // otherwise hold an indeterminate value.
  HeaderInfo() : token_count(0) {}

  // Copies the given input file name, token count and pruned counts.
  HeaderInfo(const std::string& input_file_in, uint64_t token_count_in, const std::vector<uint64_t> &counts_pruned_in)
    : input_file(input_file_in), token_count(token_count_in), counts_pruned(counts_pruned_in) {}

  // TODO: Add smoothing type
  // TODO: More info if multiple models were interpolated
};
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/lm/builder/interpolate.hh:
--------------------------------------------------------------------------------
 1 | #ifndef LM_BUILDER_INTERPOLATE_H
 2 | #define LM_BUILDER_INTERPOLATE_H
 3 | 
 4 | #include "util/stream/multi_stream.hh"
 5 | 
 6 | #include <vector>
 7 | 
 8 | #include <stdint.h>
 9 | 
10 | namespace lm { namespace builder {
11 |  
12 | /* Interpolate step.  
13 |  * Input: suffix sorted n-grams with (p_uninterpolated, gamma) from
14 |  * InitialProbabilities.
15 |  * Output: suffix sorted n-grams with complete probability
16 |  */
17 | class Interpolate {
18 |   public:
19 |     // Normally vocab_size is the unigram count-1 (since p(<s>) = 0) but might
20 |     // be larger when the user specifies a consistent vocabulary size.
21 |     explicit Interpolate(uint64_t vocab_size, const util::stream::ChainPositions &backoffs, const std::vector<uint64_t> &prune_thresholds, bool prune_vocab, bool output_q_);
22 | 
23 |     void Run(const util::stream::ChainPositions &positions);
24 | 
25 |   private:
26 |     float uniform_prob_;
27 |     util::stream::ChainPositions backoffs_;
28 |     const std::vector<uint64_t> prune_thresholds_;
29 |     bool prune_vocab_;
30 |     bool output_q_;
31 | };
32 | 
33 | }} // namespaces
34 | #endif // LM_BUILDER_INTERPOLATE_H
35 | 


--------------------------------------------------------------------------------
/lm/builder/output.cc:
--------------------------------------------------------------------------------
 1 | #include "lm/builder/output.hh"
 2 | #include "util/stream/multi_stream.hh"
 3 | 
 4 | #include <boost/ref.hpp>
 5 | 
namespace lm { namespace builder {

// Out-of-line destructor; the body is empty.
OutputHook::~OutputHook() {}

// Attaches this hook to the given chains by streaming a boost::ref wrapper
// of *this into them, i.e. the chains receive a reference to this object
// (presumably so the hook itself is not copied).
void OutputHook::Apply(util::stream::Chains &chains) {
  chains >> boost::ref(*this);
}

}} // namespaces
15 | 


--------------------------------------------------------------------------------
/lm/config.cc:
--------------------------------------------------------------------------------
 1 | #include "lm/config.hh"
 2 | 
 3 | #include <iostream>
 4 | 
 5 | namespace lm {
 6 | namespace ngram {
 7 | 
// Sets every option to its built-in default.  Notable values: progress
// display is on, messages go to std::cerr, no vocabulary enumerator and no
// mmap output file are set, the building memory budget is 1 GB, and the
// temporary directory prefix is empty.
Config::Config() :
  show_progress(true),
  messages(&std::cerr),
  enumerate_vocab(NULL),
  unknown_missing(COMPLAIN),
  sentence_marker_missing(THROW_UP),
  positive_log_probability(THROW_UP),
  unknown_missing_logprob(-100.0),
  probing_multiplier(1.5),
  building_memory(1073741824ULL), // 1 GB
  temporary_directory_prefix(""),
  arpa_complain(ALL),
  write_mmap(NULL),
  write_method(WRITE_AFTER),
  include_vocab(true),
  rest_function(REST_MAX),
  prob_bits(8),
  backoff_bits(8),
  pointer_bhiksha_bits(22),
  load_method(util::POPULATE_OR_READ) {}
28 | 
29 | } // namespace ngram
30 | } // namespace lm
31 | 


--------------------------------------------------------------------------------
/lm/enumerate_vocab.hh:
--------------------------------------------------------------------------------
 1 | #ifndef LM_ENUMERATE_VOCAB_H
 2 | #define LM_ENUMERATE_VOCAB_H
 3 | 
 4 | #include "lm/word_index.hh"
 5 | #include "util/string_piece.hh"
 6 | 
 7 | namespace lm {
 8 | 
 9 | /* If you need the actual strings in the vocabulary, inherit from this class
10 |  * and implement Add.  Then put a pointer in Config.enumerate_vocab; it does
11 |  * not take ownership.  Add is called once per vocab word.  index starts at 0
12 |  * and increases by 1 each time.  This is only used by the Model constructor;
13 |  * the pointer is not retained by the class.  
14 |  */
// Abstract callback interface for receiving vocabulary entries.
class EnumerateVocab {
  public:
    // Virtual so that subclasses are destroyed correctly through a base pointer.
    virtual ~EnumerateVocab() {}

    // Pure virtual: subclasses receive one (index, string) pair per call.
    virtual void Add(WordIndex index, const StringPiece &str) = 0;

  protected:
    // Protected constructor: only subclasses can be instantiated.
    EnumerateVocab() {}
};
24 | 
25 | } // namespace lm
26 | 
27 | #endif // LM_ENUMERATE_VOCAB_H
28 | 
29 | 


--------------------------------------------------------------------------------
/lm/filter/Jamfile:
--------------------------------------------------------------------------------
 1 | fakelib lm_filter : phrase.cc vocab.cc arpa_io.cc ../../util//kenutil : <threading>multi:<library>/top//boost_thread ;
 2 | 
 3 | obj main : filter_main.cc : <threading>single:<define>NTHREAD <include>../.. ;
 4 | 
 5 | exe filter : main lm_filter ../../util//kenutil ..//kenlm : <threading>multi:<library>/top//boost_thread ;
 6 | #Second name for MEMT
 7 | exe FilterLM : main lm_filter ../../util//kenutil ..//kenlm : <threading>multi:<library>/top//boost_thread ;
 8 | 
 9 | 
10 | exe phrase_table_vocab : phrase_table_vocab_main.cc ../../util//kenutil ;
11 | 


--------------------------------------------------------------------------------
/lm/filter/vocab.cc:
--------------------------------------------------------------------------------
 1 | #include "lm/filter/vocab.hh"
 2 | 
 3 | #include <istream>
 4 | #include <iostream>
 5 | 
 6 | #include <ctype.h>
 7 | 
 8 | namespace lm {
 9 | namespace vocab {
10 | 
11 | void ReadSingle(std::istream &in, boost::unordered_set<std::string> &out) {
12 |   in.exceptions(std::istream::badbit);
13 |   std::string word;
14 |   while (in >> word) {
15 |     out.insert(word);
16 |   }
17 | }
18 | 
19 | namespace {
// Consume whitespace following a word.  Returns true when a newline is
// consumed or the stream stops being readable; otherwise the first
// non-whitespace character is pushed back and false is returned.
bool IsLineEnd(std::istream &in) {
  for (;;) {
    const int c = in.get();
    if (!in || c == '\n') return true;
    if (!isspace(c)) break;
  }
  // Leave the non-space character for the next extraction.
  in.unget();
  return false;
}
30 | }// namespace
31 | 
32 | // Read space separated words in enter separated lines.  These lines can be
33 | // very long, so don't read an entire line at a time.  
34 | unsigned int ReadMultiple(std::istream &in, boost::unordered_map<std::string, std::vector<unsigned int> > &out) {
35 |   in.exceptions(std::istream::badbit);
36 |   unsigned int sentence = 0;
37 |   bool used_id = false;
38 |   std::string word;
39 |   while (in >> word) {
40 |     used_id = true;
41 |     std::vector<unsigned int> &posting = out[word];
42 |     if (posting.empty() || (posting.back() != sentence))
43 |       posting.push_back(sentence);
44 |     if (IsLineEnd(in)) {
45 |       ++sentence;
46 |       used_id = false;
47 |     }
48 |   }
49 |   return sentence + used_id;
50 | }
51 | 
52 | } // namespace vocab
53 | } // namespace lm
54 | 


--------------------------------------------------------------------------------
/lm/fragment_main.cc:
--------------------------------------------------------------------------------
 1 | #include "lm/binary_format.hh"
 2 | #include "lm/model.hh"
 3 | #include "lm/left.hh"
 4 | #include "util/tokenize_piece.hh"
 5 | 
// Load the model stored at `name`, then score each line of standard input as
// a fragment: every space-separated token is looked up in the model's
// vocabulary and fed to a RuleScore as a terminal, and the value returned by
// Finish() is printed on its own line.
template <class Model> void Query(const char *name) {
  Model model(name);
  std::string line;
  // Output state required by RuleScore; its contents are never read here.
  lm::ngram::ChartState ignored;
  while (getline(std::cin, line)) {
    // A fresh scorer per line, so lines are scored independently.
    lm::ngram::RuleScore<Model> scorer(model, ignored);
    // NOTE(review): the `true` template argument presumably skips empty tokens - confirm in util/tokenize_piece.hh.
    for (util::TokenIter<util::SingleCharacter, true> i(line, ' '); i; ++i) {
      scorer.Terminal(model.GetVocabulary().Index(*i));
    }
    std::cout << scorer.Finish() << '\n';
  }
}
18 | 
19 | int main(int argc, char *argv[]) {
20 |   if (argc != 2) {
21 |     std::cerr << "Expected model file name." << std::endl;
22 |     return 1;
23 |   }
24 |   const char *name = argv[1];
25 |   lm::ngram::ModelType model_type = lm::ngram::PROBING;
26 |   lm::ngram::RecognizeBinary(name, model_type);
27 |   switch (model_type) {
28 |     case lm::ngram::PROBING:
29 |       Query<lm::ngram::ProbingModel>(name);
30 |       break;
31 |     case lm::ngram::REST_PROBING:
32 |       Query<lm::ngram::RestProbingModel>(name);
33 |       break;
34 |     default:
35 |       std::cerr << "Model type not supported yet." << std::endl;
36 |   }
37 | }
38 | 


--------------------------------------------------------------------------------
/lm/lm_exception.cc:
--------------------------------------------------------------------------------
 1 | #include "lm/lm_exception.hh"
 2 | 
 3 | #include<errno.h>
 4 | #include<stdio.h>
 5 | 
 6 | namespace lm {
 7 | 
// Out-of-line constructors and destructors for the lm exception hierarchy.
// All bodies are empty and all are declared throw().
ConfigException::ConfigException() throw() {}
ConfigException::~ConfigException() throw() {}

LoadException::LoadException() throw() {}
LoadException::~LoadException() throw() {}

FormatLoadException::FormatLoadException() throw() {}
FormatLoadException::~FormatLoadException() throw() {}

VocabLoadException::VocabLoadException() throw() {}
VocabLoadException::~VocabLoadException() throw() {}

SpecialWordMissingException::SpecialWordMissingException() throw() {}
SpecialWordMissingException::~SpecialWordMissingException() throw() {}
22 | 
23 | } // namespace lm
24 | 


--------------------------------------------------------------------------------
/lm/lm_exception.hh:
--------------------------------------------------------------------------------
 1 | #ifndef LM_LM_EXCEPTION_H
 2 | #define LM_LM_EXCEPTION_H
 3 | 
 4 | // Named to avoid conflict with util/exception.hh.  
 5 | 
 6 | #include "util/exception.hh"
 7 | #include "util/string_piece.hh"
 8 | 
 9 | #include <exception>
10 | #include <string>
11 | 
12 | namespace lm {
13 | 
14 | typedef enum {THROW_UP, COMPLAIN, SILENT} WarningAction;
15 | 
// Exception type for configuration problems; derives from util::Exception.
class ConfigException : public util::Exception {
  public:
    ConfigException() throw();
    ~ConfigException() throw();
};

// Common base for the loading exceptions below.  The constructor is protected,
// so only derived classes can be instantiated.
class LoadException : public util::Exception {
   public:
      virtual ~LoadException() throw();

   protected:
      LoadException() throw();
};

// LoadException subtype for format problems.
class FormatLoadException : public LoadException {
  public:
    FormatLoadException() throw();
    ~FormatLoadException() throw();
};

// LoadException subtype for vocabulary problems.
class VocabLoadException : public LoadException {
  public:
    virtual ~VocabLoadException() throw();
    VocabLoadException() throw();
};

// VocabLoadException subtype; by its name, raised when a special word is missing.
class SpecialWordMissingException : public VocabLoadException {
  public:
    explicit SpecialWordMissingException() throw();
    ~SpecialWordMissingException() throw();
};
47 | 
48 | } // namespace lm
49 | 
50 | #endif // LM_LM_EXCEPTION
51 | 


--------------------------------------------------------------------------------
/lm/max_order.hh:
--------------------------------------------------------------------------------
 1 | #ifndef LM_MAX_ORDER_H
 2 | #define LM_MAX_ORDER_H
 3 | /* IF YOUR BUILD SYSTEM PASSES -DKENLM_MAX_ORDER, THEN CHANGE THE BUILD SYSTEM.
 4 |  * If not, this is the default maximum order.  
 5 |  * Having this limit means that State can be
 6 |  * (kMaxOrder - 1) * sizeof(float) bytes instead of
 7 |  * sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead
 8 |  */
 9 | #ifndef KENLM_ORDER_MESSAGE
10 | #define KENLM_ORDER_MESSAGE "If your build system supports changing KENLM_MAX_ORDER, change it there and recompile.  In the KenLM tarball or Moses, use e.g. `bjam --max-kenlm-order=6 -a'.  Otherwise, edit lm/max_order.hh."
11 | #endif
12 | 
13 | #endif // LM_MAX_ORDER_H
14 | 


--------------------------------------------------------------------------------
/lm/model_type.hh:
--------------------------------------------------------------------------------
 1 | #ifndef LM_MODEL_TYPE_H
 2 | #define LM_MODEL_TYPE_H
 3 | 
 4 | namespace lm {
 5 | namespace ngram {
 6 | 
 7 | /* Not the best numbering system, but it grew this way for historical reasons
 8 |  * and I want to preserve existing binary files. */
 9 | typedef enum {PROBING=0, REST_PROBING=1, TRIE=2, QUANT_TRIE=3, ARRAY_TRIE=4, QUANT_ARRAY_TRIE=5} ModelType;
10 | 
11 | // Historical names.  
12 | const ModelType HASH_PROBING = PROBING;
13 | const ModelType TRIE_SORTED = TRIE;
14 | const ModelType QUANT_TRIE_SORTED = QUANT_TRIE;
15 | const ModelType ARRAY_TRIE_SORTED = ARRAY_TRIE;
16 | const ModelType QUANT_ARRAY_TRIE_SORTED = QUANT_ARRAY_TRIE;
17 | 
// Offsets between trie variants in the ModelType numbering:
// QUANT_TRIE - TRIE == 1 and ARRAY_TRIE - TRIE == 2, so
// TRIE + kQuantAdd + kArrayAdd == QUANT_ARRAY_TRIE (5).
const static ModelType kQuantAdd = static_cast<ModelType>(QUANT_TRIE - TRIE);
const static ModelType kArrayAdd = static_cast<ModelType>(ARRAY_TRIE - TRIE);
20 | 
21 | } // namespace ngram
22 | } // namespace lm
23 | #endif // LM_MODEL_TYPE_H
24 | 


--------------------------------------------------------------------------------
/lm/return.hh:
--------------------------------------------------------------------------------
 1 | #ifndef LM_RETURN_H
 2 | #define LM_RETURN_H
 3 | 
 4 | #include <stdint.h>
 5 | 
 6 | namespace lm {
 7 | /* Structure returned by scoring routines. */
struct FullScoreReturn {
  // log10 probability
  float prob;

  /* The length of n-gram matched.  Do not use this for recombination.  
   * Consider a model containing only the following n-grams:
   * -1 foo
   * -3.14  bar
   * -2.718 baz -5
   * -6 foo bar
   *
   * If you score ``bar'' then ngram_length is 1 and recombination state is the
   * empty string because bar has zero backoff and does not extend to the
   * right.  
   * If you score ``foo'' then ngram_length is 1 and recombination state is 
   * ``foo''.  
   *
   * Ideally, keep output states around and compare them.  Failing that,
   * get out_state.ValidLength() and use that length for recombination.
   */
  unsigned char ngram_length;

  /* Left extension information.  If independent_left is set, then prob is
   * independent of words to the left (up to additional backoff).  Otherwise,
   * extend_left indicates how to efficiently extend further to the left.  
   */
  bool independent_left;
  // Per the comment above, meaningful only when independent_left is false.
  uint64_t extend_left;

  // Rest cost for extension to the left.
  float rest;
};
40 | 
41 | } // namespace lm
42 | #endif // LM_RETURN_H
43 | 


--------------------------------------------------------------------------------
/lm/sizes.hh:
--------------------------------------------------------------------------------
 1 | #ifndef LM_SIZES_H
 2 | #define LM_SIZES_H
 3 | 
 4 | #include <vector>
 5 | 
 6 | #include <stdint.h>
 7 | 
 8 | namespace lm { namespace ngram {
 9 | 
10 | struct Config;
11 | 
12 | void ShowSizes(const std::vector<uint64_t> &counts, const lm::ngram::Config &config);
13 | void ShowSizes(const std::vector<uint64_t> &counts);
14 | void ShowSizes(const char *file, const lm::ngram::Config &config);
15 | 
16 | }} // namespaces
17 | #endif // LM_SIZES_H
18 | 


--------------------------------------------------------------------------------
/lm/virtual_interface.cc:
--------------------------------------------------------------------------------
 1 | #include "lm/virtual_interface.hh"
 2 | 
 3 | #include "lm/lm_exception.hh"
 4 | 
 5 | namespace lm {
 6 | namespace base {
 7 | 
 8 | Vocabulary::~Vocabulary() {}
 9 | 
// Store the three special word indices in the corresponding members:
// begin_sentence_, end_sentence_ and not_found_.
void Vocabulary::SetSpecial(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found) {
  begin_sentence_ = begin_sentence;
  end_sentence_ = end_sentence;
  not_found_ = not_found;
}
15 | 
16 | Model::~Model() {}
17 | 
18 | } // namespace base
19 | } // namespace lm
20 | 


--------------------------------------------------------------------------------
/lm/weights.hh:
--------------------------------------------------------------------------------
 1 | #ifndef LM_WEIGHTS_H
 2 | #define LM_WEIGHTS_H
 3 | 
 4 | // Weights for n-grams.  Probability and possibly a backoff.  
 5 | 
 6 | namespace lm {
 7 | struct Prob {
 8 |   float prob;
 9 | };
10 | // No inheritance so this will be a POD.  
11 | struct ProbBackoff {
12 |   float prob;
13 |   float backoff;
14 | };
15 | struct RestWeights {
16 |   float prob;
17 |   float backoff;
18 |   float rest;
19 | };
20 | 
21 | } // namespace lm
22 | #endif // LM_WEIGHTS_H
23 | 


--------------------------------------------------------------------------------
/lm/word_index.hh:
--------------------------------------------------------------------------------
 1 | // Separate header because this is used often.
 2 | #ifndef LM_WORD_INDEX_H
 3 | #define LM_WORD_INDEX_H
 4 | 
 5 | #include <limits.h>
 6 | 
 7 | namespace lm {
 8 | typedef unsigned int WordIndex;
 9 | const WordIndex kMaxWordIndex = UINT_MAX;
10 | } // namespace lm
11 | 
12 | typedef lm::WordIndex LMWordIndex;
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/util/barrier.hh:
--------------------------------------------------------------------------------
 1 | #ifndef UTIL_BARRIER__
 2 | #define UTIL_BARRIER__
 3 | 
 4 | #include <boost/thread/mutex.hpp>
 5 | #include <boost/thread/locks.hpp>
 6 | 
 7 | namespace util {
 8 | 
// Mutex-protected countdown.  Constructed with a count n; each Decrement()
// lowers the count by one and reports whether it has just reached zero.
class Barrier {
  public:
    // n must be non-zero (checked with assert only).
    explicit Barrier(size_t n) : n_(n) {
      assert(n);
    }

    // Returns true for exactly the call that brings the count to zero.
    // NOTE(review): nothing stops further calls after zero; n_ is a size_t
    // and would wrap around.
    bool Decrement() {
      // There are faster ways to do this hidden in boost/detail/sp_counted_base_*, but they're poorly factored for such.
      boost::unique_lock<boost::mutex> lock(mutex_);
      return (0 == --n_);
    }

  private:
    size_t n_;           // Remaining count; guarded by mutex_ in Decrement().
    boost::mutex mutex_;
};
25 | 
26 | } // namespace util
27 | 
28 | #endif // UTIL_BARRIER__
29 | 


--------------------------------------------------------------------------------
/util/bit_packing.cc:
--------------------------------------------------------------------------------
 1 | #include "util/bit_packing.hh"
 2 | #include "util/exception.hh"
 3 | 
 4 | #include <string.h>
 5 | 
 6 | namespace util {
 7 | 
 8 | namespace {
 9 | template <bool> struct StaticCheck {};
10 | template <> struct StaticCheck<true> { typedef bool StaticAssertionPassed; };
11 | 
12 | // If your float isn't 4 bytes, we're hosed.  
13 | typedef StaticCheck<sizeof(float) == 4>::StaticAssertionPassed FloatSize;
14 | 
15 | } // namespace
16 | 
// Number of bits needed to represent max_value: the position of its highest
// set bit, counting from 1.  Zero needs no bits.
uint8_t RequiredBits(uint64_t max_value) {
  uint8_t bits = 0;
  for (; max_value != 0; max_value >>= 1) {
    ++bits;
  }
  return bits;
}
23 | 
// Runtime self-test of the bit packing assumptions.  Throws util::Exception
// (via UTIL_THROW) if either check below fails.
void BitPackingSanity() {
  // The integer views of -1.0 and 1.0 must differ in exactly the 0x80000000 bit.
  const FloatEnc neg1 = { -1.0 }, pos1 = { 1.0 };
  if ((neg1.i ^ pos1.i) != 0x80000000) UTIL_THROW(Exception, "Sign bit is not 0x80000000");
  // Eight 57-bit values occupy 57 * 8 bits = 57 bytes.
  // NOTE(review): the extra 8 bytes are presumably slack for routines that
  // access memory 8 bytes at a time - confirm in util/bit_packing.hh.
  char mem[57+8];
  memset(mem, 0, sizeof(mem));
  const uint64_t test57 = 0x123456789abcdefULL;
  // Write the same value at bit offsets 0, 57, ..., 399 ...
  for (uint64_t b = 0; b < 57 * 8; b += 57) {
    WriteInt57(mem, b, 57, test57);
  }
  // ... then read each one back and require an exact round trip.
  for (uint64_t b = 0; b < 57 * 8; b += 57) {
    if (test57 != ReadInt57(mem, b, 57, (1ULL << 57) - 1))
      UTIL_THROW(Exception, "The bit packing routines are failing for your architecture.  Please send a bug report with your architecture, operating system, and compiler.");
  }
  // TODO: more checks.  
}
39 | 
40 | } // namespace util
41 | 


--------------------------------------------------------------------------------
/util/bounded_i_stream.hh:
--------------------------------------------------------------------------------
 1 | #ifndef UTIL_BOUNDED_I_STREAM__
 2 | #define UTIL_BOUNDED_I_STREAM__
 3 | 
 4 | #include <boost/iostreams/concepts.hpp>
 5 | #include <boost/iostreams/stream.hpp>
 6 | 
 7 | #include <istream>
 8 | 
 9 | namespace util {
10 | 
11 | namespace detail {
12 | class BoundedIStreamDevice : public boost::iostreams::source {
13 |   public:
14 |     BoundedIStreamDevice(std::istream &backend, std::streamsize bound)
15 |       : backend_(backend), bound_(bound) {}
16 | 
17 |     std::streamsize read(char *out, std::streamsize n) {
18 |       backend_.read(out, std::min(n, bound_));
19 |       if (backend_.eof()) return -1;
20 |       if (backend_.fail()) throw std::ios_base::failure("Backend stream failbit without eof");
21 |       bound_ -= backend_.gcount();
22 |       return backend_.gcount();
23 |     }
24 | 
25 |     bool Completed() const {
26 |       return bound_ == 0;
27 |     }
28 | 
29 |   private:
30 |     std::istream &backend_;
31 |     std::streamsize bound_;
32 | };
33 | } // namespace detail
34 | 
35 | typedef boost::iostreams::stream<detail::BoundedIStreamDevice> BoundedIStream;
36 | 
37 | } // namespace util
38 | 
39 | #endif // UTIL_BOUNDED_I_STREAM__
40 | 


--------------------------------------------------------------------------------
/util/cat_compressed_main.cc:
--------------------------------------------------------------------------------
 1 | // Like cat but interprets compressed files.
 2 | #include "util/file.hh"
 3 | #include "util/read_compressed.hh"
 4 | 
 5 | #include <string.h>
 6 | #include <iostream>
 7 | 
 8 | namespace {
 9 | const std::size_t kBufSize = 16384;
// Copy everything readable from `from` to file descriptor `to`, using a
// malloc'd buffer of kBufSize bytes.  The loop ends when Read returns 0.
void Copy(util::ReadCompressed &from, int to) {
  util::scoped_malloc buffer(util::MallocOrThrow(kBufSize));
  while (std::size_t amount = from.Read(buffer.get(), kBufSize)) {
    util::WriteOrThrow(to, buffer.get(), amount);
  }
}
16 | } // namespace
17 | 
// Entry point.  With no arguments, file descriptor 0 is copied to file
// descriptor 1; otherwise each argument is opened and copied in turn.
// Returns 0 on success, 1 after printing help, 2 if an exception was caught.
int main(int argc, char *argv[]) {
  // Lane Schwartz likes -h and --help
  for (int i = 1; i < argc; ++i) {
    char *arg = argv[i];
    // Stop looking for help flags after "--".
    // NOTE(review): the copy loop below still passes every argument,
    // including "--" itself, to OpenReadOrThrow.
    if (!strcmp(arg, "--")) break;
    if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) {
      std::cerr << 
        "A cat implementation that interprets compressed files.\n"
        "Usage: " << argv[0] << " [file1] [file2] ...\n"
        "If no file is provided, then stdin is read.\n";
      return 1;
    }
  }

  try {
    if (argc == 1) {
      util::ReadCompressed in(0);
      Copy(in, 1);
    } else {
      for (int i = 1; i < argc; ++i) {
        util::ReadCompressed in(util::OpenReadOrThrow(argv[i]));
        Copy(in, 1);
      }
    }
  } catch (const std::exception &e) {
    // Report the failure on stderr and exit with a distinct status.
    std::cerr << e.what() << std::endl;
    return 2;
  }
  return 0;
}
48 | 


--------------------------------------------------------------------------------
/util/debug.hh:
--------------------------------------------------------------------------------
 1 | // Like assert.h, allow multiple inclusion with different NDEBUG.
 2 | #ifndef NDEBUG
 3 | #undef DEBUG_ONLY
 4 | #define DEBUG_ONLY(inside) inside
 5 | #else
 6 | #undef DEBUG_ONLY
 7 | #define DEBUG_ONLY(inside)
 8 | #endif
 9 | 
10 | #undef DEBUG_ONLY_ASSERT
11 | #define DEBUG_ONLY_ASSERT(inside) DEBUG_ONLY(assert(inside))
12 | 


--------------------------------------------------------------------------------
/util/double-conversion/Jamfile:
--------------------------------------------------------------------------------
1 | fakelib double-conversion : [ glob *.cc ] : : : <include>. ;
2 | 


--------------------------------------------------------------------------------
/util/getopt.hh:
--------------------------------------------------------------------------------
 1 | /*
 2 | POSIX getopt for Windows
 3 | 
 4 | AT&T Public License
 5 | 
 6 | Code given out at the 1985 UNIFORUM conference in Dallas.  
 7 | */
 8 | 
 9 | #ifdef __GNUC__
10 | #include <getopt.h>
11 | #endif
12 | #ifndef __GNUC__
13 | 
14 | #ifndef UTIL_GETOPT_H
15 | #define UTIL_GETOPT_H
16 | 
17 | #ifdef __cplusplus
18 | extern "C" {
19 | #endif
20 | 
21 | extern int opterr;
22 | extern int optind;
23 | extern int optopt;
24 | extern char *optarg;
25 | extern int getopt(int argc, char **argv, char *opts);
26 | 
27 | #ifdef __cplusplus
28 | }
29 | #endif
30 | 
31 | #endif  /* UTIL_GETOPT_H */
32 | #endif  /* __GNUC__ */
33 | 
34 | 


--------------------------------------------------------------------------------
/util/hash_fusion.hh:
--------------------------------------------------------------------------------
 1 | /* Hashing for Boost fusion objects.  
 2 |  * The hash_value function is placed in boost::fusion so calling just hash_value on it works.  
 3 |  */
 4 | 
 5 | #include <boost/functional/hash/hash.hpp>
 6 | #include <boost/fusion/algorithm/iteration/accumulate.hpp>
 7 | #include <boost/fusion/support/is_sequence.hpp>
 8 | #include <boost/utility/enable_if.hpp>
 9 | 
10 | namespace util {
11 | namespace detail {
12 | 
// Binary functor for boost::fusion::accumulate: folds element t into the
// running hash `value` with boost::hash_combine and returns the new hash.
struct HashCombine {
  template <class T> size_t operator()(size_t value, const T &t) const {
    boost::hash_combine(value, t);
    return value;
  }
  // Result type advertised to callers of the functor.
  typedef size_t result_type;
};
20 | 
21 | } // namespace detail
22 | } // namespace util
23 | 
24 | namespace boost {
25 | namespace fusion {
26 | 
// Hash of a fusion sequence: starts from 0 and combines each element in order
// with util::detail::HashCombine.  Enabled only for types for which
// traits::is_sequence holds.
template <class T> inline typename enable_if<traits::is_sequence<T>, size_t>::type hash_value(const T &t) {
  return boost::fusion::accumulate(t, static_cast<size_t>(0), util::detail::HashCombine());
}
30 | 
31 | } // namespace fusion
32 | } // namespace boost
33 | 


--------------------------------------------------------------------------------
/util/hash_fusion_test.cc:
--------------------------------------------------------------------------------
 1 | #include "util/hash_fusion.hh"
 2 | 
 3 | #define BOOST_TEST_MODULE HashFusionTest
 4 | #include <boost/fusion/container/vector.hpp>
 5 | #include <boost/test/unit_test.hpp>
 6 | 
 7 | namespace {
 8 | 
 9 | BOOST_AUTO_TEST_CASE(Empty) {
10 |   boost::fusion::vector<> vec;
11 |   BOOST_CHECK_EQUAL(static_cast<size_t>(0), hash_value(vec));
12 | }
13 | 
14 | BOOST_AUTO_TEST_CASE(Single) {
15 |   boost::fusion::vector<int> vec(1);
16 |   size_t hash_accum = 0;
17 |   boost::hash_combine(hash_accum, (int)1);
18 |   BOOST_CHECK_EQUAL(hash_accum, hash_value(vec));
19 | }
20 | 
21 | } // namespace
22 | 


--------------------------------------------------------------------------------
/util/hash_output_test.cc:
--------------------------------------------------------------------------------
 1 | #include "util/hash_output.hh"
 2 | 
 3 | #define BOOST_TEST_MODULE HashOutputTest
 4 | #include <boost/test/unit_test.hpp>
 5 | 
 6 | #include <algorithm>
 7 | #include <vector>
 8 | 
 9 | namespace util {
10 | namespace {
11 | 
// Copying a vector through HashOutput must produce the same hash as calling
// boost::hash_combine on each element in order, starting from 0.
BOOST_AUTO_TEST_CASE(vector_copy) {
  std::vector<int> vec;
  vec.push_back(1);
  vec.push_back(14008);
  vec.push_back(783712947);

  // NOTE(review): hash is not initialized here; presumably HashOutput's
  // constructor resets it - confirm in util/hash_output.hh.
  size_t hash;
  HashOutput hasher(hash);
  std::copy(vec.begin(), vec.end(), hasher);

  // Reference value computed directly.
  size_t compare = 0;
  for (std::vector<int>::const_iterator i = vec.begin(); i != vec.end(); ++i) {
    boost::hash_combine(compare, *i);
  }

  BOOST_CHECK_EQUAL(compare, hash);
}
29 | 
30 | } // namespace
31 | } // namespace util
32 | 


--------------------------------------------------------------------------------
/util/have.hh:
--------------------------------------------------------------------------------
 1 | /* Optional packages.  You might want to integrate this with your build system e.g. config.h from ./configure. */
 2 | #ifndef UTIL_HAVE_H
 3 | #define UTIL_HAVE_H
 4 | 
 5 | #ifdef HAVE_CONFIG_H
 6 | #include "config.h"
 7 | #endif
 8 | 
 9 | #ifndef HAVE_ICU
10 | #define HAVE_ICU
11 | #endif
12 | 
13 | #endif // UTIL_HAVE_H
14 | 


--------------------------------------------------------------------------------
/util/latex_escape.cc:
--------------------------------------------------------------------------------
 1 | #include "util/latex_escape.hh"
 2 | 
 3 | namespace util {
 4 | namespace {
 5 | 
 6 | class Replace {
 7 |   public:
 8 |     Replace() {
 9 |       for (unsigned int i = 0; i < 256; ++i) {
10 |         null_separated_bytes_[i * 2] = i;
11 |         null_separated_bytes_[i * 2 + 1] = 0;
12 |         map_[i] = &null_separated_bytes_[i * 2];
13 |       }
14 |       map_[(unsigned char)'<'] = "\\textless ";
15 |       map_[(unsigned char)'>'] = "\\textgreater ";
16 |       map_[(unsigned char)'$'] = "\\$";
17 |       map_[(unsigned char)'_'] = "\\textunderscore ";
18 |       map_[(unsigned char)'{'] = "\\{";
19 |       map_[(unsigned char)'}'] = "\\}";
20 |       map_[(unsigned char)'\\'] = "\\textbacklash ";
21 |       map_[(unsigned char)'%'] = "\\%";
22 |       map_[(unsigned char)'#'] = "\\#";
23 |       map_[(unsigned char)'&'] = "\\&";
24 |     }
25 | 
26 |     const char *operator[](size_t value) {
27 |       return map_[value];
28 |     }
29 | 
30 |   private:
31 |     char null_separated_bytes_[512];
32 |     const char *map_[256];
33 | };
34 | 
35 | Replace replace;
36 | 
37 | } // namespace
38 | 
39 | void LatexEscape(const StringPiece &in, std::string &out) {
40 |   out.clear();
41 |   for (const char *i = in.data(); i != in.data() + in.size(); ++i) {
42 |     out.append(replace[*i]);
43 |   }
44 | }
45 | 
46 | } // namespace util
47 | 


--------------------------------------------------------------------------------
/util/latex_escape.hh:
--------------------------------------------------------------------------------
 1 | #ifndef UTIL_LATEX_ESCAPE__
 2 | #define UTIL_LATEX_ESCAPE__
 3 | 
 4 | #include "util/string_piece.hh"
 5 | 
 6 | #include <string>
 7 | 
 8 | namespace util {
 9 | 
10 | // Escape characters for LaTeX.  This isn't a formally formal escape, but does list what I encounter.
11 | void LatexEscape(const StringPiece &in, std::string &out);
12 | 
13 | } // namespace util
14 | #endif // UTIL_LATEX_ESCAPE__
15 | 


--------------------------------------------------------------------------------
/util/lower_main.cc:
--------------------------------------------------------------------------------
 1 | #include "util/utf8.hh"
 2 | 
 3 | #include <iostream>
 4 | 
// Filter: reads standard input line by line, passes each line through
// utf8::ToLower and writes the result followed by a newline.
// Returns 1 if reading stopped for a reason other than end of file.
int main() {
  std::string line, lower;
  while (getline(std::cin, line)) {
    utf8::ToLower(line, lower);
    std::cout << lower << '\n';
  }
  // getline failing without eofbit means something else went wrong.
  if (!std::cin.eof()) {
    std::cerr << "Some error other than EOF" << std::endl;
    return 1;
  }
  return 0;
}
17 | 


--------------------------------------------------------------------------------
/util/murmur_hash.hh:
--------------------------------------------------------------------------------
 1 | #ifndef UTIL_MURMUR_HASH_H
 2 | #define UTIL_MURMUR_HASH_H
 3 | #include <cstddef>
 4 | #include <stdint.h>
 5 | 
 6 | namespace util {
 7 | 
 8 | // 64-bit machine version
 9 | uint64_t MurmurHash64A(const void * key, std::size_t len, uint64_t seed = 0);
10 | // 32-bit machine version (not the same function as above)
11 | uint64_t MurmurHash64B(const void * key, std::size_t len, uint64_t seed = 0);
12 | // Use the version for this arch.  Because the values differ across
13 | // architectures, really only use it for in-memory structures.
14 | uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t seed = 0);
15 | 
16 | } // namespace util
17 | 
18 | #endif // UTIL_MURMUR_HASH_H
19 | 


--------------------------------------------------------------------------------
/util/numbers.hh:
--------------------------------------------------------------------------------
 1 | // Numeric types used everywhere
 2 | #ifndef UTIL_NUMBERS__
 3 | #define UTIL_NUMBERS__
 4 | 
 5 | #include "util/log_num.hh"
 6 | 
 7 | typedef double LinearScore;
 8 | typedef LogNum<LinearScore> LogScore;
 9 | 
10 | #endif
11 | 


--------------------------------------------------------------------------------
/util/options.cc:
--------------------------------------------------------------------------------
 1 | #include "util/options.hh"
 2 | 
 3 | #include <boost/lexical_cast.hpp>
 4 | 
 5 | namespace util {
 6 | 
// Builds the message "Expected <key> <expected> times, got it <times>." by
// appending to what_ after the base class has been initialized with
// "Expected ".  The key and both counts are also stored in members.
ArgumentCountException::ArgumentCountException(const char *key, size_t expected, size_t times) throw()
  : ArgumentParseError("Expected "), key_(key), expected_(expected), times_(times) {
  what_ += key_;
  what_ += " ";
  what_ += boost::lexical_cast<std::string>(expected);
  what_ += " times, got it ";
  what_ += boost::lexical_cast<std::string>(times);
  what_ += ".";
}
16 | 
17 | void CheckCountRange(const boost::program_options::variables_map &vm, const char **key_begin, const char **key_end, size_t expected) {
18 |   for (const char **key = key_begin; key != key_end; ++key) {
19 |     if (vm.count(*key) != expected)
20 |       throw ArgumentCountException(*key, expected, vm.count(*key));
21 |   }
22 | }
23 | 
24 | } // namespace util
25 | 


--------------------------------------------------------------------------------
/util/parallel_read.hh:
--------------------------------------------------------------------------------
 1 | #ifndef UTIL_PARALLEL_READ__
 2 | #define UTIL_PARALLEL_READ__
 3 | 
 4 | /* Read pieces of a file in parallel.  This has a very specific use case:
 5 |  * reading files from Lustre is CPU bound so multiple threads actually
 6 |  * increases throughput.  Speed matters when an LM takes a terabyte.
 7 |  */
 8 | 
 9 | #include <cstddef>
10 | #include <stdint.h>
11 | 
12 | namespace util {
13 | void ParallelRead(int fd, void *to, std::size_t amount, uint64_t offset);
14 | } // namespace util
15 | 
16 | #endif // UTIL_PARALLEL_READ__
17 | 


--------------------------------------------------------------------------------
/util/pcqueue_test.cc:
--------------------------------------------------------------------------------
 1 | #include "util/pcqueue.hh"
 2 | 
 3 | #define BOOST_TEST_MODULE PCQueueTest
 4 | #include <boost/test/unit_test.hpp>
 5 | 
 6 | namespace util {
 7 | namespace {
 8 | 
// Single-threaded check: ten values produced into a queue constructed with
// size 10 are consumed back in the same order.
BOOST_AUTO_TEST_CASE(SingleThread) {
  PCQueue<int> queue(10);
  for (int i = 0; i < 10; ++i) {
    queue.Produce(i);
  }
  for (int i = 0; i < 10; ++i) {
    BOOST_CHECK_EQUAL(i, queue.Consume());
  }
}
18 | 
19 | }
20 | } // namespace util
21 | 


--------------------------------------------------------------------------------
/util/pool.cc:
--------------------------------------------------------------------------------
 1 | #include "util/pool.hh"
 2 | 
 3 | #include "util/scoped.hh"
 4 | 
 5 | #include <stdlib.h>
 6 | 
 7 | namespace util {
 8 | 
 9 | Pool::Pool() {
10 |   current_ = NULL;
11 |   current_end_ = NULL;
12 | }
13 | 
14 | Pool::~Pool() {
15 |   FreeAll();
16 | }
17 | 
18 | void Pool::FreeAll() {
19 |   for (std::vector<void *>::const_iterator i(free_list_.begin()); i != free_list_.end(); ++i) {
20 |     free(*i);
21 |   }
22 |   free_list_.clear();
23 |   current_ = NULL;
24 |   current_end_ = NULL;
25 | }
26 | 
// Obtain a new block and satisfy a request of `size` bytes from its start.
// The block is recorded in free_list_ and becomes the current block; the
// remainder after the first `size` bytes is left for later allocations.
void *Pool::More(std::size_t size) {
  // Block size is 32 bytes shifted left by the number of blocks already held,
  // or `size` if that is larger.
  // NOTE(review): the shift is undefined once free_list_.size() reaches the
  // bit width of size_t.
  std::size_t amount = std::max(static_cast<size_t>(32) << free_list_.size(), size);
  uint8_t *ret = static_cast<uint8_t*>(MallocOrThrow(amount));
  free_list_.push_back(ret);
  current_ = ret + size;
  current_end_ = ret + amount;
  return ret;
}
35 | 
36 | } // namespace util
37 | 


--------------------------------------------------------------------------------
/util/pool.hh:
--------------------------------------------------------------------------------
 1 | // Very simple pool.  It can only allocate memory.  And all of the memory it
 2 | // allocates must be freed at the same time.  
 3 | 
 4 | #ifndef UTIL_POOL_H
 5 | #define UTIL_POOL_H
 6 | 
 7 | #include <vector>
 8 | 
 9 | #include <stdint.h>
10 | 
11 | namespace util {
12 | 
// Allocate-only memory pool.  Requests are served by advancing a pointer
// through the current block; when a request does not fit, More() obtains a
// new block.  Individual allocations cannot be freed; FreeAll() (also run by
// the destructor) releases every block at once.
class Pool {
  public:
    Pool();

    ~Pool();

    // Returns a pointer to `size` bytes.  The fast path is taken only when
    // `size` is strictly smaller than the space left in the current block;
    // otherwise the request is forwarded to More().
    void *Allocate(std::size_t size) {
      // Compare sizes rather than advancing current_ first: forming a pointer
      // beyond the end of the block (or offsetting a null pointer) is
      // undefined and can wrap for a large `size`.
      if (size < static_cast<std::size_t>(current_end_ - current_)) {
        void *ret = current_;
        current_ += size;
        return ret;
      }
      return More(size);
    }

    void FreeAll();

  private:
    void *More(std::size_t size);

    // Every block obtained so far, kept so FreeAll() can release them.
    std::vector<void *> free_list_;

    // Unused part of the current block: [current_, current_end_).
    uint8_t *current_, *current_end_;

    // no copying
    Pool(const Pool &);
    Pool &operator=(const Pool &);
};
42 | 
43 | } // namespace util
44 | 
45 | #endif // UTIL_POOL_H
46 | 


--------------------------------------------------------------------------------
/util/print_concurrency_main.cc:
--------------------------------------------------------------------------------
1 | #include <boost/thread/thread.hpp>
2 | 
3 | #include <iostream>
4 | 
5 | int main() {
6 |   std::cout << boost::thread::hardware_concurrency() << std::endl;
7 | }
8 | 


--------------------------------------------------------------------------------
/util/scoped.cc:
--------------------------------------------------------------------------------
 1 | #include "util/scoped.hh"
 2 | 
 3 | #include <cstdlib>
 4 | #if !defined(_WIN32) && !defined(_WIN64)
 5 | #include <sys/mman.h>
 6 | #endif
 7 | 
 8 | namespace util {
 9 | 
10 | MallocException::MallocException(std::size_t requested) throw() {
11 |   *this << "for " << requested << " bytes ";
12 | }
13 | 
14 | MallocException::~MallocException() throw() {}
15 | 
16 | namespace {
17 | void *InspectAddr(void *addr, std::size_t requested, const char *func_name) {
18 |   UTIL_THROW_IF_ARG(!addr && requested, MallocException, (requested), "in " << func_name);
19 |   // These routines are often used for large chunks of memory where huge pages help.
20 | #if MADV_HUGEPAGE
21 |   madvise(addr, requested, MADV_HUGEPAGE);
22 | #endif
23 |   return addr;
24 | }
25 | } // namespace
26 | 
27 | void *MallocOrThrow(std::size_t requested) {
28 |   return InspectAddr(std::malloc(requested), requested, "malloc");
29 | }
30 | 
31 | void *CallocOrThrow(std::size_t requested) {
32 |   return InspectAddr(std::calloc(1, requested), requested, "calloc");
33 | }
34 | 
35 | void scoped_malloc::call_realloc(std::size_t requested) {
36 |   p_ = InspectAddr(std::realloc(p_, requested), requested, "realloc");
37 | }
38 | 
39 | } // namespace util
40 | 


--------------------------------------------------------------------------------
/util/sized_iterator_test.cc:
--------------------------------------------------------------------------------
 1 | #include "util/sized_iterator.hh"
 2 | 
 3 | #define BOOST_TEST_MODULE SizedIteratorTest
 4 | #include <boost/test/unit_test.hpp>
 5 | 
namespace util { namespace {

// Verifies that swap() on two SizedProxy objects exchanges the bytes they
// refer to rather than the proxies themselves: each proxy is built over one
// element of str (the second constructor argument is presumably the element
// size in bytes -- confirm in sized_iterator.hh), and after the swap the two
// array elements must have traded values.
BOOST_AUTO_TEST_CASE(swap_works) {
  char str[2] = { 0, 1 };
  SizedProxy first(str, 1), second(str + 1, 1);
  swap(first, second);
  BOOST_CHECK_EQUAL(1, str[0]);
  BOOST_CHECK_EQUAL(0, str[1]);
}

}} // namespace anonymous util
17 | 


--------------------------------------------------------------------------------
/util/stream/Jamfile:
--------------------------------------------------------------------------------
 1 | #if $(BOOST-VERSION) >= 104800 {
 2 | #    timer-link = <library>/top//boost_timer ;
 3 | #} else {
 4 | #   timer-link = ;
 5 | #}
 6 | 
# The stream library: built from the chain, io, line_input and multi_progress
# sources together with ../kenutil and Boost.Thread.  Boost.Thread is listed a
# second time in the last field, presumably so that targets using this library
# link against it as well (fakelib is a project-defined rule -- confirm).
fakelib stream : chain.cc io.cc line_input.cc multi_progress.cc ..//kenutil /top//boost_thread : : : <library>/top//boost_thread ;

import testing ;
# One unit-test target per test source, each built against the stream library
# and the Boost unit test framework.
unit-test io_test : io_test.cc stream /top//boost_unit_test_framework ;
unit-test stream_test : stream_test.cc stream /top//boost_unit_test_framework ;
unit-test sort_test : sort_test.cc stream /top//boost_unit_test_framework ;
13 | 


--------------------------------------------------------------------------------
/util/stream/io_test.cc:
--------------------------------------------------------------------------------
 1 | #include "util/stream/io.hh"
 2 | 
 3 | #include "util/stream/chain.hh"
 4 | #include "util/file.hh"
 5 | 
 6 | #define BOOST_TEST_MODULE IOTest
 7 | #include <boost/test/unit_test.hpp>
 8 | 
 9 | #include <unistd.h>
10 | 
namespace util { namespace stream { namespace {

// Round-trip test: copies a temporary file to a second temporary file through
// a Chain with a PRead source and a Write sink, then checks that the copy
// contains the same sequence of values.
BOOST_AUTO_TEST_CASE(CopyFile) {
  std::string temps("io_test_temp");

  // Fill the input file with the 64-bit integers 0..99999 in native byte
  // order, then rewind it.
  scoped_fd in(MakeTemp(temps));
  for (uint64_t i = 0; i < 100000; ++i) {
    WriteOrThrow(in.get(), &i, sizeof(uint64_t));
  }
  SeekOrThrow(in.get(), 0);
  scoped_fd out(MakeTemp(temps));

  // 8-byte entries match sizeof(uint64_t).  The memory budget is far smaller
  // than the 800000 bytes of data, presumably so that the data has to pass
  // through the chain in many blocks -- confirm against ChainConfig.
  ChainConfig config;
  config.entry_size = 8;
  config.total_memory = 1024;
  config.block_count = 10;

  // The Chain is a temporary that is destroyed at the end of this statement.
  // NOTE(review): this relies on the copy being complete by then; confirm in
  // chain.hh that destruction waits for the workers.
  Chain(config) >> PRead(in.get()) >> Write(out.get());

  // Read the copy back from the start and compare value by value.
  SeekOrThrow(out.get(), 0);
  for (uint64_t i = 0; i < 100000; ++i) {
    uint64_t got;
    ReadOrThrow(out.get(), &got, sizeof(uint64_t));
    BOOST_CHECK_EQUAL(i, got);
  }
}

}}} // namespaces
39 | 


--------------------------------------------------------------------------------
/util/stream/line_input.hh:
--------------------------------------------------------------------------------
 1 | #ifndef UTIL_STREAM_LINE_INPUT_H
 2 | #define UTIL_STREAM_LINE_INPUT_H
namespace util {namespace stream {

// Only used by reference below, so a forward declaration is sufficient here.
class ChainPosition;

/* Worker that reads input into blocks, ensuring that blocks contain whole
 * lines.  Assumes that the maximum size of a line is less than the block size
 */
class LineInput {
  public:
    // Takes ownership upon thread execution.
    // fd is the file descriptor to read from; constructing the object only
    // records it.
    explicit LineInput(int fd);

    // Worker entry point.  position identifies where in the chain this worker
    // operates (presumably invoked by the chain on the worker's thread --
    // confirm in chain.hh).
    void Run(const ChainPosition &position);

  private:
    // Descriptor passed to the constructor.
    int fd_;
};

}} // namespaces
22 | #endif // UTIL_STREAM_LINE_INPUT_H
23 | 


--------------------------------------------------------------------------------
/util/stream/stream_test.cc:
--------------------------------------------------------------------------------
 1 | #include "util/stream/io.hh"
 2 | 
 3 | #include "util/stream/stream.hh"
 4 | #include "util/file.hh"
 5 | 
 6 | #define BOOST_TEST_MODULE StreamTest
 7 | #include <boost/test/unit_test.hpp>
 8 | 
 9 | #include <unistd.h>
10 | 
namespace util { namespace stream { namespace {

// Reads a temporary file back through a Chain and a Stream and checks that
// the entries arrive in order and that none are missing.
BOOST_AUTO_TEST_CASE(StreamTest) {
  // Fill the input file with the 64-bit integers 0..99999 in native byte
  // order, then rewind it.
  scoped_fd in(MakeTemp("io_test_temp"));
  for (uint64_t i = 0; i < 100000; ++i) {
    WriteOrThrow(in.get(), &i, sizeof(uint64_t));
  }
  SeekOrThrow(in.get(), 0);

  // 8-byte entries match sizeof(uint64_t).  The memory budget is deliberately
  // tiny, presumably so that the data is handed over in many very small
  // blocks -- confirm against ChainConfig.
  ChainConfig config;
  config.entry_size = 8;
  config.total_memory = 100;
  config.block_count = 12;

  // Pipeline: file reader -> s (consumed by the loop below) -> kRecycle.
  Stream s;
  Chain chain(config);
  chain >> Read(in.get()) >> s >> kRecycle;
  uint64_t i = 0;
  // s converts to false once the input is exhausted; Get() points at the
  // current entry.
  for (; s; ++s, ++i) {
    BOOST_CHECK_EQUAL(i, *static_cast<const uint64_t*>(s.Get()));
  }
  // Every value written above must have been seen exactly once.
  BOOST_CHECK_EQUAL(100000ULL, i);
}

}}} // namespaces
36 | 


--------------------------------------------------------------------------------
/util/stream/timer.hh:
--------------------------------------------------------------------------------
 1 | #ifndef UTIL_STREAM_TIMER_H
 2 | #define UTIL_STREAM_TIMER_H
 3 | 
 4 | // Sorry Jon, this was adding library dependencies in Moses and people complained.
 5 | 
 6 | /*#include <boost/version.hpp>
 7 | 
 8 | #if BOOST_VERSION >= 104800
 9 | #include <boost/timer/timer.hpp>
10 | #define UTIL_TIMER(str) boost::timer::auto_cpu_timer timer(std::cerr, 1, (str))
11 | #else
12 | //#warning Using Boost older than 1.48. Timing information will not be available.*/
// Timing is disabled: UTIL_TIMER(str) expands to nothing, so call sites
// compile unchanged without depending on Boost.Timer.  The Boost-based
// definition is kept in the comment block above for reference.
#define UTIL_TIMER(str) 
14 | //#endif
15 | 
16 | #endif // UTIL_STREAM_TIMER_H
17 | 


--------------------------------------------------------------------------------
/util/tokenize_piece_test.cc:
--------------------------------------------------------------------------------
 1 | #include "util/tokenize_piece.hh"
 2 | #include "util/string_piece.hh"
 3 | 
 4 | #define BOOST_TEST_MODULE TokenIteratorTest
 5 | #include <boost/test/unit_test.hpp>
 6 | 
 7 | #include <iostream>
 8 | 
namespace util {
namespace {

// Tests for TokenIter with the multi-character delimiter "|||".  The cases
// using TokenIter<MultiCharacter, true> expect empty tokens to be skipped;
// the cases using the default second template argument expect them to be
// reported.

// Input without any delimiter: the whole string is the only token.
BOOST_AUTO_TEST_CASE(pipe_pipe_none) {
  const char str[] = "nodelimit at all";
  TokenIter<MultiCharacter> it(str, MultiCharacter("|||"));
  BOOST_REQUIRE(it);
  BOOST_CHECK_EQUAL(StringPiece(str), *it);
  ++it;
  BOOST_CHECK(!it);
}
// Input consisting of exactly one delimiter: two empty tokens, one on each
// side of the delimiter.
BOOST_AUTO_TEST_CASE(pipe_pipe_two) {
  const char str[] = "|||";
  TokenIter<MultiCharacter> it(str, MultiCharacter("|||"));
  BOOST_REQUIRE(it);
  BOOST_CHECK_EQUAL(StringPiece(), *it);
  ++it;
  BOOST_REQUIRE(it);
  BOOST_CHECK_EQUAL(StringPiece(), *it);
  ++it;
  BOOST_CHECK(!it);
}

// Same input with empty-token skipping: nothing is left to iterate over.
BOOST_AUTO_TEST_CASE(remove_empty) {
  const char str[] = "|||";
  TokenIter<MultiCharacter, true> it(str, MultiCharacter("|||"));
  BOOST_CHECK(!it);
}

// With empty-token skipping, a token consisting of a single space is still
// kept; only the empty token after the delimiter is dropped.
BOOST_AUTO_TEST_CASE(remove_empty_keep) {
  const char str[] = " |||";
  TokenIter<MultiCharacter, true> it(str, MultiCharacter("|||"));
  BOOST_REQUIRE(it);
  BOOST_CHECK_EQUAL(StringPiece(" "), *it);
  ++it;
  BOOST_CHECK(!it);
}

} // namespace
} // namespace util
49 | 


--------------------------------------------------------------------------------
/util/usage.hh:
--------------------------------------------------------------------------------
 1 | #ifndef UTIL_USAGE_H
 2 | #define UTIL_USAGE_H
 3 | #include <cstddef>
 4 | #include <iosfwd>
 5 | #include <string>
 6 | 
 7 | #include <stdint.h>
 8 | 
namespace util {
// Time in seconds since process started.  Zero on unsupported platforms.
double WallTime();

// Writes usage information to the given stream.  The exact contents are
// determined by the implementation, which is not part of this header.
void PrintUsage(std::ostream &to);

// Determine how much physical memory there is.  Return 0 on failure.
uint64_t GuessPhysicalMemory();

// Parse a size like unix sort.  Sadly, this means the default multiplier is K.
// arg is the textual size; the return value is presumably a byte count
// (confirm in the implementation).
uint64_t ParseSize(const std::string &arg);
} // namespace util
21 | #endif // UTIL_USAGE_H
22 | 


--------------------------------------------------------------------------------
/util/vocab_main.cc:
--------------------------------------------------------------------------------
 1 | #include <boost/unordered_set.hpp>
 2 | 
 3 | #include <iostream>
 4 | #include <string>
 5 | 
 6 | int main() {
 7 |   boost::unordered_set<std::string> vocab;
 8 |   std::string word;
 9 |   while (std::cin >> word) {
10 |     if (vocab.insert(word).second) std::cout << word << '\n';
11 |   }
12 |   if (!std::cin.eof()) {
13 |     std::cerr << "Error reading" << std::endl;
14 |     return 1;
15 |   }
16 |   return 0;
17 | }
18 | 


--------------------------------------------------------------------------------