├── VERSION
├── vs
    ├── .gitignore
    └── BuildRelease.bat
├── src
    ├── 3rd_party
    │   ├── nccl
    │   │   ├── pkg
    │   │   │   ├── debian
    │   │   │   │   ├── compat
    │   │   │   │   ├── copyright
    │   │   │   │   ├── source
    │   │   │   │   │   └── format
    │   │   │   │   ├── .gitignore
    │   │   │   │   ├── libnccl-dev.install.in
    │   │   │   │   ├── libnccl2.install.in
    │   │   │   │   ├── gbp.conf
    │   │   │   │   ├── changelog.in
    │   │   │   │   ├── rules
    │   │   │   │   └── control.in
    │   │   │   ├── Makefile
    │   │   │   ├── txz
    │   │   │   │   └── create_txz.sh.in
    │   │   │   └── srctxz
    │   │   │   │   ├── create_srctxz.sh.in
    │   │   │   │   └── Makefile
    │   │   ├── .gitignore
    │   │   ├── makefiles
    │   │   │   ├── version.mk
    │   │   │   └── formatting.mk
    │   │   ├── ext-net
    │   │   │   └── dummy
    │   │   │   │   └── Makefile
    │   │   ├── src
    │   │   │   ├── include
    │   │   │   │   ├── ring.h
    │   │   │   │   ├── rings.h
    │   │   │   │   ├── bootstrap.h
    │   │   │   │   ├── utils.h
    │   │   │   │   ├── group.h
    │   │   │   │   └── enqueue.h
    │   │   │   └── collectives
    │   │   │   │   └── device
    │   │   │   │       ├── all_gather.cu
    │   │   │   │       ├── broadcast.cu
    │   │   │   │       ├── reduce.cu
    │   │   │   │       ├── all_reduce.cu
    │   │   │   │       └── reduce_scatter.cu
    │   │   └── Makefile
    │   ├── sentencepiece
    │   │   ├── VERSION
    │   │   ├── python
    │   │   │   ├── test
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── botchan.txt
    │   │   │   │   ├── test_model.model
    │   │   │   │   └── test_ja_model.model
    │   │   │   ├── .gitignore
    │   │   │   ├── setup.cfg
    │   │   │   └── MANIFEST.in
    │   │   ├── tensorflow
    │   │   │   ├── __init__.py
    │   │   │   ├── test
    │   │   │   │   └── __init__.py
    │   │   │   ├── .gitignore
    │   │   │   └── tf_sentencepiece
    │   │   │   │   ├── _sentencepiece_processor_ops.so.1.7.0
    │   │   │   │   ├── _sentencepiece_processor_ops.so.1.8.0
    │   │   │   │   ├── _sentencepiece_processor_ops.so.1.9.0
    │   │   │   │   ├── _sentencepiece_processor_ops.so.1.10.0
    │   │   │   │   ├── _sentencepiece_processor_ops.so.1.11.0
    │   │   │   │   ├── _sentencepiece_processor_ops.so.1.12.0
    │   │   │   │   └── __init__.py
    │   │   ├── config.h.in
    │   │   ├── sentencepiece.pc.in
    │   │   ├── appveyor.yml
    │   │   ├── src
    │   │   │   ├── freelist_test.cc
    │   │   │   ├── test_main.cc
    │   │   │   ├── model_factory.h
    │   │   │   ├── trainer_factory.h
    │   │   │   ├── word_model.h
    │   │   │   ├── char_model.h
    │   │   │   ├── char_model_trainer.h
    │   │   │   ├── word_model.cc
    │   │   │   ├── unicode_script.cc
    │   │   │   └── bpe_model.h
    │   │   ├── .gitignore
    │   │   ├── doc
    │   │   │   └── special_symbols.md
    │   │   └── third_party
    │   │   │   └── esaxx
    │   │   │       └── LICENSE
    │   ├── spdlog
    │   │   ├── tests
    │   │   │   ├── main.cpp
    │   │   │   ├── utils.h
    │   │   │   ├── includes.h
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── install_libcxx.sh
    │   │   │   ├── utils.cpp
    │   │   │   └── tests.sln
    │   │   ├── bench
    │   │   │   ├── logs
    │   │   │   │   └── .gitignore
    │   │   │   ├── latency
    │   │   │   │   ├── compare.sh
    │   │   │   │   ├── utils.h
    │   │   │   │   └── g3log-crush.cpp
    │   │   │   ├── easyl.conf
    │   │   │   ├── glog-bench.cpp
    │   │   │   ├── easylogging-bench.cpp
    │   │   │   ├── spdlog-bench.cpp
    │   │   │   ├── utils.h
    │   │   │   ├── glog-bench-mt.cpp
    │   │   │   ├── easylogging-bench-mt.cpp
    │   │   │   └── spdlog-bench-mt.cpp
    │   │   ├── example
    │   │   │   ├── jni
    │   │   │   │   ├── Application.mk
    │   │   │   │   └── Android.mk
    │   │   │   ├── utils.h
    │   │   │   ├── Makefile.mingw
    │   │   │   ├── Makefile.clang
    │   │   │   └── example.sln
    │   │   ├── astyle.sh
    │   │   ├── cmake
    │   │   │   └── spdlog.pc.in
    │   │   ├── INSTALL
    │   │   ├── include
    │   │   │   └── spdlog
    │   │   │   │   ├── fmt
    │   │   │   │       ├── ostr.h
    │   │   │   │       ├── fmt.h
    │   │   │   │       └── bundled
    │   │   │   │       │   └── ostream.cc
    │   │   │   │   ├── sinks
    │   │   │   │       ├── null_sink.h
    │   │   │   │       ├── msvc_sink.h
    │   │   │   │       ├── sink.h
    │   │   │   │       ├── ostream_sink.h
    │   │   │   │       └── base_sink.h
    │   │   │   │   ├── details
    │   │   │   │       ├── null_mutex.h
    │   │   │   │       └── log_msg.h
    │   │   │   │   └── formatter.h
    │   │   ├── sinks
    │   │   │   ├── sink.h
    │   │   │   ├── null_sink.h
    │   │   │   ├── msvc_sink.h
    │   │   │   ├── base_sink.h
    │   │   │   └── ostream_sink.h
    │   │   ├── details
    │   │   │   └── null_mutex.h
    │   │   ├── .gitignore
    │   │   ├── formatter.h
    │   │   └── LICENSE
    │   ├── zlib
    │   │   ├── zlib.3.pdf
    │   │   ├── CMakeLists.txt
    │   │   ├── inffast.h
    │   │   └── gzclose.c
    │   ├── yaml-cpp
    │   │   ├── CMakeLists.txt
    │   │   ├── null.cpp
    │   │   ├── yaml-node.cpp
    │   │   ├── contrib
    │   │   │   ├── graphbuilder.cpp
    │   │   │   └── anchordict.h
    │   │   ├── directives.cpp
    │   │   ├── anchor.h
    │   │   ├── emitterstyle.h
    │   │   ├── node
    │   │   │   ├── type.h
    │   │   │   ├── detail
    │   │   │   │   ├── iterator_fwd.h
    │   │   │   │   ├── bool_type.h
    │   │   │   │   └── memory.h
    │   │   │   ├── ptr.h
    │   │   │   ├── emit.h
    │   │   │   └── iterator.h
    │   │   ├── emitterdef.h
    │   │   ├── memory.cpp
    │   │   ├── emit.cpp
    │   │   ├── scantag.h
    │   │   ├── exceptions.cpp
    │   │   ├── noncopyable.h
    │   │   ├── directives.h
    │   │   ├── tag.h
    │   │   ├── yaml.h
    │   │   ├── null.h
    │   │   ├── mark.h
    │   │   ├── LICENSE
    │   │   ├── regex_yaml.cpp
    │   │   ├── indentation.h
    │   │   ├── collectionstack.h
    │   │   ├── tag.cpp
    │   │   └── ptr_vector.h
    │   ├── pathie-cpp
    │   │   ├── CMakeLists.txt
    │   │   └── LICENSE
    │   ├── CLI
    │   │   ├── Version.hpp
    │   │   ├── CLI.hpp
    │   │   └── Macros.hpp
    │   ├── SQLiteCpp
    │   │   ├── sqlite3
    │   │   │   └── README.md
    │   │   └── LICENSE.txt
    │   ├── cnpy
    │   │   └── LICENSE
    │   ├── zstr
    │   │   └── LICENSE
    │   ├── CMakeLists.txt
    │   └── ExceptionWithCallStack.h
    ├── tests
    │   ├── run_tests.cpp
    │   ├── README.md
    │   ├── dropout_test.cpp
    │   └── logger_test.cpp
    ├── data
    │   ├── revo_stub.cpp
    │   ├── rng_engine.h
    │   ├── types.h
    │   ├── batch.h
    │   ├── iterator_facade.h
    │   └── revo_stub.h
    ├── common
    │   ├── version.h
    │   ├── regex.h
    │   ├── version.cpp
    │   ├── hash.h
    │   ├── project_version.h.in
    │   ├── io_item.h
    │   ├── binary.h
    │   ├── config_validator.h
    │   ├── cli_helper.cpp
    │   ├── io.h
    │   └── utils.h
    ├── examples
    │   ├── mnist
    │   │   ├── .gitignore
    │   │   └── download.sh
    │   ├── README.md
    │   └── CMakeLists.txt
    ├── translator
    │   ├── history.cpp
    │   ├── helpers.h
    │   ├── nth_element.h
    │   └── output_printer.cpp
    ├── models
    │   ├── transformer_stub.cpp
    │   ├── model_task.h
    │   ├── transformer_factory.h
    │   └── model_base.h
    ├── tensors
    │   ├── gpu
    │   │   ├── element.h
    │   │   ├── add.h
    │   │   ├── algorithm.h
    │   │   ├── prod.h
    │   │   └── backend.h
    │   ├── cpu
    │   │   ├── backend.h
    │   │   └── sharp
    │   │   │   └── int_gemm.h
    │   ├── rand.h
    │   ├── backend.cpp
    │   ├── memory_piece.h
    │   └── backend.h
    ├── functional
    │   ├── defs.h
    │   ├── functional.h
    │   ├── array.h
    │   └── tensor.h
    ├── command
    │   ├── marian_scorer.cpp
    │   ├── marian_decoder.cpp
    │   ├── marian_vocab.cpp
    │   └── marian_conv.cpp
    ├── optimizers
    │   ├── clippers.cpp
    │   └── clippers.h
    ├── marian.h
    ├── training
    │   ├── gradient_dropping
    │   │   └── gpu
    │   │   │   └── sparse_algorithm.h
    │   ├── exponential_smoothing.h
    │   └── graph_group_async_drop.h
    ├── layers
    │   ├── weight.cpp
    │   ├── weight.h
    │   └── convolution.cpp
    ├── rnn
    │   └── attention_constructors.h
    └── graph
    │   └── node_operators.cpp
├── examples
    ├── transformer
    │   ├── .gitignore
    │   └── scripts
    │   │   ├── validate.sh
    │   │   └── download-files.sh
    ├── translating-amun
    │   └── .gitignore
    ├── tools
    │   ├── .gitignore
    │   └── Makefile
    ├── wmt2017-transformer
    │   ├── .gitignore
    │   └── scripts
    │   │   ├── validate.en.sh
    │   │   ├── validate.sh
    │   │   ├── download-files-mono.sh
    │   │   ├── rescore.py
    │   │   ├── download-files.sh
    │   │   └── preprocess-data-mono.sh
    ├── wmt2017-uedin
    │   ├── .gitignore
    │   └── scripts
    │   │   ├── validate.en.sh
    │   │   ├── validate.sh
    │   │   ├── download-files-mono.sh
    │   │   ├── rescore.py
    │   │   ├── download-files.sh
    │   │   └── preprocess-data-mono.sh
    ├── training-basics-sentencepiece
    │   ├── clean.sh
    │   ├── .gitignore
    │   └── data
    │   │   └── norm_romanian.tsv
    ├── training-basics
    │   ├── .gitignore
    │   ├── clean.sh
    │   └── scripts
    │   │   ├── validate.sh
    │   │   ├── normalise-romanian.py
    │   │   ├── remove-diacritics.py
    │   │   └── download-files.sh
    ├── .gitignore
    └── LICENSE.md
├── NBCL4NMT.pdf
├── contrib
    ├── other-builds
    │   ├── cmake_doze.txt
    │   └── eclipse
    │   │   └── .project
    ├── autoformat.sh
    └── vim
    │   └── .vimrc
├── CL_tools
    ├── display_enc_mod.sh
    ├── pipline.sh
    ├── plt_cdf.py
    ├── pre_sent_score.py
    └── stat_mod.py
├── runner
    ├── validate-en-de.sh
    └── decode_validate.sh
├── scripts
    ├── contrib
    │   ├── fix_hard.py
    │   └── inject_ctt.py
    └── server
    │   └── client_example.py
├── cmake
    ├── FindTcmalloc.cmake
    └── FindNCCL.cmake
└── LICENSE.md


/VERSION:
--------------------------------------------------------------------------------
1 | v2.0.0
2 | 


--------------------------------------------------------------------------------
/vs/.gitignore:
--------------------------------------------------------------------------------
1 | build-vs
2 | deps
3 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/debian/compat:
--------------------------------------------------------------------------------
1 | 9
2 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/VERSION:
--------------------------------------------------------------------------------
1 | 0.1.6
2 | 


--------------------------------------------------------------------------------
/examples/transformer/.gitignore:
--------------------------------------------------------------------------------
1 | data
2 | model
3 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/python/test/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/tensorflow/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/debian/copyright:
--------------------------------------------------------------------------------
1 | ../../LICENSE.txt


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/tensorflow/test/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/debian/source/format:
--------------------------------------------------------------------------------
1 | 3.0 (native)
2 | 


--------------------------------------------------------------------------------
/examples/translating-amun/.gitignore:
--------------------------------------------------------------------------------
1 | en-de
2 | data
3 | *.yml
4 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/python/.gitignore:
--------------------------------------------------------------------------------
1 | /*.so
2 | /build
3 | 


--------------------------------------------------------------------------------
/examples/tools/.gitignore:
--------------------------------------------------------------------------------
1 | moses-scripts
2 | subword-nmt
3 | sacreBLEU
4 | 


--------------------------------------------------------------------------------
/examples/wmt2017-transformer/.gitignore:
--------------------------------------------------------------------------------
1 | data
2 | model
3 | model.back
4 | 


--------------------------------------------------------------------------------
/examples/wmt2017-uedin/.gitignore:
--------------------------------------------------------------------------------
1 | data
2 | model
3 | model.back
4 | 
5 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/python/test/botchan.txt:
--------------------------------------------------------------------------------
1 | ../../data/botchan.txt


--------------------------------------------------------------------------------
/NBCL4NMT.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/NBCL4NMT.pdf


--------------------------------------------------------------------------------
/src/tests/run_tests.cpp:
--------------------------------------------------------------------------------
1 | #define CATCH_CONFIG_MAIN
2 | #include "catch.hpp"
3 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/tests/main.cpp:
--------------------------------------------------------------------------------
1 | #define CATCH_CONFIG_MAIN
2 | #include "catch.hpp"


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/python/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | 


--------------------------------------------------------------------------------
/examples/training-basics-sentencepiece/clean.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -v
2 | 
3 | git clean -f -d -f
4 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/tensorflow/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | sdist/
3 | dist/
4 | tmp/
5 | *py[cod]
6 | 


--------------------------------------------------------------------------------
/src/3rd_party/zlib/zlib.3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/zlib/zlib.3.pdf


--------------------------------------------------------------------------------
/src/data/revo_stub.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by revo on 8/2/19.
3 | //
4 | 
5 | #include "data/gap_training.h"
6 | 
7 | 


--------------------------------------------------------------------------------
/contrib/other-builds/cmake_doze.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/contrib/other-builds/cmake_doze.txt


--------------------------------------------------------------------------------
/examples/training-basics/.gitignore:
--------------------------------------------------------------------------------
1 | data/corpus.*
2 | data/news*.*.ro
3 | data/news*.*.en
4 | data/*.output
5 | model
6 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/python/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include test *.py *.model botchan.txt
2 | include *.i *.md
3 | 
4 | 


--------------------------------------------------------------------------------
/examples/training-basics-sentencepiece/.gitignore:
--------------------------------------------------------------------------------
1 | data/corpus.*
2 | data/news*.ro
3 | data/news*.en
4 | data/*.output
5 | model
6 | 


--------------------------------------------------------------------------------
/examples/training-basics/clean.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -v
2 | 
3 | git clean -f
4 | rm -rf moses-scripts subword-nmt model data/*.output
5 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/bench/logs/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore everything in this directory
2 | *
3 | # Except this file
4 | !.gitignore
5 | 


--------------------------------------------------------------------------------
/src/common/version.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <string>
4 | 
5 | namespace marian {
6 |   std::string buildVersion();  // declaration only: takes no arguments and returns a std::string; the definition is not in this header (presumably common/version.cpp — TODO confirm)
7 | }
8 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/.gitignore:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
2 | /build
3 | *.gcov
4 | /coverage/
5 | 


--------------------------------------------------------------------------------
/src/examples/mnist/.gitignore:
--------------------------------------------------------------------------------
1 | t10k-images-idx3-ubyte
2 | t10k-labels-idx1-ubyte
3 | train-images-idx3-ubyte
4 | train-labels-idx1-ubyte
5 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/debian/.gitignore:
--------------------------------------------------------------------------------
1 | /*.debhelper.log
2 | /*.debhelper
3 | /*.substvars
4 | /tmp/
5 | /files
6 | /libnccl1/
7 | /libnccl-dev/
8 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/example/jni/Application.mk:
--------------------------------------------------------------------------------
1 | # Exceptions are used in spdlog. Link to an exception-ready C++ runtime.
2 | APP_STL = gnustl_static
3 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/python/test/test_model.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/python/test/test_model.model


--------------------------------------------------------------------------------
/src/3rd_party/nccl/makefiles/version.mk:
--------------------------------------------------------------------------------
1 | ##### version
2 | NCCL_MAJOR   := 2
3 | NCCL_MINOR   := 3
4 | NCCL_PATCH   := 7
5 | NCCL_SUFFIX  :=
6 | PKG_REVISION := 1
7 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/python/test/test_ja_model.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/python/test/test_ja_model.model


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/debian/libnccl-dev.install.in:
--------------------------------------------------------------------------------
1 | include/nccl.h /usr/include
2 | lib/libnccl.so /usr/lib/${pkg:MultiArch}
3 | lib/libnccl_static.a /usr/lib/${pkg:MultiArch}
4 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/astyle.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | find . -name "*\.h" -o -name "*\.cpp"|xargs dos2unix
3 | find . -name "*\.h" -o -name "*\.cpp"|xargs astyle -n -c -A1 
4 | 
5 | 
6 | 


--------------------------------------------------------------------------------
/src/common/regex.h:
--------------------------------------------------------------------------------
1 | 
2 | #ifdef USE_BOOST_REGEX  // compile-time switch between Boost.Regex and the standard <regex>
3 | #include <boost/regex.hpp>
4 | namespace regex = boost;  // with the macro defined, regex::X names boost::X
5 | #else
6 | #include <regex>
7 | namespace regex = std;  // otherwise regex::X names std::X
8 | #endif  // either way, callers spell the types through the `regex` alias
9 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/debian/libnccl2.install.in:
--------------------------------------------------------------------------------
1 | lib/libnccl.so.${nccl:Major} /usr/lib/${pkg:MultiArch}
2 | lib/libnccl.so.${nccl:Major}.${nccl:Minor}.${nccl:Patch} /usr/lib/${pkg:MultiArch}
3 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/debian/gbp.conf:
--------------------------------------------------------------------------------
 1 | [DEFAULT]
 2 | debian-branch   = master
 3 | upstream-branch = master
 4 | 
 5 | ignore-new = True
 6 | 
 7 | [git-buildpackage]
 8 | 
 9 | no-purge = True
10 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/cmake/spdlog.pc.in:
--------------------------------------------------------------------------------
1 | prefix=@CMAKE_INSTALL_PREFIX@
2 | includedir=${prefix}/include
3 | 
4 | Name: @PROJECT_NAME@
5 | Description: Super fast C++ logging library. 
6 | Version: @PROJECT_VERSION@
7 | 


--------------------------------------------------------------------------------
/src/translator/history.cpp:
--------------------------------------------------------------------------------
1 | #include "history.h"
2 | 
3 | namespace marian {
4 | 
5 | History::History(size_t lineNo, float alpha, float wp)  // constructor: stores its three arguments and does nothing else
6 |     : lineNo_(lineNo), alpha_(alpha), wp_(wp) {}  // members set via the initializer list; body is empty (alpha/wp are presumably scoring parameters — TODO confirm in history.h)
7 | }  // namespace marian
8 | 


--------------------------------------------------------------------------------
/examples/training-basics-sentencepiece/data/norm_romanian.tsv:
--------------------------------------------------------------------------------
 1 | 015E	53
 2 | 015F	73
 3 | 0162	54
 4 | 0163	74
 5 | 0218	53
 6 | 0219	73
 7 | 021A	54
 8 | 021B	74
 9 | 0102	41
10 | 0103	61
11 | 00C2	41
12 | 00E2	61
13 | 00CE	49
14 | 00EE	69


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.7.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.7.0


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.8.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.8.0


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.9.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.9.0


--------------------------------------------------------------------------------
/CL_tools/display_enc_mod.sh:
--------------------------------------------------------------------------------
1 | ls model_revo.iter* | xargs -I {} python ~/GOD.util/performance/competence/dis_enc_mod_print.py {} encoder_Wemb > CL_MOD.log
2 | python ~/GOD.util/performance/competence/plt_avg_mod.py BASE_AVG_MOD < CL_MOD.log
3 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.10.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.10.0


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.11.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.11.0


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.12.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.12.0


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | include_directories(..)
2 | include_directories(.)
3 | 
4 | FILE(GLOB YamlCppSources *.cpp contrib/*.cpp)
5 | if (NOT TARGET libyaml-cpp)
6 |   add_library(libyaml-cpp OBJECT ${YamlCppSources})
7 | endif()
8 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/config.h.in:
--------------------------------------------------------------------------------
 1 | #ifndef CONFIG_H_
 2 | #define CONFIG_H_
 3 | 
 4 | #define VERSION "@PROJECT_VERSION@"
 5 | #define PACKAGE "@PROJECT_NAME@"
 6 | #define PACKAGE_STRING "@PROJECT_NAME@"
 7 | 
 8 | 
 9 | #endif  // CONFIG_H_
10 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | from tf_sentencepiece.sentencepiece_processor_ops import *
6 | 


--------------------------------------------------------------------------------
/runner/validate-en-de.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | SRCL=en  # source language code; NOTE(review): not referenced anywhere else in this script
 4 | TGTL=de  # target language code; used as the suffix of the reference file name below
 5 | TERM=News  # NOTE(review): TERM is conventionally the terminal-type variable and is not referenced below — confirm this assignment is intended
 6 | VALID=data/newstest2013.tc.$TGTL  # reference file handed to multi-bleu.perl in the pipeline below
 7 | 
 8 | cat $1 | sed 's/@@ //g' \
 9 |     | ~/NBCL-marian/runner/multi-bleu.perl -lc $VALID \
10 |     | sed -r 's/BLEU = ([0-9.]+),.*/\1/'
11 | 


--------------------------------------------------------------------------------
/src/3rd_party/pathie-cpp/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | include_directories(..)
2 | include_directories(.)
3 | include_directories(include)
4 | 
5 | FILE(GLOB PathieCppSources src/*.cpp)
6 | if (NOT TARGET pathie-cpp)
7 |   add_library(pathie-cpp OBJECT ${PathieCppSources})
8 | endif()
9 | 


--------------------------------------------------------------------------------
/src/models/transformer_stub.cpp:
--------------------------------------------------------------------------------
1 | // TODO: This file is a thin wrapper around transformer.h. The header keeps its .h name so as not to confuse git until this change has been code-reviewed.
2 | // The wrapper is meant to speed up builds and to support rebuilding with Ctrl-F7.
3 | 
4 | #include "models/transformer.h"
5 | 


--------------------------------------------------------------------------------
/CL_tools/pipline.sh:
--------------------------------------------------------------------------------
1 | python ~/GOD.util/performance/competence/process_fasttext.py -i corpus.bpe.en -o en.emb -v ../BASE_2_REVO_en-ro/vocab.en.yml -w ~/fast/fasttext
2 | python ~/GOD.util/performance/competence/build_cdf_mod.py --emb_vector en.emb.orig.vec corpus.bpe.en en-mod
3 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/null.cpp:
--------------------------------------------------------------------------------
 1 | #include "yaml-cpp/null.h"
 2 | 
 3 | namespace YAML {
 4 | _Null Null;
 5 | 
 6 | bool IsNullString(const std::string& str) {  // returns true when str is one of the spellings this function treats as null
 7 |   return str.empty() || str == "~" || str == "null" || str == "Null" ||  // accepted: empty string, "~", "null", "Null", "NULL"
 8 |          str == "NULL";  // comparisons are exact, so other casings such as "nULL" return false
 9 | }
10 | }
11 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/debian/changelog.in:
--------------------------------------------------------------------------------
1 | nccl (${nccl:Major}.${nccl:Minor}.${nccl:Patch}${nccl:Suffix}-${pkg:Revision}+cuda${cuda:Major}.${cuda:Minor}) trusty; urgency=medium
2 | 
3 |   * Automatic Debian package from build
4 | 
5 |  -- cudatools <cudatools@nvidia.com>  ${pkg:Timestamp}
6 | 


--------------------------------------------------------------------------------
/src/tensors/gpu/element.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "tensors/tensor.h"
 4 | 
 5 | namespace marian {
 6 | namespace gpu {
 7 | 
 8 | template <class Functor, class... Tensors>  // Functor and the trailing tensor pack are deduced from the call
 9 | void Element(Functor functor, Tensor out, Tensors... tensors);  // declaration only; presumably applies functor element-wise over the tensors and writes into out — TODO confirm against the definition
10 | }
11 | }  // namespace marian
12 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/debian/rules:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/make -f
 2 | 
 3 | %:
 4 | 	dh $@ --parallel
 5 | 
 6 | override_dh_auto_install:
 7 | 	PREFIX=debian/tmp dh_auto_install
 8 | 
 9 | override_dh_auto_test:
10 | 	# Do not make test
11 | 
12 | override_dh_auto_clean:
13 | 	# Do not make clean
14 | 


--------------------------------------------------------------------------------
/examples/tools/Makefile:
--------------------------------------------------------------------------------
1 | all: moses-scripts subword-nmt sacreBLEU
2 | 
3 | moses-scripts:
4 | 	git clone https://github.com/marian-nmt/moses-scripts
5 | subword-nmt:
6 | 	git clone https://github.com/rsennrich/subword-nmt
7 | sacreBLEU:
8 | 	git clone https://github.com/marian-nmt/sacreBLEU -b master
9 | 


--------------------------------------------------------------------------------
/src/tensors/gpu/add.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "tensors/tensor.h"
 4 | 
 5 | namespace marian {
 6 | 
 7 | namespace gpu {
 8 | 
 9 | // Declaration only: GPU-namespace Add, templated on a functor type and a
10 | // variadic pack of tensor argument types. Takes the functor, a float `scale`,
11 | // an `out` tensor and the remaining tensors by value; the definition is not in
12 | // this header.
13 | // NOTE(review): presumably accumulates scale * functor(tensors...) into `out`
14 | // -- confirm against the implementation.
15 | template <class Functor, class... Tensors>
16 | void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors);
17 | }  // namespace gpu
18 | }  // namespace marian
19 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/yaml-node.cpp:
--------------------------------------------------------------------------------
 1 | #include "yaml-cpp/node/node.h"
 2 | #include "nodebuilder.h"
 3 | #include "nodeevents.h"
 4 | 
 5 | namespace YAML {
 6 | // Builds a new Node from `node`: the source is wrapped in a NodeEvents,
 7 | // emitted into a NodeBuilder, and the builder's root is returned.
 8 | Node Clone(const Node& node) {
 9 |   NodeEvents sourceEvents(node);
10 |   NodeBuilder rebuilt;
11 |   sourceEvents.Emit(rebuilt);
12 |   return rebuilt.Root();
13 | }
14 | }  // namespace YAML
15 | 


--------------------------------------------------------------------------------
/src/models/model_task.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <string>
 4 | 
 5 | namespace marian {
 6 | 
 7 | // Interface for a task that is started with a single run() call.
 8 | struct ModelTask {
 9 |   // Virtual destructor so that implementations can be destroyed safely
10 |   // through a ModelTask pointer.
11 |   virtual ~ModelTask() {}
12 |   virtual void run() = 0;
13 | };
14 | 
15 | // Interface for a task that is initialised once with init() and then maps an
16 | // input string to an output string on each run() call.
17 | struct ModelServiceTask {
18 |   // Virtual destructor so that implementations can be destroyed safely
19 |   // through a ModelServiceTask pointer.
20 |   virtual ~ModelServiceTask() {}
21 |   virtual void init() = 0;
22 |   virtual std::string run(const std::string&) = 0;
23 | };
24 | }  // namespace marian
25 | 


--------------------------------------------------------------------------------
/src/examples/README.md:
--------------------------------------------------------------------------------
 1 | Marian examples
 2 | ---------------
 3 | 
 4 | Examples are enabled with CMake option `-DCOMPILE_EXAMPLES=ON`.
 5 | 
 6 | ## MNIST
 7 | 
 8 | You will need MNIST data for training and testing. Download them with the
 9 | script `src/examples/mnist/download.sh` or provide paths to the files with
10 | `--train-sets` and `--valid-sets` options.
11 | 


--------------------------------------------------------------------------------
/src/common/version.cpp:
--------------------------------------------------------------------------------
 1 | #include "common/version.h"
 2 | #include "common/git_revision.h"     // make-generated file, contains git commit info
 3 | #include "common/project_version.h"  // cmake-generated file, major/minor/tweak versions
 4 | 
 5 | namespace marian {
 6 | 
 7 | // Returns "<PROJECT_VERSION> <GIT_REVISION>", i.e. the two build-time macros
 8 | // joined by a single space.
 9 | std::string buildVersion() {
10 |   std::string version(PROJECT_VERSION);
11 |   version += " ";
12 |   version += GIT_REVISION;
13 |   return version;
14 | }
15 | }  // namespace marian
16 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/bench/latency/compare.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs the spdlog-latency and g3log-latency binaries from the current
 3 | # directory ten times each, alternating between them.
 4 | # $1: value passed to both binaries (defaults to 10); the banner below calls
 5 | #     it the thread count.
 6 | echo "running spdlog and g3log tests 10 time with ${1:-10} threads each (total 1,000,000 entries).."
 7 | # Delete every *.log file in the current directory before starting.
 8 | rm -f *.log
 9 | for i in {1..10}
10 | 
11 | do
12 |    echo
13 |    sleep 0.5
14 |    # `|| exit` stops the whole comparison as soon as one binary fails.
15 |    ./spdlog-latency ${1:-10} 2>/dev/null || exit
16 |    sleep 0.5
17 |    ./g3log-latency ${1:-10} 2>/dev/null || exit
18 | 
19 | done
20 | 


--------------------------------------------------------------------------------
/examples/transformer/scripts/validate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Usage: validate.sh FILE
 4 | # Removes every "@@ " sequence from FILE, detruecases and detokenizes the text
 5 | # (-l de), scores it against data/valid.de with multi-bleu-detok.perl and
 6 | # reduces the "BLEU = <score>, ..." output to the bare score.
 7 | # Tool and data paths are relative to the current working directory.
 8 | cat $1 \
 9 |     | sed 's/\@\@ //g' \
10 |     | ../tools/moses-scripts/scripts/recaser/detruecase.perl 2>/dev/null \
11 |     | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l de 2>/dev/null \
12 |     | ../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/valid.de \
13 |     | sed -r 's/BLEU = ([0-9.]+),.*/\1/'
14 | 


--------------------------------------------------------------------------------
/src/3rd_party/CLI/Version.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // Distributed under the 3-Clause BSD License.  See accompanying
 4 | // file LICENSE or https://github.com/CLIUtils/CLI11 for details.
 5 | 
 6 | // [CLI11:verbatim]
 7 | 
 8 | #define CLI11_VERSION_MAJOR 1
 9 | #define CLI11_VERSION_MINOR 6
10 | #define CLI11_VERSION_PATCH 1
11 | #define CLI11_VERSION "1.6.1"
12 | 
13 | // [CLI11:verbatim]
14 | 


--------------------------------------------------------------------------------
/examples/wmt2017-uedin/scripts/validate.en.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Usage: validate.en.sh FILE
 4 | # Removes every "@@ " sequence from FILE, detruecases and detokenizes the text
 5 | # (-l en), scores it against data/valid.en with multi-bleu-detok.perl and
 6 | # reduces the "BLEU = <score>, ..." output to the bare score.
 7 | # Tool and data paths are relative to the current working directory.
 8 | cat $1 \
 9 |     | sed 's/\@\@ //g' \
10 |     | ../tools/moses-scripts/scripts/recaser/detruecase.perl 2>/dev/null \
11 |     | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l en 2>/dev/null \
12 |     | ../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/valid.en \
13 |     | sed -r 's/BLEU = ([0-9.]+),.*/\1/'
14 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/INSTALL:
--------------------------------------------------------------------------------
 1 | spdlog is a header-only library.
 2 | Just copy the files to your build tree and use a C++11 compiler.
 3 | 
 4 | Tested on:
 5 | gcc 4.8.1 and above
 6 | clang 3.5
 7 | Visual Studio 2013
 8 | 
 9 | gcc 4.8 flags: --std=c++11 -pthread -O3 -flto -Wl,--no-as-needed
10 | gcc 4.9 flags: --std=c++11 -pthread -O3 -flto
11 | 
12 | 
13 | see the makefile in the example folder
14 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/tests/utils.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <string>
 4 | #include<cstddef>
 5 | 
 6 | // Test helper declarations; the definitions live outside this header.
 7 | // NOTE(review): behaviour below is inferred from the names -- confirm.
 8 | 
 9 | // Presumably the number of lines in the file `filename`.
10 | std::size_t count_lines(const std::string& filename);
11 | 
12 | // Takes no arguments and returns nothing; presumably sets up the log directory.
13 | void prepare_logdir();
14 | 
15 | // Presumably the full contents of the file `filename` as a string.
16 | std::string file_contents(const std::string& filename);
17 | 
18 | // Presumably the size of the file `filename`.
19 | std::size_t get_filesize(const std::string& filename);
20 | 
21 | 


--------------------------------------------------------------------------------
/examples/training-basics/scripts/validate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Usage: validate.sh FILE
 4 | # Removes every "@@ " sequence from FILE, detruecases and detokenizes the text
 5 | # (-l en), scores it against data/newsdev2016.en with multi-bleu-detok.perl and
 6 | # reduces the "BLEU = <score>, ..." output to the bare score.
 7 | # Tool and data paths are relative to the current working directory.
 8 | cat $1 \
 9 |     | sed 's/\@\@ //g' \
10 |     | ../tools/moses-scripts/scripts/recaser/detruecase.perl 2> /dev/null \
11 |     | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l en 2>/dev/null \
12 |     | ../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/newsdev2016.en \
13 |     | sed -r 's/BLEU = ([0-9.]+),.*/\1/'
14 | 


--------------------------------------------------------------------------------
/examples/wmt2017-transformer/scripts/validate.en.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Usage: validate.en.sh FILE
 4 | # Removes every "@@ " sequence from FILE, detruecases and detokenizes the text
 5 | # (-l en), scores it against data/valid.en with multi-bleu-detok.perl and
 6 | # reduces the "BLEU = <score>, ..." output to the bare score.
 7 | # Tool and data paths are relative to the current working directory.
 8 | cat $1 \
 9 |     | sed 's/\@\@ //g' \
10 |     | ../tools/moses-scripts/scripts/recaser/detruecase.perl 2>/dev/null \
11 |     | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l en 2>/dev/null \
12 |     | ../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/valid.en \
13 |     | sed -r 's/BLEU = ([0-9.]+),.*/\1/'
14 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/tests/includes.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cstdio>
 4 | #include <fstream>
 5 | #include <string>
 6 | #include <ostream>
 7 | #include <chrono>
 8 | #include <exception>
 9 | 
10 | #include "catch.hpp"
11 | #include "utils.h"
12 | 
13 | #include "../include/spdlog/spdlog.h"
14 | #include "../include/spdlog/sinks/null_sink.h"
15 | #include "../include/spdlog/sinks/ostream_sink.h"
16 | 
17 | 


--------------------------------------------------------------------------------
/src/functional/defs.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // Shorthand macros for CUDA function qualifiers. When __CUDA_ARCH__ is
 4 | // defined they expand to __host__/__device__ (plus `inline` for the *I
 5 | // variants); otherwise the qualifiers vanish and only `inline` remains.
 6 | // NOTE(review): __CUDA_ARCH__ is normally defined only while compiling device
 7 | // code, so the qualifiers seen by the host pass differ -- confirm this is
 8 | // intended (as opposed to testing __CUDACC__).
 9 | #ifdef __CUDA_ARCH__
10 | 
11 | #include <cuda.h>
12 | #define __H__ __host__
13 | #define __D__ __device__
14 | #define __HI__ __host__ inline
15 | #define __HD__ __host__ __device__
16 | #define __HDI__ __host__ __device__ inline
17 | 
18 | #else
19 | 
20 | #define __H__
21 | #define __D__
22 | #define __HI__ inline
23 | #define __HD__
24 | #define __HDI__ inline
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/examples/wmt2017-uedin/scripts/validate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Usage: validate.sh FILE
 4 | # Removes every "@@ " sequence from FILE, detruecases and detokenizes the text
 5 | # (-l de), scores it against data/valid.de with multi-bleu-detok.perl and
 6 | # reduces the "BLEU = <score>, ..." output to the bare score.
 7 | # Tool and data paths are relative to the current working directory.
 8 | 
 9 | # Run the whole pipeline under the C.UTF-8 locale.
10 | export LC_ALL=C.UTF-8
11 | 
12 | cat $1 \
13 |     | sed 's/\@\@ //g' \
14 |     | ../tools/moses-scripts/scripts/recaser/detruecase.perl 2>/dev/null \
15 |     | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l de 2>/dev/null \
16 |     | ../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/valid.de \
17 |     | sed -r 's/BLEU = ([0-9.]+),.*/\1/'
18 | 


--------------------------------------------------------------------------------
/src/3rd_party/zlib/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # lists of the source (*.c) and header (*.h) files of the library
 2 | file(GLOB ZLIB_SRC *.c)
 3 | file(GLOB ZLIB_INC *.h)
 4 | 
 5 | # build them as an OBJECT library named "zlib"
 6 | add_library(zlib OBJECT ${ZLIB_SRC} ${ZLIB_INC})
 7 | 
 8 | # warning suppressions: MSVC warnings 4996 and 4267, otherwise
 9 | # implicit-function-declaration warnings
10 | if(MSVC)
11 |   target_compile_options(zlib PUBLIC /wd"4996" /wd"4267")
12 | else()
13 |   target_compile_options(zlib PUBLIC -Wno-implicit-function-declaration)
14 | endif()
15 | 


--------------------------------------------------------------------------------
/examples/wmt2017-transformer/scripts/validate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Usage: validate.sh FILE
 4 | # Removes every "@@ " sequence from FILE, detruecases and detokenizes the text
 5 | # (-l de), scores it against data/valid.de with multi-bleu-detok.perl and
 6 | # reduces the "BLEU = <score>, ..." output to the bare score.
 7 | # Tool and data paths are relative to the current working directory.
 8 | 
 9 | # Run the whole pipeline under the C.UTF-8 locale.
10 | export LC_ALL=C.UTF-8
11 | 
12 | cat $1 \
13 |     | sed 's/\@\@ //g' \
14 |     | ../tools/moses-scripts/scripts/recaser/detruecase.perl 2>/dev/null \
15 |     | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l de 2>/dev/null \
16 |     | ../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/valid.de \
17 |     | sed -r 's/BLEU = ([0-9.]+),.*/\1/'
18 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/sentencepiece.pc.in:
--------------------------------------------------------------------------------
 1 | prefix=@prefix@
 2 | exec_prefix=@exec_prefix@
 3 | libdir=@libdir@
 4 | includedir=@includedir@
 5 | 
 6 | Name: @PROJECT_NAME@
 7 | Description: Unsupervised text tokenizer and detokenizer for Neural Network-based text generation.
 8 | Version: @PROJECT_VERSION@
 9 | Libs: -L${libdir} -lsentencepiece -lsentencepiece_train -lprotobuf @pkgconfiglibs@
10 | Cflags: -I${includedir} @pkgconfigcflags@
11 | 


--------------------------------------------------------------------------------
/examples/wmt2017-uedin/scripts/download-files-mono.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -v
 2 | 
 3 | mkdir -p data
 4 | cd data
 5 | 
 6 | # get the shuffled German news 2016 corpus from the WMT17 site
 7 | # (-nc: do not download again if the file is already present)
 8 | wget -nc http://data.statmt.org/wmt17/translation-task/news.2016.de.shuffled.gz
 9 | 
10 | # sample 11M lines, keep those that split into fewer than 100 fields on
11 | # whitespace, and write the first 10M of them to news.2016.de
12 | zcat news.2016.de.shuffled.gz | shuf -n 11000000 | perl -ne 'print if(split(/\s/, $_) < 100)' | head -n 10000000 > news.2016.de
13 | 
14 | # clean: delete the downloaded archive
15 | rm -r news.2016.de.shuffled.gz
16 | 
17 | cd ..
18 | 


--------------------------------------------------------------------------------
/src/models/transformer_factory.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "marian.h"
 4 | 
 5 | #include "models/decoder.h"
 6 | #include "models/encoder.h"
 7 | //#include "models/states.h"
 8 | //#include "layers/constructors.h"
 9 | //#include "layers/factory.h"
10 | 
11 | namespace marian {
12 | // Factory functions for the transformer encoder and decoder; each takes the
13 | // options object and returns the result through the corresponding base-class
14 | // pointer. Declarations only -- the definitions are not in this header.
15 | Ptr<EncoderBase> NewEncoderTransformer(Ptr<Options> options);
16 | Ptr<DecoderBase> NewDecoderTransformer(Ptr<Options> options);
17 | }  // namespace marian
18 | 


--------------------------------------------------------------------------------
/examples/wmt2017-transformer/scripts/download-files-mono.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -v
 2 | 
 3 | mkdir -p data
 4 | cd data
 5 | 
 6 | # get the shuffled German news 2016 corpus from the WMT17 site
 7 | # (-nc: do not download again if the file is already present)
 8 | wget -nc http://data.statmt.org/wmt17/translation-task/news.2016.de.shuffled.gz
 9 | 
10 | # sample 11M lines, keep those that split into fewer than 100 fields on
11 | # whitespace, and write the first 10M of them to news.2016.de
12 | zcat news.2016.de.shuffled.gz | shuf -n 11000000 | perl -ne 'print if(split(/\s/, $_) < 100)' | head -n 10000000 > news.2016.de
13 | 
14 | # clean: delete the downloaded archive
15 | rm -r news.2016.de.shuffled.gz
16 | 
17 | cd ..
18 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/bench/easyl.conf:
--------------------------------------------------------------------------------
 1 | * GLOBAL:
 2 |     FORMAT                  =   "[%datetime]: %msg"
 3 |     FILENAME                =   ./logs/easylogging.log
 4 |     ENABLED                 =   true
 5 |     TO_FILE                 =   true
 6 |     TO_STANDARD_OUTPUT      =   false
 7 |     MILLISECONDS_WIDTH      =   3
 8 |     PERFORMANCE_TRACKING    =   false
 9 |     MAX_LOG_FILE_SIZE       =   10485760
10 |     Log_Flush_Threshold		= 	10485760
11 | 


--------------------------------------------------------------------------------
/contrib/autoformat.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -v
 2 | 
 3 | # Formats the project's C++/CUDA sources in place with clang-format.
 4 | # If clang-format is not found on PATH, the LLVM 6.0.0 binary release is
 5 | # unpacked into $HOME/.local first.
 6 | if ! [ -x "$( command -v clang-format )" ]
 7 | then
 8 |     mkdir -p "$HOME/.local"
 9 |     wget -O- http://releases.llvm.org/6.0.0/clang+llvm-6.0.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz | tar --xz -xf - -C "$HOME/.local" --strip 1
10 | fi
11 | 
12 | # The name patterns are quoted so that the shell cannot expand them against
13 | # files in the current directory, and they are grouped with an explicit -print
14 | # so that the pruned directories themselves are not passed to clang-format.
15 | find ./src \( -path ./src/3rd_party -o -path ./src/tests -o -path ./src/models/experimental \) -prune -o \( -iname '*.h' -o -iname '*.cpp' -o -iname '*.cu' \) -print | xargs clang-format -i
16 | 


--------------------------------------------------------------------------------
/runner/decode_validate.sh:
--------------------------------------------------------------------------------
 1 | # Decodes $testset with the model found in directory $1 and scores the result.
 2 | # $1: directory holding model_revo.npz.best-translation.npz, vocab.en.yml and
 3 | #     vocab.de.yml.
 4 | # NOTE(review): no shebang line, and the multi-bleu.perl path below points into
 5 | # a home directory -- adjust for the local setup.
 6 | testset=data/newstest2014.bpe.en
 7 | testset_ref=data/newstest2014.tc.de
 8 | # The decoder output is directed to output.txt (-o) in the current directory.
 9 | cat $testset | build/marian-decoder -m $1/model_revo.npz.best-translation.npz -v \
10 | $1/vocab.en.yml $1/vocab.de.yml -b 6 -n 0.6 --mini-batch 100 -d 6 7 -o output.txt
11 | 
12 | 
13 | 
14 | # Remove "@@ " sequences, then pass the text to multi-bleu.perl (-lc) with
15 | # $testset_ref as the reference.
16 | cat output.txt | sed 's/@@ //g' \
17 |     | ~/GOD.util/moses-scripts/scripts/generic/multi-bleu.perl -lc $testset_ref
18 | #     | sed -r 's/BLEU = ([0-9.]+),.*/\1/'
19 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/include/spdlog/fmt/ostr.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2016 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | // include external or bundled copy of fmtlib's ostream support
 9 | //
10 | #if !defined(SPDLOG_FMT_EXTERNAL)
11 | #include "spdlog/fmt/fmt.h"
12 | #include "spdlog/fmt/bundled/ostream.h"
13 | #else
14 | #include <fmt/ostream.h>
15 | #endif
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/src/examples/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Example programs; each is built from a single source file.
 2 | add_executable(iris_example iris/iris.cpp)
 3 | add_executable(mnist_example mnist/mnist_ffnn.cpp)
 4 | 
 5 | # Link every example against marian (plus marian_cuda when CUDA was found) and
 6 | # place the resulting binary directly in the top-level build directory.
 7 | foreach(exec iris_example mnist_example)
 8 |   target_link_libraries(${exec} marian ${EXT_LIBS})
 9 |   if(CUDA_FOUND)
10 |     target_link_libraries(${exec} marian marian_cuda ${EXT_LIBS})
11 |   endif(CUDA_FOUND)
12 |   set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
13 | endforeach(exec)
14 | 


--------------------------------------------------------------------------------
/scripts/contrib/fix_hard.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import sys
 3 | 
 4 | d = dict()
 5 | m = np.load(sys.argv[1])
 6 | for k in m:
 7 |   if "ff_" == k[0:3]:
 8 |     d["decoder_" + k] = m[k]
 9 |   elif k == "special:model.yml":
10 |    info = m[k].tobytes()
11 |    info = info.replace("layers-dec", "dec-depth")
12 |    info = info.replace("layers-enc", "enc-depth")
13 |    d[k] = info
14 |    print info
15 |   else:
16 |     d[k] = m[k]
17 | np.savez(sys.argv[1] + ".fixed", **d)


--------------------------------------------------------------------------------
/src/command/marian_scorer.cpp:
--------------------------------------------------------------------------------
 1 | #include "marian.h"
 2 | 
 3 | #include "models/model_task.h"
 4 | #include "rescorer/rescorer.h"
 5 | #include "common/timer.h"
 6 | 
 7 | // Entry point of the scorer: parses the command line in scoring mode, runs a
 8 | // Rescore<Rescorer> task and logs the elapsed wall time.
 9 | int main(int argc, char** argv) {
10 |   using namespace marian;
11 | 
12 |   auto config = parseOptions(argc, argv, cli::mode::scoring);
13 | 
14 |   timer::Timer wallClock;
15 |   {
16 |     // Scoped so that the task is destroyed before the elapsed time is read.
17 |     auto rescoreTask = New<Rescore<Rescorer>>(config);
18 |     rescoreTask->run();
19 |   }
20 |   LOG(info, "Total time: {:.5f}s wall", wallClock.elapsed());
21 | 
22 |   return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/sinks/sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | 
 7 | #pragma once
 8 | 
 9 | #include <spdlog/details/log_msg.h>
10 | 
11 | namespace spdlog
12 | {
13 | namespace sinks
14 | {
15 | // Abstract base class of log sinks: receives log messages through log() and
16 | // is asked to flush through flush(). Both must be provided by subclasses.
17 | class sink
18 | {
19 | public:
20 |     virtual ~sink() {}
21 |     // Handle one log message.
22 |     virtual void log(const details::log_msg& msg) = 0;
23 |     // NOTE(review): presumably writes out anything buffered -- confirm.
24 |     virtual void flush() = 0;
25 | };
26 | }
27 | }
28 | 
29 | 


--------------------------------------------------------------------------------
/src/examples/mnist/download.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Downloads the four gzipped MNIST files into the current directory and
 4 | # unpacks them. If exactly four *-ubyte files are already present, their names
 5 | # are printed and nothing is downloaded.
 6 | if [ `ls -1 *-ubyte 2>/dev/null | wc -l ` == 4 ]; then
 7 |     echo Files exist: `ls -1 *-ubyte`;
 8 |     exit;
 9 | fi
10 | 
11 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
12 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
13 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
14 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
15 | 
16 | # Decompress all downloaded archives in place.
17 | gzip -d *-ubyte.gz
18 | 


--------------------------------------------------------------------------------
/src/tests/README.md:
--------------------------------------------------------------------------------
 1 | Marian tests
 2 | ============
 3 | 
 4 | Unit tests and application tests are enabled with CMake option
 5 | `-DCOMPILE_TESTS=ON`, e.g.:
 6 | 
 7 |     cd build
 8 |     cmake .. -DCOMPILE_TESTS=ON
 9 |     make -j8
10 | 
11 | Running all unit tests:
12 | 
13 |     make test
14 | 
15 | Running a single unit test is also possible:
16 | 
17 |     ./src/tests/run_graph_tests
18 | 
19 | We use the [Catch framework](https://github.com/philsquared/Catch) for unit
20 | testing.
21 | 


--------------------------------------------------------------------------------
/src/tensors/cpu/backend.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <functional>
 4 | #include <random>
 5 | 
 6 | #include "common/config.h"
 7 | #include "tensors/backend.h"
 8 | 
 9 | namespace marian {
10 | namespace cpu {
11 | 
12 | // CPU implementation of marian::Backend. The constructor forwards the device
13 | // id and seed to the base class; setDevice() and synchronize() are overridden
14 | // with empty bodies.
15 | class Backend : public marian::Backend {
16 | public:
17 |   Backend(DeviceId deviceId, size_t seed) : marian::Backend(deviceId, seed) {}
18 |   void setDevice() override {}
19 |   void synchronize() override {}
20 | };
21 | }  // namespace cpu
22 | }  // namespace marian
23 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/ext-net/dummy/Makefile:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # See LICENSE.txt for license information
 5 | #
 6 | # Locations of the NCCL build output and the CUDA toolkit; only their
 7 | # include/ subdirectories are used (see INC).
 8 | NCCL_HOME:=../../build/
 9 | CUDA_HOME:=/usr/local/cuda
10 | INC:= -I$(NCCL_HOME)/include -I$(CUDA_HOME)/include
11 | PLUGIN_SO:=libnccl-net.so
12 | 
13 | default: $(PLUGIN_SO)
14 | 
15 | # Compile plugin.c into a position-independent shared object whose soname
16 | # equals its file name.
17 | $(PLUGIN_SO): plugin.c
18 | 	$(CC) $(INC) -fPIC -shared -o $@ -Wl,-soname,$(PLUGIN_SO) $^
19 | 
20 | clean:
21 | 	rm -f $(PLUGIN_SO)
22 | 


--------------------------------------------------------------------------------
/src/3rd_party/zlib/inffast.h:
--------------------------------------------------------------------------------
 1 | /* inffast.h -- header to use inffast.c
 2 |  * Copyright (C) 1995-2003, 2010 Mark Adler
 3 |  * For conditions of distribution and use, see copyright notice in zlib.h
 4 |  */
 5 | 
 6 | /* WARNING: this file should *not* be used by applications. It is
 7 |    part of the implementation of the compression library and is
 8 |    subject to change. Applications should only use zlib.h.
 9 |  */
10 | 
11 | /* Declaration of inflate_fast(), taking a z_streamp and an unsigned start
12 |    value; ZLIB_INTERNAL and OF() are macros defined outside this header. */
13 | void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start));
14 | 


--------------------------------------------------------------------------------
/src/optimizers/clippers.cpp:
--------------------------------------------------------------------------------
 1 | #include "clippers.h"
 2 | 
 3 | #include "functional/functional.h"
 4 | #include "tensors/tensor_operators.h"
 5 | 
 6 | namespace marian {
 7 | // Applies functional::clip with the bound c_ to every element of `t`,
 8 | // assigning the result back to that element.
 9 | void Elementwise::clip(Tensor t) {
10 |   using namespace functional;
11 |   Element(_1 = functional::clip(_1, c_), t);
12 | }
13 | 
14 | // Rescales `t` by c_ / L2Norm(t) when the norm is at least c_; otherwise `t`
15 | // is left untouched.
16 | void Norm::clip(Tensor t) {
17 |   using namespace functional;
18 |   float l2Norm = L2Norm(t);
19 |   if(l2Norm >= c_)
20 |     Element(_1 = (c_ / l2Norm) * _1, t);
21 | }
22 | }  // namespace marian
23 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/example/jni/Android.mk:
--------------------------------------------------------------------------------
 1 | # Set up a project
 2 | LOCAL_PATH := $(call my-dir)
 3 | include $(CLEAR_VARS)
 4 | 
 5 | # Module "example", built from example.cpp with warnings enabled, C++11 and
 6 | # PIE flags
 7 | LOCAL_MODULE := example
 8 | LOCAL_SRC_FILES := example.cpp
 9 | LOCAL_CPPFLAGS += -Wall -Wshadow -Wextra -pedantic -std=c++11 -fPIE -pie
10 | LOCAL_LDFLAGS +=  -fPIE -pie
11 | 
12 | # Add exception support and set path for spdlog's headers
13 | LOCAL_CPPFLAGS += -fexceptions -I../include
14 | # Use android's log library
15 | LOCAL_LDFLAGS += -llog
16 | 
17 | # Build the module as an executable
18 | include $(BUILD_EXECUTABLE)
19 | 


--------------------------------------------------------------------------------
/src/marian.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // clang-format off
 4 | #include "common/version.h"
 5 | #include "common/config.h"
 6 | #include "common/definitions.h"
 7 | #include "common/logging.h"
 8 | #include "common/options.h"
 9 | #include "common/io.h"
10 | 
11 | #include "data/batch_generator.h"
12 | #include "data/corpus.h"
13 | 
14 | #include "graph/expression_graph.h"
15 | #include "graph/expression_operators.h"
16 | #include "graph/node_initializers.h"
17 | 
18 | #include "optimizers/optimizers.h"
19 | // clang-format on
20 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/src/include/ring.h:
--------------------------------------------------------------------------------
 1 | /*************************************************************************
 2 |  * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * See LICENSE.txt for license information
 5 |  ************************************************************************/
 6 | 
 7 | #ifndef NCCL_RING_H_
 8 | #define NCCL_RING_H_
 9 | #include "core.h"
10 | 
11 | // Declarations only. initRing takes the communicator and a ring id; freeRing
12 | // takes the ring. Both return an ncclResult_t.
13 | ncclResult_t initRing(struct ncclComm* comm, int ringid);
14 | ncclResult_t freeRing(struct ncclRing* ring);
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/examples/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files
 2 | *.slo
 3 | *.lo
 4 | *.o
 5 | *.obj
 6 | 
 7 | # Precompiled Headers
 8 | *.gch
 9 | *.pch
10 | 
11 | # Compiled Dynamic libraries
12 | *.so
13 | *.dylib
14 | *.dll
15 | 
16 | # Fortran module files
17 | *.mod
18 | 
19 | # python compiled files
20 | *.pyc
21 | 
22 | # Compiled Static libraries
23 | *.lai
24 | *.la
25 | *.a
26 | *.lib
27 | 
28 | # Executables
29 | *.exe
30 | *.out
31 | *.app
32 | 
33 | # Temporary files created by editors
34 | .*.sw*
35 | *~
36 | 
37 | # Contrib
38 | /.vimrc
39 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/contrib/graphbuilder.cpp:
--------------------------------------------------------------------------------
 1 | #include "graphbuilderadapter.h"
 2 | 
 3 | #include "yaml-cpp/parser.h"  // IWYU pragma: keep
 4 | 
 5 | namespace YAML {
 6 | class GraphBuilderInterface;
 7 | 
 8 | // Feeds the parser's next document through a GraphBuilderAdapter wrapped around
 9 | // `graphBuilder`. Returns the adapter's root node, or NULL when
10 | // HandleNextDocument() reports false.
11 | void* BuildGraphOfNextDocument(Parser& parser,
12 |                                GraphBuilderInterface& graphBuilder) {
13 |   GraphBuilderAdapter adapter(graphBuilder);
14 |   if (!parser.HandleNextDocument(adapter)) {
15 |     return NULL;
16 |   }
17 |   return adapter.RootNode();
18 | }
19 | }  // namespace YAML
20 | 


--------------------------------------------------------------------------------
/src/common/hash.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <functional>
 4 | 
 5 | namespace marian {
 6 | namespace util {
 7 | 
 8 | // Alias so that the hash implementation is named in one place.
 9 | template <class T>
10 | using hash = std::hash<T>;
11 | 
12 | // Mixes the hash of `v` into `seed`. The formula follows boost::hash_combine
13 | // but takes the element hash from std::hash, which makes this a drop-in
14 | // replacement for the boost function.
15 | template <class T>
16 | inline void hash_combine(std::size_t& seed, T const& v) {
17 |   const std::size_t mixed
18 |       = hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
19 |   seed ^= mixed;
20 | }
21 | 
22 | }  // namespace util
23 | }  // namespace marian


--------------------------------------------------------------------------------
/src/3rd_party/SQLiteCpp/sqlite3/README.md:
--------------------------------------------------------------------------------
 1 | sqlite3
 2 | -------
 3 | 
 4 | "sqlite3.c" and "sqlite3.h" files from sqlite-amalgamation-3120200.zip (SQLite 3.12.2 2016-04-18)
 5 | 
 6 | Those files are provided for easy setup and compatibility under Windows/Linux/MacOS.
 7 | They are used by default by the CMake build.
 8 | 
 9 | Use -DSQLITECPP_INTERNAL_SQLITE=OFF to link against the Linux "libsqlite3-dev" package instead.
10 | 
11 | ### License:
12 | 
13 | All of the code and documentation in SQLite has been dedicated to the public domain by the authors.
14 | 
15 | 


--------------------------------------------------------------------------------
/vs/BuildRelease.bat:
--------------------------------------------------------------------------------
 1 | ::
 2 | :: Usage: BuildRelease.bat [<build-directory>=.\build]
 3 | ::
 4 | :: This script runs the dependency checks, generates the projects/makefiles and then
 5 | :: builds the project in Release configuration.
 6 | ::
 7 | ::
 8 | @echo off
 9 | setlocal
10 | 
11 | :: %~dp0 is the directory containing this script (with trailing backslash).
12 | set ROOT=%~dp0
13 | set MARIAN_ROOT=%ROOT%..
14 | 
15 | :: Build directory: first argument, or "build" next to this script if omitted.
16 | set BUILD_ROOT=%1
17 | if "%BUILD_ROOT%"=="" set BUILD_ROOT=%ROOT%build
18 | 
19 | :: Stop with exit code 1 if CreateVSProjects.bat reports an error.
20 | :: NOTE(review): CreateVSProjects.bat is referenced without a path -- presumably
21 | :: the script is expected to be run from its own directory; confirm.
22 | call CreateVSProjects.bat %BUILD_ROOT%
23 | if errorlevel 1 exit /b 1
24 | 
25 | cmake --build %BUILD_ROOT% --config Release
26 | 
27 | exit /b 0


--------------------------------------------------------------------------------
/src/common/project_version.h.in:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | /*
 4 |  * File project_version.h is generated using CMake. Do NOT modify it manually! Edit
 5 |  * project_version.h.in file instead.
 6 |  */
 7 | 
 8 | // e.g. v1.2.3-beta+1.abc123d
 9 | #define PROJECT_VERSION_FULL  "@PROJECT_VERSION_STRING_FULL@"
10 | // e.g. v1.2.3-beta
11 | #define PROJECT_VERSION       "@PROJECT_VERSION_STRING@"
12 | #define PROJECT_VERSION_MAJOR @PROJECT_VERSION_MAJOR@
13 | #define PROJECT_VERSION_MINOR @PROJECT_VERSION_MINOR@
14 | #define PROJECT_VERSION_PATCH @PROJECT_VERSION_PATCH@
15 | 


--------------------------------------------------------------------------------
/src/tensors/rand.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common/definitions.h"
 4 | 
 5 | #include <random>
 6 | 
 7 | namespace marian {
 8 | 
 9 | class TensorBase;
10 | typedef Ptr<TensorBase> Tensor;
11 | 
12 | // Abstract interface of a seeded random number generator operating on tensors.
13 | class RandomGenerator {
14 | protected:
15 |   size_t seed_;  // seed passed to the constructor
16 | 
17 | public:
18 |   RandomGenerator(size_t seed) : seed_(seed) { }
19 |   // Virtual destructor: generators are handed out as Ptr<RandomGenerator>
20 |   // (see createRandomGenerator) and must be destroyable through the base
21 |   // class.
22 |   virtual ~RandomGenerator() {}
23 | 
24 |   // NOTE(review): presumably fill the tensor with samples from a uniform
25 |   // distribution bounded by a and b, resp. a normal distribution with the
26 |   // given mean and stddev -- confirm against the implementations.
27 |   virtual void uniform(Tensor, float a, float b) = 0;
28 |   virtual void normal(Tensor, float mean, float stddev) = 0;
29 | };
30 | 
31 | // Declared here, defined elsewhere: takes a seed and a device id and returns a
32 | // generator pointer.
33 | Ptr<RandomGenerator> createRandomGenerator(size_t /*seed*/, DeviceId);
34 | 
35 | }  // namespace marian


--------------------------------------------------------------------------------
/src/command/marian_decoder.cpp:
--------------------------------------------------------------------------------
 1 | #include "marian.h"
 2 | #include "translator/beam_search.h"
 3 | #include "translator/translator.h"
 4 | #include "common/timer.h"
 5 | #ifdef _WIN32
 6 | #include <Windows.h>
 7 | #endif
 8 | 
 9 | // Entry point of the decoder: parses the command line in translation mode,
10 | // builds a Translate<BeamSearch> task, runs it and logs the elapsed wall time.
11 | int main(int argc, char** argv) {
12 |   using namespace marian;
13 | 
14 |   auto config = parseOptions(argc, argv, cli::mode::translation);
15 |   auto translateTask = New<Translate<BeamSearch>>(config);
16 | 
17 |   // The clock starts after the task has been constructed, so only run() is timed.
18 |   timer::Timer wallClock;
19 |   translateTask->run();
20 |   LOG(info, "Total time: {:.5f}s wall", wallClock.elapsed());
21 | 
22 |   return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/bench/glog-bench.cpp:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #include "glog/logging.h"
 7 | 
 8 | 
 9 | // Writes one million numbered messages through glog with log_dir set to "logs".
10 | int main(int, char* argv[])
11 | {
12 |     const int total = 1000000;
13 | 
14 |     // Configure glog before initialising it: no stderr output, log_dir "logs".
15 |     FLAGS_logtostderr = 0;
16 |     FLAGS_log_dir = "logs";
17 |     google::InitGoogleLogging(argv[0]);
18 | 
19 |     int n = 0;
20 |     while (n < total)
21 |     {
22 |         LOG(INFO) << "glog message # " << n << ": This is some text for your pleasure";
23 |         ++n;
24 |     }
25 | 
26 |     return 0;
27 | }
28 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/src/collectives/device/all_gather.cu:
--------------------------------------------------------------------------------
 1 | /*************************************************************************
 2 |  * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * See LICENSE.txt for license information
 5 |  ************************************************************************/
 6 | 
 7 | #include "common.h"
 8 | #include "all_gather.h"
 9 | #include "collectives.h"
10 | 
11 | // UNROLL is not referenced directly in this file; presumably it is consumed by
12 | // the IMPL_COLL3 expansion below -- confirm.
13 | #define UNROLL 4
14 | 
15 | // The all-gather implementation (copy / FuncSum / int8_t) is emitted only in
16 | // the compilation where NCCL_OP is 0.
17 | #if NCCL_OP == 0
18 | IMPL_COLL3(ncclAllGather, copy, FuncSum, i8, int8_t, ncclCollAllGather, ncclSum, ncclInt8);
19 | #endif
20 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/src/collectives/device/broadcast.cu:
--------------------------------------------------------------------------------
 1 | /*************************************************************************
 2 |  * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * See LICENSE.txt for license information
 5 |  ************************************************************************/
 6 | 
 7 | #include "common.h"
 8 | #include "broadcast.h"
 9 | #include "collectives.h"
10 | 
11 | // UNROLL is not referenced directly in this file; presumably it is consumed by
12 | // the IMPL_COLL3 expansion below -- confirm.
13 | #define UNROLL 4
14 | 
15 | // The broadcast implementation (copy / FuncSum / int8_t) is emitted only in
16 | // the compilation where NCCL_OP is 0.
17 | #if NCCL_OP == 0
18 | IMPL_COLL3(ncclBroadcast, copy, FuncSum, i8, int8_t, ncclCollBroadcast, ncclSum, ncclInt8);
19 | #endif
20 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/directives.cpp:
--------------------------------------------------------------------------------
 1 | #include "directives.h"
 2 | 
 3 | namespace YAML {
 4 | // A fresh Directives object carries version 1.2, flagged as the default.
 5 | Directives::Directives() {
 6 |   version.major = 1;
 7 |   version.minor = 2;
 8 |   version.isDefault = true;
 9 | }
10 | 
11 | // Returns the prefix registered for `handle`. An unregistered "!!" maps to
12 | // "tag:yaml.org,2002:"; any other unregistered handle is returned unchanged.
13 | const std::string Directives::TranslateTagHandle(
14 |     const std::string& handle) const {
15 |   const std::map<std::string, std::string>::const_iterator found =
16 |       tags.find(handle);
17 |   if (found != tags.end())
18 |     return found->second;
19 | 
20 |   return handle == "!!" ? std::string("tag:yaml.org,2002:") : handle;
21 | }
22 | }
23 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/anchor.h:
--------------------------------------------------------------------------------
 1 | #ifndef ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include <cstddef>
11 | 
12 | namespace YAML {
13 | typedef std::size_t anchor_t;  // anchors are identified by a std::size_t value
14 | const anchor_t NullAnchor = 0;  // the value 0 is named NullAnchor
15 | }
16 | 
17 | #endif  // ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
18 | 


--------------------------------------------------------------------------------
/examples/training-basics/scripts/normalise-romanian.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | # Author: Barry Haddow
 4 | # Distributed under MIT license
 5 | 
 6 | #
 7 | # Normalise Romanian s-comma and t-comma
 8 | 
 9 | import io
10 | import sys
11 | istream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
12 | ostream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
13 | 
14 | for line in istream:
15 |   line = line.replace("\u015e", "\u0218").replace("\u015f", "\u0219")
16 |   line = line.replace("\u0162", "\u021a").replace("\u0163", "\u021b")
17 |   ostream.write(line)
18 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/emitterstyle.h:
--------------------------------------------------------------------------------
 1 | #ifndef EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | namespace YAML {
11 | struct EmitterStyle {  // wrapper struct: the enumerators are reached as EmitterStyle::Block etc.
12 |   enum value { Default, Block, Flow };
13 | };
14 | }
15 | 
16 | #endif  // EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
17 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/node/type.h:
--------------------------------------------------------------------------------
 1 | #ifndef VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | namespace YAML {
11 | struct NodeType {  // wrapper struct: the enumerators are reached as NodeType::Scalar etc.
12 |   enum value { Undefined, Null, Scalar, Sequence, Map };
13 | };
14 | }
15 | 
16 | #endif  // VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
17 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # Tests
 3 | #
 4 | 
 5 | enable_testing()
 6 | # Threads provides CMAKE_THREAD_LIBS_INIT, which is linked into the test binary below.
 7 | find_package(Threads)
 8 | 
 9 | # Build Catch unit tests
10 | add_library(catch INTERFACE)
11 | target_include_directories(catch INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
12 | # Collect the sources and headers of this directory (files only, paths relative to it).
13 | file(GLOB catch_tests LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp *.h *.hpp)
14 | # One executable holds all tests and is registered with CTest under the same name.
15 | add_executable(catch_tests ${catch_tests})
16 | target_link_libraries(catch_tests spdlog ${CMAKE_THREAD_LIBS_INIT})
17 | add_test(NAME catch_tests COMMAND catch_tests)
18 | file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/logs")  # presumably where the tests write their log files
19 | 
20 | 


--------------------------------------------------------------------------------
/src/functional/functional.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "functional/operands.h"
 4 | #include "functional/predicates.h"
 5 | 
 6 | namespace marian {
 7 | namespace functional {
 8 | // ref<N> is shorthand for Assignee<N>.
 9 | template <int N>
10 | using ref = Assignee<N>;
11 | // Objects _1.._9, one per ref<N>; being `static`, every including translation unit gets its own set.
12 | static ref<1> _1;
13 | static ref<2> _2;
14 | static ref<3> _3;
15 | static ref<4> _4;
16 | static ref<5> _5;
17 | static ref<6> _6;
18 | static ref<7> _7;
19 | static ref<8> _8;
20 | static ref<9> _9;
21 | // Namespace-scope const objects of type C<0>, C<1>, C<2>, C<-1> and C<-2>.
22 | const C<0> _0c;
23 | const C<1> _1c;
24 | const C<2> _2c;
25 | const C<-1> _1cneg;
26 | const C<-2> _2cneg;
27 | }  // namespace functional
28 | }  // namespace marian


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/tests/install_libcxx.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Install libc++ under travis
 4 | set -e  # abort on the first failing step so a broken build is never copied into /usr
 5 | svn --quiet co http://llvm.org/svn/llvm-project/libcxx/trunk libcxx
 6 | mkdir -p libcxx/build  # -p: do not fail when the directory is left over from an earlier run
 7 | (cd libcxx/build && cmake .. -DLIBCXX_CXX_ABI=libstdc++ -DLIBCXX_CXX_ABI_INCLUDE_PATHS="/usr/include/c++/4.6;/usr/include/c++/4.6/x86_64-linux-gnu")
 8 | make -C libcxx/build cxx -j2
 9 | sudo cp libcxx/build/lib/libc++.so.1.0 /usr/lib/
10 | sudo cp -r libcxx/build/include/c++/v1 /usr/include/c++/v1/
11 | sudo ln -sf /usr/lib/libc++.so.1.0 /usr/lib/libc++.so
12 | sudo ln -sf /usr/lib/libc++.so.1.0 /usr/lib/libc++.so.1
13 | 


--------------------------------------------------------------------------------
/src/tensors/backend.cpp:
--------------------------------------------------------------------------------
 1 | #include "tensors/backend.h"
 2 | 
 3 | #ifdef CUDA_FOUND
 4 | #include "tensors/gpu/backend.h"
 5 | #pragma warning(disable:4505) // "unreferenced local function has been removed" in cuda\v9.2\include\cuda_fp16.hpp
 6 | #endif
 7 | 
 8 | #include "tensors/cpu/backend.h"
 9 | 
10 | namespace marian {
11 | // Builds the backend for deviceId: gpu::Backend for GPU devices in CUDA builds, cpu::Backend otherwise.
12 | Ptr<Backend> BackendByDeviceId(DeviceId deviceId, size_t seed) {
13 | #ifdef CUDA_FOUND
14 |   const bool wantsGpu = deviceId.type == DeviceType::gpu;
15 |   if(wantsGpu)
16 |     return New<gpu::Backend>(deviceId, seed);
17 | #endif
18 |   return New<cpu::Backend>(deviceId, seed);
19 | }
20 | }  // namespace marian
21 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/emitterdef.h:
--------------------------------------------------------------------------------
 1 | #ifndef EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | namespace YAML {
11 | struct EmitterNodeType {  // wrapper struct: the enumerators are reached as EmitterNodeType::Scalar etc.
12 |   enum value { NoType, Property, Scalar, FlowSeq, BlockSeq, FlowMap, BlockMap };
13 | };
14 | }
15 | 
16 | #endif  // EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
17 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/bench/easylogging-bench.cpp:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | 
 7 | #include "easylogging++.h"
 8 | 
 9 | _INITIALIZE_EASYLOGGINGPP
10 | 
11 | int main(int, char* [])
12 | {
13 |     // Load configuration from file and apply it to the "default" logger
14 |     el::Configurations conf("easyl.conf");
15 |     el::Loggers::reconfigureLogger("default", conf);
16 | 
17 |     // Emit one INFO record per iteration.
18 |     const int total = 1000000;
19 |     for (int msg = 0; msg < total; ++msg)
20 |         LOG(INFO) << "easylog message #" << msg << ": This is some text for your pleasure";
21 |     return 0;
22 | }
23 | 


--------------------------------------------------------------------------------
/CL_tools/plt_cdf.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | import sys
 4 | 
 5 | print "python x.py cdf_base.npz T"
 6 | f = np.load(sys.argv[1])
 7 | plt.title(sys.argv[1])
 8 | base = f['base']
 9 | cdf = f['cdf']
10 | plt.plot(base[:-1], cdf)
11 | # plt.show()
12 | 
13 | # make competence graph
14 | g = plt.figure(2)
15 | c0 = 0.001
16 | T = int(sys.argv[2])
17 | def c(t):
18 |     tmp = t * ((1 - c0 * c0) / T) + c0 * c0
19 |     c_square = tmp ** 0.5
20 |     return min(1.0, c_square)
21 | 
22 | t_data = [t for t in range(T)]
23 | c_data = [c(t) for t in range(T)]
24 | 
25 | plt.title("Competence")
26 | plt.plot(t_data, c_data)
27 | plt.show()
28 | 


--------------------------------------------------------------------------------
/contrib/vim/.vimrc:
--------------------------------------------------------------------------------
 1 | autocmd BufRead,BufNewFile *.cu set filetype=cpp  "Treat .cu files as C++
 2 | augroup cpp
 3 |     au!
 4 |     au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set tabstop=2      "Display a tab as 2 columns
 5 |     au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set shiftwidth=2   "Indent by 2 columns
 6 |     au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set expandtab      "Insert spaces instead of tab characters
 7 |     au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set softtabstop=2  "Insert 2 spaces when tab is pressed
 8 |     au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set smarttab       "Indent instead of tab at start of line
 9 |     au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set shiftround     "Round spaces to nearest shiftwidth multiple
10 | augroup end
11 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/include/spdlog/fmt/fmt.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2016 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | //
 9 | // Include a bundled header-only copy of fmtlib or an external one.
10 | // By default spdlog includes its own copy.
11 | //
12 | 
13 | #if !defined(SPDLOG_FMT_EXTERNAL)
14 | // Bundled copy: define FMT_HEADER_ONLY and FMT_USE_WINDOWS_H=0 unless the user already defined them.
15 | #ifndef FMT_HEADER_ONLY
16 | #define FMT_HEADER_ONLY
17 | #endif
18 | #ifndef FMT_USE_WINDOWS_H
19 | #define FMT_USE_WINDOWS_H 0
20 | #endif
21 | #include "spdlog/fmt/bundled/format.h"
22 | 
23 | #else //external fmtlib
24 | // SPDLOG_FMT_EXTERNAL is defined: use the fmt headers found on the include path.
25 | #include <fmt/format.h>
26 | 
27 | #endif
28 | 
29 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/Makefile:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # See LICENSE.txt for license information
 5 | #
 6 | .PHONY : all clean default build prep
 7 | # `default` is the first real target, so a bare `make` runs `build`.
 8 | default : build
 9 | build : debian.build txz.build
10 | 
11 | BUILDDIR ?= $(abspath ../build)
12 | ABSBUILDDIR := $(abspath $(BUILDDIR))
13 | TARGETS := debian txz
14 | all:   ${TARGETS:%=%.build}
15 | prep:  ${TARGETS:%=%.prep}
16 | build: ${TARGETS:%=%.build}
17 | clean: ${TARGETS:%=%.clean}
18 | # <dir>.<action> runs `make <action>` inside <dir>; $* expands to the <dir> stem.
19 | %.prep:
20 | 	${MAKE} -C $* prep BUILDDIR=${ABSBUILDDIR}
21 | 
22 | %.build:
23 | 	${MAKE} -C $* build BUILDDIR=${ABSBUILDDIR}
24 | 
25 | %.clean:
26 | 	${MAKE} -C $* clean
27 | 


--------------------------------------------------------------------------------
/examples/wmt2017-uedin/scripts/rescore.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | lastNum = 0
 4 | bestScore = -9999
 5 | 
 6 | for line in sys.stdin:
 7 |     line = line.rstrip("\n")
 8 |     fields = line.split(" ||| ")
 9 |     score = sum(float(score) for score in fields[2].split(" ") if score[-1] != "=")
10 |     length = float(len(fields[1].split(" ")) + 1)
11 | 
12 |     score = score / length
13 | 
14 |     num = int(fields[0])
15 |     if num > lastNum:
16 |       print bestLine
17 |       bestScore = -99999
18 |       bestLine = fields[1]
19 |     lastNum = num
20 | 
21 |     if score > bestScore:
22 |       bestScore = score
23 |       bestLine = fields[1]
24 | 
25 | print bestLine
26 | 


--------------------------------------------------------------------------------
/examples/wmt2017-transformer/scripts/rescore.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | lastNum = 0
 4 | bestScore = -9999
 5 | 
 6 | for line in sys.stdin:
 7 |     line = line.rstrip("\n")
 8 |     fields = line.split(" ||| ")
 9 |     score = sum(float(score) for score in fields[2].split(" ") if score[-1] != "=")
10 |     length = float(len(fields[1].split(" ")) + 1)
11 | 
12 |     score = score / length
13 | 
14 |     num = int(fields[0])
15 |     if num > lastNum:
16 |       print bestLine
17 |       bestScore = -99999
18 |       bestLine = fields[1]
19 |     lastNum = num
20 | 
21 |     if score > bestScore:
22 |       bestScore = score
23 |       bestLine = fields[1]
24 | 
25 | print bestLine
26 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/src/include/rings.h:
--------------------------------------------------------------------------------
 1 | /*************************************************************************
 2 |  * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * See LICENSE.txt for license information
 5 |  ************************************************************************/
 6 | 
 7 | #ifndef NCCL_RINGS_H_
 8 | #define NCCL_RINGS_H_
 9 | 
10 | static int getDefaultThreads() {
11 |   const bool kepler = ncclCudaCompCap() == 3;  // On Kepler, rings are doubled later.
12 |   return kepler ? 128 : 256;
13 | }
14 | 
15 | ncclResult_t ncclGetRings(int* nrings, int* nthreads, int rank, int nranks, int* transports, ncclTvalue_t* values, int* prev, int* next);
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/src/tensors/gpu/algorithm.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "tensors/backend.h"
 4 | 
 5 | namespace marian {
 6 | namespace gpu {
 7 | template <typename T>
 8 | void copy(Ptr<marian::Backend> backend, const T* begin, const T* end, T* dest);
 9 | // NOTE(review): copy/fill/swap_ranges look like std:: algorithm counterparts with an extra backend argument; definitions are not in this header - confirm.
10 | template <typename T>
11 | void fill(Ptr<marian::Backend> backend, T* begin, T* end, T value);
12 | 
13 | template <typename T>
14 | void swap_ranges(Ptr<marian::Backend> backend, T* begin, T* end, T* dest);
15 | // NOTE(review): the two vectors and the float* are unnamed here - presumably indices, values and destination; confirm.
16 | void setSparse(Ptr<marian::Backend> backend,
17 |                const std::vector<size_t>&,
18 |                const std::vector<float>&,
19 |                float*);
20 | }  // namespace gpu
21 | }  // namespace marian
22 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/appveyor.yml:
--------------------------------------------------------------------------------
 1 | version: '{branch} build {build}'
 2 | image: Visual Studio 2015
 3 | platform:  # each entry is handed to test.bat as %platform% (see build_script)
 4 |   - x64
 5 |   - Win32
 6 | configuration: Release
 7 | clone_depth: 50
 8 | clone_folder: c:\projects\sentencepiece
 9 | init:
10 | build_script:
11 | - cmd: call test.bat %platform%
12 | artifacts:  # files picked up after the build: the 7z package and the Python wheels
13 |  - path: build\sentencepiece*.7z
14 |  - path: python\dist\*.whl
15 | deploy:
16 |   description: 'SentencePiece Windows release'
17 |   provider: GitHub
18 |   auth_token:
19 |     secure: Aq4jHo/HY6WFFKs1h9cCWfi3U4ZsVTooUEhtgBfcJM6SUhnZdPVazIcKCtiR32kc
20 |   draft: false
21 |   prerelease: false
22 |   on:  # deployment conditions: branch master and a tagged build
23 |     branch: master
24 |     appveyor_repo_tag: true
25 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/memory.cpp:
--------------------------------------------------------------------------------
 1 | #include "yaml-cpp/node/detail/memory.h"
 2 | #include "yaml-cpp/node/detail/node.h"  // IWYU pragma: keep
 3 | #include "yaml-cpp/node/ptr.h"
 4 | 
 5 | namespace YAML {
 6 | namespace detail {
 7 | // Merges rhs's memory into this holder's memory, then points rhs at the merged memory.
 8 | void memory_holder::merge(memory_holder& rhs) {
 9 |   const bool alreadyShared = (m_pMemory == rhs.m_pMemory);
10 |   if (!alreadyShared) {
11 |     m_pMemory->merge(*rhs.m_pMemory);
12 |     rhs.m_pMemory = m_pMemory;
13 |   }
14 | }
15 | // Allocates a node, stores its shared_node in m_nodes and returns a reference to it.
16 | node& memory::create_node() {
17 |   shared_node created(new node);
18 |   node& result = *created;
19 |   m_nodes.insert(created);
20 |   return result;
21 | }
22 | // Inserts every entry of rhs.m_nodes into this object's m_nodes.
23 | void memory::merge(const memory& rhs) { m_nodes.insert(rhs.m_nodes.begin(), rhs.m_nodes.end()); }
24 | 
25 | }  // namespace detail
26 | }  // namespace YAML
27 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/emit.cpp:
--------------------------------------------------------------------------------
 1 | #include "yaml-cpp/node/emit.h"
 2 | #include "yaml-cpp/emitfromevents.h"
 3 | #include "yaml-cpp/emitter.h"
 4 | #include "nodeevents.h"
 5 | 
 6 | namespace YAML {
 7 | // Feeds `node` to the emitter `out` by replaying it through an EmitFromEvents handler.
 8 | Emitter& operator<<(Emitter& out, const Node& node) {
 9 |   EmitFromEvents handler(out);
10 |   NodeEvents source(node);
11 |   source.Emit(handler);
12 |   return out;
13 | }
14 | // Writes `node` to a std::ostream through a local Emitter constructed on that stream.
15 | std::ostream& operator<<(std::ostream& out, const Node& node) {
16 |   Emitter yaml(out);
17 |   yaml << node;
18 |   return out;
19 | }
20 | // Emits `node` into a local Emitter and returns its c_str() contents as a string.
21 | std::string Dump(const Node& node) {
22 |   Emitter yaml;
23 |   return (yaml << node).c_str();
24 | }
25 | }  // namespace YAML
26 | 


--------------------------------------------------------------------------------
/src/translator/helpers.h:
--------------------------------------------------------------------------------
 1 | /* All or part of this file was contributed by Intel under license:
 2 |  *   Copyright (C) 2017-2018 Intel Corporation
 3 |  *   SPDX-License-Identifier: MIT
 4 |  */
 5 | 
 6 | #pragma once
 7 | 
 8 | #include "graph/expression_graph.h"
 9 | 
10 | namespace marian {
11 | 
12 | namespace cpu {
13 | // Variant in namespace cpu; only declared in this header.
14 | void suppressWord(Expr logProbs, Word id);
15 | }
16 | 
17 | namespace gpu {
18 | // Variant in namespace gpu; only declared in this header.
19 | void suppressWord(Expr logProbs, Word id);
20 | // NOTE(review): presumably writes `value` into column `col` of `in_` - confirm in the implementation.
21 | void SetColumnId(Tensor in_, size_t col, float value);
22 | }
23 | // NOTE(review): presumably forwards to the cpu:: or gpu:: variant depending on the device - confirm.
24 | void suppressWord(Expr logProbs, Word id);
25 | // Variant that additionally takes sentence ids; sent_ids is passed by value, so it is copied per call.
26 | void suppressWordSent(Expr logProbs, Word id, std::vector<size_t> sent_ids);
27 | 
28 | 
29 | }  // namespace marian
30 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/bench/spdlog-bench.cpp:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #include "spdlog/spdlog.h"
 7 | 
 8 | 
 9 | int main(int, char* [])
10 | {
11 |     namespace spd = spdlog;
12 |     const int total = 1000000;
13 |     // Create a simple_file_sink_st logger named "file_logger" on logs/spd-bench-st.txt
14 |     auto bench_log = spdlog::create<spd::sinks::simple_file_sink_st>("file_logger", "logs/spd-bench-st.txt", false);
15 | 
16 |     bench_log->set_pattern("[%Y-%b-%d %T.%e]: %v");
17 |     for (int msg = 0; msg < total; ++msg)
18 |         bench_log->info("spdlog message #{} : This is some text for your pleasure", msg);
19 |     return 0;
20 | }
21 | 


--------------------------------------------------------------------------------
/src/common/io_item.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common/shape.h"
 4 | #include "common/types.h"
 5 | 
 6 | #include <string>
 7 | 
 8 | namespace marian {
 9 | namespace io {
10 | 
11 | // A named item with shape and type whose payload is read either from `bytes` or through `ptr`.
12 | struct Item {
13 |   std::vector<char> bytes;   // payload storage used when !mapped
14 |   const char* ptr{nullptr};  // payload location used when mapped
15 |   bool mapped{false};        // selects between `ptr` and `bytes` in data() and size()
16 | 
17 |   std::string name;
18 |   Shape shape;
19 |   Type type{Type::float32};
20 | 
21 |   // Start of the payload: `ptr` for mapped items, otherwise the data of `bytes`.
22 |   const char* data() const {
23 |     return mapped ? ptr : bytes.data();
24 |   }
25 | 
26 |   // Payload size: the length of `bytes`, or for mapped items a value computed from shape and type.
27 |   size_t size() const {
28 |     if(!mapped)
29 |       return bytes.size();
30 |     // mapped items do not use `bytes`, so the size is derived instead
31 |     return shape.elements() * sizeOf(type);
32 |   }
33 | };
34 | 
35 | }  // namespace io
36 | }  // namespace marian
37 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/sinks/null_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include <spdlog/sinks/base_sink.h>
 9 | #include <spdlog/details/null_mutex.h>
10 | 
11 | #include <mutex>
12 | 
13 | namespace spdlog
14 | {
15 | namespace sinks
16 | {
17 | // Sink whose overrides are empty: messages passed to _sink_it are dropped.
18 | template <class Mutex>
19 | class null_sink : public base_sink < Mutex >
20 | {
21 | protected:
22 |     void _sink_it(const details::log_msg&) override
23 |     {}
24 |     // flush() is likewise an empty override.
25 |     void flush() override
26 |     {}
27 | 
28 | };
29 | typedef null_sink<details::null_mutex> null_sink_st;  // instantiated with details::null_mutex
30 | typedef null_sink<std::mutex> null_sink_mt;  // instantiated with std::mutex
31 | 
32 | }
33 | }
34 | 
35 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/scantag.h:
--------------------------------------------------------------------------------
 1 | #ifndef SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include <string>
11 | #include "stream.h"
12 | 
13 | namespace YAML {
14 | const std::string ScanVerbatimTag(Stream& INPUT);  // declaration only; the result is returned by value
15 | const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle);  // canBeHandle is a non-const reference, presumably an in/out flag
16 | const std::string ScanTagSuffix(Stream& INPUT);  // declaration only; the result is returned by value
17 | }
18 | 
19 | #endif  // SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
20 | 


--------------------------------------------------------------------------------
/src/data/rng_engine.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <random>
 4 | #include <sstream>
 5 | 
 6 | #include "common/config.h"
 7 | 
 8 | namespace marian {
 9 | namespace data {
10 | 
11 | /**
12 |  * @brief Class providing an engine for pseudo-random number generation.
13 |  */
14 | class RNGEngine {
15 | protected:
16 |   std::mt19937 eng_;  // seeded from Config::seed by the constructor
17 | 
18 | public:
19 |   RNGEngine() : eng_(static_cast<unsigned int>(Config::seed)) {}
20 |   /** @brief Returns the engine state as text, written with operator<<. */
21 |   std::string getRNGState() const {
22 |     std::ostringstream oss;
23 |     oss << eng_;
24 |     return oss.str();
25 |   }
26 |   /** @brief Reads the engine state back from text with operator>>. */
27 |   void setRNGState(const std::string& engineState) {
28 |     std::istringstream iss(engineState);
29 |     iss >> eng_;
30 |   }
31 | };
32 | }  // namespace data
33 | }  // namespace marian
34 | 


--------------------------------------------------------------------------------
/src/translator/nth_element.h:
--------------------------------------------------------------------------------
 1 | /* All or part of this file was contributed by Intel under license:
 2 |  *   Copyright (C) 2017-2018 Intel Corporation
 3 |  *   SPDX-License-Identifier: MIT
 4 |  */
 5 | 
 6 | #pragma once
 7 | 
 8 | #include "tensors/tensor.h"
 9 | #include <vector>
10 | 
11 | namespace marian {
12 | // NOTE(review): std::function is used below, but this header does not include <functional> directly.
13 | typedef std::function<void(const std::vector<size_t>& beamSizes,
14 |                            Tensor logProbs,
15 |                            std::vector<float>& outCosts,
16 |                            std::vector<unsigned>& outKeys,
17 |                            const bool isFirst)> GetNBestListFn;
18 | // Returns a GetNBestListFn; the implementation is not visible here (presumably selected by deviceId - confirm).
19 | GetNBestListFn createGetNBestListFn(size_t beamSize, size_t dimBatch, DeviceId deviceId);
20 | }  // namespace marian
21 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/include/spdlog/sinks/null_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include "spdlog/sinks/base_sink.h"
 9 | #include "spdlog/details/null_mutex.h"
10 | 
11 | #include <mutex>
12 | 
13 | namespace spdlog
14 | {
15 | namespace sinks
16 | {
17 | // Sink whose overrides are empty: messages passed to _sink_it are dropped.
18 | template <class Mutex>
19 | class null_sink : public base_sink < Mutex >
20 | {
21 | protected:
22 |     void _sink_it(const details::log_msg&) override
23 |     {}
24 |     // _flush() is likewise an empty override.
25 |     void _flush() override
26 |     {}
27 | 
28 | };
29 | typedef null_sink<details::null_mutex> null_sink_st;
30 | typedef null_sink<details::null_mutex> null_sink_mt;  // NOTE(review): also null_mutex, not std::mutex - presumably deliberate since the sink does nothing; confirm
31 | 
32 | }
33 | }
34 | 
35 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/src/include/bootstrap.h:
--------------------------------------------------------------------------------
 1 | /*************************************************************************
 2 |  * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * See LICENSE.txt for license information
 5 |  ************************************************************************/
 6 | 
 7 | #ifndef NCCL_BOOTSTRAP_H_
 8 | #define NCCL_BOOTSTRAP_H_
 9 | 
10 | #include "nccl.h"
11 | // Bootstrap entry points; every function reports its outcome as an ncclResult_t.
12 | ncclResult_t bootstrapCreateRoot(ncclUniqueId* commId, bool idFromEnv);
13 | ncclResult_t bootstrapGetUniqueId(ncclUniqueId* out);
14 | ncclResult_t bootstrapInit(ncclUniqueId* id, int rank, int nranks, void** commState);
15 | ncclResult_t bootstrapAllGather(void* commState, void* allData, int size);
16 | ncclResult_t bootstrapClose(void* commState);
17 | #endif
18 | 


--------------------------------------------------------------------------------
/src/3rd_party/CLI/CLI.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // Distributed under the 3-Clause BSD License.  See accompanying
 4 | // file LICENSE or https://github.com/CLIUtils/CLI11 for details.
 5 | 
 6 | // CLI Library includes
 7 | // Order is important for combiner script
 8 | 
 9 | #include "CLI/Version.hpp"
10 | 
11 | #include "CLI/Macros.hpp"
12 | 
13 | #include "CLI/Optional.hpp"
14 | 
15 | #include "CLI/StringTools.hpp"
16 | 
17 | #include "CLI/Error.hpp"
18 | 
19 | #include "CLI/TypeTools.hpp"
20 | 
21 | #include "CLI/Split.hpp"
22 | 
23 | #include "CLI/ConfigFwd.hpp"
24 | 
25 | #include "CLI/Validators.hpp"
26 | 
27 | #include "CLI/FormatterFwd.hpp"
28 | 
29 | #include "CLI/Option.hpp"
30 | 
31 | #include "CLI/App.hpp"
32 | 
33 | #include "CLI/Config.hpp"
34 | 
35 | #include "CLI/Formatter.hpp"
36 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/txz/create_txz.sh.in:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # See LICENSE.txt for license information
 6 | #
 7 | 
 8 | # To run from $BUILDDIR/
 9 | 
10 | BUILDDIR=`basename $PWD`
11 | # Step up one level so the build directory can be archived by its bare name.
12 | cd ..
13 | NCCL_MAJOR=${nccl:Major}
14 | NCCL_MINOR=${nccl:Minor}
15 | NCCL_PATCH=${nccl:Patch}
16 | NCCL_SUFFIX=${nccl:Suffix}
17 | CUDA_MAJOR=${cuda:Major}
18 | CUDA_MINOR=${cuda:Minor}
19 | PKG_REVISION=${pkg:Revision}
20 | PKG_ARCH=${pkg:Arch}
21 | # NOTE(review): the ${name:Field} values above are presumably placeholders substituted when this .in template is processed.
22 | NCCLNAME="nccl_${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_PATCH}${NCCL_SUFFIX}-${PKG_REVISION}+cuda${CUDA_MAJOR}.${CUDA_MINOR}_${PKG_ARCH}"
23 | # Archive include/, lib/ and the *.txt files, renaming the leading directory to $NCCLNAME and storing owner/group 0.
24 | tar --transform "s/^$BUILDDIR/$NCCLNAME/" -Jcf $NCCLNAME.txz --owner=0 --group=0 $BUILDDIR/include $BUILDDIR/lib $BUILDDIR/*.txt
25 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/bench/utils.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include <sstream>
 9 | #include <iomanip>
10 | #include <locale>
11 | 
12 | namespace utils
13 | {
14 | // Renders `value` through a stream imbued with the locale named "".
15 | template<typename T>
16 | inline std::string format(const T& value)
17 | {
18 |     static std::locale user_locale("");
19 |     std::ostringstream out;
20 |     out.imbue(user_locale);
21 |     out << value;
22 |     return out.str();
23 | }
24 | 
25 | // Doubles are rendered in fixed notation with one digit after the decimal point.
26 | template<>
27 | inline std::string format(const double & value)
28 | {
29 |     static std::locale user_locale("");
30 |     std::ostringstream out;
31 |     out.imbue(user_locale);
32 |     out << std::fixed << std::setprecision(1) << value;
33 |     return out.str();
34 | }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/example/utils.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include <sstream>
 9 | #include <iomanip>
10 | #include <locale>
11 | 
12 | namespace utils
13 | {
14 | // Renders `value` through a stream imbued with the locale named "".
15 | template<typename T>
16 | inline std::string format(const T& value)
17 | {
18 |     static std::locale user_locale("");
19 |     std::ostringstream out;
20 |     out.imbue(user_locale);
21 |     out << value;
22 |     return out.str();
23 | }
24 | 
25 | // Doubles are rendered in fixed notation with one digit after the decimal point.
26 | template<>
27 | inline std::string format(const double & value)
28 | {
29 |     static std::locale user_locale("");
30 |     std::ostringstream out;
31 |     out.imbue(user_locale);
32 |     out << std::fixed << std::setprecision(1) << value;
33 |     return out.str();
34 | }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/bench/latency/utils.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include <sstream>
 9 | #include <iomanip>
10 | #include <locale>
11 | 
12 | namespace utils
13 | {
14 | // Renders `value` through a stream imbued with the locale named "".
15 | template<typename T>
16 | inline std::string format(const T& value)
17 | {
18 |     static std::locale user_locale("");
19 |     std::ostringstream out;
20 |     out.imbue(user_locale);
21 |     out << value;
22 |     return out.str();
23 | }
24 | 
25 | // Doubles are rendered in fixed notation with one digit after the decimal point.
26 | template<>
27 | inline std::string format(const double & value)
28 | {
29 |     static std::locale user_locale("");
30 |     std::ostringstream out;
31 |     out.imbue(user_locale);
32 |     out << std::fixed << std::setprecision(1) << value;
33 |     return out.str();
34 | }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/exceptions.cpp:
--------------------------------------------------------------------------------
 1 | #include "yaml-cpp/exceptions.h"
 2 | 
 3 | namespace YAML {
 4 | 
 5 | // Every destructor is defined here, out of line, so the vtable is emitted only once.
 6 | Exception::~Exception() noexcept = default;
 7 | ParserException::~ParserException() noexcept = default;
 8 | RepresentationException::~RepresentationException() noexcept = default;
 9 | InvalidScalar::~InvalidScalar() noexcept = default;
10 | KeyNotFound::~KeyNotFound() noexcept = default;
11 | InvalidNode::~InvalidNode() noexcept = default;
12 | BadConversion::~BadConversion() noexcept = default;
13 | BadDereference::~BadDereference() noexcept = default;
14 | BadSubscript::~BadSubscript() noexcept = default;
15 | BadPushback::~BadPushback() noexcept = default;
16 | BadInsert::~BadInsert() noexcept = default;
17 | EmitterException::~EmitterException() noexcept = default;
18 | BadFile::~BadFile() noexcept = default;
19 | }  // namespace YAML
20 | 


--------------------------------------------------------------------------------
/src/3rd_party/zlib/gzclose.c:
--------------------------------------------------------------------------------
 1 | /* gzclose.c -- zlib gzclose() function
 2 |  * Copyright (C) 2004, 2010 Mark Adler
 3 |  * For conditions of distribution and use, see copyright notice in zlib.h
 4 |  */
 5 | 
 6 | #include "gzguts.h"
 7 | 
 8 | /* gzclose() is in a separate file so that it is linked in only if it is used.
 9 |    That way the other gzclose functions can be used instead to avoid linking in
10 |    unneeded compression or decompression routines. */
11 | int ZEXPORT gzclose(file)
12 |     gzFile file;
13 | {
14 | #ifndef NO_GZCOMPRESS
15 |     gz_statep state;
16 | 
17 |     if (file == NULL)
18 |         return Z_STREAM_ERROR;
19 |     state = (gz_statep)file;
20 | 
21 |     return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file);
22 | #else
23 |     return gzclose_r(file);
24 | #endif
25 | }
26 | 


--------------------------------------------------------------------------------
/examples/transformer/scripts/download-files.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -v
 2 | 
 3 | mkdir -p data
 4 | cd data
 5 | 
 6 | # get En-De training data for WMT17
 7 | wget -nc http://www.statmt.org/europarl/v7/de-en.tgz
 8 | wget -nc http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz
 9 | wget -nc http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz
10 | 
11 | # extract data
12 | tar -xf de-en.tgz
13 | tar -xf training-parallel-commoncrawl.tgz
14 | tar -xf training-parallel-nc-v12.tgz
15 | 
16 | # create corpus files
17 | cat europarl-v7.de-en.de commoncrawl.de-en.de training/news-commentary-v12.de-en.de > corpus.de
18 | cat europarl-v7.de-en.en commoncrawl.de-en.en training/news-commentary-v12.de-en.en > corpus.en
19 | 
20 | # clean
21 | rm -r europarl-* commoncrawl.* training/ *.tgz
22 | 
23 | cd ..
24 | 


--------------------------------------------------------------------------------
/examples/wmt2017-uedin/scripts/download-files.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -v
 2 | 
 3 | mkdir -p data
 4 | cd data
 5 | 
 6 | # get En-De training data for WMT17
 7 | wget -nc http://www.statmt.org/europarl/v7/de-en.tgz
 8 | wget -nc http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz
 9 | wget -nc http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz
10 | 
11 | # extract data
12 | tar -xf de-en.tgz
13 | tar -xf training-parallel-commoncrawl.tgz
14 | tar -xf training-parallel-nc-v12.tgz
15 | 
16 | # create corpus files
17 | cat europarl-v7.de-en.de commoncrawl.de-en.de training/news-commentary-v12.de-en.de > corpus.de
18 | cat europarl-v7.de-en.en commoncrawl.de-en.en training/news-commentary-v12.de-en.en > corpus.en
19 | 
20 | # clean
21 | rm -r europarl-* commoncrawl.* training/ *.tgz
22 | 
23 | cd ..
24 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/src/include/utils.h:
--------------------------------------------------------------------------------
 1 | /*************************************************************************
 2 |  * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * See LICENSE.txt for license information
 5 |  ************************************************************************/
 6 | 
 7 | #ifndef NCCL_UTILS_H_
 8 | #define NCCL_UTILS_H_
 9 | 
10 | #include "nccl.h"
11 | #include <stdint.h>
12 | 
// Host and process identification helpers. Only the declarations are visible
// in this header; notes marked "presumably" are inferred from the names and
// should be confirmed against the definitions.

// Presumably writes the host name into `hostname`, a caller-provided buffer
// whose capacity is `maxlen` -- TODO confirm.
ncclResult_t getHostName(char* hostname, int maxlen);
// Return 64-bit hash values; presumably derived from the host and from the
// process id respectively -- TODO confirm.
uint64_t getHostHash();
uint64_t getPidHash();

// One entry of an interface list: a prefix string held in a fixed 64-byte
// buffer, plus a port number.
struct netIf {
  char prefix[64];
  int port;
};

// Presumably parses `string` into at most `maxList` entries of `ifList` and
// returns how many were written -- TODO confirm.
int parseStringList(const char* string, struct netIf* ifList, int maxList);
// Presumably reports whether `string`/`port` matches one of the `listSize`
// entries of `ifList` -- TODO confirm.
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize);
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/examples/training-basics/scripts/remove-diacritics.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | # Author: Barry Haddow
 4 | # Distributed under MIT license
 5 | 
 6 | #
 7 | # Remove Romanian diacritics. Assumes s-comma and t-comma are normalised
 8 | 
 9 | import io
10 | import sys
# Maps every Romanian letter carrying a diacritic to its plain ASCII letter.
_DIACRITICS = str.maketrans({
  "\u0218": "S", "\u0219": "s",  # s-comma
  "\u021a": "T", "\u021b": "t",  # t-comma
  "\u0102": "A", "\u0103": "a",  # a-breve
  "\u00C2": "A", "\u00E2": "a",  # a-circumflex
  "\u00CE": "I", "\u00EE": "i",  # i-circumflex
})


def remove_diacritics(line):
  """Return `line` with Romanian diacritics replaced by unaccented letters.

  All other characters, including the line terminator, are left untouched.
  """
  return line.translate(_DIACRITICS)


def main():
  """Filter UTF-8 text from standard input to standard output."""
  istream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
  ostream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

  for line in istream:
    ostream.write(remove_diacritics(line))
  # Flush explicitly instead of relying on the wrapper's finalizer at
  # interpreter shutdown to push the buffered output out.
  ostream.flush()


if __name__ == "__main__":
  main()
21 | 


--------------------------------------------------------------------------------
/src/common/binary.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common/io_item.h"
 4 | 
 5 | #include <string>
 6 | #include <vector>
 7 | 
 8 | // Increase this if binary format changes
 9 | #define BINARY_FILE_VERSION 1
10 | 
namespace marian {
namespace io {
namespace binary {

// Declarations for loading and saving io::Item collections. Only the
// signatures are visible in this header; notes marked "presumably" are
// inferred from names and should be confirmed against the implementation.

// Fills `items` from the memory that `current` points to.
// NOTE(review): `mapped` presumably tells the loader that the buffer is a
// memory-mapped file -- confirm.
void loadItems(const void* current,
               std::vector<io::Item>& items,
               bool mapped = false);
// Overload that takes the name of a file instead of a memory pointer.
void loadItems(const std::string& fileName, std::vector<io::Item>& items);

// Return a single item selected by `vName` (presumably the item's name),
// read either from memory at `current` or from the file `fileName`.
io::Item getItem(const void* current, const std::string& vName);
io::Item getItem(const std::string& fileName, const std::string& vName);

// Writes `items` to the file named `fileName`.
void saveItems(const std::string& fileName, const std::vector<io::Item>& items);

}  // namespace binary
}  // namespace io
}  // namespace marian
28 | 


--------------------------------------------------------------------------------
/examples/wmt2017-transformer/scripts/download-files.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -v
 2 | 
 3 | mkdir -p data
 4 | cd data
 5 | 
 6 | # get En-De training data for WMT17
 7 | wget -nc http://www.statmt.org/europarl/v7/de-en.tgz
 8 | wget -nc http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz
 9 | wget -nc http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz
10 | 
11 | # extract data
12 | tar -xf de-en.tgz
13 | tar -xf training-parallel-commoncrawl.tgz
14 | tar -xf training-parallel-nc-v12.tgz
15 | 
16 | # create corpus files
17 | cat europarl-v7.de-en.de commoncrawl.de-en.de training/news-commentary-v12.de-en.de > corpus.de
18 | cat europarl-v7.de-en.en commoncrawl.de-en.en training/news-commentary-v12.de-en.en > corpus.en
19 | 
20 | # clean
21 | rm -r europarl-* commoncrawl.* training/ *.tgz
22 | 
23 | cd ..
24 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/Makefile:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # See LICENSE.txt for license information
 5 | #
# Top-level makefile: forwards work to the sub-makes in src/ and pkg/.
# Note that `all` is listed as phony but has no rule in this file.
.PHONY : all clean

# Plain `make` builds src; `make install` installs it.
default : src.build
install : src.install
# Output directory (overridable). ABSBUILDDIR is its absolute form, which is
# what gets handed to the sub-makes.
BUILDDIR ?= $(abspath ./build)
ABSBUILDDIR := $(abspath $(BUILDDIR))
# Sub-directories whose `clean` target is run by `make clean`.
TARGETS := src pkg
clean: ${TARGETS:%=%.clean}
test.build: src.build
# License files that the `lic` target copies into BUILDDIR.
LICENSE_FILES := LICENSE.txt
LICENSE_TARGETS := $(LICENSE_FILES:%=$(BUILDDIR)/%)
lic: $(LICENSE_TARGETS)

# Copy a top-level .txt file into BUILDDIR.
${BUILDDIR}/%.txt: %.txt
	@printf "Copying    %-35s > %s\n" $< $@
	mkdir -p ${BUILDDIR}
	cp $< $@

# src.<target>: run <target> in src/ with the absolute build directory.
src.%:
	${MAKE} -C src $* BUILDDIR=${ABSBUILDDIR}

# pkg.<target>: run <target> in pkg/ with the absolute build directory.
pkg.%:
	${MAKE} -C pkg $* BUILDDIR=${ABSBUILDDIR}

# The packaging prep steps need the license files copied first.
pkg.debian.prep: lic
pkg.txz.prep: lic
32 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/src/collectives/device/reduce.cu:
--------------------------------------------------------------------------------
 1 | /*************************************************************************
 2 |  * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * See LICENSE.txt for license information
 5 |  ************************************************************************/
 6 | 
 7 | #include "common.h"
 8 | #include "reduce.h"
 9 | #include "collectives.h"
10 | 
// Unroll factor; presumably consumed by the kernel templates pulled in through
// the headers above -- TODO confirm.
#define UNROLL 4

// NCCL_OP is not defined in this file, so it has to come from the build.
// Each value instantiates the ncclReduce collective for one reduction
// operator through IMPL_COLL2: 0 = sum, 1 = prod, 2 = min, 3 = max.
// Any other value instantiates nothing.
#if NCCL_OP == 0
IMPL_COLL2(ncclReduce, sum,  FuncSum,  ncclCollReduce, ncclSum);
#elif NCCL_OP == 1
IMPL_COLL2(ncclReduce, prod, FuncProd, ncclCollReduce, ncclProd);
#elif NCCL_OP == 2
IMPL_COLL2(ncclReduce, min,  FuncMin,  ncclCollReduce, ncclMin);
#elif NCCL_OP == 3
IMPL_COLL2(ncclReduce, max,  FuncMax,  ncclCollReduce, ncclMax);
#endif
22 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/src/freelist_test.cc:
--------------------------------------------------------------------------------
 1 | #include "freelist.h"
 2 | #include "testharness.h"
 3 | 
 4 | namespace sentencepiece {
 5 | namespace model {
 6 | 
 7 | TEST(FreeListTest, BasicTest) {
 8 |   FreeList<int> l(5);
 9 |   EXPECT_EQ(0, l.size());
10 | 
11 |   constexpr size_t kSize = 32;
12 | 
13 |   for (size_t i = 0; i < kSize; ++i) {
14 |     int *n = l.Allocate();
15 |     EXPECT_EQ(0, *n);
16 |     *n = i;
17 |   }
18 | 
19 |   EXPECT_EQ(kSize, l.size());
20 |   for (size_t i = 0; i < kSize; ++i) {
21 |     EXPECT_EQ(i, *l[i]);
22 |   }
23 | 
24 |   l.Free();
25 |   EXPECT_EQ(0, l.size());
26 | 
27 |   // Zero-initialized after `Free`.
28 |   for (size_t i = 0; i < kSize; ++i) {
29 |     int *n = l.Allocate();
30 |     EXPECT_EQ(0, *n);
31 |   }
32 | }
33 | }  // namespace model
34 | }  // namespace sentencepiece
35 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/noncopyable.h:
--------------------------------------------------------------------------------
 1 | #ifndef NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include "yaml-cpp/dll.h"
11 | 
12 | namespace YAML {
13 | // this is basically boost::noncopyable
14 | class YAML_CPP_API noncopyable {
15 |  protected:
16 |   noncopyable() {}
17 |   ~noncopyable() {}
18 | 
19 |  private:
20 |   noncopyable(const noncopyable&);
21 |   const noncopyable& operator=(const noncopyable&);
22 | };
23 | }
24 | 
25 | #endif  // NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
26 | 


--------------------------------------------------------------------------------
/src/tests/dropout_test.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <vector>
 4 | 
 5 | #include "marian.h"
 6 | 
 7 | using namespace marian;
 8 | 
 9 | int main(int argc, char** argv) {
10 |   auto c = New<Config>(argc, argv);
11 | 
12 |   auto type = c->get<size_t>("cpu-threads") > 0
13 |     ? DeviceType::cpu
14 |     : DeviceType::gpu;
15 |   DeviceId deviceId{0, type};
16 | 
17 |   auto g = New<ExpressionGraph>();
18 |   g->setDevice(deviceId);
19 |   g->reserveWorkspaceMB(512);
20 | 
21 |   for(int i = 0; i < 10; ++i) {
22 |     g->clear();
23 |     auto mask1 = g->dropout(0.2, {10, 3072});
24 |     auto mask2 = g->dropout(0.3, {1, 3072});
25 |     auto mask = mask1 + mask2;
26 |     debug(mask1, "mask1");
27 |     debug(mask2, "mask2");
28 |     debug(mask, "mask");
29 |     g->forward();
30 |   }
31 | 
32 |   return 0;
33 | }
34 | 


--------------------------------------------------------------------------------
/CL_tools/pre_sent_score.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import sys
 3 | 
 4 | cdf = 'de-rarity-cdf_base.npz'
 5 | rarity_t = open('de-rarity.txt', 'r').readlines()
 6 | rarity = {}
 7 | for l in rarity_t:
 8 |     word, pos = l.split()
 9 |     rarity[word] = float(pos)
10 | 
11 | base = np.load(cdf)['base'][:-1]
12 | cdf = np.load(cdf)['cdf']
13 | 
def get_cdf_by_sent(sent):
    """Return the CDF value for the rarity score of sentence `sent`.

    The score is the negated sum of log(rarity[word]) over the
    whitespace-separated words of `sent` that appear in `rarity`. The result
    is `cdf[idx]` for the first `idx` whose `base[idx]` is >= the score, or
    1.0 when the score exceeds every entry of `base`.

    Words missing from `rarity` do not contribute to the score. They are
    reported on stderr so that stdout carries exactly one score per sentence.
    """
    words = sent.split()
    score = 0.
    for word in words:
        if word in rarity:
            score += np.log(rarity[word])
        else:
            sys.stderr.write(word + '\n')
    score = -score
    # print("s:", score)
    for idx, b in enumerate(base):
        if score <= b:
            return cdf[idx]
    return 1.
28 | 
29 | 
# Print one CDF score for every sentence read from standard input.
for raw_line in sys.stdin:
    sentence = raw_line.strip()
    print(get_cdf_by_sent(sentence))
34 | 


--------------------------------------------------------------------------------
/examples/wmt2017-uedin/scripts/preprocess-data-mono.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -v
 2 | 
 3 | # suffix of target language files
 4 | SRC=en
 5 | TRG=de
 6 | 
 7 | # path to moses decoder: https://github.com/moses-smt/mosesdecoder
 8 | mosesdecoder=../tools/moses-scripts
 9 | 
10 | # path to subword segmentation scripts: https://github.com/rsennrich/subword-nmt
11 | subword_nmt=../tools/subword-nmt
12 | 
13 | # tokenize
14 | 
15 | prefix=news.2016
16 | 
17 | cat data/$prefix.$TRG \
18 |     | $mosesdecoder/scripts/tokenizer/normalize-punctuation.perl -l $TRG \
19 |     | $mosesdecoder/scripts/tokenizer/tokenizer.perl -a -l $TRG > data/$prefix.tok.$TRG
20 | 
21 | $mosesdecoder/scripts/recaser/truecase.perl -model model/tc.$TRG < data/$prefix.tok.$TRG > data/$prefix.tc.$TRG
22 | 
23 | $subword_nmt/apply_bpe.py -c model/$SRC$TRG.bpe < data/$prefix.tc.$TRG > data/$prefix.bpe.$TRG
24 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/src/collectives/device/all_reduce.cu:
--------------------------------------------------------------------------------
 1 | /*************************************************************************
 2 |  * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * See LICENSE.txt for license information
 5 |  ************************************************************************/
 6 | 
 7 | #include "common.h"
 8 | #include "all_reduce.h"
 9 | #include "collectives.h"
10 | 
// Unroll factor; presumably consumed by the kernel templates pulled in through
// the headers above -- TODO confirm.
#define UNROLL 4

// NCCL_OP is not defined in this file, so it has to come from the build.
// Each value instantiates the ncclAllReduce collective for one reduction
// operator through IMPL_COLL2: 0 = sum, 1 = prod, 2 = min, 3 = max.
// Any other value instantiates nothing.
#if NCCL_OP == 0
IMPL_COLL2(ncclAllReduce, sum,  FuncSum,  ncclCollAllReduce, ncclSum);
#elif NCCL_OP == 1
IMPL_COLL2(ncclAllReduce, prod, FuncProd, ncclCollAllReduce, ncclProd);
#elif NCCL_OP == 2
IMPL_COLL2(ncclAllReduce, min,  FuncMin,  ncclCollAllReduce, ncclMin);
#elif NCCL_OP == 3
IMPL_COLL2(ncclAllReduce, max,  FuncMax,  ncclCollAllReduce, ncclMax);
#endif
22 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/directives.h:
--------------------------------------------------------------------------------
 1 | #ifndef DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include <string>
11 | #include <map>
12 | 
namespace YAML {
// Version number as a major/minor pair.
// NOTE(review): `isDefault` presumably records that no explicit version was
// given in the document -- confirm.
struct Version {
  bool isDefault;
  int major, minor;
};

// Bundles a version with a string-to-string tag map.
struct Directives {
  Directives();

  // Returns a string for `handle`; presumably the entry stored in `tags` for
  // that handle -- see the definition for the fallback behaviour.
  const std::string TranslateTagHandle(const std::string& handle) const;

  Version version;
  std::map<std::string, std::string> tags;
};
}
28 | 
29 | #endif  // DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66
30 | 


--------------------------------------------------------------------------------
/examples/wmt2017-transformer/scripts/preprocess-data-mono.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -v
 2 | 
 3 | # suffix of target language files
 4 | SRC=en
 5 | TRG=de
 6 | 
 7 | # path to moses decoder: https://github.com/moses-smt/mosesdecoder
 8 | mosesdecoder=../tools/moses-scripts
 9 | 
10 | # path to subword segmentation scripts: https://github.com/rsennrich/subword-nmt
11 | subword_nmt=../tools/subword-nmt
12 | 
13 | # tokenize
14 | 
15 | prefix=news.2016
16 | 
17 | cat data/$prefix.$TRG \
18 |     | $mosesdecoder/scripts/tokenizer/normalize-punctuation.perl -l $TRG \
19 |     | $mosesdecoder/scripts/tokenizer/tokenizer.perl -a -l $TRG > data/$prefix.tok.$TRG
20 | 
21 | $mosesdecoder/scripts/recaser/truecase.perl -model model/tc.$TRG < data/$prefix.tok.$TRG > data/$prefix.tc.$TRG
22 | 
23 | $subword_nmt/apply_bpe.py -c model/$SRC$TRG.bpe < data/$prefix.tc.$TRG > data/$prefix.bpe.$TRG
24 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/details/null_mutex.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include <atomic>
 9 | // null, no cost dummy "mutex" and dummy "atomic" int
10 | 
namespace spdlog
{
namespace details
{
// Drop-in replacement for a mutex that performs no locking at all.
struct null_mutex
{
    void lock() {}
    void unlock() {}
    // Always reports success, since there is nothing to contend for.
    bool try_lock()
    {
        return true;
    }
};

// Plain int with the load/store interface of std::atomic<int>, without any
// atomicity. The memory_order arguments are accepted for call-site
// compatibility with std::atomic and are ignored.
struct null_atomic_int
{
    // Zero by default so that a default-constructed object never exposes an
    // indeterminate value through load().
    int value{0};
    null_atomic_int() = default;

    null_atomic_int(int val):value(val)
    {}

    int load(std::memory_order = std::memory_order_seq_cst) const
    {
        return value;
    }

    void store(int val, std::memory_order = std::memory_order_seq_cst)
    {
        value = val;
    }
};

}
}
46 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/include/spdlog/details/null_mutex.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include <atomic>
 9 | // null, no cost dummy "mutex" and dummy "atomic" int
10 | 
namespace spdlog
{
namespace details
{
// Drop-in replacement for a mutex that performs no locking at all.
struct null_mutex
{
    void lock() {}
    void unlock() {}
    // Always reports success, since there is nothing to contend for.
    bool try_lock()
    {
        return true;
    }
};

// Plain int with the load/store interface of std::atomic<int>, without any
// atomicity. The memory_order arguments are accepted for call-site
// compatibility with std::atomic and are ignored.
struct null_atomic_int
{
    // Zero by default so that a default-constructed object never exposes an
    // indeterminate value through load().
    int value{0};
    null_atomic_int() = default;

    null_atomic_int(int val):value(val)
    {}

    int load(std::memory_order = std::memory_order_seq_cst) const
    {
        return value;
    }

    void store(int val, std::memory_order = std::memory_order_seq_cst)
    {
        value = val;
    }
};

}
}
46 | 


--------------------------------------------------------------------------------
/examples/training-basics/scripts/download-files.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -v
 2 | 
 3 | cd data
 4 | 
 5 | # get En-Ro training data for WMT16
 6 | wget -nc http://www.statmt.org/europarl/v7/ro-en.tgz
 7 | wget -nc http://opus.lingfil.uu.se/download.php?f=SETIMES2/en-ro.txt.zip -O SETIMES2.ro-en.txt.zip
 8 | wget -nc http://data.statmt.org/rsennrich/wmt16_backtranslations/ro-en/corpus.bt.ro-en.en.gz
 9 | wget -nc http://data.statmt.org/rsennrich/wmt16_backtranslations/ro-en/corpus.bt.ro-en.ro.gz
10 | 
11 | # extract data
12 | tar -xf ro-en.tgz
13 | unzip SETIMES2.ro-en.txt.zip
14 | gzip -d corpus.bt.ro-en.en.gz corpus.bt.ro-en.ro.gz
15 | 
16 | # create corpus files
17 | cat europarl-v7.ro-en.en SETIMES2.en-ro.en corpus.bt.ro-en.en > corpus.en
18 | cat europarl-v7.ro-en.ro SETIMES2.en-ro.ro corpus.bt.ro-en.ro > corpus.ro
19 | 
20 | # clean
21 | rm ro-en.tgz SETIMES2.* corpus.bt.* europarl-*
22 | 
23 | cd ..
24 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/tag.h:
--------------------------------------------------------------------------------
 1 | #ifndef TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include <string>
11 | 
namespace YAML {
struct Directives;
struct Token;

// A tag described by its kind plus a handle and a value string.
struct Tag {
  // The kinds of tag this struct distinguishes.
  enum TYPE {
    VERBATIM,
    PRIMARY_HANDLE,
    SECONDARY_HANDLE,
    NAMED_HANDLE,
    NON_SPECIFIC
  };

  // Builds the tag from `token`. Not `explicit`, so a Token converts to a Tag
  // implicitly.
  Tag(const Token& token);
  // Returns a string for this tag given `directives`; presumably the fully
  // resolved tag text -- see the definition.
  const std::string Translate(const Directives& directives);

  TYPE type;
  std::string handle, value;
};
}
32 | 
33 | #endif  // TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
34 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/src/collectives/device/reduce_scatter.cu:
--------------------------------------------------------------------------------
 1 | /*************************************************************************
 2 |  * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * See LICENSE.txt for license information
 5 |  ************************************************************************/
 6 | 
 7 | #include "common.h"
 8 | #include "reduce_scatter.h"
 9 | #include "collectives.h"
10 | 
// Unroll factor; presumably consumed by the kernel templates pulled in through
// the headers above -- TODO confirm.
#define UNROLL 4

// NCCL_OP is not defined in this file, so it has to come from the build.
// Each value instantiates the ncclReduceScatter collective for one reduction
// operator through IMPL_COLL2: 0 = sum, 1 = prod, 2 = min, 3 = max.
// Any other value instantiates nothing.
#if NCCL_OP == 0
IMPL_COLL2(ncclReduceScatter, sum,  FuncSum,  ncclCollReduceScatter, ncclSum);
#elif NCCL_OP == 1
IMPL_COLL2(ncclReduceScatter, prod, FuncProd, ncclCollReduceScatter, ncclProd);
#elif NCCL_OP == 2
IMPL_COLL2(ncclReduceScatter, min,  FuncMin,  ncclCollReduceScatter, ncclMin);
#elif NCCL_OP == 3
IMPL_COLL2(ncclReduceScatter, max,  FuncMax,  ncclCollReduceScatter, ncclMax);
#endif
22 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/example/Makefile.mingw:
--------------------------------------------------------------------------------
 1 | CXX	?= g++
 2 | CXXFLAGS	=  -D_WIN32_WINNT=0x600 -march=native -Wall -Wextra -Wshadow -pedantic -std=c++11 -pthread -Wl,--no-as-needed  -I../include 
 3 | CXX_RELEASE_FLAGS = -O3 
 4 | CXX_DEBUG_FLAGS= -g 
 5 | 
 6 | 
 7 | all:	example bench
 8 | debug: example-debug bench-debug
 9 | 
10 | example: example.cpp
11 | 	$(CXX) example.cpp -o example $(CXXFLAGS) $(CXX_RELEASE_FLAGS)
12 | 
13 | bench: bench.cpp
14 | 	$(CXX) bench.cpp -o bench $(CXXFLAGS) $(CXX_RELEASE_FLAGS)
15 | 	
16 | 
17 | example-debug: example.cpp
18 | 	$(CXX) example.cpp -o example-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS)
19 | 	
20 | bench-debug: bench.cpp
21 | 	$(CXX) bench.cpp -o bench-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS)	
22 | 
23 | 
24 | 
25 | clean:
26 | 	rm -f *.o logs/*.txt example example-debug bench bench-debug 
27 | 
28 | 
29 | rebuild: clean all
30 | rebuild-debug: clean debug
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/src/data/types.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common/definitions.h"
 4 | 
 5 | #include <cstdint>
 6 | #include <cstdlib>
 7 | #include <string>
 8 | #include <unordered_map>
 9 | #include <vector>
10 | 
11 | namespace marian {
12 | 
13 | // Type for all vocabulary items, based on IndexType
14 | typedef IndexType Word;
15 | 
16 | // Sequence of vocabulary items
17 | typedef std::vector<Word> Words;
18 | 
19 | // EOS and UNK are placed in these positions in Marian-generated vocabs
20 | const Word DEFAULT_EOS_ID = 0;
21 | const Word DEFAULT_UNK_ID = 1;
22 | 
23 | // names of EOS and UNK symbols
24 | const std::string DEFAULT_EOS_STR = "</s>";
25 | const std::string DEFAULT_UNK_STR = "<unk>";
26 | 
27 | // alternatively accepted names in Yaml dictionaries for ids 0 and 1, resp.
28 | const std::string NEMATUS_EOS_STR = "eos";
29 | const std::string NEMATUS_UNK_STR = "UNK";
30 | 
31 | }  // namespace marian
32 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/example/Makefile.clang:
--------------------------------------------------------------------------------
 1 | CXX	?= clang++
 2 | CXXFLAGS	= -march=native -Wall -Wextra -Wshadow -pedantic -std=c++11 -pthread -I../include
 3 | CXX_RELEASE_FLAGS = -O2
 4 | CXX_DEBUG_FLAGS= -g 
 5 | 
 6 | 
 7 | all:	example bench
 8 | debug: example-debug bench-debug
 9 | 
10 | example: example.cpp
11 | 	$(CXX) example.cpp -o example-clang $(CXXFLAGS) $(CXX_RELEASE_FLAGS)
12 | 
13 | bench: bench.cpp
14 | 	$(CXX) bench.cpp -o bench-clang $(CXXFLAGS) $(CXX_RELEASE_FLAGS)
15 | 	
16 | 
17 | example-debug: example.cpp
18 | 	$(CXX) example.cpp -o example-clang-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS)
19 | 	
20 | bench-debug: bench.cpp
21 | 	$(CXX) bench.cpp -o bench-clang-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS)	
22 | 
23 | 
24 | 
25 | clean:
26 | 	rm -f *.o logs/*.txt example-clang example-clang-debug bench-clang bench-clang-debug 
27 | 
28 | 
29 | rebuild: clean all
30 | rebuild-debug: clean debug
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/yaml.h:
--------------------------------------------------------------------------------
 1 | #ifndef YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include "yaml-cpp/parser.h"
11 | #include "yaml-cpp/emitter.h"
12 | #include "yaml-cpp/emitterstyle.h"
13 | #include "yaml-cpp/stlemitter.h"
14 | #include "yaml-cpp/exceptions.h"
15 | 
16 | #include "yaml-cpp/node/node.h"
17 | #include "yaml-cpp/node/impl.h"
18 | #include "yaml-cpp/node/convert.h"
19 | #include "yaml-cpp/node/iterator.h"
20 | #include "yaml-cpp/node/detail/impl.h"
21 | #include "yaml-cpp/node/parse.h"
22 | #include "yaml-cpp/node/emit.h"
23 | 
24 | #endif  // YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
25 | 


--------------------------------------------------------------------------------
/src/training/gradient_dropping/gpu/sparse_algorithm.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common/definitions.h"
 4 | #include "tensors/backend.h"
 5 | #include "tensors/tensor.h"
 6 | 
namespace marian {
namespace gpu {
/**
 * @brief Output[i] is lower_bound of values[i] in data.
 *
 * NOTE(review): `size` is presumably the number of elements in `data`, and
 * `device` the device that `data` lives on -- confirm against the definition.
 *
 * @return A vector of size values.size
 */
std::vector<int> lower_bounds(int* data,
                              std::vector<int> values,
                              int size,
                              DeviceId device);

// The functions below exchange values between a tensor `t` and a pair of
// parallel arrays, `data` (values) and `indices` (positions). Only the
// signatures are visible here; the exact semantics of `size` and `offset`
// must be taken from the definitions.

// Presumably fills `data`/`indices` from `t` and returns the number of
// entries written -- TODO confirm.
int buildSparse(Tensor t, float* data, int* indices);

void scatterAdd(Tensor t, float* data, int* indices, int size, int offset);

void scatterUpdate(Tensor t, float* data, int* indices, int size, int offset);

void gather(Tensor t, float* data, int* indices, int size, int offset);
}  // namespace gpu
}  // namespace marian
28 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/.gitignore:
--------------------------------------------------------------------------------
 1 | Makefile
 2 | Makefile.in
 3 | /ar-lib
 4 | /mdate-sh
 5 | /py-compile
 6 | /test-driver
 7 | /ylwrap
 8 | /build
 9 | 
10 | /autom4te.cache
11 | /autoscan.log
12 | /autoscan-*.log
13 | /aclocal.m4
14 | /compile
15 | /config.guess
16 | /config.sub
17 | /configure
18 | /configure.scan
19 | /depcomp
20 | /install-sh
21 | /missing
22 | /stamp-h1
23 | /libtool
24 | /config.h
25 | /config.status
26 | /autogen.sh
27 | /ltmain.sh
28 | 
29 | CMakeFiles
30 | CMakeCache.txt
31 | config.h
32 | sentencepiece.pc
33 | 
34 | *.o
35 | *.lo
36 | *.a
37 | *.la
38 | *.pyc
39 | 
40 | .libs
41 | .deps
42 | 
43 | *.m4
44 | *.log
45 | *.trs
46 | 
47 | compile_charsmap
48 | 
49 | spm_decode
50 | spm_encode
51 | spm_export_vocab
52 | spm_train
53 | spm_normalize
54 | spm_test
55 | 
56 | *.pb.cc
57 | *.pb.h
58 | 
59 | .DS_Store
60 | *.egg-info/
61 | dist/
62 | *.swp
63 | *.swo
64 | *.pyc
65 | 
66 | m.model
67 | m.vocab
68 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/null.h:
--------------------------------------------------------------------------------
 1 | #ifndef NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include "yaml-cpp/dll.h"
11 | #include <string>
12 | 
namespace YAML {
class Node;

// Empty tag type; the single exported instance is `Null`, declared below.
struct YAML_CPP_API _Null {};
// The type carries no state, so any two values always compare equal.
inline bool operator==(const _Null&, const _Null&) { return true; }
inline bool operator!=(const _Null&, const _Null&) { return false; }

YAML_CPP_API bool IsNull(const Node& node);  // old API only
// Presumably reports whether `str` is one of the textual spellings of null
// -- see the definition for the accepted strings.
YAML_CPP_API bool IsNullString(const std::string& str);

// Defined elsewhere; this is only the declaration.
extern YAML_CPP_API _Null Null;
}
25 | 
26 | #endif  // NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
27 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/mark.h:
--------------------------------------------------------------------------------
 1 | #ifndef MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include "yaml-cpp/dll.h"
11 | 
12 | namespace YAML {
13 | struct YAML_CPP_API Mark {
14 |   // Default mark: pos, line and column all start at 0.
15 |   Mark() : pos(0), line(0), column(0) {}
16 |   // Sentinel mark with every field set to -1.
17 |   static const Mark null_mark() { return Mark(-1, -1, -1); }
18 |   // True only when all three fields equal -1.
19 |   bool is_null() const { return !(pos != -1 || line != -1 || column != -1); }
20 |   int pos;
21 |   int line, column;
22 | 
23 |  private:
24 |   // Field-wise constructor; private, used by null_mark().
25 |   Mark(int pos_, int line_, int column_) : pos(pos_), line(line_), column(column_) {}
26 | };
27 | }
28 | 
29 | #endif  // MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
30 | 


--------------------------------------------------------------------------------
/src/training/exponential_smoothing.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common/definitions.h"
 4 | #include "functional/functional.h"
 5 | #include "tensors/tensor_operators.h"
 6 | 
 7 | namespace marian {
 8 | 
 9 | /**
10 |  * Class implementing exponential smoothing for graph groups.
11 |  * The smoothed parameters themselves are not stored in here.
12 |  */
13 | class ExponentialSmoothing {
14 | public:
15 |   // mvAvg_ is set only for a strictly positive decay; mvDecay_ stores decay as given.
16 |   ExponentialSmoothing(float decay = 0.0f) : mvAvg_{decay > 0}, mvDecay_{decay} {}
17 | 
18 | protected:
19 |   // paramsAvg <- (1 - d) * paramsAvg + d * params, d = max(mvDecay_, 1 - (batches+1)/(batches+10)).
20 |   void updateAvgParams(Tensor paramsAvg, Tensor params, size_t batches) {
21 |     using namespace functional;
22 |     float batchTerm = 1.f - (float)(batches + 1) / (float)(batches + 10);
23 |     float d = (mvDecay_ < batchTerm) ? batchTerm : mvDecay_;  // larger of the two
24 |     Element(_1 = ((1.f - d) * _1) + (d * _2), paramsAvg, params);
25 |   }
26 |   bool mvAvg_{false};
27 |   float mvDecay_{1e-4f};
28 | };
29 | }  // namespace marian
30 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/node/detail/iterator_fwd.h:
--------------------------------------------------------------------------------
 1 | #ifndef VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include "yaml-cpp/dll.h"
11 | #include <list>
12 | #include <utility>
13 | #include <vector>
14 | 
15 | namespace YAML {
16 | 
17 | namespace detail {
18 | struct iterator_value;
19 | template <typename V>
20 | class iterator_base;
21 | }
22 | 
23 | typedef detail::iterator_base<detail::iterator_value> iterator;
24 | typedef detail::iterator_base<const detail::iterator_value> const_iterator;
25 | }
26 | 
27 | #endif  // VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
28 | 


--------------------------------------------------------------------------------
/src/functional/array.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "functional/defs.h"
 4 | 
 5 | namespace marian {
 6 | 
 7 | namespace functional {
 8 | 
 9 | template <typename T, size_t N>
10 | struct Array {
11 |   typedef T value_type;
12 |   // Fixed-size storage for N elements of type T; every member function is marked __HDI__.
13 |   T data_[N];
14 | 
15 |   __HDI__ const T* data() const { return data_; }
16 | 
17 |   __HDI__ T* data() { return data_; }
18 | 
19 |   __HDI__ constexpr static size_t size() { return N; }
20 |   // Element access; the index is not range-checked.
21 |   __HDI__ T& operator[](size_t i) { return data_[i]; }
22 |   __HDI__ const T& operator[](size_t i) const { return data_[i]; }
23 | 
24 |   __HDI__ T* begin() { return data_; }
25 |   __HDI__ const T* begin() const { return data_; }
26 |   // end() points one past the last element (data_ + N).
27 |   __HDI__ T* end() { return data_ + N; }
28 |   __HDI__ const T* end() const { return data_ + N; }
29 |   // Assigns val to every element; the index is size_t so it has the same type as N.
30 |   __HDI__ void fill(T val) {
31 |     for(size_t i = 0; i < N; ++i)
32 |       data_[i] = val;
33 |   }
34 | };
35 | }  // namespace functional
36 | }  // namespace marian
37 | 


--------------------------------------------------------------------------------
/src/layers/weight.cpp:
--------------------------------------------------------------------------------
 1 | #include "layers/weight.h"
 2 | 
 3 | namespace marian {
 4 | 
 5 | // Aborts unless "data-weighting" is set, then builds a DataWeighting from "data-weighting-type".
 6 | Ptr<WeightingBase> WeightingFactory(Ptr<Options> options) {
 7 |   ABORT_IF(!options->has("data-weighting"), "No data-weighting specified in options");
 8 |   return New<DataWeighting>(options->get<std::string>("data-weighting-type"));
 9 | }
10 | 
11 | Expr DataWeighting::getWeights(Ptr<ExpressionGraph> graph,
12 |                                Ptr<data::CorpusBatch> batch) {
13 |   // Returns a constant node of shape {1, dimWords, dimBatch, 1} filled from the batch's data weights.
14 |   ABORT_IF(batch->getDataWeights().empty(), "Vector of weights is unexpectedly empty!");
15 |   int dimBatch = (int)batch->size();
16 |   int dimWords = 1;  // stays 1 when weightingType_ is "sentence"
17 |   if(weightingType_ != "sentence")
18 |     dimWords = (int)batch->back()->batchWidth();
19 |   return graph->constant({1, dimWords, dimBatch, 1},
20 |                          inits::from_vector(batch->getDataWeights()));
21 | }
22 | }  // namespace marian
23 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/node/ptr.h:
--------------------------------------------------------------------------------
 1 | #ifndef VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include "yaml-cpp/dll.h"
11 | #include <memory>
12 | 
13 | namespace YAML {
14 | namespace detail {
15 | class node;
16 | class node_ref;
17 | class node_data;
18 | class memory;
19 | class memory_holder;
20 | 
21 | typedef std::shared_ptr<node> shared_node;
22 | typedef std::shared_ptr<node_ref> shared_node_ref;
23 | typedef std::shared_ptr<node_data> shared_node_data;
24 | typedef std::shared_ptr<memory_holder> shared_memory_holder;
25 | typedef std::shared_ptr<memory> shared_memory;
26 | }
27 | }
28 | 
29 | #endif  // VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
30 | 


--------------------------------------------------------------------------------
/src/command/marian_vocab.cpp:
--------------------------------------------------------------------------------
 1 | #include "marian.h"
 2 | 
 3 | #include "common/cli_wrapper.h"
 4 | #include "common/logging.h"
 5 | #include "data/vocab.h"
 6 | 
 7 | int main(int argc, char** argv) {
 8 |   using namespace marian;
 9 | 
10 |   createLoggers();
11 | 
12 |   // The CLI wrapper only lives inside the inner scope; opts is what the rest of main reads.
13 |   auto opts = New<Options>();
14 |   {
15 |     auto parser = New<cli::CLIWrapper>(
16 |         opts,
17 |         "Create a vocabulary from text corpora given on STDIN",
18 |         "Allowed options",
19 |         "Examples:\n"
20 |         "  ./marian-vocab < text.src > vocab.yml\n"
21 |         "  cat text.src text.trg | ./marian-vocab > vocab.yml");
22 |     parser->add<size_t>("--max-size,-m", "Generate only UINT most common vocabulary items", 0);
23 |     parser->parse(argc, argv);
24 |   }
25 | 
26 |   LOG(info, "Creating vocabulary...");
27 |   auto vocabulary = New<Vocab>(opts, 0);
28 |   // NOTE(review): "stdout"/"stdin" are presumably the output and input paths - confirm against Vocab::create.
29 |   vocabulary->create("stdout", "stdin", opts->get<size_t>("max-size"));
30 | 
31 |   LOG(info, "Finished");
32 |   return 0;
33 | }
34 | 


--------------------------------------------------------------------------------
/src/translator/output_printer.cpp:
--------------------------------------------------------------------------------
 1 | #include "output_printer.h"
 2 | 
 3 | namespace marian {
 4 | 
 5 | std::string OutputPrinter::getAlignment(const Ptr<Hypothesis>& hyp) {
 6 |   // Follow the GetPrevHyp() chain from hyp, collecting GetAlignment() of
 7 |   // every hypothesis that still has a predecessor (the chain's first one is skipped).
 8 |   data::SoftAlignment collected;
 9 |   for(auto cur = hyp; cur->GetPrevHyp().get() != nullptr; cur = cur->GetPrevHyp())
10 |     collected.push_back(cur->GetAlignment());
11 | 
12 |   // Entries were gathered last-to-first; flip them into forward order.
13 |   std::reverse(collected.begin(), collected.end());
14 | 
15 |   // Choose the output form: "soft" and "hard" are matched by name first,
16 |   // otherwise a positive alignmentThreshold_ selects thresholded hard output.
17 |   if(alignment_ == "soft")
18 |     return data::SoftAlignToString(collected);
19 |   if(alignment_ == "hard")
20 |     return data::ConvertSoftAlignToHardAlign(collected, 1.f).toString();
21 |   if(alignmentThreshold_ > 0.f)
22 |     return data::ConvertSoftAlignToHardAlign(collected, alignmentThreshold_)
23 |         .toString();
24 | 
25 |   // Neither a recognised name nor a positive threshold.
26 |   ABORT("Unrecognized word alignment type");
27 | }
28 | 
29 | }  // namespace marian
30 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/srctxz/create_srctxz.sh.in:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # See LICENSE.txt for license information
 6 | #
 7 | 
 8 | # To run from $BUILDDIR/
 9 | 
10 | cd ..
11 | NCCLDIR=`basename $PWD`
12 | # NCCLDIR now holds the name of the directory one level above the start directory.
13 | echo "Checking for unclean directory ..."
14 | git clean -x -i
15 | echo "Clean done."
16 | echo "Checking for uncommited files ..."
17 | if [ "`git status -s | wc -l`" != "0" ]; then
18 |   git status -s
19 |   echo "Some changes are not committed yet. Continue ? (Ctrl-C to abort)"
20 |   read
21 | fi
22 | # Step up once more so the tar call below can refer to the tree as $NCCLDIR.
23 | cd ..
24 | NCCL_MAJOR=${nccl:Major}
25 | NCCL_MINOR=${nccl:Minor}
26 | NCCL_PATCH=${nccl:Patch}
27 | NCCL_SUFFIX=${nccl:Suffix}
28 | # NOTE(review): the ${nccl:...} tokens look like placeholders filled in when this .in file is processed - confirm.
29 | NCCLNAME="nccl-src_${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_PATCH}${NCCL_SUFFIX}"
30 | # Archive $NCCLDIR as $NCCLNAME.txz, excluding build, .git* and pkg/srctxz, with the leading directory renamed.
31 | tar --exclude build \
32 |     --exclude ".git*" \
33 |     --exclude pkg/srctxz \
34 |     --transform "s/^$NCCLDIR/$NCCLNAME/" -Jcf $NCCLNAME.txz --owner=0 --group=0 $NCCLDIR
35 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/src/include/group.h:
--------------------------------------------------------------------------------
 1 | /*************************************************************************
 2 |  * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * See LICENSE.txt for license information
 5 |  ************************************************************************/
 6 | 
 7 | #ifndef NCCL_GROUP_H_
 8 | #define NCCL_GROUP_H_
 9 | 
10 | #include "nccl.h"
11 | #include "core.h"
12 | 
13 | bool ncclAsyncMode();
14 | ncclResult_t ncclAsyncErrCheck(ncclResult_t ret);
15 | 
16 | typedef ncclResult_t(*ncclInitFunc_t)(ncclComm_t* newcomm, int ndev, ncclUniqueId commId, int myrank);
17 | 
18 | ncclResult_t ncclAsyncInit(ncclInitFunc_t func, int cudaDev, ncclComm_t* newcomm, int ndev, ncclUniqueId commId, int myrank);
19 | 
20 | typedef ncclResult_t(*ncclCollFunc_t)(const void* sendbuff, void* recvbuff, size_t count,
21 |     ncclDataType_t type, ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream);
22 | 
23 | ncclResult_t ncclAsyncColl(ncclComm_t comm);
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/models/model_base.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <string>
 4 | #include "marian.h"
 5 | 
 6 | namespace marian {
 7 | namespace models {
 8 | 
 9 | enum struct usage { raw, training, scoring, translation };
10 | }
11 | }  // namespace marian
12 | 
13 | YAML_REGISTER_TYPE(marian::models::usage, int)
14 | 
15 | namespace marian {
16 | namespace models {
17 | 
18 | class ModelBase {
19 | public:
20 |   // Virtual so a derived model is destroyed correctly through a ModelBase pointer.
21 |   virtual ~ModelBase() = default;
22 | 
23 |   // Arguments: graph, a string (presumably the model file name - confirm), markReloaded.
24 |   virtual void load(Ptr<ExpressionGraph>, const std::string&, bool markReloaded = true) = 0;
25 | 
26 |   // Arguments: graph, a string (presumably the model file name - confirm), saveTranslatorConfig.
27 |   virtual void save(Ptr<ExpressionGraph>, const std::string&, bool saveTranslatorConfig = false) = 0;
28 | 
29 |   // Returns an Expr for the given graph and batch; clearGraph defaults to true.
30 |   virtual Expr build(Ptr<ExpressionGraph> graph,
31 |                      Ptr<data::Batch> batch,
32 |                      bool clearGraph = true) = 0;
33 | 
34 |   virtual void clear(Ptr<ExpressionGraph> graph) = 0;
35 | };
36 | 
37 | }  // namespace models
38 | }  // namespace marian
39 | 


--------------------------------------------------------------------------------
/src/data/batch.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <vector>
 4 | 
 5 | #include "common/definitions.h"
 6 | 
 7 | namespace marian {
 8 | namespace data {
 9 | 
10 | class Batch {
11 | public:
12 |   // size() and split() are pure virtual; the other size queries return 0 unless overridden.
13 |   virtual size_t size() const = 0;
14 |   virtual size_t words(int /*which*/ = 0) const { return 0; }
15 |   virtual size_t width() const { return 0; }
16 |   virtual size_t sizeTrg() const { return 0; }
17 |   virtual size_t wordsTrg() const { return 0; }
18 |   virtual size_t widthTrg() const { return 0; }
19 | 
20 |   virtual void debug() {}  // empty unless overridden
21 |   virtual std::vector<Ptr<Batch>> split(size_t n) = 0;
22 | 
23 |   // Sentence ids are copied in by the setter and exposed by const reference.
24 |   void setSentenceIds(const std::vector<size_t>& ids) { sentenceIds_ = ids; }
25 |   const std::vector<size_t>& getSentenceIds() const { return sentenceIds_; }
26 | 
27 |   virtual void setGuidedAlignment(std::vector<float>&&) = 0;
28 |   virtual void setDataWeights(const std::vector<float>&) = 0;
29 | 
30 | protected:
31 |   std::vector<size_t> sentenceIds_;
32 | };
33 | }  // namespace data
34 | }  // namespace marian
35 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/.gitignore:
--------------------------------------------------------------------------------
 1 | # Auto generated files
 2 | *.slo
 3 | *.lo
 4 | *.o
 5 | *.obj
 6 | *.suo
 7 | *.tlog
 8 | *.ilk
 9 | *.log
10 | *.pdb
11 | *.idb
12 | *.iobj
13 | *.ipdb
14 | *.opensdf
15 | *.sdf
16 | 
17 | # Compiled Dynamic libraries
18 | *.so
19 | *.dylib
20 | *.dll
21 | 
22 | # Compiled Static libraries
23 | *.lai
24 | *.la
25 | *.a
26 | *.lib
27 | 
28 | # Executables
29 | *.exe
30 | *.out
31 | *.app
32 | 
33 | # Codelite
34 | .codelite
35 | 
36 | # .orig files
37 | *.orig
38 | 
39 | # example  files
40 | example/*
41 | !example/example.cpp
42 | !example/bench.cpp
43 | !example/utils.h
44 | !example/Makefile*
45 | !example/example.sln
46 | !example/example.vcxproj
47 | !example/CMakeLists.txt
48 | !example/multisink.cpp
49 | !example/jni
50 | 
51 | # generated files
52 | generated
53 | 
54 | # Cmake
55 | CMakeCache.txt
56 | CMakeFiles
57 | CMakeScripts
58 | Makefile
59 | cmake_install.cmake
60 | install_manifest.txt
61 | /tests/tests.VC.VC.opendb
62 | /tests/tests.VC.db
63 | /tests/tests
64 | /tests/logs/file_helper_test.txt
65 | 


--------------------------------------------------------------------------------
/src/tensors/memory_piece.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <iostream>
 4 | 
 5 | namespace marian {
 6 | 
 7 | class MemoryPiece {
 8 |   // Address/size pair; the class itself never allocates or frees the region.
 9 |   uint8_t* data_;
10 |   size_t size_;
11 | 
12 | public:
13 |   MemoryPiece(uint8_t* data, size_t size) : data_(data), size_(size) {}
14 | 
15 |   // Raw byte pointer, available on const and non-const objects alike.
16 |   uint8_t* data() const { return data_; }
17 |   uint8_t* data() { return data_; }
18 | 
19 |   // Same pointer, C-style cast to T*.
20 |   template <typename T>
21 |   T* data() const { return (T*)data_; }
22 | 
23 |   template <typename T>
24 |   T* data() { return (T*)data_; }
25 | 
26 |   size_t size() const { return size_; }
27 | 
28 |   // Replace pointer and size together...
29 |   void set(uint8_t* data, size_t size) {
30 |     data_ = data;
31 |     size_ = size;
32 |   }
33 |   // ...or the pointer alone, keeping the stored size.
34 |   void setPtr(uint8_t* data) { data_ = data; }
35 | 
36 |   // Writes "MemoryPiece - ptr: <address in hex> size: <size in decimal>".
37 |   friend std::ostream& operator<<(std::ostream& out, const MemoryPiece mp) {
38 |     out << "MemoryPiece - ptr: " << std::hex << (size_t)mp.data();
39 |     out << std::dec << " size: " << mp.size();
40 |     return out;
41 |   }
42 | };
43 | }  // namespace marian
44 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/bench/latency/g3log-crush.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include <g3log/g3log.hpp>
 4 | #include <g3log/logworker.hpp>
 5 | 
 6 | void CrusherLoop()
 7 | {
 8 |     // Endless loop: `written` takes the values 1, 2, 3, ... and a progress
 9 |     // line is printed whenever it is a multiple of 1000000.
10 |     for (size_t written = 1; ; ++written)
11 |     {
12 |         LOGF(INFO, "Some text to crush you machine. thread:");
13 |         if (written % 1000000 != 0)
14 |             continue;
15 |         std::cout << "Wrote " << written << " entries" << std::endl;
16 |     }
17 | }
18 | 
19 | 
20 | int main(int argc, char** argv)
21 | {
22 |     // Ask for confirmation first; any answer other than 'y'/'Y' exits with status 0.
23 |     std::cout << "WARNING: This test will exaust all your machine memory and will crush it!" << std::endl;
24 |     std::cout << "Are you sure you want to continue ? " << std::endl;
25 |     char answer;
26 |     std::cin >> answer;
27 |     const bool confirmed = (toupper( answer ) == 'Y');
28 |     if (!confirmed)
29 |         return 0;
30 |     auto logWorker = g3::LogWorker::createLogWorker();
31 |     auto sinkHandle = logWorker->addDefaultLogger(argv[0], "g3log.txt");  // held until main ends
32 |     g3::initializeLogging(logWorker.get());
33 |     CrusherLoop();  // loops forever, so the return below is not reached
34 |     return 0;
35 | }
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/sinks/msvc_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2016 Alexander Dalshov.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #if defined(_MSC_VER)
 9 | 
10 | #include <spdlog/sinks/base_sink.h>
11 | #include <spdlog/details/null_mutex.h>
12 | 
13 | #include <WinBase.h>
14 | 
15 | #include <mutex>
16 | #include <string>
17 | 
18 | namespace spdlog
19 | {
20 | namespace sinks
21 | {
22 | /*
23 | * MSVC sink (logging using OutputDebugStringA)
24 | */
25 | template<class Mutex>
26 | class msvc_sink : public base_sink < Mutex >
27 | {
28 | protected:
29 |     // Passes the formatted message, as a C string, to OutputDebugStringA.
30 |     void _sink_it(const details::log_msg& msg) override
31 |     {
32 |         const char* text = msg.formatted.c_str();
33 |         OutputDebugStringA(text);
34 |     }
35 | 
36 | public:
37 |     explicit msvc_sink() {}
38 | 
39 |     // flush() has an empty body.
40 |     void flush() override
41 |     {}
42 | };
43 | 
44 | typedef msvc_sink<std::mutex> msvc_sink_mt;
45 | typedef msvc_sink<details::null_mutex> msvc_sink_st;
46 | 
47 | }
48 | }
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/src/rnn/attention_constructors.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "marian.h"
 4 | 
 5 | #include "layers/factory.h"
 6 | #include "rnn/attention.h"
 7 | #include "rnn/constructors.h"
 8 | #include "rnn/types.h"
 9 | 
10 | namespace marian {
11 | namespace rnn {
12 | 
13 | class AttentionFactory : public InputFactory {
14 | protected:
15 |   Ptr<EncoderState> state_;  // must be set via set_state() before construct()/dimAttended()
16 | public:
17 |   AttentionFactory(Ptr<ExpressionGraph> graph) : InputFactory(graph) {}
18 |   // Stores the encoder state and returns an Accumulator built from *this.
19 |   Accumulator<AttentionFactory> set_state(Ptr<EncoderState> state) {
20 |     state_ = state;
21 |     return Accumulator<AttentionFactory>(*this);
22 |   }
23 | 
24 |   Ptr<CellInput> construct() override {
25 |     ABORT_IF(!state_, "EncoderState not set");
26 |     return New<Attention>(graph_, options_, state_);
27 |   }
28 |   // Entry 1 of the attended expression's shape; aborts when no state is set.
29 |   int dimAttended() {
30 |     ABORT_IF(!state_, "EncoderState not set");
31 |     auto attended = state_->getAttended();
32 |     return attended->shape()[1];
33 |   }
34 | };
35 | 
36 | typedef Accumulator<AttentionFactory> attention;
37 | }  // namespace rnn
38 | }  // namespace marian
39 | 


--------------------------------------------------------------------------------
/src/tensors/cpu/sharp/int_gemm.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "tensors/tensor.h"
 4 | 
 5 | namespace marian {
 6 | namespace cpu {
 7 | namespace int16 {
 8 | 
 9 | const int BITS = 10;
10 | 
11 | void Quantize16(marian::Tensor out,
12 |                 const marian::Tensor in,
13 |                 float /*clipValue*/);
14 | 
15 | void Quantize8(marian::Tensor out,
16 |                const marian::Tensor in,
17 |                float clipValue);
18 | 
19 | // This operates on floats after processing so doesn't care about int8_t vs
20 | // int16_t.
21 | void AddBias(marian::Tensor C, const marian::Tensor Bias);
22 | 
23 | void ProdInt16(marian::Tensor C,
24 |                const marian::Tensor A,
25 |                const marian::Tensor B,
26 |                float scale);
27 | 
28 | void ProdInt8(marian::Tensor C,
29 |               const marian::Tensor A,
30 |               const marian::Tensor B,
31 |               float scale,
32 |               float clipValue);
33 | 
34 | }  // namespace int16
35 | }  // namespace cpu
36 | }  // namespace marian
37 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/include/spdlog/sinks/msvc_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2016 Alexander Dalshov.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #if defined(_MSC_VER)
 9 | 
10 | #include "spdlog/sinks/base_sink.h"
11 | #include "spdlog/details/null_mutex.h"
12 | 
13 | #include <WinBase.h>
14 | 
15 | #include <mutex>
16 | #include <string>
17 | 
18 | namespace spdlog
19 | {
20 | namespace sinks
21 | {
22 | /*
23 | * MSVC sink (logging using OutputDebugStringA)
24 | */
25 | template<class Mutex>
26 | class msvc_sink : public base_sink < Mutex >
27 | {
28 | protected:
29 |     // _flush() has an empty body.
30 |     void _flush() override
31 |     {}
32 | 
33 |     // Passes the formatted message, as a C string, to OutputDebugStringA.
34 |     void _sink_it(const details::log_msg& msg) override
35 |     {
36 |         const char* text = msg.formatted.c_str();
37 |         OutputDebugStringA(text);
38 |     }
39 | 
40 | public:
41 |     explicit msvc_sink()
42 |     {}
43 | };
44 | 
45 | typedef msvc_sink<std::mutex> msvc_sink_mt;
46 | typedef msvc_sink<details::null_mutex> msvc_sink_st;
47 | 
48 | }
49 | }
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/src/common/config_validator.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "3rd_party/yaml-cpp/yaml.h"
 4 | #include "common/config_parser.h"
 5 | 
 6 | namespace marian {
 7 | 
 8 | // TODO: Finally refactorize Config, Options, ConfigParser and ConfigValidator
 9 | // classes.
10 | class ConfigValidator {
11 | public:
12 |   ConfigValidator(const YAML::Node& config);
13 |   virtual ~ConfigValidator();
14 | 
15 |   // Validate options according to the given mode. Abort on first validation error
16 |   void validateOptions(cli::mode mode) const;
17 | 
18 | private:
19 |   // Stored as a reference; presumably bound to the constructor argument, which must then outlive this object.
20 |   const YAML::Node& config_;
21 | 
22 |   bool has(const std::string& key) const;
23 | 
24 |   // Returns config_[key] converted with as<T>().
25 |   template <typename T>
26 |   T get(const std::string& key) const { return config_[key].as<T>(); }
27 | 
28 |   // Individual checks; all are declared here and defined elsewhere.
29 |   void validateOptionsTranslation() const;
30 |   void validateOptionsParallelData() const;
31 |   void validateOptionsScoring() const;
32 |   void validateOptionsTraining() const;
33 |   void validateModelExtension(cli::mode mode) const;
34 |   void validateDevices(cli::mode mode) const;
35 | };
36 | 
37 | }  // namespace marian
38 | 


--------------------------------------------------------------------------------
/src/layers/weight.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common/options.h"
 4 | #include "data/corpus.h"
 5 | #include "graph/expression_graph.h"
 6 | #include "graph/expression_operators.h"
 7 | #include "graph/node_initializers.h"
 8 | 
 9 | namespace marian {
10 | 
11 | // Abstract interface that produces a weights expression for a batch.
12 | class WeightingBase {
13 | public:
14 |   WeightingBase() {}
15 |   virtual ~WeightingBase() = default;  // objects are handed out as Ptr<WeightingBase>
16 |   virtual Expr getWeights(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch) = 0;
17 |   // Debug hook; the default implementation ignores its arguments and does nothing.
18 |   virtual void debugWeighting(std::vector<float> /*weightedMask*/, std::vector<float> /*freqMask*/,
19 |                               Ptr<data::CorpusBatch> /*batch*/) {}
20 | };
21 | 
22 | // WeightingBase implementation parameterised by a weighting-type string.
23 | class DataWeighting : public WeightingBase {
24 | public:
25 |   DataWeighting(std::string weightingType) : WeightingBase(), weightingType_(weightingType) {}
26 |   Expr getWeights(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch) override;
27 | 
28 | protected:
29 |   std::string weightingType_;  // stored exactly as passed to the constructor
30 | };
31 | 
32 | Ptr<WeightingBase> WeightingFactory(Ptr<Options> options);
33 | }  // namespace marian
34 | 


--------------------------------------------------------------------------------
/src/optimizers/clippers.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <map>
 4 | #include <memory>
 5 | 
 6 | #include "tensors/tensor.h"
 7 | 
 8 | namespace marian {
 9 | 
10 | // @TODO: modify computation graph to group all parameters in a single matrix
11 | // object. This will allow performing a single large SGD update per batch.
12 | // Currently there are as many updates as different parameters.
13 | 
14 | // Abstract clipper interface: clip() receives the tensor to operate on.
15 | class ClipperBase {
16 | public:
17 |   virtual ~ClipperBase() = default;  // clippers are held through shared base pointers
18 |   virtual void clip(Tensor) = 0;
19 | };
20 | 
21 | typedef std::shared_ptr<ClipperBase> ClipperPtr;
22 | 
23 | class Elementwise : public ClipperBase {
24 |   float c_;  // value given at construction
25 | 
26 | public:
27 |   // c defaults to 10.0 and is stored unchanged in c_.
28 |   Elementwise(float c = 10.0) : c_(c) {}
29 | 
30 |   void clip(Tensor t) override;  // declared here, defined elsewhere
31 | };
32 | 
33 | class Norm : public ClipperBase {
34 |   float c_;  // value given at construction
35 | 
36 | public:
37 |   // c defaults to 1.0 and is stored unchanged in c_.
38 |   Norm(float c = 1.0) : c_(c) {}
39 | 
40 |   void clip(Tensor t) override;  // declared here, defined elsewhere
41 | };
42 | 
43 | // Clipper() returns ClipperBasePtr; declare it here, since this header's other alias for the same type is ClipperPtr.
44 | typedef std::shared_ptr<ClipperBase> ClipperBasePtr;
45 | template <class Algorithm, typename... Args>  // builds Algorithm(args...) with new and wraps it
46 | ClipperBasePtr Clipper(Args&&... args) { return ClipperBasePtr(new Algorithm(args...)); }
47 | }  // namespace marian
48 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/node/detail/bool_type.h:
--------------------------------------------------------------------------------
 1 | #ifndef NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | namespace YAML {
11 | namespace detail {
12 | struct unspecified_bool {
13 |   struct NOT_ALLOWED;  // only declared; no definition appears in this header
14 |   static void true_value(NOT_ALLOWED*) {}  // empty function whose address is the non-null result used by the macro below
15 | };
16 | typedef void (*unspecified_bool_type)(unspecified_bool::NOT_ALLOWED*);  // pointer-to-function type matching true_value
17 | }
18 | }
19 | // Expands to a conversion operator to unspecified_bool_type: 0 when operator!() is true, else &true_value.
20 | #define YAML_CPP_OPERATOR_BOOL()                                            \
21 |   operator YAML::detail::unspecified_bool_type() const {                    \
22 |     return this->operator!() ? 0                                            \
23 |                              : &YAML::detail::unspecified_bool::true_value; \
24 |   }
25 | 
26 | #endif  // NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
27 | 


--------------------------------------------------------------------------------
/src/tensors/backend.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common/definitions.h"
 4 | #include "tensors/rand.h"
 5 | 
 6 | namespace marian {
 7 | 
 8 | // Base class for backends: keeps the device id, the seed, a random generator
 9 | // created from both, and a clip value.
10 | class Backend {
11 | protected:
12 |   DeviceId deviceId_;
13 |   size_t seed_;
14 |   Ptr<RandomGenerator> randomGenerator_;
15 | 
16 |   float clipValue_{0.f};  // global clipping value for matrix-multiplies, should soon be removed.
17 | 
18 | public:
19 |   Backend(DeviceId deviceId, size_t seed)
20 |       : deviceId_(deviceId), seed_(seed), randomGenerator_(createRandomGenerator(seed, deviceId)) {}
21 | 
22 |   virtual DeviceId getDeviceId() { return deviceId_; }
23 |   virtual Ptr<RandomGenerator> getRandomGenerator() { return randomGenerator_; }
24 | 
25 |   // for GPU only, calls cudaSetDevice, does nothing on CPU. Maybe change name.
26 |   virtual void setDevice() = 0;
27 |   virtual void synchronize() = 0;
28 | 
29 |   // Clip value accessors; only the setter is virtual.
30 |   virtual void setClip(float clipValue) { clipValue_ = clipValue; }
31 |   float getClip() { return clipValue_; }
32 | };
33 | 
34 | Ptr<Backend> BackendByDeviceId(DeviceId deviceId, size_t seed);
35 | 
36 | }  // namespace marian
37 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/node/emit.h:
--------------------------------------------------------------------------------
 1 | #ifndef NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include <string>
11 | #include <iosfwd>
12 | 
13 | #include "yaml-cpp/dll.h"
14 | 
15 | namespace YAML {
16 | class Emitter;
17 | class Node;
18 | 
19 | /**
20 |  * Emits the node to the given {@link Emitter}. If there is an error in writing,
21 |  * {@link Emitter#good} will return false.
22 |  */
23 | YAML_CPP_API Emitter& operator<<(Emitter& out, const Node& node);
24 | 
25 | /** Emits the node to the given output stream. */
26 | YAML_CPP_API std::ostream& operator<<(std::ostream& out, const Node& node);
27 | 
28 | /** Converts the node to a YAML string. */
29 | YAML_CPP_API std::string Dump(const Node& node);
30 | }  // namespace YAML
31 | 
32 | #endif  // NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
33 | 


--------------------------------------------------------------------------------
/src/data/iterator_facade.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // simplistic replacement for boost::iterator_facade
 4 | template <class Iterator, class Item>
 5 | struct IteratorFacade {
 6 |   // A concrete iterator derives from IteratorFacade<Self, ValueType> and
 7 |   // implements the three primitives below; every operator is built on them.
 8 |   virtual bool equal(const Iterator& other) const = 0;
 9 |   virtual const Item& dereference() const = 0;
10 |   virtual void increment() = 0;
11 | 
12 |   // Comparisons delegate to equal().
13 |   bool operator==(const Iterator& rhs) const {
14 |     return equal(rhs);
15 |   }
16 |   bool operator!=(const Iterator& rhs) const {
17 |     return !equal(rhs);
18 |   }
19 | 
20 |   // Element access delegates to dereference().
21 |   const Item& operator*() const { return dereference(); }
22 |   const Item* operator->() const { return &dereference(); }
23 | 
24 |   // prefix ++: advance first, then return this object as the derived type
25 |   Iterator& operator++() {
26 |     increment();
27 |     Iterator& self = dynamic_cast<Iterator&>(*this);
28 |     return self;
29 |   }
30 | 
31 |   // postfix ++: copy the derived object, advance, return the copy
32 |   Iterator operator++(int) {
33 |     Iterator& self = dynamic_cast<Iterator&>(*this);
34 |     Iterator snapshot = self;
35 |     increment();
36 |     return snapshot;
37 |   }
38 | 
39 |   // Note: the downcasts above use dynamic_cast, which can throw std::bad_cast.
40 | };
41 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/src/test_main.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2016 Google Inc.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "flags.h"
16 | #include "testharness.h"
17 | 
18 | #ifdef OS_WIN
19 | DEFINE_string(data_dir, "..\\data", "Data directory");
20 | #else
21 | DEFINE_string(data_dir, "../data", "Data directory");
22 | #endif
23 | 
24 | int main(int argc, char **argv) {
25 |   std::vector<std::string> rest_args;
26 |   sentencepiece::flags::ParseCommandLineFlags(argc, argv, &rest_args);
27 | 
28 |   sentencepiece::test::RunAllTests();
29 |   return 0;
30 | }
31 | 


--------------------------------------------------------------------------------
/src/graph/node_operators.cpp:
--------------------------------------------------------------------------------
 1 | #include "node_operators.h"
 2 | #include "expression_graph.h"
 3 | 
 4 | #include "tensors/tensor_operators.h"
 5 | 
 6 | namespace marian {
 7 | 
 8 | size_t ConstantNode::allocate() {
 9 |   size_t elements = 0;
10 |   if(!val_) {
11 |     graph()->allocateForward(shared_from_this());
12 |     elements = val_->shape().elements();
13 |   }
14 |   return elements;
15 | }
16 | 
17 | void ConstantNode::init() {
18 |   if(!initialized_) {
19 |     (*init_)(val_);
20 |     initialized_ = true;
21 |   }
22 |   init_.reset();
23 | }
24 | 
25 | ParamNode::ParamNode(Ptr<ExpressionGraph> graph,
26 |                      const Shape& shape,
27 |                      const NodeInitializer& init,
28 |                      bool fixed)
29 |     : Node(graph, shape),  // TODO: add value_type
30 |       init_(new NodeInitializer(init)),
31 |       initialized_(false) {
32 |   setTrainable(!fixed);
33 |   setMemoize(graph->isInference());
34 | }
35 | 
36 | void ParamNode::init() {
37 |   if(!initialized_) {
38 |     (*init_)(val_);
39 |     initialized_ = true;
40 |   }
41 |   init_.reset();
42 | }
43 | }  // namespace marian
44 | 


--------------------------------------------------------------------------------
/contrib/other-builds/eclipse/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>marian</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
10 | 			<triggers>clean,full,incremental,</triggers>
11 | 			<arguments>
12 | 			</arguments>
13 | 		</buildCommand>
14 | 		<buildCommand>
15 | 			<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
16 | 			<triggers>full,incremental,</triggers>
17 | 			<arguments>
18 | 			</arguments>
19 | 		</buildCommand>
20 | 	</buildSpec>
21 | 	<natures>
22 | 		<nature>org.eclipse.cdt.core.cnature</nature>
23 | 		<nature>org.eclipse.cdt.core.ccnature</nature>
24 | 		<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
25 | 		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
26 | 	</natures>
27 | 	<linkedResources>
28 | 		<link>
29 | 			<name>src</name>
30 | 			<type>2</type>
31 | 			<locationURI>PARENT-1-PROJECT_LOC/src</locationURI>
32 | 		</link>
33 | 	</linkedResources>
34 | </projectDescription>
35 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/src/model_factory.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2016 Google Inc.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef MODEL_FACTORY_H_
16 | #define MODEL_FACTORY_H_
17 | 
18 | #include <memory>
19 | #include "model_interface.h"
20 | #include "sentencepiece_model.pb.h"
21 | 
22 | namespace sentencepiece {
23 | 
// Factory exposing a single static entry point that builds a model object
// from a ModelProto. The class declares no state of its own.
class ModelFactory {
 public:
  // Creates Model instance from |model_proto|.
  // The instance is returned through the ModelInterface base type as a
  // std::unique_ptr, so ownership passes to the caller.
  // NOTE(review): which concrete model is created, and whether the result can
  // be null, is decided by the implementation, which is not visible here.
  static std::unique_ptr<ModelInterface> Create(const ModelProto &model_proto);
};
29 | }  // namespace sentencepiece
30 | #endif  // MODEL_FACTORY_H_
31 | 


--------------------------------------------------------------------------------
/scripts/contrib/inject_ctt.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from __future__ import print_function
 4 | 
 5 | import sys
 6 | import argparse
 7 | import numpy as np
 8 | 
 9 | DESC = "Add 'decoder_c_tt' required by Amun to a model trained with Marian v1.6.0+"
10 | 
11 | 
def main():
    """Add an empty 'decoder_c_tt' tensor to a model archive.

    Loads the archive given by ``--input``. If it already contains
    'decoder_c_tt' the script reports that and exits without writing anything.
    Otherwise every tensor of the input is copied, together with a new
    'decoder_c_tt' tensor of shape (1, 0), into the archive given by
    ``--output``.
    """
    args = parse_args()

    print("Loading model {}".format(args.input))
    # The context manager closes the archive as soon as all tensors have been
    # read, so no file handle stays open while the output is written.
    with np.load(args.input) as model:
        if "decoder_c_tt" in model:
            print("The model already contains 'decoder_c_tt'")
            # sys.exit() instead of the builtin exit(): the latter is injected
            # by the site module and is not guaranteed to exist.
            sys.exit()

        print("Adding 'decoder_c_tt' to the model")
        amun = {"decoder_c_tt": np.zeros((1, 0))}
        for tensor_name in model:
            amun[tensor_name] = model[tensor_name]

    print("Saving model...")
    np.savez(args.output, **amun)
29 | 
30 | 
def parse_args():
    """Parse the command line and return the resulting namespace.

    Both options are mandatory: ``-i/--input`` (input model) and
    ``-o/--output`` (output model).
    """
    cli = argparse.ArgumentParser(description=DESC)
    options = (
        ("-i", "--input", "input model"),
        ("-o", "--output", "output model"),
    )
    for short_flag, long_flag, help_text in options:
        cli.add_argument(short_flag, long_flag, help=help_text, required=True)
    return cli.parse_args()
36 | 
37 | 
38 | if __name__ == "__main__":
39 |     main()
40 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2008-2015 Jesse Beder.
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/src/tensors/gpu/prod.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "tensors/tensor.h"
 4 | #include "tensors/tensor_operators.h"
 5 | 
 6 | #include "functional/functional.h"
 7 | 
namespace marian {
namespace gpu {

// Declarations of the GPU matrix-product routines; the definitions live in
// another translation unit.
// NOTE(review): the descriptions below are inferred from the parameter names
// (BLAS-style GEMM conventions) - confirm against the implementation.

// Matrix product of A and B written into C.
// transA / transB presumably select whether A / B are used transposed.
// beta presumably scales the existing contents of C (default 0) and scalar
// presumably scales the product (default 1).
void Prod(marian::Tensor C,
          const marian::Tensor& A,
          const marian::Tensor& B,
          bool transA,
          bool transB,
          float beta = 0,
          float scalar = 1);

// Same parameters as Prod plus an additional bias tensor, which is presumably
// added to the product.
void ProdWithBias(marian::Tensor C,
                  const marian::Tensor& A,
                  const marian::Tensor& B,
                  const marian::Tensor& bias,
                  bool transA,
                  bool transB,
                  float beta = 0,
                  float scalar = 1);

// Batched variant of Prod. Takes an allocator in addition to the tensors,
// presumably for temporary device memory.
// Note that A and B are taken by value here, unlike in Prod and ProdWithBias
// where they are taken by const reference.
void ProdBatched(marian::Tensor C,
                 Ptr<Allocator> allocator,
                 const marian::Tensor A,
                 const marian::Tensor B,
                 bool transA,
                 bool transB,
                 float beta = 0,
                 float scalar = 1);
}  // namespace gpu
}  // namespace marian
38 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/formatter.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include <spdlog/details/log_msg.h>
 9 | 
10 | #include <vector>
11 | #include <string>
12 | #include <memory>
13 | 
namespace spdlog
{
namespace details
{
// Forward declaration only; pattern_formatter stores these by unique_ptr.
class flag_formatter;
}

// Abstract interface for message formatters.
class formatter
{
public:
    virtual ~formatter() {}
    // Formats the given message. The message is passed by non-const reference,
    // so implementations may modify it.
    virtual void format(details::log_msg& msg) = 0;
};

// Formatter that is constructed from a pattern string.
// Instances are neither copy-constructible nor copy-assignable.
class pattern_formatter : public formatter
{

public:
    explicit pattern_formatter(const std::string& pattern);
    pattern_formatter(const pattern_formatter&) = delete;
    pattern_formatter& operator=(const pattern_formatter&) = delete;
    void format(details::log_msg& msg) override;
private:
    // The pattern string (constant for the lifetime of the formatter).
    const std::string _pattern;
    // Owned flag formatters.
    // NOTE(review): presumably one entry per element of the compiled pattern -
    // confirm in pattern_formatter_impl.h.
    std::vector<std::unique_ptr<details::flag_formatter>> _formatters;
    void handle_flag(char flag);
    void compile_pattern(const std::string& pattern);
};
}
43 | 
44 | #include <spdlog/details/pattern_formatter_impl.h>
45 | 
46 | 


--------------------------------------------------------------------------------
/src/3rd_party/cnpy/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) Carl Rogers, 2011
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/tensors/gpu/backend.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "common/config.h"
 4 | #include "tensors/backend.h" // note: this is one folder up
 5 | #include "tensors/gpu/cuda_helpers.h"
 6 | 
 7 | #include <cuda.h>
 8 | #include <cublas_v2.h>
 9 | #include <curand.h>
10 | 
11 | namespace marian {
12 | namespace gpu {
13 | 
14 | class Backend : public marian::Backend {
15 | public:
16 |   Backend(DeviceId deviceId, size_t seed) : marian::Backend(deviceId, seed) {
17 |     setDevice();
18 |     setHandles();
19 |   }
20 | 
21 |   ~Backend() {
22 |     setDevice();
23 |     cublasDestroy(cublasHandle_);
24 |   }
25 | 
26 |   void setDevice() override { cudaSetDevice((int)deviceId_.no); }
27 | 
28 |   void synchronize() override { cudaStreamSynchronize(0); }
29 | 
30 |   cublasHandle_t getCublasHandle() { return cublasHandle_; }
31 | 
32 | private:
33 |   cublasHandle_t cublasHandle_;
34 | 
35 |   void setHandles() {
36 |     cublasHandle_ = create_handle();
37 |   }
38 | 
39 |   cublasHandle_t create_handle() {
40 |     cublasHandle_t cublasHandle;
41 |     cublasCreate(&cublasHandle);
42 |     return cublasHandle;
43 |   }
44 | };
45 | }  // namespace gpu
46 | }  // namespace marian
47 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/node/iterator.h:
--------------------------------------------------------------------------------
 1 | #ifndef VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include "yaml-cpp/dll.h"
11 | #include "yaml-cpp/node/node.h"
12 | #include "yaml-cpp/node/detail/iterator_fwd.h"
13 | #include "yaml-cpp/node/detail/iterator.h"
14 | #include <list>
15 | #include <utility>
16 | #include <vector>
17 | 
namespace YAML {
namespace detail {
// Value type that is both a Node and a std::pair<Node, Node>.
// Depending on the constructor used, either the Node part or the pair part
// carries the data, and the other part is built from Node::ZombieNode.
// NOTE(review): presumably the Node part is used when iterating sequences and
// the pair part when iterating maps - confirm against the iterator code.
struct iterator_value : public Node, std::pair<Node, Node> {
  // Default-constructs both the Node part and the pair part.
  iterator_value() {}
  // Single-node value: the Node part is a copy of rhs, both pair members are
  // zombie nodes.
  explicit iterator_value(const Node& rhs)
      : Node(rhs),
        std::pair<Node, Node>(Node(Node::ZombieNode), Node(Node::ZombieNode)) {}
  // Key/value entry: the pair part holds (key, value), the Node part is a
  // zombie node.
  explicit iterator_value(const Node& key, const Node& value)
      : Node(Node::ZombieNode), std::pair<Node, Node>(key, value) {}
};
}
}
30 | 
31 | #endif  // VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
32 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/sinks/base_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | //
 8 | // base sink templated over a mutex (either dummy or real)
 9 | // concrete implementation should only override the _sink_it method.
10 | // all locking is taken care of here so no locking needed by the implementors..
11 | //
12 | 
13 | #include <spdlog/sinks/sink.h>
14 | #include <spdlog/formatter.h>
15 | #include <spdlog/common.h>
16 | #include <spdlog/details/log_msg.h>
17 | 
18 | #include <mutex>
19 | 
20 | namespace spdlog
21 | {
22 | namespace sinks
23 | {
// Sink base class that serializes log() calls with a mutex of type Mutex.
// Derived sinks implement _sink_it(), which log() calls while holding the lock.
// Instances are neither copy-constructible nor copy-assignable.
template<class Mutex>
class base_sink:public sink
{
public:
    base_sink():_mutex() {}
    virtual ~base_sink() = default;

    base_sink(const base_sink&) = delete;
    base_sink& operator=(const base_sink&) = delete;

    // Locks _mutex for the duration of the call and forwards msg to _sink_it().
    void log(const details::log_msg& msg) override
    {
        std::lock_guard<Mutex> lock(_mutex);
        _sink_it(msg);
    }

protected:
    // Writes the message; called by log() with _mutex already held.
    virtual void _sink_it(const details::log_msg& msg) = 0;
    // Guards _sink_it(); also accessible to derived classes.
    Mutex _mutex;
};
44 | }
45 | }
46 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/doc/special_symbols.md:
--------------------------------------------------------------------------------
 1 | # Use custom symbols
 2 | SentencePiece model supports two types of special symbols.
 3 | 
 4 | ## Control symbol
 5 | Control symbols are used to encode special indicators for the decoder to change the behavior dynamically.
 6 | Examples include language indicators in multilingual models. `<s>` and `</s>` are reserved control symbols.
 7 | Control symbols must be inserted outside of the SentencePiece segmentation. Developers need to take the responsibility to insert these symbols in data generation and decoding.
 8 | 
 9 | It is guaranteed that control symbols have no corresponding surface strings in the original user input. Control symbols are decoded into empty strings.
10 | 
11 | ## User defined symbol
12 | A user-defined symbol is handled as one piece in any context. If this symbol is included in the input text, it is always extracted as one piece.
13 | 
14 | ## Specify special symbols in training time
15 | Use the `--control_symbols` and `--user_defined_symbols` flags as follows:
16 | 
17 | ```
18 | % spm_train --control_symbols=<foo>,<bar> --user_defined_symbols=<user1>,<user2> --input=<input file> --model_prefix=<model file> --vocab_size=8000
19 | ```
20 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/include/spdlog/details/log_msg.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include "spdlog/common.h"
 9 | #include "spdlog/details/os.h"
10 | 
11 | 
12 | #include <string>
13 | #include <utility>
14 | 
15 | namespace spdlog
16 | {
17 | namespace details
18 | {
19 | struct log_msg
20 | {
21 |     log_msg() = default;
22 |     log_msg(const std::string *loggers_name, level::level_enum lvl) :
23 |         logger_name(loggers_name),
24 |         level(lvl),
25 |         msg_id(0)
26 |     {
27 | #ifndef SPDLOG_NO_DATETIME
28 |         time = os::now();
29 | #endif
30 | 
31 | #ifndef SPDLOG_NO_THREAD_ID
32 |         thread_id = os::thread_id();
33 | #endif
34 |     }
35 | 
36 |     log_msg(const log_msg& other)  = delete;
37 |     log_msg& operator=(log_msg&& other) = delete;
38 |     log_msg(log_msg&& other) = delete;
39 | 
40 | 
41 |     const std::string *logger_name;
42 |     level::level_enum level;
43 |     log_clock::time_point time;
44 |     size_t thread_id;
45 |     fmt::MemoryWriter raw;
46 |     fmt::MemoryWriter formatted;
47 |     size_t msg_id;
48 | };
49 | }
50 | }
51 | 


--------------------------------------------------------------------------------
/cmake/FindTcmalloc.cmake:
--------------------------------------------------------------------------------
 1 | # - Find Tcmalloc
 2 | # Find the native Tcmalloc includes and library
 3 | #
 4 | #  Tcmalloc_INCLUDE_DIR - where to find Tcmalloc.h, etc.
 5 | #  Tcmalloc_LIBRARIES   - List of libraries when using Tcmalloc.
 6 | #  Tcmalloc_FOUND       - True if Tcmalloc found.
 7 | 
 8 | find_path(Tcmalloc_INCLUDE_DIR google/tcmalloc.h)
 9 | 
10 | if (USE_TCMALLOC)
11 |   set(Tcmalloc_NAMES tcmalloc)
12 | else ()
13 |   set(Tcmalloc_NAMES tcmalloc_minimal tcmalloc)
14 | endif ()
15 | 
16 | find_library(Tcmalloc_LIBRARY NAMES ${Tcmalloc_NAMES})
17 | 
18 | if (Tcmalloc_INCLUDE_DIR AND Tcmalloc_LIBRARY)
19 |   set(Tcmalloc_FOUND TRUE)
20 |   set( Tcmalloc_LIBRARIES ${Tcmalloc_LIBRARY} )
21 | else ()
22 |   set(Tcmalloc_FOUND FALSE)
23 |   set( Tcmalloc_LIBRARIES )
24 | endif ()
25 | 
26 | if (Tcmalloc_FOUND)
27 |   message(STATUS "Found Tcmalloc: ${Tcmalloc_LIBRARY}")
28 | else ()
29 |   message(STATUS "Not Found Tcmalloc")
30 |   if (Tcmalloc_FIND_REQUIRED)
31 |     message(STATUS "Looked for Tcmalloc libraries named ${Tcmalloc_NAMES}.")
32 |     message(FATAL_ERROR "Could NOT find Tcmalloc library")
33 |   endif ()
34 | endif ()
35 | 
36 | mark_as_advanced(
37 |   Tcmalloc_LIBRARY
38 |   Tcmalloc_INCLUDE_DIR
39 |   )


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/contrib/anchordict.h:
--------------------------------------------------------------------------------
 1 | #ifndef ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include <vector>
11 | 
12 | #include "../anchor.h"
13 | 
14 | namespace YAML {
15 | /**
16 |  * An object that stores and retrieves values correlating to {@link anchor_t}
17 |  * values.
18 |  *
19 |  * <p>Efficient implementation that can make assumptions about how
20 |  * {@code anchor_t} values are assigned by the {@link Parser} class.
21 |  */
22 | template <class T>
23 | class AnchorDict {
24 |  public:
25 |   void Register(anchor_t anchor, T value) {
26 |     if (anchor > m_data.size()) {
27 |       m_data.resize(anchor);
28 |     }
29 |     m_data[anchor - 1] = value;
30 |   }
31 | 
32 |   T Get(anchor_t anchor) const { return m_data[anchor - 1]; }
33 | 
34 |  private:
35 |   std::vector<T> m_data;
36 | };
37 | }
38 | 
39 | #endif  // ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
40 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/srctxz/Makefile:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # See LICENSE.txt for license information
 5 | #
 6 | 
 7 | include ../../makefiles/common.mk
 8 | include ../../makefiles/version.mk
 9 | BUILDDIR ?= $(abspath ../../build)
10 | TXZPREPDIR  := $(BUILDDIR)/srctxz
11 | PKGDIR  := $(BUILDDIR)/pkg/srctxz/
12 | 
13 | TXZGEN_IN  := $(wildcard *.in)
14 | TXZGEN     := $(TXZGEN_IN:.in=)
15 | TXZTARGETS := $(patsubst %, $(TXZPREPDIR)/%, $(TXZGEN))
16 | 
17 | PKG_REVISION   ?= 3
18 | PKG_ARCH       := $(shell uname -m)
19 | 
20 | prep: $(TXZTARGETS)
21 | 
22 | build: prep
23 | 	$(MAKE) -C ../../src clean
24 | 	@printf "Building source tar.xz package\n"
25 | 	(cd $(BUILDDIR); bash srctxz/create_srctxz.sh)
26 | 	mkdir -p $(PKGDIR)
27 | 	mv $(BUILDDIR)/../../nccl-src*.txz $(PKGDIR)
28 | 
29 | clean:
30 | 	rm -Rf $(TXZPREPDIR) $(PKGDIR)
31 | 
32 | $(TXZPREPDIR)/% : %.in
33 | 	@printf "Generating %-35s > %s\n" $< $@
34 | 	mkdir -p $(TXZPREPDIR)
35 | 	sed -e "s/\$${nccl:Major}/$(NCCL_MAJOR)/g" \
36 | 	    -e "s/\$${nccl:Minor}/$(NCCL_MINOR)/g" \
37 | 	    -e "s/\$${nccl:Patch}/$(NCCL_PATCH)/g" \
38 | 	    -e "s/\$${nccl:Suffix}/$(NCCL_SUFFIX)/g" \
39 | 	    $< > $@
40 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/include/spdlog/sinks/sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | 
 7 | #pragma once
 8 | 
 9 | #include "spdlog/details/log_msg.h"
10 | 
namespace spdlog
{
namespace sinks
{
// Abstract base class of all sinks. Holds the sink's own log level and
// declares the log()/flush() interface that concrete sinks implement.
class sink
{
public:
    // A new sink starts at level::trace.
    sink()
    {
        _level = level::trace;
    }

    virtual ~sink() {}
    // Handles one log message.
    virtual void log(const details::log_msg& msg) = 0;
    virtual void flush() = 0;

    // True if a message of msg_level is at or above this sink's level.
    bool should_log(level::level_enum msg_level) const;
    // Sets this sink's level.
    void set_level(level::level_enum log_level);
    // Returns this sink's current level.
    level::level_enum level() const;

private:
    // Current level of the sink; level_t is declared elsewhere and is accessed
    // through load()/store() below.
    level_t _level;

};

// Compares msg_level against the stored level, which is read with relaxed
// memory ordering.
inline bool sink::should_log(level::level_enum msg_level) const
{
    return msg_level >= _level.load(std::memory_order_relaxed);
}

// Stores the new level (store() is called without an explicit memory order).
inline void sink::set_level(level::level_enum log_level)
{
    _level.store(log_level);
}

// Reads the stored level with relaxed memory ordering and converts it back to
// the level enum.
inline level::level_enum sink::level() const
{
    return static_cast<spdlog::level::level_enum>(_level.load(std::memory_order_relaxed));
}

}
}
53 | 
54 | 


--------------------------------------------------------------------------------
/src/3rd_party/zstr/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Matei David, Ontario Institute for Cancer Research
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE. 
22 | 


--------------------------------------------------------------------------------
/src/3rd_party/SQLiteCpp/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2012-2016 Sebastien Rombauts (sebastien.rombauts@gmail.com)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is furnished
10 | to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
20 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/src/include/enqueue.h:
--------------------------------------------------------------------------------
 1 | /*************************************************************************
 2 |  * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
 3 |  *
 4 |  * See LICENSE.txt for license information
 5 |  ************************************************************************/
 6 | 
 7 | #ifndef NCCL_ENQUEUE_H_
 8 | #define NCCL_ENQUEUE_H_
 9 | 
10 | #include "core.h"
11 | #include "group.h"
12 | 
// Pointer to a collective function: takes send/receive buffers, an element
// count, data type, reduction op, root, communicator and CUDA stream, and
// returns an ncclResult_t.
typedef ncclResult_t(*ncclFunc_t)(const void* sendbuff, void* recvbuff, size_t count,
    ncclDataType_t type, ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream);

// Takes a collective function pointer, the primitive's name and the same
// arguments as ncclFunc_t.
// NOTE(review): presumably validates the arguments before invoking/enqueuing
// func - confirm in the implementation.
ncclResult_t ncclEnqueueCheck(ncclFunc_t func, const char* primName, const void* sendbuff,
    void* recvbuff, size_t count, ncclDataType_t type, ncclRedOp_t op, int root,
    ncclComm_t comm, cudaStream_t stream);
// CPU-side barrier helpers and barrier/event enqueue functions operating on a
// communicator. All of them report their status as ncclResult_t.
// NOTE(review): isLast is an output parameter; presumably set for the last
// caller to arrive at the barrier - confirm in the implementation.
ncclResult_t ncclCpuBarrierIn(ncclComm_t comm, int* isLast);
ncclResult_t ncclCpuBarrierLast(ncclComm_t comm);
ncclResult_t ncclCpuBarrierOut(ncclComm_t comm);
ncclResult_t ncclBarrierEnqueue(ncclComm_t comm);
ncclResult_t ncclBarrierEnqueueWait(ncclComm_t comm);
ncclResult_t ncclEnqueueEvents(ncclComm_t comm);
25 | 
26 | #endif // End include guard
27 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License 
 2 | 
 3 | Copyright (c) 2016 Marcin Junczys-Dowmunt, the University of Edinburgh, Adam 
 4 | Mickiewicz University
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in
14 | all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/src/trainer_factory.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2016 Google Inc.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef TRAINER_FACTORY_H_
16 | #define TRAINER_FACTORY_H_
17 | 
18 | #include <memory>
19 | #include "sentencepiece_model.pb.h"
20 | #include "trainer_interface.h"
21 | 
22 | namespace sentencepiece {
23 | 
// Factory exposing a single static entry point that builds a trainer object
// from a trainer spec and a normalizer spec. The class declares no state of
// its own.
class TrainerFactory {
 public:
  // Creates Trainer instance from |trainer_spec| and |normalizer_spec|.
  // The instance is returned through the TrainerInterface base type as a
  // std::unique_ptr, so ownership passes to the caller.
  // NOTE(review): which concrete trainer is created is decided by the
  // implementation, which is not visible here.
  static std::unique_ptr<TrainerInterface> Create(
      const TrainerSpec &trainer_spec, const NormalizerSpec &normalizer_spec);
};
30 | }  // namespace sentencepiece
31 | #endif  // TRAINER_FACTORY_H_
32 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/src/word_model.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2016 Google Inc.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
14 | 
15 | #ifndef WORD_MODEL_H_
16 | #define WORD_MODEL_H_
17 | 
18 | #include "model_interface.h"
19 | #include "sentencepiece_model.pb.h"
20 | 
21 | namespace sentencepiece {
22 | namespace word {
23 | 
// Tokenize text with whitespaces.
// Word-level implementation of ModelInterface; only Encode() is overridden
// in this declaration.
class Model : public ModelInterface {
 public:
  // Constructs the model from |model_proto|.
  explicit Model(const ModelProto &model_proto);
  ~Model() override;

  // Encodes |normalized| into an EncodeResult.
  // NOTE(review): the parameter name suggests the text is expected to be
  // normalized already -- confirm against the caller.
  EncodeResult Encode(absl::string_view normalized) const override;
};
32 | }  // namespace word
33 | }  // namespace sentencepiece
34 | #endif  // WORD_MODEL_H_
35 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Gabi Melman.                                       
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/src/data/revo_stub.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Created by revo on 8/2/19.
 3 | //
 4 | 
 5 | #pragma once
 6 | 
 7 | #include "graph/expression_graph.h"
 8 | #include "marian.h"
 9 | 
10 | namespace marian {
11 | namespace data {
12 | 
13 | class DataTrainingBase {
14 | protected:
15 |     typedef typename CorpusBase::Sample Sample;
16 |     typedef std::vector<Sample> Samples;  // @TODO: type names should be capitalized
17 | 
18 |     Ptr<Options> options_;
19 |     std::vector<Ptr<Vocab>> vocabs_;
20 | public:
21 |     DataTrainingBase(Ptr<Options> options, std::vector<Ptr<Vocab>> &vocabs) : options_(options), vocabs_(vocabs) {};
22 | 
23 |     virtual void run(Samples &batch, size_t e) = 0;
24 | 
25 |     virtual float get_mod_from_emb(size_t step) { return 0.0f;};
26 | 
27 |     virtual float get_mod_from_graph(size_t step) { return 0.0f;};
28 | 
29 |     virtual float get_init_value() { return 0.0f;};
30 | };
31 | 
    // Factory functions returning a DataTrainingBase implementation built from
    // the given options, vocabularies and expression graphs. The definitions
    // live outside this header.
    // NOTE(review): the difference between the "Gap" and "Mod" variants cannot
    // be determined from these declarations -- see the implementation files.
    Ptr<DataTrainingBase> NewGapTraining(Ptr<Options> options, std::vector<Ptr<Vocab>> vocabs, const std::vector<Ptr<ExpressionGraph>> &graphs);

    Ptr<DataTrainingBase> NewModTraining(Ptr<Options> options, std::vector<Ptr<Vocab>> vocabs, const std::vector<Ptr<ExpressionGraph>> &graphs);
35 | }
36 | }  // namespace marian


--------------------------------------------------------------------------------
/examples/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Marcin Junczys-Dowmunt, the University of Edinburgh, Adam
 4 | Mickiewicz University
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in
14 | all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/bench/glog-bench-mt.cpp:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #include <thread>
 7 | #include <vector>
 8 | #include <atomic>
 9 | 
10 | #include "glog/logging.h"
11 | 
12 | using namespace std;
13 | 
14 | int main(int argc, char* argv[])
15 | {
16 | 
17 |     int thread_count = 10;
18 |     if(argc > 1)
19 |         thread_count = atoi(argv[1]);
20 | 
21 |     int howmany = 1000000;
22 | 
23 |     FLAGS_logtostderr = 0;
24 |     FLAGS_log_dir = "logs";
25 |     google::InitGoogleLogging(argv[0]);
26 | 
27 |     std::atomic<int > msg_counter {0};
28 |     vector<thread> threads;
29 | 
30 |     for (int t = 0; t < thread_count; ++t)
31 |     {
32 |         threads.push_back(std::thread([&]()
33 |         {
34 |             while (true)
35 |             {
36 |                 int counter = ++msg_counter;
37 |                 if (counter > howmany) break;
38 |                 LOG(INFO) << "glog message #" << counter << ": This is some text for your pleasure";
39 |             }
40 |         }));
41 |     }
42 | 
43 | 
44 |     for(auto &t:threads)
45 |     {
46 |         t.join();
47 |     };
48 | 
49 |     return 0;
50 | }
51 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/src/char_model.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2016 Google Inc.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
14 | 
15 | #ifndef CHAR_MODEL_H_
16 | #define CHAR_MODEL_H_
17 | 
18 | #include "model_interface.h"
19 | #include "sentencepiece_model.pb.h"
20 | 
21 | namespace sentencepiece {
22 | namespace character {
23 | 
// Tokenize text into character sequence
// Character-level implementation of ModelInterface; only Encode() is
// overridden in this declaration.
class Model : public ModelInterface {
 public:
  // Constructs the model from |model_proto|.
  explicit Model(const ModelProto &model_proto);
  ~Model() override;

  // Encodes |normalized| into an EncodeResult.
  // NOTE(review): the parameter name suggests the text is expected to be
  // normalized already -- confirm against the caller.
  EncodeResult Encode(absl::string_view normalized) const override;
};
32 | }  // namespace character
33 | }  // namespace sentencepiece
34 | #endif  // CHAR_MODEL_H_
35 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/third_party/esaxx/LICENSE:
--------------------------------------------------------------------------------
 1 | This is the esaxx copyright.
 2 | 
 3 | Copyright (c) 2010 Daisuke Okanohara All Rights Reserved.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person
 6 | obtaining a copy of this software and associated documentation
 7 | files (the "Software"), to deal in the Software without
 8 | restriction, including without limitation the rights to use,
 9 | copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the
11 | Software is furnished to do so, subject to the following
12 | conditions:
13 | 
14 | The above copyright notice and this permission notice shall be
15 | included in all copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 | OTHER DEALINGS IN THE SOFTWARE.
25 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/regex_yaml.cpp:
--------------------------------------------------------------------------------
 1 | #include "regex_yaml.h"
 2 | 
 3 | namespace YAML {
 4 | // constructors
 5 | RegEx::RegEx() : m_op(REGEX_EMPTY) {}
 6 | 
 7 | RegEx::RegEx(REGEX_OP op) : m_op(op) {}
 8 | 
 9 | RegEx::RegEx(char ch) : m_op(REGEX_MATCH), m_a(ch) {}
10 | 
11 | RegEx::RegEx(char a, char z) : m_op(REGEX_RANGE), m_a(a), m_z(z) {}
12 | 
13 | RegEx::RegEx(const std::string& str, REGEX_OP op) : m_op(op) {
14 |   for (std::size_t i = 0; i < str.size(); i++)
15 |     m_params.push_back(RegEx(str[i]));
16 | }
17 | 
18 | // combination constructors
19 | RegEx operator!(const RegEx& ex) {
20 |   RegEx ret(REGEX_NOT);
21 |   ret.m_params.push_back(ex);
22 |   return ret;
23 | }
24 | 
25 | RegEx operator||(const RegEx& ex1, const RegEx& ex2) {
26 |   RegEx ret(REGEX_OR);
27 |   ret.m_params.push_back(ex1);
28 |   ret.m_params.push_back(ex2);
29 |   return ret;
30 | }
31 | 
32 | RegEx operator&&(const RegEx& ex1, const RegEx& ex2) {
33 |   RegEx ret(REGEX_AND);
34 |   ret.m_params.push_back(ex1);
35 |   ret.m_params.push_back(ex2);
36 |   return ret;
37 | }
38 | 
39 | RegEx operator+(const RegEx& ex1, const RegEx& ex2) {
40 |   RegEx ret(REGEX_SEQ);
41 |   ret.m_params.push_back(ex1);
42 |   ret.m_params.push_back(ex2);
43 |   return ret;
44 | }
45 | }
46 | 


--------------------------------------------------------------------------------
/cmake/FindNCCL.cmake:
--------------------------------------------------------------------------------
 1 | set(NCCL_INC_PATHS
 2 |     /usr/include
 3 |     /usr/local/include
 4 |     /usr/local/cuda/include
 5 |     $ENV{NCCL_DIR}/include
 6 |     $ENV{CUDA_TOOLKIT_ROOT_DIRCUDA_ROOT}/include
 7 | )
 8 | 
 9 | set(NCCL_LIB_PATHS
10 |     /lib
11 |     /lib64
12 |     /usr/lib
13 |     /usr/lib64
14 |     /usr/local/lib
15 |     /usr/local/lib64
16 |     /usr/local/cuda/lib64
17 |     $ENV{NCCL_DIR}/lib64
18 |     $ENV{CUDA_TOOLKIT_ROOT_DIR}/lib64
19 |     /usr/local/cuda/lib
20 |     $ENV{NCCL_DIR}/lib
21 |     $ENV{CUDA_TOOLKIT_ROOT_DIR}/lib
22 | )
23 | 
24 | find_path(NCCL_INCLUDE_DIR NAMES nccl.h PATHS ${NCCL_INC_PATHS})
25 | 
26 | if (USE_STATIC_LIBS)
27 |   message(STATUS "Trying to find static NCCL library")
28 |   find_library(NCCL_LIBRARIES NAMES libnccl_static.a PATHS ${NCCL_LIB_PATHS})
29 | else (USE_STATIC_LIBS)
30 |   find_library(NCCL_LIBRARIES NAMES nccl PATHS ${NCCL_LIB_PATHS})
31 | endif (USE_STATIC_LIBS)
32 | 
33 | include(FindPackageHandleStandardArgs)
34 | find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIR NCCL_LIBRARIES)
35 | 
36 | if (NCCL_FOUND)
37 |   message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIR}, library: ${NCCL_LIBRARIES})")
38 |   mark_as_advanced(NCCL_INCLUDE_DIR NCCL_LIBRARIES)
39 | endif ()
40 | 


--------------------------------------------------------------------------------
/src/common/cli_helper.cpp:
--------------------------------------------------------------------------------
 1 | #include "common/cli_helper.h"
 2 | #include "common/filesystem.h"
 3 | 
 4 | namespace marian {
 5 | namespace cli {
 6 | 
 7 | void makeAbsolutePaths(YAML::Node& config,
 8 |                        const std::string& configPath,
 9 |                        const std::set<std::string>& PATHS) {
10 |   auto configDir = filesystem::Path{configPath}.parentPath();
11 | 
12 |   auto transformFunc = [&](const std::string& nodePath) -> std::string {
13 |     // Catch stdin/stdout and do not process
14 |     if(nodePath == "stdin" || nodePath == "stdout")
15 |       return nodePath;
16 | 
17 |     // replace relative path w.r.t. config directory
18 |     try {
19 |       return canonical(filesystem::Path{nodePath}, configDir).string();
20 |     } catch(filesystem::FilesystemError& e) {
21 |       // will fail if file does not exist; use parent in that case
22 |       std::cerr << e.what() << std::endl;
23 |       auto parentPath = filesystem::Path{nodePath}.parentPath();
24 |       return (canonical(parentPath, configDir)
25 |               / filesystem::Path{nodePath}.filename())
26 |           .string();
27 |     }
28 |   };
29 | 
30 |   processPaths(config, transformFunc, PATHS);
31 | }
32 | 
33 | }  // namespace cli
34 | }  // namespace marian
35 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/indentation.h:
--------------------------------------------------------------------------------
 1 | #ifndef INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include <iostream>
11 | #include <cstddef>
12 | 
13 | #include "yaml-cpp/ostream_wrapper.h"
14 | 
15 | namespace YAML {
16 | struct Indentation {
17 |   Indentation(std::size_t n_) : n(n_) {}
18 |   std::size_t n;
19 | };
20 | 
21 | inline ostream_wrapper& operator<<(ostream_wrapper& out,
22 |                                    const Indentation& indent) {
23 |   for (std::size_t i = 0; i < indent.n; i++)
24 |     out << ' ';
25 |   return out;
26 | }
27 | 
28 | struct IndentTo {
29 |   IndentTo(std::size_t n_) : n(n_) {}
30 |   std::size_t n;
31 | };
32 | 
33 | inline ostream_wrapper& operator<<(ostream_wrapper& out,
34 |                                    const IndentTo& indent) {
35 |   while (out.col() < indent.n)
36 |     out << ' ';
37 |   return out;
38 | }
39 | }
40 | 
41 | #endif  // INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66
42 | 


--------------------------------------------------------------------------------
/src/common/io.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "3rd_party/yaml-cpp/yaml.h"
 4 | #include "common/io_item.h"
 5 | 
 6 | #include <string>
 7 | #include <vector>
 8 | 
 9 | // interface for handling model files in marian, both *.npz files and
10 | // *.bin files have the same way of accessing them and are identified
11 | // by suffixes (*.npz or *.bin).
12 | 
13 | // Files with the *.bin suffix are supposed to be memory-mappable for
14 | // CPU decoding.
15 | 
namespace marian {
namespace io {

// File-type predicates for model files.
// NOTE(review): presumably these test the file-name suffix (*.npz / *.bin) as
// described in the header comment of this file -- confirm in the definition.
bool isNpz(const std::string& fileName);
bool isBin(const std::string& fileName);

// Fills |yaml| from the entry named |varName| of a model, given either as a
// file name or as a pointer to model data.
// NOTE(review): the expected memory layout behind |ptr| is not visible here.
void getYamlFromModel(YAML::Node& yaml, const std::string& varName, const std::string& fileName);
void getYamlFromModel(YAML::Node& yaml, const std::string& varName, const void* ptr);

// Adds the string |meta| under the name |varName| to |items|.
// NOTE(review): presumably appended as a new Item -- confirm in the definition.
void addMetaToItems(const std::string& meta,
                    const std::string& varName,
                    std::vector<io::Item>& items);

// Loads all items of a model, from a file or from a pointer to model data.
std::vector<Item> loadItems(const std::string& fileName);
std::vector<Item> loadItems(const void* ptr);

// Returns the items of a model located at |ptr|.
// NOTE(review): the name suggests the items reference the mapped memory
// instead of copying it -- confirm before freeing |ptr|.
std::vector<Item> mmapItems(const void* ptr);

// Writes |items| to |fileName|.
void saveItems(const std::string& fileName, const std::vector<Item>& items);

}  // namespace io
}  // namespace marian
38 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/example/example.sln:
--------------------------------------------------------------------------------
 1 | 
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio 14
 4 | VisualStudioVersion = 14.0.25420.1
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example", "example.vcxproj", "{9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}"
 7 | EndProject
 8 | Global
 9 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | 		Debug|Win32 = Debug|Win32
11 | 		Debug|x64 = Debug|x64
12 | 		Release|Win32 = Release|Win32
13 | 		Release|x64 = Release|x64
14 | 	EndGlobalSection
15 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | 		{9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Debug|Win32.ActiveCfg = Debug|Win32
17 | 		{9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Debug|Win32.Build.0 = Debug|Win32
18 | 		{9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Debug|x64.ActiveCfg = Debug|Win32
19 | 		{9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Release|Win32.ActiveCfg = Release|Win32
20 | 		{9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Release|Win32.Build.0 = Release|Win32
21 | 		{9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Release|x64.ActiveCfg = Release|Win32
22 | 	EndGlobalSection
23 | 	GlobalSection(SolutionProperties) = preSolution
24 | 		HideSolutionNode = FALSE
25 | 	EndGlobalSection
26 | EndGlobal
27 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/node/detail/memory.h:
--------------------------------------------------------------------------------
 1 | #ifndef VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include <set>
11 | 
12 | #include "yaml-cpp/dll.h"
13 | #include "yaml-cpp/node/ptr.h"
14 | 
15 | namespace YAML {
16 | namespace detail {
17 | class node;
18 | }  // namespace detail
19 | }  // namespace YAML
20 | 
21 | namespace YAML {
22 | namespace detail {
// Holds a set of shared_node handles (m_nodes).
// NOTE(review): presumably create_node() registers the new node in m_nodes
// and merge() adds the nodes of |rhs| -- the definitions are not in this
// header, confirm there.
class YAML_CPP_API memory {
 public:
  // Returns a reference to a newly created node.
  node& create_node();
  // Combines the contents of |rhs| into this object (rhs is not modified).
  void merge(const memory& rhs);

 private:
  typedef std::set<shared_node> Nodes;
  Nodes m_nodes;
};
32 | 
// Wrapper around a shared_memory handle to a `memory` object; node creation
// is forwarded to that object.
class YAML_CPP_API memory_holder {
 public:
  // Starts with a freshly allocated, empty `memory`.
  memory_holder() : m_pMemory(new memory) {}

  // Forwards to memory::create_node() of the held object.
  node& create_node() { return m_pMemory->create_node(); }
  // Merges with |rhs|; takes a non-const reference, so |rhs| may be modified.
  // NOTE(review): exact post-state of both holders is defined out of line.
  void merge(memory_holder& rhs);

 private:
  shared_memory m_pMemory;
};
43 | }
44 | }
45 | 
46 | #endif  // VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
47 | 


--------------------------------------------------------------------------------
/src/tests/logger_test.cpp:
--------------------------------------------------------------------------------
 1 | #include "common/timer.h"
 2 | #include "common/logging.h"
 3 | 
 4 | #include <iostream>
 5 | #include <string>
 6 | #include <vector>
 7 | 
 8 | // small test program for playing around with spdlog formatting of messages
 9 | 
10 | std::shared_ptr<spdlog::logger> stderrLoggerTest(
11 |     const std::string& name,
12 |     const std::string& pattern,
13 |     const std::vector<std::string>& files) {
14 |   std::vector<spdlog::sink_ptr> sinks;
15 | 
16 |   auto stderr_sink = spdlog::sinks::stderr_sink_mt::instance();
17 |   sinks.push_back(stderr_sink);
18 | 
19 |   for(auto&& file : files) {
20 |     auto file_sink
21 |         = std::make_shared<spdlog::sinks::simple_file_sink_st>(file, true);
22 |     sinks.push_back(file_sink);
23 |   }
24 | 
25 |   auto logger
26 |       = std::make_shared<spdlog::logger>(name, begin(sinks), end(sinks));
27 | 
28 |   spdlog::register_logger(logger);
29 |   logger->set_pattern(pattern);
30 |   return logger;
31 | }
32 | 
33 | int main() {
34 |   std::vector<std::string> logfiles;
35 |   Logger info(stderrLoggerTest("info", "[%Y-%m-%d %T] %v", logfiles));
36 | 
37 |   info->info("hello {:06.2f}", .7);
38 | 
39 |   marian::timer::Timer timer;
40 | 
41 |   info->info("time is {:.5f} bla {:.2f}", timer.elapsed(), .7);
42 | }
43 | 


--------------------------------------------------------------------------------
/scripts/server/client_example.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from __future__ import print_function, unicode_literals, division
 4 | 
 5 | import sys
 6 | import time
 7 | import argparse
 8 | 
 9 | from websocket import create_connection
10 | 
11 | 
def _translate_batch(ws, batch):
    """Send one batch of input lines over `ws` and print the reply.

    The reply is printed with trailing whitespace stripped.
    """
    ws.send(batch)
    result = ws.recv()
    print(result.rstrip())


if __name__ == "__main__":
    # handle command-line options
    parser = argparse.ArgumentParser()
    parser.add_argument("-b", "--batch-size", type=int, default=1)
    parser.add_argument("-p", "--port", type=int, default=8080)
    args = parser.parse_args()

    # open connection
    ws = create_connection("ws://localhost:{}/translate".format(args.port))

    # try/finally guarantees the connection is closed even when sending or
    # receiving fails or the input stream raises.
    try:
        count = 0
        batch = ""
        for line in sys.stdin:
            count += 1
            # Python 2 yields bytes from stdin; Python 3 already yields text
            batch += line.decode('utf-8') if sys.version_info < (3, 0) else line
            if count == args.batch_size:
                # translate the batch
                _translate_batch(ws, batch)
                count = 0
                batch = ""

        if count:
            # translate the remaining sentences
            _translate_batch(ws, batch)
    finally:
        # close connection
        ws.close()
44 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/sinks/ostream_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include <spdlog/details/null_mutex.h>
 9 | #include <spdlog/sinks/base_sink.h>
10 | 
11 | #include <ostream>
12 | #include <mutex>
13 | 
14 | namespace spdlog
15 | {
16 | namespace sinks
17 | {
18 | template<class Mutex>
19 | class ostream_sink: public base_sink<Mutex>
20 | {
21 | public:
22 |     explicit ostream_sink(std::ostream& os, bool force_flush=false) :_ostream(os), _force_flush(force_flush) {}
23 |     ostream_sink(const ostream_sink&) = delete;
24 |     ostream_sink& operator=(const ostream_sink&) = delete;
25 |     virtual ~ostream_sink() = default;
26 | 
27 | protected:
28 |     void _sink_it(const details::log_msg& msg) override
29 |     {
30 |         _ostream.write(msg.formatted.data(), msg.formatted.size());
31 |         if (_force_flush)
32 |             _ostream.flush();
33 |     }
34 | 
35 |     void flush() override
36 |     {
37 |         _ostream.flush();
38 |     }
39 | 
40 |     std::ostream& _ostream;
41 |     bool _force_flush;
42 | };
43 | 
44 | typedef ostream_sink<std::mutex> ostream_sink_mt;
45 | typedef ostream_sink<details::null_mutex> ostream_sink_st;
46 | }
47 | }
48 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/collectionstack.h:
--------------------------------------------------------------------------------
 1 | #ifndef COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include <stack>
11 | #include <cassert>
12 | 
namespace YAML {
// Kinds of collection tracked by CollectionStack; NoCollection is reported
// when the stack is empty.
struct CollectionType {
  enum value { NoCollection, BlockMap, BlockSeq, FlowMap, FlowSeq, CompactMap };
};

// Stack of the collection types that are currently open.
class CollectionStack {
 public:
  // Returns the innermost open collection type, or NoCollection if none.
  CollectionType::value GetCurCollectionType() const {
    if (collectionStack.empty())
      return CollectionType::NoCollection;
    return collectionStack.top();
  }

  // Opens a collection of the given type.
  void PushCollectionType(CollectionType::value type) {
    collectionStack.push(type);
  }

  // Closes the innermost collection. |type| must match the innermost open
  // type; this is only checked by assert in debug builds.
  void PopCollectionType(CollectionType::value type) {
    assert(type == GetCurCollectionType());
    // |type| is unused when assert() compiles away (NDEBUG); the cast
    // silences the warning without leaving a no-op expression statement.
    (void)type;
    collectionStack.pop();
  }

 private:
  std::stack<CollectionType::value> collectionStack;
};
}
38 | 
39 | #endif  // COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
40 | 


--------------------------------------------------------------------------------
/src/functional/tensor.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "functional/array.h"
 4 | #include "functional/shape.h"
 5 | #include "tensors/tensor.h"
 6 | 
 7 | namespace marian {
 8 | namespace functional {
 9 | 
10 | template <typename T>
11 | struct Tensor {
12 |   T* data_;
13 |   functional::Shape shape_;
14 | 
15 |   __HD__ Tensor() {}
16 | 
17 |   __HD__ Tensor(T* ptr, const functional::Shape& shape)
18 |       : data_(ptr), shape_(shape) {}
19 | 
20 |   __H__ Tensor(marian::Tensor t) : data_(t->data()), shape_(t->shape()) {}
21 | 
22 |   __HDI__ float& operator[](size_t i) { return data_[i]; }
23 |   __HDI__ const float& operator[](size_t i) const { return data_[i]; }
24 | 
25 |   __HDI__ float& operator[](
26 |       const functional::Array<int, functional::Shape::size()>& indices) {
27 |     return data_[shape_.index(indices)];
28 |   }
29 | 
30 |   __HDI__ const float& operator[](
31 |       const functional::Array<int, functional::Shape::size()>& indices) const {
32 |     return data_[shape_.index(indices)];
33 |   }
34 | 
35 |   __HDI__ T* data() { return data_; }
36 |   __HDI__ const T* data() const { return data_; }
37 | 
38 |   __HDI__ Shape& shape() { return shape_; }
39 |   __HDI__ const Shape& shape() const { return shape_; }
40 | };
41 | }  // namespace functional
42 | }  // namespace marian


--------------------------------------------------------------------------------
/CL_tools/stat_mod.py:
--------------------------------------------------------------------------------
 1 | # -*- encoding=utf-8 -*-
 2 | import numpy as np
 3 | import codecs
 4 | import sys
 5 | 
 6 | print "python .py vocab.txt iter*.npz"
 7 | 
 8 | out = codecs.open('output', 'w', encoding='utf-8')
 9 | 
10 | vocab = codecs.open(sys.argv[1], 'r', encoding='utf-8', errors='ignore').readlines()
11 | vocab = [s.split(":")[0] for s in vocab]
12 | 
13 | both = []
14 | for i in range(2, len(sys.argv)):
15 |     name = sys.argv[i]
16 |     iter = name.split(".")[1][4:]
17 |     both.append((name, iter))
18 | both.sort(key=lambda x: int(x[1]))
19 | print both
20 | models_names = [x[0] for x in both]
21 | models_iters = [x[1] for x in both]
22 | # loading emb
23 | vocab_stats = [[] for _ in range(len(vocab))]
24 | for name in models_names:
25 |     model = np.load(name)
26 |     # encoder_Wemb
27 |     print "=" * 100
28 |     Wemb = model['encoder_Wemb']
29 |     print Wemb, "===>", Wemb.shape, "===>", name
30 |     vocab_id = 0
31 |     for word in Wemb:
32 |         t = [x * x for x in word]
33 |         mod = np.sum(t) ** 0.5
34 |         vocab_stats[vocab_id].append(mod)
35 |         vocab_id += 1
36 |     # print vocab_stats
37 | 
38 | # output stats
39 | out.write("ID:" + ",".join(models_iters) + '\n')
40 | for id, v_data in enumerate(vocab_stats):
41 |     out.write("%s:%s\n" % (vocab[id], str(v_data)))
42 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/include/spdlog/fmt/bundled/ostream.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 |  Formatting library for C++ - std::ostream support
 3 | 
 4 |  Copyright (c) 2012 - 2016, Victor Zverovich
 5 |  All rights reserved.
 6 | 
 7 |  For the license information refer to format.h.
 8 |  */
 9 | 
10 | #include "ostream.h"
11 | 
12 | namespace fmt {
13 | 
14 | namespace {
15 | // Write the content of w to os.
16 | void write(std::ostream &os, Writer &w) {
17 |   const char *data = w.data();
18 |   typedef internal::MakeUnsigned<std::streamsize>::Type UnsignedStreamSize;
19 |   UnsignedStreamSize size = w.size();
20 |   UnsignedStreamSize max_size =
21 |       internal::to_unsigned((std::numeric_limits<std::streamsize>::max)());
22 |   do {
23 |     UnsignedStreamSize n = size <= max_size ? size : max_size;
24 |     os.write(data, static_cast<std::streamsize>(n));
25 |     data += n;
26 |     size -= n;
27 |   } while (size != 0);
28 | }
29 | }
30 | 
31 | FMT_FUNC void print(std::ostream &os, CStringRef format_str, ArgList args) {
32 |   MemoryWriter w;
33 |   w.write(format_str, args);
34 |   write(os, w);
35 | }
36 | 
37 | FMT_FUNC int fprintf(std::ostream &os, CStringRef format, ArgList args) {
38 |   MemoryWriter w;
39 |   printf(w, format, args);
40 |   write(os, w);
41 |   return static_cast<int>(w.size());
42 | }
43 | }  // namespace fmt
44 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/include/spdlog/sinks/ostream_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include "spdlog/details/null_mutex.h"
 9 | #include "spdlog/sinks/base_sink.h"
10 | 
11 | #include <ostream>
12 | #include <mutex>
13 | 
14 | namespace spdlog
15 | {
16 | namespace sinks
17 | {
18 | template<class Mutex>
19 | class ostream_sink: public base_sink<Mutex>
20 | {
21 | public:
22 |     explicit ostream_sink(std::ostream& os, bool force_flush=false) :_ostream(os), _force_flush(force_flush) {}
23 |     ostream_sink(const ostream_sink&) = delete;
24 |     ostream_sink& operator=(const ostream_sink&) = delete;
25 |     virtual ~ostream_sink() = default;
26 | 
27 | protected:
28 |     void _sink_it(const details::log_msg& msg) override
29 |     {
30 |         _ostream.write(msg.formatted.data(), msg.formatted.size());
31 |         if (_force_flush)
32 |             _ostream.flush();
33 |     }
34 | 
35 |     void _flush() override
36 |     {
37 |         _ostream.flush();
38 |     }
39 | 
40 |     std::ostream& _ostream;
41 |     bool _force_flush;
42 | };
43 | 
44 | typedef ostream_sink<std::mutex> ostream_sink_mt;
45 | typedef ostream_sink<details::null_mutex> ostream_sink_st;
46 | }
47 | }
48 | 


--------------------------------------------------------------------------------
/src/training/graph_group_async_drop.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "training/graph_group_async.h"
 4 | 
 5 | #include "training/gradient_dropping/dropper.h"
 6 | #include "training/gradient_dropping/sparse_tensor.h"
 7 | 
 8 | namespace marian {
 9 | 
10 | /**
11 |  * AsyncGraphGroup variant that overrides init(), pushGradients() and
12 |  * fetchParams() and keeps per-instance gradient-dropping state (droppers and
13 |  * sparse tensors). The dropping settings are read from the options object at
14 |  * construction time.
15 |  */
16 | class AsyncGraphGroupDrop : public AsyncGraphGroup {
17 |   std::vector<int> fetchStep_;
18 |   std::vector<int> pushStep_;
19 |   std::vector<bool> fetch_ready;
20 | 
21 |   bool drop_first = true;
22 | 
23 |   // Values of the "grad-dropping-*" options, captured by the constructor.
24 |   size_t dropping_warmup;
25 |   float droping_rate;
26 |   float dropping_momentum;
27 | 
28 |   std::vector<std::vector<GradientDrop>> droppers_;
29 | 
30 |   std::vector<std::vector<SparseTensor>> sparseGrads_;
31 |   std::vector<std::vector<SparseTensor>> sparseShards_;
32 | 
33 | protected:
34 |   void init(Ptr<data::Batch> batch) override;
35 |   void pushGradients(Tensor newGrads, int device_id) override;
36 |   void fetchParams(Tensor oldParams,
37 |                    const std::vector<Tensor>& params,
38 |                    int device_id) override;
39 | 
40 | public:
41 |   // Reads "grad-dropping-warmup", "grad-dropping-rate" and
42 |   // "grad-dropping-momentum" from `options`.
43 |   AsyncGraphGroupDrop(Ptr<Options> options)
44 |       : AsyncGraphGroup(options),
45 |         dropping_warmup{options->get<size_t>("grad-dropping-warmup")},
46 |         droping_rate{options->get<float>("grad-dropping-rate")},
47 |         dropping_momentum{options->get<float>("grad-dropping-momentum")} {}
48 | };
49 | }  // namespace marian
50 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/tests/utils.cpp:
--------------------------------------------------------------------------------
 1 | #include "includes.h"
 2 | 
 3 | 
 4 | void prepare_logdir()  // drops all loggers, creates logs/ if missing and deletes the files in it
 5 | {
 6 |     spdlog::drop_all();
 7 | #ifdef _WIN32
 8 |     system("if not exist logs mkdir logs");
 9 |     system("del /F /Q logs\\*");
10 | #else
11 |     int status = system("mkdir -p logs");
12 |     status = system("rm -f logs/*");
13 |     static_cast<void>(status);  // exit status is deliberately ignored
14 | #endif
15 | }
16 | 
17 | 
18 | // Returns the whole content of `filename`; throws std::runtime_error naming the file if it cannot be opened.
19 | std::string file_contents(const std::string& filename)
20 | {
21 |     std::ifstream ifs(filename);
22 |     if (!ifs)
23 |         throw std::runtime_error("Failed open file " + filename);
24 |     return std::string((std::istreambuf_iterator<char>(ifs)),
25 |                        (std::istreambuf_iterator<char>()));
26 | }
27 | 
28 | // Counts the lines std::getline reads from `filename`; throws std::runtime_error naming the file if it cannot be opened.
29 | std::size_t count_lines(const std::string& filename)
30 | {
31 |     std::ifstream ifs(filename);
32 |     if (!ifs)
33 |         throw std::runtime_error("Failed open file " + filename);
34 |     std::string line;
35 |     std::size_t counter = 0;
36 |     while(std::getline(ifs, line))
37 |         ++counter;
38 |     return counter;
39 | }
40 | 
41 | // Returns the position reported by tellg() after opening `filename` at its end in binary mode; throws std::runtime_error naming the file if it cannot be opened.
42 | std::size_t get_filesize(const std::string& filename)
43 | {
44 |     std::ifstream ifs(filename, std::ifstream::ate | std::ifstream::binary);
45 |     if (!ifs)
46 |         throw std::runtime_error("Failed open file " + filename);
47 |     return static_cast<std::size_t>(ifs.tellg());
48 | }
49 | 


--------------------------------------------------------------------------------
/src/layers/convolution.cpp:
--------------------------------------------------------------------------------
 1 | #include "layers/convolution.h"
 2 | #include "graph/node_operators_binary.h"
 3 | 
 4 | namespace marian {
 5 | 
 6 | #ifdef CUDNN
 7 | // Forwards the graph to the Factory base class.
 8 | Convolution::Convolution(Ptr<ExpressionGraph> graph) : Factory(graph) {}
 9 | 
10 | // Builds a ConvolutionOp node from `x`, a kernel parameter and a bias parameter.
11 | // Options read: "prefix", "kernel-dims", "kernel-num", and optionally
12 | // "paddings" (default {0, 0}) and "strides" (default {1, 1}).
13 | Expr Convolution::apply(Expr x) {
14 |   const auto name = opt<std::string>("prefix");
15 |   const auto dims = opt<std::pair<int, int>>("kernel-dims");
16 |   const auto numKernels = opt<int>("kernel-num");
17 |   const auto pad = opt<std::pair<int, int>>("paddings", std::make_pair(0, 0));
18 |   const auto stride = opt<std::pair<int, int>>("strides", std::make_pair(1, 1));
19 | 
20 |   // The first kernel dimension is taken from axis 1 of the input shape.
21 |   const int inputDim = x->shape()[1];
22 | 
23 |   auto kernel = graph_->param(name + "_conv_kernels",
24 |                               {inputDim, numKernels, dims.first, dims.second},
25 |                               inits::glorot_uniform);
26 |   auto bias = graph_->param(name + "_conv_bias",
27 |                             {1, numKernels, 1, 1},
28 |                             inits::zeros);
29 | 
30 |   std::vector<Expr> inputs = {x, kernel, bias};
31 |   return Expression<ConvolutionOp>(
32 |       inputs, pad.first, pad.second, stride.first, stride.second);
33 | }
34 | 
35 | // The multi-input overload is not supported and always aborts.
36 | Expr Convolution::apply(const std::vector<Expr>&) {
37 |   ABORT("Can't apply convolution on many inputs at once");
38 |   return nullptr;
39 | }
40 | #endif
41 | 
42 | }  // namespace marian
43 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/bench/easylogging-bench-mt.cpp:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #include <thread>
 7 | #include <vector>
 8 | #include <atomic>
 9 | 
10 | #define _ELPP_THREAD_SAFE
11 | #include "easylogging++.h"
12 | _INITIALIZE_EASYLOGGINGPP
13 | 
14 | using namespace std;
15 | 
16 | int main(int argc, char* argv[])
17 | {
18 |     // 10 worker threads unless a count is passed as the first argument.
19 |     const int thread_count = (argc > 1) ? atoi(argv[1]) : 10;
20 |     const int howmany = 1000000;
21 | 
22 |     // Load configuration from file
23 |     el::Configurations conf("easyl.conf");
24 |     el::Loggers::reconfigureLogger("default", conf);
25 | 
26 |     // Shared counter: every increment claims the number of the next message.
27 |     std::atomic<int > msg_counter {0};
28 |     vector<thread> threads;
29 | 
30 |     // Each worker keeps logging until the shared counter passes `howmany`.
31 |     const auto worker = [&]()
32 |     {
33 |         for (int counter = ++msg_counter; counter <= howmany; counter = ++msg_counter)
34 |         {
35 |             LOG(INFO) << "easylog message #" << counter << ": This is some text for your pleasure";
36 |         }
37 |     };
38 | 
39 |     for (int t = 0; t < thread_count; ++t)
40 |         threads.push_back(std::thread(worker));
41 | 
42 |     for(auto &t:threads)
43 |         t.join();
44 | 
45 |     return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/src/char_model_trainer.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2016 Google Inc.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.!
14 | 
15 | #ifndef CHAR_MODEL_TRAINER_H_
16 | #define CHAR_MODEL_TRAINER_H_
17 | 
18 | #include "sentencepiece_model.pb.h"
19 | #include "trainer_interface.h"
20 | 
21 | namespace sentencepiece {
22 | namespace character {
23 | 
24 | // Trainer for the character model; construction is delegated to TrainerInterface.
25 | class Trainer : public TrainerInterface {
26 |  public:
27 |   Trainer(const TrainerSpec &trainer_spec,
28 |           const NormalizerSpec &normalizer_spec)
29 |       : TrainerInterface(trainer_spec, normalizer_spec) {}
30 | 
31 |   util::Status Train() override;
32 | };
33 | }  // namespace character
34 | }  // namespace sentencepiece
35 | #endif  // CHAR_MODEL_TRAINER_H_
36 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/src/word_model.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2016 Google Inc.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.!
14 | 
15 | #include "word_model.h"
16 | #include "util.h"
17 | 
18 | namespace sentencepiece {
19 | namespace word {
20 | 
21 | // Stores the address of `model_proto` (no copy is made) and then runs
22 | // InitializePieces().
23 | Model::Model(const ModelProto &model_proto) {
24 |   model_proto_ = &model_proto;
25 |   InitializePieces();
26 | }
27 | 
28 | Model::~Model() = default;
29 | 
30 | // Emits one (word, id) pair per element of SplitIntoWords(normalized).
31 | // The result is empty when status() is not ok or when the input is empty.
32 | EncodeResult Model::Encode(absl::string_view normalized) const {
33 |   EncodeResult pieces;
34 |   if (!status().ok() || normalized.empty()) {
35 |     return pieces;
36 |   }
37 | 
38 |   for (const auto &word : SplitIntoWords(normalized)) {
39 |     pieces.emplace_back(word, PieceToId(word));
40 |   }
41 |   return pieces;
42 | }
43 | 
44 | }  // namespace word
45 | }  // namespace sentencepiece
46 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/include/spdlog/formatter.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include "spdlog/details/log_msg.h"
 9 | 
10 | #include <vector>
11 | #include <string>
12 | #include <memory>
13 | 
14 | namespace spdlog
15 | {
16 | namespace details
17 | {
18 | class flag_formatter;
19 | }
20 | 
21 | // Abstract formatter: format() receives the log message by non-const reference.
22 | class formatter
23 | {
24 | public:
25 |     virtual ~formatter() = default;
26 |     virtual void format(details::log_msg& msg) = 0;
27 | };
28 | 
29 | // Formatter configured by a pattern string and a time type (local by default).
30 | // Non-copyable; the member functions are defined in pattern_formatter_impl.h,
31 | // which is included at the end of this header.
32 | class pattern_formatter SPDLOG_FINAL : public formatter
33 | {
34 | public:
35 |     explicit pattern_formatter(const std::string& pattern,
36 |                                pattern_time_type pattern_time = pattern_time_type::local);
37 | 
38 |     pattern_formatter(const pattern_formatter&) = delete;
39 |     pattern_formatter& operator=(const pattern_formatter&) = delete;
40 | 
41 |     void format(details::log_msg& msg) override;
42 | 
43 | private:
44 |     const std::string _pattern;
45 |     const pattern_time_type _pattern_time;
46 |     std::vector<std::unique_ptr<details::flag_formatter>> _formatters;
47 | 
48 |     std::tm get_time(details::log_msg& msg);
49 |     void handle_flag(char flag);
50 |     void compile_pattern(const std::string& pattern);
51 | };
52 | }
53 | 
54 | #include "spdlog/details/pattern_formatter_impl.h"
55 | 
56 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/makefiles/formatting.mk:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 3 | #
 4 | # See LICENSE.txt for license information
 5 | #
 6 | 
 7 | # Prerequisite: $(FILESTOFORMAT) contains the list of files of interest for formatting
 8 | # As this file defines a new target (format), it should be included at least after the definition of the
 9 | # default target.
10 | 
11 | ASTYLE_FORMAT_OPTS=-Qv --style=java --indent-after-parens --indent-modifiers --indent-switches --indent-continuation=2 --keep-one-line-blocks --keep-one-line-statements --indent=spaces=2 --lineend=linux --suffix=none
12 | ASTYLEDIR := $(BUILDDIR)/contrib
13 | ASTYLETAR := $(ASTYLEDIR)/astyle.tar.gz
14 | ASTYLEBIN := $(ASTYLEDIR)/astyle/build/gcc/bin/astyle
15 | ASTYLEBLD := $(ASTYLEDIR)/astyle/build/gcc/
16 | ASTYLEVER := 3.1
17 | ASTYLEURL := "https://versaweb.dl.sourceforge.net/project/astyle/astyle/astyle%20$(ASTYLEVER)/astyle_$(ASTYLEVER)_linux.tar.gz"
18 | 
19 | # Directory that receives the astyle download.
20 | $(ASTYLEDIR) :
21 | 	@mkdir -p $(ASTYLEDIR)
22 | 
23 | # Download the astyle tarball. The directory is an order-only prerequisite:
24 | # it only has to exist. Its timestamp changes whenever an entry is created
25 | # inside it, and that must not make the tarball look out of date.
26 | $(ASTYLETAR) : | $(ASTYLEDIR)
27 | 	@wget -q -O $(ASTYLETAR) $(ASTYLEURL)
28 | 
29 | # Unpack the tarball inside $(ASTYLEDIR).
30 | $(ASTYLEBLD) : $(ASTYLETAR)
31 | 	@cd $(ASTYLEDIR) && tar xzf $(ASTYLETAR)
32 | 
33 | # Build astyle with the makefile found in $(ASTYLEBLD).
34 | $(ASTYLEBIN) : $(ASTYLEBLD)
35 | 	${MAKE} -C $(ASTYLEBLD)
36 | 
37 | # Run astyle with $(ASTYLE_FORMAT_OPTS) over $(FILESTOFORMAT).
38 | .PHONY : format
39 | format : $(ASTYLEBIN)
40 | 	@$(ASTYLEBIN) $(ASTYLE_FORMAT_OPTS) $(FILESTOFORMAT)
41 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/include/spdlog/sinks/base_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | //
 8 | // base sink templated over a mutex (either dummy or real)
 9 | // concrete implementation should only override the _sink_it method.
10 | // all locking is taken care of here so no locking needed by the implementers..
11 | //
12 | 
13 | #include "spdlog/sinks/sink.h"
14 | #include "spdlog/formatter.h"
15 | #include "spdlog/common.h"
16 | #include "spdlog/details/log_msg.h"
17 | 
18 | #include <mutex>
19 | 
20 | namespace spdlog
21 | {
22 | namespace sinks
23 | {
24 | //
25 | // Common base for sinks: log() and flush() take the mutex and then delegate
26 | // to the _sink_it() / _flush() hooks that concrete sinks override.
27 | //
28 | template<class Mutex>
29 | class base_sink:public sink
30 | {
31 | public:
32 |     base_sink():_mutex() {}
33 |     virtual ~base_sink() = default;
34 | 
35 |     base_sink(const base_sink&) = delete;
36 |     base_sink& operator=(const base_sink&) = delete;
37 | 
38 |     // Serializes calls into the derived sink's _sink_it().
39 |     void log(const details::log_msg& msg) SPDLOG_FINAL override
40 |     {
41 |         std::lock_guard<Mutex> lock(_mutex);
42 |         _sink_it(msg);
43 |     }
44 | 
45 |     // Takes the same mutex as log(), so a flush cannot run concurrently with
46 |     // a write that another thread is performing through this sink.
47 |     void flush() SPDLOG_FINAL override
48 |     {
49 |         std::lock_guard<Mutex> lock(_mutex);
50 |         _flush();
51 |     }
52 | 
53 | protected:
54 |     // Hooks implemented by concrete sinks. Both are invoked with _mutex held,
55 |     // so an implementation must not call log() or flush() on the same sink.
56 |     virtual void _sink_it(const details::log_msg& msg) = 0;
57 |     virtual void _flush() = 0;
58 |     Mutex _mutex;
59 | };
60 | }
61 | }
62 | 


--------------------------------------------------------------------------------
/src/command/marian_conv.cpp:
--------------------------------------------------------------------------------
 1 | #include "marian.h"
 2 | 
 3 | #include "common/cli_wrapper.h"
 4 | 
 5 | #include <sstream>
 6 | 
 7 | // marian-conv entry point: loads the model named by --from and saves it under
 8 | // the path named by --to, passing along the YAML read from the source model.
 9 | int main(int argc, char** argv) {
10 |   using namespace marian;
11 | 
12 |   createLoggers();
13 | 
14 |   // Command-line handling; the parser handle goes out of scope after parsing.
15 |   auto options = New<Options>();
16 |   {
17 |     auto cli = New<cli::CLIWrapper>(
18 |         options,
19 |         "Convert a model in the .npz format to a mmap-able binary model",
20 |         "Allowed options",
21 |         "Examples:\n"
22 |         "  ./marian-conv -f model.npz -t model.bin");
23 |     cli->add<std::string>("--from,-f", "Input model", "model.npz");
24 |     cli->add<std::string>("--to,-t", "Output model", "model.bin");
25 |     cli->parse(argc, argv);
26 |   }
27 | 
28 |   const auto modelFrom = options->get<std::string>("from");
29 |   const auto modelTo = options->get<std::string>("to");
30 |   LOG(info, "Outputting {}", modelTo);
31 | 
32 |   // Serialize the "special:model.yml" entry of the source model to text.
33 |   YAML::Node config;
34 |   marian::io::getYamlFromModel(config, "special:model.yml", modelFrom);
35 |   std::stringstream configStr;
36 |   configStr << config;
37 | 
38 |   // Load on CPU0, run a forward pass, then save with the config text.
39 |   auto graph = New<ExpressionGraph>(true, false);
40 |   graph->setDevice(CPU0);
41 |   graph->load(modelFrom);
42 |   graph->forward();
43 |   graph->save(modelTo, configStr.str());
44 | 
45 |   // graph->saveBinary(vm["bin"].as<std::string>());
46 | 
47 |   LOG(info, "Finished");
48 |   return 0;
49 | }
50 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/bench/spdlog-bench-mt.cpp:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #include <thread>
 7 | #include <vector>
 8 | #include <atomic>
 9 | #include <cstdlib>
10 | #include "spdlog/spdlog.h"
11 | 
12 | 
13 | using namespace std;
14 | 
15 | int main(int argc, char* argv[])
16 | {
17 |     namespace spd = spdlog;
18 | 
19 |     // 10 worker threads unless a count is passed as the first argument.
20 |     const int thread_count = (argc > 1) ? std::atoi(argv[1]) : 10;
21 |     const int howmany = 1000000;
22 | 
23 |     auto logger = spdlog::create<spd::sinks::simple_file_sink_mt>("file_logger", "logs/spd-bench-mt.txt", false);
24 |     logger->set_pattern("[%Y-%b-%d %T.%e]: %v");
25 | 
26 |     // Shared counter: every increment claims the number of the next message.
27 |     std::atomic<int > msg_counter {0};
28 |     std::vector<thread> threads;
29 | 
30 |     // Each worker keeps logging until the shared counter passes `howmany`.
31 |     const auto worker = [&]()
32 |     {
33 |         for (int counter = ++msg_counter; counter <= howmany; counter = ++msg_counter)
34 |         {
35 |             logger->info("spdlog message #{}: This is some text for your pleasure", counter);
36 |         }
37 |     };
38 | 
39 |     for (int t = 0; t < thread_count; ++t)
40 |         threads.push_back(std::thread(worker));
41 | 
42 |     for(auto &t:threads)
43 |         t.join();
44 | 
45 |     return 0;
46 | }
47 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/tag.cpp:
--------------------------------------------------------------------------------
 1 | #include <cassert>
 2 | #include <stdexcept>
 3 | 
 4 | #include "directives.h"  // IWYU pragma: keep
 5 | #include "tag.h"
 6 | #include "token.h"
 7 | 
 8 | namespace YAML {
 9 | // Builds a Tag from a token: token.data selects the tag type, and the type
10 | // decides which token fields are copied into `handle` and `value`.
11 | Tag::Tag(const Token& token) : type(static_cast<TYPE>(token.data)) {
12 |   switch (type) {
13 |     case VERBATIM:
14 |     case PRIMARY_HANDLE:
15 |     case SECONDARY_HANDLE:
16 |       // These three kinds only carry a value.
17 |       value = token.value;
18 |       break;
19 |     case NAMED_HANDLE:
20 |       // The token value is the handle; the tag text is the first parameter.
21 |       handle = token.value;
22 |       value = token.params[0];
23 |       break;
24 |     case NON_SPECIFIC:
25 |       break;
26 |     default:
27 |       assert(false);
28 |   }
29 | }
30 | 
31 | // Returns the tag text; for the handle kinds the handle is first translated
32 | // through `directives` and the value is appended to the result.
33 | const std::string Tag::Translate(const Directives& directives) {
34 |   if (type == VERBATIM) {
35 |     return value;
36 |   }
37 |   if (type == NON_SPECIFIC) {
38 |     // TODO:
39 |     return "!";
40 |   }
41 | 
42 |   std::string key;
43 |   switch (type) {
44 |     case PRIMARY_HANDLE:
45 |       key = "!";
46 |       break;
47 |     case SECONDARY_HANDLE:
48 |       key = "!!";
49 |       break;
50 |     case NAMED_HANDLE:
51 |       key = "!" + handle + "!";
52 |       break;
53 |     default:
54 |       assert(false);
55 |       throw std::runtime_error("yaml-cpp: internal error, bad tag type");
56 |   }
57 |   return directives.TranslateTagHandle(key) + value;
58 | }
59 | }
60 | 


--------------------------------------------------------------------------------
/src/3rd_party/CLI/Macros.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // Distributed under the 3-Clause BSD License.  See accompanying
 4 | // file LICENSE or https://github.com/CLIUtils/CLI11 for details.
 5 | 
 6 | // [CLI11:verbatim]
 7 | 
 8 | // The following version macro is very similar to the one in PyBind11
 9 | #if !(defined(_MSC_VER) && __cplusplus == 199711L) && !defined(__INTEL_COMPILER)  // __cplusplus is usable here
10 | #if __cplusplus >= 201402L
11 | #define CLI11_CPP14
12 | #if __cplusplus >= 201703L
13 | #define CLI11_CPP17
14 | #if __cplusplus > 201703L
15 | #define CLI11_CPP20
16 | #endif
17 | #endif
18 | #endif
19 | #elif defined(_MSC_VER) && __cplusplus == 199711L
20 | // MSVC sets _MSVC_LANG rather than __cplusplus (supposedly until the standard is fully implemented)
21 | // Unless you use the /Zc:__cplusplus flag on Visual Studio 2017 15.7 Preview 3 or newer
22 | #if _MSVC_LANG >= 201402L
23 | #define CLI11_CPP14
24 | #if _MSVC_LANG > 201402L && _MSC_VER >= 1910
25 | #define CLI11_CPP17
26 | #if _MSVC_LANG > 201703L && _MSC_VER >= 1910  // _MSVC_LANG has a single leading underscore
27 | #define CLI11_CPP20
28 | #endif
29 | #endif
30 | #endif
31 | #endif
32 | 
33 | #if defined(CLI11_CPP14)  // C++14 or later detected: standard attribute syntax
34 | #define CLI11_DEPRECATED(reason) [[deprecated(reason)]]
35 | #elif defined(_MSC_VER)  // MSVC without CLI11_CPP14: __declspec form
36 | #define CLI11_DEPRECATED(reason) __declspec(deprecated(reason))
37 | #else  // every other compiler without CLI11_CPP14: GNU-style attribute
38 | #define CLI11_DEPRECATED(reason) __attribute__((deprecated(reason)))
39 | #endif
40 | 
41 | // [CLI11:verbatim]
42 | 


--------------------------------------------------------------------------------
/src/3rd_party/yaml-cpp/ptr_vector.h:
--------------------------------------------------------------------------------
 1 | #ifndef PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 2 | #define PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
 3 | 
 4 | #if defined(_MSC_VER) ||                                            \
 5 |     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 6 |      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
 7 | #pragma once
 8 | #endif
 9 | 
10 | #include <cstddef>
11 | #include <cstdlib>
12 | #include <memory>
13 | #include <vector>
14 | 
15 | #include "yaml-cpp/noncopyable.h"
16 | 
17 | namespace YAML {
18 | 
19 | // TODO: This class is no longer needed
20 | // Thin wrapper over std::vector<std::unique_ptr<T>>; element access dereferences the stored pointer.
21 | template <typename T>
22 | class ptr_vector : private YAML::noncopyable {
23 |  public:
24 |   ptr_vector() {}
25 | 
26 |   void clear() { m_data.clear(); }
27 |   std::size_t size() const { return m_data.size(); }
28 |   bool empty() const { return m_data.empty(); }
29 | 
30 |   // Takes ownership of `t`.
31 |   void push_back(std::unique_ptr<T>&& t) { m_data.push_back(std::move(t)); }
32 | 
33 |   // Element access: no bounds or null checks are performed here.
34 |   T& operator[](std::size_t i) { return *m_data[i]; }
35 |   const T& operator[](std::size_t i) const { return *m_data[i]; }
36 |   T& back() { return *m_data.back(); }
37 |   const T& back() const { return *m_data.back(); }
38 |  private:
39 |   std::vector<std::unique_ptr<T>> m_data;
40 | };
41 | }
42 | 
43 | #endif  // PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
44 | 


--------------------------------------------------------------------------------
/src/3rd_party/pathie-cpp/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright © 2015, 2017 Marvin Gülker
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are
 5 | met:
 6 | 
 7 | 1. Redistributions of source code must retain the above copyright
 8 |    notice, this list of conditions and the following disclaimer.
 9 | 
10 | 2. Redistributions in binary form must reproduce the above copyright
11 |    notice, this list of conditions and the following disclaimer in the
12 |    documentation and/or other materials provided with the distribution.
13 | 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 | “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
18 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
20 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/src/unicode_script.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2016 Google Inc.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.!
14 | 
15 | #include "unicode_script.h"
16 | #include <unordered_map>
17 | #include "unicode_script_map.h"
18 | #include "util.h"
19 | 
20 | namespace sentencepiece {
21 | namespace unicode_script {
22 | namespace {
23 | // Owns the code point -> script table, which InitTable() fills in the
24 | // constructor.
25 | class GetScriptInternal {
26 |  public:
27 |   GetScriptInternal() { InitTable(&smap_); }
28 | 
29 |   // Looks up `c`, using ScriptType::U_Common as the default value.
30 |   ScriptType GetScript(char32 c) const {
31 |     const ScriptType fallback = ScriptType::U_Common;
32 |     return port::FindWithDefault(smap_, c, fallback);
33 |   }
34 | 
35 |  private:
36 |   std::unordered_map<char32, ScriptType> smap_;
37 | };
38 | }  // namespace
39 | 
40 | // Public entry point; the table object is a function-local static, so it is
41 | // constructed on the first call and reused afterwards.
42 | ScriptType GetScript(char32 c) {
43 |   static const GetScriptInternal table;
44 |   return table.GetScript(c);
45 | }
46 | }  // namespace unicode_script
47 | }  // namespace sentencepiece
48 | 


--------------------------------------------------------------------------------
/src/common/utils.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <string>
 4 | #include <utility>
 5 | #include <vector>
 6 | 
 7 | namespace marian {
 8 | namespace utils {
 9 | 
10 | // String trimming helpers; each one modifies `s` through the reference.
11 | // NOTE(review): which characters count as whitespace is decided in the
12 | // implementation file - confirm there.
13 | void trim(std::string& s);
14 | void trimLeft(std::string& s);
15 | void trimRight(std::string& s);
16 | 
17 | // split / splitAny come in two flavours: one fills the caller's `pieces`
18 | // vector, the other returns a new vector. `del` defaults to a single space
19 | // and `keepEmpty` to false.
20 | // NOTE(review): presumably split() treats `del` as one delimiter string and
21 | // splitAny() treats it as a set of delimiter characters - confirm against
22 | // the implementation.
23 | void split(const std::string& line,
24 |            std::vector<std::string>& pieces,
25 |            const std::string del = " ",
26 |            bool keepEmpty = false);
27 | 
28 | std::vector<std::string> split(const std::string& line,
29 |                                const std::string del = " ",
30 |                                bool keepEmpty = false);
31 | 
32 | void splitAny(const std::string& line,
33 |               std::vector<std::string>& pieces,
34 |               const std::string del = " ",
35 |               bool keepEmpty = false);
36 | 
37 | std::vector<std::string> splitAny(const std::string& line,
38 |                                   const std::string del = " ",
39 |                                   bool keepEmpty = false);
40 | 
41 | // Concatenates `words` into one string; `del` defaults to a single space.
42 | std::string join(const std::vector<std::string>& words,
43 |                  const std::string& del = " ");
44 | 
45 | // Takes a command string and returns a string.
46 | // NOTE(review): presumably runs `cmd` and returns its output - confirm.
47 | std::string exec(const std::string& cmd);
48 | 
49 | // Returns a (string, int) pair; going by the name, host name and process id.
50 | std::pair<std::string, int> hostnameAndProcessId();
51 | 
52 | // NOTE(review): presumably formats `n` with thousands separators - confirm.
53 | std::string withCommas(size_t n);
54 | // NOTE(review): presumably true when `text` ends with `suffix` - confirm.
55 | bool endsWith(const std::string& text, const std::string& suffix);
56 | 
57 | }  // namespace utils
58 | }  // namespace marian
59 | 


--------------------------------------------------------------------------------
/src/3rd_party/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | include_directories(.)
 3 | 
 4 | add_subdirectory(./yaml-cpp)
 5 | add_subdirectory(./SQLiteCpp)
 6 | add_subdirectory(./pathie-cpp)
 7 | add_subdirectory(./zlib)
 8 | # SentencePiece is added to the build only when USE_SENTENCEPIECE is set.
 9 | if(USE_SENTENCEPIECE)
10 |   if(USE_STATIC_LIBS)
11 |     set(_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
12 |     if(WIN32)
13 |       list(INSERT CMAKE_FIND_LIBRARY_SUFFIXES 0 .lib .a)
14 |     else()
15 |       set(CMAKE_FIND_LIBRARY_SUFFIXES .a)
16 |     endif()
17 |   endif()
18 |   # Force SentencePiece's cache options: shared libraries off, TCMalloc on and linked statically.
19 |   set(SPM_ENABLE_SHARED OFF CACHE BOOL "Builds shared libaries in addition to static libraries." FORCE)
20 |   set(SPM_ENABLE_TCMALLOC ON CACHE BOOL "Enable TCMalloc if available." FORCE)
21 |   set(SPM_TCMALLOC_STATIC ON CACHE BOOL "Link static library of TCMALLOC." FORCE)
22 | 
23 |   add_subdirectory(./sentencepiece)
24 |   include_directories(./sentencepiece)
25 |   # Place the spm_* executables in the top-level build directory.
26 |   set_target_properties(spm_encode spm_decode spm_train spm_normalize spm_export_vocab
27 |                         PROPERTIES
28 |                         RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
29 |   # Restore the library suffix list that was saved before the static-library override.
30 |   if(USE_STATIC_LIBS)
31 |     set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
32 |   endif()
33 | endif(USE_SENTENCEPIECE)
34 | 
35 | include_directories(./SQLiteCpp/include)
36 | include_directories(./CLI)
37 | include_directories(./pathie-cpp/include)
38 | 
39 | include_directories(./zlib)
40 | 
41 | 


--------------------------------------------------------------------------------
/src/3rd_party/ExceptionWithCallStack.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright (c) Microsoft. All rights reserved.
 3 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
 4 | //
 5 | // ExceptionWithCallStack.h - debug util functions
 6 | //
 7 | #pragma once
 8 | 
 9 | #include <cstddef>
10 | #include <string>
11 | 
12 | namespace Microsoft { namespace MSR { namespace CNTK {
13 | 
14 | // base class that we can catch, independent of the type parameter
15 | struct /*interface*/ IExceptionWithCallStackBase
16 | {
17 |     // Returns the call stack text attached to the exception.
18 |     virtual const char * CallStack() const = 0;
19 |     virtual ~IExceptionWithCallStackBase() noexcept = default;
20 | };
21 | 
22 | // Exception wrapper to include native call stack string.
23 | // E is the wrapped exception type; it must be constructible from the
24 | // message string because the constructor forwards `msg` to E.
25 | template <class E>
26 | class ExceptionWithCallStack : public E, public IExceptionWithCallStackBase
27 | {
28 | public:
29 |     // msg       - passed on to E's constructor
30 |     // callstack - copied and later returned by CallStack()
31 |     ExceptionWithCallStack(const std::string& msg, const std::string& callstack) :
32 |         E(msg), m_callStack(callstack)
33 |     { }
34 | 
35 |     // The returned pointer refers to storage owned by this exception object.
36 |     virtual const char * CallStack() const override { return m_callStack.c_str(); }
37 | 
38 | protected:
39 |     std::string m_callStack;
40 | };
41 | 
42 | // some older code uses this namespace
43 | namespace DebugUtil
44 | {
45 |     void PrintCallStack(size_t skipLevels = 0, bool makeFunctionNamesStandOut = false);
46 | 
47 |     std::string GetCallStack(size_t skipLevels = 0, bool makeFunctionNamesStandOut = false);
48 | }
49 | 
50 | }}}
51 | 


--------------------------------------------------------------------------------
/src/3rd_party/spdlog/tests/tests.sln:
--------------------------------------------------------------------------------
 1 | 
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio 2015
 4 | VisualStudioVersion = 14.0
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tests", "tests.vcxproj", "{59A07559-5F38-4DD6-A7FA-DB4153690B42}"
 7 | EndProject
 8 | Global
 9 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | 		Debug|Win32 = Debug|Win32
11 | 		Debug|x64 = Debug|x64
12 | 		Release|Win32 = Release|Win32
13 | 		Release|x64 = Release|x64
14 | 	EndGlobalSection
15 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | 		{59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|Win32.ActiveCfg = Debug|Win32
17 | 		{59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|Win32.Build.0 = Debug|Win32
18 | 		{59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|x64.ActiveCfg = Debug|x64
19 | 		{59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|x64.Build.0 = Debug|x64
20 | 		{59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|Win32.ActiveCfg = Release|Win32
21 | 		{59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|Win32.Build.0 = Release|Win32
22 | 		{59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|x64.ActiveCfg = Release|x64
23 | 		{59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|x64.Build.0 = Release|x64
24 | 	EndGlobalSection
25 | 	GlobalSection(SolutionProperties) = preSolution
26 | 		HideSolutionNode = FALSE
27 | 	EndGlobalSection
28 | EndGlobal
29 | 


--------------------------------------------------------------------------------
/src/3rd_party/nccl/pkg/debian/control.in:
--------------------------------------------------------------------------------
 1 | Source: nccl
 2 | Section: libs
 3 | Maintainer: cudatools <cudatools@nvidia.com>
 4 | Priority: optional
 5 | Build-depends: debhelper(>=9)
 6 | Standards-Version: 3.9.5
 7 | 
 8 | Package: libnccl${nccl:Major}
 9 | Section: libs
10 | Architecture: ${pkg:Arch}
11 | Depends: ${misc:Depends}, ${shlibs:Depends}
12 | Description: NVIDIA Collectives Communication Library (NCCL) Runtime
13 |  NCCL (pronounced "Nickel") is a stand-alone library of standard collective
14 |  communication routines for GPUs, implementing all-reduce, all-gather, reduce,
15 |  broadcast, and reduce-scatter.
16 |  It has been optimized to achieve high bandwidth on any platform using PCIe,
17 |  NVLink, NVSwitch, as well as networking using InfiniBand Verbs or TCP/IP
18 |  sockets.
19 | 
20 | Package: libnccl-dev
21 | Section: libdevel
22 | Architecture: ${pkg:Arch}
23 | Depends: ${misc:Depends}, ${shlibs:Depends}, libnccl${nccl:Major} (= ${binary:Version})
24 | Description: NVIDIA Collective Communications Library (NCCL) Development Files
25 |  NCCL (pronounced "Nickel") is a stand-alone library of standard collective
26 |  communication routines for GPUs, implementing all-reduce, all-gather, reduce,
27 |  broadcast, and reduce-scatter.
28 |  It has been optimized to achieve high bandwidth on any platform using PCIe,
29 |  NVLink, NVSwitch, as well as networking using InfiniBand Verbs or TCP/IP
30 |  sockets.
31 | 


--------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/src/bpe_model.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2016 Google Inc.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef BPE_MODEL_H_
16 | #define BPE_MODEL_H_
17 | 
18 | #include "model_interface.h"
19 | #include "sentencepiece_model.pb.h"
20 | 
21 | namespace sentencepiece {
22 | namespace bpe {
23 | 
24 | // Segmentation model with BPE (Byte Pair Encoding).
25 | // Details: Neural Machine Translation of Rare Words with Subword Units,
26 | // https://arxiv.org/abs/1508.07909 and
27 | // https://en.wikipedia.org/wiki/Byte_pair_encoding
28 | // This header only declares the interface; Encode() takes text the caller
29 | // has presumably already normalized (per its parameter name) -- TODO confirm.
30 | class Model : public ModelInterface {
31 |  public:
32 |   explicit Model(const ModelProto &model_proto);  // no implicit conversion
33 |   ~Model() override;  // base-class destructor is virtual
34 | 
35 |   EncodeResult Encode(absl::string_view normalized) const override;
36 | };
37 | }  // namespace bpe
38 | }  // namespace sentencepiece
39 | #endif  // BPE_MODEL_H_
40 | 


--------------------------------------------------------------------------------