├── VERSION ├── src ├── common │ ├── timer.cpp │ ├── regex.h │ ├── version.h │ ├── blas_initializer.h │ ├── build_info.h │ ├── version.cpp │ ├── build_info.cpp.in │ ├── file_utils.h │ ├── blas_initializer.cpp │ ├── project_version.h.in │ ├── filesystem.cpp │ ├── binary.h │ ├── file_utils.cpp │ ├── config_validator.h │ ├── cli_helper.cpp │ └── hash.h ├── python │ ├── pymarian │ │ ├── pypdl │ │ │ └── __init__.py │ │ └── __main__.py │ ├── tests │ │ └── regression │ │ │ └── __init__.py │ ├── binding │ │ └── embedder.hpp │ └── build.sh ├── 3rd_party │ ├── spdlog │ │ ├── tests │ │ │ ├── main.cpp │ │ │ ├── utils.h │ │ │ ├── includes.h │ │ │ ├── install_libcxx.sh │ │ │ ├── CMakeLists.txt │ │ │ ├── utils.cpp │ │ │ ├── tests.sln │ │ │ └── format.cpp │ │ ├── bench │ │ │ ├── logs │ │ │ │ └── .gitignore │ │ │ ├── latency │ │ │ │ ├── compare.sh │ │ │ │ ├── utils.h │ │ │ │ └── g3log-crush.cpp │ │ │ ├── easyl.conf │ │ │ ├── glog-bench.cpp │ │ │ ├── easylogging-bench.cpp │ │ │ ├── spdlog-bench.cpp │ │ │ ├── utils.h │ │ │ ├── glog-bench-mt.cpp │ │ │ ├── easylogging-bench-mt.cpp │ │ │ ├── spdlog-bench-mt.cpp │ │ │ └── boost-bench.cpp │ │ ├── example │ │ │ ├── jni │ │ │ │ ├── Application.mk │ │ │ │ └── Android.mk │ │ │ ├── utils.h │ │ │ ├── Makefile.mingw │ │ │ ├── Makefile.clang │ │ │ └── example.sln │ │ ├── astyle.sh │ │ ├── cmake │ │ │ └── spdlog.pc.in │ │ ├── INSTALL │ │ ├── include │ │ │ └── spdlog │ │ │ │ ├── fmt │ │ │ │ ├── ostr.h │ │ │ │ ├── fmt.h │ │ │ │ └── bundled │ │ │ │ │ └── ostream.cc │ │ │ │ ├── sinks │ │ │ │ ├── null_sink.h │ │ │ │ ├── msvc_sink.h │ │ │ │ ├── sink.h │ │ │ │ ├── ostream_sink.h │ │ │ │ └── base_sink.h │ │ │ │ ├── details │ │ │ │ ├── null_mutex.h │ │ │ │ └── log_msg.h │ │ │ │ └── formatter.h │ │ ├── sinks │ │ │ ├── sink.h │ │ │ ├── null_sink.h │ │ │ ├── msvc_sink.h │ │ │ ├── base_sink.h │ │ │ └── ostream_sink.h │ │ ├── details │ │ │ └── null_mutex.h │ │ ├── .gitignore │ │ ├── formatter.h │ │ └── LICENSE │ ├── zlib │ │ ├── zlib.3.pdf │ │ ├── 
CMakeLists.txt │ │ ├── inffast.h │ │ └── gzclose.c │ ├── faiss │ │ ├── README │ │ ├── CMakeLists.txt │ │ ├── LICENSE │ │ └── Index.h │ ├── yaml-cpp │ │ ├── CMakeLists.txt │ │ ├── null.cpp │ │ ├── yaml-node.cpp │ │ ├── contrib │ │ │ ├── graphbuilder.cpp │ │ │ └── anchordict.h │ │ ├── directives.cpp │ │ ├── anchor.h │ │ ├── emitterstyle.h │ │ ├── node │ │ │ ├── type.h │ │ │ ├── detail │ │ │ │ ├── iterator_fwd.h │ │ │ │ ├── bool_type.h │ │ │ │ └── memory.h │ │ │ ├── ptr.h │ │ │ ├── emit.h │ │ │ └── iterator.h │ │ ├── emitterdef.h │ │ ├── memory.cpp │ │ ├── emit.cpp │ │ ├── scantag.h │ │ ├── exceptions.cpp │ │ ├── noncopyable.h │ │ ├── directives.h │ │ ├── tag.h │ │ ├── yaml.h │ │ ├── null.h │ │ ├── mark.h │ │ ├── LICENSE │ │ ├── regex_yaml.cpp │ │ ├── indentation.h │ │ ├── collectionstack.h │ │ ├── tag.cpp │ │ ├── ptr_vector.h │ │ ├── stringsource.h │ │ ├── eventhandler.h │ │ ├── ostream_wrapper.cpp │ │ └── dll.h │ ├── pathie-cpp │ │ ├── CMakeLists.txt │ │ └── LICENSE │ ├── CLI │ │ ├── Version.hpp │ │ ├── CLI.hpp │ │ └── Macros.hpp │ ├── SQLiteCpp │ │ ├── sqlite3 │ │ │ └── README.md │ │ ├── LICENSE.txt │ │ └── src │ │ │ └── Transaction.cpp │ ├── onnx │ │ └── protobuf │ │ │ ├── onnx-ml.pb-wrapper.h │ │ │ └── onnx-ml.pb-wrapper.cpp │ ├── phf │ │ └── LICENSE │ ├── cnpy │ │ └── LICENSE │ ├── mio │ │ └── LICENSE │ ├── zstr │ │ └── LICENSE │ ├── ExceptionWithCallStack.h │ └── half_float │ │ └── Readme.md ├── tests │ ├── units │ │ ├── run_tests.cpp │ │ ├── CMakeLists.txt │ │ └── utils_tests.cpp │ ├── README.md │ ├── dropout.cpp │ ├── CMakeLists.txt │ └── logger.cpp ├── examples │ ├── mnist │ │ ├── .gitignore │ │ ├── download.sh │ │ └── mnist_ffnn.cpp │ ├── README.md │ └── CMakeLists.txt ├── translator │ ├── history.cpp │ ├── helpers.h │ ├── nth_element.h │ ├── output_printer.cpp │ └── helpers.cpp ├── tensors │ ├── gpu │ │ ├── element.h │ │ ├── cusparse_include.h │ │ ├── gpu_info.cpp │ │ ├── add.h │ │ ├── algorithm.h │ │ └── device.cu │ ├── backend.cpp │ ├── cpu │ │ ├── 
device.cpp │ │ ├── aligned.h │ │ └── mjdgemm │ │ │ └── mjdgemm.h │ ├── rand.h │ └── memory_piece.h ├── layers │ ├── generic.cpp │ ├── weight.h │ ├── convolution.cpp │ └── weight.cpp ├── microsoft │ ├── shortlist │ │ ├── utils │ │ │ └── PrintTypes.h │ │ └── logging │ │ │ └── LoggerMacros.h │ └── sentencepiece.h ├── command │ ├── marian_embedder.cpp │ ├── marian_evaluator.cpp │ ├── marian_scorer.cpp │ ├── marian_decoder.cpp │ └── marian_vocab.cpp ├── models │ ├── transformer_stub.cpp │ ├── costs.cpp │ ├── model_task.h │ ├── encoder.h │ └── classifier.h ├── marian.h ├── functional │ ├── defs.h │ ├── functional.h │ └── array.h ├── layers_new │ └── neuralnet.cpp ├── data │ ├── rng_engine.h │ ├── batch.h │ └── iterator_facade.h ├── optimizers │ ├── clippers.cpp │ ├── exponential_smoothing.h │ ├── clippers.h │ └── exponential_smoothing.cpp ├── rnn │ └── attention_constructors.h ├── training │ └── graph_group_singleton.h └── onnx │ └── expression_graph_onnx_exporter.h ├── vs ├── .gitignore ├── BuildRelease.bat └── Marian.sln ├── doc ├── contributing.rst ├── .gitignore ├── images │ ├── example2.png │ ├── formula1.png │ ├── example1_dot.png │ ├── example1_dot2.png │ ├── example1_dot3.png │ ├── gelu_formula.png │ └── graph_example1.jpg ├── _static │ └── css │ │ └── custom.css ├── requirements.txt ├── make.bat └── Makefile ├── scripts ├── ci │ ├── README.md │ └── install_mkl.sh ├── shortlist │ ├── .gitignore │ ├── README.md │ └── install.sh ├── metrics │ ├── .gitignore │ ├── known-models.txt │ ├── requirements.txt │ ├── run.sh │ └── README.md ├── mbr │ └── generic │ │ ├── metrics │ │ ├── bleu.sh │ │ ├── chrf.sh │ │ └── bleurt.sh │ │ └── explode_collapse.pl ├── contrib │ ├── fix_hard.py │ └── inject_ctt.py ├── bert │ └── contrib │ │ └── chpt2pt.py └── server │ └── client_example.py ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── feature_request.md │ └── bug_report.md ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── ios.yml │ └── 
documentation.yml ├── .dockerignore ├── contrib ├── other-builds │ ├── cmake_doze.txt │ └── eclipse │ │ └── .project ├── triton-aml │ ├── marian_backend │ │ ├── src │ │ │ ├── libtriton_marian.ldscript │ │ │ └── marian.h │ │ ├── cmake │ │ │ └── TritonMarianBackendConfig.cmake.in │ │ └── README.md │ └── build.sh ├── autoformat.sh └── vim │ └── .vimrc ├── cmake ├── Tarball.cmake ├── FindNCCL.cmake └── FindTcmalloc.cmake ├── LICENSE.md ├── .gitmodules └── .gitignore /VERSION: -------------------------------------------------------------------------------- 1 | v1.12.43 2 | -------------------------------------------------------------------------------- /src/common/timer.cpp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vs/.gitignore: -------------------------------------------------------------------------------- 1 | build-vs 2 | deps 3 | -------------------------------------------------------------------------------- /doc/contributing.rst: -------------------------------------------------------------------------------- 1 | .. mdinclude:: ../CONTRIBUTING.md 2 | -------------------------------------------------------------------------------- /scripts/ci/README.md: -------------------------------------------------------------------------------- 1 | Scripts for continuous integration. 
2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # git should never touch line endings 2 | * -text 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | -------------------------------------------------------------------------------- /scripts/shortlist/.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | fast_align 3 | extract-lex 4 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | api 2 | build 3 | doxygen 4 | venv 5 | CONTRIBUTING.md 6 | -------------------------------------------------------------------------------- /src/python/pymarian/pypdl/__init__.py: -------------------------------------------------------------------------------- 1 | from .main import Downloader 2 | -------------------------------------------------------------------------------- /scripts/metrics/.gitignore: -------------------------------------------------------------------------------- 1 | /bins 2 | tmp.* 3 | /workspace 4 | /marian-metric -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/main.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_MAIN 2 | #include "catch.hpp" -------------------------------------------------------------------------------- /src/common/regex.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | namespace regex = std; 5 | 
-------------------------------------------------------------------------------- /src/tests/units/run_tests.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_MAIN 2 | #include "catch.hpp" 3 | -------------------------------------------------------------------------------- /doc/images/example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marian-nmt/marian-dev/HEAD/doc/images/example2.png -------------------------------------------------------------------------------- /doc/images/formula1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marian-nmt/marian-dev/HEAD/doc/images/formula1.png -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | /regression-tests 2 | /build* 3 | /.pytest_cache 4 | /.vscode 5 | /dist 6 | /doc 7 | .history* 8 | -------------------------------------------------------------------------------- /doc/images/example1_dot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marian-nmt/marian-dev/HEAD/doc/images/example1_dot.png -------------------------------------------------------------------------------- /doc/images/example1_dot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marian-nmt/marian-dev/HEAD/doc/images/example1_dot2.png -------------------------------------------------------------------------------- /doc/images/example1_dot3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marian-nmt/marian-dev/HEAD/doc/images/example1_dot3.png 
-------------------------------------------------------------------------------- /doc/images/gelu_formula.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marian-nmt/marian-dev/HEAD/doc/images/gelu_formula.png -------------------------------------------------------------------------------- /doc/images/graph_example1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marian-nmt/marian-dev/HEAD/doc/images/graph_example1.jpg -------------------------------------------------------------------------------- /src/3rd_party/zlib/zlib.3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marian-nmt/marian-dev/HEAD/src/3rd_party/zlib/zlib.3.pdf -------------------------------------------------------------------------------- /contrib/other-builds/cmake_doze.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marian-nmt/marian-dev/HEAD/contrib/other-builds/cmake_doze.txt -------------------------------------------------------------------------------- /src/3rd_party/faiss/README: -------------------------------------------------------------------------------- 1 | This is code extracted from the original FAISS repository: https://github.com/facebookresearch/faiss -------------------------------------------------------------------------------- /contrib/triton-aml/marian_backend/src/libtriton_marian.ldscript: -------------------------------------------------------------------------------- 1 | { 2 | global: 3 | TRITONBACKEND_*; 4 | local: *; 5 | }; 6 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/logs/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # 
Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /src/common/version.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace marian { 6 | std::string buildVersion(); 7 | } 8 | -------------------------------------------------------------------------------- /src/examples/mnist/.gitignore: -------------------------------------------------------------------------------- 1 | t10k-images-idx3-ubyte 2 | t10k-labels-idx1-ubyte 3 | train-images-idx3-ubyte 4 | train-labels-idx1-ubyte 5 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/jni/Application.mk: -------------------------------------------------------------------------------- 1 | # Exceptions are used in spdlog. Link to an exception-ready C++ runtime. 2 | APP_STL = gnustl_static 3 | -------------------------------------------------------------------------------- /doc/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | .wy-body-for-nav > .wy-grid-for-nav > .wy-nav-side { 2 | border-bottom: 5px solid #28bbee; 3 | /*background-color: #494d55;*/ 4 | } 5 | -------------------------------------------------------------------------------- /scripts/mbr/generic/metrics/bleu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | parallel --cat -k -j32 --block 10M "sacrebleu <(cut -f 1 {}) < <(cut -f 2 {}) -b -w 4 -sl --format text --metrics bleu" 4 | -------------------------------------------------------------------------------- /scripts/mbr/generic/metrics/chrf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | parallel --cat -k -j32 --block 10M "sacrebleu <(cut -f 1 {}) < <(cut -f 2 {}) -b -w 4 -sl --format text --metrics chrf" 4 | 
-------------------------------------------------------------------------------- /src/3rd_party/spdlog/astyle.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | find . -name "*\.h" -o -name "*\.cpp"|xargs dos2unix 3 | find . -name "*\.h" -o -name "*\.cpp"|xargs astyle -n -c -A1 4 | 5 | 6 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | lxml>=4.9.1 2 | docutils<=0.17 3 | sphinx==2.4.4 4 | breathe==4.13.0 5 | exhale 6 | sphinx_rtd_theme 7 | myst-parser==0.14.0a3 8 | sphinx-mathjax-offline 9 | Jinja2<3.1 10 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | # Maintain dependencies for Git Submodules 5 | - package-ecosystem: "gitsubmodule" 6 | directory: "/" 7 | schedule: 8 | interval: "daily" 9 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/cmake/spdlog.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | includedir=${prefix}/include 3 | 4 | Name: @PROJECT_NAME@ 5 | Description: Super fast C++ logging library. 
6 | Version: @PROJECT_VERSION@ 7 | -------------------------------------------------------------------------------- /src/python/pymarian/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import _pymarian 3 | 4 | 5 | def main(): 6 | code = _pymarian.main(sys.argv[1:]) 7 | sys.exit(code) 8 | 9 | if __name__ == '__main__': 10 | main() 11 | -------------------------------------------------------------------------------- /src/translator/history.cpp: -------------------------------------------------------------------------------- 1 | #include "history.h" 2 | 3 | namespace marian { 4 | 5 | History::History(size_t lineNo, float alpha, float wp) 6 | : lineNo_(lineNo), alpha_(alpha), wp_(wp) {} 7 | } // namespace marian 8 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(..) 2 | include_directories(.) 3 | 4 | FILE(GLOB YamlCppSources *.cpp contrib/*.cpp) 5 | if (NOT TARGET libyaml-cpp) 6 | add_library(libyaml-cpp OBJECT ${YamlCppSources}) 7 | endif() 8 | -------------------------------------------------------------------------------- /src/3rd_party/pathie-cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(..) 2 | include_directories(.) 
3 | include_directories(include) 4 | 5 | FILE(GLOB PathieCppSources src/*.cpp) 6 | if (NOT TARGET pathie-cpp) 7 | add_library(pathie-cpp OBJECT ${PathieCppSources}) 8 | endif() 9 | -------------------------------------------------------------------------------- /src/common/blas_initializer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace marian { 4 | 5 | class BLASInitializer { 6 | public: 7 | BLASInitializer(); 8 | }; 9 | 10 | // Declare a global instance 11 | extern BLASInitializer blasInitializer; 12 | 13 | } // namespace marian 14 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/null.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/null.h" 2 | 3 | namespace YAML { 4 | _Null Null; 5 | 6 | bool IsNullString(const std::string& str) { 7 | return str.empty() || str == "~" || str == "null" || str == "Null" || 8 | str == "NULL"; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/tensors/gpu/element.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tensors/tensor.h" 4 | 5 | namespace marian { 6 | namespace gpu { 7 | 8 | template 9 | void Element(Functor functor, Tensor out, Tensors... 
tensors); 10 | } 11 | } // namespace marian 12 | -------------------------------------------------------------------------------- /src/3rd_party/faiss/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # adding a new file requires explicitly modifying the CMakeLists.txt 2 | 3 | add_definitions(-DFINTEGER=uint64_t) 4 | 5 | include_directories("impl") 6 | FILE(GLOB FaissCppSources *.cpp impl/*.cpp utils/*.cpp) 7 | add_library(faiss OBJECT ${FaissCppSources}) 8 | -------------------------------------------------------------------------------- /src/layers/generic.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | 3 | #include "data/factored_vocab.h" 4 | #include "layers/constructors.h" 5 | #include "layers/generic.h" 6 | #include "layers/loss.h" 7 | #include "models/states.h" // for EncoderState 8 | 9 | namespace marian {} // namespace marian 10 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/yaml-node.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/node/node.h" 2 | #include "nodebuilder.h" 3 | #include "nodeevents.h" 4 | 5 | namespace YAML { 6 | Node Clone(const Node& node) { 7 | NodeEvents events(node); 8 | NodeBuilder builder; 9 | events.Emit(builder); 10 | return builder.Root(); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/examples/mnist/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ `ls -1 *-ubyte 2>/dev/null | wc -l ` == 4 ]; then 4 | echo Files exist: `ls -1 *-ubyte`; 5 | exit; 6 | fi 7 | 8 | wget https://romang.blob.core.windows.net/mariandev/regression-tests/data/exdb_mnist.tar.gz 9 | tar zxvf exdb_mnist.tar.gz 10 | mv exdb_mnist/*-ubyte . 
11 | -------------------------------------------------------------------------------- /scripts/metrics/known-models.txt: -------------------------------------------------------------------------------- 1 | BLEURT-20 2 | wmt20-comet-qe-da 3 | wmt20-comet-qe-da-v2 4 | wmt20-comet-da 5 | wmt21-comet-qe-mqm 6 | wmt21-comet-qe-da 7 | wmt21-comet-da 8 | Unbabel/wmt22-comet-da 9 | Unbabel/wmt22-cometkiwi-da 10 | Unbabel/wmt23-cometkiwi-da-xl 11 | Unbabel/wmt23-cometkiwi-da-xxl 12 | Unbabel/XCOMET-XL 13 | Unbabel/XCOMET-XXL -------------------------------------------------------------------------------- /src/microsoft/shortlist/utils/PrintTypes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #ifdef QUICKSAND_WINDOWS_BUILD 6 | #define PI32 "d" 7 | #define PI64 "lld" 8 | #define PU32 "u" 9 | #define PU64 "llu" 10 | #else 11 | #define PI32 PRId32 12 | #define PI64 PRId64 13 | #define PU32 PRIu32 14 | #define PU64 PRIu64 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /src/tensors/gpu/cusparse_include.h: -------------------------------------------------------------------------------- 1 | // header file to include cusparse.h while ignoring deprecated warnings locally 2 | 3 | #ifdef __GNUC__ 4 | #pragma GCC diagnostic push 5 | #pragma GCC diagnostic ignored "-Wdeprecated-declarations" 6 | #endif 7 | 8 | #include 9 | 10 | #ifdef __GNUC__ 11 | #pragma GCC diagnostic pop 12 | #endif -------------------------------------------------------------------------------- /src/common/build_info.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace marian { 6 | 7 | // Returns list of non-advanced cache variables used by CMake 8 | std::string cmakeBuildOptions(); 9 | 10 | // Returns list of advanced cache variables used by CMake 11 | std::string cmakeBuildOptionsAdvanced(); 12 | 13 
| } // namespace marian 14 | -------------------------------------------------------------------------------- /src/examples/README.md: -------------------------------------------------------------------------------- 1 | Marian examples 2 | --------------- 3 | 4 | Examples are enabled with CMake option `-DCOMPILE_EXAMPLES=ON`. 5 | 6 | ## MNIST 7 | 8 | You will need MNIST data for training and testing. Download them with the 9 | script `src/examples/mnist/download.sh` or provide paths to the files with 10 | `--train-sets` and `--valid-sets` options. 11 | -------------------------------------------------------------------------------- /contrib/triton-aml/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | echo Building Triton Marian backend ... 3 | 4 | docker build -t triton-marian-build . 5 | 6 | echo Copying artifacts ... 7 | 8 | docker container create --name extract triton-marian-build 9 | docker container cp extract:/opt/tritonserver/marian_backend/build/libtriton_marian.so . 10 | docker container rm -f extract 11 | -------------------------------------------------------------------------------- /scripts/shortlist/README.md: -------------------------------------------------------------------------------- 1 | `install.sh` is a helper script that downloads and compiles fastalign and extract-lex, and copies 2 | required binaries into _./bin_. 3 | 4 | Shortlist files (_lex.s2t_ and _lex.t2s_) can be created using `generate_shortlists.pl`, for 5 | example: 6 | 7 | perl generate_shortlists.pl --bindir ./bin -s corpus.bpe.src -t corpus.bpe.tgt 8 | 9 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/latency/compare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "running spdlog and g3log tests 10 time with ${1:-10} threads each (total 1,000,000 entries).." 
3 | rm -f *.log 4 | for i in {1..10} 5 | 6 | do 7 | echo 8 | sleep 0.5 9 | ./spdlog-latency ${1:-10} 2>/dev/null || exit 10 | sleep 0.5 11 | ./g3log-latency ${1:-10} 2>/dev/null || exit 12 | 13 | done 14 | -------------------------------------------------------------------------------- /src/command/marian_embedder.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | 3 | #include "models/model_task.h" 4 | #include "embedder/embedder.h" 5 | #include "common/timer.h" 6 | 7 | int main(int argc, char** argv) { 8 | using namespace marian; 9 | 10 | auto options = parseOptions(argc, argv, cli::mode::embedding); 11 | New>(options)->run(); 12 | 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /src/3rd_party/CLI/Version.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Distributed under the 3-Clause BSD License. See accompanying 4 | // file LICENSE or https://github.com/CLIUtils/CLI11 for details. 
5 | 6 | // [CLI11:verbatim] 7 | 8 | #define CLI11_VERSION_MAJOR 1 9 | #define CLI11_VERSION_MINOR 6 10 | #define CLI11_VERSION_PATCH 1 11 | #define CLI11_VERSION "1.6.1" 12 | 13 | // [CLI11:verbatim] 14 | -------------------------------------------------------------------------------- /src/common/version.cpp: -------------------------------------------------------------------------------- 1 | #include "common/version.h" 2 | #include "common/git_revision.h" // make-generated file, contains git commit info 3 | #include "common/project_version.h" // cmake-generated file, major/minor/tweak versions 4 | 5 | namespace marian { 6 | 7 | std::string buildVersion() { 8 | return std::string(PROJECT_VERSION) + " " + GIT_REVISION; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | std::size_t count_lines(const std::string& filename); 7 | 8 | void prepare_logdir(); 9 | 10 | std::string file_contents(const std::string& filename); 11 | 12 | std::size_t count_lines(const std::string& filename); 13 | 14 | std::size_t get_filesize(const std::string& filename); 15 | 16 | -------------------------------------------------------------------------------- /src/translator/helpers.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "graph/expression_graph.h" 4 | 5 | namespace marian { 6 | 7 | namespace cpu { 8 | 9 | void suppressWords(Expr logProbs, Expr wordIndices); 10 | } 11 | 12 | namespace gpu { 13 | 14 | void suppressWords(Expr logProbs, Expr wordIndices); 15 | } 16 | 17 | void suppressWords(Expr logProbs, Expr wordIndices); 18 | } // namespace marian 19 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/INSTALL: 
-------------------------------------------------------------------------------- 1 | spdlog is a header-only library. 2 | Just copy the files to your build tree and use a C++11 compiler 3 | 4 | Tested on: 5 | gcc 4.8.1 and above 6 | clang 3.5 7 | Visual Studio 2013 8 | 9 | gcc 4.8 flags: --std=c++11 -pthread -O3 -flto -Wl,--no-as-needed 10 | gcc 4.9 flags: --std=c++11 -pthread -O3 -flto 11 | 12 | 13 | see the makefile in the example folder 14 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/includes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "catch.hpp" 11 | #include "utils.h" 12 | 13 | #include "../include/spdlog/spdlog.h" 14 | #include "../include/spdlog/sinks/null_sink.h" 15 | #include "../include/spdlog/sinks/ostream_sink.h" 16 | 17 | -------------------------------------------------------------------------------- /src/tensors/gpu/gpu_info.cpp: -------------------------------------------------------------------------------- 1 | #include "common/definitions.h" 2 | 3 | #if CUDA_FOUND 4 | #include "tensors/gpu/cuda_helpers.h" 5 | #endif 6 | 7 | namespace marian { 8 | namespace gpu { 9 | size_t availableDevices() { 10 | #if CUDA_FOUND 11 | int deviceCount; 12 | CUDA_CHECK(cudaGetDeviceCount(&deviceCount)); 13 | return (size_t)deviceCount; 14 | #else 15 | return 0; 16 | #endif 17 | } 18 | } 19 | } -------------------------------------------------------------------------------- /scripts/metrics/requirements.txt: -------------------------------------------------------------------------------- 1 | # assume it downloads the correct pytorch 2 | unbabel-comet==2.2.1 3 | sacrebleu 4 | # this is the original bleurt; used for comparing scores 5 | git+https://github.com/google-research/bleurt.git 6 | # this is the pytorch version of bleurt; used in bleurt2marian 
7 | git+https://github.com/lucadiliello/bleurt-pytorch.git 8 | huggingface_hub[cli] # required for login to hf to authenticate private models -------------------------------------------------------------------------------- /src/3rd_party/zlib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # list of sources files of the library 2 | file(GLOB ZLIB_SRC *.c) 3 | file(GLOB ZLIB_INC *.h) 4 | 5 | # add sources of the wrapper as a "zlib" static library 6 | add_library(zlib OBJECT ${ZLIB_SRC} ${ZLIB_INC}) 7 | 8 | if(MSVC) 9 | target_compile_options(zlib PUBLIC /wd4996 /wd4267) 10 | else() 11 | target_compile_options(zlib PUBLIC -Wno-implicit-function-declaration) 12 | endif() 13 | -------------------------------------------------------------------------------- /src/command/marian_evaluator.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | 3 | #include "models/model_task.h" 4 | #include "evaluator/evaluator.h" 5 | #include "common/timer.h" 6 | 7 | int main(int argc, char** argv) { 8 | using namespace marian; 9 | 10 | // @TODO: add mode evaluating 11 | auto options = parseOptions(argc, argv, cli::mode::evaluating); 12 | New>(options)->run(); 13 | 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /src/common/build_info.cpp.in: -------------------------------------------------------------------------------- 1 | #include "common/build_info.h" 2 | 3 | /* 4 | * File build_info.cpp is generated using CMake. Do NOT modify it manually! Edit 5 | * build_info.cpp.in file instead. 
6 | */ 7 | 8 | std::string marian::cmakeBuildOptions() { 9 | return "" 10 | @PROJECT_CMAKE_CACHE@ 11 | ; 12 | } 13 | 14 | std::string marian::cmakeBuildOptionsAdvanced() { 15 | return "" 16 | @PROJECT_CMAKE_CACHE_ADVANCED@ 17 | ; 18 | } 19 | -------------------------------------------------------------------------------- /contrib/autoformat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -v 2 | 3 | if ! [ -x "$( command -v clang-format )" ] 4 | then 5 | mkdir -p $HOME/.local 6 | wget -O- http://releases.llvm.org/6.0.0/clang+llvm-6.0.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz | tar --xz -xf - -C $HOME/.local --strip 1 7 | fi 8 | 9 | find ./src \( -path ./src/3rd_party -o -path ./src/tests -o -path ./src/models/experimental \) -prune -o -iname *.h -o -iname *.cpp -o -iname *.cu | xargs clang-format -i 10 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/fmt/ostr.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | // include external or bundled copy of fmtlib's ostream support 9 | // 10 | #if !defined(SPDLOG_FMT_EXTERNAL) 11 | #include "spdlog/fmt/fmt.h" 12 | #include "spdlog/fmt/bundled/ostream.h" 13 | #else 14 | #include 15 | #endif 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/common/file_utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "common/file_stream.h" 7 | 8 | namespace marian { 9 | namespace fileutils { 10 | 11 | void cut(const std::string& tsvIn, 12 | Ptr tsvOut, 13 | const std::vector& fields, 14 | size_t numFields, 15 | const std::string& sep = "\t"); 16 | 17 | } // namespace fileutils 18 | } // namespace marian 19 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/easyl.conf: -------------------------------------------------------------------------------- 1 | * GLOBAL: 2 | FORMAT = "[%datetime]: %msg" 3 | FILENAME = ./logs/easylogging.log 4 | ENABLED = true 5 | TO_FILE = true 6 | TO_STANDARD_OUTPUT = false 7 | MILLISECONDS_WIDTH = 3 8 | PERFORMANCE_TRACKING = false 9 | MAX_LOG_FILE_SIZE = 10485760 10 | Log_Flush_Threshold = 10485760 11 | -------------------------------------------------------------------------------- /scripts/contrib/fix_hard.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | 4 | d = dict() 5 | m = np.load(sys.argv[1]) 6 | for k in m: 7 | if "ff_" == k[0:3]: 8 | d["decoder_" + k] = m[k] 9 | elif k == "special:model.yml": 10 | info = m[k].tobytes() 11 | info = info.replace("layers-dec", "dec-depth") 12 | info = info.replace("layers-enc", "enc-depth") 13 | d[k] = info 14 | print info 15 | else: 16 | d[k] = m[k] 17 | np.savez(sys.argv[1] +
".fixed", **d) -------------------------------------------------------------------------------- /src/command/marian_scorer.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | 3 | #include "models/model_task.h" 4 | #include "rescorer/rescorer.h" 5 | #include "common/timer.h" 6 | 7 | int main(int argc, char** argv) { 8 | using namespace marian; 9 | 10 | auto options = parseOptions(argc, argv, cli::mode::scoring); 11 | 12 | timer::Timer timer; 13 | New>(options)->run(); 14 | LOG(info, "Total time: {:.5f}s wall", timer.elapsed()); 15 | 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /src/models/transformer_stub.cpp: -------------------------------------------------------------------------------- 1 | #include "models/transformer.h" 2 | 3 | namespace marian { 4 | // factory functions 5 | Ptr NewEncoderTransformer(Ptr graph, Ptr options) 6 | { 7 | return New(graph, options); 8 | } 9 | 10 | Ptr NewDecoderTransformer(Ptr graph, Ptr options) 11 | { 12 | return New(graph, options); 13 | } 14 | } // namespace marian 15 | -------------------------------------------------------------------------------- /src/examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(iris_example iris/iris.cpp) 2 | add_executable(mnist_example mnist/mnist_ffnn.cpp) 3 | 4 | foreach(exec iris_example mnist_example) 5 | target_link_libraries(${exec} marian ${EXT_LIBS}) 6 | if(CUDA_FOUND) 7 | target_link_libraries(${exec} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS}) 8 | endif(CUDA_FOUND) 9 | set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") 10 | endforeach(exec) 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 
| name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Feature description 11 | Please add a concise description of what the problem is and describe the solution you would like to see. 12 | Add links to a paper, another toolkit, etc. if relevant. 13 | 14 | ### Example 15 | Add a usage example for the new feature, e.g. a command line. 16 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | namespace spdlog 12 | { 13 | namespace sinks 14 | { 15 | class sink 16 | { 17 | public: 18 | virtual ~sink() {} 19 | virtual void log(const details::log_msg& msg) = 0; 20 | virtual void flush() = 0; 21 | }; 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /src/tests/README.md: -------------------------------------------------------------------------------- 1 | Marian tests 2 | ============ 3 | 4 | Unit tests and application tests are enabled with CMake option 5 | `-DCOMPILE_TESTS=ON`, e.g.: 6 | 7 | cd build 8 | cmake .. -DCOMPILE_TESTS=ON 9 | make -j8 10 | 11 | Running all unit tests: 12 | 13 | make test 14 | 15 | Running a single unit test is also possible: 16 | 17 | ./src/tests/run_graph_tests 18 | 19 | We use [Catch framework](https://github.com/philsquared/Catch) for unit 20 | testing.
21 | -------------------------------------------------------------------------------- /src/3rd_party/zlib/inffast.h: -------------------------------------------------------------------------------- 1 | /* inffast.h -- header to use inffast.c 2 | * Copyright (C) 1995-2003, 2010 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | /* WARNING: this file should *not* be used by applications. It is 7 | part of the implementation of the compression library and is 8 | subject to change. Applications should only use zlib.h. 9 | */ 10 | 11 | void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); 12 | -------------------------------------------------------------------------------- /src/tensors/gpu/add.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tensors/tensor.h" 4 | 5 | namespace marian { 6 | 7 | namespace gpu { 8 | 9 | template 10 | void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors); 11 | 12 | template 13 | void Aggregate(Functor functor, float initAgg, AggFunctor aggFunctor, float scale, marian::Tensor out, Tensors... 
tensors); 14 | } 15 | } // namespace marian 16 | -------------------------------------------------------------------------------- /src/common/blas_initializer.cpp: -------------------------------------------------------------------------------- 1 | #include "blas_initializer.h" 2 | #if MKL_FOUND 3 | #include 4 | #endif 5 | #if OPENBLAS_FOUND 6 | #include 7 | #endif 8 | 9 | namespace marian { 10 | 11 | BLASInitializer::BLASInitializer() { 12 | #if MKL_FOUND 13 | mkl_set_num_threads(1); 14 | #endif 15 | #if OPENBLAS_FOUND 16 | openblas_set_num_threads(1); 17 | #endif 18 | } 19 | 20 | // Define the global instance 21 | BLASInitializer blasInitializer; 22 | 23 | } // namespace marian 24 | -------------------------------------------------------------------------------- /src/tensors/backend.cpp: -------------------------------------------------------------------------------- 1 | #include "tensors/backend.h" 2 | 3 | #ifdef CUDA_FOUND 4 | #include "tensors/gpu/backend.h" 5 | #endif 6 | 7 | #include "tensors/cpu/backend.h" 8 | 9 | namespace marian { 10 | 11 | Ptr BackendByDeviceId(DeviceId deviceId, size_t seed) { 12 | #ifdef CUDA_FOUND 13 | if(deviceId.type == DeviceType::gpu) 14 | return New(deviceId, seed); 15 | else 16 | #endif 17 | return New(deviceId, seed); 18 | } 19 | } // namespace marian 20 | -------------------------------------------------------------------------------- /contrib/triton-aml/marian_backend/src/marian.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #ifdef _WIN32 9 | #define DLLEXPORT extern "C" __declspec(dllexport) 10 | #else 11 | #define DLLEXPORT extern "C" 12 | #endif 13 | 14 | DLLEXPORT void* init(char* path, int device_num); 15 | DLLEXPORT char* translate(void* marian, char* sent); 16 | DLLEXPORT void free_result(char* to_free); 17 | -------------------------------------------------------------------------------- 
/src/models/costs.cpp: -------------------------------------------------------------------------------- 1 | #include "costs.h" 2 | 3 | namespace marian { 4 | namespace models { 5 | 6 | Ptr LogSoftmaxStep::apply(Ptr state) { 7 | // decoder needs normalized probabilities (note: skipped if beam 1 and --skip-cost) 8 | state->setLogProbs(state->getLogProbs().applyUnaryFunction(logsoftmax)); 9 | // @TODO: This is becoming more and more opaque ^^. Can we simplify this? 10 | return state; 11 | } 12 | 13 | } // namespace models 14 | } // namespace marian 15 | -------------------------------------------------------------------------------- /src/models/model_task.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace marian { 6 | 7 | struct ModelTask { 8 | virtual ~ModelTask() {} 9 | virtual void run() = 0; 10 | }; 11 | 12 | struct ModelServiceTask { 13 | virtual ~ModelServiceTask() {} 14 | virtual std::string run(const std::string& /*input*/, const std::string& /*yaml*/) = 0; 15 | virtual std::vector run(const std::vector& /*input*/, const std::string& /*yaml*/) = 0; 16 | }; 17 | } // namespace marian 18 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/jni/Android.mk: -------------------------------------------------------------------------------- 1 | # Setup a project 2 | LOCAL_PATH := $(call my-dir) 3 | include $(CLEAR_VARS) 4 | 5 | LOCAL_MODULE := example 6 | LOCAL_SRC_FILES := example.cpp 7 | LOCAL_CPPFLAGS += -Wall -Wshadow -Wextra -pedantic -std=c++11 -fPIE -pie 8 | LOCAL_LDFLAGS += -fPIE -pie 9 | 10 | # Add exception support and set path for spdlog's headers 11 | LOCAL_CPPFLAGS += -fexceptions -I../include 12 | # Use android's log library 13 | LOCAL_LDFLAGS += -llog 14 | 15 | include $(BUILD_EXECUTABLE) 16 | -------------------------------------------------------------------------------- /src/marian.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // clang-format off 4 | #include "common/version.h" 5 | #include "common/config.h" 6 | #include "common/definitions.h" 7 | #include "common/logging.h" 8 | #include "common/options.h" 9 | #include "common/io.h" 10 | 11 | #include "data/batch_generator.h" 12 | #include "data/corpus.h" 13 | 14 | #include "graph/expression_graph.h" 15 | #include "graph/expression_operators.h" 16 | #include "graph/node_initializers.h" 17 | 18 | #include "optimizers/optimizers.h" 19 | // clang-format on 20 | -------------------------------------------------------------------------------- /src/python/tests/regression/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | QUIET = os.getenv('MARIAN_QUIET', "").lower() in ("1", "yes", "y", "true", "on") 4 | CPU_THREADS = int(os.getenv('MARIAN_CPU_THREADS', "4")) 5 | WORKSPACE_MEMORY = int(os.getenv('MARIAN_WORKSPACE_MEMORY', "6000")) 6 | 7 | EPSILON = 0.0001 # the precision error we afford in float comparison 8 | 9 | BASE_ARGS = dict( 10 | mini_batch=8, 11 | maxi_batch=64, 12 | cpu_threads=CPU_THREADS, 13 | workspace=WORKSPACE_MEMORY, 14 | quiet=QUIET, 15 | ) 16 | -------------------------------------------------------------------------------- /scripts/mbr/generic/metrics/bleurt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gpus=${1:-8} 4 | scriptPath=$(dirname $0) 5 | root=$scriptPath/../../../../. 
6 | marian=$root/build/marian 7 | bleurt=$root/scripts/bleurt 8 | devices=$(seq 0 $(($gpus-1))) 9 | 10 | # we reverse the input here since the scorer expects "hypref" but we output pseudo-references first 11 | perl -F'\t' -ane 'chomp(@F); print "$F[1]\t$F[0]\n"' \ 12 | | $marian evaluate -m $bleurt/bleurt-20.npz -v $bleurt/bleurt-vocab.{spm,spm} --like bleurt -d $devices --fp16 --quiet 13 | -------------------------------------------------------------------------------- /contrib/triton-aml/marian_backend/cmake/TritonMarianBackendConfig.cmake.in: -------------------------------------------------------------------------------- 1 | include(CMakeFindDependencyMacro) 2 | 3 | get_filename_component( 4 | TRITONMARIANBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH 5 | ) 6 | 7 | list(APPEND CMAKE_MODULE_PATH ${TRITONMARIANBACKEND_CMAKE_DIR}) 8 | 9 | if(NOT TARGET TritonMarianBackend::triton-marian-backend) 10 | include("${TRITONMARIANBACKEND_CMAKE_DIR}/TritonMarianBackendTargets.cmake") 11 | endif() 12 | 13 | set(TRITONMARIANBACKEND_LIBRARIES TritonMarianBackend::triton-marian-backend) 14 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/contrib/graphbuilder.cpp: -------------------------------------------------------------------------------- 1 | #include "graphbuilderadapter.h" 2 | 3 | #include "yaml-cpp/parser.h" // IWYU pragma: keep 4 | 5 | namespace YAML { 6 | class GraphBuilderInterface; 7 | 8 | void* BuildGraphOfNextDocument(Parser& parser, 9 | GraphBuilderInterface& graphBuilder) { 10 | GraphBuilderAdapter eventHandler(graphBuilder); 11 | if (parser.HandleNextDocument(eventHandler)) { 12 | return eventHandler.RootNode(); 13 | } else { 14 | return NULL; 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /scripts/ci/install_mkl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 
https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html 4 | wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB" | sudo apt-key add - 5 | sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list" 6 | sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list" 7 | sudo apt-get install --no-install-recommends intel-mkl-64bit-2020.0-088 8 | -------------------------------------------------------------------------------- /src/3rd_party/SQLiteCpp/sqlite3/README.md: -------------------------------------------------------------------------------- 1 | sqlite3 2 | ------- 3 | 4 | "sqlite3.c" and "sqlite3.h" files from sqlite-amalgamation-3120200.zip (SQLite 3.12.2 2016-04-18) 5 | 6 | Those files are provided for easy setup and compatibility under Windows/Linux/MacOS. 7 | They are used by default by the CMake build. 8 | 9 | Use -DSQLITECPP_INTERNAL_SQLITE=OFF to link against the Linux "libsqlite3-dev" package instead. 10 | 11 | ### License: 12 | 13 | All of the code and documentation in SQLite has been dedicated to the public domain by the authors. 14 | 15 | -------------------------------------------------------------------------------- /src/common/project_version.h.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * File project_version.h is generated using CMake. Do NOT modify it manually! Edit 5 | * project_version.h.in file instead. 6 | */ 7 | 8 | // e.g. v1.2.3-beta+1.abc123d 9 | #define PROJECT_VERSION_FULL "@PROJECT_VERSION_STRING_FULL@" 10 | // e.g. 
v1.2.3-beta 11 | #define PROJECT_VERSION "@PROJECT_VERSION_STRING@" 12 | #define PROJECT_VERSION_MAJOR @PROJECT_VERSION_MAJOR@ 13 | #define PROJECT_VERSION_MINOR @PROJECT_VERSION_MINOR@ 14 | #define PROJECT_VERSION_PATCH @PROJECT_VERSION_PATCH@ 15 | -------------------------------------------------------------------------------- /src/command/marian_decoder.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | #include "translator/beam_search.h" 3 | #include "translator/translator.h" 4 | #include "common/timer.h" 5 | 6 | #ifdef _WIN32 7 | #include 8 | #endif 9 | 10 | int main(int argc, char** argv) { 11 | using namespace marian; 12 | auto options = parseOptions(argc, argv, cli::mode::translation); 13 | auto task = New>(options); 14 | 15 | timer::Timer timer; 16 | task->run(); 17 | LOG(info, "Total time: {:.5f}s wall", timer.elapsed()); 18 | 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/glog-bench.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include "glog/logging.h" 7 | 8 | 9 | int main(int, char* argv[]) 10 | { 11 | int howmany = 1000000; 12 | 13 | 14 | FLAGS_logtostderr = 0; 15 | FLAGS_log_dir = "logs"; 16 | google::InitGoogleLogging(argv[0]); 17 | for(int i = 0 ; i < howmany; ++i) 18 | LOG(INFO) << "glog message # " << i << ": This is some text for your pleasure"; 19 | 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /vs/BuildRelease.bat: -------------------------------------------------------------------------------- 1 | :: 2 | :: Usage: BuildRelease.bat [=.\build] 3 | :: 4 | :: This script runs the dependency checks, generate the projects/makefiles and then 5 | :: build the project in Release configuration. 6 | :: 7 | :: 8 | @echo off 9 | setlocal 10 | 11 | set ROOT=%~dp0 12 | set MARIAN_ROOT=%ROOT%.. 13 | 14 | set BUILD_ROOT=%1 15 | if "%BUILD_ROOT%"=="" set BUILD_ROOT=%ROOT%build 16 | 17 | call CreateVSProjects.bat %BUILD_ROOT% 18 | if errorlevel 1 exit /b 1 19 | 20 | cmake --build %BUILD_ROOT% --config Release 21 | 22 | exit /b 0 -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/directives.cpp: -------------------------------------------------------------------------------- 1 | #include "directives.h" 2 | 3 | namespace YAML { 4 | Directives::Directives() { 5 | // version 6 | version.isDefault = true; 7 | version.major = 1; 8 | version.minor = 2; 9 | } 10 | 11 | const std::string Directives::TranslateTagHandle( 12 | const std::string& handle) const { 13 | std::map::const_iterator it = tags.find(handle); 14 | if (it == tags.end()) { 15 | if (handle == "!!") 16 | return "tag:yaml.org,2002:"; 17 | return handle; 18 | } 19 | 20 | return it->second; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/anchor.h: 
-------------------------------------------------------------------------------- 1 | #ifndef ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | namespace YAML { 13 | typedef std::size_t anchor_t; 14 | const anchor_t NullAnchor = 0; 15 | } 16 | 17 | #endif // ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 18 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/emitterstyle.h: -------------------------------------------------------------------------------- 1 | #ifndef EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | namespace YAML { 11 | struct EmitterStyle { 12 | enum value { Default, Block, Flow }; 13 | }; 14 | } 15 | 16 | #endif // EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 17 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/type.h: -------------------------------------------------------------------------------- 1 | #ifndef VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | namespace YAML { 11 | struct NodeType { 12 | enum value { Undefined, Null, Scalar, Sequence, Map }; 13 | }; 14 | } 15 | 16 | #endif // 
VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 17 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/install_libcxx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Install libc++ under travis 4 | 5 | svn --quiet co http://llvm.org/svn/llvm-project/libcxx/trunk libcxx 6 | mkdir libcxx/build 7 | (cd libcxx/build && cmake .. -DLIBCXX_CXX_ABI=libstdc++ -DLIBCXX_CXX_ABI_INCLUDE_PATHS="/usr/include/c++/4.6;/usr/include/c++/4.6/x86_64-linux-gnu") 8 | make -C libcxx/build cxx -j2 9 | sudo cp libcxx/build/lib/libc++.so.1.0 /usr/lib/ 10 | sudo cp -r libcxx/build/include/c++/v1 /usr/include/c++/v1/ 11 | sudo ln -sf /usr/lib/libc++.so.1.0 /usr/lib/libc++.so 12 | sudo ln -sf /usr/lib/libc++.so.1.0 /usr/lib/libc++.so.1 13 | -------------------------------------------------------------------------------- /src/functional/defs.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef __CUDACC__ // Compiling with NVCC, host or device code 4 | 5 | #include 6 | #define HOST __host__ 7 | #define DEVICE __device__ 8 | #define DEVICE_INLINE __device__ inline 9 | #define HOST_INLINE __host__ inline 10 | #define HOST_DEVICE __host__ __device__ 11 | #define HOST_DEVICE_INLINE __host__ __device__ inline 12 | 13 | #else // Compiling with GCC or other host compiler 14 | 15 | #define HOST 16 | #define DEVICE 17 | #define DEVICE_INLINE inline 18 | #define HOST_INLINE inline 19 | #define HOST_DEVICE 20 | #define HOST_DEVICE_INLINE inline 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Tests 3 | # 4 | 5 | enable_testing() 6 | 7 | find_package(Threads) 8 | 9 | # Build Catch unit tests 10 | add_library(catch INTERFACE) 11 | 
target_include_directories(catch INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) 12 | 13 | file(GLOB catch_tests LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp *.h *.hpp) 14 | 15 | add_executable(catch_tests ${catch_tests}) 16 | target_link_libraries(catch_tests spdlog ${CMAKE_THREAD_LIBS_INIT}) 17 | add_test(NAME catch_tests COMMAND catch_tests) 18 | file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/logs") 19 | 20 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/emitterdef.h: -------------------------------------------------------------------------------- 1 | #ifndef EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | namespace YAML { 11 | struct EmitterNodeType { 12 | enum value { NoType, Property, Scalar, FlowSeq, BlockSeq, FlowMap, BlockMap }; 13 | }; 14 | } 15 | 16 | #endif // EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66 17 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/easylogging-bench.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | 7 | #include "easylogging++.h" 8 | 9 | _INITIALIZE_EASYLOGGINGPP 10 | 11 | int main(int, char* []) 12 | { 13 | int howmany = 1000000; 14 | 15 | // Load configuration from file 16 | el::Configurations conf("easyl.conf"); 17 | el::Loggers::reconfigureLogger("default", conf); 18 | 19 | for(int i = 0 ; i < howmany; ++i) 20 | LOG(INFO) << "easylog message #" << i << ": This is some text for your pleasure"; 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /contrib/triton-aml/marian_backend/README.md: -------------------------------------------------------------------------------- 1 | Use cmake to build and install in a local directory. 2 | 3 | ``` 4 | $ mkdir build 5 | $ cd build 6 | $ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install .. 7 | $ make install 8 | ``` 9 | 10 | The following required Triton repositories will be pulled and used in 11 | the build. By default the "main" branch/tag will be used for each repo 12 | but the listed CMake argument can be used to override. 13 | 14 | * triton-inference-server/backend: -DTRITON_BACKEND_REPO_TAG=[tag] 15 | * triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag] 16 | * triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] 17 | -------------------------------------------------------------------------------- /contrib/vim/.vimrc: -------------------------------------------------------------------------------- 1 | autocmd BufRead,BufNewFile *.cu set filetype=cpp 2 | augroup cpp 3 | au! 
4 | au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set tabstop=2 5 | au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set shiftwidth=2 6 | au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set expandtab 7 | au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set softtabstop=2 "Insert 2 spaces when tab is pressed 8 | au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set smarttab "Indent instead of tab at start of line 9 | au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set shiftround "Round spaces to nearest shiftwidth multiple 10 | augroup end 11 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/fmt/fmt.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | // 9 | // Include a bundled header-only copy of fmtlib or an external one. 10 | // By default spdlog include its own copy. 11 | // 12 | 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | 15 | #ifndef FMT_HEADER_ONLY 16 | #define FMT_HEADER_ONLY 17 | #endif 18 | #ifndef FMT_USE_WINDOWS_H 19 | #define FMT_USE_WINDOWS_H 0 20 | #endif 21 | #include "spdlog/fmt/bundled/format.h" 22 | 23 | #else //external fmtlib 24 | 25 | #include 26 | 27 | #endif 28 | 29 | -------------------------------------------------------------------------------- /scripts/shortlist/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -v 2 | 3 | mkdir -p bin 4 | 5 | # download and compile fast_align 6 | if [ ! -e bin/fast_align ]; then 7 | git clone https://github.com/clab/fast_align 8 | mkdir -p fast_align/build 9 | cd fast_align/build 10 | cmake .. 11 | make -j4 12 | cp fast_align atools ../../bin 13 | cd ../../ 14 | fi 15 | 16 | # download and compile extract-lex 17 | if [ ! 
-e bin/extract_lex ]; then 18 | git clone https://github.com/marian-nmt/extract-lex 19 | mkdir -p extract-lex/build 20 | cd extract-lex/build 21 | cmake .. 22 | make -j4 23 | cp extract_lex ../../bin 24 | cd ../../ 25 | fi 26 | -------------------------------------------------------------------------------- /src/tensors/gpu/algorithm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tensors/backend.h" 4 | 5 | namespace marian { 6 | namespace gpu { 7 | template 8 | void copy(Ptr backend, const T* begin, const T* end, T* dest); 9 | 10 | template 11 | void fill(Ptr backend, T* begin, T* end, T value); 12 | 13 | template 14 | void swap_ranges(Ptr backend, T* begin, T* end, T* dest); 15 | 16 | void setSparse(Ptr backend, 17 | const std::vector&, 18 | const std::vector&, 19 | float*); 20 | } // namespace gpu 21 | } // namespace marian 22 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/memory.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/node/detail/memory.h" 2 | #include "yaml-cpp/node/detail/node.h" // IWYU pragma: keep 3 | #include "yaml-cpp/node/ptr.h" 4 | 5 | namespace YAML { 6 | namespace detail { 7 | 8 | void memory_holder::merge(memory_holder& rhs) { 9 | if (m_pMemory == rhs.m_pMemory) 10 | return; 11 | 12 | m_pMemory->merge(*rhs.m_pMemory); 13 | rhs.m_pMemory = m_pMemory; 14 | } 15 | 16 | node& memory::create_node() { 17 | shared_node pNode(new node); 18 | m_nodes.insert(pNode); 19 | return *pNode; 20 | } 21 | 22 | void memory::merge(const memory& rhs) { 23 | m_nodes.insert(rhs.m_nodes.begin(), rhs.m_nodes.end()); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/emit.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/node/emit.h" 2 | 
#include "yaml-cpp/emitfromevents.h" 3 | #include "yaml-cpp/emitter.h" 4 | #include "nodeevents.h" 5 | 6 | namespace YAML { 7 | Emitter& operator<<(Emitter& out, const Node& node) { 8 | EmitFromEvents emitFromEvents(out); 9 | NodeEvents events(node); 10 | events.Emit(emitFromEvents); 11 | return out; 12 | } 13 | 14 | std::ostream& operator<<(std::ostream& out, const Node& node) { 15 | Emitter emitter(out); 16 | emitter << node; 17 | return out; 18 | } 19 | 20 | std::string Dump(const Node& node) { 21 | Emitter emitter; 22 | emitter << node; 23 | return emitter.c_str(); 24 | } 25 | } // namespace YAML 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Bug description 11 | Please add a clear and concise description of the bug, including observed and if possible expected behavior. 12 | 13 | ### How to reproduce 14 | Describe steps or include command to reproduce the behavior. 15 | 16 | ### Context 17 | * Marian version: Paste the output of `--version` here 18 | * CMake command: Type the cmake command you used and attach the output of `--build-info all` 19 | * Log file: Attach your training/decoding logs 20 | 21 | Add any other information about the problem here. 
22 | -------------------------------------------------------------------------------- /src/tensors/cpu/device.cpp: -------------------------------------------------------------------------------- 1 | #include "tensors/device.h" 2 | #include "tensors/cpu/aligned.h" 3 | #include 4 | namespace marian { 5 | namespace cpu { 6 | 7 | Device::~Device() { 8 | genericFree(data_); 9 | } 10 | 11 | void Device::reserve(size_t size) { 12 | size = align(size); 13 | ABORT_IF(size < size_ || size == 0, 14 | "New size must be larger than old size and larger than 0"); 15 | 16 | uint8_t *temp = static_cast(genericMalloc(alignment_, size)); 17 | if(data_) { 18 | std::copy(data_, data_ + size_, temp); 19 | genericFree(data_); 20 | } 21 | data_ = temp; 22 | size_ = size; 23 | } 24 | } // namespace cpu 25 | } // namespace marian 26 | -------------------------------------------------------------------------------- /src/translator/nth_element.h: -------------------------------------------------------------------------------- 1 | /* All or part of this file was contributed by Intel under license: 2 | * Copyright (C) 2017-2018 Intel Corporation 3 | * SPDX-License-Identifier: MIT 4 | */ 5 | 6 | #pragma once 7 | 8 | #include "tensors/tensor.h" 9 | #include 10 | 11 | namespace marian { 12 | 13 | typedef std::function& outCosts, 16 | std::vector& outKeys, 17 | const bool isFirst)> GetNBestListFn; 18 | 19 | GetNBestListFn createGetNBestListFn(size_t beamSize, size_t dimBatch, DeviceId deviceId); 20 | } // namespace marian 21 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/spdlog-bench.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include "spdlog/spdlog.h" 7 | 8 | 9 | int main(int, char* []) 10 | { 11 | int howmany = 1000000; 12 | namespace spd = spdlog; 13 | ///Create a file rotating logger with 5mb size max and 3 rotated files 14 | auto logger = spdlog::create("file_logger", "logs/spd-bench-st.txt", false); 15 | 16 | logger->set_pattern("[%Y-%b-%d %T.%e]: %v"); 17 | for(int i = 0 ; i < howmany; ++i) 18 | logger->info("spdlog message #{} : This is some text for your pleasure", i); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /src/models/encoder.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "marian.h" 4 | #include "models/states.h" 5 | 6 | namespace marian { 7 | 8 | class EncoderBase : public EncoderDecoderLayerBase { 9 | public: 10 | EncoderBase(Ptr graph, Ptr options) : 11 | EncoderDecoderLayerBase(graph, options, "encoder", /*batchIndex=*/0, 12 | options->get("dropout-src", 0.0f), 13 | options->get("embedding-fix-src", false)) {} 14 | 15 | // @TODO: turn into an interface. Also see if we can get rid of the graph parameter. 16 | virtual Ptr build(Ptr, Ptr) = 0; 17 | 18 | virtual void clear() = 0; 19 | }; 20 | 21 | } // namespace marian 22 | -------------------------------------------------------------------------------- /src/tests/dropout.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "marian.h" 6 | 7 | using namespace marian; 8 | 9 | int main(int argc, char** argv) { 10 | auto c = parseOptions(argc, argv, cli::mode::scoring, false); 11 | 12 | auto type = c->get("cpu-threads") > 0 13 | ? 
DeviceType::cpu 14 | : DeviceType::gpu; 15 | DeviceId deviceId{0, type}; 16 | 17 | auto g = New(); 18 | g->setDevice(deviceId); 19 | g->reserveWorkspaceMB(512); 20 | 21 | for(int i = 0; i < 10; ++i) { 22 | g->clear(); 23 | auto mask = g->dropoutMask(0.2, {1000, 16384}); 24 | debug(mask, "mask"); 25 | g->forward(); 26 | } 27 | 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/null_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | namespace spdlog 14 | { 15 | namespace sinks 16 | { 17 | 18 | template 19 | class null_sink : public base_sink < Mutex > 20 | { 21 | protected: 22 | void _sink_it(const details::log_msg&) override 23 | {} 24 | 25 | void flush() override 26 | {} 27 | 28 | }; 29 | typedef null_sink null_sink_st; 30 | typedef null_sink null_sink_mt; 31 | 32 | } 33 | } 34 | 35 | -------------------------------------------------------------------------------- /src/microsoft/shortlist/logging/LoggerMacros.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Do NOT include this file directly except in special circumstances. 4 | // (E.g., you want to define macros which call these but don't want to include Logger.h everywhere). 5 | // Normally you should include logging/Logger.h 6 | 7 | #define LOG_WRITE(format, ...) do {\ 8 | abort(); \ 9 | } while (0) 10 | 11 | #define LOG_WRITE_STRING(str) do {\ 12 | abort(); \ 13 | } while (0) 14 | 15 | #define LOG_ERROR(format, ...) do {\ 16 | abort(); \ 17 | } while (0) 18 | 19 | #define LOG_ERROR_AND_THROW(format, ...) do {\ 20 | abort(); \ 21 | } while (0) 22 | 23 | #define DECODING_LOGIC_ERROR(format, ...) 
do {\ 24 | abort(); \ 25 | } while (0) 26 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/scantag.h: -------------------------------------------------------------------------------- 1 | #ifndef SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include "stream.h" 12 | 13 | namespace YAML { 14 | const std::string ScanVerbatimTag(Stream& INPUT); 15 | const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle); 16 | const std::string ScanTagSuffix(Stream& INPUT); 17 | } 18 | 19 | #endif // SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 20 | -------------------------------------------------------------------------------- /src/3rd_party/onnx/protobuf/onnx-ml.pb-wrapper.h: -------------------------------------------------------------------------------- 1 | // protobuf-generated files don't compile clean. This compiles them with warnings 2 | // disabled, without having to disable it for the entire project whole-sale. 
3 | 4 | #pragma once 5 | 6 | #ifdef _MSC_VER 7 | #pragma warning(push) 8 | #pragma warning(disable : 4800 4610 4512 4510 4267 4127 4125 4100 4456) 9 | #endif 10 | #ifdef __GNUC__ 11 | #pragma GCC diagnostic push 12 | #pragma GCC diagnostic ignored "-Wunused-variable" 13 | #pragma GCC diagnostic ignored "-Wsuggest-override" 14 | #endif 15 | 16 | #include "onnx-ml.pb.h" // this is the actual file we include 17 | 18 | #ifdef __GNUC__ 19 | #pragma GCC diagnostic pop 20 | #endif 21 | #ifdef _MSC_VER 22 | #pragma warning(pop) 23 | #endif 24 | -------------------------------------------------------------------------------- /src/functional/functional.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // this header is meant to be included for all operations from the "functional" namespace. 4 | 5 | #include "functional/operands.h" 6 | #include "functional/predicates.h" 7 | #include "functional/operators.h" 8 | 9 | namespace marian { 10 | namespace functional { 11 | 12 | template 13 | using ref = Assignee; 14 | 15 | static ref<1> _1; 16 | static ref<2> _2; 17 | static ref<3> _3; 18 | static ref<4> _4; 19 | static ref<5> _5; 20 | static ref<6> _6; 21 | static ref<7> _7; 22 | static ref<8> _8; 23 | static ref<9> _9; 24 | 25 | const C<0> _0c; 26 | const C<1> _1c; 27 | const C<2> _2c; 28 | const C<-1> _1cneg; 29 | const C<-2> _2cneg; 30 | } // namespace functional 31 | } // namespace marian -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/null_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/sinks/base_sink.h" 9 | #include "spdlog/details/null_mutex.h" 10 | 11 | #include 12 | 13 | namespace spdlog 14 | { 15 | namespace sinks 16 | { 17 | 18 | template 19 | class null_sink : public base_sink < Mutex > 20 | { 21 | protected: 22 | void _sink_it(const details::log_msg&) override 23 | {} 24 | 25 | void _flush() override 26 | {} 27 | 28 | }; 29 | typedef null_sink null_sink_st; 30 | typedef null_sink null_sink_mt; 31 | 32 | } 33 | } 34 | 35 | -------------------------------------------------------------------------------- /src/layers_new/neuralnet.cpp: -------------------------------------------------------------------------------- 1 | #include "layers_new/neuralnet.h" 2 | 3 | namespace marian { 4 | namespace nn { 5 | 6 | // Factory for activation function layers from name as string. 7 | Ptr activationLayerByName(Ptr graph, const std::string& actName) { 8 | // @TODO: lowercase actName first? 
9 | if(actName == "relu") 10 | return New(graph); 11 | else if(actName == "gelu") 12 | return New(graph); 13 | else if(actName == "tanh") 14 | return New(graph); 15 | else if(actName == "sigmoid") 16 | return New(graph); 17 | else if(actName == "swish") 18 | return New(graph); 19 | else 20 | ABORT("Unknown activation function: {}", actName); 21 | } 22 | 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Unit tests 2 | add_subdirectory(units) 3 | 4 | if(NOT MSVC) 5 | # Testing apps 6 | set(APP_TESTS 7 | logger 8 | dropout 9 | sqlite 10 | prod 11 | cli 12 | pooling 13 | # transformer_new 14 | ) 15 | 16 | foreach(test ${APP_TESTS}) 17 | add_executable("test_${test}" "${test}.cpp") 18 | 19 | if(CUDA_FOUND) 20 | target_link_libraries("test_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS}) 21 | else(CUDA_FOUND) 22 | target_link_libraries("test_${test}" marian ${EXT_LIBS}) 23 | endif(CUDA_FOUND) 24 | 25 | set_target_properties("test_${test}" PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") 26 | endforeach(test) 27 | endif(NOT MSVC) -------------------------------------------------------------------------------- /src/3rd_party/CLI/CLI.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Distributed under the 3-Clause BSD License. See accompanying 4 | // file LICENSE or https://github.com/CLIUtils/CLI11 for details. 
5 | 6 | // CLI Library includes 7 | // Order is important for combiner script 8 | 9 | #include "CLI/Version.hpp" 10 | 11 | #include "CLI/Macros.hpp" 12 | 13 | #include "CLI/Optional.hpp" 14 | 15 | #include "CLI/StringTools.hpp" 16 | 17 | #include "CLI/Error.hpp" 18 | 19 | #include "CLI/TypeTools.hpp" 20 | 21 | #include "CLI/Split.hpp" 22 | 23 | #include "CLI/ConfigFwd.hpp" 24 | 25 | #include "CLI/Validators.hpp" 26 | 27 | #include "CLI/FormatterFwd.hpp" 28 | 29 | #include "CLI/Option.hpp" 30 | 31 | #include "CLI/App.hpp" 32 | 33 | #include "CLI/Config.hpp" 34 | 35 | #include "CLI/Formatter.hpp" 36 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/utils.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace utils 13 | { 14 | 15 | template 16 | inline std::string format(const T& value) 17 | { 18 | static std::locale loc(""); 19 | std::stringstream ss; 20 | ss.imbue(loc); 21 | ss << value; 22 | return ss.str(); 23 | } 24 | 25 | template<> 26 | inline std::string format(const double & value) 27 | { 28 | static std::locale loc(""); 29 | std::stringstream ss; 30 | ss.imbue(loc); 31 | ss << std::fixed << std::setprecision(1) << value; 32 | return ss.str(); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/data/rng_engine.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "common/config.h" 7 | 8 | namespace marian { 9 | namespace data { 10 | 11 | /** 12 | * @brief Class providing an engine for pseudo-random number generation. 
13 | */ 14 | class RNGEngine { 15 | protected: 16 | std::mt19937 eng_; 17 | 18 | public: 19 | RNGEngine() : eng_((unsigned int)Config::seed) {} 20 | RNGEngine(size_t seed) : eng_((unsigned int)seed) {} 21 | 22 | std::string getRNGState() { 23 | std::ostringstream oss; 24 | oss << eng_; 25 | return oss.str(); 26 | } 27 | 28 | void setRNGState(std::string engineState) { 29 | std::istringstream iss(engineState); 30 | iss >> eng_; 31 | } 32 | }; 33 | } // namespace data 34 | } // namespace marian 35 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/utils.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace utils 13 | { 14 | 15 | template 16 | inline std::string format(const T& value) 17 | { 18 | static std::locale loc(""); 19 | std::stringstream ss; 20 | ss.imbue(loc); 21 | ss << value; 22 | return ss.str(); 23 | } 24 | 25 | template<> 26 | inline std::string format(const double & value) 27 | { 28 | static std::locale loc(""); 29 | std::stringstream ss; 30 | ss.imbue(loc); 31 | ss << std::fixed << std::setprecision(1) << value; 32 | return ss.str(); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/latency/utils.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace utils 13 | { 14 | 15 | template 16 | inline std::string format(const T& value) 17 | { 18 | static std::locale loc(""); 19 | std::stringstream ss; 20 | ss.imbue(loc); 21 | ss << value; 22 | return ss.str(); 23 | } 24 | 25 | template<> 26 | inline std::string format(const double & value) 27 | { 28 | static std::locale loc(""); 29 | std::stringstream ss; 30 | ss.imbue(loc); 31 | ss << std::fixed << std::setprecision(1) << value; 32 | return ss.str(); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/exceptions.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/exceptions.h" 2 | 3 | namespace YAML { 4 | 5 | // These destructors are defined out-of-line so the vtable is only emitted once. 6 | Exception::~Exception() noexcept {} 7 | ParserException::~ParserException() noexcept {} 8 | RepresentationException::~RepresentationException() noexcept {} 9 | InvalidScalar::~InvalidScalar() noexcept {} 10 | KeyNotFound::~KeyNotFound() noexcept {} 11 | InvalidNode::~InvalidNode() noexcept {} 12 | BadConversion::~BadConversion() noexcept {} 13 | BadDereference::~BadDereference() noexcept {} 14 | BadSubscript::~BadSubscript() noexcept {} 15 | BadPushback::~BadPushback() noexcept {} 16 | BadInsert::~BadInsert() noexcept {} 17 | EmitterException::~EmitterException() noexcept {} 18 | BadFile::~BadFile() noexcept {} 19 | } 20 | -------------------------------------------------------------------------------- /src/3rd_party/zlib/gzclose.c: -------------------------------------------------------------------------------- 1 | /* gzclose.c -- zlib gzclose() function 2 | * Copyright (C) 2004, 2010 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 
| 6 | #include "gzguts.h" 7 | 8 | /* gzclose() is in a separate file so that it is linked in only if it is used. 9 | That way the other gzclose functions can be used instead to avoid linking in 10 | unneeded compression or decompression routines. */ 11 | /* With compression support: returns Z_STREAM_ERROR for a NULL file, otherwise forwards to gzclose_r() for a GZ_READ stream and to gzclose_w() for any other mode. With NO_GZCOMPRESS defined: always forwards to gzclose_r(). */ 12 | int ZEXPORT gzclose(gzFile file) 13 | { 14 | #ifndef NO_GZCOMPRESS 15 | gz_statep state; 16 | 17 | if (file == NULL) 18 | return Z_STREAM_ERROR; 19 | state = (gz_statep)file; 20 | 21 | return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file); 22 | #else 23 | return gzclose_r(file); 24 | #endif 25 | } 26 | -------------------------------------------------------------------------------- /scripts/bert/contrib/chpt2pt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | This script converts *.chpt files to *.pt files, potentially useful for extracting weights only from larger checkpoints. 4 | """ 5 | 6 | import torch 7 | import argparse 8 | 9 | # Create a parser for command line arguments 10 | parser = argparse.ArgumentParser() 11 | 12 | # Add arguments for the source and target files 13 | parser.add_argument("--source", type=str, required=True, help="Path to the source *.chpt file") 14 | parser.add_argument("--target", type=str, required=True, help="Path to the target *.pt file") 15 | 16 | # Parse the command line arguments 17 | args = parser.parse_args() 18 | 19 | # Load the model from the source file 20 | model = torch.load(args.source) 21 | 22 | # Save the model to the target file 23 | torch.save(model, args.target) -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/noncopyable.h: -------------------------------------------------------------------------------- 1 | #ifndef NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 &&
__GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | 12 | namespace YAML { 13 | // this is basically boost::noncopyable 14 | class YAML_CPP_API noncopyable { 15 | protected: 16 | noncopyable() {} 17 | ~noncopyable() {} 18 | 19 | private: 20 | noncopyable(const noncopyable&); 21 | const noncopyable& operator=(const noncopyable&); 22 | }; 23 | } 24 | 25 | #endif // NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 26 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | Please add a clear and concise description of the changes. 3 | 4 | This PR fixes a bug/adds a new feature/refactors the code/does something else. 5 | It is related to issues: #998, #999, ... 6 | 7 | List of changes: 8 | - ... 9 | - ... 10 | - ... 11 | 12 | Added dependencies: none 13 | 14 | ### How to test 15 | Describe how to test your changes, adding command line examples and sample input/output files if relevant. 16 | Point to unit tests or regression tests covering the changes if they have been added. 17 | 18 | Describe how you have tested your code, including OS and the cmake command.
19 | 20 | ### Checklist 21 | 22 | - [ ] I have tested the code manually 23 | - [ ] I have run regression tests 24 | - [ ] I have read and followed CONTRIBUTING.md 25 | - [ ] I have updated CHANGELOG.md 26 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/directives.h: -------------------------------------------------------------------------------- 1 | #ifndef DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | namespace YAML { 14 | struct Version { 15 | bool isDefault; 16 | int major, minor; 17 | }; 18 | 19 | struct Directives { 20 | Directives(); 21 | 22 | const std::string TranslateTagHandle(const std::string& handle) const; 23 | 24 | Version version; 25 | std::map tags; 26 | }; 27 | } 28 | 29 | #endif // DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66 30 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/details/null_mutex.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | // null, no cost dummy "mutex" and dummy "atomic" int 10 | 11 | namespace spdlog 12 | { 13 | namespace details 14 | { 15 | struct null_mutex 16 | { 17 | void lock() {} 18 | void unlock() {} 19 | bool try_lock() 20 | { 21 | return true; 22 | } 23 | }; 24 | 25 | struct null_atomic_int 26 | { 27 | int value; 28 | null_atomic_int() = default; 29 | 30 | null_atomic_int(int val):value(val) 31 | {} 32 | 33 | int load(std::memory_order) const 34 | { 35 | return value; 36 | } 37 | 38 | void store(int val) 39 | { 40 | value = val; 41 | } 42 | }; 43 | 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/python/binding/embedder.hpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | 3 | #include "common/timer.h" 4 | #include "embedder/embedder.h" 5 | #include "models/model_task.h" 6 | 7 | 8 | using namespace marian; 9 | 10 | namespace pymarian { 11 | class PyEmbedder { 12 | private: 13 | Ptr options_; 14 | Ptr> embedder_; 15 | public: 16 | PyEmbedder(const std::string& cliString) { 17 | options_ = parseOptions(cliString, cli::mode::embedding, true); 18 | embedder_ = New>(options_); 19 | } 20 | 21 | int embed() { 22 | //TODO: add options_ override from args to embed() 23 | //TODO: read input from args instead of STDIN 24 | embedder_->run(); 25 | return 0; 26 | } 27 | }; 28 | 29 | } // namespace pymarian -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/details/null_mutex.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | // null, no cost dummy "mutex" and dummy "atomic" int 10 | 11 | namespace spdlog 12 | { 13 | namespace details 14 | { 15 | struct null_mutex 16 | { 17 | void lock() {} 18 | void unlock() {} 19 | bool try_lock() 20 | { 21 | return true; 22 | } 23 | }; 24 | 25 | struct null_atomic_int 26 | { 27 | int value; 28 | null_atomic_int() = default; 29 | 30 | null_atomic_int(int val):value(val) 31 | {} 32 | 33 | int load(std::memory_order) const 34 | { 35 | return value; 36 | } 37 | 38 | void store(int val) 39 | { 40 | value = val; 41 | } 42 | }; 43 | 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/tag.h: -------------------------------------------------------------------------------- 1 | #ifndef TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | namespace YAML { 13 | struct Directives; 14 | struct Token; 15 | 16 | struct Tag { 17 | enum TYPE { 18 | VERBATIM, 19 | PRIMARY_HANDLE, 20 | SECONDARY_HANDLE, 21 | NAMED_HANDLE, 22 | NON_SPECIFIC 23 | }; 24 | 25 | Tag(const Token& token); 26 | const std::string Translate(const Directives& directives); 27 | 28 | TYPE type; 29 | std::string handle, value; 30 | }; 31 | } 32 | 33 | #endif // TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 34 | -------------------------------------------------------------------------------- /src/common/filesystem.cpp: -------------------------------------------------------------------------------- 1 | #include "filesystem.h" 2 | 3 | #ifndef _MSC_VER 4 | // don't include these on Windows: 5 | #include 6 | #include 7 | #include 8 | #endif 
9 | 10 | namespace marian { 11 | namespace filesystem { 12 | 13 | #ifdef _MSC_VER 14 | // Pretend that Windows knows no named pipes. It does, by the way, but 15 | // they seem to be different from pipes on Unix / Linux. See 16 | // https://docs.microsoft.com/en-us/windows/win32/ipc/named-pipes 17 | bool is_fifo(char const* /*path*/) { 18 | return false; 19 | } 20 | #else 21 | bool is_fifo(char const* path) { 22 | struct stat buf; 23 | // If stat() fails (e.g. the path does not exist) buf is not filled in, so report "not a FIFO" instead of reading it. 24 | return stat(path, &buf) == 0 && S_ISFIFO(buf.st_mode); 25 | } 26 | #endif 27 | 28 | bool is_fifo(std::string const& path) { 29 | return is_fifo(path.c_str()); 30 | } 31 | 32 | } // end of namespace marian::filesystem 33 | } // end of namespace marian 34 | -------------------------------------------------------------------------------- /src/common/binary.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/io_item.h" 4 | 5 | #include 6 | #include 7 | 8 | namespace marian { 9 | 10 | const static uint64_t BINARY_FILE_VERSION = 1; 11 | const static uint64_t BINARY_FILE_VERSION_WITH_ENCRYPTION = 2; 12 | 13 | namespace io { 14 | namespace binary { 15 | 16 | void loadItems(const void* current, 17 | std::vector& items, 18 | bool mapped = false); 19 | void loadItems(const std::string& fileName, 20 | std::vector& items); 21 | 22 | io::Item getItem(const void* current, const std::string& vName); 23 | io::Item getItem(const std::string& fileName, const std::string& vName); 24 | 25 | void saveItems(const std::string& fileName, 26 | const std::vector& items); 27 | 28 | } // namespace binary 29 | } // namespace io 30 | } // namespace marian 31 | -------------------------------------------------------------------------------- /src/tests/units/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Unit tests 2 | set(UNIT_TESTS 3 | graph_tests 4 | operator_tests 5 | rnn_tests 6 | attention_tests 7 | fastopt_tests 8 | utils_tests 9 |
binary_tests 10 | transformer_tests 11 | # cosmos_tests # optional, uncomment to test with specific files. 12 | ) 13 | 14 | foreach(test ${UNIT_TESTS}) 15 | add_executable("run_${test}" run_tests.cpp "${test}.cpp") 16 | 17 | if(CUDA_FOUND) 18 | target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch) 19 | else(CUDA_FOUND) 20 | target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch) 21 | endif(CUDA_FOUND) 22 | 23 | if(MSVC) 24 | # Disable C4305: truncation from 'double' to '_Ty' 25 | target_compile_options("run_${test}" PUBLIC /wd4305) 26 | endif(MSVC) 27 | 28 | add_test(NAME ${test} COMMAND "run_${test}") 29 | endforeach(test) 30 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/yaml.h: -------------------------------------------------------------------------------- 1 | #ifndef YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/parser.h" 11 | #include "yaml-cpp/emitter.h" 12 | #include "yaml-cpp/emitterstyle.h" 13 | #include "yaml-cpp/stlemitter.h" 14 | #include "yaml-cpp/exceptions.h" 15 | 16 | #include "yaml-cpp/node/node.h" 17 | #include "yaml-cpp/node/impl.h" 18 | #include "yaml-cpp/node/convert.h" 19 | #include "yaml-cpp/node/iterator.h" 20 | #include "yaml-cpp/node/detail/impl.h" 21 | #include "yaml-cpp/node/parse.h" 22 | #include "yaml-cpp/node/emit.h" 23 | 24 | #endif // YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66 25 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/null.h: -------------------------------------------------------------------------------- 1 | #ifndef NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define 
NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | #include 12 | 13 | namespace YAML { 14 | class Node; 15 | 16 | struct YAML_CPP_API _Null {}; 17 | inline bool operator==(const _Null&, const _Null&) { return true; } 18 | inline bool operator!=(const _Null&, const _Null&) { return false; } 19 | 20 | YAML_CPP_API bool IsNull(const Node& node); // old API only 21 | YAML_CPP_API bool IsNullString(const std::string& str); 22 | 23 | extern YAML_CPP_API _Null Null; 24 | } 25 | 26 | #endif // NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 27 | -------------------------------------------------------------------------------- /src/python/build.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/usr/bin/env bash 3 | 4 | # This script is used to build the Python wheels. 5 | # A requirement is that we have to use older GLIBC versions to ensure maximum compatibility. 6 | # Python folks call it "manylinux" wheels and recommend using docker images to build them. 7 | # official manylinux docs: https://github.com/pypa/manylinux 8 | # But the official manylinux images don't have CUDA support. 9 | # So we use the "pytorch/manylinux-builder" image which has CUDA support. 10 | # Available tags: https://hub.docker.com/r/pytorch/manylinux-builder/tags 11 | 12 | 13 | LINUX_IMAGE="pytorch/manylinux-builder:cuda12.1" 14 | MYDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 15 | MARIAN_ROOT="$( cd "$MYDIR/../.."
&& pwd )" 16 | 17 | set -x 18 | LINUX_BUILDER="src/python/build-manylinux.sh" 19 | MOUNT="/work" 20 | docker run --rm -it -v $MARIAN_ROOT:$MOUNT $LINUX_IMAGE $MOUNT/$LINUX_BUILDER 21 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/Makefile.mingw: -------------------------------------------------------------------------------- 1 | CXX ?= g++ 2 | CXXFLAGS = -D_WIN32_WINNT=0x600 -march=native -Wall -Wextra -Wshadow -pedantic -std=c++11 -pthread -Wl,--no-as-needed -I../include 3 | CXX_RELEASE_FLAGS = -O3 4 | CXX_DEBUG_FLAGS= -g 5 | 6 | 7 | all: example bench 8 | debug: example-debug bench-debug 9 | 10 | example: example.cpp 11 | $(CXX) example.cpp -o example $(CXXFLAGS) $(CXX_RELEASE_FLAGS) 12 | 13 | bench: bench.cpp 14 | $(CXX) bench.cpp -o bench $(CXXFLAGS) $(CXX_RELEASE_FLAGS) 15 | 16 | 17 | example-debug: example.cpp 18 | $(CXX) example.cpp -o example-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS) 19 | 20 | bench-debug: bench.cpp 21 | $(CXX) bench.cpp -o bench-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS) 22 | 23 | 24 | 25 | clean: 26 | rm -f *.o logs/*.txt example example-debug bench bench-debug 27 | 28 | 29 | rebuild: clean all 30 | rebuild-debug: clean debug 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/mark.h: -------------------------------------------------------------------------------- 1 | #ifndef MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | 12 | namespace YAML { 13 | struct YAML_CPP_API Mark { 14 | Mark() : pos(0), line(0), column(0) {} 15 | 16 | static const Mark null_mark() { return Mark(-1, -1, -1); } 17 | 18 | bool is_null() const { return 
pos == -1 && line == -1 && column == -1; } 19 | 20 | int pos; 21 | int line, column; 22 | 23 | private: 24 | Mark(int pos_, int line_, int column_) 25 | : pos(pos_), line(line_), column(column_) {} 26 | }; 27 | } 28 | 29 | #endif // MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 30 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/Makefile.clang: -------------------------------------------------------------------------------- 1 | CXX ?= clang++ 2 | CXXFLAGS = -march=native -Wall -Wextra -Wshadow -pedantic -std=c++11 -pthread -I../include 3 | CXX_RELEASE_FLAGS = -O2 4 | CXX_DEBUG_FLAGS= -g 5 | 6 | 7 | all: example bench 8 | debug: example-debug bench-debug 9 | 10 | example: example.cpp 11 | $(CXX) example.cpp -o example-clang $(CXXFLAGS) $(CXX_RELEASE_FLAGS) 12 | 13 | bench: bench.cpp 14 | $(CXX) bench.cpp -o bench-clang $(CXXFLAGS) $(CXX_RELEASE_FLAGS) 15 | 16 | 17 | example-debug: example.cpp 18 | $(CXX) example.cpp -o example-clang-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS) 19 | 20 | bench-debug: bench.cpp 21 | $(CXX) bench.cpp -o bench-clang-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS) 22 | 23 | 24 | 25 | clean: 26 | rm -f *.o logs/*.txt example-clang example-clang-debug bench-clang bench-clang-debug 27 | 28 | 29 | rebuild: clean all 30 | rebuild-debug: clean debug 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/detail/iterator_fwd.h: -------------------------------------------------------------------------------- 1 | #ifndef VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | #include 12 | #include 13 | #include 
14 | 15 | namespace YAML { 16 | 17 | namespace detail { 18 | struct iterator_value; 19 | template 20 | class iterator_base; 21 | } 22 | 23 | typedef detail::iterator_base iterator; 24 | typedef detail::iterator_base const_iterator; 25 | } 26 | 27 | #endif // VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66 28 | -------------------------------------------------------------------------------- /src/common/file_utils.cpp: -------------------------------------------------------------------------------- 1 | #include "common/file_utils.h" 2 | #include "common/utils.h" 3 | 4 | namespace marian { 5 | namespace fileutils { 6 | 7 | void cut(const std::string& tsvIn, 8 | Ptr tsvOut, 9 | const std::vector& fields, 10 | size_t numFields, 11 | const std::string& sep /*= "\t"*/) { 12 | std::vector tsvFields(numFields); 13 | std::string line; 14 | io::InputFileStream ioIn(tsvIn); 15 | while(getline(ioIn, line)) { 16 | tsvFields.clear(); 17 | utils::splitTsv(line, tsvFields, numFields); // split tab-separated fields 18 | for(size_t i = 0; i < fields.size(); ++i) { 19 | *tsvOut << tsvFields[fields[i]]; 20 | if(i < fields.size() - 1) 21 | *tsvOut << sep; // concatenating fields with the custom separator 22 | } 23 | *tsvOut << std::endl; 24 | } 25 | }; 26 | 27 | } // namespace fileutils 28 | } // namespace marian 29 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | #include 12 | 13 | namespace YAML { 14 | namespace detail { 15 | class node; 16 | class node_ref; 17 | class node_data; 18 | class memory; 19 | class memory_holder; 20 | 21 | typedef std::shared_ptr shared_node; 22 | typedef std::shared_ptr shared_node_ref; 23 | typedef std::shared_ptr shared_node_data; 24 | typedef std::shared_ptr shared_memory_holder; 25 | typedef std::shared_ptr shared_memory; 26 | } 27 | } 28 | 29 | #endif // VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 30 | -------------------------------------------------------------------------------- /src/optimizers/clippers.cpp: -------------------------------------------------------------------------------- 1 | #include "clippers.h" 2 | 3 | #include "functional/functional.h" 4 | #include "tensors/tensor_operators.h" 5 | 6 | namespace marian { 7 | float ElementwiseClipper::clip(Tensor t, float 
costScalingFactor) { 8 | using namespace functional; 9 | Element(_1 = functional::clip(_1, c_ * costScalingFactor), t); 10 | return 0.f; // dummy 11 | } 12 | 13 | float NormClipper::clip(Tensor t, float costScalingFactor) { 14 | using namespace functional; 15 | float l2Norm = L2Norm(t, allocator_); 16 | float clipValue = c_ * costScalingFactor; 17 | if(l2Norm > clipValue) { 18 | LOG(debug, "Re-scaling gradient by {}", clipValue / l2Norm); 19 | Element(_1 = (clipValue / l2Norm) * _1, t); 20 | } 21 | return l2Norm; 22 | } 23 | 24 | // don't clip, just report L2Norm 25 | float ReportNormClipper::clip(Tensor t, float /*costScalingFactor*/) { 26 | using namespace functional; 27 | return L2Norm(t, allocator_); 28 | } 29 | 30 | } // namespace marian 31 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # You can set these variables from the command line, and also 2 | # from the environment for the first two. 3 | SPHINXOPTS ?= 4 | SPHINXBUILD ?= sphinx-build 5 | SOURCEDIR = . 6 | BUILDDIR = build 7 | 8 | # Put it first so that "make" without argument is like "make help". 9 | help: 10 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 11 | 12 | .PHONY: clean help Makefile 13 | 14 | # Catch-all target: route all unknown targets to Sphinx using the new 15 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
16 | %: Makefile 17 | cp $(SOURCEDIR)/../CONTRIBUTING.md $(SOURCEDIR)/ 18 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 19 | 20 | # Clean target as recommended by Exhale 21 | # https://exhale.readthedocs.io/en/latest/usage.html#optional-create-a-proper-clean-target 22 | clean: 23 | rm -rf doxygen/ api/ $(SOURCEDIR)/CONTRIBUTING.md 24 | @$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 25 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/latency/g3log-crush.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | void CrusherLoop() 7 | { 8 | size_t counter = 0; 9 | while (true) 10 | { 11 | LOGF(INFO, "Some text to crush you machine. thread:"); 12 | if(++counter % 1000000 == 0) 13 | { 14 | std::cout << "Wrote " << counter << " entries" << std::endl; 15 | } 16 | } 17 | } 18 | 19 | 20 | int main(int argc, char** argv) 21 | { 22 | std::cout << "WARNING: This test will exaust all your machine memory and will crush it!" << std::endl; 23 | std::cout << "Are you sure you want to continue ? " << std::endl; 24 | char c; 25 | std::cin >> c; 26 | if (toupper( c ) != 'Y') 27 | return 0; 28 | 29 | auto worker = g3::LogWorker::createLogWorker(); 30 | auto handle= worker->addDefaultLogger(argv[0], "g3log.txt"); 31 | g3::initializeLogging(worker.get()); 32 | CrusherLoop(); 33 | 34 | return 0; 35 | } 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/msvc_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Alexander Dalshov. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #if defined(_MSC_VER) 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | namespace spdlog 19 | { 20 | namespace sinks 21 | { 22 | /* 23 | * MSVC sink (logging using OutputDebugStringA) 24 | */ 25 | template 26 | class msvc_sink : public base_sink < Mutex > 27 | { 28 | public: 29 | explicit msvc_sink() 30 | { 31 | } 32 | 33 | void flush() override 34 | { 35 | } 36 | 37 | protected: 38 | void _sink_it(const details::log_msg& msg) override 39 | { 40 | OutputDebugStringA(msg.formatted.c_str()); 41 | } 42 | }; 43 | 44 | typedef msvc_sink msvc_sink_mt; 45 | typedef msvc_sink msvc_sink_st; 46 | 47 | } 48 | } 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/msvc_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Alexander Dalshov. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #if defined(_MSC_VER) 9 | 10 | #include "spdlog/sinks/base_sink.h" 11 | #include "spdlog/details/null_mutex.h" 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | namespace spdlog 19 | { 20 | namespace sinks 21 | { 22 | /* 23 | * MSVC sink (logging using OutputDebugStringA) 24 | */ 25 | template 26 | class msvc_sink : public base_sink < Mutex > 27 | { 28 | public: 29 | explicit msvc_sink() 30 | { 31 | } 32 | 33 | 34 | 35 | protected: 36 | void _sink_it(const details::log_msg& msg) override 37 | { 38 | OutputDebugStringA(msg.formatted.c_str()); 39 | } 40 | 41 | void _flush() override 42 | {} 43 | }; 44 | 45 | typedef msvc_sink msvc_sink_mt; 46 | typedef msvc_sink msvc_sink_st; 47 | 48 | } 49 | } 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/.gitignore: -------------------------------------------------------------------------------- 1 | # Auto generated files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | *.suo 7 | *.tlog 8 | *.ilk 9 | *.log 10 | *.pdb 11 | *.idb 12 | *.iobj 13 | *.ipdb 14 | *.opensdf 15 | *.sdf 16 | 17 | # Compiled Dynamic libraries 18 | *.so 19 | *.dylib 20 | *.dll 21 | 22 | # Compiled Static libraries 23 | *.lai 24 | *.la 25 | *.a 26 | *.lib 27 | 28 | # Executables 29 | *.exe 30 | *.out 31 | *.app 32 | 33 | # Codelite 34 | .codelite 35 | 36 | # .orig files 37 | *.orig 38 | 39 | # example files 40 | example/* 41 | !example/example.cpp 42 | !example/bench.cpp 43 | !example/utils.h 44 | !example/Makefile* 45 | !example/example.sln 46 | !example/example.vcxproj 47 | !example/CMakeLists.txt 48 | !example/multisink.cpp 49 | !example/jni 50 | 51 | # generated files 52 | generated 53 | 54 | # Cmake 55 | CMakeCache.txt 56 | CMakeFiles 57 | CMakeScripts 58 | Makefile 59 | cmake_install.cmake 60 | install_manifest.txt 61 | /tests/tests.VC.VC.opendb 62 | 
/tests/tests.VC.db 63 | /tests/tests 64 | /tests/logs/file_helper_test.txt 65 | -------------------------------------------------------------------------------- /scripts/metrics/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eu 3 | MYDIR=$(dirname ${BASH_SOURCE[0]}) 4 | cd $MYDIR 5 | 6 | ENV_NAME=metrics 7 | which conda > /dev/null || (echo "conda not found" && exit 1) 8 | # conda functions are not exported in non-interactive shell, so we source conda.sh 9 | CONDA_BASE=$(conda info --base) 10 | source $CONDA_BASE/etc/profile.d/conda.sh 11 | FOUND="$(conda env list | awk -v name=$ENV_NAME '$1==name { print $1 }')" 12 | 13 | log() { 14 | echo -e "\e[32m$@\e[0m" >&2 15 | } 16 | #### SETUP ######### 17 | if [[ -z "$FOUND" ]]; then 18 | log "Creating conda environment $ENV_NAME" 19 | # create conda environment and install requirements 20 | conda create -n $ENV_NAME python=3.10 21 | conda activate $ENV_NAME 22 | log "Installing requirements" 23 | pip install -r $MYDIR/requirements.txt 24 | else 25 | log "Activating conda environment $ENV_NAME" 26 | conda activate $ENV_NAME 27 | fi 28 | 29 | which pymarian-eval > /dev/null || ( 30 | echo "pymarian-eval not found. 
Please install and return" && exit 1 ) 31 | 32 | ##################### 33 | bash ./compare.sh -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/detail/bool_type.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | namespace YAML { 11 | namespace detail { 12 | struct unspecified_bool { 13 | struct NOT_ALLOWED; 14 | static void true_value(NOT_ALLOWED*) {} 15 | }; 16 | typedef void (*unspecified_bool_type)(unspecified_bool::NOT_ALLOWED*); 17 | } 18 | } 19 | 20 | #define YAML_CPP_OPERATOR_BOOL() \ 21 | operator YAML::detail::unspecified_bool_type() const { \ 22 | return this->operator!() ? 0 \ 23 | : &YAML::detail::unspecified_bool::true_value; \ 24 | } 25 | 26 | #endif // NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 27 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/emit.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | #include "yaml-cpp/dll.h" 14 | 15 | namespace YAML { 16 | class Emitter; 17 | class Node; 18 | 19 | /** 20 | * Emits the node to the given {@link Emitter}. If there is an error in writing, 21 | * {@link Emitter#good} will return false. 
22 | */ 23 | YAML_CPP_API Emitter& operator<<(Emitter& out, const Node& node); 24 | 25 | /** Emits the node to the given output stream. */ 26 | YAML_CPP_API std::ostream& operator<<(std::ostream& out, const Node& node); 27 | 28 | /** Converts the node to a YAML string. */ 29 | YAML_CPP_API std::string Dump(const Node& node); 30 | } // namespace YAML 31 | 32 | #endif // NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 33 | -------------------------------------------------------------------------------- /src/data/batch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "common/definitions.h" 6 | 7 | namespace marian { 8 | namespace data { 9 | 10 | class Batch { 11 | public: 12 | virtual size_t size() const = 0; 13 | virtual size_t words(int /*which*/ = 0) const { return 0; }; 14 | virtual size_t width() const { return 0; }; 15 | 16 | virtual size_t sizeTrg() const { return 0; }; 17 | virtual size_t wordsTrg() const { return 0; }; 18 | virtual size_t widthTrg() const { return 0; }; 19 | 20 | virtual void debug(bool /*printIndices*/ = false) {}; 21 | 22 | virtual std::vector> split(size_t n, size_t sizeLimit = SIZE_MAX) = 0; 23 | 24 | const std::vector& getSentenceIds() const { return sentenceIds_; } 25 | void setSentenceIds(const std::vector& ids) { sentenceIds_ = ids; } 26 | 27 | virtual void setGuidedAlignment(std::vector&&) = 0; 28 | virtual void setDataWeights(const std::vector&) = 0; 29 | virtual ~Batch() {}; 30 | protected: 31 | std::vector sentenceIds_; 32 | }; 33 | } // namespace data 34 | } // namespace marian 35 | -------------------------------------------------------------------------------- /src/data/iterator_facade.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // simplistic replacement for boost::iterator_facade 4 | template 5 | struct IteratorFacade { 6 | // to create DummyIterator inherit from public 
IteratorFacade 7 | // and implement these three functions 8 | virtual bool equal(const Iterator& other) const = 0; 9 | virtual const Item& dereference() const = 0; 10 | virtual void increment() = 0; 11 | 12 | bool operator==(const Iterator& other) const { 13 | return equal(other); 14 | } 15 | 16 | bool operator!=(const Iterator& other) const { 17 | return !equal(other); 18 | } 19 | 20 | const Item& operator*() const { 21 | return dereference(); 22 | } 23 | 24 | // prefix ++ 25 | Iterator& operator++() { 26 | increment(); 27 | return dynamic_cast(*this); 28 | } 29 | 30 | // postfix ++ 31 | Iterator operator++(int) { 32 | auto ret = dynamic_cast(*this); 33 | increment(); 34 | return ret; 35 | } 36 | 37 | const Item* operator->() const { 38 | return &dereference(); 39 | } 40 | }; 41 | -------------------------------------------------------------------------------- /cmake/Tarball.cmake: -------------------------------------------------------------------------------- 1 | # marian-YYYY-MM-DD-revision.tgz 2 | # This combines marian, marian_decoder in a single TAR file for 3 | # execution in MSFT internal tools FLO and Singularity. 
4 | 5 | execute_process( 6 | COMMAND bash -c "TZ=America/Los_Angeles date +%Y-%m-%d" 7 | OUTPUT_VARIABLE TGZ_DATE 8 | OUTPUT_STRIP_TRAILING_WHITESPACE) 9 | 10 | execute_process( 11 | COMMAND git rev-parse --short=7 HEAD 12 | OUTPUT_VARIABLE TGZ_REV 13 | OUTPUT_STRIP_TRAILING_WHITESPACE) 14 | 15 | message("Generating ${CWD}/marian-${TGZ_DATE}-${TGZ_REV}.tgz") 16 | 17 | # check if pigz is available for faster compression 18 | execute_process( 19 | COMMAND bash -c "which pigz || which gzip" 20 | OUTPUT_VARIABLE COMPRESS 21 | OUTPUT_STRIP_TRAILING_WHITESPACE) 22 | 23 | execute_process( 24 | COMMAND tar -I ${COMPRESS} -cvvf "${CWD}/marian-${TGZ_DATE}-${TGZ_REV}.tgz" -C "${CWD}" 25 | marian 26 | marian-decoder 27 | marian-scorer 28 | marian-vocab 29 | marian-conv 30 | WORKING_DIRECTORY "${CWD}") -------------------------------------------------------------------------------- /src/layers/weight.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/options.h" 4 | #include "data/corpus.h" 5 | #include "graph/expression_graph.h" 6 | #include "graph/expression_operators.h" 7 | #include "graph/node_initializers.h" 8 | 9 | namespace marian { 10 | 11 | class WeightingBase { 12 | public: 13 | WeightingBase(){}; 14 | virtual Expr getWeights(Ptr graph, 15 | Ptr batch) 16 | = 0; 17 | virtual void debugWeighting(std::vector /*weightedMask*/, 18 | std::vector /*freqMask*/, 19 | Ptr /*batch*/){}; 20 | virtual ~WeightingBase() {} 21 | }; 22 | 23 | class DataWeighting : public WeightingBase { 24 | protected: 25 | std::string weightingType_; 26 | 27 | public: 28 | DataWeighting(std::string weightingType) 29 | : WeightingBase(), weightingType_(weightingType){}; 30 | Expr getWeights(Ptr graph, Ptr batch) override; 31 | }; 32 | 33 | Ptr WeightingFactory(Ptr options); 34 | } // namespace marian 35 | -------------------------------------------------------------------------------- /src/optimizers/exponential_smoothing.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | #include "functional/functional.h" 5 | #include "tensors/tensor_operators.h" 6 | #include "common/options.h" 7 | 8 | namespace marian { 9 | 10 | /** 11 | * Class implementing exponential smoothing for graph groups. 12 | * The smoothed parameters themselves are not stored in here. 13 | */ 14 | class ExponentialSmoothing { 15 | public: 16 | ExponentialSmoothing(Ptr options) { 17 | mvDecayBy_ = options->get("exponential-smoothing", 0); 18 | refBatchTrgWords_ = options->get("mini-batch-words-ref", 0); // adjust as if our MB size (in target labels) was this value 19 | mvAvg_ = (mvDecayBy_ > 0); 20 | } 21 | 22 | protected: 23 | void updateAvgParams(Tensor paramsAvg, Tensor params, size_t batches, size_t actualBatchTrgWords); 24 | 25 | bool mvAvg_{false}; 26 | float mvDecayBy_{1e-4f}; // decay prior model by this factor 27 | size_t refBatchTrgWords_{0}; // mvDecayBy_ is specified for this batch size (in target words) (0 means not specified) 28 | }; 29 | } // namespace marian 30 | -------------------------------------------------------------------------------- /contrib/other-builds/eclipse/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | marian 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.cdt.managedbuilder.core.genmakebuilder 10 | clean,full,incremental, 11 | 12 | 13 | 14 | 15 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder 16 | full,incremental, 17 | 18 | 19 | 20 | 21 | 22 | org.eclipse.cdt.core.cnature 23 | org.eclipse.cdt.core.ccnature 24 | org.eclipse.cdt.managedbuilder.core.managedBuildNature 25 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature 26 | 27 | 28 | 29 | src 30 | 2 31 | PARENT-1-PROJECT_LOC/src 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/rnn/attention_constructors.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "marian.h" 4 | 5 | #include "layers/factory.h" 6 | #include "rnn/attention.h" 7 | #include "rnn/constructors.h" 8 | #include "rnn/types.h" 9 | 10 | namespace marian { 11 | namespace rnn { 12 | 13 | class AttentionFactory : public InputFactory { 14 | protected: 15 | Ptr state_; 16 | 17 | public: 18 | // AttentionFactory(Ptr graph) : InputFactory(graph) {} 19 | 20 | Ptr construct(Ptr graph) override { 21 | ABORT_IF(!state_, "EncoderState not set"); 22 | return New(graph, options_, state_); 23 | } 24 | 25 | Accumulator set_state(Ptr state) { 26 | state_ = state; 27 | return Accumulator(*this); 28 | } 29 | 30 | int dimAttended() { 31 | ABORT_IF(!state_, "EncoderState not set"); 32 | return state_->getAttended()->shape()[1]; 33 | } 34 | }; 35 | 36 | /** A convenient typedef for constructing RNN attention layers. */ 37 | typedef Accumulator attention; 38 | } // namespace rnn 39 | } // namespace marian 40 | -------------------------------------------------------------------------------- /scripts/contrib/inject_ctt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from __future__ import print_function 4 | 5 | import sys 6 | import argparse 7 | import numpy as np 8 | 9 | DESC = "Add 'decoder_c_tt' required by Amun to a model trained with Marian v1.6.0+" 10 | 11 | 12 | def main(): 13 | args = parse_args() 14 | 15 | print("Loading model {}".format(args.input)) 16 | model = np.load(args.input) 17 | 18 | if "decoder_c_tt" in model: 19 | print("The model already contains 'decoder_c_tt'") 20 | exit() 21 | 22 | print("Adding 'decoder_c_tt' to the model") 23 | amun = {"decoder_c_tt": np.zeros((1, 0))} 24 | for tensor_name in model: 25 | amun[tensor_name] = model[tensor_name] 26 | 27 | print("Saving model...") 28 | np.savez(args.output, **amun) 29 | 30 | 31 | def parse_args(): 32 | parser = 
argparse.ArgumentParser(description=DESC) 33 | parser.add_argument("-i", "--input", help="input model", required=True) 34 | parser.add_argument("-o", "--output", help="output model", required=True) 35 | return parser.parse_args() 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /src/3rd_party/phf/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2015 William Ahern 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to 5 | deal in the Software without restriction, including without limitation the 6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2008-2015 Jesse Beder. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /vs/Marian.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.28307.902 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Marian", "Marian.vcxproj", "{E2F320FE-0C01-4C80-810C-3A92205A29DC}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {E2F320FE-0C01-4C80-810C-3A92205A29DC}.Debug|x64.ActiveCfg = Debug|x64 15 | {E2F320FE-0C01-4C80-810C-3A92205A29DC}.Debug|x64.Build.0 = Debug|x64 16 | {E2F320FE-0C01-4C80-810C-3A92205A29DC}.Release|x64.ActiveCfg = Release|x64 17 | {E2F320FE-0C01-4C80-810C-3A92205A29DC}.Release|x64.Build.0 = Release|x64 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {3B922907-3384-4D39-9CEB-816BF7BB390D} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/formatter.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | namespace spdlog 15 | { 16 | namespace details 17 | { 18 | class flag_formatter; 19 | } 20 | 21 | class formatter 22 | { 23 | public: 24 | virtual ~formatter() {} 25 | virtual void format(details::log_msg& msg) = 0; 26 | }; 27 | 28 | class pattern_formatter : public formatter 29 | { 30 | 31 | public: 32 | explicit pattern_formatter(const std::string& pattern); 33 | pattern_formatter(const pattern_formatter&) = delete; 34 | pattern_formatter& operator=(const pattern_formatter&) = delete; 35 | void format(details::log_msg& msg) override; 36 | private: 37 | const std::string _pattern; 38 | std::vector> _formatters; 39 | void handle_flag(char flag); 40 | void compile_pattern(const std::string& pattern); 41 | }; 42 | } 43 | 44 | #include 45 | 46 | -------------------------------------------------------------------------------- /src/3rd_party/cnpy/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) Carl Rogers, 2011 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/3rd_party/mio/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 https://github.com/mandreyel/ 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/3rd_party/faiss/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/iterator.h: -------------------------------------------------------------------------------- 1 | #ifndef VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | #include "yaml-cpp/node/node.h" 12 | #include "yaml-cpp/node/detail/iterator_fwd.h" 13 | #include "yaml-cpp/node/detail/iterator.h" 14 | #include 15 | #include 16 | #include 17 | 18 | namespace YAML { 19 | namespace detail { 20 | struct iterator_value : public Node, std::pair { 21 | iterator_value() {} 22 | explicit iterator_value(const Node& rhs) 23 | : Node(rhs), 24 | std::pair(Node(Node::ZombieNode), Node(Node::ZombieNode)) {} 25 | explicit iterator_value(const Node& key, const Node& value) 26 | : Node(Node::ZombieNode), std::pair(key, value) {} 27 | }; 28 | } 29 | } 30 | 31 | #endif // VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 32 | -------------------------------------------------------------------------------- /src/command/marian_vocab.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | 3 | #include "common/cli_wrapper.h" 4 | #include "common/logging.h" 5 | #include "data/vocab.h" 6 | 7 | int main(int argc, char** argv) { 8 | using namespace marian; 9 | 10 | createLoggers(); 11 | 12 | Ptr options = New(); 13 | { 14 | YAML::Node config; // @TODO: get rid of YAML::Node here entirely to avoid the pattern. Currently not fixing as it requires more changes to the Options object. 
15 | auto cli = New( 16 | config, 17 | "Create a vocabulary from text corpora given on STDIN", 18 | "Allowed options", 19 | "Examples:\n" 20 | " ./marian-vocab < text.src > vocab.yml\n" 21 | " cat text.src text.trg | ./marian-vocab > vocab.yml"); 22 | cli->add("--max-size,-m", "Generate only UINT most common vocabulary items", 0); 23 | cli->parse(argc, argv); 24 | options->merge(config); 25 | } 26 | 27 | LOG(info, "Creating vocabulary..."); 28 | 29 | auto vocab = New(options, 0); 30 | vocab->create("stdout", "stdin", options->get("max-size")); 31 | 32 | LOG(info, "Finished"); 33 | 34 | return 0; 35 | } 36 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/base_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // base sink templated over a mutex (either dummy or real) 9 | // concrete implementation should only override the _sink_it method. 10 | // all locking is taken care of here so no locking needed by the implementors. 
11 | // 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | namespace spdlog 21 | { 22 | namespace sinks 23 | { 24 | template 25 | class base_sink:public sink 26 | { 27 | public: 28 | base_sink():_mutex() {} 29 | virtual ~base_sink() = default; 30 | 31 | base_sink(const base_sink&) = delete; 32 | base_sink& operator=(const base_sink&) = delete; 33 | 34 | void log(const details::log_msg& msg) override 35 | { 36 | std::lock_guard lock(_mutex); 37 | _sink_it(msg); 38 | } 39 | 40 | protected: 41 | virtual void _sink_it(const details::log_msg& msg) = 0; 42 | Mutex _mutex; 43 | }; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/details/log_msg.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/common.h" 9 | #include "spdlog/details/os.h" 10 | 11 | 12 | #include 13 | #include 14 | 15 | namespace spdlog 16 | { 17 | namespace details 18 | { 19 | struct log_msg 20 | { 21 | log_msg() = default; 22 | log_msg(const std::string *loggers_name, level::level_enum lvl) : 23 | logger_name(loggers_name), 24 | level(lvl), 25 | msg_id(0) 26 | { 27 | #ifndef SPDLOG_NO_DATETIME 28 | time = os::now(); 29 | #endif 30 | 31 | #ifndef SPDLOG_NO_THREAD_ID 32 | thread_id = os::thread_id(); 33 | #endif 34 | } 35 | 36 | log_msg(const log_msg& other) = delete; 37 | log_msg& operator=(log_msg&& other) = delete; 38 | log_msg(log_msg&& other) = delete; 39 | 40 | 41 | const std::string *logger_name; 42 | level::level_enum level; 43 | log_clock::time_point time; 44 | size_t thread_id; 45 | fmt::MemoryWriter raw; 46 | fmt::MemoryWriter formatted; 47 | size_t msg_id; 48 | }; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- 
/src/3rd_party/yaml-cpp/contrib/anchordict.h: -------------------------------------------------------------------------------- 1 | #ifndef ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | #include "../anchor.h" 13 | 14 | namespace YAML { 15 | /** 16 | * An object that stores and retrieves values correlating to {@link anchor_t} 17 | * values. 18 | * 19 | *

Efficient implementation that can make assumptions about how 20 | * {@code anchor_t} values are assigned by the {@link Parser} class. 21 | */ 22 | template 23 | class AnchorDict { 24 | public: 25 | void Register(anchor_t anchor, T value) { 26 | if (anchor > m_data.size()) { 27 | m_data.resize(anchor); 28 | } 29 | m_data[anchor - 1] = value; 30 | } 31 | 32 | T Get(anchor_t anchor) const { return m_data[anchor - 1]; } 33 | 34 | private: 35 | std::vector m_data; 36 | }; 37 | } 38 | 39 | #endif // ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 40 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | 7 | #pragma once 8 | 9 | #include "spdlog/details/log_msg.h" 10 | 11 | namespace spdlog 12 | { 13 | namespace sinks 14 | { 15 | class sink 16 | { 17 | public: 18 | sink() 19 | { 20 | _level = level::trace; 21 | } 22 | 23 | virtual ~sink() {} 24 | virtual void log(const details::log_msg& msg) = 0; 25 | virtual void flush() = 0; 26 | 27 | bool should_log(level::level_enum msg_level) const; 28 | void set_level(level::level_enum log_level); 29 | level::level_enum level() const; 30 | 31 | private: 32 | level_t _level; 33 | 34 | }; 35 | 36 | inline bool sink::should_log(level::level_enum msg_level) const 37 | { 38 | return msg_level >= _level.load(std::memory_order_relaxed); 39 | } 40 | 41 | inline void sink::set_level(level::level_enum log_level) 42 | { 43 | _level.store(log_level); 44 | } 45 | 46 | inline level::level_enum sink::level() const 47 | { 48 | return static_cast(_level.load(std::memory_order_relaxed)); 49 | } 50 | 51 | } 52 | } 53 | 54 | -------------------------------------------------------------------------------- /src/3rd_party/zstr/LICENSE: 
-------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Matei David, Ontario Institute for Cancer Research 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.github/workflows/ios.yml: -------------------------------------------------------------------------------- 1 | name: iOS 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build-macos: 11 | name: iOS CPU-only 12 | runs-on: macos-12 13 | 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v2 17 | with: 18 | submodules: recursive 19 | 20 | - name: Install dependencies 21 | run: brew install boost openblas openssl protobuf 22 | 23 | - name: Configure CMake 24 | run: | 25 | export LDFLAGS="-L/usr/local/opt/openblas/lib" 26 | export CPPFLAGS="-I/usr/local/opt/openblas/include" 27 | mkdir -p build 28 | cd build 29 | cmake .. \ 30 | -DCOMPILE_CPU=on \ 31 | -DCOMPILE_CUDA=off \ 32 | -DCOMPILE_EXAMPLES=on \ 33 | -DCOMPILE_SERVER=off \ 34 | -DCOMPILE_TESTS=on \ 35 | -DUSE_SENTENCEPIECE=on \ 36 | -DCMAKE_TOOLCHAIN_FILE=../cmake/ios.toolchain.cmake \ 37 | -DUSE_SENTENCEPIECE=on \ 38 | -DPLATFORM=OS64 \ 39 | -DDEPLOYMENT_TARGET=13.0 40 | 41 | - name: Compile 42 | working-directory: build 43 | run: cmake --build . 
--config Release -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Marcin Junczys-Dowmunt, the University of Edinburgh, Adam 4 | Mickiewicz University 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Gabi Melman. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/glog-bench-mt.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "glog/logging.h" 11 | 12 | using namespace std; 13 | 14 | int main(int argc, char* argv[]) 15 | { 16 | 17 | int thread_count = 10; 18 | if(argc > 1) 19 | thread_count = atoi(argv[1]); 20 | 21 | int howmany = 1000000; 22 | 23 | FLAGS_logtostderr = 0; 24 | FLAGS_log_dir = "logs"; 25 | google::InitGoogleLogging(argv[0]); 26 | 27 | std::atomic msg_counter {0}; 28 | vector threads; 29 | 30 | for (int t = 0; t < thread_count; ++t) 31 | { 32 | threads.push_back(std::thread([&]() 33 | { 34 | while (true) 35 | { 36 | int counter = ++msg_counter; 37 | if (counter > howmany) break; 38 | LOG(INFO) << "glog message #" << counter << ": This is some text for your pleasure"; 39 | } 40 | })); 41 | } 42 | 43 | 44 | for(auto &t:threads) 45 | { 46 | t.join(); 47 | }; 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /src/common/config_validator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "3rd_party/yaml-cpp/yaml.h" 4 | #include "common/config_parser.h" 5 | 6 | namespace marian { 7 | 8 | class ConfigValidator { 9 | private: 10 | const YAML::Node& config_; 11 | 12 | bool has(const std::string& key) const; 13 | 14 | template 15 | T get(const std::string& key) const { 16 | return config_[key].as(); 17 | } 18 | 19 | // When --dump-config is used, alleviate some constraints, for example, do not 20 | // require --train-sets or --vocabs 21 | bool dumpConfigOnly_{false}; 22 | 23 | void validateOptionsTranslation() const; 24 | void validateOptionsParallelData() const; 25 | void validateOptionsScoring() const; 26 | void validateOptionsTraining() const; 27 | 28 | void validateModelExtension(cli::mode mode) const; 29 | void validateDevices(cli::mode mode) const; 30 | 31 | public: 32 | ConfigValidator(const YAML::Node& 
config); 33 | ConfigValidator(const YAML::Node& config, bool dumpConfigOnly); 34 | virtual ~ConfigValidator(); 35 | 36 | // Validate options according to the given mode. Abort on first validation error 37 | void validateOptions(cli::mode mode) const; 38 | }; 39 | 40 | } // namespace marian 41 | -------------------------------------------------------------------------------- /src/tests/units/utils_tests.cpp: -------------------------------------------------------------------------------- 1 | #include "catch.hpp" 2 | #include "common/utils.h" 3 | 4 | using namespace marian; 5 | 6 | TEST_CASE("utils::splitTsv", "[utils]") { 7 | std::string line1 = "foo bar"; 8 | std::string line2 = "foo bar\tbazz"; 9 | std::string line3 = "foo bar\tbazz\tfoo quux"; 10 | 11 | std::vector fields; 12 | 13 | SECTION("the tab-separated input is split") { 14 | utils::splitTsv(line1, fields, 1); 15 | CHECK( fields.size() == 1 ); 16 | CHECK( fields[0] == "foo bar" ); 17 | 18 | utils::splitTsv(line3, fields, 3); 19 | CHECK( fields == std::vector({"foo bar", "bazz", "foo quux"}) ); 20 | } 21 | 22 | SECTION("the output has at least as many elements as requested") { 23 | utils::splitTsv(line1, fields, 1); 24 | CHECK( fields.size() == 1 ); 25 | 26 | utils::splitTsv(line1, fields, 3); 27 | CHECK( fields.size() == 3 ); 28 | CHECK( fields == std::vector({"foo bar", "", ""}) ); 29 | 30 | utils::splitTsv(line1, fields, 2); 31 | CHECK( fields.size() == 2 ); 32 | CHECK( fields == std::vector({"foo bar", ""}) ); 33 | } 34 | 35 | //SECTION("excessive tab-separated fields abort the execution") {} 36 | } 37 | -------------------------------------------------------------------------------- /src/3rd_party/SQLiteCpp/LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2012-2016 Sebastien Rombauts (sebastien.rombauts@gmail.com) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this 
software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is furnished 10 | to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 19 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 20 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/regex_yaml.cpp: -------------------------------------------------------------------------------- 1 | #include "regex_yaml.h" 2 | 3 | namespace YAML { 4 | // constructors 5 | RegEx::RegEx() : m_op(REGEX_EMPTY) {} 6 | 7 | RegEx::RegEx(REGEX_OP op) : m_op(op) {} 8 | 9 | RegEx::RegEx(char ch) : m_op(REGEX_MATCH), m_a(ch) {} 10 | 11 | RegEx::RegEx(char a, char z) : m_op(REGEX_RANGE), m_a(a), m_z(z) {} 12 | 13 | RegEx::RegEx(const std::string& str, REGEX_OP op) : m_op(op) { 14 | for (std::size_t i = 0; i < str.size(); i++) 15 | m_params.push_back(RegEx(str[i])); 16 | } 17 | 18 | // combination constructors 19 | RegEx operator!(const RegEx& ex) { 20 | RegEx ret(REGEX_NOT); 21 | ret.m_params.push_back(ex); 22 | return ret; 23 | } 24 | 25 | RegEx operator||(const RegEx& ex1, const RegEx& ex2) { 26 | RegEx ret(REGEX_OR); 27 | ret.m_params.push_back(ex1); 28 | ret.m_params.push_back(ex2); 29 | return ret; 30 | } 31 | 32 | RegEx operator&&(const RegEx& ex1, const RegEx& ex2) { 33 | RegEx ret(REGEX_AND); 34 | ret.m_params.push_back(ex1); 35 | ret.m_params.push_back(ex2); 36 | return ret; 37 | } 38 | 39 | RegEx operator+(const RegEx& ex1, const RegEx& ex2) { 40 | RegEx ret(REGEX_SEQ); 41 | ret.m_params.push_back(ex1); 42 | ret.m_params.push_back(ex2); 43 | return ret; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /cmake/FindNCCL.cmake: -------------------------------------------------------------------------------- 1 | set(NCCL_INC_PATHS 2 | /usr/include 3 | /usr/local/include 4 | /usr/local/cuda/include 5 | $ENV{NCCL_DIR}/include 6 | $ENV{CUDA_TOOLKIT_ROOT_DIRCUDA_ROOT}/include 7 | ) 8 | 9 | set(NCCL_LIB_PATHS 10 | /lib 11 | /lib64 12 | /usr/lib 13 | /usr/lib64 14 | /usr/local/lib 15 | /usr/local/lib64 16 | /usr/local/cuda/lib64 17 | $ENV{NCCL_DIR}/lib64 18 | $ENV{CUDA_TOOLKIT_ROOT_DIR}/lib64 19 | /usr/local/cuda/lib 
20 | $ENV{NCCL_DIR}/lib 21 | $ENV{CUDA_TOOLKIT_ROOT_DIR}/lib 22 | ) 23 | 24 | find_path(NCCL_INCLUDE_DIR NAMES nccl.h PATHS ${NCCL_INC_PATHS}) 25 | 26 | if (USE_STATIC_LIBS) 27 | message(STATUS "Trying to find static NCCL library") 28 | find_library(NCCL_LIBRARIES NAMES libnccl_static.a PATHS ${NCCL_LIB_PATHS}) 29 | else (USE_STATIC_LIBS) 30 | find_library(NCCL_LIBRARIES NAMES nccl PATHS ${NCCL_LIB_PATHS}) 31 | endif (USE_STATIC_LIBS) 32 | 33 | include(FindPackageHandleStandardArgs) 34 | find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIR NCCL_LIBRARIES) 35 | 36 | if (NCCL_FOUND) 37 | message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIR}, library: ${NCCL_LIBRARIES})") 38 | mark_as_advanced(NCCL_INCLUDE_DIR NCCL_LIBRARIES) 39 | endif () 40 | -------------------------------------------------------------------------------- /src/common/cli_helper.cpp: -------------------------------------------------------------------------------- 1 | #include "common/cli_helper.h" 2 | #include "common/filesystem.h" 3 | 4 | namespace marian { 5 | namespace cli { 6 | 7 | void makeAbsolutePaths(YAML::Node& config, 8 | const std::string& configPath, 9 | const std::set& PATHS) { 10 | auto configDir = filesystem::Path{configPath}.parentPath(); 11 | 12 | auto transformFunc = [&](const std::string& nodePath) -> std::string { 13 | // Catch stdin/stdout and do not process 14 | if(nodePath == "stdin" || nodePath == "stdout") 15 | return nodePath; 16 | 17 | // replace relative path w.r.t. 
config directory 18 | try { 19 | return canonical(filesystem::Path{nodePath}, configDir).string(); 20 | } catch(filesystem::FilesystemError& e) { 21 | // will fail if file does not exist; use parent in that case 22 | std::cerr << e.what() << std::endl; 23 | auto parentPath = filesystem::Path{nodePath}.parentPath(); 24 | return (canonical(parentPath, configDir) 25 | / filesystem::Path{nodePath}.filename()) 26 | .string(); 27 | } 28 | }; 29 | 30 | processPaths(config, transformFunc, PATHS); 31 | } 32 | 33 | } // namespace cli 34 | } // namespace marian 35 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/indentation.h: -------------------------------------------------------------------------------- 1 | #ifndef INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | #include "yaml-cpp/ostream_wrapper.h" 14 | 15 | namespace YAML { 16 | struct Indentation { 17 | Indentation(std::size_t n_) : n(n_) {} 18 | std::size_t n; 19 | }; 20 | 21 | inline ostream_wrapper& operator<<(ostream_wrapper& out, 22 | const Indentation& indent) { 23 | for (std::size_t i = 0; i < indent.n; i++) 24 | out << ' '; 25 | return out; 26 | } 27 | 28 | struct IndentTo { 29 | IndentTo(std::size_t n_) : n(n_) {} 30 | std::size_t n; 31 | }; 32 | 33 | inline ostream_wrapper& operator<<(ostream_wrapper& out, 34 | const IndentTo& indent) { 35 | while (out.col() < indent.n) 36 | out << ' '; 37 | return out; 38 | } 39 | } 40 | 41 | #endif // INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66 42 | -------------------------------------------------------------------------------- /src/tests/logger.cpp: 
-------------------------------------------------------------------------------- 1 | #include "common/timer.h" 2 | #include "common/logging.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | // small test program for playing around with spdlog formatting of messages 9 | 10 | std::shared_ptr stderrLoggerTest( 11 | const std::string& name, 12 | const std::string& pattern, 13 | const std::vector& files) { 14 | std::vector sinks; 15 | 16 | auto stderr_sink = spdlog::sinks::stderr_sink_mt::instance(); 17 | sinks.push_back(stderr_sink); 18 | 19 | for(auto&& file : files) { 20 | auto file_sink 21 | = std::make_shared(file, true); 22 | sinks.push_back(file_sink); 23 | } 24 | 25 | auto logger 26 | = std::make_shared(name, begin(sinks), end(sinks)); 27 | 28 | spdlog::register_logger(logger); 29 | logger->set_pattern(pattern); 30 | return logger; 31 | } 32 | 33 | int main() { 34 | std::vector logfiles; 35 | Logger info(stderrLoggerTest("info", "[%Y-%m-%d %T] %v", logfiles)); 36 | 37 | info->info("hello {:06.2f}", .7); 38 | 39 | marian::timer::Timer timer; 40 | 41 | info->info("time is {:.5f} bla {:.2f}", timer.elapsed(), .7); 42 | } 43 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/detail/memory.h: -------------------------------------------------------------------------------- 1 | #ifndef VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | #include "yaml-cpp/dll.h" 13 | #include "yaml-cpp/node/ptr.h" 14 | 15 | namespace YAML { 16 | namespace detail { 17 | class node; 18 | } // namespace detail 19 | } // namespace YAML 20 | 21 | namespace YAML { 22 | namespace detail { 23 | class YAML_CPP_API memory { 24 
| public: 25 | node& create_node(); 26 | void merge(const memory& rhs); 27 | 28 | private: 29 | typedef std::set Nodes; 30 | Nodes m_nodes; 31 | }; 32 | 33 | class YAML_CPP_API memory_holder { 34 | public: 35 | memory_holder() : m_pMemory(new memory) {} 36 | 37 | node& create_node() { return m_pMemory->create_node(); } 38 | void merge(memory_holder& rhs); 39 | 40 | private: 41 | shared_memory m_pMemory; 42 | }; 43 | } 44 | } 45 | 46 | #endif // VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66 47 | -------------------------------------------------------------------------------- /scripts/mbr/generic/explode_collapse.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # Helper script that takes the sample file with N samples and M references (first M among N samples) 4 | # and creates deduped(!) N' x M' pairs (N' is N after deduplication, same for M') for scoring. 5 | # Creating the pairs is "exploding", deduping is "collapsing", hence the name. 6 | # Includes ids so that the original order from before deduplication can be restored. 7 | 8 | my $N = $ARGV[0]; 9 | my $R = $ARGV[1]; 10 | $R = $N if not defined($R); 11 | 12 | sub explodeCollapse { 13 | my $id = shift; 14 | my @samples = @_; 15 | 16 | my %cnd; 17 | foreach(@samples) { 18 | $cnd{$_} = scalar keys %cnd if not exists($cnd{$_}); 19 | } 20 | 21 | my @uniq = sort { $cnd{$a} <=> $cnd{$b} } keys %cnd; 22 | foreach my $t (@uniq) { 23 | my $c = 0; 24 | foreach my $r (@uniq) { 25 | last if($c >= $R); 26 | # this outputs the pseudo-reference first! 
27 | printf("%d\t%d\t%d\t%s\t%s\n", $id, $cnd{$r}, $cnd{$t}, $r, $t); 28 | $c++; 29 | } 30 | } 31 | } 32 | 33 | my @samples; 34 | my $id = 0; 35 | while() { 36 | chomp; 37 | push(@samples, $_); 38 | if(@samples == $N) { 39 | explodeCollapse($id, @samples); 40 | @samples = (); 41 | $id++; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/training/graph_group_singleton.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "training/graph_group.h" 4 | #include "common/filesystem.h" 5 | 6 | #include 7 | 8 | namespace marian { 9 | 10 | /** 11 | * Single GPU training 12 | */ 13 | class SingletonGraph : public GraphGroup { 14 | public: 15 | virtual void setScheduler(Ptr scheduler) override; 16 | 17 | private: 18 | void execute(Ptr batch); 19 | 20 | public: 21 | SingletonGraph(Ptr options, Ptr mpi) 22 | : GraphGroup(options, mpi) { 23 | 24 | LOG(warn, "This class only serves demonstration purposes. 
It should currently not be called from actual Marian code."); 25 | ABORT_IF(mpi->numMPIProcesses() != 1, "SingletonGraph does not support multiple MPI processes"); 26 | ABORT_IF(devices_.size() != 1, "Only one device ID should be provided for singleton training"); 27 | } 28 | 29 | void update(Ptr batch) override { 30 | validate(); 31 | execute(batch); 32 | } 33 | 34 | Ptr collectStats(const std::vector>& vocabs) override { 35 | return GraphGroup::collectStats(graphs_[0], models_[0], vocabs); 36 | } 37 | 38 | virtual void finalize() override { finalized_ = true; } 39 | }; 40 | } // namespace marian 41 | -------------------------------------------------------------------------------- /src/translator/output_printer.cpp: -------------------------------------------------------------------------------- 1 | #include "output_printer.h" 2 | 3 | #include 4 | 5 | namespace marian { 6 | 7 | std::string OutputPrinter::getAlignment(const Hypothesis::PtrType& hyp) { 8 | data::SoftAlignment align; 9 | auto last = hyp; 10 | // get soft alignments for each target word starting from the last one 11 | while(last->getPrevHyp().get() != nullptr) { 12 | align.push_back(last->getAlignment()); 13 | last = last->getPrevHyp(); 14 | } 15 | 16 | // reverse alignments 17 | std::reverse(align.begin(), align.end()); 18 | 19 | if(alignment_ == "soft") { 20 | return data::SoftAlignToString(align); 21 | } else if(alignment_ == "hard") { 22 | return data::ConvertSoftAlignToHardAlign(align, 1.f).toString(); 23 | } else if(alignmentThreshold_ > 0.f) { 24 | return data::ConvertSoftAlignToHardAlign(align, alignmentThreshold_).toString(); 25 | } else { 26 | ABORT("Unrecognized word alignment type"); 27 | } 28 | } 29 | 30 | std::string OutputPrinter::getWordScores(const Hypothesis::PtrType& hyp) { 31 | std::ostringstream scores; 32 | scores.precision(5); 33 | for(const auto& score : hyp->tracebackWordScores()) 34 | scores << " " << std::fixed << score; 35 | return scores.str(); 36 | } 37 | 38 | } // 
namespace marian 39 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/ostream_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace spdlog 15 | { 16 | namespace sinks 17 | { 18 | template 19 | class ostream_sink: public base_sink 20 | { 21 | public: 22 | explicit ostream_sink(std::ostream& os, bool force_flush=false) :_ostream(os), _force_flush(force_flush) {} 23 | ostream_sink(const ostream_sink&) = delete; 24 | ostream_sink& operator=(const ostream_sink&) = delete; 25 | virtual ~ostream_sink() = default; 26 | 27 | protected: 28 | void _sink_it(const details::log_msg& msg) override 29 | { 30 | _ostream.write(msg.formatted.data(), msg.formatted.size()); 31 | if (_force_flush) 32 | _ostream.flush(); 33 | } 34 | 35 | void flush() override 36 | { 37 | _ostream.flush(); 38 | } 39 | 40 | std::ostream& _ostream; 41 | bool _force_flush; 42 | }; 43 | 44 | typedef ostream_sink ostream_sink_mt; 45 | typedef ostream_sink ostream_sink_st; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/collectionstack.h: -------------------------------------------------------------------------------- 1 | #ifndef COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | namespace YAML { 14 | struct CollectionType { 15 | enum value { NoCollection, BlockMap, BlockSeq, FlowMap, FlowSeq, 
CompactMap }; 16 | }; 17 | 18 | class CollectionStack { 19 | public: 20 | CollectionType::value GetCurCollectionType() const { 21 | if (collectionStack.empty()) 22 | return CollectionType::NoCollection; 23 | return collectionStack.top(); 24 | } 25 | 26 | void PushCollectionType(CollectionType::value type) { 27 | collectionStack.push(type); 28 | } 29 | void PopCollectionType(CollectionType::value type) { 30 | assert(type == GetCurCollectionType()); type; 31 | collectionStack.pop(); 32 | } 33 | 34 | private: 35 | std::stack collectionStack; 36 | }; 37 | } 38 | 39 | #endif // COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 40 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/example.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.25420.1 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example", "example.vcxproj", "{9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release|Win32 = Release|Win32 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Debug|Win32.ActiveCfg = Debug|Win32 17 | {9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Debug|Win32.Build.0 = Debug|Win32 18 | {9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Debug|x64.ActiveCfg = Debug|Win32 19 | {9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Release|Win32.ActiveCfg = Release|Win32 20 | {9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Release|Win32.Build.0 = Release|Win32 21 | {9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Release|x64.ActiveCfg = Release|Win32 22 | EndGlobalSection 23 | GlobalSection(SolutionProperties) = 
preSolution 24 | HideSolutionNode = FALSE 25 | EndGlobalSection 26 | EndGlobal 27 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "examples"] 2 | path = examples 3 | url = https://github.com/marian-nmt/marian-examples 4 | [submodule "regression-tests"] 5 | path = regression-tests 6 | url = https://github.com/marian-nmt/marian-regression-tests 7 | [submodule "src/3rd_party/sentencepiece"] 8 | path = src/3rd_party/sentencepiece 9 | url = https://github.com/marian-nmt/sentencepiece 10 | [submodule "src/3rd_party/nccl"] 11 | path = src/3rd_party/nccl 12 | url = https://github.com/marian-nmt/nccl 13 | [submodule "src/3rd_party/fbgemm"] 14 | path = src/3rd_party/fbgemm 15 | url = https://github.com/marian-nmt/FBGEMM 16 | branch = master 17 | [submodule "src/3rd_party/intgemm"] 18 | path = src/3rd_party/intgemm 19 | url = https://github.com/marian-nmt/intgemm/ 20 | [submodule "src/3rd_party/simple-websocket-server"] 21 | path = src/3rd_party/simple-websocket-server 22 | url = https://github.com/marian-nmt/Simple-WebSocket-Server 23 | [submodule "src/3rd_party/ruy"] 24 | path = src/3rd_party/ruy 25 | url = https://github.com/marian-nmt/ruy.git 26 | [submodule "src/3rd_party/simd_utils"] 27 | path = src/3rd_party/simd_utils 28 | url = https://github.com/marian-nmt/simd_utils.git 29 | [submodule "src/3rd_party/pybind11"] 30 | path = src/3rd_party/pybind11 31 | url = https://github.com/pybind/pybind11.git 32 | -------------------------------------------------------------------------------- /scripts/metrics/README.md: -------------------------------------------------------------------------------- 1 | # Marian Metrics 2 | 3 | The main script is `compare.sh`, however it needs to be run in an environment where all three -- marian, unbabel-comet(pytorch), and bleurt(tensorflow) are available. 
Hence we create a new Python environment using conda to run the comparisons.

## Setup

```bash
./run.sh
```
This sets up a conda environment named `metrics` that has all the necessary requirements except `pymarian-eval`, which you have to install yourself according to your CMake settings:
```bash
# from the root dir of this repository
conda activate metrics
mkdir build; cd build
cmake .. -DPYMARIAN=on #.. other flags
pip install pymarian-*.whl
```

## Run Compare.sh

```bash

# option 1:
./run.sh

# option 2
conda activate metrics
bash compare.sh
```

This script produces reports at `workspace/*.report.txt`, which show the average difference in segment-level scores between the original implementation and `pymarian-eval`.

## Convert Metrics Weights to Marian format

```bash
conda activate metrics
MARIAN=../build/marian ./convert-all-models.sh
```

To add a new model ID, edit the `known-models.txt` file in the same directory as this README.
size : max_size; 24 | os.write(data, static_cast(n)); 25 | data += n; 26 | size -= n; 27 | } while (size != 0); 28 | } 29 | } 30 | 31 | FMT_FUNC void print(std::ostream &os, CStringRef format_str, ArgList args) { 32 | MemoryWriter w; 33 | w.write(format_str, args); 34 | write(os, w); 35 | } 36 | 37 | FMT_FUNC int fprintf(std::ostream &os, CStringRef format, ArgList args) { 38 | MemoryWriter w; 39 | printf(w, format, args); 40 | write(os, w); 41 | return static_cast(w.size()); 42 | } 43 | } // namespace fmt 44 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/ostream_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/details/null_mutex.h" 9 | #include "spdlog/sinks/base_sink.h" 10 | 11 | #include 12 | #include 13 | 14 | namespace spdlog 15 | { 16 | namespace sinks 17 | { 18 | template 19 | class ostream_sink: public base_sink 20 | { 21 | public: 22 | explicit ostream_sink(std::ostream& os, bool force_flush=false) :_ostream(os), _force_flush(force_flush) {} 23 | ostream_sink(const ostream_sink&) = delete; 24 | ostream_sink& operator=(const ostream_sink&) = delete; 25 | virtual ~ostream_sink() = default; 26 | 27 | protected: 28 | void _sink_it(const details::log_msg& msg) override 29 | { 30 | _ostream.write(msg.formatted.data(), msg.formatted.size()); 31 | if (_force_flush) 32 | _ostream.flush(); 33 | } 34 | 35 | void _flush() override 36 | { 37 | _ostream.flush(); 38 | } 39 | 40 | std::ostream& _ostream; 41 | bool _force_flush; 42 | }; 43 | 44 | typedef ostream_sink ostream_sink_mt; 45 | typedef ostream_sink ostream_sink_st; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /scripts/server/client_example.py: 
if __name__ == "__main__":
    # handle command-line options
    parser = argparse.ArgumentParser()
    parser.add_argument("-b", "--batch-size", type=int, default=1)
    parser.add_argument("-p", "--port", type=int, default=8080)
    args = parser.parse_args()

    # open connection
    ws = create_connection("ws://localhost:{}/translate".format(args.port))

    def translate(text):
        """Send one batch of input lines to the server and print its reply."""
        ws.send(text)
        result = ws.recv()
        print(result.rstrip())

    try:
        count = 0
        batch = ""
        for line in sys.stdin:
            count += 1
            # Python 2 yields byte strings from stdin; Python 3 yields text.
            batch += line.decode('utf-8') if sys.version_info < (3, 0) else line
            if count == args.batch_size:
                # translate the batch
                translate(batch)

                count = 0
                batch = ""

        if count:
            # translate the remaining sentences
            translate(batch)
    finally:
        # close connection, also when reading stdin or talking to the
        # server raised an exception
        ws.close()
// Don't clip, just report the Frobenius norm.
// Offers the same interface as the other Clipper implementations so it can be
// used in their place.
class ReportNormClipper : public Clipper {
public:
  // The argument is accepted for signature compatibility only; the parameter
  // is unnamed and no value is stored.
  ReportNormClipper(float /*c = 1.0*/) {}
  ~ReportNormClipper() override {}

  // Defined out of line. NOTE(review): per the class comment this is expected
  // to leave `t` unmodified and return its norm -- confirm in the .cpp/.cu file.
  float clip(Tensor t, float costScalingFactor = 1.f) override;
};
// Reads the whole file `filename` and returns its contents as a string.
// Throws std::runtime_error naming the file when it cannot be opened.
std::string file_contents(const std::string& filename)
{
    std::ifstream ifs(filename);
    if (!ifs)
        throw std::runtime_error("Failed open file " + filename);
    return std::string((std::istreambuf_iterator<char>(ifs)),
                       (std::istreambuf_iterator<char>()));
}
{ 43 | std::ifstream ifs(filename, std::ifstream::ate | std::ifstream::binary); 44 | if (!ifs) 45 | throw std::runtime_error("Failed open file "); 46 | 47 | return static_cast(ifs.tellg()); 48 | } 49 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/easylogging-bench-mt.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #define _ELPP_THREAD_SAFE 11 | #include "easylogging++.h" 12 | _INITIALIZE_EASYLOGGINGPP 13 | 14 | using namespace std; 15 | 16 | int main(int argc, char* argv[]) 17 | { 18 | 19 | int thread_count = 10; 20 | if(argc > 1) 21 | thread_count = atoi(argv[1]); 22 | 23 | int howmany = 1000000; 24 | 25 | // Load configuration from file 26 | el::Configurations conf("easyl.conf"); 27 | el::Loggers::reconfigureLogger("default", conf); 28 | 29 | std::atomic msg_counter {0}; 30 | vector threads; 31 | 32 | for (int t = 0; t < thread_count; ++t) 33 | { 34 | threads.push_back(std::thread([&]() 35 | { 36 | while (true) 37 | { 38 | int counter = ++msg_counter; 39 | if (counter > howmany) break; 40 | LOG(INFO) << "easylog message #" << counter << ": This is some text for your pleasure"; 41 | } 42 | })); 43 | } 44 | 45 | 46 | for(auto &t:threads) 47 | { 48 | t.join(); 49 | }; 50 | 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /cmake/FindTcmalloc.cmake: -------------------------------------------------------------------------------- 1 | # - Find Tcmalloc 2 | # Find the native Tcmalloc includes and library 3 | # 4 | # Tcmalloc_INCLUDE_DIR - where to find Tcmalloc.h, etc. 5 | # Tcmalloc_LIBRARIES - List of libraries when using Tcmalloc. 6 | # Tcmalloc_FOUND - True if Tcmalloc found. 
7 | 8 | find_path(Tcmalloc_INCLUDE_DIR google/tcmalloc.h) 9 | 10 | if (USE_TCMALLOC) 11 | set(Tcmalloc_NAMES tcmalloc) 12 | else () 13 | set(Tcmalloc_NAMES tcmalloc_minimal tcmalloc) 14 | endif () 15 | 16 | find_library(Tcmalloc_LIBRARY 17 | NAMES ${Tcmalloc_NAMES} 18 | PATHS /usr/lib /usr/lib64 /usr/local/lib /usr/local/lib64 /usr/lib/x86_64-linux-gnu 19 | ) 20 | 21 | if (Tcmalloc_INCLUDE_DIR AND Tcmalloc_LIBRARY) 22 | set(Tcmalloc_FOUND TRUE) 23 | set( Tcmalloc_LIBRARIES ${Tcmalloc_LIBRARY} ) 24 | else () 25 | set(Tcmalloc_FOUND FALSE) 26 | set( Tcmalloc_LIBRARIES ) 27 | endif () 28 | 29 | if (Tcmalloc_FOUND) 30 | message(STATUS "Found Tcmalloc: ${Tcmalloc_LIBRARY}") 31 | else () 32 | message(STATUS "Not Found Tcmalloc") 33 | if (Tcmalloc_FIND_REQUIRED) 34 | message(STATUS "Looked for Tcmalloc libraries named ${Tcmalloc_NAMES}.") 35 | message(FATAL_ERROR "Could NOT find Tcmalloc library") 36 | endif () 37 | endif () 38 | 39 | mark_as_advanced( 40 | Tcmalloc_LIBRARY 41 | Tcmalloc_INCLUDE_DIR 42 | ) -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/formatter.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/details/log_msg.h" 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | namespace spdlog 15 | { 16 | namespace details 17 | { 18 | class flag_formatter; 19 | } 20 | 21 | class formatter 22 | { 23 | public: 24 | virtual ~formatter() {} 25 | virtual void format(details::log_msg& msg) = 0; 26 | }; 27 | 28 | class pattern_formatter SPDLOG_FINAL : public formatter 29 | { 30 | 31 | public: 32 | explicit pattern_formatter(const std::string& pattern, pattern_time_type pattern_time = pattern_time_type::local); 33 | pattern_formatter(const pattern_formatter&) = delete; 34 | pattern_formatter& operator=(const pattern_formatter&) = delete; 35 | void format(details::log_msg& msg) override; 36 | private: 37 | const std::string _pattern; 38 | const pattern_time_type _pattern_time; 39 | std::vector> _formatters; 40 | std::tm get_time(details::log_msg& msg); 41 | void handle_flag(char flag); 42 | void compile_pattern(const std::string& pattern); 43 | }; 44 | } 45 | 46 | #include "spdlog/details/pattern_formatter_impl.h" 47 | 48 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/base_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // base sink templated over a mutex (either dummy or real) 9 | // concrete implementation should only override the _sink_it method. 10 | // all locking is taken care of here so no locking needed by the implementers.. 
11 | // 12 | 13 | #include "spdlog/sinks/sink.h" 14 | #include "spdlog/formatter.h" 15 | #include "spdlog/common.h" 16 | #include "spdlog/details/log_msg.h" 17 | 18 | #include 19 | 20 | namespace spdlog 21 | { 22 | namespace sinks 23 | { 24 | template 25 | class base_sink:public sink 26 | { 27 | public: 28 | base_sink():_mutex() {} 29 | virtual ~base_sink() = default; 30 | 31 | base_sink(const base_sink&) = delete; 32 | base_sink& operator=(const base_sink&) = delete; 33 | 34 | void log(const details::log_msg& msg) SPDLOG_FINAL override 35 | { 36 | std::lock_guard lock(_mutex); 37 | _sink_it(msg); 38 | } 39 | void flush() SPDLOG_FINAL override 40 | { 41 | _flush(); 42 | } 43 | 44 | protected: 45 | virtual void _sink_it(const details::log_msg& msg) = 0; 46 | virtual void _flush() = 0; 47 | Mutex _mutex; 48 | }; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/layers/convolution.cpp: -------------------------------------------------------------------------------- 1 | #include "layers/convolution.h" 2 | #include "graph/node_operators_binary.h" 3 | 4 | namespace marian { 5 | 6 | #ifdef CUDNN 7 | Convolution::Convolution(Ptr graph) {} 8 | 9 | Expr Convolution::apply(Expr x) { 10 | auto graph = x->graph(); 11 | 12 | auto prefix = opt("prefix"); 13 | auto kernelDims = opt>("kernel-dims"); 14 | auto kernelNum = opt("kernel-num"); 15 | auto paddings = opt>("paddings", std::make_pair(0, 0)); 16 | auto strides = opt>("strides", std::make_pair(1, 1)); 17 | 18 | int layerIn = x->shape()[1]; 19 | auto kernel 20 | = graph->param(prefix + "_conv_kernels", 21 | {layerIn, kernelNum, kernelDims.first, kernelDims.second}, 22 | inits::glorotUniform()); 23 | 24 | auto bias = graph->param( 25 | prefix + "_conv_bias", {1, kernelNum, 1, 1}, inits::zeros()); 26 | 27 | std::vector nodes = {x, kernel, bias}; 28 | return Expression( 29 | nodes, paddings.first, paddings.second, strides.first, strides.second); 30 | } 31 | 32 | Expr 
Convolution::apply(const std::vector&) { 33 | ABORT("Can't apply convolution on many inputs at once"); 34 | return nullptr; 35 | } 36 | #endif 37 | 38 | } // namespace marian 39 | -------------------------------------------------------------------------------- /src/translator/helpers.cpp: -------------------------------------------------------------------------------- 1 | /* All or part of this file was contributed by Intel under license: 2 | * Copyright (C) 2017-2018 Intel Corporation 3 | * SPDX-License-Identifier: MIT 4 | */ 5 | 6 | #include 7 | 8 | #include "data/types.h" 9 | #include "tensors/tensor.h" 10 | #include "translator/helpers.h" 11 | 12 | namespace marian { 13 | 14 | namespace cpu { 15 | 16 | void SetColumns(Tensor in, Tensor indices, float value) { 17 | int nRows = in->shape().elements() / in->shape()[-1]; 18 | int nColumns = in->shape()[-1]; 19 | int nSuppress = indices->shape()[-1]; 20 | 21 | for(int rowNumber = 0; rowNumber < nRows; ++rowNumber) { 22 | float* row = in->data() + rowNumber * nColumns; 23 | for(int i = 0; i < nSuppress; ++i) 24 | row[indices->data()[i]] = value; 25 | } 26 | } 27 | 28 | void suppressWords(Expr logProbs, Expr wordIndices) { 29 | SetColumns(logProbs->val(), wordIndices->val(), std::numeric_limits::lowest()); 30 | } 31 | } // namespace cpu 32 | 33 | void suppressWords(Expr logProbs, Expr wordIndices) { 34 | if(logProbs->val()->getBackend()->getDeviceId().type == DeviceType::cpu) { 35 | cpu::suppressWords(logProbs, wordIndices); 36 | } 37 | #ifdef CUDA_FOUND 38 | else { 39 | gpu::suppressWords(logProbs, wordIndices); 40 | } 41 | #endif 42 | } 43 | } // namespace marian 44 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/spdlog-bench-mt.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "spdlog/spdlog.h" 11 | 12 | 13 | using namespace std; 14 | 15 | int main(int argc, char* argv[]) 16 | { 17 | 18 | int thread_count = 10; 19 | if(argc > 1) 20 | thread_count = std::atoi(argv[1]); 21 | 22 | int howmany = 1000000; 23 | 24 | namespace spd = spdlog; 25 | 26 | auto logger = spdlog::create("file_logger", "logs/spd-bench-mt.txt", false); 27 | 28 | logger->set_pattern("[%Y-%b-%d %T.%e]: %v"); 29 | 30 | std::atomic msg_counter {0}; 31 | std::vector threads; 32 | 33 | for (int t = 0; t < thread_count; ++t) 34 | { 35 | threads.push_back(std::thread([&]() 36 | { 37 | while (true) 38 | { 39 | int counter = ++msg_counter; 40 | if (counter > howmany) break; 41 | logger->info("spdlog message #{}: This is some text for your pleasure", counter); 42 | } 43 | })); 44 | } 45 | 46 | 47 | for(auto &t:threads) 48 | { 49 | t.join(); 50 | }; 51 | 52 | 53 | 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/tag.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "directives.h" // IWYU pragma: keep 5 | #include "tag.h" 6 | #include "token.h" 7 | 8 | namespace YAML { 9 | Tag::Tag(const Token& token) : type(static_cast(token.data)) { 10 | switch (type) { 11 | case VERBATIM: 12 | value = token.value; 13 | break; 14 | case PRIMARY_HANDLE: 15 | value = token.value; 16 | break; 17 | case SECONDARY_HANDLE: 18 | value = token.value; 19 | break; 20 | case NAMED_HANDLE: 21 | handle = token.value; 22 | value = token.params[0]; 23 | break; 24 | case NON_SPECIFIC: 25 | break; 26 | default: 27 | assert(false); 28 | } 29 | } 30 | 31 | const std::string Tag::Translate(const Directives& directives) { 32 | switch (type) { 33 | case VERBATIM: 34 | return value; 35 | case PRIMARY_HANDLE: 
36 | return directives.TranslateTagHandle("!") + value; 37 | case SECONDARY_HANDLE: 38 | return directives.TranslateTagHandle("!!") + value; 39 | case NAMED_HANDLE: 40 | return directives.TranslateTagHandle("!" + handle + "!") + value; 41 | case NON_SPECIFIC: 42 | // TODO: 43 | return "!"; 44 | default: 45 | assert(false); 46 | } 47 | throw std::runtime_error("yaml-cpp: internal error, bad tag type"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/functional/array.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "functional/defs.h" 4 | 5 | namespace marian { 6 | 7 | namespace functional { 8 | 9 | template 10 | struct Array { 11 | typedef T value_type; 12 | T data_[N]; 13 | 14 | HOST_DEVICE_INLINE const T* data() const { return data_; } 15 | 16 | HOST_DEVICE_INLINE T* data() { return data_; } 17 | 18 | HOST_DEVICE_INLINE constexpr static size_t size() { return N; } 19 | 20 | HOST_DEVICE_INLINE T& operator[](size_t i) { return data_[i]; } 21 | HOST_DEVICE_INLINE const T& operator[](size_t i) const { return data_[i]; } 22 | 23 | HOST_DEVICE_INLINE T* begin() { return data_; } 24 | HOST_DEVICE_INLINE const T* begin() const { return data_; } 25 | 26 | HOST_DEVICE_INLINE T* end() { return data_ + N; } 27 | HOST_DEVICE_INLINE const T* end() const { return data_ + N; } 28 | 29 | HOST_DEVICE_INLINE void fill(T val) { 30 | for(int i = 0; i < N; ++i) 31 | data_[i] = val; 32 | } 33 | 34 | HOST_DEVICE_INLINE T& back() { return data_[N - 1]; } 35 | HOST_DEVICE_INLINE const T& back() const { return data_[N - 1]; } 36 | 37 | HOST_DEVICE_INLINE bool operator==(const Array& other) { 38 | for(int i = 0; i < N; ++i) 39 | if(data_[i] != other[i]) 40 | return false; 41 | return true; 42 | } 43 | }; 44 | } // namespace functional 45 | } // namespace marian 46 | -------------------------------------------------------------------------------- /.gitignore: 
.history*
src/common/project_version.h
src/common/git_revision.h
src/common/build_info.cpp

*.vcxproj.user
/vs/x64
pingme.txt
/local
# TODO: ^^ the correct solution for /local is to add that to some local git config, don't remember which one. Cf. mtmain.

# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Fortran module files
*.mod

# python compiled files
*.pyc

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app

# Temporary files created by editors
.*.sw*
*~

# CMake files
build
build-*
!build*.sh
# pymarian wheels
dist/
tmp
tmp-*
tmp.*

# Examples
examples/*/*.gz
examples/mnist/*ubyte

# Contrib
/.ycm_extra_conf.py
/.vimrc
/vs/MarianDll.sln
/vs/MarianDll.VC.db
/vs/MarianDll.VC.VC.opendb

.vs
.vscode

# Python : pymarian
*.whl
*.egg-info
src/python/pymarian/_version.py
src/python/tests/data
__pycache__
.pytest_cache
// The following version macro is very similar to the one in PyBind11.
// It defines CLI11_CPP14 / CLI11_CPP17 / CLI11_CPP20 cumulatively according
// to the language standard the compiler reports.
#if !(defined(_MSC_VER) && __cplusplus == 199711L) && !defined(__INTEL_COMPILER)
#if __cplusplus >= 201402L
#define CLI11_CPP14
#if __cplusplus >= 201703L
#define CLI11_CPP17
#if __cplusplus > 201703L
#define CLI11_CPP20
#endif
#endif
#endif
#elif defined(_MSC_VER) && __cplusplus == 199711L
// MSVC sets _MSVC_LANG rather than __cplusplus (supposedly until the standard is fully implemented)
// Unless you use the /Zc:__cplusplus flag on Visual Studio 2017 15.7 Preview 3 or newer
#if _MSVC_LANG >= 201402L
#define CLI11_CPP14
#if _MSVC_LANG > 201402L && _MSC_VER >= 1910
#define CLI11_CPP17
// The macro is spelled _MSVC_LANG (one leading underscore). An undefined
// name evaluates to 0 in #if, which would leave CLI11_CPP20 undefined.
#if _MSVC_LANG > 201703L && _MSC_VER >= 1910
#define CLI11_CPP20
#endif
#endif
#endif
#endif
YAML::noncopyable { 22 | public: 23 | ptr_vector() {} 24 | 25 | void clear() { m_data.clear(); } 26 | 27 | std::size_t size() const { return m_data.size(); } 28 | bool empty() const { return m_data.empty(); } 29 | 30 | void push_back(std::unique_ptr&& t) { m_data.push_back(std::move(t)); } 31 | T& operator[](std::size_t i) { return *m_data[i]; } 32 | const T& operator[](std::size_t i) const { return *m_data[i]; } 33 | 34 | T& back() { return *(m_data.back().get()); } 35 | 36 | const T& back() const { return *(m_data.back().get()); } 37 | 38 | private: 39 | std::vector> m_data; 40 | }; 41 | } 42 | 43 | #endif // PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 44 | -------------------------------------------------------------------------------- /src/3rd_party/pathie-cpp/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2015, 2017 Marvin Gülker 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 18 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /src/models/classifier.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "marian.h" 4 | #include "models/states.h" 5 | #include "layers/constructors.h" 6 | #include "layers/factory.h" 7 | 8 | namespace marian { 9 | 10 | /** 11 | * Simple base class for Classifiers to be used in EncoderClassifier framework 12 | * Currently only implementations are in bert.h 13 | */ 14 | class ClassifierBase :public LayerBase { 15 | using LayerBase::LayerBase; 16 | protected: 17 | Ptr options_; 18 | const std::string prefix_{"classifier"}; 19 | const bool inference_{false}; 20 | const size_t batchIndex_{0}; 21 | 22 | public: 23 | ClassifierBase(Ptr graph, Ptr options) 24 | : LayerBase(graph, options), 25 | prefix_(options->get("prefix", "classifier")), 26 | inference_(options->get("inference", false)), 27 | batchIndex_(options->get("index", 1)) {} // assume that training input has batch index 0 and labels has 1 28 | 29 | virtual ~ClassifierBase() {} 30 | 31 | virtual Ptr apply(Ptr, Ptr, const std::vector>&) = 0; 32 | 33 | template 34 | T opt(const std::string& key) const { 35 | return options_->get(key); 36 | } 37 | 38 | // Should be used to clear any batch-wise temporary objects if present 39 | virtual void clear() = 0; 40 | }; 41 | 42 | } 
-------------------------------------------------------------------------------- /src/3rd_party/ExceptionWithCallStack.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) Microsoft. All rights reserved. 3 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 4 | // 5 | // ExceptionWithCallStack.h - debug util functions 6 | // 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | namespace Microsoft { namespace MSR { namespace CNTK { 13 | 14 | // base class that we can catch, independent of the type parameter 15 | struct /*interface*/ IExceptionWithCallStackBase 16 | { 17 | virtual const char * CallStack() const = 0; 18 | virtual ~IExceptionWithCallStackBase() noexcept = default; 19 | }; 20 | 21 | // Exception wrapper to include native call stack string 22 | template 23 | class ExceptionWithCallStack : public E, public IExceptionWithCallStackBase 24 | { 25 | public: 26 | ExceptionWithCallStack(const std::string& msg, const std::string& callstack) : 27 | E(msg), m_callStack(callstack) 28 | { } 29 | 30 | virtual const char * CallStack() const override { return m_callStack.c_str(); } 31 | 32 | protected: 33 | std::string m_callStack; 34 | }; 35 | 36 | // some older code uses this namespace 37 | namespace DebugUtil 38 | { 39 | void PrintCallStack(size_t skipLevels = 0, bool makeFunctionNamesStandOut = false); 40 | 41 | std::string GetCallStack(size_t skipLevels = 0, bool makeFunctionNamesStandOut = false); 42 | }; 43 | 44 | }}} 45 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/tests.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2015 4 | VisualStudioVersion = 14.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tests", 
"tests.vcxproj", "{59A07559-5F38-4DD6-A7FA-DB4153690B42}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release|Win32 = Release|Win32 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|Win32.ActiveCfg = Debug|Win32 17 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|Win32.Build.0 = Debug|Win32 18 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|x64.ActiveCfg = Debug|x64 19 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|x64.Build.0 = Debug|x64 20 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|Win32.ActiveCfg = Release|Win32 21 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|Win32.Build.0 = Release|Win32 22 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|x64.ActiveCfg = Release|x64 23 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|x64.Build.0 = Release|x64 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /.github/workflows/documentation.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | api-documentation: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v2 15 | with: 16 | submodules: recursive 17 | 18 | - name: Set up Doxygen 19 | run: sudo apt-get install -y doxygen 20 | 21 | - name: Set up Python 22 | uses: actions/setup-python@v2 23 | with: 24 | python-version: 3.7 25 | 26 | - name: Set up dependency cache 27 | uses: actions/cache@v2 28 | with: 29 | path: ~/.cache/pip 30 | key: ${{ runner.os }}-pip-${{ hashFiles('doc/requirements.txt') }} 31 | restore-keys: | 32 
| ${{ runner.os }}-pip- 33 | 34 | - name: Install dependencies 35 | working-directory: ./doc 36 | run: pip install -r requirements.txt 37 | 38 | - name: Build documentation 39 | working-directory: ./doc 40 | run: make html 41 | 42 | # This artifact contains the HTML output of Sphinx only. 43 | # With index.html at the root of the produced zip file. 44 | - name: Upload documentation 45 | uses: actions/upload-artifact@v2 46 | with: 47 | name: api-docs 48 | path: ./doc/build/html 49 | if-no-files-found: error 50 | -------------------------------------------------------------------------------- /src/tensors/cpu/aligned.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | #include 5 | #ifdef _WIN32 6 | #include 7 | #endif 8 | 9 | namespace marian { 10 | namespace cpu { 11 | namespace { 12 | 13 | // allocate function for tensor reserve() below. 14 | // Alignment is needed because we use AVX512 and AVX2 vectors. We should fail if we can't allocate aligned memory. 15 | 16 | #ifdef _WIN32 17 | void *genericMalloc(size_t alignment, size_t size) { 18 | void *ret = _aligned_malloc(size, alignment); 19 | ABORT_IF(!ret, "Failed to allocate memory on CPU"); 20 | return ret; 21 | } 22 | void genericFree(void *ptr) { 23 | _aligned_free(ptr); 24 | } 25 | #else 26 | // Linux and OS X. There is no fallback to malloc because we need it to be aligned. 27 | void *genericMalloc(size_t alignment, size_t size) { 28 | // On macos, aligned_alloc is available only on c++17 29 | // Furthermore, it requires that the memory requested is an exact multiple of the alignment, otherwise it fails. 30 | // posix_memalign is available both Mac (Since 2016) and Linux and in both gcc and clang 31 | void *result; 32 | // Error could be detected by return value or just remaining nullptr. 
33 | ABORT_IF(posix_memalign(&result, alignment, size), "Failed to allocate memory on CPU"); 34 | return result; 35 | } 36 | void genericFree(void *ptr) { 37 | free(ptr); 38 | } 39 | #endif 40 | 41 | } 42 | } // namespace cpu 43 | } // namespace marian 44 | -------------------------------------------------------------------------------- /src/tensors/cpu/mjdgemm/mjdgemm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace marian { 6 | namespace cpu { 7 | 8 | /** 9 | * @brief Namespace for the mjdgemm implementation. 10 | * 11 | * Environment variables MJD_FORCE and MJD_ENABLE_INSTRUCTIONS can be used to force the use of mjdgemm 12 | * and to select the instruction set to use, respectively. The instruction set can be set to AVX512, AVX2, 13 | * AVX, SSE4_2, or NEON. Currently, AVX falls back to SSE4_2. E.g. to force the use of mjdgemm with AVX2 14 | * instructions, set MJD_ENABLE_INSTRUCTIONS=AVX2 and MJD_FORCE=true. 15 | */ 16 | namespace mjdgemm { 17 | 18 | /** 19 | * @brief Determines whether to force the use of mjdgemm based on environment variables. 20 | * 21 | * @return True if mjdgemm is forced, otherwise false. 22 | */ 23 | bool forceMjdgemm(); 24 | 25 | /** 26 | * @brief Performs a GEMM operation with int8 packed matrices. 27 | * 28 | * @param A Pointer to matrix A. 29 | * @param B Pointer to matrix B. 30 | * @param bias Pointer to the bias matrix. 31 | * @param C Pointer to the destination matrix. 32 | * @param M Number of rows in matrix A and C. 33 | * @param N Number of columns in matrix B and C. 34 | * @param K Number of columns in matrix A and rows in matrix B. 
35 | */ 36 | void gemmInt8Packed(const float* A, const int8_t* B, const float* bias, float* C, int M, int N, int K); 37 | 38 | } // namespace mjdgemm 39 | } // namespace cpu 40 | } // namespace marian 41 | -------------------------------------------------------------------------------- /src/tensors/rand.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | #include "common/hash.h" 5 | #include "common/logging.h" 6 | 7 | #include 8 | 9 | namespace marian { 10 | 11 | class TensorBase; 12 | typedef IPtr Tensor; 13 | 14 | class RandomGenerator { 15 | protected: 16 | size_t seed_; 17 | 18 | // hashing device type and id to get a unique seed for each device, e.g. for different samples on different devices 19 | size_t hashSeed(size_t seed, DeviceId deviceId) { 20 | // on the first device, use the seed as is. This keeps unit tests etc. working correctly 21 | // on other devices, hash the seed with the device type and id, so that we get different seeds for different devices 22 | // this is important for e.g. 
different samples on different devices 23 | if(deviceId.no == 0) 24 | return seed; 25 | else 26 | return util::hashArgs(seed, deviceId.type, deviceId.no); 27 | } 28 | 29 | public: 30 | RandomGenerator(size_t seed, DeviceId deviceId) 31 | : seed_(hashSeed(seed, deviceId)) { 32 | LOG(debug, "Setting random seed to {} (device {}{})", seed_, deviceId.typeAsString(), deviceId.no); 33 | } 34 | virtual ~RandomGenerator() {} 35 | virtual void uniform(Tensor, float a, float b) = 0; 36 | virtual void normal(Tensor, float mean, float stddev) = 0; 37 | virtual size_t seed() { return seed_; } 38 | }; 39 | 40 | Ptr createRandomGenerator(size_t /*seed*/, DeviceId); 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/layers/weight.cpp: -------------------------------------------------------------------------------- 1 | #include "layers/weight.h" 2 | 3 | namespace marian { 4 | 5 | Ptr WeightingFactory(Ptr options) { 6 | ABORT_IF(!options->hasAndNotEmpty("data-weighting"), 7 | "No data-weighting specified in options"); 8 | return New(options->get("data-weighting-type")); 9 | } 10 | 11 | Expr DataWeighting::getWeights(Ptr graph, 12 | Ptr batch) { 13 | ABORT_IF(batch->getDataWeights().empty(), 14 | "Vector of weights is unexpectedly empty!"); 15 | bool sentenceWeighting = weightingType_ == "sentence"; 16 | int dimBatch = (int)batch->size(); 17 | int dimWords = sentenceWeighting ? 
1 : (int)batch->back()->batchWidth(); 18 | 19 | // This would abort anyway in fromVector(...), but has clearer error message 20 | // here for this particular case 21 | ABORT_IF(batch->getDataWeights().size() != dimWords * dimBatch, 22 | "Number of sentence/word-level weights ({}) does not match tensor size ({})", 23 | batch->getDataWeights().size(), dimWords * dimBatch); 24 | 25 | auto weights = graph->constant({1, dimWords, dimBatch, 1}, 26 | inits::fromVector(batch->getDataWeights())); 27 | return weights; // [1, dimWords, dimBatch, 1] in case of word-level weights or 28 | // [1, 1, dimBatch, 1] in case of sentence-level weights 29 | } 30 | } // namespace marian 31 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/stringsource.h: -------------------------------------------------------------------------------- 1 | #ifndef STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | namespace YAML { 13 | class StringCharSource { 14 | public: 15 | StringCharSource(const char* str, std::size_t size) 16 | : m_str(str), m_size(size), m_offset(0) {} 17 | 18 | operator bool() const { return m_offset < m_size; } 19 | char operator[](std::size_t i) const { return m_str[m_offset + i]; } 20 | bool operator!() const { return !static_cast(*this); } 21 | 22 | const StringCharSource operator+(int i) const { 23 | StringCharSource source(*this); 24 | if (static_cast(source.m_offset) + i >= 0) 25 | source.m_offset += i; 26 | else 27 | source.m_offset = 0; 28 | return source; 29 | } 30 | 31 | StringCharSource& operator++() { 32 | ++m_offset; 33 | return *this; 34 | } 35 | 36 | StringCharSource& operator+=(std::size_t offset) { 37 | m_offset += 
offset; 38 | return *this; 39 | } 40 | 41 | private: 42 | const char* m_str; 43 | std::size_t m_size; 44 | std::size_t m_offset; 45 | }; 46 | } 47 | 48 | #endif // STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 49 | -------------------------------------------------------------------------------- /src/microsoft/sentencepiece.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace marian { 5 | namespace spm { 6 | 7 | // Describes an individual token in a sentencepiece encoding 8 | struct Native_SentencePiecePiece { 9 | int id; 10 | int begin; 11 | int end; 12 | char* surface; 13 | char* piece; 14 | }; 15 | 16 | // Mirrors the SentencePieceText protobuf struct returned by SPM 17 | // and provides individual piece and corresponding surface details 18 | struct Native_SentencePieceText { 19 | char* text; 20 | int num_pieces; 21 | Native_SentencePiecePiece** pieces; 22 | }; 23 | 24 | int SentencePieceInteropFreeNativeSentencePieceText(Native_SentencePieceText* spt); 25 | intptr_t SentencePieceInteropLoadModel(const uint16_t* modelPath, 26 | const uint16_t** vocab, 27 | size_t vocabSize); 28 | int SentencePieceInteropDecodeAligned(intptr_t object, 29 | int num_tokens, 30 | char** tokens, 31 | Native_SentencePieceText** nSpt); 32 | int SentencePieceInteropEncodeAligned(intptr_t object, char* word, Native_SentencePieceText** nSpt); 33 | int SentencePieceInteropGetPieceID(intptr_t object, char* word); 34 | int SentencePieceInteropUnloadModel(intptr_t object); 35 | int SentencepieceInteropTrainModel(char* args); 36 | 37 | } // namespace spm 38 | } // namespace marian -------------------------------------------------------------------------------- /src/tensors/gpu/device.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "tensors/device.h" 5 | #include "tensors/gpu/cuda_helpers.h" 6 | 7 | namespace marian { 8 | namespace 
gpu { 9 | 10 | Device::~Device() { 11 | // No CUDA error checking as this is a destructor and we cannot do anything about errors anyway. 12 | cudaSetDevice(deviceId_.no); 13 | if(data_) { 14 | cudaFree(data_); 15 | } 16 | cudaDeviceSynchronize(); 17 | } 18 | 19 | void Device::reserve(size_t size) { 20 | size = align(size); 21 | CUDA_CHECK(cudaSetDevice(deviceId_.no)); 22 | 23 | ABORT_IF(size < size_ || size == 0, 24 | "New size must be larger than old size and larger than 0"); 25 | 26 | if(data_) { 27 | // Allocate memory while temporarily parking original content in host memory 28 | std::vector temp(size_); 29 | CUDA_CHECK(cudaMemcpy(temp.data(), data_, size_, cudaMemcpyDeviceToHost)); 30 | CUDA_CHECK(cudaFree(data_)); 31 | LOG(debug, "[memory] Re-allocating from {} to {} bytes on device {}", size_, size, deviceId_.no); 32 | CUDA_CHECK(cudaMalloc(&data_, size)); 33 | CUDA_CHECK(cudaMemcpy(data_, temp.data(), size_, cudaMemcpyHostToDevice)); 34 | //logCallStack(0); 35 | } else { 36 | // No data_ yet: Just alloc. 37 | LOG(debug, "[memory] Allocating {} bytes in device {}", size, deviceId_.no); 38 | CUDA_CHECK(cudaMalloc(&data_, size)); 39 | } 40 | 41 | size_ = size; 42 | } 43 | } // namespace gpu 44 | } // namespace marian 45 | -------------------------------------------------------------------------------- /src/tensors/memory_piece.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | 5 | #include 6 | 7 | namespace marian { 8 | 9 | class MemoryPiece { 10 | private: 11 | uint8_t* data_; 12 | size_t size_; 13 | 14 | ENABLE_INTRUSIVE_PTR(MemoryPiece) 15 | 16 | // Contructor is private, use MemoryPiece::New(...) 
17 | MemoryPiece(uint8_t* data, size_t size) : data_(data), size_(size) {} 18 | 19 | public: 20 | // Use this whenever pointing to MemoryPiece 21 | typedef IPtr PtrType; 22 | 23 | // Use this whenever creating a pointer to MemoryPiece 24 | template 25 | static PtrType New(Args&& ...args) { 26 | return PtrType(new MemoryPiece(std::forward(args)...)); 27 | } 28 | 29 | uint8_t* data() const { return data_; } 30 | uint8_t* data() { return data_; } 31 | 32 | template 33 | T* data() const { 34 | return (T*)data_; 35 | } 36 | 37 | template 38 | T* data() { 39 | return (T*)data_; 40 | } 41 | 42 | size_t size() const { return size_; } 43 | 44 | void set(uint8_t* data, size_t size) { 45 | data_ = data; 46 | size_ = size; 47 | } 48 | 49 | void setPtr(uint8_t* data) { data_ = data; } 50 | 51 | friend std::ostream& operator<<(std::ostream& out, const MemoryPiece mp) { 52 | out << "MemoryPiece - ptr: " << std::hex << (size_t)mp.data() << std::dec 53 | << " size: " << mp.size(); 54 | return out; 55 | } 56 | 57 | }; 58 | } // namespace marian 59 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/eventhandler.h: -------------------------------------------------------------------------------- 1 | #ifndef EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | #include "yaml-cpp/anchor.h" 13 | #include "yaml-cpp/emitterstyle.h" 14 | 15 | namespace YAML { 16 | struct Mark; 17 | 18 | class EventHandler { 19 | public: 20 | virtual ~EventHandler() {} 21 | 22 | virtual void OnDocumentStart(const Mark& mark) = 0; 23 | virtual void OnDocumentEnd() = 0; 24 | 25 | virtual void OnNull(const Mark& mark, anchor_t anchor) = 0; 26 | virtual void 
OnAlias(const Mark& mark, anchor_t anchor) = 0; 27 | virtual void OnScalar(const Mark& mark, const std::string& tag, 28 | anchor_t anchor, const std::string& value) = 0; 29 | 30 | virtual void OnSequenceStart(const Mark& mark, const std::string& tag, 31 | anchor_t anchor, EmitterStyle::value style) = 0; 32 | virtual void OnSequenceEnd() = 0; 33 | 34 | virtual void OnMapStart(const Mark& mark, const std::string& tag, 35 | anchor_t anchor, EmitterStyle::value style) = 0; 36 | virtual void OnMapEnd() = 0; 37 | }; 38 | } 39 | 40 | #endif // EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 41 | -------------------------------------------------------------------------------- /src/common/hash.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace marian { 6 | namespace util { 7 | 8 | template using hash = std::hash; 9 | 10 | // This combinator is based on boost::hash_combine, but uses 11 | // std::hash as the hash implementation. Used as a drop-in 12 | // replacement for boost::hash_combine. 13 | template 14 | inline void hash_combine(HashType& seed, T const& v) { 15 | hash hasher; 16 | seed ^= static_cast(hasher(v)) + 0x9e3779b9 + (seed<<6) + (seed>>2); 17 | } 18 | 19 | // Hash a whole chunk of memory, mostly used for diagnostics 20 | template 21 | inline HashType hashMem(const T* beg, size_t len, HashType seed = 0) { 22 | for(auto it = beg; it < beg + len; ++it) 23 | hash_combine(seed, *it); 24 | return seed; 25 | } 26 | 27 | /** 28 | * Base case for template recursion below (no arguments are hashed to 0) 29 | */ 30 | template 31 | inline HashType hashArgs() { 32 | return 0; 33 | } 34 | 35 | /** 36 | * Hash an arbitrary number of arguments of arbitrary type via template recursion 37 | */ 38 | template 39 | inline HashType hashArgs(T arg, Args... 
args) { 40 | // Hash arguments without first arg 41 | HashType seed = hashArgs(args...); 42 | // Hash first arg and combine which above hash 43 | hash_combine(seed, arg); 44 | return seed; 45 | } 46 | 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/examples/mnist/mnist_ffnn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "marian.h" 8 | 9 | #include "examples/mnist/model.h" 10 | #include "examples/mnist/training.h" 11 | #include "training/graph_group_async.h" 12 | #include "training/graph_group_singleton.h" 13 | #include "training/graph_group_sync.h" 14 | 15 | const std::vector TRAIN_SET 16 | = {"../src/examples/mnist/train-images-idx3-ubyte", 17 | "../src/examples/mnist/train-labels-idx1-ubyte"}; 18 | const std::vector VALID_SET 19 | = {"../src/examples/mnist/t10k-images-idx3-ubyte", 20 | "../src/examples/mnist/t10k-labels-idx1-ubyte"}; 21 | 22 | using namespace marian; 23 | 24 | int main(int argc, char** argv) { 25 | auto options = parseOptions(argc, argv, cli::mode::training, false); 26 | 27 | if(!options->hasAndNotEmpty("train-sets")) 28 | options->set("train-sets", TRAIN_SET); 29 | if(!options->hasAndNotEmpty("valid-sets")) 30 | options->set("valid-sets", VALID_SET); 31 | 32 | if(options->get("type") != "mnist-lenet") 33 | options->set("type", "mnist-ffnn"); 34 | 35 | auto devices = Config::getDevices(options); 36 | 37 | if(devices.size() == 1) 38 | New>(options)->run(); 39 | else if(options->get("sync-sgd")) 40 | New>(options)->run(); 41 | else 42 | New>(options)->run(); 43 | 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /src/onnx/expression_graph_onnx_exporter.h: -------------------------------------------------------------------------------- 1 | #include "graph/expression_graph.h" 2 | 3 | namespace marian { 4 | // export of 
Marian models to ONNX 5 | class ExpressionGraphONNXExporter : public ExpressionGraph { 6 | #ifdef USE_ONNX 7 | public: 8 | // export a seq2seq model to a set of ONNX files 9 | void exportToONNX(const std::string& modelToPrefix, Ptr modelOptions, const std::vector& vocabPaths); 10 | 11 | private: 12 | // [name] -> (vector(name, Expr), vector(name, Expr)) 13 | typedef std::map>, std::vector> >> FunctionDefs; 14 | 15 | // serialize the current nodesForward_ to an ONNX file. This operation is destructive. 16 | void serializeToONNX(const std::string& filename, FunctionDefs&& functionDefs, size_t sentinelDim); 17 | 18 | // find a node on the current forward tape 19 | Expr tryFindForwardNodeByName(const std::string& nodeName) const; 20 | 21 | // helper to transform nodesForward_ to only use the subset of operations supported by ONNX 22 | void expandMacroOpsForONNX(std::map>, std::vector> >>& functionDefs); 23 | 24 | // helper to build nodesForward_ from root nodes 25 | void rebuildNodesForward(const struct InputsMap& inputsMap, 26 | const std::vector>& outputDefs); 27 | #endif // USE_ONNX 28 | }; 29 | } 30 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/ostream_wrapper.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/ostream_wrapper.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace YAML { 8 | ostream_wrapper::ostream_wrapper() 9 | : m_buffer(1, '\0'), 10 | m_pStream(0), 11 | m_pos(0), 12 | m_row(0), 13 | m_col(0), 14 | m_comment(false) {} 15 | 16 | ostream_wrapper::ostream_wrapper(std::ostream& stream) 17 | : m_pStream(&stream), m_pos(0), m_row(0), m_col(0), m_comment(false) {} 18 | 19 | ostream_wrapper::~ostream_wrapper() {} 20 | 21 | void ostream_wrapper::write(const std::string& str) { 22 | if (m_pStream) { 23 | m_pStream->write(str.c_str(), str.size()); 24 | } else { 25 | m_buffer.resize(std::max(m_buffer.size(), m_pos + str.size() + 
1)); 26 | std::copy(str.begin(), str.end(), m_buffer.begin() + m_pos); 27 | } 28 | 29 | for (std::size_t i = 0; i < str.size(); i++) { 30 | update_pos(str[i]); 31 | } 32 | } 33 | 34 | void ostream_wrapper::write(const char* str, std::size_t size) { 35 | if (m_pStream) { 36 | m_pStream->write(str, size); 37 | } else { 38 | m_buffer.resize(std::max(m_buffer.size(), m_pos + size + 1)); 39 | std::copy(str, str + size, m_buffer.begin() + m_pos); 40 | } 41 | 42 | for (std::size_t i = 0; i < size; i++) { 43 | update_pos(str[i]); 44 | } 45 | } 46 | 47 | void ostream_wrapper::update_pos(char ch) { 48 | m_pos++; 49 | m_col++; 50 | 51 | if (ch == '\n') { 52 | m_row++; 53 | m_col = 0; 54 | m_comment = false; 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/boost-bench.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace logging = boost::log; 15 | namespace src = boost::log::sources; 16 | namespace sinks = boost::log::sinks; 17 | namespace keywords = boost::log::keywords; 18 | 19 | void init() 20 | { 21 | logging::add_file_log 22 | ( 23 | keywords::file_name = "logs/boost-sample_%N.log", /*< file name pattern >*/ 24 | keywords::auto_flush = false, 25 | keywords::format = "[%TimeStamp%]: %Message%" 26 | ); 27 | 28 | logging::core::get()->set_filter 29 | ( 30 | logging::trivial::severity >= logging::trivial::info 31 | ); 32 | } 33 | 34 | 35 | int main(int argc, char* []) 36 | { 37 | int howmany = 1000000; 38 | init(); 39 | logging::add_common_attributes(); 40 | 41 | using namespace logging::trivial; 42 | src::severity_logger_mt< severity_level > lg; 43 | for(int i = 0 ; i < howmany; ++i) 44 | BOOST_LOG_SEV(lg, 
info) << "boost message #" << i << ": This is some text for your pleasure"; 45 | 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/format.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "includes.h" 3 | 4 | template 5 | std::string log_info(const T& what, spdlog::level::level_enum logger_level = spdlog::level::info) 6 | { 7 | 8 | std::ostringstream oss; 9 | auto oss_sink = std::make_shared(oss); 10 | 11 | spdlog::logger oss_logger("oss", oss_sink); 12 | oss_logger.set_level(logger_level); 13 | oss_logger.set_pattern("%v"); 14 | oss_logger.info(what); 15 | 16 | return oss.str().substr(0, oss.str().length() - spdlog::details::os::eol_size); 17 | } 18 | 19 | 20 | 21 | 22 | 23 | 24 | TEST_CASE("basic_logging ", "[basic_logging]") 25 | { 26 | //const char 27 | REQUIRE(log_info("Hello") == "Hello"); 28 | REQUIRE(log_info("") == ""); 29 | 30 | //std::string 31 | REQUIRE(log_info(std::string("Hello")) == "Hello"); 32 | REQUIRE(log_info(std::string()) == std::string()); 33 | 34 | //Numbers 35 | REQUIRE(log_info(5) == "5"); 36 | REQUIRE(log_info(5.6) == "5.6"); 37 | 38 | //User defined class 39 | //REQUIRE(log_info(some_logged_class("some_val")) == "some_val"); 40 | } 41 | 42 | 43 | TEST_CASE("log_levels", "[log_levels]") 44 | { 45 | REQUIRE(log_info("Hello", spdlog::level::err) == ""); 46 | REQUIRE(log_info("Hello", spdlog::level::critical) == ""); 47 | REQUIRE(log_info("Hello", spdlog::level::info) == "Hello"); 48 | REQUIRE(log_info("Hello", spdlog::level::debug) == "Hello"); 49 | REQUIRE(log_info("Hello", spdlog::level::trace) == "Hello"); 50 | } 51 | 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /src/3rd_party/SQLiteCpp/src/Transaction.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file Transaction.cpp 3 | * 
@ingroup SQLiteCpp 4 | * @brief A Transaction is way to group multiple SQL statements into an atomic secured operation. 5 | * 6 | * Copyright (c) 2012-2013 Sebastien Rombauts (sebastien.rombauts@gmail.com) 7 | * 8 | * Distributed under the MIT License (MIT) (See accompanying file LICENSE.txt 9 | * or copy at http://opensource.org/licenses/MIT) 10 | */ 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | 17 | namespace SQLite 18 | { 19 | 20 | 21 | // Begins the SQLite transaction 22 | Transaction::Transaction(Database& aDatabase) : 23 | mDatabase(aDatabase), 24 | mbCommited(false) 25 | { 26 | mDatabase.exec("BEGIN"); 27 | } 28 | 29 | // Safely rollback the transaction if it has not been committed. 30 | Transaction::~Transaction() 31 | { 32 | if (false == mbCommited) 33 | { 34 | try 35 | { 36 | mDatabase.exec("ROLLBACK"); 37 | } 38 | catch (SQLite::Exception&) 39 | { 40 | // Never throw an exception in a destructor: error if already rollbacked, but no harm is caused by this. 41 | } 42 | } 43 | } 44 | 45 | // Commit the transaction. 46 | void Transaction::commit() 47 | { 48 | if (false == mbCommited) 49 | { 50 | mDatabase.exec("COMMIT"); 51 | mbCommited = true; 52 | } 53 | else 54 | { 55 | throw SQLite::Exception("Transaction already commited."); 56 | } 57 | } 58 | 59 | 60 | } // namespace SQLite 61 | -------------------------------------------------------------------------------- /src/3rd_party/onnx/protobuf/onnx-ml.pb-wrapper.cpp: -------------------------------------------------------------------------------- 1 | // protobuf-generated files don't compile clean. This compiles them with warnings 2 | // disabled, without having to disable it for the entire project whole-sale. 3 | 4 | #ifdef USE_ONNX 5 | 6 | // Get protobuf this way: 7 | // sudo apt-get install cmake pkg-config libprotobuf9v5 protobuf-compiler libprotobuf-dev libgoogle-perftools-dev 8 | 9 | // Since we don't develop the ONNX .proto file, I just hand-created the .pb. files. 
Implements a `HalfFloat` class that provides all the common arithmetic operations for a 16 bit
floating-point type (10 bits mantissa, 5 bits exponent and one sign bit) and can thus be used (almost)
interchangeably with regular `float`s. Not all operations have efficient implementations (some just convert to `float`,
compute the result and convert back again) - if in doubt, check out the source code.
We also supply a specialization for `std::numeric_limits` so that `half` is usable in template code
dependent on type traits.


#### Usage ####

    // get some halfs (half is a typedef for HalfFloat)
    half a = 1.0f;
    half b = 0.5f;

    // and have some FUN
    half c = (a+b) / (a-b);
    ++c;

    // now that we have a result in lossy precision,
    // convert it back to double precision.
    // if anybody asks, it's for the lulz.
    double result = c;
// Select the decoration YAML_CPP_API expands to. Any earlier definition is
// discarded first, then:
//   - YAML_CPP_DLL not defined:                    expands to nothing
//   - YAML_CPP_DLL and yaml_cpp_EXPORTS defined:   __declspec(dllexport)
//     (the library itself is being built as a DLL)
//   - only YAML_CPP_DLL defined:                   __declspec(dllimport)
//     (a client is consuming the DLL)
#undef YAML_CPP_API

#if !defined(YAML_CPP_DLL)
#define YAML_CPP_API
#elif defined(yaml_cpp_EXPORTS)
// #pragma message( "Defining YAML_CPP_API for DLL export" )
#define YAML_CPP_API __declspec(dllexport)
#else
// #pragma message( "Defining YAML_CPP_API for DLL import" )
#define YAML_CPP_API __declspec(dllimport)
#endif
namespace faiss {

/** Base structure for an index.
 *
 * As declared here it only fixes the scalar types used with an index:
 * vector components and distances are 32-bit floats, and indices are
 * signed 64-bit integers.
 */
struct Index {
    using component_t = float;  ///< type of one vector component
    using distance_t = float;   ///< type of a distance value
    using idx_t = int64_t;      ///< all indices are this type
};

}  // namespace faiss