├── VERSION ├── wasm ├── post-module.js ├── Dockerfile ├── import-gemm-module.js ├── package-benchmark.sh └── test_stdin.cpp ├── vs ├── .gitignore ├── BuildRelease.bat └── Marian.sln ├── scripts ├── ci │ ├── README.md │ └── install_mkl.sh ├── shortlist │ ├── .gitignore │ ├── README.md │ └── install.sh ├── contrib │ ├── fix_hard.py │ └── inject_ctt.py ├── server │ └── client_example.py ├── embeddings │ └── export_embeddings.py └── checkpoints │ └── average.py ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── feature_request.md │ └── bug_report.md ├── pull_request_template.md └── workflows │ ├── native-customized_marian-macos.yml │ ├── wasm-customized_marian-macos.yml │ ├── wasm-customized_marian-ubuntu.yml │ └── macos.yml ├── src ├── 3rd_party │ ├── spdlog │ │ ├── tests │ │ │ ├── main.cpp │ │ │ ├── utils.h │ │ │ ├── includes.h │ │ │ ├── install_libcxx.sh │ │ │ ├── CMakeLists.txt │ │ │ ├── utils.cpp │ │ │ ├── tests.sln │ │ │ └── format.cpp │ │ ├── bench │ │ │ ├── logs │ │ │ │ └── .gitignore │ │ │ ├── latency │ │ │ │ ├── compare.sh │ │ │ │ ├── utils.h │ │ │ │ └── g3log-crush.cpp │ │ │ ├── easyl.conf │ │ │ ├── glog-bench.cpp │ │ │ ├── easylogging-bench.cpp │ │ │ ├── spdlog-bench.cpp │ │ │ ├── utils.h │ │ │ ├── glog-bench-mt.cpp │ │ │ ├── easylogging-bench-mt.cpp │ │ │ ├── spdlog-bench-mt.cpp │ │ │ └── boost-bench.cpp │ │ ├── example │ │ │ ├── jni │ │ │ │ ├── Application.mk │ │ │ │ └── Android.mk │ │ │ ├── utils.h │ │ │ ├── Makefile.mingw │ │ │ ├── Makefile.clang │ │ │ └── example.sln │ │ ├── astyle.sh │ │ ├── cmake │ │ │ └── spdlog.pc.in │ │ ├── INSTALL │ │ ├── include │ │ │ └── spdlog │ │ │ │ ├── fmt │ │ │ │ ├── ostr.h │ │ │ │ ├── fmt.h │ │ │ │ └── bundled │ │ │ │ │ └── ostream.cc │ │ │ │ ├── sinks │ │ │ │ ├── null_sink.h │ │ │ │ ├── msvc_sink.h │ │ │ │ ├── sink.h │ │ │ │ ├── ostream_sink.h │ │ │ │ └── base_sink.h │ │ │ │ ├── details │ │ │ │ ├── null_mutex.h │ │ │ │ └── log_msg.h │ │ │ │ └── formatter.h │ │ ├── sinks │ │ │ ├── sink.h │ │ │ ├── 
null_sink.h │ │ │ ├── msvc_sink.h │ │ │ ├── base_sink.h │ │ │ └── ostream_sink.h │ │ ├── details │ │ │ └── null_mutex.h │ │ ├── .gitignore │ │ ├── formatter.h │ │ └── LICENSE │ ├── zlib │ │ ├── zlib.3.pdf │ │ ├── CMakeLists.txt │ │ ├── inffast.h │ │ └── gzclose.c │ ├── faiss │ │ ├── README │ │ ├── CMakeLists.txt │ │ ├── LICENSE │ │ └── utils │ │ │ └── random.h │ ├── yaml-cpp │ │ ├── CMakeLists.txt │ │ ├── null.cpp │ │ ├── yaml-node.cpp │ │ ├── contrib │ │ │ ├── graphbuilder.cpp │ │ │ └── anchordict.h │ │ ├── directives.cpp │ │ ├── anchor.h │ │ ├── emitterstyle.h │ │ ├── node │ │ │ ├── type.h │ │ │ ├── detail │ │ │ │ ├── iterator_fwd.h │ │ │ │ ├── bool_type.h │ │ │ │ └── memory.h │ │ │ ├── ptr.h │ │ │ ├── emit.h │ │ │ └── iterator.h │ │ ├── emitterdef.h │ │ ├── memory.cpp │ │ ├── emit.cpp │ │ ├── scantag.h │ │ ├── exceptions.cpp │ │ ├── noncopyable.h │ │ ├── directives.h │ │ ├── tag.h │ │ ├── yaml.h │ │ ├── null.h │ │ ├── mark.h │ │ ├── LICENSE │ │ ├── regex_yaml.cpp │ │ ├── indentation.h │ │ ├── collectionstack.h │ │ ├── tag.cpp │ │ ├── ptr_vector.h │ │ ├── stringsource.h │ │ ├── eventhandler.h │ │ ├── ostream_wrapper.cpp │ │ ├── dll.h │ │ ├── streamcharsource.h │ │ ├── stlemitter.h │ │ └── parse.cpp │ ├── pathie-cpp │ │ ├── CMakeLists.txt │ │ └── LICENSE │ ├── CLI │ │ ├── Version.hpp │ │ ├── CLI.hpp │ │ ├── Macros.hpp │ │ └── LICENSE │ ├── onnx │ │ └── protobuf │ │ │ ├── onnx-ml.pb-wrapper.h │ │ │ └── onnx-ml.pb-wrapper.cpp │ ├── phf │ │ └── LICENSE │ ├── cnpy │ │ └── LICENSE │ ├── mio │ │ └── LICENSE │ ├── zstr │ │ └── LICENSE │ ├── ExceptionWithCallStack.h │ └── half_float │ │ └── Readme.md ├── common │ ├── regex.h │ ├── version.h │ ├── build_info.h │ ├── version.cpp │ ├── build_info.cpp.in │ ├── file_utils.h │ ├── project_version.h.in │ ├── binary.h │ ├── filesystem.cpp │ ├── hash.h │ ├── file_utils.cpp │ ├── config_validator.h │ ├── cli_helper.cpp │ ├── io.h │ └── signal_handling.h ├── tests │ ├── units │ │ ├── run_tests.cpp │ │ ├── CMakeLists.txt │ │ └── 
utils_tests.cpp │ ├── README.md │ ├── CMakeLists.txt │ ├── dropout.cpp │ └── logger.cpp ├── examples │ ├── mnist │ │ ├── .gitignore │ │ ├── download.sh │ │ └── mnist_ffnn.cpp │ ├── README.md │ └── CMakeLists.txt ├── translator │ ├── history.cpp │ ├── helpers.h │ ├── nth_element.h │ ├── helpers.cpp │ └── output_printer.cpp ├── tensors │ ├── gpu │ │ ├── element.h │ │ ├── add.h │ │ ├── algorithm.h │ │ ├── prod.h │ │ └── device.cu │ ├── rand.h │ ├── backend.cpp │ ├── cpu │ │ ├── device.cpp │ │ └── aligned.h │ └── memory_piece.h ├── models │ ├── model_task.h │ ├── transformer_stub.cpp │ ├── transformer_factory.h │ ├── encoder.h │ └── classifier.h ├── command │ ├── marian_embedder.cpp │ ├── marian_scorer.cpp │ ├── marian_decoder.cpp │ └── marian_vocab.cpp ├── marian.h ├── optimizers │ ├── clippers.cpp │ ├── clippers.h │ └── quantizer.h ├── functional │ ├── defs.h │ ├── functional.h │ └── array.h ├── data │ ├── rng_engine.h │ ├── sentencepiece_vocab.h │ ├── batch.h │ ├── iterator_facade.h │ └── dataset.h ├── layers │ ├── lsh.h │ ├── weight.h │ ├── convolution.cpp │ └── weight.cpp ├── training │ ├── deprecated │ │ ├── gradient_dropping │ │ │ └── gpu │ │ │ │ └── sparse_algorithm.h │ │ └── graph_group_async_drop.h │ └── graph_group_singleton.cpp ├── embedder │ └── vector_collector.h ├── rnn │ └── attention_constructors.h ├── onnx │ └── expression_graph_onnx_exporter.h └── graph │ └── node_operators.cpp ├── contrib ├── other-builds │ ├── cmake_doze.txt │ └── eclipse │ │ └── .project ├── triton-aml │ ├── marian_backend │ │ ├── src │ │ │ ├── libtriton_marian.ldscript │ │ │ └── marian.h │ │ ├── cmake │ │ │ └── TritonMarianBackendConfig.cmake.in │ │ └── README.md │ └── build.sh ├── autoformat.sh └── vim │ └── .vimrc ├── .gitignore ├── cmake ├── FindTcmalloc.cmake └── FindNCCL.cmake ├── LICENSE.md └── .gitmodules /VERSION: -------------------------------------------------------------------------------- 1 | v1.9.56 2 | 
-------------------------------------------------------------------------------- /wasm/post-module.js: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vs/.gitignore: -------------------------------------------------------------------------------- 1 | build-vs 2 | deps 3 | -------------------------------------------------------------------------------- /scripts/ci/README.md: -------------------------------------------------------------------------------- 1 | Scripts for continuous integration. 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # git should never touch line endings 2 | * -text 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | -------------------------------------------------------------------------------- /scripts/shortlist/.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | fast_align 3 | extract-lex 4 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/main.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_MAIN 2 | #include "catch.hpp" -------------------------------------------------------------------------------- /src/common/regex.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | namespace regex = std; 5 | -------------------------------------------------------------------------------- /src/tests/units/run_tests.cpp: 
-------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_MAIN 2 | #include "catch.hpp" 3 | -------------------------------------------------------------------------------- /src/3rd_party/zlib/zlib.3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browsermt/marian-dev/HEAD/src/3rd_party/zlib/zlib.3.pdf -------------------------------------------------------------------------------- /contrib/other-builds/cmake_doze.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/browsermt/marian-dev/HEAD/contrib/other-builds/cmake_doze.txt -------------------------------------------------------------------------------- /src/3rd_party/faiss/README: -------------------------------------------------------------------------------- 1 | This is code extracted from the original FAISS repository: https://github.com/facebookresearch/faiss -------------------------------------------------------------------------------- /contrib/triton-aml/marian_backend/src/libtriton_marian.ldscript: -------------------------------------------------------------------------------- 1 | { 2 | global: 3 | TRITONBACKEND_*; 4 | local: *; 5 | }; 6 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/logs/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /src/common/version.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace marian { 6 | std::string buildVersion(); 7 | } 8 | -------------------------------------------------------------------------------- 
/src/examples/mnist/.gitignore: -------------------------------------------------------------------------------- 1 | t10k-images-idx3-ubyte 2 | t10k-labels-idx1-ubyte 3 | train-images-idx3-ubyte 4 | train-labels-idx1-ubyte 5 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/jni/Application.mk: -------------------------------------------------------------------------------- 1 | # Exceptions are used in spdlog. Link to an exception-ready C++ runtime. 2 | APP_STL = gnustl_static 3 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/astyle.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | find . -name "*\.h" -o -name "*\.cpp"|xargs dos2unix 3 | find . -name "*\.h" -o -name "*\.cpp"|xargs astyle -n -c -A1 4 | 5 | 6 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/cmake/spdlog.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | includedir=${prefix}/include 3 | 4 | Name: @PROJECT_NAME@ 5 | Description: Super fast C++ logging library. 6 | Version: @PROJECT_VERSION@ 7 | -------------------------------------------------------------------------------- /src/translator/history.cpp: -------------------------------------------------------------------------------- 1 | #include "history.h" 2 | 3 | namespace marian { 4 | 5 | History::History(size_t lineNo, float alpha, float wp) 6 | : lineNo_(lineNo), alpha_(alpha), wp_(wp) {} 7 | } // namespace marian 8 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(..) 2 | include_directories(.) 
3 | 4 | FILE(GLOB YamlCppSources *.cpp contrib/*.cpp) 5 | if (NOT TARGET libyaml-cpp) 6 | add_library(libyaml-cpp OBJECT ${YamlCppSources}) 7 | endif() 8 | -------------------------------------------------------------------------------- /src/3rd_party/pathie-cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(..) 2 | include_directories(.) 3 | include_directories(include) 4 | 5 | FILE(GLOB PathieCppSources src/*.cpp) 6 | if (NOT TARGET pathie-cpp) 7 | add_library(pathie-cpp OBJECT ${PathieCppSources}) 8 | endif() 9 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/null.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/null.h" 2 | 3 | namespace YAML { 4 | _Null Null; 5 | 6 | bool IsNullString(const std::string& str) { 7 | return str.empty() || str == "~" || str == "null" || str == "Null" || 8 | str == "NULL"; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/tensors/gpu/element.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tensors/tensor.h" 4 | 5 | namespace marian { 6 | namespace gpu { 7 | 8 | template 9 | void Element(Functor functor, Tensor out, Tensors... 
tensors); 10 | } 11 | } // namespace marian 12 | -------------------------------------------------------------------------------- /src/3rd_party/faiss/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # adding a new file require explicittly modifing the CMakeLists.txt 2 | 3 | add_definitions(-DFINTEGER=uint64_t) 4 | 5 | include_directories("impl") 6 | FILE(GLOB FaissCppSources *.cpp impl/*.cpp utils/*.cpp) 7 | add_library(faiss OBJECT ${FaissCppSources}) 8 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/yaml-node.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/node/node.h" 2 | #include "nodebuilder.h" 3 | #include "nodeevents.h" 4 | 5 | namespace YAML { 6 | Node Clone(const Node& node) { 7 | NodeEvents events(node); 8 | NodeBuilder builder; 9 | events.Emit(builder); 10 | return builder.Root(); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/common/build_info.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace marian { 6 | 7 | // Returns list of non-advanced cache variables used by CMake 8 | std::string cmakeBuildOptions(); 9 | 10 | // Returns list of advanced cache variables used by CMake 11 | std::string cmakeBuildOptionsAdvanced(); 12 | 13 | } // namespace marian 14 | -------------------------------------------------------------------------------- /src/models/model_task.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace marian { 6 | 7 | struct ModelTask { 8 | virtual ~ModelTask() {} 9 | virtual void run() = 0; 10 | }; 11 | 12 | struct ModelServiceTask { 13 | virtual ~ModelServiceTask() {} 14 | virtual std::string run(const std::string&) = 0; 15 | }; 16 | } // namespace 
marian 17 | -------------------------------------------------------------------------------- /src/examples/README.md: -------------------------------------------------------------------------------- 1 | Marian examples 2 | --------------- 3 | 4 | Examples are enabled with CMake option `-DCOMPILE_EXAMPLES=ON`. 5 | 6 | ## MNIST 7 | 8 | You will need MNIST data for training and testing. Download them with the 9 | script `src/examples/mnist/download.sh` or provide paths to the files with 10 | `--train-sets` and `--valid-sets` options. 11 | -------------------------------------------------------------------------------- /contrib/triton-aml/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | echo Building Triton Marian backend ... 3 | 4 | docker build -t triton-marian-build . 5 | 6 | echo Copying artifacts ... 7 | 8 | docker container create --name extract triton-marian-build 9 | docker container cp extract:/opt/tritonserver/marian_backend/build/libtriton_marian.so . 10 | docker container rm -f extract 11 | -------------------------------------------------------------------------------- /scripts/shortlist/README.md: -------------------------------------------------------------------------------- 1 | `install.sh` is a helper script that downloads and compiles fastalign and extract-lex, and copies 2 | required binaries into _./bin_. 
3 | 4 | Shortlist files (_lex.s2t_ and _lex.t2s_) can be created using `generate_shortlists.pl`, for 5 | example: 6 | 7 | perl generate_shortlists.pl --bindir ./bin -s corpus.bpe.src -t corpus.bpe.tgt 8 | 9 | -------------------------------------------------------------------------------- /contrib/triton-aml/marian_backend/src/marian.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef _WIN32 4 | #define DLLEXPORT extern "C" __declspec(dllexport) 5 | #else 6 | #define DLLEXPORT extern "C" 7 | #endif 8 | 9 | DLLEXPORT void* init(char* path, int device_num); 10 | DLLEXPORT char* translate(void* marian, char* sent); 11 | DLLEXPORT void free_result(char* to_free); 12 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/latency/compare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "running spdlog and g3log tests 10 time with ${1:-10} threads each (total 1,000,000 entries).." 
3 | rm -f *.log 4 | for i in {1..10} 5 | 6 | do 7 | echo 8 | sleep 0.5 9 | ./spdlog-latency ${1:-10} 2>/dev/null || exit 10 | sleep 0.5 11 | ./g3log-latency ${1:-10} 2>/dev/null || exit 12 | 13 | done 14 | -------------------------------------------------------------------------------- /src/command/marian_embedder.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | 3 | #include "models/model_task.h" 4 | #include "embedder/embedder.h" 5 | #include "common/timer.h" 6 | 7 | int main(int argc, char** argv) { 8 | using namespace marian; 9 | 10 | auto options = parseOptions(argc, argv, cli::mode::embedding); 11 | New>(options)->run(); 12 | 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /src/3rd_party/CLI/Version.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Distributed under the 3-Clause BSD License. See accompanying 4 | // file LICENSE or https://github.com/CLIUtils/CLI11 for details. 
5 | 6 | // [CLI11:verbatim] 7 | 8 | #define CLI11_VERSION_MAJOR 1 9 | #define CLI11_VERSION_MINOR 6 10 | #define CLI11_VERSION_PATCH 1 11 | #define CLI11_VERSION "1.6.1" 12 | 13 | // [CLI11:verbatim] 14 | -------------------------------------------------------------------------------- /src/common/version.cpp: -------------------------------------------------------------------------------- 1 | #include "common/version.h" 2 | #include "common/git_revision.h" // make-generated file, contains git commit info 3 | #include "common/project_version.h" // cmake-generated file, major/minor/tweak versions 4 | 5 | namespace marian { 6 | 7 | std::string buildVersion() { 8 | return std::string(PROJECT_VERSION) + " " + GIT_REVISION; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | std::size_t count_lines(const std::string& filename); 7 | 8 | void prepare_logdir(); 9 | 10 | std::string file_contents(const std::string& filename); 11 | 12 | std::size_t count_lines(const std::string& filename); 13 | 14 | std::size_t get_filesize(const std::string& filename); 15 | 16 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/INSTALL: -------------------------------------------------------------------------------- 1 | spdlog is header only library. 
2 | Just copy the files to your build tree and use a C++11 compiler 3 | 4 | Tested on: 5 | gcc 4.8.1 and above 6 | clang 3.5 7 | Visual Studio 2013 8 | 9 | gcc 4.8 flags: --std==c++11 -pthread -O3 -flto -Wl,--no-as-needed 10 | gcc 4.9 flags: --std=c++11 -pthread -O3 -flto 11 | 12 | 13 | see the makefile in the example folder 14 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/includes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "catch.hpp" 11 | #include "utils.h" 12 | 13 | #include "../include/spdlog/spdlog.h" 14 | #include "../include/spdlog/sinks/null_sink.h" 15 | #include "../include/spdlog/sinks/ostream_sink.h" 16 | 17 | -------------------------------------------------------------------------------- /src/3rd_party/zlib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # list of sources files of the library 2 | file(GLOB ZLIB_SRC *.c) 3 | file(GLOB ZLIB_INC *.h) 4 | 5 | # add sources of the wrapper as a "zlib" static library 6 | add_library(zlib OBJECT ${ZLIB_SRC} ${ZLIB_INC}) 7 | 8 | if(MSVC) 9 | target_compile_options(zlib PUBLIC /wd4996 /wd4267) 10 | else() 11 | target_compile_options(zlib PUBLIC -Wno-implicit-function-declaration) 12 | endif() 13 | -------------------------------------------------------------------------------- /src/common/build_info.cpp.in: -------------------------------------------------------------------------------- 1 | #include "common/build_info.h" 2 | 3 | /* 4 | * File build_info.cpp is generated using CMake. Do NOT modify it manually! Edit 5 | * build_info.cpp.in file instead. 
6 | */ 7 | 8 | std::string marian::cmakeBuildOptions() { 9 | return "" 10 | @PROJECT_CMAKE_CACHE@ 11 | ; 12 | } 13 | 14 | std::string marian::cmakeBuildOptionsAdvanced() { 15 | return "" 16 | @PROJECT_CMAKE_CACHE_ADVANCED@ 17 | ; 18 | } 19 | -------------------------------------------------------------------------------- /contrib/autoformat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -v 2 | 3 | if ! [ -x "$( command -v clang-format )" ] 4 | then 5 | mkdir -p $HOME/.local 6 | wget -O- http://releases.llvm.org/6.0.0/clang+llvm-6.0.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz | tar --xz -xf - -C $HOME/.local --strip 1 7 | fi 8 | 9 | find ./src \( -path ./src/3rd_party -o -path ./src/tests -o -path ./src/models/experimental \) -prune -o -iname *.h -o -iname *.cpp -o -iname *.cu | xargs clang-format -i 10 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/fmt/ostr.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | // include external or bundled copy of fmtlib's ostream support 9 | // 10 | #if !defined(SPDLOG_FMT_EXTERNAL) 11 | #include "spdlog/fmt/fmt.h" 12 | #include "spdlog/fmt/bundled/ostream.h" 13 | #else 14 | #include 15 | #endif 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/common/file_utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "common/file_stream.h" 7 | 8 | namespace marian { 9 | namespace fileutils { 10 | 11 | void cut(const std::string& tsvIn, 12 | Ptr tsvOut, 13 | const std::vector& fields, 14 | size_t numFields, 15 | const std::string& sep = "\t"); 16 | 17 | } // namespace utils 18 | } // namespace marian 19 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/easyl.conf: -------------------------------------------------------------------------------- 1 | * GLOBAL: 2 | FORMAT = "[%datetime]: %msg" 3 | FILENAME = ./logs/easylogging.log 4 | ENABLED = true 5 | TO_FILE = true 6 | TO_STANDARD_OUTPUT = false 7 | MILLISECONDS_WIDTH = 3 8 | PERFORMANCE_TRACKING = false 9 | MAX_LOG_FILE_SIZE = 10485760 10 | Log_Flush_Threshold = 10485760 11 | -------------------------------------------------------------------------------- /scripts/contrib/fix_hard.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | 4 | d = dict() 5 | m = np.load(sys.argv[1]) 6 | for k in m: 7 | if "ff_" == k[0:3]: 8 | d["decoder_" + k] = m[k] 9 | elif k == "special:model.yml": 10 | info = m[k].tobytes() 11 | info = info.replace("layers-dec", "dec-depth") 12 | info = info.replace("layers-enc", "enc-depth") 13 | d[k] = info 14 | print info 15 | else: 16 | d[k] = m[k] 17 | np.savez(sys.argv[1] + 
".fixed", **d) -------------------------------------------------------------------------------- /src/command/marian_scorer.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | 3 | #include "models/model_task.h" 4 | #include "rescorer/rescorer.h" 5 | #include "common/timer.h" 6 | 7 | int main(int argc, char** argv) { 8 | using namespace marian; 9 | 10 | auto options = parseOptions(argc, argv, cli::mode::scoring); 11 | 12 | timer::Timer timer; 13 | New>(options)->run(); 14 | LOG(info, "Total time: {:.5f}s wall", timer.elapsed()); 15 | 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /src/models/transformer_stub.cpp: -------------------------------------------------------------------------------- 1 | #include "models/transformer.h" 2 | 3 | namespace marian { 4 | // factory functions 5 | Ptr NewEncoderTransformer(Ptr graph, Ptr options) 6 | { 7 | return New(graph, options); 8 | } 9 | 10 | Ptr NewDecoderTransformer(Ptr graph, Ptr options) 11 | { 12 | return New(graph, options); 13 | } 14 | } // namespace marian 15 | -------------------------------------------------------------------------------- /src/examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(iris_example iris/iris.cpp) 2 | add_executable(mnist_example mnist/mnist_ffnn.cpp) 3 | 4 | foreach(exec iris_example mnist_example) 5 | target_link_libraries(${exec} marian ${EXT_LIBS}) 6 | if(CUDA_FOUND) 7 | target_link_libraries(${exec} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS}) 8 | endif(CUDA_FOUND) 9 | set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") 10 | endforeach(exec) 11 | -------------------------------------------------------------------------------- /src/models/transformer_factory.h: -------------------------------------------------------------------------------- 1 | // @TODO: 
rename to transformer.h eventually. This is not a Factory as in factory.h. 2 | #pragma once 3 | 4 | #include "marian.h" 5 | 6 | #include "models/decoder.h" 7 | #include "models/encoder.h" 8 | 9 | namespace marian { 10 | Ptr NewEncoderTransformer(Ptr graph, Ptr options); 11 | Ptr NewDecoderTransformer(Ptr graph, Ptr options); 12 | } // namespace marian 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Feature description 11 | Please add a concise description of what the problem is and describe the solution you would like to see. 12 | Add links to a paper, another toolkit, etc. if relevant. 13 | 14 | ### Example 15 | Add a usage examples for the new feature, e.g. a command line. 16 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | namespace spdlog 12 | { 13 | namespace sinks 14 | { 15 | class sink 16 | { 17 | public: 18 | virtual ~sink() {} 19 | virtual void log(const details::log_msg& msg) = 0; 20 | virtual void flush() = 0; 21 | }; 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /src/examples/mnist/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ `ls -1 *-ubyte 2>/dev/null | wc -l ` == 4 ]; then 4 | echo Files exist: `ls -1 *-ubyte`; 5 | exit; 6 | fi 7 | 8 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz 9 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz 10 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz 11 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz 12 | 13 | gzip -d *-ubyte.gz 14 | -------------------------------------------------------------------------------- /src/tests/README.md: -------------------------------------------------------------------------------- 1 | Marian tests 2 | ============ 3 | 4 | Unit tests and application tests are enabled with CMake option 5 | `-DCOMPILE_TESTS=ON`, e.g.: 6 | 7 | cd build 8 | cmake .. -DCOMPILE_TESTS=ON 9 | make -j8 10 | 11 | Running all unit tests: 12 | 13 | make test 14 | 15 | Running a single unit test is also possible: 16 | 17 | ./src/tests/run_graph_tests 18 | 19 | We use [Catch framework](https://github.com/philsquared/Catch) for unit 20 | testing. 
21 | -------------------------------------------------------------------------------- /src/3rd_party/zlib/inffast.h: -------------------------------------------------------------------------------- 1 | /* inffast.h -- header to use inffast.c 2 | * Copyright (C) 1995-2003, 2010 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | /* WARNING: this file should *not* be used by applications. It is 7 | part of the implementation of the compression library and is 8 | subject to change. Applications should only use zlib.h. 9 | */ 10 | 11 | void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); 12 | -------------------------------------------------------------------------------- /src/tensors/gpu/add.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tensors/tensor.h" 4 | 5 | namespace marian { 6 | 7 | namespace gpu { 8 | 9 | template 10 | void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors); 11 | 12 | template 13 | void Aggregate(Functor functor, float initAgg, AggFunctor aggFunctor, float scale, marian::Tensor out, Tensors... 
tensors); 14 | } 15 | } // namespace marian 16 | -------------------------------------------------------------------------------- /wasm/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM emscripten/emsdk:3.1.8 2 | 3 | # Install specific version of CMake 4 | WORKDIR /usr 5 | RUN wget https://github.com/Kitware/CMake/releases/download/v3.17.2/cmake-3.17.2-Linux-x86_64.tar.gz -qO-\ 6 | | tar xzf - --strip-components 1 7 | 8 | # Install Python and Java (needed for Closure Compiler minification) 9 | RUN apt-get update \ 10 | && apt-get install -y \ 11 | python3 \ 12 | default-jre 13 | 14 | # Necessary for benchmarking 15 | RUN pip3 install sacrebleu 16 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/jni/Android.mk: -------------------------------------------------------------------------------- 1 | # Setup a project 2 | LOCAL_PATH := $(call my-dir) 3 | include $(CLEAR_VARS) 4 | 5 | LOCAL_MODULE := example 6 | LOCAL_SRC_FILES := example.cpp 7 | LOCAL_CPPFLAGS += -Wall -Wshadow -Wextra -pedantic -std=c++11 -fPIE -pie 8 | LOCAL_LDFLAGS += -fPIE -pie 9 | 10 | # Add exception support and set path for spdlog's headers 11 | LOCAL_CPPFLAGS += -fexceptions -I../include 12 | # Use android's log library 13 | LOCAL_LDFLAGS += -llog 14 | 15 | include $(BUILD_EXECUTABLE) 16 | -------------------------------------------------------------------------------- /src/marian.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // clang-format off 4 | #include "common/version.h" 5 | #include "common/config.h" 6 | #include "common/definitions.h" 7 | #include "common/logging.h" 8 | #include "common/options.h" 9 | #include "common/io.h" 10 | 11 | #include "data/batch_generator.h" 12 | #include "data/corpus.h" 13 | 14 | #include "graph/expression_graph.h" 15 | #include "graph/expression_operators.h" 16 | #include 
"graph/node_initializers.h" 17 | 18 | #include "optimizers/optimizers.h" 19 | // clang-format on 20 | -------------------------------------------------------------------------------- /contrib/triton-aml/marian_backend/cmake/TritonMarianBackendConfig.cmake.in: -------------------------------------------------------------------------------- 1 | include(CMakeFindDependencyMacro) 2 | 3 | get_filename_component( 4 | TRITONMARIANBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH 5 | ) 6 | 7 | list(APPEND CMAKE_MODULE_PATH ${TRITONMARIANBACKEND_CMAKE_DIR}) 8 | 9 | if(NOT TARGET TritonMarianBackend::triton-marian-backend) 10 | include("${TRITONMARIANBACKEND_CMAKE_DIR}/TritonMarianBackendTargets.cmake") 11 | endif() 12 | 13 | set(TRITONMARIANBACKEND_LIBRARIES TritonMarianBackend::triton-marian-backend) 14 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/contrib/graphbuilder.cpp: -------------------------------------------------------------------------------- 1 | #include "graphbuilderadapter.h" 2 | 3 | #include "yaml-cpp/parser.h" // IWYU pragma: keep 4 | 5 | namespace YAML { 6 | class GraphBuilderInterface; 7 | 8 | void* BuildGraphOfNextDocument(Parser& parser, 9 | GraphBuilderInterface& graphBuilder) { 10 | GraphBuilderAdapter eventHandler(graphBuilder); 11 | if (parser.HandleNextDocument(eventHandler)) { 12 | return eventHandler.RootNode(); 13 | } else { 14 | return NULL; 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /scripts/ci/install_mkl.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html 4 | wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add - 5 | sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > 
/etc/apt/sources.list.d/intel-mkl.list" 6 | sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list" 7 | sudo apt-get install --no-install-recommends intel-mkl-64bit-2020.0-088 8 | -------------------------------------------------------------------------------- /src/command/marian_decoder.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | #include "translator/beam_search.h" 3 | #include "translator/translator.h" 4 | #include "common/timer.h" 5 | #ifdef _WIN32 6 | #include 7 | #endif 8 | 9 | int main(int argc, char** argv) { 10 | using namespace marian; 11 | auto options = parseOptions(argc, argv, cli::mode::translation); 12 | auto task = New>(options); 13 | 14 | timer::Timer timer; 15 | task->run(); 16 | LOG(info, "Total time: {:.5f}s wall", timer.elapsed()); 17 | 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /src/common/project_version.h.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * File project_version.h is generated using CMake. Do NOT modify it manually! Edit 5 | * project_version.h.in file instead. 6 | */ 7 | 8 | // e.g. v1.2.3-beta+1.abc123d 9 | #define PROJECT_VERSION_FULL "@PROJECT_VERSION_STRING_FULL@" 10 | // e.g. v1.2.3-beta 11 | #define PROJECT_VERSION "@PROJECT_VERSION_STRING@" 12 | #define PROJECT_VERSION_MAJOR @PROJECT_VERSION_MAJOR@ 13 | #define PROJECT_VERSION_MINOR @PROJECT_VERSION_MINOR@ 14 | #define PROJECT_VERSION_PATCH @PROJECT_VERSION_PATCH@ 15 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/glog-bench.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include "glog/logging.h" 7 | 8 | 9 | int main(int, char* argv[]) 10 | { 11 | int howmany = 1000000; 12 | 13 | 14 | FLAGS_logtostderr = 0; 15 | FLAGS_log_dir = "logs"; 16 | google::InitGoogleLogging(argv[0]); 17 | for(int i = 0 ; i < howmany; ++i) 18 | LOG(INFO) << "glog message # " << i << ": This is some text for your pleasure"; 19 | 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /src/translator/helpers.h: -------------------------------------------------------------------------------- 1 | /* All or part of this file was contributed by Intel under license: 2 | * Copyright (C) 2017-2018 Intel Corporation 3 | * SPDX-License-Identifier: MIT 4 | */ 5 | 6 | #pragma once 7 | 8 | #include "graph/expression_graph.h" 9 | 10 | namespace marian { 11 | 12 | namespace cpu { 13 | 14 | void suppressWord(Expr logProbs, WordIndex wordIndex); 15 | } 16 | 17 | namespace gpu { 18 | 19 | void suppressWord(Expr logProbs, WordIndex wordIndex); 20 | } 21 | 22 | void suppressWord(Expr logProbs, WordIndex wordIndex); 23 | } // namespace marian 24 | -------------------------------------------------------------------------------- /vs/BuildRelease.bat: -------------------------------------------------------------------------------- 1 | :: 2 | :: Usage: BuildRelease.bat [BUILD_ROOT=.\build] 3 | :: 4 | :: This script runs the dependency checks, generates the projects/makefiles and then 5 | :: builds the project in Release configuration. 6 | :: 7 | :: 8 | @echo off 9 | setlocal 10 | 11 | set ROOT=%~dp0 12 | set MARIAN_ROOT=%ROOT%..
13 | 14 | set BUILD_ROOT=%1 15 | if "%BUILD_ROOT%"=="" set BUILD_ROOT=%ROOT%build 16 | 17 | call CreateVSProjects.bat %BUILD_ROOT% 18 | if errorlevel 1 exit /b 1 19 | 20 | cmake --build %BUILD_ROOT% --config Release 21 | :: Propagate a failed build to the caller instead of falling through to the success exit below. 22 | if errorlevel 1 exit /b 1 23 | 24 | exit /b 0 -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/directives.cpp: -------------------------------------------------------------------------------- 1 | #include "directives.h" 2 | 3 | namespace YAML { 4 | Directives::Directives() { 5 | // version 6 | version.isDefault = true; 7 | version.major = 1; 8 | version.minor = 2; 9 | } 10 | 11 | const std::string Directives::TranslateTagHandle( 12 | const std::string& handle) const { 13 | std::map::const_iterator it = tags.find(handle); 14 | if (it == tags.end()) { 15 | if (handle == "!!") 16 | return "tag:yaml.org,2002:"; 17 | return handle; 18 | } 19 | 20 | return it->second; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/anchor.h: -------------------------------------------------------------------------------- 1 | #ifndef ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | namespace YAML { 13 | typedef std::size_t anchor_t; 14 | const anchor_t NullAnchor = 0; 15 | } 16 | 17 | #endif // ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 18 | -------------------------------------------------------------------------------- /src/tensors/rand.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | 5 | #include 6 | 7 | namespace marian { 8 | 9 | class TensorBase; 10 | typedef IPtr Tensor; 11 | 12 | class
RandomGenerator { 13 | protected: 14 | size_t seed_; 15 | 16 | public: 17 | RandomGenerator(size_t seed) : seed_(seed) { } 18 | virtual ~RandomGenerator() {} 19 | virtual void uniform(Tensor, float a, float b) = 0; 20 | virtual void normal(Tensor, float mean, float stddev) = 0; 21 | }; 22 | 23 | Ptr createRandomGenerator(size_t /*seed*/, DeviceId); 24 | 25 | } 26 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/emitterstyle.h: -------------------------------------------------------------------------------- 1 | #ifndef EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | namespace YAML { 11 | struct EmitterStyle { 12 | enum value { Default, Block, Flow }; 13 | }; 14 | } 15 | 16 | #endif // EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 17 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/type.h: -------------------------------------------------------------------------------- 1 | #ifndef VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | namespace YAML { 11 | struct NodeType { 12 | enum value { Undefined, Null, Scalar, Sequence, Map }; 13 | }; 14 | } 15 | 16 | #endif // VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 17 | -------------------------------------------------------------------------------- /src/optimizers/clippers.cpp: 
-------------------------------------------------------------------------------- 1 | #include "clippers.h" 2 | 3 | #include "functional/functional.h" 4 | #include "tensors/tensor_operators.h" 5 | 6 | namespace marian { 7 | void Elementwise::clip(Tensor t) { 8 | using namespace functional; 9 | Element(_1 = functional::clip(_1, c_), t); 10 | } 11 | 12 | void Norm::clip(Tensor t) { 13 | using namespace functional; 14 | float l2Norm = L2Norm(t, nullptr); // @TODO: this is a placeholder for a memory allocator, will be replaced with better version in a PR or two. 15 | if(l2Norm >= c_) 16 | Element(_1 = (c_ / l2Norm) * _1, t); 17 | } 18 | } // namespace marian 19 | -------------------------------------------------------------------------------- /src/tensors/backend.cpp: -------------------------------------------------------------------------------- 1 | #include "tensors/backend.h" 2 | 3 | #ifdef CUDA_FOUND 4 | #include "tensors/gpu/backend.h" 5 | #pragma warning(disable:4505) // "unreferenced local function has been removed" in cuda\v9.2\include\cuda_fp16.hpp 6 | #endif 7 | 8 | #include "tensors/cpu/backend.h" 9 | 10 | namespace marian { 11 | 12 | Ptr BackendByDeviceId(DeviceId deviceId, size_t seed) { 13 | #ifdef CUDA_FOUND 14 | if(deviceId.type == DeviceType::gpu) 15 | return New(deviceId, seed); 16 | else 17 | #endif 18 | return New(deviceId, seed); 19 | } 20 | } // namespace marian 21 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/install_libcxx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Install libc++ under travis 4 | 5 | svn --quiet co http://llvm.org/svn/llvm-project/libcxx/trunk libcxx 6 | mkdir libcxx/build 7 | (cd libcxx/build && cmake .. 
-DLIBCXX_CXX_ABI=libstdc++ -DLIBCXX_CXX_ABI_INCLUDE_PATHS="/usr/include/c++/4.6;/usr/include/c++/4.6/x86_64-linux-gnu") 8 | make -C libcxx/build cxx -j2 9 | sudo cp libcxx/build/lib/libc++.so.1.0 /usr/lib/ 10 | sudo cp -r libcxx/build/include/c++/v1 /usr/include/c++/v1/ 11 | sudo ln -sf /usr/lib/libc++.so.1.0 /usr/lib/libc++.so 12 | sudo ln -sf /usr/lib/libc++.so.1.0 /usr/lib/libc++.so.1 13 | -------------------------------------------------------------------------------- /src/functional/defs.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef __CUDACC__ // Compiling with NVCC, host or device code 4 | 5 | #include 6 | #define HOST __host__ 7 | #define DEVICE __device__ 8 | #define DEVICE_INLINE __device__ inline 9 | #define HOST_INLINE __host__ inline 10 | #define HOST_DEVICE __host__ __device__ 11 | #define HOST_DEVICE_INLINE __host__ __device__ inline 12 | 13 | #else // Compiling with GCC or other host compiler 14 | 15 | #define HOST 16 | #define DEVICE 17 | #define DEVICE_INLINE inline 18 | #define HOST_INLINE inline 19 | #define HOST_DEVICE 20 | #define HOST_DEVICE_INLINE inline 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Tests 3 | # 4 | 5 | enable_testing() 6 | 7 | find_package(Threads) 8 | 9 | # Build Catch unit tests 10 | add_library(catch INTERFACE) 11 | target_include_directories(catch INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) 12 | 13 | file(GLOB catch_tests LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp *.h *.hpp) 14 | 15 | add_executable(catch_tests ${catch_tests}) 16 | target_link_libraries(catch_tests spdlog ${CMAKE_THREAD_LIBS_INIT}) 17 | add_test(NAME catch_tests COMMAND catch_tests) 18 | file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/logs") 19 | 20 | 
-------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/emitterdef.h: -------------------------------------------------------------------------------- 1 | #ifndef EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | namespace YAML { 11 | struct EmitterNodeType { 12 | enum value { NoType, Property, Scalar, FlowSeq, BlockSeq, FlowMap, BlockMap }; 13 | }; 14 | } 15 | 16 | #endif // EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66 17 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/easylogging-bench.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | 7 | #include "easylogging++.h" 8 | 9 | _INITIALIZE_EASYLOGGINGPP 10 | 11 | int main(int, char* []) 12 | { 13 | int howmany = 1000000; 14 | 15 | // Load configuration from file 16 | el::Configurations conf("easyl.conf"); 17 | el::Loggers::reconfigureLogger("default", conf); 18 | 19 | for(int i = 0 ; i < howmany; ++i) 20 | LOG(INFO) << "easylog message #" << i << ": This is some text for your pleasure"; 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /contrib/triton-aml/marian_backend/README.md: -------------------------------------------------------------------------------- 1 | Use cmake to build and install in a local directory. 2 | 3 | ``` 4 | $ mkdir build 5 | $ cd build 6 | $ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install .. 
7 | $ make install 8 | ``` 9 | 10 | The following required Triton repositories will be pulled and used in 11 | the build. By default the "main" branch/tag will be used for each repo 12 | but the listed CMake argument can be used to override. 13 | 14 | * triton-inference-server/backend: -DTRITON_BACKEND_REPO_TAG=[tag] 15 | * triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag] 16 | * triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] 17 | -------------------------------------------------------------------------------- /contrib/vim/.vimrc: -------------------------------------------------------------------------------- 1 | autocmd BufRead,BufNewFile *.cu set filetype=cpp 2 | augroup cpp 3 | au! 4 | au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set tabstop=2 5 | au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set shiftwidth=2 6 | au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set expandtab 7 | au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set softtabstop=2 "Insert 2 spaces when tab is pressed 8 | au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set smarttab "Indent instead of tab at start of line 9 | au BufRead,BufNewFile *.c,*.cpp,*.cu,*.h,*.hpp set shiftround "Round spaces to nearest shiftwidth multiple 10 | augroup end 11 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/fmt/fmt.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | // 9 | // Include a bundled header-only copy of fmtlib or an external one. 10 | // By default spdlog include its own copy. 
11 | // 12 | 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | 15 | #ifndef FMT_HEADER_ONLY 16 | #define FMT_HEADER_ONLY 17 | #endif 18 | #ifndef FMT_USE_WINDOWS_H 19 | #define FMT_USE_WINDOWS_H 0 20 | #endif 21 | #include "spdlog/fmt/bundled/format.h" 22 | 23 | #else //external fmtlib 24 | 25 | #include 26 | 27 | #endif 28 | 29 | -------------------------------------------------------------------------------- /scripts/shortlist/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -v 2 | 3 | mkdir -p bin 4 | 5 | # download and compile fast_align 6 | if [ ! -e bin/fast_align ]; then 7 | git clone https://github.com/clab/fast_align 8 | mkdir -p fast_align/build 9 | cd fast_align/build 10 | cmake .. 11 | make -j4 12 | cp fast_align atools ../../bin 13 | cd ../../ 14 | fi 15 | 16 | # download and compile extract-lex 17 | if [ ! -e bin/extract_lex ]; then 18 | git clone https://github.com/marian-nmt/extract-lex 19 | mkdir -p extract-lex/build 20 | cd extract-lex/build 21 | cmake .. 
22 | make -j4 23 | cp extract_lex ../../bin 24 | cd ../../ 25 | fi 26 | -------------------------------------------------------------------------------- /src/tensors/gpu/algorithm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tensors/backend.h" 4 | 5 | namespace marian { 6 | namespace gpu { 7 | template 8 | void copy(Ptr backend, const T* begin, const T* end, T* dest); 9 | 10 | template 11 | void fill(Ptr backend, T* begin, T* end, T value); 12 | 13 | template 14 | void swap_ranges(Ptr backend, T* begin, T* end, T* dest); 15 | 16 | void setSparse(Ptr backend, 17 | const std::vector&, 18 | const std::vector&, 19 | float*); 20 | } // namespace gpu 21 | } // namespace marian 22 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/memory.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/node/detail/memory.h" 2 | #include "yaml-cpp/node/detail/node.h" // IWYU pragma: keep 3 | #include "yaml-cpp/node/ptr.h" 4 | 5 | namespace YAML { 6 | namespace detail { 7 | 8 | void memory_holder::merge(memory_holder& rhs) { 9 | if (m_pMemory == rhs.m_pMemory) 10 | return; 11 | 12 | m_pMemory->merge(*rhs.m_pMemory); 13 | rhs.m_pMemory = m_pMemory; 14 | } 15 | 16 | node& memory::create_node() { 17 | shared_node pNode(new node); 18 | m_nodes.insert(pNode); 19 | return *pNode; 20 | } 21 | 22 | void memory::merge(const memory& rhs) { 23 | m_nodes.insert(rhs.m_nodes.begin(), rhs.m_nodes.end()); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/emit.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/node/emit.h" 2 | #include "yaml-cpp/emitfromevents.h" 3 | #include "yaml-cpp/emitter.h" 4 | #include "nodeevents.h" 5 | 6 | namespace YAML { 7 | Emitter& operator<<(Emitter& 
out, const Node& node) { 8 | EmitFromEvents emitFromEvents(out); 9 | NodeEvents events(node); 10 | events.Emit(emitFromEvents); 11 | return out; 12 | } 13 | 14 | std::ostream& operator<<(std::ostream& out, const Node& node) { 15 | Emitter emitter(out); 16 | emitter << node; 17 | return out; 18 | } 19 | 20 | std::string Dump(const Node& node) { 21 | Emitter emitter; 22 | emitter << node; 23 | return emitter.c_str(); 24 | } 25 | } // namespace YAML 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Bug description 11 | Please add a clear and concise description of the bug, including observed and if possible expected behavior. 12 | 13 | ### How to reproduce 14 | Describe steps or include command to reproduce the behavior. 15 | 16 | ### Context 17 | * Marian version: Paste the output of `--version` here 18 | * CMake command: Type the cmake command you used and attach the output of `--build-info all` 19 | * Log file: Attach your training/decoding logs 20 | 21 | Add any other information about the problem here. 
22 | -------------------------------------------------------------------------------- /src/tensors/cpu/device.cpp: -------------------------------------------------------------------------------- 1 | #include "tensors/device.h" 2 | #include "tensors/cpu/aligned.h" 3 | #include 4 | namespace marian { 5 | namespace cpu { 6 | 7 | Device::~Device() { 8 | genericFree(data_); 9 | } 10 | 11 | void Device::reserve(size_t size) { 12 | size = align(size); 13 | ABORT_IF(size < size_ || size == 0, 14 | "New size must be larger than old size and larger than 0"); 15 | 16 | uint8_t *temp = static_cast(genericMalloc(alignment_, size)); 17 | if(data_) { 18 | std::copy(data_, data_ + size_, temp); 19 | genericFree(data_); 20 | } 21 | data_ = temp; 22 | size_ = size; 23 | } 24 | } // namespace cpu 25 | } // namespace marian 26 | -------------------------------------------------------------------------------- /src/translator/nth_element.h: -------------------------------------------------------------------------------- 1 | /* All or part of this file was contributed by Intel under license: 2 | * Copyright (C) 2017-2018 Intel Corporation 3 | * SPDX-License-Identifier: MIT 4 | */ 5 | 6 | #pragma once 7 | 8 | #include "tensors/tensor.h" 9 | #include 10 | 11 | namespace marian { 12 | 13 | typedef std::function& outCosts, 16 | std::vector& outKeys, 17 | const bool isFirst)> GetNBestListFn; 18 | 19 | GetNBestListFn createGetNBestListFn(size_t beamSize, size_t dimBatch, DeviceId deviceId); 20 | } // namespace marian 21 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/spdlog-bench.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include "spdlog/spdlog.h" 7 | 8 | 9 | int main(int, char* []) 10 | { 11 | int howmany = 1000000; 12 | namespace spd = spdlog; 13 | ///Create a file rotating logger with 5mb size max and 3 rotated files 14 | auto logger = spdlog::create("file_logger", "logs/spd-bench-st.txt", false); 15 | 16 | logger->set_pattern("[%Y-%b-%d %T.%e]: %v"); 17 | for(int i = 0 ; i < howmany; ++i) 18 | logger->info("spdlog message #{} : This is some text for your pleasure", i); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /src/models/encoder.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "marian.h" 4 | #include "models/states.h" 5 | 6 | namespace marian { 7 | 8 | class EncoderBase : public EncoderDecoderLayerBase { 9 | public: 10 | EncoderBase(Ptr graph, Ptr options) : 11 | EncoderDecoderLayerBase(graph, options, "encoder", /*batchIndex=*/0, 12 | options->get("dropout-src", 0.0f), 13 | options->get("embedding-fix-src", false)) {} 14 | 15 | // @TODO: turn into an interface. Also see if we can get rid of the graph parameter. 16 | virtual Ptr build(Ptr, Ptr) = 0; 17 | 18 | virtual void clear() = 0; 19 | }; 20 | 21 | } // namespace marian 22 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/null_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | namespace spdlog 14 | { 15 | namespace sinks 16 | { 17 | 18 | template 19 | class null_sink : public base_sink < Mutex > 20 | { 21 | protected: 22 | void _sink_it(const details::log_msg&) override 23 | {} 24 | 25 | void flush() override 26 | {} 27 | 28 | }; 29 | typedef null_sink null_sink_st; 30 | typedef null_sink null_sink_mt; 31 | 32 | } 33 | } 34 | 35 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/scantag.h: -------------------------------------------------------------------------------- 1 | #ifndef SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include "stream.h" 12 | 13 | namespace YAML { 14 | const std::string ScanVerbatimTag(Stream& INPUT); 15 | const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle); 16 | const std::string ScanTagSuffix(Stream& INPUT); 17 | } 18 | 19 | #endif // SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 20 | -------------------------------------------------------------------------------- /src/data/rng_engine.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "common/config.h" 7 | 8 | namespace marian { 9 | namespace data { 10 | 11 | /** 12 | * @brief Class providing an engine for pseudo-random number generation. 
13 | */ 14 | class RNGEngine { 15 | protected: 16 | std::mt19937 eng_; 17 | 18 | public: 19 | RNGEngine() : eng_((unsigned int)Config::seed) {} 20 | 21 | std::string getRNGState() { 22 | std::ostringstream oss; 23 | oss << eng_; 24 | return oss.str(); 25 | } 26 | 27 | void setRNGState(std::string engineState) { 28 | std::istringstream iss(engineState); 29 | iss >> eng_; 30 | } 31 | }; 32 | } // namespace data 33 | } // namespace marian 34 | -------------------------------------------------------------------------------- /src/layers/lsh.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "graph/expression_graph.h" 4 | #include 5 | 6 | namespace faiss { 7 | struct IndexLSH; 8 | } 9 | 10 | namespace marian { 11 | 12 | // Helper for LSH-based output approximation (see the ABORT message in the constructor). 13 | class LSH { 14 | public: 15 | // Stores k and nbits; aborts at construction time when the build has no BLAS (BLAS_FOUND is 0). 16 | LSH(int k, int nbits) : k_{k}, nbits_{nbits} { 17 | #if !BLAS_FOUND 18 | ABORT("LSH-based output approximation requires BLAS library"); 19 | #endif 20 | } 21 | 22 | Expr apply(Expr query, Expr values, Expr bias); 23 | 24 | private: 25 | #ifndef WASM_COMPATIBLE_SOURCE 26 | Ptr index_; 27 | #endif 28 | size_t indexHash_{0}; 29 | 30 | int k_{100}; // NOTE(review): presumably the number of candidates kept per query -- confirm 31 | int nbits_{1024}; // NOTE(review): presumably the hash width in bits -- confirm 32 | 33 | Expr search(Expr query, Expr values); 34 | Expr affine(Expr idx, Expr query, Expr values, Expr bias); 35 | }; 36 | 37 | } // namespace marian -------------------------------------------------------------------------------- /src/3rd_party/onnx/protobuf/onnx-ml.pb-wrapper.h: -------------------------------------------------------------------------------- 1 | // protobuf-generated files don't compile clean. This compiles them with warnings 2 | // disabled, without having to disable it for the entire project whole-sale.
3 | 4 | #pragma once 5 | 6 | #ifdef _MSC_VER 7 | #pragma warning(push) 8 | #pragma warning(disable : 4800 4610 4512 4510 4267 4127 4125 4100 4456) 9 | #endif 10 | #ifdef __GNUC__ 11 | #pragma GCC diagnostic push 12 | #pragma GCC diagnostic ignored "-Wunused-variable" 13 | #pragma GCC diagnostic ignored "-Wsuggest-override" 14 | #endif 15 | 16 | #include "onnx-ml.pb.h" // this is the actual file we include 17 | 18 | #ifdef __GNUC__ 19 | #pragma GCC diagnostic pop 20 | #endif 21 | #ifdef _MSC_VER 22 | #pragma warning(pop) 23 | #endif 24 | -------------------------------------------------------------------------------- /src/functional/functional.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // this header is meant to be included for all operations from the "functional" namespace. 4 | 5 | #include "functional/operands.h" 6 | #include "functional/predicates.h" 7 | #include "functional/operators.h" 8 | 9 | namespace marian { 10 | namespace functional { 11 | 12 | template 13 | using ref = Assignee; 14 | 15 | static ref<1> _1; 16 | static ref<2> _2; 17 | static ref<3> _3; 18 | static ref<4> _4; 19 | static ref<5> _5; 20 | static ref<6> _6; 21 | static ref<7> _7; 22 | static ref<8> _8; 23 | static ref<9> _9; 24 | 25 | const C<0> _0c; 26 | const C<1> _1c; 27 | const C<2> _2c; 28 | const C<-1> _1cneg; 29 | const C<-2> _2cneg; 30 | } // namespace functional 31 | } // namespace marian -------------------------------------------------------------------------------- /src/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Unit tests 2 | add_subdirectory(units) 3 | 4 | if(NOT MSVC) 5 | # Testing apps 6 | set(APP_TESTS 7 | logger 8 | dropout 9 | prod 10 | cli 11 | pooling 12 | sentencepiece_norm 13 | ) 14 | 15 | foreach(test ${APP_TESTS}) 16 | add_executable("test_${test}" "${test}.cpp") 17 | 18 | if(CUDA_FOUND) 19 | target_link_libraries("test_${test}" 
${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS}) 20 | else(CUDA_FOUND) 21 | target_link_libraries("test_${test}" marian ${EXT_LIBS}) 22 | endif(CUDA_FOUND) 23 | 24 | set_target_properties("test_${test}" PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") 25 | endforeach(test) 26 | endif(NOT MSVC) 27 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/null_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/sinks/base_sink.h" 9 | #include "spdlog/details/null_mutex.h" 10 | 11 | #include 12 | 13 | namespace spdlog 14 | { 15 | namespace sinks 16 | { 17 | 18 | template 19 | class null_sink : public base_sink < Mutex > 20 | { 21 | protected: 22 | void _sink_it(const details::log_msg&) override 23 | {} 24 | 25 | void _flush() override 26 | {} 27 | 28 | }; 29 | typedef null_sink null_sink_st; 30 | typedef null_sink null_sink_mt; 31 | 32 | } 33 | } 34 | 35 | -------------------------------------------------------------------------------- /src/3rd_party/CLI/CLI.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Distributed under the 3-Clause BSD License. See accompanying 4 | // file LICENSE or https://github.com/CLIUtils/CLI11 for details. 
5 | 6 | // CLI Library includes 7 | // Order is important for combiner script 8 | 9 | #include "CLI/Version.hpp" 10 | 11 | #include "CLI/Macros.hpp" 12 | 13 | #include "CLI/Optional.hpp" 14 | 15 | #include "CLI/StringTools.hpp" 16 | 17 | #include "CLI/Error.hpp" 18 | 19 | #include "CLI/TypeTools.hpp" 20 | 21 | #include "CLI/Split.hpp" 22 | 23 | #include "CLI/ConfigFwd.hpp" 24 | 25 | #include "CLI/Validators.hpp" 26 | 27 | #include "CLI/FormatterFwd.hpp" 28 | 29 | #include "CLI/Option.hpp" 30 | 31 | #include "CLI/App.hpp" 32 | 33 | #include "CLI/Config.hpp" 34 | 35 | #include "CLI/Formatter.hpp" 36 | -------------------------------------------------------------------------------- /src/common/binary.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/io_item.h" 4 | 5 | #include 6 | #include 7 | 8 | namespace marian { 9 | 10 | const static int BINARY_FILE_VERSION = 1; 11 | 12 | namespace io { 13 | namespace binary { 14 | 15 | void loadItems(const void* current, 16 | std::vector& items, 17 | bool mapped = false); 18 | void loadItems(const std::string& fileName, std::vector& items); 19 | 20 | io::Item getItem(const void* current, const std::string& vName); 21 | io::Item getItem(const std::string& fileName, const std::string& vName); 22 | 23 | void saveItems(const std::string& fileName, const std::vector& items); 24 | 25 | } // namespace binary 26 | } // namespace io 27 | } // namespace marian 28 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/utils.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace utils 13 | { 14 | 15 | template 16 | inline std::string format(const T& value) 17 | { 18 | static std::locale loc(""); 19 | std::stringstream ss; 20 | ss.imbue(loc); 21 | ss << value; 22 | return ss.str(); 23 | } 24 | 25 | template<> 26 | inline std::string format(const double & value) 27 | { 28 | static std::locale loc(""); 29 | std::stringstream ss; 30 | ss.imbue(loc); 31 | ss << std::fixed << std::setprecision(1) << value; 32 | return ss.str(); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/utils.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace utils 13 | { 14 | 15 | template 16 | inline std::string format(const T& value) 17 | { 18 | static std::locale loc(""); 19 | std::stringstream ss; 20 | ss.imbue(loc); 21 | ss << value; 22 | return ss.str(); 23 | } 24 | 25 | template<> 26 | inline std::string format(const double & value) 27 | { 28 | static std::locale loc(""); 29 | std::stringstream ss; 30 | ss.imbue(loc); 31 | ss << std::fixed << std::setprecision(1) << value; 32 | return ss.str(); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/latency/utils.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace utils 13 | { 14 | 15 | template 16 | inline std::string format(const T& value) 17 | { 18 | static std::locale loc(""); 19 | std::stringstream ss; 20 | ss.imbue(loc); 21 | ss << value; 22 | return ss.str(); 23 | } 24 | 25 | template<> 26 | inline std::string format(const double & value) 27 | { 28 | static std::locale loc(""); 29 | std::stringstream ss; 30 | ss.imbue(loc); 31 | ss << std::fixed << std::setprecision(1) << value; 32 | return ss.str(); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/exceptions.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/exceptions.h" 2 | 3 | namespace YAML { 4 | 5 | // These destructors are defined out-of-line so the vtable is only emitted once. 6 | Exception::~Exception() noexcept {} 7 | ParserException::~ParserException() noexcept {} 8 | RepresentationException::~RepresentationException() noexcept {} 9 | InvalidScalar::~InvalidScalar() noexcept {} 10 | KeyNotFound::~KeyNotFound() noexcept {} 11 | InvalidNode::~InvalidNode() noexcept {} 12 | BadConversion::~BadConversion() noexcept {} 13 | BadDereference::~BadDereference() noexcept {} 14 | BadSubscript::~BadSubscript() noexcept {} 15 | BadPushback::~BadPushback() noexcept {} 16 | BadInsert::~BadInsert() noexcept {} 17 | EmitterException::~EmitterException() noexcept {} 18 | BadFile::~BadFile() noexcept {} 19 | } 20 | -------------------------------------------------------------------------------- /src/3rd_party/zlib/gzclose.c: -------------------------------------------------------------------------------- 1 | /* gzclose.c -- zlib gzclose() function 2 | * Copyright (C) 2004, 2010 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 
| 6 | #include "gzguts.h" 7 | 8 | /* gzclose() is in a separate file so that it is linked in only if it is used. 9 | That way the other gzclose functions can be used instead to avoid linking in 10 | unneeded compression or decompression routines. */ 11 | int ZEXPORT gzclose(file) 12 | gzFile file; 13 | { 14 | #ifndef NO_GZCOMPRESS 15 | gz_statep state; 16 | 17 | if (file == NULL) 18 | return Z_STREAM_ERROR; 19 | state = (gz_statep)file; 20 | 21 | return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file); 22 | #else 23 | return gzclose_r(file); 24 | #endif 25 | } 26 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/noncopyable.h: -------------------------------------------------------------------------------- 1 | #ifndef NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | 12 | namespace YAML { 13 | // this is basically boost::noncopyable 14 | class YAML_CPP_API noncopyable { 15 | protected: 16 | noncopyable() {} 17 | ~noncopyable() {} 18 | 19 | private: 20 | noncopyable(const noncopyable&); 21 | const noncopyable& operator=(const noncopyable&); 22 | }; 23 | } 24 | 25 | #endif // NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 26 | -------------------------------------------------------------------------------- /src/tests/dropout.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "marian.h" 6 | 7 | using namespace marian; 8 | 9 | int main(int argc, char** argv) { 10 | auto c = New(argc, argv); 11 | 12 | auto type = c->get("cpu-threads") > 0 13 | ? 
DeviceType::cpu 14 | : DeviceType::gpu; 15 | DeviceId deviceId{0, type}; 16 | 17 | auto g = New(); 18 | g->setDevice(deviceId); 19 | g->reserveWorkspaceMB(512); 20 | 21 | for(int i = 0; i < 10; ++i) { 22 | g->clear(); 23 | auto mask1 = g->dropoutMask(0.2, {10, 3072}); 24 | auto mask2 = g->dropoutMask(0.3, {1, 3072}); 25 | auto mask = mask1 + mask2; 26 | debug(mask1, "mask1"); 27 | debug(mask2, "mask2"); 28 | debug(mask, "mask"); 29 | g->forward(); 30 | } 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | Please add a clear and concise description of the changes. 3 | 4 | This PR fixes a bug/adds a new feature/refactors the code/does something else. 5 | It is related to issues: #998, #999, ... 6 | 7 | List of changes: 8 | - ... 9 | - ... 10 | - ... 11 | 12 | Added dependencies: none 13 | 14 | ### How to test 15 | Describe how to test your changes, adding command line examples and sample input/output files if relevant. 16 | Point to unit tests or regression tests covering the changes if they have been added. 17 | 18 | Describe how you have tested your code, including OS and the cmake command. 
19 | 20 | ### Checklist 21 | 22 | - [ ] I have tested the code manually 23 | - [ ] I have run regression tests 24 | - [ ] I have read and followed CONTRIBUTING.md 25 | - [ ] I have updated CHANGELOG.md 26 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/directives.h: -------------------------------------------------------------------------------- 1 | #ifndef DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | namespace YAML { 14 | struct Version { 15 | bool isDefault; 16 | int major, minor; 17 | }; 18 | 19 | struct Directives { 20 | Directives(); 21 | 22 | const std::string TranslateTagHandle(const std::string& handle) const; 23 | 24 | Version version; 25 | std::map tags; 26 | }; 27 | } 28 | 29 | #endif // DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66 30 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/details/null_mutex.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | // null, no cost dummy "mutex" and dummy "atomic" int 10 | 11 | namespace spdlog 12 | { 13 | namespace details 14 | { 15 | struct null_mutex 16 | { 17 | void lock() {} 18 | void unlock() {} 19 | bool try_lock() 20 | { 21 | return true; 22 | } 23 | }; 24 | 25 | struct null_atomic_int 26 | { 27 | int value; 28 | null_atomic_int() = default; 29 | 30 | null_atomic_int(int val):value(val) 31 | {} 32 | 33 | int load(std::memory_order) const 34 | { 35 | return value; 36 | } 37 | 38 | void store(int val) 39 | { 40 | value = val; 41 | } 42 | }; 43 | 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/tests/units/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Unit tests 2 | set(UNIT_TESTS 3 | graph_tests 4 | operator_tests 5 | rnn_tests 6 | attention_tests 7 | fastopt_tests 8 | utils_tests 9 | # cosmos_tests # optional, uncomment to test with specific files. 10 | ) 11 | 12 | foreach(test ${UNIT_TESTS}) 13 | add_executable("run_${test}" run_tests.cpp "${test}.cpp") 14 | 15 | if(CUDA_FOUND) 16 | target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch) 17 | else(CUDA_FOUND) 18 | target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch) 19 | endif(CUDA_FOUND) 20 | 21 | if(MSVC) 22 | # Disable C4305: truncation from 'double' to '_Ty' 23 | target_compile_options("run_${test}" PUBLIC /wd4305) 24 | endif(MSVC) 25 | 26 | add_test(NAME ${test} COMMAND "run_${test}") 27 | endforeach(test) 28 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/details/null_mutex.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | // null, no cost dummy "mutex" and dummy "atomic" int 10 | 11 | namespace spdlog 12 | { 13 | namespace details 14 | { 15 | struct null_mutex 16 | { 17 | void lock() {} 18 | void unlock() {} 19 | bool try_lock() 20 | { 21 | return true; 22 | } 23 | }; 24 | 25 | struct null_atomic_int 26 | { 27 | int value; 28 | null_atomic_int() = default; 29 | 30 | null_atomic_int(int val):value(val) 31 | {} 32 | 33 | int load(std::memory_order) const 34 | { 35 | return value; 36 | } 37 | 38 | void store(int val) 39 | { 40 | value = val; 41 | } 42 | }; 43 | 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/tag.h: -------------------------------------------------------------------------------- 1 | #ifndef TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | namespace YAML { 13 | struct Directives; 14 | struct Token; 15 | 16 | struct Tag { 17 | enum TYPE { 18 | VERBATIM, 19 | PRIMARY_HANDLE, 20 | SECONDARY_HANDLE, 21 | NAMED_HANDLE, 22 | NON_SPECIFIC 23 | }; 24 | 25 | Tag(const Token& token); 26 | const std::string Translate(const Directives& directives); 27 | 28 | TYPE type; 29 | std::string handle, value; 30 | }; 31 | } 32 | 33 | #endif // TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 34 | -------------------------------------------------------------------------------- /src/common/filesystem.cpp: -------------------------------------------------------------------------------- 1 | #include "filesystem.h" 2 | 3 | #ifndef _MSC_VER 4 | // don't include these on Windows: 5 | #include 6 | #include 7 | #include 8 | #endif 
9 | 10 | namespace marian { 11 | namespace filesystem { 12 | 13 | #ifdef _MSC_VER 14 | // Pretend that Windows knows no named pipes. It does, by the way, but 15 | // they seem to be different from pipes on Unix / Linux. See 16 | // https://docs.microsoft.com/en-us/windows/win32/ipc/named-pipes 17 | bool is_fifo(char const* /*path*/) { 18 | return false; 19 | } 20 | #else 21 | bool is_fifo(char const* path) { 22 | struct stat buf; 23 | // stat() leaves buf unspecified on failure, so only read st_mode when it succeeded 24 | return stat(path, &buf) == 0 && S_ISFIFO(buf.st_mode); 25 | } 26 | #endif 27 | 28 | bool is_fifo(std::string const& path) { 29 | return is_fifo(path.c_str()); 30 | } 31 | 32 | } // end of namespace marian::filesystem 33 | } // end of namespace marian 34 | -------------------------------------------------------------------------------- /src/common/hash.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace marian { 6 | namespace util { 7 | 8 | template using hash = std::hash; 9 | 10 | // This combinator is based on boost::hash_combine, but uses 11 | // std::hash as the hash implementation. Used as a drop-in 12 | // replacement for boost::hash_combine. 
13 | template 14 | inline void hash_combine(HashType& seed, T const& v) { 15 | hash hasher; 16 | seed ^= static_cast(hasher(v)) + 0x9e3779b9 + (seed<<6) + (seed>>2); 17 | } 18 | 19 | // Hash a whole chunk of memory, mostly used for diagnostics 20 | template 21 | inline HashType hashMem(const T* beg, size_t len) { 22 | HashType seed = 0; 23 | for(auto it = beg; it < beg + len; ++it) 24 | hash_combine(seed, *it); 25 | return seed; 26 | } 27 | 28 | } 29 | } -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/yaml.h: -------------------------------------------------------------------------------- 1 | #ifndef YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/parser.h" 11 | #include "yaml-cpp/emitter.h" 12 | #include "yaml-cpp/emitterstyle.h" 13 | #include "yaml-cpp/stlemitter.h" 14 | #include "yaml-cpp/exceptions.h" 15 | 16 | #include "yaml-cpp/node/node.h" 17 | #include "yaml-cpp/node/impl.h" 18 | #include "yaml-cpp/node/convert.h" 19 | #include "yaml-cpp/node/iterator.h" 20 | #include "yaml-cpp/node/detail/impl.h" 21 | #include "yaml-cpp/node/parse.h" 22 | #include "yaml-cpp/node/emit.h" 23 | 24 | #endif // YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66 25 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/null.h: -------------------------------------------------------------------------------- 1 | #ifndef NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma 
once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | #include 12 | 13 | namespace YAML { 14 | class Node; 15 | 16 | struct YAML_CPP_API _Null {}; 17 | inline bool operator==(const _Null&, const _Null&) { return true; } 18 | inline bool operator!=(const _Null&, const _Null&) { return false; } 19 | 20 | YAML_CPP_API bool IsNull(const Node& node); // old API only 21 | YAML_CPP_API bool IsNullString(const std::string& str); 22 | 23 | extern YAML_CPP_API _Null Null; 24 | } 25 | 26 | #endif // NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 27 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/Makefile.mingw: -------------------------------------------------------------------------------- 1 | CXX ?= g++ 2 | CXXFLAGS = -D_WIN32_WINNT=0x600 -march=native -Wall -Wextra -Wshadow -pedantic -std=c++11 -pthread -Wl,--no-as-needed -I../include 3 | CXX_RELEASE_FLAGS = -O3 4 | CXX_DEBUG_FLAGS= -g 5 | 6 | 7 | all: example bench 8 | debug: example-debug bench-debug 9 | 10 | example: example.cpp 11 | $(CXX) example.cpp -o example $(CXXFLAGS) $(CXX_RELEASE_FLAGS) 12 | 13 | bench: bench.cpp 14 | $(CXX) bench.cpp -o bench $(CXXFLAGS) $(CXX_RELEASE_FLAGS) 15 | 16 | 17 | example-debug: example.cpp 18 | $(CXX) example.cpp -o example-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS) 19 | 20 | bench-debug: bench.cpp 21 | $(CXX) bench.cpp -o bench-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS) 22 | 23 | 24 | 25 | clean: 26 | rm -f *.o logs/*.txt example example-debug bench bench-debug 27 | 28 | 29 | rebuild: clean all 30 | rebuild-debug: clean debug 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/mark.h: -------------------------------------------------------------------------------- 1 | #ifndef MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && 
__GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | 12 | namespace YAML { 13 | struct YAML_CPP_API Mark { 14 | Mark() : pos(0), line(0), column(0) {} 15 | 16 | static const Mark null_mark() { return Mark(-1, -1, -1); } 17 | 18 | bool is_null() const { return pos == -1 && line == -1 && column == -1; } 19 | 20 | int pos; 21 | int line, column; 22 | 23 | private: 24 | Mark(int pos_, int line_, int column_) 25 | : pos(pos_), line(line_), column(column_) {} 26 | }; 27 | } 28 | 29 | #endif // MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 30 | -------------------------------------------------------------------------------- /src/training/deprecated/gradient_dropping/gpu/sparse_algorithm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | #include "tensors/backend.h" 5 | #include "tensors/tensor.h" 6 | 7 | namespace marian { 8 | namespace gpu { 9 | /** 10 | * @brief Output[i] is lower_bound of values[i] in data. 
11 | * 12 | * @return A vector of size values.size 13 | */ 14 | std::vector lower_bounds(int* data, 15 | std::vector values, 16 | int size, 17 | DeviceId device); 18 | 19 | int buildSparse(Tensor t, float* data, int* indices); 20 | 21 | void scatterAdd(Tensor t, float* data, int* indices, int size, int offset); 22 | 23 | void scatterUpdate(Tensor t, float* data, int* indices, int size, int offset); 24 | 25 | void gather(Tensor t, float* data, int* indices, int size, int offset); 26 | } // namespace gpu 27 | } // namespace marian 28 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/Makefile.clang: -------------------------------------------------------------------------------- 1 | CXX ?= clang++ 2 | CXXFLAGS = -march=native -Wall -Wextra -Wshadow -pedantic -std=c++11 -pthread -I../include 3 | CXX_RELEASE_FLAGS = -O2 4 | CXX_DEBUG_FLAGS= -g 5 | 6 | 7 | all: example bench 8 | debug: example-debug bench-debug 9 | 10 | example: example.cpp 11 | $(CXX) example.cpp -o example-clang $(CXXFLAGS) $(CXX_RELEASE_FLAGS) 12 | 13 | bench: bench.cpp 14 | $(CXX) bench.cpp -o bench-clang $(CXXFLAGS) $(CXX_RELEASE_FLAGS) 15 | 16 | 17 | example-debug: example.cpp 18 | $(CXX) example.cpp -o example-clang-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS) 19 | 20 | bench-debug: bench.cpp 21 | $(CXX) bench.cpp -o bench-clang-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS) 22 | 23 | 24 | 25 | clean: 26 | rm -f *.o logs/*.txt example-clang example-clang-debug bench-clang bench-clang-debug 27 | 28 | 29 | rebuild: clean all 30 | rebuild-debug: clean debug 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/detail/iterator_fwd.h: -------------------------------------------------------------------------------- 1 | #ifndef VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if 
defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | #include 12 | #include 13 | #include 14 | 15 | namespace YAML { 16 | 17 | namespace detail { 18 | struct iterator_value; 19 | template 20 | class iterator_base; 21 | } 22 | 23 | typedef detail::iterator_base iterator; 24 | typedef detail::iterator_base const_iterator; 25 | } 26 | 27 | #endif // VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66 28 | -------------------------------------------------------------------------------- /src/common/file_utils.cpp: -------------------------------------------------------------------------------- 1 | #include "common/file_utils.h" 2 | #include "common/utils.h" 3 | 4 | namespace marian { 5 | namespace fileutils { 6 | 7 | void cut(const std::string& tsvIn, 8 | Ptr tsvOut, 9 | const std::vector& fields, 10 | size_t numFields, 11 | const std::string& sep /*= "\t"*/) { 12 | std::vector tsvFields(numFields); 13 | std::string line; 14 | io::InputFileStream ioIn(tsvIn); 15 | while(getline(ioIn, line)) { 16 | tsvFields.clear(); 17 | utils::splitTsv(line, tsvFields, numFields); // split tab-separated fields 18 | for(size_t i = 0; i < fields.size(); ++i) { 19 | *tsvOut << tsvFields[fields[i]]; 20 | if(i < fields.size() - 1) 21 | *tsvOut << sep; // concatenating fields with the custom separator 22 | } 23 | *tsvOut << std::endl; 24 | } 25 | }; 26 | 27 | } // namespace fileutils 28 | } // namespace marian 29 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 
&& __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | #include 12 | 13 | namespace YAML { 14 | namespace detail { 15 | class node; 16 | class node_ref; 17 | class node_data; 18 | class memory; 19 | class memory_holder; 20 | 21 | typedef std::shared_ptr shared_node; 22 | typedef std::shared_ptr shared_node_ref; 23 | typedef std::shared_ptr shared_node_data; 24 | typedef std::shared_ptr shared_memory_holder; 25 | typedef std::shared_ptr shared_memory; 26 | } 27 | } 28 | 29 | #endif // VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 30 | -------------------------------------------------------------------------------- /src/data/sentencepiece_vocab.h: -------------------------------------------------------------------------------- 1 | #ifdef USE_SENTENCEPIECE 2 | 3 | /* This should be the only place this warning is suppressed to get Windows build working. 4 | * Protobuf dependency creates W4100, which cannot be modified. 5 | */ 6 | 7 | #pragma warning(disable : 4100) 8 | 9 | #include "sentencepiece/src/sentencepiece_processor.h" 10 | #include "sentencepiece/src/sentencepiece_trainer.h" 11 | 12 | /* https://github.com/google/googletest/issues/1063#issuecomment-332518392 */ 13 | #if __GNUC__ >= 5 14 | // Disable GCC 5's -Wsuggest-override warnings in gtest 15 | # pragma GCC diagnostic push 16 | # pragma GCC diagnostic ignored "-Wsuggest-override" 17 | #endif 18 | 19 | /* Current inclusion of SentencePiece structures assume builtin-protobuf hard. 20 | * Future TODO: Make it work with standard protobuf as well. 
21 | * */ 22 | #include "sentencepiece/src/builtin_pb/sentencepiece.pb.h" 23 | 24 | #if __GNUC__ >= 5 25 | # pragma GCC diagnostic pop 26 | #endif 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/embedder/vector_collector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/options.h" 4 | #include "common/definitions.h" 5 | #include "common/file_stream.h" 6 | 7 | #include 8 | #include 9 | 10 | namespace marian { 11 | 12 | // This class manages multi-threaded writing of embedded vectors to stdout or an output file. 13 | // It will either output string versions of float vectors or binary equal length versions depending 14 | // on its binary_ flag. 15 | class VectorCollector { 16 | public: 17 | VectorCollector(const Ptr& options); 18 | virtual ~VectorCollector() {} 19 | 20 | virtual void Write(long id, const std::vector& vec); 21 | 22 | protected: 23 | long nextId_{0}; 24 | UPtr outStrm_; 25 | bool binary_; // output binary floating point vectors if set 26 | 27 | std::mutex mutex_; 28 | 29 | typedef std::map> Outputs; 30 | Outputs outputs_; 31 | 32 | virtual void WriteVector(const std::vector& vec); 33 | }; 34 | } // namespace marian 35 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/latency/g3log-crush.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | void CrusherLoop() 7 | { 8 | size_t counter = 0; 9 | while (true) 10 | { 11 | LOGF(INFO, "Some text to crush you machine. thread:"); 12 | if(++counter % 1000000 == 0) 13 | { 14 | std::cout << "Wrote " << counter << " entries" << std::endl; 15 | } 16 | } 17 | } 18 | 19 | 20 | int main(int argc, char** argv) 21 | { 22 | std::cout << "WARNING: This test will exaust all your machine memory and will crush it!" 
<< std::endl; 23 | std::cout << "Are you sure you want to continue ? " << std::endl; 24 | char c; 25 | std::cin >> c; 26 | if (toupper( c ) != 'Y') 27 | return 0; 28 | 29 | auto worker = g3::LogWorker::createLogWorker(); 30 | auto handle= worker->addDefaultLogger(argv[0], "g3log.txt"); 31 | g3::initializeLogging(worker.get()); 32 | CrusherLoop(); 33 | 34 | return 0; 35 | } 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/msvc_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Alexander Dalshov. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #if defined(_MSC_VER) 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | namespace spdlog 19 | { 20 | namespace sinks 21 | { 22 | /* 23 | * MSVC sink (logging using OutputDebugStringA) 24 | */ 25 | template 26 | class msvc_sink : public base_sink < Mutex > 27 | { 28 | public: 29 | explicit msvc_sink() 30 | { 31 | } 32 | 33 | void flush() override 34 | { 35 | } 36 | 37 | protected: 38 | void _sink_it(const details::log_msg& msg) override 39 | { 40 | OutputDebugStringA(msg.formatted.c_str()); 41 | } 42 | }; 43 | 44 | typedef msvc_sink msvc_sink_mt; 45 | typedef msvc_sink msvc_sink_st; 46 | 47 | } 48 | } 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/msvc_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Alexander Dalshov. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #if defined(_MSC_VER) 9 | 10 | #include "spdlog/sinks/base_sink.h" 11 | #include "spdlog/details/null_mutex.h" 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | namespace spdlog 19 | { 20 | namespace sinks 21 | { 22 | /* 23 | * MSVC sink (logging using OutputDebugStringA) 24 | */ 25 | template 26 | class msvc_sink : public base_sink < Mutex > 27 | { 28 | public: 29 | explicit msvc_sink() 30 | { 31 | } 32 | 33 | 34 | 35 | protected: 36 | void _sink_it(const details::log_msg& msg) override 37 | { 38 | OutputDebugStringA(msg.formatted.c_str()); 39 | } 40 | 41 | void _flush() override 42 | {} 43 | }; 44 | 45 | typedef msvc_sink msvc_sink_mt; 46 | typedef msvc_sink msvc_sink_st; 47 | 48 | } 49 | } 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/rnn/attention_constructors.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "marian.h" 4 | 5 | #include "layers/factory.h" 6 | #include "rnn/attention.h" 7 | #include "rnn/constructors.h" 8 | #include "rnn/types.h" 9 | 10 | namespace marian { 11 | namespace rnn { 12 | 13 | class AttentionFactory : public InputFactory { 14 | protected: 15 | Ptr state_; 16 | 17 | public: 18 | // AttentionFactory(Ptr graph) : InputFactory(graph) {} 19 | 20 | Ptr construct(Ptr graph) override { 21 | ABORT_IF(!state_, "EncoderState not set"); 22 | return New(graph, options_, state_); 23 | } 24 | 25 | Accumulator set_state(Ptr state) { 26 | state_ = state; 27 | return Accumulator(*this); 28 | } 29 | 30 | int dimAttended() { 31 | ABORT_IF(!state_, "EncoderState not set"); 32 | return state_->getAttended()->shape()[1]; 33 | } 34 | }; 35 | 36 | typedef Accumulator attention; 37 | } // namespace rnn 38 | } // namespace marian 39 | 
-------------------------------------------------------------------------------- /src/3rd_party/spdlog/.gitignore: -------------------------------------------------------------------------------- 1 | # Auto generated files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | *.suo 7 | *.tlog 8 | *.ilk 9 | *.log 10 | *.pdb 11 | *.idb 12 | *.iobj 13 | *.ipdb 14 | *.opensdf 15 | *.sdf 16 | 17 | # Compiled Dynamic libraries 18 | *.so 19 | *.dylib 20 | *.dll 21 | 22 | # Compiled Static libraries 23 | *.lai 24 | *.la 25 | *.a 26 | *.lib 27 | 28 | # Executables 29 | *.exe 30 | *.out 31 | *.app 32 | 33 | # Codelite 34 | .codelite 35 | 36 | # .orig files 37 | *.orig 38 | 39 | # example files 40 | example/* 41 | !example/example.cpp 42 | !example/bench.cpp 43 | !example/utils.h 44 | !example/Makefile* 45 | !example/example.sln 46 | !example/example.vcxproj 47 | !example/CMakeLists.txt 48 | !example/multisink.cpp 49 | !example/jni 50 | 51 | # generated files 52 | generated 53 | 54 | # Cmake 55 | CMakeCache.txt 56 | CMakeFiles 57 | CMakeScripts 58 | Makefile 59 | cmake_install.cmake 60 | install_manifest.txt 61 | /tests/tests.VC.VC.opendb 62 | /tests/tests.VC.db 63 | /tests/tests 64 | /tests/logs/file_helper_test.txt 65 | -------------------------------------------------------------------------------- /src/data/batch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "common/definitions.h" 6 | 7 | namespace marian { 8 | namespace data { 9 | 10 | class Batch { 11 | public: 12 | virtual size_t size() const = 0; 13 | virtual size_t words(int /*which*/ = 0) const { return 0; }; 14 | virtual size_t width() const { return 0; }; 15 | 16 | virtual size_t sizeTrg() const { return 0; }; 17 | virtual size_t wordsTrg() const { return 0; }; 18 | virtual size_t widthTrg() const { return 0; }; 19 | 20 | virtual void debug(bool /*printIndices*/ = false) {}; 21 | 22 | virtual std::vector> split(size_t n, size_t sizeLimit = 
SIZE_MAX) = 0; 23 | 24 | const std::vector& getSentenceIds() const { return sentenceIds_; } 25 | void setSentenceIds(const std::vector& ids) { sentenceIds_ = ids; } 26 | 27 | virtual void setGuidedAlignment(std::vector&&) = 0; 28 | virtual void setDataWeights(const std::vector&) = 0; 29 | virtual ~Batch() {}; 30 | protected: 31 | std::vector sentenceIds_; 32 | }; 33 | } // namespace data 34 | } // namespace marian 35 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/detail/bool_type.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | namespace YAML { 11 | namespace detail { 12 | struct unspecified_bool { 13 | struct NOT_ALLOWED; 14 | static void true_value(NOT_ALLOWED*) {} 15 | }; 16 | typedef void (*unspecified_bool_type)(unspecified_bool::NOT_ALLOWED*); 17 | } 18 | } 19 | 20 | #define YAML_CPP_OPERATOR_BOOL() \ 21 | operator YAML::detail::unspecified_bool_type() const { \ 22 | return this->operator!() ? 
0 \ 23 | : &YAML::detail::unspecified_bool::true_value; \ 24 | } 25 | 26 | #endif // NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 27 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/emit.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | #include "yaml-cpp/dll.h" 14 | 15 | namespace YAML { 16 | class Emitter; 17 | class Node; 18 | 19 | /** 20 | * Emits the node to the given {@link Emitter}. If there is an error in writing, 21 | * {@link Emitter#good} will return false. 22 | */ 23 | YAML_CPP_API Emitter& operator<<(Emitter& out, const Node& node); 24 | 25 | /** Emits the node to the given output stream. */ 26 | YAML_CPP_API std::ostream& operator<<(std::ostream& out, const Node& node); 27 | 28 | /** Converts the node to a YAML string. 
*/ 29 | YAML_CPP_API std::string Dump(const Node& node); 30 | } // namespace YAML 31 | 32 | #endif // NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 33 | -------------------------------------------------------------------------------- /src/data/iterator_facade.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // simplistic replacement for boost::iterator_facade 4 | template 5 | struct IteratorFacade { 6 | // to create DummyIterator inherit from public IteratorFacade 7 | // and implement these three functions 8 | virtual bool equal(const Iterator& other) const = 0; 9 | virtual const Item& dereference() const = 0; 10 | virtual void increment() = 0; 11 | 12 | bool operator==(const Iterator& other) const { 13 | return equal(other); 14 | } 15 | 16 | bool operator!=(const Iterator& other) const { 17 | return !equal(other); 18 | } 19 | 20 | const Item& operator*() const { 21 | return dereference(); 22 | } 23 | 24 | // prefix ++ 25 | Iterator& operator++() { 26 | increment(); 27 | return dynamic_cast(*this); 28 | } 29 | 30 | // postfix ++ 31 | Iterator operator++(int) { 32 | auto ret = dynamic_cast(*this); 33 | increment(); 34 | return ret; 35 | } 36 | 37 | const Item* operator->() const { 38 | return &dereference(); 39 | } 40 | }; 41 | -------------------------------------------------------------------------------- /src/layers/weight.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/options.h" 4 | #include "data/corpus.h" 5 | #include "graph/expression_graph.h" 6 | #include "graph/expression_operators.h" 7 | #include "graph/node_initializers.h" 8 | 9 | namespace marian { 10 | 11 | class WeightingBase { 12 | public: 13 | WeightingBase(){}; 14 | virtual Expr getWeights(Ptr graph, 15 | Ptr batch) 16 | = 0; 17 | virtual void debugWeighting(std::vector /*weightedMask*/, 18 | std::vector /*freqMask*/, 19 | Ptr /*batch*/){}; 20 | virtual 
~WeightingBase() {} 21 | }; 22 | 23 | class DataWeighting : public WeightingBase { 24 | protected: 25 | std::string weightingType_; 26 | 27 | public: 28 | DataWeighting(std::string weightingType) 29 | : WeightingBase(), weightingType_(weightingType){}; 30 | Expr getWeights(Ptr graph, Ptr batch) override; 31 | }; 32 | 33 | Ptr WeightingFactory(Ptr options); 34 | } // namespace marian 35 | -------------------------------------------------------------------------------- /contrib/other-builds/eclipse/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | marian 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.cdt.managedbuilder.core.genmakebuilder 10 | clean,full,incremental, 11 | 12 | 13 | 14 | 15 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder 16 | full,incremental, 17 | 18 | 19 | 20 | 21 | 22 | org.eclipse.cdt.core.cnature 23 | org.eclipse.cdt.core.ccnature 24 | org.eclipse.cdt.managedbuilder.core.managedBuildNature 25 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature 26 | 27 | 28 | 29 | src 30 | 2 31 | PARENT-1-PROJECT_LOC/src 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/optimizers/clippers.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "tensors/tensor.h" 7 | 8 | namespace marian { 9 | 10 | // @TODO: modify computation graph to group all parameters in single matrix 11 | // object. 12 | // This will allow to perform a single large SGD update per batch. Currently 13 | // there 14 | // are as many updates as different parameters. 
15 | 16 | class ClipperBase { 17 | public: 18 | virtual void clip(Tensor) = 0; 19 | virtual ~ClipperBase() {} 20 | }; 21 | 22 | typedef std::shared_ptr ClipperPtr; 23 | 24 | class Elementwise : public ClipperBase { 25 | public: 26 | Elementwise(float c = 10.0) : c_(c) {} 27 | 28 | void clip(Tensor t) override; 29 | 30 | private: 31 | float c_; 32 | }; 33 | 34 | class Norm : public ClipperBase { 35 | public: 36 | Norm(float c = 1.0) : c_(c) {} 37 | 38 | void clip(Tensor t) override; 39 | 40 | private: 41 | float c_; 42 | }; 43 | 44 | template 45 | ClipperBasePtr Clipper(Args&&... args) { 46 | return ClipperBasePtr(new Algorithm(args...)); 47 | } 48 | } // namespace marian 49 | -------------------------------------------------------------------------------- /.github/workflows/native-customized_marian-macos.yml: -------------------------------------------------------------------------------- 1 | name: Native (wasm-customized marian) 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ "**" ] 8 | 9 | jobs: 10 | build-macos: 11 | name: MacOS CPU-only 12 | runs-on: macos-latest 13 | 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v2 17 | with: 18 | submodules: recursive 19 | 20 | - name: Configure CMake 21 | run: | 22 | mkdir -p build 23 | cd build 24 | cmake cmake \ 25 | -DCOMPILE_CUDA=off \ 26 | -DUSE_DOXYGEN=off \ 27 | -DCOMPILE_EXAMPLES=off \ 28 | -DCOMPILE_SERVER=off \ 29 | -DCOMPILE_TESTS=off \ 30 | -DUSE_FBGEMM=off \ 31 | -DUSE_SENTENCEPIECE=on \ 32 | -DUSE_STATIC_LIBS=on \ 33 | -DUSE_MKL=off \ 34 | -DUSE_WASM_COMPATIBLE_SOURCE=on ../ 35 | 36 | - name: Compile 37 | working-directory: build 38 | run: make -j2 39 | 40 | - name: Print versions 41 | working-directory: build 42 | run: | 43 | ./marian-decoder --version 44 | 45 | -------------------------------------------------------------------------------- /scripts/contrib/inject_ctt.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python3 2 | 3 | from __future__ import print_function 4 | 5 | import sys 6 | import argparse 7 | import numpy as np 8 | 9 | DESC = "Add 'decoder_c_tt' required by Amun to a model trained with Marian v1.6.0+" 10 | 11 | 12 | def main(): 13 | args = parse_args() 14 | 15 | print("Loading model {}".format(args.input)) 16 | model = np.load(args.input) 17 | 18 | if "decoder_c_tt" in model: 19 | print("The model already contains 'decoder_c_tt'") 20 | exit() 21 | 22 | print("Adding 'decoder_c_tt' to the model") 23 | amun = {"decoder_c_tt": np.zeros((1, 0))} 24 | for tensor_name in model: 25 | amun[tensor_name] = model[tensor_name] 26 | 27 | print("Saving model...") 28 | np.savez(args.output, **amun) 29 | 30 | 31 | def parse_args(): 32 | parser = argparse.ArgumentParser(description=DESC) 33 | parser.add_argument("-i", "--input", help="input model", required=True) 34 | parser.add_argument("-o", "--output", help="output model", required=True) 35 | return parser.parse_args() 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /src/3rd_party/phf/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2015 William Ahern 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to 5 | deal in the Software without restriction, including without limitation the 6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2008-2015 Jesse Beder. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Config files from CMake 2 | src/common/project_version.h 3 | src/common/git_revision.h 4 | src/common/build_info.cpp 5 | 6 | *.vcxproj.user 7 | /vs/x64 8 | pingme.txt 9 | /local 10 | # TODO: ^^ the correct solution for /local is to add that to some local git config, don't remember which one. Cf. mtmain. 11 | 12 | # Compiled Object files 13 | *.slo 14 | *.lo 15 | *.o 16 | *.obj 17 | 18 | # Precompiled Headers 19 | *.gch 20 | *.pch 21 | 22 | # Compiled Dynamic libraries 23 | *.so 24 | *.dylib 25 | *.dll 26 | 27 | # Fortran module files 28 | *.mod 29 | 30 | # python compiled files 31 | *.pyc 32 | 33 | # Compiled Static libraries 34 | *.lai 35 | *.la 36 | *.a 37 | *.lib 38 | 39 | # Executables 40 | *.exe 41 | *.out 42 | *.app 43 | 44 | # Temporary files created by editors 45 | .*.sw* 46 | *~ 47 | 48 | # CMake files 49 | build 50 | build-* 51 | 52 | # Examples 53 | examples/*/*.gz 54 | examples/mnist/*ubyte 55 | 56 | # Contrib 57 | /.ycm_extra_conf.py 58 | /.vimrc 59 | /vs/MarianDll.sln 60 | /vs/MarianDll.VC.db 61 | /vs/MarianDll.VC.VC.opendb 62 | 63 | .vs 64 | .vscode 65 | 66 | -------------------------------------------------------------------------------- /vs/Marian.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.28307.902 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Marian", "Marian.vcxproj", "{E2F320FE-0C01-4C80-810C-3A92205A29DC}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | 
{E2F320FE-0C01-4C80-810C-3A92205A29DC}.Debug|x64.ActiveCfg = Debug|x64 15 | {E2F320FE-0C01-4C80-810C-3A92205A29DC}.Debug|x64.Build.0 = Debug|x64 16 | {E2F320FE-0C01-4C80-810C-3A92205A29DC}.Release|x64.ActiveCfg = Release|x64 17 | {E2F320FE-0C01-4C80-810C-3A92205A29DC}.Release|x64.Build.0 = Release|x64 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {3B922907-3384-4D39-9CEB-816BF7BB390D} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/formatter.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | namespace spdlog 15 | { 16 | namespace details 17 | { 18 | class flag_formatter; 19 | } 20 | 21 | class formatter 22 | { 23 | public: 24 | virtual ~formatter() {} 25 | virtual void format(details::log_msg& msg) = 0; 26 | }; 27 | 28 | class pattern_formatter : public formatter 29 | { 30 | 31 | public: 32 | explicit pattern_formatter(const std::string& pattern); 33 | pattern_formatter(const pattern_formatter&) = delete; 34 | pattern_formatter& operator=(const pattern_formatter&) = delete; 35 | void format(details::log_msg& msg) override; 36 | private: 37 | const std::string _pattern; 38 | std::vector> _formatters; 39 | void handle_flag(char flag); 40 | void compile_pattern(const std::string& pattern); 41 | }; 42 | } 43 | 44 | #include 45 | 46 | -------------------------------------------------------------------------------- /src/common/config_validator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 
"3rd_party/yaml-cpp/yaml.h" 4 | #include "common/config_parser.h" 5 | 6 | namespace marian { 7 | 8 | class ConfigValidator { 9 | private: 10 | const YAML::Node& config_; 11 | 12 | bool has(const std::string& key) const; 13 | template 14 | T get(const std::string& key) const { 15 | return config_[key].as(); 16 | } 17 | 18 | // The option --dump-config is used, so alleviate some constraints, e.g. we don't want to require 19 | // --train-sets or --vocabs 20 | bool dumpConfigOnly_{false}; 21 | 22 | void validateOptionsTranslation() const; 23 | void validateOptionsParallelData() const; 24 | void validateOptionsScoring() const; 25 | void validateOptionsTraining() const; 26 | 27 | void validateModelExtension(cli::mode mode) const; 28 | void validateDevices(cli::mode mode) const; 29 | 30 | public: 31 | ConfigValidator(const YAML::Node& config); 32 | virtual ~ConfigValidator(); 33 | 34 | // Validate options according to the given mode. Abort on first validation error 35 | void validateOptions(cli::mode mode) const; 36 | }; 37 | 38 | } // namespace marian 39 | -------------------------------------------------------------------------------- /src/3rd_party/cnpy/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) Carl Rogers, 2011 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/3rd_party/mio/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 https://github.com/mandreyel/ 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/3rd_party/faiss/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/iterator.h: -------------------------------------------------------------------------------- 1 | #ifndef VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | #include "yaml-cpp/node/node.h" 12 | #include "yaml-cpp/node/detail/iterator_fwd.h" 13 | #include "yaml-cpp/node/detail/iterator.h" 14 | #include 15 | #include 16 | #include 17 | 18 | namespace YAML { 19 | namespace detail { 20 | struct iterator_value : public Node, std::pair { 21 | iterator_value() {} 22 | explicit iterator_value(const Node& rhs) 23 | : Node(rhs), 24 | std::pair(Node(Node::ZombieNode), Node(Node::ZombieNode)) {} 25 | explicit iterator_value(const Node& key, const Node& value) 26 | : Node(Node::ZombieNode), std::pair(key, value) {} 27 | }; 28 | } 29 | } 30 | 31 | #endif // VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 32 | -------------------------------------------------------------------------------- /src/command/marian_vocab.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | 3 | #include "common/cli_wrapper.h" 4 | #include "common/logging.h" 5 | #include "data/vocab.h" 6 | 7 | int main(int argc, char** argv) { 8 | using namespace marian; 9 | 10 | createLoggers(); 11 | 12 | Ptr options = New(); 13 | { 14 | YAML::Node config; // @TODO: get rid of YAML::Node here entirely to avoid the pattern. Currently not fixing as it requires more changes to the Options object. 
15 | auto cli = New( 16 | config, 17 | "Create a vocabulary from text corpora given on STDIN", 18 | "Allowed options", 19 | "Examples:\n" 20 | " ./marian-vocab < text.src > vocab.yml\n" 21 | " cat text.src text.trg | ./marian-vocab > vocab.yml"); 22 | cli->add("--max-size,-m", "Generate only UINT most common vocabulary items", 0); 23 | cli->parse(argc, argv); 24 | options->merge(config); 25 | } 26 | 27 | LOG(info, "Creating vocabulary..."); 28 | 29 | auto vocab = New(options, 0); 30 | vocab->create("stdout", "stdin", options->get("max-size")); 31 | 32 | LOG(info, "Finished"); 33 | 34 | return 0; 35 | } 36 | -------------------------------------------------------------------------------- /src/tensors/gpu/prod.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tensors/tensor.h" 4 | #include "tensors/tensor_operators.h" 5 | 6 | #include "functional/functional.h" 7 | 8 | namespace marian { 9 | namespace gpu { 10 | 11 | void Prod(marian::Tensor C, 12 | const marian::Tensor& A, 13 | const marian::Tensor& B, 14 | bool transA, 15 | bool transB, 16 | float beta = 0, 17 | float scalar = 1); 18 | 19 | void ProdBatched(marian::Tensor C, 20 | Ptr allocator, 21 | const marian::Tensor A, 22 | const marian::Tensor B, 23 | bool transA, 24 | bool transB, 25 | float beta = 0, 26 | float scalar = 1); 27 | 28 | void CSRProd(marian::Tensor C, 29 | Ptr allocator, 30 | const marian::Tensor& A_values, 31 | const marian::Tensor& A_indices, 32 | const marian::Tensor& A_offsets, 33 | const marian::Tensor& B, 34 | bool transA, 35 | bool swapOperands, 36 | float beta = 0); 37 | } // namespace gpu 38 | } // namespace marian 39 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/base_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // base sink templated over a mutex (either dummy or real) 9 | // concrete implementation should only override the _sink_it method. 10 | // all locking is taken care of here so no locking needed by the implementors. 11 | // 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | namespace spdlog 21 | { 22 | namespace sinks 23 | { 24 | template 25 | class base_sink:public sink 26 | { 27 | public: 28 | base_sink():_mutex() {} 29 | virtual ~base_sink() = default; 30 | 31 | base_sink(const base_sink&) = delete; 32 | base_sink& operator=(const base_sink&) = delete; 33 | 34 | void log(const details::log_msg& msg) override 35 | { 36 | std::lock_guard lock(_mutex); 37 | _sink_it(msg); 38 | } 39 | 40 | protected: 41 | virtual void _sink_it(const details::log_msg& msg) = 0; 42 | Mutex _mutex; 43 | }; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/details/log_msg.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/common.h" 9 | #include "spdlog/details/os.h" 10 | 11 | 12 | #include 13 | #include 14 | 15 | namespace spdlog 16 | { 17 | namespace details 18 | { 19 | struct log_msg 20 | { 21 | log_msg() = default; 22 | log_msg(const std::string *loggers_name, level::level_enum lvl) : 23 | logger_name(loggers_name), 24 | level(lvl), 25 | msg_id(0) 26 | { 27 | #ifndef SPDLOG_NO_DATETIME 28 | time = os::now(); 29 | #endif 30 | 31 | #ifndef SPDLOG_NO_THREAD_ID 32 | thread_id = os::thread_id(); 33 | #endif 34 | } 35 | 36 | log_msg(const log_msg& other) = delete; 37 | log_msg& operator=(log_msg&& other) = delete; 38 | log_msg(log_msg&& other) = delete; 39 | 40 | 41 | const std::string *logger_name; 42 | level::level_enum level; 43 | log_clock::time_point time; 44 | size_t thread_id; 45 | fmt::MemoryWriter raw; 46 | fmt::MemoryWriter formatted; 47 | size_t msg_id; 48 | }; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /cmake/FindTcmalloc.cmake: -------------------------------------------------------------------------------- 1 | # - Find Tcmalloc 2 | # Find the native Tcmalloc includes and library 3 | # 4 | # Tcmalloc_INCLUDE_DIR - where to find Tcmalloc.h, etc. 5 | # Tcmalloc_LIBRARIES - List of libraries when using Tcmalloc. 6 | # Tcmalloc_FOUND - True if Tcmalloc found. 
7 | 8 | find_path(Tcmalloc_INCLUDE_DIR google/tcmalloc.h) 9 | 10 | if (USE_TCMALLOC) 11 | set(Tcmalloc_NAMES tcmalloc) 12 | else () 13 | set(Tcmalloc_NAMES tcmalloc_minimal tcmalloc) 14 | endif () 15 | 16 | find_library(Tcmalloc_LIBRARY NAMES ${Tcmalloc_NAMES}) 17 | 18 | if (Tcmalloc_INCLUDE_DIR AND Tcmalloc_LIBRARY) 19 | set(Tcmalloc_FOUND TRUE) 20 | set( Tcmalloc_LIBRARIES ${Tcmalloc_LIBRARY} ) 21 | else () 22 | set(Tcmalloc_FOUND FALSE) 23 | set( Tcmalloc_LIBRARIES ) 24 | endif () 25 | 26 | if (Tcmalloc_FOUND) 27 | message(STATUS "Found Tcmalloc: ${Tcmalloc_LIBRARY}") 28 | else () 29 | message(STATUS "Not Found Tcmalloc") 30 | if (Tcmalloc_FIND_REQUIRED) 31 | message(STATUS "Looked for Tcmalloc libraries named ${Tcmalloc_NAMES}.") 32 | message(FATAL_ERROR "Could NOT find Tcmalloc library") 33 | endif () 34 | endif () 35 | 36 | mark_as_advanced( 37 | Tcmalloc_LIBRARY 38 | Tcmalloc_INCLUDE_DIR 39 | ) -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/contrib/anchordict.h: -------------------------------------------------------------------------------- 1 | #ifndef ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | #include "../anchor.h" 13 | 14 | namespace YAML { 15 | /** 16 | * An object that stores and retrieves values correlating to {@link anchor_t} 17 | * values. 18 | * 19 | *

Efficient implementation that can make assumptions about how 20 | * {@code anchor_t} values are assigned by the {@link Parser} class. 21 | */ 22 | template 23 | class AnchorDict { 24 | public: 25 | void Register(anchor_t anchor, T value) { 26 | if (anchor > m_data.size()) { 27 | m_data.resize(anchor); 28 | } 29 | m_data[anchor - 1] = value; 30 | } 31 | 32 | T Get(anchor_t anchor) const { return m_data[anchor - 1]; } 33 | 34 | private: 35 | std::vector m_data; 36 | }; 37 | } 38 | 39 | #endif // ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 40 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | 7 | #pragma once 8 | 9 | #include "spdlog/details/log_msg.h" 10 | 11 | namespace spdlog 12 | { 13 | namespace sinks 14 | { 15 | class sink 16 | { 17 | public: 18 | sink() 19 | { 20 | _level = level::trace; 21 | } 22 | 23 | virtual ~sink() {} 24 | virtual void log(const details::log_msg& msg) = 0; 25 | virtual void flush() = 0; 26 | 27 | bool should_log(level::level_enum msg_level) const; 28 | void set_level(level::level_enum log_level); 29 | level::level_enum level() const; 30 | 31 | private: 32 | level_t _level; 33 | 34 | }; 35 | 36 | inline bool sink::should_log(level::level_enum msg_level) const 37 | { 38 | return msg_level >= _level.load(std::memory_order_relaxed); 39 | } 40 | 41 | inline void sink::set_level(level::level_enum log_level) 42 | { 43 | _level.store(log_level); 44 | } 45 | 46 | inline level::level_enum sink::level() const 47 | { 48 | return static_cast(_level.load(std::memory_order_relaxed)); 49 | } 50 | 51 | } 52 | } 53 | 54 | -------------------------------------------------------------------------------- /src/3rd_party/zstr/LICENSE: 
-------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Matei David, Ontario Institute for Cancer Research 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Marcin Junczys-Dowmunt, the University of Edinburgh, Adam 4 | Mickiewicz University 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Gabi Melman. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/glog-bench-mt.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "glog/logging.h" 11 | 12 | using namespace std; 13 | 14 | int main(int argc, char* argv[]) 15 | { 16 | 17 | int thread_count = 10; 18 | if(argc > 1) 19 | thread_count = atoi(argv[1]); 20 | 21 | int howmany = 1000000; 22 | 23 | FLAGS_logtostderr = 0; 24 | FLAGS_log_dir = "logs"; 25 | google::InitGoogleLogging(argv[0]); 26 | 27 | std::atomic msg_counter {0}; 28 | vector threads; 29 | 30 | for (int t = 0; t < thread_count; ++t) 31 | { 32 | threads.push_back(std::thread([&]() 33 | { 34 | while (true) 35 | { 36 | int counter = ++msg_counter; 37 | if (counter > howmany) break; 38 | LOG(INFO) << "glog message #" << counter << ": This is some text for your pleasure"; 39 | } 40 | })); 41 | } 42 | 43 | 44 | for(auto &t:threads) 45 | { 46 | t.join(); 47 | }; 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /src/tests/units/utils_tests.cpp: -------------------------------------------------------------------------------- 1 | #include "catch.hpp" 2 | #include "common/utils.h" 3 | 4 | using namespace marian; 5 | 6 | TEST_CASE("utils::splitTsv", "[utils]") { 7 | std::string line1 = "foo bar"; 8 | std::string line2 = "foo bar\tbazz"; 9 | std::string line3 = "foo bar\tbazz\tfoo quux"; 10 | 11 | std::vector fields; 12 | 13 | SECTION("the tab-separated input is split") { 14 | utils::splitTsv(line1, fields, 1); 15 | CHECK( fields.size() == 1 ); 16 | CHECK( fields[0] == "foo bar" ); 17 | 18 | utils::splitTsv(line3, fields, 3); 19 | CHECK( fields == std::vector({"foo bar", "bazz", "foo quux"}) ); 20 | } 21 | 22 | SECTION("the output has at least as many elements as requested") { 23 | utils::splitTsv(line1, fields, 1); 24 | CHECK( fields.size() == 1 ); 25 | 26 | utils::splitTsv(line1, fields, 3); 27 | CHECK( fields.size() == 3 ); 28 | CHECK( fields == std::vector({"foo bar", "", 
""}) ); 29 | 30 | utils::splitTsv(line1, fields, 2); 31 | CHECK( fields.size() == 2 ); 32 | CHECK( fields == std::vector({"foo bar", ""}) ); 33 | } 34 | 35 | //SECTION("excessive tab-separated fields abort the execution") {} 36 | } 37 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/regex_yaml.cpp: -------------------------------------------------------------------------------- 1 | #include "regex_yaml.h" 2 | 3 | namespace YAML { 4 | // constructors 5 | RegEx::RegEx() : m_op(REGEX_EMPTY) {} 6 | 7 | RegEx::RegEx(REGEX_OP op) : m_op(op) {} 8 | 9 | RegEx::RegEx(char ch) : m_op(REGEX_MATCH), m_a(ch) {} 10 | 11 | RegEx::RegEx(char a, char z) : m_op(REGEX_RANGE), m_a(a), m_z(z) {} 12 | 13 | RegEx::RegEx(const std::string& str, REGEX_OP op) : m_op(op) { 14 | for (std::size_t i = 0; i < str.size(); i++) 15 | m_params.push_back(RegEx(str[i])); 16 | } 17 | 18 | // combination constructors 19 | RegEx operator!(const RegEx& ex) { 20 | RegEx ret(REGEX_NOT); 21 | ret.m_params.push_back(ex); 22 | return ret; 23 | } 24 | 25 | RegEx operator||(const RegEx& ex1, const RegEx& ex2) { 26 | RegEx ret(REGEX_OR); 27 | ret.m_params.push_back(ex1); 28 | ret.m_params.push_back(ex2); 29 | return ret; 30 | } 31 | 32 | RegEx operator&&(const RegEx& ex1, const RegEx& ex2) { 33 | RegEx ret(REGEX_AND); 34 | ret.m_params.push_back(ex1); 35 | ret.m_params.push_back(ex2); 36 | return ret; 37 | } 38 | 39 | RegEx operator+(const RegEx& ex1, const RegEx& ex2) { 40 | RegEx ret(REGEX_SEQ); 41 | ret.m_params.push_back(ex1); 42 | ret.m_params.push_back(ex2); 43 | return ret; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/translator/helpers.cpp: -------------------------------------------------------------------------------- 1 | /* All or part of this file was contributed by Intel under license: 2 | * Copyright (C) 2017-2018 Intel Corporation 3 | * SPDX-License-Identifier: MIT 4 | */ 5 | 6 | 
#include 7 | 8 | #include "data/types.h" 9 | #include "tensors/tensor.h" 10 | #include "translator/helpers.h" 11 | 12 | namespace marian { 13 | 14 | namespace cpu { 15 | 16 | void SetColumn(Tensor in_, size_t col, float value) { 17 | int nRows = in_->shape().elements() / in_->shape()[-1]; 18 | int nColumns = in_->shape()[-1]; 19 | 20 | float* in = in_->data(); 21 | for(int rowNumber = 0; rowNumber < nRows; ++rowNumber) { 22 | auto index = col + rowNumber * nColumns; 23 | in[index] = value; 24 | } 25 | } 26 | 27 | void suppressWord(Expr logProbs, WordIndex wordIndex) { 28 | SetColumn(logProbs->val(), wordIndex, std::numeric_limits::lowest()); 29 | } 30 | } // namespace cpu 31 | 32 | void suppressWord(Expr logProbs, WordIndex wordIndex) { 33 | if(logProbs->val()->getBackend()->getDeviceId().type == DeviceType::cpu) { 34 | cpu::suppressWord(logProbs, wordIndex); 35 | } 36 | #ifdef CUDA_FOUND 37 | else { 38 | gpu::suppressWord(logProbs, wordIndex); 39 | } 40 | #endif 41 | } 42 | } // namespace marian 43 | -------------------------------------------------------------------------------- /cmake/FindNCCL.cmake: -------------------------------------------------------------------------------- 1 | set(NCCL_INC_PATHS 2 | /usr/include 3 | /usr/local/include 4 | /usr/local/cuda/include 5 | $ENV{NCCL_DIR}/include 6 | $ENV{CUDA_TOOLKIT_ROOT_DIRCUDA_ROOT}/include 7 | ) 8 | 9 | set(NCCL_LIB_PATHS 10 | /lib 11 | /lib64 12 | /usr/lib 13 | /usr/lib64 14 | /usr/local/lib 15 | /usr/local/lib64 16 | /usr/local/cuda/lib64 17 | $ENV{NCCL_DIR}/lib64 18 | $ENV{CUDA_TOOLKIT_ROOT_DIR}/lib64 19 | /usr/local/cuda/lib 20 | $ENV{NCCL_DIR}/lib 21 | $ENV{CUDA_TOOLKIT_ROOT_DIR}/lib 22 | ) 23 | 24 | find_path(NCCL_INCLUDE_DIR NAMES nccl.h PATHS ${NCCL_INC_PATHS}) 25 | 26 | if (USE_STATIC_LIBS) 27 | message(STATUS "Trying to find static NCCL library") 28 | find_library(NCCL_LIBRARIES NAMES libnccl_static.a PATHS ${NCCL_LIB_PATHS}) 29 | else (USE_STATIC_LIBS) 30 | find_library(NCCL_LIBRARIES NAMES nccl 
PATHS ${NCCL_LIB_PATHS}) 31 | endif (USE_STATIC_LIBS) 32 | 33 | include(FindPackageHandleStandardArgs) 34 | find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIR NCCL_LIBRARIES) 35 | 36 | if (NCCL_FOUND) 37 | message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIR}, library: ${NCCL_LIBRARIES})") 38 | mark_as_advanced(NCCL_INCLUDE_DIR NCCL_LIBRARIES) 39 | endif () 40 | -------------------------------------------------------------------------------- /src/common/cli_helper.cpp: -------------------------------------------------------------------------------- 1 | #include "common/cli_helper.h" 2 | #include "common/filesystem.h" 3 | 4 | namespace marian { 5 | namespace cli { 6 | 7 | void makeAbsolutePaths(YAML::Node& config, 8 | const std::string& configPath, 9 | const std::set& PATHS) { 10 | auto configDir = filesystem::Path{configPath}.parentPath(); 11 | 12 | auto transformFunc = [&](const std::string& nodePath) -> std::string { 13 | // Catch stdin/stdout and do not process 14 | if(nodePath == "stdin" || nodePath == "stdout") 15 | return nodePath; 16 | 17 | // replace relative path w.r.t. 
config directory 18 | try { 19 | return canonical(filesystem::Path{nodePath}, configDir).string(); 20 | } catch(filesystem::FilesystemError& e) { 21 | // will fail if file does not exist; use parent in that case 22 | std::cerr << e.what() << std::endl; 23 | auto parentPath = filesystem::Path{nodePath}.parentPath(); 24 | return (canonical(parentPath, configDir) 25 | / filesystem::Path{nodePath}.filename()) 26 | .string(); 27 | } 28 | }; 29 | 30 | processPaths(config, transformFunc, PATHS); 31 | } 32 | 33 | } // namespace cli 34 | } // namespace marian 35 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/indentation.h: -------------------------------------------------------------------------------- 1 | #ifndef INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | #include "yaml-cpp/ostream_wrapper.h" 14 | 15 | namespace YAML { 16 | struct Indentation { 17 | Indentation(std::size_t n_) : n(n_) {} 18 | std::size_t n; 19 | }; 20 | 21 | inline ostream_wrapper& operator<<(ostream_wrapper& out, 22 | const Indentation& indent) { 23 | for (std::size_t i = 0; i < indent.n; i++) 24 | out << ' '; 25 | return out; 26 | } 27 | 28 | struct IndentTo { 29 | IndentTo(std::size_t n_) : n(n_) {} 30 | std::size_t n; 31 | }; 32 | 33 | inline ostream_wrapper& operator<<(ostream_wrapper& out, 34 | const IndentTo& indent) { 35 | while (out.col() < indent.n) 36 | out << ' '; 37 | return out; 38 | } 39 | } 40 | 41 | #endif // INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66 42 | -------------------------------------------------------------------------------- /src/tests/logger.cpp: 
-------------------------------------------------------------------------------- 1 | #include "common/timer.h" 2 | #include "common/logging.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | // small test program for playing around with spdlog formatting of messages 9 | 10 | std::shared_ptr stderrLoggerTest( 11 | const std::string& name, 12 | const std::string& pattern, 13 | const std::vector& files) { 14 | std::vector sinks; 15 | 16 | auto stderr_sink = spdlog::sinks::stderr_sink_mt::instance(); 17 | sinks.push_back(stderr_sink); 18 | 19 | for(auto&& file : files) { 20 | auto file_sink 21 | = std::make_shared(file, true); 22 | sinks.push_back(file_sink); 23 | } 24 | 25 | auto logger 26 | = std::make_shared(name, begin(sinks), end(sinks)); 27 | 28 | spdlog::register_logger(logger); 29 | logger->set_pattern(pattern); 30 | return logger; 31 | } 32 | 33 | int main() { 34 | std::vector logfiles; 35 | Logger info(stderrLoggerTest("info", "[%Y-%m-%d %T] %v", logfiles)); 36 | 37 | info->info("hello {:06.2f}", .7); 38 | 39 | marian::timer::Timer timer; 40 | 41 | info->info("time is {:.5f} bla {:.2f}", timer.elapsed(), .7); 42 | } 43 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/detail/memory.h: -------------------------------------------------------------------------------- 1 | #ifndef VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | #include "yaml-cpp/dll.h" 13 | #include "yaml-cpp/node/ptr.h" 14 | 15 | namespace YAML { 16 | namespace detail { 17 | class node; 18 | } // namespace detail 19 | } // namespace YAML 20 | 21 | namespace YAML { 22 | namespace detail { 23 | class YAML_CPP_API memory { 24 
| public: 25 | node& create_node(); 26 | void merge(const memory& rhs); 27 | 28 | private: 29 | typedef std::set Nodes; 30 | Nodes m_nodes; 31 | }; 32 | 33 | class YAML_CPP_API memory_holder { 34 | public: 35 | memory_holder() : m_pMemory(new memory) {} 36 | 37 | node& create_node() { return m_pMemory->create_node(); } 38 | void merge(memory_holder& rhs); 39 | 40 | private: 41 | shared_memory m_pMemory; 42 | }; 43 | } 44 | } 45 | 46 | #endif // VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66 47 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "examples"] 2 | path = examples 3 | url = https://github.com/marian-nmt/marian-examples 4 | [submodule "regression-tests"] 5 | path = regression-tests 6 | url = https://github.com/marian-nmt/marian-regression-tests 7 | [submodule "src/3rd_party/sentencepiece"] 8 | path = src/3rd_party/sentencepiece 9 | url = https://github.com/browsermt/sentencepiece 10 | [submodule "src/3rd_party/nccl"] 11 | path = src/3rd_party/nccl 12 | url = https://github.com/marian-nmt/nccl 13 | [submodule "src/3rd_party/fbgemm"] 14 | path = src/3rd_party/fbgemm 15 | url = https://github.com/marian-nmt/FBGEMM 16 | branch = master 17 | [submodule "src/3rd_party/intgemm"] 18 | path = src/3rd_party/intgemm 19 | url = https://github.com/kpu/intgemm 20 | [submodule "src/3rd_party/simple-websocket-server"] 21 | path = src/3rd_party/simple-websocket-server 22 | url = https://github.com/marian-nmt/Simple-WebSocket-Server 23 | [submodule "src/3rd_party/onnxjs"] 24 | path = src/3rd_party/onnxjs 25 | url = https://github.com/browsermt/onnxjs 26 | [submodule "src/3rd_party/ruy"] 27 | path = src/3rd_party/ruy 28 | url = https://github.com/google/ruy 29 | [submodule "src/3rd_party/simd_utils"] 30 | path = src/3rd_party/simd_utils 31 | url = https://github.com/browsermt/simd_utils.git 32 | 
-------------------------------------------------------------------------------- /src/translator/output_printer.cpp: -------------------------------------------------------------------------------- 1 | #include "output_printer.h" 2 | 3 | #include 4 | 5 | namespace marian { 6 | 7 | std::string OutputPrinter::getAlignment(const Hypothesis::PtrType& hyp) { 8 | data::SoftAlignment align; 9 | auto last = hyp; 10 | // get soft alignments for each target word starting from the last one 11 | while(last->getPrevHyp().get() != nullptr) { 12 | align.push_back(last->getAlignment()); 13 | last = last->getPrevHyp(); 14 | } 15 | 16 | // reverse alignments 17 | std::reverse(align.begin(), align.end()); 18 | 19 | if(alignment_ == "soft") { 20 | return data::SoftAlignToString(align); 21 | } else if(alignment_ == "hard") { 22 | return data::ConvertSoftAlignToHardAlign(align, 1.f).toString(); 23 | } else if(alignmentThreshold_ > 0.f) { 24 | return data::ConvertSoftAlignToHardAlign(align, alignmentThreshold_).toString(); 25 | } else { 26 | ABORT("Unrecognized word alignment type"); 27 | } 28 | } 29 | 30 | std::string OutputPrinter::getWordScores(const Hypothesis::PtrType& hyp) { 31 | std::ostringstream scores; 32 | scores.precision(5); 33 | for(const auto& score : hyp->tracebackWordScores()) 34 | scores << " " << std::fixed << score; 35 | return scores.str(); 36 | } 37 | 38 | } // namespace marian 39 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/ostream_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace spdlog 15 | { 16 | namespace sinks 17 | { 18 | template 19 | class ostream_sink: public base_sink 20 | { 21 | public: 22 | explicit ostream_sink(std::ostream& os, bool force_flush=false) :_ostream(os), _force_flush(force_flush) {} 23 | ostream_sink(const ostream_sink&) = delete; 24 | ostream_sink& operator=(const ostream_sink&) = delete; 25 | virtual ~ostream_sink() = default; 26 | 27 | protected: 28 | void _sink_it(const details::log_msg& msg) override 29 | { 30 | _ostream.write(msg.formatted.data(), msg.formatted.size()); 31 | if (_force_flush) 32 | _ostream.flush(); 33 | } 34 | 35 | void flush() override 36 | { 37 | _ostream.flush(); 38 | } 39 | 40 | std::ostream& _ostream; 41 | bool _force_flush; 42 | }; 43 | 44 | typedef ostream_sink ostream_sink_mt; 45 | typedef ostream_sink ostream_sink_st; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/collectionstack.h: -------------------------------------------------------------------------------- 1 | #ifndef COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | namespace YAML { 14 | struct CollectionType { 15 | enum value { NoCollection, BlockMap, BlockSeq, FlowMap, FlowSeq, CompactMap }; 16 | }; 17 | 18 | class CollectionStack { 19 | public: 20 | CollectionType::value GetCurCollectionType() const { 21 | if (collectionStack.empty()) 22 | return CollectionType::NoCollection; 23 | return collectionStack.top(); 24 | } 25 | 26 | void 
PushCollectionType(CollectionType::value type) { 27 | collectionStack.push(type); 28 | } 29 | void PopCollectionType(CollectionType::value type) { 30 | assert(type == GetCurCollectionType()); type; 31 | collectionStack.pop(); 32 | } 33 | 34 | private: 35 | std::stack collectionStack; 36 | }; 37 | } 38 | 39 | #endif // COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 40 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/example.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.25420.1 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example", "example.vcxproj", "{9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release|Win32 = Release|Win32 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Debug|Win32.ActiveCfg = Debug|Win32 17 | {9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Debug|Win32.Build.0 = Debug|Win32 18 | {9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Debug|x64.ActiveCfg = Debug|Win32 19 | {9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Release|Win32.ActiveCfg = Release|Win32 20 | {9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Release|Win32.Build.0 = Release|Win32 21 | {9E5AB93A-0CCE-4BAC-9FCB-0FC9CB5EB8D2}.Release|x64.ActiveCfg = Release|Win32 22 | EndGlobalSection 23 | GlobalSection(SolutionProperties) = preSolution 24 | HideSolutionNode = FALSE 25 | EndGlobalSection 26 | EndGlobal 27 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/fmt/bundled/ostream.cc: 
-------------------------------------------------------------------------------- 1 | /* 2 | Formatting library for C++ - std::ostream support 3 | 4 | Copyright (c) 2012 - 2016, Victor Zverovich 5 | All rights reserved. 6 | 7 | For the license information refer to format.h. 8 | */ 9 | 10 | #include "ostream.h" 11 | 12 | namespace fmt { 13 | 14 | namespace { 15 | // Write the content of w to os. 16 | void write(std::ostream &os, Writer &w) { 17 | const char *data = w.data(); 18 | typedef internal::MakeUnsigned::Type UnsignedStreamSize; 19 | UnsignedStreamSize size = w.size(); 20 | UnsignedStreamSize max_size = 21 | internal::to_unsigned((std::numeric_limits::max)()); 22 | do { 23 | UnsignedStreamSize n = size <= max_size ? size : max_size; 24 | os.write(data, static_cast(n)); 25 | data += n; 26 | size -= n; 27 | } while (size != 0); 28 | } 29 | } 30 | 31 | FMT_FUNC void print(std::ostream &os, CStringRef format_str, ArgList args) { 32 | MemoryWriter w; 33 | w.write(format_str, args); 34 | write(os, w); 35 | } 36 | 37 | FMT_FUNC int fprintf(std::ostream &os, CStringRef format, ArgList args) { 38 | MemoryWriter w; 39 | printf(w, format, args); 40 | write(os, w); 41 | return static_cast(w.size()); 42 | } 43 | } // namespace fmt 44 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/ostream_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/details/null_mutex.h" 9 | #include "spdlog/sinks/base_sink.h" 10 | 11 | #include 12 | #include 13 | 14 | namespace spdlog 15 | { 16 | namespace sinks 17 | { 18 | template 19 | class ostream_sink: public base_sink 20 | { 21 | public: 22 | explicit ostream_sink(std::ostream& os, bool force_flush=false) :_ostream(os), _force_flush(force_flush) {} 23 | ostream_sink(const ostream_sink&) = delete; 24 | ostream_sink& operator=(const ostream_sink&) = delete; 25 | virtual ~ostream_sink() = default; 26 | 27 | protected: 28 | void _sink_it(const details::log_msg& msg) override 29 | { 30 | _ostream.write(msg.formatted.data(), msg.formatted.size()); 31 | if (_force_flush) 32 | _ostream.flush(); 33 | } 34 | 35 | void _flush() override 36 | { 37 | _ostream.flush(); 38 | } 39 | 40 | std::ostream& _ostream; 41 | bool _force_flush; 42 | }; 43 | 44 | typedef ostream_sink ostream_sink_mt; 45 | typedef ostream_sink ostream_sink_st; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /scripts/server/client_example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from __future__ import print_function, unicode_literals, division 4 | 5 | import sys 6 | import time 7 | import argparse 8 | 9 | # pip install websocket_client 10 | from websocket import create_connection 11 | 12 | 13 | if __name__ == "__main__": 14 | # handle command-line options 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("-b", "--batch-size", type=int, default=1) 17 | parser.add_argument("-p", "--port", type=int, default=8080) 18 | args = parser.parse_args() 19 | 20 | # open connection 21 | ws = create_connection("ws://localhost:{}/translate".format(args.port)) 22 | 23 | count = 0 24 | batch = "" 25 | for line in sys.stdin: 26 | count += 1 27 | batch += 
line.decode('utf-8') if sys.version_info < (3, 0) else line 28 | if count == args.batch_size: 29 | # translate the batch 30 | ws.send(batch) 31 | result = ws.recv() 32 | print(result.rstrip()) 33 | 34 | count = 0 35 | batch = "" 36 | 37 | if count: 38 | # translate the remaining sentences 39 | ws.send(batch) 40 | result = ws.recv() 41 | print(result.rstrip()) 42 | 43 | # close connection 44 | ws.close() 45 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/utils.cpp: -------------------------------------------------------------------------------- 1 | #include "includes.h" 2 | 3 | 4 | void prepare_logdir() 5 | { 6 | spdlog::drop_all(); 7 | #ifdef _WIN32 8 | system("if not exist logs mkdir logs"); 9 | system("del /F /Q logs\\*"); 10 | #else 11 | auto rv = system("mkdir -p logs"); 12 | rv = system("rm -f logs/*"); 13 | (void)rv; 14 | #endif 15 | } 16 | 17 | 18 | std::string file_contents(const std::string& filename) 19 | { 20 | std::ifstream ifs(filename); 21 | if (!ifs) 22 | throw std::runtime_error("Failed open file "); 23 | return std::string((std::istreambuf_iterator(ifs)), 24 | (std::istreambuf_iterator())); 25 | 26 | } 27 | 28 | std::size_t count_lines(const std::string& filename) 29 | { 30 | std::ifstream ifs(filename); 31 | if (!ifs) 32 | throw std::runtime_error("Failed open file "); 33 | 34 | std::string line; 35 | size_t counter = 0; 36 | while(std::getline(ifs, line)) 37 | counter++; 38 | return counter; 39 | } 40 | 41 | std::size_t get_filesize(const std::string& filename) 42 | { 43 | std::ifstream ifs(filename, std::ifstream::ate | std::ifstream::binary); 44 | if (!ifs) 45 | throw std::runtime_error("Failed open file "); 46 | 47 | return static_cast(ifs.tellg()); 48 | } 49 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/easylogging-bench-mt.cpp: -------------------------------------------------------------------------------- 1 | // 2 
| // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #define _ELPP_THREAD_SAFE 11 | #include "easylogging++.h" 12 | _INITIALIZE_EASYLOGGINGPP 13 | 14 | using namespace std; 15 | 16 | int main(int argc, char* argv[]) 17 | { 18 | 19 | int thread_count = 10; 20 | if(argc > 1) 21 | thread_count = atoi(argv[1]); 22 | 23 | int howmany = 1000000; 24 | 25 | // Load configuration from file 26 | el::Configurations conf("easyl.conf"); 27 | el::Loggers::reconfigureLogger("default", conf); 28 | 29 | std::atomic msg_counter {0}; 30 | vector threads; 31 | 32 | for (int t = 0; t < thread_count; ++t) 33 | { 34 | threads.push_back(std::thread([&]() 35 | { 36 | while (true) 37 | { 38 | int counter = ++msg_counter; 39 | if (counter > howmany) break; 40 | LOG(INFO) << "easylog message #" << counter << ": This is some text for your pleasure"; 41 | } 42 | })); 43 | } 44 | 45 | 46 | for(auto &t:threads) 47 | { 48 | t.join(); 49 | }; 50 | 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/formatter.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/details/log_msg.h" 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | namespace spdlog 15 | { 16 | namespace details 17 | { 18 | class flag_formatter; 19 | } 20 | 21 | class formatter 22 | { 23 | public: 24 | virtual ~formatter() {} 25 | virtual void format(details::log_msg& msg) = 0; 26 | }; 27 | 28 | class pattern_formatter SPDLOG_FINAL : public formatter 29 | { 30 | 31 | public: 32 | explicit pattern_formatter(const std::string& pattern, pattern_time_type pattern_time = pattern_time_type::local); 33 | pattern_formatter(const pattern_formatter&) = delete; 34 | pattern_formatter& operator=(const pattern_formatter&) = delete; 35 | void format(details::log_msg& msg) override; 36 | private: 37 | const std::string _pattern; 38 | const pattern_time_type _pattern_time; 39 | std::vector> _formatters; 40 | std::tm get_time(details::log_msg& msg); 41 | void handle_flag(char flag); 42 | void compile_pattern(const std::string& pattern); 43 | }; 44 | } 45 | 46 | #include "spdlog/details/pattern_formatter_impl.h" 47 | 48 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/base_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // base sink templated over a mutex (either dummy or real) 9 | // concrete implementation should only override the _sink_it method. 10 | // all locking is taken care of here so no locking needed by the implementers.. 
11 | // 12 | 13 | #include "spdlog/sinks/sink.h" 14 | #include "spdlog/formatter.h" 15 | #include "spdlog/common.h" 16 | #include "spdlog/details/log_msg.h" 17 | 18 | #include 19 | 20 | namespace spdlog 21 | { 22 | namespace sinks 23 | { 24 | template 25 | class base_sink:public sink 26 | { 27 | public: 28 | base_sink():_mutex() {} 29 | virtual ~base_sink() = default; 30 | 31 | base_sink(const base_sink&) = delete; 32 | base_sink& operator=(const base_sink&) = delete; 33 | 34 | void log(const details::log_msg& msg) SPDLOG_FINAL override 35 | { 36 | std::lock_guard lock(_mutex); 37 | _sink_it(msg); 38 | } 39 | void flush() SPDLOG_FINAL override 40 | { 41 | _flush(); 42 | } 43 | 44 | protected: 45 | virtual void _sink_it(const details::log_msg& msg) = 0; 46 | virtual void _flush() = 0; 47 | Mutex _mutex; 48 | }; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/layers/convolution.cpp: -------------------------------------------------------------------------------- 1 | #include "layers/convolution.h" 2 | #include "graph/node_operators_binary.h" 3 | 4 | namespace marian { 5 | 6 | #ifdef CUDNN 7 | Convolution::Convolution(Ptr graph) {} 8 | 9 | Expr Convolution::apply(Expr x) { 10 | auto graph = x->graph(); 11 | 12 | auto prefix = opt("prefix"); 13 | auto kernelDims = opt>("kernel-dims"); 14 | auto kernelNum = opt("kernel-num"); 15 | auto paddings = opt>("paddings", std::make_pair(0, 0)); 16 | auto strides = opt>("strides", std::make_pair(1, 1)); 17 | 18 | int layerIn = x->shape()[1]; 19 | auto kernel 20 | = graph->param(prefix + "_conv_kernels", 21 | {layerIn, kernelNum, kernelDims.first, kernelDims.second}, 22 | inits::glorotUniform()); 23 | 24 | auto bias = graph->param( 25 | prefix + "_conv_bias", {1, kernelNum, 1, 1}, inits::zeros()); 26 | 27 | std::vector nodes = {x, kernel, bias}; 28 | return Expression( 29 | nodes, paddings.first, paddings.second, strides.first, strides.second); 30 | } 31 | 32 | Expr 
Convolution::apply(const std::vector&) { 33 | ABORT("Can't apply convolution on many inputs at once"); 34 | return nullptr; 35 | } 36 | #endif 37 | 38 | } // namespace marian 39 | -------------------------------------------------------------------------------- /src/training/deprecated/graph_group_async_drop.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "training/graph_group_async.h" 4 | 5 | #include "training/gradient_dropping/dropper.h" 6 | #include "training/gradient_dropping/sparse_tensor.h" 7 | 8 | namespace marian { 9 | 10 | class AsyncGraphGroupDrop : public AsyncGraphGroup { 11 | std::vector fetchStep_; 12 | std::vector pushStep_; 13 | std::vector fetch_ready; 14 | 15 | bool drop_first = 1; 16 | 17 | size_t dropping_warmup; 18 | float droping_rate; 19 | float dropping_momentum; 20 | 21 | std::vector> droppers_; 22 | 23 | std::vector> sparseGrads_, sparseShards_; 24 | 25 | protected: 26 | void init(Ptr batch) override; 27 | void pushGradients(Tensor newGrads, int device_id) override; 28 | void fetchParams(Tensor oldParams, 29 | const std::vector& params, 30 | int device_id) override; 31 | 32 | public: 33 | AsyncGraphGroupDrop(Ptr options, Ptr mpi) 34 | : AsyncGraphGroup(options, mpi), 35 | dropping_warmup{options->get("grad-dropping-warmup")}, 36 | droping_rate{options->get("grad-dropping-rate")}, 37 | dropping_momentum{options->get("grad-dropping-momentum")} {} 38 | }; 39 | } // namespace marian 40 | -------------------------------------------------------------------------------- /src/common/io.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "3rd_party/yaml-cpp/yaml.h" 4 | #include "common/io_item.h" 5 | 6 | #include 7 | #include 8 | 9 | // interface for handling model files in marian, both *.npz files and 10 | // *.bin files have the same way of accessing them and are identified 11 | // by suffixes (*.npz or *.bin). 
12 | 13 | // Files with the *.bin suffix are supposed to be memory-mappable for 14 | // CPU decoding. 15 | 16 | namespace marian { 17 | namespace io { 18 | 19 | bool isNpz(const std::string& fileName); 20 | bool isBin(const std::string& fileName); 21 | 22 | void getYamlFromModel(YAML::Node& yaml, const std::string& varName, const std::string& fileName); 23 | void getYamlFromModel(YAML::Node& yaml, const std::string& varName, const void* ptr); 24 | void getYamlFromModel(YAML::Node& yaml, const std::string& varName, const std::vector& items); 25 | 26 | void addMetaToItems(const std::string& meta, 27 | const std::string& varName, 28 | std::vector& items); 29 | 30 | std::vector loadItems(const std::string& fileName); 31 | std::vector loadItems(const void* ptr); 32 | 33 | std::vector mmapItems(const void* ptr); 34 | 35 | void saveItems(const std::string& fileName, const std::vector& items); 36 | 37 | } // namespace io 38 | } // namespace marian 39 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/spdlog-bench-mt.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "spdlog/spdlog.h" 11 | 12 | 13 | using namespace std; 14 | 15 | int main(int argc, char* argv[]) 16 | { 17 | 18 | int thread_count = 10; 19 | if(argc > 1) 20 | thread_count = std::atoi(argv[1]); 21 | 22 | int howmany = 1000000; 23 | 24 | namespace spd = spdlog; 25 | 26 | auto logger = spdlog::create("file_logger", "logs/spd-bench-mt.txt", false); 27 | 28 | logger->set_pattern("[%Y-%b-%d %T.%e]: %v"); 29 | 30 | std::atomic msg_counter {0}; 31 | std::vector threads; 32 | 33 | for (int t = 0; t < thread_count; ++t) 34 | { 35 | threads.push_back(std::thread([&]() 36 | { 37 | while (true) 38 | { 39 | int counter = ++msg_counter; 40 | if (counter > howmany) break; 41 | logger->info("spdlog message #{}: This is some text for your pleasure", counter); 42 | } 43 | })); 44 | } 45 | 46 | 47 | for(auto &t:threads) 48 | { 49 | t.join(); 50 | }; 51 | 52 | 53 | 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/tag.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "directives.h" // IWYU pragma: keep 5 | #include "tag.h" 6 | #include "token.h" 7 | 8 | namespace YAML { 9 | Tag::Tag(const Token& token) : type(static_cast(token.data)) { 10 | switch (type) { 11 | case VERBATIM: 12 | value = token.value; 13 | break; 14 | case PRIMARY_HANDLE: 15 | value = token.value; 16 | break; 17 | case SECONDARY_HANDLE: 18 | value = token.value; 19 | break; 20 | case NAMED_HANDLE: 21 | handle = token.value; 22 | value = token.params[0]; 23 | break; 24 | case NON_SPECIFIC: 25 | break; 26 | default: 27 | assert(false); 28 | } 29 | } 30 | 31 | const std::string Tag::Translate(const Directives& directives) { 32 | switch (type) { 33 | case VERBATIM: 34 | return value; 35 | case PRIMARY_HANDLE: 
36 | return directives.TranslateTagHandle("!") + value; 37 | case SECONDARY_HANDLE: 38 | return directives.TranslateTagHandle("!!") + value; 39 | case NAMED_HANDLE: 40 | return directives.TranslateTagHandle("!" + handle + "!") + value; 41 | case NON_SPECIFIC: 42 | // TODO: 43 | return "!"; 44 | default: 45 | assert(false); 46 | } 47 | throw std::runtime_error("yaml-cpp: internal error, bad tag type"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/functional/array.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "functional/defs.h" 4 | 5 | namespace marian { 6 | 7 | namespace functional { 8 | 9 | template 10 | struct Array { 11 | typedef T value_type; 12 | T data_[N]; 13 | 14 | HOST_DEVICE_INLINE const T* data() const { return data_; } 15 | 16 | HOST_DEVICE_INLINE T* data() { return data_; } 17 | 18 | HOST_DEVICE_INLINE constexpr static size_t size() { return N; } 19 | 20 | HOST_DEVICE_INLINE T& operator[](size_t i) { return data_[i]; } 21 | HOST_DEVICE_INLINE const T& operator[](size_t i) const { return data_[i]; } 22 | 23 | HOST_DEVICE_INLINE T* begin() { return data_; } 24 | HOST_DEVICE_INLINE const T* begin() const { return data_; } 25 | 26 | HOST_DEVICE_INLINE T* end() { return data_ + N; } 27 | HOST_DEVICE_INLINE const T* end() const { return data_ + N; } 28 | 29 | HOST_DEVICE_INLINE void fill(T val) { 30 | for(int i = 0; i < N; ++i) 31 | data_[i] = val; 32 | } 33 | 34 | HOST_DEVICE_INLINE T& back() { return data_[N - 1]; } 35 | HOST_DEVICE_INLINE const T& back() const { return data_[N - 1]; } 36 | 37 | HOST_DEVICE_INLINE bool operator==(const Array& other) { 38 | for(int i = 0; i < N; ++i) 39 | if(data_[i] != other[i]) 40 | return false; 41 | return true; 42 | } 43 | }; 44 | } // namespace functional 45 | } // namespace marian 46 | -------------------------------------------------------------------------------- 
/src/3rd_party/CLI/Macros.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Distributed under the 3-Clause BSD License. See accompanying 4 | // file LICENSE or https://github.com/CLIUtils/CLI11 for details. 5 | 6 | // [CLI11:verbatim] 7 | 8 | // The following version macro is very similar to the one in PyBind11 9 | #if !(defined(_MSC_VER) && __cplusplus == 199711L) && !defined(__INTEL_COMPILER) 10 | #if __cplusplus >= 201402L 11 | #define CLI11_CPP14 12 | #if __cplusplus >= 201703L 13 | #define CLI11_CPP17 14 | #if __cplusplus > 201703L 15 | #define CLI11_CPP20 16 | #endif 17 | #endif 18 | #endif 19 | #elif defined(_MSC_VER) && __cplusplus == 199711L 20 | // MSVC sets _MSVC_LANG rather than __cplusplus (supposedly until the standard is fully implemented) 21 | // Unless you use the /Zc:__cplusplus flag on Visual Studio 2017 15.7 Preview 3 or newer 22 | #if _MSVC_LANG >= 201402L 23 | #define CLI11_CPP14 24 | #if _MSVC_LANG > 201402L && _MSC_VER >= 1910 25 | #define CLI11_CPP17 26 | #if _MSVC_LANG > 201703L && _MSC_VER >= 1910 27 | #define CLI11_CPP20 28 | #endif 29 | #endif 30 | #endif 31 | #endif 32 | 33 | #if defined(CLI11_CPP14) 34 | #define CLI11_DEPRECATED(reason) [[deprecated(reason)]] 35 | #elif defined(_MSC_VER) 36 | #define CLI11_DEPRECATED(reason) __declspec(deprecated(reason)) 37 | #else 38 | #define CLI11_DEPRECATED(reason) __attribute__((deprecated(reason))) 39 | #endif 40 | 41 | // [CLI11:verbatim] 42 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/ptr_vector.h: -------------------------------------------------------------------------------- 1 | #ifndef PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since
3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "yaml-cpp/noncopyable.h" 16 | 17 | namespace YAML { 18 | 19 | // TODO: This class is no longer needed 20 | template 21 | class ptr_vector : private YAML::noncopyable { 22 | public: 23 | ptr_vector() {} 24 | 25 | void clear() { m_data.clear(); } 26 | 27 | std::size_t size() const { return m_data.size(); } 28 | bool empty() const { return m_data.empty(); } 29 | 30 | void push_back(std::unique_ptr&& t) { m_data.push_back(std::move(t)); } 31 | T& operator[](std::size_t i) { return *m_data[i]; } 32 | const T& operator[](std::size_t i) const { return *m_data[i]; } 33 | 34 | T& back() { return *(m_data.back().get()); } 35 | 36 | const T& back() const { return *(m_data.back().get()); } 37 | 38 | private: 39 | std::vector> m_data; 40 | }; 41 | } 42 | 43 | #endif // PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 44 | -------------------------------------------------------------------------------- /src/3rd_party/pathie-cpp/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2015, 2017 Marvin Gülker 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 18 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /src/models/classifier.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "marian.h" 4 | #include "models/states.h" 5 | #include "layers/constructors.h" 6 | #include "layers/factory.h" 7 | 8 | namespace marian { 9 | 10 | /** 11 | * Simple base class for Classifiers to be used in EncoderClassifier framework 12 | * Currently only implementations are in bert.h 13 | */ 14 | class ClassifierBase :public LayerBase { 15 | using LayerBase::LayerBase; 16 | protected: 17 | Ptr options_; 18 | const std::string prefix_{"classifier"}; 19 | const bool inference_{false}; 20 | const size_t batchIndex_{0}; 21 | 22 | public: 23 | ClassifierBase(Ptr graph, Ptr options) 24 | : LayerBase(graph, options), 25 | prefix_(options->get("prefix", "classifier")), 26 | inference_(options->get("inference", false)), 27 | batchIndex_(options->get("index", 1)) {} // assume that training input has batch index 0 and labels has 1 28 | 29 | virtual ~ClassifierBase() {} 30 | 31 | virtual Ptr apply(Ptr, Ptr, const std::vector>&) = 0; 32 | 33 | template 34 | T opt(const std::string& key) const { 35 | return options_->get(key); 36 | } 37 | 38 | // Should be used to clear any batch-wise temporary objects if present 39 | virtual void clear() = 0; 40 | }; 41 | 42 | } 
-------------------------------------------------------------------------------- /src/3rd_party/ExceptionWithCallStack.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) Microsoft. All rights reserved. 3 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 4 | // 5 | // ExceptionWithCallStack.h - debug util functions 6 | // 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | namespace Microsoft { namespace MSR { namespace CNTK { 13 | 14 | // base class that we can catch, independent of the type parameter 15 | struct /*interface*/ IExceptionWithCallStackBase 16 | { 17 | virtual const char * CallStack() const = 0; 18 | virtual ~IExceptionWithCallStackBase() noexcept = default; 19 | }; 20 | 21 | // Exception wrapper to include native call stack string 22 | template 23 | class ExceptionWithCallStack : public E, public IExceptionWithCallStackBase 24 | { 25 | public: 26 | ExceptionWithCallStack(const std::string& msg, const std::string& callstack) : 27 | E(msg), m_callStack(callstack) 28 | { } 29 | 30 | virtual const char * CallStack() const override { return m_callStack.c_str(); } 31 | 32 | protected: 33 | std::string m_callStack; 34 | }; 35 | 36 | // some older code uses this namespace 37 | namespace DebugUtil 38 | { 39 | void PrintCallStack(size_t skipLevels = 0, bool makeFunctionNamesStandOut = false); 40 | 41 | std::string GetCallStack(size_t skipLevels = 0, bool makeFunctionNamesStandOut = false); 42 | }; 43 | 44 | }}} 45 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/tests.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2015 4 | VisualStudioVersion = 14.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tests", 
"tests.vcxproj", "{59A07559-5F38-4DD6-A7FA-DB4153690B42}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release|Win32 = Release|Win32 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|Win32.ActiveCfg = Debug|Win32 17 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|Win32.Build.0 = Debug|Win32 18 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|x64.ActiveCfg = Debug|x64 19 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|x64.Build.0 = Debug|x64 20 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|Win32.ActiveCfg = Release|Win32 21 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|Win32.Build.0 = Release|Win32 22 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|x64.ActiveCfg = Release|x64 23 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|x64.Build.0 = Release|x64 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /wasm/import-gemm-module.js: -------------------------------------------------------------------------------- 1 | 2 | /* Use an optimized gemm implementation if available, otherwise use the fallback 3 | * implementation. 
4 | */ 5 | function createWasmGemm() { 6 | const OPTIMIZED_GEMM = "mozIntGemm"; 7 | const FALLBACK_GEMM = "asm"; 8 | 9 | if (WebAssembly[OPTIMIZED_GEMM]) { 10 | console.log(`Using optimized gemm (${OPTIMIZED_GEMM}) implementation`); 11 | return new WebAssembly.Instance(WebAssembly[OPTIMIZED_GEMM](), {"": {memory: wasmMemory}}).exports; 12 | } 13 | else { 14 | console.log(`Using fallback gemm implementation`); 15 | return { 16 | "int8_prepare_a": (...a) => Module[FALLBACK_GEMM]["int8PrepareAFallback"](...a), 17 | "int8_prepare_b": (...a) => Module[FALLBACK_GEMM]["int8PrepareBFallback"](...a), 18 | "int8_prepare_b_from_transposed": (...a) => Module[FALLBACK_GEMM]["int8PrepareBFromTransposedFallback"](...a), 19 | "int8_prepare_b_from_quantized_transposed": (...a) => Module[FALLBACK_GEMM]["int8PrepareBFromQuantizedTransposedFallback"](...a), 20 | "int8_prepare_bias": (...a) => Module[FALLBACK_GEMM]["int8PrepareBiasFallback"](...a), 21 | "int8_multiply_and_add_bias": (...a) => Module[FALLBACK_GEMM]["int8MultiplyAndAddBiasFallback"](...a), 22 | "int8_select_columns_of_b": (...a) => Module[FALLBACK_GEMM]["int8SelectColumnsOfBFallback"](...a) 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/tensors/cpu/aligned.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | #include 5 | #ifdef _WIN32 6 | #include 7 | #endif 8 | 9 | namespace marian { 10 | namespace cpu { 11 | namespace { 12 | 13 | // allocate function for tensor reserve() below. 14 | // Alignment is needed because we use AVX512 and AVX2 vectors. We should fail if we can't allocate aligned memory. 
15 | 16 | #ifdef _WIN32 17 | void *genericMalloc(size_t alignment, size_t size) { 18 | void *ret = _aligned_malloc(size, alignment); 19 | ABORT_IF(!ret, "Failed to allocate memory on CPU"); 20 | return ret; 21 | } 22 | void genericFree(void *ptr) { 23 | _aligned_free(ptr); 24 | } 25 | #else 26 | // Linux and OS X. There is no fallback to malloc because we need it to be aligned. 27 | void *genericMalloc(size_t alignment, size_t size) { 28 | // On macos, aligned_alloc is available only on c++17 29 | // Furthermore, it requires that the memory requested is an exact multiple of the alignment, otherwise it fails. 30 | // posix_memalign is available both Mac (Since 2016) and Linux and in both gcc and clang 31 | void *result; 32 | // Error could be detected by return value or just remaining nullptr. 33 | ABORT_IF(posix_memalign(&result, alignment, size), "Failed to allocate memory on CPU"); 34 | return result; 35 | } 36 | void genericFree(void *ptr) { 37 | free(ptr); 38 | } 39 | #endif 40 | 41 | } 42 | } // namespace cpu 43 | } // namespace marian 44 | -------------------------------------------------------------------------------- /src/layers/weight.cpp: -------------------------------------------------------------------------------- 1 | #include "layers/weight.h" 2 | 3 | namespace marian { 4 | 5 | Ptr WeightingFactory(Ptr options) { 6 | ABORT_IF(!options->hasAndNotEmpty("data-weighting"), 7 | "No data-weighting specified in options"); 8 | return New(options->get("data-weighting-type")); 9 | } 10 | 11 | Expr DataWeighting::getWeights(Ptr graph, 12 | Ptr batch) { 13 | ABORT_IF(batch->getDataWeights().empty(), 14 | "Vector of weights is unexpectedly empty!"); 15 | bool sentenceWeighting = weightingType_ == "sentence"; 16 | int dimBatch = (int)batch->size(); 17 | int dimWords = sentenceWeighting ? 
1 : (int)batch->back()->batchWidth(); 18 | 19 | // This would abort anyway in fromVector(...), but has clearer error message 20 | // here for this particular case 21 | ABORT_IF(batch->getDataWeights().size() != dimWords * dimBatch, 22 | "Number of sentence/word-level weights ({}) does not match tensor size ({})", 23 | batch->getDataWeights().size(), dimWords * dimBatch); 24 | 25 | auto weights = graph->constant({1, dimWords, dimBatch, 1}, 26 | inits::fromVector(batch->getDataWeights())); 27 | return weights; // [1, dimWords, dimBatch, 1] in case of word-level weights or 28 | // [1, 1, dimBatch, 1] in case of sentence-level weights 29 | } 30 | } // namespace marian 31 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/stringsource.h: -------------------------------------------------------------------------------- 1 | #ifndef STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | namespace YAML { 13 | class StringCharSource { 14 | public: 15 | StringCharSource(const char* str, std::size_t size) 16 | : m_str(str), m_size(size), m_offset(0) {} 17 | 18 | operator bool() const { return m_offset < m_size; } 19 | char operator[](std::size_t i) const { return m_str[m_offset + i]; } 20 | bool operator!() const { return !static_cast(*this); } 21 | 22 | const StringCharSource operator+(int i) const { 23 | StringCharSource source(*this); 24 | if (static_cast(source.m_offset) + i >= 0) 25 | source.m_offset += i; 26 | else 27 | source.m_offset = 0; 28 | return source; 29 | } 30 | 31 | StringCharSource& operator++() { 32 | ++m_offset; 33 | return *this; 34 | } 35 | 36 | StringCharSource& operator+=(std::size_t offset) { 37 | m_offset += 
offset; 38 | return *this; 39 | } 40 | 41 | private: 42 | const char* m_str; 43 | std::size_t m_size; 44 | std::size_t m_offset; 45 | }; 46 | } 47 | 48 | #endif // STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 49 | -------------------------------------------------------------------------------- /src/tensors/gpu/device.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "tensors/device.h" 5 | #include "tensors/gpu/cuda_helpers.h" 6 | 7 | namespace marian { 8 | namespace gpu { 9 | 10 | Device::~Device() { 11 | // No CUDA error checking as this is a destructor and we cannot do anything about errors anyway. 12 | cudaSetDevice(deviceId_.no); 13 | if(data_) { 14 | cudaFree(data_); 15 | } 16 | cudaDeviceSynchronize(); 17 | } 18 | 19 | void Device::reserve(size_t size) { 20 | size = align(size); 21 | CUDA_CHECK(cudaSetDevice(deviceId_.no)); 22 | 23 | ABORT_IF(size < size_ || size == 0, 24 | "New size must be larger than old size and larger than 0"); 25 | 26 | if(data_) { 27 | // Allocate memory while temporarily parking original content in host memory 28 | std::vector temp(size_); 29 | CUDA_CHECK(cudaMemcpy(temp.data(), data_, size_, cudaMemcpyDeviceToHost)); 30 | CUDA_CHECK(cudaFree(data_)); 31 | LOG(debug, "[memory] Re-allocating from {} to {} bytes on device {}", size_, size, deviceId_.no); 32 | CUDA_CHECK(cudaMalloc(&data_, size)); 33 | CUDA_CHECK(cudaMemcpy(data_, temp.data(), size_, cudaMemcpyHostToDevice)); 34 | //logCallStack(0); 35 | } else { 36 | // No data_ yet: Just alloc. 
37 | LOG(debug, "[memory] Allocating {} bytes in device {}", size, deviceId_.no); 38 | CUDA_CHECK(cudaMalloc(&data_, size)); 39 | } 40 | 41 | size_ = size; 42 | } 43 | } // namespace gpu 44 | } // namespace marian 45 | -------------------------------------------------------------------------------- /src/tensors/memory_piece.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | 5 | #include 6 | 7 | namespace marian { 8 | 9 | class MemoryPiece { 10 | private: 11 | uint8_t* data_; 12 | size_t size_; 13 | 14 | ENABLE_INTRUSIVE_PTR(MemoryPiece) 15 | 16 | // Constructor is private, use MemoryPiece::New(...) 17 | MemoryPiece(uint8_t* data, size_t size) : data_(data), size_(size) {} 18 | 19 | public: 20 | // Use this whenever pointing to MemoryPiece 21 | typedef IPtr PtrType; 22 | 23 | // Use this whenever creating a pointer to MemoryPiece 24 | template 25 | static PtrType New(Args&& ...args) { 26 | return PtrType(new MemoryPiece(std::forward(args)...)); 27 | } 28 | 29 | uint8_t* data() const { return data_; } 30 | uint8_t* data() { return data_; } 31 | 32 | template 33 | T* data() const { 34 | return (T*)data_; 35 | } 36 | 37 | template 38 | T* data() { 39 | return (T*)data_; 40 | } 41 | 42 | size_t size() const { return size_; } 43 | 44 | void set(uint8_t* data, size_t size) { 45 | data_ = data; 46 | size_ = size; 47 | } 48 | 49 | void setPtr(uint8_t* data) { data_ = data; } 50 | 51 | friend std::ostream& operator<<(std::ostream& out, const MemoryPiece& mp) { 52 | out << "MemoryPiece - ptr: " << std::hex << (size_t)mp.data() << std::dec 53 | << " size: " << mp.size(); 54 | return out; 55 | } 56 | 57 | }; 58 | } // namespace marian 59 | -------------------------------------------------------------------------------- /wasm/package-benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ !
-e ../models ]]; then 4 | mkdir -p ../models 5 | if [[ ! -e ../students ]]; then 6 | echo "Cloning https://github.com/browsermt/students)" 7 | git clone --depth 1 --branch main --single-branch https://github.com/browsermt/students ../students 8 | fi 9 | 10 | echo "Downloading files" 11 | ../students/esen/download-models.sh 12 | 13 | echo "Copying downloaded files to models folder" 14 | cp ../students/esen/esen.student.tiny11/vocab.esen* ../students/esen/esen.student.tiny11/model* ../students/esen/esen.student.tiny11/lex.s2t* ../models/ 15 | sacrebleu -t wmt13 -l es-en --echo src > ../models/newstest2013.es 16 | head -n300 ../models/newstest2013.es > ../models/newstest2013.es.top300lines 17 | gunzip ../models/* 18 | else 19 | echo "models directory already exists in root folder; Using it to package files without downloading anything" 20 | fi 21 | 22 | echo "Packaging files for wasm binary" 23 | $EMSDK_PYTHON $EMSDK/upstream/emscripten/tools/file_packager.py model-files.data --preload ../models/@ --js-output=model-files.js 24 | 25 | echo "Importing integer (8-bit) gemm implementation" 26 | SCRIPT_ABSOLUTE_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" 27 | sed -i.bak 's/"env"[[:space:]]*:[[:space:]]*asmLibraryArg,/"env": asmLibraryArg,\ 28 | "wasm_gemm": createWasmGemm(),/g' marian-decoder.js 29 | cat "$SCRIPT_ABSOLUTE_PATH/import-gemm-module.js" >> marian-decoder.js 30 | echo "SUCCESS" 31 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/eventhandler.h: -------------------------------------------------------------------------------- 1 | #ifndef EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | #include
"yaml-cpp/anchor.h" 13 | #include "yaml-cpp/emitterstyle.h" 14 | 15 | namespace YAML { 16 | struct Mark; 17 | 18 | class EventHandler { 19 | public: 20 | virtual ~EventHandler() {} 21 | 22 | virtual void OnDocumentStart(const Mark& mark) = 0; 23 | virtual void OnDocumentEnd() = 0; 24 | 25 | virtual void OnNull(const Mark& mark, anchor_t anchor) = 0; 26 | virtual void OnAlias(const Mark& mark, anchor_t anchor) = 0; 27 | virtual void OnScalar(const Mark& mark, const std::string& tag, 28 | anchor_t anchor, const std::string& value) = 0; 29 | 30 | virtual void OnSequenceStart(const Mark& mark, const std::string& tag, 31 | anchor_t anchor, EmitterStyle::value style) = 0; 32 | virtual void OnSequenceEnd() = 0; 33 | 34 | virtual void OnMapStart(const Mark& mark, const std::string& tag, 35 | anchor_t anchor, EmitterStyle::value style) = 0; 36 | virtual void OnMapEnd() = 0; 37 | }; 38 | } 39 | 40 | #endif // EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 41 | -------------------------------------------------------------------------------- /src/examples/mnist/mnist_ffnn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "marian.h" 8 | 9 | #include "examples/mnist/model.h" 10 | #include "examples/mnist/training.h" 11 | #include "training/graph_group_async.h" 12 | #include "training/graph_group_singleton.h" 13 | #include "training/graph_group_sync.h" 14 | 15 | const std::vector TRAIN_SET 16 | = {"../src/examples/mnist/train-images-idx3-ubyte", 17 | "../src/examples/mnist/train-labels-idx1-ubyte"}; 18 | const std::vector VALID_SET 19 | = {"../src/examples/mnist/t10k-images-idx3-ubyte", 20 | "../src/examples/mnist/t10k-labels-idx1-ubyte"}; 21 | 22 | using namespace marian; 23 | 24 | int main(int argc, char** argv) { 25 | auto options = parseOptions(argc, argv, cli::mode::training, false); 26 | 27 | if(!options->has("train-sets")) 28 | 
options->set("train-sets", TRAIN_SET); 29 | if(!options->has("valid-sets")) 30 | options->set("valid-sets", VALID_SET); 31 | 32 | if(options->get("type") != "mnist-lenet") 33 | options->set("type", "mnist-ffnn"); 34 | 35 | auto devices = Config::getDevices(options); 36 | 37 | if(devices.size() > 1) { 38 | if(options->get("sync-sgd")) 39 | New>(options)->run(); 40 | else 41 | New>(options)->run(); 42 | } else 43 | New>(options)->run(); 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /src/onnx/expression_graph_onnx_exporter.h: -------------------------------------------------------------------------------- 1 | #include "graph/expression_graph.h" 2 | 3 | namespace marian { 4 | // export of Marian models to ONNX 5 | class ExpressionGraphONNXExporter : public ExpressionGraph { 6 | #ifdef USE_ONNX 7 | public: 8 | // export a seq2seq model to a set of ONNX files 9 | void exportToONNX(const std::string& modelToPrefix, Ptr modelOptions, const std::vector& vocabPaths); 10 | 11 | private: 12 | // [name] -> (vector(name, Expr), vector(name, Expr)) 13 | typedef std::map>, std::vector> >> FunctionDefs; 14 | 15 | // serialize the current nodesForward_ to an ONNX file. This operation is destructive. 
16 | void serializeToONNX(const std::string& filename, FunctionDefs&& functionDefs, size_t sentinelDim); 17 | 18 | // find a node on the current forward tape 19 | Expr tryFindForwardNodeByName(const std::string& nodeName) const; 20 | 21 | // helper to transform nodesForward_ to only use the subset of operations supported by ONNX 22 | void expandMacroOpsForONNX(std::map>, std::vector> >>& functionDefs); 23 | 24 | // helper to build nodesForward_ from root nodes 25 | void rebuildNodesForward(const struct InputsMap& inputsMap, 26 | const std::vector>& outputDefs); 27 | #endif // USE_ONNX 28 | }; 29 | } 30 | -------------------------------------------------------------------------------- /src/training/graph_group_singleton.cpp: -------------------------------------------------------------------------------- 1 | #include "training/graph_group_singleton.h" 2 | 3 | namespace marian { 4 | 5 | void SingletonGraph::setScheduler(Ptr scheduler) { 6 | scheduler_ = scheduler; 7 | // optimizer has to be registered last to see changes of learning rate 8 | scheduler_->registerTrainingObserver(scheduler_); 9 | scheduler_->registerTrainingObserver(opt_); 10 | } 11 | 12 | void SingletonGraph::execute(Ptr batch) { 13 | auto lossNode = builder_->build(graph_, batch); 14 | graph_->forward(); 15 | graph_->backward(); 16 | 17 | // Get batch stats 18 | opt_->update(graph_); 19 | 20 | if(mvAvg_) { 21 | ABORT_IF(!scheduler_, "Scheduler is required for exponential smoothing"); 22 | 23 | if(!graphAvg_) { 24 | graphAvg_ = New(); 25 | graphAvg_->setDevice(graph_->getDeviceId()); 26 | graphAvg_->copyParams(graph_); 27 | } else { 28 | updateAvgParams(graphAvg_->params()->vals(), 29 | graph_->params()->vals(), 30 | scheduler_->numberOfBatches()); 31 | } 32 | } 33 | 34 | if(scheduler_) { 35 | scheduler_->update(*lossNode, batch); 36 | 37 | if(scheduler_->validating()) { 38 | if(mvAvg_) { 39 | graphAvg_->reuseWorkspace(graph_); 40 | scheduler_->validate({graphAvg_}); 41 | } else { 42 | 
scheduler_->validate({graph_}); 43 | } 44 | } 45 | 46 | if(scheduler_->saving()) 47 | this->save(); 48 | } 49 | } 50 | } // namespace marian 51 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/ostream_wrapper.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/ostream_wrapper.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace YAML { 8 | ostream_wrapper::ostream_wrapper() 9 | : m_buffer(1, '\0'), 10 | m_pStream(0), 11 | m_pos(0), 12 | m_row(0), 13 | m_col(0), 14 | m_comment(false) {} 15 | 16 | ostream_wrapper::ostream_wrapper(std::ostream& stream) 17 | : m_pStream(&stream), m_pos(0), m_row(0), m_col(0), m_comment(false) {} 18 | 19 | ostream_wrapper::~ostream_wrapper() {} 20 | 21 | void ostream_wrapper::write(const std::string& str) { 22 | if (m_pStream) { 23 | m_pStream->write(str.c_str(), str.size()); 24 | } else { 25 | m_buffer.resize(std::max(m_buffer.size(), m_pos + str.size() + 1)); 26 | std::copy(str.begin(), str.end(), m_buffer.begin() + m_pos); 27 | } 28 | 29 | for (std::size_t i = 0; i < str.size(); i++) { 30 | update_pos(str[i]); 31 | } 32 | } 33 | 34 | void ostream_wrapper::write(const char* str, std::size_t size) { 35 | if (m_pStream) { 36 | m_pStream->write(str, size); 37 | } else { 38 | m_buffer.resize(std::max(m_buffer.size(), m_pos + size + 1)); 39 | std::copy(str, str + size, m_buffer.begin() + m_pos); 40 | } 41 | 42 | for (std::size_t i = 0; i < size; i++) { 43 | update_pos(str[i]); 44 | } 45 | } 46 | 47 | void ostream_wrapper::update_pos(char ch) { 48 | m_pos++; 49 | m_col++; 50 | 51 | if (ch == '\n') { 52 | m_row++; 53 | m_col = 0; 54 | m_comment = false; 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/boost-bench.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi 
Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace logging = boost::log; 15 | namespace src = boost::log::sources; 16 | namespace sinks = boost::log::sinks; 17 | namespace keywords = boost::log::keywords; 18 | 19 | void init() 20 | { 21 | logging::add_file_log 22 | ( 23 | keywords::file_name = "logs/boost-sample_%N.log", /*< file name pattern >*/ 24 | keywords::auto_flush = false, 25 | keywords::format = "[%TimeStamp%]: %Message%" 26 | ); 27 | 28 | logging::core::get()->set_filter 29 | ( 30 | logging::trivial::severity >= logging::trivial::info 31 | ); 32 | } 33 | 34 | 35 | int main(int argc, char* []) 36 | { 37 | int howmany = 1000000; 38 | init(); 39 | logging::add_common_attributes(); 40 | 41 | using namespace logging::trivial; 42 | src::severity_logger_mt< severity_level > lg; 43 | for(int i = 0 ; i < howmany; ++i) 44 | BOOST_LOG_SEV(lg, info) << "boost message #" << i << ": This is some text for your pleasure"; 45 | 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /src/data/dataset.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | #include "common/options.h" 5 | #include "data/batch.h" 6 | #include "data/rng_engine.h" 7 | #include "data/vocab.h" 8 | #include "training/training_state.h" 9 | 10 | namespace marian { 11 | namespace data { 12 | 13 | template 14 | class DatasetBase { 15 | protected: 16 | std::vector paths_; 17 | Ptr options_; 18 | 19 | // Data processing may differ in training/inference settings 20 | bool inference_{false}; 21 | 22 | public: 23 | typedef Batch batch_type; 24 | typedef Ptr batch_ptr; // @TODO: rename to camel case 25 | typedef Iterator iterator; 26 | typedef SampleType Sample; 27 | 28 | DatasetBase(std::vector paths, 
Ptr options) 29 | : paths_(paths), 30 | options_(options), 31 | inference_(options != nullptr ? options->get("inference", false) : false) {} 32 | 33 | DatasetBase(Ptr options) : DatasetBase({}, options) {} 34 | 35 | virtual Iterator begin() = 0; 36 | virtual Iterator end() = 0; 37 | virtual void shuffle() = 0; 38 | 39 | virtual Sample next() = 0; 40 | 41 | virtual batch_ptr toBatch(const std::vector&) = 0; 42 | 43 | virtual void reset() {} 44 | virtual void prepare() {} 45 | virtual void restore(Ptr) {} 46 | 47 | // @TODO: remove after cleaning traininig/training.h 48 | virtual Ptr options() { return options_; } 49 | }; 50 | 51 | 52 | } // namespace data 53 | } // namespace marian 54 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/format.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "includes.h" 3 | 4 | template 5 | std::string log_info(const T& what, spdlog::level::level_enum logger_level = spdlog::level::info) 6 | { 7 | 8 | std::ostringstream oss; 9 | auto oss_sink = std::make_shared(oss); 10 | 11 | spdlog::logger oss_logger("oss", oss_sink); 12 | oss_logger.set_level(logger_level); 13 | oss_logger.set_pattern("%v"); 14 | oss_logger.info(what); 15 | 16 | return oss.str().substr(0, oss.str().length() - spdlog::details::os::eol_size); 17 | } 18 | 19 | 20 | 21 | 22 | 23 | 24 | TEST_CASE("basic_logging ", "[basic_logging]") 25 | { 26 | //const char 27 | REQUIRE(log_info("Hello") == "Hello"); 28 | REQUIRE(log_info("") == ""); 29 | 30 | //std::string 31 | REQUIRE(log_info(std::string("Hello")) == "Hello"); 32 | REQUIRE(log_info(std::string()) == std::string()); 33 | 34 | //Numbers 35 | REQUIRE(log_info(5) == "5"); 36 | REQUIRE(log_info(5.6) == "5.6"); 37 | 38 | //User defined class 39 | //REQUIRE(log_info(some_logged_class("some_val")) == "some_val"); 40 | } 41 | 42 | 43 | TEST_CASE("log_levels", "[log_levels]") 44 | { 45 | REQUIRE(log_info("Hello", 
spdlog::level::err) == ""); 46 | REQUIRE(log_info("Hello", spdlog::level::critical) == ""); 47 | REQUIRE(log_info("Hello", spdlog::level::info) == "Hello"); 48 | REQUIRE(log_info("Hello", spdlog::level::debug) == "Hello"); 49 | REQUIRE(log_info("Hello", spdlog::level::trace) == "Hello"); 50 | } 51 | 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /src/3rd_party/onnx/protobuf/onnx-ml.pb-wrapper.cpp: -------------------------------------------------------------------------------- 1 | // protobuf-generated files don't compile clean. This compiles them with warnings 2 | // disabled, without having to disable it for the entire project whole-sale. 3 | 4 | #ifdef USE_ONNX 5 | 6 | // Get protobuf this way: 7 | // sudo apt-get install cmake pkg-config libprotobuf9v5 protobuf-compiler libprotobuf-dev libgoogle-perftools-dev 8 | 9 | // Since we don't develop the ONNX .proto file, I just hand-created the .pb. files. 10 | // The automatic process that CMake would invoke fails because protobuf generates 11 | // source code that is not warning-free. So let's use this manual process for now, 12 | // and just version-control the resulting files. The command is simple enough: 13 | // cd src/3rd_party/onnx/protobuf 14 | // protoc -I=. --cpp_out=. 
onnx-ml.proto 15 | 16 | #ifdef _MSC_VER 17 | #pragma warning(push) 18 | #pragma warning(disable : 4100 4125 4127 4244 4267 4512 4456 4510 4610 4800) 19 | #endif 20 | #ifdef __GNUC__ 21 | #pragma GCC diagnostic ignored "-Wunused-variable" // note: GCC <6.0 ignores this when inside push/pop 22 | #pragma GCC diagnostic push 23 | #pragma GCC diagnostic ignored "-Wsuggest-override" 24 | #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" 25 | #endif 26 | 27 | #define AuxillaryParseTableField AuxiliaryParseTableField // in protobuf 3.12, the generated source has a spelling error 28 | 29 | #include "onnx-ml.pb.cc" // this is the actual file we compile 30 | 31 | #ifdef __GNUC__ 32 | #pragma GCC diagnostic pop 33 | #endif 34 | #ifdef _MSC_VER 35 | #pragma warning(pop) 36 | #endif 37 | 38 | #endif // USE_ONNX 39 | -------------------------------------------------------------------------------- /src/3rd_party/half_float/Readme.md: -------------------------------------------------------------------------------- 1 | half_float 2 | ======== 3 | 4 | #### 16 bit floating-point data type for C++ #### 5 | 6 | Implements a `HalfFloat` class that implements all the common arithmetic operations for a 16 bit 7 | floating-point type (10 bits mantissa, 5 bits exponent and one sign bit) and can thus be used (almost) 8 | interchangeably with regular `float`s. Not all operations have efficient implementations (some just convert to `float`, 9 | compute the result and convert back again) - if in doubt, check out the source code. 10 | 11 | The implementation tries to adhere to IEEE 754 in that it supports NaN and Infinity, but fails in other points: 12 | 13 | - no difference between qnan and snan 14 | - no traps 15 | - no well-defined rounding mode 16 | 17 | 18 | We also supply a specialization for `std::numeric_limits` so that `half` is usable in template code 19 | dependent on type traits.
20 | 21 | 22 | #### Usage #### 23 | 24 | // get some halfs (half is a typedef for HalfFloat) 25 | half a = 1.0f; 26 | half b = 0.5f; 27 | 28 | // and have some FUN 29 | half c = (a+b) / (a-b); 30 | ++c; 31 | 32 | // now that we have a result in lossy precision, 33 | // convert it back to double precision. 34 | // if anybody asks, it's for the lulz. 35 | double result = c; 36 | 37 | 38 | Credits to _Chris Maiwald_ for the conversion code to `double` and extensive testing. 39 | 40 | 41 | #### License #### 42 | 43 | 3-clause BSD license: use it for anything, but give credit, don't blame us if your rocket crashes and don't advertise with it (who would). -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/dll.h: -------------------------------------------------------------------------------- 1 | #ifndef DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | // The following ifdef block is the standard way of creating macros which make 11 | // exporting from a DLL simpler. All files within this DLL are compiled with the 12 | // yaml_cpp_EXPORTS symbol defined on the command line. This symbol should not 13 | // be defined on any project that uses this DLL. This way any other project 14 | // whose source files include this file see YAML_CPP_API functions as being 15 | // imported from a DLL, whereas this DLL sees symbols defined with this macro as 16 | // being exported.
17 | #undef YAML_CPP_API 18 | 19 | #ifdef YAML_CPP_DLL // Using or Building YAML-CPP DLL (definition defined 20 | // manually) 21 | #ifdef yaml_cpp_EXPORTS // Building YAML-CPP DLL (definition created by CMake 22 | // or defined manually) 23 | // #pragma message( "Defining YAML_CPP_API for DLL export" ) 24 | #define YAML_CPP_API __declspec(dllexport) 25 | #else // yaml_cpp_EXPORTS 26 | // #pragma message( "Defining YAML_CPP_API for DLL import" ) 27 | #define YAML_CPP_API __declspec(dllimport) 28 | #endif // yaml_cpp_EXPORTS 29 | #else // YAML_CPP_DLL 30 | #define YAML_CPP_API 31 | #endif // YAML_CPP_DLL 32 | 33 | #endif // DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 34 | -------------------------------------------------------------------------------- /src/common/signal_handling.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | // SIGNAL HANDLING 6 | 7 | // The signal handlers (and checkers) here are implemented in line with the recommendations 8 | // for signal handling in the SEI CERT C Coding Standard, specifically 9 | // 10 | // - SIG30-C: 11 | // https://wiki.sei.cmu.edu/confluence/display/c/SIG30-C.+Call+only+asynchronous-safe+functions+within+signal+handlers 12 | // 13 | // - SIG31-C: 14 | // https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers 15 | // 16 | // The exact behavior of 'graceful exit' depends on the application; for training, it means 'save model and exit', 17 | // for a server (not implemented yet): 'block new requests but serve pending requests and then exit'. 18 | // 19 | // Graceful exit for training is useful for training on clusters with time limits on jobs. Slurm, for example, can be 20 | // set up to send a custom signal at a set time before the end of the time slot, giving Marian time to save its current 21 | // state before getting killed.
22 | 23 | namespace marian { 24 | 25 | 26 | /// Request graceful exit (signal handler) 27 | void requestSaveAndExit(int sig); 28 | 29 | /// Check if graceful exit was requested. 30 | bool saveAndExitRequested(); 31 | 32 | /// General purpose signal handler that simply sets a flag when a signal is received. 33 | // (only for SIGNAL No. < 32). 34 | void setSignalFlag(int sig); // custom handler (set flag) for sig 35 | 36 | /// Check if a setSignalFlag was triggered for this signal 37 | bool getSignalFlag(int sig); 38 | 39 | } // End of namespace marian 40 | -------------------------------------------------------------------------------- /.github/workflows/wasm-customized_marian-macos.yml: -------------------------------------------------------------------------------- 1 | name: WASM (wasm-customized marian) MacOS 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ "**" ] 8 | 9 | jobs: 10 | build-wasm: 11 | name: WASM CPU-only 12 | runs-on: macos-latest 13 | 14 | steps: 15 | - name: Setup Emscripten toolchain 16 | uses: mymindstorm/setup-emsdk@v11 17 | with: 18 | version: 3.1.8 19 | 20 | - name: Verify Emscripten setup 21 | run: emcc -v 22 | 23 | - name: Checkout 24 | uses: actions/checkout@v2 25 | with: 26 | submodules: recursive 27 | 28 | - name: Configure builds 29 | run: | 30 | mkdir -p build-wasm 31 | cd build-wasm 32 | emcmake cmake \ 33 | -DCOMPILE_CUDA=off \ 34 | -DUSE_DOXYGEN=off \ 35 | -DCOMPILE_EXAMPLES=off \ 36 | -DCOMPILE_SERVER=off \ 37 | -DCOMPILE_TESTS=off \ 38 | -DUSE_FBGEMM=off \ 39 | -DUSE_SENTENCEPIECE=on \ 40 | -DUSE_STATIC_LIBS=on \ 41 | -DUSE_MKL=off \ 42 | -DUSE_WASM_COMPATIBLE_SOURCE=on \ 43 | -DCOMPILE_WASM=on ../ 44 | 45 | - name: Compile 46 | working-directory: build-wasm 47 | run: emmake make -j2 48 | 49 | - name: Check artifacts 50 | working-directory: build-wasm 51 | run: | 52 | ls -all . 
53 | if ls marian-decoder.wasm &>/dev/null && ls marian-decoder.js &>/dev/null 54 | then 55 | echo "Artifacts Successfully Generated" 56 | else 57 | echo "Failure: Artifacts Not Present" 58 | exit 1 59 | fi 60 | -------------------------------------------------------------------------------- /.github/workflows/wasm-customized_marian-ubuntu.yml: -------------------------------------------------------------------------------- 1 | name: WASM (wasm-customized marian) Ubuntu 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ "**" ] 8 | 9 | jobs: 10 | build-wasm: 11 | name: WASM CPU-only 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Setup Emscripten toolchain 16 | uses: mymindstorm/setup-emsdk@v11 17 | with: 18 | version: 3.1.8 19 | 20 | - name: Verify Emscripten setup 21 | run: emcc -v 22 | 23 | - name: Checkout 24 | uses: actions/checkout@v2 25 | with: 26 | submodules: recursive 27 | 28 | - name: Configure builds 29 | run: | 30 | mkdir -p build-wasm 31 | cd build-wasm 32 | emcmake cmake \ 33 | -DCOMPILE_CUDA=off \ 34 | -DUSE_DOXYGEN=off \ 35 | -DCOMPILE_EXAMPLES=off \ 36 | -DCOMPILE_SERVER=off \ 37 | -DCOMPILE_TESTS=off \ 38 | -DUSE_FBGEMM=off \ 39 | -DUSE_SENTENCEPIECE=on \ 40 | -DUSE_STATIC_LIBS=on \ 41 | -DUSE_MKL=off \ 42 | -DUSE_WASM_COMPATIBLE_SOURCE=on \ 43 | -DCOMPILE_WASM=on ../ 44 | 45 | - name: Compile 46 | working-directory: build-wasm 47 | run: emmake make -j2 48 | 49 | - name: Check artifacts 50 | working-directory: build-wasm 51 | run: | 52 | ls -all . 
53 | if ls marian-decoder.wasm &>/dev/null && ls marian-decoder.js &>/dev/null 54 | then 55 | echo "Artifacts Successfully Generated" 56 | else 57 | echo "Failure: Artifacts Not Present" 58 | exit 1 59 | fi 60 | -------------------------------------------------------------------------------- /scripts/embeddings/export_embeddings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | import argparse 7 | import yaml 8 | import numpy as np 9 | 10 | 11 | def main(): 12 | desc = """Export word embeddings from model""" 13 | parser = argparse.ArgumentParser( 14 | formatter_class=argparse.RawDescriptionHelpFormatter, description=desc) 15 | parser.add_argument("-m", "--model", help="path to model.npz file", required=True) 16 | parser.add_argument("-o", "--output-prefix", help="prefix for output files", required=True) 17 | args = parser.parse_args() 18 | 19 | print("Loading model") 20 | model = np.load(args.model) 21 | special = yaml.load(model["special:model.yml"][:-1].tobytes()) 22 | 23 | if special["tied-embeddings-all"] or special["tied-embeddings-src"]: 24 | all_emb = model["Wemb"] 25 | export_emb(args.output_prefix + ".all", all_emb) 26 | exit() 27 | 28 | if special["type"] == "amun": 29 | enc_emb = model["Wemb"] 30 | dec_emb = model["Wemb_dec"] 31 | else: 32 | enc_emb = model["encoder_Wemb"] 33 | dec_emb = model["decoder_Wemb"] 34 | 35 | export_emb(args.output_prefix + ".src", enc_emb) 36 | export_emb(args.output_prefix + ".trg", dec_emb) 37 | 38 | 39 | def export_emb(filename, emb): 40 | with open(filename, "w") as out: 41 | out.write("{0} {1}\n".format(*emb.shape)) 42 | for i in range(emb.shape[0]): 43 | vec = " ".join("{0:.8f}".format(v) for v in emb[i]) 44 | out.write("{0} {1}\n".format(i, vec)) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | 
-------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/streamcharsource.h: -------------------------------------------------------------------------------- 1 | #ifndef STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/noncopyable.h" 11 | #include 12 | 13 | namespace YAML { 14 | class StreamCharSource { 15 | public: 16 | StreamCharSource(const Stream& stream) : m_offset(0), m_stream(stream) {} 17 | StreamCharSource(const StreamCharSource& source) 18 | : m_offset(source.m_offset), m_stream(source.m_stream) {} 19 | ~StreamCharSource() {} 20 | 21 | operator bool() const; 22 | char operator[](std::size_t i) const { return m_stream.CharAt(m_offset + i); } 23 | bool operator!() const { return !static_cast(*this); } 24 | 25 | const StreamCharSource operator+(int i) const; 26 | 27 | private: 28 | std::size_t m_offset; 29 | const Stream& m_stream; 30 | 31 | StreamCharSource& operator=(const StreamCharSource&); // non-assignable 32 | }; 33 | 34 | inline StreamCharSource::operator bool() const { 35 | return m_stream.ReadAheadTo(m_offset); 36 | } 37 | 38 | inline const StreamCharSource StreamCharSource::operator+(int i) const { 39 | StreamCharSource source(*this); 40 | if (static_cast(source.m_offset) + i >= 0) 41 | source.m_offset += i; 42 | else 43 | source.m_offset = 0; 44 | return source; 45 | } 46 | } 47 | 48 | #endif // STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 49 | -------------------------------------------------------------------------------- /src/3rd_party/CLI/LICENSE: -------------------------------------------------------------------------------- 1 | CLI11 1.6 Copyright (c) 2017-2018 University of 
Cincinnati, developed by Henry 2 | Schreiner under NSF AWARD 1414736. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms of CLI11, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 3. Neither the name of the copyright holder nor the names of its contributors 13 | may be used to endorse or promote products derived from this software without 14 | specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 20 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/stlemitter.h: -------------------------------------------------------------------------------- 1 | #ifndef STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace YAML { 16 | template 17 | inline Emitter& EmitSeq(Emitter& emitter, const Seq& seq) { 18 | emitter << BeginSeq; 19 | for (typename Seq::const_iterator it = seq.begin(); it != seq.end(); ++it) 20 | emitter << *it; 21 | emitter << EndSeq; 22 | return emitter; 23 | } 24 | 25 | template 26 | inline Emitter& operator<<(Emitter& emitter, const std::vector& v) { 27 | return EmitSeq(emitter, v); 28 | } 29 | 30 | template 31 | inline Emitter& operator<<(Emitter& emitter, const std::list& v) { 32 | return EmitSeq(emitter, v); 33 | } 34 | 35 | template 36 | inline Emitter& operator<<(Emitter& emitter, const std::set& v) { 37 | return EmitSeq(emitter, v); 38 | } 39 | 40 | template 41 | inline Emitter& operator<<(Emitter& emitter, const std::map& m) { 42 | typedef typename std::map map; 43 | emitter << BeginMap; 44 | for (typename map::const_iterator it = m.begin(); it != m.end(); ++it) 45 | emitter << Key << it->first << Value << it->second; 46 | emitter << EndMap; 47 | return emitter; 48 | } 49 | } 50 | 51 | #endif // STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 52 | -------------------------------------------------------------------------------- /wasm/test_stdin.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 The Emscripten Authors. All rights reserved. 
3 | * Emscripten is available under two separate licenses, the MIT license and the 4 | * University of Illinois/NCSA Open Source License. Both these licenses can be 5 | * found in the LICENSE file. 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #ifdef __EMSCRIPTEN__ 14 | #include 15 | #endif 16 | 17 | int line = 0; 18 | 19 | void main_loop() 20 | { 21 | char str[10] = {0}; 22 | int ret; 23 | 24 | errno = 0; 25 | while (errno != EAGAIN) { 26 | if (line == 0) { 27 | ret = fgetc(stdin); 28 | if (ret != EOF) putc(ret, stdout); 29 | if (ret == '\n') line++; 30 | } else if (line > 0) { 31 | ret = scanf("%10s", str); 32 | if (ret > 0) puts(str); 33 | } 34 | 35 | int err = ferror(stdin); 36 | if (ferror(stdin) && errno != EAGAIN) { 37 | printf("error %d\n", err); 38 | exit(EXIT_FAILURE); 39 | } 40 | 41 | if (feof(stdin)) { 42 | puts("eof"); 43 | exit(EXIT_SUCCESS); 44 | } 45 | 46 | clearerr(stdin); 47 | } 48 | } 49 | 50 | int main(int argc, char const *argv[]) 51 | { 52 | fcntl(STDIN_FILENO, F_SETFL, O_NONBLOCK); 53 | 54 | // SM shell doesn't implement an event loop and therefor doesn't support 55 | // emscripten_set_main_loop. However, its stdin reads are sync so it 56 | // should exit out after calling main_loop once. 
57 | main_loop(); 58 | 59 | #ifdef __EMSCRIPTEN__ 60 | emscripten_set_main_loop(main_loop, 60, 0); 61 | #else 62 | while (1) main_loop(); sleep(1); 63 | #endif 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /scripts/checkpoints/average.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | This script takes multiple Marian *.npz model files and outputs an elementwise average of the model, 4 | meant to do check-point averaging from: 5 | 6 | https://www.aclweb.org/anthology/W16-2316 7 | 8 | usage: 9 | 10 | ./average.py -m model.1.npz model.2.npz --output model.avg.npz 11 | """ 12 | 13 | from __future__ import print_function 14 | 15 | import os 16 | import sys 17 | import argparse 18 | 19 | import numpy as np 20 | 21 | # Parse arguments 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('-m', '--model', nargs='+', required=True, 24 | help="models to average") 25 | parser.add_argument('-o', '--output', required=True, 26 | help="output path") 27 | args = parser.parse_args() 28 | 29 | # *average* holds the model matrix 30 | average = dict() 31 | # No. of models. 
32 | n = len(args.model) 33 | 34 | for filename in args.model: 35 | print("Loading {}".format(filename)) 36 | with open(filename, "rb") as mfile: 37 | # Loads matrix from model file 38 | m = np.load(mfile) 39 | for k in m: 40 | if k != "history_errs": 41 | # Initialize the key 42 | if k not in average: 43 | average[k] = m[k] 44 | # Add to the appropriate value 45 | elif average[k].shape == m[k].shape and "special" not in k: 46 | average[k] += m[k] 47 | 48 | # Actual averaging 49 | for k in average: 50 | if "special" not in k: 51 | average[k] /= n 52 | 53 | # Save averaged model to file 54 | print("Saving to {}".format(args.output)) 55 | np.savez(args.output, **average) 56 | -------------------------------------------------------------------------------- /.github/workflows/macos.yml: -------------------------------------------------------------------------------- 1 | name: MacOS 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ "**" ] 8 | 9 | jobs: 10 | build-macos: 11 | name: MacOS CPU-only 12 | runs-on: macos-latest 13 | 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v2 17 | with: 18 | submodules: recursive 19 | 20 | - name: Install dependencies 21 | run: | 22 | brew update 23 | brew install openblas protobuf 24 | 25 | # Openblas location is exported explicitly because openblas is keg-only, 26 | # which means it was not symlinked into /usr/local/. 27 | # CMake cannot find BLAS on GitHub runners if Marian is being compiled 28 | # statically, hence USE_STATIC_LIBS=off 29 | - name: Configure CMake 30 | run: | 31 | export LDFLAGS="-L/usr/local/opt/openblas/lib" 32 | export CPPFLAGS="-I/usr/local/opt/openblas/include" 33 | mkdir -p build 34 | cd build 35 | cmake .. 
\ 36 | -DCOMPILE_CPU=on \ 37 | -DCOMPILE_CUDA=off \ 38 | -DCOMPILE_EXAMPLES=on \ 39 | -DCOMPILE_SERVER=off \ 40 | -DCOMPILE_TESTS=on \ 41 | -DUSE_FBGEMM=on \ 42 | -DUSE_SENTENCEPIECE=on \ 43 | -DUSE_STATIC_LIBS=off 44 | 45 | - name: Compile 46 | working-directory: build 47 | run: make -j2 48 | 49 | - name: Run unit tests 50 | working-directory: build 51 | run: make test 52 | 53 | - name: Print versions 54 | working-directory: build 55 | run: | 56 | ./marian --version 57 | ./marian-decoder --version 58 | ./marian-scorer --version 59 | ./spm_encode --version 60 | 61 | -------------------------------------------------------------------------------- /src/optimizers/quantizer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/options.h" 4 | #include "functional/functional.h" 5 | #include "graph/expression_graph.h" 6 | #include "tensors/backend.h" 7 | #include "tensors/tensor.h" 8 | #include "tensors/tensor_allocator.h" 9 | #include "tensors/tensor_operators.h" 10 | 11 | namespace marian { 12 | 13 | /* Class to implement quantization of all the parameters in a model graph 14 | * This class handles the required error-feedback mechanism internally. 15 | * Example: 16 | * auto mq = New(options_); 17 | * mq->quantize(graph_); 18 | * 19 | * Parameters in graph_ will be quantized every time quantize is called. 20 | * The internal error-residual is also updated each quantize call, 21 | * therefore, use the same ModelQuantizer object to quantize the same graph. 
22 | */ 23 | class ModelQuantizer { 24 | public: 25 | ModelQuantizer(Ptr options) 26 | : bits_{options->get("quantize-bits")}, 27 | optSteps_{options->get("quantize-optimization-steps")}, 28 | quantBias_{options->get("quantize-biases")}, 29 | logQuant_{options->get("quantize-log-based")} {} 30 | 31 | void quantize(Ptr graph); 32 | 33 | protected: 34 | void quantizeImpl(Tensor t); 35 | 36 | size_t bits_; 37 | size_t optSteps_; 38 | bool quantBias_; 39 | bool logQuant_; 40 | bool isFirstError_; 41 | 42 | std::vector> allocators_; 43 | 44 | Tensor errorResidual_; // Tensor to store the error-residual 45 | Tensor delta_; // temporary Tensor for storing q to calculate optimal S 46 | Tensor tempVar_; // single element Tensor for Reduce swap variable 47 | }; 48 | } // namespace marian 49 | -------------------------------------------------------------------------------- /src/3rd_party/faiss/utils/random.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | // -*- c++ -*- 9 | 10 | /* Random generators. Implemented here for speed and to make 11 | * sequences reproducible. 
12 | */ 13 | 14 | #pragma once 15 | 16 | #include 17 | #include 18 | 19 | 20 | namespace faiss { 21 | 22 | /************************************************** 23 | * Random data generation functions 24 | **************************************************/ 25 | 26 | /// random generator that can be used in multithreaded contexts 27 | struct RandomGenerator { 28 | 29 | std::mt19937 mt; 30 | 31 | /// random positive integer 32 | int rand_int (); 33 | 34 | /// random int64_t 35 | int64_t rand_int64 (); 36 | 37 | /// generate random integer between 0 and max-1 38 | int rand_int (int max); 39 | 40 | /// between 0 and 1 41 | float rand_float (); 42 | 43 | double rand_double (); 44 | 45 | explicit RandomGenerator (int64_t seed = 1234); 46 | }; 47 | 48 | /* Generate an array of uniform random floats / multi-threaded implementation */ 49 | void float_rand (float * x, size_t n, int64_t seed); 50 | void float_randn (float * x, size_t n, int64_t seed); 51 | void int64_rand (int64_t * x, size_t n, int64_t seed); 52 | void byte_rand (uint8_t * x, size_t n, int64_t seed); 53 | // max is actually the maximum value + 1 54 | void int64_rand_max (int64_t * x, size_t n, uint64_t max, int64_t seed); 55 | 56 | /* random permutation */ 57 | void rand_perm (int * perm, size_t n, int64_t seed); 58 | 59 | 60 | } // namespace faiss 61 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/parse.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/node/parse.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "yaml-cpp/node/node.h" 7 | #include "yaml-cpp/node/impl.h" 8 | #include "yaml-cpp/parser.h" 9 | #include "nodebuilder.h" 10 | 11 | namespace YAML { 12 | Node Load(const std::string& input) { 13 | std::stringstream stream(input); 14 | return Load(stream); 15 | } 16 | 17 | Node Load(const char* input) { 18 | std::stringstream stream(input); 19 | return Load(stream); 20 | } 21 | 22 | 
Node Load(std::istream& input) { 23 | Parser parser(input); 24 | NodeBuilder builder; 25 | if (!parser.HandleNextDocument(builder)) { 26 | return Node(); 27 | } 28 | 29 | return builder.Root(); 30 | } 31 | 32 | Node LoadFile(const std::string& filename) { 33 | std::ifstream fin(filename.c_str()); 34 | if (!fin) { 35 | throw BadFile(); 36 | } 37 | return Load(fin); 38 | } 39 | 40 | std::vector LoadAll(const std::string& input) { 41 | std::stringstream stream(input); 42 | return LoadAll(stream); 43 | } 44 | 45 | std::vector LoadAll(const char* input) { 46 | std::stringstream stream(input); 47 | return LoadAll(stream); 48 | } 49 | 50 | std::vector LoadAll(std::istream& input) { 51 | std::vector docs; 52 | 53 | Parser parser(input); 54 | while (1) { 55 | NodeBuilder builder; 56 | if (!parser.HandleNextDocument(builder)) { 57 | break; 58 | } 59 | docs.push_back(builder.Root()); 60 | } 61 | 62 | return docs; 63 | } 64 | 65 | std::vector LoadAllFromFile(const std::string& filename) { 66 | std::ifstream fin(filename.c_str()); 67 | if (!fin) { 68 | throw BadFile(); 69 | } 70 | return LoadAll(fin); 71 | } 72 | } // namespace YAML 73 | -------------------------------------------------------------------------------- /src/graph/node_operators.cpp: -------------------------------------------------------------------------------- 1 | #include "node_operators.h" 2 | #include "expression_graph.h" 3 | 4 | #include "tensors/tensor_operators.h" 5 | 6 | namespace marian { 7 | 8 | ConstantNode::ConstantNode(Ptr graph, 9 | const Shape& shape, 10 | const Ptr& init, 11 | Type valueType) 12 | : Node(graph, shape, valueType), 13 | init_(init), 14 | initialized_(false) { 15 | init_->setAllocator(graph->allocator()); 16 | setTrainable(false); 17 | } 18 | 19 | void ConstantNode::allocate() { 20 | if(!val_) { 21 | graph()->allocateForward(this); 22 | } 23 | } 24 | 25 | void ConstantNode::init() { 26 | if(!initialized_) { 27 | init_->apply(val_); 28 | initialized_ = true; 29 | } 30 | 
init_.reset(); 31 | } 32 | 33 | ParamNode::ParamNode(Ptr graph, 34 | const Shape& shape, 35 | const Ptr& init, 36 | bool fixed) 37 | : ParamNode(graph, shape, init, Type::float32, fixed) {} 38 | 39 | ParamNode::ParamNode(Ptr graph, 40 | const Shape& shape, 41 | const Ptr& init, 42 | Type valueType, 43 | bool fixed) 44 | : Node(graph, shape, valueType), 45 | init_(init), 46 | initialized_(false) { 47 | init_->setAllocator(graph->allocator()); 48 | setTrainable(!fixed); 49 | setMemoize(graph->isInference()); 50 | } 51 | 52 | void ParamNode::init() { 53 | if(!initialized_) { 54 | init_->apply(val_); 55 | initialized_ = true; 56 | } 57 | init_.reset(); 58 | } 59 | } // namespace marian 60 | --------------------------------------------------------------------------------