├── VERSION ├── vs ├── .gitignore └── BuildRelease.bat ├── src ├── 3rd_party │ ├── nccl │ │ ├── pkg │ │ │ ├── debian │ │ │ │ ├── compat │ │ │ │ ├── copyright │ │ │ │ ├── source │ │ │ │ │ └── format │ │ │ │ ├── .gitignore │ │ │ │ ├── libnccl-dev.install.in │ │ │ │ ├── libnccl2.install.in │ │ │ │ ├── gbp.conf │ │ │ │ ├── changelog.in │ │ │ │ ├── rules │ │ │ │ └── control.in │ │ │ ├── Makefile │ │ │ ├── txz │ │ │ │ └── create_txz.sh.in │ │ │ └── srctxz │ │ │ │ ├── create_srctxz.sh.in │ │ │ │ └── Makefile │ │ ├── .gitignore │ │ ├── makefiles │ │ │ ├── version.mk │ │ │ └── formatting.mk │ │ ├── ext-net │ │ │ └── dummy │ │ │ │ └── Makefile │ │ ├── src │ │ │ ├── include │ │ │ │ ├── ring.h │ │ │ │ ├── rings.h │ │ │ │ ├── bootstrap.h │ │ │ │ ├── utils.h │ │ │ │ ├── group.h │ │ │ │ └── enqueue.h │ │ │ └── collectives │ │ │ │ └── device │ │ │ │ ├── all_gather.cu │ │ │ │ ├── broadcast.cu │ │ │ │ ├── reduce.cu │ │ │ │ ├── all_reduce.cu │ │ │ │ └── reduce_scatter.cu │ │ └── Makefile │ ├── sentencepiece │ │ ├── VERSION │ │ ├── python │ │ │ ├── test │ │ │ │ ├── __init__.py │ │ │ │ ├── botchan.txt │ │ │ │ ├── test_model.model │ │ │ │ └── test_ja_model.model │ │ │ ├── .gitignore │ │ │ ├── setup.cfg │ │ │ └── MANIFEST.in │ │ ├── tensorflow │ │ │ ├── __init__.py │ │ │ ├── test │ │ │ │ └── __init__.py │ │ │ ├── .gitignore │ │ │ └── tf_sentencepiece │ │ │ │ ├── _sentencepiece_processor_ops.so.1.7.0 │ │ │ │ ├── _sentencepiece_processor_ops.so.1.8.0 │ │ │ │ ├── _sentencepiece_processor_ops.so.1.9.0 │ │ │ │ ├── _sentencepiece_processor_ops.so.1.10.0 │ │ │ │ ├── _sentencepiece_processor_ops.so.1.11.0 │ │ │ │ ├── _sentencepiece_processor_ops.so.1.12.0 │ │ │ │ └── __init__.py │ │ ├── config.h.in │ │ ├── sentencepiece.pc.in │ │ ├── appveyor.yml │ │ ├── src │ │ │ ├── freelist_test.cc │ │ │ ├── test_main.cc │ │ │ ├── model_factory.h │ │ │ ├── trainer_factory.h │ │ │ ├── word_model.h │ │ │ ├── char_model.h │ │ │ ├── char_model_trainer.h │ │ │ ├── word_model.cc │ │ │ ├── unicode_script.cc │ │ │ 
└── bpe_model.h │ │ ├── .gitignore │ │ ├── doc │ │ │ └── special_symbols.md │ │ └── third_party │ │ │ └── esaxx │ │ │ └── LICENSE │ ├── spdlog │ │ ├── tests │ │ │ ├── main.cpp │ │ │ ├── utils.h │ │ │ ├── includes.h │ │ │ ├── CMakeLists.txt │ │ │ ├── install_libcxx.sh │ │ │ ├── utils.cpp │ │ │ └── tests.sln │ │ ├── bench │ │ │ ├── logs │ │ │ │ └── .gitignore │ │ │ ├── latency │ │ │ │ ├── compare.sh │ │ │ │ ├── utils.h │ │ │ │ └── g3log-crush.cpp │ │ │ ├── easyl.conf │ │ │ ├── glog-bench.cpp │ │ │ ├── easylogging-bench.cpp │ │ │ ├── spdlog-bench.cpp │ │ │ ├── utils.h │ │ │ ├── glog-bench-mt.cpp │ │ │ ├── easylogging-bench-mt.cpp │ │ │ └── spdlog-bench-mt.cpp │ │ ├── example │ │ │ ├── jni │ │ │ │ ├── Application.mk │ │ │ │ └── Android.mk │ │ │ ├── utils.h │ │ │ ├── Makefile.mingw │ │ │ ├── Makefile.clang │ │ │ └── example.sln │ │ ├── astyle.sh │ │ ├── cmake │ │ │ └── spdlog.pc.in │ │ ├── INSTALL │ │ ├── include │ │ │ └── spdlog │ │ │ │ ├── fmt │ │ │ │ ├── ostr.h │ │ │ │ ├── fmt.h │ │ │ │ └── bundled │ │ │ │ │ └── ostream.cc │ │ │ │ ├── sinks │ │ │ │ ├── null_sink.h │ │ │ │ ├── msvc_sink.h │ │ │ │ ├── sink.h │ │ │ │ ├── ostream_sink.h │ │ │ │ └── base_sink.h │ │ │ │ ├── details │ │ │ │ ├── null_mutex.h │ │ │ │ └── log_msg.h │ │ │ │ └── formatter.h │ │ ├── sinks │ │ │ ├── sink.h │ │ │ ├── null_sink.h │ │ │ ├── msvc_sink.h │ │ │ ├── base_sink.h │ │ │ └── ostream_sink.h │ │ ├── details │ │ │ └── null_mutex.h │ │ ├── .gitignore │ │ ├── formatter.h │ │ └── LICENSE │ ├── zlib │ │ ├── zlib.3.pdf │ │ ├── CMakeLists.txt │ │ ├── inffast.h │ │ └── gzclose.c │ ├── yaml-cpp │ │ ├── CMakeLists.txt │ │ ├── null.cpp │ │ ├── yaml-node.cpp │ │ ├── contrib │ │ │ ├── graphbuilder.cpp │ │ │ └── anchordict.h │ │ ├── directives.cpp │ │ ├── anchor.h │ │ ├── emitterstyle.h │ │ ├── node │ │ │ ├── type.h │ │ │ ├── detail │ │ │ │ ├── iterator_fwd.h │ │ │ │ ├── bool_type.h │ │ │ │ └── memory.h │ │ │ ├── ptr.h │ │ │ ├── emit.h │ │ │ └── iterator.h │ │ ├── emitterdef.h │ │ ├── memory.cpp │ │ ├── 
emit.cpp │ │ ├── scantag.h │ │ ├── exceptions.cpp │ │ ├── noncopyable.h │ │ ├── directives.h │ │ ├── tag.h │ │ ├── yaml.h │ │ ├── null.h │ │ ├── mark.h │ │ ├── LICENSE │ │ ├── regex_yaml.cpp │ │ ├── indentation.h │ │ ├── collectionstack.h │ │ ├── tag.cpp │ │ └── ptr_vector.h │ ├── pathie-cpp │ │ ├── CMakeLists.txt │ │ └── LICENSE │ ├── CLI │ │ ├── Version.hpp │ │ ├── CLI.hpp │ │ └── Macros.hpp │ ├── SQLiteCpp │ │ ├── sqlite3 │ │ │ └── README.md │ │ └── LICENSE.txt │ ├── cnpy │ │ └── LICENSE │ ├── zstr │ │ └── LICENSE │ ├── CMakeLists.txt │ └── ExceptionWithCallStack.h ├── tests │ ├── run_tests.cpp │ ├── README.md │ ├── dropout_test.cpp │ └── logger_test.cpp ├── data │ ├── revo_stub.cpp │ ├── rng_engine.h │ ├── types.h │ ├── batch.h │ ├── iterator_facade.h │ └── revo_stub.h ├── common │ ├── version.h │ ├── regex.h │ ├── version.cpp │ ├── hash.h │ ├── project_version.h.in │ ├── io_item.h │ ├── binary.h │ ├── config_validator.h │ ├── cli_helper.cpp │ ├── io.h │ └── utils.h ├── examples │ ├── mnist │ │ ├── .gitignore │ │ └── download.sh │ ├── README.md │ └── CMakeLists.txt ├── translator │ ├── history.cpp │ ├── helpers.h │ ├── nth_element.h │ └── output_printer.cpp ├── models │ ├── transformer_stub.cpp │ ├── model_task.h │ ├── transformer_factory.h │ └── model_base.h ├── tensors │ ├── gpu │ │ ├── element.h │ │ ├── add.h │ │ ├── algorithm.h │ │ ├── prod.h │ │ └── backend.h │ ├── cpu │ │ ├── backend.h │ │ └── sharp │ │ │ └── int_gemm.h │ ├── rand.h │ ├── backend.cpp │ ├── memory_piece.h │ └── backend.h ├── functional │ ├── defs.h │ ├── functional.h │ ├── array.h │ └── tensor.h ├── command │ ├── marian_scorer.cpp │ ├── marian_decoder.cpp │ ├── marian_vocab.cpp │ └── marian_conv.cpp ├── optimizers │ ├── clippers.cpp │ └── clippers.h ├── marian.h ├── training │ ├── gradient_dropping │ │ └── gpu │ │ │ └── sparse_algorithm.h │ ├── exponential_smoothing.h │ └── graph_group_async_drop.h ├── layers │ ├── weight.cpp │ ├── weight.h │ └── convolution.cpp ├── rnn │ └── 
attention_constructors.h └── graph │ └── node_operators.cpp ├── examples ├── transformer │ ├── .gitignore │ └── scripts │ │ ├── validate.sh │ │ └── download-files.sh ├── translating-amun │ └── .gitignore ├── tools │ ├── .gitignore │ └── Makefile ├── wmt2017-transformer │ ├── .gitignore │ └── scripts │ │ ├── validate.en.sh │ │ ├── validate.sh │ │ ├── download-files-mono.sh │ │ ├── rescore.py │ │ ├── download-files.sh │ │ └── preprocess-data-mono.sh ├── wmt2017-uedin │ ├── .gitignore │ └── scripts │ │ ├── validate.en.sh │ │ ├── validate.sh │ │ ├── download-files-mono.sh │ │ ├── rescore.py │ │ ├── download-files.sh │ │ └── preprocess-data-mono.sh ├── training-basics-sentencepiece │ ├── clean.sh │ ├── .gitignore │ └── data │ │ └── norm_romanian.tsv ├── training-basics │ ├── .gitignore │ ├── clean.sh │ └── scripts │ │ ├── validate.sh │ │ ├── normalise-romanian.py │ │ ├── remove-diacritics.py │ │ └── download-files.sh ├── .gitignore └── LICENSE.md ├── NBCL4NMT.pdf ├── contrib ├── other-builds │ ├── cmake_doze.txt │ └── eclipse │ │ └── .project ├── autoformat.sh └── vim │ └── .vimrc ├── CL_tools ├── display_enc_mod.sh ├── pipline.sh ├── plt_cdf.py ├── pre_sent_score.py └── stat_mod.py ├── runner ├── validate-en-de.sh └── decode_validate.sh ├── scripts ├── contrib │ ├── fix_hard.py │ └── inject_ctt.py └── server │ └── client_example.py ├── cmake ├── FindTcmalloc.cmake └── FindNCCL.cmake └── LICENSE.md /VERSION: -------------------------------------------------------------------------------- 1 | v2.0.0 2 | -------------------------------------------------------------------------------- /vs/.gitignore: -------------------------------------------------------------------------------- 1 | build-vs 2 | deps 3 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/pkg/debian/compat: -------------------------------------------------------------------------------- 1 | 9 2 | 
-------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/VERSION: -------------------------------------------------------------------------------- 1 | 0.1.6 2 | -------------------------------------------------------------------------------- /examples/transformer/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | model 3 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/python/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/pkg/debian/copyright: -------------------------------------------------------------------------------- 1 | ../../LICENSE.txt -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/tensorflow/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/pkg/debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (native) 2 | -------------------------------------------------------------------------------- /examples/translating-amun/.gitignore: -------------------------------------------------------------------------------- 1 | en-de 2 | data 3 | *.yml 4 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/python/.gitignore: -------------------------------------------------------------------------------- 1 | 
/*.so 2 | /build 3 | -------------------------------------------------------------------------------- /examples/tools/.gitignore: -------------------------------------------------------------------------------- 1 | moses-scripts 2 | subword-nmt 3 | sacreBLEU 4 | -------------------------------------------------------------------------------- /examples/wmt2017-transformer/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | model 3 | model.back 4 | -------------------------------------------------------------------------------- /examples/wmt2017-uedin/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | model 3 | model.back 4 | 5 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/python/test/botchan.txt: -------------------------------------------------------------------------------- 1 | ../../data/botchan.txt -------------------------------------------------------------------------------- /NBCL4NMT.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/NBCL4NMT.pdf -------------------------------------------------------------------------------- /src/tests/run_tests.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_MAIN 2 | #include "catch.hpp" 3 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/main.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_MAIN 2 | #include "catch.hpp" -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/python/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 
-------------------------------------------------------------------------------- /examples/training-basics-sentencepiece/clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -v 2 | 3 | git clean -f -d -f 4 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/tensorflow/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | sdist/ 3 | dist/ 4 | tmp/ 5 | *py[cod] 6 | -------------------------------------------------------------------------------- /src/3rd_party/zlib/zlib.3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/zlib/zlib.3.pdf -------------------------------------------------------------------------------- /src/data/revo_stub.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by revo on 8/2/19. 
3 | // 4 | 5 | #include "data/gap_training.h" 6 | 7 | -------------------------------------------------------------------------------- /contrib/other-builds/cmake_doze.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/contrib/other-builds/cmake_doze.txt -------------------------------------------------------------------------------- /examples/training-basics/.gitignore: -------------------------------------------------------------------------------- 1 | data/corpus.* 2 | data/news*.*.ro 3 | data/news*.*.en 4 | data/*.output 5 | model 6 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/python/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include test *.py *.model botchan.txt 2 | include *.i *.md 3 | 4 | -------------------------------------------------------------------------------- /examples/training-basics-sentencepiece/.gitignore: -------------------------------------------------------------------------------- 1 | data/corpus.* 2 | data/news*.ro 3 | data/news*.en 4 | data/*.output 5 | model 6 | -------------------------------------------------------------------------------- /examples/training-basics/clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -v 2 | 3 | git clean -f 4 | rm -rf moses-scripts subword-nmt model data/*.output 5 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/logs/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /src/common/version.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace marian { 6 | std::string buildVersion(); 7 | } 8 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/.gitignore: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved. 2 | /build 3 | *.gcov 4 | /coverage/ 5 | -------------------------------------------------------------------------------- /src/examples/mnist/.gitignore: -------------------------------------------------------------------------------- 1 | t10k-images-idx3-ubyte 2 | t10k-labels-idx1-ubyte 3 | train-images-idx3-ubyte 4 | train-labels-idx1-ubyte 5 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/pkg/debian/.gitignore: -------------------------------------------------------------------------------- 1 | /*.debhelper.log 2 | /*.debhelper 3 | /*.substvars 4 | /tmp/ 5 | /files 6 | /libnccl1/ 7 | /libnccl-dev/ 8 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/jni/Application.mk: -------------------------------------------------------------------------------- 1 | # Exceptions are used in spdlog. Link to an exception-ready C++ runtime. 
2 | APP_STL = gnustl_static 3 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/python/test/test_model.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/python/test/test_model.model -------------------------------------------------------------------------------- /src/3rd_party/nccl/makefiles/version.mk: -------------------------------------------------------------------------------- 1 | ##### version 2 | NCCL_MAJOR := 2 3 | NCCL_MINOR := 3 4 | NCCL_PATCH := 7 5 | NCCL_SUFFIX := 6 | PKG_REVISION := 1 7 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/python/test/test_ja_model.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/python/test/test_ja_model.model -------------------------------------------------------------------------------- /src/3rd_party/nccl/pkg/debian/libnccl-dev.install.in: -------------------------------------------------------------------------------- 1 | include/nccl.h /usr/include 2 | lib/libnccl.so /usr/lib/${pkg:MultiArch} 3 | lib/libnccl_static.a /usr/lib/${pkg:MultiArch} 4 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/astyle.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | find . -name "*\.h" -o -name "*\.cpp"|xargs dos2unix 3 | find . 
-name "*\.h" -o -name "*\.cpp"|xargs astyle -n -c -A1 4 | 5 | 6 | -------------------------------------------------------------------------------- /src/common/regex.h: -------------------------------------------------------------------------------- 1 | 2 | #ifdef USE_BOOST_REGEX 3 | #include 4 | namespace regex = boost; 5 | #else 6 | #include 7 | namespace regex = std; 8 | #endif 9 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/pkg/debian/libnccl2.install.in: -------------------------------------------------------------------------------- 1 | lib/libnccl.so.${nccl:Major} /usr/lib/${pkg:MultiArch} 2 | lib/libnccl.so.${nccl:Major}.${nccl:Minor}.${nccl:Patch} /usr/lib/${pkg:MultiArch} 3 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/pkg/debian/gbp.conf: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | debian-branch = master 3 | upstream-branch = master 4 | 5 | ignore-new = True 6 | 7 | [git-buildpackage] 8 | 9 | no-purge = True 10 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/cmake/spdlog.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | includedir=${prefix}/include 3 | 4 | Name: @PROJECT_NAME@ 5 | Description: Super fast C++ logging library. 
6 | Version: @PROJECT_VERSION@ 7 | -------------------------------------------------------------------------------- /src/translator/history.cpp: -------------------------------------------------------------------------------- 1 | #include "history.h" 2 | 3 | namespace marian { 4 | 5 | History::History(size_t lineNo, float alpha, float wp) 6 | : lineNo_(lineNo), alpha_(alpha), wp_(wp) {} 7 | } // namespace marian 8 | -------------------------------------------------------------------------------- /examples/training-basics-sentencepiece/data/norm_romanian.tsv: -------------------------------------------------------------------------------- 1 | 015E 53 2 | 015F 73 3 | 0162 54 4 | 0163 74 5 | 0218 53 6 | 0219 73 7 | 021A 54 8 | 021B 74 9 | 0102 41 10 | 0103 61 11 | 00C2 41 12 | 00E2 61 13 | 00CE 49 14 | 00EE 69 -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.7.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.7.0 -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.8.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.8.0 -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.9.0: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.9.0 -------------------------------------------------------------------------------- /CL_tools/display_enc_mod.sh: -------------------------------------------------------------------------------- 1 | ls model_revo.iter* | xargs -I {} python ~/GOD.util/performance/competence/dis_enc_mod_print.py {} encoder_Wemb > CL_MOD.log 2 | python ~/GOD.util/performance/competence/plt_avg_mod.py BASE_AVG_MOD < CL_MOD.log 3 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.10.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.10.0 -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.11.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.11.0 -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.12.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NLP2CT/norm-nmt/HEAD/src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/_sentencepiece_processor_ops.so.1.12.0 -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 
include_directories(..) 2 | include_directories(.) 3 | 4 | FILE(GLOB YamlCppSources *.cpp contrib/*.cpp) 5 | if (NOT TARGET libyaml-cpp) 6 | add_library(libyaml-cpp OBJECT ${YamlCppSources}) 7 | endif() 8 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/config.h.in: -------------------------------------------------------------------------------- 1 | #ifndef CONFIG_H_ 2 | #define CONFIG_H_ 3 | 4 | #define VERSION "@PROJECT_VERSION@" 5 | #define PACKAGE "@PROJECT_NAME@" 6 | #define PACKAGE_STRING "@PROJECT_NAME@" 7 | 8 | 9 | #endif // CONFIG_H_ 10 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/tensorflow/tf_sentencepiece/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from tf_sentencepiece.sentencepiece_processor_ops import * 6 | -------------------------------------------------------------------------------- /runner/validate-en-de.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SRCL=en 4 | TGTL=de 5 | TERM=News 6 | VALID=data/newstest2013.tc.$TGTL 7 | 8 | cat $1 | sed 's/@@ //g' \ 9 | | ~/NBCL-marian/runner/multi-bleu.perl -lc $VALID \ 10 | | sed -r 's/BLEU = ([0-9.]+),.*/\1/' 11 | -------------------------------------------------------------------------------- /src/3rd_party/pathie-cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(..) 2 | include_directories(.) 
3 | include_directories(include) 4 | 5 | FILE(GLOB PathieCppSources src/*.cpp) 6 | if (NOT TARGET pathie-cpp) 7 | add_library(pathie-cpp OBJECT ${PathieCppSources}) 8 | endif() 9 | -------------------------------------------------------------------------------- /src/models/transformer_stub.cpp: -------------------------------------------------------------------------------- 1 | // TODO: This is a wrapper around transformer.h. We kept the .H name to minimize confusing git, until this is code-reviewed. 2 | // This is meant to speed-up builds, and to support Ctrl-F7 to rebuild. 3 | 4 | #include "models/transformer.h" 5 | -------------------------------------------------------------------------------- /CL_tools/pipline.sh: -------------------------------------------------------------------------------- 1 | python ~/GOD.util/performance/competence/process_fasttext.py -i corpus.bpe.en -o en.emb -v ../BASE_2_REVO_en-ro/vocab.en.yml -w ~/fast/fasttext 2 | python ~/GOD.util/performance/competence/build_cdf_mod.py --emb_vector en.emb.orig.vec corpus.bpe.en en-mod 3 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/null.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/null.h" 2 | 3 | namespace YAML { 4 | _Null Null; 5 | 6 | bool IsNullString(const std::string& str) { 7 | return str.empty() || str == "~" || str == "null" || str == "Null" || 8 | str == "NULL"; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/pkg/debian/changelog.in: -------------------------------------------------------------------------------- 1 | nccl (${nccl:Major}.${nccl:Minor}.${nccl:Patch}${nccl:Suffix}-${pkg:Revision}+cuda${cuda:Major}.${cuda:Minor}) trusty; urgency=medium 2 | 3 | * Automatic Debian package from build 4 | 5 | -- cudatools ${pkg:Timestamp} 6 | 
-------------------------------------------------------------------------------- /src/tensors/gpu/element.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tensors/tensor.h" 4 | 5 | namespace marian { 6 | namespace gpu { 7 | 8 | template 9 | void Element(Functor functor, Tensor out, Tensors... tensors); 10 | } 11 | } // namespace marian 12 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/pkg/debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | %: 4 | dh $@ --parallel 5 | 6 | override_dh_auto_install: 7 | PREFIX=debian/tmp dh_auto_install 8 | 9 | override_dh_auto_test: 10 | # Do not make test 11 | 12 | override_dh_auto_clean: 13 | # Do not make clean 14 | -------------------------------------------------------------------------------- /examples/tools/Makefile: -------------------------------------------------------------------------------- 1 | all: moses-scripts subword-nmt sacreBLEU 2 | 3 | moses-scripts: 4 | git clone https://github.com/marian-nmt/moses-scripts 5 | subword-nmt: 6 | git clone https://github.com/rsennrich/subword-nmt 7 | sacreBLEU: 8 | git clone https://github.com/marian-nmt/sacreBLEU -b master 9 | -------------------------------------------------------------------------------- /src/tensors/gpu/add.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tensors/tensor.h" 4 | 5 | namespace marian { 6 | 7 | namespace gpu { 8 | 9 | template 10 | void Add(Functor functor, float scale, marian::Tensor out, Tensors... 
tensors); 11 | } 12 | } // namespace marian 13 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/yaml-node.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/node/node.h" 2 | #include "nodebuilder.h" 3 | #include "nodeevents.h" 4 | 5 | namespace YAML { 6 | Node Clone(const Node& node) { 7 | NodeEvents events(node); 8 | NodeBuilder builder; 9 | events.Emit(builder); 10 | return builder.Root(); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/models/model_task.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace marian { 6 | 7 | struct ModelTask { 8 | virtual void run() = 0; 9 | }; 10 | 11 | struct ModelServiceTask { 12 | virtual void init() = 0; 13 | virtual std::string run(const std::string&) = 0; 14 | }; 15 | } // namespace marian 16 | -------------------------------------------------------------------------------- /src/examples/README.md: -------------------------------------------------------------------------------- 1 | Marian examples 2 | --------------- 3 | 4 | Examples are enabled with CMake option `-DCOMPILE_EXAMPLES=ON`. 5 | 6 | ## MNIST 7 | 8 | You will need MNIST data for training and testing. Download them with the 9 | script `src/examples/mnist/download.sh` or provide paths to the files with 10 | `--train-sets` and `--valid-sets` options. 
11 | -------------------------------------------------------------------------------- /src/common/version.cpp: -------------------------------------------------------------------------------- 1 | #include "common/version.h" 2 | #include "common/git_revision.h" // make-generated file, contains git commit info 3 | #include "common/project_version.h" // cmake-generated file, major/minor/tweak versions 4 | 5 | namespace marian { 6 | 7 | std::string buildVersion() { 8 | return std::string(PROJECT_VERSION) + " " + GIT_REVISION; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/latency/compare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "running spdlog and g3log tests 10 time with ${1:-10} threads each (total 1,000,000 entries).." 3 | rm -f *.log 4 | for i in {1..10} 5 | 6 | do 7 | echo 8 | sleep 0.5 9 | ./spdlog-latency ${1:-10} 2>/dev/null || exit 10 | sleep 0.5 11 | ./g3log-latency ${1:-10} 2>/dev/null || exit 12 | 13 | done 14 | -------------------------------------------------------------------------------- /examples/transformer/scripts/validate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cat $1 \ 4 | | sed 's/\@\@ //g' \ 5 | | ../tools/moses-scripts/scripts/recaser/detruecase.perl 2>/dev/null \ 6 | | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l de 2>/dev/null \ 7 | | ../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/valid.de \ 8 | | sed -r 's/BLEU = ([0-9.]+),.*/\1/' 9 | -------------------------------------------------------------------------------- /src/3rd_party/CLI/Version.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Distributed under the 3-Clause BSD License. See accompanying 4 | // file LICENSE or https://github.com/CLIUtils/CLI11 for details. 
5 | 6 | // [CLI11:verbatim] 7 | 8 | #define CLI11_VERSION_MAJOR 1 9 | #define CLI11_VERSION_MINOR 6 10 | #define CLI11_VERSION_PATCH 1 11 | #define CLI11_VERSION "1.6.1" 12 | 13 | // [CLI11:verbatim] 14 | -------------------------------------------------------------------------------- /examples/wmt2017-uedin/scripts/validate.en.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cat $1 \ 4 | | sed 's/\@\@ //g' \ 5 | | ../tools/moses-scripts/scripts/recaser/detruecase.perl 2>/dev/null \ 6 | | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l en 2>/dev/null \ 7 | | ../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/valid.en \ 8 | | sed -r 's/BLEU = ([0-9.]+),.*/\1/' 9 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/INSTALL: -------------------------------------------------------------------------------- 1 | spdlog is a header-only library. 2 | Just copy the files to your build tree and use a C++11 compiler 3 | 4 | Tested on: 5 | gcc 4.8.1 and above 6 | clang 3.5 7 | Visual Studio 2013 8 | 9 | gcc 4.8 flags: --std=c++11 -pthread -O3 -flto -Wl,--no-as-needed 10 | gcc 4.9 flags: --std=c++11 -pthread -O3 -flto 11 | 12 | 13 | see the makefile in the example folder 14 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | std::size_t count_lines(const std::string& filename); 7 | 8 | void prepare_logdir(); 9 | 10 | std::string file_contents(const std::string& filename); 11 | 12 | std::size_t count_lines(const std::string& filename); 13 | 14 | std::size_t get_filesize(const std::string& filename); 15 | 16 | -------------------------------------------------------------------------------- /examples/training-basics/scripts/validate.sh:
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cat $1 \ 4 | | sed 's/\@\@ //g' \ 5 | | ../tools/moses-scripts/scripts/recaser/detruecase.perl 2> /dev/null \ 6 | | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l en 2>/dev/null \ 7 | | ../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/newsdev2016.en \ 8 | | sed -r 's/BLEU = ([0-9.]+),.*/\1/' 9 | -------------------------------------------------------------------------------- /examples/wmt2017-transformer/scripts/validate.en.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cat $1 \ 4 | | sed 's/\@\@ //g' \ 5 | | ../tools/moses-scripts/scripts/recaser/detruecase.perl 2>/dev/null \ 6 | | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l en 2>/dev/null \ 7 | | ../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/valid.en \ 8 | | sed -r 's/BLEU = ([0-9.]+),.*/\1/' 9 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/includes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "catch.hpp" 11 | #include "utils.h" 12 | 13 | #include "../include/spdlog/spdlog.h" 14 | #include "../include/spdlog/sinks/null_sink.h" 15 | #include "../include/spdlog/sinks/ostream_sink.h" 16 | 17 | -------------------------------------------------------------------------------- /src/functional/defs.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef __CUDA_ARCH__ 4 | 5 | #include 6 | #define __H__ __host__ 7 | #define __D__ __device__ 8 | #define __HI__ __host__ inline 9 | #define __HD__ __host__ __device__ 10 | #define __HDI__ __host__ __device__ inline 11 | 12 | #else 13 | 14 | #define __H__ 15 | #define __D__ 
16 | #define __HI__ inline 17 | #define __HD__ 18 | #define __HDI__ inline 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /examples/wmt2017-uedin/scripts/validate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LC_ALL=C.UTF-8 4 | 5 | cat $1 \ 6 | | sed 's/\@\@ //g' \ 7 | | ../tools/moses-scripts/scripts/recaser/detruecase.perl 2>/dev/null \ 8 | | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l de 2>/dev/null \ 9 | | ../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/valid.de \ 10 | | sed -r 's/BLEU = ([0-9.]+),.*/\1/' 11 | -------------------------------------------------------------------------------- /src/3rd_party/zlib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # list of source files of the library 2 | file(GLOB ZLIB_SRC *.c) 3 | file(GLOB ZLIB_INC *.h) 4 | 5 | # add the zlib sources as an object library named "zlib" 6 | add_library(zlib OBJECT ${ZLIB_SRC} ${ZLIB_INC}) 7 | 8 | if(MSVC) 9 | target_compile_options(zlib PUBLIC /wd"4996" /wd"4267") 10 | else() 11 | target_compile_options(zlib PUBLIC -Wno-implicit-function-declaration) 12 | endif() 13 | -------------------------------------------------------------------------------- /examples/wmt2017-transformer/scripts/validate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LC_ALL=C.UTF-8 4 | 5 | cat $1 \ 6 | | sed 's/\@\@ //g' \ 7 | | ../tools/moses-scripts/scripts/recaser/detruecase.perl 2>/dev/null \ 8 | | ../tools/moses-scripts/scripts/tokenizer/detokenizer.perl -l de 2>/dev/null \ 9 | | ../tools/moses-scripts/scripts/generic/multi-bleu-detok.perl data/valid.de \ 10 | | sed -r 's/BLEU = ([0-9.]+),.*/\1/' 11 | --------------------------------------------------------------------------------
/src/3rd_party/sentencepiece/sentencepiece.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | libdir=@libdir@ 4 | includedir=@includedir@ 5 | 6 | Name: @PROJECT_NAME@ 7 | Description: Unsupervised text tokenizer and detokenizer for Neural Network-based text generation. 8 | Version: @PROJECT_VERSION@ 9 | Libs: -L${libdir} -lsentencepiece -lsentencepiece_train -lprotobuf @pkgconfiglibs@ 10 | Cflags: -I${includedir} @pkgconfigcflags@ 11 | -------------------------------------------------------------------------------- /examples/wmt2017-uedin/scripts/download-files-mono.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -v 2 | 3 | mkdir -p data 4 | cd data 5 | 6 | # get En-De training data for WMT17 7 | wget -nc http://data.statmt.org/wmt17/translation-task/news.2016.de.shuffled.gz 8 | 9 | zcat news.2016.de.shuffled.gz | shuf -n 11000000 | perl -ne 'print if(split(/\s/, $_) < 100)' | head -n 10000000 > news.2016.de 10 | 11 | # clean 12 | rm -r news.2016.de.shuffled.gz 13 | 14 | cd .. 
15 | -------------------------------------------------------------------------------- /src/models/transformer_factory.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "marian.h" 4 | 5 | #include "models/decoder.h" 6 | #include "models/encoder.h" 7 | //#include "models/states.h" 8 | //#include "layers/constructors.h" 9 | //#include "layers/factory.h" 10 | 11 | namespace marian { 12 | Ptr NewEncoderTransformer(Ptr options); 13 | Ptr NewDecoderTransformer(Ptr options); 14 | } // namespace marian 15 | -------------------------------------------------------------------------------- /examples/wmt2017-transformer/scripts/download-files-mono.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -v 2 | 3 | mkdir -p data 4 | cd data 5 | 6 | # get En-De training data for WMT17 7 | wget -nc http://data.statmt.org/wmt17/translation-task/news.2016.de.shuffled.gz 8 | 9 | zcat news.2016.de.shuffled.gz | shuf -n 11000000 | perl -ne 'print if(split(/\s/, $_) < 100)' | head -n 10000000 > news.2016.de 10 | 11 | # clean 12 | rm -r news.2016.de.shuffled.gz 13 | 14 | cd .. 15 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/easyl.conf: -------------------------------------------------------------------------------- 1 | * GLOBAL: 2 | FORMAT = "[%datetime]: %msg" 3 | FILENAME = ./logs/easylogging.log 4 | ENABLED = true 5 | TO_FILE = true 6 | TO_STANDARD_OUTPUT = false 7 | MILLISECONDS_WIDTH = 3 8 | PERFORMANCE_TRACKING = false 9 | MAX_LOG_FILE_SIZE = 10485760 10 | Log_Flush_Threshold = 10485760 11 | -------------------------------------------------------------------------------- /contrib/autoformat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -v 2 | 3 | if ! 
[ -x "$( command -v clang-format )" ] 4 | then 5 | mkdir -p $HOME/.local 6 | wget -O- http://releases.llvm.org/6.0.0/clang+llvm-6.0.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz | tar --xz -xf - -C $HOME/.local --strip 1 7 | fi 8 | 9 | find ./src \( -path ./src/3rd_party -o -path ./src/tests -o -path ./src/models/experimental \) -prune -o -iname *.h -o -iname *.cpp -o -iname *.cu | xargs clang-format -i 10 | -------------------------------------------------------------------------------- /runner/decode_validate.sh: -------------------------------------------------------------------------------- 1 | testset=data/newstest2014.bpe.en 2 | testset_ref=data/newstest2014.tc.de 3 | cat $testset | build/marian-decoder -m $1/model_revo.npz.best-translation.npz -v \ 4 | $1/vocab.en.yml $1/vocab.de.yml -b 6 -n 0.6 --mini-batch 100 -d 6 7 -o output.txt 5 | 6 | 7 | 8 | cat output.txt | sed 's/@@ //g' \ 9 | | ~/GOD.util/moses-scripts/scripts/generic/multi-bleu.perl -lc $testset_ref 10 | # | sed -r 's/BLEU = ([0-9.]+),.*/\1/' 11 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/fmt/ostr.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | // include external or bundled copy of fmtlib's ostream support 9 | // 10 | #if !defined(SPDLOG_FMT_EXTERNAL) 11 | #include "spdlog/fmt/fmt.h" 12 | #include "spdlog/fmt/bundled/ostream.h" 13 | #else 14 | #include 15 | #endif 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(iris_example iris/iris.cpp) 2 | add_executable(mnist_example mnist/mnist_ffnn.cpp) 3 | 4 | foreach(exec iris_example mnist_example) 5 | target_link_libraries(${exec} marian ${EXT_LIBS}) 6 | if(CUDA_FOUND) 7 | target_link_libraries(${exec} marian marian_cuda ${EXT_LIBS}) 8 | endif(CUDA_FOUND) 9 | set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") 10 | endforeach(exec) 11 | -------------------------------------------------------------------------------- /scripts/contrib/fix_hard.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | 4 | d = dict() 5 | m = np.load(sys.argv[1]) 6 | for k in m: 7 | if "ff_" == k[0:3]: 8 | d["decoder_" + k] = m[k] 9 | elif k == "special:model.yml": 10 | info = m[k].tobytes() 11 | info = info.replace("layers-dec", "dec-depth") 12 | info = info.replace("layers-enc", "enc-depth") 13 | d[k] = info 14 | print info 15 | else: 16 | d[k] = m[k] 17 | np.savez(sys.argv[1] + ".fixed", **d) -------------------------------------------------------------------------------- /src/command/marian_scorer.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | 3 | #include "models/model_task.h" 4 | #include "rescorer/rescorer.h" 5 | #include "common/timer.h" 6 | 7 | int main(int argc, char** argv) { 8 | using namespace marian; 9 | 10 | 
auto options = parseOptions(argc, argv, cli::mode::scoring); 11 | 12 | timer::Timer timer; 13 | New>(options)->run(); 14 | LOG(info, "Total time: {:.5f}s wall", timer.elapsed()); 15 | 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | namespace spdlog 12 | { 13 | namespace sinks 14 | { 15 | class sink 16 | { 17 | public: 18 | virtual ~sink() {} 19 | virtual void log(const details::log_msg& msg) = 0; 20 | virtual void flush() = 0; 21 | }; 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /src/examples/mnist/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ `ls -1 *-ubyte 2>/dev/null | wc -l ` == 4 ]; then 4 | echo Files exist: `ls -1 *-ubyte`; 5 | exit; 6 | fi 7 | 8 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz 9 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz 10 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz 11 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz 12 | 13 | gzip -d *-ubyte.gz 14 | -------------------------------------------------------------------------------- /src/tests/README.md: -------------------------------------------------------------------------------- 1 | Marian tests 2 | ============ 3 | 4 | Unit tests and application tests are enabled with CMake option 5 | `-DCOMPILE_TESTS=ON`, e.g.: 6 | 7 | cd build 8 | cmake .. 
-DCOMPILE_TESTS=ON 9 | make -j8 10 | 11 | Running all unit tests: 12 | 13 | make test 14 | 15 | Running a single unit test is also possible: 16 | 17 | ./src/tests/run_graph_tests 18 | 19 | We use [Catch framework](https://github.com/philsquared/Catch) for unit 20 | testing. 21 | -------------------------------------------------------------------------------- /src/tensors/cpu/backend.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "common/config.h" 7 | #include "tensors/backend.h" 8 | 9 | namespace marian { 10 | namespace cpu { 11 | 12 | class Backend : public marian::Backend { 13 | public: 14 | Backend(DeviceId deviceId, size_t seed) : marian::Backend(deviceId, seed) {} 15 | void setDevice() override {} 16 | void synchronize() override {} 17 | }; 18 | } // namespace cpu 19 | } // namespace marian 20 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/ext-net/dummy/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. 
3 | # 4 | # See LICENSE.txt for license information 5 | # 6 | NCCL_HOME:=../../build/ 7 | CUDA_HOME:=/usr/local/cuda 8 | INC:= -I$(NCCL_HOME)/include -I$(CUDA_HOME)/include 9 | PLUGIN_SO:=libnccl-net.so 10 | 11 | default: $(PLUGIN_SO) 12 | 13 | $(PLUGIN_SO): plugin.c 14 | $(CC) $(INC) -fPIC -shared -o $@ -Wl,-soname,$(PLUGIN_SO) $^ 15 | 16 | clean: 17 | rm -f $(PLUGIN_SO) 18 | -------------------------------------------------------------------------------- /src/3rd_party/zlib/inffast.h: -------------------------------------------------------------------------------- 1 | /* inffast.h -- header to use inffast.c 2 | * Copyright (C) 1995-2003, 2010 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | /* WARNING: this file should *not* be used by applications. It is 7 | part of the implementation of the compression library and is 8 | subject to change. Applications should only use zlib.h. 9 | */ 10 | 11 | void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); 12 | -------------------------------------------------------------------------------- /src/optimizers/clippers.cpp: -------------------------------------------------------------------------------- 1 | #include "clippers.h" 2 | 3 | #include "functional/functional.h" 4 | #include "tensors/tensor_operators.h" 5 | 6 | namespace marian { 7 | void Elementwise::clip(Tensor t) { 8 | using namespace functional; 9 | Element(_1 = functional::clip(_1, c_), t); 10 | } 11 | 12 | void Norm::clip(Tensor t) { 13 | using namespace functional; 14 | float l2Norm = L2Norm(t); 15 | if(l2Norm >= c_) 16 | Element(_1 = (c_ / l2Norm) * _1, t); 17 | } 18 | } // namespace marian 19 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/jni/Android.mk: -------------------------------------------------------------------------------- 1 | # Setup a project 2 | LOCAL_PATH := $(call my-dir) 3 | include $(CLEAR_VARS) 4 | 5 | 
LOCAL_MODULE := example 6 | LOCAL_SRC_FILES := example.cpp 7 | LOCAL_CPPFLAGS += -Wall -Wshadow -Wextra -pedantic -std=c++11 -fPIE -pie 8 | LOCAL_LDFLAGS += -fPIE -pie 9 | 10 | # Add exception support and set path for spdlog's headers 11 | LOCAL_CPPFLAGS += -fexceptions -I../include 12 | # Use android's log library 13 | LOCAL_LDFLAGS += -llog 14 | 15 | include $(BUILD_EXECUTABLE) 16 | -------------------------------------------------------------------------------- /src/marian.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // clang-format off 4 | #include "common/version.h" 5 | #include "common/config.h" 6 | #include "common/definitions.h" 7 | #include "common/logging.h" 8 | #include "common/options.h" 9 | #include "common/io.h" 10 | 11 | #include "data/batch_generator.h" 12 | #include "data/corpus.h" 13 | 14 | #include "graph/expression_graph.h" 15 | #include "graph/expression_operators.h" 16 | #include "graph/node_initializers.h" 17 | 18 | #include "optimizers/optimizers.h" 19 | // clang-format on 20 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/src/include/ring.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * See LICENSE.txt for license information 5 | ************************************************************************/ 6 | 7 | #ifndef NCCL_RING_H_ 8 | #define NCCL_RING_H_ 9 | #include "core.h" 10 | 11 | ncclResult_t initRing(struct ncclComm* comm, int ringid); 12 | ncclResult_t freeRing(struct ncclRing* ring); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # python compiled files 20 | *.pyc 21 | 22 | # Compiled Static libraries 23 | *.lai 24 | *.la 25 | *.a 26 | *.lib 27 | 28 | # Executables 29 | *.exe 30 | *.out 31 | *.app 32 | 33 | # Temporary files created by editors 34 | .*.sw* 35 | *~ 36 | 37 | # Contrib 38 | /.vimrc 39 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/contrib/graphbuilder.cpp: -------------------------------------------------------------------------------- 1 | #include "graphbuilderadapter.h" 2 | 3 | #include "yaml-cpp/parser.h" // IWYU pragma: keep 4 | 5 | namespace YAML { 6 | class GraphBuilderInterface; 7 | 8 | void* BuildGraphOfNextDocument(Parser& parser, 9 | GraphBuilderInterface& graphBuilder) { 10 | GraphBuilderAdapter eventHandler(graphBuilder); 11 | if (parser.HandleNextDocument(eventHandler)) { 12 | return eventHandler.RootNode(); 13 | } else { 14 | return NULL; 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/common/hash.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace marian { 6 | namespace util { 7 | 8 | template using 
hash = std::hash; 9 | 10 | // This combinator is based on boost::hash_combine, but uses 11 | // std::hash as the hash implementation. Used as a drop-in 12 | // replacement for boost::hash_combine. 13 | 14 | template 15 | inline void hash_combine(std::size_t& seed, T const& v) { 16 | hash hasher; 17 | seed ^= hasher(v) + 0x9e3779b9 + (seed<<6) + (seed>>2); 18 | } 19 | 20 | } 21 | } -------------------------------------------------------------------------------- /src/3rd_party/SQLiteCpp/sqlite3/README.md: -------------------------------------------------------------------------------- 1 | sqlite3 2 | ------- 3 | 4 | "sqlite3.c" and "sqlite3.h" files from sqlite-amalgamation-3120200.zip (SQLite 3.12.2 2016-04-18) 5 | 6 | Those files are provided for easy setup and compatibility under Windows/Linux/MacOS. 7 | They are used by default by the CMake build. 8 | 9 | Use -DSQLITECPP_INTERNAL_SQLITE=OFF to link against the Linux "libsqlite3-dev" package instead. 10 | 11 | ### License: 12 | 13 | All of the code and documentation in SQLite has been dedicated to the public domain by the authors. 14 | 15 | -------------------------------------------------------------------------------- /vs/BuildRelease.bat: -------------------------------------------------------------------------------- 1 | :: 2 | :: Usage: BuildRelease.bat [=.\build] 3 | :: 4 | :: This script runs the dependency checks, generate the projects/makefiles and then 5 | :: build the project in Release configuration. 6 | :: 7 | :: 8 | @echo off 9 | setlocal 10 | 11 | set ROOT=%~dp0 12 | set MARIAN_ROOT=%ROOT%.. 
13 | 14 | set BUILD_ROOT=%1 15 | if "%BUILD_ROOT%"=="" set BUILD_ROOT=%ROOT%build 16 | 17 | call CreateVSProjects.bat %BUILD_ROOT% 18 | if errorlevel 1 exit /b 1 19 | 20 | cmake --build %BUILD_ROOT% --config Release 21 | 22 | exit /b 0 -------------------------------------------------------------------------------- /src/common/project_version.h.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * File project_version.h is generated using CMake. Do NOT modify it manually! Edit 5 | * the project_version.h.in file instead. 6 | */ 7 | 8 | // e.g. v1.2.3-beta+1.abc123d 9 | #define PROJECT_VERSION_FULL "@PROJECT_VERSION_STRING_FULL@" 10 | // e.g. v1.2.3-beta 11 | #define PROJECT_VERSION "@PROJECT_VERSION_STRING@" 12 | #define PROJECT_VERSION_MAJOR @PROJECT_VERSION_MAJOR@ 13 | #define PROJECT_VERSION_MINOR @PROJECT_VERSION_MINOR@ 14 | #define PROJECT_VERSION_PATCH @PROJECT_VERSION_PATCH@ 15 | -------------------------------------------------------------------------------- /src/tensors/rand.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | 5 | #include 6 | 7 | namespace marian { 8 | 9 | class TensorBase; 10 | typedef Ptr Tensor; 11 | 12 | class RandomGenerator { 13 | protected: 14 | size_t seed_; 15 | 16 | public: 17 | RandomGenerator(size_t seed) : seed_(seed) { } 18 | 19 | virtual void uniform(Tensor, float a, float b) = 0; 20 | virtual void normal(Tensor, float mean, float stddev) = 0; 21 | }; 22 | 23 | Ptr createRandomGenerator(size_t /*seed*/, DeviceId); 24 | 25 | } -------------------------------------------------------------------------------- /src/command/marian_decoder.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | #include "translator/beam_search.h" 3 | #include "translator/translator.h" 4 | #include "common/timer.h" 5 | #ifdef _WIN32 6 |
#include 7 | #endif 8 | 9 | int main(int argc, char** argv) { 10 | using namespace marian; 11 | 12 | auto options = parseOptions(argc, argv, cli::mode::translation); 13 | auto task = New>(options); 14 | 15 | timer::Timer timer; 16 | task->run(); 17 | LOG(info, "Total time: {:.5f}s wall", timer.elapsed()); 18 | 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/glog-bench.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include "glog/logging.h" 7 | 8 | 9 | int main(int, char* argv[]) 10 | { 11 | int howmany = 1000000; 12 | 13 | 14 | FLAGS_logtostderr = 0; 15 | FLAGS_log_dir = "logs"; 16 | google::InitGoogleLogging(argv[0]); 17 | for(int i = 0 ; i < howmany; ++i) 18 | LOG(INFO) << "glog message # " << i << ": This is some text for your pleasure"; 19 | 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/src/collectives/device/all_gather.cu: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * See LICENSE.txt for license information 5 | ************************************************************************/ 6 | 7 | #include "common.h" 8 | #include "all_gather.h" 9 | #include "collectives.h" 10 | 11 | #define UNROLL 4 12 | 13 | #if NCCL_OP == 0 14 | IMPL_COLL3(ncclAllGather, copy, FuncSum, i8, int8_t, ncclCollAllGather, ncclSum, ncclInt8); 15 | #endif 16 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/src/collectives/device/broadcast.cu: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * See LICENSE.txt for license information 5 | ************************************************************************/ 6 | 7 | #include "common.h" 8 | #include "broadcast.h" 9 | #include "collectives.h" 10 | 11 | #define UNROLL 4 12 | 13 | #if NCCL_OP == 0 14 | IMPL_COLL3(ncclBroadcast, copy, FuncSum, i8, int8_t, ncclCollBroadcast, ncclSum, ncclInt8); 15 | #endif 16 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/directives.cpp: -------------------------------------------------------------------------------- 1 | #include "directives.h" 2 | 3 | namespace YAML { 4 | Directives::Directives() { 5 | // version 6 | version.isDefault = true; 7 | version.major = 1; 8 | version.minor = 2; 9 | } 10 | 11 | const std::string Directives::TranslateTagHandle( 12 | const std::string& handle) const { 13 | std::map::const_iterator it = tags.find(handle); 14 | if (it == tags.end()) { 15 | if (handle == "!!") 16 | return "tag:yaml.org,2002:"; 17 | return handle; 18 | } 19 | 20 | return it->second; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/anchor.h: 
-------------------------------------------------------------------------------- 1 | #ifndef ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | namespace YAML { 13 | typedef std::size_t anchor_t; 14 | const anchor_t NullAnchor = 0; 15 | } 16 | 17 | #endif // ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 18 | -------------------------------------------------------------------------------- /examples/training-basics/scripts/normalise-romanian.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Author: Barry Haddow 4 | # Distributed under MIT license 5 | 6 | # 7 | # Normalise Romanian s-comma and t-comma 8 | 9 | import io 10 | import sys 11 | istream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') 12 | ostream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') 13 | 14 | for line in istream: 15 | line = line.replace("\u015e", "\u0218").replace("\u015f", "\u0219") 16 | line = line.replace("\u0162", "\u021a").replace("\u0163", "\u021b") 17 | ostream.write(line) 18 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/emitterstyle.h: -------------------------------------------------------------------------------- 1 | #ifndef EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | namespace YAML { 11 | struct EmitterStyle { 12 | enum value { Default, Block, Flow }; 13 | 
}; 14 | } 15 | 16 | #endif // EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 17 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/type.h: -------------------------------------------------------------------------------- 1 | #ifndef VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | namespace YAML { 11 | struct NodeType { 12 | enum value { Undefined, Null, Scalar, Sequence, Map }; 13 | }; 14 | } 15 | 16 | #endif // VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 17 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Tests 3 | # 4 | 5 | enable_testing() 6 | 7 | find_package(Threads) 8 | 9 | # Build Catch unit tests 10 | add_library(catch INTERFACE) 11 | target_include_directories(catch INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) 12 | 13 | file(GLOB catch_tests LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp *.h *.hpp) 14 | 15 | add_executable(catch_tests ${catch_tests}) 16 | target_link_libraries(catch_tests spdlog ${CMAKE_THREAD_LIBS_INIT}) 17 | add_test(NAME catch_tests COMMAND catch_tests) 18 | file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/logs") 19 | 20 | -------------------------------------------------------------------------------- /src/functional/functional.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "functional/operands.h" 4 | #include "functional/predicates.h" 5 | 6 | namespace marian { 7 | namespace functional { 8 | 9 | template 10 | using ref = Assignee; 11 | 12 | 
static ref<1> _1; 13 | static ref<2> _2; 14 | static ref<3> _3; 15 | static ref<4> _4; 16 | static ref<5> _5; 17 | static ref<6> _6; 18 | static ref<7> _7; 19 | static ref<8> _8; 20 | static ref<9> _9; 21 | 22 | const C<0> _0c; 23 | const C<1> _1c; 24 | const C<2> _2c; 25 | const C<-1> _1cneg; 26 | const C<-2> _2cneg; 27 | } // namespace functional 28 | } // namespace marian -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/install_libcxx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Install libc++ under travis 4 | 5 | svn --quiet co http://llvm.org/svn/llvm-project/libcxx/trunk libcxx 6 | mkdir libcxx/build 7 | (cd libcxx/build && cmake .. -DLIBCXX_CXX_ABI=libstdc++ -DLIBCXX_CXX_ABI_INCLUDE_PATHS="/usr/include/c++/4.6;/usr/include/c++/4.6/x86_64-linux-gnu") 8 | make -C libcxx/build cxx -j2 9 | sudo cp libcxx/build/lib/libc++.so.1.0 /usr/lib/ 10 | sudo cp -r libcxx/build/include/c++/v1 /usr/include/c++/v1/ 11 | sudo ln -sf /usr/lib/libc++.so.1.0 /usr/lib/libc++.so 12 | sudo ln -sf /usr/lib/libc++.so.1.0 /usr/lib/libc++.so.1 13 | -------------------------------------------------------------------------------- /src/tensors/backend.cpp: -------------------------------------------------------------------------------- 1 | #include "tensors/backend.h" 2 | 3 | #ifdef CUDA_FOUND 4 | #include "tensors/gpu/backend.h" 5 | #pragma warning(disable:4505) // "unreferenced local function has been removed" in cuda\v9.2\include\cuda_fp16.hpp 6 | #endif 7 | 8 | #include "tensors/cpu/backend.h" 9 | 10 | namespace marian { 11 | 12 | Ptr BackendByDeviceId(DeviceId deviceId, size_t seed) { 13 | #ifdef CUDA_FOUND 14 | if(deviceId.type == DeviceType::gpu) 15 | return New(deviceId, seed); 16 | else 17 | #endif 18 | return New(deviceId, seed); 19 | } 20 | } // namespace marian 21 | -------------------------------------------------------------------------------- 
import sys

import numpy as np

# Starting competence value used by the curve below.
C0 = 0.001


def competence(t, T, c0=C0):
    """Competence at step `t` for a schedule of `T` steps.

    Computes sqrt(t * (1 - c0^2) / T + c0^2), capped at 1.0.
    `T` must be non-zero.
    """
    squared = t * ((1 - c0 * c0) / T) + c0 * c0
    return min(1.0, squared ** 0.5)


def main(argv):
    """Plot the CDF stored in an .npz file, then the competence curve.

    argv[1]: path to an .npz archive holding the arrays 'base' and 'cdf'
    argv[2]: integer T, the number of steps on the competence curve
    Returns the process exit status.
    """
    print("python x.py cdf_base.npz T")
    if len(argv) < 3:
        # Both arguments are required; stop here instead of an IndexError.
        return 1

    # Imported here so that competence() stays importable on machines
    # without matplotlib or a display backend.
    import matplotlib.pyplot as plt

    # NpzFile keeps the archive open until closed; read the arrays and let
    # the context manager release the file handle.
    with np.load(argv[1]) as f:
        base = f['base']
        cdf = f['cdf']

    plt.title(argv[1])
    plt.plot(base[:-1], cdf)
    # plt.show()

    # make competence graph
    plt.figure(2)
    T = int(argv[2])
    t_data = list(range(T))
    c_data = [competence(t, T) for t in t_data]

    plt.title("Competence")
    plt.plot(t_data, c_data)
    plt.show()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
import sys


def rescore(lines):
    """Yield the best hypothesis for each sentence number in `lines`.

    Each line has the form ``num ||| hypothesis ||| scores``. `scores` is a
    space-separated list; tokens ending in "=" are skipped and the remaining
    tokens are summed as floats. The sum is divided by the number of
    space-separated hypothesis tokens plus one, and the hypothesis with the
    highest resulting value wins (the first one on ties).

    A group ends when a line with a larger `num` is seen, so lines are
    expected to be grouped by non-decreasing `num`.
    Yields nothing for empty input.
    """
    current_num = None
    best_score = float("-inf")
    best_line = None

    for line in lines:
        fields = line.rstrip("\n").split(" ||| ")
        num = int(fields[0])
        hypothesis = fields[1]

        total = sum(float(token)
                    for token in fields[2].split(" ")
                    if token and not token.endswith("="))
        score = total / float(len(hypothesis.split(" ")) + 1)

        if current_num is None:
            # First line: open the first group, there is nothing to emit yet.
            current_num = num
        elif num > current_num:
            # A new sentence starts: emit the winner of the previous one.
            yield best_line
            best_score = float("-inf")
            best_line = None
            current_num = num

        if score > best_score:
            best_score = score
            best_line = hypothesis

    if current_num is not None:
        yield best_line


if __name__ == "__main__":
    for best in rescore(sys.stdin):
        print(best)
import sys


def rescore(lines):
    """Yield the best hypothesis for each sentence number in `lines`.

    Each line has the form ``num ||| hypothesis ||| scores``. `scores` is a
    space-separated list; tokens ending in "=" are skipped and the remaining
    tokens are summed as floats. The sum is divided by the number of
    space-separated hypothesis tokens plus one, and the hypothesis with the
    highest resulting value wins (the first one on ties).

    A group ends when a line with a larger `num` is seen, so lines are
    expected to be grouped by non-decreasing `num`.
    Yields nothing for empty input.
    """
    current_num = None
    best_score = float("-inf")
    best_line = None

    for line in lines:
        fields = line.rstrip("\n").split(" ||| ")
        num = int(fields[0])
        hypothesis = fields[1]

        total = sum(float(token)
                    for token in fields[2].split(" ")
                    if token and not token.endswith("="))
        score = total / float(len(hypothesis.split(" ")) + 1)

        if current_num is None:
            # First line: open the first group, there is nothing to emit yet.
            current_num = num
        elif num > current_num:
            # A new sentence starts: emit the winner of the previous one.
            yield best_line
            best_score = float("-inf")
            best_line = None
            current_num = num

        if score > best_score:
            best_score = score
            best_line = hypothesis

    if current_num is not None:
        yield best_line


if __name__ == "__main__":
    for best in rescore(sys.stdin):
        print(best)
128 : 256; 13 | } 14 | 15 | ncclResult_t ncclGetRings(int* nrings, int* nthreads, int rank, int nranks, int* transports, ncclTvalue_t* values, int* prev, int* next); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/tensors/gpu/algorithm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tensors/backend.h" 4 | 5 | namespace marian { 6 | namespace gpu { 7 | template 8 | void copy(Ptr backend, const T* begin, const T* end, T* dest); 9 | 10 | template 11 | void fill(Ptr backend, T* begin, T* end, T value); 12 | 13 | template 14 | void swap_ranges(Ptr backend, T* begin, T* end, T* dest); 15 | 16 | void setSparse(Ptr backend, 17 | const std::vector&, 18 | const std::vector&, 19 | float*); 20 | } // namespace gpu 21 | } // namespace marian 22 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/appveyor.yml: -------------------------------------------------------------------------------- 1 | version: '{branch} build {build}' 2 | image: Visual Studio 2015 3 | platform: 4 | - x64 5 | - Win32 6 | configuration: Release 7 | clone_depth: 50 8 | clone_folder: c:\projects\sentencepiece 9 | init: 10 | build_script: 11 | - cmd: call test.bat %platform% 12 | artifacts: 13 | - path: build\sentencepiece*.7z 14 | - path: python\dist\*.whl 15 | deploy: 16 | description: 'SentencePiece Windows release' 17 | provider: GitHub 18 | auth_token: 19 | secure: Aq4jHo/HY6WFFKs1h9cCWfi3U4ZsVTooUEhtgBfcJM6SUhnZdPVazIcKCtiR32kc 20 | draft: false 21 | prerelease: false 22 | on: 23 | branch: master 24 | appveyor_repo_tag: true 25 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/memory.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/node/detail/memory.h" 2 | #include "yaml-cpp/node/detail/node.h" // 
IWYU pragma: keep 3 | #include "yaml-cpp/node/ptr.h" 4 | 5 | namespace YAML { 6 | namespace detail { 7 | 8 | void memory_holder::merge(memory_holder& rhs) { 9 | if (m_pMemory == rhs.m_pMemory) 10 | return; 11 | 12 | m_pMemory->merge(*rhs.m_pMemory); 13 | rhs.m_pMemory = m_pMemory; 14 | } 15 | 16 | node& memory::create_node() { 17 | shared_node pNode(new node); 18 | m_nodes.insert(pNode); 19 | return *pNode; 20 | } 21 | 22 | void memory::merge(const memory& rhs) { 23 | m_nodes.insert(rhs.m_nodes.begin(), rhs.m_nodes.end()); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/emit.cpp: -------------------------------------------------------------------------------- 1 | #include "yaml-cpp/node/emit.h" 2 | #include "yaml-cpp/emitfromevents.h" 3 | #include "yaml-cpp/emitter.h" 4 | #include "nodeevents.h" 5 | 6 | namespace YAML { 7 | Emitter& operator<<(Emitter& out, const Node& node) { 8 | EmitFromEvents emitFromEvents(out); 9 | NodeEvents events(node); 10 | events.Emit(emitFromEvents); 11 | return out; 12 | } 13 | 14 | std::ostream& operator<<(std::ostream& out, const Node& node) { 15 | Emitter emitter(out); 16 | emitter << node; 17 | return out; 18 | } 19 | 20 | std::string Dump(const Node& node) { 21 | Emitter emitter; 22 | emitter << node; 23 | return emitter.c_str(); 24 | } 25 | } // namespace YAML 26 | -------------------------------------------------------------------------------- /src/translator/helpers.h: -------------------------------------------------------------------------------- 1 | /* All or part of this file was contributed by Intel under license: 2 | * Copyright (C) 2017-2018 Intel Corporation 3 | * SPDX-License-Identifier: MIT 4 | */ 5 | 6 | #pragma once 7 | 8 | #include "graph/expression_graph.h" 9 | 10 | namespace marian { 11 | 12 | namespace cpu { 13 | 14 | void suppressWord(Expr logProbs, Word id); 15 | } 16 | 17 | namespace gpu { 18 | 19 | void suppressWord(Expr 
logProbs, Word id); 20 | 21 | void SetColumnId(Tensor in_, size_t col, float value); 22 | } 23 | 24 | void suppressWord(Expr logProbs, Word id); 25 | 26 | void suppressWordSent(Expr logProbs, Word id, std::vector sent_ids); 27 | 28 | 29 | } // namespace marian 30 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/spdlog-bench.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include "spdlog/spdlog.h" 7 | 8 | 9 | int main(int, char* []) 10 | { 11 | int howmany = 1000000; 12 | namespace spd = spdlog; 13 | ///Create a file rotating logger with 5mb size max and 3 rotated files 14 | auto logger = spdlog::create("file_logger", "logs/spd-bench-st.txt", false); 15 | 16 | logger->set_pattern("[%Y-%b-%d %T.%e]: %v"); 17 | for(int i = 0 ; i < howmany; ++i) 18 | logger->info("spdlog message #{} : This is some text for your pleasure", i); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /src/common/io_item.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/shape.h" 4 | #include "common/types.h" 5 | 6 | #include 7 | 8 | namespace marian { 9 | namespace io { 10 | 11 | struct Item { 12 | std::vector bytes; 13 | const char* ptr{0}; 14 | bool mapped{false}; 15 | 16 | std::string name; 17 | Shape shape; 18 | Type type{Type::float32}; 19 | 20 | const char* data() const { 21 | if(mapped) 22 | return ptr; 23 | else 24 | return bytes.data(); 25 | } 26 | 27 | size_t size() const { 28 | if(mapped) 29 | return shape.elements() * sizeOf(type); 30 | else 31 | return bytes.size(); 32 | } 33 | }; 34 | 35 | } // namespace io 36 | } // namespace marian 37 | 
-------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/null_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | namespace spdlog 14 | { 15 | namespace sinks 16 | { 17 | 18 | template 19 | class null_sink : public base_sink < Mutex > 20 | { 21 | protected: 22 | void _sink_it(const details::log_msg&) override 23 | {} 24 | 25 | void flush() override 26 | {} 27 | 28 | }; 29 | typedef null_sink null_sink_st; 30 | typedef null_sink null_sink_mt; 31 | 32 | } 33 | } 34 | 35 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/scantag.h: -------------------------------------------------------------------------------- 1 | #ifndef SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include "stream.h" 12 | 13 | namespace YAML { 14 | const std::string ScanVerbatimTag(Stream& INPUT); 15 | const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle); 16 | const std::string ScanTagSuffix(Stream& INPUT); 17 | } 18 | 19 | #endif // SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 20 | -------------------------------------------------------------------------------- /src/data/rng_engine.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "common/config.h" 7 | 8 | namespace marian { 9 | namespace data { 10 | 11 | /** 12 | * @brief Class providing an engine for 
pseudo-random number generation. 13 | */ 14 | class RNGEngine { 15 | protected: 16 | std::mt19937 eng_; 17 | 18 | public: 19 | RNGEngine() : eng_((unsigned int)Config::seed) {} 20 | 21 | std::string getRNGState() { 22 | std::ostringstream oss; 23 | oss << eng_; 24 | return oss.str(); 25 | } 26 | 27 | void setRNGState(std::string engineState) { 28 | std::istringstream iss(engineState); 29 | iss >> eng_; 30 | } 31 | }; 32 | } // namespace data 33 | } // namespace marian 34 | -------------------------------------------------------------------------------- /src/translator/nth_element.h: -------------------------------------------------------------------------------- 1 | /* All or part of this file was contributed by Intel under license: 2 | * Copyright (C) 2017-2018 Intel Corporation 3 | * SPDX-License-Identifier: MIT 4 | */ 5 | 6 | #pragma once 7 | 8 | #include "tensors/tensor.h" 9 | #include 10 | 11 | namespace marian { 12 | 13 | typedef std::function& beamSizes, 14 | Tensor logProbs, 15 | std::vector& outCosts, 16 | std::vector& outKeys, 17 | const bool isFirst)> GetNBestListFn; 18 | 19 | GetNBestListFn createGetNBestListFn(size_t beamSize, size_t dimBatch, DeviceId deviceId); 20 | } // namespace marian 21 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/null_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/sinks/base_sink.h" 9 | #include "spdlog/details/null_mutex.h" 10 | 11 | #include 12 | 13 | namespace spdlog 14 | { 15 | namespace sinks 16 | { 17 | 18 | template 19 | class null_sink : public base_sink < Mutex > 20 | { 21 | protected: 22 | void _sink_it(const details::log_msg&) override 23 | {} 24 | 25 | void _flush() override 26 | {} 27 | 28 | }; 29 | typedef null_sink null_sink_st; 30 | typedef null_sink null_sink_mt; 31 | 32 | } 33 | } 34 | 35 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/src/include/bootstrap.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * See LICENSE.txt for license information 5 | ************************************************************************/ 6 | 7 | #ifndef NCCL_BOOTSTRAP_H_ 8 | #define NCCL_BOOTSTRAP_H_ 9 | 10 | #include "nccl.h" 11 | 12 | ncclResult_t bootstrapCreateRoot(ncclUniqueId* commId, bool idFromEnv); 13 | ncclResult_t bootstrapGetUniqueId(ncclUniqueId* out); 14 | ncclResult_t bootstrapInit(ncclUniqueId* id, int rank, int nranks, void** commState); 15 | ncclResult_t bootstrapAllGather(void* commState, void* allData, int size); 16 | ncclResult_t bootstrapClose(void* commState); 17 | #endif 18 | -------------------------------------------------------------------------------- /src/3rd_party/CLI/CLI.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Distributed under the 3-Clause BSD License. See accompanying 4 | // file LICENSE or https://github.com/CLIUtils/CLI11 for details. 
5 | 6 | // CLI Library includes 7 | // Order is important for combiner script 8 | 9 | #include "CLI/Version.hpp" 10 | 11 | #include "CLI/Macros.hpp" 12 | 13 | #include "CLI/Optional.hpp" 14 | 15 | #include "CLI/StringTools.hpp" 16 | 17 | #include "CLI/Error.hpp" 18 | 19 | #include "CLI/TypeTools.hpp" 20 | 21 | #include "CLI/Split.hpp" 22 | 23 | #include "CLI/ConfigFwd.hpp" 24 | 25 | #include "CLI/Validators.hpp" 26 | 27 | #include "CLI/FormatterFwd.hpp" 28 | 29 | #include "CLI/Option.hpp" 30 | 31 | #include "CLI/App.hpp" 32 | 33 | #include "CLI/Config.hpp" 34 | 35 | #include "CLI/Formatter.hpp" 36 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/pkg/txz/create_txz.sh.in: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # See LICENSE.txt for license information 6 | # 7 | 8 | # To run from $BUILDDIR/ 9 | 10 | BUILDDIR=`basename $PWD` 11 | 12 | cd .. 13 | NCCL_MAJOR=${nccl:Major} 14 | NCCL_MINOR=${nccl:Minor} 15 | NCCL_PATCH=${nccl:Patch} 16 | NCCL_SUFFIX=${nccl:Suffix} 17 | CUDA_MAJOR=${cuda:Major} 18 | CUDA_MINOR=${cuda:Minor} 19 | PKG_REVISION=${pkg:Revision} 20 | PKG_ARCH=${pkg:Arch} 21 | 22 | NCCLNAME="nccl_${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_PATCH}${NCCL_SUFFIX}-${PKG_REVISION}+cuda${CUDA_MAJOR}.${CUDA_MINOR}_${PKG_ARCH}" 23 | 24 | tar --transform "s/^$BUILDDIR/$NCCLNAME/" -Jcf $NCCLNAME.txz --owner=0 --group=0 $BUILDDIR/include $BUILDDIR/lib $BUILDDIR/*.txt 25 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/utils.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
//
// Copyright(c) 2015 Gabi Melman.
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
//

#pragma once

#include <iomanip>
#include <locale>
#include <sstream>
#include <stdexcept>
#include <string>

namespace utils
{

namespace detail
{

// Locale named by the environment, built once and shared by every format()
// overload. std::locale("") throws std::runtime_error when the environment
// names a locale the system does not provide; fall back to the classic "C"
// locale so formatting a number can never abort the benchmark.
inline const std::locale& user_locale()
{
    static const std::locale loc = []() -> std::locale
    {
        try
        {
            return std::locale("");
        }
        catch (const std::runtime_error&)
        {
            return std::locale::classic();
        }
    }();
    return loc;
}

} // namespace detail

// Streams `value` into a string using the user's locale.
// T may be any type that has an operator<< for std::ostream.
template<typename T>
inline std::string format(const T& value)
{
    std::stringstream ss;
    ss.imbue(detail::user_locale());
    ss << value;
    return ss.str();
}

// Doubles are printed in fixed notation with exactly one decimal digit.
template<>
inline std::string format(const double& value)
{
    std::stringstream ss;
    ss.imbue(detail::user_locale());
    ss << std::fixed << std::setprecision(1) << value;
    return ss.str();
}

} // namespace utils
//
// Copyright(c) 2015 Gabi Melman.
// Distributed under the MIT License (http://opensource.org/licenses/MIT)
//

#pragma once

#include <iomanip>
#include <locale>
#include <sstream>
#include <stdexcept>
#include <string>

namespace utils
{

namespace detail
{

// Locale named by the environment, built once and shared by every format()
// overload. std::locale("") throws std::runtime_error when the environment
// names a locale the system does not provide; fall back to the classic "C"
// locale so formatting a number can never abort the benchmark.
inline const std::locale& user_locale()
{
    static const std::locale loc = []() -> std::locale
    {
        try
        {
            return std::locale("");
        }
        catch (const std::runtime_error&)
        {
            return std::locale::classic();
        }
    }();
    return loc;
}

} // namespace detail

// Streams `value` into a string using the user's locale.
// T may be any type that has an operator<< for std::ostream.
template<typename T>
inline std::string format(const T& value)
{
    std::stringstream ss;
    ss.imbue(detail::user_locale());
    ss << value;
    return ss.str();
}

// Doubles are printed in fixed notation with exactly one decimal digit.
template<>
inline std::string format(const double& value)
{
    std::stringstream ss;
    ss.imbue(detail::user_locale());
    ss << std::fixed << std::setprecision(1) << value;
    return ss.str();
}

} // namespace utils
#!/bin/bash -v

# Downloads the En-De training archives, unpacks them, concatenates the
# German and English sides into corpus.de / corpus.en, then removes the
# unpacked sources. Everything happens inside ./data.

mkdir -p data
# Stop if the working directory cannot be entered: every command below,
# including the final `rm -r` on globs, must only ever run inside data/.
cd data || exit 1

# get En-De training data for WMT17
wget -nc http://www.statmt.org/europarl/v7/de-en.tgz
wget -nc http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz
wget -nc http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz

# extract data
tar -xf de-en.tgz
tar -xf training-parallel-commoncrawl.tgz
tar -xf training-parallel-nc-v12.tgz

# create corpus files
cat europarl-v7.de-en.de commoncrawl.de-en.de training/news-commentary-v12.de-en.de > corpus.de
cat europarl-v7.de-en.en commoncrawl.de-en.en training/news-commentary-v12.de-en.en > corpus.en

# clean
rm -r europarl-* commoncrawl.* training/ *.tgz

cd ..
#!/bin/bash -v

# Downloads the En-De training archives, unpacks them, concatenates the
# German and English sides into corpus.de / corpus.en, then removes the
# unpacked sources. Everything happens inside ./data.

mkdir -p data
# Stop if the working directory cannot be entered: every command below,
# including the final `rm -r` on globs, must only ever run inside data/.
cd data || exit 1

# get En-De training data for WMT17
wget -nc http://www.statmt.org/europarl/v7/de-en.tgz
wget -nc http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz
wget -nc http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz

# extract data
tar -xf de-en.tgz
tar -xf training-parallel-commoncrawl.tgz
tar -xf training-parallel-nc-v12.tgz

# create corpus files
cat europarl-v7.de-en.de commoncrawl.de-en.de training/news-commentary-v12.de-en.de > corpus.de
cat europarl-v7.de-en.en commoncrawl.de-en.en training/news-commentary-v12.de-en.en > corpus.en

# clean
rm -r europarl-* commoncrawl.* training/ *.tgz

cd ..
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Barry Haddow
# Distributed under MIT license

#
# Remove Romanian diacritics. Assumes s-comma and t-comma are normalised

import io
import sys

# Every source code point maps to a plain ASCII letter, and no target is
# itself a source, so one translate() pass is equivalent to applying the
# replacements one after another.
_ASCII_TABLE = str.maketrans({
    "\u0218": "S", "\u0219": "s",  # s-comma
    "\u021a": "T", "\u021b": "t",  # t-comma
    "\u0102": "A", "\u0103": "a",
    "\u00C2": "A", "\u00E2": "a",
    "\u00CE": "I", "\u00EE": "i",
})


def remove_diacritics(line):
    """Return `line` with the code points listed above replaced by ASCII.

    All other characters, including the trailing newline, are left untouched.
    """
    return line.translate(_ASCII_TABLE)


if __name__ == "__main__":
    # Explicit UTF-8 wrappers: do not depend on the locale of the caller.
    istream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    ostream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

    for line in istream:
        ostream.write(remove_diacritics(line))
    # Do not rely on interpreter shutdown order to push out buffered text.
    ostream.flush()
#!/bin/bash -v

# Downloads the En-De training archives, unpacks them, concatenates the
# German and English sides into corpus.de / corpus.en, then removes the
# unpacked sources. Everything happens inside ./data.

mkdir -p data
# Stop if the working directory cannot be entered: every command below,
# including the final `rm -r` on globs, must only ever run inside data/.
cd data || exit 1

# get En-De training data for WMT17
wget -nc http://www.statmt.org/europarl/v7/de-en.tgz
wget -nc http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz
wget -nc http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz

# extract data
tar -xf de-en.tgz
tar -xf training-parallel-commoncrawl.tgz
tar -xf training-parallel-nc-v12.tgz

# create corpus files
cat europarl-v7.de-en.de commoncrawl.de-en.de training/news-commentary-v12.de-en.de > corpus.de
cat europarl-v7.de-en.en commoncrawl.de-en.en training/news-commentary-v12.de-en.en > corpus.en

# clean
rm -r europarl-* commoncrawl.* training/ *.tgz

cd ..
3 | # 4 | # See LICENSE.txt for license information 5 | # 6 | .PHONY : all clean 7 | 8 | default : src.build 9 | install : src.install 10 | BUILDDIR ?= $(abspath ./build) 11 | ABSBUILDDIR := $(abspath $(BUILDDIR)) 12 | TARGETS := src pkg 13 | clean: ${TARGETS:%=%.clean} 14 | test.build: src.build 15 | LICENSE_FILES := LICENSE.txt 16 | LICENSE_TARGETS := $(LICENSE_FILES:%=$(BUILDDIR)/%) 17 | lic: $(LICENSE_TARGETS) 18 | 19 | ${BUILDDIR}/%.txt: %.txt 20 | @printf "Copying %-35s > %s\n" $< $@ 21 | mkdir -p ${BUILDDIR} 22 | cp $< $@ 23 | 24 | src.%: 25 | ${MAKE} -C src $* BUILDDIR=${ABSBUILDDIR} 26 | 27 | pkg.%: 28 | ${MAKE} -C pkg $* BUILDDIR=${ABSBUILDDIR} 29 | 30 | pkg.debian.prep: lic 31 | pkg.txz.prep: lic 32 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/src/collectives/device/reduce.cu: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * See LICENSE.txt for license information 5 | ************************************************************************/ 6 | 7 | #include "common.h" 8 | #include "reduce.h" 9 | #include "collectives.h" 10 | 11 | #define UNROLL 4 12 | 13 | #if NCCL_OP == 0 14 | IMPL_COLL2(ncclReduce, sum, FuncSum, ncclCollReduce, ncclSum); 15 | #elif NCCL_OP == 1 16 | IMPL_COLL2(ncclReduce, prod, FuncProd, ncclCollReduce, ncclProd); 17 | #elif NCCL_OP == 2 18 | IMPL_COLL2(ncclReduce, min, FuncMin, ncclCollReduce, ncclMin); 19 | #elif NCCL_OP == 3 20 | IMPL_COLL2(ncclReduce, max, FuncMax, ncclCollReduce, ncclMax); 21 | #endif 22 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/src/freelist_test.cc: -------------------------------------------------------------------------------- 1 | #include "freelist.h" 2 | #include "testharness.h" 3 | 4 | namespace sentencepiece { 5 | namespace model { 6 | 7 | TEST(FreeListTest, BasicTest) { 8 | FreeList l(5); 9 | EXPECT_EQ(0, l.size()); 10 | 11 | constexpr size_t kSize = 32; 12 | 13 | for (size_t i = 0; i < kSize; ++i) { 14 | int *n = l.Allocate(); 15 | EXPECT_EQ(0, *n); 16 | *n = i; 17 | } 18 | 19 | EXPECT_EQ(kSize, l.size()); 20 | for (size_t i = 0; i < kSize; ++i) { 21 | EXPECT_EQ(i, *l[i]); 22 | } 23 | 24 | l.Free(); 25 | EXPECT_EQ(0, l.size()); 26 | 27 | // Zero-initialized after `Free`. 
28 | for (size_t i = 0; i < kSize; ++i) { 29 | int *n = l.Allocate(); 30 | EXPECT_EQ(0, *n); 31 | } 32 | } 33 | } // namespace model 34 | } // namespace sentencepiece 35 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/noncopyable.h: -------------------------------------------------------------------------------- 1 | #ifndef NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | 12 | namespace YAML { 13 | // this is basically boost::noncopyable 14 | class YAML_CPP_API noncopyable { 15 | protected: 16 | noncopyable() {} 17 | ~noncopyable() {} 18 | 19 | private: 20 | noncopyable(const noncopyable&); 21 | const noncopyable& operator=(const noncopyable&); 22 | }; 23 | } 24 | 25 | #endif // NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 26 | -------------------------------------------------------------------------------- /src/tests/dropout_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "marian.h" 6 | 7 | using namespace marian; 8 | 9 | int main(int argc, char** argv) { 10 | auto c = New(argc, argv); 11 | 12 | auto type = c->get("cpu-threads") > 0 13 | ? 
DeviceType::cpu 14 | : DeviceType::gpu; 15 | DeviceId deviceId{0, type}; 16 | 17 | auto g = New(); 18 | g->setDevice(deviceId); 19 | g->reserveWorkspaceMB(512); 20 | 21 | for(int i = 0; i < 10; ++i) { 22 | g->clear(); 23 | auto mask1 = g->dropout(0.2, {10, 3072}); 24 | auto mask2 = g->dropout(0.3, {1, 3072}); 25 | auto mask = mask1 + mask2; 26 | debug(mask1, "mask1"); 27 | debug(mask2, "mask2"); 28 | debug(mask, "mask"); 29 | g->forward(); 30 | } 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /CL_tools/pre_sent_score.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | 4 | cdf = 'de-rarity-cdf_base.npz' 5 | rarity_t = open('de-rarity.txt', 'r').readlines() 6 | rarity = {} 7 | for l in rarity_t: 8 | word, pos = l.split() 9 | rarity[word] = float(pos) 10 | 11 | base = np.load(cdf)['base'][:-1] 12 | cdf = np.load(cdf)['cdf'] 13 | 14 | def get_cdf_by_sent(sent): 15 | words = sent.split() 16 | score = 0. 17 | for word in words: 18 | if word in rarity: 19 | score += np.log(rarity[word]) 20 | else: 21 | print(word) 22 | score = -score 23 | # print("s:", score) 24 | for idx, b in enumerate(base): 25 | if score <= b: 26 | return cdf[idx] 27 | return 1. 
28 | 29 | 30 | for ll in sys.stdin: 31 | ll = ll.strip() 32 | # print(ll) 33 | print(get_cdf_by_sent(ll)) 34 | -------------------------------------------------------------------------------- /examples/wmt2017-uedin/scripts/preprocess-data-mono.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -v 2 | 3 | # suffix of target language files 4 | SRC=en 5 | TRG=de 6 | 7 | # path to moses decoder: https://github.com/moses-smt/mosesdecoder 8 | mosesdecoder=../tools/moses-scripts 9 | 10 | # path to subword segmentation scripts: https://github.com/rsennrich/subword-nmt 11 | subword_nmt=../tools/subword-nmt 12 | 13 | # tokenize 14 | 15 | prefix=news.2016 16 | 17 | cat data/$prefix.$TRG \ 18 | | $mosesdecoder/scripts/tokenizer/normalize-punctuation.perl -l $TRG \ 19 | | $mosesdecoder/scripts/tokenizer/tokenizer.perl -a -l $TRG > data/$prefix.tok.$TRG 20 | 21 | $mosesdecoder/scripts/recaser/truecase.perl -model model/tc.$TRG < data/$prefix.tok.$TRG > data/$prefix.tc.$TRG 22 | 23 | $subword_nmt/apply_bpe.py -c model/$SRC$TRG.bpe < data/$prefix.tc.$TRG > data/$prefix.bpe.$TRG 24 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/src/collectives/device/all_reduce.cu: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * See LICENSE.txt for license information 5 | ************************************************************************/ 6 | 7 | #include "common.h" 8 | #include "all_reduce.h" 9 | #include "collectives.h" 10 | 11 | #define UNROLL 4 12 | 13 | #if NCCL_OP == 0 14 | IMPL_COLL2(ncclAllReduce, sum, FuncSum, ncclCollAllReduce, ncclSum); 15 | #elif NCCL_OP == 1 16 | IMPL_COLL2(ncclAllReduce, prod, FuncProd, ncclCollAllReduce, ncclProd); 17 | #elif NCCL_OP == 2 18 | IMPL_COLL2(ncclAllReduce, min, FuncMin, ncclCollAllReduce, ncclMin); 19 | #elif NCCL_OP == 3 20 | IMPL_COLL2(ncclAllReduce, max, FuncMax, ncclCollAllReduce, ncclMax); 21 | #endif 22 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/directives.h: -------------------------------------------------------------------------------- 1 | #ifndef DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | namespace YAML { 14 | struct Version { 15 | bool isDefault; 16 | int major, minor; 17 | }; 18 | 19 | struct Directives { 20 | Directives(); 21 | 22 | const std::string TranslateTagHandle(const std::string& handle) const; 23 | 24 | Version version; 25 | std::map tags; 26 | }; 27 | } 28 | 29 | #endif // DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66 30 | -------------------------------------------------------------------------------- /examples/wmt2017-transformer/scripts/preprocess-data-mono.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -v 2 | 3 | # suffix of target language files 4 | SRC=en 5 | TRG=de 6 | 7 | # path to moses decoder: https://github.com/moses-smt/mosesdecoder 8 | 
mosesdecoder=../tools/moses-scripts 9 | 10 | # path to subword segmentation scripts: https://github.com/rsennrich/subword-nmt 11 | subword_nmt=../tools/subword-nmt 12 | 13 | # tokenize 14 | 15 | prefix=news.2016 16 | 17 | cat data/$prefix.$TRG \ 18 | | $mosesdecoder/scripts/tokenizer/normalize-punctuation.perl -l $TRG \ 19 | | $mosesdecoder/scripts/tokenizer/tokenizer.perl -a -l $TRG > data/$prefix.tok.$TRG 20 | 21 | $mosesdecoder/scripts/recaser/truecase.perl -model model/tc.$TRG < data/$prefix.tok.$TRG > data/$prefix.tc.$TRG 22 | 23 | $subword_nmt/apply_bpe.py -c model/$SRC$TRG.bpe < data/$prefix.tc.$TRG > data/$prefix.bpe.$TRG 24 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/details/null_mutex.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | // null, no cost dummy "mutex" and dummy "atomic" int 10 | 11 | namespace spdlog 12 | { 13 | namespace details 14 | { 15 | struct null_mutex 16 | { 17 | void lock() {} 18 | void unlock() {} 19 | bool try_lock() 20 | { 21 | return true; 22 | } 23 | }; 24 | 25 | struct null_atomic_int 26 | { 27 | int value; 28 | null_atomic_int() = default; 29 | 30 | null_atomic_int(int val):value(val) 31 | {} 32 | 33 | int load(std::memory_order) const 34 | { 35 | return value; 36 | } 37 | 38 | void store(int val) 39 | { 40 | value = val; 41 | } 42 | }; 43 | 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/details/null_mutex.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | // null, no cost dummy "mutex" and dummy "atomic" int 10 | 11 | namespace spdlog 12 | { 13 | namespace details 14 | { 15 | struct null_mutex 16 | { 17 | void lock() {} 18 | void unlock() {} 19 | bool try_lock() 20 | { 21 | return true; 22 | } 23 | }; 24 | 25 | struct null_atomic_int 26 | { 27 | int value; 28 | null_atomic_int() = default; 29 | 30 | null_atomic_int(int val):value(val) 31 | {} 32 | 33 | int load(std::memory_order) const 34 | { 35 | return value; 36 | } 37 | 38 | void store(int val) 39 | { 40 | value = val; 41 | } 42 | }; 43 | 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /examples/training-basics/scripts/download-files.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -v 2 | 3 | cd data 4 | 5 | # get En-Ro training data for WMT16 6 | wget -nc http://www.statmt.org/europarl/v7/ro-en.tgz 7 | wget -nc http://opus.lingfil.uu.se/download.php?f=SETIMES2/en-ro.txt.zip -O SETIMES2.ro-en.txt.zip 8 | wget -nc http://data.statmt.org/rsennrich/wmt16_backtranslations/ro-en/corpus.bt.ro-en.en.gz 9 | wget -nc http://data.statmt.org/rsennrich/wmt16_backtranslations/ro-en/corpus.bt.ro-en.ro.gz 10 | 11 | # extract data 12 | tar -xf ro-en.tgz 13 | unzip SETIMES2.ro-en.txt.zip 14 | gzip -d corpus.bt.ro-en.en.gz corpus.bt.ro-en.ro.gz 15 | 16 | # create corpus files 17 | cat europarl-v7.ro-en.en SETIMES2.en-ro.en corpus.bt.ro-en.en > corpus.en 18 | cat europarl-v7.ro-en.ro SETIMES2.en-ro.ro corpus.bt.ro-en.ro > corpus.ro 19 | 20 | # clean 21 | rm ro-en.tgz SETIMES2.* corpus.bt.* europarl-* 22 | 23 | cd .. 
24 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/tag.h: -------------------------------------------------------------------------------- 1 | #ifndef TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | 12 | namespace YAML { 13 | struct Directives; 14 | struct Token; 15 | 16 | struct Tag { 17 | enum TYPE { 18 | VERBATIM, 19 | PRIMARY_HANDLE, 20 | SECONDARY_HANDLE, 21 | NAMED_HANDLE, 22 | NON_SPECIFIC 23 | }; 24 | 25 | Tag(const Token& token); 26 | const std::string Translate(const Directives& directives); 27 | 28 | TYPE type; 29 | std::string handle, value; 30 | }; 31 | } 32 | 33 | #endif // TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 34 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/src/collectives/device/reduce_scatter.cu: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * See LICENSE.txt for license information 5 | ************************************************************************/ 6 | 7 | #include "common.h" 8 | #include "reduce_scatter.h" 9 | #include "collectives.h" 10 | 11 | #define UNROLL 4 12 | 13 | #if NCCL_OP == 0 14 | IMPL_COLL2(ncclReduceScatter, sum, FuncSum, ncclCollReduceScatter, ncclSum); 15 | #elif NCCL_OP == 1 16 | IMPL_COLL2(ncclReduceScatter, prod, FuncProd, ncclCollReduceScatter, ncclProd); 17 | #elif NCCL_OP == 2 18 | IMPL_COLL2(ncclReduceScatter, min, FuncMin, ncclCollReduceScatter, ncclMin); 19 | #elif NCCL_OP == 3 20 | IMPL_COLL2(ncclReduceScatter, max, FuncMax, ncclCollReduceScatter, ncclMax); 21 | #endif 22 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/Makefile.mingw: -------------------------------------------------------------------------------- 1 | CXX ?= g++ 2 | CXXFLAGS = -D_WIN32_WINNT=0x600 -march=native -Wall -Wextra -Wshadow -pedantic -std=c++11 -pthread -Wl,--no-as-needed -I../include 3 | CXX_RELEASE_FLAGS = -O3 4 | CXX_DEBUG_FLAGS= -g 5 | 6 | 7 | all: example bench 8 | debug: example-debug bench-debug 9 | 10 | example: example.cpp 11 | $(CXX) example.cpp -o example $(CXXFLAGS) $(CXX_RELEASE_FLAGS) 12 | 13 | bench: bench.cpp 14 | $(CXX) bench.cpp -o bench $(CXXFLAGS) $(CXX_RELEASE_FLAGS) 15 | 16 | 17 | example-debug: example.cpp 18 | $(CXX) example.cpp -o example-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS) 19 | 20 | bench-debug: bench.cpp 21 | $(CXX) bench.cpp -o bench-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS) 22 | 23 | 24 | 25 | clean: 26 | rm -f *.o logs/*.txt example example-debug bench bench-debug 27 | 28 | 29 | rebuild: clean all 30 | rebuild-debug: clean debug 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/data/types.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | 5 | 
#include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace marian { 12 | 13 | // Type for all vocabulary items, based on IndexType 14 | typedef IndexType Word; 15 | 16 | // Sequence of vocabulary items 17 | typedef std::vector Words; 18 | 19 | // EOS and UNK are placed in these positions in Marian-generated vocabs 20 | const Word DEFAULT_EOS_ID = 0; 21 | const Word DEFAULT_UNK_ID = 1; 22 | 23 | // names of EOS and UNK symbols 24 | const std::string DEFAULT_EOS_STR = ""; 25 | const std::string DEFAULT_UNK_STR = ""; 26 | 27 | // alternatively accepted names in Yaml dictionaries for ids 0 and 1, resp. 28 | const std::string NEMATUS_EOS_STR = "eos"; 29 | const std::string NEMATUS_UNK_STR = "UNK"; 30 | 31 | } // namespace marian 32 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/example/Makefile.clang: -------------------------------------------------------------------------------- 1 | CXX ?= clang++ 2 | CXXFLAGS = -march=native -Wall -Wextra -Wshadow -pedantic -std=c++11 -pthread -I../include 3 | CXX_RELEASE_FLAGS = -O2 4 | CXX_DEBUG_FLAGS= -g 5 | 6 | 7 | all: example bench 8 | debug: example-debug bench-debug 9 | 10 | example: example.cpp 11 | $(CXX) example.cpp -o example-clang $(CXXFLAGS) $(CXX_RELEASE_FLAGS) 12 | 13 | bench: bench.cpp 14 | $(CXX) bench.cpp -o bench-clang $(CXXFLAGS) $(CXX_RELEASE_FLAGS) 15 | 16 | 17 | example-debug: example.cpp 18 | $(CXX) example.cpp -o example-clang-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS) 19 | 20 | bench-debug: bench.cpp 21 | $(CXX) bench.cpp -o bench-clang-debug $(CXXFLAGS) $(CXX_DEBUG_FLAGS) 22 | 23 | 24 | 25 | clean: 26 | rm -f *.o logs/*.txt example-clang example-clang-debug bench-clang bench-clang-debug 27 | 28 | 29 | rebuild: clean all 30 | rebuild-debug: clean debug 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/yaml.h: 
-------------------------------------------------------------------------------- 1 | #ifndef YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/parser.h" 11 | #include "yaml-cpp/emitter.h" 12 | #include "yaml-cpp/emitterstyle.h" 13 | #include "yaml-cpp/stlemitter.h" 14 | #include "yaml-cpp/exceptions.h" 15 | 16 | #include "yaml-cpp/node/node.h" 17 | #include "yaml-cpp/node/impl.h" 18 | #include "yaml-cpp/node/convert.h" 19 | #include "yaml-cpp/node/iterator.h" 20 | #include "yaml-cpp/node/detail/impl.h" 21 | #include "yaml-cpp/node/parse.h" 22 | #include "yaml-cpp/node/emit.h" 23 | 24 | #endif // YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66 25 | -------------------------------------------------------------------------------- /src/training/gradient_dropping/gpu/sparse_algorithm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | #include "tensors/backend.h" 5 | #include "tensors/tensor.h" 6 | 7 | namespace marian { 8 | namespace gpu { 9 | /** 10 | * @brief Output[i] is lower_bound of values[i] in data. 
11 | * 12 | * @return A vector of size values.size 13 | */ 14 | std::vector lower_bounds(int* data, 15 | std::vector values, 16 | int size, 17 | DeviceId device); 18 | 19 | int buildSparse(Tensor t, float* data, int* indices); 20 | 21 | void scatterAdd(Tensor t, float* data, int* indices, int size, int offset); 22 | 23 | void scatterUpdate(Tensor t, float* data, int* indices, int size, int offset); 24 | 25 | void gather(Tensor t, float* data, int* indices, int size, int offset); 26 | } // namespace gpu 27 | } // namespace marian 28 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/.gitignore: -------------------------------------------------------------------------------- 1 | Makefile 2 | Makefile.in 3 | /ar-lib 4 | /mdate-sh 5 | /py-compile 6 | /test-driver 7 | /ylwrap 8 | /build 9 | 10 | /autom4te.cache 11 | /autoscan.log 12 | /autoscan-*.log 13 | /aclocal.m4 14 | /compile 15 | /config.guess 16 | /config.sub 17 | /configure 18 | /configure.scan 19 | /depcomp 20 | /install-sh 21 | /missing 22 | /stamp-h1 23 | /libtool 24 | /config.h 25 | /config.status 26 | /autogen.sh 27 | /ltmain.sh 28 | 29 | CMakeFiles 30 | CMakeCache.txt 31 | config.h 32 | sentencepiece.pc 33 | 34 | *.o 35 | *.lo 36 | *.a 37 | *.la 38 | *.pyc 39 | 40 | .libs 41 | .deps 42 | 43 | *.m4 44 | *.log 45 | *.trs 46 | 47 | compile_charsmap 48 | 49 | spm_decode 50 | spm_encode 51 | spm_export_vocab 52 | spm_train 53 | spm_normalize 54 | spm_test 55 | 56 | *.pb.cc 57 | *.pb.h 58 | 59 | .DS_Store 60 | *.egg-info/ 61 | dist/ 62 | *.swp 63 | *.swo 64 | *.pyc 65 | 66 | m.model 67 | m.vocab 68 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/null.h: -------------------------------------------------------------------------------- 1 | #ifndef NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | 
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | #include 12 | 13 | namespace YAML { 14 | class Node; 15 | 16 | struct YAML_CPP_API _Null {}; 17 | inline bool operator==(const _Null&, const _Null&) { return true; } 18 | inline bool operator!=(const _Null&, const _Null&) { return false; } 19 | 20 | YAML_CPP_API bool IsNull(const Node& node); // old API only 21 | YAML_CPP_API bool IsNullString(const std::string& str); 22 | 23 | extern YAML_CPP_API _Null Null; 24 | } 25 | 26 | #endif // NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 27 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/mark.h: -------------------------------------------------------------------------------- 1 | #ifndef MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | 12 | namespace YAML { 13 | struct YAML_CPP_API Mark { 14 | Mark() : pos(0), line(0), column(0) {} 15 | 16 | static const Mark null_mark() { return Mark(-1, -1, -1); } 17 | 18 | bool is_null() const { return pos == -1 && line == -1 && column == -1; } 19 | 20 | int pos; 21 | int line, column; 22 | 23 | private: 24 | Mark(int pos_, int line_, int column_) 25 | : pos(pos_), line(line_), column(column_) {} 26 | }; 27 | } 28 | 29 | #endif // MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 30 | -------------------------------------------------------------------------------- /src/training/exponential_smoothing.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | 
#include "functional/functional.h" 5 | #include "tensors/tensor_operators.h" 6 | 7 | namespace marian { 8 | 9 | /** 10 | * Class implementing exponential smoothing for graph groups. 11 | * The smoothed parameters themselves are not stored in here. 12 | */ 13 | class ExponentialSmoothing { 14 | public: 15 | ExponentialSmoothing(float decay = 0.0f) 16 | : mvAvg_{decay > 0}, mvDecay_{decay} {} 17 | 18 | protected: 19 | void updateAvgParams(Tensor paramsAvg, Tensor params, size_t batches) { 20 | using namespace functional; 21 | float decay = std::max(mvDecay_, 22 | 1.f - (float)(batches + 1) / (float)(batches + 10)); 23 | Element(_1 = ((1.f - decay) * _1) + (decay * _2), paramsAvg, params); 24 | } 25 | 26 | bool mvAvg_{false}; 27 | float mvDecay_{1e-4f}; 28 | }; 29 | } // namespace marian 30 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/detail/iterator_fwd.h: -------------------------------------------------------------------------------- 1 | #ifndef VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | #include 12 | #include 13 | #include 14 | 15 | namespace YAML { 16 | 17 | namespace detail { 18 | struct iterator_value; 19 | template 20 | class iterator_base; 21 | } 22 | 23 | typedef detail::iterator_base iterator; 24 | typedef detail::iterator_base const_iterator; 25 | } 26 | 27 | #endif // VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66 28 | -------------------------------------------------------------------------------- /src/functional/array.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 
"functional/defs.h" 4 | 5 | namespace marian { 6 | 7 | namespace functional { 8 | 9 | template 10 | struct Array { 11 | typedef T value_type; 12 | 13 | T data_[N]; 14 | 15 | __HDI__ const T* data() const { return data_; } 16 | 17 | __HDI__ T* data() { return data_; } 18 | 19 | __HDI__ constexpr static size_t size() { return N; } 20 | 21 | __HDI__ T& operator[](size_t i) { return data_[i]; } 22 | __HDI__ const T& operator[](size_t i) const { return data_[i]; } 23 | 24 | __HDI__ T* begin() { return data_; } 25 | __HDI__ const T* begin() const { return data_; } 26 | 27 | __HDI__ T* end() { return data_ + N; } 28 | __HDI__ const T* end() const { return data_ + N; } 29 | 30 | __HDI__ void fill(T val) { 31 | for(int i = 0; i < N; ++i) 32 | data_[i] = val; 33 | } 34 | }; 35 | } // namespace functional 36 | } // namespace marian 37 | -------------------------------------------------------------------------------- /src/layers/weight.cpp: -------------------------------------------------------------------------------- 1 | #include "layers/weight.h" 2 | 3 | namespace marian { 4 | 5 | Ptr WeightingFactory(Ptr options) { 6 | ABORT_IF(!options->has("data-weighting"), 7 | "No data-weighting specified in options"); 8 | return New(options->get("data-weighting-type")); 9 | } 10 | 11 | Expr DataWeighting::getWeights(Ptr graph, 12 | Ptr batch) { 13 | ABORT_IF(batch->getDataWeights().empty(), 14 | "Vector of weights is unexpectedly empty!"); 15 | bool sentenceWeighting = weightingType_ == "sentence"; 16 | int dimBatch = (int)batch->size(); 17 | int dimWords = sentenceWeighting ? 
1 : (int)batch->back()->batchWidth(); 18 | auto weights = graph->constant({1, dimWords, dimBatch, 1}, 19 | inits::from_vector(batch->getDataWeights())); 20 | return weights; 21 | } 22 | } // namespace marian 23 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | #include 12 | 13 | namespace YAML { 14 | namespace detail { 15 | class node; 16 | class node_ref; 17 | class node_data; 18 | class memory; 19 | class memory_holder; 20 | 21 | typedef std::shared_ptr shared_node; 22 | typedef std::shared_ptr shared_node_ref; 23 | typedef std::shared_ptr shared_node_data; 24 | typedef std::shared_ptr shared_memory_holder; 25 | typedef std::shared_ptr shared_memory; 26 | } 27 | } 28 | 29 | #endif // VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 30 | -------------------------------------------------------------------------------- /src/command/marian_vocab.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | 3 | #include "common/cli_wrapper.h" 4 | #include "common/logging.h" 5 | #include "data/vocab.h" 6 | 7 | int main(int argc, char** argv) { 8 | using namespace marian; 9 | 10 | createLoggers(); 11 | 12 | auto options = New(); 13 | { 14 | auto cli = New( 15 | options, 16 | "Create a vocabulary from text corpora given on STDIN", 17 | "Allowed options", 18 | "Examples:\n" 19 | " ./marian-vocab < text.src > vocab.yml\n" 20 | " cat text.src text.trg | ./marian-vocab > vocab.yml"); 21 | cli->add("--max-size,-m", 
"Generate only UINT most common vocabulary items", 0); 22 | cli->parse(argc, argv); 23 | } 24 | 25 | LOG(info, "Creating vocabulary..."); 26 | 27 | auto vocab = New(options, 0); 28 | vocab->create("stdout", "stdin", options->get("max-size")); 29 | 30 | LOG(info, "Finished"); 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /src/translator/output_printer.cpp: -------------------------------------------------------------------------------- 1 | #include "output_printer.h" 2 | 3 | namespace marian { 4 | 5 | std::string OutputPrinter::getAlignment(const Ptr& hyp) { 6 | data::SoftAlignment align; 7 | auto last = hyp; 8 | // get soft alignments for each target word starting from the last one 9 | while(last->GetPrevHyp().get() != nullptr) { 10 | align.push_back(last->GetAlignment()); 11 | last = last->GetPrevHyp(); 12 | } 13 | 14 | // reverse alignments 15 | std::reverse(align.begin(), align.end()); 16 | 17 | if(alignment_ == "soft") { 18 | return data::SoftAlignToString(align); 19 | } else if(alignment_ == "hard") { 20 | return data::ConvertSoftAlignToHardAlign(align, 1.f).toString(); 21 | } else if(alignmentThreshold_ > 0.f) { 22 | return data::ConvertSoftAlignToHardAlign(align, alignmentThreshold_) 23 | .toString(); 24 | } else { 25 | ABORT("Unrecognized word alignment type"); 26 | } 27 | } 28 | 29 | } // namespace marian 30 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/pkg/srctxz/create_srctxz.sh.in: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # See LICENSE.txt for license information 6 | # 7 | 8 | # To run from $BUILDDIR/ 9 | 10 | cd .. 11 | NCCLDIR=`basename $PWD` 12 | 13 | echo "Checking for unclean directory ..." 14 | git clean -x -i 15 | echo "Clean done." 16 | echo "Checking for uncommited files ..." 
17 | if [ "`git status -s | wc -l`" != "0" ]; then 18 | git status -s 19 | echo "Some changes are not committed yet. Continue ? (Ctrl-C to abort)" 20 | read 21 | fi 22 | 23 | cd .. 24 | NCCL_MAJOR=${nccl:Major} 25 | NCCL_MINOR=${nccl:Minor} 26 | NCCL_PATCH=${nccl:Patch} 27 | NCCL_SUFFIX=${nccl:Suffix} 28 | 29 | NCCLNAME="nccl-src_${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_PATCH}${NCCL_SUFFIX}" 30 | 31 | tar --exclude build \ 32 | --exclude ".git*" \ 33 | --exclude pkg/srctxz \ 34 | --transform "s/^$NCCLDIR/$NCCLNAME/" -Jcf $NCCLNAME.txz --owner=0 --group=0 $NCCLDIR 35 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/src/include/group.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * See LICENSE.txt for license information 5 | ************************************************************************/ 6 | 7 | #ifndef NCCL_GROUP_H_ 8 | #define NCCL_GROUP_H_ 9 | 10 | #include "nccl.h" 11 | #include "core.h" 12 | 13 | bool ncclAsyncMode(); 14 | ncclResult_t ncclAsyncErrCheck(ncclResult_t ret); 15 | 16 | typedef ncclResult_t(*ncclInitFunc_t)(ncclComm_t* newcomm, int ndev, ncclUniqueId commId, int myrank); 17 | 18 | ncclResult_t ncclAsyncInit(ncclInitFunc_t func, int cudaDev, ncclComm_t* newcomm, int ndev, ncclUniqueId commId, int myrank); 19 | 20 | typedef ncclResult_t(*ncclCollFunc_t)(const void* sendbuff, void* recvbuff, size_t count, 21 | ncclDataType_t type, ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream); 22 | 23 | ncclResult_t ncclAsyncColl(ncclComm_t comm); 24 | #endif 25 | -------------------------------------------------------------------------------- /src/models/model_base.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 
"marian.h" 5 | 6 | namespace marian { 7 | namespace models { 8 | 9 | enum struct usage { raw, training, scoring, translation }; 10 | } 11 | } // namespace marian 12 | 13 | YAML_REGISTER_TYPE(marian::models::usage, int) 14 | 15 | namespace marian { 16 | namespace models { 17 | 18 | class ModelBase { 19 | public: 20 | virtual void load(Ptr, 21 | const std::string&, 22 | bool markReloaded = true) 23 | = 0; 24 | virtual void save(Ptr, 25 | const std::string&, 26 | bool saveTranslatorConfig = false) 27 | = 0; 28 | 29 | virtual Expr build(Ptr graph, 30 | Ptr batch, 31 | bool clearGraph = true) 32 | = 0; 33 | 34 | virtual void clear(Ptr graph) = 0; 35 | }; 36 | 37 | } // namespace models 38 | } // namespace marian 39 | -------------------------------------------------------------------------------- /src/data/batch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "common/definitions.h" 6 | 7 | namespace marian { 8 | namespace data { 9 | 10 | class Batch { 11 | public: 12 | virtual size_t size() const = 0; 13 | virtual size_t words(int /*which*/ = 0) const { return 0; }; 14 | virtual size_t width() const { return 0; }; 15 | 16 | virtual size_t sizeTrg() const { return 0; }; 17 | virtual size_t wordsTrg() const { return 0; }; 18 | virtual size_t widthTrg() const { return 0; }; 19 | 20 | virtual void debug(){}; 21 | 22 | virtual std::vector> split(size_t n) = 0; 23 | 24 | const std::vector& getSentenceIds() const { return sentenceIds_; } 25 | void setSentenceIds(const std::vector& ids) { sentenceIds_ = ids; } 26 | 27 | virtual void setGuidedAlignment(std::vector&&) = 0; 28 | virtual void setDataWeights(const std::vector&) = 0; 29 | 30 | protected: 31 | std::vector sentenceIds_; 32 | }; 33 | } // namespace data 34 | } // namespace marian 35 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/.gitignore: 
-------------------------------------------------------------------------------- 1 | # Auto generated files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | *.suo 7 | *.tlog 8 | *.ilk 9 | *.log 10 | *.pdb 11 | *.idb 12 | *.iobj 13 | *.ipdb 14 | *.opensdf 15 | *.sdf 16 | 17 | # Compiled Dynamic libraries 18 | *.so 19 | *.dylib 20 | *.dll 21 | 22 | # Compiled Static libraries 23 | *.lai 24 | *.la 25 | *.a 26 | *.lib 27 | 28 | # Executables 29 | *.exe 30 | *.out 31 | *.app 32 | 33 | # Codelite 34 | .codelite 35 | 36 | # .orig files 37 | *.orig 38 | 39 | # example files 40 | example/* 41 | !example/example.cpp 42 | !example/bench.cpp 43 | !example/utils.h 44 | !example/Makefile* 45 | !example/example.sln 46 | !example/example.vcxproj 47 | !example/CMakeLists.txt 48 | !example/multisink.cpp 49 | !example/jni 50 | 51 | # generated files 52 | generated 53 | 54 | # Cmake 55 | CMakeCache.txt 56 | CMakeFiles 57 | CMakeScripts 58 | Makefile 59 | cmake_install.cmake 60 | install_manifest.txt 61 | /tests/tests.VC.VC.opendb 62 | /tests/tests.VC.db 63 | /tests/tests 64 | /tests/logs/file_helper_test.txt 65 | -------------------------------------------------------------------------------- /src/tensors/memory_piece.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace marian { 6 | 7 | class MemoryPiece { 8 | private: 9 | uint8_t* data_; 10 | size_t size_; 11 | 12 | public: 13 | MemoryPiece(uint8_t* data, size_t size) : data_(data), size_(size) {} 14 | 15 | uint8_t* data() const { return data_; } 16 | uint8_t* data() { return data_; } 17 | 18 | template 19 | T* data() const { 20 | return (T*)data_; 21 | } 22 | 23 | template 24 | T* data() { 25 | return (T*)data_; 26 | } 27 | 28 | size_t size() const { return size_; } 29 | 30 | void set(uint8_t* data, size_t size) { 31 | data_ = data; 32 | size_ = size; 33 | } 34 | 35 | void setPtr(uint8_t* data) { data_ = data; } 36 | 37 | friend std::ostream& 
operator<<(std::ostream& out, const MemoryPiece mp) { 38 | out << "MemoryPiece - ptr: " << std::hex << (size_t)mp.data() << std::dec 39 | << " size: " << mp.size(); 40 | return out; 41 | } 42 | }; 43 | } // namespace marian 44 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/latency/g3log-crush.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | void CrusherLoop() 7 | { 8 | size_t counter = 0; 9 | while (true) 10 | { 11 | LOGF(INFO, "Some text to crush you machine. thread:"); 12 | if(++counter % 1000000 == 0) 13 | { 14 | std::cout << "Wrote " << counter << " entries" << std::endl; 15 | } 16 | } 17 | } 18 | 19 | 20 | int main(int argc, char** argv) 21 | { 22 | std::cout << "WARNING: This test will exaust all your machine memory and will crush it!" << std::endl; 23 | std::cout << "Are you sure you want to continue ? " << std::endl; 24 | char c; 25 | std::cin >> c; 26 | if (toupper( c ) != 'Y') 27 | return 0; 28 | 29 | auto worker = g3::LogWorker::createLogWorker(); 30 | auto handle= worker->addDefaultLogger(argv[0], "g3log.txt"); 31 | g3::initializeLogging(worker.get()); 32 | CrusherLoop(); 33 | 34 | return 0; 35 | } 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/msvc_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Alexander Dalshov. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #if defined(_MSC_VER) 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | namespace spdlog 19 | { 20 | namespace sinks 21 | { 22 | /* 23 | * MSVC sink (logging using OutputDebugStringA) 24 | */ 25 | template 26 | class msvc_sink : public base_sink < Mutex > 27 | { 28 | public: 29 | explicit msvc_sink() 30 | { 31 | } 32 | 33 | void flush() override 34 | { 35 | } 36 | 37 | protected: 38 | void _sink_it(const details::log_msg& msg) override 39 | { 40 | OutputDebugStringA(msg.formatted.c_str()); 41 | } 42 | }; 43 | 44 | typedef msvc_sink msvc_sink_mt; 45 | typedef msvc_sink msvc_sink_st; 46 | 47 | } 48 | } 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/rnn/attention_constructors.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "marian.h" 4 | 5 | #include "layers/factory.h" 6 | #include "rnn/attention.h" 7 | #include "rnn/constructors.h" 8 | #include "rnn/types.h" 9 | 10 | namespace marian { 11 | namespace rnn { 12 | 13 | class AttentionFactory : public InputFactory { 14 | protected: 15 | Ptr state_; 16 | 17 | public: 18 | AttentionFactory(Ptr graph) : InputFactory(graph) {} 19 | 20 | Ptr construct() override { 21 | ABORT_IF(!state_, "EncoderState not set"); 22 | return New(graph_, options_, state_); 23 | } 24 | 25 | Accumulator set_state(Ptr state) { 26 | state_ = state; 27 | return Accumulator(*this); 28 | } 29 | 30 | int dimAttended() { 31 | ABORT_IF(!state_, "EncoderState not set"); 32 | return state_->getAttended()->shape()[1]; 33 | } 34 | }; 35 | 36 | typedef Accumulator attention; 37 | } // namespace rnn 38 | } // namespace marian 39 | -------------------------------------------------------------------------------- /src/tensors/cpu/sharp/int_gemm.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tensors/tensor.h" 4 | 5 | namespace marian { 6 | namespace cpu { 7 | namespace int16 { 8 | 9 | const int BITS = 10; 10 | 11 | void Quantize16(marian::Tensor out, 12 | const marian::Tensor in, 13 | float /*clipValue*/); 14 | 15 | void Quantize8(marian::Tensor out, 16 | const marian::Tensor in, 17 | float clipValue); 18 | 19 | // This operates on floats after processing so doesn't care about int8_t vs 20 | // int16_t. 21 | void AddBias(marian::Tensor C, const marian::Tensor Bias); 22 | 23 | void ProdInt16(marian::Tensor C, 24 | const marian::Tensor A, 25 | const marian::Tensor B, 26 | float scale); 27 | 28 | void ProdInt8(marian::Tensor C, 29 | const marian::Tensor A, 30 | const marian::Tensor B, 31 | float scale, 32 | float clipValue); 33 | 34 | } // namespace int16 35 | } // namespace cpu 36 | } // namespace marian 37 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/msvc_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Alexander Dalshov. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #if defined(_MSC_VER) 9 | 10 | #include "spdlog/sinks/base_sink.h" 11 | #include "spdlog/details/null_mutex.h" 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | namespace spdlog 19 | { 20 | namespace sinks 21 | { 22 | /* 23 | * MSVC sink (logging using OutputDebugStringA) 24 | */ 25 | template 26 | class msvc_sink : public base_sink < Mutex > 27 | { 28 | public: 29 | explicit msvc_sink() 30 | { 31 | } 32 | 33 | 34 | 35 | protected: 36 | void _sink_it(const details::log_msg& msg) override 37 | { 38 | OutputDebugStringA(msg.formatted.c_str()); 39 | } 40 | 41 | void _flush() override 42 | {} 43 | }; 44 | 45 | typedef msvc_sink msvc_sink_mt; 46 | typedef msvc_sink msvc_sink_st; 47 | 48 | } 49 | } 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/common/config_validator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "3rd_party/yaml-cpp/yaml.h" 4 | #include "common/config_parser.h" 5 | 6 | namespace marian { 7 | 8 | // TODO: Finally refactorize Config, Options, ConfigParser and ConfigValidator 9 | // classes. 10 | class ConfigValidator { 11 | private: 12 | const YAML::Node& config_; 13 | 14 | bool has(const std::string& key) const; 15 | 16 | template 17 | T get(const std::string& key) const { 18 | return config_[key].as(); 19 | } 20 | 21 | void validateOptionsTranslation() const; 22 | void validateOptionsParallelData() const; 23 | void validateOptionsScoring() const; 24 | void validateOptionsTraining() const; 25 | 26 | void validateModelExtension(cli::mode mode) const; 27 | void validateDevices(cli::mode mode) const; 28 | 29 | public: 30 | ConfigValidator(const YAML::Node& config); 31 | virtual ~ConfigValidator(); 32 | 33 | // Validate options according to the given mode. 
Abort on first validation error 34 | void validateOptions(cli::mode mode) const; 35 | }; 36 | 37 | } // namespace marian 38 | -------------------------------------------------------------------------------- /src/layers/weight.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/options.h" 4 | #include "data/corpus.h" 5 | #include "graph/expression_graph.h" 6 | #include "graph/expression_operators.h" 7 | #include "graph/node_initializers.h" 8 | 9 | namespace marian { 10 | 11 | class WeightingBase { 12 | public: 13 | WeightingBase(){}; 14 | virtual Expr getWeights(Ptr graph, 15 | Ptr batch) 16 | = 0; 17 | virtual void debugWeighting(std::vector /*weightedMask*/, 18 | std::vector /*freqMask*/, 19 | Ptr /*batch*/){}; 20 | }; 21 | 22 | class DataWeighting : public WeightingBase { 23 | protected: 24 | std::string weightingType_; 25 | 26 | public: 27 | DataWeighting(std::string weightingType) 28 | : WeightingBase(), weightingType_(weightingType){}; 29 | Expr getWeights(Ptr graph, Ptr batch) override; 30 | }; 31 | 32 | Ptr WeightingFactory(Ptr options); 33 | } // namespace marian 34 | -------------------------------------------------------------------------------- /src/optimizers/clippers.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "tensors/tensor.h" 7 | 8 | namespace marian { 9 | 10 | // @TODO: modify computation graph to group all parameters in single matrix 11 | // object. 12 | // This will allow to perform a single large SGD update per batch. Currently 13 | // there 14 | // are as many updates as different parameters.
15 | 16 | class ClipperBase { 17 | public: 18 | virtual void clip(Tensor) = 0; 19 | }; 20 | 21 | typedef std::shared_ptr ClipperPtr; 22 | 23 | class Elementwise : public ClipperBase { 24 | public: 25 | Elementwise(float c = 10.0) : c_(c) {} 26 | 27 | void clip(Tensor t) override; 28 | 29 | private: 30 | float c_; 31 | }; 32 | 33 | class Norm : public ClipperBase { 34 | public: 35 | Norm(float c = 1.0) : c_(c) {} 36 | 37 | void clip(Tensor t) override; 38 | 39 | private: 40 | float c_; 41 | }; 42 | 43 | template 44 | ClipperBasePtr Clipper(Args&&... args) { 45 | return ClipperBasePtr(new Algorithm(args...)); 46 | } 47 | } // namespace marian 48 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/detail/bool_type.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | namespace YAML { 11 | namespace detail { 12 | struct unspecified_bool { 13 | struct NOT_ALLOWED; 14 | static void true_value(NOT_ALLOWED*) {} 15 | }; 16 | typedef void (*unspecified_bool_type)(unspecified_bool::NOT_ALLOWED*); 17 | } 18 | } 19 | 20 | #define YAML_CPP_OPERATOR_BOOL() \ 21 | operator YAML::detail::unspecified_bool_type() const { \ 22 | return this->operator!() ? 
0 \ 23 | : &YAML::detail::unspecified_bool::true_value; \ 24 | } 25 | 26 | #endif // NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 27 | -------------------------------------------------------------------------------- /src/tensors/backend.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/definitions.h" 4 | #include "tensors/rand.h" 5 | 6 | namespace marian { 7 | 8 | class Backend { 9 | protected: 10 | DeviceId deviceId_; 11 | size_t seed_; 12 | Ptr randomGenerator_; 13 | 14 | // global clipping value for matrix-multiplies, should soon be removed. 15 | float clipValue_{0.f}; 16 | 17 | public: 18 | Backend(DeviceId deviceId, size_t seed) 19 | : deviceId_(deviceId), 20 | seed_(seed), 21 | randomGenerator_(createRandomGenerator(seed, deviceId)) {} 22 | 23 | virtual DeviceId getDeviceId() { return deviceId_; }; 24 | virtual Ptr getRandomGenerator() { return randomGenerator_; } 25 | 26 | // for GPU only, calls cudaSetDevice, does nothing on CPU. Maybe change name. 
27 | virtual void setDevice() = 0; 28 | virtual void synchronize() = 0; 29 | 30 | virtual void setClip(float clipValue) { clipValue_ = clipValue; } 31 | float getClip() { return clipValue_; } 32 | }; 33 | 34 | Ptr BackendByDeviceId(DeviceId deviceId, size_t seed); 35 | 36 | } // namespace marian 37 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/emit.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | #include "yaml-cpp/dll.h" 14 | 15 | namespace YAML { 16 | class Emitter; 17 | class Node; 18 | 19 | /** 20 | * Emits the node to the given {@link Emitter}. If there is an error in writing, 21 | * {@link Emitter#good} will return false. 22 | */ 23 | YAML_CPP_API Emitter& operator<<(Emitter& out, const Node& node); 24 | 25 | /** Emits the node to the given output stream. */ 26 | YAML_CPP_API std::ostream& operator<<(std::ostream& out, const Node& node); 27 | 28 | /** Converts the node to a YAML string. 
*/ 29 | YAML_CPP_API std::string Dump(const Node& node); 30 | } // namespace YAML 31 | 32 | #endif // NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 33 | -------------------------------------------------------------------------------- /src/data/iterator_facade.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // simplistic replacement for boost::iterator_facade 4 | template 5 | struct IteratorFacade { 6 | // to create DummyIterator inherit from public IteratorFacade 7 | // and implement these three functions 8 | virtual bool equal(const Iterator& other) const = 0; 9 | virtual const Item& dereference() const = 0; 10 | virtual void increment() = 0; 11 | 12 | bool operator==(const Iterator& other) const { 13 | return equal(other); 14 | } 15 | 16 | bool operator!=(const Iterator& other) const { 17 | return !equal(other); 18 | } 19 | 20 | const Item& operator*() const { 21 | return dereference(); 22 | } 23 | 24 | // prefix ++ 25 | Iterator& operator++() { 26 | increment(); 27 | return dynamic_cast(*this); 28 | } 29 | 30 | // postfix ++ 31 | Iterator operator++(int) { 32 | auto ret = dynamic_cast(*this); 33 | increment(); 34 | return ret; 35 | } 36 | 37 | const Item* operator->() const { 38 | return &dereference(); 39 | } 40 | }; 41 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/src/test_main.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.! 14 | 15 | #include "flags.h" 16 | #include "testharness.h" 17 | 18 | #ifdef OS_WIN 19 | DEFINE_string(data_dir, "..\\data", "Data directory"); 20 | #else 21 | DEFINE_string(data_dir, "../data", "Data directory"); 22 | #endif 23 | 24 | int main(int argc, char **argv) { 25 | std::vector rest_args; 26 | sentencepiece::flags::ParseCommandLineFlags(argc, argv, &rest_args); 27 | 28 | sentencepiece::test::RunAllTests(); 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /src/graph/node_operators.cpp: -------------------------------------------------------------------------------- 1 | #include "node_operators.h" 2 | #include "expression_graph.h" 3 | 4 | #include "tensors/tensor_operators.h" 5 | 6 | namespace marian { 7 | 8 | size_t ConstantNode::allocate() { 9 | size_t elements = 0; 10 | if(!val_) { 11 | graph()->allocateForward(shared_from_this()); 12 | elements = val_->shape().elements(); 13 | } 14 | return elements; 15 | } 16 | 17 | void ConstantNode::init() { 18 | if(!initialized_) { 19 | (*init_)(val_); 20 | initialized_ = true; 21 | } 22 | init_.reset(); 23 | } 24 | 25 | ParamNode::ParamNode(Ptr graph, 26 | const Shape& shape, 27 | const NodeInitializer& init, 28 | bool fixed) 29 | : Node(graph, shape), // TODO: add value_type 30 | init_(new NodeInitializer(init)), 31 | initialized_(false) { 32 | setTrainable(!fixed); 33 | setMemoize(graph->isInference()); 34 | } 35 | 36 | void ParamNode::init() { 37 | if(!initialized_) { 38 | (*init_)(val_); 39 | initialized_ = 
true; 40 | } 41 | init_.reset(); 42 | } 43 | } // namespace marian 44 | -------------------------------------------------------------------------------- /contrib/other-builds/eclipse/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | marian 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.cdt.managedbuilder.core.genmakebuilder 10 | clean,full,incremental, 11 | 12 | 13 | 14 | 15 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder 16 | full,incremental, 17 | 18 | 19 | 20 | 21 | 22 | org.eclipse.cdt.core.cnature 23 | org.eclipse.cdt.core.ccnature 24 | org.eclipse.cdt.managedbuilder.core.managedBuildNature 25 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature 26 | 27 | 28 | 29 | src 30 | 2 31 | PARENT-1-PROJECT_LOC/src 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/src/model_factory.h: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.! 14 | 15 | #ifndef MODEL_FACTORY_H_ 16 | #define MODEL_FACTORY_H_ 17 | 18 | #include 19 | #include "model_interface.h" 20 | #include "sentencepiece_model.pb.h" 21 | 22 | namespace sentencepiece { 23 | 24 | class ModelFactory { 25 | public: 26 | // Creates Model instance from |model_proto|. 
27 | static std::unique_ptr Create(const ModelProto &model_proto); 28 | }; 29 | } // namespace sentencepiece 30 | #endif // MODEL_FACTORY_H_ 31 | -------------------------------------------------------------------------------- /scripts/contrib/inject_ctt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | 5 | import sys 6 | import argparse 7 | import numpy as np 8 | 9 | DESC = "Add 'decoder_c_tt' required by Amun to a model trained with Marian v1.6.0+" 10 | 11 | 12 | def main(): 13 | args = parse_args() 14 | 15 | print("Loading model {}".format(args.input)) 16 | model = np.load(args.input) 17 | 18 | if "decoder_c_tt" in model: 19 | print("The model already contains 'decoder_c_tt'") 20 | exit() 21 | 22 | print("Adding 'decoder_c_tt' to the model") 23 | amun = {"decoder_c_tt": np.zeros((1, 0))} 24 | for tensor_name in model: 25 | amun[tensor_name] = model[tensor_name] 26 | 27 | print("Saving model...") 28 | np.savez(args.output, **amun) 29 | 30 | 31 | def parse_args(): 32 | parser = argparse.ArgumentParser(description=DESC) 33 | parser.add_argument("-i", "--input", help="input model", required=True) 34 | parser.add_argument("-o", "--output", help="output model", required=True) 35 | return parser.parse_args() 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2008-2015 Jesse Beder. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /src/tensors/gpu/prod.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "tensors/tensor.h" 4 | #include "tensors/tensor_operators.h" 5 | 6 | #include "functional/functional.h" 7 | 8 | namespace marian { 9 | namespace gpu { 10 | 11 | void Prod(marian::Tensor C, 12 | const marian::Tensor& A, 13 | const marian::Tensor& B, 14 | bool transA, 15 | bool transB, 16 | float beta = 0, 17 | float scalar = 1); 18 | 19 | void ProdWithBias(marian::Tensor C, 20 | const marian::Tensor& A, 21 | const marian::Tensor& B, 22 | const marian::Tensor& bias, 23 | bool transA, 24 | bool transB, 25 | float beta = 0, 26 | float scalar = 1); 27 | 28 | void ProdBatched(marian::Tensor C, 29 | Ptr allocator, 30 | const marian::Tensor A, 31 | const marian::Tensor B, 32 | bool transA, 33 | bool transB, 34 | float beta = 0, 35 | float scalar = 1); 36 | } // namespace gpu 37 | } // namespace marian 38 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/formatter.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | namespace spdlog 15 | { 16 | namespace details 17 | { 18 | class flag_formatter; 19 | } 20 | 21 | class formatter 22 | { 23 | public: 24 | virtual ~formatter() {} 25 | virtual void format(details::log_msg& msg) = 0; 26 | }; 27 | 28 | class pattern_formatter : public formatter 29 | { 30 | 31 | public: 32 | explicit pattern_formatter(const std::string& pattern); 33 | pattern_formatter(const pattern_formatter&) = delete; 34 | pattern_formatter& operator=(const pattern_formatter&) = delete; 35 | void format(details::log_msg& msg) override; 36 | private: 37 | const std::string _pattern; 38 | std::vector> _formatters; 39 | void handle_flag(char flag); 40 | void compile_pattern(const std::string& pattern); 41 | }; 42 | } 43 | 44 | #include 45 | 46 | -------------------------------------------------------------------------------- /src/3rd_party/cnpy/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) Carl Rogers, 2011 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/tensors/gpu/backend.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "common/config.h" 4 | #include "tensors/backend.h" // note: this is one folder up 5 | #include "tensors/gpu/cuda_helpers.h" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace marian { 12 | namespace gpu { 13 | 14 | class Backend : public marian::Backend { 15 | public: 16 | Backend(DeviceId deviceId, size_t seed) : marian::Backend(deviceId, seed) { 17 | setDevice(); 18 | setHandles(); 19 | } 20 | 21 | ~Backend() { 22 | setDevice(); 23 | cublasDestroy(cublasHandle_); 24 | } 25 | 26 | void setDevice() override { cudaSetDevice((int)deviceId_.no); } 27 | 28 | void synchronize() override { cudaStreamSynchronize(0); } 29 | 30 | cublasHandle_t getCublasHandle() { return cublasHandle_; } 31 | 32 | private: 33 | cublasHandle_t cublasHandle_; 34 | 35 | void setHandles() { 36 | cublasHandle_ = create_handle(); 37 | } 38 | 39 | cublasHandle_t create_handle() { 40 | cublasHandle_t cublasHandle; 41 | cublasCreate(&cublasHandle); 42 | return cublasHandle; 43 | } 44 | }; 45 | } // namespace gpu 46 | } // namespace marian 47 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/node/iterator.h: -------------------------------------------------------------------------------- 1 | #ifndef 
VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include "yaml-cpp/dll.h" 11 | #include "yaml-cpp/node/node.h" 12 | #include "yaml-cpp/node/detail/iterator_fwd.h" 13 | #include "yaml-cpp/node/detail/iterator.h" 14 | #include 15 | #include 16 | #include 17 | 18 | namespace YAML { 19 | namespace detail { 20 | struct iterator_value : public Node, std::pair { 21 | iterator_value() {} 22 | explicit iterator_value(const Node& rhs) 23 | : Node(rhs), 24 | std::pair(Node(Node::ZombieNode), Node(Node::ZombieNode)) {} 25 | explicit iterator_value(const Node& key, const Node& value) 26 | : Node(Node::ZombieNode), std::pair(key, value) {} 27 | }; 28 | } 29 | } 30 | 31 | #endif // VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 32 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/base_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // base sink templated over a mutex (either dummy or real) 9 | // concrete implementation should only override the _sink_it method. 10 | // all locking is taken care of here so no locking needed by the implementors.
11 | // 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | namespace spdlog 21 | { 22 | namespace sinks 23 | { 24 | template 25 | class base_sink:public sink 26 | { 27 | public: 28 | base_sink():_mutex() {} 29 | virtual ~base_sink() = default; 30 | 31 | base_sink(const base_sink&) = delete; 32 | base_sink& operator=(const base_sink&) = delete; 33 | 34 | void log(const details::log_msg& msg) override 35 | { 36 | std::lock_guard lock(_mutex); 37 | _sink_it(msg); 38 | } 39 | 40 | protected: 41 | virtual void _sink_it(const details::log_msg& msg) = 0; 42 | Mutex _mutex; 43 | }; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/doc/special_symbols.md: -------------------------------------------------------------------------------- 1 | # Use custom symbols 2 | SentencePiece model supports two types of special symbols. 3 | 4 | ## Control symbol 5 | Control symbols are used to encode special indicators for the decoder to change the behavior dynamically. 6 | Example includes the language indicators in multi-lingual models. `` and `` are reserved control symbols. 7 | Control symbols must be inserted outside of the SentencePiece segmentation. Developers need to take the responsibility to insert these symbols in data generation and decoding. 8 | 9 | It is guaranteed that control symbols have no corresponding surface strings in the original user input. Control symbols are decoded into empty strings. 10 | 11 | ## User defined symbol 12 | User defined symbol is handled as one piece in any context. If this symbol is included in the input text, this symbol is always extracted as one piece. 
13 | 14 | ## Specify special symbols in training time 15 | Use `--control_symbols` and `--user_defined_symbols` flags as follows 16 | 17 | ``` 18 | % spm_train --control_symbols=, --user_defined_symbols=, --input= --model_prefix= --vocab_size=8000 19 | ``` 20 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/details/log_msg.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/common.h" 9 | #include "spdlog/details/os.h" 10 | 11 | 12 | #include 13 | #include 14 | 15 | namespace spdlog 16 | { 17 | namespace details 18 | { 19 | struct log_msg 20 | { 21 | log_msg() = default; 22 | log_msg(const std::string *loggers_name, level::level_enum lvl) : 23 | logger_name(loggers_name), 24 | level(lvl), 25 | msg_id(0) 26 | { 27 | #ifndef SPDLOG_NO_DATETIME 28 | time = os::now(); 29 | #endif 30 | 31 | #ifndef SPDLOG_NO_THREAD_ID 32 | thread_id = os::thread_id(); 33 | #endif 34 | } 35 | 36 | log_msg(const log_msg& other) = delete; 37 | log_msg& operator=(log_msg&& other) = delete; 38 | log_msg(log_msg&& other) = delete; 39 | 40 | 41 | const std::string *logger_name; 42 | level::level_enum level; 43 | log_clock::time_point time; 44 | size_t thread_id; 45 | fmt::MemoryWriter raw; 46 | fmt::MemoryWriter formatted; 47 | size_t msg_id; 48 | }; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /cmake/FindTcmalloc.cmake: -------------------------------------------------------------------------------- 1 | # - Find Tcmalloc 2 | # Find the native Tcmalloc includes and library 3 | # 4 | # Tcmalloc_INCLUDE_DIR - where to find Tcmalloc.h, etc. 5 | # Tcmalloc_LIBRARIES - List of libraries when using Tcmalloc. 6 | # Tcmalloc_FOUND - True if Tcmalloc found. 
# Locate the directory that contains google/tcmalloc.h.
find_path(Tcmalloc_INCLUDE_DIR google/tcmalloc.h)

# Choose the library names to search for: only "tcmalloc" when USE_TCMALLOC is
# set, otherwise "tcmalloc_minimal" first with "tcmalloc" as the alternative.
if (USE_TCMALLOC)
  set(Tcmalloc_NAMES tcmalloc)
else ()
  set(Tcmalloc_NAMES tcmalloc_minimal tcmalloc)
endif ()

find_library(Tcmalloc_LIBRARY NAMES ${Tcmalloc_NAMES})

# Tcmalloc counts as found only when both the header directory and a library
# were located; Tcmalloc_LIBRARIES is set to empty otherwise.
if (Tcmalloc_INCLUDE_DIR AND Tcmalloc_LIBRARY)
  set(Tcmalloc_FOUND TRUE)
  set( Tcmalloc_LIBRARIES ${Tcmalloc_LIBRARY} )
else ()
  set(Tcmalloc_FOUND FALSE)
  set( Tcmalloc_LIBRARIES )
endif ()

# Report the result; abort configuration only if the package was REQUIRED.
if (Tcmalloc_FOUND)
  message(STATUS "Found Tcmalloc: ${Tcmalloc_LIBRARY}")
else ()
  message(STATUS "Not Found Tcmalloc")
  if (Tcmalloc_FIND_REQUIRED)
    message(STATUS "Looked for Tcmalloc libraries named ${Tcmalloc_NAMES}.")
    message(FATAL_ERROR "Could NOT find Tcmalloc library")
  endif ()
endif ()

# Mark the cache entries as advanced.
mark_as_advanced(
  Tcmalloc_LIBRARY
  Tcmalloc_INCLUDE_DIR
  )

Efficient implementation that can make assumptions about how 20 | * {@code anchor_t} values are assigned by the {@link Parser} class. 21 | */ 22 | template 23 | class AnchorDict { 24 | public: 25 | void Register(anchor_t anchor, T value) { 26 | if (anchor > m_data.size()) { 27 | m_data.resize(anchor); 28 | } 29 | m_data[anchor - 1] = value; 30 | } 31 | 32 | T Get(anchor_t anchor) const { return m_data[anchor - 1]; } 33 | 34 | private: 35 | std::vector m_data; 36 | }; 37 | } 38 | 39 | #endif // ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66 40 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/pkg/srctxz/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # See LICENSE.txt for license information 5 | # 6 | 7 | include ../../makefiles/common.mk 8 | include ../../makefiles/version.mk 9 | BUILDDIR ?= $(abspath ../../build) 10 | TXZPREPDIR := $(BUILDDIR)/srctxz 11 | PKGDIR := $(BUILDDIR)/pkg/srctxz/ 12 | 13 | TXZGEN_IN := $(wildcard *.in) 14 | TXZGEN := $(TXZGEN_IN:.in=) 15 | TXZTARGETS := $(patsubst %, $(TXZPREPDIR)/%, $(TXZGEN)) 16 | 17 | PKG_REVISION ?= 3 18 | PKG_ARCH := $(shell uname -m) 19 | 20 | prep: $(TXZTARGETS) 21 | 22 | build: prep 23 | $(MAKE) -C ../../src clean 24 | @printf "Building source tar.xz package\n" 25 | (cd $(BUILDDIR); bash srctxz/create_srctxz.sh) 26 | mkdir -p $(PKGDIR) 27 | mv $(BUILDDIR)/../../nccl-src*.txz $(PKGDIR) 28 | 29 | clean: 30 | rm -Rf $(TXZPREPDIR) $(PKGDIR) 31 | 32 | $(TXZPREPDIR)/% : %.in 33 | @printf "Generating %-35s > %s\n" $< $@ 34 | mkdir -p $(TXZPREPDIR) 35 | sed -e "s/\$${nccl:Major}/$(NCCL_MAJOR)/g" \ 36 | -e "s/\$${nccl:Minor}/$(NCCL_MINOR)/g" \ 37 | -e "s/\$${nccl:Patch}/$(NCCL_PATCH)/g" \ 38 | -e "s/\$${nccl:Suffix}/$(NCCL_SUFFIX)/g" \ 39 | $< > $@ 40 | -------------------------------------------------------------------------------- 
/src/3rd_party/spdlog/include/spdlog/sinks/sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | 7 | #pragma once 8 | 9 | #include "spdlog/details/log_msg.h" 10 | 11 | namespace spdlog 12 | { 13 | namespace sinks 14 | { 15 | class sink 16 | { 17 | public: 18 | sink() 19 | { 20 | _level = level::trace; 21 | } 22 | 23 | virtual ~sink() {} 24 | virtual void log(const details::log_msg& msg) = 0; 25 | virtual void flush() = 0; 26 | 27 | bool should_log(level::level_enum msg_level) const; 28 | void set_level(level::level_enum log_level); 29 | level::level_enum level() const; 30 | 31 | private: 32 | level_t _level; 33 | 34 | }; 35 | 36 | inline bool sink::should_log(level::level_enum msg_level) const 37 | { 38 | return msg_level >= _level.load(std::memory_order_relaxed); 39 | } 40 | 41 | inline void sink::set_level(level::level_enum log_level) 42 | { 43 | _level.store(log_level); 44 | } 45 | 46 | inline level::level_enum sink::level() const 47 | { 48 | return static_cast(_level.load(std::memory_order_relaxed)); 49 | } 50 | 51 | } 52 | } 53 | 54 | -------------------------------------------------------------------------------- /src/3rd_party/zstr/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Matei David, Ontario Institute for Cancer Research 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The 
above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/3rd_party/SQLiteCpp/LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2012-2016 Sebastien Rombauts (sebastien.rombauts@gmail.com) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is furnished 10 | to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 19 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 20 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/src/include/enqueue.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * See LICENSE.txt for license information 5 | ************************************************************************/ 6 | 7 | #ifndef NCCL_ENQUEUE_H_ 8 | #define NCCL_ENQUEUE_H_ 9 | 10 | #include "core.h" 11 | #include "group.h" 12 | 13 | typedef ncclResult_t(*ncclFunc_t)(const void* sendbuff, void* recvbuff, size_t count, 14 | ncclDataType_t type, ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream); 15 | 16 | ncclResult_t ncclEnqueueCheck(ncclFunc_t func, const char* primName, const void* sendbuff, 17 | void* recvbuff, size_t count, ncclDataType_t type, ncclRedOp_t op, int root, 18 | ncclComm_t comm, cudaStream_t stream); 19 | ncclResult_t ncclCpuBarrierIn(ncclComm_t comm, int* isLast); 20 | ncclResult_t ncclCpuBarrierLast(ncclComm_t comm); 21 | ncclResult_t ncclCpuBarrierOut(ncclComm_t comm); 22 | ncclResult_t ncclBarrierEnqueue(ncclComm_t comm); 23 | ncclResult_t ncclBarrierEnqueueWait(ncclComm_t comm); 24 | ncclResult_t ncclEnqueueEvents(ncclComm_t comm); 25 | 26 | #endif // End include guard 27 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Marcin Junczys-Dowmunt, the University of Edinburgh, Adam 4 | Mickiewicz University 5 | 6 | Permission is 
hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/src/trainer_factory.h: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.! 
14 | 15 | #ifndef TRAINER_FACTORY_H_ 16 | #define TRAINER_FACTORY_H_ 17 | 18 | #include 19 | #include "sentencepiece_model.pb.h" 20 | #include "trainer_interface.h" 21 | 22 | namespace sentencepiece { 23 | 24 | class TrainerFactory { 25 | public: 26 | // Creates Trainer instance from |trainer_spec| and |normalizer_spec|. 27 | static std::unique_ptr Create( 28 | const TrainerSpec &trainer_spec, const NormalizerSpec &normalizer_spec); 29 | }; 30 | } // namespace sentencepiece 31 | #endif // TRAINER_FACTORY_H_ 32 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/src/word_model.h: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.! 14 | 15 | #ifndef WORD_MODEL_H_ 16 | #define WORD_MODEL_H_ 17 | 18 | #include "model_interface.h" 19 | #include "sentencepiece_model.pb.h" 20 | 21 | namespace sentencepiece { 22 | namespace word { 23 | 24 | // Tokenize text with whitespaces. 
// ModelInterface implementation declared in sentencepiece::word.
class Model : public ModelInterface {
 public:
  // Constructs the model from |model_proto|.
  explicit Model(const ModelProto &model_proto);
  ~Model() override;

  // Overrides ModelInterface::Encode for the input |normalized| and returns
  // the resulting EncodeResult.
  // NOTE(review): the parameter name suggests the text has already been
  // normalized by the caller - confirm against ModelInterface.
  EncodeResult Encode(absl::string_view normalized) const override;
};
// Abstract base class: run() is pure virtual, the other hooks have default
// implementations that return 0.0f.
// NOTE(review): the class has virtual functions but declares no virtual
// destructor - confirm how derived objects are destroyed, or add
// `virtual ~DataTrainingBase() = default;`.
class DataTrainingBase {
protected:
  typedef typename CorpusBase::Sample Sample;
  typedef std::vector Samples;  // @TODO: type names should be capitalized

  Ptr options_;
  std::vector> vocabs_;  // copy of the vector passed to the constructor
public:
  // Stores `options` and a copy of `vocabs`.
  DataTrainingBase(Ptr options, std::vector> &vocabs) : options_(options), vocabs_(vocabs) {};

  // Must be implemented by derived classes; receives the batch of samples
  // and a size_t `e`.
  // NOTE(review): the meaning of `e` is not visible here - confirm with callers.
  virtual void run(Samples &batch, size_t e) = 0;

  // Default implementation ignores `step` and returns 0.0f.
  virtual float get_mod_from_emb(size_t step) { return 0.0f;};

  // Default implementation ignores `step` and returns 0.0f.
  virtual float get_mod_from_graph(size_t step) { return 0.0f;};

  // Default implementation returns 0.0f.
  virtual float get_init_value() { return 0.0f;};
};
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/glog-bench-mt.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "glog/logging.h" 11 | 12 | using namespace std; 13 | 14 | int main(int argc, char* argv[]) 15 | { 16 | 17 | int thread_count = 10; 18 | if(argc > 1) 19 | thread_count = atoi(argv[1]); 20 | 21 | int howmany = 1000000; 22 | 23 | FLAGS_logtostderr = 0; 24 | FLAGS_log_dir = "logs"; 25 | google::InitGoogleLogging(argv[0]); 26 | 27 | std::atomic msg_counter {0}; 28 | vector threads; 29 | 30 | for (int t = 0; t < thread_count; ++t) 31 | { 32 | threads.push_back(std::thread([&]() 33 | { 34 | while (true) 35 | { 36 | int counter = ++msg_counter; 37 | if (counter > howmany) break; 38 | LOG(INFO) << "glog message #" << counter << ": This is some text for your pleasure"; 39 | } 40 | })); 41 | } 42 | 43 | 44 | for(auto &t:threads) 45 | { 46 | t.join(); 47 | }; 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/src/char_model.h: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc. 
// Tokenizes text into a character sequence.
// ModelInterface implementation declared in sentencepiece::character.
class Model : public ModelInterface {
 public:
  // Constructs the model from |model_proto|.
  explicit Model(const ModelProto &model_proto);
  ~Model() override;

  // Overrides ModelInterface::Encode for the input |normalized| and returns
  // the resulting EncodeResult.
  // NOTE(review): the parameter name suggests the text has already been
  // normalized by the caller - confirm against ModelInterface.
  EncodeResult Encode(absl::string_view normalized) const override;
};
4 | 5 | Permission is hereby granted, free of charge, to any person 6 | obtaining a copy of this software and associated documentation 7 | files (the "Software"), to deal in the Software without 8 | restriction, including without limitation the rights to use, 9 | copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the 11 | Software is furnished to do so, subject to the following 12 | conditions: 13 | 14 | The above copyright notice and this permission notice shall be 15 | included in all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | OTHER DEALINGS IN THE SOFTWARE. 
# Find the NCCL header and library.
#
# Sets NCCL_INCLUDE_DIR and NCCL_LIBRARIES, and NCCL_FOUND through
# find_package_handle_standard_args. When USE_STATIC_LIBS is set, the static
# archive libnccl_static.a is searched for instead of the default nccl library.

# Directories that may contain nccl.h. The environment-based entries use the
# same variables as the library paths below (NCCL_DIR, CUDA_TOOLKIT_ROOT_DIR).
set(NCCL_INC_PATHS
    /usr/include
    /usr/local/include
    /usr/local/cuda/include
    $ENV{NCCL_DIR}/include
    $ENV{CUDA_TOOLKIT_ROOT_DIR}/include
    )

# Directories that may contain the NCCL library.
set(NCCL_LIB_PATHS
    /lib
    /lib64
    /usr/lib
    /usr/lib64
    /usr/local/lib
    /usr/local/lib64
    /usr/local/cuda/lib64
    $ENV{NCCL_DIR}/lib64
    $ENV{CUDA_TOOLKIT_ROOT_DIR}/lib64
    /usr/local/cuda/lib
    $ENV{NCCL_DIR}/lib
    $ENV{CUDA_TOOLKIT_ROOT_DIR}/lib
    )

find_path(NCCL_INCLUDE_DIR NAMES nccl.h PATHS ${NCCL_INC_PATHS})

if (USE_STATIC_LIBS)
  message(STATUS "Trying to find static NCCL library")
  find_library(NCCL_LIBRARIES NAMES libnccl_static.a PATHS ${NCCL_LIB_PATHS})
else (USE_STATIC_LIBS)
  find_library(NCCL_LIBRARIES NAMES nccl PATHS ${NCCL_LIB_PATHS})
endif (USE_STATIC_LIBS)

# Sets NCCL_FOUND when both the include directory and the library were found.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIR NCCL_LIBRARIES)

if (NCCL_FOUND)
  message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIR}, library: ${NCCL_LIBRARIES})")
  mark_as_advanced(NCCL_INCLUDE_DIR NCCL_LIBRARIES)
endif ()
config directory 18 | try { 19 | return canonical(filesystem::Path{nodePath}, configDir).string(); 20 | } catch(filesystem::FilesystemError& e) { 21 | // will fail if file does not exist; use parent in that case 22 | std::cerr << e.what() << std::endl; 23 | auto parentPath = filesystem::Path{nodePath}.parentPath(); 24 | return (canonical(parentPath, configDir) 25 | / filesystem::Path{nodePath}.filename()) 26 | .string(); 27 | } 28 | }; 29 | 30 | processPaths(config, transformFunc, PATHS); 31 | } 32 | 33 | } // namespace cli 34 | } // namespace marian 35 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/indentation.h: -------------------------------------------------------------------------------- 1 | #ifndef INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | #include "yaml-cpp/ostream_wrapper.h" 14 | 15 | namespace YAML { 16 | struct Indentation { 17 | Indentation(std::size_t n_) : n(n_) {} 18 | std::size_t n; 19 | }; 20 | 21 | inline ostream_wrapper& operator<<(ostream_wrapper& out, 22 | const Indentation& indent) { 23 | for (std::size_t i = 0; i < indent.n; i++) 24 | out << ' '; 25 | return out; 26 | } 27 | 28 | struct IndentTo { 29 | IndentTo(std::size_t n_) : n(n_) {} 30 | std::size_t n; 31 | }; 32 | 33 | inline ostream_wrapper& operator<<(ostream_wrapper& out, 34 | const IndentTo& indent) { 35 | while (out.col() < indent.n) 36 | out << ' '; 37 | return out; 38 | } 39 | } 40 | 41 | #endif // INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66 42 | -------------------------------------------------------------------------------- /src/common/io.h: 
// Interface for handling model files in Marian. Both *.npz and *.bin files
// are accessed the same way and are distinguished by their suffix
// (*.npz or *.bin).
// Holds a `memory` object through a shared_memory handle and forwards node
// creation to it.
class YAML_CPP_API memory_holder {
 public:
  // Starts out holding a newly allocated `memory`.
  memory_holder() : m_pMemory(new memory) {}

  // Creates a node in the held `memory` and returns a reference to it.
  node& create_node() { return m_pMemory->create_node(); }
  // Declared here, defined elsewhere.
  // NOTE(review): presumably combines this holder's memory with rhs's -
  // confirm against the definition.
  void merge(memory_holder& rhs);

 private:
  shared_memory m_pMemory;
};
VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66 47 | -------------------------------------------------------------------------------- /src/tests/logger_test.cpp: -------------------------------------------------------------------------------- 1 | #include "common/timer.h" 2 | #include "common/logging.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | // small test program for playing around with spdlog formatting of messages 9 | 10 | std::shared_ptr stderrLoggerTest( 11 | const std::string& name, 12 | const std::string& pattern, 13 | const std::vector& files) { 14 | std::vector sinks; 15 | 16 | auto stderr_sink = spdlog::sinks::stderr_sink_mt::instance(); 17 | sinks.push_back(stderr_sink); 18 | 19 | for(auto&& file : files) { 20 | auto file_sink 21 | = std::make_shared(file, true); 22 | sinks.push_back(file_sink); 23 | } 24 | 25 | auto logger 26 | = std::make_shared(name, begin(sinks), end(sinks)); 27 | 28 | spdlog::register_logger(logger); 29 | logger->set_pattern(pattern); 30 | return logger; 31 | } 32 | 33 | int main() { 34 | std::vector logfiles; 35 | Logger info(stderrLoggerTest("info", "[%Y-%m-%d %T] %v", logfiles)); 36 | 37 | info->info("hello {:06.2f}", .7); 38 | 39 | marian::timer::Timer timer; 40 | 41 | info->info("time is {:.5f} bla {:.2f}", timer.elapsed(), .7); 42 | } 43 | -------------------------------------------------------------------------------- /scripts/server/client_example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function, unicode_literals, division 4 | 5 | import sys 6 | import time 7 | import argparse 8 | 9 | from websocket import create_connection 10 | 11 | 12 | if __name__ == "__main__": 13 | # handle command-line options 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument("-b", "--batch-size", type=int, default=1) 16 | parser.add_argument("-p", "--port", type=int, default=8080) 17 | args = 
parser.parse_args() 18 | 19 | # open connection 20 | ws = create_connection("ws://localhost:{}/translate".format(args.port)) 21 | 22 | count = 0 23 | batch = "" 24 | for line in sys.stdin: 25 | count += 1 26 | batch += line.decode('utf-8') if sys.version_info < (3, 0) else line 27 | if count == args.batch_size: 28 | # translate the batch 29 | ws.send(batch) 30 | result = ws.recv() 31 | print(result.rstrip()) 32 | 33 | count = 0 34 | batch = "" 35 | 36 | if count: 37 | # translate the remaining sentences 38 | ws.send(batch) 39 | result = ws.recv() 40 | print(result.rstrip()) 41 | 42 | # close connection 43 | ws.close() 44 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/sinks/ostream_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace spdlog 15 | { 16 | namespace sinks 17 | { 18 | template 19 | class ostream_sink: public base_sink 20 | { 21 | public: 22 | explicit ostream_sink(std::ostream& os, bool force_flush=false) :_ostream(os), _force_flush(force_flush) {} 23 | ostream_sink(const ostream_sink&) = delete; 24 | ostream_sink& operator=(const ostream_sink&) = delete; 25 | virtual ~ostream_sink() = default; 26 | 27 | protected: 28 | void _sink_it(const details::log_msg& msg) override 29 | { 30 | _ostream.write(msg.formatted.data(), msg.formatted.size()); 31 | if (_force_flush) 32 | _ostream.flush(); 33 | } 34 | 35 | void flush() override 36 | { 37 | _ostream.flush(); 38 | } 39 | 40 | std::ostream& _ostream; 41 | bool _force_flush; 42 | }; 43 | 44 | typedef ostream_sink ostream_sink_mt; 45 | typedef ostream_sink ostream_sink_st; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- 
/src/3rd_party/yaml-cpp/collectionstack.h: -------------------------------------------------------------------------------- 1 | #ifndef COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | namespace YAML { 14 | struct CollectionType { 15 | enum value { NoCollection, BlockMap, BlockSeq, FlowMap, FlowSeq, CompactMap }; 16 | }; 17 | 18 | class CollectionStack { 19 | public: 20 | CollectionType::value GetCurCollectionType() const { 21 | if (collectionStack.empty()) 22 | return CollectionType::NoCollection; 23 | return collectionStack.top(); 24 | } 25 | 26 | void PushCollectionType(CollectionType::value type) { 27 | collectionStack.push(type); 28 | } 29 | void PopCollectionType(CollectionType::value type) { 30 | assert(type == GetCurCollectionType()); type; 31 | collectionStack.pop(); 32 | } 33 | 34 | private: 35 | std::stack collectionStack; 36 | }; 37 | } 38 | 39 | #endif // COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 40 | -------------------------------------------------------------------------------- /src/functional/tensor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "functional/array.h" 4 | #include "functional/shape.h" 5 | #include "tensors/tensor.h" 6 | 7 | namespace marian { 8 | namespace functional { 9 | 10 | template 11 | struct Tensor { 12 | T* data_; 13 | functional::Shape shape_; 14 | 15 | __HD__ Tensor() {} 16 | 17 | __HD__ Tensor(T* ptr, const functional::Shape& shape) 18 | : data_(ptr), shape_(shape) {} 19 | 20 | __H__ Tensor(marian::Tensor t) : data_(t->data()), shape_(t->shape()) {} 21 | 22 | __HDI__ T& operator[](size_t i) { return data_[i]; } // element accessors return T& (not float&) so they match data_ for every element type 23 | __HDI__ 
const T& operator[](size_t i) const { return data_[i]; } 24 | 25 | __HDI__ T& operator[]( 26 | const functional::Array& indices) { 27 | return data_[shape_.index(indices)]; 28 | } 29 | 30 | __HDI__ const T& operator[]( 31 | const functional::Array& indices) const { 32 | return data_[shape_.index(indices)]; 33 | } 34 | 35 | __HDI__ T* data() { return data_; } 36 | __HDI__ const T* data() const { return data_; } 37 | 38 | __HDI__ Shape& shape() { return shape_; } 39 | __HDI__ const Shape& shape() const { return shape_; } 40 | }; 41 | } // namespace functional 42 | } // namespace marian -------------------------------------------------------------------------------- /CL_tools/stat_mod.py: -------------------------------------------------------------------------------- 1 | # -*- encoding=utf-8 -*- 2 | import numpy as np 3 | import codecs 4 | import sys 5 | 6 | print "python .py vocab.txt iter*.npz" 7 | 8 | out = codecs.open('output', 'w', encoding='utf-8') 9 | 10 | vocab = codecs.open(sys.argv[1], 'r', encoding='utf-8', errors='ignore').readlines() 11 | vocab = [s.split(":")[0] for s in vocab] 12 | 13 | both = [] 14 | for i in range(2, len(sys.argv)): 15 | name = sys.argv[i] 16 | iter = name.split(".")[1][4:] 17 | both.append((name, iter)) 18 | both.sort(key=lambda x: int(x[1])) 19 | print both 20 | models_names = [x[0] for x in both] 21 | models_iters = [x[1] for x in both] 22 | # loading emb 23 | vocab_stats = [[] for _ in range(len(vocab))] 24 | for name in models_names: 25 | model = np.load(name) 26 | # encoder_Wemb 27 | print "=" * 100 28 | Wemb = model['encoder_Wemb'] 29 | print Wemb, "===>", Wemb.shape, "===>", name 30 | vocab_id = 0 31 | for word in Wemb: 32 | t = [x * x for x in word] 33 | mod = np.sum(t) ** 0.5 34 | vocab_stats[vocab_id].append(mod) 35 | vocab_id += 1 36 | # print vocab_stats 37 | 38 | # output stats 39 | out.write("ID:" + ",".join(models_iters) + '\n') 40 | for id, v_data in enumerate(vocab_stats): 41 | out.write("%s:%s\n" % 
(vocab[id], str(v_data))) 42 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/fmt/bundled/ostream.cc: -------------------------------------------------------------------------------- 1 | /* 2 | Formatting library for C++ - std::ostream support 3 | 4 | Copyright (c) 2012 - 2016, Victor Zverovich 5 | All rights reserved. 6 | 7 | For the license information refer to format.h. 8 | */ 9 | 10 | #include "ostream.h" 11 | 12 | namespace fmt { 13 | 14 | namespace { 15 | // Write the content of w to os. 16 | void write(std::ostream &os, Writer &w) { 17 | const char *data = w.data(); 18 | typedef internal::MakeUnsigned::Type UnsignedStreamSize; 19 | UnsignedStreamSize size = w.size(); 20 | UnsignedStreamSize max_size = 21 | internal::to_unsigned((std::numeric_limits::max)()); 22 | do { 23 | UnsignedStreamSize n = size <= max_size ? size : max_size; 24 | os.write(data, static_cast(n)); 25 | data += n; 26 | size -= n; 27 | } while (size != 0); 28 | } 29 | } 30 | 31 | FMT_FUNC void print(std::ostream &os, CStringRef format_str, ArgList args) { 32 | MemoryWriter w; 33 | w.write(format_str, args); 34 | write(os, w); 35 | } 36 | 37 | FMT_FUNC int fprintf(std::ostream &os, CStringRef format, ArgList args) { 38 | MemoryWriter w; 39 | printf(w, format, args); 40 | write(os, w); 41 | return static_cast(w.size()); 42 | } 43 | } // namespace fmt 44 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/ostream_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/details/null_mutex.h" 9 | #include "spdlog/sinks/base_sink.h" 10 | 11 | #include 12 | #include 13 | 14 | namespace spdlog 15 | { 16 | namespace sinks 17 | { 18 | template 19 | class ostream_sink: public base_sink 20 | { 21 | public: 22 | explicit ostream_sink(std::ostream& os, bool force_flush=false) :_ostream(os), _force_flush(force_flush) {} 23 | ostream_sink(const ostream_sink&) = delete; 24 | ostream_sink& operator=(const ostream_sink&) = delete; 25 | virtual ~ostream_sink() = default; 26 | 27 | protected: 28 | void _sink_it(const details::log_msg& msg) override 29 | { 30 | _ostream.write(msg.formatted.data(), msg.formatted.size()); 31 | if (_force_flush) 32 | _ostream.flush(); 33 | } 34 | 35 | void _flush() override 36 | { 37 | _ostream.flush(); 38 | } 39 | 40 | std::ostream& _ostream; 41 | bool _force_flush; 42 | }; 43 | 44 | typedef ostream_sink ostream_sink_mt; 45 | typedef ostream_sink ostream_sink_st; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/training/graph_group_async_drop.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "training/graph_group_async.h" 4 | 5 | #include "training/gradient_dropping/dropper.h" 6 | #include "training/gradient_dropping/sparse_tensor.h" 7 | 8 | namespace marian { 9 | 10 | class AsyncGraphGroupDrop : public AsyncGraphGroup { 11 | std::vector fetchStep_; 12 | std::vector pushStep_; 13 | std::vector fetch_ready; 14 | 15 | bool drop_first = 1; 16 | 17 | size_t dropping_warmup; 18 | float droping_rate; 19 | float dropping_momentum; 20 | 21 | std::vector> droppers_; 22 | 23 | std::vector> sparseGrads_, sparseShards_; 24 | 25 | protected: 26 | void init(Ptr batch) override; 27 | void pushGradients(Tensor newGrads, int device_id) override; 28 | void fetchParams(Tensor 
oldParams, 29 | const std::vector& params, 30 | int device_id) override; 31 | 32 | public: 33 | AsyncGraphGroupDrop(Ptr options) 34 | : AsyncGraphGroup(options), 35 | dropping_warmup{options->get("grad-dropping-warmup")}, 36 | droping_rate{options->get("grad-dropping-rate")}, 37 | dropping_momentum{options->get("grad-dropping-momentum")} {} 38 | }; 39 | } // namespace marian 40 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/utils.cpp: -------------------------------------------------------------------------------- 1 | #include "includes.h" 2 | 3 | 4 | void prepare_logdir() 5 | { 6 | spdlog::drop_all(); 7 | #ifdef _WIN32 8 | system("if not exist logs mkdir logs"); 9 | system("del /F /Q logs\\*"); 10 | #else 11 | auto rv = system("mkdir -p logs"); 12 | rv = system("rm -f logs/*"); 13 | (void)rv; 14 | #endif 15 | } 16 | 17 | 18 | std::string file_contents(const std::string& filename) 19 | { 20 | std::ifstream ifs(filename); 21 | if (!ifs) 22 | throw std::runtime_error("Failed open file "); 23 | return std::string((std::istreambuf_iterator(ifs)), 24 | (std::istreambuf_iterator())); 25 | 26 | } 27 | 28 | std::size_t count_lines(const std::string& filename) 29 | { 30 | std::ifstream ifs(filename); 31 | if (!ifs) 32 | throw std::runtime_error("Failed open file "); 33 | 34 | std::string line; 35 | size_t counter = 0; 36 | while(std::getline(ifs, line)) 37 | counter++; 38 | return counter; 39 | } 40 | 41 | std::size_t get_filesize(const std::string& filename) 42 | { 43 | std::ifstream ifs(filename, std::ifstream::ate | std::ifstream::binary); 44 | if (!ifs) 45 | throw std::runtime_error("Failed open file "); 46 | 47 | return static_cast(ifs.tellg()); 48 | } 49 | -------------------------------------------------------------------------------- /src/layers/convolution.cpp: -------------------------------------------------------------------------------- 1 | #include "layers/convolution.h" 2 | #include 
"graph/node_operators_binary.h" 3 | 4 | namespace marian { 5 | 6 | #ifdef CUDNN 7 | Convolution::Convolution(Ptr graph) : Factory(graph) {} 8 | 9 | Expr Convolution::apply(Expr x) { 10 | auto prefix = opt("prefix"); 11 | auto kernelDims = opt>("kernel-dims"); 12 | auto kernelNum = opt("kernel-num"); 13 | auto paddings = opt>("paddings", std::make_pair(0, 0)); 14 | auto strides = opt>("strides", std::make_pair(1, 1)); 15 | 16 | int layerIn = x->shape()[1]; 17 | auto kernel 18 | = graph_->param(prefix + "_conv_kernels", 19 | {layerIn, kernelNum, kernelDims.first, kernelDims.second}, 20 | inits::glorot_uniform); 21 | 22 | auto bias = graph_->param( 23 | prefix + "_conv_bias", {1, kernelNum, 1, 1}, inits::zeros); 24 | 25 | std::vector nodes = {x, kernel, bias}; 26 | return Expression( 27 | nodes, paddings.first, paddings.second, strides.first, strides.second); 28 | } 29 | 30 | Expr Convolution::apply(const std::vector&) { 31 | ABORT("Can't apply convolution on many inputs at once"); 32 | return nullptr; 33 | } 34 | #endif 35 | 36 | } // namespace marian 37 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/easylogging-bench-mt.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #define _ELPP_THREAD_SAFE 11 | #include "easylogging++.h" 12 | _INITIALIZE_EASYLOGGINGPP 13 | 14 | using namespace std; 15 | 16 | int main(int argc, char* argv[]) 17 | { 18 | 19 | int thread_count = 10; 20 | if(argc > 1) 21 | thread_count = atoi(argv[1]); 22 | 23 | int howmany = 1000000; 24 | 25 | // Load configuration from file 26 | el::Configurations conf("easyl.conf"); 27 | el::Loggers::reconfigureLogger("default", conf); 28 | 29 | std::atomic msg_counter {0}; 30 | vector threads; 31 | 32 | for (int t = 0; t < thread_count; ++t) 33 | { 34 | threads.push_back(std::thread([&]() 35 | { 36 | while (true) 37 | { 38 | int counter = ++msg_counter; 39 | if (counter > howmany) break; 40 | LOG(INFO) << "easylog message #" << counter << ": This is some text for your pleasure"; 41 | } 42 | })); 43 | } 44 | 45 | 46 | for(auto &t:threads) 47 | { 48 | t.join(); 49 | }; 50 | 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/src/char_model_trainer.h: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.! 
14 | 15 | #ifndef CHAR_MODEL_TRAINER_H_ 16 | #define CHAR_MODEL_TRAINER_H_ 17 | 18 | #include "sentencepiece_model.pb.h" 19 | #include "trainer_interface.h" 20 | 21 | namespace sentencepiece { 22 | namespace character { 23 | 24 | // Trainer class for character model. 25 | class Trainer : public TrainerInterface { 26 | public: 27 | Trainer(const TrainerSpec &trainer_spec, 28 | const NormalizerSpec &normalizer_spec) 29 | : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec) {} 30 | 31 | util::Status Train() override; 32 | }; 33 | } // namespace character 34 | } // namespace sentencepiece 35 | #endif // CHAR_MODEL_TRAINER_H_ 36 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/src/word_model.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.! 
14 | 15 | #include "word_model.h" 16 | #include "util.h" 17 | 18 | namespace sentencepiece { 19 | namespace word { 20 | 21 | Model::Model(const ModelProto &model_proto) { 22 | model_proto_ = &model_proto; 23 | InitializePieces(); 24 | } 25 | 26 | Model::~Model() {} 27 | 28 | EncodeResult Model::Encode(absl::string_view normalized) const { 29 | if (!status().ok() || normalized.empty()) { 30 | return {}; 31 | } 32 | 33 | EncodeResult output; 34 | for (const auto &w : SplitIntoWords(normalized)) { 35 | output.emplace_back(w, PieceToId(w)); 36 | } 37 | 38 | return output; 39 | } 40 | 41 | } // namespace word 42 | } // namespace sentencepiece 43 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/formatter.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/details/log_msg.h" 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | namespace spdlog 15 | { 16 | namespace details 17 | { 18 | class flag_formatter; 19 | } 20 | 21 | class formatter 22 | { 23 | public: 24 | virtual ~formatter() {} 25 | virtual void format(details::log_msg& msg) = 0; 26 | }; 27 | 28 | class pattern_formatter SPDLOG_FINAL : public formatter 29 | { 30 | 31 | public: 32 | explicit pattern_formatter(const std::string& pattern, pattern_time_type pattern_time = pattern_time_type::local); 33 | pattern_formatter(const pattern_formatter&) = delete; 34 | pattern_formatter& operator=(const pattern_formatter&) = delete; 35 | void format(details::log_msg& msg) override; 36 | private: 37 | const std::string _pattern; 38 | const pattern_time_type _pattern_time; 39 | std::vector> _formatters; 40 | std::tm get_time(details::log_msg& msg); 41 | void handle_flag(char flag); 42 | void compile_pattern(const std::string& pattern); 43 | }; 44 | } 
45 | 46 | #include "spdlog/details/pattern_formatter_impl.h" 47 | 48 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/makefiles/formatting.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # See LICENSE.txt for license information 5 | # 6 | 7 | # Prerequisite: $(FILESTOFORMAT) contains the list of files of interest for formatting 8 | # As this file defines a new target (format), it should be included at least after the definition of the 9 | # default target. 10 | 11 | ASTYLE_FORMAT_OPTS=-Qv --style=java --indent-after-parens --indent-modifiers --indent-switches --indent-continuation=2 --keep-one-line-blocks --keep-one-line-statements --indent=spaces=2 --lineend=linux --suffix=none 12 | ASTYLEDIR := $(BUILDDIR)/contrib 13 | ASTYLETAR := $(ASTYLEDIR)/astyle.tar.gz 14 | ASTYLEBIN := $(ASTYLEDIR)/astyle/build/gcc/bin/astyle 15 | ASTYLEBLD := $(ASTYLEDIR)/astyle/build/gcc/ 16 | ASTYLEVER := 3.1 17 | ASTYLEURL := "https://versaweb.dl.sourceforge.net/project/astyle/astyle/astyle%20$(ASTYLEVER)/astyle_$(ASTYLEVER)_linux.tar.gz" 18 | 19 | $(ASTYLEDIR) : 20 | @mkdir -p $(ASTYLEDIR) 21 | 22 | $(ASTYLETAR) : $(ASTYLEDIR) 23 | @wget -q -O $(ASTYLETAR) $(ASTYLEURL) 24 | 25 | $(ASTYLEBLD) : $(ASTYLETAR) 26 | @cd $(ASTYLEDIR) && tar xzf $(ASTYLETAR) 27 | 28 | $(ASTYLEBIN) : $(ASTYLEBLD) 29 | ${MAKE} -C $(ASTYLEBLD) 30 | 31 | .PHONY : format 32 | format : $(ASTYLEBIN) 33 | @$(ASTYLEBIN) $(ASTYLE_FORMAT_OPTS) $(FILESTOFORMAT) 34 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/include/spdlog/sinks/base_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // base sink templated over a mutex (either dummy or real) 9 | // concrete implementation should only override the _sink_it method. 10 | // all locking is taken care of here so no locking needed by the implementers.. 11 | // 12 | 13 | #include "spdlog/sinks/sink.h" 14 | #include "spdlog/formatter.h" 15 | #include "spdlog/common.h" 16 | #include "spdlog/details/log_msg.h" 17 | 18 | #include 19 | 20 | namespace spdlog 21 | { 22 | namespace sinks 23 | { 24 | template 25 | class base_sink:public sink 26 | { 27 | public: 28 | base_sink():_mutex() {} 29 | virtual ~base_sink() = default; 30 | 31 | base_sink(const base_sink&) = delete; 32 | base_sink& operator=(const base_sink&) = delete; 33 | 34 | void log(const details::log_msg& msg) SPDLOG_FINAL override 35 | { 36 | std::lock_guard lock(_mutex); 37 | _sink_it(msg); 38 | } 39 | void flush() SPDLOG_FINAL override 40 | { 41 | _flush(); 42 | } 43 | 44 | protected: 45 | virtual void _sink_it(const details::log_msg& msg) = 0; 46 | virtual void _flush() = 0; 47 | Mutex _mutex; 48 | }; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/command/marian_conv.cpp: -------------------------------------------------------------------------------- 1 | #include "marian.h" 2 | 3 | #include "common/cli_wrapper.h" 4 | 5 | #include 6 | 7 | int main(int argc, char** argv) { 8 | using namespace marian; 9 | 10 | createLoggers(); 11 | 12 | auto options = New(); 13 | { 14 | auto cli = New( 15 | options, 16 | "Convert a model in the .npz format to a mmap-able binary model", 17 | "Allowed options", 18 | "Examples:\n" 19 | " ./marian-conv -f model.npz -t model.bin"); 20 | cli->add("--from,-f", "Input model", "model.npz"); 21 | cli->add("--to,-t", "Output model", "model.bin"); 22 | cli->parse(argc, argv); 23 | } 24 | auto modelFrom = options->get("from"); 25 | auto modelTo = 
options->get("to"); 26 | 27 | LOG(info, "Outputting {}", modelTo); 28 | 29 | YAML::Node config; 30 | std::stringstream configStr; 31 | marian::io::getYamlFromModel(config, "special:model.yml", modelFrom); 32 | configStr << config; 33 | 34 | auto graph = New(true, false); 35 | graph->setDevice(CPU0); 36 | 37 | graph->load(modelFrom); 38 | graph->forward(); 39 | graph->save(modelTo, configStr.str()); 40 | 41 | // graph->saveBinary(vm["bin"].as()); 42 | 43 | LOG(info, "Finished"); 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/bench/spdlog-bench-mt.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "spdlog/spdlog.h" 11 | 12 | 13 | using namespace std; 14 | 15 | int main(int argc, char* argv[]) 16 | { 17 | 18 | int thread_count = 10; 19 | if(argc > 1) 20 | thread_count = std::atoi(argv[1]); 21 | 22 | int howmany = 1000000; 23 | 24 | namespace spd = spdlog; 25 | 26 | auto logger = spdlog::create("file_logger", "logs/spd-bench-mt.txt", false); 27 | 28 | logger->set_pattern("[%Y-%b-%d %T.%e]: %v"); 29 | 30 | std::atomic msg_counter {0}; 31 | std::vector threads; 32 | 33 | for (int t = 0; t < thread_count; ++t) 34 | { 35 | threads.push_back(std::thread([&]() 36 | { 37 | while (true) 38 | { 39 | int counter = ++msg_counter; 40 | if (counter > howmany) break; 41 | logger->info("spdlog message #{}: This is some text for your pleasure", counter); 42 | } 43 | })); 44 | } 45 | 46 | 47 | for(auto &t:threads) 48 | { 49 | t.join(); 50 | }; 51 | 52 | 53 | 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/tag.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "directives.h" // IWYU pragma: keep 5 | #include "tag.h" 6 | #include "token.h" 7 | 8 | namespace YAML { 9 | Tag::Tag(const Token& token) : type(static_cast(token.data)) { 10 | switch (type) { 11 | case VERBATIM: 12 | value = token.value; 13 | break; 14 | case PRIMARY_HANDLE: 15 | value = token.value; 16 | break; 17 | case SECONDARY_HANDLE: 18 | value = token.value; 19 | break; 20 | case NAMED_HANDLE: 21 | handle = token.value; 22 | value = token.params[0]; 23 | break; 24 | case NON_SPECIFIC: 25 | break; 26 | default: 27 | assert(false); 28 | } 29 | } 30 | 31 | const std::string Tag::Translate(const Directives& directives) { 32 | switch (type) { 33 | case VERBATIM: 34 | return value; 35 | case PRIMARY_HANDLE: 36 | return directives.TranslateTagHandle("!") + value; 37 | case SECONDARY_HANDLE: 38 | return directives.TranslateTagHandle("!!") + value; 39 | case NAMED_HANDLE: 40 | return directives.TranslateTagHandle("!" + handle + "!") + value; 41 | case NON_SPECIFIC: 42 | // TODO: 43 | return "!"; 44 | default: 45 | assert(false); 46 | } 47 | throw std::runtime_error("yaml-cpp: internal error, bad tag type"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/3rd_party/CLI/Macros.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Distributed under the 3-Clause BSD License. See accompanying 4 | // file LICENSE or https://github.com/CLIUtils/CLI11 for details. 
5 | 6 | // [CLI11:verbatim] 7 | 8 | // The following version macro is very similar to the one in PyBind11 9 | #if !(defined(_MSC_VER) && __cplusplus == 199711L) && !defined(__INTEL_COMPILER) 10 | #if __cplusplus >= 201402L 11 | #define CLI11_CPP14 12 | #if __cplusplus >= 201703L 13 | #define CLI11_CPP17 14 | #if __cplusplus > 201703L 15 | #define CLI11_CPP20 16 | #endif 17 | #endif 18 | #endif 19 | #elif defined(_MSC_VER) && __cplusplus == 199711L 20 | // MSVC sets _MSVC_LANG rather than __cplusplus (supposedly until the standard is fully implemented) 21 | // Unless you use the /Zc:__cplusplus flag on Visual Studio 2017 15.7 Preview 3 or newer 22 | #if _MSVC_LANG >= 201402L 23 | #define CLI11_CPP14 24 | #if _MSVC_LANG > 201402L && _MSC_VER >= 1910 25 | #define CLI11_CPP17 26 | #if _MSVC_LANG > 201703L && _MSC_VER >= 1910 // _MSVC_LANG has a single leading underscore; __MSVC_LANG is never defined and would evaluate to 0 27 | #define CLI11_CPP20 28 | #endif 29 | #endif 30 | #endif 31 | #endif 32 | 33 | #if defined(CLI11_CPP14) 34 | #define CLI11_DEPRECATED(reason) [[deprecated(reason)]] 35 | #elif defined(_MSC_VER) 36 | #define CLI11_DEPRECATED(reason) __declspec(deprecated(reason)) 37 | #else 38 | #define CLI11_DEPRECATED(reason) __attribute__((deprecated(reason))) 39 | #endif 40 | 41 | // [CLI11:verbatim] 42 | -------------------------------------------------------------------------------- /src/3rd_party/yaml-cpp/ptr_vector.h: -------------------------------------------------------------------------------- 1 | #ifndef PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 | #define PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 | 4 | #if defined(_MSC_VER) || \ 5 | (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 | (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 | #pragma once 8 | #endif 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "yaml-cpp/noncopyable.h" 16 | 17 | namespace YAML { 18 | 19 | // TODO: This class is no longer needed 20 | template 21 | class ptr_vector : private 
YAML::noncopyable { 22 | public: 23 | ptr_vector() {} 24 | 25 | void clear() { m_data.clear(); } 26 | 27 | std::size_t size() const { return m_data.size(); } 28 | bool empty() const { return m_data.empty(); } 29 | 30 | void push_back(std::unique_ptr&& t) { m_data.push_back(std::move(t)); } 31 | T& operator[](std::size_t i) { return *m_data[i]; } 32 | const T& operator[](std::size_t i) const { return *m_data[i]; } 33 | 34 | T& back() { return *(m_data.back().get()); } 35 | 36 | const T& back() const { return *(m_data.back().get()); } 37 | 38 | private: 39 | std::vector> m_data; 40 | }; 41 | } 42 | 43 | #endif // PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 44 | -------------------------------------------------------------------------------- /src/3rd_party/pathie-cpp/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2015, 2017 Marvin Gülker 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 18 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/src/unicode_script.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.! 
14 | 15 | #include "unicode_script.h" 16 | #include 17 | #include "unicode_script_map.h" 18 | #include "util.h" 19 | 20 | namespace sentencepiece { 21 | namespace unicode_script { 22 | namespace { 23 | /* Owns the lookup table smap_; the constructor fills it by calling InitTable(&smap_). */ class GetScriptInternal { 24 | public: 25 | GetScriptInternal() { InitTable(&smap_); } 26 | 27 | /* Looks c up in smap_ through port::FindWithDefault, with ScriptType::U_Common passed as the default argument. */ ScriptType GetScript(char32 c) const { 28 | return port::FindWithDefault(smap_, c, ScriptType::U_Common); 29 | } 30 | 31 | private: 32 | std::unordered_map smap_; 33 | }; 34 | } // namespace 35 | 36 | /* Public entry point: forwards to a function-local static GetScriptInternal instance, which is constructed the first time this function runs. */ ScriptType GetScript(char32 c) { 37 | static GetScriptInternal sc; 38 | return sc.GetScript(c); 39 | } 40 | } // namespace unicode_script 41 | } // namespace sentencepiece 42 | -------------------------------------------------------------------------------- /src/common/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace marian { 7 | namespace utils { 8 | 9 | void trim(std::string& s); 10 | void trimLeft(std::string& s); 11 | void trimRight(std::string& s); 12 | 13 | /* NOTE(review): the split/splitAny overloads declare del as a by-value const std::string, while join declares it as const std::string& - presumably unintentional; confirm against the definitions before changing either. */ void split(const std::string& line, 14 | std::vector& pieces, 15 | const std::string del = " ", 16 | bool keepEmpty = false); 17 | 18 | std::vector split(const std::string& line, 19 | const std::string del = " ", 20 | bool keepEmpty = false); 21 | 22 | void splitAny(const std::string& line, 23 | std::vector& pieces, 24 | const std::string del = " ", 25 | bool keepEmpty = false); 26 | 27 | std::vector splitAny(const std::string& line, 28 | const std::string del = " ", 29 | bool keepEmpty = false); 30 | 31 | std::string join(const std::vector& words, 32 | const std::string& del = " "); 33 | 34 | std::string exec(const std::string& cmd); 35 | 36 | std::pair hostnameAndProcessId(); 37 | 38 | std::string withCommas(size_t n); 39 | bool endsWith(const std::string& text, const std::string& suffix); 40 | 41 | } // namespace utils 42 | } // namespace marian 43 | 
-------------------------------------------------------------------------------- /src/3rd_party/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | include_directories(.) 3 | 4 | add_subdirectory(./yaml-cpp) 5 | add_subdirectory(./SQLiteCpp) 6 | add_subdirectory(./pathie-cpp) 7 | add_subdirectory(./zlib) 8 | 9 | if(USE_SENTENCEPIECE) 10 | #[[ Save the current CMAKE_FIND_LIBRARY_SUFFIXES and switch to static-library suffixes while the sentencepiece subdirectory is added; the saved list is restored further down. ]] if(USE_STATIC_LIBS) 11 | set(_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) 12 | if(WIN32) 13 | list(INSERT CMAKE_FIND_LIBRARY_SUFFIXES 0 .lib .a) 14 | else() 15 | set(CMAKE_FIND_LIBRARY_SUFFIXES .a) 16 | endif() 17 | endif() 18 | 19 | #[[ The three SPM_* cache entries are set with FORCE, so they overwrite any previously cached value before add_subdirectory(./sentencepiece) runs. ]] set(SPM_ENABLE_SHARED OFF CACHE BOOL "Builds shared libaries in addition to static libraries." FORCE) 20 | set(SPM_ENABLE_TCMALLOC ON CACHE BOOL "Enable TCMalloc if available." FORCE) 21 | set(SPM_TCMALLOC_STATIC ON CACHE BOOL "Link static library of TCMALLOC." FORCE) 22 | 23 | add_subdirectory(./sentencepiece) 24 | include_directories(./sentencepiece) 25 | 26 | #[[ Place the listed spm_* executables directly in CMAKE_BINARY_DIR. ]] set_target_properties(spm_encode spm_decode spm_train spm_normalize spm_export_vocab 27 | PROPERTIES 28 | RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") 29 | 30 | #[[ Restore the suffix list saved at the top of this block. ]] if(USE_STATIC_LIBS) 31 | set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) 32 | endif() 33 | endif(USE_SENTENCEPIECE) 34 | 35 | include_directories(./SQLiteCpp/include) 36 | include_directories(./CLI) 37 | include_directories(./pathie-cpp/include) 38 | 39 | include_directories(./zlib) 40 | 41 | -------------------------------------------------------------------------------- /src/3rd_party/ExceptionWithCallStack.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) Microsoft. All rights reserved. 3 | // Licensed under the MIT license. See LICENSE.md file in the project root for full license information. 
4 | // 5 | // ExceptionWithCallStack.h - debug util functions 6 | // 7 | /* NOTE(review): no include guard or #pragma once is visible in this header - confirm it is never included twice in one translation unit. */ 8 | #include 9 | 10 | namespace Microsoft { namespace MSR { namespace CNTK { 11 | 12 | // base class that we can catch, independent of the type parameter 13 | struct /*interface*/ IExceptionWithCallStackBase 14 | { 15 | virtual const char * CallStack() const = 0; 16 | virtual ~IExceptionWithCallStackBase() noexcept = default; 17 | }; 18 | 19 | // Exception wrapper to include native call stack string 20 | template 21 | class ExceptionWithCallStack : public E, public IExceptionWithCallStackBase 22 | { 23 | public: 24 | /* msg is forwarded to E's constructor; callstack is copied into m_callStack. */ ExceptionWithCallStack(const std::string& msg, const std::string& callstack) : 25 | E(msg), m_callStack(callstack) 26 | { } 27 | 28 | /* Returns m_callStack.c_str(), i.e. a pointer into the stored string, so it must not be used after this exception object is destroyed. */ virtual const char * CallStack() const override { return m_callStack.c_str(); } 29 | 30 | protected: 31 | std::string m_callStack; 32 | }; 33 | 34 | // some older code uses this namespace 35 | /* Only declarations are visible here; both functions default to skipLevels = 0 and makeFunctionNamesStandOut = false. */ namespace DebugUtil 36 | { 37 | void PrintCallStack(size_t skipLevels = 0, bool makeFunctionNamesStandOut = false); 38 | 39 | std::string GetCallStack(size_t skipLevels = 0, bool makeFunctionNamesStandOut = false); 40 | }; 41 | 42 | }}} 43 | -------------------------------------------------------------------------------- /src/3rd_party/spdlog/tests/tests.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2015 4 | VisualStudioVersion = 14.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tests", "tests.vcxproj", "{59A07559-5F38-4DD6-A7FA-DB4153690B42}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release|Win32 = Release|Win32 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | 
{59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|Win32.ActiveCfg = Debug|Win32 17 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|Win32.Build.0 = Debug|Win32 18 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|x64.ActiveCfg = Debug|x64 19 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Debug|x64.Build.0 = Debug|x64 20 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|Win32.ActiveCfg = Release|Win32 21 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|Win32.Build.0 = Release|Win32 22 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|x64.ActiveCfg = Release|x64 23 | {59A07559-5F38-4DD6-A7FA-DB4153690B42}.Release|x64.Build.0 = Release|x64 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /src/3rd_party/nccl/pkg/debian/control.in: -------------------------------------------------------------------------------- 1 | Source: nccl 2 | Section: libs 3 | Maintainer: cudatools 4 | Priority: optional 5 | Build-depends: debhelper(>=9) 6 | Standards-Version: 3.9.5 7 | 8 | Package: libnccl${nccl:Major} 9 | Section: libs 10 | Architecture: ${pkg:Arch} 11 | Depends: ${misc:Depends}, ${shlibs:Depends} 12 | Description: NVIDIA Collectives Communication Library (NCCL) Runtime 13 | NCCL (pronounced "Nickel") is a stand-alone library of standard collective 14 | communication routines for GPUs, implementing all-reduce, all-gather, reduce, 15 | broadcast, and reduce-scatter. 16 | It has been optimized to achieve high bandwidth on any platform using PCIe, 17 | NVLink, NVswitch, as well as networking using InfiniBand Verbs or TCP/IP 18 | sockets. 
19 | 20 | Package: libnccl-dev 21 | Section: libdevel 22 | Architecture: ${pkg:Arch} 23 | Depends: ${misc:Depends}, ${shlibs:Depends}, libnccl${nccl:Major} (= ${binary:Version}) 24 | Description: NVIDIA Collectives Communication Library (NCCL) Development Files 25 | NCCL (pronounced "Nickel") is a stand-alone library of standard collective 26 | communication routines for GPUs, implementing all-reduce, all-gather, reduce, 27 | broadcast, and reduce-scatter. 28 | It has been optimized to achieve high bandwidth on any platform using PCIe, 29 | NVLink, NVswitch, as well as networking using InfiniBand Verbs or TCP/IP 30 | sockets. 31 | -------------------------------------------------------------------------------- /src/3rd_party/sentencepiece/src/bpe_model.h: -------------------------------------------------------------------------------- 1 | // Copyright 2016 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef BPE_MODEL_H_ 16 | #define BPE_MODEL_H_ 17 | 18 | #include "model_interface.h" 19 | #include "sentencepiece_model.pb.h" 20 | 21 | namespace sentencepiece { 22 | namespace bpe { 23 | 24 | // Segmentation model with BPE (Byte Pair Encoding) 25 | // Details: 26 | // Neural Machine Translation of Rare Words with Subword Units 27 | // https://arxiv.org/abs/1508.07909 28 | // 29 | // https://en.wikipedia.org/wiki/Byte_pair_encoding 30 | class Model : public ModelInterface { 31 | public: 32 | /* Explicit single-argument constructor taking the ModelProto by const reference; only the declaration is visible in this header. */ explicit Model(const ModelProto &model_proto); 33 | ~Model() override; 34 | 35 | /* Overrides ModelInterface::Encode. The parameter is named `normalized` - presumably the caller passes already-normalized text (TODO confirm against the caller). */ EncodeResult Encode(absl::string_view normalized) const override; 36 | }; 37 | } // namespace bpe 38 | } // namespace sentencepiece 39 | #endif // BPE_MODEL_H_ 40 | --------------------------------------------------------------------------------