├── .gitattributes ├── .gitignore ├── .gitmodules ├── .travis.yml ├── Applications ├── LogisticRegression │ ├── .gitignore │ ├── CMakeLists.txt │ ├── LogisticRegression.vcxproj │ ├── LogisticRegression.vcxproj.filters │ ├── README.md │ ├── example │ │ ├── README.md │ │ ├── convert.py │ │ ├── mnist.config │ │ └── run.sh │ └── src │ │ ├── configure.cpp │ │ ├── configure.h │ │ ├── data_type.h │ │ ├── logreg.cpp │ │ ├── logreg.h │ │ ├── main.cpp │ │ ├── model │ │ ├── model.cpp │ │ ├── model.h │ │ ├── ps_model.cpp │ │ └── ps_model.h │ │ ├── objective │ │ ├── ftrl_objective.h │ │ ├── objective.cpp │ │ ├── objective.h │ │ ├── sigmoid_objective.h │ │ └── softmax_objective.h │ │ ├── reader.cpp │ │ ├── reader.h │ │ ├── regular │ │ ├── l1_regular.h │ │ ├── l2_regular.h │ │ ├── regular.cpp │ │ └── regular.h │ │ ├── updater │ │ ├── ftrl_updater.h │ │ ├── sgd_updater.h │ │ ├── updater.cpp │ │ └── updater.h │ │ └── util │ │ ├── common.h │ │ ├── ftrl_sparse_table.h │ │ ├── hopscotch_hash.h │ │ ├── log.cpp │ │ ├── log.h │ │ ├── sparse_table.h │ │ └── timer.h └── WordEmbedding │ ├── CMakeLists.txt │ ├── README.md │ ├── WordEmbedding.vcxproj │ ├── WordEmbedding.vcxproj.filters │ ├── example │ ├── Readme.txt │ ├── imges │ │ ├── Analogical Reasoning google vs dmtk.png │ │ └── WS 353 google vs dmtk.png │ └── run.bat │ ├── preprocess │ ├── Readme.txt │ ├── stopwords_simple.txt │ ├── util.cpp │ ├── util.h │ └── word_count.cpp │ └── src │ ├── block_queue.cpp │ ├── block_queue.h │ ├── communicator.cpp │ ├── communicator.h │ ├── constant.h │ ├── data_block.cpp │ ├── data_block.h │ ├── dictionary.cpp │ ├── dictionary.h │ ├── distributed_wordembedding.cpp │ ├── distributed_wordembedding.h │ ├── huffman_encoder.cpp │ ├── huffman_encoder.h │ ├── main.cpp │ ├── memory_manager.cpp │ ├── memory_manager.h │ ├── reader.cpp │ ├── reader.h │ ├── trainer.cpp │ ├── trainer.h │ ├── util.cpp │ ├── util.h │ ├── wordembedding.cpp │ └── wordembedding.h ├── CMakeLists.txt ├── LICENSE.md ├── Multiverso.sln ├── README.md ├── Test ├── CMakeLists.txt ├── Test.vcxproj ├── Test.vcxproj.filters ├── common.h ├── main.cpp ├── test_allreduce.cpp ├── test_array_table.cpp ├── test_kv_table.cpp ├── test_matrix_perf.cpp ├── test_matrix_table.cpp ├── test_net.cpp └── unittests │ ├── CMakeLists.txt │ ├── MultiversoTests.vcxproj │ ├── MultiversoTests.vcxproj.filters │ ├── multiverso_env.h │ ├── test_array.cpp │ ├── test_blob.cpp │ ├── test_kv.cpp │ ├── test_message.cpp │ ├── test_multiverso.cpp │ ├── test_node.cpp │ └── test_sync.cpp ├── binding ├── C# │ ├── MultiversoCLR │ │ ├── AssemblyInfo.cpp │ │ ├── MatrixTable.h │ │ ├── MultiversoCLR.cpp │ │ ├── MultiversoCLR.h │ │ ├── MultiversoCLR.vcxproj │ │ ├── MultiversoCLR.vcxproj.filters │ │ ├── ReadMe.txt │ │ └── multiverso.snk │ └── NuGet │ │ ├── GenerateNugetPackage.ps1 │ │ └── MultiversoCLR.nuspec ├── lua │ ├── .gitignore │ ├── ArrayTableHandler.lua │ ├── CMakeLists.txt │ ├── Makefile │ ├── MatrixTableHandler.lua │ ├── README.md │ ├── demos │ │ └── xor │ │ │ ├── Makefile │ │ │ ├── README.md │ │ │ ├── xor-multiverso.lua │ │ │ └── xor.lua │ ├── docs │ │ ├── API.md │ │ ├── BENCHMARK.md │ │ ├── TUTORIAL.md │ │ └── imgs │ │ │ ├── top1error_vs_epoch.png │ │ │ ├── top1error_vs_runningtime.png │ │ │ ├── top5error_vs_epoch.png │ │ │ └── top5error_vs_runningtime.png │ ├── init.lua │ ├── multiverso-scm-1.rockspec │ ├── test.lua │ └── util.lua └── python │ ├── README.md │ ├── docs │ ├── BENCHMARK.md │ ├── TUTORIAL.md │ └── imgs │ │ ├── accuracy_epoch.png │ │ └── accuracy_time.png │ ├── examples │ ├── __init__.py 
│ └── theano │ │ ├── __init__.py │ │ ├── cnn.py │ │ ├── keras │ │ ├── README.md │ │ └── addition_rnn_mv.py │ │ ├── lasagne │ │ ├── Deep_Residual_Learning_CIFAR-10.py │ │ ├── Makefile │ │ └── __init__.py │ │ ├── load_data.py │ │ └── logistic_regression.py │ ├── multiverso │ ├── __init__.py │ ├── api.py │ ├── tables.py │ ├── tests │ │ └── test_multiverso.py │ ├── theano_ext │ │ ├── __init__.py │ │ ├── keras_ext │ │ │ ├── __init__.py │ │ │ ├── callbacks.py │ │ │ └── param_manager.py │ │ ├── lasagne_ext │ │ │ ├── __init__.py │ │ │ └── param_manager.py │ │ ├── param_manager.py │ │ └── sharedvar.py │ └── utils.py │ └── setup.py ├── cmake_uninstall.cmake.in ├── deploy └── docker │ └── Dockerfile ├── include └── multiverso │ ├── actor.h │ ├── blob.h │ ├── c_api.h │ ├── communicator.h │ ├── controller.h │ ├── dashboard.h │ ├── io │ ├── hdfs_stream.h │ ├── io.h │ └── local_stream.h │ ├── message.h │ ├── multiverso.h │ ├── net.h │ ├── net │ ├── allreduce_engine.h │ ├── mpi_net.h │ └── zmq_net.h │ ├── node.h │ ├── server.h │ ├── table │ ├── array_table.h │ ├── kv_table.h │ ├── matrix.h │ ├── matrix_table.h │ └── sparse_matrix_table.h │ ├── table_factory.h │ ├── table_interface.h │ ├── updater │ ├── adagrad_updater.h │ ├── momentum_updater.h │ ├── sgd_updater.h │ └── updater.h │ ├── util │ ├── allocator.h │ ├── async_buffer.h │ ├── configure.h │ ├── log.h │ ├── mt_queue.h │ ├── net_util.h │ ├── quantization_util.h │ ├── timer.h │ └── waiter.h │ ├── worker.h │ └── zoo.h └── src ├── .gitignore ├── CMakeLists.txt ├── Multiverso.vcxproj ├── Multiverso.vcxproj.filters ├── Multiverso_zmq.vcxproj ├── actor.cpp ├── blob.cpp ├── build_dll.bat ├── c_api.cpp ├── communicator.cpp ├── controller.cpp ├── dashboard.cpp ├── io ├── hdfs_stream.cpp ├── io.cpp └── local_stream.cpp ├── multiverso.cpp ├── net.cpp ├── net ├── allreduce_engine.cpp ├── allreduce_topo.cpp └── mpi_net.cpp ├── node.cpp ├── server.cpp ├── table.cpp ├── table ├── array_table.cpp ├── matrix.cpp ├── matrix_table.cpp └── sparse_matrix_table.cpp ├── table_factory.cpp ├── timer.cpp ├── updater └── updater.cpp ├── util ├── allocator.cpp ├── configure.cpp ├── log.cpp └── net_util.cpp ├── worker.cpp └── zoo.cpp /.gitattributes: -------------------------------------------------------------------------------- 1 | .gitattributes text 2 | .gitignore text 3 | .gitmodules text 4 | 5 | 6 | *.md text 7 | *.txt text 8 | *.TXT text 9 | *.yml text 10 | *.yml.bak text 11 | *.config text 12 | 13 | Makefile text 14 | CMakeLists.txt 15 | *.cmake.in text 16 | *.sln text 17 | *.pyproj text 18 | *.vcxproj text 19 | *.vcxproj.filters text 20 | *.vssettings text 21 | *.csproj text 22 | *.props text 23 | *.asax text 24 | *.nuspec text 25 | *.rockspec text 26 | 27 | *.h text 28 | *.cpp text 29 | *.cc text 30 | *.cu text 31 | *.cuh text 32 | *.proto text 33 | *.sh text 34 | *.bat text 35 | *.cmd text 36 | *.py text 37 | *.ipynb text 38 | *.pl text 39 | *.ps1 text 40 | *.ps text 41 | *.i text 42 | *.lua text 43 | 44 | Dockerfile* text 45 | 46 | # Binary extensions: 47 | *.ark binary 48 | *.chunk binary 49 | *.cmf binary 50 | *.docx binary 51 | *.jpg binary 52 | *.pdf binary 53 | *.png binary 54 | *.pptx binary 55 | *.snk binary 56 | *.vsdm binary 57 | *.zip binary -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "include/multiverso/updater/dcasgd"] 2 | path = include/multiverso/updater/dcasgd 3 | url = 
https://github.com/Microsoft/Delayed-Compensation-Asynchronous-Stochastic-Gradient-Descent-for-Multiverso.git 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | sudo: required 3 | dist: trusty 4 | 5 | # solving MPI conflict https://docs.travis-ci.com/user/languages/cpp#OpenMP-projects 6 | before_install: 7 | - test -n $CC && unset CC 8 | - test -n $CXX && unset CXX 9 | 10 | install: 11 | - sudo apt-get install -y libopenmpi-dev openmpi-bin build-essential 12 | 13 | # for boost unit test 14 | - sudo apt-get install -y libboost-test-dev 15 | 16 | # for testing python binding 17 | - sudo apt-get install -y gfortran libblas-dev liblapack-dev libatlas-base-dev 18 | - sudo apt-get install -y cmake python-numpy python-scipy python-nose 19 | 20 | # for testing lua binding 21 | #- curl -sk https://raw.githubusercontent.com/torch/ezinstall/master/install-deps | bash -e 22 | #- git clone https://github.com/torch/distro.git ~/torch --recursive 23 | #- cd ~/torch; ./install.sh -b 24 | #- source ~/.bashrc 25 | 26 | before_script: 27 | - cd $TRAVIS_BUILD_DIR 28 | - mkdir build && cd build && cmake .. 29 | 30 | script: 31 | - make && sudo make install 32 | # run cpp tests 33 | # - mpirun -np 4 ./Test/multiverso.test kv 34 | # - mpirun -np 4 ./Test/multiverso.test array 35 | # - mpirun -np 4 ./Test/multiverso.test allreduce 36 | 37 | # - ./Test/unittests/multiverso.ut --log_level=test_suite 38 | 39 | # lua tests 40 | #- cd ../binding/lua/ 41 | #- make install 42 | # - make test 43 | 44 | # python tests 45 | # - cd ../binding/python/ 46 | # - sudo python setup.py install 47 | # - sudo nosetests # sudo is needed when testing python on travis 48 | 49 | notifications: 50 | email: false 51 | 52 | matrix: 53 | include: 54 | - compiler: gcc 55 | - compiler: clang 56 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/.gitignore: -------------------------------------------------------------------------------- 1 | # User-specific files 2 | *.suo 3 | *.user 4 | *.userosscache 5 | *.sln.docstates 6 | 7 | # User-specific files (MonoDevelop/Xamarin Studio) 8 | *.userprefs 9 | 10 | # Build results 11 | [Dd]ebug/ 12 | [Dd]ebugPublic/ 13 | [Rr]elease/ 14 | [Rr]eleases/ 15 | x64/ 16 | x86/ 17 | bld/ 18 | [Bb]in/ 19 | [Oo]bj/ 20 | [Ll]og/ 21 | [Bb]uild/ 22 | 23 | # Visual C++ cache files 24 | ipch/ 25 | *.aps 26 | *.ncb 27 | *.opendb 28 | *.opensdf 29 | *.sdf 30 | *.cachefile 31 | 32 | # Visual Studio profiler 33 | *.psess 34 | *.vsp 35 | *.vspx 36 | *.sap# Compiled Object files 37 | *.slo 38 | *.lo 39 | *.o 40 | *.obj 41 | 42 | # Precompiled Headers 43 | *.gch 44 | *.pch 45 | 46 | # Compiled Dynamic libraries 47 | *.so 48 | *.dylib 49 | *.dll 50 | 51 | # Fortran module files 52 | *.mod 53 | 54 | # Compiled Static libraries 55 | *.lai 56 | *.la 57 | *.a 58 | *.lib 59 | 60 | # Executables 61 | *.exe 62 | *.out 63 | *.app 64 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(MULTIVERSO_DIR ${PROJECT_SOURCE_DIR}) 2 | set(LR_DIR ${PROJECT_SOURCE_DIR}/Applications/LogisticRegression) 3 | 4 | find_package(MPI REQUIRED) 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11") 6 | 7 | if(LOGLEVEL STREQUAL "DEBUG") 8 | 
add_definitions(-DLOGLEVEL_DEBUG) 9 | elseif(LOGLEVEL STREQUAL "FATAL") 10 | add_definitions(-DLOGLEVEL_FATAL) 11 | elseif(LOGLEVEL STREQUAL "ERROR") 12 | add_definitions(-DLOGLEVEL_ERROR) 13 | else() 14 | add_definitions(-DLOGLEVEL_INFO) 15 | endif() 16 | 17 | set(MULTIVERSO_INC ${MULTIVERSO_DIR}/include) 18 | set(MULTIVERSO_LIB ${MULTIVERSO_DIR}/build/src) 19 | set(MULTIVERSO_SRC ${MULTIVERSO_DIR}/src) 20 | 21 | include_directories(${MULTIVERSO_INC}) 22 | include_directories(${LR_DIR}/src) 23 | 24 | link_directories(${MULTIVERSO_LIB}) 25 | 26 | set(SRCDIR ${LR_DIR}/src) 27 | aux_source_directory(${LR_DIR}/src SRC_ROOT) 28 | aux_source_directory(${SRCDIR}/model SRC_MODEL) 29 | aux_source_directory(${SRCDIR}/objective SRC_OBJECTIVE) 30 | aux_source_directory(${SRCDIR}/regular SRC_REGULAR) 31 | aux_source_directory(${SRCDIR}/updater SRC_UPDATER) 32 | aux_source_directory(${SRCDIR}/util SRC_UTIL) 33 | set(SRC ${SRC_MODEL} ${SRC_OBJECTIVE} ${SRC_REGULAR} ${SRC_UPDATER} ${SRC_UTIL} ${MULTIVERSO_SRC}/table/array_table.cpp ${SRC_ROOT}) 34 | 35 | add_executable(LogisticRegression ${SRC}) 36 | 37 | target_link_libraries(LogisticRegression multiverso ${MPI_CXX_LIBRARIES} pthread) 38 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/README.md: -------------------------------------------------------------------------------- 1 | 2 | Logistic Regression 3 | ====== 4 | The Logistic Regression tool is a parallel implementation of logistic regression on top of Multiverso. It is an easy-to-use tool for training models on big data across multiple machines. 5 | 6 | We tested the tool on a Bing Ads click prediction dataset at Microsoft. The dataset is about 4 TB with more than 5 billion samples. The experiment ran on a cluster of 24 machines, each with 20 physical cores and 256 GB of RAM, connected with InfiniBand. Training one epoch finishes in about 18 minutes. 7 | 8 | 9 | For more details, please refer to the [wiki](https://github.com/Microsoft/multiverso/wiki/Logistic-Regression). 10 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/example/README.md: -------------------------------------------------------------------------------- 1 | This is a simple example that runs a multi-class classification task on the [MNIST](http://yann.lecun.com/exdb/mnist/) data set without a parameter server. 2 | 3 | On Linux, just run `sh run.sh`. This script builds the project, downloads the data, converts the data format, and runs the example. 4 | 5 | On Windows, build the project and download the data set, then run `python convert.py` to convert the data format and pass `mnist.config` as the command-line argument to start a program instance. 
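A rough sketch of that Windows flow, assuming the four MNIST files have already been downloaded and unpacked into this directory and that the built `LogisticRegression` executable is reachable from here (adjust the path to wherever your build places it):

```
python convert.py train
python convert.py test
LogisticRegression mnist.config
```

`convert.py train` writes `train.data` and `convert.py test` writes `test.data`, which are the file names expected by `mnist.config`.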
6 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/example/convert.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import struct 3 | 4 | def convert(info): 5 | in_image, in_label, out_file, line_count = info 6 | 7 | binfile = open(in_label , 'rb') 8 | labels = binfile.read() 9 | binfile.close() 10 | 11 | binfile = open(in_image, 'rb') 12 | images = binfile.read() 13 | binfile.close() 14 | 15 | label_index = 0 16 | image_index = 0; 17 | magic, numImages , numRows , numColumns = struct.unpack_from('>IIII' , images , image_index) 18 | image_index += struct.calcsize('>IIII') 19 | magic, num = struct.unpack_from('>II', labels, label_index) 20 | label_index += struct.calcsize('>II') 21 | 22 | output = open(out_file,'w') 23 | for i in range(line_count): 24 | label = struct.unpack_from('>B', labels, label_index) 25 | label = int(label[0]) 26 | output.write(str(label)); 27 | 28 | im = struct.unpack_from('>784B' ,images, image_index) 29 | im = np.array(im) 30 | for j in range(784): 31 | output.write(' ' + str(im[j])) 32 | 33 | output.write('\n') 34 | image_index += struct.calcsize('>784B') 35 | label_index += struct.calcsize('>B') 36 | output.close() 37 | 38 | a={'train':('train-images-idx3-ubyte','train-labels-idx1-ubyte','train.data', 60000),'test':('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte', 'test.data', 10000)} 39 | import sys 40 | convert(a[sys.argv[1]]) 41 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/example/mnist.config: -------------------------------------------------------------------------------- 1 | input_size=784 2 | output_size=10 3 | objective_type=softmax 4 | regular_type=L2 5 | updater_type=sgd 6 | train_epoch=9 7 | sparse=false 8 | use_ps=false 9 | minibatch_size=20 10 | train_file=train.data 11 | test_file=test.data 12 | output_file=test.out 13 | learning_rate_coef=7e6 14 | regular_coef=0.0007 15 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/example/run.sh: -------------------------------------------------------------------------------- 1 | cd ../../../ 2 | mkdir build 3 | cd build 4 | cmake .. 
&& make 5 | 6 | cd ../Applications/LogisticRegression/example/ 7 | 8 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz && gunzip train-images-idx3-ubyte.gz & 9 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz && gunzip train-labels-idx1-ubyte.gz & 10 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz && gunzip t10k-images-idx3-ubyte.gz & 11 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz && gunzip t10k-labels-idx1-ubyte.gz & 12 | wait 13 | 14 | python convert.py train && rm train-images-idx3-ubyte -f && rm train-labels-idx1-ubyte -f & 15 | python convert.py test && rm t10k-images-idx3-ubyte -f && rm t10k-labels-idx1-ubyte -f & 16 | wait 17 | 18 | ../../../build/Applications/LogisticRegression/LogisticRegression mnist.config 19 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/logreg.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_LOGREG_H_ 2 | #define LOGREG_LOGREG_H_ 3 | 4 | #include 5 | 6 | #include "data_type.h" 7 | #include "model/model.h" 8 | #include "configure.h" 9 | 10 | namespace logreg { 11 | 12 | // only support EleType = int/float/double 13 | template 14 | class LogReg { 15 | public: 16 | // \param config_file each line as: key=value 17 | explicit LogReg(const std::string &config_file); 18 | ~LogReg(); 19 | 20 | void Train(const std::string& train_file); 21 | // config file should provide 22 | // train file 23 | void Train(); 24 | 25 | // will save output in result if result != nullptr 26 | // return test error 27 | double Test(const std::string& test_file, EleType**result = nullptr); 28 | // config file should provide 29 | // test file 30 | double Test(EleType**result = nullptr); 31 | // When model is too large, the program may crash... 
32 | void SaveModel(); 33 | void SaveModel(const std::string& model_file); 34 | 35 | // return the data block of model data 36 | DataBlock* model() const; 37 | 38 | private: 39 | Model *model_; 40 | Configure* config_; 41 | }; 42 | 43 | } // namespace logreg 44 | 45 | #endif // LOGREG_LOGREG_H_ 46 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "logreg.h" 2 | 3 | #include 4 | #include 5 | using namespace logreg; 6 | 7 | int main(int argc, char* argv[]) { 8 | LogReg lr(argv[1]); 9 | 10 | lr.Train(); 11 | 12 | return 0; 13 | } -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/model/model.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_MODEL_H_ 2 | #define LOGREG_MODEL_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "data_type.h" 8 | #include "configure.h" 9 | #include "updater/updater.h" 10 | #include "objective/objective.h" 11 | #include "regular/regular.h" 12 | 13 | #include "util/timer.h" 14 | #include "multiverso/util/mt_queue.h" 15 | 16 | namespace logreg { 17 | 18 | // class for model data management 19 | // local model 20 | template 21 | class Model { 22 | public: 23 | // initiate with config data 24 | // \param config should provide: 25 | // objective type 26 | // updater type 27 | // input size 28 | // output size 29 | explicit Model(Configure& config); 30 | virtual ~Model(); 31 | // update model with #count samples 32 | // \return sum of train loss of every sample 33 | virtual float Update(int count, Sample**samples); 34 | // \param input one input 35 | // \return correct number 36 | virtual int Predict(int count, Sample**samples, EleType**predicts); 37 | // load model data from a binary file 38 | virtual void Load(const std::string& model_file); 39 | // write model data in binary method 40 | virtual void Store(const std::string& model_file); 41 | virtual void SetKeys(multiverso::MtQueue*> *keys) {} 42 | virtual void DisplayTime(); 43 | DataBlock* table() const { return table_; } 44 | // factory method to get a new instance 45 | // \param config should contain model needed configs 46 | // when use_ps=true, return a distributed model 47 | // default use a local version 48 | static Model* Get(Configure& config); 49 | 50 | protected: 51 | // compute update delta 52 | virtual float GetGradient(Sample* sample, DataBlock* delta); 53 | // update table 54 | virtual void UpdateTable(DataBlock* delta); 55 | 56 | protected: 57 | bool ftrl_; 58 | 59 | Objective* objective_; 60 | Updater* updater_; 61 | // local cache 62 | DataBlock* table_; 63 | 64 | int num_row_; 65 | 66 | int minibatch_size_; 67 | 68 | DataBlock* delta_; 69 | 70 | Timer timer_; 71 | double computation_time_; 72 | double compute_count_; 73 | }; 74 | 75 | } // namespace logreg 76 | 77 | #endif // LOGREG_MODEL_H_ 78 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/model/ps_model.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_MODEL_PS_MODEL_H_ 2 | #define LOGREG_MODEL_PS_MODEL_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "model.h" 8 | 9 | #include "multiverso/multiverso.h" 10 | #include "multiverso/table_interface.h" 11 | #include "multiverso/table/array_table.h" 12 | #include 
"multiverso/util/async_buffer.h" 13 | 14 | #include "util/timer.h" 15 | 16 | namespace logreg { 17 | 18 | template 19 | class PSModel : public Model { 20 | public: 21 | explicit PSModel(Configure& config); 22 | ~PSModel(); 23 | int Predict(int count, Sample**samples, EleType**predicts); 24 | void Load(const std::string& model_file); 25 | void Store(const std::string& model_file); 26 | void SetKeys(multiverso::MtQueue*> *keys); 27 | void DisplayTime(); 28 | 29 | private: 30 | // use multiverso table add interface 31 | void UpdateTable(DataBlock* delta); 32 | void PullModel(); 33 | // sync table if needed 34 | void DoesNeedSync(); 35 | void PullWholeModel(); 36 | void GetPipelineTable(); 37 | 38 | private: 39 | // multiverso table 40 | multiverso::WorkerTable* worker_table_; 41 | // for pipeline sync 42 | DataBlock* buffer_[2]; 43 | int wait_id_; 44 | int buffer_index_; 45 | // works when not pipeline 46 | int count_sample_; 47 | int sync_frequency_; 48 | 49 | multiverso::MtQueue*> *keys_; 50 | 51 | Timer network_timer_; 52 | double push_time_; 53 | double pull_time_; 54 | size_t pull_count_; 55 | size_t push_count_; 56 | }; 57 | 58 | } // namespace logreg 59 | 60 | #endif // LOGREG_MODEL_PS_MODEL_H_ 61 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/objective/ftrl_objective.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_OBJECTIVE_FTRL_OBJECTIVE_H_ 2 | #define LOGREG_OBJECTIVE_FTRL_OBJECTIVE_H_ 3 | 4 | #include "objective.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class FTRLObjective : public Objective { 10 | public: 11 | explicit FTRLObjective(const Configure& config); 12 | 13 | ~FTRLObjective(); 14 | 15 | float Gradient(Sample* sample, 16 | DataBlock* model, 17 | DataBlock* gradient); 18 | 19 | float Predict(Sample*sample, 20 | DataBlock* model, EleType* predict); 21 | 22 | private: 23 | float Predict(Sample*sample, 24 | DataBlock* model, EleType* predict, DataBlock* w); 25 | EleType sgn(const EleType x); 26 | 27 | private: 28 | Objective *objective_; 29 | 30 | double lambda1_; 31 | double lambda2_; 32 | double alpha_; 33 | double beta_; 34 | }; 35 | 36 | } // namespace logreg 37 | 38 | #endif // LOGREG_OBJECTIVE_FTRL_OBJECTIVE_H_ 39 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/objective/objective.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_OBJECTIVE_OBJECTIVE_H_ 2 | #define LOGREG_OBJECTIVE_OBJECTIVE_H_ 3 | 4 | #include 5 | 6 | #include "data_type.h" 7 | #include "configure.h" 8 | #include "regular/regular.h" 9 | 10 | namespace logreg { 11 | 12 | // provide methods for predict and calculate gradient 13 | template 14 | class Objective { 15 | public: 16 | // \param config should provide: 17 | // input size 18 | // output size 19 | // regular type 20 | explicit Objective(const Configure& config); 21 | virtual ~Objective(); 22 | // return train loss 23 | virtual float Gradient(Sample* sample, 24 | DataBlock* model, 25 | DataBlock* gradient); 26 | // return test loss 27 | virtual float Predict(Sample*sample, 28 | DataBlock* model, EleType* predict); 29 | 30 | virtual bool Correct(const int label, EleType*predict); 31 | 32 | // factory method to get a new instance 33 | // \param config should contain objective type 34 | // and params for Objective initialization 35 | static Objective* Get(const Configure& config); 36 | 37 | 
protected: 38 | // diff -= (label == i) 39 | virtual void Diff(int label, EleType*diff); 40 | virtual void AddRegularization(Sample*sample, 41 | DataBlock* model, 42 | EleType* loss, 43 | DataBlock* gradient); 44 | virtual float Loss(Sample*sample, EleType* predict); 45 | 46 | protected: 47 | Regular *regular_; 48 | 49 | size_t input_size_; 50 | int output_size_; 51 | }; 52 | 53 | } // namespace logreg 54 | 55 | #endif // LOGREG_OBJECTIVE_OBJECTIVE_H_ 56 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/objective/sigmoid_objective.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_OBJECTIVE_SIGMOID_OBJECTIVE_H_ 2 | #define LOGREG_OBJECTIVE_SIGMOID_OBJECTIVE_H_ 3 | 4 | #include "objective.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class SigmoidObjective : public Objective { 10 | public: 11 | explicit SigmoidObjective(const Configure& config); 12 | 13 | float Gradient(Sample* sample, 14 | DataBlock* model, 15 | DataBlock* gradient); 16 | 17 | float Predict(Sample*sample, 18 | DataBlock* model, EleType* predict); 19 | 20 | private: 21 | float Sigmoid(Sample* sample, 22 | DataBlock*model); 23 | float Loss(Sample*sample, EleType* predict); 24 | }; 25 | 26 | } // namespace logreg 27 | 28 | #endif // LOGREG_OBJECTIVE_SIGMOID_OBJECTIVE_H_ 29 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/objective/softmax_objective.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_OBJECTIVE_SOFTMAX_OBJECTIVE_H_ 2 | #define LOGREG_OBJECTIVE_SOFTMAX_OBJECTIVE_H_ 3 | 4 | #include "objective.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class SoftmaxObjective : public Objective { 10 | public: 11 | explicit SoftmaxObjective(const Configure& config); 12 | 13 | virtual float Predict(Sample*sample, 14 | DataBlock* model, EleType* predict); 15 | 16 | protected: 17 | float Sigmoid(Sample* sample, 18 | DataBlock*model, EleType*sigmoid); 19 | float Loss(Sample*sample, EleType* predict); 20 | }; 21 | 22 | } // namespace logreg 23 | 24 | #endif // LOGREG_OBJECTIVE_SOFTMAX_OBJECTIVE_H 25 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/regular/l1_regular.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_REGULAR_L1_REGULAR_H_ 2 | #define LOGREG_REGULAR_L1_REGULAR_H_ 3 | 4 | #include "regular.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class L1Regular : public Regular { 10 | public: 11 | explicit L1Regular(const Configure& config); 12 | virtual ~L1Regular() = default; 13 | 14 | EleType Calculate( 15 | size_t key, 16 | DataBlock*model); 17 | }; 18 | 19 | } // namespace logreg 20 | 21 | #endif // LOGREG_REGULAR_L1_REGULAR_H_ 22 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/regular/l2_regular.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_REGULAR_L2_REGULAR_H_ 2 | #define LOGREG_REGULAR_L2_REGULAR_H_ 3 | 4 | #include "regular.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class L2Regular : public Regular { 10 | public: 11 | explicit L2Regular(const Configure& config); 12 | virtual ~L2Regular() = default; 13 | 14 | EleType Calculate( 15 | size_t key, 16 | DataBlock*model); 17 | }; 18 | 19 | } // namespace logreg 20 | 
21 | #endif // LOGREG_REGULAR_L2_REGULAR_H_ 22 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/regular/regular.cpp: -------------------------------------------------------------------------------- 1 | #include "regular/regular.h" 2 | 3 | #include 4 | 5 | #include "regular/l1_regular.h" 6 | #include "regular/l2_regular.h" 7 | 8 | #include "util/common.h" 9 | #include "util/log.h" 10 | 11 | namespace logreg { 12 | 13 | template 14 | Regular::Regular(const Configure& config) { 15 | this->input_size_ = config.input_size; 16 | this->output_size_ = config.output_size; 17 | this->regular_coef_ = config.regular_coef; 18 | } 19 | 20 | template 21 | EleType Regular::Calculate( 22 | size_t key, 23 | DataBlock*model) { 24 | return 0; 25 | } 26 | 27 | template 28 | L1Regular::L1Regular(const Configure& config) : 29 | Regular(config) { 30 | } 31 | 32 | template 33 | EleType L1Regular::Calculate( 34 | size_t key, 35 | DataBlock*model) { 36 | EleType* pval = model->Get(key); 37 | // sgn(x) * regular_coef 38 | return (pval == nullptr || *pval == 0) ? 0 39 | : (EleType)(*pval > 0 ? this->regular_coef_ : -this->regular_coef_); 40 | } 41 | 42 | DECLARE_TEMPLATE_CLASS_WITH_BASIC_TYPE(L1Regular); 43 | 44 | template 45 | L2Regular::L2Regular(const Configure& config) : 46 | Regular(config) { 47 | } 48 | 49 | template 50 | EleType L2Regular::Calculate( 51 | size_t key, 52 | DataBlock*model) { 53 | EleType* pval = model->Get(key); 54 | // abs(x) * regular_coef 55 | return pval == nullptr ? 0 : (EleType)(abs(*pval) * this->regular_coef_); 56 | } 57 | 58 | DECLARE_TEMPLATE_CLASS_WITH_BASIC_TYPE(L2Regular); 59 | 60 | template 61 | Regular* Regular::Get(const Configure& config) { 62 | const std::string &type = config.regular_type; 63 | Log::Write(Info, "Regular type %s\n", type.c_str()); 64 | if (type == "L1") { 65 | return new L1Regular(config); 66 | } else if (type == "L2") { 67 | return new L2Regular(config); 68 | } 69 | return new Regular(config); 70 | } 71 | 72 | DECLARE_TEMPLATE_CLASS_WITH_BASIC_TYPE(Regular); 73 | } // namespace logreg 74 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/regular/regular.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_REGULAR_REGULAR_H_ 2 | #define LOGREG_REGULAR_REGULAR_H_ 3 | 4 | #include 5 | 6 | #include "data_type.h" 7 | #include "configure.h" 8 | 9 | namespace logreg { 10 | 11 | // provide regularization term 12 | template 13 | class Regular { 14 | public: 15 | // \param config should provide: 16 | // input size 17 | // output size 18 | explicit Regular(const Configure& config); 19 | virtual ~Regular() = default; 20 | // get regularization term 21 | virtual EleType Calculate( 22 | size_t key, 23 | DataBlock*model); 24 | 25 | // factory method to get a new instance 26 | // \param config should provide regular type 27 | // and needed params for Regular initialization 28 | static Regular* Get(const Configure& config); 29 | 30 | protected: 31 | size_t input_size_; 32 | int output_size_; 33 | 34 | double regular_coef_; 35 | }; 36 | 37 | } // namespace logreg 38 | 39 | #endif // LOGREG_REGULAR_REGULAR_H_ 40 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/updater/ftrl_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UPDATER_FTRL_UPDATER_ 2 | #define 
LOGREG_UPDATER_FTRL_UPDATER_ 3 | 4 | #include "updater.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class FTRLUpdater : public Updater { 10 | public: 11 | explicit FTRLUpdater(const Configure& config); 12 | void Update(DataBlock* data, DataBlock* delta); 13 | }; 14 | 15 | } // namespace logreg 16 | 17 | #endif // LOGREG_UPDATER_FTRL_UPDATER_ 18 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/updater/sgd_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UPDATER_SGD_UPDATER_H_ 2 | #define LOGREG_UPDATER_SGD_UPDATER_H_ 3 | 4 | #include "updater.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class SGDUpdater : public Updater { 10 | public: 11 | explicit SGDUpdater(const Configure& config); 12 | void Process(DataBlock* delta); 13 | 14 | private: 15 | double initial_learning_rate_; 16 | double learning_rate_; 17 | double learning_rate_coef_; 18 | size_t update_count_; 19 | int minibatch_size_; 20 | }; 21 | 22 | } // namespace logreg 23 | 24 | #endif // LOGREG_UPDATER_SGD_UPDATER_H_ 25 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/updater/updater.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UPDATER_UPDATER_H_ 2 | #define LOGREG_UPDATER_UPDATER_H_ 3 | 4 | #include 5 | 6 | #include "data_type.h" 7 | #include "configure.h" 8 | 9 | namespace logreg { 10 | 11 | template 12 | class Updater { 13 | public: 14 | virtual ~Updater() = default; 15 | 16 | virtual void Update(DataBlock* data, 17 | DataBlock* delta); 18 | 19 | virtual void Process(DataBlock* delta) {} 20 | 21 | // factory method to get a new instance 22 | // \param config should provide updater type and 23 | // params for updater initiate 24 | static Updater* Get(const Configure& config); 25 | 26 | protected: 27 | int row_size_; 28 | int num_row_; 29 | }; 30 | 31 | } // namespace logreg 32 | 33 | #endif // LOGREG_UPDATER_UPDATER_H_ 34 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/util/common.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UTIL_HELPER_H_ 2 | #define LOGREG_UTIL_HELPER_H_ 3 | 4 | #include "data_type.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | EleType** CreateMatrix(int num_row, int num_col) { 10 | EleType **matrix = new EleType*[num_row]; 11 | for (int i = 0; i < num_row; ++i) 12 | matrix[i] = new EleType[num_col]; 13 | return matrix; 14 | } 15 | 16 | template 17 | void FreeMatrix(int num_row, EleType**matrix) { 18 | for (int i = 0; i < num_row; ++i) 19 | delete[]matrix[i]; 20 | delete[]matrix; 21 | } 22 | 23 | template 24 | EleType Dot(size_t offset, DataBlock*matrix, Sample*sample) { 25 | EleType sum = 0; 26 | int size = static_cast(sample->values.size()); 27 | if (matrix->sparse()) { 28 | DEBUG_CHECK(sample->keys.size() == sample->values.size()); 29 | for (int i = 0; i < size; ++i) { 30 | EleType* pval = matrix->Get(sample->keys[i] + offset); 31 | sum += (pval == nullptr ? 
0 : (sample->values[i] * (*pval))); 32 | } 33 | } else { 34 | EleType*rawa = static_cast(matrix->raw()) + offset; 35 | EleType*rawb = sample->values.data(); 36 | for (int i = 0; i < size; ++i) { 37 | sum += rawa[i] * rawb[i]; 38 | } 39 | } 40 | return sum; 41 | } 42 | 43 | template 44 | inline EleType* MatrixRow(EleType*matrix, int row_id, size_t num_col) { 45 | return matrix + row_id * num_col; 46 | } 47 | 48 | template 49 | Sample** CeateSamples(int num, size_t size, bool sparse) { 50 | Sample**samples = new Sample*[num]; 51 | for (int i = 0; i < num; ++i) { 52 | samples[i] = new Sample(sparse, size); 53 | } 54 | return samples; 55 | } 56 | 57 | template 58 | void FreeSamples(int num, Sample**samples) { 59 | for (int i = 0; i < num; ++i) { 60 | delete samples[i]; 61 | } 62 | delete[]samples; 63 | } 64 | 65 | #define DECLARE_TEMPLATE_CLASS_WITH_BASIC_TYPE(name) \ 66 | template class name; \ 67 | template class name; \ 68 | template class name; 69 | 70 | } // namespace logreg 71 | 72 | #endif // LOGREG_UTIL_HELPER_H_ 73 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/util/ftrl_sparse_table.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UTIL_FTRL_SPARSE_TABLE_H_ 2 | #define LOGREG_UTIL_FTRL_SPARSE_TABLE_H_ 3 | 4 | #include "util/sparse_table.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | struct FTRLTableOption; 10 | 11 | template 12 | class FTRLWorkerTable : public SparseWorkerTable> { 13 | protected: 14 | using Blob = multiverso::Blob; 15 | 16 | public: 17 | explicit FTRLWorkerTable(size_t size) : 18 | SparseWorkerTable>(size) {} 19 | 20 | explicit FTRLWorkerTable(const FTRLTableOption &option) : 21 | FTRLWorkerTable(option.size) {} 22 | 23 | int GetAsync(DataBlock>* data) { 24 | LR_CHECK(data != nullptr && data->sparse()); 25 | this->data_ = (DataBlock>*)data; 26 | size_t all_key = -1; 27 | Blob whole_table(&all_key, sizeof(size_t)); 28 | return multiverso::WorkerTable::GetAsync(whole_table); 29 | } 30 | void Get(DataBlock>* data) { 31 | this->Wait(GetAsync(data)); 32 | } 33 | int GetAsync(SparseBlock* keys, DataBlock>* data) { 34 | LR_CHECK(keys != nullptr && data != nullptr && data->sparse()); 35 | data->Clear(); 36 | this->data_ = (DataBlock>*)data; 37 | 38 | size_t size = keys->size(); 39 | Blob key(size * sizeof(size_t)); 40 | size_t* pkey = reinterpret_cast(key.data()); 41 | 42 | SparseBlockIter iter(keys); 43 | while (iter.Next()) { 44 | *(pkey++) = iter.Key(); 45 | } 46 | 47 | return multiverso::WorkerTable::GetAsync(key); 48 | } 49 | void Get(SparseBlock* keys, DataBlock>* data) { 50 | this->Wait(GetAsync(keys, data)); 51 | } 52 | 53 | void ProcessReplyGet(std::vector& reply_data) { 54 | DEBUG_CHECK(reply_data.size() == 2 || reply_data.size() == 1); 55 | DEBUG_CHECK(this->data_ != nullptr); 56 | // no data 57 | if (reply_data.size() == 1) { 58 | return; 59 | } 60 | size_t *keys = reinterpret_cast(reply_data[0].data()); 61 | auto vals = reinterpret_cast*>(reply_data[1].data()); 62 | size_t size = reply_data[0].size(); 63 | auto data = (DataBlock>*)this->data_; 64 | for (size_t i = 0; i < size; ++i) { 65 | data->Set(keys[i], FTRLEntry(vals+i)); 66 | } 67 | } 68 | }; 69 | 70 | template 71 | class FTRLServerTable : public SparseServerTable> { 72 | public: 73 | explicit FTRLServerTable(size_t size) : 74 | SparseServerTable>(size) { } 75 | 76 | explicit FTRLServerTable(const FTRLTableOption &option) : 77 | FTRLServerTable(option.size) {} 78 | }; 79 | 80 | 
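// Option type describing an FTRL table: it stores the table size and, via the
// DEFINE_TABLE_TYPE macro, names FTRLWorkerTable/FTRLServerTable as the worker
// and server implementations to instantiate for the given element type.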
template 81 | struct FTRLTableOption { 82 | explicit FTRLTableOption(size_t size) : 83 | size(size) {} 84 | size_t size; 85 | DEFINE_TABLE_TYPE(EleType, FTRLWorkerTable, FTRLServerTable); 86 | }; 87 | 88 | } // namespace logreg 89 | 90 | #endif // LOGREG_UTIL_FTRL_SPARSE_TABLE_H_ 91 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/util/log.cpp: -------------------------------------------------------------------------------- 1 | #include "util/log.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace logreg { 8 | 9 | // default in Info level 10 | #ifdef LOGLEVEL_FATAL 11 | LogLevel Log::log_level_ = LogLevel::Fatal; 12 | #elif LOGLEVEL_ERROR 13 | LogLevel Log::log_level_ = LogLevel::Error; 14 | #elif LOGLEVEL_DEBUG 15 | LogLevel Log::log_level_ = LogLevel::Debug; 16 | #else 17 | LogLevel Log::log_level_ = LogLevel::Info; 18 | #endif 19 | 20 | void Log::Write(LogLevel level, const char *format, ...) { 21 | if (static_cast(log_level_) > static_cast(level)) { 22 | return; 23 | } 24 | std::string level_str; 25 | 26 | switch (level) { 27 | case Debug: 28 | level_str = "DEBUG"; 29 | break; 30 | case Info: 31 | level_str = "INFO"; 32 | break; 33 | case Error: 34 | level_str = "ERROR"; 35 | break; 36 | case Fatal: 37 | level_str = "FATAL"; 38 | break; 39 | default: 40 | break; 41 | } 42 | va_list val; 43 | va_start(val, format); 44 | printf("[%s] [%s] ", level_str.c_str(), GetSystemTime().c_str()); 45 | vprintf(format, val); 46 | fflush(stdout); 47 | va_end(val); 48 | 49 | if (level == Fatal) { 50 | exit(1); 51 | } 52 | } 53 | 54 | inline std::string Log::GetSystemTime() { 55 | time_t t = time(0); 56 | char str[64]; 57 | #ifdef _MSC_VER 58 | tm time; 59 | localtime_s(&time, &t); 60 | strftime(str, sizeof(str), "%Y-%m-%d %H:%M:%S", &time); 61 | #else 62 | strftime(str, sizeof(str), "%Y-%m-%d %H:%M:%S", localtime(&t)); 63 | #endif 64 | return str; 65 | } 66 | 67 | } // namespace logreg 68 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/util/log.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UTIL_LOG_H_ 2 | #define LOGREG_UTIL_LOG_H_ 3 | 4 | #include 5 | 6 | namespace logreg { 7 | enum LogLevel : int { 8 | Debug = 0, 9 | Info = 1, 10 | Error = 2, 11 | Fatal = 3 12 | }; 13 | class Log { 14 | public: 15 | // print log to stdout 16 | static void Write(LogLevel level, const char *format, ...); 17 | static LogLevel& log_level() { return log_level_; } 18 | private: 19 | static LogLevel log_level_; 20 | static std::string GetSystemTime(); 21 | }; 22 | 23 | #define LR_CHECK(condition) \ 24 | if (!(condition)) { \ 25 | Log::Write(Fatal, "Check failed: " \ 26 | #condition " at %s, line %d .\n", \ 27 | __FILE__, __LINE__); \ 28 | } 29 | 30 | #ifdef LOGLEVEL_DEBUG 31 | #define DEBUG_CHECK(condition) \ 32 | LR_CHECK(condition) 33 | #else 34 | #define DEBUG_CHECK(condition) 35 | #endif 36 | 37 | 38 | } // namespace logreg 39 | 40 | #endif // LOGREG_UTIL_LOG_H_ 41 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/util/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UTIL_TIMER_H_ 2 | #define LOGREG_UTIL_TIMER_H_ 3 | 4 | #include 5 | 6 | namespace logreg { 7 | 8 | class Timer { 9 | public: 10 | void Start() { 11 | start_ = Clock::now(); 12 | } 13 | double ElapseMilliSeconds() { 14 | 
Clock::time_point now = Clock::now(); 15 | return std::chrono::duration<double, std::milli>(now - start_).count(); 16 | } 17 | double ElapseSeconds() { 18 | return ElapseMilliSeconds() / 1000.0; 19 | } 20 | 21 | private: 22 | using Clock = std::chrono::high_resolution_clock; 23 | Clock::time_point start_; 24 | }; 25 | 26 | } // namespace logreg 27 | 28 | #endif // LOGREG_UTIL_TIMER_H_ 29 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | PROJECT(WORDEMBEDDING) 4 | 5 | find_package(MPI REQUIRED) 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11 -Wno-sign-compare -fno-omit-frame-pointer -fopenmp") 7 | 8 | set(MULTIVERSO_INC ${MULTIVERSO_DIR}/include) 9 | set(MULTIVERSO_LIB ${MULTIVERSO_DIR}/build/src) 10 | set(MULTIVERSO_SRC ${MULTIVERSO_DIR}/src) 11 | 12 | include_directories(${MULTIVERSO_INC}) 13 | include_directories(${PROJECT_SOURCE_DIR}/src) 14 | 15 | link_directories(${MULTIVERSO_LIB}) 16 | 17 | set(SRCDIR ${PROJECT_SOURCE_DIR}/src) 18 | aux_source_directory(${PROJECT_SOURCE_DIR}/src SRC_ROOT) 19 | 20 | set(SRC ${MULTIVERSO_SRC} ${SRC_ROOT}) 21 | 22 | add_executable(wordembedding ${SRC}) 23 | 24 | target_link_libraries(wordembedding multiverso ${MPI_CXX_LIBRARIES}) 25 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/README.md: -------------------------------------------------------------------------------- 1 | Word Embedding 2 | ========== 3 | 4 | The DMTK Word Embedding is a parallelization of the Word2Vec algorithm on top of Multiverso. It provides an efficient "scaling to industry size" solution for word embedding. 5 | 6 | For more details about parameter settings and performance, please view our [Wiki](https://github.com/Microsoft/multiverso/wiki/Word-Embedding). 7 | 8 | ## Why DMTK Word Embedding? 9 | 10 | 1. **For training on a large dataset.** 11 | 12 | The DMTK parameter server stores the parameters in a distributed way, which means that each machine just holds a partition of the entire parameter set. This allows the overall embedding model to be very large. For example, in an experiment on the ClueWeb data, the vocabulary size is 21 million and the parameter size reaches 6 billion, which is the largest word embedding model ever reported in the literature, as far as we know. 13 | 14 | 2. **For high-quality word embeddings.** 15 | 16 | You can view the performance of Distributed Word Embedding in the [Wiki](https://github.com/Microsoft/multiverso/wiki/Word-Embedding). 17 | 18 | 3. **For shorter training time.** 19 | 20 | Large datasets need long training time. You can accelerate training by using multiple machines. 21 | 22 | ## Linux Installation 23 | 24 | 1. cmake ./CMakeLists.txt 25 | 26 | 2. make 27 | 28 | ## Windows Installation 29 | 30 | 1. Get and build the DMTK Framework [Multiverso](https://github.com/Microsoft/multiverso.git). 31 | 32 | 2. Open Multiverso.sln, change the configuration and platform of WordEmbedding to Release and x64 (the default setting), and set the ```include``` and ```lib``` paths of multiverso in the project properties. 33 | 34 | 3. Enable OpenMP 2.0 support. 35 | 36 | To set this **compiler** option in the Visual Studio development environment: 37 | 38 | 1) Open the project's **Property Pages** dialog box. 39 | 40 | 2) Expand the **Configuration Properties** node. 41 | 42 | 3) Expand the **C/C++** node.
43 | 44 | 4) Select the **Language** property page. 45 | 46 | 5) Set the **OpenMP Support** property to "yes". 47 | 48 | 4. Build the solution. 49 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/WordEmbedding.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/example/Readme.txt: -------------------------------------------------------------------------------- 1 | Usage: 2 | -size: word embedding size, e.g. 300 3 | -train_file: the training corpus file, e.g. enwik2014 4 | -read_vocab: the file to read all the vocab count info 5 | -binary: 0 or 1, indicates whether to write all the embedding vectors in binary format 6 | -cbow: 0 or 1, default 1, whether to use CBOW, otherwise skip-gram 7 | -alpha: initial learning rate, usually set to 0.025 8 | -output: the output file to store all the embedding vectors 9 | -window: the window size 10 | -sample: the sub-sample size, usually set to 0 11 | -hs: 0 or 1, default 1, whether to use hierarchical softmax, otherwise negative sampling 12 | -negative: the negative word count in negative sampling, please set it to 0 when -hs = 1 13 | -threads: the number of threads to run on one machine 14 | -min_count: words with lower frequency than min_count are removed from the dictionary 15 | -epoch: the epoch number 16 | -stopwords: 0 or 1, whether to avoid training stop words 17 | -sw_file: the stop words file storing all the stop words, valid when -stopwords = 1 18 | -use_adagrad: 0 or 1, whether to use AdaGrad to adjust the learning rate 19 | -data_block_size: default 1MB, the maximum number of bytes a data block will store 20 | -max_preload_data_size: default 8GB, the maximum data size (bytes) that multiverso WordEmbedding will preload 21 | -is_pipeline: 0 or 1, whether to use the pipeline 22 | -server_endpoint_file: default "", server ZMQ socket endpoint file in the MPI-free version -------------------------------------------------------------------------------- /Applications/WordEmbedding/example/imges/Analogical Reasoning google vs dmtk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/Applications/WordEmbedding/example/imges/Analogical Reasoning google vs dmtk.png -------------------------------------------------------------------------------- /Applications/WordEmbedding/example/imges/WS 353 google vs dmtk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/Applications/WordEmbedding/example/imges/WS 353 google vs dmtk.png -------------------------------------------------------------------------------- /Applications/WordEmbedding/example/run.bat: -------------------------------------------------------------------------------- 1 | set size=300 2 | set text=(train_file's name,e.g. enwiki2014) 3 | set read_vocab=(vocab's Directory string,e.g. "C:\Users\Leif\dataset\enwiki2014_vocab.txt") 4 | set train_file=(train_file's Directory string,e.g. 
"C:\Users\Leif\dataset\enwiki2014") 5 | set binary=1 6 | set cbow=1 7 | set alpha=0.01 8 | set epoch=20 9 | set window=5 10 | set sample=0 11 | set hs=0 12 | set negative=5 13 | set threads=16 14 | set mincount=5 15 | set sw_file=stopwords_simple.txt 16 | set stopwords=0 17 | set data_block_size=1000000000 18 | set max_preload_data_size=20000000000 19 | set use_adagrad=0 20 | set is_pipeline=0 21 | set output=%text%_%size%.bin 22 | 23 | distributed_word_embedding.exe -max_preload_data_size %max_preload_data_size% -is_pipeline %is_pipeline% -alpha %alpha% -data_block_size %data_block_size% -train_file %train_file% -output %output% -threads %threads% -size %size% -binary %binary% -cbow %cbow% -epoch %epoch% -negative %negative% -hs %hs% -sample %sample% -min_count %mincount% -window %window% -stopwords %stopwords% -sw_file %sw_file% -read_vocab %read_vocab% -use_adagrad %use_adagrad% 24 | 25 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/preprocess/Readme.txt: -------------------------------------------------------------------------------- 1 | Distributed_word_embedding's input_file format instruction: 2 | 1.train_file is normal format,in which words are separated by space. 3 | 2.word_count.cpp is a word_frequency generator on the basis of train_file. 4 | How to use in commandline: word_count.exe [-train_file ] [-save_vocab_file ] [-min_count ] 5 | 3.stopwords_simple.txt is sw_file which is used to filter dictionary. -------------------------------------------------------------------------------- /Applications/WordEmbedding/preprocess/stopwords_simple.txt: -------------------------------------------------------------------------------- 1 | a 2 | an 3 | and 4 | are 5 | as 6 | at 7 | be 8 | but 9 | by 10 | for 11 | if 12 | in 13 | into 14 | is 15 | it 16 | no 17 | not 18 | of 19 | on 20 | or 21 | s 22 | such 23 | t 24 | that 25 | the 26 | their 27 | then 28 | there 29 | these 30 | they 31 | this 32 | to 33 | was 34 | will 35 | with -------------------------------------------------------------------------------- /Applications/WordEmbedding/preprocess/util.cpp: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | void Option::ParseArgs(int argc, char* argv[]) 4 | { 5 | for (int i = 1; i < argc; i += 2) 6 | { 7 | if (strcmp(argv[i], "-train_file") == 0) train_file = argv[i + 1]; 8 | if (strcmp(argv[i], "-save_vocab_file") == 0) save_vocab_file = argv[i + 1]; 9 | if (strcmp(argv[i], "-min_count") == 0) min_count = atoi(argv[i + 1]); 10 | } 11 | } -------------------------------------------------------------------------------- /Applications/WordEmbedding/preprocess/util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /*! 3 | * \file util.h 4 | * \brief Struct Option stores many input arguments 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | struct Option 14 | { 15 | const char* train_file; 16 | const char* save_vocab_file; 17 | int min_count; 18 | 19 | void ParseArgs(int argc, char *argv[]); 20 | }; -------------------------------------------------------------------------------- /Applications/WordEmbedding/preprocess/word_count.cpp: -------------------------------------------------------------------------------- 1 | /*! 
2 | * \file word_count.cpp 3 | * \brief word_frequency generator on the basis of train_file 4 | * Usage: 5 | * [-train_file ] [-save_vocab ] [-min_count ] 6 | */ 7 | #define _CRT_SECURE_NO_WARNINGS 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "util.h" 14 | 15 | using namespace std; 16 | 17 | void display_map(map &wmap, FILE * file_,Option * option_) 18 | { 19 | map::const_iterator map_it; 20 | for (map_it = wmap.begin(); map_it != wmap.end(); map_it++) 21 | { 22 | if (map_it->second >= option_->min_count) 23 | { 24 | fprintf(file_, "%s %d\n", (map_it->first).c_str(), map_it->second); 25 | } 26 | 27 | } 28 | } 29 | 30 | int main(int argc, char *argv[]) 31 | { 32 | Option *option_= new Option(); 33 | FILE * output_file; 34 | option_->ParseArgs(argc, argv); 35 | output_file = fopen(option_->save_vocab_file, "w"); 36 | ifstream ifs(option_->train_file); 37 | string szTemp; 38 | map wmap; 39 | 40 | while (ifs >> szTemp) 41 | wmap[szTemp]++; 42 | 43 | display_map(wmap,output_file,option_); 44 | 45 | return false; 46 | } 47 | 48 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/block_queue.cpp: -------------------------------------------------------------------------------- 1 | #include "block_queue.h" 2 | 3 | namespace wordembedding { 4 | 5 | void BlockQueue::Push(DataBlock *data_block) { 6 | std::unique_lock lock(mtx_); 7 | queues_.push(data_block); 8 | repo_not_empty_.notify_all(); 9 | lock.unlock(); 10 | } 11 | 12 | DataBlock* BlockQueue::Pop() { 13 | std::unique_lock lock(mtx_); 14 | // block queue is empty, just wait here. 15 | while (queues_.size() == 0) { 16 | (repo_not_empty_).wait(lock); 17 | } 18 | 19 | DataBlock* temp = queues_.front(); 20 | queues_.pop(); 21 | lock.unlock(); 22 | return temp; 23 | } 24 | 25 | int const BlockQueue::GetQueueSize() { 26 | int size = -1; 27 | //This operation is safe in here and more efficient. 28 | //std::unique_lock lock(mtx_); 29 | size = queues_.size(); 30 | //lock.unlock(); 31 | return size; 32 | } 33 | } -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/block_queue.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_BLOCK_QUEUE_H_ 2 | #define WORDEMBEDDING_BLOCK_QUEUE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "data_block.h" 9 | 10 | namespace wordembedding { 11 | 12 | /*! 13 | * \brief The block queue push and pop the block data. Load data thread push 14 | * datablock in it and training thread take datablock from it. 
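 * Push() appends a data block under the mutex and wakes any waiting consumer via
 * the condition variable; Pop() blocks until a data block is available.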
15 | */ 16 | class BlockQueue { 17 | public: 18 | void Push(DataBlock *data_block); 19 | DataBlock* Pop(); 20 | int const GetQueueSize(); 21 | 22 | private: 23 | std::queue queues_; 24 | std::mutex mtx_; 25 | std::condition_variable repo_not_empty_; 26 | }; 27 | } 28 | #endif 29 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/communicator.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_COMMUNICATOR_H_ 2 | #define WORDEMBEDDING_COMMUNICATOR_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "memory_manager.h" 9 | #include "block_queue.h" 10 | 11 | namespace wordembedding { 12 | 13 | class Communicator { 14 | public: 15 | Communicator(Option* option, MemoryManager* memory_mamanger); 16 | ~Communicator(); 17 | 18 | void RequestParameter(DataBlock *data_block); 19 | void AddDeltaParameter(DataBlock *data_block); 20 | 21 | int64 const GetWordCount(); 22 | void AddWordCount(int64 word_count_num); 23 | 24 | void GetWorkerTableRows(std::vector &row_nums, 25 | std::vector &blocks, int embeding_size); 26 | 27 | void PrepareParameterTables(int row_size, int column_size); 28 | 29 | private: 30 | Option* option_ = nullptr; 31 | MemoryManager* memory_mamanger_ = nullptr; 32 | int process_id_ = -1; 33 | int process_count_ = -1; 34 | 35 | multiverso::MatrixWorkerTable* worker_input_table_ = nullptr; 36 | multiverso::MatrixWorkerTable* worker_output_table_ = nullptr; 37 | multiverso::MatrixServerTable* server_input_table_ = nullptr; 38 | multiverso::MatrixServerTable* server_output_table_ = nullptr; 39 | 40 | multiverso::MatrixWorkerTable* worker_input_gradient_table_ = nullptr; 41 | multiverso::MatrixWorkerTable* worker_output_gradient_table_ = nullptr; 42 | multiverso::MatrixServerTable* server_input_gradient_table_ = nullptr; 43 | multiverso::MatrixServerTable* server_output_gradient_table_ = nullptr; 44 | 45 | multiverso::KVWorkerTable* worker_wordcount_table_ = nullptr; 46 | multiverso::KVServerTable* server_wordcount_table_ = nullptr; 47 | 48 | void ClearParameterTables(); 49 | 50 | void GetRows(multiverso::MatrixWorkerTable* table_, std::vector &row_ids, 51 | std::vector &ptrs, int size); 52 | 53 | void RequestParameterByTableId(DataBlock *data_block, int table_id, 54 | std::vector &nodes, std::vector &blocks); 55 | 56 | void SetDataBlockEmbedding(DataBlock *data_block, std::vector &blocks, 57 | std::vector &nodes, std::function get_function); 58 | 59 | void AddRows(multiverso::MatrixWorkerTable* table, std::vector &row_ids, 60 | std::vector &ptrs, int size); 61 | 62 | void AddParameterByTableId(DataBlock *data_block, int table_id, 63 | std::vector &nodes, std::vector &blocks, 64 | std::vector &recycle_blocks); 65 | 66 | void GetDeltaLoop(DataBlock *data_block, std::vector &blocks, 67 | std::vector &nodes, std::vector &recycle_blocks, 68 | std::function get_function); 69 | }; 70 | } 71 | #endif -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/constant.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_CONSTANT_H_ 2 | #define WORDEMBEDDING_CONSTANT_H_ 3 | 4 | /*! 5 | * \file constant.h 6 | * \brief The index of parameter tables and some constant. 
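 * Table ids 0-4 below identify the input-embedding table, the output-embedding
 * table, their two gradient-sum tables, and the word-count table.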
7 | */ 8 | #include 9 | 10 | namespace wordembedding { 11 | typedef int64_t int64; 12 | typedef uint64_t uint64; 13 | typedef float real; 14 | 15 | //multiverso table id 16 | const int kInputEmbeddingTableId = 0; 17 | const int kEmbeddingOutputTableId = 1; 18 | const int kSumGradient2IETableId = 2; 19 | const int kSumGradient2EOTableId = 3; 20 | const int kWordCountId = 4; 21 | 22 | const int kTableSize = (int)1e8; 23 | 24 | const int kMaxWordSize = 901; 25 | const int kMaxCodeLength = 100; 26 | const int kMaxString = 500; 27 | const int kMaxSentenceLength = 1000; 28 | const int kMaxExp = 6; 29 | 30 | const int kExpTableSize = 1000; 31 | const int kSaveBatch = 100000; 32 | } 33 | #endif 34 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/dictionary.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_DICTIONARY_H_ 2 | #define WORDEMBEDDING_DICTIONARY_H_ 3 | /*! 4 | * \brief Class dictionary stores the vocabulary and it's frequency 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #include "constant.h" 16 | 17 | namespace wordembedding { 18 | /*! 19 | * \brief struct WordInfo stores the pair of word&freq 20 | */ 21 | struct WordInfo { 22 | std::string word; 23 | int64 freq; 24 | WordInfo() { 25 | freq = 0; 26 | word.clear(); 27 | } 28 | WordInfo(const std::string& _word, int64 _freq) { 29 | word = _word; 30 | freq = _freq; 31 | } 32 | }; 33 | 34 | class Dictionary { 35 | public: 36 | Dictionary(); 37 | Dictionary(int i); 38 | void Clear(); 39 | /*! 40 | * \brief Assign value to the set word_whitelist_ 41 | */ 42 | void SetWhiteList(const std::vector& whitelist); 43 | /*! 44 | * \brief Remove the low-freq word 45 | */ 46 | void RemoveWordsLessThan(int64 min_count); 47 | /*! 48 | * \brief Merge in the frequent words according to threshold 49 | */ 50 | void MergeInfrequentWords(int64 threshold); 51 | /*! 52 | * \brief Insert word-freq pair to the dictionary 53 | * \param word the word string 54 | * \param cnt the word's frequency 55 | */ 56 | void Insert(const char* word, int64 cnt = 1); 57 | /*! 58 | * \brief Load the word-freq pair from file 59 | */ 60 | void LoadFromFile(const char* filename); 61 | void LoadTriLetterFromFile(const char* filename, 62 | unsigned int min_cnt = 1, unsigned int letter_count = 3); 63 | int GetWordIdx(const char* word); 64 | /*! 65 | * \brief Get the index of the word according to the dictionary 66 | */ 67 | const WordInfo* GetWordInfo(const char* word); 68 | const WordInfo* GetWordInfo(int word_idx); 69 | int Size(); 70 | void StartIteration(); 71 | /*! 72 | * \brief Judge the word_iterator_ is the end 73 | */ 74 | bool HasMore(); 75 | /*! 76 | * \brief Get the next wordinfo pointer in the vector 77 | */ 78 | const WordInfo* Next(); 79 | std::vector::iterator Begin(); 80 | std::vector::iterator End(); 81 | 82 | void PrintVocab(); 83 | 84 | private: 85 | int combine_; 86 | std::vector word_info_; 87 | std::vector::iterator word_iterator_; 88 | std::unordered_map word_idx_map_; 89 | std::unordered_set word_whitelist_; 90 | }; 91 | } 92 | #endif -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/distributed_wordembedding.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_DISTRIBUTED_WORDEMBEDDING_H_ 2 | #define WORDEMBEDDING_DISTRIBUTED_WORDEMBEDDING_H_ 3 | /*! 
4 | * file distributed_wordembedding.h 5 | * \brief Class Distributed_wordembedding describes the main frame of 6 | * WordEmbedding and some useful functions 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #include "util.h" 18 | #include "huffman_encoder.h" 19 | #include "reader.h" 20 | #include "trainer.h" 21 | #include "block_queue.h" 22 | #include "communicator.h" 23 | #include "wordembedding.h" 24 | 25 | namespace wordembedding { 26 | 27 | extern std::string g_log_suffix; 28 | 29 | class DistributedWordembedding { 30 | public: 31 | /*! 32 | * \brief Run Function contains everything 33 | */ 34 | void Run(int argc, char *argv[]); 35 | 36 | private: 37 | clock_t start_; 38 | int process_id_; 39 | Option* option_ = nullptr; 40 | Dictionary* dictionary_ = nullptr; 41 | HuffmanEncoder* huffman_encoder_ = nullptr; 42 | Sampler* sampler_ = nullptr; 43 | Reader* reader_ = nullptr; 44 | WordEmbedding* WordEmbedding_ = nullptr; 45 | BlockQueue *block_queue_ = nullptr; 46 | std::thread load_data_thread_; 47 | std::thread collect_wordcount_thread_; 48 | bool is_running_ = false; 49 | std::vector trainers_; 50 | Communicator* communicator_ = nullptr; 51 | MemoryManager* memory_mamanger_ = nullptr; 52 | 53 | /*! 54 | * \brief Load Dictionary from the vocabulary_file 55 | * \param opt Some model-set setparams 56 | * \param dictionary save the vocabulary and its frequency 57 | * \param huffman_encoder convert dictionary to the huffman_code 58 | */ 59 | int64 LoadVocab(Option *opt, Dictionary *dictionary, 60 | HuffmanEncoder *huffman_encoder); 61 | 62 | void Train(int argc, char *argv[]); 63 | void TrainNeuralNetwork(); 64 | 65 | void PrepareData(DataBlock *data_block); 66 | 67 | void StartLoadDataThread(Reader *reader, int64 file_size); 68 | void LoadOneBlock(DataBlock *data_block, 69 | Reader *reader, int64 size); 70 | 71 | void StartCollectWordcountThread(); 72 | void StopCollectWordcountThread(); 73 | 74 | void StartWordCount(); 75 | void GetAllWordCount(); 76 | void AddDeltaWordCount(); 77 | 78 | DataBlock* GetDataFromQueue(); 79 | DataBlock* GetBlockAndPrepareParameter(); 80 | 81 | void SaveEmbedding(const char *file_path, bool is_binary); 82 | void WriteToFile(bool is_binary, std::vector &blocks, FILE* fid, 83 | std::vector &nodes); 84 | const char* ChangeFileName(const char *file_path, int iteration); 85 | }; 86 | } 87 | #endif -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/huffman_encoder.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_HUFFMAN_ENCODER_H_ 2 | #define WORDEMBEDDING_HUFFMAN_ENCODER_H_ 3 | /*! 4 | * \brief Class Huffman_encoder stores the huffman_encode of the vocabulary according the dictionary 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "dictionary.h" 12 | #include "constant.h" 13 | 14 | namespace wordembedding { 15 | struct HuffLabelInfo 16 | { /*! 17 | * \brief Internal node ids in the code path 18 | */ 19 | std::vector point; 20 | /*! 21 | * \brief Huffman code 22 | */ 23 | std::vector code; 24 | int codelen; 25 | HuffLabelInfo() { 26 | codelen = 0; 27 | point.clear(); 28 | code.clear(); 29 | } 30 | }; 31 | 32 | class HuffmanEncoder { 33 | public: 34 | HuffmanEncoder(); 35 | /*! 36 | * \brief Save the word-huffmancode in the file 37 | */ 38 | void Save2File(const char* filename); 39 | /*! 
40 | * \brief Recover the word-huffmancode from the file 41 | */ 42 | void RecoverFromFile(const char* filename); 43 | /*! 44 | * \brief Get the dictionary file and build 45 | * \hufflabel_info from the dictionary 46 | */ 47 | void BuildFromTermFrequency(const char* filename); 48 | void BuildFromTermFrequency(Dictionary* dict); 49 | /*! 50 | * \brief Get the label size 51 | */ 52 | int GetLabelSize(); 53 | /*! 54 | * \brief Get the label's index 55 | */ 56 | int GetLabelIdx(const char* label); 57 | HuffLabelInfo* GetLabelInfo(char* label); 58 | HuffLabelInfo* GetLabelInfo(int label_idx); 59 | Dictionary* GetDict(); 60 | 61 | private: 62 | void BuildHuffmanTreeFromDict(); 63 | std::vector hufflabel_info_; 64 | Dictionary* dict_; 65 | }; 66 | } 67 | #endif -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "distributed_wordembedding.h" 11 | #include "memory_manager.h" 12 | #include "util.h" 13 | 14 | using namespace wordembedding; 15 | 16 | int main(int argc, char *argv[]) { 17 | try { 18 | DistributedWordembedding dwe; 19 | dwe.Run(argc, argv); 20 | } 21 | catch (std::bad_alloc &memExp) { 22 | multiverso::Log::Info("Something wrong with new() %s\n", memExp.what()); 23 | } 24 | catch (...) { 25 | multiverso::Log::Info("Something wrong with other reason!\n"); 26 | } 27 | return 0; 28 | } 29 | 30 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/memory_manager.cpp: -------------------------------------------------------------------------------- 1 | #include "memory_manager.h" 2 | 3 | namespace wordembedding { 4 | 5 | MemoryManager::MemoryManager(int block_size) { 6 | block_size_ = block_size; 7 | } 8 | //Request memory for blocks 9 | void MemoryManager::RequestBlocks(int64 block_number, 10 | std::vector& result) { 11 | std::unique_lock lock(mutex_); 12 | for (int64 i = 0; i < block_number; ++i) { 13 | result.push_back(new (std::nothrow) real[block_size_]); 14 | assert(result[i] != nullptr); 15 | } 16 | } 17 | //Free the memory for blocks 18 | void MemoryManager::ReturnBlocks(std::vector& blocks) { 19 | std::unique_lock lock(mutex_); 20 | for (size_t i = 0; i < blocks.size(); ++i) 21 | delete[] blocks[i]; 22 | } 23 | 24 | MemoryManager::~MemoryManager() { 25 | 26 | } 27 | } -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/memory_manager.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_MEMORY_MANAGER_H_ 2 | #define WORDEMBEDDING_MEMORY_MANAGER_H_ 3 | /*! 4 | * file memory_manager.h 5 | * \brief Class MemoryManager creates and allocates memory for the local parameter which is needed by the datablock training. 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include "constant.h" 13 | 14 | namespace wordembedding { 15 | 16 | class MemoryManager { 17 | public: 18 | explicit MemoryManager(int block_size); 19 | /*! 20 | * \brief Create memory for the blocks 21 | * \param block_number the block quantity needed 22 | * \param result the vector of the head address of allocated memory 23 | */ 24 | void RequestBlocks(int64 block_number, std::vector& result); 25 | /*! 
26 | * \brief Delete the blocks memory 27 | * \param blocks the vector of the head address of allocated memory 28 | */ 29 | void ReturnBlocks(std::vector& blocks); 30 | ~MemoryManager(); 31 | 32 | private: 33 | int64 block_size_; 34 | std::mutex mutex_; 35 | 36 | // No copying allowed 37 | MemoryManager(const MemoryManager&); 38 | void operator=(const MemoryManager&); 39 | }; 40 | } 41 | #endif 42 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/reader.cpp: -------------------------------------------------------------------------------- 1 | #include "reader.h" 2 | 3 | namespace wordembedding { 4 | 5 | Reader::Reader(Dictionary *dictionary, Option *option, 6 | Sampler *sampler, const char *input_file) { 7 | dictionary_ = dictionary; 8 | option_ = option; 9 | sampler_ = sampler; 10 | 11 | stopwords_table_.clear(); 12 | if (option_->stopwords) { 13 | FILE* fid = fopen(option_->sw_file, "r"); 14 | if (fid == nullptr) { 15 | multiverso::Log::Fatal("Open sw_file failed!\n"); 16 | exit(1); 17 | } 18 | while (ReadWord(word_, fid)) { 19 | stopwords_table_.insert(word_); 20 | } 21 | 22 | fclose(fid); 23 | } 24 | 25 | file_ = fopen(input_file, "r"); 26 | if (file_ == nullptr) { 27 | multiverso::Log::Fatal("Open train_file failed!\n"); 28 | exit(1); 29 | } 30 | } 31 | 32 | Reader::~Reader() { 33 | if (file_ != nullptr) 34 | fclose(file_); 35 | } 36 | //Get sentence by connecting the words extracted 37 | int Reader::GetSentence(int *sentence, int64 &word_count) { 38 | int length = 0, word_idx; 39 | word_count = 0; 40 | while (1) { 41 | if (!ReadWord(word_, file_)) 42 | break; 43 | word_idx = dictionary_->GetWordIdx(word_); 44 | if (word_idx == -1) 45 | continue; 46 | word_count++; 47 | if (option_->stopwords && stopwords_table_.count(word_)) 48 | continue; 49 | if (option_->sample > 0 && 50 | !sampler_->WordSampling( 51 | dictionary_->GetWordInfo(word_idx)->freq, 52 | option_->total_words, option_->sample)) 53 | continue; 54 | sentence[length++] = word_idx; 55 | if (length >= kMaxSentenceLength) 56 | break; 57 | } 58 | 59 | return length; 60 | } 61 | 62 | void Reader::ResetStart() { 63 | fseek(file_, 0, SEEK_SET); 64 | } 65 | 66 | void Reader::ResetSize(int64 size) { 67 | byte_count_ = 0; 68 | byte_size_ = size; 69 | } 70 | //Read words from the file 71 | bool Reader::ReadWord(char *word, FILE *fin) { 72 | int idx = 0; 73 | char ch; 74 | while (!feof(fin) && byte_count_ < byte_size_) { 75 | ch = fgetc(fin); 76 | ++byte_count_; 77 | if (ch == 13) continue; 78 | if ((ch == ' ') || (ch == '\t') || (ch == '\n')) { 79 | if (idx > 0) { 80 | if (ch == '\n') 81 | ungetc(ch, fin); 82 | break; 83 | } 84 | if (ch == '\n') { 85 | strcpy(word, (char *)""); 86 | return true; 87 | } 88 | else continue; 89 | } 90 | word[idx++] = ch; 91 | //Truncate too long words 92 | if (idx >= kMaxString - 1) 93 | idx--; 94 | } 95 | word[idx] = 0; 96 | return idx != 0; 97 | } 98 | } -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/reader.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_READER_H_ 2 | #define WORDEMBEDDING_READER_H_ 3 | /*! 
4 | * file reader.h 5 | * \brief Class Reader helps the function Loaddata to fill the datablock 6 | */ 7 | 8 | #include 9 | 10 | #include "util.h" 11 | #include "dictionary.h" 12 | #include "constant.h" 13 | 14 | namespace wordembedding { 15 | 16 | class Reader { 17 | public: 18 | Reader(Dictionary *dictionary, Option *option, 19 | Sampler *sampler, const char *input_file); 20 | ~Reader(); 21 | /*! 22 | * \brief Getsentence from the train_file 23 | * \param sentence save the sentence by the word index according to the dictionary 24 | * \param word_count count the sentence length 25 | */ 26 | int GetSentence(int *sentence, int64 &word_count); 27 | void ResetStart(); 28 | void ResetSize(int64 size); 29 | 30 | private: 31 | const Option *option_; 32 | FILE* file_; 33 | char word_[kMaxString + 1]; 34 | Dictionary *dictionary_; 35 | Sampler *sampler_; 36 | int64 byte_count_, byte_size_; 37 | std::unordered_set stopwords_table_; 38 | /*! 39 | * \brief Read words from the train_file 40 | * \param word store the extracted word 41 | * \param file represent the train_file pointer 42 | */ 43 | bool ReadWord(char *word, FILE *file); 44 | 45 | //No copying allowed 46 | Reader(const Reader&); 47 | void operator=(const Reader&); 48 | }; 49 | } 50 | #endif -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/trainer.cpp: -------------------------------------------------------------------------------- 1 | #include "trainer.h" 2 | namespace wordembedding { 3 | 4 | Trainer::Trainer(int trainer_id, Option *option, 5 | Dictionary* dictionary, WordEmbedding* WordEmbedding) { 6 | trainer_id_ = trainer_id; 7 | option_ = option; 8 | word_count = 0; 9 | WordEmbedding_ = WordEmbedding; 10 | dictionary_ = dictionary; 11 | hidden_act_ = (real *)calloc(option_->embeding_size, sizeof(real)); 12 | hidden_err_ = (real *)calloc(option_->embeding_size, sizeof(real)); 13 | process_count_ = -1; 14 | process_id_ = -1; 15 | 16 | assert(hidden_act_ != nullptr); 17 | assert(hidden_err_ != nullptr); 18 | start_ = 0; 19 | train_count_ = 0; 20 | } 21 | 22 | Trainer::~Trainer() { 23 | free(hidden_act_); 24 | free(hidden_err_); 25 | } 26 | 27 | void Trainer::TrainIteration(DataBlock *data_block) { 28 | if (process_id_ == -1) 29 | process_id_ = multiverso::MV_Rank(); 30 | 31 | if (data_block == nullptr) { 32 | return; 33 | } 34 | 35 | int64 last_word_count = word_count; 36 | clock_t start = clock(); 37 | 38 | multiverso::Log::Debug("Rank %d Train %d TrainNN Begin TrainIteration%d ...\n", 39 | process_id_, trainer_id_, train_count_); 40 | 41 | WordEmbedding_->Train(data_block, trainer_id_, option_->thread_cnt, 42 | word_count, hidden_act_, hidden_err_); 43 | 44 | if (word_count > last_word_count) { 45 | multiverso::Log::Info("Rank %d TrainNNSpeed: Words/thread/second %lfk\n", 46 | process_id_, 47 | (static_cast(word_count)-last_word_count) / 48 | (clock() - start) * static_cast(CLOCKS_PER_SEC) / 1000); 49 | } 50 | 51 | multiverso::Log::Debug("Rank %d Trainer %d training time:%lfs\n", process_id_, 52 | trainer_id_, (clock() - start) / static_cast(CLOCKS_PER_SEC)); 53 | train_count_++; 54 | } 55 | } -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/trainer.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_TRAINER_H_ 2 | #define WORDEMBEDDING_TRAINER_H_ 3 | /*! 
4 | * file trainer.h 5 | * \brief Class Trainer trains the model by every train iteration 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "constant.h" 16 | #include "util.h" 17 | #include "huffman_encoder.h" 18 | #include "wordembedding.h" 19 | #include "data_block.h" 20 | #include "memory_manager.h" 21 | 22 | namespace wordembedding { 23 | 24 | class WordEmbedding; 25 | extern std::string g_log_suffix; 26 | class Trainer{ 27 | public: 28 | int64 word_count; 29 | Trainer(int trainer_id, Option *option, 30 | Dictionary* dictionary, WordEmbedding* WordEmbedding); 31 | /*! 32 | * /brief Train one datablock 33 | */ 34 | 35 | ~Trainer(); 36 | void TrainIteration(DataBlock * data_block); 37 | 38 | private: 39 | int process_count_; 40 | int process_id_; 41 | int trainer_id_; 42 | Option *option_; 43 | real *hidden_act_, *hidden_err_; 44 | WordEmbedding* WordEmbedding_; 45 | Dictionary* dictionary_; 46 | int train_count_; 47 | clock_t start_; 48 | 49 | //No copying allowed 50 | Trainer(const Trainer&); 51 | void operator=(const Trainer&); 52 | }; 53 | } 54 | #endif 55 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/util.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_UTIL_H_ 2 | #define WORDEMBEDDING_UTIL_H_ 3 | /*! 4 | * file util.h 5 | * \brief Struct Option stores many general arguments in model 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "constant.h" 16 | #include "dictionary.h" 17 | 18 | namespace wordembedding { 19 | 20 | struct Option { 21 | const char* train_file; 22 | const char* read_vocab_file; 23 | const char* output_file; 24 | const char* sw_file; 25 | const char* endpoints_file; 26 | bool hs, output_binary, cbow, stopwords; 27 | bool use_adagrad; 28 | bool is_pipeline; 29 | real sample; 30 | int64 data_block_size; 31 | int embeding_size, thread_cnt, window_size, negative_num, min_count, epoch; 32 | int64 total_words; 33 | int64 max_preload_data_size; 34 | real init_learning_rate; 35 | 36 | Option(); 37 | /*! 38 | * \brief Get the model-set arguments from file 39 | */ 40 | void ParseArgs(int argc, char* argv[]); 41 | void PrintArgs(); 42 | void PrintUsage(); 43 | 44 | }; 45 | 46 | class Sampler { 47 | public: 48 | Sampler(); 49 | ~Sampler(); 50 | /*! 51 | * \brief Set the negative-sampling distribution for every vocabulary 52 | * \param dictionary the train_file dictionary 53 | */ 54 | void SetNegativeSamplingDistribution(Dictionary *dictionary); 55 | bool WordSampling(int64 word_cnt, int64 train_words, real sample); 56 | /*! 
57 | * \brief Get the next random according to the existing random seed 58 | */ 59 | uint64 GetNextRandom(uint64 next_random); 60 | int NegativeSampling(uint64 next_random); 61 | 62 | private: 63 | int* table_; 64 | 65 | //No copying allowed 66 | Sampler(const Sampler&); 67 | void operator=(const Sampler&); 68 | }; 69 | 70 | std::string GetSystemTime(); 71 | int64 GetFileSize(const char *filename); 72 | bool ReadWord(char *word, FILE *fin); 73 | 74 | void InitExpTable(); 75 | extern std::string g_log_suffix; 76 | extern real* expTable; 77 | extern int embedding_size; 78 | 79 | } 80 | #endif -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | PROJECT(MULTIVERSO) 4 | 5 | OPTION(USE_HDFS "won't use hdfs on default, set ON to enable" OFF) 6 | OPTION(TEST "Build all tests." ON) 7 | OPTION(USE_ZMQ "weather to build with ZeroMQ.(default: OFF)" OFF) 8 | OPTION(INSTALL_MULTIVERSO "whether install Multiverso to /usr/local/lib" ON) 9 | option(ENABLE_DCASGD "Build with DC-ASGD supported" OFF) 10 | 11 | find_package(MPI REQUIRED) 12 | 13 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11") 14 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") 15 | 16 | if(USE_HDFS) 17 | ADD_DEFINITIONS(-DMULTIVERSO_USE_HDFS) 18 | SET(JVM_LIB /usr/local/java/default/jre/lib/amd64/server) 19 | SET(HDFS_LIB /usr/local/hadoop/lib/native) 20 | LINK_DIRECTORIES(${HDFS_LIB}) 21 | LINK_DIRECTORIES(${JVM_LIB}) 22 | endif(USE_HDFS) 23 | 24 | if(ENABLE_DCASGD) 25 | ADD_DEFINITIONS(-DENABLE_DCASGD) 26 | endif(ENABLE_DCASGD) 27 | 28 | include_directories(${PROJECT_SOURCE_DIR}/include) 29 | 30 | set(MULTIVERSO_DIR ${PROJECT_SOURCE_DIR}) 31 | ADD_SUBDIRECTORY(src) 32 | ADD_SUBDIRECTORY(Test) 33 | ADD_SUBDIRECTORY(Test/unittests) 34 | ADD_SUBDIRECTORY(Applications/WordEmbedding) 35 | ADD_SUBDIRECTORY(Applications/LogisticRegression) 36 | 37 | if(INSTALL_MULTIVERSO) 38 | install (DIRECTORY ${PROJECT_SOURCE_DIR}/include/multiverso DESTINATION include) 39 | endif(INSTALL_MULTIVERSO) 40 | 41 | 42 | # uninstall target 43 | configure_file( 44 | "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in" 45 | "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" 46 | IMMEDIATE @ONLY) 47 | 48 | add_custom_target(uninstall 49 | COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) 50 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Microsoft Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Multiverso 2 | ========== 3 | [![Build Status](https://travis-ci.org/Microsoft/Multiverso.svg?branch=master)](https://travis-ci.org/Microsoft/Multiverso) 4 | 5 | Multiverso is a parameter-server-based framework for training machine learning models on big data across large numbers of machines. It is a standard C++ library that provides a set of friendly programming interfaces, and it has been extended with bindings for Python and Lua programs. With such easy-to-use APIs, machine learning researchers and practitioners do not need to worry about system routine issues such as distributed model storage and operation, inter-process and inter-thread communication, multi-threading management, and so on. 6 | Instead, they are able to focus on the core machine learning logic: data, model, and training. 7 | 8 | For more details, please visit our website [http://www.dmtk.io](http://www.dmtk.io). 9 | 10 | Build 11 | ---------- 12 | 13 | **Linux** (Tested on Ubuntu 14.04) 14 | 15 | ``` 16 | sudo apt-get install libopenmpi-dev openmpi-bin build-essential cmake git 17 | git clone https://github.com/Microsoft/multiverso.git --recursive && cd multiverso 18 | mkdir build && cd build 19 | cmake .. && make && sudo make install 20 | ``` 21 | 22 | **Windows** 23 | 24 | Open `Multiverso.sln` with Visual Studio 2013 and build. 25 | 26 | Related Projects 27 | ---------- 28 | 29 | Distributed systems currently built on Multiverso: 30 | 31 | * [lightLDA](https://github.com/Microsoft/lightlda): Scalable, fast, lightweight system for large-scale topic modeling 32 | * [distributed_word_embedding](https://github.com/Microsoft/multiverso/tree/master/Applications/WordEmbedding): Distributed system for word embedding 33 | * [distributed_word_embedding (deprecated)](https://github.com/Microsoft/distributed_word_embedding): Distributed system for word embedding 34 | * [distributed_skipgram_mixture (deprecated)](https://github.com/Microsoft/distributed_skipgram_mixture): Distributed skip-gram mixture for multi-sense word embedding 35 | 36 | Microsoft Open Source Code of Conduct 37 | ------------ 38 | 39 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information, see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
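
Quick Example
----------

The snippet below is a minimal sketch of the worker-side C++ API, following the same pattern as the test programs under `Test/`. The exact header paths are assumptions based on the installed `include/multiverso` layout, and the table size and element type are arbitrary choices for illustration.

```
#include <vector>
#include <multiverso/multiverso.h>
#include <multiverso/table/array_table.h>

int main(int argc, char* argv[]) {
  multiverso::MV_Init(&argc, argv);                   // join the worker/server group
  multiverso::ArrayTableOption<float> option(1000);   // one shared array of 1000 floats
  auto* table = multiverso::MV_CreateTable(option);   // worker-side handle to the table
  std::vector<float> delta(1000, 1.0f), model(1000);
  table->Add(delta.data(), delta.size());             // push a local update (delta)
  table->Get(model.data(), model.size());             // pull the aggregated values
  multiverso::MV_ShutDown();
  return 0;
}
```

Every process running this code joins the same parameter server group: each `Add` contributes a delta to the server-side table, and `Get` reads back the aggregated model.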
40 | -------------------------------------------------------------------------------- /Test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/Test) 2 | 3 | SET(MULTIVERSO_TEST_SRC test_allreduce.cpp test_array_table.cpp test_kv_table.cpp test_matrix_perf.cpp test_matrix_table.cpp test_net.cpp main.cpp) 4 | 5 | SET(CMAKE_CXX_COMPILER mpicxx) 6 | 7 | LINK_DIRECTORIES(${LIBRARY_OUTPUT_PATH}) 8 | 9 | 10 | MESSAGE(${MPI_LIBRARIES}) 11 | MESSAGE(${MPI_CXX_LIBRARIES}) 12 | 13 | ENABLE_TESTING() 14 | 15 | ADD_EXECUTABLE(multiverso.test ${MULTIVERSO_TEST_SRC}) 16 | 17 | if(USE_HDFS) 18 | TARGET_LINK_LIBRARIES(multiverso.test multiverso ${MPI_CXX_LIBRARIES} jvm hdfs) 19 | else() 20 | TARGET_LINK_LIBRARIES(multiverso.test multiverso ${MPI_CXX_LIBRARIES}) 21 | endif(USE_HDFS) 22 | 23 | SET_PROPERTY(TARGET multiverso.test PROPERTY CXX_STANDARD 11) 24 | -------------------------------------------------------------------------------- /Test/Test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | {f42b7b09-e419-4f63-b12e-e93219217a45} 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Test/common.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_TEST_END2ENDTEST_COMMON_H_ 2 | #define MULTIVERSO_TEST_END2ENDTEST_COMMON_H_ 3 | 4 | namespace multiverso { 5 | namespace test { 6 | 7 | void TestAllreduce(int argc, char* argv[]); 8 | 9 | void TestArray(int argc, char* argv[]); 10 | 11 | void TestKV(int argc, char* argv[]); 12 | 13 | void TestMatrix(int argc, char* argv[]); 14 | 15 | void TestNet(int argc, char* argv[]); 16 | 17 | } // namespace test 18 | } // namespace multiverso 19 | 20 | #endif // MULTIVERSO_TEST_END2ENDTEST_COMMON_H_ -------------------------------------------------------------------------------- /Test/main.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | #include 4 | #include 5 | 6 | using namespace multiverso::test; 7 | 8 | void PrintUsage() { 9 | printf("Usage: multiverso.test kv|array|net|matrix|allreduce\n"); 10 | } 11 | 12 | int main(int argc, char* argv[]) { 13 | if (argc != 2) PrintUsage(); 14 | else { 15 | if (strcmp(argv[1], "kv") == 0) TestKV(argc, argv); 16 | else if (strcmp(argv[1], "array") == 0) TestArray(argc, argv); 17 | else if (strcmp(argv[1], "net") == 0) TestNet(argc, argv); 18 | else if (strcmp(argv[1], "matrix") == 0) TestMatrix(argc, argv); 19 | else if (strcmp(argv[1], "allreduce") == 0) TestAllreduce(argc, argv); 20 | else { 21 | PrintUsage(); 22 | } 23 | } 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /Test/test_allreduce.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace multiverso { 8 | namespace test { 9 | 10 | void TestAllreduce(int argc, char* argv[]) { 11 | multiverso::SetCMDFlag("ma", true); 12 | MV_Init(&argc, argv); 13 | int a = 1; 14 | MV_Aggregate(&a, 1); 15 | 16 | CHECK(a == MV_Size()); 17 | 18 | MV_ShutDown(); 19 | } 20 | 21 | } // namespace test 22 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/test_array_table.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace multiverso { 9 | namespace test { 10 | 11 | void TestArray(int argc, char* argv[]) { 12 | Log::Info("Test Array \n"); 13 | 14 | multiverso::SetCMDFlag("sync", true); 15 | MV_Init(&argc, argv); 16 | 17 | size_t array_size = 500; 18 | 19 | auto shared_array = MV_CreateTable(ArrayTableOption(array_size)); 20 | 21 | Log::Info("Create tables OK. Rank = %d, worker_id = %d\n", 22 | MV_Rank(), MV_WorkerId()); 23 | 24 | std::vector delta(array_size); 25 | for (int i = 0; i < array_size; ++i) 26 | delta[i] = static_cast(i); 27 | 28 | int* data = new int[array_size]; 29 | 30 | int iter = 10 * (MV_Rank() + 10); 31 | for (int i = 0; i < iter; ++i) { 32 | shared_array->Add(delta.data(), array_size); 33 | shared_array->Add(delta.data(), array_size); 34 | shared_array->Add(delta.data(), array_size); 35 | shared_array->Get(data, array_size); 36 | shared_array->Get(data, array_size); 37 | shared_array->Get(data, array_size); 38 | if (iter < 100) { 39 | for (int k = 0; k < array_size; ++k) { 40 | CHECK (data[k] != delta[k] * (i + 1) * MV_NumWorkers()) ; 41 | } 42 | } 43 | } 44 | delete[] data; 45 | 46 | MV_ShutDown(); 47 | } 48 | 49 | } // namespace test 50 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/test_kv_table.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace multiverso { 6 | namespace test { 7 | 8 | void TestKV(int argc, char* argv[]) { 9 | Log::Info("Test KV map \n"); 10 | // 1. Start the Multiverso engine 11 | MV_Init(&argc, argv); 12 | 13 | // 2. To create the shared table 14 | KVTableOption option; 15 | auto dht = MV_CreateTable(option); 16 | 17 | // 3. User program 18 | // access the local cache 19 | std::unordered_map& kv = dht->raw(); 20 | 21 | // Get from the server 22 | dht->Get(0); 23 | // Check the result 24 | Log::Info("Get 0 from kv server: result = %d\n", kv[0]); 25 | 26 | // Add 1 to the server 27 | dht->Add(0, 1); 28 | // Check the result 29 | dht->Get(0); 30 | Log::Info("Get 0 from kv server after add 1: result = %d\n", kv[0]); 31 | 32 | // 4. 
Shutdown the Multiverso engine 33 | MV_ShutDown(); 34 | } 35 | 36 | } // namespace test 37 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/test_net.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | namespace multiverso { 7 | namespace test { 8 | 9 | void TestNet(int argc, char* argv[]) { 10 | NetInterface* net = NetInterface::Get(); 11 | net->Init(&argc, argv); 12 | 13 | const char* chi1 = std::string("hello, world").c_str(); 14 | const char* chi2 = std::string("hello, c++").c_str(); 15 | const char* chi3 = std::string("hello, multiverso").c_str(); 16 | char* hi1 = new char[14]; 17 | 18 | #ifdef _MSC_VER 19 | strcpy_s(hi1, 14, chi1); 20 | #else 21 | strcpy(hi1, chi1); 22 | #endif 23 | 24 | char* hi2 = new char[12]; 25 | #ifdef _MSC_VER 26 | strcpy_s(hi2, 12, chi2); 27 | #else 28 | strcpy(hi2, chi2); 29 | #endif 30 | 31 | char* hi3 = new char[19]; 32 | #ifdef _MSC_VER 33 | strcpy_s(hi3, 19, chi3); 34 | #else 35 | strcpy(hi3, chi3); 36 | #endif 37 | 38 | if (net->rank() == 0) { 39 | for (int rank = 1; rank < net->size(); ++rank) { 40 | MessagePtr msg(new Message()); 41 | msg->set_src(0); 42 | msg->set_dst(rank); 43 | msg->Push(Blob(hi1, 13)); 44 | msg->Push(Blob(hi2, 11)); 45 | msg->Push(Blob(hi3, 18)); 46 | for (int i = 0; i < msg->size(); ++i) { 47 | Log::Info("In Send: %s\n", msg->data()[i].data()); 48 | }; 49 | while (net->Send(msg) == 0); 50 | Log::Info("rank 0 send\n"); 51 | } 52 | 53 | for (int i = 1; i < net->size(); ++i) { 54 | MessagePtr msg(new Message()); 55 | msg.reset(new Message()); 56 | while (net->Recv(&msg) == 0) { 57 | // Log::Info("recv return 0\n"); 58 | } 59 | Log::Info("rank 0 recv\n"); 60 | 61 | std::vector recv_data = msg->data(); 62 | CHECK(recv_data.size() == 3); 63 | for (int i = 0; i < msg->size(); ++i) { 64 | Log::Info("recv from srv %d: %s\n", msg->src(), recv_data[i].data()); 65 | }; 66 | } 67 | } 68 | else {// other rank 69 | MessagePtr msg(new Message()); 70 | while (net->Recv(&msg) == 0) { 71 | // Log::Info("recv return 0\n"); 72 | } 73 | Log::Info("rank %d recv\n", net->rank()); 74 | std::vector& recv_data = msg->data(); 75 | CHECK(recv_data.size() == 3); 76 | for (int i = 0; i < msg->size(); ++i) { 77 | Log::Info("%s\n", recv_data[i].data()); 78 | } 79 | 80 | msg.reset(new Message()); 81 | msg->set_src(net->rank()); 82 | msg->set_dst(0); 83 | msg->Push(Blob(hi1, 13)); 84 | msg->Push(Blob(hi2, 11)); 85 | msg->Push(Blob(hi3, 18)); 86 | while (net->Send(msg) == 0); 87 | Log::Info("rank %d send\n", net->rank()); 88 | } 89 | net->Finalize(); 90 | } 91 | 92 | } // namespace test 93 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/unittests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | enable_testing() 2 | 3 | find_package(Boost COMPONENTS unit_test_framework REQUIRED) 4 | 5 | SET(MULTIVERSO_UNITTEST_SRC test_array.cpp test_blob.cpp test_kv.cpp test_message.cpp test_multiverso.cpp test_node.cpp test_sync.cpp) 6 | 7 | LINK_DIRECTORIES(${LIBRARY_OUTPUT_PATH}) 8 | 9 | MESSAGE(${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) 10 | MESSAGE(${Boost_INCLUDE_DIRS}) 11 | 12 | ADD_EXECUTABLE(multiversotests ${MULTIVERSO_UNITTEST_SRC}) 13 | 14 | TARGET_INCLUDE_DIRECTORIES(multiversotests PRIVATE ${Boost_INCLUDE_DIRS}) 15 | TARGET_LINK_LIBRARIES(multiversotests multiverso 
${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) 16 | 17 | SET_PROPERTY(TARGET multiversotests PROPERTY CXX_STANDARD 11) 18 | -------------------------------------------------------------------------------- /Test/unittests/MultiversoTests.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /Test/unittests/multiverso_env.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_TEST_UNITTEST_MULTIVERSO_EVN_H_ 2 | #define MULTIVERSO_TEST_UNITTEST_MULTIVERSO_EVN_H_ 3 | 4 | #include 5 | 6 | namespace multiverso { 7 | namespace test { 8 | 9 | struct MultiversoEnv { 10 | MultiversoEnv() { 11 | MV_SetFlag("sync", false); 12 | MV_Init(); 13 | } 14 | 15 | ~MultiversoEnv() { 16 | MV_ShutDown(false); 17 | } 18 | }; 19 | 20 | struct SyncMultiversoEnv { 21 | SyncMultiversoEnv() { 22 | MV_SetFlag("sync", true); 23 | MV_Init(); 24 | } 25 | 26 | ~SyncMultiversoEnv() { 27 | MV_ShutDown(false); 28 | } 29 | }; 30 | 31 | } // namespace test 32 | } // namespace multiverso 33 | 34 | #endif // MULTIVERSO_TEST_UNITTEST_MULTIVERSO_EVN_H_ -------------------------------------------------------------------------------- /Test/unittests/test_array.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "multiverso_env.h" 6 | 7 | namespace multiverso { 8 | namespace test { 9 | 10 | struct ArrayTableEnv : public MultiversoEnv { 11 | ArrayWorker* table; 12 | 13 | ArrayTableEnv() : MultiversoEnv() { 14 | ArrayTableOption option(10); 15 | table = MV_CreateTable(option); 16 | } 17 | 18 | ~ArrayTableEnv() { 19 | delete table; 20 | table = nullptr; 21 | } 22 | }; 23 | 24 | BOOST_FIXTURE_TEST_SUITE(array_test, ArrayTableEnv) 25 | 26 | BOOST_AUTO_TEST_CASE(array_access) { 27 | std::vector delta(10); 28 | std::vector model(10); 29 | for (int i = 0; i < 10; ++i) delta[i] = i; 30 | table->Add(delta.data(), delta.size()); 31 | table->Get(model.data(), model.size()); 32 | 33 | for (int i = 0; i < 10; ++i) { 34 | BOOST_CHECK_EQUAL(model[i], delta[i]); 35 | } 36 | 37 | table->AddAsync(delta.data(), delta.size()); 38 | int handle = table->GetAsync(model.data(), model.size()); 39 | table->Wait(handle); 40 | 41 | for (int i = 0; i < 10; ++i) { 42 | BOOST_CHECK_EQUAL(model[i], 2 * delta[i]); 43 | } 44 | } 45 | 46 | BOOST_AUTO_TEST_CASE(array_partition) { 47 | std::unordered_map> result; 48 | std::vector kv; 49 | int key = -1; 50 | Blob key_blob(&key, sizeof(key)); 51 | std::vector value(10); 52 | Blob value_blob(value.data(), sizeof(int) * value.size()); 53 | kv.push_back(key_blob); 54 | kv.push_back(value_blob); 55 | 56 | table->Partition(kv, MsgType::Request_Get, &result); 57 | 58 | BOOST_CHECK_EQUAL(result.size(), 1); 59 | BOOST_CHECK(result.find(0) != result.end()); 60 | BOOST_CHECK_EQUAL(result[0].size(), 2); 61 | BOOST_CHECK_EQUAL(result[0][0].As(), key); 62 | int* vec = reinterpret_cast(result[0][1].data()); 63 | for (int i = 0; i < 10; ++i) { 64 | BOOST_CHECK_EQUAL(vec[i], value[i]); 65 | } 66 | } 67 | 68 | BOOST_AUTO_TEST_SUITE_END() 69 | 70 | } // namespace test 71 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/unittests/test_blob.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 
namespace multiverso { 5 | namespace test { 6 | 7 | BOOST_AUTO_TEST_SUITE(blob) 8 | 9 | BOOST_AUTO_TEST_CASE(blob_constructor_test) { 10 | multiverso::Blob blob; 11 | BOOST_CHECK_EQUAL(blob.size(), 0); 12 | 13 | multiverso::Blob blob2(4); 14 | BOOST_CHECK_EQUAL(blob2.size(), 4); 15 | 16 | int a[3]; 17 | multiverso::Blob blob3(a, 3 * sizeof(int)); 18 | BOOST_CHECK_EQUAL(blob3.size(), 3 * sizeof(int)); 19 | 20 | } 21 | 22 | BOOST_AUTO_TEST_CASE(blob_access_test) { 23 | multiverso::Blob blob(4); 24 | BOOST_CHECK_EQUAL(blob.size(), 4); 25 | 26 | const int value = 3; 27 | int* data = reinterpret_cast(blob.data()); 28 | *data = value; 29 | BOOST_CHECK_EQUAL(blob.As(), value); 30 | 31 | std::string str("hello, world!"); 32 | multiverso::Blob str_blob(str.c_str(), str.size()); 33 | BOOST_CHECK_EQUAL(str_blob[0], 'h'); 34 | BOOST_CHECK_EQUAL(str_blob[4], 'o'); 35 | } 36 | 37 | BOOST_AUTO_TEST_SUITE_END() 38 | 39 | } // namespace test 40 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/unittests/test_kv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "multiverso_env.h" 5 | 6 | namespace multiverso { 7 | namespace test { 8 | 9 | struct KVTableEnv : public MultiversoEnv { 10 | KVWorkerTable* table; 11 | 12 | KVTableEnv() : MultiversoEnv() { 13 | KVTableOption option; 14 | table = MV_CreateTable(option); 15 | } 16 | 17 | ~KVTableEnv() { 18 | delete table; 19 | table = nullptr; 20 | } 21 | }; 22 | 23 | BOOST_FIXTURE_TEST_SUITE(test_kv, KVTableEnv) 24 | 25 | BOOST_AUTO_TEST_CASE(access) { 26 | auto& map = table->raw(); 27 | table->Get(0); 28 | BOOST_CHECK_EQUAL(map[0], 0); 29 | 30 | table->Add(0, 3); 31 | 32 | table->Get(0); 33 | BOOST_CHECK_EQUAL(map[0], 3); 34 | 35 | table->Add(0, -4); 36 | 37 | table->Get(0); 38 | BOOST_CHECK_EQUAL(map[0], -1); 39 | } 40 | 41 | 42 | BOOST_AUTO_TEST_SUITE_END() 43 | 44 | } // namespace test 45 | } // namespace multiverso 46 | -------------------------------------------------------------------------------- /Test/unittests/test_message.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace multiverso { 5 | namespace test { 6 | 7 | BOOST_AUTO_TEST_SUITE(message) 8 | 9 | BOOST_AUTO_TEST_CASE(message_access) { 10 | multiverso::Message msg; 11 | BOOST_CHECK_EQUAL(msg.data().size(), 0); 12 | 13 | msg.set_msg_id(0); 14 | BOOST_CHECK_EQUAL(msg.msg_id(), 0); 15 | msg.set_src(1); 16 | BOOST_CHECK_EQUAL(msg.src(), 1); 17 | msg.set_dst(2); 18 | BOOST_CHECK_EQUAL(msg.dst(), 2); 19 | msg.set_table_id(3); 20 | BOOST_CHECK_EQUAL(msg.table_id(), 3); 21 | msg.set_type(MsgType::Request_Get); 22 | BOOST_CHECK_EQUAL(msg.type(), MsgType::Request_Get); 23 | 24 | BOOST_TEST_MESSAGE("before blob\n"); 25 | 26 | multiverso::Blob data; 27 | msg.Push(data); 28 | BOOST_CHECK_EQUAL(msg.size(), 1); 29 | 30 | 31 | std::vector vec_data; 32 | msg.set_data(vec_data); 33 | 34 | BOOST_CHECK_EQUAL(msg.size(), 0); 35 | 36 | MessagePtr reply_msg(msg.CreateReplyMessage()); 37 | BOOST_CHECK_EQUAL(reply_msg->src(), msg.dst()); 38 | BOOST_CHECK_EQUAL(reply_msg->dst(), msg.src()); 39 | BOOST_CHECK_EQUAL(reply_msg->type(), MsgType::Reply_Get); 40 | } 41 | 42 | BOOST_AUTO_TEST_SUITE_END() 43 | 44 | } // namespace test 45 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/unittests/test_multiverso.cpp: 
-------------------------------------------------------------------------------- 1 | #ifndef _WIN32 2 | // Use dynamic library on Linux 3 | #define BOOST_TEST_DYN_LINK 4 | #endif 5 | 6 | #define BOOST_TEST_MODULE multiverso 7 | #include 8 | -------------------------------------------------------------------------------- /Test/unittests/test_node.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace multiverso { 5 | namespace test { 6 | 7 | BOOST_AUTO_TEST_SUITE(node) 8 | 9 | BOOST_AUTO_TEST_CASE(node_role) { 10 | BOOST_CHECK(!multiverso::node::is_worker(multiverso::Role::NONE)); 11 | BOOST_CHECK(multiverso::node::is_worker(multiverso::Role::WORKER)); 12 | BOOST_CHECK(!multiverso::node::is_worker(multiverso::Role::SERVER)); 13 | BOOST_CHECK(multiverso::node::is_worker(multiverso::Role::ALL)); 14 | 15 | BOOST_CHECK(!multiverso::node::is_server(multiverso::Role::NONE)); 16 | BOOST_CHECK(!multiverso::node::is_server(multiverso::Role::WORKER)); 17 | BOOST_CHECK(multiverso::node::is_server(multiverso::Role::SERVER)); 18 | BOOST_CHECK(multiverso::node::is_server(multiverso::Role::ALL)); 19 | } 20 | 21 | BOOST_AUTO_TEST_SUITE_END() 22 | 23 | } // namespace test 24 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/unittests/test_sync.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "multiverso_env.h" 5 | 6 | namespace multiverso { 7 | namespace test { 8 | 9 | struct SyncArrayTableEnv : public SyncMultiversoEnv { 10 | ArrayWorker* table; 11 | 12 | SyncArrayTableEnv() : SyncMultiversoEnv() { 13 | ArrayTableOption option(10); 14 | table = MV_CreateTable(option); 15 | } 16 | 17 | ~SyncArrayTableEnv() { 18 | delete table; 19 | table = nullptr; 20 | } 21 | }; 22 | 23 | BOOST_FIXTURE_TEST_SUITE(test_sync, SyncArrayTableEnv) 24 | 25 | BOOST_AUTO_TEST_CASE(sync) { 26 | std::vector delta(10); 27 | std::vector model(10); 28 | for (int i = 0; i < 10; ++i) delta[i] = i; 29 | table->Add(delta.data(), delta.size()); 30 | table->Get(model.data(), model.size()); 31 | 32 | for (int i = 0; i < 10; ++i) { 33 | BOOST_CHECK_EQUAL(model[i], delta[i]); 34 | } 35 | 36 | table->AddAsync(delta.data(), delta.size()); 37 | int handle = table->GetAsync(model.data(), model.size()); 38 | table->Wait(handle); 39 | 40 | for (int i = 0; i < 10; ++i) { 41 | BOOST_CHECK_EQUAL(model[i], 2 * delta[i]); 42 | } 43 | } 44 | 45 | 46 | BOOST_AUTO_TEST_SUITE_END() 47 | 48 | } // namespace test 49 | } // namespace multiverso 50 | -------------------------------------------------------------------------------- /binding/C#/MultiversoCLR/AssemblyInfo.cpp: -------------------------------------------------------------------------------- 1 | using namespace System; 2 | using namespace System::Reflection; 3 | using namespace System::Runtime::CompilerServices; 4 | using namespace System::Runtime::InteropServices; 5 | using namespace System::Security::Permissions; 6 | 7 | // 8 | // General Information about an assembly is controlled through the following 9 | // set of attributes. Change these attribute values to modify the information 10 | // associated with an assembly. 
11 | // 12 | [assembly:AssemblyTitleAttribute(L"MultiversoCLR")]; 13 | [assembly:AssemblyDescriptionAttribute(L"")]; 14 | [assembly:AssemblyConfigurationAttribute(L"")]; 15 | [assembly:AssemblyCompanyAttribute(L"")]; 16 | [assembly:AssemblyProductAttribute(L"MultiversoCLR")]; 17 | [assembly:AssemblyCopyrightAttribute(L"Copyright (c) 2016")]; 18 | [assembly:AssemblyTrademarkAttribute(L"")]; 19 | [assembly:AssemblyCultureAttribute(L"")]; 20 | 21 | // 22 | // Version information for an assembly consists of the following four values: 23 | // 24 | // Major Version 25 | // Minor Version 26 | // Build Number 27 | // Revision 28 | // 29 | // You can specify all the value or you can default the Revision and Build Numbers 30 | // by using the '*' as shown below: 31 | 32 | [assembly:AssemblyVersionAttribute("1.0.*")]; 33 | 34 | [assembly:ComVisible(false)]; 35 | 36 | [assembly:CLSCompliantAttribute(true)]; -------------------------------------------------------------------------------- /binding/C#/MultiversoCLR/MatrixTable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | using namespace System; 7 | 8 | namespace MultiversoCLR { 9 | 10 | interface class IWorkerTable { 11 | public: 12 | static IWorkerTable^ CreateTable(int table_id, int num_rows, int num_cols, System::String^ type); 13 | void Get(int row_id, void* buffer, int size); 14 | void Get(void* buffer, int size); 15 | void Get(array^ row_ids, array^ buffers, int size); 16 | 17 | void Add(int row_id, void* buffer, int size); 18 | void Add(void* buffer, int size); 19 | void Add(array^ row_ids, array^ buffers, int size); 20 | }; 21 | 22 | template 23 | public ref class MatrixTable : public IWorkerTable { 24 | public: 25 | MatrixTable(int num_rows, int num_cols) { 26 | multiverso::MatrixTableOption option(num_rows, num_cols); 27 | table_ = multiverso::MV_CreateTable(option); 28 | } 29 | 30 | ~MatrixTable() { 31 | delete table_; 32 | } 33 | 34 | virtual void Get(void* buffer, int size) { 35 | table_->Get(static_cast(buffer), size); 36 | } 37 | 38 | virtual void Get(int row_id, void* buffer, int size) { 39 | table_->Get(row_id, static_cast(buffer), size); 40 | } 41 | 42 | virtual void Get(array^ row_ids, array^ buffers, int size) { 43 | std::vector row_id_vec(size); 44 | std::vector buffer_vec; 45 | pin_ptr p = &row_ids[0]; 46 | memcpy(row_id_vec.data(), p, size * sizeof(int)); 47 | for (int i = 0; i < size; ++i) { 48 | buffer_vec.push_back(static_cast(buffers[i])); 49 | } 50 | table_->Get(row_id_vec, buffer_vec, size); 51 | } 52 | 53 | virtual void Add(int row_id, void* buffer, int size) { 54 | table_->Add(row_id, static_cast(buffer), size); 55 | } 56 | 57 | virtual void Add(void* buffer, int size) { 58 | table_->Add(static_cast(buffer), size); 59 | } 60 | 61 | virtual void Add(array^ row_ids, array^ buffers, int size) { 62 | std::vector row_id_vec(size); 63 | std::vector buffer_vec; 64 | pin_ptr p = &row_ids[0]; 65 | memcpy(row_id_vec.data(), p, size * sizeof(int)); 66 | for (int i = 0; i < size; ++i) { 67 | buffer_vec.push_back(static_cast(buffers[i])); 68 | } 69 | table_->Add(row_id_vec, buffer_vec, size); 70 | } 71 | private: 72 | multiverso::MatrixWorkerTable* table_; 73 | }; 74 | 75 | IWorkerTable^ IWorkerTable::CreateTable(int, int num_rows, int num_cols, System::String^ type) { 76 | if (type->Equals("Int")) return gcnew MatrixTable(num_rows, num_cols); 77 | if (type->Equals("Float")) return gcnew MatrixTable(num_rows, num_cols); 78 | if 
(type->Equals("Double")) return gcnew MatrixTable(num_rows, num_cols); 79 | throw gcnew Exception("Element Type " + type + " not implemented"); 80 | } 81 | } -------------------------------------------------------------------------------- /binding/C#/MultiversoCLR/MultiversoCLR.h: -------------------------------------------------------------------------------- 1 | // MultiversoCLR.h 2 | 3 | #pragma once 4 | 5 | // #include "ITable.h" 6 | #include "MatrixTable.h" 7 | 8 | using namespace System; 9 | 10 | namespace MultiversoCLR { 11 | 12 | public ref class MultiversoWrapper 13 | { 14 | public: 15 | static bool NetBind(int rank, System::String^ endpoint); 16 | static bool NetConnect(array^ ranks, array^ endpoints); 17 | static void NetFinalize(); 18 | 19 | static void Init(int num_tables, bool sync); 20 | static void Shutdown(); 21 | 22 | static void CreateTables(array^ rows, array^ cols, array^ eleTypes); 23 | static void CreateTable(int table_id, int rows, int cols, System::String^ eleType); 24 | 25 | static int Rank(); 26 | static int Size(); 27 | static void Barrier(); 28 | 29 | generic 30 | static void Get(int table_id, array^ p_value); 31 | 32 | generic 33 | static void Get(int table_id, int row_id, array^ p_value); 34 | 35 | generic 36 | static void Add(int table_id, array^ p_update); 37 | 38 | generic 39 | static void Add(int table_id, int row_id, array^ p_value); 40 | 41 | private: 42 | static void Init(); 43 | static void CreateWorkerTable(int table_id, int rows, int cols, System::String^ eleType); 44 | static array^ worker_tables_; 45 | }; 46 | } 47 | -------------------------------------------------------------------------------- /binding/C#/MultiversoCLR/MultiversoCLR.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | 14 | 15 | Header Files 16 | 17 | 18 | Header Files 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | Source Files 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /binding/C#/MultiversoCLR/ReadMe.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/C#/MultiversoCLR/ReadMe.txt -------------------------------------------------------------------------------- /binding/C#/MultiversoCLR/multiverso.snk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/C#/MultiversoCLR/multiverso.snk -------------------------------------------------------------------------------- /binding/C#/NuGet/GenerateNugetPackage.ps1: -------------------------------------------------------------------------------- 1 | copy ../../../x64/Release/MultiversoCLR.dll . 
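# Pack the staged assembly into the NuGet package described by MultiversoCLR.nuspec.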
2 | nuget pack MultiversoCLR.nuspec -------------------------------------------------------------------------------- /binding/C#/NuGet/MultiversoCLR.nuspec: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | MultiversoCLR 5 | 1.0.1 6 | feiga 7 | feiga 8 | http://github.com/Microsoft/multiverso/blob/master/LICENSE 9 | http://github.com/Microsoft/multiverso 10 | http://ICON_URL_HERE_OR_DELETE_THIS_LINE 11 | false 12 | MultiversoCLR 13 | Copyright 2016 14 | Multiverso 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /binding/lua/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Lua sources 2 | luac.out 3 | 4 | # luarocks build files 5 | *.src.rock 6 | *.zip 7 | *.tar.gz 8 | 9 | # Object files 10 | *.o 11 | *.os 12 | *.ko 13 | *.obj 14 | *.elf 15 | 16 | # Precompiled Headers 17 | *.gch 18 | *.pch 19 | 20 | # Libraries 21 | *.lib 22 | *.a 23 | *.la 24 | *.lo 25 | *.def 26 | *.exp 27 | 28 | # Shared objects (inc. Windows DLLs) 29 | *.dll 30 | *.so 31 | *.so.* 32 | *.dylib 33 | 34 | # Executables 35 | *.exe 36 | *.out 37 | *.app 38 | *.i*86 39 | *.x86_64 40 | *.hex 41 | 42 | -------------------------------------------------------------------------------- /binding/lua/ArrayTableHandler.lua: -------------------------------------------------------------------------------- 1 | local ffi = require 'ffi' 2 | local util = require('multiverso.util') 3 | 4 | local tbh = torch.class('ArrayTableHanlder') 5 | 6 | ffi.cdef[[ 7 | void MV_NewArrayTable(int size, TableHandler* out); 8 | void MV_GetArrayTable(TableHandler handler, float* data, int size); 9 | void MV_AddArrayTable(TableHandler handler, float* data, int size); 10 | void MV_AddAsyncArrayTable(TableHandler handler, float* data, int size); 11 | ]] 12 | 13 | function tbh:new(size, init_value) 14 | tbh = {} 15 | size = size or 0 16 | setmetatable(tbh, self) 17 | self.__index = self 18 | tbh._handler = ffi.new("TableHandler[1]") 19 | tbh._size = ffi.new("int", size) 20 | libmv.MV_NewArrayTable( 21 | tbh._size, 22 | tbh._handler 23 | ) 24 | local init = require 'multiverso.init' 25 | if init_value ~= nil then 26 | init_value = init_value:float() 27 | -- sync add is used because we want to make sure that the initial value 28 | -- has taken effect when the call returns. 
No matter whether it is 29 | -- master worker, we should call add to make sure it works in sync 30 | -- mode 31 | if init.worker_id() == 0 then 32 | self.add(tbh, init_value, true) 33 | else 34 | self.add(tbh, init_value:clone():zero(), true) 35 | end 36 | end 37 | return tbh 38 | end 39 | 40 | function tbh:get() 41 | cdata = ffi.new("float[?]", self._size) 42 | libmv.MV_GetArrayTable(self._handler[0], cdata, self._size) 43 | return util.cdata2tensor(cdata, tonumber(self._size)) 44 | end 45 | 46 | function tbh:add(data, sync) 47 | sync = sync or false 48 | cdata = util.tensor2cdata(data) 49 | if sync then 50 | libmv.MV_AddArrayTable(self._handler[0], cdata, self._size) 51 | else 52 | libmv.MV_AddAsyncArrayTable(self._handler[0], cdata, self._size) 53 | end 54 | end 55 | 56 | return tbh 57 | -------------------------------------------------------------------------------- /binding/lua/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR) 2 | CMAKE_POLICY(VERSION 2.6) 3 | IF(LUAROCKS_PREFIX) 4 | MESSAGE(STATUS "Installing Torch through Luarocks") 5 | STRING(REGEX REPLACE "(.*)lib/luarocks/rocks.*" "\\1" CMAKE_INSTALL_PREFIX "${LUAROCKS_PREFIX}") 6 | MESSAGE(STATUS "Prefix inferred from Luarocks: ${CMAKE_INSTALL_PREFIX}") 7 | ENDIF() 8 | FIND_PACKAGE(Torch REQUIRED) 9 | 10 | FILE(GLOB luasrc *.lua) 11 | SET(luasrc ${luasrc}) 12 | ADD_TORCH_PACKAGE(multiverso "" "${luasrc}" "Multiverso Torch Binding Package") 13 | -------------------------------------------------------------------------------- /binding/lua/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install test 2 | install: 3 | luarocks make 4 | test: 5 | luajit test.lua 6 | -------------------------------------------------------------------------------- /binding/lua/README.md: -------------------------------------------------------------------------------- 1 | # Multiverso Torch/Lua Binding 2 | 3 | ## Introduction 4 | Multiverso is a parameter server framework for distributed machine learning. 5 | This package can enable parallel training of torch program over multiple machines and GPUs. 6 | 7 | ## Requirements 8 | Build multiverso successfully by following the [README > build](https://github.com/Microsoft/multiverso/blob/master/README.md#build). 9 | 10 | ## Installation 11 | 12 | **NOTE**: Before installation, you need to make sure have `libmultiverso.so` 13 | built successfully according to [Requirements](#requirements). 
14 | 15 | ``` 16 | make install 17 | ``` 18 | or 19 | ``` 20 | luarocks make 21 | ``` 22 | 23 | ## Unit Tests 24 | ``` 25 | make test 26 | ``` 27 | or 28 | 29 | ``` 30 | luajit test.lua 31 | ``` 32 | 33 | ## Documentation 34 | 35 | - [Tutorial](https://github.com/Microsoft/multiverso/wiki/Integrate-multiverso-into-torch-project) 36 | - [API](https://github.com/Microsoft/multiverso/wiki/Multiverso-Torch-Binding-API) 37 | - [Benchmark](https://github.com/Microsoft/multiverso/wiki/Multiverso-Torch-Binding-Benchmark) 38 | -------------------------------------------------------------------------------- /binding/lua/demos/xor/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: raw multiverso 2 | raw: 3 | th xor.lua 4 | multiverso: 5 | th xor-multiverso.lua 6 | -------------------------------------------------------------------------------- /binding/lua/demos/xor/README.md: -------------------------------------------------------------------------------- 1 | # XOR demo for multiverso. 2 | 3 | The train example is referred from 4 | https://github.com/torch/nn/blob/master/doc/training.md 5 | 6 | There are two versions, where `xor.lua` is the raw version and 7 | `xor-multiverso.lua` is the multiverso version. 8 | 9 | Comments have been add to the every modification in `xor-multiverso.lua` that is 10 | needed to make it run on multiverso. 11 | 12 | ## Run the raw version 13 | ``` 14 | make raw 15 | ``` 16 | or 17 | ``` 18 | th xor.lua 19 | ``` 20 | 21 | ## Run the multiverso version 22 | ``` 23 | make multiverso 24 | ``` 25 | or 26 | ``` 27 | th xor-multiverso.lua 28 | ``` 29 | -------------------------------------------------------------------------------- /binding/lua/demos/xor/xor-multiverso.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- The multiverso version train example referring from 3 | -- https://github.com/torch/nn/blob/master/doc/training.md 4 | -- 5 | 6 | require 'nn' 7 | 8 | -- Load multiverso. 9 | local multiverso = require 'multiverso' 10 | 11 | -- Init multiverso. 12 | multiverso.init(false) 13 | 14 | -- Get some useful parameters from multiverso. 15 | -- 1) The total number of workers. 16 | multiverso.num_workers = multiverso.num_workers() 17 | -- 2) The id for current worker. 18 | multiverso.worker_id = multiverso.worker_id() 19 | -- 3) Easy access to check whether this is master worker. 20 | multiverso.is_master = multiverso.worker_id == 0 21 | 22 | local model = nn.Sequential() 23 | local inputs = 2 24 | local outputs = 1 25 | local HUs = 20 26 | model:add(nn.Linear(inputs, HUs)) 27 | model:add(nn.Tanh()) 28 | model:add(nn.Linear(HUs, outputs)) 29 | 30 | local criterion = nn.MSECriterion() 31 | 32 | local batchSize = 128 33 | local batchInputs = torch.Tensor(batchSize, inputs) 34 | local batchLabels = torch.DoubleTensor(batchSize) 35 | 36 | for i=1,batchSize do 37 | local input = torch.randn(2) 38 | local label = 1 39 | if input[1]*input[2]>0 then 40 | label = -1; 41 | end 42 | batchInputs[i]:copy(input) 43 | batchLabels[i] = label 44 | end 45 | 46 | local params, gradParams = model:getParameters() 47 | 48 | -- Create ArrayTableHandler for syncing parameters. 49 | local tbh = multiverso.ArrayTableHandler:new(params:size(1), params) 50 | -- Wait for finishing the initializing phase. 51 | multiverso.barrier() 52 | -- Get the initial model from the server. 
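-- (The ArrayTableHandler:new call above made the master worker add the
-- initial parameters to the server table while the other workers added
-- zeros, so after the barrier every worker fetches the same starting
-- model here.)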
53 | params:copy(tbh:get()) 54 | 55 | for epoch=1,1000 do 56 | model:zeroGradParameters() 57 | local outputs = model:forward(batchInputs) 58 | local loss = criterion:forward(outputs, batchLabels) 59 | local dloss_doutput = criterion:backward(outputs, batchLabels) 60 | model:backward(batchInputs, dloss_doutput) 61 | 62 | -- Sync parameters: 63 | -- 1) Add the gradients (delta value) to the server. 64 | tbh:add(-0.01 * gradParams) 65 | -- 2) (Optional) Sync all workers after each epoch. 66 | multiverso.barrier() 67 | -- 3) Fetch the newest value from the server. 68 | params:copy(tbh:get()) 69 | 70 | -- Print should also only exist in master worker. 71 | if multiverso.is_master then 72 | print(epoch) 73 | end 74 | end 75 | 76 | -- Only test in master worker. 77 | if multiverso.is_master then 78 | local x = torch.Tensor({ 79 | {0.5, 0.5}, 80 | {0.5, -0.5}, 81 | {-0.5, 0.5}, 82 | {-0.5, -0.5} 83 | }) 84 | print(model:forward(x)) 85 | end 86 | 87 | -- Remember to shutdown at last. 88 | multiverso.shutdown() 89 | -------------------------------------------------------------------------------- /binding/lua/demos/xor/xor.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- The train example referring from 3 | -- https://github.com/torch/nn/blob/master/doc/training.md 4 | -- 5 | 6 | require 'nn' 7 | 8 | local model = nn.Sequential() 9 | local inputs = 2 10 | local outputs = 1 11 | local HUs = 20 12 | model:add(nn.Linear(inputs, HUs)) 13 | model:add(nn.Tanh()) 14 | model:add(nn.Linear(HUs, outputs)) 15 | 16 | local criterion = nn.MSECriterion() 17 | 18 | local batchSize = 128 19 | local batchInputs = torch.Tensor(batchSize, inputs) 20 | local batchLabels = torch.DoubleTensor(batchSize) 21 | 22 | for i=1,batchSize do 23 | local input = torch.randn(2) 24 | local label = 1 25 | if input[1]*input[2]>0 then 26 | label = -1; 27 | end 28 | batchInputs[i]:copy(input) 29 | batchLabels[i] = label 30 | end 31 | 32 | local params, gradParams = model:getParameters() 33 | 34 | for epoch=1,2000 do 35 | model:zeroGradParameters() 36 | local outputs = model:forward(batchInputs) 37 | local loss = criterion:forward(outputs, batchLabels) 38 | local dloss_doutput = criterion:backward(outputs, batchLabels) 39 | model:backward(batchInputs, dloss_doutput) 40 | model:updateParameters(0.01) 41 | end 42 | 43 | local x = torch.Tensor({ 44 | {0.5, 0.5}, 45 | {0.5, -0.5}, 46 | {-0.5, 0.5}, 47 | {-0.5, -0.5} 48 | }) 49 | print(model:forward(x)) 50 | -------------------------------------------------------------------------------- /binding/lua/docs/BENCHMARK.md: -------------------------------------------------------------------------------- 1 | # Multiverso Torch Binding Benchmark 2 | 3 | ## Task Description 4 | 5 | Perform CIFAR-10 classification with torch resnet implementation. 6 | 7 | ## Codebase 8 | 9 | [Microsoft/fb.resnet.torch multiverso branch](https://github.com/Microsoft/fb.resnet.torch/tree/multiverso) 10 | 11 | ## Setup 12 | Please follow [this guide](https://github.com/Microsoft/multiverso/wiki/Multiverso-Torch-Lua-Binding) to setup your environment. 13 | 14 | ## Hardware 15 | 16 | - **Hosts** : 1 17 | - **GPU** : Tesla K40m * 8 18 | - **CPU** : Intel(R) Xeon(R) CPU E5-2680 v2 @ 2.80GHz 19 | - **Memory** : 251GB 20 | 21 | ## Common settings 22 | 23 | - depth 32 24 | - nEpochs 164 25 | - learningRate 0.1(epoch <= 80), 0.01(81 <= epoch <= 121), 0.001(121 <= epoch) 26 | 27 | ## Clarification for multiverso settings 28 | 29 | - The train data is divided evenly to each worker. 
30 | - Master strategy is used to warm up the initial model. 31 | - Workers sync after each batch and has a barrier after each epoch. 32 | 33 | ## Results 34 | 35 | | Code Name | #Process(es) | #GPU(s) per Process | Use multiverso | Batch size | Initial learning rate | Seconds per epoch | Best Model | 36 | | :-------: | :----------: | :-----------------: | :------------: | :--------: | :-------------------: | :---------------: | :--------: | 37 | | 1P1G0M | 1 | 1 | 0 | 128 | 0.1 | 55.57 | 92.435 % | 38 | | 1P8G0M | 1 | 8 | 0 | 128 | 0.1 | 28.38 | 92.464 % | 39 | | 8P1G1M | 8 | 1 | 1 | 64 | 0.05 | 11.37 | 92.449 % | 40 | 41 | ![top1error_vs_epoch](https://raw.githubusercontent.com/Microsoft/multiverso/master/binding/lua/docs/imgs/top1error_vs_epoch.png) 42 | ![top1error_vs_runningtime](https://raw.githubusercontent.com/Microsoft/multiverso/master/binding/lua/docs/imgs/top1error_vs_runningtime.png) 43 | -------------------------------------------------------------------------------- /binding/lua/docs/imgs/top1error_vs_epoch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/lua/docs/imgs/top1error_vs_epoch.png -------------------------------------------------------------------------------- /binding/lua/docs/imgs/top1error_vs_runningtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/lua/docs/imgs/top1error_vs_runningtime.png -------------------------------------------------------------------------------- /binding/lua/docs/imgs/top5error_vs_epoch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/lua/docs/imgs/top5error_vs_epoch.png -------------------------------------------------------------------------------- /binding/lua/docs/imgs/top5error_vs_runningtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/lua/docs/imgs/top5error_vs_runningtime.png -------------------------------------------------------------------------------- /binding/lua/init.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | local ffi = require 'ffi' 3 | local util = require 'multiverso.util' 4 | 5 | local mv = {} 6 | 7 | ffi.cdef[[ 8 | typedef void* TableHandler; 9 | void MV_Init(int* argc, char* argv[]); 10 | void MV_ShutDown(); 11 | void MV_Barrier(); 12 | int MV_NumWorkers(); 13 | int MV_WorkerId(); 14 | int MV_ServerId(); 15 | ]] 16 | 17 | package.cpath = '/usr/local/lib/?.so;' .. package.cpath 18 | libmv_path = package.searchpath('libmultiverso', package.cpath, '') 19 | if libmv_path == nil then 20 | print([[ 21 | [Error] Multiverso shared object, `libmultiverso.so`, NOT FOUND! 22 | Please build & install `multiverso` according to the instruction [1]. 
23 | [1] https://github.com/Microsoft/multiverso#build]]) 24 | return 25 | end 26 | libmv = ffi.load(libmv_path, 'true') 27 | 28 | mv.ArrayTableHandler = require('multiverso.ArrayTableHandler') 29 | mv.MatrixTableHandler = require('multiverso.MatrixTableHandler') 30 | 31 | function mv.init(sync) 32 | sync = sync or false -- false for the default value of sync 33 | -- the first argument will be ignored. So we put a placeholder here 34 | args = {""} 35 | if sync then 36 | table.insert(args, "-sync=true") 37 | end 38 | argc = ffi.new("int[1]", #args) 39 | argv = ffi.new("char*[?]", #args) 40 | for i = 1, #args do 41 | argv[i - 1] = ffi.new("char[1]") 42 | ffi.copy(argv[i - 1], args[i]) 43 | end 44 | libmv.MV_Init(argc, argv) 45 | end 46 | 47 | function mv.barrier() 48 | libmv.MV_Barrier() 49 | end 50 | 51 | function mv.shutdown() 52 | libmv.MV_ShutDown() 53 | end 54 | 55 | function mv.num_workers() 56 | return libmv.MV_NumWorkers() 57 | end 58 | 59 | function mv.worker_id() 60 | return libmv.MV_WorkerId() 61 | end 62 | 63 | function mv.server_id() 64 | return libmv.MV_ServerId() 65 | end 66 | 67 | return mv 68 | -------------------------------------------------------------------------------- /binding/lua/multiverso-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "multiverso" 2 | version = "scm-1" 3 | 4 | source = { 5 | url = "https://github.com/Microsoft/multiverso" 6 | } 7 | 8 | description = { 9 | summary = "Torch binding for multiverso.", 10 | detailed = [[ 11 | Multiverso is a parameter server framework for distributed machine 12 | learning. This package can leverage multiple machines and GPUs to 13 | speed up the torch programs. 14 | ]], 15 | homepage = "http://www.dmtk.io", 16 | license = "MIT" 17 | } 18 | 19 | dependencies = { 20 | "torch >= 7.0" 21 | } 22 | 23 | build = { 24 | type = "command", 25 | build_command = [[ 26 | cmake -E make_directory build; 27 | cd build; 28 | cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." 
-DCMAKE_INSTALL_PREFIX="$(PREFIX)"; 29 | $(MAKE) 30 | ]], 31 | install_command = [[ 32 | cd build && $(MAKE) install; 33 | ]] 34 | } 35 | -------------------------------------------------------------------------------- /binding/lua/test.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env lua 2 | 3 | require 'torch' 4 | 5 | mv = require('multiverso') 6 | 7 | local mv_test = torch.TestSuite() 8 | local mv_tester = torch.Tester() 9 | 10 | function Set(list) 11 | local set = {} 12 | for _, l in ipairs(list) do set[l] = true end 13 | return set 14 | end 15 | 16 | function mv_test.testArray() 17 | size = 100000 18 | tbh = mv.ArrayTableHandler:new(size) 19 | mv.barrier() 20 | 21 | for i = 1, 1000 do 22 | print(tbh:get()[{{1, 10}}]) 23 | tbh:add(torch.range(1, size)) 24 | tbh:add(torch.range(1, size)) 25 | mv.barrier() 26 | end 27 | end 28 | 29 | function mv_test.testMatrix() 30 | num_row = 11 31 | num_col = 10 32 | size = num_row * num_col 33 | num_workers = mv.num_workers() 34 | tbh = mv.MatrixTableHandler:new(num_row, num_col) 35 | mv.barrier() 36 | 37 | for i = 1, 20 do 38 | row_ids = {0, 1, 5, 10} 39 | row_ids_set = Set(row_ids) 40 | tbh:add(torch.range(1, size)) 41 | data = torch.range( 42 | row_ids[1] * num_col + 1, 43 | row_ids[1] * num_col + num_col 44 | ) 45 | for j = 2, #row_ids do 46 | row_id = row_ids[j] 47 | data = torch.cat(data, torch.range( 48 | row_id * num_col + 1, 49 | row_id * num_col + num_col 50 | )) 51 | end 52 | tbh:add(data, row_ids) 53 | mv.barrier() 54 | data = tbh:get() 55 | mv.barrier() 56 | for j = 1, data:size(1) do 57 | for k = 1, data:size(2) do 58 | expected = ((j - 1) * num_col + k) * i * num_workers 59 | if row_ids_set[j - 1] then 60 | expected = expected + ((j - 1) * num_col + k) * i * num_workers 61 | end 62 | mv_tester:eq(expected, data[j][k]) 63 | end 64 | end 65 | data = tbh:get(row_ids) 66 | mv.barrier() 67 | for j = 1, data:size(1) do 68 | for k = 1, data:size(2) do 69 | expected = (row_ids[j] * num_col + k) * i * num_workers * 2 70 | mv_tester:eq(expected, data[j][k]) 71 | end 72 | end 73 | end 74 | end 75 | 76 | mv.init() 77 | mv_tester:add(mv_test) 78 | mv_tester:run() 79 | mv.shutdown() 80 | -------------------------------------------------------------------------------- /binding/lua/util.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env lua 2 | 3 | util = {} 4 | 5 | ffi = require('ffi') 6 | 7 | util.tensor_type = { 8 | ['unsigned char'] = 'torch.ByteTensor', 9 | ['char'] = 'torch.CharTensor', 10 | ['short'] = 'torch.ShortTensor', 11 | ['int'] = 'torch.IntTensor', 12 | ['long'] = 'torch.LongTensor', 13 | ['float'] ='torch.FloatTensor', 14 | ['double'] = 'torch.DoubleTensor' 15 | } 16 | 17 | function util.tensor2cdata(data, data_type) 18 | if type(data) == 'table' then 19 | data = torch.Tensor(data) 20 | end 21 | data_type = data_type or 'float' 22 | tensor_type = util.tensor_type[data_type] 23 | return data:contiguous():type(tensor_type):data() 24 | end 25 | 26 | function util.cdata2tensor(cdata, sizes, data_type) 27 | data_type = data_type or 'float' 28 | tensor_type = util.tensor_type[data_type] 29 | data = torch.Tensor(sizes):type(tensor_type) 30 | ffi.copy(data:data(), cdata, data:nElement() * ffi.sizeof(data_type)) 31 | return data 32 | end 33 | 34 | return util 35 | -------------------------------------------------------------------------------- /binding/python/README.md: 
-------------------------------------------------------------------------------- 1 | # Multiverso Python/Theano/Lasagne Binding 2 | 3 | 4 | ## Introduction 5 | Multiverso is a parameter server framework for distributed machine learning. This package can leverage multiple machines and GPUs to speed up Python programs. 6 | 7 | 8 | ## Installation 9 | 10 | 1. (For GPU support only) Install CUDA and cuDNN according to this [guide](https://github.com/Microsoft/fb.resnet.torch/blob/multiverso/INSTALL.md). You only need to finish the steps before [Install Torch](https://github.com/Microsoft/fb.resnet.torch/blob/multiverso/INSTALL.md#install-torch). 11 | 1. Install multiverso 12 | * On Linux: Please follow the [README](https://github.com/Microsoft/multiverso/blob/master/README.md#build) to build and install multiverso. 13 | * On Windows: You need MSBuild.exe installed, and your system must be able to find it in the $PATH. Then run [build_dll.bat](https://github.com/Microsoft/multiverso/blob/master/src/build_dll.bat) to build and install the .dll. There is no auto-installer for Windows yet, so you have to copy the .dll either to a directory in the system $PATH or to the multiverso package folder. 14 | 1. Install the requirements 15 | * `gfortran` is required by scipy. For example, you can install it with `sudo apt-get install gfortran` on Ubuntu. 16 | * (Optional) You need python-nose to run the unit tests. For example, you can install it with `sudo apt-get install python-nose` on Ubuntu. 17 | 1. Install the Python binding with the command `sudo python setup.py install` 18 | 19 | 20 | ## Run Unit Tests 21 | ``` 22 | nosetests 23 | ``` 24 | 25 | 26 | ## Documentation 27 | * [Tutorial](https://github.com/Microsoft/multiverso/wiki/How-to-write-python-code-with-multiverso) 28 | * API documentation is written as docstrings in the Python source code. 29 | * [Benchmark](https://github.com/Microsoft/multiverso/wiki/Multiverso-Python-Binding-Benchmark) 30 | -------------------------------------------------------------------------------- /binding/python/docs/BENCHMARK.md: -------------------------------------------------------------------------------- 1 | # Multiverso Python Binding Benchmark 2 | 3 | ## Task Description 4 | Perform CIFAR-10 classification with a residual network implementation based on Lasagne. 5 | 6 | ## Codebase 7 | [Deep_Residual_Learning_CIFAR-10](https://github.com/Microsoft/multiverso/blob/master/binding/python/examples/theano/lasagne/Deep_Residual_Learning_CIFAR-10.py) 8 | 9 | ## Setup 10 | Please follow [this guide](https://github.com/Microsoft/multiverso/wiki/Multiverso-Python-Theano-Lasagne-Binding) to set up your environment. 11 | 12 | ## Hardware 13 | ||| 14 | | -------- |:--------:| 15 | |Hosts|1| 16 | |GPU|Tesla K40m * 8| 17 | |CPU|Intel(R) Xeon(R) CPU E5-2680 v2 @ 2.80GHz| 18 | |Memory| 251GB | 19 | 20 | 21 | ## Theano settings 22 | Configuration of `~/.theanorc` 23 | ``` 24 | [global] 25 | device = gpu 26 | floatX = float32 27 | 28 | [cuda] 29 | root = /usr/local/cuda-7.5/ 30 | 31 | [lib] 32 | cnmem = 1 33 | ``` 34 | 35 | ## About the Model 36 | ||| 37 | | :---- | -----: | 38 | |Total epochs|82| 39 | |Batch size|128| 40 | |Depth|32| 41 | |Learning rate schedule|Initialized as 0.1, changed to 0.01 from epoch 41, and to 0.001 from epoch 61| 42 | |Number of parameters in model| 464,154| 43 | 44 | 45 | Clarification 46 | - An epoch means that all the processes divide the data equally and go through it once together. 47 | - A barrier is used at the end of each epoch.
48 | - This experiment doesn't use warm start in ASGD. 49 | - The time to load the data is not considered in the time of the experiment. 50 | 51 | 52 | # The results 53 | The results of 3 experiments with different configurations are shown as following. 54 | 55 | |Short Name | # Process(es) | #GPU(s) per Process | Use multiverso | Batch size | Initial learning rate | Seconds per epoch | Best model validation accuracy | 56 | | :---- | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | 57 | | 1P1G0M | 1 | 1 | 0 | 128 | 0.1 | 175.4 | 92.69 % | 58 | | 1P1G1M | 1 | 1 | 1 | 128 | 0.1 | 194.4 | 92.53 % | 59 | | 8P1G1M | 8 | 1 | 1 | 64 | 0.05 | 34.1 | 92.11 % | 60 | 61 | ![accuracy_epoch](https://raw.githubusercontent.com/Microsoft/multiverso/master/binding/python/docs/imgs/accuracy_epoch.png) 62 | ![accuracy_time](https://raw.githubusercontent.com/Microsoft/multiverso/master/binding/python/docs/imgs/accuracy_time.png) 63 | -------------------------------------------------------------------------------- /binding/python/docs/imgs/accuracy_epoch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/docs/imgs/accuracy_epoch.png -------------------------------------------------------------------------------- /binding/python/docs/imgs/accuracy_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/docs/imgs/accuracy_time.png -------------------------------------------------------------------------------- /binding/python/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/examples/__init__.py -------------------------------------------------------------------------------- /binding/python/examples/theano/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/examples/theano/__init__.py -------------------------------------------------------------------------------- /binding/python/examples/theano/keras/README.md: -------------------------------------------------------------------------------- 1 | # Keras example 2 | 3 | [addition_rnn_mv.py](./addition_rnn_mv.py) is adapted from 4 | [a keras official example](https://github.com/fchollet/keras/blob/master/examples/addition_rnn.py). 5 | 6 | 7 | It will demonstrate how to use multiverso in keras. 8 | 9 | For example, you can train it with two GPUs with such command. 10 | ``` 11 | mpirun -np 2 python addition_rnn_mv.py 12 | ``` 13 | 14 | It will reach `val_acc: 0.99+` much earlier than training with only one GPU. 
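
For a quick picture of what the script does, here is a minimal sketch of the pattern it follows: hook multiverso into `model.fit` through `MVCallback`. The toy model, layer sizes, and random data below are illustrative assumptions and are not taken from `addition_rnn_mv.py`; the sketch also assumes the `multiverso.theano_ext.keras_ext` package is importable from your installation (the import path follows the source tree layout).

```
# Minimal sketch only; toy model and random data are illustrative.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

import multiverso as mv
from multiverso.theano_ext.keras_ext.callbacks import MVCallback

mv.init()

model = Sequential()
model.add(Dense(32, input_dim=10, activation='relu'))
model.add(Dense(1))
model.compile(loss='mse', optimizer='sgd')

x = np.random.rand(256, 10)
y = np.random.rand(256, 1)

# MVCallback syncs the model weights through the parameter server at the
# end of every `freq` mini-batches.
model.fit(x, y, batch_size=32, nb_epoch=5,
          callbacks=[MVCallback(model, freq=1)],
          verbose=1 if mv.is_master_worker() else 0)

mv.shutdown()
```

When launched with `mpirun`, every MPI process runs the same script and the callback keeps the model weights in sync through the parameter server.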
15 | -------------------------------------------------------------------------------- /binding/python/examples/theano/lasagne/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: 2P1G1M0.1LR128B1S 4P1G1M0.1LR128B0S 4P1G1M0.1LR128B1S 4P1G1M0.05LR64B0S 4P1G1M0.05LR64B1S 8P1G1M0.1LR128B0S 8P1G1M0.1LR128B1S 8P1G1M0.05LR64B0S 8P1G1M0.05LR64B1S 2 | 3 | 1P1G1M0.1LR128B1S: 4 | mpirun -np 1 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.1 -b 128 -s True 5 | 6 | 4P1G1M0.1LR128B0S: 7 | mpirun -np 4 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.1 -b 128 -s False 8 | 4P1G1M0.1LR128B1S: 9 | mpirun -np 4 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.1 -b 128 -s True 10 | 11 | 4P1G1M0.05LR64B0S: 12 | mpirun -np 4 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.05 -b 64 -s False 13 | 4P1G1M0.05LR64B1S: 14 | mpirun -np 4 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.05 -b 64 -s True 15 | 16 | 8P1G1M0.1LR128B0S: 17 | mpirun -np 8 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.1 -b 128 -s False 18 | 8P1G1M0.1LR128B1S: 19 | mpirun -np 8 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.1 -b 128 -s True 20 | 21 | 8P1G1M0.05LR64B0S: 22 | mpirun -np 8 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.05 -b 64 -s False 23 | 8P1G1M0.05LR64B1S: 24 | mpirun -np 8 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.05 -b 64 -s True 25 | 26 | -------------------------------------------------------------------------------- /binding/python/examples/theano/lasagne/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/examples/theano/lasagne/__init__.py -------------------------------------------------------------------------------- /binding/python/examples/theano/load_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf8 3 | 4 | import cPickle 5 | import os 6 | import sys 7 | CUR_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) 8 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))) 9 | CIFAR10_DIR = os.path.abspath(os.path.join(CUR_DIR, os.path.pardir, 'data', 'cifar-10-batches-py')) 10 | 11 | 12 | import numpy as np 13 | 14 | 15 | def load_cifar10(data_dir=CIFAR10_DIR): 16 | ''' 17 | we assume these files are in data_dir: 18 | batches.meta data_batch_1 data_batch_2 data_batch_3 data_batch_4 19 | data_batch_5 readme.html test_batch 20 | 21 | You can download the data from 22 | https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz 23 | 24 | The RGB values are scaled to [0., 1.]. 25 | ''' 26 | x_train_l = [] 27 | t_train_l = [] 28 | 29 | for i in xrange(1, 6): 30 | filename = os.path.join(data_dir, "data_batch_%d" % i) 31 | with open(filename, "rb") as f: 32 | data_obj = cPickle.load(f) 33 | x_train_l.append(data_obj["data"]) 34 | t_train_l.extend(data_obj["labels"]) 35 | x_train = np.concatenate(x_train_l, axis=0) / 255. 36 | 37 | t_train = np.zeros((x_train.shape[0], 10)) 38 | for i, cls in enumerate(t_train_l): 39 | t_train[i, cls] = 1 40 | 41 | with open(os.path.join(data_dir, "test_batch")) as f: 42 | data_obj = cPickle.load(f) 43 | x_test = data_obj["data"] / 255. 
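# Like the training images above, test pixels are scaled to [0, 1]; the raw
# test labels are turned into one-hot vectors below.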
44 | t_test_l = data_obj["labels"] 45 | 46 | t_test = np.zeros((x_test.shape[0], 10)) 47 | for i, cls in enumerate(t_test_l): 48 | t_test[i, cls] = 1 49 | return x_train, t_train, x_test, t_test 50 | -------------------------------------------------------------------------------- /binding/python/multiverso/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf8 3 | 4 | from .api import init, shutdown, barrier, workers_num, worker_id, server_id, is_master_worker 5 | from .tables import ArrayTableHandler, MatrixTableHandler 6 | -------------------------------------------------------------------------------- /binding/python/multiverso/api.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf8 3 | 4 | import ctypes 5 | from .utils import Loader 6 | import numpy as np 7 | 8 | 9 | mv_lib = Loader.get_lib() 10 | 11 | 12 | def init(sync=False): 13 | '''Initialize multiverso. 14 | 15 | This should be called only once before training at the beginning of the 16 | whole project. 17 | If sync is True, a sync server will be created. Otherwise an async server 18 | will be created. 19 | If a sync server is created, you **must** make sure every process calls 20 | `add` and `get` in the same order and the same number of times. Otherwise 21 | some processes will be blocked. In sync server mode, all `get` calls will 22 | return **exactly the same results**. 23 | If an async server is created, there are no such limitations, but we can't 24 | guarantee that `get` will return the same results on every worker. 25 | If you want to get the same results in async server mode, you should use 26 | `barrier` and `get` with the argument `sync` set to `True` to sync the 27 | processes. 28 | ''' 29 | args = [b""] # the first argument will be ignored. So we put a placeholder here 30 | if sync: 31 | args.append(b"-sync=true") 32 | n = len(args) 33 | args_type = ctypes.c_char_p * n 34 | mv_lib.MV_Init(ctypes.pointer(ctypes.c_int(n)), args_type(*[ctypes.c_char_p(arg) for arg in args])) 35 | 36 | 37 | def shutdown(): 38 | '''Shutdown multiverso. 39 | 40 | This should be called only once after finishing training at the end of the 41 | whole project. 42 | ''' 43 | mv_lib.MV_ShutDown() 44 | 45 | 46 | def barrier(): 47 | '''Set a barrier for all workers to wait at. 48 | 49 | Workers will wait until all workers reach this barrier. 50 | ''' 51 | mv_lib.MV_Barrier() 52 | 53 | 54 | def workers_num(): 55 | '''Return the total number of workers.''' 56 | return mv_lib.MV_NumWorkers() 57 | 58 | 59 | def worker_id(): 60 | '''Return the id (zero-based index) of the current worker.''' 61 | return mv_lib.MV_WorkerId() 62 | 63 | 64 | def server_id(): 65 | return mv_lib.MV_ServerId() 66 | 67 | 68 | def is_master_worker(): 69 | '''Return whether this worker is the master worker. 70 | 71 | Some things only need one worker process, such as validation, outputting the 72 | results, initializing the parameters and so on. So we mark worker 0 as 73 | the master worker to handle these things.
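For example (`save_results` here is just a placeholder for any step that should run only once): `if multiverso.is_master_worker(): save_results()`.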
74 | ''' 75 | return worker_id() == 0 76 | -------------------------------------------------------------------------------- /binding/python/multiverso/theano_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/multiverso/theano_ext/__init__.py -------------------------------------------------------------------------------- /binding/python/multiverso/theano_ext/keras_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/multiverso/theano_ext/keras_ext/__init__.py -------------------------------------------------------------------------------- /binding/python/multiverso/theano_ext/keras_ext/callbacks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf8 3 | 4 | from keras.callbacks import Callback 5 | from param_manager import KerasParamManager 6 | 7 | 8 | class MVCallback(Callback): 9 | ''' 10 | Please use MVCallback as a callback of keras model.fit function 11 | For e.g. 12 | ``` 13 | model.fit(X_train, Y_train, 14 | batch_size=batch_size, 15 | nb_epoch=nb_epoch, 16 | validation_data=(X_test, Y_test), 17 | shuffle=True, 18 | callbacks=[mvcallback(model, freq=1)]) 19 | ``` 20 | ''' 21 | def __init__(self, model, freq=1): 22 | '''Initialize the MVCallback class 23 | 24 | The `model` should be the be a keras model 25 | The `freq` should be the update frequency of the parameters. For 26 | example, `freq=3` means update the parameters every 3 mini-batch. 27 | ''' 28 | super(MVCallback, self).__init__() 29 | self.kpm = KerasParamManager(model) 30 | self.cur_n = 0 31 | if freq < 0: 32 | raise ValueError("Frequency must be an integer greater than 0.") 33 | self.freq = freq 34 | 35 | def on_batch_end(self, batch, logs={}): 36 | '''sync all parameters at the end of every batch''' 37 | self.cur_n = (self.cur_n + 1) % self.freq 38 | if self.cur_n % self.freq == 0: 39 | self.kpm.sync_all_param() 40 | -------------------------------------------------------------------------------- /binding/python/multiverso/theano_ext/keras_ext/param_manager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..param_manager import MVModelParamManager 4 | 5 | 6 | class KerasParamManager(MVModelParamManager): 7 | ''' 8 | KerasParamManager is manager to make managing and synchronizing the 9 | variables in keras more easily 10 | ''' 11 | 12 | def get_all_param_values(self): 13 | return self.model.get_weights() 14 | 15 | def set_all_param_values(self, params): 16 | self.model.set_weights(params) 17 | -------------------------------------------------------------------------------- /binding/python/multiverso/theano_ext/lasagne_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/multiverso/theano_ext/lasagne_ext/__init__.py -------------------------------------------------------------------------------- /binding/python/multiverso/theano_ext/lasagne_ext/param_manager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf8 3 | 4 | import lasagne 5 | 
from ..param_manager import MVModelParamManager 6 | 7 | 8 | class LasagneParamManager(MVModelParamManager): 9 | ''' 10 | LasagneParamManager is manager to make managing and synchronizing the 11 | variables in lasagne more easily 12 | ''' 13 | 14 | def get_all_param_values(self): 15 | return lasagne.layers.get_all_param_values(self.model) 16 | 17 | def set_all_param_values(self, params): 18 | lasagne.layers.set_all_param_values(self.model, params) 19 | -------------------------------------------------------------------------------- /binding/python/multiverso/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf8 3 | 4 | from __future__ import print_function 5 | 6 | import ctypes 7 | import os 8 | import platform 9 | from ctypes.util import find_library 10 | import numpy as np 11 | 12 | PACKAGE_PATH = os.path.abspath(os.path.dirname(__file__)) 13 | 14 | 15 | class Loader(object): 16 | ''' 17 | This loader is responsible for loading multiverso dynamic library in both 18 | *nux and windows 19 | ''' 20 | 21 | LIB = None 22 | 23 | @classmethod 24 | def _find_mv_path(cls): 25 | if platform.system() == "Windows": 26 | mv_lib_path = find_library("Multiverso") 27 | if mv_lib_path is None: 28 | print("* Fail to load Multiverso.dll from the windows $PATH."\ 29 | "Because Multiverso.dll can not be found in the $PATH "\ 30 | "directories. Go on loading Multiverso from the package.") 31 | else: 32 | return mv_lib_path 33 | 34 | mv_lib_path = os.path.join(PACKAGE_PATH, "Multiverso.dll") 35 | if not os.path.exists(mv_lib_path): 36 | print("* Fail to load Multiverso.dll from the package. Because"\ 37 | " the file " + mv_lib_path + " can not be found.") 38 | else: 39 | return mv_lib_path 40 | else: 41 | mv_lib_path = find_library("multiverso") 42 | if mv_lib_path is None: 43 | print("* Fail to load libmultiverso.so from the system"\ 44 | "libraries. Because libmultiverso.so can't be found in"\ 45 | "library paths. Go on loading Multiverso from the package.") 46 | else: 47 | return mv_lib_path 48 | 49 | mv_lib_path = os.path.join(PACKAGE_PATH, "libmultiverso.so") 50 | if not os.path.exists(mv_lib_path): 51 | print("* Fail to load libmultiverso.so from the package. Because"\ 52 | " the file " + mv_lib_path + " can not be found.") 53 | else: 54 | return mv_lib_path 55 | return None 56 | 57 | @classmethod 58 | def load_lib(cls): 59 | mv_lib_path = cls._find_mv_path() 60 | if mv_lib_path is None: 61 | print("Fail to load the multiverso library. 
Please make sure you"\ 62 | " have installed multiverso successfully") 63 | else: 64 | print("Find the multiverso library successfully(%s)" % mv_lib_path) 65 | return ctypes.cdll.LoadLibrary(mv_lib_path) 66 | 67 | @classmethod 68 | def get_lib(cls): 69 | if not cls.LIB: 70 | cls.LIB = cls.load_lib() 71 | cls.LIB.MV_NumWorkers.restype = ctypes.c_int 72 | return cls.LIB 73 | 74 | 75 | def convert_data(data): 76 | '''convert the data to float32 ndarray''' 77 | if not isinstance(data, np.ndarray): 78 | data = np.array(data) 79 | return data.astype(np.float32) 80 | -------------------------------------------------------------------------------- /binding/python/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os 3 | 4 | 5 | def readme(): 6 | with open('README.md') as f: 7 | return f.read() 8 | 9 | 10 | setup(name='multiverso-python', 11 | version='0.0.1', 12 | long_description=readme(), 13 | description="Multiverso is a parameter server framework for distributed" 14 | " machine learning. This package can leverage multiple machines and GPUs" 15 | " to speed up the python programs.", 16 | url='https://github.com/Microsoft/multiverso', 17 | author='Microsoft', 18 | license='MIT', 19 | packages=['multiverso', 'multiverso.theano_ext', 'multiverso.theano_ext.lasagne_ext'], 20 | # TODO: The lasagne on pypi is too old. multiverso need some functions in 21 | # lasagne-0.2 which is not released yet. Please replace the dev version 22 | # with the stable release later. 23 | dependency_links = ['https://github.com/Lasagne/Lasagne/tarball/master#egg=lasagne-0.2.dev1'], 24 | install_requires=["theano>=0.8.2", "lasagne>=0.2.dev1"], 25 | classifiers=[ 26 | "Intended Audience :: Developers", 27 | "Intended Audience :: Science/Research", 28 | "Programming Language :: Python :: 2", 29 | ], 30 | zip_safe=False) 31 | -------------------------------------------------------------------------------- /cmake_uninstall.cmake.in: -------------------------------------------------------------------------------- 1 | if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 2 | message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 3 | endif(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 4 | 5 | file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) 6 | string(REGEX REPLACE "\n" ";" files "${files}") 7 | foreach(file ${files}) 8 | message(STATUS "Uninstalling $ENV{DESTDIR}${file}") 9 | if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 10 | exec_program( 11 | "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" 12 | OUTPUT_VARIABLE rm_out 13 | RETURN_VALUE rm_retval 14 | ) 15 | if(NOT "${rm_retval}" STREQUAL 0) 16 | message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}") 17 | endif(NOT "${rm_retval}" STREQUAL 0) 18 | else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 19 | message(STATUS "File $ENV{DESTDIR}${file} does not exist.") 20 | endif(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 21 | endforeach(file) 22 | -------------------------------------------------------------------------------- /include/multiverso/actor.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_ACTOR_H_ 2 | #define MULTIVERSO_ACTOR_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "multiverso/message.h" 10 | 11 | namespace std { 
class thread; } 12 | 13 | namespace multiverso { 14 | 15 | template class MtQueue; 16 | 17 | // The basic computation and communication unit in the system 18 | class Actor { 19 | public: 20 | explicit Actor(const std::string& name); 21 | virtual ~Actor(); 22 | // Start to run the Actor 23 | void Start(); 24 | // Stop to run the Actor 25 | void Stop(); 26 | // Accept a message from other actors 27 | void Receive(MessagePtr&); 28 | // Actor name, a unique identifier of a actor 29 | const std::string name() const { return name_; } 30 | 31 | protected: 32 | // Message response function 33 | using Handler = std::function; 34 | // Register message handler function 35 | void RegisterHandler(const MsgType& type, const Handler& task) { 36 | handlers_.insert({ type, task }); 37 | } 38 | // Send a message to a dst actor 39 | void SendTo(const std::string& dst_name, MessagePtr& msg); 40 | 41 | // Main function run in a background thread 42 | // The default main is to receive msg from other actors and process 43 | // messages based on registered message handlers 44 | virtual void Main(); 45 | 46 | // message queue 47 | std::unique_ptr > mailbox_; 48 | // message handlers function 49 | std::unordered_map handlers_; 50 | bool is_working_; 51 | 52 | private: 53 | std::string name_; 54 | std::unique_ptr thread_; 55 | 56 | Actor(const Actor&) = delete; 57 | void operator=(const Actor&) = delete; 58 | }; 59 | 60 | namespace actor { 61 | 62 | const std::string kCommunicator = "communicator"; 63 | const std::string kController = "controller"; 64 | const std::string kServer = "server"; 65 | const std::string kWorker = "worker"; 66 | 67 | } 68 | 69 | } // namespace multiverso 70 | 71 | #endif // MULTIVERSO_ACTOR_H_ 72 | -------------------------------------------------------------------------------- /include/multiverso/blob.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_BLOB_H_ 2 | #define MULTIVERSO_BLOB_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace multiverso { 10 | 11 | // Manage a chunk of memory. Blob can share memory with other Blobs. 12 | // Never use external memory. All external memory should be managed by itself 13 | class Blob { 14 | public: 15 | // an empty blob 16 | Blob() : data_(nullptr), size_(0) {} 17 | 18 | explicit Blob(size_t size); 19 | 20 | // Construct from external memory. Will copy a new piece 21 | Blob(const void* data, size_t size); 22 | 23 | Blob(void* data, size_t size); 24 | 25 | Blob(const Blob& rhs); 26 | 27 | ~Blob(); 28 | 29 | // Shallow copy by default. 
Call \ref CopyFrom for a deep copy 30 | void operator=(const Blob& rhs); 31 | 32 | inline char operator[](size_t i) const { 33 | return data_[i]; 34 | } 35 | 36 | template 37 | inline T& As(size_t i = 0) const { 38 | return (reinterpret_cast(data_))[i]; 39 | } 40 | template 41 | inline size_t size() const { return size_ / sizeof(T); } 42 | 43 | // DeepCopy, for a shallow copy, use operator= 44 | void CopyFrom(const Blob& src); 45 | 46 | inline char* data() const { return data_; } 47 | inline size_t size() const { return size_; } 48 | 49 | private: 50 | // Memory is shared and auto managed 51 | char *data_; 52 | size_t size_; 53 | }; 54 | 55 | } // namespace multiverso 56 | 57 | #endif // MULTIVERSO_BLOB_H_ 58 | -------------------------------------------------------------------------------- /include/multiverso/c_api.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_C_API_H_ 2 | #define MULTIVERSO_C_API_H_ 3 | 4 | #if defined _WIN32 5 | #define DllExport __declspec(dllexport) 6 | #else 7 | #define DllExport 8 | #endif 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | typedef void* TableHandler; 15 | 16 | DllExport void MV_Init(int* argc, char* argv[]); 17 | 18 | DllExport void MV_ShutDown(); 19 | 20 | DllExport void MV_Barrier(); 21 | 22 | DllExport int MV_NumWorkers(); 23 | 24 | DllExport int MV_WorkerId(); 25 | 26 | DllExport int MV_ServerId(); 27 | 28 | // Array Table 29 | DllExport void MV_NewArrayTable(int size, TableHandler* out); 30 | 31 | DllExport void MV_GetArrayTable(TableHandler handler, float* data, int size); 32 | 33 | DllExport void MV_AddArrayTable(TableHandler handler, float* data, int size); 34 | 35 | DllExport void MV_AddAsyncArrayTable(TableHandler handler, float* data, int size); 36 | 37 | 38 | // Matrix Table 39 | DllExport void MV_NewMatrixTable(int num_row, int num_col, TableHandler* out); 40 | 41 | DllExport void MV_GetMatrixTableAll(TableHandler handler, float* data, int size); 42 | 43 | DllExport void MV_AddMatrixTableAll(TableHandler handler, float* data, int size); 44 | 45 | DllExport void MV_AddAsyncMatrixTableAll(TableHandler handler, float* data, int size); 46 | 47 | DllExport void MV_GetMatrixTableByRows(TableHandler handler, float* data, 48 | int size, int row_ids[], int row_ids_n); 49 | 50 | DllExport void MV_AddMatrixTableByRows(TableHandler handler, float* data, 51 | int size, int row_ids[], int row_ids_n); 52 | 53 | DllExport void MV_AddAsyncMatrixTableByRows(TableHandler handler, float* data, 54 | int size, int row_ids[], int row_ids_n); 55 | 56 | #ifdef __cplusplus 57 | } // end extern "C" 58 | #endif 59 | 60 | #endif // MULTIVERSO_C_API_H_ 61 | -------------------------------------------------------------------------------- /include/multiverso/communicator.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_COMMUNICATION_H_ 2 | #define MULTIVERSO_COMMUNICATION_H_ 3 | 4 | #include "multiverso/actor.h" 5 | #include "multiverso/message.h" 6 | 7 | namespace multiverso { 8 | 9 | class NetInterface; 10 | 11 | class Communicator : public Actor { 12 | public: 13 | Communicator(); 14 | ~Communicator(); 15 | 16 | private: 17 | void Main() override; 18 | // Process message received from other actors, either send to other nodes, or 19 | // forward to local actors. 
20 | void ProcessMessage(MessagePtr& msg); 21 | // Thread function to receive messages from other nodes 22 | void Communicate(); 23 | // Forward to other actors in the same node 24 | void LocalForward(MessagePtr& msg); 25 | 26 | NetInterface* net_util_; 27 | std::unique_ptr recv_thread_; 28 | }; 29 | 30 | } // namespace multiverso 31 | 32 | #endif // MULTIVERSO_COMMUNICATION_H_ 33 | -------------------------------------------------------------------------------- /include/multiverso/controller.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_CONTROLLER_H_ 2 | #define MULTIVERSO_CONTROLLER_H_ 3 | 4 | #include "multiverso/actor.h" 5 | #include "multiverso/message.h" 6 | 7 | namespace multiverso { 8 | 9 | class Controller : public Actor { 10 | public: 11 | Controller(); 12 | ~Controller(); 13 | 14 | private: 15 | void ProcessBarrier(MessagePtr& msg); 16 | void ProcessRegister(MessagePtr& msg); 17 | 18 | class RegisterController; 19 | RegisterController* register_controller_; 20 | class BarrierController; 21 | BarrierController* barrier_controller_; 22 | }; 23 | 24 | } // namespace multiverso 25 | 26 | #endif // MULTIVERSO_CONTROLLER_H_ 27 | -------------------------------------------------------------------------------- /include/multiverso/dashboard.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_DASHBOARD_H_ 2 | #define MULTIVERSO_DASHBOARD_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "multiverso/util/timer.h" 9 | 10 | namespace multiverso { 11 | 12 | class Monitor; 13 | 14 | // Dashboard to record and query system running information 15 | // thread safe 16 | class Dashboard { 17 | public: 18 | static void AddMonitor(const std::string& name, Monitor* monitor); 19 | static void RemoveMonitor(const std::string& name); 20 | static std::string Watch(const std::string& name); 21 | static void Display(); 22 | private: 23 | static std::map record_; 24 | static std::mutex m_; 25 | }; 26 | 27 | class Monitor { 28 | public: 29 | explicit Monitor(const std::string& name) { 30 | name_ = name; 31 | timer_.Start(); 32 | Dashboard::AddMonitor(name_, this); 33 | } 34 | 35 | void Begin() { timer_.Start(); } 36 | 37 | void End() { 38 | elapse_ += timer_.elapse(); 39 | ++count_; 40 | } 41 | 42 | double average() const { return elapse_ / count_; } 43 | 44 | std::string name() const { return name_; } 45 | double elapse() const { return elapse_; } 46 | int count() const { return count_; } 47 | 48 | std::string info_string() const; 49 | 50 | private: 51 | // name of the Monitor 52 | std::string name_; 53 | // total elapsed time 54 | double elapse_; 55 | // count of monitor 56 | int count_; 57 | // a timer util 58 | Timer timer_; 59 | }; 60 | 61 | #define REGISTER_MONITOR(name) \ 62 | static Monitor g_##name##_monitor(#name); 63 | 64 | // Guard with MONITOR macro in the code to monitor it's execution 65 | // Usage: 66 | // MONITOR_BEGIN(your_code_short_description) 67 | // your code 68 | // MONITOR_END(your_code_short_description) 69 | #define MONITOR_BEGIN(name) \ 70 | REGISTER_MONITOR(name) \ 71 | g_##name##_monitor.Begin(); 72 | 73 | #define MONITOR_END(name) \ 74 | g_##name##_monitor.End(); 75 | 76 | 77 | } // namespace multiverso 78 | 79 | #endif // MULTIVERSO_DASHBOARD_H_ 80 | -------------------------------------------------------------------------------- /include/multiverso/io/hdfs_stream.h: -------------------------------------------------------------------------------- 1 | 
#ifndef MULTIVERSO_HDFS_FILE_SYS_H_ 2 | #define MULTIVERSO_HDFS_FILE_SYS_H_ 3 | 4 | #ifdef MULTIVERSO_USE_HDFS 5 | 6 | /*! 7 | * \file local_file_sys.h 8 | * \brief The implement of hdfs io interface. 9 | */ 10 | 11 | #include "multiverso/util/io.h" 12 | #include "hdfs.h" 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | 23 | namespace multiverso { 24 | 25 | class HDFSStream : public Stream { 26 | public: 27 | HDFSStream(hdfsFS fs, const URI& uri, FileOpenMode mode); 28 | 29 | virtual ~HDFSStream(void) override; 30 | 31 | /*! 32 | * \brief write data to a file 33 | * \param buf pointer to a memory buffer 34 | * \param size data size 35 | */ 36 | virtual void Write(const void *buf, size_t size) override; 37 | 38 | 39 | /*! 40 | * \brief read data from Stream 41 | * \param buf pointer to a memory buffer 42 | * \param size the size of buf 43 | */ 44 | virtual size_t Read(void *buf, size_t size) override; 45 | 46 | virtual bool Good() override; 47 | 48 | private: 49 | bool is_good_; 50 | hdfsFS fs_; 51 | hdfsFile fp_; 52 | std::string path_; 53 | std::string mode_; 54 | }; 55 | 56 | class HDFSStreamFactory : public StreamFactory { 57 | public: 58 | explicit HDFSStreamFactory(const std::string& host); 59 | virtual ~HDFSStreamFactory(void) override; 60 | 61 | /*! 62 | * \brief create a Stream 63 | * \param path the path of the file 64 | * \param mode "w" - create an empty file to store data; 65 | * "a" - open the file to append data to it 66 | * "r" - open the file to read 67 | * \return the Stream which is used to write or read data 68 | */ 69 | virtual Stream* Open(const URI& uri, 70 | FileOpenMode mode) override; 71 | 72 | virtual void Close() override; 73 | 74 | private: 75 | std::string namenode_; 76 | hdfsFS fs_; 77 | }; 78 | 79 | } 80 | 81 | #endif 82 | 83 | #endif // MULTIVERSO_HDFS_FILE_SYS_H_ 84 | -------------------------------------------------------------------------------- /include/multiverso/io/local_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_LOCAL_FILE_SYS_H_ 2 | #define MULTIVERSO_LOCAL_FILE_SYS_H_ 3 | 4 | /*! 5 | * \file local_file_sys.h 6 | * \brief the implement of local io interface. 7 | */ 8 | 9 | #include "multiverso/io/io.h" 10 | 11 | namespace multiverso 12 | { 13 | class LocalStream : public Stream 14 | { 15 | public: 16 | LocalStream(const URI& uri, FileOpenMode mode); 17 | virtual ~LocalStream(void) override; 18 | 19 | /*! 20 | * \brief write data to a file 21 | * \param buf pointer to a memory buffer 22 | * \param size data size 23 | */ 24 | virtual void Write(const void *buf, size_t size) override; 25 | 26 | /*! 27 | * \brief read data from Stream 28 | * \param buf pointer to a memory buffer 29 | * \param size the size of buf 30 | */ 31 | virtual size_t Read(void *buf, size_t size) override; 32 | 33 | virtual bool Good() override; 34 | 35 | private: 36 | bool is_good_; 37 | FILE *fp_; 38 | std::string path_; 39 | }; 40 | 41 | class LocalStreamFactory : public StreamFactory 42 | { 43 | public: 44 | LocalStreamFactory(const std::string& host); 45 | ~LocalStreamFactory(void) override; 46 | 47 | /*! 
48 | * \brief create a Stream 49 | * \param path the path of the file 50 | * \param mode "w" - create an empty file to store data; 51 | * "a" - open the file to append data to it 52 | * "r" - open the file to read 53 | * \return the Stream which is used to write or read data 54 | */ 55 | virtual Stream* Open(const URI& uri, 56 | FileOpenMode mode) override; 57 | 58 | virtual void Close() override; 59 | 60 | private: 61 | std::string host_; 62 | }; 63 | } 64 | 65 | #endif // MULTIVERSO_LOCAL_FILE_SYS_H_ -------------------------------------------------------------------------------- /include/multiverso/message.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_MESSAGE_H_ 2 | #define MULTIVERSO_MESSAGE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "multiverso/blob.h" 10 | 11 | namespace multiverso { 12 | 13 | enum MsgType { 14 | Request_Get = 1, 15 | Request_Add = 2, 16 | Reply_Get = -1, 17 | Reply_Add = -2, 18 | Server_Finish_Train = 31, 19 | Control_Barrier = 33, // 0x100001 20 | Control_Reply_Barrier = -33, 21 | Control_Register = 34, 22 | Control_Reply_Register = -34, 23 | Default = 0 24 | }; 25 | 26 | class Message { 27 | public: 28 | MsgType type() const { return static_cast(header_[2]); } 29 | inline int src() const { return header_[0]; } 30 | inline int dst() const { return header_[1]; } 31 | inline int table_id() const { return header_[3]; } 32 | inline int msg_id() const { return header_[4]; } 33 | 34 | inline void set_type(MsgType type) { header_[2] = static_cast(type); } 35 | inline void set_src(int src) { header_[0] = src; } 36 | inline void set_dst(int dst) { header_[1] = dst; } 37 | inline void set_table_id(int table_id) { header_[3] = table_id; } 38 | inline void set_msg_id(int msg_id) { header_[4] = msg_id; } 39 | 40 | inline void set_data(const std::vector& data) { 41 | data_ = std::move(data); } 42 | inline std::vector& data() { return data_; } 43 | inline size_t size() const { return data_.size(); } 44 | 45 | inline int* header() { return header_; } 46 | inline const int* header() const { return header_; } 47 | static const int kHeaderSize = 8 * sizeof(int); 48 | 49 | // Create a Message with only headers 50 | // The src/dst, type is opposite with src message 51 | inline Message* CreateReplyMessage() { 52 | Message* reply = new Message(); 53 | reply->set_dst(this->src()); 54 | reply->set_src(this->dst()); 55 | reply->set_type(static_cast(-header_[2])); 56 | reply->set_table_id(this->table_id()); 57 | reply->set_msg_id(this->msg_id()); 58 | return reply; 59 | } 60 | 61 | inline void Push(const Blob& blob) { data_.push_back(blob); } 62 | 63 | private: 64 | int header_[8]; 65 | std::vector data_; 66 | }; 67 | 68 | typedef std::unique_ptr MessagePtr; 69 | 70 | } // namespace multiverso 71 | 72 | #endif // MULTIVERSO_MESSAGE_H_ 73 | -------------------------------------------------------------------------------- /include/multiverso/multiverso.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_INCLUDE_MULTIVERSO_H_ 2 | #define MULTIVERSO_INCLUDE_MULTIVERSO_H_ 3 | 4 | #include 5 | #include "table_factory.h" 6 | 7 | namespace multiverso { 8 | 9 | void MV_Init(int* argc = nullptr, char* argv[] = nullptr); 10 | 11 | void MV_Barrier(); 12 | 13 | void MV_ShutDown(bool finalize_net = true); 14 | 15 | int MV_Rank(); 16 | int MV_Size(); 17 | 18 | int MV_NumWorkers(); 19 | int MV_NumServers(); 20 | 21 | int MV_WorkerId(); 22 | int MV_ServerId(); 23 
| 24 | int MV_WorkerIdToRank(int worker_id); 25 | int MV_ServerIdToRank(int server_id); 26 | 27 | template 28 | void MV_SetFlag(const std::string& name, const T& value); 29 | 30 | 31 | // create server table and worker table 32 | // \param option for table initiate 33 | // \return worker table pointer if this node is worker 34 | // otherwise return nullptr 35 | template 36 | typename TableOptionType::WorkerTableType* 37 | MV_CreateTable(const TableOptionType& option) { 38 | auto table = table_factory::CreateTable(option); 39 | Zoo::Get()->Barrier(); 40 | return table; 41 | } 42 | 43 | // inplace sum by allreduce 44 | template 45 | void MV_Aggregate(ElemType* data, int size); 46 | 47 | // --- Net API -------------------------------------------------------------- // 48 | // NOTE(feiga): these API is only used for specific situation. 49 | // Init Multiverso Net with the provided endpoint. Multiverso Net will bind 50 | // the provided endpoint and use this endpoint to listen and recv message 51 | // \param rank the rank of this MV process 52 | // \param endpoint endpoint with format ip:port, e.g., localhost:9999 53 | // \return 0 SUCCESS 54 | // \return -1 FAIL 55 | int MV_NetBind(int rank, char* endpoint); 56 | 57 | // Connect Multiverso Net with other processes in the system. Multiverso Net 58 | // will connect these endpoints and send msgs 59 | // \param ranks array of rank 60 | // \param endpoints endpoints for each rank 61 | // \param size size of the array 62 | // \return 0 SUCCESS 63 | // \return -1 FAIL 64 | int MV_NetConnect(int* rank, char* endpoint[], int size); 65 | void MV_NetFinalize(); 66 | 67 | } // namespace multiverso 68 | 69 | #endif // MULTIVERSO_INCLUDE_MULTIVERSO_H_ 70 | 71 | -------------------------------------------------------------------------------- /include/multiverso/net.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_NET_NET_H_ 2 | #define MULTIVERSO_NET_NET_H_ 3 | 4 | #include 5 | #include "multiverso/message.h" 6 | 7 | namespace multiverso { 8 | 9 | enum NetThreadLevel { 10 | THREAD_SERIALIZED, 11 | THREAD_MULTIPLE 12 | }; 13 | 14 | // Interface of inter process communication method 15 | class NetInterface { 16 | public: 17 | static NetInterface* Get(); 18 | 19 | virtual void Init(int* argc = nullptr, char** argv = nullptr) = 0; 20 | 21 | virtual void Finalize() = 0; 22 | 23 | // Bind with a specific endpoint 24 | virtual int Bind(int rank, char* endpoint) = 0; 25 | // Connect with other endpoints 26 | virtual int Connect(int* rank, char* endpoints[], int size) = 0; 27 | 28 | virtual bool active() const = 0; 29 | 30 | virtual std::string name() const = 0; 31 | virtual int size() const = 0; 32 | virtual int rank() const = 0; 33 | 34 | // \return 1. > 0 sent size 2. = 0 not sent 3. < 0 net error 35 | virtual int Send(MessagePtr& msg) = 0; 36 | 37 | // \return 1. > 0 received size 2. = 0 not received 3. 
< 0 net error 38 | virtual int Recv(MessagePtr* msg) = 0; 39 | 40 | // Blocking, send raw data to rank 41 | virtual void SendTo(int rank, char* buf, int len) const = 0; 42 | // Blocking, receive raw data from rank 43 | virtual void RecvFrom(int rank, char* buf, int len) const = 0; 44 | // Blocking, send and recv at same time 45 | virtual void SendRecv(int send_rank, char* send_buf, int send_len, 46 | int recv_rank, char* recv_buf, int recv_len) const = 0; 47 | 48 | virtual int thread_level_support() = 0; 49 | }; 50 | 51 | namespace net { 52 | 53 | // inplace allreduce 54 | template 55 | void Allreduce(Typename* data, size_t elem_count); 56 | 57 | } 58 | 59 | } // namespace multiverso 60 | 61 | #endif // MULTIVERSO_NET_NET_H_ 62 | -------------------------------------------------------------------------------- /include/multiverso/node.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_NODE_H_ 2 | #define MULTIVERSO_NODE_H_ 3 | 4 | namespace multiverso { 5 | 6 | enum Role { 7 | NONE = 0, 8 | WORKER = 1, 9 | SERVER = 2, 10 | ALL = 3 11 | }; 12 | 13 | struct Node { 14 | int rank; 15 | int role; 16 | int worker_id; 17 | int server_id; 18 | 19 | Node(); 20 | }; 21 | 22 | namespace node { 23 | 24 | bool is_worker(int role); 25 | bool is_server(int role); 26 | 27 | } // namespace node 28 | 29 | } // namespace multiverso 30 | 31 | #endif // MULTIVERSO_NODE_H_ 32 | -------------------------------------------------------------------------------- /include/multiverso/server.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_SERVER_H_ 2 | #define MULTIVERSO_SERVER_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "multiverso/actor.h" 8 | 9 | namespace multiverso { 10 | 11 | class ServerTable; 12 | 13 | class Server : public Actor { 14 | public: 15 | Server(); 16 | static Server* GetServer(); 17 | int RegisterTable(ServerTable* table); 18 | 19 | protected: 20 | virtual void ProcessGet(MessagePtr& msg); 21 | virtual void ProcessAdd(MessagePtr& msg); 22 | 23 | std::vector store_; 24 | }; 25 | 26 | } // namespace multiverso 27 | 28 | #endif // MULTIVERSO_SERVER_H_ 29 | -------------------------------------------------------------------------------- /include/multiverso/table/array_table.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_ARRAY_TABLE_H_ 2 | #define MULTIVERSO_ARRAY_TABLE_H_ 3 | 4 | #include "multiverso/multiverso.h" 5 | #include "multiverso/table_interface.h" 6 | #include "multiverso/util/log.h" 7 | 8 | namespace multiverso { 9 | 10 | template 11 | struct ArrayTableOption; 12 | 13 | template 14 | class ArrayWorker : public WorkerTable { 15 | public: 16 | explicit ArrayWorker(size_t size); 17 | explicit ArrayWorker(const ArrayTableOption &option); 18 | // std::vector& raw() { return table_; } 19 | 20 | // Get all element, data is user-allocated memory, Blocking IO 21 | void Get(T* data, size_t size); 22 | // Non-blocking IO 23 | int GetAsync(T* data, size_t size); 24 | 25 | // Add all element 26 | void Add(T* data, size_t size, const AddOption* option = nullptr); 27 | int AddAsync(T* data, size_t, const AddOption* option = nullptr); 28 | 29 | int Partition(const std::vector& kv, 30 | MsgType partition_type, 31 | std::unordered_map >* out) override; 32 | 33 | void ProcessReplyGet(std::vector& reply_data) override; 34 | 35 | private: 36 | T* data_; // not owned 37 | size_t size_; 38 | int num_server_; 39 | std::vector 
server_offsets_; 40 | }; 41 | 42 | template 43 | class Updater; 44 | 45 | // The storage is a continuous large chunk of memory 46 | template 47 | class ArrayServer : public ServerTable { 48 | public: 49 | explicit ArrayServer(size_t size); 50 | explicit ArrayServer(const ArrayTableOption &option); 51 | 52 | void ProcessAdd(const std::vector& data) override; 53 | 54 | void ProcessGet(const std::vector& data, 55 | std::vector* result) override; 56 | 57 | void Store(Stream* s) override; 58 | void Load(Stream* s) override; 59 | 60 | private: 61 | int32_t server_id_; 62 | std::vector storage_; 63 | Updater* updater_; 64 | size_t size_; // number of element with type T 65 | 66 | }; 67 | 68 | template 69 | struct ArrayTableOption { 70 | explicit ArrayTableOption(size_t s) : size(s) {} 71 | size_t size; 72 | DEFINE_TABLE_TYPE(T, ArrayWorker, ArrayServer); 73 | }; 74 | 75 | } 76 | 77 | #endif // MULTIVERSO_ARRAY_TABLE_H_ 78 | -------------------------------------------------------------------------------- /include/multiverso/table/sparse_matrix_table.h: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Microsoft 2 | #ifndef INCLUDE_MULTIVERSO_TABLE_SPARSE_MATRIX_TABLE_H_ 3 | #define INCLUDE_MULTIVERSO_TABLE_SPARSE_MATRIX_TABLE_H_ 4 | 5 | #include 6 | #include 7 | #include "multiverso/multiverso.h" 8 | #include "multiverso/table_interface.h" 9 | #include "multiverso/util/log.h" 10 | #include "multiverso/table/matrix_table.h" 11 | 12 | namespace multiverso { 13 | 14 | template 15 | class SparseMatrixWorkerTable : public MatrixWorkerTable { 16 | public: 17 | SparseMatrixWorkerTable(integer_t num_row, integer_t num_col) 18 | : MatrixWorkerTable(num_row, num_col) { } 19 | int Partition(const std::vector& kv, 20 | MsgType partition_type, 21 | std::unordered_map>* out) override; 22 | void ProcessReplyGet(std::vector& reply_data) override; 23 | 24 | // get whole table, data is user-allocated memory 25 | void Get(T* data, size_t size, 26 | const GetOption* option = nullptr); 27 | 28 | // data is user-allocated memory 29 | void Get(integer_t row_id, T* data, size_t size, 30 | const GetOption* option = nullptr); 31 | 32 | void Get(const std::vector& row_ids, 33 | const std::vector& data_vec, size_t size, 34 | const GetOption* option = nullptr); 35 | 36 | private: 37 | // get whole table, data is user-allocated memory 38 | void Get(T* data, size_t size) = delete; 39 | 40 | // data is user-allocated memory 41 | void Get(integer_t row_id, T* data, size_t size) = delete; 42 | 43 | void Get(const std::vector& row_ids, 44 | const std::vector& data_vec, size_t size) = delete; 45 | }; 46 | 47 | template 48 | class Updater; 49 | template 50 | class SparseMatrixServerTable : public MatrixServerTable { 51 | public: 52 | SparseMatrixServerTable(integer_t num_row, integer_t num_col, bool using_pipeline); 53 | ~SparseMatrixServerTable(); 54 | void ProcessAdd(const std::vector& data) override; 55 | void ProcessGet(const std::vector& data, 56 | std::vector* result) override; 57 | private: 58 | void UpdateAddState(int worker_id, Blob keys); 59 | void UpdateGetState(int worker_id, integer_t* keys, size_t key_size, 60 | std::vector* out_rows); 61 | integer_t GetLogicalRow(integer_t local_row_id) { 62 | return this->row_offset_ + local_row_id; 63 | } 64 | integer_t GetPhysicalRow(integer_t global_row_id) { 65 | return global_row_id - this->row_offset_; 66 | } 67 | private: 68 | bool** up_to_date_; 69 | int workers_nums_; 70 | // std::vector> up_to_date_; 71 | }; 72 | 73 | } // 
namespace multiverso 74 | #endif // INCLUDE_MULTIVERSO_TABLE_SPARSE_MATRIX_TABLE_H_ 75 | -------------------------------------------------------------------------------- /include/multiverso/table_factory.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_TABLE_FACTORY_H_ 2 | #define MULTIVERSO_TABLE_FACTORY_H_ 3 | 4 | #include "multiverso/table_interface.h" 5 | #include "multiverso/zoo.h" 6 | 7 | #include 8 | 9 | namespace multiverso { 10 | 11 | namespace table_factory { 12 | 13 | void FreeServerTables(); 14 | void PushServerTable(ServerTable*table); 15 | 16 | template 17 | typename OptionType::WorkerTableType* CreateTable(const OptionType& option) { 18 | if (Zoo::Get()->server_rank() >= 0) { 19 | PushServerTable( 20 | new typename OptionType::ServerTableType(option)); 21 | } 22 | if (Zoo::Get()->worker_rank() >= 0) { 23 | return new typename OptionType::WorkerTableType(option); 24 | } 25 | return nullptr; 26 | } 27 | 28 | } // namespace table_factory 29 | 30 | } // namespace multiverso 31 | 32 | #endif // MULTIVERSO_TABLE_FACTORY_H_ 33 | -------------------------------------------------------------------------------- /include/multiverso/table_interface.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_TABLE_INTERFACE_H_ 2 | #define MULTIVERSO_TABLE_INTERFACE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "multiverso/blob.h" 10 | #include "multiverso/message.h" 11 | 12 | namespace std { class mutex; } 13 | 14 | namespace multiverso { 15 | 16 | typedef int32_t integer_t; 17 | 18 | class Waiter; 19 | struct AddOption; 20 | struct GetOption; 21 | enum MsgType; 22 | 23 | // User implementent this 24 | class WorkerTable { 25 | public: 26 | WorkerTable(); 27 | virtual ~WorkerTable(); 28 | 29 | void Get(Blob keys, const GetOption* option = nullptr); 30 | void Add(Blob keys, Blob values, const AddOption* option = nullptr); 31 | 32 | int GetAsync(Blob keys, const GetOption* option = nullptr); 33 | int AddAsync(Blob keys, Blob values, const AddOption* option = nullptr); 34 | 35 | void Wait(int id); 36 | 37 | void Reset(int msg_id, int num_wait); 38 | 39 | void Notify(int id); 40 | 41 | virtual int Partition(const std::vector& kv, 42 | MsgType partition_type, 43 | std::unordered_map >* out) = 0; 44 | 45 | virtual void ProcessReplyGet(std::vector&) = 0; 46 | 47 | // add user defined data structure 48 | private: 49 | std::string table_name_; 50 | // assuming there are at most 2^32 tables 51 | int table_id_; 52 | std::mutex* m_; 53 | std::vector waitings_; 54 | // assuming there are at most 2^32 msgs waiting in line 55 | int msg_id_; 56 | }; 57 | 58 | class Stream; 59 | 60 | // interface for checkpoint table 61 | class Serializable { 62 | public: 63 | virtual void Store(Stream* s) = 0; 64 | virtual void Load(Stream* s) = 0; 65 | }; 66 | 67 | // describe the server parameter storage data structure and related method 68 | class ServerTable : public Serializable { 69 | public: 70 | ServerTable(); 71 | virtual ~ServerTable() = default; 72 | virtual void ProcessAdd(const std::vector& data) = 0; 73 | virtual void ProcessGet(const std::vector& data, 74 | std::vector* result) = 0; 75 | }; 76 | 77 | #define DEFINE_TABLE_TYPE(template_type, \ 78 | worker_table_type, server_table_type) \ 79 | typedef worker_table_type WorkerTableType; \ 80 | typedef server_table_type ServerTableType; 81 | 82 | } // namespace multiverso 83 | 84 | #endif // MULTIVERSO_TABLE_INTERFACE_H_ 85 | 
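
table_interface.h and table_factory.h together define how tables come into existence: MV_CreateTable (declared in multiverso.h) forwards a table option to table_factory::CreateTable, which constructs the option's ServerTableType on server nodes and returns a new WorkerTableType on worker nodes (nullptr otherwise). Below is a minimal usage sketch built only from the declarations above, using the existing ArrayTableOption; the table size, update values, and main() scaffolding are illustrative and not part of the repository.

#include <vector>
#include "multiverso/multiverso.h"
#include "multiverso/table/array_table.h"

int main(int argc, char* argv[]) {
  multiverso::MV_Init(&argc, argv);

  // Every node calls MV_CreateTable; server nodes allocate an ArrayServer<float>
  // internally, worker nodes receive an ArrayWorker<float>* handle, other roles get nullptr.
  multiverso::ArrayTableOption<float> option(1000);
  auto* table = multiverso::MV_CreateTable(option);

  if (table != nullptr) {
    std::vector<float> delta(1000, 0.1f);    // illustrative local update
    std::vector<float> model(1000);
    table->Add(delta.data(), delta.size());  // blocking Add: routed to the servers
    table->Get(model.data(), model.size());  // blocking Get: fetch the aggregated state
  }

  multiverso::MV_Barrier();
  multiverso::MV_ShutDown();
  return 0;
}
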
-------------------------------------------------------------------------------- /include/multiverso/updater/adagrad_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_UPDATER_ADAGRAD_UPDATER_H_ 2 | #define MULTIVERSO_UPDATER_ADAGRAD_UPDATER_H_ 3 | 4 | #include "multiverso/updater/updater.h" 5 | #include "multiverso/util/log.h" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | namespace multiverso { 13 | 14 | template 15 | class AdaGradUpdater : public Updater { 16 | public: 17 | explicit AdaGradUpdater(size_t size): 18 | e(1e-6f), size_(size) { 19 | historic_g_sqr_.resize(MV_NumWorkers(), std::vector(size_)); 20 | Log::Debug("[AdaGradUpdater] Init with size = %d, e = %f. historic_size = %d\n", size_, e, historic_g_sqr_.size()); 21 | } 22 | 23 | void Update(size_t num_element, T* data, T* delta, 24 | AddOption* option, size_t offset) override { 25 | 26 | auto g_sqr_data_ = historic_g_sqr_.at(option->worker_id()); 27 | for (size_t index = 0; index < num_element; ++index) { 28 | g_sqr_data_[index + offset] -= 29 | delta[index] * delta[index] / option->learning_rate() / 30 | option->learning_rate(); 31 | 32 | //[TODO(qiwye)] sqrt take too much time 33 | data[index + offset] -= option->rho() / 34 | std::sqrt(g_sqr_data_[index + offset] + e) * 35 | delta[index] / option->learning_rate(); 36 | 37 | //data[index + offset] -= option->rho() * 38 | // QuakeRsqrt(g_sqr_data_[index + offset] + e) * 39 | // delta[index] / option->learning_rate(); 40 | } 41 | } 42 | 43 | 44 | private: 45 | 46 | float QuakeRsqrt(float number){ 47 | float x = number * 0.5f, y = number; 48 | std::uint32_t i; 49 | std::memcpy(&i, &y, sizeof(float)); 50 | i = 0x5f3759df - (i >> 1); 51 | std::memcpy(&y, &i, sizeof(float)); 52 | return y * (1.5f - (x * y * y)); 53 | } 54 | 55 | protected: 56 | std::vector< std::vector> historic_g_sqr_; 57 | float e; 58 | size_t size_; 59 | }; 60 | 61 | } 62 | 63 | #endif // MULTIVERSO_UPDATER_ADAGRAD_UPDATER_H_ 64 | -------------------------------------------------------------------------------- /include/multiverso/updater/momentum_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_ 2 | #define MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_ 3 | 4 | #include "updater.h" 5 | #include 6 | 7 | namespace multiverso { 8 | 9 | template 10 | class MomentumUpdater : public Updater { 11 | public: 12 | explicit MomentumUpdater(size_t size) : size_(size) { 13 | Log::Debug("[SmoothGradientUpdater] Init with size = %d. 
\n", size_); 14 | smooth_gradient_.resize(size_); 15 | } 16 | 17 | void Update(size_t num_element, T* data, T* delta, 18 | AddOption* option, size_t offset) override { 19 | for (size_t index = 0; index < num_element; ++index) { 20 | smooth_gradient_[index + offset] = 21 | option->momentum() * smooth_gradient_[index + offset] 22 | + (1 - option->momentum()) * delta[index]; 23 | data[index + offset] -= smooth_gradient_[index + offset]; 24 | } 25 | } 26 | 27 | ~MomentumUpdater() { smooth_gradient_.clear(); } 28 | protected: 29 | std::vector smooth_gradient_; 30 | size_t size_; 31 | }; 32 | 33 | } 34 | 35 | #endif // MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_ -------------------------------------------------------------------------------- /include/multiverso/updater/sgd_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_UPDATER_SGD_UPDATER_H_ 2 | #define MULTIVERSO_UPDATER_SGD_UPDATER_H_ 3 | 4 | #include "updater.h" 5 | 6 | namespace multiverso { 7 | 8 | template 9 | class SGDUpdater : public Updater { 10 | public: 11 | explicit SGDUpdater(size_t){ 12 | Log::Debug("[SGDUpdater] Init. \n"); 13 | } 14 | void Update(size_t num_element, T* data, T* delta, 15 | AddOption*, size_t offset) override { 16 | for (size_t index = 0; index < num_element; ++index) { 17 | data[index + offset] -= delta[index]; 18 | } 19 | } 20 | 21 | void Access(size_t num_element, T* data, T* blob_data, 22 | size_t offset, AddOption*) override{ 23 | memcpy(blob_data, data + offset, sizeof(T) * num_element); 24 | } 25 | 26 | ~SGDUpdater(){} 27 | }; 28 | 29 | } 30 | 31 | #endif // MULTIVERSO_UPDATER_ASGD_UPDATER_H_ -------------------------------------------------------------------------------- /include/multiverso/util/allocator.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_ALLOCATOR_H_ 2 | #define MULTIVERSO_ALLOCATOR_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace std { class mutex; } 8 | 9 | namespace multiverso { 10 | 11 | const size_t g_pointer_size = sizeof(void*); 12 | 13 | class MemoryBlock; 14 | class FreeList { 15 | public: 16 | FreeList(size_t size); 17 | ~FreeList(); 18 | char *Pop(); 19 | void Push(MemoryBlock*); 20 | private: 21 | MemoryBlock* free_ = nullptr; 22 | size_t size_; 23 | std::mutex* mutex_; 24 | }; 25 | 26 | class MemoryBlock { 27 | public: 28 | MemoryBlock(size_t size, FreeList* list); 29 | ~MemoryBlock(); 30 | char* data(); 31 | void Unlink(); 32 | void Link(); 33 | MemoryBlock* next; 34 | private: 35 | char* data_; 36 | std::atomic ref_; 37 | static const size_t header_size_ = (sizeof(MemoryBlock*) << 1); 38 | }; 39 | 40 | class Allocator { 41 | public: 42 | virtual ~Allocator() = default; 43 | virtual char* Alloc(size_t size); 44 | virtual void Free(char* data); 45 | virtual void Refer(char *data); 46 | static Allocator* Get(); 47 | private: 48 | static const int header_size_ = sizeof(std::atomic*); 49 | }; 50 | 51 | class SmartAllocator : public Allocator { 52 | public: 53 | SmartAllocator(); 54 | ~SmartAllocator(); 55 | char* Alloc(size_t size); 56 | void Free(char* data); 57 | void Refer(char *data); 58 | private: 59 | std::unordered_map pools_; 60 | std::mutex* mutex_; 61 | }; 62 | 63 | } // namespace multiverso 64 | 65 | #endif // MULTIVERSO_ALLOCATOR_H_ 66 | -------------------------------------------------------------------------------- /include/multiverso/util/net_util.h: -------------------------------------------------------------------------------- 1 | #ifndef 
MULTIVERSO_UTIL_NET_UTIL_H_ 2 | #define MULTIVERSO_UTIL_NET_UTIL_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace multiverso { 8 | namespace net { 9 | 10 | void GetLocalIPAddress(std::unordered_set* result); 11 | 12 | } // namespace net 13 | } // namespace multiverso 14 | 15 | #endif // MULTIVERSO_UTIL_NET_UTIL_H_ 16 | -------------------------------------------------------------------------------- /include/multiverso/util/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_TIMER_H_ 2 | #define MULTIVERSO_TIMER_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace multiverso { 8 | 9 | class Timer { 10 | public: 11 | Timer(); 12 | 13 | // Restart the timer 14 | void Start(); 15 | 16 | // Get elapsed milliseconds since last Timer::Start 17 | double elapse(); 18 | 19 | private: 20 | using Clock = std::chrono::high_resolution_clock; 21 | using TimePoint = Clock::time_point; 22 | 23 | TimePoint start_point_; 24 | }; 25 | 26 | } // namespace multiverso 27 | 28 | #endif // MULTIVERSO_TIMER_H_ 29 | -------------------------------------------------------------------------------- /include/multiverso/util/waiter.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_WAITER_H_ 2 | #define MULTIVERSO_WAITER_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace multiverso { 8 | 9 | class Waiter { 10 | public: 11 | explicit Waiter(int num_wait = 1) : num_wait_(num_wait) {} 12 | 13 | void Wait() { 14 | std::unique_lock lock(mutex_); 15 | while (num_wait_ > 0) cv_.wait(lock); 16 | } 17 | 18 | void Notify() { 19 | std::unique_lock lock(mutex_); 20 | --num_wait_; 21 | cv_.notify_all(); 22 | } 23 | 24 | void Reset(int num_wait) { 25 | std::unique_lock lock(mutex_); 26 | num_wait_ = num_wait; 27 | } 28 | 29 | private: 30 | std::mutex mutex_; 31 | std::condition_variable cv_; 32 | int num_wait_; 33 | }; 34 | 35 | } // namespace multiverso 36 | 37 | #endif // MULTIVERSO_WAITER_H_ 38 | -------------------------------------------------------------------------------- /include/multiverso/worker.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_WORKER_H_ 2 | #define MULTIVERSO_WORKER_H_ 3 | 4 | #include 5 | 6 | #include "multiverso/actor.h" 7 | 8 | namespace multiverso { 9 | 10 | class WorkerTable; 11 | 12 | class Worker : public Actor { 13 | public: 14 | Worker(); 15 | 16 | int RegisterTable(WorkerTable* worker_table); 17 | 18 | private: 19 | void ProcessGet(MessagePtr& msg); 20 | void ProcessAdd(MessagePtr& msg); 21 | void ProcessReplyGet(MessagePtr& msg); 22 | void ProcessReplyAdd(MessagePtr& msg); 23 | 24 | std::vector cache_; 25 | }; 26 | 27 | } // namespace multiverso 28 | 29 | #endif // MULTIVERSO_WORKER_H_ 30 | -------------------------------------------------------------------------------- /include/multiverso/zoo.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_ZOO_H_ 2 | #define MULTIVERSO_ZOO_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "multiverso/actor.h" 9 | #include "multiverso/node.h" 10 | #include "multiverso/table_interface.h" 11 | 12 | namespace multiverso { 13 | 14 | class NetInterface; 15 | 16 | // Zoo Manage all components in the system, include all actors, and network 17 | // Maintain system information, provide method to access this information 18 | // Control the system, to start and end 19 | class Zoo { 20 | public: 21 | ~Zoo(); 22 | inline static Zoo* 
Get() { static Zoo zoo; return &zoo; } 23 | 24 | // Start all actors 25 | void Start(int* argc, char** argv); 26 | // Stop all actors 27 | void Stop(bool finalize_net); 28 | 29 | void Barrier(); 30 | 31 | void SendTo(const std::string& name, MessagePtr&); 32 | void Receive(MessagePtr& msg); 33 | 34 | int rank() const; 35 | int size() const; 36 | 37 | inline int worker_rank() const { return nodes_[rank()].worker_id; } 38 | inline int server_rank() const { return nodes_[rank()].server_id; } 39 | 40 | inline int rank_to_worker_id(int rank) const { 41 | return nodes_[rank].worker_id; 42 | } 43 | 44 | inline int rank_to_server_id(int rank) const { 45 | return nodes_[rank].server_id; 46 | } 47 | 48 | inline int worker_id_to_rank(int worker_id) const { 49 | return worker_id_to_rank_[worker_id]; 50 | } 51 | 52 | inline int server_id_to_rank(int server_id) const { 53 | return server_id_to_rank_[server_id]; 54 | } 55 | 56 | inline int num_workers() const { return num_workers_; } 57 | inline int num_servers() const { return num_servers_; } 58 | 59 | 60 | int RegisterTable(WorkerTable* worker_table); 61 | int RegisterTable(ServerTable* server_table); 62 | 63 | void RegisterActor(const std::string name, Actor* actor); 64 | 65 | private: 66 | // private constructor 67 | Zoo(); 68 | void RegisterNode(); 69 | void FinishTrain(); 70 | void StartPS(); 71 | void StopPS(); 72 | 73 | std::unordered_map zoo_; 74 | 75 | std::unique_ptr> mailbox_; 76 | 77 | NetInterface* net_util_; 78 | 79 | std::vector nodes_; 80 | std::vector server_id_to_rank_; 81 | std::vector worker_id_to_rank_; 82 | 83 | int num_workers_; 84 | int num_servers_; 85 | }; 86 | 87 | } // namespace multiverso 88 | 89 | #endif // MULTIVERSO_ZOO_H_ 90 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(${MPI_CXX_INCLUDE_PATH}) 2 | 3 | if (NOT USE_ZMQ) 4 | ADD_DEFINITIONS(-DMULTIVERSO_USE_MPI) 5 | else() 6 | ADD_DEFINITIONS(-DMULTIVERSO_USE_ZMQ) 7 | endif() 8 | 9 | if (NOT USE_ZMQ) 10 | find_package(OpenMP) 11 | if (OPENMP_FOUND) 12 | message("OpenMP found") 13 | set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") 14 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 15 | endif() 16 | endif() 17 | 18 | set(MULTIVERSO_SRC actor.cpp communicator.cpp controller.cpp dashboard.cpp multiverso.cpp net.cpp node.cpp server.cpp table.cpp table/array_table.cpp table/matrix_table.cpp table/sparse_matrix_table.cpp table/matrix.cpp timer.cpp updater/updater.cpp util/configure.cpp io/hdfs_stream.cpp io/io.cpp io/local_stream.cpp util/log.cpp util/net_util.cpp worker.cpp zoo.cpp c_api.cpp util/allocator.cpp table_factory.cpp blob.cpp) 19 | 20 | add_library(multiverso SHARED ${MULTIVERSO_SRC}) 21 | #add_library(imultiverso ${MULTIVERSO_SRC}) 22 | if (NOT USE_ZMQ) 23 | target_link_libraries(multiverso ${MPI_LIBRARY}) 24 | else() 25 | 
target_link_libraries(multiverso zmq) 26 | endif() 27 | 28 | install (TARGETS multiverso DESTINATION lib) 29 | if (UNIX) 30 | install(CODE "execute_process(COMMAND ldconfig)") # run ldconfig. Otherwise ld.so.cache won't be created. 31 | endif() 32 | -------------------------------------------------------------------------------- /src/actor.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/actor.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "multiverso/message.h" 8 | #include "multiverso/util/log.h" 9 | #include "multiverso/util/mt_queue.h" 10 | #include "multiverso/zoo.h" 11 | 12 | namespace multiverso { 13 | 14 | Actor::Actor(const std::string& name) : name_(name) { 15 | mailbox_.reset(new MtQueue()); 16 | Zoo::Get()->RegisterActor(name, this); 17 | is_working_ = false; 18 | } 19 | 20 | Actor::~Actor() {} 21 | 22 | void Actor::Start() { 23 | thread_.reset(new std::thread(&Actor::Main, this)); 24 | while (!is_working_) { 25 | std::this_thread::sleep_for(std::chrono::milliseconds(10)); 26 | } 27 | } 28 | 29 | void Actor::Stop() { 30 | while (!mailbox_->Empty()) { ; } 31 | is_working_ = false; 32 | mailbox_->Exit(); 33 | thread_->join(); 34 | } 35 | 36 | void Actor::Receive(MessagePtr& msg) { mailbox_->Push(msg); } 37 | 38 | void Actor::Main() { 39 | is_working_ = true; 40 | MessagePtr msg; 41 | while (mailbox_->Pop(msg)) { 42 | if (handlers_.find(msg->type()) != handlers_.end()) { 43 | handlers_[msg->type()](msg); 44 | } else if (handlers_.find(MsgType::Default) != handlers_.end()) { 45 | handlers_[MsgType::Default](msg); 46 | } else { 47 | Log::Fatal("Unexpected msg type\n"); 48 | } 49 | } 50 | } 51 | 52 | void Actor::SendTo(const std::string& dst_name, MessagePtr& msg) { 53 | Zoo::Get()->SendTo(dst_name, msg); 54 | } 55 | 56 | } // namespace multiverso 57 | -------------------------------------------------------------------------------- /src/blob.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/blob.h" 2 | 3 | #include "multiverso/util/allocator.h" 4 | #include "multiverso/util/log.h" 5 | 6 | namespace multiverso { 7 | 8 | Blob::Blob(size_t size) : size_(size) { 9 | CHECK(size > 0); 10 | data_ = Allocator::Get()->Alloc(size); 11 | } 12 | 13 | // Construct from external memory. Will copy a new piece 14 | Blob::Blob(const void* data, size_t size) : size_(size) { 15 | data_ = Allocator::Get()->Alloc(size); 16 | memcpy(data_, data, size_); 17 | } 18 | 19 | Blob::Blob(void* data, size_t size) : size_(size) { 20 | data_ = Allocator::Get()->Alloc(size); 21 | memcpy(data_, data, size_); 22 | } 23 | 24 | Blob::Blob(const Blob& rhs) { 25 | if (rhs.size() != 0) { 26 | Allocator::Get()->Refer(rhs.data_); 27 | } 28 | this->data_ = rhs.data_; 29 | this->size_ = rhs.size_; 30 | } 31 | 32 | Blob::~Blob() { 33 | if (data_ != nullptr) { 34 | Allocator::Get()->Free(data_); 35 | } 36 | } 37 | 38 | // Shallow copy by default. 
Call \ref CopyFrom for a deep copy 39 | void Blob::operator=(const Blob& rhs) { 40 | if (rhs.size() != 0) { 41 | Allocator::Get()->Refer(rhs.data_); 42 | } 43 | this->data_ = rhs.data_; 44 | this->size_ = rhs.size_; 45 | } 46 | 47 | } // namespace multiverso 48 | -------------------------------------------------------------------------------- /src/build_dll.bat: -------------------------------------------------------------------------------- 1 | MSBuild.exe Multiverso.vcxproj /p:Configuration=Release /p:Platform=x64 /p:ConfigurationType=DynamicLibrary -------------------------------------------------------------------------------- /src/c_api.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/c_api.h" 2 | 3 | #include "multiverso/multiverso.h" 4 | #include "multiverso/table/array_table.h" 5 | #include "multiverso/table/matrix_table.h" 6 | #include "multiverso/util/log.h" 7 | 8 | 9 | extern "C" { 10 | void MV_Init(int* argc, char* argv[]) { 11 | multiverso::MV_Init(argc, argv); 12 | } 13 | 14 | void MV_ShutDown(){ 15 | multiverso::MV_ShutDown(); 16 | } 17 | 18 | void MV_Barrier(){ 19 | multiverso::MV_Barrier(); 20 | } 21 | 22 | int MV_NumWorkers(){ 23 | return multiverso::MV_NumWorkers(); 24 | } 25 | 26 | int MV_WorkerId(){ 27 | return multiverso::MV_WorkerId(); 28 | } 29 | 30 | int MV_ServerId(){ 31 | return multiverso::MV_ServerId(); 32 | } 33 | 34 | // Array Table 35 | void MV_NewArrayTable(int size, TableHandler* out) { 36 | *out = multiverso::MV_CreateTable(multiverso::ArrayTableOption(size)); 37 | } 38 | 39 | void MV_GetArrayTable(TableHandler handler, float* data, int size) { 40 | auto worker = reinterpret_cast*>(handler); 41 | worker->Get(data, size); 42 | } 43 | 44 | void MV_AddArrayTable(TableHandler handler, float* data, int size) { 45 | auto worker = reinterpret_cast*>(handler); 46 | worker->Add(data, size); 47 | } 48 | 49 | void MV_AddAsyncArrayTable(TableHandler handler, float* data, int size) { 50 | auto worker = reinterpret_cast*>(handler); 51 | worker->AddAsync(data, size); 52 | } 53 | 54 | 55 | // MatrixTable 56 | void MV_NewMatrixTable(int num_row, int num_col, TableHandler* out) { 57 | *out = multiverso::MV_CreateTable(multiverso::MatrixTableOption(num_row, num_col)); 58 | } 59 | 60 | void MV_GetMatrixTableAll(TableHandler handler, float* data, int size) { 61 | auto worker = reinterpret_cast*>(handler); 62 | worker->Get(data, size); 63 | } 64 | 65 | void MV_AddMatrixTableAll(TableHandler handler, float* data, int size) { 66 | auto worker = reinterpret_cast*>(handler); 67 | worker->Add(data, size); 68 | } 69 | 70 | void MV_AddAsyncMatrixTableAll(TableHandler handler, float* data, int size) { 71 | auto worker = reinterpret_cast*>(handler); 72 | worker->AddAsync(data, size); 73 | } 74 | 75 | void MV_GetMatrixTableByRows(TableHandler handler, float* data, int size, 76 | int row_ids[], int row_ids_n) { 77 | auto worker = reinterpret_cast*>(handler); 78 | worker->Get(data, size, row_ids, row_ids_n); 79 | } 80 | 81 | void MV_AddMatrixTableByRows(TableHandler handler, float* data, int size, 82 | int row_ids[], int row_ids_n) { 83 | auto worker = reinterpret_cast*>(handler); 84 | worker->Add(data, size, row_ids, row_ids_n); 85 | } 86 | 87 | void MV_AddAsyncMatrixTableByRows(TableHandler handler, float* data, int size, 88 | int row_ids[], int row_ids_n) { 89 | auto worker = reinterpret_cast*>(handler); 90 | worker->AddAsync(data, size, row_ids, row_ids_n); 91 | } 92 | 93 | } 94 | 
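
c_api.cpp exposes the templated tables behind the opaque TableHandler type so that plain C callers, and the language bindings built on top of it, can drive them without templates. A sketch of the array-table path follows, assuming the corresponding declarations in multiverso/c_api.h; the sizes and buffers are illustrative.

#include "multiverso/c_api.h"

int main(int argc, char* argv[]) {
  MV_Init(&argc, argv);

  TableHandler table;
  MV_NewArrayTable(1000, &table);         // worker handle to a shared float array of 1000 elements

  float update[1000] = {0.0f};
  float model[1000];
  MV_AddArrayTable(table, update, 1000);  // blocking add of a local update
  MV_GetArrayTable(table, model, 1000);   // blocking fetch of the aggregated values

  MV_Barrier();
  MV_ShutDown();
  return 0;
}
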
-------------------------------------------------------------------------------- /src/communicator.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/communicator.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "multiverso/zoo.h" 7 | #include "multiverso/net.h" 8 | #include "multiverso/util/log.h" 9 | #include "multiverso/util/mt_queue.h" 10 | 11 | namespace multiverso { 12 | 13 | namespace message { 14 | 15 | bool to_server(MsgType type) { 16 | return (static_cast(type)) > 0 && 17 | (static_cast(type)) < 32; 18 | } 19 | 20 | bool to_worker(MsgType type) { 21 | return (static_cast(type)) < 0 && 22 | (static_cast(type)) > -32; 23 | } 24 | 25 | bool to_controler(MsgType type) { 26 | return (static_cast(type)) > 32; 27 | } 28 | 29 | } // namespace message 30 | 31 | Communicator::Communicator() : Actor(actor::kCommunicator) { 32 | RegisterHandler(MsgType::Default, std::bind( 33 | &Communicator::ProcessMessage, this, std::placeholders::_1)); 34 | net_util_ = NetInterface::Get(); 35 | } 36 | 37 | Communicator::~Communicator() { } 38 | 39 | void Communicator::Main() { 40 | is_working_ = true; 41 | 42 | switch (net_util_->thread_level_support()) { 43 | case NetThreadLevel::THREAD_MULTIPLE: { 44 | recv_thread_.reset(new std::thread(&Communicator::Communicate, this)); 45 | Actor::Main(); 46 | recv_thread_->join(); 47 | break; 48 | } 49 | case NetThreadLevel::THREAD_SERIALIZED: { 50 | MessagePtr msg; 51 | while (mailbox_->Alive()) { 52 | // Try pop and Send 53 | if (mailbox_->TryPop(msg)) { 54 | ProcessMessage(msg); 55 | } 56 | // Probe and Recv 57 | size_t size = net_util_->Recv(&msg); 58 | if (size > 0) LocalForward(msg); 59 | CHECK(msg.get() == nullptr); 60 | net_util_->Send(msg); 61 | } 62 | break; 63 | } 64 | default: 65 | Log::Fatal("Unexpected thread level\n"); 66 | } 67 | } 68 | 69 | void Communicator::ProcessMessage(MessagePtr& msg) { 70 | if (msg->dst() != net_util_->rank()) { 71 | net_util_->Send(msg); 72 | return; 73 | } 74 | LocalForward(msg); 75 | } 76 | 77 | void Communicator::Communicate() { 78 | while (is_working_) { 79 | MessagePtr msg(new Message()); 80 | int size = net_util_->Recv(&msg); 81 | if (size == -1) { 82 | continue; 83 | } 84 | if (size > 0) { 85 | // a message received 86 | CHECK(msg->dst() == Zoo::Get()->rank()); 87 | LocalForward(msg); 88 | } 89 | } 90 | Log::Debug("Comm recv thread exit\n"); 91 | } 92 | 93 | void Communicator::LocalForward(MessagePtr& msg) { 94 | CHECK(msg->dst() == Zoo::Get()->rank()); 95 | if (message::to_server(msg->type())) { 96 | SendTo(actor::kServer, msg); 97 | } else if (message::to_worker(msg->type())) { 98 | SendTo(actor::kWorker, msg); 99 | } else if (message::to_controler(msg->type())) { 100 | SendTo(actor::kController, msg); 101 | } else { 102 | // Send back to the msg queue of zoo 103 | Zoo::Get()->Receive(msg); 104 | } 105 | } 106 | 107 | } // namespace multiverso 108 | -------------------------------------------------------------------------------- /src/dashboard.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/dashboard.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "multiverso/util/log.h" 8 | 9 | namespace multiverso { 10 | 11 | std::map Dashboard::record_; 12 | std::mutex Dashboard::m_; 13 | 14 | void Dashboard::AddMonitor(const std::string& name, Monitor* monitor) { 15 | std::lock_guard l(m_); 16 | CHECK(record_[name] == nullptr); 17 | record_[name] = monitor; 18 | } 19 | 20 | void 
Dashboard::RemoveMonitor(const std::string& name) { 21 | std::lock_guard l(m_); 22 | CHECK_NOTNULL(record_[name]); 23 | record_.erase(name); 24 | } 25 | 26 | std::string Dashboard::Watch(const std::string& name) { 27 | std::lock_guard l(m_); 28 | std::string result; 29 | if (record_.find(name) == record_.end()) return result; 30 | Monitor* monitor = record_[name]; 31 | CHECK_NOTNULL(monitor); 32 | return monitor->info_string(); 33 | } 34 | 35 | std::string Monitor::info_string() const { 36 | std::ostringstream oss; 37 | oss << "[" << name_ << "] " 38 | << " count = " << count_ 39 | << " elapse = " << elapse_ << "ms" 40 | << " average = " << average() << "ms"; 41 | return oss.str(); 42 | } 43 | 44 | void Dashboard::Display() { 45 | std::lock_guard l(m_); 46 | Log::Info("--------------Show dashboard monitor information--------------\n"); 47 | for (auto& it : record_) Log::Info("%s\n", it.second->info_string().c_str()); 48 | Log::Info("--------------------------------------------------------------\n"); 49 | } 50 | 51 | } // namespace multiverso 52 | -------------------------------------------------------------------------------- /src/io/io.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/io/io.h" 2 | #include "multiverso/io/hdfs_stream.h" 3 | #include "multiverso/io/local_stream.h" 4 | 5 | 6 | namespace multiverso { 7 | 8 | Stream* StreamFactory::GetStream(const URI& uri, 9 | FileOpenMode mode) { 10 | std::string addr = uri.scheme + "://" + uri.host; 11 | if (instances_.find(addr) == instances_.end()) { 12 | if (uri.scheme == std::string("file")) 13 | instances_[addr] = std::shared_ptr(new LocalStreamFactory(uri.host)); 14 | #ifdef MULTIVERSO_USE_HDFS 15 | else if (uri.scheme == std::string("hdfs")) 16 | instances_[addr] = std::shared_ptr(new HDFSStreamFactory(uri.host)); 17 | #endif 18 | else Log::Error("Can not support the StreamFactory '%s'\n", uri.scheme.c_str()); 19 | } 20 | return instances_[addr]->Open(uri, mode); 21 | } 22 | 23 | std::map > StreamFactory::instances_; 24 | 25 | TextReader::TextReader(const URI& uri, size_t buf_size) { 26 | stream_ = StreamFactory::GetStream(uri, FileOpenMode::Read); 27 | buf_size_ = buf_size; 28 | pos_ = length_ = 0; 29 | buf_ = new char[buf_size_]; 30 | assert(buf_ != nullptr); 31 | } 32 | 33 | size_t TextReader::GetLine(std::string &line) { 34 | line.clear(); 35 | bool isEnd = false; 36 | while (!isEnd) { 37 | while(pos_ < length_) { 38 | char & c = buf_[pos_++]; 39 | if (c == '\n') { 40 | isEnd = true; 41 | break; 42 | } else { 43 | line += c; 44 | } 45 | } 46 | if (isEnd || LoadBuffer() == 0) break; 47 | } 48 | return line.size(); 49 | } 50 | 51 | size_t TextReader::LoadBuffer() { 52 | assert (pos_ == length_); 53 | pos_ = length_ = 0; 54 | return length_ = stream_->Read(buf_, buf_size_ - 1); 55 | } 56 | 57 | TextReader::~TextReader() { 58 | delete stream_; 59 | delete [] buf_; 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/io/local_stream.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/io/local_stream.h" 2 | #include 3 | extern "C" { 4 | #include 5 | } 6 | #ifndef _MSC_VER 7 | extern "C" { 8 | #include 9 | #include 10 | } 11 | #else 12 | #include 13 | #define stat _stat64 14 | #endif 15 | 16 | namespace multiverso { 17 | 18 | LocalStream::LocalStream(const URI& uri, FileOpenMode mode) { 19 | path_ = uri.path; 20 | std::string mode_str; 21 | switch (mode) { 22 | 
case FileOpenMode::Read: 23 | mode_str = "r"; 24 | break; 25 | case FileOpenMode::Write: 26 | mode_str = "w"; 27 | break; 28 | case FileOpenMode::Append: 29 | mode_str = "a"; 30 | break; 31 | case FileOpenMode::BinaryRead: 32 | mode_str = "rb"; 33 | break; 34 | case FileOpenMode::BinaryWrite: 35 | mode_str = "wb"; 36 | break; 37 | case FileOpenMode::BinaryAppend: 38 | mode_str = "ab"; 39 | } 40 | #ifdef _MSC_VER 41 | fopen_s(&fp_, uri.path.c_str(), mode_str.c_str()); 42 | #else 43 | fp_ = fopen(uri.path.c_str(), mode_str.c_str()); 44 | #endif 45 | 46 | if (fp_ == nullptr) { 47 | is_good_ = false; 48 | Log::Error("Failed to open LocalStream %s\n", uri.path.c_str()); 49 | } else { 50 | is_good_ = true; 51 | } 52 | } 53 | 54 | LocalStream::~LocalStream(void) 55 | { 56 | is_good_ = false; 57 | if (fp_ != nullptr) 58 | std::fclose(fp_); 59 | } 60 | 61 | /*! 62 | * \brief write data to a file 63 | * \param buf pointer to a memory buffer 64 | * \param size data size 65 | */ 66 | void LocalStream::Write(const void *buf, size_t size) { 67 | if (std::fwrite(buf, 1, size, fp_) != size) { 68 | is_good_ = false; 69 | Log::Error("LocalStream.Write incomplete\n"); 70 | } 71 | } 72 | 73 | 74 | /*! 75 | * \brief read data from Stream 76 | * \param buf pointer to a memory buffer 77 | * \param size the size of buf 78 | */ 79 | size_t LocalStream::Read(void *buf, size_t size) { 80 | return std::fread(buf, 1, size, fp_); 81 | } 82 | 83 | bool LocalStream::Good() { return is_good_; } 84 | 85 | LocalStreamFactory::LocalStreamFactory(const std::string& host) { 86 | host_ = host; 87 | } 88 | 89 | LocalStreamFactory::~LocalStreamFactory() { 90 | } 91 | 92 | /*! 93 | * \brief create a Stream 94 | * \param path the path of the file 95 | * \param mode "w" - create an empty file to store data; 96 | * "a" - open the file to append data to it 97 | * "r" - open the file to read 98 | * \return the Stream which is used to write or read data 99 | */ 100 | Stream* LocalStreamFactory::Open(const URI& uri, FileOpenMode mode) { 101 | return new LocalStream(uri, mode); 102 | } 103 | 104 | void LocalStreamFactory::Close() { 105 | ///TODO 106 | } 107 | 108 | } -------------------------------------------------------------------------------- /src/multiverso.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/multiverso.h" 2 | 3 | #include "multiverso/dashboard.h" 4 | #include "multiverso/net.h" 5 | #include "multiverso/zoo.h" 6 | #include "multiverso/table_factory.h" 7 | #include "multiverso/util/configure.h" 8 | 9 | namespace multiverso { 10 | 11 | void MV_Init(int* argc, char* argv[]) { 12 | Zoo::Get()->Start(argc, argv); 13 | } 14 | 15 | void MV_ShutDown(bool finalize_net) { 16 | Zoo::Get()->Stop(finalize_net); 17 | table_factory::FreeServerTables(); 18 | } 19 | 20 | void MV_Barrier() { Zoo::Get()->Barrier(); } 21 | 22 | int MV_Rank() { return Zoo::Get()->rank(); } 23 | 24 | int MV_Size() { return Zoo::Get()->size(); } 25 | 26 | int MV_WorkerId() { 27 | return Zoo::Get()->worker_rank(); 28 | } 29 | int MV_ServerId() { 30 | return Zoo::Get()->server_rank(); 31 | } 32 | 33 | int MV_NumWorkers() { 34 | return Zoo::Get()->num_workers(); 35 | } 36 | int MV_NumServers() { 37 | return Zoo::Get()->num_servers(); 38 | } 39 | 40 | int MV_WorkerIdToRank(int worker_id) { 41 | return Zoo::Get()->worker_id_to_rank(worker_id); 42 | } 43 | 44 | int MV_ServerIdToRank(int server_id) { 45 | return Zoo::Get()->server_id_to_rank(server_id); 46 | } 47 | 48 | template 49 | void MV_SetFlag(const 
std::string& name, const T& value) { 50 | SetCMDFlag(name, value); 51 | } 52 | 53 | template 54 | void MV_Aggregate(ElemType* data, int size) { 55 | net::Allreduce(data, size); 56 | } 57 | 58 | int MV_NetBind(int rank, char* endpoint) { 59 | return NetInterface::Get()->Bind(rank, endpoint); 60 | } 61 | 62 | int MV_NetConnect(int* ranks, char* endpoints[], int size) { 63 | return NetInterface::Get()->Connect(ranks, endpoints, size); 64 | } 65 | 66 | void MV_NetFinalize() { 67 | NetInterface::Get()->Finalize(); 68 | } 69 | 70 | template void MV_Aggregate(char*, int); 71 | template void MV_Aggregate(int*, int); 72 | template void MV_Aggregate(float*, int); 73 | template void MV_Aggregate(double*, int); 74 | 75 | template void MV_SetFlag(const std::string&, const int&); 76 | template void MV_SetFlag(const std::string&, const bool&); 77 | template void MV_SetFlag(const std::string&, const std::string&); 78 | template void MV_SetFlag(const std::string&, const double&); 79 | 80 | } // namespace multiverso 81 | -------------------------------------------------------------------------------- /src/net.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/net.h" 2 | 3 | #include 4 | #include 5 | #include "multiverso/message.h" 6 | #include "multiverso/util/log.h" 7 | 8 | #include "multiverso/net/zmq_net.h" 9 | #include "multiverso/net/mpi_net.h" 10 | 11 | namespace multiverso { 12 | 13 | NetInterface* NetInterface::Get() { 14 | #ifdef MULTIVERSO_USE_ZMQ 15 | static ZMQNetWrapper net_impl; 16 | return &net_impl; 17 | #else 18 | // #ifdef MULTIVERSO_USE_MPI 19 | // Use MPI by default 20 | static MPINetWrapper net_impl; 21 | return &net_impl; 22 | // #endif 23 | #endif 24 | } 25 | 26 | namespace net { 27 | template 28 | void Allreduce(Typename* data, size_t elem_count) { 29 | #ifdef MULTIVERSO_USE_MPI 30 | CHECK(NetInterface::Get()->active()); 31 | MPINetWrapper::Allreduce(data, elem_count); 32 | #else 33 | Log::Fatal("Not implemented yet"); 34 | #endif 35 | } 36 | 37 | template void Allreduce(char*, size_t); 38 | template void Allreduce(int*, size_t); 39 | template void Allreduce(float*, size_t); 40 | template void Allreduce(double*, size_t); 41 | 42 | } // namespace net 43 | 44 | 45 | } // namespace multiverso 46 | -------------------------------------------------------------------------------- /src/net/mpi_net.cpp: -------------------------------------------------------------------------------- 1 | #ifdef MULTIVERSO_USE_MPI 2 | 3 | #include "multiverso/net/mpi_net.h" 4 | 5 | namespace multiverso { 6 | 7 | template void MPINetWrapper::Allreduce(char*, size_t); 8 | template void MPINetWrapper::Allreduce(int*, size_t); 9 | template void MPINetWrapper::Allreduce(float*, size_t); 10 | template void MPINetWrapper::Allreduce(double*, size_t); 11 | 12 | } // namespace multiverso 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /src/node.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/node.h" 2 | 3 | namespace multiverso { 4 | 5 | Node::Node() : rank(-1), role(-1), worker_id(-1), server_id(-1) {} 6 | 7 | namespace node { 8 | 9 | bool is_worker(int role) { return (role & Role::WORKER) != 0; } 10 | bool is_server(int role) { return (role & Role::SERVER) != 0; } 11 | 12 | } // namespace node 13 | 14 | } // namespace multiverso 15 | -------------------------------------------------------------------------------- /src/table.cpp: 
-------------------------------------------------------------------------------- 1 | #include "multiverso/table_interface.h" 2 | 3 | #include 4 | 5 | #include "multiverso/dashboard.h" 6 | #include "multiverso/updater/updater.h" 7 | #include "multiverso/util/log.h" 8 | #include "multiverso/util/waiter.h" 9 | #include "multiverso/zoo.h" 10 | 11 | namespace multiverso { 12 | 13 | WorkerTable::WorkerTable() { 14 | msg_id_ = 0; 15 | m_ = new std::mutex(); 16 | table_id_ = Zoo::Get()->RegisterTable(this); 17 | } 18 | 19 | WorkerTable::~WorkerTable() { 20 | delete m_; 21 | } 22 | 23 | ServerTable::ServerTable() { 24 | Zoo::Get()->RegisterTable(this); 25 | } 26 | 27 | void WorkerTable::Get(Blob keys, 28 | const GetOption* option) { 29 | MONITOR_BEGIN(WORKER_TABLE_SYNC_GET) 30 | Wait(GetAsync(keys, option)); 31 | MONITOR_END(WORKER_TABLE_SYNC_GET) 32 | } 33 | 34 | void WorkerTable::Add(Blob keys, Blob values, 35 | const AddOption* option) { 36 | MONITOR_BEGIN(WORKER_TABLE_SYNC_ADD) 37 | Wait(AddAsync(keys, values, option)); 38 | MONITOR_END(WORKER_TABLE_SYNC_ADD) 39 | } 40 | 41 | int WorkerTable::GetAsync(Blob keys, 42 | const GetOption* option) { 43 | m_->lock(); 44 | int id = msg_id_++; 45 | waitings_.push_back(new Waiter()); 46 | m_->unlock(); 47 | MessagePtr msg(new Message()); 48 | msg->set_src(Zoo::Get()->rank()); 49 | msg->set_type(MsgType::Request_Get); 50 | msg->set_msg_id(id); 51 | msg->set_table_id(table_id_); 52 | msg->Push(keys); 53 | // Add general option if necessary 54 | if (option != nullptr) { 55 | Blob general_option(option->data(), option->size()); 56 | msg->Push(general_option); 57 | } 58 | Zoo::Get()->SendTo(actor::kWorker, msg); 59 | return id; 60 | } 61 | 62 | int WorkerTable::AddAsync(Blob keys, Blob values, 63 | const AddOption* option) { 64 | m_->lock(); 65 | int id = msg_id_++; 66 | waitings_.push_back(new Waiter()); 67 | m_->unlock(); 68 | MessagePtr msg(new Message()); 69 | msg->set_src(Zoo::Get()->rank()); 70 | msg->set_type(MsgType::Request_Add); 71 | msg->set_msg_id(id); 72 | msg->set_table_id(table_id_); 73 | msg->Push(keys); 74 | msg->Push(values); 75 | // Add update option if necessary 76 | if (option != nullptr) { 77 | Blob update_option(option->data(), option->size()); 78 | msg->Push(update_option); 79 | } 80 | Zoo::Get()->SendTo(actor::kWorker, msg); 81 | return id; 82 | } 83 | 84 | void WorkerTable::Wait(int id) { 85 | // CHECK(waitings_.find(id) != waitings_.end()); 86 | m_->lock(); 87 | CHECK(waitings_[id] != nullptr); 88 | Waiter* w = waitings_[id]; 89 | m_->unlock(); 90 | 91 | w->Wait(); 92 | 93 | m_->lock(); 94 | delete waitings_[id]; 95 | waitings_[id] = nullptr; 96 | m_->unlock(); 97 | } 98 | 99 | void WorkerTable::Reset(int msg_id, int num_wait) { 100 | m_->lock(); 101 | CHECK_NOTNULL(waitings_[msg_id]); 102 | waitings_[msg_id]->Reset(num_wait); 103 | m_->unlock(); 104 | } 105 | 106 | void WorkerTable::Notify(int id) { 107 | m_->lock(); 108 | CHECK_NOTNULL(waitings_[id]); 109 | waitings_[id]->Notify(); 110 | m_->unlock(); 111 | } 112 | 113 | } // namespace multiverso 114 | -------------------------------------------------------------------------------- /src/table_factory.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/table_factory.h" 2 | 3 | #include "multiverso/table/array_table.h" 4 | #include "multiverso/table/matrix_table.h" 5 | 6 | namespace multiverso { 7 | 8 | namespace table_factory { 9 | std::vector server_tables; 10 | 11 | void FreeServerTables() { 12 | for (auto table : 
server_tables) { 13 | delete table; 14 | } 15 | server_tables.clear(); 16 | } 17 | 18 | void PushServerTable(ServerTable*table) { 19 | server_tables.push_back(table); 20 | } 21 | 22 | } // namespace table_factory 23 | 24 | } // namespace multiverso -------------------------------------------------------------------------------- /src/timer.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/util/timer.h" 2 | 3 | namespace multiverso { 4 | 5 | Timer::Timer() { 6 | Start(); 7 | } 8 | 9 | void Timer::Start() { 10 | start_point_ = Clock::now(); 11 | } 12 | 13 | double Timer::elapse() { 14 | TimePoint end_point = Clock::now(); 15 | std::chrono::duration time_ms = 16 | end_point - start_point_; 17 | return time_ms.count(); 18 | } 19 | 20 | } // namespace multiverso 21 | -------------------------------------------------------------------------------- /src/updater/updater.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/updater/updater.h" 2 | // TODO(qiwye) to make this a option in CMakelist 3 | //#define ENABLE_DCASGD 4 | 5 | #include "multiverso/updater/adagrad_updater.h" 6 | #include "multiverso/updater/momentum_updater.h" 7 | #ifdef ENABLE_DCASGD 8 | #include "multiverso/updater/dcasgd/dcasgd_updater.h" 9 | #include "multiverso/updater/dcasgd/dcasgda_updater.h" 10 | #endif 11 | #include "multiverso/updater/sgd_updater.h" 12 | #include "multiverso/util/configure.h" 13 | #include "multiverso/util/log.h" 14 | 15 | 16 | namespace multiverso { 17 | 18 | MV_DEFINE_string(updater_type, "default", "multiverso server updater type"); 19 | MV_DEFINE_int(omp_threads, 4 , "#theads used by openMP for updater"); 20 | 21 | template 22 | void Updater::Update(size_t num_element, T* data, T* delta, 23 | AddOption*, size_t offset) { 24 | // parallelism with openMP 25 | #pragma omp parallel for schedule(static) num_threads(MV_CONFIG_omp_threads) 26 | for (int i = 0; i < num_element; ++i) { 27 | data[i + offset] += delta[i]; 28 | } 29 | } 30 | 31 | template 32 | void Updater::Access(size_t num_element, T* data, T* blob_data, 33 | size_t offset , AddOption*) { 34 | // copy data from data to blob 35 | memcpy(blob_data, data + offset, num_element * sizeof(T)); 36 | } 37 | 38 | // Gradient-based updater in only for numerical table 39 | // For simple int table, just using simple updater 40 | template<> 41 | Updater* Updater::GetUpdater(size_t) { 42 | return new Updater(); 43 | } 44 | 45 | template 46 | Updater* Updater::GetUpdater(size_t size) { 47 | std::string type = MV_CONFIG_updater_type; 48 | if (type == "sgd") return new SGDUpdater(size); 49 | if (type == "adagrad") return new AdaGradUpdater(size); 50 | if (type == "momentum_sgd") return new MomentumUpdater(size); 51 | #ifdef ENABLE_DCASGD 52 | if (type == "dcasgd") return new DCASGDUpdater(size); 53 | if (type == "dcasgda") return new DCASGDAUpdater(size); 54 | #endif 55 | // Default: simple updater 56 | return new Updater(); 57 | } 58 | 59 | MV_INSTANTIATE_CLASS_WITH_BASE_TYPE(Updater); 60 | 61 | } -------------------------------------------------------------------------------- /src/util/configure.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/util/configure.h" 2 | 3 | #include 4 | #include 5 | #include "multiverso/util/log.h" 6 | 7 | namespace multiverso { 8 | 9 | void ParseCMDFlags(int* argc, char* argv[]) { 10 | if (argc == nullptr || argv == nullptr) return; 11 | 12 | int 
unused = 0;
13 |   size_t pos;
14 |   int intval;
15 |   bool boolval;
16 |   double dval;
17 |   std::string line, key, value;
18 | 
19 |   for (int i = 0; i < *argc; ++i) {
20 |     line = argv[i];
21 |     if (line.find("-") != std::string::npos) {
22 | 
23 |       pos = line.find("=");
24 |       CHECK(pos != std::string::npos);
25 | 
26 |       key = line.substr(1, pos - 1);
27 |       value = line.substr(pos + 1);
28 | 
29 |       if (configure::FlagRegister::Get()->SetFlagIfFound(key, value)) {
30 |         continue;
31 |       }
32 | 
33 |       intval = atoi(value.c_str());
34 |       if (configure::FlagRegister::Get()->SetFlagIfFound(key, intval)) {
35 |         continue;
36 |       }
37 | 
38 |       dval = strtod(value.c_str(), nullptr);  // parse the value part, not the whole "-name=value" token
39 |       if (configure::FlagRegister::Get()->SetFlagIfFound(key, dval)) {
40 |         continue;
41 |       }
42 | 
43 |       transform(value.begin(), value.end(), value.begin(), ::tolower);
44 |       boolval = (value == "true");
45 |       if (configure::FlagRegister::Get()->SetFlagIfFound(key, boolval)) {
46 |         continue;
47 |       }
48 |     }
49 | 
50 |     std::swap(argv[unused++], argv[i]);
51 |   }
52 | 
53 |   *argc = unused;
54 | }
55 | 
56 | } // namespace multiverso
57 | 
--------------------------------------------------------------------------------
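
ParseCMDFlags above consumes command-line arguments of the form -name=value, trying each registered flag type in turn (string, then int, then double, then bool) and compacting argv so that only unrecognized arguments remain. Flags can also be set programmatically through MV_SetFlag, which is declared in multiverso.h and explicitly instantiated for int, bool, double and std::string in multiverso.cpp. The sketch below uses illustrative flag values; that flags may be set before MV_Init is an assumption, not something shown in this listing.

#include <string>
#include "multiverso/multiverso.h"

int main(int argc, char* argv[]) {
  // Equivalent in effect to launching with:  ./app -updater_type=adagrad -omp_threads=8
  multiverso::MV_SetFlag("updater_type", std::string("adagrad"));  // consulted by Updater::GetUpdater
  multiverso::MV_SetFlag("omp_threads", 8);                        // used by the default updater's OpenMP loop

  multiverso::MV_Init(&argc, argv);  // -name=value arguments passed here are expected to be
                                     // handled by ParseCMDFlags during startup
  // ... create tables and run training ...
  multiverso::MV_ShutDown();
  return 0;
}
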