├── .gitattributes ├── .gitignore ├── .gitmodules ├── .travis.yml ├── Applications ├── LogisticRegression │ ├── .gitignore │ ├── CMakeLists.txt │ ├── LogisticRegression.vcxproj │ ├── LogisticRegression.vcxproj.filters │ ├── README.md │ ├── example │ │ ├── README.md │ │ ├── convert.py │ │ ├── mnist.config │ │ └── run.sh │ └── src │ │ ├── configure.cpp │ │ ├── configure.h │ │ ├── data_type.h │ │ ├── logreg.cpp │ │ ├── logreg.h │ │ ├── main.cpp │ │ ├── model │ │ ├── model.cpp │ │ ├── model.h │ │ ├── ps_model.cpp │ │ └── ps_model.h │ │ ├── objective │ │ ├── ftrl_objective.h │ │ ├── objective.cpp │ │ ├── objective.h │ │ ├── sigmoid_objective.h │ │ └── softmax_objective.h │ │ ├── reader.cpp │ │ ├── reader.h │ │ ├── regular │ │ ├── l1_regular.h │ │ ├── l2_regular.h │ │ ├── regular.cpp │ │ └── regular.h │ │ ├── updater │ │ ├── ftrl_updater.h │ │ ├── sgd_updater.h │ │ ├── updater.cpp │ │ └── updater.h │ │ └── util │ │ ├── common.h │ │ ├── ftrl_sparse_table.h │ │ ├── hopscotch_hash.h │ │ ├── log.cpp │ │ ├── log.h │ │ ├── sparse_table.h │ │ └── timer.h └── WordEmbedding │ ├── CMakeLists.txt │ ├── README.md │ ├── WordEmbedding.vcxproj │ ├── WordEmbedding.vcxproj.filters │ ├── example │ ├── Readme.txt │ ├── imges │ │ ├── Analogical Reasoning google vs dmtk.png │ │ └── WS 353 google vs dmtk.png │ └── run.bat │ ├── preprocess │ ├── Readme.txt │ ├── stopwords_simple.txt │ ├── util.cpp │ ├── util.h │ └── word_count.cpp │ └── src │ ├── block_queue.cpp │ ├── block_queue.h │ ├── communicator.cpp │ ├── communicator.h │ ├── constant.h │ ├── data_block.cpp │ ├── data_block.h │ ├── dictionary.cpp │ ├── dictionary.h │ ├── distributed_wordembedding.cpp │ ├── distributed_wordembedding.h │ ├── huffman_encoder.cpp │ ├── huffman_encoder.h │ ├── main.cpp │ ├── memory_manager.cpp │ ├── memory_manager.h │ ├── reader.cpp │ ├── reader.h │ ├── trainer.cpp │ ├── trainer.h │ ├── util.cpp │ ├── util.h │ ├── wordembedding.cpp │ └── wordembedding.h ├── CMakeLists.txt ├── LICENSE.md ├── Multiverso.sln ├── README.md ├── Test ├── CMakeLists.txt ├── Test.vcxproj ├── Test.vcxproj.filters ├── common.h ├── main.cpp ├── test_allreduce.cpp ├── test_array_table.cpp ├── test_kv_table.cpp ├── test_matrix_perf.cpp ├── test_matrix_table.cpp ├── test_net.cpp └── unittests │ ├── CMakeLists.txt │ ├── MultiversoTests.vcxproj │ ├── MultiversoTests.vcxproj.filters │ ├── multiverso_env.h │ ├── test_array.cpp │ ├── test_blob.cpp │ ├── test_kv.cpp │ ├── test_message.cpp │ ├── test_multiverso.cpp │ ├── test_node.cpp │ └── test_sync.cpp ├── binding ├── C# │ ├── MultiversoCLR │ │ ├── AssemblyInfo.cpp │ │ ├── MatrixTable.h │ │ ├── MultiversoCLR.cpp │ │ ├── MultiversoCLR.h │ │ ├── MultiversoCLR.vcxproj │ │ ├── MultiversoCLR.vcxproj.filters │ │ ├── ReadMe.txt │ │ └── multiverso.snk │ └── NuGet │ │ ├── GenerateNugetPackage.ps1 │ │ └── MultiversoCLR.nuspec ├── lua │ ├── .gitignore │ ├── ArrayTableHandler.lua │ ├── CMakeLists.txt │ ├── Makefile │ ├── MatrixTableHandler.lua │ ├── README.md │ ├── demos │ │ └── xor │ │ │ ├── Makefile │ │ │ ├── README.md │ │ │ ├── xor-multiverso.lua │ │ │ └── xor.lua │ ├── docs │ │ ├── API.md │ │ ├── BENCHMARK.md │ │ ├── TUTORIAL.md │ │ └── imgs │ │ │ ├── top1error_vs_epoch.png │ │ │ ├── top1error_vs_runningtime.png │ │ │ ├── top5error_vs_epoch.png │ │ │ └── top5error_vs_runningtime.png │ ├── init.lua │ ├── multiverso-scm-1.rockspec │ ├── test.lua │ └── util.lua └── python │ ├── README.md │ ├── docs │ ├── BENCHMARK.md │ ├── TUTORIAL.md │ └── imgs │ │ ├── accuracy_epoch.png │ │ └── accuracy_time.png │ ├── examples │ ├── __init__.py 
│ └── theano │ │ ├── __init__.py │ │ ├── cnn.py │ │ ├── keras │ │ ├── README.md │ │ └── addition_rnn_mv.py │ │ ├── lasagne │ │ ├── Deep_Residual_Learning_CIFAR-10.py │ │ ├── Makefile │ │ └── __init__.py │ │ ├── load_data.py │ │ └── logistic_regression.py │ ├── multiverso │ ├── __init__.py │ ├── api.py │ ├── tables.py │ ├── tests │ │ └── test_multiverso.py │ ├── theano_ext │ │ ├── __init__.py │ │ ├── keras_ext │ │ │ ├── __init__.py │ │ │ ├── callbacks.py │ │ │ └── param_manager.py │ │ ├── lasagne_ext │ │ │ ├── __init__.py │ │ │ └── param_manager.py │ │ ├── param_manager.py │ │ └── sharedvar.py │ └── utils.py │ └── setup.py ├── cmake_uninstall.cmake.in ├── deploy └── docker │ └── Dockerfile ├── include └── multiverso │ ├── actor.h │ ├── blob.h │ ├── c_api.h │ ├── communicator.h │ ├── controller.h │ ├── dashboard.h │ ├── io │ ├── hdfs_stream.h │ ├── io.h │ └── local_stream.h │ ├── message.h │ ├── multiverso.h │ ├── net.h │ ├── net │ ├── allreduce_engine.h │ ├── mpi_net.h │ └── zmq_net.h │ ├── node.h │ ├── server.h │ ├── table │ ├── array_table.h │ ├── kv_table.h │ ├── matrix.h │ ├── matrix_table.h │ └── sparse_matrix_table.h │ ├── table_factory.h │ ├── table_interface.h │ ├── updater │ ├── adagrad_updater.h │ ├── momentum_updater.h │ ├── sgd_updater.h │ └── updater.h │ ├── util │ ├── allocator.h │ ├── async_buffer.h │ ├── configure.h │ ├── log.h │ ├── mt_queue.h │ ├── net_util.h │ ├── quantization_util.h │ ├── timer.h │ └── waiter.h │ ├── worker.h │ └── zoo.h └── src ├── .gitignore ├── CMakeLists.txt ├── Multiverso.vcxproj ├── Multiverso.vcxproj.filters ├── Multiverso_zmq.vcxproj ├── actor.cpp ├── blob.cpp ├── build_dll.bat ├── c_api.cpp ├── communicator.cpp ├── controller.cpp ├── dashboard.cpp ├── io ├── hdfs_stream.cpp ├── io.cpp └── local_stream.cpp ├── multiverso.cpp ├── net.cpp ├── net ├── allreduce_engine.cpp ├── allreduce_topo.cpp └── mpi_net.cpp ├── node.cpp ├── server.cpp ├── table.cpp ├── table ├── array_table.cpp ├── matrix.cpp ├── matrix_table.cpp └── sparse_matrix_table.cpp ├── table_factory.cpp ├── timer.cpp ├── updater └── updater.cpp ├── util ├── allocator.cpp ├── configure.cpp ├── log.cpp └── net_util.cpp ├── worker.cpp └── zoo.cpp /.gitattributes: -------------------------------------------------------------------------------- 1 | .gitattributes text 2 | .gitignore text 3 | .gitmodules text 4 | 5 | 6 | *.md text 7 | *.txt text 8 | *.TXT text 9 | *.yml text 10 | *.yml.bak text 11 | *.config text 12 | 13 | Makefile text 14 | CMakeLists.txt 15 | *.cmake.in text 16 | *.sln text 17 | *.pyproj text 18 | *.vcxproj text 19 | *.vcxproj.filters text 20 | *.vssettings text 21 | *.csproj text 22 | *.props text 23 | *.asax text 24 | *.nuspec text 25 | *.rockspec text 26 | 27 | *.h text 28 | *.cpp text 29 | *.cc text 30 | *.cu text 31 | *.cuh text 32 | *.proto text 33 | *.sh text 34 | *.bat text 35 | *.cmd text 36 | *.py text 37 | *.ipynb text 38 | *.pl text 39 | *.ps1 text 40 | *.ps text 41 | *.i text 42 | *.lua text 43 | 44 | Dockerfile* text 45 | 46 | # Binary extensions: 47 | *.ark binary 48 | *.chunk binary 49 | *.cmf binary 50 | *.docx binary 51 | *.jpg binary 52 | *.pdf binary 53 | *.png binary 54 | *.pptx binary 55 | *.snk binary 56 | *.vsdm binary 57 | *.zip binary -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "include/multiverso/updater/dcasgd"] 2 | path = include/multiverso/updater/dcasgd 3 | url = 
https://github.com/Microsoft/Delayed-Compensation-Asynchronous-Stochastic-Gradient-Descent-for-Multiverso.git 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | sudo: required 3 | dist: trusty 4 | 5 | # solving MPI conflict https://docs.travis-ci.com/user/languages/cpp#OpenMP-projects 6 | before_install: 7 | - test -n $CC && unset CC 8 | - test -n $CXX && unset CXX 9 | 10 | install: 11 | - sudo apt-get install -y libopenmpi-dev openmpi-bin build-essential 12 | 13 | # for boost unit test 14 | - sudo apt-get install -y libboost-test-dev 15 | 16 | # for testing python binding 17 | - sudo apt-get install -y gfortran libblas-dev liblapack-dev libatlas-base-dev 18 | - sudo apt-get install -y cmake python-numpy python-scipy python-nose 19 | 20 | # for testing lua binding 21 | #- curl -sk https://raw.githubusercontent.com/torch/ezinstall/master/install-deps | bash -e 22 | #- git clone https://github.com/torch/distro.git ~/torch --recursive 23 | #- cd ~/torch; ./install.sh -b 24 | #- source ~/.bashrc 25 | 26 | before_script: 27 | - cd $TRAVIS_BUILD_DIR 28 | - mkdir build && cd build && cmake .. 29 | 30 | script: 31 | - make && sudo make install 32 | # run cpp tests 33 | # - mpirun -np 4 ./Test/multiverso.test kv 34 | # - mpirun -np 4 ./Test/multiverso.test array 35 | # - mpirun -np 4 ./Test/multiverso.test allreduce 36 | 37 | # - ./Test/unittests/multiverso.ut --log_level=test_suite 38 | 39 | # lua tests 40 | #- cd ../binding/lua/ 41 | #- make install 42 | # - make test 43 | 44 | # python tests 45 | # - cd ../binding/python/ 46 | # - sudo python setup.py install 47 | # - sudo nosetests # sudo is needed when testing python on travis 48 | 49 | notifications: 50 | email: false 51 | 52 | matrix: 53 | include: 54 | - compiler: gcc 55 | - compiler: clang 56 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/.gitignore: -------------------------------------------------------------------------------- 1 | # User-specific files 2 | *.suo 3 | *.user 4 | *.userosscache 5 | *.sln.docstates 6 | 7 | # User-specific files (MonoDevelop/Xamarin Studio) 8 | *.userprefs 9 | 10 | # Build results 11 | [Dd]ebug/ 12 | [Dd]ebugPublic/ 13 | [Rr]elease/ 14 | [Rr]eleases/ 15 | x64/ 16 | x86/ 17 | bld/ 18 | [Bb]in/ 19 | [Oo]bj/ 20 | [Ll]og/ 21 | [Bb]uild/ 22 | 23 | # Visual C++ cache files 24 | ipch/ 25 | *.aps 26 | *.ncb 27 | *.opendb 28 | *.opensdf 29 | *.sdf 30 | *.cachefile 31 | 32 | # Visual Studio profiler 33 | *.psess 34 | *.vsp 35 | *.vspx 36 | *.sap# Compiled Object files 37 | *.slo 38 | *.lo 39 | *.o 40 | *.obj 41 | 42 | # Precompiled Headers 43 | *.gch 44 | *.pch 45 | 46 | # Compiled Dynamic libraries 47 | *.so 48 | *.dylib 49 | *.dll 50 | 51 | # Fortran module files 52 | *.mod 53 | 54 | # Compiled Static libraries 55 | *.lai 56 | *.la 57 | *.a 58 | *.lib 59 | 60 | # Executables 61 | *.exe 62 | *.out 63 | *.app 64 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(MULTIVERSO_DIR ${PROJECT_SOURCE_DIR}) 2 | set(LR_DIR ${PROJECT_SOURCE_DIR}/Applications/LogisticRegression) 3 | 4 | find_package(MPI REQUIRED) 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11") 6 | 7 | if(LOGLEVEL STREQUAL "DEBUG") 8 | 
add_definitions(-DLOGLEVEL_DEBUG) 9 | elseif(LOGLEVEL STREQUAL "FATAL") 10 | add_definitions(-DLOGLEVEL_FATAL) 11 | elseif(LOGLEVEL STREQUAL "ERROR") 12 | add_definitions(-DLOGLEVEL_ERROR) 13 | else() 14 | add_definitions(-DLOGLEVEL_INFO) 15 | endif() 16 | 17 | set(MULTIVERSO_INC ${MULTIVERSO_DIR}/include) 18 | set(MULTIVERSO_LIB ${MULTIVERSO_DIR}/build/src) 19 | set(MULTIVERSO_SRC ${MULTIVERSO_DIR}/src) 20 | 21 | include_directories(${MULTIVERSO_INC}) 22 | include_directories(${LR_DIR}/src) 23 | 24 | link_directories(${MULTIVERSO_LIB}) 25 | 26 | set(SRCDIR ${LR_DIR}/src) 27 | aux_source_directory(${LR_DIR}/src SRC_ROOT) 28 | aux_source_directory(${SRCDIR}/model SRC_MODEL) 29 | aux_source_directory(${SRCDIR}/objective SRC_OBJECTIVE) 30 | aux_source_directory(${SRCDIR}/regular SRC_REGULAR) 31 | aux_source_directory(${SRCDIR}/updater SRC_UPDATER) 32 | aux_source_directory(${SRCDIR}/util SRC_UTIL) 33 | set(SRC ${SRC_MODEL} ${SRC_OBJECTIVE} ${SRC_REGULAR} ${SRC_UPDATER} ${SRC_UTIL} ${MULTIVERSO_SRC}/table/array_table.cpp ${SRC_ROOT}) 34 | 35 | add_executable(LogisticRegression ${SRC}) 36 | 37 | target_link_libraries(LogisticRegression multiverso ${MPI_CXX_LIBRARIES} pthread) 38 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/README.md: -------------------------------------------------------------------------------- 1 | 2 | Logistic Regression 3 | ====== 4 | The Logistic Regression tool is a parallel implementation of logistic regression on top of Multiverso. It is an easy-to-use tool for training models on big data across multiple machines. 5 | 6 | We tested the tool on a Bing Ads click prediction dataset at Microsoft. The dataset is about 4 TB with more than 5 billion samples. The experiment ran on a cluster of 24 machines, each with 20 physical cores and 256 GB of RAM, connected with InfiniBand. Training one epoch finishes in about 18 minutes. 7 | 8 | 9 | For more details, please refer to the [wiki](https://github.com/Microsoft/multiverso/wiki/Logistic-Regression). 10 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/example/README.md: -------------------------------------------------------------------------------- 1 | This is a simple example that runs a multi-class classification task on the [MNIST](http://yann.lecun.com/exdb/mnist/) data set without a parameter server. 2 | 3 | On Linux, just run `sh run.sh`. This script builds the project, downloads the data, converts the data format, and runs the example. 4 | 5 | On Windows, build the project and download the data set, then run `python convert.py` to convert the data format and pass `mnist.config` as the command-line argument to start a program instance. 
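A rough sketch of that Windows flow, assuming the four MNIST files have already been downloaded and unpacked into this directory and that the built `LogisticRegression` executable is reachable from here (adjust the path to wherever your build places it):

```
python convert.py train
python convert.py test
LogisticRegression mnist.config
```

`convert.py train` writes `train.data` and `convert.py test` writes `test.data`, which are the file names expected by `mnist.config`.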
6 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/example/convert.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import struct 3 | 4 | def convert(info): 5 | in_image, in_label, out_file, line_count = info 6 | 7 | binfile = open(in_label , 'rb') 8 | labels = binfile.read() 9 | binfile.close() 10 | 11 | binfile = open(in_image, 'rb') 12 | images = binfile.read() 13 | binfile.close() 14 | 15 | label_index = 0 16 | image_index = 0; 17 | magic, numImages , numRows , numColumns = struct.unpack_from('>IIII' , images , image_index) 18 | image_index += struct.calcsize('>IIII') 19 | magic, num = struct.unpack_from('>II', labels, label_index) 20 | label_index += struct.calcsize('>II') 21 | 22 | output = open(out_file,'w') 23 | for i in range(line_count): 24 | label = struct.unpack_from('>B', labels, label_index) 25 | label = int(label[0]) 26 | output.write(str(label)); 27 | 28 | im = struct.unpack_from('>784B' ,images, image_index) 29 | im = np.array(im) 30 | for j in range(784): 31 | output.write(' ' + str(im[j])) 32 | 33 | output.write('\n') 34 | image_index += struct.calcsize('>784B') 35 | label_index += struct.calcsize('>B') 36 | output.close() 37 | 38 | a={'train':('train-images-idx3-ubyte','train-labels-idx1-ubyte','train.data', 60000),'test':('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte', 'test.data', 10000)} 39 | import sys 40 | convert(a[sys.argv[1]]) 41 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/example/mnist.config: -------------------------------------------------------------------------------- 1 | input_size=784 2 | output_size=10 3 | objective_type=softmax 4 | regular_type=L2 5 | updater_type=sgd 6 | train_epoch=9 7 | sparse=false 8 | use_ps=false 9 | minibatch_size=20 10 | train_file=train.data 11 | test_file=test.data 12 | output_file=test.out 13 | learning_rate_coef=7e6 14 | regular_coef=0.0007 15 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/example/run.sh: -------------------------------------------------------------------------------- 1 | cd ../../../ 2 | mkdir build 3 | cd build 4 | cmake .. 
&& make 5 | 6 | cd ../Applications/LogisticRegression/example/ 7 | 8 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz && gunzip train-images-idx3-ubyte.gz & 9 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz && gunzip train-labels-idx1-ubyte.gz & 10 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz && gunzip t10k-images-idx3-ubyte.gz & 11 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz && gunzip t10k-labels-idx1-ubyte.gz & 12 | wait 13 | 14 | python convert.py train && rm train-images-idx3-ubyte -f && rm train-labels-idx1-ubyte -f & 15 | python convert.py test && rm t10k-images-idx3-ubyte -f && rm t10k-labels-idx1-ubyte -f & 16 | wait 17 | 18 | ../../../build/Applications/LogisticRegression/LogisticRegression mnist.config 19 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/logreg.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_LOGREG_H_ 2 | #define LOGREG_LOGREG_H_ 3 | 4 | #include 5 | 6 | #include "data_type.h" 7 | #include "model/model.h" 8 | #include "configure.h" 9 | 10 | namespace logreg { 11 | 12 | // only support EleType = int/float/double 13 | template 14 | class LogReg { 15 | public: 16 | // \param config_file each line as: key=value 17 | explicit LogReg(const std::string &config_file); 18 | ~LogReg(); 19 | 20 | void Train(const std::string& train_file); 21 | // config file should provide 22 | // train file 23 | void Train(); 24 | 25 | // will save output in result if result != nullptr 26 | // return test error 27 | double Test(const std::string& test_file, EleType**result = nullptr); 28 | // config file should provide 29 | // test file 30 | double Test(EleType**result = nullptr); 31 | // When model is too large, the program may crash... 
32 | void SaveModel(); 33 | void SaveModel(const std::string& model_file); 34 | 35 | // return the data block of model data 36 | DataBlock* model() const; 37 | 38 | private: 39 | Model *model_; 40 | Configure* config_; 41 | }; 42 | 43 | } // namespace logreg 44 | 45 | #endif // LOGREG_LOGREG_H_ 46 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "logreg.h" 2 | 3 | #include 4 | #include 5 | using namespace logreg; 6 | 7 | int main(int argc, char* argv[]) { 8 | LogReg lr(argv[1]); 9 | 10 | lr.Train(); 11 | 12 | return 0; 13 | } -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/model/model.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_MODEL_H_ 2 | #define LOGREG_MODEL_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "data_type.h" 8 | #include "configure.h" 9 | #include "updater/updater.h" 10 | #include "objective/objective.h" 11 | #include "regular/regular.h" 12 | 13 | #include "util/timer.h" 14 | #include "multiverso/util/mt_queue.h" 15 | 16 | namespace logreg { 17 | 18 | // class for model data management 19 | // local model 20 | template 21 | class Model { 22 | public: 23 | // initiate with config data 24 | // \param config should provide: 25 | // objective type 26 | // updater type 27 | // input size 28 | // output size 29 | explicit Model(Configure& config); 30 | virtual ~Model(); 31 | // update model with #count samples 32 | // \return sum of train loss of every sample 33 | virtual float Update(int count, Sample**samples); 34 | // \param input one input 35 | // \return correct number 36 | virtual int Predict(int count, Sample**samples, EleType**predicts); 37 | // load model data from a binary file 38 | virtual void Load(const std::string& model_file); 39 | // write model data in binary method 40 | virtual void Store(const std::string& model_file); 41 | virtual void SetKeys(multiverso::MtQueue*> *keys) {} 42 | virtual void DisplayTime(); 43 | DataBlock* table() const { return table_; } 44 | // factory method to get a new instance 45 | // \param config should contain model needed configs 46 | // when use_ps=true, return a distributed model 47 | // default use a local version 48 | static Model* Get(Configure& config); 49 | 50 | protected: 51 | // compute update delta 52 | virtual float GetGradient(Sample* sample, DataBlock* delta); 53 | // update table 54 | virtual void UpdateTable(DataBlock* delta); 55 | 56 | protected: 57 | bool ftrl_; 58 | 59 | Objective* objective_; 60 | Updater* updater_; 61 | // local cache 62 | DataBlock* table_; 63 | 64 | int num_row_; 65 | 66 | int minibatch_size_; 67 | 68 | DataBlock* delta_; 69 | 70 | Timer timer_; 71 | double computation_time_; 72 | double compute_count_; 73 | }; 74 | 75 | } // namespace logreg 76 | 77 | #endif // LOGREG_MODEL_H_ 78 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/model/ps_model.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_MODEL_PS_MODEL_H_ 2 | #define LOGREG_MODEL_PS_MODEL_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "model.h" 8 | 9 | #include "multiverso/multiverso.h" 10 | #include "multiverso/table_interface.h" 11 | #include "multiverso/table/array_table.h" 12 | #include 
"multiverso/util/async_buffer.h" 13 | 14 | #include "util/timer.h" 15 | 16 | namespace logreg { 17 | 18 | template 19 | class PSModel : public Model { 20 | public: 21 | explicit PSModel(Configure& config); 22 | ~PSModel(); 23 | int Predict(int count, Sample**samples, EleType**predicts); 24 | void Load(const std::string& model_file); 25 | void Store(const std::string& model_file); 26 | void SetKeys(multiverso::MtQueue*> *keys); 27 | void DisplayTime(); 28 | 29 | private: 30 | // use multiverso table add interface 31 | void UpdateTable(DataBlock* delta); 32 | void PullModel(); 33 | // sync table if needed 34 | void DoesNeedSync(); 35 | void PullWholeModel(); 36 | void GetPipelineTable(); 37 | 38 | private: 39 | // multiverso table 40 | multiverso::WorkerTable* worker_table_; 41 | // for pipeline sync 42 | DataBlock* buffer_[2]; 43 | int wait_id_; 44 | int buffer_index_; 45 | // works when not pipeline 46 | int count_sample_; 47 | int sync_frequency_; 48 | 49 | multiverso::MtQueue*> *keys_; 50 | 51 | Timer network_timer_; 52 | double push_time_; 53 | double pull_time_; 54 | size_t pull_count_; 55 | size_t push_count_; 56 | }; 57 | 58 | } // namespace logreg 59 | 60 | #endif // LOGREG_MODEL_PS_MODEL_H_ 61 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/objective/ftrl_objective.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_OBJECTIVE_FTRL_OBJECTIVE_H_ 2 | #define LOGREG_OBJECTIVE_FTRL_OBJECTIVE_H_ 3 | 4 | #include "objective.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class FTRLObjective : public Objective { 10 | public: 11 | explicit FTRLObjective(const Configure& config); 12 | 13 | ~FTRLObjective(); 14 | 15 | float Gradient(Sample* sample, 16 | DataBlock* model, 17 | DataBlock* gradient); 18 | 19 | float Predict(Sample*sample, 20 | DataBlock* model, EleType* predict); 21 | 22 | private: 23 | float Predict(Sample*sample, 24 | DataBlock* model, EleType* predict, DataBlock* w); 25 | EleType sgn(const EleType x); 26 | 27 | private: 28 | Objective *objective_; 29 | 30 | double lambda1_; 31 | double lambda2_; 32 | double alpha_; 33 | double beta_; 34 | }; 35 | 36 | } // namespace logreg 37 | 38 | #endif // LOGREG_OBJECTIVE_FTRL_OBJECTIVE_H_ 39 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/objective/objective.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_OBJECTIVE_OBJECTIVE_H_ 2 | #define LOGREG_OBJECTIVE_OBJECTIVE_H_ 3 | 4 | #include 5 | 6 | #include "data_type.h" 7 | #include "configure.h" 8 | #include "regular/regular.h" 9 | 10 | namespace logreg { 11 | 12 | // provide methods for predict and calculate gradient 13 | template 14 | class Objective { 15 | public: 16 | // \param config should provide: 17 | // input size 18 | // output size 19 | // regular type 20 | explicit Objective(const Configure& config); 21 | virtual ~Objective(); 22 | // return train loss 23 | virtual float Gradient(Sample* sample, 24 | DataBlock* model, 25 | DataBlock* gradient); 26 | // return test loss 27 | virtual float Predict(Sample*sample, 28 | DataBlock* model, EleType* predict); 29 | 30 | virtual bool Correct(const int label, EleType*predict); 31 | 32 | // factory method to get a new instance 33 | // \param config should contain objective type 34 | // and params for Objective initialization 35 | static Objective* Get(const Configure& config); 36 | 37 | 
protected: 38 | // diff -= (label == i) 39 | virtual void Diff(int label, EleType*diff); 40 | virtual void AddRegularization(Sample*sample, 41 | DataBlock* model, 42 | EleType* loss, 43 | DataBlock* gradient); 44 | virtual float Loss(Sample*sample, EleType* predict); 45 | 46 | protected: 47 | Regular *regular_; 48 | 49 | size_t input_size_; 50 | int output_size_; 51 | }; 52 | 53 | } // namespace logreg 54 | 55 | #endif // LOGREG_OBJECTIVE_OBJECTIVE_H_ 56 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/objective/sigmoid_objective.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_OBJECTIVE_SIGMOID_OBJECTIVE_H_ 2 | #define LOGREG_OBJECTIVE_SIGMOID_OBJECTIVE_H_ 3 | 4 | #include "objective.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class SigmoidObjective : public Objective { 10 | public: 11 | explicit SigmoidObjective(const Configure& config); 12 | 13 | float Gradient(Sample* sample, 14 | DataBlock* model, 15 | DataBlock* gradient); 16 | 17 | float Predict(Sample*sample, 18 | DataBlock* model, EleType* predict); 19 | 20 | private: 21 | float Sigmoid(Sample* sample, 22 | DataBlock*model); 23 | float Loss(Sample*sample, EleType* predict); 24 | }; 25 | 26 | } // namespace logreg 27 | 28 | #endif // LOGREG_OBJECTIVE_SIGMOID_OBJECTIVE_H_ 29 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/objective/softmax_objective.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_OBJECTIVE_SOFTMAX_OBJECTIVE_H_ 2 | #define LOGREG_OBJECTIVE_SOFTMAX_OBJECTIVE_H_ 3 | 4 | #include "objective.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class SoftmaxObjective : public Objective { 10 | public: 11 | explicit SoftmaxObjective(const Configure& config); 12 | 13 | virtual float Predict(Sample*sample, 14 | DataBlock* model, EleType* predict); 15 | 16 | protected: 17 | float Sigmoid(Sample* sample, 18 | DataBlock*model, EleType*sigmoid); 19 | float Loss(Sample*sample, EleType* predict); 20 | }; 21 | 22 | } // namespace logreg 23 | 24 | #endif // LOGREG_OBJECTIVE_SOFTMAX_OBJECTIVE_H 25 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/regular/l1_regular.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_REGULAR_L1_REGULAR_H_ 2 | #define LOGREG_REGULAR_L1_REGULAR_H_ 3 | 4 | #include "regular.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class L1Regular : public Regular { 10 | public: 11 | explicit L1Regular(const Configure& config); 12 | virtual ~L1Regular() = default; 13 | 14 | EleType Calculate( 15 | size_t key, 16 | DataBlock*model); 17 | }; 18 | 19 | } // namespace logreg 20 | 21 | #endif // LOGREG_REGULAR_L1_REGULAR_H_ 22 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/regular/l2_regular.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_REGULAR_L2_REGULAR_H_ 2 | #define LOGREG_REGULAR_L2_REGULAR_H_ 3 | 4 | #include "regular.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class L2Regular : public Regular { 10 | public: 11 | explicit L2Regular(const Configure& config); 12 | virtual ~L2Regular() = default; 13 | 14 | EleType Calculate( 15 | size_t key, 16 | DataBlock*model); 17 | }; 18 | 19 | } // namespace logreg 20 | 
21 | #endif // LOGREG_REGULAR_L2_REGULAR_H_ 22 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/regular/regular.cpp: -------------------------------------------------------------------------------- 1 | #include "regular/regular.h" 2 | 3 | #include 4 | 5 | #include "regular/l1_regular.h" 6 | #include "regular/l2_regular.h" 7 | 8 | #include "util/common.h" 9 | #include "util/log.h" 10 | 11 | namespace logreg { 12 | 13 | template 14 | Regular::Regular(const Configure& config) { 15 | this->input_size_ = config.input_size; 16 | this->output_size_ = config.output_size; 17 | this->regular_coef_ = config.regular_coef; 18 | } 19 | 20 | template 21 | EleType Regular::Calculate( 22 | size_t key, 23 | DataBlock*model) { 24 | return 0; 25 | } 26 | 27 | template 28 | L1Regular::L1Regular(const Configure& config) : 29 | Regular(config) { 30 | } 31 | 32 | template 33 | EleType L1Regular::Calculate( 34 | size_t key, 35 | DataBlock*model) { 36 | EleType* pval = model->Get(key); 37 | // sgn(x) * regular_coef 38 | return (pval == nullptr || *pval == 0) ? 0 39 | : (EleType)(*pval > 0 ? this->regular_coef_ : -this->regular_coef_); 40 | } 41 | 42 | DECLARE_TEMPLATE_CLASS_WITH_BASIC_TYPE(L1Regular); 43 | 44 | template 45 | L2Regular::L2Regular(const Configure& config) : 46 | Regular(config) { 47 | } 48 | 49 | template 50 | EleType L2Regular::Calculate( 51 | size_t key, 52 | DataBlock*model) { 53 | EleType* pval = model->Get(key); 54 | // abs(x) * regular_coef 55 | return pval == nullptr ? 0 : (EleType)(abs(*pval) * this->regular_coef_); 56 | } 57 | 58 | DECLARE_TEMPLATE_CLASS_WITH_BASIC_TYPE(L2Regular); 59 | 60 | template 61 | Regular* Regular::Get(const Configure& config) { 62 | const std::string &type = config.regular_type; 63 | Log::Write(Info, "Regular type %s\n", type.c_str()); 64 | if (type == "L1") { 65 | return new L1Regular(config); 66 | } else if (type == "L2") { 67 | return new L2Regular(config); 68 | } 69 | return new Regular(config); 70 | } 71 | 72 | DECLARE_TEMPLATE_CLASS_WITH_BASIC_TYPE(Regular); 73 | } // namespace logreg 74 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/regular/regular.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_REGULAR_REGULAR_H_ 2 | #define LOGREG_REGULAR_REGULAR_H_ 3 | 4 | #include 5 | 6 | #include "data_type.h" 7 | #include "configure.h" 8 | 9 | namespace logreg { 10 | 11 | // provide regularization term 12 | template 13 | class Regular { 14 | public: 15 | // \param config should provide: 16 | // input size 17 | // output size 18 | explicit Regular(const Configure& config); 19 | virtual ~Regular() = default; 20 | // get regularization term 21 | virtual EleType Calculate( 22 | size_t key, 23 | DataBlock*model); 24 | 25 | // factory method to get a new instance 26 | // \param config should provide regular type 27 | // and needed params for Regular initialization 28 | static Regular* Get(const Configure& config); 29 | 30 | protected: 31 | size_t input_size_; 32 | int output_size_; 33 | 34 | double regular_coef_; 35 | }; 36 | 37 | } // namespace logreg 38 | 39 | #endif // LOGREG_REGULAR_REGULAR_H_ 40 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/updater/ftrl_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UPDATER_FTRL_UPDATER_ 2 | #define 
LOGREG_UPDATER_FTRL_UPDATER_ 3 | 4 | #include "updater.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class FTRLUpdater : public Updater { 10 | public: 11 | explicit FTRLUpdater(const Configure& config); 12 | void Update(DataBlock* data, DataBlock* delta); 13 | }; 14 | 15 | } // namespace logreg 16 | 17 | #endif // LOGREG_UPDATER_FTRL_UPDATER_ 18 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/updater/sgd_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UPDATER_SGD_UPDATER_H_ 2 | #define LOGREG_UPDATER_SGD_UPDATER_H_ 3 | 4 | #include "updater.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | class SGDUpdater : public Updater { 10 | public: 11 | explicit SGDUpdater(const Configure& config); 12 | void Process(DataBlock* delta); 13 | 14 | private: 15 | double initial_learning_rate_; 16 | double learning_rate_; 17 | double learning_rate_coef_; 18 | size_t update_count_; 19 | int minibatch_size_; 20 | }; 21 | 22 | } // namespace logreg 23 | 24 | #endif // LOGREG_UPDATER_SGD_UPDATER_H_ 25 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/updater/updater.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UPDATER_UPDATER_H_ 2 | #define LOGREG_UPDATER_UPDATER_H_ 3 | 4 | #include 5 | 6 | #include "data_type.h" 7 | #include "configure.h" 8 | 9 | namespace logreg { 10 | 11 | template 12 | class Updater { 13 | public: 14 | virtual ~Updater() = default; 15 | 16 | virtual void Update(DataBlock* data, 17 | DataBlock* delta); 18 | 19 | virtual void Process(DataBlock* delta) {} 20 | 21 | // factory method to get a new instance 22 | // \param config should provide updater type and 23 | // params for updater initiate 24 | static Updater* Get(const Configure& config); 25 | 26 | protected: 27 | int row_size_; 28 | int num_row_; 29 | }; 30 | 31 | } // namespace logreg 32 | 33 | #endif // LOGREG_UPDATER_UPDATER_H_ 34 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/util/common.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UTIL_HELPER_H_ 2 | #define LOGREG_UTIL_HELPER_H_ 3 | 4 | #include "data_type.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | EleType** CreateMatrix(int num_row, int num_col) { 10 | EleType **matrix = new EleType*[num_row]; 11 | for (int i = 0; i < num_row; ++i) 12 | matrix[i] = new EleType[num_col]; 13 | return matrix; 14 | } 15 | 16 | template 17 | void FreeMatrix(int num_row, EleType**matrix) { 18 | for (int i = 0; i < num_row; ++i) 19 | delete[]matrix[i]; 20 | delete[]matrix; 21 | } 22 | 23 | template 24 | EleType Dot(size_t offset, DataBlock*matrix, Sample*sample) { 25 | EleType sum = 0; 26 | int size = static_cast(sample->values.size()); 27 | if (matrix->sparse()) { 28 | DEBUG_CHECK(sample->keys.size() == sample->values.size()); 29 | for (int i = 0; i < size; ++i) { 30 | EleType* pval = matrix->Get(sample->keys[i] + offset); 31 | sum += (pval == nullptr ? 
0 : (sample->values[i] * (*pval))); 32 | } 33 | } else { 34 | EleType*rawa = static_cast(matrix->raw()) + offset; 35 | EleType*rawb = sample->values.data(); 36 | for (int i = 0; i < size; ++i) { 37 | sum += rawa[i] * rawb[i]; 38 | } 39 | } 40 | return sum; 41 | } 42 | 43 | template 44 | inline EleType* MatrixRow(EleType*matrix, int row_id, size_t num_col) { 45 | return matrix + row_id * num_col; 46 | } 47 | 48 | template 49 | Sample** CeateSamples(int num, size_t size, bool sparse) { 50 | Sample**samples = new Sample*[num]; 51 | for (int i = 0; i < num; ++i) { 52 | samples[i] = new Sample(sparse, size); 53 | } 54 | return samples; 55 | } 56 | 57 | template 58 | void FreeSamples(int num, Sample**samples) { 59 | for (int i = 0; i < num; ++i) { 60 | delete samples[i]; 61 | } 62 | delete[]samples; 63 | } 64 | 65 | #define DECLARE_TEMPLATE_CLASS_WITH_BASIC_TYPE(name) \ 66 | template class name; \ 67 | template class name; \ 68 | template class name; 69 | 70 | } // namespace logreg 71 | 72 | #endif // LOGREG_UTIL_HELPER_H_ 73 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/util/ftrl_sparse_table.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UTIL_FTRL_SPARSE_TABLE_H_ 2 | #define LOGREG_UTIL_FTRL_SPARSE_TABLE_H_ 3 | 4 | #include "util/sparse_table.h" 5 | 6 | namespace logreg { 7 | 8 | template 9 | struct FTRLTableOption; 10 | 11 | template 12 | class FTRLWorkerTable : public SparseWorkerTable> { 13 | protected: 14 | using Blob = multiverso::Blob; 15 | 16 | public: 17 | explicit FTRLWorkerTable(size_t size) : 18 | SparseWorkerTable>(size) {} 19 | 20 | explicit FTRLWorkerTable(const FTRLTableOption &option) : 21 | FTRLWorkerTable(option.size) {} 22 | 23 | int GetAsync(DataBlock>* data) { 24 | LR_CHECK(data != nullptr && data->sparse()); 25 | this->data_ = (DataBlock>*)data; 26 | size_t all_key = -1; 27 | Blob whole_table(&all_key, sizeof(size_t)); 28 | return multiverso::WorkerTable::GetAsync(whole_table); 29 | } 30 | void Get(DataBlock>* data) { 31 | this->Wait(GetAsync(data)); 32 | } 33 | int GetAsync(SparseBlock* keys, DataBlock>* data) { 34 | LR_CHECK(keys != nullptr && data != nullptr && data->sparse()); 35 | data->Clear(); 36 | this->data_ = (DataBlock>*)data; 37 | 38 | size_t size = keys->size(); 39 | Blob key(size * sizeof(size_t)); 40 | size_t* pkey = reinterpret_cast(key.data()); 41 | 42 | SparseBlockIter iter(keys); 43 | while (iter.Next()) { 44 | *(pkey++) = iter.Key(); 45 | } 46 | 47 | return multiverso::WorkerTable::GetAsync(key); 48 | } 49 | void Get(SparseBlock* keys, DataBlock>* data) { 50 | this->Wait(GetAsync(keys, data)); 51 | } 52 | 53 | void ProcessReplyGet(std::vector& reply_data) { 54 | DEBUG_CHECK(reply_data.size() == 2 || reply_data.size() == 1); 55 | DEBUG_CHECK(this->data_ != nullptr); 56 | // no data 57 | if (reply_data.size() == 1) { 58 | return; 59 | } 60 | size_t *keys = reinterpret_cast(reply_data[0].data()); 61 | auto vals = reinterpret_cast*>(reply_data[1].data()); 62 | size_t size = reply_data[0].size(); 63 | auto data = (DataBlock>*)this->data_; 64 | for (size_t i = 0; i < size; ++i) { 65 | data->Set(keys[i], FTRLEntry(vals+i)); 66 | } 67 | } 68 | }; 69 | 70 | template 71 | class FTRLServerTable : public SparseServerTable> { 72 | public: 73 | explicit FTRLServerTable(size_t size) : 74 | SparseServerTable>(size) { } 75 | 76 | explicit FTRLServerTable(const FTRLTableOption &option) : 77 | FTRLServerTable(option.size) {} 78 | }; 79 | 80 | 
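// Option type describing an FTRL table: it stores the table size and, via the
// DEFINE_TABLE_TYPE macro, names FTRLWorkerTable/FTRLServerTable as the worker
// and server implementations to instantiate for the given element type.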
template 81 | struct FTRLTableOption { 82 | explicit FTRLTableOption(size_t size) : 83 | size(size) {} 84 | size_t size; 85 | DEFINE_TABLE_TYPE(EleType, FTRLWorkerTable, FTRLServerTable); 86 | }; 87 | 88 | } // namespace logreg 89 | 90 | #endif // LOGREG_UTIL_FTRL_SPARSE_TABLE_H_ 91 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/util/log.cpp: -------------------------------------------------------------------------------- 1 | #include "util/log.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace logreg { 8 | 9 | // default in Info level 10 | #ifdef LOGLEVEL_FATAL 11 | LogLevel Log::log_level_ = LogLevel::Fatal; 12 | #elif LOGLEVEL_ERROR 13 | LogLevel Log::log_level_ = LogLevel::Error; 14 | #elif LOGLEVEL_DEBUG 15 | LogLevel Log::log_level_ = LogLevel::Debug; 16 | #else 17 | LogLevel Log::log_level_ = LogLevel::Info; 18 | #endif 19 | 20 | void Log::Write(LogLevel level, const char *format, ...) { 21 | if (static_cast(log_level_) > static_cast(level)) { 22 | return; 23 | } 24 | std::string level_str; 25 | 26 | switch (level) { 27 | case Debug: 28 | level_str = "DEBUG"; 29 | break; 30 | case Info: 31 | level_str = "INFO"; 32 | break; 33 | case Error: 34 | level_str = "ERROR"; 35 | break; 36 | case Fatal: 37 | level_str = "FATAL"; 38 | break; 39 | default: 40 | break; 41 | } 42 | va_list val; 43 | va_start(val, format); 44 | printf("[%s] [%s] ", level_str.c_str(), GetSystemTime().c_str()); 45 | vprintf(format, val); 46 | fflush(stdout); 47 | va_end(val); 48 | 49 | if (level == Fatal) { 50 | exit(1); 51 | } 52 | } 53 | 54 | inline std::string Log::GetSystemTime() { 55 | time_t t = time(0); 56 | char str[64]; 57 | #ifdef _MSC_VER 58 | tm time; 59 | localtime_s(&time, &t); 60 | strftime(str, sizeof(str), "%Y-%m-%d %H:%M:%S", &time); 61 | #else 62 | strftime(str, sizeof(str), "%Y-%m-%d %H:%M:%S", localtime(&t)); 63 | #endif 64 | return str; 65 | } 66 | 67 | } // namespace logreg 68 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/util/log.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UTIL_LOG_H_ 2 | #define LOGREG_UTIL_LOG_H_ 3 | 4 | #include 5 | 6 | namespace logreg { 7 | enum LogLevel : int { 8 | Debug = 0, 9 | Info = 1, 10 | Error = 2, 11 | Fatal = 3 12 | }; 13 | class Log { 14 | public: 15 | // print log to stdout 16 | static void Write(LogLevel level, const char *format, ...); 17 | static LogLevel& log_level() { return log_level_; } 18 | private: 19 | static LogLevel log_level_; 20 | static std::string GetSystemTime(); 21 | }; 22 | 23 | #define LR_CHECK(condition) \ 24 | if (!(condition)) { \ 25 | Log::Write(Fatal, "Check failed: " \ 26 | #condition " at %s, line %d .\n", \ 27 | __FILE__, __LINE__); \ 28 | } 29 | 30 | #ifdef LOGLEVEL_DEBUG 31 | #define DEBUG_CHECK(condition) \ 32 | LR_CHECK(condition) 33 | #else 34 | #define DEBUG_CHECK(condition) 35 | #endif 36 | 37 | 38 | } // namespace logreg 39 | 40 | #endif // LOGREG_UTIL_LOG_H_ 41 | -------------------------------------------------------------------------------- /Applications/LogisticRegression/src/util/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGREG_UTIL_TIMER_H_ 2 | #define LOGREG_UTIL_TIMER_H_ 3 | 4 | #include 5 | 6 | namespace logreg { 7 | 8 | class Timer { 9 | public: 10 | void Start() { 11 | start_ = Clock::now(); 12 | } 13 | double ElapseMilliSeconds() { 14 | 
Clock::time_point now = Clock::now(); 15 | return std::chrono::duration<double, std::milli>(now - start_).count(); 16 | } 17 | double ElapseSeconds() { 18 | return ElapseMilliSeconds() / 1000.0; 19 | } 20 | 21 | private: 22 | using Clock = std::chrono::high_resolution_clock; 23 | Clock::time_point start_; 24 | }; 25 | 26 | } // namespace logreg 27 | 28 | #endif // LOGREG_UTIL_TIMER_H_ 29 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | PROJECT(WORDEMBEDDING) 4 | 5 | find_package(MPI REQUIRED) 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11 -Wno-sign-compare -fno-omit-frame-pointer -fopenmp") 7 | 8 | set(MULTIVERSO_INC ${MULTIVERSO_DIR}/include) 9 | set(MULTIVERSO_LIB ${MULTIVERSO_DIR}/build/src) 10 | set(MULTIVERSO_SRC ${MULTIVERSO_DIR}/src) 11 | 12 | include_directories(${MULTIVERSO_INC}) 13 | include_directories(${PROJECT_SOURCE_DIR}/src) 14 | 15 | link_directories(${MULTIVERSO_LIB}) 16 | 17 | set(SRCDIR ${PROJECT_SOURCE_DIR}/src) 18 | aux_source_directory(${PROJECT_SOURCE_DIR}/src SRC_ROOT) 19 | 20 | set(SRC ${MULTIVERSO_SRC} ${SRC_ROOT}) 21 | 22 | add_executable(wordembedding ${SRC}) 23 | 24 | target_link_libraries(wordembedding multiverso ${MPI_CXX_LIBRARIES}) 25 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/README.md: -------------------------------------------------------------------------------- 1 | Word Embedding 2 | ========== 3 | 4 | The DMTK Word Embedding is a parallelization of the Word2Vec algorithm on top of Multiverso. It provides an efficient "scaling to industry size" solution for word embedding. 5 | 6 | For more details about parameter settings and performance, please view our [Wiki](https://github.com/Microsoft/multiverso/wiki/Word-Embedding). 7 | 8 | ## Why DMTK Word Embedding? 9 | 10 | 1. **For training on a large dataset.** 11 | 12 | The DMTK parameter server stores the parameters in a distributed way, which means that each machine just holds a partition of the entire parameter set. This allows the overall embedding model to be very large. For example, in an experiment on the ClueWeb data, the vocabulary size is 21 million and the parameter size reaches 6 billion, which is the largest word embedding model ever reported in the literature, as far as we know. 13 | 14 | 2. **For high-quality word embeddings.** 15 | 16 | You can view the performance of Distributed Word Embedding in the [Wiki](https://github.com/Microsoft/multiverso/wiki/Word-Embedding). 17 | 18 | 3. **For shorter training time.** 19 | 20 | Large datasets need long training time. You can accelerate training by using multiple machines. 21 | 22 | ## Linux Installation 23 | 24 | 1. cmake ./CMakeLists.txt 25 | 26 | 2. make 27 | 28 | ## Windows Installation 29 | 30 | 1. Get and build the DMTK Framework [Multiverso](https://github.com/Microsoft/multiverso.git). 31 | 32 | 2. Open Multiverso.sln, change the configuration and platform of WordEmbedding to Release and x64 (the default setting), and set the ```include``` and ```lib``` paths of multiverso in the project properties. 33 | 34 | 3. Enable OpenMP 2.0 support. 35 | 36 | To set this **compiler** option in the Visual Studio development environment: 37 | 38 | 1) Open the project's **Property Pages** dialog box. 39 | 40 | 2) Expand the **Configuration Properties** node. 41 | 42 | 3) Expand the **C/C++** node.
43 | 44 | 4) Select the **Language** property page. 45 | 46 | 5) Set the **OpenMP Support** property to "yes". 47 | 48 | 4. Build the solution. 49 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/WordEmbedding.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/example/Readme.txt: -------------------------------------------------------------------------------- 1 | Usage: 2 | -size: word embedding size, e.g. 300 3 | -train_file: the training corpus file, e.g. enwik2014 4 | -read_vocab: the file to read all the vocab count info 5 | -binary: 0 or 1, indicates whether to write all the embedding vectors in binary format 6 | -cbow: 0 or 1, default 1, whether to use CBOW, otherwise skip-gram 7 | -alpha: initial learning rate, usually set to 0.025 8 | -output: the output file to store all the embedding vectors 9 | -window: the window size 10 | -sample: the sub-sample size, usually set to 0 11 | -hs: 0 or 1, default 1, whether to use hierarchical softmax, otherwise negative sampling 12 | -negative: the negative word count in negative sampling, please set it to 0 when -hs = 1 13 | -threads: the number of threads to run on one machine 14 | -min_count: words with lower frequency than min_count are removed from the dictionary 15 | -epoch: the epoch number 16 | -stopwords: 0 or 1, whether to avoid training stop words 17 | -sw_file: the stop words file storing all the stop words, valid when -stopwords = 1 18 | -use_adagrad: 0 or 1, whether to use AdaGrad to adjust the learning rate 19 | -data_block_size: default 1MB, the maximum number of bytes a data block will store 20 | -max_preload_data_size: default 8GB, the maximum data size (bytes) that multiverso WordEmbedding will preload 21 | -is_pipeline: 0 or 1, whether to use the pipeline 22 | -server_endpoint_file: default "", server ZMQ socket endpoint file in the MPI-free version -------------------------------------------------------------------------------- /Applications/WordEmbedding/example/imges/Analogical Reasoning google vs dmtk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/Applications/WordEmbedding/example/imges/Analogical Reasoning google vs dmtk.png -------------------------------------------------------------------------------- /Applications/WordEmbedding/example/imges/WS 353 google vs dmtk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/Applications/WordEmbedding/example/imges/WS 353 google vs dmtk.png -------------------------------------------------------------------------------- /Applications/WordEmbedding/example/run.bat: -------------------------------------------------------------------------------- 1 | set size=300 2 | set text=(train_file's name,e.g. enwiki2014) 3 | set read_vocab=(vocab's Directory string,e.g. "C:\Users\Leif\dataset\enwiki2014_vocab.txt") 4 | set train_file=(train_file's Directory string,e.g. 
"C:\Users\Leif\dataset\enwiki2014") 5 | set binary=1 6 | set cbow=1 7 | set alpha=0.01 8 | set epoch=20 9 | set window=5 10 | set sample=0 11 | set hs=0 12 | set negative=5 13 | set threads=16 14 | set mincount=5 15 | set sw_file=stopwords_simple.txt 16 | set stopwords=0 17 | set data_block_size=1000000000 18 | set max_preload_data_size=20000000000 19 | set use_adagrad=0 20 | set is_pipeline=0 21 | set output=%text%_%size%.bin 22 | 23 | distributed_word_embedding.exe -max_preload_data_size %max_preload_data_size% -is_pipeline %is_pipeline% -alpha %alpha% -data_block_size %data_block_size% -train_file %train_file% -output %output% -threads %threads% -size %size% -binary %binary% -cbow %cbow% -epoch %epoch% -negative %negative% -hs %hs% -sample %sample% -min_count %mincount% -window %window% -stopwords %stopwords% -sw_file %sw_file% -read_vocab %read_vocab% -use_adagrad %use_adagrad% 24 | 25 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/preprocess/Readme.txt: -------------------------------------------------------------------------------- 1 | Distributed_word_embedding's input_file format instruction: 2 | 1.train_file is normal format,in which words are separated by space. 3 | 2.word_count.cpp is a word_frequency generator on the basis of train_file. 4 | How to use in commandline: word_count.exe [-train_file ] [-save_vocab_file ] [-min_count ] 5 | 3.stopwords_simple.txt is sw_file which is used to filter dictionary. -------------------------------------------------------------------------------- /Applications/WordEmbedding/preprocess/stopwords_simple.txt: -------------------------------------------------------------------------------- 1 | a 2 | an 3 | and 4 | are 5 | as 6 | at 7 | be 8 | but 9 | by 10 | for 11 | if 12 | in 13 | into 14 | is 15 | it 16 | no 17 | not 18 | of 19 | on 20 | or 21 | s 22 | such 23 | t 24 | that 25 | the 26 | their 27 | then 28 | there 29 | these 30 | they 31 | this 32 | to 33 | was 34 | will 35 | with -------------------------------------------------------------------------------- /Applications/WordEmbedding/preprocess/util.cpp: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | void Option::ParseArgs(int argc, char* argv[]) 4 | { 5 | for (int i = 1; i < argc; i += 2) 6 | { 7 | if (strcmp(argv[i], "-train_file") == 0) train_file = argv[i + 1]; 8 | if (strcmp(argv[i], "-save_vocab_file") == 0) save_vocab_file = argv[i + 1]; 9 | if (strcmp(argv[i], "-min_count") == 0) min_count = atoi(argv[i + 1]); 10 | } 11 | } -------------------------------------------------------------------------------- /Applications/WordEmbedding/preprocess/util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /*! 3 | * \file util.h 4 | * \brief Struct Option stores many input arguments 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | struct Option 14 | { 15 | const char* train_file; 16 | const char* save_vocab_file; 17 | int min_count; 18 | 19 | void ParseArgs(int argc, char *argv[]); 20 | }; -------------------------------------------------------------------------------- /Applications/WordEmbedding/preprocess/word_count.cpp: -------------------------------------------------------------------------------- 1 | /*! 
2 | * \file word_count.cpp 3 | * \brief word_frequency generator on the basis of train_file 4 | * Usage: 5 | * [-train_file ] [-save_vocab ] [-min_count ] 6 | */ 7 | #define _CRT_SECURE_NO_WARNINGS 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "util.h" 14 | 15 | using namespace std; 16 | 17 | void display_map(map &wmap, FILE * file_,Option * option_) 18 | { 19 | map::const_iterator map_it; 20 | for (map_it = wmap.begin(); map_it != wmap.end(); map_it++) 21 | { 22 | if (map_it->second >= option_->min_count) 23 | { 24 | fprintf(file_, "%s %d\n", (map_it->first).c_str(), map_it->second); 25 | } 26 | 27 | } 28 | } 29 | 30 | int main(int argc, char *argv[]) 31 | { 32 | Option *option_= new Option(); 33 | FILE * output_file; 34 | option_->ParseArgs(argc, argv); 35 | output_file = fopen(option_->save_vocab_file, "w"); 36 | ifstream ifs(option_->train_file); 37 | string szTemp; 38 | map wmap; 39 | 40 | while (ifs >> szTemp) 41 | wmap[szTemp]++; 42 | 43 | display_map(wmap,output_file,option_); 44 | 45 | return false; 46 | } 47 | 48 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/block_queue.cpp: -------------------------------------------------------------------------------- 1 | #include "block_queue.h" 2 | 3 | namespace wordembedding { 4 | 5 | void BlockQueue::Push(DataBlock *data_block) { 6 | std::unique_lock lock(mtx_); 7 | queues_.push(data_block); 8 | repo_not_empty_.notify_all(); 9 | lock.unlock(); 10 | } 11 | 12 | DataBlock* BlockQueue::Pop() { 13 | std::unique_lock lock(mtx_); 14 | // block queue is empty, just wait here. 15 | while (queues_.size() == 0) { 16 | (repo_not_empty_).wait(lock); 17 | } 18 | 19 | DataBlock* temp = queues_.front(); 20 | queues_.pop(); 21 | lock.unlock(); 22 | return temp; 23 | } 24 | 25 | int const BlockQueue::GetQueueSize() { 26 | int size = -1; 27 | //This operation is safe in here and more efficient. 28 | //std::unique_lock lock(mtx_); 29 | size = queues_.size(); 30 | //lock.unlock(); 31 | return size; 32 | } 33 | } -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/block_queue.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_BLOCK_QUEUE_H_ 2 | #define WORDEMBEDDING_BLOCK_QUEUE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "data_block.h" 9 | 10 | namespace wordembedding { 11 | 12 | /*! 13 | * \brief The block queue push and pop the block data. Load data thread push 14 | * datablock in it and training thread take datablock from it. 
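 * Push() appends a data block under the mutex and wakes any waiting consumer via
 * the condition variable; Pop() blocks until a data block is available.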
15 | */ 16 | class BlockQueue { 17 | public: 18 | void Push(DataBlock *data_block); 19 | DataBlock* Pop(); 20 | int const GetQueueSize(); 21 | 22 | private: 23 | std::queue queues_; 24 | std::mutex mtx_; 25 | std::condition_variable repo_not_empty_; 26 | }; 27 | } 28 | #endif 29 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/communicator.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_COMMUNICATOR_H_ 2 | #define WORDEMBEDDING_COMMUNICATOR_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "memory_manager.h" 9 | #include "block_queue.h" 10 | 11 | namespace wordembedding { 12 | 13 | class Communicator { 14 | public: 15 | Communicator(Option* option, MemoryManager* memory_mamanger); 16 | ~Communicator(); 17 | 18 | void RequestParameter(DataBlock *data_block); 19 | void AddDeltaParameter(DataBlock *data_block); 20 | 21 | int64 const GetWordCount(); 22 | void AddWordCount(int64 word_count_num); 23 | 24 | void GetWorkerTableRows(std::vector &row_nums, 25 | std::vector &blocks, int embeding_size); 26 | 27 | void PrepareParameterTables(int row_size, int column_size); 28 | 29 | private: 30 | Option* option_ = nullptr; 31 | MemoryManager* memory_mamanger_ = nullptr; 32 | int process_id_ = -1; 33 | int process_count_ = -1; 34 | 35 | multiverso::MatrixWorkerTable* worker_input_table_ = nullptr; 36 | multiverso::MatrixWorkerTable* worker_output_table_ = nullptr; 37 | multiverso::MatrixServerTable* server_input_table_ = nullptr; 38 | multiverso::MatrixServerTable* server_output_table_ = nullptr; 39 | 40 | multiverso::MatrixWorkerTable* worker_input_gradient_table_ = nullptr; 41 | multiverso::MatrixWorkerTable* worker_output_gradient_table_ = nullptr; 42 | multiverso::MatrixServerTable* server_input_gradient_table_ = nullptr; 43 | multiverso::MatrixServerTable* server_output_gradient_table_ = nullptr; 44 | 45 | multiverso::KVWorkerTable* worker_wordcount_table_ = nullptr; 46 | multiverso::KVServerTable* server_wordcount_table_ = nullptr; 47 | 48 | void ClearParameterTables(); 49 | 50 | void GetRows(multiverso::MatrixWorkerTable* table_, std::vector &row_ids, 51 | std::vector &ptrs, int size); 52 | 53 | void RequestParameterByTableId(DataBlock *data_block, int table_id, 54 | std::vector &nodes, std::vector &blocks); 55 | 56 | void SetDataBlockEmbedding(DataBlock *data_block, std::vector &blocks, 57 | std::vector &nodes, std::function get_function); 58 | 59 | void AddRows(multiverso::MatrixWorkerTable* table, std::vector &row_ids, 60 | std::vector &ptrs, int size); 61 | 62 | void AddParameterByTableId(DataBlock *data_block, int table_id, 63 | std::vector &nodes, std::vector &blocks, 64 | std::vector &recycle_blocks); 65 | 66 | void GetDeltaLoop(DataBlock *data_block, std::vector &blocks, 67 | std::vector &nodes, std::vector &recycle_blocks, 68 | std::function get_function); 69 | }; 70 | } 71 | #endif -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/constant.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_CONSTANT_H_ 2 | #define WORDEMBEDDING_CONSTANT_H_ 3 | 4 | /*! 5 | * \file constant.h 6 | * \brief The index of parameter tables and some constant. 
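 * Table ids 0-4 below identify the input-embedding table, the output-embedding
 * table, their two gradient-sum tables, and the word-count table.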
7 | */ 8 | #include 9 | 10 | namespace wordembedding { 11 | typedef int64_t int64; 12 | typedef uint64_t uint64; 13 | typedef float real; 14 | 15 | //multiverso table id 16 | const int kInputEmbeddingTableId = 0; 17 | const int kEmbeddingOutputTableId = 1; 18 | const int kSumGradient2IETableId = 2; 19 | const int kSumGradient2EOTableId = 3; 20 | const int kWordCountId = 4; 21 | 22 | const int kTableSize = (int)1e8; 23 | 24 | const int kMaxWordSize = 901; 25 | const int kMaxCodeLength = 100; 26 | const int kMaxString = 500; 27 | const int kMaxSentenceLength = 1000; 28 | const int kMaxExp = 6; 29 | 30 | const int kExpTableSize = 1000; 31 | const int kSaveBatch = 100000; 32 | } 33 | #endif 34 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/dictionary.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_DICTIONARY_H_ 2 | #define WORDEMBEDDING_DICTIONARY_H_ 3 | /*! 4 | * \brief Class dictionary stores the vocabulary and it's frequency 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #include "constant.h" 16 | 17 | namespace wordembedding { 18 | /*! 19 | * \brief struct WordInfo stores the pair of word&freq 20 | */ 21 | struct WordInfo { 22 | std::string word; 23 | int64 freq; 24 | WordInfo() { 25 | freq = 0; 26 | word.clear(); 27 | } 28 | WordInfo(const std::string& _word, int64 _freq) { 29 | word = _word; 30 | freq = _freq; 31 | } 32 | }; 33 | 34 | class Dictionary { 35 | public: 36 | Dictionary(); 37 | Dictionary(int i); 38 | void Clear(); 39 | /*! 40 | * \brief Assign value to the set word_whitelist_ 41 | */ 42 | void SetWhiteList(const std::vector& whitelist); 43 | /*! 44 | * \brief Remove the low-freq word 45 | */ 46 | void RemoveWordsLessThan(int64 min_count); 47 | /*! 48 | * \brief Merge in the frequent words according to threshold 49 | */ 50 | void MergeInfrequentWords(int64 threshold); 51 | /*! 52 | * \brief Insert word-freq pair to the dictionary 53 | * \param word the word string 54 | * \param cnt the word's frequency 55 | */ 56 | void Insert(const char* word, int64 cnt = 1); 57 | /*! 58 | * \brief Load the word-freq pair from file 59 | */ 60 | void LoadFromFile(const char* filename); 61 | void LoadTriLetterFromFile(const char* filename, 62 | unsigned int min_cnt = 1, unsigned int letter_count = 3); 63 | int GetWordIdx(const char* word); 64 | /*! 65 | * \brief Get the index of the word according to the dictionary 66 | */ 67 | const WordInfo* GetWordInfo(const char* word); 68 | const WordInfo* GetWordInfo(int word_idx); 69 | int Size(); 70 | void StartIteration(); 71 | /*! 72 | * \brief Judge the word_iterator_ is the end 73 | */ 74 | bool HasMore(); 75 | /*! 76 | * \brief Get the next wordinfo pointer in the vector 77 | */ 78 | const WordInfo* Next(); 79 | std::vector::iterator Begin(); 80 | std::vector::iterator End(); 81 | 82 | void PrintVocab(); 83 | 84 | private: 85 | int combine_; 86 | std::vector word_info_; 87 | std::vector::iterator word_iterator_; 88 | std::unordered_map word_idx_map_; 89 | std::unordered_set word_whitelist_; 90 | }; 91 | } 92 | #endif -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/distributed_wordembedding.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_DISTRIBUTED_WORDEMBEDDING_H_ 2 | #define WORDEMBEDDING_DISTRIBUTED_WORDEMBEDDING_H_ 3 | /*! 
4 | * file distributed_wordembedding.h 5 | * \brief Class Distributed_wordembedding describes the main frame of 6 | * WordEmbedding and some useful functions 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #include "util.h" 18 | #include "huffman_encoder.h" 19 | #include "reader.h" 20 | #include "trainer.h" 21 | #include "block_queue.h" 22 | #include "communicator.h" 23 | #include "wordembedding.h" 24 | 25 | namespace wordembedding { 26 | 27 | extern std::string g_log_suffix; 28 | 29 | class DistributedWordembedding { 30 | public: 31 | /*! 32 | * \brief Run Function contains everything 33 | */ 34 | void Run(int argc, char *argv[]); 35 | 36 | private: 37 | clock_t start_; 38 | int process_id_; 39 | Option* option_ = nullptr; 40 | Dictionary* dictionary_ = nullptr; 41 | HuffmanEncoder* huffman_encoder_ = nullptr; 42 | Sampler* sampler_ = nullptr; 43 | Reader* reader_ = nullptr; 44 | WordEmbedding* WordEmbedding_ = nullptr; 45 | BlockQueue *block_queue_ = nullptr; 46 | std::thread load_data_thread_; 47 | std::thread collect_wordcount_thread_; 48 | bool is_running_ = false; 49 | std::vector trainers_; 50 | Communicator* communicator_ = nullptr; 51 | MemoryManager* memory_mamanger_ = nullptr; 52 | 53 | /*! 54 | * \brief Load Dictionary from the vocabulary_file 55 | * \param opt Some model-set setparams 56 | * \param dictionary save the vocabulary and its frequency 57 | * \param huffman_encoder convert dictionary to the huffman_code 58 | */ 59 | int64 LoadVocab(Option *opt, Dictionary *dictionary, 60 | HuffmanEncoder *huffman_encoder); 61 | 62 | void Train(int argc, char *argv[]); 63 | void TrainNeuralNetwork(); 64 | 65 | void PrepareData(DataBlock *data_block); 66 | 67 | void StartLoadDataThread(Reader *reader, int64 file_size); 68 | void LoadOneBlock(DataBlock *data_block, 69 | Reader *reader, int64 size); 70 | 71 | void StartCollectWordcountThread(); 72 | void StopCollectWordcountThread(); 73 | 74 | void StartWordCount(); 75 | void GetAllWordCount(); 76 | void AddDeltaWordCount(); 77 | 78 | DataBlock* GetDataFromQueue(); 79 | DataBlock* GetBlockAndPrepareParameter(); 80 | 81 | void SaveEmbedding(const char *file_path, bool is_binary); 82 | void WriteToFile(bool is_binary, std::vector &blocks, FILE* fid, 83 | std::vector &nodes); 84 | const char* ChangeFileName(const char *file_path, int iteration); 85 | }; 86 | } 87 | #endif -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/huffman_encoder.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_HUFFMAN_ENCODER_H_ 2 | #define WORDEMBEDDING_HUFFMAN_ENCODER_H_ 3 | /*! 4 | * \brief Class Huffman_encoder stores the huffman_encode of the vocabulary according the dictionary 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "dictionary.h" 12 | #include "constant.h" 13 | 14 | namespace wordembedding { 15 | struct HuffLabelInfo 16 | { /*! 17 | * \brief Internal node ids in the code path 18 | */ 19 | std::vector point; 20 | /*! 21 | * \brief Huffman code 22 | */ 23 | std::vector code; 24 | int codelen; 25 | HuffLabelInfo() { 26 | codelen = 0; 27 | point.clear(); 28 | code.clear(); 29 | } 30 | }; 31 | 32 | class HuffmanEncoder { 33 | public: 34 | HuffmanEncoder(); 35 | /*! 36 | * \brief Save the word-huffmancode in the file 37 | */ 38 | void Save2File(const char* filename); 39 | /*! 
40 | * \brief Recover the word-huffmancode from the file 41 | */ 42 | void RecoverFromFile(const char* filename); 43 | /*! 44 | * \brief Get the dictionary file and build 45 | * \hufflabel_info from the dictionary 46 | */ 47 | void BuildFromTermFrequency(const char* filename); 48 | void BuildFromTermFrequency(Dictionary* dict); 49 | /*! 50 | * \brief Get the label size 51 | */ 52 | int GetLabelSize(); 53 | /*! 54 | * \brief Get the label's index 55 | */ 56 | int GetLabelIdx(const char* label); 57 | HuffLabelInfo* GetLabelInfo(char* label); 58 | HuffLabelInfo* GetLabelInfo(int label_idx); 59 | Dictionary* GetDict(); 60 | 61 | private: 62 | void BuildHuffmanTreeFromDict(); 63 | std::vector hufflabel_info_; 64 | Dictionary* dict_; 65 | }; 66 | } 67 | #endif -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "distributed_wordembedding.h" 11 | #include "memory_manager.h" 12 | #include "util.h" 13 | 14 | using namespace wordembedding; 15 | 16 | int main(int argc, char *argv[]) { 17 | try { 18 | DistributedWordembedding dwe; 19 | dwe.Run(argc, argv); 20 | } 21 | catch (std::bad_alloc &memExp) { 22 | multiverso::Log::Info("Something wrong with new() %s\n", memExp.what()); 23 | } 24 | catch (...) { 25 | multiverso::Log::Info("Something wrong with other reason!\n"); 26 | } 27 | return 0; 28 | } 29 | 30 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/memory_manager.cpp: -------------------------------------------------------------------------------- 1 | #include "memory_manager.h" 2 | 3 | namespace wordembedding { 4 | 5 | MemoryManager::MemoryManager(int block_size) { 6 | block_size_ = block_size; 7 | } 8 | //Request memory for blocks 9 | void MemoryManager::RequestBlocks(int64 block_number, 10 | std::vector& result) { 11 | std::unique_lock lock(mutex_); 12 | for (int64 i = 0; i < block_number; ++i) { 13 | result.push_back(new (std::nothrow) real[block_size_]); 14 | assert(result[i] != nullptr); 15 | } 16 | } 17 | //Free the memory for blocks 18 | void MemoryManager::ReturnBlocks(std::vector& blocks) { 19 | std::unique_lock lock(mutex_); 20 | for (size_t i = 0; i < blocks.size(); ++i) 21 | delete[] blocks[i]; 22 | } 23 | 24 | MemoryManager::~MemoryManager() { 25 | 26 | } 27 | } -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/memory_manager.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_MEMORY_MANAGER_H_ 2 | #define WORDEMBEDDING_MEMORY_MANAGER_H_ 3 | /*! 4 | * file memory_manager.h 5 | * \brief Class MemoryManager creates and allocates memory for the local parameter which is needed by the datablock training. 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include "constant.h" 13 | 14 | namespace wordembedding { 15 | 16 | class MemoryManager { 17 | public: 18 | explicit MemoryManager(int block_size); 19 | /*! 20 | * \brief Create memory for the blocks 21 | * \param block_number the block quantity needed 22 | * \param result the vector of the head address of allocated memory 23 | */ 24 | void RequestBlocks(int64 block_number, std::vector& result); 25 | /*! 
26 | * \brief Delete the blocks memory 27 | * \param blocks the vector of the head address of allocated memory 28 | */ 29 | void ReturnBlocks(std::vector& blocks); 30 | ~MemoryManager(); 31 | 32 | private: 33 | int64 block_size_; 34 | std::mutex mutex_; 35 | 36 | // No copying allowed 37 | MemoryManager(const MemoryManager&); 38 | void operator=(const MemoryManager&); 39 | }; 40 | } 41 | #endif 42 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/reader.cpp: -------------------------------------------------------------------------------- 1 | #include "reader.h" 2 | 3 | namespace wordembedding { 4 | 5 | Reader::Reader(Dictionary *dictionary, Option *option, 6 | Sampler *sampler, const char *input_file) { 7 | dictionary_ = dictionary; 8 | option_ = option; 9 | sampler_ = sampler; 10 | 11 | stopwords_table_.clear(); 12 | if (option_->stopwords) { 13 | FILE* fid = fopen(option_->sw_file, "r"); 14 | if (fid == nullptr) { 15 | multiverso::Log::Fatal("Open sw_file failed!\n"); 16 | exit(1); 17 | } 18 | while (ReadWord(word_, fid)) { 19 | stopwords_table_.insert(word_); 20 | } 21 | 22 | fclose(fid); 23 | } 24 | 25 | file_ = fopen(input_file, "r"); 26 | if (file_ == nullptr) { 27 | multiverso::Log::Fatal("Open train_file failed!\n"); 28 | exit(1); 29 | } 30 | } 31 | 32 | Reader::~Reader() { 33 | if (file_ != nullptr) 34 | fclose(file_); 35 | } 36 | //Get sentence by connecting the words extracted 37 | int Reader::GetSentence(int *sentence, int64 &word_count) { 38 | int length = 0, word_idx; 39 | word_count = 0; 40 | while (1) { 41 | if (!ReadWord(word_, file_)) 42 | break; 43 | word_idx = dictionary_->GetWordIdx(word_); 44 | if (word_idx == -1) 45 | continue; 46 | word_count++; 47 | if (option_->stopwords && stopwords_table_.count(word_)) 48 | continue; 49 | if (option_->sample > 0 && 50 | !sampler_->WordSampling( 51 | dictionary_->GetWordInfo(word_idx)->freq, 52 | option_->total_words, option_->sample)) 53 | continue; 54 | sentence[length++] = word_idx; 55 | if (length >= kMaxSentenceLength) 56 | break; 57 | } 58 | 59 | return length; 60 | } 61 | 62 | void Reader::ResetStart() { 63 | fseek(file_, 0, SEEK_SET); 64 | } 65 | 66 | void Reader::ResetSize(int64 size) { 67 | byte_count_ = 0; 68 | byte_size_ = size; 69 | } 70 | //Read words from the file 71 | bool Reader::ReadWord(char *word, FILE *fin) { 72 | int idx = 0; 73 | char ch; 74 | while (!feof(fin) && byte_count_ < byte_size_) { 75 | ch = fgetc(fin); 76 | ++byte_count_; 77 | if (ch == 13) continue; 78 | if ((ch == ' ') || (ch == '\t') || (ch == '\n')) { 79 | if (idx > 0) { 80 | if (ch == '\n') 81 | ungetc(ch, fin); 82 | break; 83 | } 84 | if (ch == '\n') { 85 | strcpy(word, (char *)""); 86 | return true; 87 | } 88 | else continue; 89 | } 90 | word[idx++] = ch; 91 | //Truncate too long words 92 | if (idx >= kMaxString - 1) 93 | idx--; 94 | } 95 | word[idx] = 0; 96 | return idx != 0; 97 | } 98 | } -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/reader.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_READER_H_ 2 | #define WORDEMBEDDING_READER_H_ 3 | /*! 
4 | * file reader.h 5 | * \brief Class Reader helps the function Loaddata to fill the datablock 6 | */ 7 | 8 | #include 9 | 10 | #include "util.h" 11 | #include "dictionary.h" 12 | #include "constant.h" 13 | 14 | namespace wordembedding { 15 | 16 | class Reader { 17 | public: 18 | Reader(Dictionary *dictionary, Option *option, 19 | Sampler *sampler, const char *input_file); 20 | ~Reader(); 21 | /*! 22 | * \brief Getsentence from the train_file 23 | * \param sentence save the sentence by the word index according to the dictionary 24 | * \param word_count count the sentence length 25 | */ 26 | int GetSentence(int *sentence, int64 &word_count); 27 | void ResetStart(); 28 | void ResetSize(int64 size); 29 | 30 | private: 31 | const Option *option_; 32 | FILE* file_; 33 | char word_[kMaxString + 1]; 34 | Dictionary *dictionary_; 35 | Sampler *sampler_; 36 | int64 byte_count_, byte_size_; 37 | std::unordered_set stopwords_table_; 38 | /*! 39 | * \brief Read words from the train_file 40 | * \param word store the extracted word 41 | * \param file represent the train_file pointer 42 | */ 43 | bool ReadWord(char *word, FILE *file); 44 | 45 | //No copying allowed 46 | Reader(const Reader&); 47 | void operator=(const Reader&); 48 | }; 49 | } 50 | #endif -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/trainer.cpp: -------------------------------------------------------------------------------- 1 | #include "trainer.h" 2 | namespace wordembedding { 3 | 4 | Trainer::Trainer(int trainer_id, Option *option, 5 | Dictionary* dictionary, WordEmbedding* WordEmbedding) { 6 | trainer_id_ = trainer_id; 7 | option_ = option; 8 | word_count = 0; 9 | WordEmbedding_ = WordEmbedding; 10 | dictionary_ = dictionary; 11 | hidden_act_ = (real *)calloc(option_->embeding_size, sizeof(real)); 12 | hidden_err_ = (real *)calloc(option_->embeding_size, sizeof(real)); 13 | process_count_ = -1; 14 | process_id_ = -1; 15 | 16 | assert(hidden_act_ != nullptr); 17 | assert(hidden_err_ != nullptr); 18 | start_ = 0; 19 | train_count_ = 0; 20 | } 21 | 22 | Trainer::~Trainer() { 23 | free(hidden_act_); 24 | free(hidden_err_); 25 | } 26 | 27 | void Trainer::TrainIteration(DataBlock *data_block) { 28 | if (process_id_ == -1) 29 | process_id_ = multiverso::MV_Rank(); 30 | 31 | if (data_block == nullptr) { 32 | return; 33 | } 34 | 35 | int64 last_word_count = word_count; 36 | clock_t start = clock(); 37 | 38 | multiverso::Log::Debug("Rank %d Train %d TrainNN Begin TrainIteration%d ...\n", 39 | process_id_, trainer_id_, train_count_); 40 | 41 | WordEmbedding_->Train(data_block, trainer_id_, option_->thread_cnt, 42 | word_count, hidden_act_, hidden_err_); 43 | 44 | if (word_count > last_word_count) { 45 | multiverso::Log::Info("Rank %d TrainNNSpeed: Words/thread/second %lfk\n", 46 | process_id_, 47 | (static_cast(word_count)-last_word_count) / 48 | (clock() - start) * static_cast(CLOCKS_PER_SEC) / 1000); 49 | } 50 | 51 | multiverso::Log::Debug("Rank %d Trainer %d training time:%lfs\n", process_id_, 52 | trainer_id_, (clock() - start) / static_cast(CLOCKS_PER_SEC)); 53 | train_count_++; 54 | } 55 | } -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/trainer.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_TRAINER_H_ 2 | #define WORDEMBEDDING_TRAINER_H_ 3 | /*! 
4 | * file trainer.h 5 | * \brief Class Trainer trains the model by every train iteration 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "constant.h" 16 | #include "util.h" 17 | #include "huffman_encoder.h" 18 | #include "wordembedding.h" 19 | #include "data_block.h" 20 | #include "memory_manager.h" 21 | 22 | namespace wordembedding { 23 | 24 | class WordEmbedding; 25 | extern std::string g_log_suffix; 26 | class Trainer{ 27 | public: 28 | int64 word_count; 29 | Trainer(int trainer_id, Option *option, 30 | Dictionary* dictionary, WordEmbedding* WordEmbedding); 31 | /*! 32 | * /brief Train one datablock 33 | */ 34 | 35 | ~Trainer(); 36 | void TrainIteration(DataBlock * data_block); 37 | 38 | private: 39 | int process_count_; 40 | int process_id_; 41 | int trainer_id_; 42 | Option *option_; 43 | real *hidden_act_, *hidden_err_; 44 | WordEmbedding* WordEmbedding_; 45 | Dictionary* dictionary_; 46 | int train_count_; 47 | clock_t start_; 48 | 49 | //No copying allowed 50 | Trainer(const Trainer&); 51 | void operator=(const Trainer&); 52 | }; 53 | } 54 | #endif 55 | -------------------------------------------------------------------------------- /Applications/WordEmbedding/src/util.h: -------------------------------------------------------------------------------- 1 | #ifndef WORDEMBEDDING_UTIL_H_ 2 | #define WORDEMBEDDING_UTIL_H_ 3 | /*! 4 | * file util.h 5 | * \brief Struct Option stores many general arguments in model 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "constant.h" 16 | #include "dictionary.h" 17 | 18 | namespace wordembedding { 19 | 20 | struct Option { 21 | const char* train_file; 22 | const char* read_vocab_file; 23 | const char* output_file; 24 | const char* sw_file; 25 | const char* endpoints_file; 26 | bool hs, output_binary, cbow, stopwords; 27 | bool use_adagrad; 28 | bool is_pipeline; 29 | real sample; 30 | int64 data_block_size; 31 | int embeding_size, thread_cnt, window_size, negative_num, min_count, epoch; 32 | int64 total_words; 33 | int64 max_preload_data_size; 34 | real init_learning_rate; 35 | 36 | Option(); 37 | /*! 38 | * \brief Get the model-set arguments from file 39 | */ 40 | void ParseArgs(int argc, char* argv[]); 41 | void PrintArgs(); 42 | void PrintUsage(); 43 | 44 | }; 45 | 46 | class Sampler { 47 | public: 48 | Sampler(); 49 | ~Sampler(); 50 | /*! 51 | * \brief Set the negative-sampling distribution for every vocabulary 52 | * \param dictionary the train_file dictionary 53 | */ 54 | void SetNegativeSamplingDistribution(Dictionary *dictionary); 55 | bool WordSampling(int64 word_cnt, int64 train_words, real sample); 56 | /*! 
57 | * \brief Get the next random according to the existing random seed 58 | */ 59 | uint64 GetNextRandom(uint64 next_random); 60 | int NegativeSampling(uint64 next_random); 61 | 62 | private: 63 | int* table_; 64 | 65 | //No copying allowed 66 | Sampler(const Sampler&); 67 | void operator=(const Sampler&); 68 | }; 69 | 70 | std::string GetSystemTime(); 71 | int64 GetFileSize(const char *filename); 72 | bool ReadWord(char *word, FILE *fin); 73 | 74 | void InitExpTable(); 75 | extern std::string g_log_suffix; 76 | extern real* expTable; 77 | extern int embedding_size; 78 | 79 | } 80 | #endif -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | PROJECT(MULTIVERSO) 4 | 5 | OPTION(USE_HDFS "won't use hdfs on default, set ON to enable" OFF) 6 | OPTION(TEST "Build all tests." ON) 7 | OPTION(USE_ZMQ "weather to build with ZeroMQ.(default: OFF)" OFF) 8 | OPTION(INSTALL_MULTIVERSO "whether install Multiverso to /usr/local/lib" ON) 9 | option(ENABLE_DCASGD "Build with DC-ASGD supported" OFF) 10 | 11 | find_package(MPI REQUIRED) 12 | 13 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11") 14 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") 15 | 16 | if(USE_HDFS) 17 | ADD_DEFINITIONS(-DMULTIVERSO_USE_HDFS) 18 | SET(JVM_LIB /usr/local/java/default/jre/lib/amd64/server) 19 | SET(HDFS_LIB /usr/local/hadoop/lib/native) 20 | LINK_DIRECTORIES(${HDFS_LIB}) 21 | LINK_DIRECTORIES(${JVM_LIB}) 22 | endif(USE_HDFS) 23 | 24 | if(ENABLE_DCASGD) 25 | ADD_DEFINITIONS(-DENABLE_DCASGD) 26 | endif(ENABLE_DCASGD) 27 | 28 | include_directories(${PROJECT_SOURCE_DIR}/include) 29 | 30 | set(MULTIVERSO_DIR ${PROJECT_SOURCE_DIR}) 31 | ADD_SUBDIRECTORY(src) 32 | ADD_SUBDIRECTORY(Test) 33 | ADD_SUBDIRECTORY(Test/unittests) 34 | ADD_SUBDIRECTORY(Applications/WordEmbedding) 35 | ADD_SUBDIRECTORY(Applications/LogisticRegression) 36 | 37 | if(INSTALL_MULTIVERSO) 38 | install (DIRECTORY ${PROJECT_SOURCE_DIR}/include/multiverso DESTINATION include) 39 | endif(INSTALL_MULTIVERSO) 40 | 41 | 42 | # uninstall target 43 | configure_file( 44 | "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in" 45 | "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" 46 | IMMEDIATE @ONLY) 47 | 48 | add_custom_target(uninstall 49 | COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) 50 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Microsoft Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Multiverso 2 | ========== 3 | [![Build Status](https://travis-ci.org/Microsoft/Multiverso.svg?branch=master)](https://travis-ci.org/Microsoft/Multiverso) 4 | 5 | Multiverso is a parameter-server-based framework for training machine learning models on big data across large numbers of machines. It is a standard C++ library that provides a set of friendly programming interfaces, and it has been extended with bindings for Python and Lua programs. With such easy-to-use APIs, machine learning researchers and practitioners do not need to worry about system routine issues such as distributed model storage and operation, inter-process and inter-thread communication, multi-threading management, and so on. 6 | Instead, they are able to focus on the core machine learning logic: data, model, and training. 7 | 8 | For more details, please visit our website [http://www.dmtk.io](http://www.dmtk.io). 9 | 10 | Build 11 | ---------- 12 | 13 | **Linux** (Tested on Ubuntu 14.04) 14 | 15 | ``` 16 | sudo apt-get install libopenmpi-dev openmpi-bin build-essential cmake git 17 | git clone https://github.com/Microsoft/multiverso.git --recursive && cd multiverso 18 | mkdir build && cd build 19 | cmake .. && make && sudo make install 20 | ``` 21 | 22 | **Windows** 23 | 24 | Open `Multiverso.sln` with Visual Studio 2013 and build. 25 | 26 | Related Projects 27 | ---------- 28 | 29 | Distributed systems currently built on Multiverso: 30 | 31 | * [lightLDA](https://github.com/Microsoft/lightlda): Scalable, fast, lightweight system for large-scale topic modeling 32 | * [distributed_word_embedding](https://github.com/Microsoft/multiverso/tree/master/Applications/WordEmbedding): Distributed system for word embedding 33 | * [distributed_word_embedding (deprecated)](https://github.com/Microsoft/distributed_word_embedding): Distributed system for word embedding 34 | * [distributed_skipgram_mixture (deprecated)](https://github.com/Microsoft/distributed_skipgram_mixture): Distributed skip-gram mixture for multi-sense word embedding 35 | 36 | Microsoft Open Source Code of Conduct 37 | ------------ 38 | 39 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information, see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
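
Quick Example
----------

The snippet below is a minimal sketch of the worker-side C++ API, following the same pattern as the test programs under `Test/`. The exact header paths are assumptions based on the installed `include/multiverso` layout, and the table size and element type are arbitrary choices for illustration.

```
#include <vector>
#include <multiverso/multiverso.h>
#include <multiverso/table/array_table.h>

int main(int argc, char* argv[]) {
  multiverso::MV_Init(&argc, argv);                   // join the worker/server group
  multiverso::ArrayTableOption<float> option(1000);   // one shared array of 1000 floats
  auto* table = multiverso::MV_CreateTable(option);   // worker-side handle to the table
  std::vector<float> delta(1000, 1.0f), model(1000);
  table->Add(delta.data(), delta.size());             // push a local update (delta)
  table->Get(model.data(), model.size());             // pull the aggregated values
  multiverso::MV_ShutDown();
  return 0;
}
```

Every process running this code joins the same parameter server group: each `Add` contributes a delta to the server-side table, and `Get` reads back the aggregated model.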
40 | -------------------------------------------------------------------------------- /Test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/Test) 2 | 3 | SET(MULTIVERSO_TEST_SRC test_allreduce.cpp test_array_table.cpp test_kv_table.cpp test_matrix_perf.cpp test_matrix_table.cpp test_net.cpp main.cpp) 4 | 5 | SET(CMAKE_CXX_COMPILER mpicxx) 6 | 7 | LINK_DIRECTORIES(${LIBRARY_OUTPUT_PATH}) 8 | 9 | 10 | MESSAGE(${MPI_LIBRARIES}) 11 | MESSAGE(${MPI_CXX_LIBRARIES}) 12 | 13 | ENABLE_TESTING() 14 | 15 | ADD_EXECUTABLE(multiverso.test ${MULTIVERSO_TEST_SRC}) 16 | 17 | if(USE_HDFS) 18 | TARGET_LINK_LIBRARIES(multiverso.test multiverso ${MPI_CXX_LIBRARIES} jvm hdfs) 19 | else() 20 | TARGET_LINK_LIBRARIES(multiverso.test multiverso ${MPI_CXX_LIBRARIES}) 21 | endif(USE_HDFS) 22 | 23 | SET_PROPERTY(TARGET multiverso.test PROPERTY CXX_STANDARD 11) 24 | -------------------------------------------------------------------------------- /Test/Test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | {f42b7b09-e419-4f63-b12e-e93219217a45} 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Test/common.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_TEST_END2ENDTEST_COMMON_H_ 2 | #define MULTIVERSO_TEST_END2ENDTEST_COMMON_H_ 3 | 4 | namespace multiverso { 5 | namespace test { 6 | 7 | void TestAllreduce(int argc, char* argv[]); 8 | 9 | void TestArray(int argc, char* argv[]); 10 | 11 | void TestKV(int argc, char* argv[]); 12 | 13 | void TestMatrix(int argc, char* argv[]); 14 | 15 | void TestNet(int argc, char* argv[]); 16 | 17 | } // namespace test 18 | } // namespace multiverso 19 | 20 | #endif // MULTIVERSO_TEST_END2ENDTEST_COMMON_H_ -------------------------------------------------------------------------------- /Test/main.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | #include 4 | #include 5 | 6 | using namespace multiverso::test; 7 | 8 | void PrintUsage() { 9 | printf("Usage: multiverso.test kv|array|net|matrix|allreduce\n"); 10 | } 11 | 12 | int main(int argc, char* argv[]) { 13 | if (argc != 2) PrintUsage(); 14 | else { 15 | if (strcmp(argv[1], "kv") == 0) TestKV(argc, argv); 16 | else if (strcmp(argv[1], "array") == 0) TestArray(argc, argv); 17 | else if (strcmp(argv[1], "net") == 0) TestNet(argc, argv); 18 | else if (strcmp(argv[1], "matrix") == 0) TestMatrix(argc, argv); 19 | else if (strcmp(argv[1], "allreduce") == 0) TestAllreduce(argc, argv); 20 | else { 21 | PrintUsage(); 22 | } 23 | } 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /Test/test_allreduce.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace multiverso { 8 | namespace test { 9 | 10 | void TestAllreduce(int argc, char* argv[]) { 11 | multiverso::SetCMDFlag("ma", true); 12 | MV_Init(&argc, argv); 13 | int a = 1; 14 | MV_Aggregate(&a, 1); 15 | 16 | CHECK(a == MV_Size()); 17 | 18 | MV_ShutDown(); 19 | } 20 | 21 | } // namespace test 22 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/test_array_table.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace multiverso { 9 | namespace test { 10 | 11 | void TestArray(int argc, char* argv[]) { 12 | Log::Info("Test Array \n"); 13 | 14 | multiverso::SetCMDFlag("sync", true); 15 | MV_Init(&argc, argv); 16 | 17 | size_t array_size = 500; 18 | 19 | auto shared_array = MV_CreateTable(ArrayTableOption(array_size)); 20 | 21 | Log::Info("Create tables OK. Rank = %d, worker_id = %d\n", 22 | MV_Rank(), MV_WorkerId()); 23 | 24 | std::vector delta(array_size); 25 | for (int i = 0; i < array_size; ++i) 26 | delta[i] = static_cast(i); 27 | 28 | int* data = new int[array_size]; 29 | 30 | int iter = 10 * (MV_Rank() + 10); 31 | for (int i = 0; i < iter; ++i) { 32 | shared_array->Add(delta.data(), array_size); 33 | shared_array->Add(delta.data(), array_size); 34 | shared_array->Add(delta.data(), array_size); 35 | shared_array->Get(data, array_size); 36 | shared_array->Get(data, array_size); 37 | shared_array->Get(data, array_size); 38 | if (iter < 100) { 39 | for (int k = 0; k < array_size; ++k) { 40 | CHECK (data[k] != delta[k] * (i + 1) * MV_NumWorkers()) ; 41 | } 42 | } 43 | } 44 | delete[] data; 45 | 46 | MV_ShutDown(); 47 | } 48 | 49 | } // namespace test 50 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/test_kv_table.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace multiverso { 6 | namespace test { 7 | 8 | void TestKV(int argc, char* argv[]) { 9 | Log::Info("Test KV map \n"); 10 | // 1. Start the Multiverso engine 11 | MV_Init(&argc, argv); 12 | 13 | // 2. To create the shared table 14 | KVTableOption option; 15 | auto dht = MV_CreateTable(option); 16 | 17 | // 3. User program 18 | // access the local cache 19 | std::unordered_map& kv = dht->raw(); 20 | 21 | // Get from the server 22 | dht->Get(0); 23 | // Check the result 24 | Log::Info("Get 0 from kv server: result = %d\n", kv[0]); 25 | 26 | // Add 1 to the server 27 | dht->Add(0, 1); 28 | // Check the result 29 | dht->Get(0); 30 | Log::Info("Get 0 from kv server after add 1: result = %d\n", kv[0]); 31 | 32 | // 4. 
Shutdown the Multiverso engine 33 | MV_ShutDown(); 34 | } 35 | 36 | } // namespace test 37 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/test_net.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | namespace multiverso { 7 | namespace test { 8 | 9 | void TestNet(int argc, char* argv[]) { 10 | NetInterface* net = NetInterface::Get(); 11 | net->Init(&argc, argv); 12 | 13 | const char* chi1 = std::string("hello, world").c_str(); 14 | const char* chi2 = std::string("hello, c++").c_str(); 15 | const char* chi3 = std::string("hello, multiverso").c_str(); 16 | char* hi1 = new char[14]; 17 | 18 | #ifdef _MSC_VER 19 | strcpy_s(hi1, 14, chi1); 20 | #else 21 | strcpy(hi1, chi1); 22 | #endif 23 | 24 | char* hi2 = new char[12]; 25 | #ifdef _MSC_VER 26 | strcpy_s(hi2, 12, chi2); 27 | #else 28 | strcpy(hi2, chi2); 29 | #endif 30 | 31 | char* hi3 = new char[19]; 32 | #ifdef _MSC_VER 33 | strcpy_s(hi3, 19, chi3); 34 | #else 35 | strcpy(hi3, chi3); 36 | #endif 37 | 38 | if (net->rank() == 0) { 39 | for (int rank = 1; rank < net->size(); ++rank) { 40 | MessagePtr msg(new Message()); 41 | msg->set_src(0); 42 | msg->set_dst(rank); 43 | msg->Push(Blob(hi1, 13)); 44 | msg->Push(Blob(hi2, 11)); 45 | msg->Push(Blob(hi3, 18)); 46 | for (int i = 0; i < msg->size(); ++i) { 47 | Log::Info("In Send: %s\n", msg->data()[i].data()); 48 | }; 49 | while (net->Send(msg) == 0); 50 | Log::Info("rank 0 send\n"); 51 | } 52 | 53 | for (int i = 1; i < net->size(); ++i) { 54 | MessagePtr msg(new Message()); 55 | msg.reset(new Message()); 56 | while (net->Recv(&msg) == 0) { 57 | // Log::Info("recv return 0\n"); 58 | } 59 | Log::Info("rank 0 recv\n"); 60 | 61 | std::vector recv_data = msg->data(); 62 | CHECK(recv_data.size() == 3); 63 | for (int i = 0; i < msg->size(); ++i) { 64 | Log::Info("recv from srv %d: %s\n", msg->src(), recv_data[i].data()); 65 | }; 66 | } 67 | } 68 | else {// other rank 69 | MessagePtr msg(new Message()); 70 | while (net->Recv(&msg) == 0) { 71 | // Log::Info("recv return 0\n"); 72 | } 73 | Log::Info("rank %d recv\n", net->rank()); 74 | std::vector& recv_data = msg->data(); 75 | CHECK(recv_data.size() == 3); 76 | for (int i = 0; i < msg->size(); ++i) { 77 | Log::Info("%s\n", recv_data[i].data()); 78 | } 79 | 80 | msg.reset(new Message()); 81 | msg->set_src(net->rank()); 82 | msg->set_dst(0); 83 | msg->Push(Blob(hi1, 13)); 84 | msg->Push(Blob(hi2, 11)); 85 | msg->Push(Blob(hi3, 18)); 86 | while (net->Send(msg) == 0); 87 | Log::Info("rank %d send\n", net->rank()); 88 | } 89 | net->Finalize(); 90 | } 91 | 92 | } // namespace test 93 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/unittests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | enable_testing() 2 | 3 | find_package(Boost COMPONENTS unit_test_framework REQUIRED) 4 | 5 | SET(MULTIVERSO_UNITTEST_SRC test_array.cpp test_blob.cpp test_kv.cpp test_message.cpp test_multiverso.cpp test_node.cpp test_sync.cpp) 6 | 7 | LINK_DIRECTORIES(${LIBRARY_OUTPUT_PATH}) 8 | 9 | MESSAGE(${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) 10 | MESSAGE(${Boost_INCLUDE_DIRS}) 11 | 12 | ADD_EXECUTABLE(multiversotests ${MULTIVERSO_UNITTEST_SRC}) 13 | 14 | TARGET_INCLUDE_DIRECTORIES(multiversotests PRIVATE ${Boost_INCLUDE_DIRS}) 15 | TARGET_LINK_LIBRARIES(multiversotests multiverso 
${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) 16 | 17 | SET_PROPERTY(TARGET multiversotests PROPERTY CXX_STANDARD 11) 18 | -------------------------------------------------------------------------------- /Test/unittests/MultiversoTests.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /Test/unittests/multiverso_env.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_TEST_UNITTEST_MULTIVERSO_EVN_H_ 2 | #define MULTIVERSO_TEST_UNITTEST_MULTIVERSO_EVN_H_ 3 | 4 | #include 5 | 6 | namespace multiverso { 7 | namespace test { 8 | 9 | struct MultiversoEnv { 10 | MultiversoEnv() { 11 | MV_SetFlag("sync", false); 12 | MV_Init(); 13 | } 14 | 15 | ~MultiversoEnv() { 16 | MV_ShutDown(false); 17 | } 18 | }; 19 | 20 | struct SyncMultiversoEnv { 21 | SyncMultiversoEnv() { 22 | MV_SetFlag("sync", true); 23 | MV_Init(); 24 | } 25 | 26 | ~SyncMultiversoEnv() { 27 | MV_ShutDown(false); 28 | } 29 | }; 30 | 31 | } // namespace test 32 | } // namespace multiverso 33 | 34 | #endif // MULTIVERSO_TEST_UNITTEST_MULTIVERSO_EVN_H_ -------------------------------------------------------------------------------- /Test/unittests/test_array.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "multiverso_env.h" 6 | 7 | namespace multiverso { 8 | namespace test { 9 | 10 | struct ArrayTableEnv : public MultiversoEnv { 11 | ArrayWorker* table; 12 | 13 | ArrayTableEnv() : MultiversoEnv() { 14 | ArrayTableOption option(10); 15 | table = MV_CreateTable(option); 16 | } 17 | 18 | ~ArrayTableEnv() { 19 | delete table; 20 | table = nullptr; 21 | } 22 | }; 23 | 24 | BOOST_FIXTURE_TEST_SUITE(array_test, ArrayTableEnv) 25 | 26 | BOOST_AUTO_TEST_CASE(array_access) { 27 | std::vector delta(10); 28 | std::vector model(10); 29 | for (int i = 0; i < 10; ++i) delta[i] = i; 30 | table->Add(delta.data(), delta.size()); 31 | table->Get(model.data(), model.size()); 32 | 33 | for (int i = 0; i < 10; ++i) { 34 | BOOST_CHECK_EQUAL(model[i], delta[i]); 35 | } 36 | 37 | table->AddAsync(delta.data(), delta.size()); 38 | int handle = table->GetAsync(model.data(), model.size()); 39 | table->Wait(handle); 40 | 41 | for (int i = 0; i < 10; ++i) { 42 | BOOST_CHECK_EQUAL(model[i], 2 * delta[i]); 43 | } 44 | } 45 | 46 | BOOST_AUTO_TEST_CASE(array_partition) { 47 | std::unordered_map> result; 48 | std::vector kv; 49 | int key = -1; 50 | Blob key_blob(&key, sizeof(key)); 51 | std::vector value(10); 52 | Blob value_blob(value.data(), sizeof(int) * value.size()); 53 | kv.push_back(key_blob); 54 | kv.push_back(value_blob); 55 | 56 | table->Partition(kv, MsgType::Request_Get, &result); 57 | 58 | BOOST_CHECK_EQUAL(result.size(), 1); 59 | BOOST_CHECK(result.find(0) != result.end()); 60 | BOOST_CHECK_EQUAL(result[0].size(), 2); 61 | BOOST_CHECK_EQUAL(result[0][0].As(), key); 62 | int* vec = reinterpret_cast(result[0][1].data()); 63 | for (int i = 0; i < 10; ++i) { 64 | BOOST_CHECK_EQUAL(vec[i], value[i]); 65 | } 66 | } 67 | 68 | BOOST_AUTO_TEST_SUITE_END() 69 | 70 | } // namespace test 71 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/unittests/test_blob.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 
namespace multiverso { 5 | namespace test { 6 | 7 | BOOST_AUTO_TEST_SUITE(blob) 8 | 9 | BOOST_AUTO_TEST_CASE(blob_constructor_test) { 10 | multiverso::Blob blob; 11 | BOOST_CHECK_EQUAL(blob.size(), 0); 12 | 13 | multiverso::Blob blob2(4); 14 | BOOST_CHECK_EQUAL(blob2.size(), 4); 15 | 16 | int a[3]; 17 | multiverso::Blob blob3(a, 3 * sizeof(int)); 18 | BOOST_CHECK_EQUAL(blob3.size(), 3 * sizeof(int)); 19 | 20 | } 21 | 22 | BOOST_AUTO_TEST_CASE(blob_access_test) { 23 | multiverso::Blob blob(4); 24 | BOOST_CHECK_EQUAL(blob.size(), 4); 25 | 26 | const int value = 3; 27 | int* data = reinterpret_cast(blob.data()); 28 | *data = value; 29 | BOOST_CHECK_EQUAL(blob.As(), value); 30 | 31 | std::string str("hello, world!"); 32 | multiverso::Blob str_blob(str.c_str(), str.size()); 33 | BOOST_CHECK_EQUAL(str_blob[0], 'h'); 34 | BOOST_CHECK_EQUAL(str_blob[4], 'o'); 35 | } 36 | 37 | BOOST_AUTO_TEST_SUITE_END() 38 | 39 | } // namespace test 40 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/unittests/test_kv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "multiverso_env.h" 5 | 6 | namespace multiverso { 7 | namespace test { 8 | 9 | struct KVTableEnv : public MultiversoEnv { 10 | KVWorkerTable* table; 11 | 12 | KVTableEnv() : MultiversoEnv() { 13 | KVTableOption option; 14 | table = MV_CreateTable(option); 15 | } 16 | 17 | ~KVTableEnv() { 18 | delete table; 19 | table = nullptr; 20 | } 21 | }; 22 | 23 | BOOST_FIXTURE_TEST_SUITE(test_kv, KVTableEnv) 24 | 25 | BOOST_AUTO_TEST_CASE(access) { 26 | auto& map = table->raw(); 27 | table->Get(0); 28 | BOOST_CHECK_EQUAL(map[0], 0); 29 | 30 | table->Add(0, 3); 31 | 32 | table->Get(0); 33 | BOOST_CHECK_EQUAL(map[0], 3); 34 | 35 | table->Add(0, -4); 36 | 37 | table->Get(0); 38 | BOOST_CHECK_EQUAL(map[0], -1); 39 | } 40 | 41 | 42 | BOOST_AUTO_TEST_SUITE_END() 43 | 44 | } // namespace test 45 | } // namespace multiverso 46 | -------------------------------------------------------------------------------- /Test/unittests/test_message.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace multiverso { 5 | namespace test { 6 | 7 | BOOST_AUTO_TEST_SUITE(message) 8 | 9 | BOOST_AUTO_TEST_CASE(message_access) { 10 | multiverso::Message msg; 11 | BOOST_CHECK_EQUAL(msg.data().size(), 0); 12 | 13 | msg.set_msg_id(0); 14 | BOOST_CHECK_EQUAL(msg.msg_id(), 0); 15 | msg.set_src(1); 16 | BOOST_CHECK_EQUAL(msg.src(), 1); 17 | msg.set_dst(2); 18 | BOOST_CHECK_EQUAL(msg.dst(), 2); 19 | msg.set_table_id(3); 20 | BOOST_CHECK_EQUAL(msg.table_id(), 3); 21 | msg.set_type(MsgType::Request_Get); 22 | BOOST_CHECK_EQUAL(msg.type(), MsgType::Request_Get); 23 | 24 | BOOST_TEST_MESSAGE("before blob\n"); 25 | 26 | multiverso::Blob data; 27 | msg.Push(data); 28 | BOOST_CHECK_EQUAL(msg.size(), 1); 29 | 30 | 31 | std::vector vec_data; 32 | msg.set_data(vec_data); 33 | 34 | BOOST_CHECK_EQUAL(msg.size(), 0); 35 | 36 | MessagePtr reply_msg(msg.CreateReplyMessage()); 37 | BOOST_CHECK_EQUAL(reply_msg->src(), msg.dst()); 38 | BOOST_CHECK_EQUAL(reply_msg->dst(), msg.src()); 39 | BOOST_CHECK_EQUAL(reply_msg->type(), MsgType::Reply_Get); 40 | } 41 | 42 | BOOST_AUTO_TEST_SUITE_END() 43 | 44 | } // namespace test 45 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/unittests/test_multiverso.cpp: 
-------------------------------------------------------------------------------- 1 | #ifndef _WIN32 2 | // Use dynamic library on Linux 3 | #define BOOST_TEST_DYN_LINK 4 | #endif 5 | 6 | #define BOOST_TEST_MODULE multiverso 7 | #include 8 | -------------------------------------------------------------------------------- /Test/unittests/test_node.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace multiverso { 5 | namespace test { 6 | 7 | BOOST_AUTO_TEST_SUITE(node) 8 | 9 | BOOST_AUTO_TEST_CASE(node_role) { 10 | BOOST_CHECK(!multiverso::node::is_worker(multiverso::Role::NONE)); 11 | BOOST_CHECK(multiverso::node::is_worker(multiverso::Role::WORKER)); 12 | BOOST_CHECK(!multiverso::node::is_worker(multiverso::Role::SERVER)); 13 | BOOST_CHECK(multiverso::node::is_worker(multiverso::Role::ALL)); 14 | 15 | BOOST_CHECK(!multiverso::node::is_server(multiverso::Role::NONE)); 16 | BOOST_CHECK(!multiverso::node::is_server(multiverso::Role::WORKER)); 17 | BOOST_CHECK(multiverso::node::is_server(multiverso::Role::SERVER)); 18 | BOOST_CHECK(multiverso::node::is_server(multiverso::Role::ALL)); 19 | } 20 | 21 | BOOST_AUTO_TEST_SUITE_END() 22 | 23 | } // namespace test 24 | } // namespace multiverso -------------------------------------------------------------------------------- /Test/unittests/test_sync.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "multiverso_env.h" 5 | 6 | namespace multiverso { 7 | namespace test { 8 | 9 | struct SyncArrayTableEnv : public SyncMultiversoEnv { 10 | ArrayWorker* table; 11 | 12 | SyncArrayTableEnv() : SyncMultiversoEnv() { 13 | ArrayTableOption option(10); 14 | table = MV_CreateTable(option); 15 | } 16 | 17 | ~SyncArrayTableEnv() { 18 | delete table; 19 | table = nullptr; 20 | } 21 | }; 22 | 23 | BOOST_FIXTURE_TEST_SUITE(test_sync, SyncArrayTableEnv) 24 | 25 | BOOST_AUTO_TEST_CASE(sync) { 26 | std::vector delta(10); 27 | std::vector model(10); 28 | for (int i = 0; i < 10; ++i) delta[i] = i; 29 | table->Add(delta.data(), delta.size()); 30 | table->Get(model.data(), model.size()); 31 | 32 | for (int i = 0; i < 10; ++i) { 33 | BOOST_CHECK_EQUAL(model[i], delta[i]); 34 | } 35 | 36 | table->AddAsync(delta.data(), delta.size()); 37 | int handle = table->GetAsync(model.data(), model.size()); 38 | table->Wait(handle); 39 | 40 | for (int i = 0; i < 10; ++i) { 41 | BOOST_CHECK_EQUAL(model[i], 2 * delta[i]); 42 | } 43 | } 44 | 45 | 46 | BOOST_AUTO_TEST_SUITE_END() 47 | 48 | } // namespace test 49 | } // namespace multiverso 50 | -------------------------------------------------------------------------------- /binding/C#/MultiversoCLR/AssemblyInfo.cpp: -------------------------------------------------------------------------------- 1 | using namespace System; 2 | using namespace System::Reflection; 3 | using namespace System::Runtime::CompilerServices; 4 | using namespace System::Runtime::InteropServices; 5 | using namespace System::Security::Permissions; 6 | 7 | // 8 | // General Information about an assembly is controlled through the following 9 | // set of attributes. Change these attribute values to modify the information 10 | // associated with an assembly. 
11 | // 12 | [assembly:AssemblyTitleAttribute(L"MultiversoCLR")]; 13 | [assembly:AssemblyDescriptionAttribute(L"")]; 14 | [assembly:AssemblyConfigurationAttribute(L"")]; 15 | [assembly:AssemblyCompanyAttribute(L"")]; 16 | [assembly:AssemblyProductAttribute(L"MultiversoCLR")]; 17 | [assembly:AssemblyCopyrightAttribute(L"Copyright (c) 2016")]; 18 | [assembly:AssemblyTrademarkAttribute(L"")]; 19 | [assembly:AssemblyCultureAttribute(L"")]; 20 | 21 | // 22 | // Version information for an assembly consists of the following four values: 23 | // 24 | // Major Version 25 | // Minor Version 26 | // Build Number 27 | // Revision 28 | // 29 | // You can specify all the value or you can default the Revision and Build Numbers 30 | // by using the '*' as shown below: 31 | 32 | [assembly:AssemblyVersionAttribute("1.0.*")]; 33 | 34 | [assembly:ComVisible(false)]; 35 | 36 | [assembly:CLSCompliantAttribute(true)]; -------------------------------------------------------------------------------- /binding/C#/MultiversoCLR/MatrixTable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | using namespace System; 7 | 8 | namespace MultiversoCLR { 9 | 10 | interface class IWorkerTable { 11 | public: 12 | static IWorkerTable^ CreateTable(int table_id, int num_rows, int num_cols, System::String^ type); 13 | void Get(int row_id, void* buffer, int size); 14 | void Get(void* buffer, int size); 15 | void Get(array^ row_ids, array^ buffers, int size); 16 | 17 | void Add(int row_id, void* buffer, int size); 18 | void Add(void* buffer, int size); 19 | void Add(array^ row_ids, array^ buffers, int size); 20 | }; 21 | 22 | template 23 | public ref class MatrixTable : public IWorkerTable { 24 | public: 25 | MatrixTable(int num_rows, int num_cols) { 26 | multiverso::MatrixTableOption option(num_rows, num_cols); 27 | table_ = multiverso::MV_CreateTable(option); 28 | } 29 | 30 | ~MatrixTable() { 31 | delete table_; 32 | } 33 | 34 | virtual void Get(void* buffer, int size) { 35 | table_->Get(static_cast(buffer), size); 36 | } 37 | 38 | virtual void Get(int row_id, void* buffer, int size) { 39 | table_->Get(row_id, static_cast(buffer), size); 40 | } 41 | 42 | virtual void Get(array^ row_ids, array^ buffers, int size) { 43 | std::vector row_id_vec(size); 44 | std::vector buffer_vec; 45 | pin_ptr p = &row_ids[0]; 46 | memcpy(row_id_vec.data(), p, size * sizeof(int)); 47 | for (int i = 0; i < size; ++i) { 48 | buffer_vec.push_back(static_cast(buffers[i])); 49 | } 50 | table_->Get(row_id_vec, buffer_vec, size); 51 | } 52 | 53 | virtual void Add(int row_id, void* buffer, int size) { 54 | table_->Add(row_id, static_cast(buffer), size); 55 | } 56 | 57 | virtual void Add(void* buffer, int size) { 58 | table_->Add(static_cast(buffer), size); 59 | } 60 | 61 | virtual void Add(array^ row_ids, array^ buffers, int size) { 62 | std::vector row_id_vec(size); 63 | std::vector buffer_vec; 64 | pin_ptr p = &row_ids[0]; 65 | memcpy(row_id_vec.data(), p, size * sizeof(int)); 66 | for (int i = 0; i < size; ++i) { 67 | buffer_vec.push_back(static_cast(buffers[i])); 68 | } 69 | table_->Add(row_id_vec, buffer_vec, size); 70 | } 71 | private: 72 | multiverso::MatrixWorkerTable* table_; 73 | }; 74 | 75 | IWorkerTable^ IWorkerTable::CreateTable(int, int num_rows, int num_cols, System::String^ type) { 76 | if (type->Equals("Int")) return gcnew MatrixTable(num_rows, num_cols); 77 | if (type->Equals("Float")) return gcnew MatrixTable(num_rows, num_cols); 78 | if 
(type->Equals("Double")) return gcnew MatrixTable(num_rows, num_cols); 79 | throw gcnew Exception("Element Type " + type + " not implemented"); 80 | } 81 | } -------------------------------------------------------------------------------- /binding/C#/MultiversoCLR/MultiversoCLR.h: -------------------------------------------------------------------------------- 1 | // MultiversoCLR.h 2 | 3 | #pragma once 4 | 5 | // #include "ITable.h" 6 | #include "MatrixTable.h" 7 | 8 | using namespace System; 9 | 10 | namespace MultiversoCLR { 11 | 12 | public ref class MultiversoWrapper 13 | { 14 | public: 15 | static bool NetBind(int rank, System::String^ endpoint); 16 | static bool NetConnect(array^ ranks, array^ endpoints); 17 | static void NetFinalize(); 18 | 19 | static void Init(int num_tables, bool sync); 20 | static void Shutdown(); 21 | 22 | static void CreateTables(array^ rows, array^ cols, array^ eleTypes); 23 | static void CreateTable(int table_id, int rows, int cols, System::String^ eleType); 24 | 25 | static int Rank(); 26 | static int Size(); 27 | static void Barrier(); 28 | 29 | generic 30 | static void Get(int table_id, array^ p_value); 31 | 32 | generic 33 | static void Get(int table_id, int row_id, array^ p_value); 34 | 35 | generic 36 | static void Add(int table_id, array^ p_update); 37 | 38 | generic 39 | static void Add(int table_id, int row_id, array^ p_value); 40 | 41 | private: 42 | static void Init(); 43 | static void CreateWorkerTable(int table_id, int rows, int cols, System::String^ eleType); 44 | static array^ worker_tables_; 45 | }; 46 | } 47 | -------------------------------------------------------------------------------- /binding/C#/MultiversoCLR/MultiversoCLR.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | 14 | 15 | Header Files 16 | 17 | 18 | Header Files 19 | 20 | 21 | 22 | 23 | Source Files 24 | 25 | 26 | Source Files 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /binding/C#/MultiversoCLR/ReadMe.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/C#/MultiversoCLR/ReadMe.txt -------------------------------------------------------------------------------- /binding/C#/MultiversoCLR/multiverso.snk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/C#/MultiversoCLR/multiverso.snk -------------------------------------------------------------------------------- /binding/C#/NuGet/GenerateNugetPackage.ps1: -------------------------------------------------------------------------------- 1 | copy ../../../x64/Release/MultiversoCLR.dll . 
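# Pack the staged assembly into the NuGet package described by MultiversoCLR.nuspec.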
2 | nuget pack MultiversoCLR.nuspec -------------------------------------------------------------------------------- /binding/C#/NuGet/MultiversoCLR.nuspec: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | MultiversoCLR 5 | 1.0.1 6 | feiga 7 | feiga 8 | http://github.com/Microsoft/multiverso/blob/master/LICENSE 9 | http://github.com/Microsoft/multiverso 10 | http://ICON_URL_HERE_OR_DELETE_THIS_LINE 11 | false 12 | MultiversoCLR 13 | Copyright 2016 14 | Multiverso 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /binding/lua/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Lua sources 2 | luac.out 3 | 4 | # luarocks build files 5 | *.src.rock 6 | *.zip 7 | *.tar.gz 8 | 9 | # Object files 10 | *.o 11 | *.os 12 | *.ko 13 | *.obj 14 | *.elf 15 | 16 | # Precompiled Headers 17 | *.gch 18 | *.pch 19 | 20 | # Libraries 21 | *.lib 22 | *.a 23 | *.la 24 | *.lo 25 | *.def 26 | *.exp 27 | 28 | # Shared objects (inc. Windows DLLs) 29 | *.dll 30 | *.so 31 | *.so.* 32 | *.dylib 33 | 34 | # Executables 35 | *.exe 36 | *.out 37 | *.app 38 | *.i*86 39 | *.x86_64 40 | *.hex 41 | 42 | -------------------------------------------------------------------------------- /binding/lua/ArrayTableHandler.lua: -------------------------------------------------------------------------------- 1 | local ffi = require 'ffi' 2 | local util = require('multiverso.util') 3 | 4 | local tbh = torch.class('ArrayTableHanlder') 5 | 6 | ffi.cdef[[ 7 | void MV_NewArrayTable(int size, TableHandler* out); 8 | void MV_GetArrayTable(TableHandler handler, float* data, int size); 9 | void MV_AddArrayTable(TableHandler handler, float* data, int size); 10 | void MV_AddAsyncArrayTable(TableHandler handler, float* data, int size); 11 | ]] 12 | 13 | function tbh:new(size, init_value) 14 | tbh = {} 15 | size = size or 0 16 | setmetatable(tbh, self) 17 | self.__index = self 18 | tbh._handler = ffi.new("TableHandler[1]") 19 | tbh._size = ffi.new("int", size) 20 | libmv.MV_NewArrayTable( 21 | tbh._size, 22 | tbh._handler 23 | ) 24 | local init = require 'multiverso.init' 25 | if init_value ~= nil then 26 | init_value = init_value:float() 27 | -- sync add is used because we want to make sure that the initial value 28 | -- has taken effect when the call returns. 
No matter whether it is 29 | -- master worker, we should call add to make sure it works in sync 30 | -- mode 31 | if init.worker_id() == 0 then 32 | self.add(tbh, init_value, true) 33 | else 34 | self.add(tbh, init_value:clone():zero(), true) 35 | end 36 | end 37 | return tbh 38 | end 39 | 40 | function tbh:get() 41 | cdata = ffi.new("float[?]", self._size) 42 | libmv.MV_GetArrayTable(self._handler[0], cdata, self._size) 43 | return util.cdata2tensor(cdata, tonumber(self._size)) 44 | end 45 | 46 | function tbh:add(data, sync) 47 | sync = sync or false 48 | cdata = util.tensor2cdata(data) 49 | if sync then 50 | libmv.MV_AddArrayTable(self._handler[0], cdata, self._size) 51 | else 52 | libmv.MV_AddAsyncArrayTable(self._handler[0], cdata, self._size) 53 | end 54 | end 55 | 56 | return tbh 57 | -------------------------------------------------------------------------------- /binding/lua/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR) 2 | CMAKE_POLICY(VERSION 2.6) 3 | IF(LUAROCKS_PREFIX) 4 | MESSAGE(STATUS "Installing Torch through Luarocks") 5 | STRING(REGEX REPLACE "(.*)lib/luarocks/rocks.*" "\\1" CMAKE_INSTALL_PREFIX "${LUAROCKS_PREFIX}") 6 | MESSAGE(STATUS "Prefix inferred from Luarocks: ${CMAKE_INSTALL_PREFIX}") 7 | ENDIF() 8 | FIND_PACKAGE(Torch REQUIRED) 9 | 10 | FILE(GLOB luasrc *.lua) 11 | SET(luasrc ${luasrc}) 12 | ADD_TORCH_PACKAGE(multiverso "" "${luasrc}" "Multiverso Torch Binding Package") 13 | -------------------------------------------------------------------------------- /binding/lua/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install test 2 | install: 3 | luarocks make 4 | test: 5 | luajit test.lua 6 | -------------------------------------------------------------------------------- /binding/lua/README.md: -------------------------------------------------------------------------------- 1 | # Multiverso Torch/Lua Binding 2 | 3 | ## Introduction 4 | Multiverso is a parameter server framework for distributed machine learning. 5 | This package can enable parallel training of torch program over multiple machines and GPUs. 6 | 7 | ## Requirements 8 | Build multiverso successfully by following the [README > build](https://github.com/Microsoft/multiverso/blob/master/README.md#build). 9 | 10 | ## Installation 11 | 12 | **NOTE**: Before installation, you need to make sure have `libmultiverso.so` 13 | built successfully according to [Requirements](#requirements). 
14 | 15 | ``` 16 | make install 17 | ``` 18 | or 19 | ``` 20 | luarocks make 21 | ``` 22 | 23 | ## Unit Tests 24 | ``` 25 | make test 26 | ``` 27 | or 28 | 29 | ``` 30 | luajit test.lua 31 | ``` 32 | 33 | ## Documentation 34 | 35 | - [Tutorial](https://github.com/Microsoft/multiverso/wiki/Integrate-multiverso-into-torch-project) 36 | - [API](https://github.com/Microsoft/multiverso/wiki/Multiverso-Torch-Binding-API) 37 | - [Benchmark](https://github.com/Microsoft/multiverso/wiki/Multiverso-Torch-Binding-Benchmark) 38 | -------------------------------------------------------------------------------- /binding/lua/demos/xor/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: raw multiverso 2 | raw: 3 | th xor.lua 4 | multiverso: 5 | th xor-multiverso.lua 6 | -------------------------------------------------------------------------------- /binding/lua/demos/xor/README.md: -------------------------------------------------------------------------------- 1 | # XOR demo for multiverso. 2 | 3 | The train example is referred from 4 | https://github.com/torch/nn/blob/master/doc/training.md 5 | 6 | There are two versions, where `xor.lua` is the raw version and 7 | `xor-multiverso.lua` is the multiverso version. 8 | 9 | Comments have been add to the every modification in `xor-multiverso.lua` that is 10 | needed to make it run on multiverso. 11 | 12 | ## Run the raw version 13 | ``` 14 | make raw 15 | ``` 16 | or 17 | ``` 18 | th xor.lua 19 | ``` 20 | 21 | ## Run the multiverso version 22 | ``` 23 | make multiverso 24 | ``` 25 | or 26 | ``` 27 | th xor-multiverso.lua 28 | ``` 29 | -------------------------------------------------------------------------------- /binding/lua/demos/xor/xor-multiverso.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- The multiverso version train example referring from 3 | -- https://github.com/torch/nn/blob/master/doc/training.md 4 | -- 5 | 6 | require 'nn' 7 | 8 | -- Load multiverso. 9 | local multiverso = require 'multiverso' 10 | 11 | -- Init multiverso. 12 | multiverso.init(false) 13 | 14 | -- Get some useful parameters from multiverso. 15 | -- 1) The total number of workers. 16 | multiverso.num_workers = multiverso.num_workers() 17 | -- 2) The id for current worker. 18 | multiverso.worker_id = multiverso.worker_id() 19 | -- 3) Easy access to check whether this is master worker. 20 | multiverso.is_master = multiverso.worker_id == 0 21 | 22 | local model = nn.Sequential() 23 | local inputs = 2 24 | local outputs = 1 25 | local HUs = 20 26 | model:add(nn.Linear(inputs, HUs)) 27 | model:add(nn.Tanh()) 28 | model:add(nn.Linear(HUs, outputs)) 29 | 30 | local criterion = nn.MSECriterion() 31 | 32 | local batchSize = 128 33 | local batchInputs = torch.Tensor(batchSize, inputs) 34 | local batchLabels = torch.DoubleTensor(batchSize) 35 | 36 | for i=1,batchSize do 37 | local input = torch.randn(2) 38 | local label = 1 39 | if input[1]*input[2]>0 then 40 | label = -1; 41 | end 42 | batchInputs[i]:copy(input) 43 | batchLabels[i] = label 44 | end 45 | 46 | local params, gradParams = model:getParameters() 47 | 48 | -- Create ArrayTableHandler for syncing parameters. 49 | local tbh = multiverso.ArrayTableHandler:new(params:size(1), params) 50 | -- Wait for finishing the initializing phase. 51 | multiverso.barrier() 52 | -- Get the initial model from the server. 
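-- (The ArrayTableHandler:new call above made the master worker add the
-- initial parameters to the server table while the other workers added
-- zeros, so after the barrier every worker fetches the same starting
-- model here.)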
53 | params:copy(tbh:get()) 54 | 55 | for epoch=1,1000 do 56 | model:zeroGradParameters() 57 | local outputs = model:forward(batchInputs) 58 | local loss = criterion:forward(outputs, batchLabels) 59 | local dloss_doutput = criterion:backward(outputs, batchLabels) 60 | model:backward(batchInputs, dloss_doutput) 61 | 62 | -- Sync parameters: 63 | -- 1) Add the gradients (delta value) to the server. 64 | tbh:add(-0.01 * gradParams) 65 | -- 2) (Optional) Sync all workers after each epoch. 66 | multiverso.barrier() 67 | -- 3) Fetch the newest value from the server. 68 | params:copy(tbh:get()) 69 | 70 | -- Print should also only exist in master worker. 71 | if multiverso.is_master then 72 | print(epoch) 73 | end 74 | end 75 | 76 | -- Only test in master worker. 77 | if multiverso.is_master then 78 | local x = torch.Tensor({ 79 | {0.5, 0.5}, 80 | {0.5, -0.5}, 81 | {-0.5, 0.5}, 82 | {-0.5, -0.5} 83 | }) 84 | print(model:forward(x)) 85 | end 86 | 87 | -- Remember to shutdown at last. 88 | multiverso.shutdown() 89 | -------------------------------------------------------------------------------- /binding/lua/demos/xor/xor.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- The train example referring from 3 | -- https://github.com/torch/nn/blob/master/doc/training.md 4 | -- 5 | 6 | require 'nn' 7 | 8 | local model = nn.Sequential() 9 | local inputs = 2 10 | local outputs = 1 11 | local HUs = 20 12 | model:add(nn.Linear(inputs, HUs)) 13 | model:add(nn.Tanh()) 14 | model:add(nn.Linear(HUs, outputs)) 15 | 16 | local criterion = nn.MSECriterion() 17 | 18 | local batchSize = 128 19 | local batchInputs = torch.Tensor(batchSize, inputs) 20 | local batchLabels = torch.DoubleTensor(batchSize) 21 | 22 | for i=1,batchSize do 23 | local input = torch.randn(2) 24 | local label = 1 25 | if input[1]*input[2]>0 then 26 | label = -1; 27 | end 28 | batchInputs[i]:copy(input) 29 | batchLabels[i] = label 30 | end 31 | 32 | local params, gradParams = model:getParameters() 33 | 34 | for epoch=1,2000 do 35 | model:zeroGradParameters() 36 | local outputs = model:forward(batchInputs) 37 | local loss = criterion:forward(outputs, batchLabels) 38 | local dloss_doutput = criterion:backward(outputs, batchLabels) 39 | model:backward(batchInputs, dloss_doutput) 40 | model:updateParameters(0.01) 41 | end 42 | 43 | local x = torch.Tensor({ 44 | {0.5, 0.5}, 45 | {0.5, -0.5}, 46 | {-0.5, 0.5}, 47 | {-0.5, -0.5} 48 | }) 49 | print(model:forward(x)) 50 | -------------------------------------------------------------------------------- /binding/lua/docs/BENCHMARK.md: -------------------------------------------------------------------------------- 1 | # Multiverso Torch Binding Benchmark 2 | 3 | ## Task Description 4 | 5 | Perform CIFAR-10 classification with torch resnet implementation. 6 | 7 | ## Codebase 8 | 9 | [Microsoft/fb.resnet.torch multiverso branch](https://github.com/Microsoft/fb.resnet.torch/tree/multiverso) 10 | 11 | ## Setup 12 | Please follow [this guide](https://github.com/Microsoft/multiverso/wiki/Multiverso-Torch-Lua-Binding) to setup your environment. 13 | 14 | ## Hardware 15 | 16 | - **Hosts** : 1 17 | - **GPU** : Tesla K40m * 8 18 | - **CPU** : Intel(R) Xeon(R) CPU E5-2680 v2 @ 2.80GHz 19 | - **Memory** : 251GB 20 | 21 | ## Common settings 22 | 23 | - depth 32 24 | - nEpochs 164 25 | - learningRate 0.1(epoch <= 80), 0.01(81 <= epoch <= 121), 0.001(121 <= epoch) 26 | 27 | ## Clarification for multiverso settings 28 | 29 | - The train data is divided evenly to each worker. 
30 | - Master strategy is used to warm up the initial model. 31 | - Workers sync after each batch and has a barrier after each epoch. 32 | 33 | ## Results 34 | 35 | | Code Name | #Process(es) | #GPU(s) per Process | Use multiverso | Batch size | Initial learning rate | Seconds per epoch | Best Model | 36 | | :-------: | :----------: | :-----------------: | :------------: | :--------: | :-------------------: | :---------------: | :--------: | 37 | | 1P1G0M | 1 | 1 | 0 | 128 | 0.1 | 55.57 | 92.435 % | 38 | | 1P8G0M | 1 | 8 | 0 | 128 | 0.1 | 28.38 | 92.464 % | 39 | | 8P1G1M | 8 | 1 | 1 | 64 | 0.05 | 11.37 | 92.449 % | 40 | 41 | ![top1error_vs_epoch](https://raw.githubusercontent.com/Microsoft/multiverso/master/binding/lua/docs/imgs/top1error_vs_epoch.png) 42 | ![top1error_vs_runningtime](https://raw.githubusercontent.com/Microsoft/multiverso/master/binding/lua/docs/imgs/top1error_vs_runningtime.png) 43 | -------------------------------------------------------------------------------- /binding/lua/docs/imgs/top1error_vs_epoch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/lua/docs/imgs/top1error_vs_epoch.png -------------------------------------------------------------------------------- /binding/lua/docs/imgs/top1error_vs_runningtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/lua/docs/imgs/top1error_vs_runningtime.png -------------------------------------------------------------------------------- /binding/lua/docs/imgs/top5error_vs_epoch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/lua/docs/imgs/top5error_vs_epoch.png -------------------------------------------------------------------------------- /binding/lua/docs/imgs/top5error_vs_runningtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/lua/docs/imgs/top5error_vs_runningtime.png -------------------------------------------------------------------------------- /binding/lua/init.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | local ffi = require 'ffi' 3 | local util = require 'multiverso.util' 4 | 5 | local mv = {} 6 | 7 | ffi.cdef[[ 8 | typedef void* TableHandler; 9 | void MV_Init(int* argc, char* argv[]); 10 | void MV_ShutDown(); 11 | void MV_Barrier(); 12 | int MV_NumWorkers(); 13 | int MV_WorkerId(); 14 | int MV_ServerId(); 15 | ]] 16 | 17 | package.cpath = '/usr/local/lib/?.so;' .. package.cpath 18 | libmv_path = package.searchpath('libmultiverso', package.cpath, '') 19 | if libmv_path == nil then 20 | print([[ 21 | [Error] Multiverso shared object, `libmultiverso.so`, NOT FOUND! 22 | Please build & install `multiverso` according to the instruction [1]. 
23 | [1] https://github.com/Microsoft/multiverso#build]]) 24 | return 25 | end 26 | libmv = ffi.load(libmv_path, 'true') 27 | 28 | mv.ArrayTableHandler = require('multiverso.ArrayTableHandler') 29 | mv.MatrixTableHandler = require('multiverso.MatrixTableHandler') 30 | 31 | function mv.init(sync) 32 | sync = sync or false -- false for the default value of sync 33 | -- the first argument will be ignored. So we put a placeholder here 34 | args = {""} 35 | if sync then 36 | table.insert(args, "-sync=true") 37 | end 38 | argc = ffi.new("int[1]", #args) 39 | argv = ffi.new("char*[?]", #args) 40 | for i = 1, #args do 41 | argv[i - 1] = ffi.new("char[1]") 42 | ffi.copy(argv[i - 1], args[i]) 43 | end 44 | libmv.MV_Init(argc, argv) 45 | end 46 | 47 | function mv.barrier() 48 | libmv.MV_Barrier() 49 | end 50 | 51 | function mv.shutdown() 52 | libmv.MV_ShutDown() 53 | end 54 | 55 | function mv.num_workers() 56 | return libmv.MV_NumWorkers() 57 | end 58 | 59 | function mv.worker_id() 60 | return libmv.MV_WorkerId() 61 | end 62 | 63 | function mv.server_id() 64 | return libmv.MV_ServerId() 65 | end 66 | 67 | return mv 68 | -------------------------------------------------------------------------------- /binding/lua/multiverso-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "multiverso" 2 | version = "scm-1" 3 | 4 | source = { 5 | url = "https://github.com/Microsoft/multiverso" 6 | } 7 | 8 | description = { 9 | summary = "Torch binding for multiverso.", 10 | detailed = [[ 11 | Multiverso is a parameter server framework for distributed machine 12 | learning. This package can leverage multiple machines and GPUs to 13 | speed up the torch programs. 14 | ]], 15 | homepage = "http://www.dmtk.io", 16 | license = "MIT" 17 | } 18 | 19 | dependencies = { 20 | "torch >= 7.0" 21 | } 22 | 23 | build = { 24 | type = "command", 25 | build_command = [[ 26 | cmake -E make_directory build; 27 | cd build; 28 | cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." 
-DCMAKE_INSTALL_PREFIX="$(PREFIX)"; 29 | $(MAKE) 30 | ]], 31 | install_command = [[ 32 | cd build && $(MAKE) install; 33 | ]] 34 | } 35 | -------------------------------------------------------------------------------- /binding/lua/test.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env lua 2 | 3 | require 'torch' 4 | 5 | mv = require('multiverso') 6 | 7 | local mv_test = torch.TestSuite() 8 | local mv_tester = torch.Tester() 9 | 10 | function Set(list) 11 | local set = {} 12 | for _, l in ipairs(list) do set[l] = true end 13 | return set 14 | end 15 | 16 | function mv_test.testArray() 17 | size = 100000 18 | tbh = mv.ArrayTableHandler:new(size) 19 | mv.barrier() 20 | 21 | for i = 1, 1000 do 22 | print(tbh:get()[{{1, 10}}]) 23 | tbh:add(torch.range(1, size)) 24 | tbh:add(torch.range(1, size)) 25 | mv.barrier() 26 | end 27 | end 28 | 29 | function mv_test.testMatrix() 30 | num_row = 11 31 | num_col = 10 32 | size = num_row * num_col 33 | num_workers = mv.num_workers() 34 | tbh = mv.MatrixTableHandler:new(num_row, num_col) 35 | mv.barrier() 36 | 37 | for i = 1, 20 do 38 | row_ids = {0, 1, 5, 10} 39 | row_ids_set = Set(row_ids) 40 | tbh:add(torch.range(1, size)) 41 | data = torch.range( 42 | row_ids[1] * num_col + 1, 43 | row_ids[1] * num_col + num_col 44 | ) 45 | for j = 2, #row_ids do 46 | row_id = row_ids[j] 47 | data = torch.cat(data, torch.range( 48 | row_id * num_col + 1, 49 | row_id * num_col + num_col 50 | )) 51 | end 52 | tbh:add(data, row_ids) 53 | mv.barrier() 54 | data = tbh:get() 55 | mv.barrier() 56 | for j = 1, data:size(1) do 57 | for k = 1, data:size(2) do 58 | expected = ((j - 1) * num_col + k) * i * num_workers 59 | if row_ids_set[j - 1] then 60 | expected = expected + ((j - 1) * num_col + k) * i * num_workers 61 | end 62 | mv_tester:eq(expected, data[j][k]) 63 | end 64 | end 65 | data = tbh:get(row_ids) 66 | mv.barrier() 67 | for j = 1, data:size(1) do 68 | for k = 1, data:size(2) do 69 | expected = (row_ids[j] * num_col + k) * i * num_workers * 2 70 | mv_tester:eq(expected, data[j][k]) 71 | end 72 | end 73 | end 74 | end 75 | 76 | mv.init() 77 | mv_tester:add(mv_test) 78 | mv_tester:run() 79 | mv.shutdown() 80 | -------------------------------------------------------------------------------- /binding/lua/util.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env lua 2 | 3 | util = {} 4 | 5 | ffi = require('ffi') 6 | 7 | util.tensor_type = { 8 | ['unsigned char'] = 'torch.ByteTensor', 9 | ['char'] = 'torch.CharTensor', 10 | ['short'] = 'torch.ShortTensor', 11 | ['int'] = 'torch.IntTensor', 12 | ['long'] = 'torch.LongTensor', 13 | ['float'] ='torch.FloatTensor', 14 | ['double'] = 'torch.DoubleTensor' 15 | } 16 | 17 | function util.tensor2cdata(data, data_type) 18 | if type(data) == 'table' then 19 | data = torch.Tensor(data) 20 | end 21 | data_type = data_type or 'float' 22 | tensor_type = util.tensor_type[data_type] 23 | return data:contiguous():type(tensor_type):data() 24 | end 25 | 26 | function util.cdata2tensor(cdata, sizes, data_type) 27 | data_type = data_type or 'float' 28 | tensor_type = util.tensor_type[data_type] 29 | data = torch.Tensor(sizes):type(tensor_type) 30 | ffi.copy(data:data(), cdata, data:nElement() * ffi.sizeof(data_type)) 31 | return data 32 | end 33 | 34 | return util 35 | -------------------------------------------------------------------------------- /binding/python/README.md: 
-------------------------------------------------------------------------------- 1 | # Multiverso Python/Theano/Lasagne Binding 2 | 3 | 4 | ## Introduction 5 | Multiverso is a parameter server framework for distributed machine learning. This package can leverage multiple machines and GPUs to speed up Python programs. 6 | 7 | 8 | ## Installation 9 | 10 | 1. (For GPU support only) Install CUDA and cuDNN according to this [guide](https://github.com/Microsoft/fb.resnet.torch/blob/multiverso/INSTALL.md). You only need to finish the steps before [Install Torch](https://github.com/Microsoft/fb.resnet.torch/blob/multiverso/INSTALL.md#install-torch). 11 | 1. Install multiverso 12 | * On Linux: Please follow the [README](https://github.com/Microsoft/multiverso/blob/master/README.md#build) to build and install multiverso. 13 | * On Windows: You need MSBuild.exe installed, and your system must be able to find it in the $PATH. Then run [build_dll.bat](https://github.com/Microsoft/multiverso/blob/master/src/build_dll.bat) to build and install the .dll. There is no auto-installer for Windows yet, so you have to copy the .dll either to a directory in the system $PATH or to the multiverso package folder. 14 | 1. Install the requirements 15 | * `gfortran` is required by scipy. For example, you can install it with `sudo apt-get install gfortran` on Ubuntu. 16 | * (Optional) You need python-nose to run the unit tests. For example, you can install it with `sudo apt-get install python-nose` on Ubuntu. 17 | 1. Install the Python binding with the command `sudo python setup.py install` 18 | 19 | 20 | ## Run Unit Tests 21 | ``` 22 | nosetests 23 | ``` 24 | 25 | 26 | ## Documentation 27 | * [Tutorial](https://github.com/Microsoft/multiverso/wiki/How-to-write-python-code-with-multiverso) 28 | * API documentation is written as docstrings in the Python source code. 29 | * [Benchmark](https://github.com/Microsoft/multiverso/wiki/Multiverso-Python-Binding-Benchmark) 30 | -------------------------------------------------------------------------------- /binding/python/docs/BENCHMARK.md: -------------------------------------------------------------------------------- 1 | # Multiverso Python Binding Benchmark 2 | 3 | ## Task Description 4 | Perform CIFAR-10 classification with a residual network implementation based on Lasagne. 5 | 6 | ## Codebase 7 | [Deep_Residual_Learning_CIFAR-10](https://github.com/Microsoft/multiverso/blob/master/binding/python/examples/theano/lasagne/Deep_Residual_Learning_CIFAR-10.py) 8 | 9 | ## Setup 10 | Please follow [this guide](https://github.com/Microsoft/multiverso/wiki/Multiverso-Python-Theano-Lasagne-Binding) to set up your environment. 11 | 12 | ## Hardware 13 | ||| 14 | | -------- |:--------:| 15 | |Hosts|1| 16 | |GPU|Tesla K40m * 8| 17 | |CPU|Intel(R) Xeon(R) CPU E5-2680 v2 @ 2.80GHz| 18 | |Memory| 251GB | 19 | 20 | 21 | ## Theano settings 22 | Configuration of `~/.theanorc` 23 | ``` 24 | [global] 25 | device = gpu 26 | floatX = float32 27 | 28 | [cuda] 29 | root = /usr/local/cuda-7.5/ 30 | 31 | [lib] 32 | cnmem = 1 33 | ``` 34 | 35 | ## About the Model 36 | ||| 37 | | :---- | -----: | 38 | |Total epochs|82| 39 | |Batch size|128| 40 | |Depth|32| 41 | |Learning rate schedule|Initialized as 0.1, changed to 0.01 from epoch 41, and to 0.001 from epoch 61| 42 | |Number of parameters in model| 464,154| 43 | 44 | 45 | Clarification 46 | - An epoch means that all the processes divide the data equally and go through it once together. 47 | - A barrier is used at the end of each epoch.
48 | - This experiment doesn't use warm start in ASGD. 49 | - The time to load the data is not considered in the time of the experiment. 50 | 51 | 52 | # The results 53 | The results of 3 experiments with different configurations are shown as following. 54 | 55 | |Short Name | # Process(es) | #GPU(s) per Process | Use multiverso | Batch size | Initial learning rate | Seconds per epoch | Best model validation accuracy | 56 | | :---- | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | 57 | | 1P1G0M | 1 | 1 | 0 | 128 | 0.1 | 175.4 | 92.69 % | 58 | | 1P1G1M | 1 | 1 | 1 | 128 | 0.1 | 194.4 | 92.53 % | 59 | | 8P1G1M | 8 | 1 | 1 | 64 | 0.05 | 34.1 | 92.11 % | 60 | 61 | ![accuracy_epoch](https://raw.githubusercontent.com/Microsoft/multiverso/master/binding/python/docs/imgs/accuracy_epoch.png) 62 | ![accuracy_time](https://raw.githubusercontent.com/Microsoft/multiverso/master/binding/python/docs/imgs/accuracy_time.png) 63 | -------------------------------------------------------------------------------- /binding/python/docs/imgs/accuracy_epoch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/docs/imgs/accuracy_epoch.png -------------------------------------------------------------------------------- /binding/python/docs/imgs/accuracy_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/docs/imgs/accuracy_time.png -------------------------------------------------------------------------------- /binding/python/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/examples/__init__.py -------------------------------------------------------------------------------- /binding/python/examples/theano/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/examples/theano/__init__.py -------------------------------------------------------------------------------- /binding/python/examples/theano/keras/README.md: -------------------------------------------------------------------------------- 1 | # Keras example 2 | 3 | [addition_rnn_mv.py](./addition_rnn_mv.py) is adapted from 4 | [a keras official example](https://github.com/fchollet/keras/blob/master/examples/addition_rnn.py). 5 | 6 | 7 | It will demonstrate how to use multiverso in keras. 8 | 9 | For example, you can train it with two GPUs with such command. 10 | ``` 11 | mpirun -np 2 python addition_rnn_mv.py 12 | ``` 13 | 14 | It will reach `val_acc: 0.99+` much earlier than training with only one GPU. 
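
For a quick picture of what the script does, here is a minimal sketch of the pattern it follows: hook multiverso into `model.fit` through `MVCallback`. The toy model, layer sizes, and random data below are illustrative assumptions and are not taken from `addition_rnn_mv.py`; the sketch also assumes the `multiverso.theano_ext.keras_ext` package is importable from your installation (the import path follows the source tree layout).

```
# Minimal sketch only; toy model and random data are illustrative.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

import multiverso as mv
from multiverso.theano_ext.keras_ext.callbacks import MVCallback

mv.init()

model = Sequential()
model.add(Dense(32, input_dim=10, activation='relu'))
model.add(Dense(1))
model.compile(loss='mse', optimizer='sgd')

x = np.random.rand(256, 10)
y = np.random.rand(256, 1)

# MVCallback syncs the model weights through the parameter server at the
# end of every `freq` mini-batches.
model.fit(x, y, batch_size=32, nb_epoch=5,
          callbacks=[MVCallback(model, freq=1)],
          verbose=1 if mv.is_master_worker() else 0)

mv.shutdown()
```

When launched with `mpirun`, every MPI process runs the same script and the callback keeps the model weights in sync through the parameter server.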
15 | -------------------------------------------------------------------------------- /binding/python/examples/theano/lasagne/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: 2P1G1M0.1LR128B1S 4P1G1M0.1LR128B0S 4P1G1M0.1LR128B1S 4P1G1M0.05LR64B0S 4P1G1M0.05LR64B1S 8P1G1M0.1LR128B0S 8P1G1M0.1LR128B1S 8P1G1M0.05LR64B0S 8P1G1M0.05LR64B1S 2 | 3 | 1P1G1M0.1LR128B1S: 4 | mpirun -np 1 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.1 -b 128 -s True 5 | 6 | 4P1G1M0.1LR128B0S: 7 | mpirun -np 4 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.1 -b 128 -s False 8 | 4P1G1M0.1LR128B1S: 9 | mpirun -np 4 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.1 -b 128 -s True 10 | 11 | 4P1G1M0.05LR64B0S: 12 | mpirun -np 4 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.05 -b 64 -s False 13 | 4P1G1M0.05LR64B1S: 14 | mpirun -np 4 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.05 -b 64 -s True 15 | 16 | 8P1G1M0.1LR128B0S: 17 | mpirun -np 8 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.1 -b 128 -s False 18 | 8P1G1M0.1LR128B1S: 19 | mpirun -np 8 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.1 -b 128 -s True 20 | 21 | 8P1G1M0.05LR64B0S: 22 | mpirun -np 8 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.05 -b 64 -s False 23 | 8P1G1M0.05LR64B1S: 24 | mpirun -np 8 python ./Deep_Residual_Learning_CIFAR-10.py -lr 0.05 -b 64 -s True 25 | 26 | -------------------------------------------------------------------------------- /binding/python/examples/theano/lasagne/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/examples/theano/lasagne/__init__.py -------------------------------------------------------------------------------- /binding/python/examples/theano/load_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf8 3 | 4 | import cPickle 5 | import os 6 | import sys 7 | CUR_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) 8 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))) 9 | CIFAR10_DIR = os.path.abspath(os.path.join(CUR_DIR, os.path.pardir, 'data', 'cifar-10-batches-py')) 10 | 11 | 12 | import numpy as np 13 | 14 | 15 | def load_cifar10(data_dir=CIFAR10_DIR): 16 | ''' 17 | we assume these files are in data_dir: 18 | batches.meta data_batch_1 data_batch_2 data_batch_3 data_batch_4 19 | data_batch_5 readme.html test_batch 20 | 21 | You can download the data from 22 | https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz 23 | 24 | The RGB values are scaled to [0., 1.]. 25 | ''' 26 | x_train_l = [] 27 | t_train_l = [] 28 | 29 | for i in xrange(1, 6): 30 | filename = os.path.join(data_dir, "data_batch_%d" % i) 31 | with open(filename, "rb") as f: 32 | data_obj = cPickle.load(f) 33 | x_train_l.append(data_obj["data"]) 34 | t_train_l.extend(data_obj["labels"]) 35 | x_train = np.concatenate(x_train_l, axis=0) / 255. 36 | 37 | t_train = np.zeros((x_train.shape[0], 10)) 38 | for i, cls in enumerate(t_train_l): 39 | t_train[i, cls] = 1 40 | 41 | with open(os.path.join(data_dir, "test_batch")) as f: 42 | data_obj = cPickle.load(f) 43 | x_test = data_obj["data"] / 255. 
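# Like the training images above, test pixels are scaled to [0, 1]; the raw
# test labels are turned into one-hot vectors below.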
44 | t_test_l = data_obj["labels"] 45 | 46 | t_test = np.zeros((x_test.shape[0], 10)) 47 | for i, cls in enumerate(t_test_l): 48 | t_test[i, cls] = 1 49 | return x_train, t_train, x_test, t_test 50 | -------------------------------------------------------------------------------- /binding/python/multiverso/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf8 3 | 4 | from .api import init, shutdown, barrier, workers_num, worker_id, server_id, is_master_worker 5 | from .tables import ArrayTableHandler, MatrixTableHandler 6 | -------------------------------------------------------------------------------- /binding/python/multiverso/api.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf8 3 | 4 | import ctypes 5 | from .utils import Loader 6 | import numpy as np 7 | 8 | 9 | mv_lib = Loader.get_lib() 10 | 11 | 12 | def init(sync=False): 13 | '''Initialize multiverso. 14 | 15 | This should be called only once before training at the beginning of the 16 | whole project. 17 | If sync is True, a sync server will be created. Otherwise an async server 18 | will be created. 19 | If a sync server is created, you **must** make sure every process calls 20 | `add` and `get` in the same order and the same number of times. Otherwise 21 | some processes will be blocked. In sync server mode, all `get` calls will 22 | return **exactly the same results**. 23 | If an async server is created, there are no such limitations, but we can't 24 | guarantee that `get` will return the same results on every worker. 25 | If you want to get the same results in async server mode, you should use 26 | `barrier` and `get` with the argument `sync` set to `True` to sync the 27 | processes. 28 | ''' 29 | args = [b""] # the first argument will be ignored. So we put a placeholder here 30 | if sync: 31 | args.append(b"-sync=true") 32 | n = len(args) 33 | args_type = ctypes.c_char_p * n 34 | mv_lib.MV_Init(ctypes.pointer(ctypes.c_int(n)), args_type(*[ctypes.c_char_p(arg) for arg in args])) 35 | 36 | 37 | def shutdown(): 38 | '''Shutdown multiverso. 39 | 40 | This should be called only once after finishing training at the end of the 41 | whole project. 42 | ''' 43 | mv_lib.MV_ShutDown() 44 | 45 | 46 | def barrier(): 47 | '''Set a barrier for all workers to wait at. 48 | 49 | Workers will wait until all workers reach this barrier. 50 | ''' 51 | mv_lib.MV_Barrier() 52 | 53 | 54 | def workers_num(): 55 | '''Return the total number of workers.''' 56 | return mv_lib.MV_NumWorkers() 57 | 58 | 59 | def worker_id(): 60 | '''Return the id (zero-based index) of the current worker.''' 61 | return mv_lib.MV_WorkerId() 62 | 63 | 64 | def server_id(): 65 | return mv_lib.MV_ServerId() 66 | 67 | 68 | def is_master_worker(): 69 | '''Return whether this worker is the master worker. 70 | 71 | Some things only need one worker process, such as validation, outputting the 72 | results, initializing the parameters and so on. So we mark worker 0 as 73 | the master worker to handle these things.
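For example (`save_results` here is just a placeholder for any step that should run only once): `if multiverso.is_master_worker(): save_results()`.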
74 | ''' 75 | return worker_id() == 0 76 | -------------------------------------------------------------------------------- /binding/python/multiverso/theano_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/multiverso/theano_ext/__init__.py -------------------------------------------------------------------------------- /binding/python/multiverso/theano_ext/keras_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/multiverso/theano_ext/keras_ext/__init__.py -------------------------------------------------------------------------------- /binding/python/multiverso/theano_ext/keras_ext/callbacks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf8 3 | 4 | from keras.callbacks import Callback 5 | from param_manager import KerasParamManager 6 | 7 | 8 | class MVCallback(Callback): 9 | ''' 10 | Please use MVCallback as a callback of keras model.fit function 11 | For e.g. 12 | ``` 13 | model.fit(X_train, Y_train, 14 | batch_size=batch_size, 15 | nb_epoch=nb_epoch, 16 | validation_data=(X_test, Y_test), 17 | shuffle=True, 18 | callbacks=[mvcallback(model, freq=1)]) 19 | ``` 20 | ''' 21 | def __init__(self, model, freq=1): 22 | '''Initialize the MVCallback class 23 | 24 | The `model` should be the be a keras model 25 | The `freq` should be the update frequency of the parameters. For 26 | example, `freq=3` means update the parameters every 3 mini-batch. 27 | ''' 28 | super(MVCallback, self).__init__() 29 | self.kpm = KerasParamManager(model) 30 | self.cur_n = 0 31 | if freq < 0: 32 | raise ValueError("Frequency must be an integer greater than 0.") 33 | self.freq = freq 34 | 35 | def on_batch_end(self, batch, logs={}): 36 | '''sync all parameters at the end of every batch''' 37 | self.cur_n = (self.cur_n + 1) % self.freq 38 | if self.cur_n % self.freq == 0: 39 | self.kpm.sync_all_param() 40 | -------------------------------------------------------------------------------- /binding/python/multiverso/theano_ext/keras_ext/param_manager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ..param_manager import MVModelParamManager 4 | 5 | 6 | class KerasParamManager(MVModelParamManager): 7 | ''' 8 | KerasParamManager is manager to make managing and synchronizing the 9 | variables in keras more easily 10 | ''' 11 | 12 | def get_all_param_values(self): 13 | return self.model.get_weights() 14 | 15 | def set_all_param_values(self, params): 16 | self.model.set_weights(params) 17 | -------------------------------------------------------------------------------- /binding/python/multiverso/theano_ext/lasagne_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Multiverso/e45369e1d07277f656b0900beb2709d86679fa53/binding/python/multiverso/theano_ext/lasagne_ext/__init__.py -------------------------------------------------------------------------------- /binding/python/multiverso/theano_ext/lasagne_ext/param_manager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf8 3 | 4 | import lasagne 5 | 
from ..param_manager import MVModelParamManager 6 | 7 | 8 | class LasagneParamManager(MVModelParamManager): 9 | ''' 10 | LasagneParamManager is manager to make managing and synchronizing the 11 | variables in lasagne more easily 12 | ''' 13 | 14 | def get_all_param_values(self): 15 | return lasagne.layers.get_all_param_values(self.model) 16 | 17 | def set_all_param_values(self, params): 18 | lasagne.layers.set_all_param_values(self.model, params) 19 | -------------------------------------------------------------------------------- /binding/python/multiverso/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf8 3 | 4 | from __future__ import print_function 5 | 6 | import ctypes 7 | import os 8 | import platform 9 | from ctypes.util import find_library 10 | import numpy as np 11 | 12 | PACKAGE_PATH = os.path.abspath(os.path.dirname(__file__)) 13 | 14 | 15 | class Loader(object): 16 | ''' 17 | This loader is responsible for loading multiverso dynamic library in both 18 | *nux and windows 19 | ''' 20 | 21 | LIB = None 22 | 23 | @classmethod 24 | def _find_mv_path(cls): 25 | if platform.system() == "Windows": 26 | mv_lib_path = find_library("Multiverso") 27 | if mv_lib_path is None: 28 | print("* Fail to load Multiverso.dll from the windows $PATH."\ 29 | "Because Multiverso.dll can not be found in the $PATH "\ 30 | "directories. Go on loading Multiverso from the package.") 31 | else: 32 | return mv_lib_path 33 | 34 | mv_lib_path = os.path.join(PACKAGE_PATH, "Multiverso.dll") 35 | if not os.path.exists(mv_lib_path): 36 | print("* Fail to load Multiverso.dll from the package. Because"\ 37 | " the file " + mv_lib_path + " can not be found.") 38 | else: 39 | return mv_lib_path 40 | else: 41 | mv_lib_path = find_library("multiverso") 42 | if mv_lib_path is None: 43 | print("* Fail to load libmultiverso.so from the system"\ 44 | "libraries. Because libmultiverso.so can't be found in"\ 45 | "library paths. Go on loading Multiverso from the package.") 46 | else: 47 | return mv_lib_path 48 | 49 | mv_lib_path = os.path.join(PACKAGE_PATH, "libmultiverso.so") 50 | if not os.path.exists(mv_lib_path): 51 | print("* Fail to load libmultiverso.so from the package. Because"\ 52 | " the file " + mv_lib_path + " can not be found.") 53 | else: 54 | return mv_lib_path 55 | return None 56 | 57 | @classmethod 58 | def load_lib(cls): 59 | mv_lib_path = cls._find_mv_path() 60 | if mv_lib_path is None: 61 | print("Fail to load the multiverso library. 
Please make sure you"\ 62 | " have installed multiverso successfully") 63 | else: 64 | print("Find the multiverso library successfully(%s)" % mv_lib_path) 65 | return ctypes.cdll.LoadLibrary(mv_lib_path) 66 | 67 | @classmethod 68 | def get_lib(cls): 69 | if not cls.LIB: 70 | cls.LIB = cls.load_lib() 71 | cls.LIB.MV_NumWorkers.restype = ctypes.c_int 72 | return cls.LIB 73 | 74 | 75 | def convert_data(data): 76 | '''convert the data to float32 ndarray''' 77 | if not isinstance(data, np.ndarray): 78 | data = np.array(data) 79 | return data.astype(np.float32) 80 | -------------------------------------------------------------------------------- /binding/python/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os 3 | 4 | 5 | def readme(): 6 | with open('README.md') as f: 7 | return f.read() 8 | 9 | 10 | setup(name='multiverso-python', 11 | version='0.0.1', 12 | long_description=readme(), 13 | description="Multiverso is a parameter server framework for distributed" 14 | " machine learning. This package can leverage multiple machines and GPUs" 15 | " to speed up the python programs.", 16 | url='https://github.com/Microsoft/multiverso', 17 | author='Microsoft', 18 | license='MIT', 19 | packages=['multiverso', 'multiverso.theano_ext', 'multiverso.theano_ext.lasagne_ext'], 20 | # TODO: The lasagne on pypi is too old. multiverso need some functions in 21 | # lasagne-0.2 which is not released yet. Please replace the dev version 22 | # with the stable release later. 23 | dependency_links = ['https://github.com/Lasagne/Lasagne/tarball/master#egg=lasagne-0.2.dev1'], 24 | install_requires=["theano>=0.8.2", "lasagne>=0.2.dev1"], 25 | classifiers=[ 26 | "Intended Audience :: Developers", 27 | "Intended Audience :: Science/Research", 28 | "Programming Language :: Python :: 2", 29 | ], 30 | zip_safe=False) 31 | -------------------------------------------------------------------------------- /cmake_uninstall.cmake.in: -------------------------------------------------------------------------------- 1 | if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 2 | message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 3 | endif(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 4 | 5 | file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) 6 | string(REGEX REPLACE "\n" ";" files "${files}") 7 | foreach(file ${files}) 8 | message(STATUS "Uninstalling $ENV{DESTDIR}${file}") 9 | if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 10 | exec_program( 11 | "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" 12 | OUTPUT_VARIABLE rm_out 13 | RETURN_VALUE rm_retval 14 | ) 15 | if(NOT "${rm_retval}" STREQUAL 0) 16 | message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}") 17 | endif(NOT "${rm_retval}" STREQUAL 0) 18 | else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 19 | message(STATUS "File $ENV{DESTDIR}${file} does not exist.") 20 | endif(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") 21 | endforeach(file) 22 | -------------------------------------------------------------------------------- /include/multiverso/actor.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_ACTOR_H_ 2 | #define MULTIVERSO_ACTOR_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "multiverso/message.h" 10 | 11 | namespace std { 
class thread; } 12 | 13 | namespace multiverso { 14 | 15 | template class MtQueue; 16 | 17 | // The basic computation and communication unit in the system 18 | class Actor { 19 | public: 20 | explicit Actor(const std::string& name); 21 | virtual ~Actor(); 22 | // Start to run the Actor 23 | void Start(); 24 | // Stop to run the Actor 25 | void Stop(); 26 | // Accept a message from other actors 27 | void Receive(MessagePtr&); 28 | // Actor name, a unique identifier of a actor 29 | const std::string name() const { return name_; } 30 | 31 | protected: 32 | // Message response function 33 | using Handler = std::function; 34 | // Register message handler function 35 | void RegisterHandler(const MsgType& type, const Handler& task) { 36 | handlers_.insert({ type, task }); 37 | } 38 | // Send a message to a dst actor 39 | void SendTo(const std::string& dst_name, MessagePtr& msg); 40 | 41 | // Main function run in a background thread 42 | // The default main is to receive msg from other actors and process 43 | // messages based on registered message handlers 44 | virtual void Main(); 45 | 46 | // message queue 47 | std::unique_ptr > mailbox_; 48 | // message handlers function 49 | std::unordered_map handlers_; 50 | bool is_working_; 51 | 52 | private: 53 | std::string name_; 54 | std::unique_ptr thread_; 55 | 56 | Actor(const Actor&) = delete; 57 | void operator=(const Actor&) = delete; 58 | }; 59 | 60 | namespace actor { 61 | 62 | const std::string kCommunicator = "communicator"; 63 | const std::string kController = "controller"; 64 | const std::string kServer = "server"; 65 | const std::string kWorker = "worker"; 66 | 67 | } 68 | 69 | } // namespace multiverso 70 | 71 | #endif // MULTIVERSO_ACTOR_H_ 72 | -------------------------------------------------------------------------------- /include/multiverso/blob.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_BLOB_H_ 2 | #define MULTIVERSO_BLOB_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace multiverso { 10 | 11 | // Manage a chunk of memory. Blob can share memory with other Blobs. 12 | // Never use external memory. All external memory should be managed by itself 13 | class Blob { 14 | public: 15 | // an empty blob 16 | Blob() : data_(nullptr), size_(0) {} 17 | 18 | explicit Blob(size_t size); 19 | 20 | // Construct from external memory. Will copy a new piece 21 | Blob(const void* data, size_t size); 22 | 23 | Blob(void* data, size_t size); 24 | 25 | Blob(const Blob& rhs); 26 | 27 | ~Blob(); 28 | 29 | // Shallow copy by default. 
Call \ref CopyFrom for a deep copy 30 | void operator=(const Blob& rhs); 31 | 32 | inline char operator[](size_t i) const { 33 | return data_[i]; 34 | } 35 | 36 | template 37 | inline T& As(size_t i = 0) const { 38 | return (reinterpret_cast(data_))[i]; 39 | } 40 | template 41 | inline size_t size() const { return size_ / sizeof(T); } 42 | 43 | // DeepCopy, for a shallow copy, use operator= 44 | void CopyFrom(const Blob& src); 45 | 46 | inline char* data() const { return data_; } 47 | inline size_t size() const { return size_; } 48 | 49 | private: 50 | // Memory is shared and auto managed 51 | char *data_; 52 | size_t size_; 53 | }; 54 | 55 | } // namespace multiverso 56 | 57 | #endif // MULTIVERSO_BLOB_H_ 58 | -------------------------------------------------------------------------------- /include/multiverso/c_api.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_C_API_H_ 2 | #define MULTIVERSO_C_API_H_ 3 | 4 | #if defined _WIN32 5 | #define DllExport __declspec(dllexport) 6 | #else 7 | #define DllExport 8 | #endif 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | typedef void* TableHandler; 15 | 16 | DllExport void MV_Init(int* argc, char* argv[]); 17 | 18 | DllExport void MV_ShutDown(); 19 | 20 | DllExport void MV_Barrier(); 21 | 22 | DllExport int MV_NumWorkers(); 23 | 24 | DllExport int MV_WorkerId(); 25 | 26 | DllExport int MV_ServerId(); 27 | 28 | // Array Table 29 | DllExport void MV_NewArrayTable(int size, TableHandler* out); 30 | 31 | DllExport void MV_GetArrayTable(TableHandler handler, float* data, int size); 32 | 33 | DllExport void MV_AddArrayTable(TableHandler handler, float* data, int size); 34 | 35 | DllExport void MV_AddAsyncArrayTable(TableHandler handler, float* data, int size); 36 | 37 | 38 | // Matrix Table 39 | DllExport void MV_NewMatrixTable(int num_row, int num_col, TableHandler* out); 40 | 41 | DllExport void MV_GetMatrixTableAll(TableHandler handler, float* data, int size); 42 | 43 | DllExport void MV_AddMatrixTableAll(TableHandler handler, float* data, int size); 44 | 45 | DllExport void MV_AddAsyncMatrixTableAll(TableHandler handler, float* data, int size); 46 | 47 | DllExport void MV_GetMatrixTableByRows(TableHandler handler, float* data, 48 | int size, int row_ids[], int row_ids_n); 49 | 50 | DllExport void MV_AddMatrixTableByRows(TableHandler handler, float* data, 51 | int size, int row_ids[], int row_ids_n); 52 | 53 | DllExport void MV_AddAsyncMatrixTableByRows(TableHandler handler, float* data, 54 | int size, int row_ids[], int row_ids_n); 55 | 56 | #ifdef __cplusplus 57 | } // end extern "C" 58 | #endif 59 | 60 | #endif // MULTIVERSO_C_API_H_ 61 | -------------------------------------------------------------------------------- /include/multiverso/communicator.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_COMMUNICATION_H_ 2 | #define MULTIVERSO_COMMUNICATION_H_ 3 | 4 | #include "multiverso/actor.h" 5 | #include "multiverso/message.h" 6 | 7 | namespace multiverso { 8 | 9 | class NetInterface; 10 | 11 | class Communicator : public Actor { 12 | public: 13 | Communicator(); 14 | ~Communicator(); 15 | 16 | private: 17 | void Main() override; 18 | // Process message received from other actors, either send to other nodes, or 19 | // forward to local actors. 
20 | void ProcessMessage(MessagePtr& msg); 21 | // Thread function to receive messages from other nodes 22 | void Communicate(); 23 | // Forward to other actors in the same node 24 | void LocalForward(MessagePtr& msg); 25 | 26 | NetInterface* net_util_; 27 | std::unique_ptr recv_thread_; 28 | }; 29 | 30 | } // namespace multiverso 31 | 32 | #endif // MULTIVERSO_COMMUNICATION_H_ 33 | -------------------------------------------------------------------------------- /include/multiverso/controller.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_CONTROLLER_H_ 2 | #define MULTIVERSO_CONTROLLER_H_ 3 | 4 | #include "multiverso/actor.h" 5 | #include "multiverso/message.h" 6 | 7 | namespace multiverso { 8 | 9 | class Controller : public Actor { 10 | public: 11 | Controller(); 12 | ~Controller(); 13 | 14 | private: 15 | void ProcessBarrier(MessagePtr& msg); 16 | void ProcessRegister(MessagePtr& msg); 17 | 18 | class RegisterController; 19 | RegisterController* register_controller_; 20 | class BarrierController; 21 | BarrierController* barrier_controller_; 22 | }; 23 | 24 | } // namespace multiverso 25 | 26 | #endif // MULTIVERSO_CONTROLLER_H_ 27 | -------------------------------------------------------------------------------- /include/multiverso/dashboard.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_DASHBOARD_H_ 2 | #define MULTIVERSO_DASHBOARD_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "multiverso/util/timer.h" 9 | 10 | namespace multiverso { 11 | 12 | class Monitor; 13 | 14 | // Dashboard to record and query system running information 15 | // thread safe 16 | class Dashboard { 17 | public: 18 | static void AddMonitor(const std::string& name, Monitor* monitor); 19 | static void RemoveMonitor(const std::string& name); 20 | static std::string Watch(const std::string& name); 21 | static void Display(); 22 | private: 23 | static std::map record_; 24 | static std::mutex m_; 25 | }; 26 | 27 | class Monitor { 28 | public: 29 | explicit Monitor(const std::string& name) { 30 | name_ = name; 31 | timer_.Start(); 32 | Dashboard::AddMonitor(name_, this); 33 | } 34 | 35 | void Begin() { timer_.Start(); } 36 | 37 | void End() { 38 | elapse_ += timer_.elapse(); 39 | ++count_; 40 | } 41 | 42 | double average() const { return elapse_ / count_; } 43 | 44 | std::string name() const { return name_; } 45 | double elapse() const { return elapse_; } 46 | int count() const { return count_; } 47 | 48 | std::string info_string() const; 49 | 50 | private: 51 | // name of the Monitor 52 | std::string name_; 53 | // total elapsed time 54 | double elapse_; 55 | // count of monitor 56 | int count_; 57 | // a timer util 58 | Timer timer_; 59 | }; 60 | 61 | #define REGISTER_MONITOR(name) \ 62 | static Monitor g_##name##_monitor(#name); 63 | 64 | // Guard with MONITOR macro in the code to monitor it's execution 65 | // Usage: 66 | // MONITOR_BEGIN(your_code_short_description) 67 | // your code 68 | // MONITOR_END(your_code_short_description) 69 | #define MONITOR_BEGIN(name) \ 70 | REGISTER_MONITOR(name) \ 71 | g_##name##_monitor.Begin(); 72 | 73 | #define MONITOR_END(name) \ 74 | g_##name##_monitor.End(); 75 | 76 | 77 | } // namespace multiverso 78 | 79 | #endif // MULTIVERSO_DASHBOARD_H_ 80 | -------------------------------------------------------------------------------- /include/multiverso/io/hdfs_stream.h: -------------------------------------------------------------------------------- 1 | 
#ifndef MULTIVERSO_HDFS_FILE_SYS_H_ 2 | #define MULTIVERSO_HDFS_FILE_SYS_H_ 3 | 4 | #ifdef MULTIVERSO_USE_HDFS 5 | 6 | /*! 7 | * \file local_file_sys.h 8 | * \brief The implement of hdfs io interface. 9 | */ 10 | 11 | #include "multiverso/util/io.h" 12 | #include "hdfs.h" 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | 23 | namespace multiverso { 24 | 25 | class HDFSStream : public Stream { 26 | public: 27 | HDFSStream(hdfsFS fs, const URI& uri, FileOpenMode mode); 28 | 29 | virtual ~HDFSStream(void) override; 30 | 31 | /*! 32 | * \brief write data to a file 33 | * \param buf pointer to a memory buffer 34 | * \param size data size 35 | */ 36 | virtual void Write(const void *buf, size_t size) override; 37 | 38 | 39 | /*! 40 | * \brief read data from Stream 41 | * \param buf pointer to a memory buffer 42 | * \param size the size of buf 43 | */ 44 | virtual size_t Read(void *buf, size_t size) override; 45 | 46 | virtual bool Good() override; 47 | 48 | private: 49 | bool is_good_; 50 | hdfsFS fs_; 51 | hdfsFile fp_; 52 | std::string path_; 53 | std::string mode_; 54 | }; 55 | 56 | class HDFSStreamFactory : public StreamFactory { 57 | public: 58 | explicit HDFSStreamFactory(const std::string& host); 59 | virtual ~HDFSStreamFactory(void) override; 60 | 61 | /*! 62 | * \brief create a Stream 63 | * \param path the path of the file 64 | * \param mode "w" - create an empty file to store data; 65 | * "a" - open the file to append data to it 66 | * "r" - open the file to read 67 | * \return the Stream which is used to write or read data 68 | */ 69 | virtual Stream* Open(const URI& uri, 70 | FileOpenMode mode) override; 71 | 72 | virtual void Close() override; 73 | 74 | private: 75 | std::string namenode_; 76 | hdfsFS fs_; 77 | }; 78 | 79 | } 80 | 81 | #endif 82 | 83 | #endif // MULTIVERSO_HDFS_FILE_SYS_H_ 84 | -------------------------------------------------------------------------------- /include/multiverso/io/local_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_LOCAL_FILE_SYS_H_ 2 | #define MULTIVERSO_LOCAL_FILE_SYS_H_ 3 | 4 | /*! 5 | * \file local_file_sys.h 6 | * \brief the implement of local io interface. 7 | */ 8 | 9 | #include "multiverso/io/io.h" 10 | 11 | namespace multiverso 12 | { 13 | class LocalStream : public Stream 14 | { 15 | public: 16 | LocalStream(const URI& uri, FileOpenMode mode); 17 | virtual ~LocalStream(void) override; 18 | 19 | /*! 20 | * \brief write data to a file 21 | * \param buf pointer to a memory buffer 22 | * \param size data size 23 | */ 24 | virtual void Write(const void *buf, size_t size) override; 25 | 26 | /*! 27 | * \brief read data from Stream 28 | * \param buf pointer to a memory buffer 29 | * \param size the size of buf 30 | */ 31 | virtual size_t Read(void *buf, size_t size) override; 32 | 33 | virtual bool Good() override; 34 | 35 | private: 36 | bool is_good_; 37 | FILE *fp_; 38 | std::string path_; 39 | }; 40 | 41 | class LocalStreamFactory : public StreamFactory 42 | { 43 | public: 44 | LocalStreamFactory(const std::string& host); 45 | ~LocalStreamFactory(void) override; 46 | 47 | /*! 
48 | * \brief create a Stream 49 | * \param path the path of the file 50 | * \param mode "w" - create an empty file to store data; 51 | * "a" - open the file to append data to it 52 | * "r" - open the file to read 53 | * \return the Stream which is used to write or read data 54 | */ 55 | virtual Stream* Open(const URI& uri, 56 | FileOpenMode mode) override; 57 | 58 | virtual void Close() override; 59 | 60 | private: 61 | std::string host_; 62 | }; 63 | } 64 | 65 | #endif // MULTIVERSO_LOCAL_FILE_SYS_H_ -------------------------------------------------------------------------------- /include/multiverso/message.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_MESSAGE_H_ 2 | #define MULTIVERSO_MESSAGE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "multiverso/blob.h" 10 | 11 | namespace multiverso { 12 | 13 | enum MsgType { 14 | Request_Get = 1, 15 | Request_Add = 2, 16 | Reply_Get = -1, 17 | Reply_Add = -2, 18 | Server_Finish_Train = 31, 19 | Control_Barrier = 33, // 0x100001 20 | Control_Reply_Barrier = -33, 21 | Control_Register = 34, 22 | Control_Reply_Register = -34, 23 | Default = 0 24 | }; 25 | 26 | class Message { 27 | public: 28 | MsgType type() const { return static_cast(header_[2]); } 29 | inline int src() const { return header_[0]; } 30 | inline int dst() const { return header_[1]; } 31 | inline int table_id() const { return header_[3]; } 32 | inline int msg_id() const { return header_[4]; } 33 | 34 | inline void set_type(MsgType type) { header_[2] = static_cast(type); } 35 | inline void set_src(int src) { header_[0] = src; } 36 | inline void set_dst(int dst) { header_[1] = dst; } 37 | inline void set_table_id(int table_id) { header_[3] = table_id; } 38 | inline void set_msg_id(int msg_id) { header_[4] = msg_id; } 39 | 40 | inline void set_data(const std::vector& data) { 41 | data_ = std::move(data); } 42 | inline std::vector& data() { return data_; } 43 | inline size_t size() const { return data_.size(); } 44 | 45 | inline int* header() { return header_; } 46 | inline const int* header() const { return header_; } 47 | static const int kHeaderSize = 8 * sizeof(int); 48 | 49 | // Create a Message with only headers 50 | // The src/dst, type is opposite with src message 51 | inline Message* CreateReplyMessage() { 52 | Message* reply = new Message(); 53 | reply->set_dst(this->src()); 54 | reply->set_src(this->dst()); 55 | reply->set_type(static_cast(-header_[2])); 56 | reply->set_table_id(this->table_id()); 57 | reply->set_msg_id(this->msg_id()); 58 | return reply; 59 | } 60 | 61 | inline void Push(const Blob& blob) { data_.push_back(blob); } 62 | 63 | private: 64 | int header_[8]; 65 | std::vector data_; 66 | }; 67 | 68 | typedef std::unique_ptr MessagePtr; 69 | 70 | } // namespace multiverso 71 | 72 | #endif // MULTIVERSO_MESSAGE_H_ 73 | -------------------------------------------------------------------------------- /include/multiverso/multiverso.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_INCLUDE_MULTIVERSO_H_ 2 | #define MULTIVERSO_INCLUDE_MULTIVERSO_H_ 3 | 4 | #include 5 | #include "table_factory.h" 6 | 7 | namespace multiverso { 8 | 9 | void MV_Init(int* argc = nullptr, char* argv[] = nullptr); 10 | 11 | void MV_Barrier(); 12 | 13 | void MV_ShutDown(bool finalize_net = true); 14 | 15 | int MV_Rank(); 16 | int MV_Size(); 17 | 18 | int MV_NumWorkers(); 19 | int MV_NumServers(); 20 | 21 | int MV_WorkerId(); 22 | int MV_ServerId(); 23 
| 24 | int MV_WorkerIdToRank(int worker_id); 25 | int MV_ServerIdToRank(int server_id); 26 | 27 | template 28 | void MV_SetFlag(const std::string& name, const T& value); 29 | 30 | 31 | // create server table and worker table 32 | // \param option for table initiate 33 | // \return worker table pointer if this node is worker 34 | // otherwise return nullptr 35 | template 36 | typename TableOptionType::WorkerTableType* 37 | MV_CreateTable(const TableOptionType& option) { 38 | auto table = table_factory::CreateTable(option); 39 | Zoo::Get()->Barrier(); 40 | return table; 41 | } 42 | 43 | // inplace sum by allreduce 44 | template 45 | void MV_Aggregate(ElemType* data, int size); 46 | 47 | // --- Net API -------------------------------------------------------------- // 48 | // NOTE(feiga): these API is only used for specific situation. 49 | // Init Multiverso Net with the provided endpoint. Multiverso Net will bind 50 | // the provided endpoint and use this endpoint to listen and recv message 51 | // \param rank the rank of this MV process 52 | // \param endpoint endpoint with format ip:port, e.g., localhost:9999 53 | // \return 0 SUCCESS 54 | // \return -1 FAIL 55 | int MV_NetBind(int rank, char* endpoint); 56 | 57 | // Connect Multiverso Net with other processes in the system. Multiverso Net 58 | // will connect these endpoints and send msgs 59 | // \param ranks array of rank 60 | // \param endpoints endpoints for each rank 61 | // \param size size of the array 62 | // \return 0 SUCCESS 63 | // \return -1 FAIL 64 | int MV_NetConnect(int* rank, char* endpoint[], int size); 65 | void MV_NetFinalize(); 66 | 67 | } // namespace multiverso 68 | 69 | #endif // MULTIVERSO_INCLUDE_MULTIVERSO_H_ 70 | 71 | -------------------------------------------------------------------------------- /include/multiverso/net.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_NET_NET_H_ 2 | #define MULTIVERSO_NET_NET_H_ 3 | 4 | #include 5 | #include "multiverso/message.h" 6 | 7 | namespace multiverso { 8 | 9 | enum NetThreadLevel { 10 | THREAD_SERIALIZED, 11 | THREAD_MULTIPLE 12 | }; 13 | 14 | // Interface of inter process communication method 15 | class NetInterface { 16 | public: 17 | static NetInterface* Get(); 18 | 19 | virtual void Init(int* argc = nullptr, char** argv = nullptr) = 0; 20 | 21 | virtual void Finalize() = 0; 22 | 23 | // Bind with a specific endpoint 24 | virtual int Bind(int rank, char* endpoint) = 0; 25 | // Connect with other endpoints 26 | virtual int Connect(int* rank, char* endpoints[], int size) = 0; 27 | 28 | virtual bool active() const = 0; 29 | 30 | virtual std::string name() const = 0; 31 | virtual int size() const = 0; 32 | virtual int rank() const = 0; 33 | 34 | // \return 1. > 0 sent size 2. = 0 not sent 3. < 0 net error 35 | virtual int Send(MessagePtr& msg) = 0; 36 | 37 | // \return 1. > 0 received size 2. = 0 not received 3. 
< 0 net error 38 | virtual int Recv(MessagePtr* msg) = 0; 39 | 40 | // Blocking, send raw data to rank 41 | virtual void SendTo(int rank, char* buf, int len) const = 0; 42 | // Blocking, receive raw data from rank 43 | virtual void RecvFrom(int rank, char* buf, int len) const = 0; 44 | // Blocking, send and recv at same time 45 | virtual void SendRecv(int send_rank, char* send_buf, int send_len, 46 | int recv_rank, char* recv_buf, int recv_len) const = 0; 47 | 48 | virtual int thread_level_support() = 0; 49 | }; 50 | 51 | namespace net { 52 | 53 | // inplace allreduce 54 | template 55 | void Allreduce(Typename* data, size_t elem_count); 56 | 57 | } 58 | 59 | } // namespace multiverso 60 | 61 | #endif // MULTIVERSO_NET_NET_H_ 62 | -------------------------------------------------------------------------------- /include/multiverso/node.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_NODE_H_ 2 | #define MULTIVERSO_NODE_H_ 3 | 4 | namespace multiverso { 5 | 6 | enum Role { 7 | NONE = 0, 8 | WORKER = 1, 9 | SERVER = 2, 10 | ALL = 3 11 | }; 12 | 13 | struct Node { 14 | int rank; 15 | int role; 16 | int worker_id; 17 | int server_id; 18 | 19 | Node(); 20 | }; 21 | 22 | namespace node { 23 | 24 | bool is_worker(int role); 25 | bool is_server(int role); 26 | 27 | } // namespace node 28 | 29 | } // namespace multiverso 30 | 31 | #endif // MULTIVERSO_NODE_H_ 32 | -------------------------------------------------------------------------------- /include/multiverso/server.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_SERVER_H_ 2 | #define MULTIVERSO_SERVER_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "multiverso/actor.h" 8 | 9 | namespace multiverso { 10 | 11 | class ServerTable; 12 | 13 | class Server : public Actor { 14 | public: 15 | Server(); 16 | static Server* GetServer(); 17 | int RegisterTable(ServerTable* table); 18 | 19 | protected: 20 | virtual void ProcessGet(MessagePtr& msg); 21 | virtual void ProcessAdd(MessagePtr& msg); 22 | 23 | std::vector store_; 24 | }; 25 | 26 | } // namespace multiverso 27 | 28 | #endif // MULTIVERSO_SERVER_H_ 29 | -------------------------------------------------------------------------------- /include/multiverso/table/array_table.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_ARRAY_TABLE_H_ 2 | #define MULTIVERSO_ARRAY_TABLE_H_ 3 | 4 | #include "multiverso/multiverso.h" 5 | #include "multiverso/table_interface.h" 6 | #include "multiverso/util/log.h" 7 | 8 | namespace multiverso { 9 | 10 | template 11 | struct ArrayTableOption; 12 | 13 | template 14 | class ArrayWorker : public WorkerTable { 15 | public: 16 | explicit ArrayWorker(size_t size); 17 | explicit ArrayWorker(const ArrayTableOption &option); 18 | // std::vector& raw() { return table_; } 19 | 20 | // Get all element, data is user-allocated memory, Blocking IO 21 | void Get(T* data, size_t size); 22 | // Non-blocking IO 23 | int GetAsync(T* data, size_t size); 24 | 25 | // Add all element 26 | void Add(T* data, size_t size, const AddOption* option = nullptr); 27 | int AddAsync(T* data, size_t, const AddOption* option = nullptr); 28 | 29 | int Partition(const std::vector& kv, 30 | MsgType partition_type, 31 | std::unordered_map >* out) override; 32 | 33 | void ProcessReplyGet(std::vector& reply_data) override; 34 | 35 | private: 36 | T* data_; // not owned 37 | size_t size_; 38 | int num_server_; 39 | std::vector 
server_offsets_; 40 | }; 41 | 42 | template 43 | class Updater; 44 | 45 | // The storage is a continuous large chunk of memory 46 | template 47 | class ArrayServer : public ServerTable { 48 | public: 49 | explicit ArrayServer(size_t size); 50 | explicit ArrayServer(const ArrayTableOption &option); 51 | 52 | void ProcessAdd(const std::vector& data) override; 53 | 54 | void ProcessGet(const std::vector& data, 55 | std::vector* result) override; 56 | 57 | void Store(Stream* s) override; 58 | void Load(Stream* s) override; 59 | 60 | private: 61 | int32_t server_id_; 62 | std::vector storage_; 63 | Updater* updater_; 64 | size_t size_; // number of element with type T 65 | 66 | }; 67 | 68 | template 69 | struct ArrayTableOption { 70 | explicit ArrayTableOption(size_t s) : size(s) {} 71 | size_t size; 72 | DEFINE_TABLE_TYPE(T, ArrayWorker, ArrayServer); 73 | }; 74 | 75 | } 76 | 77 | #endif // MULTIVERSO_ARRAY_TABLE_H_ 78 | -------------------------------------------------------------------------------- /include/multiverso/table/sparse_matrix_table.h: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Microsoft 2 | #ifndef INCLUDE_MULTIVERSO_TABLE_SPARSE_MATRIX_TABLE_H_ 3 | #define INCLUDE_MULTIVERSO_TABLE_SPARSE_MATRIX_TABLE_H_ 4 | 5 | #include 6 | #include 7 | #include "multiverso/multiverso.h" 8 | #include "multiverso/table_interface.h" 9 | #include "multiverso/util/log.h" 10 | #include "multiverso/table/matrix_table.h" 11 | 12 | namespace multiverso { 13 | 14 | template 15 | class SparseMatrixWorkerTable : public MatrixWorkerTable { 16 | public: 17 | SparseMatrixWorkerTable(integer_t num_row, integer_t num_col) 18 | : MatrixWorkerTable(num_row, num_col) { } 19 | int Partition(const std::vector& kv, 20 | MsgType partition_type, 21 | std::unordered_map>* out) override; 22 | void ProcessReplyGet(std::vector& reply_data) override; 23 | 24 | // get whole table, data is user-allocated memory 25 | void Get(T* data, size_t size, 26 | const GetOption* option = nullptr); 27 | 28 | // data is user-allocated memory 29 | void Get(integer_t row_id, T* data, size_t size, 30 | const GetOption* option = nullptr); 31 | 32 | void Get(const std::vector& row_ids, 33 | const std::vector& data_vec, size_t size, 34 | const GetOption* option = nullptr); 35 | 36 | private: 37 | // get whole table, data is user-allocated memory 38 | void Get(T* data, size_t size) = delete; 39 | 40 | // data is user-allocated memory 41 | void Get(integer_t row_id, T* data, size_t size) = delete; 42 | 43 | void Get(const std::vector& row_ids, 44 | const std::vector& data_vec, size_t size) = delete; 45 | }; 46 | 47 | template 48 | class Updater; 49 | template 50 | class SparseMatrixServerTable : public MatrixServerTable { 51 | public: 52 | SparseMatrixServerTable(integer_t num_row, integer_t num_col, bool using_pipeline); 53 | ~SparseMatrixServerTable(); 54 | void ProcessAdd(const std::vector& data) override; 55 | void ProcessGet(const std::vector& data, 56 | std::vector* result) override; 57 | private: 58 | void UpdateAddState(int worker_id, Blob keys); 59 | void UpdateGetState(int worker_id, integer_t* keys, size_t key_size, 60 | std::vector* out_rows); 61 | integer_t GetLogicalRow(integer_t local_row_id) { 62 | return this->row_offset_ + local_row_id; 63 | } 64 | integer_t GetPhysicalRow(integer_t global_row_id) { 65 | return global_row_id - this->row_offset_; 66 | } 67 | private: 68 | bool** up_to_date_; 69 | int workers_nums_; 70 | // std::vector> up_to_date_; 71 | }; 72 | 73 | } // 
namespace multiverso 74 | #endif // INCLUDE_MULTIVERSO_TABLE_SPARSE_MATRIX_TABLE_H_ 75 | -------------------------------------------------------------------------------- /include/multiverso/table_factory.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_TABLE_FACTORY_H_ 2 | #define MULTIVERSO_TABLE_FACTORY_H_ 3 | 4 | #include "multiverso/table_interface.h" 5 | #include "multiverso/zoo.h" 6 | 7 | #include 8 | 9 | namespace multiverso { 10 | 11 | namespace table_factory { 12 | 13 | void FreeServerTables(); 14 | void PushServerTable(ServerTable*table); 15 | 16 | template 17 | typename OptionType::WorkerTableType* CreateTable(const OptionType& option) { 18 | if (Zoo::Get()->server_rank() >= 0) { 19 | PushServerTable( 20 | new typename OptionType::ServerTableType(option)); 21 | } 22 | if (Zoo::Get()->worker_rank() >= 0) { 23 | return new typename OptionType::WorkerTableType(option); 24 | } 25 | return nullptr; 26 | } 27 | 28 | } // namespace table_factory 29 | 30 | } // namespace multiverso 31 | 32 | #endif // MULTIVERSO_TABLE_FACTORY_H_ 33 | -------------------------------------------------------------------------------- /include/multiverso/table_interface.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_TABLE_INTERFACE_H_ 2 | #define MULTIVERSO_TABLE_INTERFACE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "multiverso/blob.h" 10 | #include "multiverso/message.h" 11 | 12 | namespace std { class mutex; } 13 | 14 | namespace multiverso { 15 | 16 | typedef int32_t integer_t; 17 | 18 | class Waiter; 19 | struct AddOption; 20 | struct GetOption; 21 | enum MsgType; 22 | 23 | // User implementent this 24 | class WorkerTable { 25 | public: 26 | WorkerTable(); 27 | virtual ~WorkerTable(); 28 | 29 | void Get(Blob keys, const GetOption* option = nullptr); 30 | void Add(Blob keys, Blob values, const AddOption* option = nullptr); 31 | 32 | int GetAsync(Blob keys, const GetOption* option = nullptr); 33 | int AddAsync(Blob keys, Blob values, const AddOption* option = nullptr); 34 | 35 | void Wait(int id); 36 | 37 | void Reset(int msg_id, int num_wait); 38 | 39 | void Notify(int id); 40 | 41 | virtual int Partition(const std::vector& kv, 42 | MsgType partition_type, 43 | std::unordered_map >* out) = 0; 44 | 45 | virtual void ProcessReplyGet(std::vector&) = 0; 46 | 47 | // add user defined data structure 48 | private: 49 | std::string table_name_; 50 | // assuming there are at most 2^32 tables 51 | int table_id_; 52 | std::mutex* m_; 53 | std::vector waitings_; 54 | // assuming there are at most 2^32 msgs waiting in line 55 | int msg_id_; 56 | }; 57 | 58 | class Stream; 59 | 60 | // interface for checkpoint table 61 | class Serializable { 62 | public: 63 | virtual void Store(Stream* s) = 0; 64 | virtual void Load(Stream* s) = 0; 65 | }; 66 | 67 | // describe the server parameter storage data structure and related method 68 | class ServerTable : public Serializable { 69 | public: 70 | ServerTable(); 71 | virtual ~ServerTable() = default; 72 | virtual void ProcessAdd(const std::vector& data) = 0; 73 | virtual void ProcessGet(const std::vector& data, 74 | std::vector* result) = 0; 75 | }; 76 | 77 | #define DEFINE_TABLE_TYPE(template_type, \ 78 | worker_table_type, server_table_type) \ 79 | typedef worker_table_type WorkerTableType; \ 80 | typedef server_table_type ServerTableType; 81 | 82 | } // namespace multiverso 83 | 84 | #endif // MULTIVERSO_TABLE_INTERFACE_H_ 85 | 
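
table_interface.h and table_factory.h together define how tables come into existence: MV_CreateTable (declared in multiverso.h) forwards a table option to table_factory::CreateTable, which constructs the option's ServerTableType on server nodes and returns a new WorkerTableType on worker nodes (nullptr otherwise). Below is a minimal usage sketch built only from the declarations above, using the existing ArrayTableOption; the table size, update values, and main() scaffolding are illustrative and not part of the repository.

#include <vector>
#include "multiverso/multiverso.h"
#include "multiverso/table/array_table.h"

int main(int argc, char* argv[]) {
  multiverso::MV_Init(&argc, argv);

  // Every node calls MV_CreateTable; server nodes allocate an ArrayServer<float>
  // internally, worker nodes receive an ArrayWorker<float>* handle, other roles get nullptr.
  multiverso::ArrayTableOption<float> option(1000);
  auto* table = multiverso::MV_CreateTable(option);

  if (table != nullptr) {
    std::vector<float> delta(1000, 0.1f);    // illustrative local update
    std::vector<float> model(1000);
    table->Add(delta.data(), delta.size());  // blocking Add: routed to the servers
    table->Get(model.data(), model.size());  // blocking Get: fetch the aggregated state
  }

  multiverso::MV_Barrier();
  multiverso::MV_ShutDown();
  return 0;
}
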
-------------------------------------------------------------------------------- /include/multiverso/updater/adagrad_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_UPDATER_ADAGRAD_UPDATER_H_ 2 | #define MULTIVERSO_UPDATER_ADAGRAD_UPDATER_H_ 3 | 4 | #include "multiverso/updater/updater.h" 5 | #include "multiverso/util/log.h" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | namespace multiverso { 13 | 14 | template 15 | class AdaGradUpdater : public Updater { 16 | public: 17 | explicit AdaGradUpdater(size_t size): 18 | e(1e-6f), size_(size) { 19 | historic_g_sqr_.resize(MV_NumWorkers(), std::vector(size_)); 20 | Log::Debug("[AdaGradUpdater] Init with size = %d, e = %f. historic_size = %d\n", size_, e, historic_g_sqr_.size()); 21 | } 22 | 23 | void Update(size_t num_element, T* data, T* delta, 24 | AddOption* option, size_t offset) override { 25 | 26 | auto g_sqr_data_ = historic_g_sqr_.at(option->worker_id()); 27 | for (size_t index = 0; index < num_element; ++index) { 28 | g_sqr_data_[index + offset] -= 29 | delta[index] * delta[index] / option->learning_rate() / 30 | option->learning_rate(); 31 | 32 | //[TODO(qiwye)] sqrt take too much time 33 | data[index + offset] -= option->rho() / 34 | std::sqrt(g_sqr_data_[index + offset] + e) * 35 | delta[index] / option->learning_rate(); 36 | 37 | //data[index + offset] -= option->rho() * 38 | // QuakeRsqrt(g_sqr_data_[index + offset] + e) * 39 | // delta[index] / option->learning_rate(); 40 | } 41 | } 42 | 43 | 44 | private: 45 | 46 | float QuakeRsqrt(float number){ 47 | float x = number * 0.5f, y = number; 48 | std::uint32_t i; 49 | std::memcpy(&i, &y, sizeof(float)); 50 | i = 0x5f3759df - (i >> 1); 51 | std::memcpy(&y, &i, sizeof(float)); 52 | return y * (1.5f - (x * y * y)); 53 | } 54 | 55 | protected: 56 | std::vector< std::vector> historic_g_sqr_; 57 | float e; 58 | size_t size_; 59 | }; 60 | 61 | } 62 | 63 | #endif // MULTIVERSO_UPDATER_ADAGRAD_UPDATER_H_ 64 | -------------------------------------------------------------------------------- /include/multiverso/updater/momentum_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_ 2 | #define MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_ 3 | 4 | #include "updater.h" 5 | #include 6 | 7 | namespace multiverso { 8 | 9 | template 10 | class MomentumUpdater : public Updater { 11 | public: 12 | explicit MomentumUpdater(size_t size) : size_(size) { 13 | Log::Debug("[SmoothGradientUpdater] Init with size = %d. 
\n", size_); 14 | smooth_gradient_.resize(size_); 15 | } 16 | 17 | void Update(size_t num_element, T* data, T* delta, 18 | AddOption* option, size_t offset) override { 19 | for (size_t index = 0; index < num_element; ++index) { 20 | smooth_gradient_[index + offset] = 21 | option->momentum() * smooth_gradient_[index + offset] 22 | + (1 - option->momentum()) * delta[index]; 23 | data[index + offset] -= smooth_gradient_[index + offset]; 24 | } 25 | } 26 | 27 | ~MomentumUpdater() { smooth_gradient_.clear(); } 28 | protected: 29 | std::vector smooth_gradient_; 30 | size_t size_; 31 | }; 32 | 33 | } 34 | 35 | #endif // MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_ -------------------------------------------------------------------------------- /include/multiverso/updater/sgd_updater.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_UPDATER_SGD_UPDATER_H_ 2 | #define MULTIVERSO_UPDATER_SGD_UPDATER_H_ 3 | 4 | #include "updater.h" 5 | 6 | namespace multiverso { 7 | 8 | template 9 | class SGDUpdater : public Updater { 10 | public: 11 | explicit SGDUpdater(size_t){ 12 | Log::Debug("[SGDUpdater] Init. \n"); 13 | } 14 | void Update(size_t num_element, T* data, T* delta, 15 | AddOption*, size_t offset) override { 16 | for (size_t index = 0; index < num_element; ++index) { 17 | data[index + offset] -= delta[index]; 18 | } 19 | } 20 | 21 | void Access(size_t num_element, T* data, T* blob_data, 22 | size_t offset, AddOption*) override{ 23 | memcpy(blob_data, data + offset, sizeof(T) * num_element); 24 | } 25 | 26 | ~SGDUpdater(){} 27 | }; 28 | 29 | } 30 | 31 | #endif // MULTIVERSO_UPDATER_ASGD_UPDATER_H_ -------------------------------------------------------------------------------- /include/multiverso/util/allocator.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_ALLOCATOR_H_ 2 | #define MULTIVERSO_ALLOCATOR_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace std { class mutex; } 8 | 9 | namespace multiverso { 10 | 11 | const size_t g_pointer_size = sizeof(void*); 12 | 13 | class MemoryBlock; 14 | class FreeList { 15 | public: 16 | FreeList(size_t size); 17 | ~FreeList(); 18 | char *Pop(); 19 | void Push(MemoryBlock*); 20 | private: 21 | MemoryBlock* free_ = nullptr; 22 | size_t size_; 23 | std::mutex* mutex_; 24 | }; 25 | 26 | class MemoryBlock { 27 | public: 28 | MemoryBlock(size_t size, FreeList* list); 29 | ~MemoryBlock(); 30 | char* data(); 31 | void Unlink(); 32 | void Link(); 33 | MemoryBlock* next; 34 | private: 35 | char* data_; 36 | std::atomic ref_; 37 | static const size_t header_size_ = (sizeof(MemoryBlock*) << 1); 38 | }; 39 | 40 | class Allocator { 41 | public: 42 | virtual ~Allocator() = default; 43 | virtual char* Alloc(size_t size); 44 | virtual void Free(char* data); 45 | virtual void Refer(char *data); 46 | static Allocator* Get(); 47 | private: 48 | static const int header_size_ = sizeof(std::atomic*); 49 | }; 50 | 51 | class SmartAllocator : public Allocator { 52 | public: 53 | SmartAllocator(); 54 | ~SmartAllocator(); 55 | char* Alloc(size_t size); 56 | void Free(char* data); 57 | void Refer(char *data); 58 | private: 59 | std::unordered_map pools_; 60 | std::mutex* mutex_; 61 | }; 62 | 63 | } // namespace multiverso 64 | 65 | #endif // MULTIVERSO_ALLOCATOR_H_ 66 | -------------------------------------------------------------------------------- /include/multiverso/util/net_util.h: -------------------------------------------------------------------------------- 1 | #ifndef 
MULTIVERSO_UTIL_NET_UTIL_H_ 2 | #define MULTIVERSO_UTIL_NET_UTIL_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace multiverso { 8 | namespace net { 9 | 10 | void GetLocalIPAddress(std::unordered_set* result); 11 | 12 | } // namespace net 13 | } // namespace multiverso 14 | 15 | #endif // MULTIVERSO_UTIL_NET_UTIL_H_ 16 | -------------------------------------------------------------------------------- /include/multiverso/util/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_TIMER_H_ 2 | #define MULTIVERSO_TIMER_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace multiverso { 8 | 9 | class Timer { 10 | public: 11 | Timer(); 12 | 13 | // Restart the timer 14 | void Start(); 15 | 16 | // Get elapsed milliseconds since last Timer::Start 17 | double elapse(); 18 | 19 | private: 20 | using Clock = std::chrono::high_resolution_clock; 21 | using TimePoint = Clock::time_point; 22 | 23 | TimePoint start_point_; 24 | }; 25 | 26 | } // namespace multiverso 27 | 28 | #endif // MULTIVERSO_TIMER_H_ 29 | -------------------------------------------------------------------------------- /include/multiverso/util/waiter.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_WAITER_H_ 2 | #define MULTIVERSO_WAITER_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace multiverso { 8 | 9 | class Waiter { 10 | public: 11 | explicit Waiter(int num_wait = 1) : num_wait_(num_wait) {} 12 | 13 | void Wait() { 14 | std::unique_lock lock(mutex_); 15 | while (num_wait_ > 0) cv_.wait(lock); 16 | } 17 | 18 | void Notify() { 19 | std::unique_lock lock(mutex_); 20 | --num_wait_; 21 | cv_.notify_all(); 22 | } 23 | 24 | void Reset(int num_wait) { 25 | std::unique_lock lock(mutex_); 26 | num_wait_ = num_wait; 27 | } 28 | 29 | private: 30 | std::mutex mutex_; 31 | std::condition_variable cv_; 32 | int num_wait_; 33 | }; 34 | 35 | } // namespace multiverso 36 | 37 | #endif // MULTIVERSO_WAITER_H_ 38 | -------------------------------------------------------------------------------- /include/multiverso/worker.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_WORKER_H_ 2 | #define MULTIVERSO_WORKER_H_ 3 | 4 | #include 5 | 6 | #include "multiverso/actor.h" 7 | 8 | namespace multiverso { 9 | 10 | class WorkerTable; 11 | 12 | class Worker : public Actor { 13 | public: 14 | Worker(); 15 | 16 | int RegisterTable(WorkerTable* worker_table); 17 | 18 | private: 19 | void ProcessGet(MessagePtr& msg); 20 | void ProcessAdd(MessagePtr& msg); 21 | void ProcessReplyGet(MessagePtr& msg); 22 | void ProcessReplyAdd(MessagePtr& msg); 23 | 24 | std::vector cache_; 25 | }; 26 | 27 | } // namespace multiverso 28 | 29 | #endif // MULTIVERSO_WORKER_H_ 30 | -------------------------------------------------------------------------------- /include/multiverso/zoo.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTIVERSO_ZOO_H_ 2 | #define MULTIVERSO_ZOO_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "multiverso/actor.h" 9 | #include "multiverso/node.h" 10 | #include "multiverso/table_interface.h" 11 | 12 | namespace multiverso { 13 | 14 | class NetInterface; 15 | 16 | // Zoo Manage all components in the system, include all actors, and network 17 | // Maintain system information, provide method to access this information 18 | // Control the system, to start and end 19 | class Zoo { 20 | public: 21 | ~Zoo(); 22 | inline static Zoo* 
Get() { static Zoo zoo; return &zoo; } 23 | 24 | // Start all actors 25 | void Start(int* argc, char** argv); 26 | // Stop all actors 27 | void Stop(bool finalize_net); 28 | 29 | void Barrier(); 30 | 31 | void SendTo(const std::string& name, MessagePtr&); 32 | void Receive(MessagePtr& msg); 33 | 34 | int rank() const; 35 | int size() const; 36 | 37 | inline int worker_rank() const { return nodes_[rank()].worker_id; } 38 | inline int server_rank() const { return nodes_[rank()].server_id; } 39 | 40 | inline int rank_to_worker_id(int rank) const { 41 | return nodes_[rank].worker_id; 42 | } 43 | 44 | inline int rank_to_server_id(int rank) const { 45 | return nodes_[rank].server_id; 46 | } 47 | 48 | inline int worker_id_to_rank(int worker_id) const { 49 | return worker_id_to_rank_[worker_id]; 50 | } 51 | 52 | inline int server_id_to_rank(int server_id) const { 53 | return server_id_to_rank_[server_id]; 54 | } 55 | 56 | inline int num_workers() const { return num_workers_; } 57 | inline int num_servers() const { return num_servers_; } 58 | 59 | 60 | int RegisterTable(WorkerTable* worker_table); 61 | int RegisterTable(ServerTable* server_table); 62 | 63 | void RegisterActor(const std::string name, Actor* actor); 64 | 65 | private: 66 | // private constructor 67 | Zoo(); 68 | void RegisterNode(); 69 | void FinishTrain(); 70 | void StartPS(); 71 | void StopPS(); 72 | 73 | std::unordered_map zoo_; 74 | 75 | std::unique_ptr> mailbox_; 76 | 77 | NetInterface* net_util_; 78 | 79 | std::vector nodes_; 80 | std::vector server_id_to_rank_; 81 | std::vector worker_id_to_rank_; 82 | 83 | int num_workers_; 84 | int num_servers_; 85 | }; 86 | 87 | } // namespace multiverso 88 | 89 | #endif // MULTIVERSO_ZOO_H_ 90 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(${MPI_CXX_INCLUDE_PATH}) 2 | 3 | if (NOT USE_ZMQ) 4 | ADD_DEFINITIONS(-DMULTIVERSO_USE_MPI) 5 | else() 6 | ADD_DEFINITIONS(-DMULTIVERSO_USE_ZMQ) 7 | endif() 8 | 9 | if (NOT USE_ZMQ) 10 | find_package(OpenMP) 11 | if (OPENMP_FOUND) 12 | message("OpenMP found") 13 | set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") 14 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 15 | endif() 16 | endif() 17 | 18 | set(MULTIVERSO_SRC actor.cpp communicator.cpp controller.cpp dashboard.cpp multiverso.cpp net.cpp node.cpp server.cpp table.cpp table/array_table.cpp table/matrix_table.cpp table/sparse_matrix_table.cpp table/matrix.cpp timer.cpp updater/updater.cpp util/configure.cpp io/hdfs_stream.cpp io/io.cpp io/local_stream.cpp util/log.cpp util/net_util.cpp worker.cpp zoo.cpp c_api.cpp util/allocator.cpp table_factory.cpp blob.cpp) 19 | 20 | add_library(multiverso SHARED ${MULTIVERSO_SRC}) 21 | #add_library(imultiverso ${MULTIVERSO_SRC}) 22 | if (NOT USE_ZMQ) 23 | target_link_libraries(multiverso ${MPI_LIBRARY}) 24 | else() 25 | 
target_link_libraries(multiverso zmq) 26 | endif() 27 | 28 | install (TARGETS multiverso DESTINATION lib) 29 | if (UNIX) 30 | install(CODE "execute_process(COMMAND ldconfig)") # run ldconfig. Otherwise ld.so.cache won't be created. 31 | endif() 32 | -------------------------------------------------------------------------------- /src/actor.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/actor.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "multiverso/message.h" 8 | #include "multiverso/util/log.h" 9 | #include "multiverso/util/mt_queue.h" 10 | #include "multiverso/zoo.h" 11 | 12 | namespace multiverso { 13 | 14 | Actor::Actor(const std::string& name) : name_(name) { 15 | mailbox_.reset(new MtQueue()); 16 | Zoo::Get()->RegisterActor(name, this); 17 | is_working_ = false; 18 | } 19 | 20 | Actor::~Actor() {} 21 | 22 | void Actor::Start() { 23 | thread_.reset(new std::thread(&Actor::Main, this)); 24 | while (!is_working_) { 25 | std::this_thread::sleep_for(std::chrono::milliseconds(10)); 26 | } 27 | } 28 | 29 | void Actor::Stop() { 30 | while (!mailbox_->Empty()) { ; } 31 | is_working_ = false; 32 | mailbox_->Exit(); 33 | thread_->join(); 34 | } 35 | 36 | void Actor::Receive(MessagePtr& msg) { mailbox_->Push(msg); } 37 | 38 | void Actor::Main() { 39 | is_working_ = true; 40 | MessagePtr msg; 41 | while (mailbox_->Pop(msg)) { 42 | if (handlers_.find(msg->type()) != handlers_.end()) { 43 | handlers_[msg->type()](msg); 44 | } else if (handlers_.find(MsgType::Default) != handlers_.end()) { 45 | handlers_[MsgType::Default](msg); 46 | } else { 47 | Log::Fatal("Unexpected msg type\n"); 48 | } 49 | } 50 | } 51 | 52 | void Actor::SendTo(const std::string& dst_name, MessagePtr& msg) { 53 | Zoo::Get()->SendTo(dst_name, msg); 54 | } 55 | 56 | } // namespace multiverso 57 | -------------------------------------------------------------------------------- /src/blob.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/blob.h" 2 | 3 | #include "multiverso/util/allocator.h" 4 | #include "multiverso/util/log.h" 5 | 6 | namespace multiverso { 7 | 8 | Blob::Blob(size_t size) : size_(size) { 9 | CHECK(size > 0); 10 | data_ = Allocator::Get()->Alloc(size); 11 | } 12 | 13 | // Construct from external memory. Will copy a new piece 14 | Blob::Blob(const void* data, size_t size) : size_(size) { 15 | data_ = Allocator::Get()->Alloc(size); 16 | memcpy(data_, data, size_); 17 | } 18 | 19 | Blob::Blob(void* data, size_t size) : size_(size) { 20 | data_ = Allocator::Get()->Alloc(size); 21 | memcpy(data_, data, size_); 22 | } 23 | 24 | Blob::Blob(const Blob& rhs) { 25 | if (rhs.size() != 0) { 26 | Allocator::Get()->Refer(rhs.data_); 27 | } 28 | this->data_ = rhs.data_; 29 | this->size_ = rhs.size_; 30 | } 31 | 32 | Blob::~Blob() { 33 | if (data_ != nullptr) { 34 | Allocator::Get()->Free(data_); 35 | } 36 | } 37 | 38 | // Shallow copy by default. 
Call \ref CopyFrom for a deep copy 39 | void Blob::operator=(const Blob& rhs) { 40 | if (rhs.size() != 0) { 41 | Allocator::Get()->Refer(rhs.data_); 42 | } 43 | this->data_ = rhs.data_; 44 | this->size_ = rhs.size_; 45 | } 46 | 47 | } // namespace multiverso 48 | -------------------------------------------------------------------------------- /src/build_dll.bat: -------------------------------------------------------------------------------- 1 | MSBuild.exe Multiverso.vcxproj /p:Configuration=Release /p:Platform=x64 /p:ConfigurationType=DynamicLibrary -------------------------------------------------------------------------------- /src/c_api.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/c_api.h" 2 | 3 | #include "multiverso/multiverso.h" 4 | #include "multiverso/table/array_table.h" 5 | #include "multiverso/table/matrix_table.h" 6 | #include "multiverso/util/log.h" 7 | 8 | 9 | extern "C" { 10 | void MV_Init(int* argc, char* argv[]) { 11 | multiverso::MV_Init(argc, argv); 12 | } 13 | 14 | void MV_ShutDown(){ 15 | multiverso::MV_ShutDown(); 16 | } 17 | 18 | void MV_Barrier(){ 19 | multiverso::MV_Barrier(); 20 | } 21 | 22 | int MV_NumWorkers(){ 23 | return multiverso::MV_NumWorkers(); 24 | } 25 | 26 | int MV_WorkerId(){ 27 | return multiverso::MV_WorkerId(); 28 | } 29 | 30 | int MV_ServerId(){ 31 | return multiverso::MV_ServerId(); 32 | } 33 | 34 | // Array Table 35 | void MV_NewArrayTable(int size, TableHandler* out) { 36 | *out = multiverso::MV_CreateTable(multiverso::ArrayTableOption(size)); 37 | } 38 | 39 | void MV_GetArrayTable(TableHandler handler, float* data, int size) { 40 | auto worker = reinterpret_cast*>(handler); 41 | worker->Get(data, size); 42 | } 43 | 44 | void MV_AddArrayTable(TableHandler handler, float* data, int size) { 45 | auto worker = reinterpret_cast*>(handler); 46 | worker->Add(data, size); 47 | } 48 | 49 | void MV_AddAsyncArrayTable(TableHandler handler, float* data, int size) { 50 | auto worker = reinterpret_cast*>(handler); 51 | worker->AddAsync(data, size); 52 | } 53 | 54 | 55 | // MatrixTable 56 | void MV_NewMatrixTable(int num_row, int num_col, TableHandler* out) { 57 | *out = multiverso::MV_CreateTable(multiverso::MatrixTableOption(num_row, num_col)); 58 | } 59 | 60 | void MV_GetMatrixTableAll(TableHandler handler, float* data, int size) { 61 | auto worker = reinterpret_cast*>(handler); 62 | worker->Get(data, size); 63 | } 64 | 65 | void MV_AddMatrixTableAll(TableHandler handler, float* data, int size) { 66 | auto worker = reinterpret_cast*>(handler); 67 | worker->Add(data, size); 68 | } 69 | 70 | void MV_AddAsyncMatrixTableAll(TableHandler handler, float* data, int size) { 71 | auto worker = reinterpret_cast*>(handler); 72 | worker->AddAsync(data, size); 73 | } 74 | 75 | void MV_GetMatrixTableByRows(TableHandler handler, float* data, int size, 76 | int row_ids[], int row_ids_n) { 77 | auto worker = reinterpret_cast*>(handler); 78 | worker->Get(data, size, row_ids, row_ids_n); 79 | } 80 | 81 | void MV_AddMatrixTableByRows(TableHandler handler, float* data, int size, 82 | int row_ids[], int row_ids_n) { 83 | auto worker = reinterpret_cast*>(handler); 84 | worker->Add(data, size, row_ids, row_ids_n); 85 | } 86 | 87 | void MV_AddAsyncMatrixTableByRows(TableHandler handler, float* data, int size, 88 | int row_ids[], int row_ids_n) { 89 | auto worker = reinterpret_cast*>(handler); 90 | worker->AddAsync(data, size, row_ids, row_ids_n); 91 | } 92 | 93 | } 94 | 
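
c_api.cpp exposes the templated tables behind the opaque TableHandler type so that plain C callers, and the language bindings built on top of it, can drive them without templates. A sketch of the array-table path follows, assuming the corresponding declarations in multiverso/c_api.h; the sizes and buffers are illustrative.

#include "multiverso/c_api.h"

int main(int argc, char* argv[]) {
  MV_Init(&argc, argv);

  TableHandler table;
  MV_NewArrayTable(1000, &table);         // worker handle to a shared float array of 1000 elements

  float update[1000] = {0.0f};
  float model[1000];
  MV_AddArrayTable(table, update, 1000);  // blocking add of a local update
  MV_GetArrayTable(table, model, 1000);   // blocking fetch of the aggregated values

  MV_Barrier();
  MV_ShutDown();
  return 0;
}
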
-------------------------------------------------------------------------------- /src/communicator.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/communicator.h" 2 | 3 | #include 4 | #include 5 | 6 | #include "multiverso/zoo.h" 7 | #include "multiverso/net.h" 8 | #include "multiverso/util/log.h" 9 | #include "multiverso/util/mt_queue.h" 10 | 11 | namespace multiverso { 12 | 13 | namespace message { 14 | 15 | bool to_server(MsgType type) { 16 | return (static_cast(type)) > 0 && 17 | (static_cast(type)) < 32; 18 | } 19 | 20 | bool to_worker(MsgType type) { 21 | return (static_cast(type)) < 0 && 22 | (static_cast(type)) > -32; 23 | } 24 | 25 | bool to_controler(MsgType type) { 26 | return (static_cast(type)) > 32; 27 | } 28 | 29 | } // namespace message 30 | 31 | Communicator::Communicator() : Actor(actor::kCommunicator) { 32 | RegisterHandler(MsgType::Default, std::bind( 33 | &Communicator::ProcessMessage, this, std::placeholders::_1)); 34 | net_util_ = NetInterface::Get(); 35 | } 36 | 37 | Communicator::~Communicator() { } 38 | 39 | void Communicator::Main() { 40 | is_working_ = true; 41 | 42 | switch (net_util_->thread_level_support()) { 43 | case NetThreadLevel::THREAD_MULTIPLE: { 44 | recv_thread_.reset(new std::thread(&Communicator::Communicate, this)); 45 | Actor::Main(); 46 | recv_thread_->join(); 47 | break; 48 | } 49 | case NetThreadLevel::THREAD_SERIALIZED: { 50 | MessagePtr msg; 51 | while (mailbox_->Alive()) { 52 | // Try pop and Send 53 | if (mailbox_->TryPop(msg)) { 54 | ProcessMessage(msg); 55 | } 56 | // Probe and Recv 57 | size_t size = net_util_->Recv(&msg); 58 | if (size > 0) LocalForward(msg); 59 | CHECK(msg.get() == nullptr); 60 | net_util_->Send(msg); 61 | } 62 | break; 63 | } 64 | default: 65 | Log::Fatal("Unexpected thread level\n"); 66 | } 67 | } 68 | 69 | void Communicator::ProcessMessage(MessagePtr& msg) { 70 | if (msg->dst() != net_util_->rank()) { 71 | net_util_->Send(msg); 72 | return; 73 | } 74 | LocalForward(msg); 75 | } 76 | 77 | void Communicator::Communicate() { 78 | while (is_working_) { 79 | MessagePtr msg(new Message()); 80 | int size = net_util_->Recv(&msg); 81 | if (size == -1) { 82 | continue; 83 | } 84 | if (size > 0) { 85 | // a message received 86 | CHECK(msg->dst() == Zoo::Get()->rank()); 87 | LocalForward(msg); 88 | } 89 | } 90 | Log::Debug("Comm recv thread exit\n"); 91 | } 92 | 93 | void Communicator::LocalForward(MessagePtr& msg) { 94 | CHECK(msg->dst() == Zoo::Get()->rank()); 95 | if (message::to_server(msg->type())) { 96 | SendTo(actor::kServer, msg); 97 | } else if (message::to_worker(msg->type())) { 98 | SendTo(actor::kWorker, msg); 99 | } else if (message::to_controler(msg->type())) { 100 | SendTo(actor::kController, msg); 101 | } else { 102 | // Send back to the msg queue of zoo 103 | Zoo::Get()->Receive(msg); 104 | } 105 | } 106 | 107 | } // namespace multiverso 108 | -------------------------------------------------------------------------------- /src/dashboard.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/dashboard.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "multiverso/util/log.h" 8 | 9 | namespace multiverso { 10 | 11 | std::map Dashboard::record_; 12 | std::mutex Dashboard::m_; 13 | 14 | void Dashboard::AddMonitor(const std::string& name, Monitor* monitor) { 15 | std::lock_guard l(m_); 16 | CHECK(record_[name] == nullptr); 17 | record_[name] = monitor; 18 | } 19 | 20 | void 
Dashboard::RemoveMonitor(const std::string& name) { 21 | std::lock_guard l(m_); 22 | CHECK_NOTNULL(record_[name]); 23 | record_.erase(name); 24 | } 25 | 26 | std::string Dashboard::Watch(const std::string& name) { 27 | std::lock_guard l(m_); 28 | std::string result; 29 | if (record_.find(name) == record_.end()) return result; 30 | Monitor* monitor = record_[name]; 31 | CHECK_NOTNULL(monitor); 32 | return monitor->info_string(); 33 | } 34 | 35 | std::string Monitor::info_string() const { 36 | std::ostringstream oss; 37 | oss << "[" << name_ << "] " 38 | << " count = " << count_ 39 | << " elapse = " << elapse_ << "ms" 40 | << " average = " << average() << "ms"; 41 | return oss.str(); 42 | } 43 | 44 | void Dashboard::Display() { 45 | std::lock_guard l(m_); 46 | Log::Info("--------------Show dashboard monitor information--------------\n"); 47 | for (auto& it : record_) Log::Info("%s\n", it.second->info_string().c_str()); 48 | Log::Info("--------------------------------------------------------------\n"); 49 | } 50 | 51 | } // namespace multiverso 52 | -------------------------------------------------------------------------------- /src/io/io.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/io/io.h" 2 | #include "multiverso/io/hdfs_stream.h" 3 | #include "multiverso/io/local_stream.h" 4 | 5 | 6 | namespace multiverso { 7 | 8 | Stream* StreamFactory::GetStream(const URI& uri, 9 | FileOpenMode mode) { 10 | std::string addr = uri.scheme + "://" + uri.host; 11 | if (instances_.find(addr) == instances_.end()) { 12 | if (uri.scheme == std::string("file")) 13 | instances_[addr] = std::shared_ptr(new LocalStreamFactory(uri.host)); 14 | #ifdef MULTIVERSO_USE_HDFS 15 | else if (uri.scheme == std::string("hdfs")) 16 | instances_[addr] = std::shared_ptr(new HDFSStreamFactory(uri.host)); 17 | #endif 18 | else Log::Error("Can not support the StreamFactory '%s'\n", uri.scheme.c_str()); 19 | } 20 | return instances_[addr]->Open(uri, mode); 21 | } 22 | 23 | std::map > StreamFactory::instances_; 24 | 25 | TextReader::TextReader(const URI& uri, size_t buf_size) { 26 | stream_ = StreamFactory::GetStream(uri, FileOpenMode::Read); 27 | buf_size_ = buf_size; 28 | pos_ = length_ = 0; 29 | buf_ = new char[buf_size_]; 30 | assert(buf_ != nullptr); 31 | } 32 | 33 | size_t TextReader::GetLine(std::string &line) { 34 | line.clear(); 35 | bool isEnd = false; 36 | while (!isEnd) { 37 | while(pos_ < length_) { 38 | char & c = buf_[pos_++]; 39 | if (c == '\n') { 40 | isEnd = true; 41 | break; 42 | } else { 43 | line += c; 44 | } 45 | } 46 | if (isEnd || LoadBuffer() == 0) break; 47 | } 48 | return line.size(); 49 | } 50 | 51 | size_t TextReader::LoadBuffer() { 52 | assert (pos_ == length_); 53 | pos_ = length_ = 0; 54 | return length_ = stream_->Read(buf_, buf_size_ - 1); 55 | } 56 | 57 | TextReader::~TextReader() { 58 | delete stream_; 59 | delete [] buf_; 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/io/local_stream.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/io/local_stream.h" 2 | #include 3 | extern "C" { 4 | #include 5 | } 6 | #ifndef _MSC_VER 7 | extern "C" { 8 | #include 9 | #include 10 | } 11 | #else 12 | #include 13 | #define stat _stat64 14 | #endif 15 | 16 | namespace multiverso { 17 | 18 | LocalStream::LocalStream(const URI& uri, FileOpenMode mode) { 19 | path_ = uri.path; 20 | std::string mode_str; 21 | switch (mode) { 22 | 
case FileOpenMode::Read: 23 | mode_str = "r"; 24 | break; 25 | case FileOpenMode::Write: 26 | mode_str = "w"; 27 | break; 28 | case FileOpenMode::Append: 29 | mode_str = "a"; 30 | break; 31 | case FileOpenMode::BinaryRead: 32 | mode_str = "rb"; 33 | break; 34 | case FileOpenMode::BinaryWrite: 35 | mode_str = "wb"; 36 | break; 37 | case FileOpenMode::BinaryAppend: 38 | mode_str = "ab"; 39 | } 40 | #ifdef _MSC_VER 41 | fopen_s(&fp_, uri.path.c_str(), mode_str.c_str()); 42 | #else 43 | fp_ = fopen(uri.path.c_str(), mode_str.c_str()); 44 | #endif 45 | 46 | if (fp_ == nullptr) { 47 | is_good_ = false; 48 | Log::Error("Failed to open LocalStream %s\n", uri.path.c_str()); 49 | } else { 50 | is_good_ = true; 51 | } 52 | } 53 | 54 | LocalStream::~LocalStream(void) 55 | { 56 | is_good_ = false; 57 | if (fp_ != nullptr) 58 | std::fclose(fp_); 59 | } 60 | 61 | /*! 62 | * \brief write data to a file 63 | * \param buf pointer to a memory buffer 64 | * \param size data size 65 | */ 66 | void LocalStream::Write(const void *buf, size_t size) { 67 | if (std::fwrite(buf, 1, size, fp_) != size) { 68 | is_good_ = false; 69 | Log::Error("LocalStream.Write incomplete\n"); 70 | } 71 | } 72 | 73 | 74 | /*! 75 | * \brief read data from Stream 76 | * \param buf pointer to a memory buffer 77 | * \param size the size of buf 78 | */ 79 | size_t LocalStream::Read(void *buf, size_t size) { 80 | return std::fread(buf, 1, size, fp_); 81 | } 82 | 83 | bool LocalStream::Good() { return is_good_; } 84 | 85 | LocalStreamFactory::LocalStreamFactory(const std::string& host) { 86 | host_ = host; 87 | } 88 | 89 | LocalStreamFactory::~LocalStreamFactory() { 90 | } 91 | 92 | /*! 93 | * \brief create a Stream 94 | * \param path the path of the file 95 | * \param mode "w" - create an empty file to store data; 96 | * "a" - open the file to append data to it 97 | * "r" - open the file to read 98 | * \return the Stream which is used to write or read data 99 | */ 100 | Stream* LocalStreamFactory::Open(const URI& uri, FileOpenMode mode) { 101 | return new LocalStream(uri, mode); 102 | } 103 | 104 | void LocalStreamFactory::Close() { 105 | ///TODO 106 | } 107 | 108 | } -------------------------------------------------------------------------------- /src/multiverso.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/multiverso.h" 2 | 3 | #include "multiverso/dashboard.h" 4 | #include "multiverso/net.h" 5 | #include "multiverso/zoo.h" 6 | #include "multiverso/table_factory.h" 7 | #include "multiverso/util/configure.h" 8 | 9 | namespace multiverso { 10 | 11 | void MV_Init(int* argc, char* argv[]) { 12 | Zoo::Get()->Start(argc, argv); 13 | } 14 | 15 | void MV_ShutDown(bool finalize_net) { 16 | Zoo::Get()->Stop(finalize_net); 17 | table_factory::FreeServerTables(); 18 | } 19 | 20 | void MV_Barrier() { Zoo::Get()->Barrier(); } 21 | 22 | int MV_Rank() { return Zoo::Get()->rank(); } 23 | 24 | int MV_Size() { return Zoo::Get()->size(); } 25 | 26 | int MV_WorkerId() { 27 | return Zoo::Get()->worker_rank(); 28 | } 29 | int MV_ServerId() { 30 | return Zoo::Get()->server_rank(); 31 | } 32 | 33 | int MV_NumWorkers() { 34 | return Zoo::Get()->num_workers(); 35 | } 36 | int MV_NumServers() { 37 | return Zoo::Get()->num_servers(); 38 | } 39 | 40 | int MV_WorkerIdToRank(int worker_id) { 41 | return Zoo::Get()->worker_id_to_rank(worker_id); 42 | } 43 | 44 | int MV_ServerIdToRank(int server_id) { 45 | return Zoo::Get()->server_id_to_rank(server_id); 46 | } 47 | 48 | template 49 | void MV_SetFlag(const 
std::string& name, const T& value) { 50 | SetCMDFlag(name, value); 51 | } 52 | 53 | template 54 | void MV_Aggregate(ElemType* data, int size) { 55 | net::Allreduce(data, size); 56 | } 57 | 58 | int MV_NetBind(int rank, char* endpoint) { 59 | return NetInterface::Get()->Bind(rank, endpoint); 60 | } 61 | 62 | int MV_NetConnect(int* ranks, char* endpoints[], int size) { 63 | return NetInterface::Get()->Connect(ranks, endpoints, size); 64 | } 65 | 66 | void MV_NetFinalize() { 67 | NetInterface::Get()->Finalize(); 68 | } 69 | 70 | template void MV_Aggregate(char*, int); 71 | template void MV_Aggregate(int*, int); 72 | template void MV_Aggregate(float*, int); 73 | template void MV_Aggregate(double*, int); 74 | 75 | template void MV_SetFlag(const std::string&, const int&); 76 | template void MV_SetFlag(const std::string&, const bool&); 77 | template void MV_SetFlag(const std::string&, const std::string&); 78 | template void MV_SetFlag(const std::string&, const double&); 79 | 80 | } // namespace multiverso 81 | -------------------------------------------------------------------------------- /src/net.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/net.h" 2 | 3 | #include 4 | #include 5 | #include "multiverso/message.h" 6 | #include "multiverso/util/log.h" 7 | 8 | #include "multiverso/net/zmq_net.h" 9 | #include "multiverso/net/mpi_net.h" 10 | 11 | namespace multiverso { 12 | 13 | NetInterface* NetInterface::Get() { 14 | #ifdef MULTIVERSO_USE_ZMQ 15 | static ZMQNetWrapper net_impl; 16 | return &net_impl; 17 | #else 18 | // #ifdef MULTIVERSO_USE_MPI 19 | // Use MPI by default 20 | static MPINetWrapper net_impl; 21 | return &net_impl; 22 | // #endif 23 | #endif 24 | } 25 | 26 | namespace net { 27 | template 28 | void Allreduce(Typename* data, size_t elem_count) { 29 | #ifdef MULTIVERSO_USE_MPI 30 | CHECK(NetInterface::Get()->active()); 31 | MPINetWrapper::Allreduce(data, elem_count); 32 | #else 33 | Log::Fatal("Not implemented yet"); 34 | #endif 35 | } 36 | 37 | template void Allreduce(char*, size_t); 38 | template void Allreduce(int*, size_t); 39 | template void Allreduce(float*, size_t); 40 | template void Allreduce(double*, size_t); 41 | 42 | } // namespace net 43 | 44 | 45 | } // namespace multiverso 46 | -------------------------------------------------------------------------------- /src/net/mpi_net.cpp: -------------------------------------------------------------------------------- 1 | #ifdef MULTIVERSO_USE_MPI 2 | 3 | #include "multiverso/net/mpi_net.h" 4 | 5 | namespace multiverso { 6 | 7 | template void MPINetWrapper::Allreduce(char*, size_t); 8 | template void MPINetWrapper::Allreduce(int*, size_t); 9 | template void MPINetWrapper::Allreduce(float*, size_t); 10 | template void MPINetWrapper::Allreduce(double*, size_t); 11 | 12 | } // namespace multiverso 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /src/node.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/node.h" 2 | 3 | namespace multiverso { 4 | 5 | Node::Node() : rank(-1), role(-1), worker_id(-1), server_id(-1) {} 6 | 7 | namespace node { 8 | 9 | bool is_worker(int role) { return (role & Role::WORKER) != 0; } 10 | bool is_server(int role) { return (role & Role::SERVER) != 0; } 11 | 12 | } // namespace node 13 | 14 | } // namespace multiverso 15 | -------------------------------------------------------------------------------- /src/table.cpp: 
-------------------------------------------------------------------------------- 1 | #include "multiverso/table_interface.h" 2 | 3 | #include 4 | 5 | #include "multiverso/dashboard.h" 6 | #include "multiverso/updater/updater.h" 7 | #include "multiverso/util/log.h" 8 | #include "multiverso/util/waiter.h" 9 | #include "multiverso/zoo.h" 10 | 11 | namespace multiverso { 12 | 13 | WorkerTable::WorkerTable() { 14 | msg_id_ = 0; 15 | m_ = new std::mutex(); 16 | table_id_ = Zoo::Get()->RegisterTable(this); 17 | } 18 | 19 | WorkerTable::~WorkerTable() { 20 | delete m_; 21 | } 22 | 23 | ServerTable::ServerTable() { 24 | Zoo::Get()->RegisterTable(this); 25 | } 26 | 27 | void WorkerTable::Get(Blob keys, 28 | const GetOption* option) { 29 | MONITOR_BEGIN(WORKER_TABLE_SYNC_GET) 30 | Wait(GetAsync(keys, option)); 31 | MONITOR_END(WORKER_TABLE_SYNC_GET) 32 | } 33 | 34 | void WorkerTable::Add(Blob keys, Blob values, 35 | const AddOption* option) { 36 | MONITOR_BEGIN(WORKER_TABLE_SYNC_ADD) 37 | Wait(AddAsync(keys, values, option)); 38 | MONITOR_END(WORKER_TABLE_SYNC_ADD) 39 | } 40 | 41 | int WorkerTable::GetAsync(Blob keys, 42 | const GetOption* option) { 43 | m_->lock(); 44 | int id = msg_id_++; 45 | waitings_.push_back(new Waiter()); 46 | m_->unlock(); 47 | MessagePtr msg(new Message()); 48 | msg->set_src(Zoo::Get()->rank()); 49 | msg->set_type(MsgType::Request_Get); 50 | msg->set_msg_id(id); 51 | msg->set_table_id(table_id_); 52 | msg->Push(keys); 53 | // Add general option if necessary 54 | if (option != nullptr) { 55 | Blob general_option(option->data(), option->size()); 56 | msg->Push(general_option); 57 | } 58 | Zoo::Get()->SendTo(actor::kWorker, msg); 59 | return id; 60 | } 61 | 62 | int WorkerTable::AddAsync(Blob keys, Blob values, 63 | const AddOption* option) { 64 | m_->lock(); 65 | int id = msg_id_++; 66 | waitings_.push_back(new Waiter()); 67 | m_->unlock(); 68 | MessagePtr msg(new Message()); 69 | msg->set_src(Zoo::Get()->rank()); 70 | msg->set_type(MsgType::Request_Add); 71 | msg->set_msg_id(id); 72 | msg->set_table_id(table_id_); 73 | msg->Push(keys); 74 | msg->Push(values); 75 | // Add update option if necessary 76 | if (option != nullptr) { 77 | Blob update_option(option->data(), option->size()); 78 | msg->Push(update_option); 79 | } 80 | Zoo::Get()->SendTo(actor::kWorker, msg); 81 | return id; 82 | } 83 | 84 | void WorkerTable::Wait(int id) { 85 | // CHECK(waitings_.find(id) != waitings_.end()); 86 | m_->lock(); 87 | CHECK(waitings_[id] != nullptr); 88 | Waiter* w = waitings_[id]; 89 | m_->unlock(); 90 | 91 | w->Wait(); 92 | 93 | m_->lock(); 94 | delete waitings_[id]; 95 | waitings_[id] = nullptr; 96 | m_->unlock(); 97 | } 98 | 99 | void WorkerTable::Reset(int msg_id, int num_wait) { 100 | m_->lock(); 101 | CHECK_NOTNULL(waitings_[msg_id]); 102 | waitings_[msg_id]->Reset(num_wait); 103 | m_->unlock(); 104 | } 105 | 106 | void WorkerTable::Notify(int id) { 107 | m_->lock(); 108 | CHECK_NOTNULL(waitings_[id]); 109 | waitings_[id]->Notify(); 110 | m_->unlock(); 111 | } 112 | 113 | } // namespace multiverso 114 | -------------------------------------------------------------------------------- /src/table_factory.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/table_factory.h" 2 | 3 | #include "multiverso/table/array_table.h" 4 | #include "multiverso/table/matrix_table.h" 5 | 6 | namespace multiverso { 7 | 8 | namespace table_factory { 9 | std::vector server_tables; 10 | 11 | void FreeServerTables() { 12 | for (auto table : 
server_tables) { 13 | delete table; 14 | } 15 | server_tables.clear(); 16 | } 17 | 18 | void PushServerTable(ServerTable*table) { 19 | server_tables.push_back(table); 20 | } 21 | 22 | } // namespace table_factory 23 | 24 | } // namespace multiverso -------------------------------------------------------------------------------- /src/timer.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/util/timer.h" 2 | 3 | namespace multiverso { 4 | 5 | Timer::Timer() { 6 | Start(); 7 | } 8 | 9 | void Timer::Start() { 10 | start_point_ = Clock::now(); 11 | } 12 | 13 | double Timer::elapse() { 14 | TimePoint end_point = Clock::now(); 15 | std::chrono::duration time_ms = 16 | end_point - start_point_; 17 | return time_ms.count(); 18 | } 19 | 20 | } // namespace multiverso 21 | -------------------------------------------------------------------------------- /src/updater/updater.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/updater/updater.h" 2 | // TODO(qiwye) to make this a option in CMakelist 3 | //#define ENABLE_DCASGD 4 | 5 | #include "multiverso/updater/adagrad_updater.h" 6 | #include "multiverso/updater/momentum_updater.h" 7 | #ifdef ENABLE_DCASGD 8 | #include "multiverso/updater/dcasgd/dcasgd_updater.h" 9 | #include "multiverso/updater/dcasgd/dcasgda_updater.h" 10 | #endif 11 | #include "multiverso/updater/sgd_updater.h" 12 | #include "multiverso/util/configure.h" 13 | #include "multiverso/util/log.h" 14 | 15 | 16 | namespace multiverso { 17 | 18 | MV_DEFINE_string(updater_type, "default", "multiverso server updater type"); 19 | MV_DEFINE_int(omp_threads, 4 , "#theads used by openMP for updater"); 20 | 21 | template 22 | void Updater::Update(size_t num_element, T* data, T* delta, 23 | AddOption*, size_t offset) { 24 | // parallelism with openMP 25 | #pragma omp parallel for schedule(static) num_threads(MV_CONFIG_omp_threads) 26 | for (int i = 0; i < num_element; ++i) { 27 | data[i + offset] += delta[i]; 28 | } 29 | } 30 | 31 | template 32 | void Updater::Access(size_t num_element, T* data, T* blob_data, 33 | size_t offset , AddOption*) { 34 | // copy data from data to blob 35 | memcpy(blob_data, data + offset, num_element * sizeof(T)); 36 | } 37 | 38 | // Gradient-based updater in only for numerical table 39 | // For simple int table, just using simple updater 40 | template<> 41 | Updater* Updater::GetUpdater(size_t) { 42 | return new Updater(); 43 | } 44 | 45 | template 46 | Updater* Updater::GetUpdater(size_t size) { 47 | std::string type = MV_CONFIG_updater_type; 48 | if (type == "sgd") return new SGDUpdater(size); 49 | if (type == "adagrad") return new AdaGradUpdater(size); 50 | if (type == "momentum_sgd") return new MomentumUpdater(size); 51 | #ifdef ENABLE_DCASGD 52 | if (type == "dcasgd") return new DCASGDUpdater(size); 53 | if (type == "dcasgda") return new DCASGDAUpdater(size); 54 | #endif 55 | // Default: simple updater 56 | return new Updater(); 57 | } 58 | 59 | MV_INSTANTIATE_CLASS_WITH_BASE_TYPE(Updater); 60 | 61 | } -------------------------------------------------------------------------------- /src/util/configure.cpp: -------------------------------------------------------------------------------- 1 | #include "multiverso/util/configure.h" 2 | 3 | #include 4 | #include 5 | #include "multiverso/util/log.h" 6 | 7 | namespace multiverso { 8 | 9 | void ParseCMDFlags(int* argc, char* argv[]) { 10 | if (argc == nullptr || argv == nullptr) return; 11 | 12 | int 
unused = 0;
13 |   size_t pos;
14 |   int intval;
15 |   bool boolval;
16 |   double dval;
17 |   std::string line, key, value;
18 | 
19 |   for (int i = 0; i < *argc; ++i) {
20 |     line = argv[i];
21 |     if (line.find("-") != std::string::npos) {
22 | 
23 |       pos = line.find("=");
24 |       CHECK(pos != std::string::npos);
25 | 
26 |       key = line.substr(1, pos - 1);
27 |       value = line.substr(pos + 1);
28 | 
29 |       if (configure::FlagRegister::Get()->SetFlagIfFound(key, value)) {
30 |         continue;
31 |       }
32 | 
33 |       intval = atoi(value.c_str());
34 |       if (configure::FlagRegister::Get()->SetFlagIfFound(key, intval)) {
35 |         continue;
36 |       }
37 | 
38 |       dval = strtod(value.c_str(), nullptr);  // parse the value part, not the whole "-name=value" token
39 |       if (configure::FlagRegister::Get()->SetFlagIfFound(key, dval)) {
40 |         continue;
41 |       }
42 | 
43 |       transform(value.begin(), value.end(), value.begin(), ::tolower);
44 |       boolval = (value == "true");
45 |       if (configure::FlagRegister::Get()->SetFlagIfFound(key, boolval)) {
46 |         continue;
47 |       }
48 |     }
49 | 
50 |     std::swap(argv[unused++], argv[i]);
51 |   }
52 | 
53 |   *argc = unused;
54 | }
55 | 
56 | } // namespace multiverso
57 | 
--------------------------------------------------------------------------------
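
ParseCMDFlags above consumes command-line arguments of the form -name=value, trying each registered flag type in turn (string, then int, then double, then bool) and compacting argv so that only unrecognized arguments remain. Flags can also be set programmatically through MV_SetFlag, which is declared in multiverso.h and explicitly instantiated for int, bool, double and std::string in multiverso.cpp. The sketch below uses illustrative flag values; that flags may be set before MV_Init is an assumption, not something shown in this listing.

#include <string>
#include "multiverso/multiverso.h"

int main(int argc, char* argv[]) {
  // Equivalent in effect to launching with:  ./app -updater_type=adagrad -omp_threads=8
  multiverso::MV_SetFlag("updater_type", std::string("adagrad"));  // consulted by Updater::GetUpdater
  multiverso::MV_SetFlag("omp_threads", 8);                        // used by the default updater's OpenMP loop

  multiverso::MV_Init(&argc, argv);  // -name=value arguments passed here are expected to be
                                     // handled by ParseCMDFlags during startup
  // ... create tables and run training ...
  multiverso::MV_ShutDown();
  return 0;
}
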