├── repo ├── dmlc-core │ ├── test │ │ ├── .gitignore │ │ ├── unittest │ │ │ ├── .gitignore │ │ │ ├── unittest_main.cc │ │ │ ├── unittest_logging.cc │ │ │ ├── dmlc_unittest.mk │ │ │ ├── unittest_array_view.cc │ │ │ ├── unittest_threaditer.cc │ │ │ ├── unittest_any.cc │ │ │ └── unittest_serializer.cc │ │ ├── logging_test.cc │ │ ├── iostream_test.cc │ │ ├── README.md │ │ ├── split_test.cc │ │ ├── dataiter_test.cc │ │ ├── strtonum_test.cc │ │ ├── split_read_test.cc │ │ ├── libsvm_parser_test.cc │ │ ├── dmlc_test.mk │ │ ├── stream_read_test.cc │ │ ├── registry_test.cc │ │ ├── split_repeat_read_test.cc │ │ ├── filesys_test.cc │ │ └── csv_parser_test.cc │ ├── doc │ │ ├── .gitignore │ │ ├── README │ │ ├── index.md │ │ └── sphinx_util.py │ ├── tracker │ │ ├── yarn │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── build.bat │ │ │ ├── build.sh │ │ │ ├── src │ │ │ │ └── main │ │ │ │ │ └── java │ │ │ │ │ └── org │ │ │ │ │ └── apache │ │ │ │ │ └── hadoop │ │ │ │ │ └── yarn │ │ │ │ │ └── dmlc │ │ │ │ │ └── TaskRecord.java │ │ │ └── run_hdfs_prog.py │ │ ├── dmlc_tracker │ │ │ ├── hostfile │ │ │ ├── stop.sh │ │ │ ├── __init__.py │ │ │ ├── run_local.sh │ │ │ ├── submit.py │ │ │ ├── sge.py │ │ │ ├── mpi.py │ │ │ ├── local.py │ │ │ └── dmlc_mpi.py │ │ ├── dmlc-submit │ │ └── README.md │ ├── scripts │ │ ├── travis │ │ │ ├── travis_before_cache.sh │ │ │ ├── travis_osx_install.sh │ │ │ ├── travis_script.sh │ │ │ └── travis_setup_env.sh │ │ ├── packages.mk │ │ └── setup_nvcc.sh │ ├── windows │ │ ├── .gitignore │ │ └── README.md │ ├── example │ │ ├── dmlc_example.mk │ │ └── parameter.cc │ ├── LICENSE │ ├── include │ │ └── dmlc │ │ │ ├── common.h │ │ │ ├── omp.h │ │ │ ├── timer.h │ │ │ └── thread_local.h │ ├── cmake │ │ ├── lint.cmake │ │ └── Modules │ │ │ ├── FindCrypto.cmake │ │ │ └── FindHDFS.cmake │ ├── src │ │ └── io │ │ │ ├── line_split.h │ │ │ ├── recordio_split.h │ │ │ ├── azure_filesys.h │ │ │ ├── line_split.cc │ │ │ ├── local_filesys.h │ │ │ ├── uri_spec.h │ │ │ └── 
hdfs_filesys.h │ ├── make │ │ ├── config.mk │ │ └── dmlc.mk │ ├── README.md │ └── Makefile └── ps-lite │ ├── doc │ ├── README.md │ ├── ps-worker.rst │ ├── setup.sh │ └── index.rst │ ├── make │ ├── travis │ │ ├── travis_before_cache.sh │ │ ├── travis_script.sh │ │ └── travis_setup_env.sh │ ├── README.md │ ├── config.mk │ ├── ps.mk │ ├── install_deps.sh │ └── deps.mk │ ├── guide │ ├── deps.png │ ├── ps_guide.mk │ ├── example_a.cc │ ├── example_c.cc │ ├── example_b.cc │ ├── example_d.cc │ ├── local.sh │ ├── network_perf.cc │ └── example_e.cc │ ├── third_party │ ├── v0.3.4.tar.gz │ ├── lz4-r129.tar.gz │ ├── zeromq-4.1.2.tar.gz │ ├── cityhash-1.1.1.tar.gz │ ├── protobuf-2.5.0.tar.gz │ └── gflags-2.0-no-svn-files.tar.gz │ ├── src │ ├── proto │ │ ├── range.proto │ │ ├── assign_op.proto │ │ ├── node.proto │ │ ├── heartbeat.proto │ │ ├── filter.proto │ │ ├── param.proto │ │ ├── data.proto │ │ └── task.proto │ ├── system │ │ ├── env.h │ │ ├── node_assigner.h │ │ ├── postoffice.h │ │ ├── message.cc │ │ ├── van.h │ │ ├── remote_node.cc │ │ ├── remote_node.h │ │ ├── network_usage.h │ │ ├── ps-inl.h │ │ └── monitor.h │ ├── ps_main.cc │ ├── base │ │ ├── dir.h │ │ ├── split.h │ │ ├── sketch.h │ │ ├── barrier.h │ │ ├── parallel_sort.h │ │ ├── bloom_filter.h │ │ ├── countmin.h │ │ ├── assign_op.h │ │ ├── thread_pool.h │ │ ├── producer_consumer.h │ │ ├── thread_pool.cc │ │ ├── common.h │ │ ├── threadsafe_queue.h │ │ ├── block_bloom_filter.h │ │ ├── threadsafe_limited_queue.h │ │ └── bitmap.h │ ├── filter │ │ ├── filter.h │ │ ├── sparse_filter.h │ │ ├── filter.cc │ │ ├── add_noise.h │ │ ├── delta_key.h │ │ ├── compressing.h │ │ ├── frequency_filter.h │ │ ├── key_caching.h │ │ └── truncate_float.h │ ├── ps.h │ ├── windows │ │ └── unistd.h │ ├── ps │ │ ├── node_info.h │ │ └── blob.h │ ├── kv │ │ └── kv_store.h │ └── README.md │ ├── cmake │ ├── External │ │ ├── getopt.cmake │ │ ├── gflags.cmake │ │ └── glog.cmake │ └── Modules │ │ └── FindGlog.cmake │ ├── .travis.yml │ ├── CMakeLists.txt 
│ └── Makefile ├── stop.sh ├── tracker ├── stop.sh ├── tracker.pyc ├── dmlc-yarn.jar ├── ps_run_local_cmd.sh ├── run_local.sh └── dmlc_mpi.py ├── third_party ├── v0.3.4.tar.gz ├── lz4-r129.tar.gz ├── zeromq-4.1.2.tar.gz ├── cityhash-1.1.1.tar.gz ├── protobuf-2.5.0.tar.gz └── gflags-2.0-no-svn-files.tar.gz ├── dmlc.linear.conf ├── ps_run_local_cmd.sh ├── auc.py ├── src ├── scheduler.h ├── main.cpp ├── load_data.h ├── dump.cc └── server.h ├── README.md ├── ps_run_yarn_cmd.sh └── Makefile /repo/dmlc-core/test/.gitignore: -------------------------------------------------------------------------------- 1 | *_test 2 | *.csv -------------------------------------------------------------------------------- /repo/dmlc-core/doc/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | _build 3 | doxygen 4 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/unittest/.gitignore: -------------------------------------------------------------------------------- 1 | dmlc_unittest 2 | -------------------------------------------------------------------------------- /stop.sh: -------------------------------------------------------------------------------- 1 | ps -ef | grep lr_ftrl | awk '{ print $2 }' | sudo xargs kill -9 2 | -------------------------------------------------------------------------------- /tracker/stop.sh: -------------------------------------------------------------------------------- 1 | ps -ef | grep lr_ftrl | awk '{ print $2 }' | sudo xargs kill -9 2 | -------------------------------------------------------------------------------- /repo/dmlc-core/tracker/yarn/.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | .classpath 3 | .project 4 | *.jar 5 | -------------------------------------------------------------------------------- /repo/ps-lite/doc/README.md: 
-------------------------------------------------------------------------------- 1 | # Documents 2 | 3 | Type `doxygen` to generate Doxygen documents. 4 | -------------------------------------------------------------------------------- /repo/dmlc-core/tracker/dmlc_tracker/hostfile: -------------------------------------------------------------------------------- 1 | 10.101.2.88:8888 2 | 10.101.2.89:8888 3 | 10.101.2.90:8888 4 | -------------------------------------------------------------------------------- /tracker/tracker.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/tracker/tracker.pyc -------------------------------------------------------------------------------- /repo/dmlc-core/tracker/dmlc_tracker/stop.sh: -------------------------------------------------------------------------------- 1 | ps -ef | grep lr_ftrl | awk '{ print $2 }' | sudo xargs kill -9 2 | -------------------------------------------------------------------------------- /repo/ps-lite/make/travis/travis_before_cache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # do nothing for now 3 | ls -alLR ${CACHE_PREFIX} -------------------------------------------------------------------------------- /tracker/dmlc-yarn.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/tracker/dmlc-yarn.jar -------------------------------------------------------------------------------- /repo/dmlc-core/scripts/travis/travis_before_cache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # do nothing for now 3 | ls -alLR ${CACHE_PREFIX} -------------------------------------------------------------------------------- /third_party/v0.3.4.tar.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/third_party/v0.3.4.tar.gz -------------------------------------------------------------------------------- /repo/ps-lite/guide/deps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/repo/ps-lite/guide/deps.png -------------------------------------------------------------------------------- /third_party/lz4-r129.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/third_party/lz4-r129.tar.gz -------------------------------------------------------------------------------- /third_party/zeromq-4.1.2.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/third_party/zeromq-4.1.2.tar.gz -------------------------------------------------------------------------------- /third_party/cityhash-1.1.1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/third_party/cityhash-1.1.1.tar.gz -------------------------------------------------------------------------------- /third_party/protobuf-2.5.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/third_party/protobuf-2.5.0.tar.gz -------------------------------------------------------------------------------- /repo/ps-lite/third_party/v0.3.4.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/repo/ps-lite/third_party/v0.3.4.tar.gz 
-------------------------------------------------------------------------------- /repo/ps-lite/third_party/lz4-r129.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/repo/ps-lite/third_party/lz4-r129.tar.gz -------------------------------------------------------------------------------- /repo/dmlc-core/windows/.gitignore: -------------------------------------------------------------------------------- 1 | Debug 2 | *suo 3 | *.dll 4 | *i386 5 | *x64 6 | ipch 7 | *.filters 8 | *.user 9 | *sdf 10 | Release 11 | Debug 12 | -------------------------------------------------------------------------------- /repo/ps-lite/third_party/zeromq-4.1.2.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/repo/ps-lite/third_party/zeromq-4.1.2.tar.gz -------------------------------------------------------------------------------- /third_party/gflags-2.0-no-svn-files.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/third_party/gflags-2.0-no-svn-files.tar.gz -------------------------------------------------------------------------------- /repo/dmlc-core/scripts/travis/travis_osx_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ ${TRAVIS_OS_NAME} != "osx" ]; then 4 | exit 0 5 | fi 6 | 7 | brew update 8 | -------------------------------------------------------------------------------- /repo/dmlc-core/tracker/dmlc_tracker/__init__.py: -------------------------------------------------------------------------------- 1 | """DMLC Tracker modules for running jobs on different platforms.""" 2 | from __future__ import absolute_import 3 | 
-------------------------------------------------------------------------------- /repo/ps-lite/third_party/cityhash-1.1.1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/repo/ps-lite/third_party/cityhash-1.1.1.tar.gz -------------------------------------------------------------------------------- /repo/ps-lite/third_party/protobuf-2.5.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/repo/ps-lite/third_party/protobuf-2.5.0.tar.gz -------------------------------------------------------------------------------- /repo/ps-lite/src/proto/range.proto: -------------------------------------------------------------------------------- 1 | package ps; 2 | 3 | // TODO may change it to string 4 | message PbRange { 5 | required uint64 begin = 1; 6 | required uint64 end = 2; 7 | } 8 | -------------------------------------------------------------------------------- /repo/ps-lite/third_party/gflags-2.0-no-svn-files.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ljzzju/logistic-regression-ftrl-ps/HEAD/repo/ps-lite/third_party/gflags-2.0-no-svn-files.tar.gz -------------------------------------------------------------------------------- /dmlc.linear.conf: -------------------------------------------------------------------------------- 1 | train_data = "hdfs:///dmlc/data/agaricus.txt.train" 2 | val_data = "hdfs:///dmlc/data/agaricus.txt.test" 3 | model_out = "hdfs:///dmlc/data/out/" 4 | max_data_pass = 3 5 | -------------------------------------------------------------------------------- /ps_run_local_cmd.sh: -------------------------------------------------------------------------------- 1 | python ./tracker/dmlc_local.py -n 2 -s 1 
/home/worker/xiaoshu/DML/logistic_regression_ps/lr_ftrl /home/worker/xiaoshu/DML/logistic_regression_ps/data/agaricus.txt.train 2 | -------------------------------------------------------------------------------- /tracker/ps_run_local_cmd.sh: -------------------------------------------------------------------------------- 1 | python dmlc_local.py -n 2 -s 2 /home/worker/xiaoshu/DML/logistic_regression_ps/lr_ftrl /home/worker/xiaoshu/DML/logistic_regression_ps/data/agaricus.txt.train 2 | -------------------------------------------------------------------------------- /repo/dmlc-core/doc/README: -------------------------------------------------------------------------------- 1 | This document is generated by sphinx. 2 | Make sure you cloned the following repos in the root. 3 | 4 | - https://github.com/tqchen/recommonmark 5 | 6 | Type make html in doc folder. 7 | 8 | -------------------------------------------------------------------------------- /repo/dmlc-core/windows/README.md: -------------------------------------------------------------------------------- 1 | MSVC Project 2 | ==== 3 | The solution has been created with Visual Studio Express 2010. 4 | Preliminary project for testing windows compatibility. 5 | It do not come with a warranty. 6 | -------------------------------------------------------------------------------- /repo/dmlc-core/example/dmlc_example.mk: -------------------------------------------------------------------------------- 1 | ALL_EXAMPLE=example/parameter 2 | 3 | 4 | example/parameter: example/parameter.cc libdmlc.a 5 | 6 | $(ALL_EXAMPLE) : 7 | $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) 8 | 9 | 10 | -------------------------------------------------------------------------------- /repo/dmlc-core/tracker/yarn/README.md: -------------------------------------------------------------------------------- 1 | DMLC YARN AppMaster 2 | =================== 3 | * This folder contains Application code to allow rabit run on Yarn. 
4 | * See [tracker](../) for job submission. 5 | - run ```./build.sh``` to build the jar, before using the script 6 | -------------------------------------------------------------------------------- /repo/dmlc-core/tracker/dmlc-submit: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import os 4 | curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) 5 | sys.path.insert(0, curr_path) 6 | 7 | from dmlc_tracker import submit 8 | 9 | submit.main() 10 | -------------------------------------------------------------------------------- /repo/dmlc-core/tracker/yarn/build.bat: -------------------------------------------------------------------------------- 1 | mkdir bin 2 | 3 | for /f %%i in ('%HADOOP_HOME%\bin\hadoop classpath') do set CPATH=%%i 4 | %JAVA_HOME%/bin/javac -cp %CPATH% -d bin src/main/java/org/apache/hadoop/yarn/dmlc/*.java 5 | %JAVA_HOME%/bin/jar cf dmlc-yarn.jar -C bin . 6 | -------------------------------------------------------------------------------- /repo/ps-lite/make/travis/travis_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # main script of travis 3 | 4 | if [ ${TASK} == "lint" ]; then 5 | make lint || exit -1 6 | fi 7 | 8 | if [ ${TASK} == "build" ]; then 9 | make DEPS_PATH=${CACHE_PREFIX} CXX=${CXX} || exit -1 10 | fi 11 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/unittest/unittest_main.cc: -------------------------------------------------------------------------------- 1 | // Copyright by Contributors 2 | #include 3 | 4 | int main(int argc, char ** argv) { 5 | testing::InitGoogleTest(&argc, argv); 6 | testing::FLAGS_gtest_death_test_style = "threadsafe"; 7 | return RUN_ALL_TESTS(); 8 | } 9 | -------------------------------------------------------------------------------- /repo/dmlc-core/tracker/yarn/build.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ ! -d bin ]; then 3 | mkdir bin 4 | fi 5 | 6 | CPATH=`${HADOOP_HOME}/bin/hadoop classpath` 7 | ${JAVA_HOME}/bin/javac -cp $CPATH -d bin src/main/java/org/apache/hadoop/yarn/dmlc/*.java 8 | ${JAVA_HOME}/bin/jar cf dmlc-yarn.jar -C bin . 9 | -------------------------------------------------------------------------------- /repo/ps-lite/doc/ps-worker.rst: -------------------------------------------------------------------------------- 1 | Worker API 2 | ============= 3 | 4 | .. doxygentypedef:: ps::Key 5 | :project: ps-lite 6 | 7 | .. doxygenstruct:: ps::SyncOpts 8 | :project: ps-lite 9 | :members: 10 | 11 | .. doxygenclass:: ps::KVWorker 12 | :project: ps-lite 13 | :members: 14 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/logging_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(void) { 4 | LOG(INFO) << "hello"; 5 | LOG(ERROR) << "error"; 6 | try { 7 | LOG(FATAL)<<'a'<<11<<33; 8 | } catch (dmlc::Error e) { 9 | LOG(INFO) << "catch " << e.what(); 10 | } 11 | CHECK(2!=3) << "test"; 12 | CHECK(2==3) << "test"; 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /repo/ps-lite/doc/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | doxygen 4 | 5 | # put doxygen doc online 6 | 7 | rm html.tar.gz 8 | tar -zcf html.tar.gz html 9 | 10 | host=linux.gp.cs.cmu.edu 11 | scp html.tar.gz $host:~ 12 | 13 | ssh $host 'bash -s' < 3 | #include 4 | #include 5 | namespace ps { 6 | 7 | inline bool DirExists(const std::string& dir) { 8 | struct stat info; 9 | if (stat(dir.c_str(), &info) != 0) return false; 10 | if (info.st_mode & S_IFDIR) return true; 11 | return false; 12 | } 13 | 14 | inline bool CreateDir(const std::string& dir) { 15 | 
return mkdir(dir.c_str(), 0755) == 0; 16 | } 17 | 18 | } // namespace ps 19 | -------------------------------------------------------------------------------- /src/scheduler.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace dmlc{ 4 | namespace linear{ 5 | 6 | class Scheduler : public ps::App{ 7 | public: 8 | Scheduler(){} 9 | ~Scheduler(){} 10 | 11 | virtual void ProcessResponse(ps::Message* response) { } 12 | virtual bool Run(){ 13 | std::cout<<"Connected "< 4 | #include 5 | 6 | using namespace std; 7 | 8 | TEST(Logging, basics) { 9 | LOG(INFO) << "hello"; 10 | LOG(ERROR) << "error"; 11 | 12 | int x = 1, y = 1; 13 | CHECK_EQ(x, y); 14 | CHECK_GE(x, y); 15 | 16 | int *z = &x; 17 | CHECK_EQ(*CHECK_NOTNULL(z), x); 18 | 19 | ASSERT_DEATH(CHECK_NE(x, y), ".*"); 20 | } 21 | -------------------------------------------------------------------------------- /repo/ps-lite/make/README.md: -------------------------------------------------------------------------------- 1 | You can modify [config.mk](config.mk) to customize the building. You can copy 2 | this file to the upper directory so that the changes will be ignored by git. 3 | 4 | ## FAQ 5 | 6 | 7 | 8 | 9 | - `undefined reference to `_Ux86_64_getcontext'` 10 | 11 | add `-lunwind` in makefile (e.g `LDFLAGS += -lunwind`) 12 | -------------------------------------------------------------------------------- /repo/dmlc-core/doc/index.md: -------------------------------------------------------------------------------- 1 | DMLC-Core Documentation 2 | ======================= 3 | DMLC Core contains common codebase to help us build machine learning toolkits easier. 
4 | 5 | Contents 6 | -------- 7 | * [Parameter Structure for Machine Learning](parameter.md) 8 | * [Doxygen C++ API Reference](https://dmlc-core.readthedocs.org/en/latest/doxygen) 9 | 10 | Indices and tables 11 | ------------------ 12 | 13 | ```eval_rst 14 | * :ref:`genindex` 15 | * :ref:`modindex` 16 | * :ref:`search` 17 | ``` 18 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/unittest/dmlc_unittest.mk: -------------------------------------------------------------------------------- 1 | UTEST_ROOT=test/unittest 2 | UNITTEST=$(UTEST_ROOT)/dmlc_unittest 3 | UNITTEST_SRC=$(wildcard $(UTEST_ROOT)/*.cc) 4 | UNITTEST_OBJ=$(patsubst %.cc,%.o,$(UNITTEST_SRC)) 5 | 6 | GTEST_LIB=$(GTEST_PATH)/lib/ 7 | GTEST_INC=$(GTEST_PATH)/include/ 8 | 9 | $(UTEST_ROOT)/%.o : $(UTEST_ROOT)/%.cc libdmlc.a 10 | $(CXX) $(CFLAGS) -I$(GTEST_INC) -o $@ -c $< 11 | 12 | $(UNITTEST) : $(UNITTEST_OBJ) 13 | $(CXX) $(CFLAGS) -L$(GTEST_LIB) -o $@ $^ libdmlc.a $(LDFLAGS) -lgtest 14 | -------------------------------------------------------------------------------- /repo/ps-lite/make/travis/travis_setup_env.sh: -------------------------------------------------------------------------------- 1 | # script to be sourced in travis yml 2 | # setup all enviroment variables 3 | 4 | export CACHE_PREFIX=${HOME}/.cache/usr 5 | export PATH=${HOME}/.local/bin:${PATH} 6 | export PATH=${PATH}:${CACHE_PREFIX}/bin 7 | export CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH}:${CACHE_PREFIX}/include 8 | export C_INCLUDE_PATH=${C_INCLUDE_PATH}:${CACHE_PREFIX}/include 9 | export LIBRARY_PATH=${LIBRARY_PATH}:${CACHE_PREFIX}/lib 10 | export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CACHE_PREFIX}/lib 11 | 12 | # alias make="make -j4" 13 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/unittest/unittest_array_view.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 
#include 4 | 5 | void ArrayViewTest(dmlc::array_view view, int base) { 6 | int cnt = base; 7 | for (int v : view) { 8 | CHECK_EQ(v, cnt); 9 | ++cnt; 10 | } 11 | } 12 | 13 | TEST(ArrayView, Basic) { 14 | std::vector vec{0, 1, 2}; 15 | ArrayViewTest(vec, 0); 16 | int arr[] = {1, 2, 3}; 17 | ArrayViewTest(dmlc::array_view(arr, arr + 3), 1); 18 | dmlc::array_view a = vec; 19 | CHECK_EQ(a.size(), vec.size()); 20 | } 21 | -------------------------------------------------------------------------------- /repo/dmlc-core/doc/sphinx_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Helper utilty function for customization.""" 3 | import sys 4 | import os 5 | import docutils 6 | import subprocess 7 | 8 | if os.environ.get('READTHEDOCS', None) == 'True': 9 | subprocess.call('cd ..; rm -rf recommonmark;' + 10 | 'git clone https://github.com/tqchen/recommonmark', shell=True) 11 | 12 | sys.path.insert(0, os.path.abspath('../recommonmark/')) 13 | from recommonmark import parser, transform 14 | 15 | MarkdownParser = parser.CommonMarkParser 16 | AutoStructify = transform.AutoStructify 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 1. Introduction 2 | 3 | Distributed LR With Parameter Server 4 | 5 | 2. Install 6 | 7 | step 1. in the logistic-regression-ftrl-ps 8 | 9 | cd /repo/ps-lite 10 | 11 | step 2. 12 | 13 | cp third_party/* . 14 | 15 | step 3. 16 | 17 | make 18 | 19 | step 4. 20 | 21 | cd /repo/dmlc-core 22 | 23 | make 24 | 25 | step 5. 26 | 27 | cd logistic-regression-ftrl-ps 28 | 29 | make 30 | 31 | 32 | 3. Contact: 33 | 2012wxs@gmail.com 34 | 35 | 4.. 
References: 36 | [1] Ad Click Prediction: a View from the Trenches:http://dl.acm.org/citation.cfm?id=2488200 37 | 38 | -------------------------------------------------------------------------------- /repo/ps-lite/src/proto/node.proto: -------------------------------------------------------------------------------- 1 | package ps; 2 | import "proto/range.proto"; 3 | 4 | message Node { 5 | enum Role { 6 | SERVER = 0; 7 | WORKER = 1; 8 | SCHEDULER = 3; // each running application has a single scheduler 9 | GROUP = 4; // a virtual node, present a group of node 10 | UNUSED = 5; // a backup node, could turn into another node 11 | } 12 | 13 | required Role role = 1; 14 | optional string id = 2; 15 | optional int32 rank = 5; 16 | // network address 17 | optional string hostname = 3; 18 | optional int32 port = 4; 19 | 20 | optional PbRange key = 6; 21 | } 22 | -------------------------------------------------------------------------------- /repo/dmlc-core/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 by Contributors 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | -------------------------------------------------------------------------------- /repo/ps-lite/src/filter/filter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "system/message.h" 3 | #include "proto/filter.pb.h" 4 | 5 | namespace ps { 6 | 7 | /// \brief The interface of a filter 8 | class IFilter { 9 | public: 10 | IFilter() { } 11 | virtual ~IFilter() { } 12 | 13 | /// \brief Factory function 14 | static IFilter* create(const Filter& conf); 15 | 16 | virtual void Encode(Message* msg) { } 17 | virtual void Decode(Message* msg) { } 18 | 19 | static Filter* Find(Filter::Type type, Message* msg) { 20 | return Find(type, &(msg->task)); 21 | } 22 | static Filter* Find(Filter::Type type, Task* task); 23 | }; 24 | 25 | } // namespace 26 | -------------------------------------------------------------------------------- /repo/ps-lite/guide/example_a.cc: -------------------------------------------------------------------------------- 1 | #include "ps.h" 2 | typedef float Val; 3 | 4 | int CreateServerNode(int argc, char *argv[]) { 5 | ps::OnlineServer server; 6 | return 0; 7 | } 8 | 9 | int WorkerNodeMain(int argc, char *argv[]) { 10 | using namespace ps; 11 | std::vector key = {1, 3, 5}; 12 | std::vector val = {1, 1, 1}; 13 | std::vector recv_val; 14 | 15 | KVWorker wk; 16 | int ts = wk.Push(key, val); 17 | wk.Wait(ts); 18 | 19 | ts = wk.Pull(key, &recv_val); 20 | wk.Wait(ts); 21 | 22 | std::cout << "values pulled at " << MyNodeID() << ": " << 23 | Blob(recv_val) << std::endl; 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /repo/ps-lite/make/config.mk: -------------------------------------------------------------------------------- 1 | # default configuration of make 2 | # 3 | # you can copy it to the parent directory and modify it as you want. 
then 4 | # compile by `make -j 8` using 8 threads 5 | 6 | # compiler 7 | CXX = g++ 8 | 9 | # optimization flag. -O0 -ggdb for debug 10 | # OPT = -O3 -ggdb 11 | 12 | # statically link all dependent libraries, such as gflags, zeromq, if 13 | # 1. otherwise use dynamic linking 14 | # STATIC_DEPS = 0 15 | 16 | # the installed path of third party libraries 17 | # DEPS_PATH = $(shell pwd)/deps 18 | 19 | # additional link flags, such as -ltcmalloc_and_profiler 20 | # EXTRA_LDFLAGS = 21 | 22 | # additional compile flags 23 | # EXTRA_CFLAGS = 24 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/iostream_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char *argv[]) { 5 | if (argc < 2) { 6 | printf("Usage: \n"); 7 | return 0; 8 | } 9 | {// output 10 | dmlc::Stream *fs = dmlc::Stream::Create(argv[1], "w"); 11 | dmlc::ostream os(fs); 12 | os << "hello-world " << 1e-10<< std::endl; 13 | delete fs; 14 | } 15 | {// input 16 | std::string name; 17 | double data; 18 | dmlc::Stream *fs = dmlc::Stream::Create(argv[1], "r"); 19 | dmlc::istream is(fs); 20 | is >> name >> data; 21 | std::cout << name << " " << data << std::endl; 22 | delete fs; 23 | } 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /ps_run_yarn_cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #hdfs_dir=dmlc 4 | 5 | #${HADOOP_HOME}/bin/hadoop fs -rm -r -f $hdfs_dir/data 6 | #${HADOOP_HOME}/bin/hadoop fs -mkdir $hdfs_dir/data 7 | #${HADOOP_HOME}/bin/hadoop fs -put ../data/agaricus.txt.train $hdfs_dir/data 8 | #${HADOOP_HOME}/bin/hadoop fs -put ../data/agaricus.txt.test $hdfs_dir/data 9 | 10 | #cat <<< " 11 | #train_data = \"hdfs://${hdfs_dir}/data/agaricus.txt.train\" 12 | #val_data = \"hdfs://${hdfs_dir}/data/agaricus.txt.test\" 13 | #max_data_pass = 3 
14 | #" >guide/demo_hdfs.conf 15 | 16 | ./repo/dmlc-core/tracker/dmlc_tracker/dmlc_yarn.py --jobname dmlc_wxs --vcores 1 -mem 512 -n 2 -s 1 train dmlc.linear.conf --log_dir=log --sync_timeout 500 -alsologtostderr -v 10 17 | -------------------------------------------------------------------------------- /repo/ps-lite/guide/example_c.cc: -------------------------------------------------------------------------------- 1 | #include "ps.h" 2 | typedef float Val; 3 | 4 | int CreateServerNode(int argc, char *argv[]) { 5 | ps::OnlineServer server; 6 | return 0; 7 | } 8 | 9 | int WorkerNodeMain(int argc, char *argv[]) { 10 | using namespace ps; 11 | auto key = std::make_shared>(); 12 | auto val = std::make_shared>(); 13 | 14 | *key = {1, 3, 5}; 15 | *val = {1, 1, 1}; 16 | 17 | KVWorker wk; 18 | wk.Wait(wk.ZPush(key, val)); 19 | 20 | std::vector recv_val; 21 | wk.Wait(wk.ZPull(key, &recv_val)); 22 | 23 | std::cout << "values pulled at " << MyNodeID() << ": " 24 | << Blob(recv_val) << std::endl; 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/README.md: -------------------------------------------------------------------------------- 1 | This folder contains testcases for the project 2 | 3 | test scripts for s3: 4 | 5 | `test.sh` 6 | 7 | ```bash 8 | for r in {0..10}; do 9 | file=data/${RANDOM} 10 | start=`date +'%s.%N'` 11 | ./filesys_test cat s3://dmlc/ilsvrc12/val.rec >$file 12 | # ./filesys_test cat s3://dmlc/cifar10/train.rec >$file 13 | end=`date +'%s.%N'` 14 | res=$(echo "$end - $start" | bc -l) 15 | md5=`md5sum $file` 16 | rm $file 17 | echo "job $1, rp $r, $md5, time $res" 18 | done 19 | echo "job $1 done" 20 | ``` 21 | 22 | `run.sh` 23 | 24 | ```bash 25 | mkdir -p data 26 | rm -f data/* 27 | for i in {0..9}; do 28 | bash test.sh $i & 29 | sleep 1 30 | done 31 | wait 32 | ``` 33 | -------------------------------------------------------------------------------- 
/repo/ps-lite/guide/example_b.cc: -------------------------------------------------------------------------------- 1 | #include "ps.h" 2 | typedef float Val; 3 | 4 | int CreateServerNode(int argc, char *argv[]) { 5 | ps::OnlineServer server; 6 | return 0; 7 | } 8 | 9 | int WorkerNodeMain(int argc, char *argv[]) { 10 | using namespace ps; 11 | std::vector key = {1, 3, 5}; 12 | std::vector val = {1, 1, 1}; 13 | std::vector recv_val; 14 | 15 | KVWorker wk; 16 | int ts = wk.Push(key, val); 17 | 18 | SyncOpts opts; 19 | opts.deps = {ts}; 20 | opts.callback = [&recv_val]() { 21 | std::cout << "values pulled at " << MyNodeID() << ": " << 22 | Blob(recv_val) << std::endl; 23 | }; 24 | ts = wk.Pull(key, &recv_val, opts); 25 | wk.Wait(ts); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/split_test.cc: -------------------------------------------------------------------------------- 1 | // test reading speed from a InputSplit 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | int main(int argc, char *argv[]) { 9 | if (argc < 5) { 10 | printf("Usage: partid npart\n"); 11 | return 0; 12 | } 13 | using namespace dmlc; 14 | InputSplit *split = InputSplit::Create(argv[1], 15 | atoi(argv[2]), 16 | atoi(argv[3]), 17 | "text"); 18 | InputSplit::Blob blb; 19 | while (split->NextChunk(&blb)) { 20 | std::cout << std::string((char*)blb.dptr, blb.size); 21 | } 22 | delete split; 23 | return 0; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /repo/ps-lite/src/filter/sparse_filter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "filter/filter.h" 3 | namespace ps { 4 | 5 | class SparseFilter : public IFilter { 6 | public: 7 | SparseFilter() { 8 | // use 0xffff..ff as the mark when a value is filtered, it is nan for float 9 | // and double. 
10 | memcpy(&double_v_, &kuint64max, sizeof(double)); 11 | memcpy(&float_v_, &kuint32max, sizeof(float)); 12 | } 13 | 14 | // mark an entry as filtered 15 | void mark(float* v) { *v = float_v_; } 16 | void mark(double* v) { *v = double_v_; } 17 | 18 | // test whether or not an entry is filtered 19 | bool marked(double v) { return v != v; } 20 | bool marked(float v) { return v != v; } 21 | private: 22 | float float_v_; 23 | double double_v_; 24 | }; 25 | 26 | } // namespace ps 27 | -------------------------------------------------------------------------------- /repo/ps-lite/make/ps.mk: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------------------------------------------- 2 | # parameter server configuration script 3 | # 4 | # include ps.mk after the variables are set 5 | # 6 | # Add PS_CFLAGS to the compile flags 7 | # Add PS_LDFLAGS to the linker flags 8 | #---------------------------------------------------------------------------------------- 9 | 10 | ifeq ($(USE_KEY32), 1) 11 | PS_CFLAGS += -DUSE_KEY32=1 12 | endif 13 | PS_LDFLAGS_SO = -L$(DEPS_PATH)/lib -lglog -lprotobuf -lgflags -lzmq -lcityhash -llz4 14 | PS_LDFLAGS_A = $(addprefix $(DEPS_PATH)/lib/, libprotobuf.a libglog.a libgflags.a libzmq.a libcityhash.a liblz4.a) 15 | 16 | ifeq ($(STATIC_DEPS), 0) 17 | PS_LDFLAGS += $(PS_LDFLAGS_SO) 18 | else 19 | PS_LDFLAGS += $(PS_LDFLAGS_A) 20 | endif 21 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "worker.h" 2 | #include "server.h" 3 | #include "scheduler.h" 4 | 5 | #include "ps.h" 6 | 7 | namespace ps{ 8 | App* App::Create(int argc, char *argv[]){ 9 | NodeInfo n; 10 | if(n.IsWorker()){ 11 | std::cout<<"create worker~"< 10 | #include 11 | #include 12 | 13 | namespace dmlc { 14 | /*! 
15 | * \brief Split a string by delimiter 16 | * \param s String to be splitted. 17 | * \param delim The delimiter. 18 | * \return a splitted vector of strings. 19 | */ 20 | inline std::vector Split(const std::string& s, char delim) { 21 | std::string item; 22 | std::istringstream is(s); 23 | std::vector ret; 24 | while (std::getline(is, item, delim)) { 25 | ret.push_back(item); 26 | } 27 | return ret; 28 | } 29 | } // namespace dmlc 30 | 31 | #endif // DMLC_COMMON_H_ 32 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CPP = g++ 3 | CPP_tag = -std=c++11 4 | DEPS_PATH = ./repo/ps-lite/deps 5 | INCLUDE = -I./repo/ps-lite/src -I./repo/ps-lite/deps/include -I./repo/dmlc-core/include 6 | 7 | LDFLAGS = $(addprefix $(DEPS_PATH)/lib/, libglog.a libprotobuf.a libgflags.a libzmq.a libcityhash.a liblz4.a) 8 | 9 | 10 | all: train dump_model 11 | 12 | train: main.o ./repo/ps-lite/build/libps.a $(LDFLAGS) -lpthread 13 | $(CPP) $(CPP_tag) -o $@ $^ $(INCLUDE) 14 | 15 | main.o: src/main.cpp ./repo/ps-lite/build/libps.a 16 | $(CPP) $(CPP_tag) -c src/main.cpp $(INCLUDE) 17 | 18 | dump_model: dump.o ./repo/dmlc-core/libdmlc.a 19 | $(CPP) $(CPP_tag) -o $@ $^ $(INCLUDE) 20 | 21 | dump.o: src/dump.cc ./repo/dmlc-core/libdmlc.a ./repo/ps-lite/deps/lib/libglog.a 22 | $(CPP) $(CPP_tag) -c src/dump.cc $(INCLUDE) 23 | 24 | 25 | clean: 26 | rm -f *~ train 27 | rm -f *.o 28 | -------------------------------------------------------------------------------- /repo/ps-lite/doc/index.rst: -------------------------------------------------------------------------------- 1 | .. ps-lite documentation master file, created by 2 | sphinx-quickstart on Wed Jul 29 00:19:51 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 
5 | 6 | PS-lite Document 7 | ==================== 8 | 9 | The parameter server is a distributed system scaling to industry size machine 10 | learning problems. It provides asynchronous and zero-copy key-value pair 11 | communications between worker machines and server machines. It also supports 12 | flexiable data consistency model, data filters, and flexiable server machine 13 | programming. 14 | 15 | API document 16 | -------------- 17 | 18 | .. toctree:: 19 | :maxdepth: 2 20 | 21 | ps-worker 22 | 23 | 24 | Indices and tables 25 | ================== 26 | 27 | * :ref:`genindex` 28 | * :ref:`modindex` 29 | * :ref:`search` 30 | -------------------------------------------------------------------------------- /repo/ps-lite/guide/example_d.cc: -------------------------------------------------------------------------------- 1 | #include "ps.h" 2 | typedef float Val; 3 | 4 | DEFINE_int32(nt, 1, "num of server threads"); 5 | 6 | int CreateServerNode(int argc, char *argv[]) { 7 | ps::OnlineServer server(ps::IOnlineHandle(), 1, FLAGS_nt); 8 | return 0; 9 | } 10 | 11 | int WorkerNodeMain(int argc, char *argv[]) { 12 | using namespace ps; 13 | 14 | int n = 1000000; 15 | auto key = std::make_shared>(n); 16 | for (int i = 0; i < n; ++i) (*key)[i] = kMaxKey / n * i; 17 | auto val = std::make_shared>(n, 1.0); 18 | 19 | KVWorker wk; 20 | std::vector recv_val; 21 | for (int i = 0; i < 10; ++i) { 22 | SyncOpts opts; 23 | opts.AddFilter(Filter::KEY_CACHING); 24 | opts.AddFilter(Filter::COMPRESSING); 25 | wk.Wait(wk.ZPush(key, val, opts)); 26 | wk.Wait(wk.ZPull(key, &recv_val, opts)); 27 | } 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /repo/ps-lite/src/base/split.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "base/common.h" 3 | 4 | namespace ps { 5 | 6 | // split a std::string using a character delimiter. 
if skip_empty == true, 7 | // split("one:two::three", ':'); will return 4 items 8 | 9 | inline std::vector 10 | split(const std::string &s, char delim, bool skip_empty = false) { 11 | std::vector elems; 12 | std::stringstream ss(s); 13 | string item; 14 | while (std::getline(ss, item, delim)) 15 | if (!(skip_empty && item.empty())) 16 | elems.push_back(item); 17 | return elems; 18 | } 19 | 20 | // TODO support bool skip_empty = false 21 | inline std::string join(const std::vector &elems, const string& delim) { 22 | std::string str; 23 | for (int i = 0; i < elems.size() - 1; ++i) { 24 | str += elems[i] + delim; 25 | } 26 | str += elems.back(); 27 | return str; 28 | } 29 | 30 | } // namespace ps 31 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/dataiter_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char *argv[]) { 5 | if (argc < 4) { 6 | printf("Usage: filename partid npart [format]\n"); 7 | return 0; 8 | } 9 | char libsvm[10] = "libsvm"; 10 | char* format; 11 | if (argc > 4) { 12 | format = argv[4]; 13 | } else { 14 | format = libsvm; 15 | } 16 | 17 | using namespace dmlc; 18 | RowBlockIter *iter = 19 | RowBlockIter::Create( 20 | argv[1], atoi(argv[2]), atoi(argv[3]), format); 21 | double tstart = GetTime(); 22 | size_t bytes_read = 0; 23 | while (iter->Next()) { 24 | const RowBlock &batch = iter->Value(); 25 | bytes_read += batch.MemCostBytes(); 26 | double tdiff = GetTime() - tstart; 27 | LOG(INFO) << (bytes_read >> 20UL) << 28 | " MB read " << ((bytes_read >> 20UL) / tdiff)<< " MB/sec"; 29 | } 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /repo/dmlc-core/scripts/travis/travis_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # main script of travis 4 | if [ ${TASK} == "lint" ]; then 5 | 
make lint || exit -1 6 | make doxygen 2>log.txt 7 | (cat log.txt| grep -v ENABLE_PREPROCESSING |grep -v "unsupported tag" |grep warning) && exit -1 8 | exit 0 9 | fi 10 | 11 | if [ ${TRAVIS_OS_NAME} == "osx" ]; then 12 | export NO_OPENMP=1 13 | fi 14 | 15 | if [ ${TASK} == "unittest_gtest" ]; then 16 | cp make/config.mk . 17 | make -f scripts/packages.mk gtest 18 | if [ ${TRAVIS_OS_NAME} != "osx" ]; then 19 | echo "USE_S3=1" >> config.mk 20 | echo "export CXX = g++-4.8" >> config.mk 21 | else 22 | echo "USE_S3=0" >> config.mk 23 | echo "USE_OPENMP=0" >> config.mk 24 | fi 25 | echo "GTEST_PATH="${CACHE_PREFIX} >> config.mk 26 | echo "BUILD_TEST=1" >> config.mk 27 | make all || exit -1 28 | test/unittest/dmlc_unittest || exit -1 29 | fi 30 | -------------------------------------------------------------------------------- /repo/dmlc-core/scripts/packages.mk: -------------------------------------------------------------------------------- 1 | # Makfile for easily install dependencies 2 | 3 | # List of packages here 4 | .PHONY: gtest lz4 5 | 6 | # rules for gtest 7 | ${CACHE_PREFIX}/include/gtest: 8 | rm -rf gtest-1.7.0.zip gtest-1.7.0 9 | wget http://googletest.googlecode.com/files/gtest-1.7.0.zip 10 | unzip gtest-1.7.0.zip 11 | cd gtest-1.7.0 ; ./configure; make; cd - 12 | mkdir -p ${CACHE_PREFIX}/include ${CACHE_PREFIX}/lib 13 | cp -r gtest-1.7.0/include/gtest ${CACHE_PREFIX}/include 14 | cp -r gtest-1.7.0/lib/.libs/* ${CACHE_PREFIX}/lib 15 | rm ${CACHE_PREFIX}/lib/libgtest_main.la ${CACHE_PREFIX}/lib/libgtest.la 16 | cp -f gtest-1.7.0/lib/*.la ${CACHE_PREFIX}/lib 17 | rm -rf gtest-1.7.0.zip 18 | 19 | gtest: | ${CACHE_PREFIX}/include/gtest 20 | 21 | lz4: ${CACHE_PREFIX}/include/lz4.h 22 | 23 | ${CACHE_PREFIX}/include/lz4.h: 24 | rm -rf lz4 25 | git clone https://github.com/Cyan4973/lz4 26 | cd lz4; make; make install PREFIX=${CACHE_PREFIX}; cd - 27 | -------------------------------------------------------------------------------- /repo/ps-lite/src/ps.h: 
-------------------------------------------------------------------------------- 1 | /*! 2 | * @file ps.h 3 | * \brief The parameter server interface 4 | */ 5 | #pragma once 6 | #include 7 | #include "dmlc/io.h" 8 | #include "system/postoffice.h" 9 | namespace ps { 10 | 11 | inline int NextID() { 12 | return Postoffice::instance().manager().NextCustomerID(); 13 | } 14 | 15 | inline void StartSystem(int* argc, char ***argv) { 16 | Postoffice::instance().Run(argc, argv); 17 | } 18 | 19 | inline void StopSystem() { 20 | Postoffice::instance().Stop(); 21 | } 22 | 23 | inline int RunSystem(int* argc, char ***argv) { 24 | StartSystem(argc, argv); StopSystem(); 25 | return 0; 26 | } 27 | } // namespace ps 28 | 29 | /// \brief worker node api 30 | #include "ps/worker.h" 31 | 32 | /// \brief server node api 33 | #include "ps/server.h" 34 | 35 | /// \brief scheduler node api 36 | #include "ps/scheduler.h" 37 | 38 | /// \brief node runtime info 39 | #include "ps/node_info.h" 40 | 41 | 42 | /// \brief implementation 43 | #include "system/ps-inl.h" 44 | -------------------------------------------------------------------------------- /repo/ps-lite/guide/local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # set -x 3 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../deps/lib 4 | if [ $# -lt 3 ]; then 5 | echo "usage: $0 num_servers num_workers bin [args..]" 6 | exit -1; 7 | fi 8 | 9 | num_servers=$1 10 | shift 11 | num_workers=$1 12 | shift 13 | bin=$1 14 | shift 15 | arg="-num_servers ${num_servers} -num_workers ${num_workers} -log_dir log $@" 16 | 17 | # start the scheduler 18 | Sch="role:SCHEDULER,hostname:'127.0.0.1',port:8001,id:'H'" 19 | ${bin} -my_node ${Sch} -scheduler ${Sch} ${arg} & 20 | 21 | # start servers 22 | for ((i=0; i<${num_servers}; ++i)); do 23 | port=$((9600 + ${i})) 24 | N="role:SERVER,hostname:'127.0.0.1',port:${port},id:'S${i}'" 25 | ${bin} -my_node ${N} -scheduler ${Sch} ${arg} & 26 | done 27 | 28 | # 
start workers 29 | for ((i=0; i<${num_workers}; ++i)); do 30 | port=$((9500 + ${i})) 31 | N="role:WORKER,hostname:'127.0.0.1',port:${port},id:'W${i}'" 32 | ${bin} -my_node ${N} -scheduler ${Sch} ${arg} & 33 | done 34 | 35 | wait 36 | -------------------------------------------------------------------------------- /repo/ps-lite/src/proto/heartbeat.proto: -------------------------------------------------------------------------------- 1 | package ps; 2 | 3 | message HeartbeatReport { 4 | optional int32 task_id = 1 [default = 0]; 5 | optional string hostname = 14; 6 | 7 | // time stamp 8 | // latest heartbeat report the scheduler has ever received 9 | // from a specified worker/server 10 | optional uint32 seconds_since_epoch = 2; 11 | 12 | optional uint32 total_time_milli = 13; 13 | optional uint32 busy_time_milli = 3; 14 | 15 | // recv/sent bytes via zmq 16 | optional uint32 net_in_mb = 4; 17 | optional uint32 net_out_mb = 5; 18 | 19 | // user+sys (percentage) 20 | optional uint32 process_cpu_usage = 6; 21 | optional uint32 host_cpu_usage = 7; 22 | 23 | optional uint32 process_rss_mb = 8; 24 | optional uint32 process_virt_mb = 9; 25 | optional uint32 host_in_use_gb = 10; 26 | optional uint32 host_in_use_percentage = 15; 27 | 28 | // host's network in/out bandwidth usage (MB/s) 29 | optional uint32 host_net_in_bw = 11; 30 | optional uint32 host_net_out_bw = 12; 31 | } 32 | -------------------------------------------------------------------------------- /repo/ps-lite/src/base/sketch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "base/common.h" 3 | // #include 4 | namespace ps { 5 | 6 | // the basc class for bloom filters, countmin, etc... 
7 | class Sketch { 8 | public: 9 | protected: 10 | 11 | // ver 1 is faster than ver 2, but is comparable to the murmurhash version 12 | // need add -msse4.2 in CFLAGS 13 | // uint64 crc32(uint64 key) const { 14 | // return _mm_crc32_u64(0, key); 15 | // } 16 | // uint32 crc32(uint64 key) const { 17 | // return _mm_crc32_u32((uint32)(key<<32), (uint32)key); 18 | // } 19 | 20 | uint32 hash(const uint64& key) const { 21 | // similar to murmurhash 22 | const uint32 seed = 0xbc9f1d34; 23 | const uint32 m = 0xc6a4a793; 24 | const uint32 n = 8; // sizeof uint64 25 | uint32 h = seed ^ (n * m); 26 | 27 | uint32 w = (uint32) key; 28 | h += w; h *= m; h ^= (h >> 16); 29 | 30 | w = (uint32) (key >> 32); 31 | h += w; h *= m; h ^= (h >> 16); 32 | return h; 33 | } 34 | }; 35 | } // namespace ps 36 | -------------------------------------------------------------------------------- /repo/dmlc-core/cmake/lint.cmake: -------------------------------------------------------------------------------- 1 | get_filename_component(CMAKE_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/.." 
ABSOLUTE) 2 | if(NOT MSVC) 3 | set(LINT_COMMAND ${CMAKE_SOURCE_DIR}/scripts/lint3.py) 4 | else() 5 | if((NOT PYTHON2_EXECUTABLE) AND (NOT PYTHON3_EXECUTABLE)) 6 | message(FATAL_ERROR "Cannot lint without python") 7 | endif() 8 | # format output so VS can bring us to the offending file/line 9 | if(PYTHON2_EXECUTABLE) 10 | set(LINT_COMMAND ${PYTHON2_EXECUTABLE} ${CMAKE_SOURCE_DIR}/scripts/lint.py) 11 | endif() 12 | if(PYTHON3_EXECUTABLE) 13 | set(LINT_COMMAND ${PYTHON3_EXECUTABLE} ${CMAKE_SOURCE_DIR}/scripts/lint3.py) 14 | endif() 15 | endif() 16 | 17 | set(LINT_DIRS include src scripts) 18 | 19 | cmake_policy(SET CMP0009 NEW) # suppress cmake warning 20 | execute_process( 21 | COMMAND ${LINT_COMMAND} ${PROJECT_NAME} all ${LINT_DIRS} 22 | WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} 23 | ERROR_VARIABLE LINT_OUTPUT 24 | ERROR_STRIP_TRAILING_WHITESPACE 25 | 26 | ) 27 | message(STATUS ${LINT_OUTPUT}) -------------------------------------------------------------------------------- /repo/ps-lite/src/base/barrier.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include "ps/base.h" // for CHECK* and DISALLOW* 6 | 7 | namespace ps { 8 | 9 | class Barrier { 10 | public: 11 | explicit Barrier(int num_threads) 12 | : num_to_block_(num_threads), num_to_exit_(num_threads) {} 13 | 14 | /// Block until all threads have reached this function. 
15 | /// return true if this is the last thread 16 | bool Block() { 17 | std::unique_lock l(mu_); 18 | num_to_block_--; 19 | CHECK_GE(num_to_block_, 0); 20 | 21 | if (num_to_block_ > 0) { 22 | while (num_to_block_ > 0) cv_.wait(l); 23 | } else { 24 | cv_.notify_all(); 25 | } 26 | 27 | num_to_exit_--; 28 | CHECK_GE(num_to_exit_, 0); 29 | return (num_to_exit_ == 0); 30 | } 31 | 32 | private: 33 | DISALLOW_COPY_AND_ASSIGN(Barrier); 34 | std::mutex mu_; 35 | std::condition_variable cv_; 36 | int num_to_block_; 37 | int num_to_exit_; 38 | }; 39 | 40 | 41 | } // PS 42 | -------------------------------------------------------------------------------- /repo/ps-lite/.travis.yml: -------------------------------------------------------------------------------- 1 | # disable sudo to use container based build 2 | sudo: false 3 | 4 | # Use Build Matrix to do lint and build seperately 5 | env: 6 | matrix: 7 | - TASK=build CXX=g++-4.8 8 | - TASK=build CXX=g++-5 9 | # - TASK=lint LINT_LANG=cpp 10 | 11 | # dependent apt packages 12 | addons: 13 | apt: 14 | sources: 15 | - ubuntu-toolchain-r-test 16 | packages: 17 | - g++-4.8 18 | - g++-5 19 | - wget 20 | - git 21 | - unzip 22 | 23 | before_install: 24 | - export TRAVIS=make/travis 25 | - source ${TRAVIS}/travis_setup_env.sh 26 | 27 | 28 | install: 29 | - pip install cpplint pylint --user `whoami` 30 | 31 | 32 | script: ${TRAVIS}/travis_script.sh 33 | 34 | 35 | before_cache: 36 | - ${TRAVIS}/travis_before_cache.sh 37 | 38 | cache: 39 | directories: 40 | - ${HOME}/.cache/usr 41 | 42 | 43 | notifications: 44 | # Emails are sent to the committer's git-configured email address by default, 45 | email: 46 | on_success: change 47 | on_failure: always 48 | -------------------------------------------------------------------------------- /repo/dmlc-core/tracker/yarn/src/main/java/org/apache/hadoop/yarn/dmlc/TaskRecord.java: -------------------------------------------------------------------------------- 1 | package org.apache.hadoop.yarn.dmlc; 
2 | 3 | import org.apache.hadoop.yarn.api.records.Container; 4 | import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest; 5 | 6 | /** 7 | * data structure to hold the task information 8 | */ 9 | public class TaskRecord { 10 | // task id of the task 11 | public int taskId = 0; 12 | // role of current node 13 | public String taskRole = "worker"; 14 | // number of failed attempts to run the task 15 | public int attemptCounter = 0; 16 | // container request, can be null if task is already running 17 | public ContainerRequest containerRequest = null; 18 | // running container, can be null if the task is not launched 19 | public Container container = null; 20 | // whether we have requested abortion of this task 21 | public boolean abortRequested = false; 22 | 23 | public TaskRecord(int taskId, String role) { 24 | this.taskId = taskId; 25 | this.taskRole = role; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /repo/ps-lite/src/system/node_assigner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "base/common.h" 3 | #include "base/range.h" 4 | #include "proto/node.pb.h" 5 | #include "proto/data.pb.h" 6 | namespace ps { 7 | 8 | // assign *node* with proper rank_id, key_range, etc.. 9 | class NodeAssigner { 10 | public: 11 | NodeAssigner(int num_servers, Range key_range) { 12 | num_servers_ = num_servers; 13 | key_range_ = key_range; 14 | } 15 | ~NodeAssigner() { } 16 | 17 | void Assign(Node* node) { 18 | Range kr = key_range_; 19 | int rank = 0; 20 | if (node->role() == Node::SERVER) { 21 | kr = key_range_.EvenDivide(num_servers_, server_rank_); 22 | rank = server_rank_ ++; 23 | } else if (node->role() == Node::WORKER) { 24 | rank = worker_rank_ ++; 25 | } 26 | node->set_rank(rank); 27 | kr.To(node->mutable_key()); 28 | } 29 | 30 | void Remove(const Node& node) { 31 | // TODO... 
32 | } 33 | protected: 34 | int num_servers_ = 0; 35 | int server_rank_ = 0; 36 | int worker_rank_ = 0; 37 | Range key_range_; 38 | }; 39 | 40 | } // namespace ps 41 | -------------------------------------------------------------------------------- /repo/dmlc-core/src/io/line_split.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file line_split.h 4 | * \brief base class implementation of input splitter 5 | * \author Tianqi Chen 6 | */ 7 | #ifndef DMLC_IO_LINE_SPLIT_H_ 8 | #define DMLC_IO_LINE_SPLIT_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "./input_split_base.h" 16 | 17 | namespace dmlc { 18 | namespace io { 19 | /*! \brief class that split the files by line */ 20 | class LineSplitter : public InputSplitBase { 21 | public: 22 | LineSplitter(FileSystem *fs, 23 | const char *uri, 24 | unsigned rank, 25 | unsigned nsplit) { 26 | this->Init(fs, uri, 1); 27 | this->ResetPartition(rank, nsplit); 28 | } 29 | 30 | virtual bool ExtractNextRecord(Blob *out_rec, Chunk *chunk); 31 | protected: 32 | virtual size_t SeekRecordBegin(Stream *fi); 33 | virtual const char* 34 | FindLastRecordBegin(const char *begin, const char *end); 35 | }; 36 | } // namespace io 37 | } // namespace dmlc 38 | #endif // DMLC_IO_LINE_SPLIT_H_ 39 | -------------------------------------------------------------------------------- /repo/dmlc-core/include/dmlc/omp.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file omp.h 4 | * \brief header to handle OpenMP compatibility issues 5 | */ 6 | #ifndef DMLC_OMP_H_ 7 | #define DMLC_OMP_H_ 8 | #if defined(_OPENMP) 9 | #include 10 | #else 11 | #ifndef DISABLE_OPENMP 12 | // use pragma message instead of warning 13 | #pragma message("Warning: OpenMP is not available, " \ 14 | "project will be compiled into single-thread code. 
" \ 15 | "Use OpenMP-enabled compiler to get benefit of multi-threading.") 16 | #endif 17 | //! \cond Doxygen_Suppress 18 | inline int omp_get_thread_num() { return 0; } 19 | inline int omp_get_num_threads() { return 1; } 20 | inline int omp_get_num_procs() { return 1; } 21 | inline void omp_set_num_threads(int nthread) {} 22 | #endif 23 | // loop variable used in openmp 24 | namespace dmlc { 25 | #ifdef _MSC_VER 26 | typedef int omp_uint; 27 | typedef long omp_ulong; // NOLINT(*) 28 | #else 29 | typedef unsigned omp_uint; 30 | typedef unsigned long omp_ulong; // NOLINT(*) 31 | #endif 32 | //! \endcond 33 | } // namespace dmlc 34 | #endif // DMLC_OMP_H_ 35 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/strtonum_test.cc: -------------------------------------------------------------------------------- 1 | #include "../src/data/strtonum.h" 2 | #include 3 | 4 | int main(int argc, char *argv[]) { 5 | using namespace dmlc; 6 | 7 | // float 8 | std::vector f = { 9 | "1234567901234", "+12345.6789", "-0.00123", "+0123.234e-2", 10 | "-234234.123123e20", "3.1029831e+38", "000.123e-28"}; 11 | for (size_t i = 0; i < f.size(); ++i) { 12 | float v1 = data::atof(f[i].c_str()); 13 | float v2 = atof(f[i].c_str()); 14 | CHECK_EQ(v1, v2); 15 | } 16 | 17 | // long 18 | std::vector l = { 19 | "2147483647", "+12345", "-123123", "-2147483648" 20 | }; 21 | for (size_t i = 0; i < l.size(); ++i) { 22 | long v1 = data::atol(l[i].c_str()); 23 | long v2 = atol(l[i].c_str()); 24 | CHECK_EQ(v1, v2); 25 | } 26 | 27 | // uint64 28 | std::vector ull = { 29 | "2147483647", "+12345", "18446744073709551615" 30 | }; 31 | for (size_t i = 0; i < ull.size(); ++i) { 32 | unsigned long long v1 = data::strtoull(ull[i].c_str(), 0, 10); 33 | unsigned long long v2 = strtoull(ull[i].c_str(), 0, 10); 34 | CHECK_EQ(v1, v2); 35 | } 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- 
/repo/ps-lite/src/filter/filter.cc: -------------------------------------------------------------------------------- 1 | #include "filter/filter.h" 2 | #include "filter/compressing.h" 3 | #include "filter/key_caching.h" 4 | #include "filter/fixing_float.h" 5 | #include "filter/add_noise.h" 6 | #include "filter/delta_key.h" 7 | #include "filter/truncate_float.h" 8 | 9 | namespace ps { 10 | 11 | IFilter* IFilter::create(const Filter& conf) { 12 | switch (conf.type()) { 13 | case Filter::KEY_CACHING: 14 | return new KeyCachingFilter(); 15 | case Filter::COMPRESSING: 16 | return new CompressingFilter(); 17 | case Filter::FIXING_FLOAT: 18 | return new FixingFloatFilter(); 19 | case Filter::NOISE: 20 | return new AddNoiseFilter(); 21 | case Filter::DELTA_KEY: 22 | return new DeltaKeyFilter(); 23 | case Filter::TRUNCATE_FLOAT: 24 | return new TruncateFloatFilter(); 25 | default: 26 | CHECK(false) << "unknow filter type"; 27 | } 28 | return nullptr; 29 | } 30 | 31 | 32 | Filter* IFilter::Find(Filter::Type type, Task* task) { 33 | for (int i = 0; i < task->filter_size(); ++i) { 34 | if (task->filter(i).type() == type) return task->mutable_filter(i); 35 | } 36 | return nullptr; 37 | } 38 | 39 | } // namespace ps 40 | -------------------------------------------------------------------------------- /repo/dmlc-core/src/io/recordio_split.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file recordio_split.h 4 | * \brief input split that splits recordio files 5 | * \author Tianqi Chen 6 | */ 7 | #ifndef DMLC_IO_RECORDIO_SPLIT_H_ 8 | #define DMLC_IO_RECORDIO_SPLIT_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include "./input_split_base.h" 17 | 18 | namespace dmlc { 19 | namespace io { 20 | /*! 
\brief class that split the files by line */ 21 | class RecordIOSplitter : public InputSplitBase { 22 | public: 23 | RecordIOSplitter(FileSystem *fs, 24 | const char *uri, 25 | unsigned rank, 26 | unsigned nsplit) { 27 | this->Init(fs, uri, 4); 28 | this->ResetPartition(rank, nsplit); 29 | } 30 | 31 | virtual bool ExtractNextRecord(Blob *out_rec, Chunk *chunk); 32 | 33 | protected: 34 | virtual size_t SeekRecordBegin(Stream *fi); 35 | virtual const char* 36 | FindLastRecordBegin(const char *begin, const char *end); 37 | }; 38 | } // namespace io 39 | } // namespace dmlc 40 | #endif // DMLC_IO_RECORDIO_SPLIT_H_ 41 | -------------------------------------------------------------------------------- /repo/ps-lite/make/install_deps.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # usage: install_deps.sh install_dir 3 | # 4 | # if install_dir is not specified, it will install all deps in $ROOT/deps, 5 | # 6 | 7 | if [ $# -ne 1 ]; then 8 | install_dir=$PWD/`dirname $0`/../deps 9 | else 10 | install_dir=$1 11 | fi 12 | 13 | mkdir -p $install_dir 14 | cd $install_dir 15 | 16 | rm -f install.sh 17 | wget -q https://raw.githubusercontent.com/mli/deps/master/install.sh 18 | source ./install.sh 19 | 20 | if [ ! -f include/google/gflags.h ]; then 21 | install_gflags 22 | else 23 | echo "skip gflags" 24 | fi 25 | 26 | if [ ! -f include/google/protobuf/message.h ]; then 27 | install_protobuf 28 | else 29 | echo "skip protobuf" 30 | fi 31 | 32 | if [ ! -f include/glog/logging.h ]; then 33 | install_glog 34 | else 35 | echo "skip glog" 36 | fi 37 | 38 | if [ ! -f include/zmq.h ]; then 39 | install_zmq 40 | else 41 | echo "skip zmq" 42 | fi 43 | 44 | if [ ! -f include/city.h ]; then 45 | install_cityhash 46 | else 47 | echo "skip cityhash" 48 | fi 49 | 50 | if [ ! 
-f include/lz4.h ]; then 51 | install_lz4 52 | else 53 | echo "skip lz4" 54 | fi 55 | 56 | 57 | rm -rf $install_dir/build 58 | -------------------------------------------------------------------------------- /repo/ps-lite/src/system/postoffice.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "base/common.h" 3 | #include "system/message.h" 4 | #include "base/threadsafe_queue.h" 5 | #include "system/manager.h" 6 | namespace ps { 7 | 8 | class Postoffice { 9 | public: 10 | SINGLETON(Postoffice); 11 | ~Postoffice(); 12 | 13 | /** 14 | * @brief Starts the system 15 | */ 16 | void Run(int* argc, char***); 17 | /** 18 | * @brief Stops the system 19 | */ 20 | void Stop() { manager_.Stop(); } 21 | 22 | /** 23 | * @brief Queue a message into the sending buffer, which will be sent by the 24 | * sending thread. It is thread safe. 25 | * 26 | * @param msg it will be DELETE by system after sent successfully. so do NOT 27 | * delete it before 28 | */ 29 | void Queue(Message* msg) { sending_queue_.push(msg); } 30 | 31 | Manager& manager() { return manager_; } 32 | 33 | private: 34 | Postoffice() { } 35 | void Send(); 36 | void Recv(); 37 | 38 | std::unique_ptr recv_thread_; 39 | std::unique_ptr send_thread_; 40 | ThreadsafeQueue sending_queue_; 41 | 42 | Manager manager_; 43 | DISALLOW_COPY_AND_ASSIGN(Postoffice); 44 | }; 45 | 46 | } // namespace ps 47 | -------------------------------------------------------------------------------- /repo/ps-lite/src/filter/add_noise.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "filter/filter.h" 3 | #ifdef __MACH__ 4 | #include 5 | #endif 6 | 7 | namespace ps { 8 | 9 | /** 10 | * @brief Add gaussian noises to float/double values 11 | */ 12 | class AddNoiseFilter : public IFilter { 13 | public: 14 | void Encode(Message* msg) { 15 | auto filter_conf = CHECK_NOTNULL(Find(Filter::NOISE, msg)); 16 | int n = 
msg->value.size(); 17 | CHECK_EQ(n, msg->task.value_type_size()); 18 | for (int i = 0; i < n; ++i) { 19 | auto type = msg->task.value_type(i); 20 | if (type == DataType::FLOAT) { 21 | AddNoise(msg->value[i], filter_conf); 22 | } 23 | if (type == DataType::DOUBLE) { 24 | AddNoise(msg->value[i], filter_conf); 25 | } 26 | } 27 | } 28 | 29 | private: 30 | 31 | template 32 | void AddNoise(const SArray& array, Filter* cf) { 33 | std::default_random_engine generator; 34 | std::normal_distribution distribution((V)cf->mean(), (V)cf->std()); 35 | SArray data(array); 36 | for (size_t i = 0; i < data.size(); ++i) { 37 | data[i] += distribution(generator); 38 | } 39 | } 40 | 41 | }; 42 | 43 | } // namespace ps 44 | -------------------------------------------------------------------------------- /repo/ps-lite/src/proto/filter.proto: -------------------------------------------------------------------------------- 1 | package ps; 2 | 3 | message Filter { 4 | enum Type { 5 | // cache the keys at both sender and receiver 6 | KEY_CACHING = 1; 7 | // compress data by snappy 8 | COMPRESSING = 2; 9 | // convert a float/double into a fixed-point integer with random rounding 10 | FIXING_FLOAT = 3; 11 | // add noise to data 12 | NOISE = 4; 13 | // key[i] <- key[i+1] - key[i] 14 | DELTA_KEY = 5; 15 | // truncate a float/double into an integer 16 | TRUNCATE_FLOAT = 6; 17 | } 18 | required Type type = 1; 19 | 20 | // -- key caching -- 21 | // if the task is done, then clear the cache (to save memory) 22 | optional bool clear_cache = 20 [default = false]; 23 | 24 | // -- fixing float filter -- 25 | optional int32 num_bytes = 5 [default = 3]; 26 | 27 | // -- nosie -- 28 | optional float mean = 6; 29 | optional float std = 7; 30 | 31 | // -- runtime parameters used by the system -- 32 | message FixedFloatConfig { 33 | optional float min_value = 1 [default = -1]; 34 | optional float max_value = 2 [default = 1]; 35 | } 36 | repeated FixedFloatConfig fixed_point = 4; 37 | optional uint64 signature 
= 2; 38 | repeated uint64 uncompressed_size = 3; 39 | } 40 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/split_read_test.cc: -------------------------------------------------------------------------------- 1 | // test reading speed from a InputSplit 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char *argv[]) { 8 | if (argc < 4) { 9 | printf("Usage: partid npart\n"); 10 | return 0; 11 | } 12 | using namespace dmlc; 13 | InputSplit *split = InputSplit::Create(argv[1], 14 | atoi(argv[2]), 15 | atoi(argv[3]), 16 | "text"); 17 | std::vector data; 18 | InputSplit::Blob blb; 19 | double tstart = GetTime(); 20 | size_t bytes_read = 0; 21 | size_t bytes_expect = 10UL << 20UL; 22 | while (split->NextRecord(&blb)) { 23 | std::string dat = std::string((char*)blb.dptr, 24 | blb.size); 25 | data.push_back(dat); 26 | bytes_read += blb.size; 27 | double tdiff = GetTime() - tstart; 28 | if (bytes_read >= bytes_expect) { 29 | printf("%lu MB read, %g MB/sec\n", 30 | bytes_read >> 20UL, 31 | (bytes_read >> 20UL) / tdiff); 32 | bytes_expect += 10UL << 20UL; 33 | } 34 | } 35 | delete split; 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/libsvm_parser_test.cc: -------------------------------------------------------------------------------- 1 | // test reading speed from a InputSplit 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "../src/data/libsvm_parser.h" 7 | 8 | int main(int argc, char *argv[]) { 9 | if (argc < 5) { 10 | printf("Usage: partid npart nthread\n"); 11 | return 0; 12 | } 13 | using namespace dmlc; 14 | InputSplit *split = InputSplit::Create(argv[1], 15 | atoi(argv[2]), 16 | atoi(argv[3]), 17 | "text"); 18 | int nthread = atoi(argv[4]); 19 | data::LibSVMParser parser(split, nthread); 20 | double tstart = GetTime(); 21 | size_t bytes_read = 0; 22 | size_t bytes_expect = 10UL << 20UL; 23 
| size_t num_ex = 0; 24 | while (parser.Next()) { 25 | bytes_read = parser.BytesRead(); 26 | num_ex += parser.Value().size; 27 | double tdiff = GetTime() - tstart; 28 | if (bytes_read >= bytes_expect) { 29 | printf("%lu examples, %lu MB read, %g MB/sec\n", 30 | num_ex, bytes_read >> 20UL, 31 | (bytes_read >> 20UL) / tdiff); 32 | bytes_expect += 10UL << 20UL; 33 | } 34 | } 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /repo/ps-lite/src/proto/param.proto: -------------------------------------------------------------------------------- 1 | package ps; 2 | 3 | message ParamCall { 4 | // push or pull 5 | optional bool push = 1 [default = true]; 6 | 7 | optional bool dyn_val_size = 4 [default = false]; 8 | // merge operator 9 | // optional AsOp op = 2; 10 | 11 | optional TailKeyIFilter tail_filter = 3; 12 | 13 | // optional bool insert_key = 5; 14 | // optional bool gather = 6; 15 | 16 | // it's a replica request 17 | optional bool replica = 10; 18 | repeated Timestamp backup = 11; 19 | } 20 | 21 | message ParamInitConfig { 22 | enum Type { 23 | ZERO = 1; 24 | CONSTANT = 2; 25 | GAUSSIAN = 3; 26 | FILE = 4; 27 | CLONE = 5; 28 | } 29 | optional Type type = 1 [default = ZERO]; 30 | optional double constant = 2 [default = 1]; 31 | // gaussian random 32 | optional double mean = 3 [default = 0]; 33 | optional double std = 4 [default = 1]; 34 | optional string file_name = 5; 35 | } 36 | 37 | message Timestamp { 38 | required string sender = 1; 39 | required int32 time = 2; 40 | } 41 | 42 | message TailKeyIFilter { 43 | optional bool insert_count = 1; 44 | optional int32 freq_threshold = 2; 45 | optional bool query_value = 3; 46 | optional int32 countmin_n = 4 [default = 1000000]; 47 | optional int32 countmin_k = 5 [default = 2]; 48 | } 49 | -------------------------------------------------------------------------------- /repo/dmlc-core/make/config.mk: 
-------------------------------------------------------------------------------- 1 | #----------------------------------------------------- 2 | # dmlc-core: the configuration compile script 3 | # 4 | # This is the default configuration setup for 5 | # If you want to change configuration, do the following steps: 6 | # 7 | # - copy this file to the root of dmlc-core folder 8 | # - modify the configuration you want 9 | # - type make or make -j n on each of the folder 10 | #---------------------------------------------------- 11 | 12 | # choice of compiler 13 | export CC = gcc 14 | export CXX = g++ 15 | export MPICXX = mpicxx 16 | 17 | # whether to compile with -fPIC option 18 | # Note: to build shared library(so files), fPIC is required 19 | WITH_FPIC = 1 20 | 21 | # whether use HDFS support during compile 22 | USE_HDFS = 1 23 | 24 | # whether use AWS S3 support during compile 25 | USE_S3 = 0 26 | 27 | # whether use Azure blob support during compile 28 | USE_AZURE = 0 29 | 30 | # path to libjvm.so 31 | LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server 32 | 33 | # whether building unittest (gtest is required) 34 | BUILD_TEST=0 35 | 36 | # path to gtest library (only used when $BUILD_TEST=1) 37 | # there should be an include path in $GTEST_PATH/include and library in $GTEST_PATH/lib 38 | GTEST_PATH= 39 | 40 | # path to third-party dependences such as glog 41 | DEPS_PATH= 42 | -------------------------------------------------------------------------------- /repo/ps-lite/src/base/parallel_sort.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file parallel_sort.h 3 | * @date Tue Mar 31 17:01:58 2015 4 | * 5 | * @brief Parallel sort 6 | */ 7 | #pragma once 8 | #include "ps/shared_array.h" 9 | namespace ps { 10 | 11 | namespace { 12 | /// @brief the thread function 13 | template 14 | void ParallelSort(T* data, size_t len, size_t grainsize, const Fn& cmp) { 15 | if (len <= grainsize) { 16 | std::sort(data, data + len, cmp); 17 | } else 
{ 18 | std::thread thr(ParallelSort, data, len/2, grainsize, cmp); 19 | ParallelSort(data + len/2, len - len/2, grainsize, cmp); 20 | thr.join(); 21 | 22 | std::inplace_merge(data, data + len/2, data + len, cmp); 23 | } 24 | } 25 | } // namespace 26 | 27 | 28 | /** 29 | * @brief Parallel Sort 30 | * 31 | * @param arr array 32 | * @param num_threads 33 | * @param cmp the comparision function, such as [](const T& a, const T& b) { 34 | * return a < b; } or an even simplier version: std::less() 35 | */ 36 | template 37 | void ParallelSort(SArray* arr, int num_threads, const Fn& cmp) { 38 | CHECK_GT(num_threads, 0); 39 | size_t grainsize = std::max(arr->size() / num_threads + 5, (size_t)1024*16); 40 | ParallelSort(arr->data(), arr->size(), grainsize, cmp); 41 | } 42 | 43 | } // namespace ps 44 | -------------------------------------------------------------------------------- /repo/ps-lite/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.7) 2 | 3 | project(pslite C CXX) 4 | 5 | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules) 6 | 7 | include(ExternalProject) 8 | set(pslite_LINKER_LIBS "") 9 | 10 | # ---[ Google-gflags 11 | include("cmake/External/gflags.cmake") 12 | include_directories(pslite ${GFLAGS_INCLUDE_DIRS}) 13 | list(APPEND pslite_LINKER_LIBS ${GFLAGS_LIBRARIES}) 14 | 15 | # ---[ Google-glog 16 | include("cmake/External/glog.cmake") 17 | include_directories(pslite ${GLOG_INCLUDE_DIRS}) 18 | list(APPEND pslite_LINKER_LIBS ${GLOG_LIBRARIES}) 19 | 20 | # ---[ Google-protobuf 21 | include(cmake/ProtoBuf.cmake) 22 | 23 | # generate protobuf sources 24 | set(proto_gen_folder "${PROJECT_BINARY_DIR}/include") 25 | file(GLOB proto_files "src/proto/*.proto") 26 | pslite_protobuf_generate_cpp_py(${proto_gen_folder} proto_srcs proto_hdrs proto_python "${PROJECT_SOURCE_DIR}" "src" ${proto_files}) 27 | include_directories(pslite "${PROJECT_BINARY_DIR}/include/") 28 | 29 | 
FILE(GLOB SOURCE "src/*.cc") 30 | 31 | if(MSVC) 32 | FILE(GLOB getopt_SOURCE "src/windows/getopt.c") 33 | list(APPEND SOURCE ${getopt_SOURCE}) 34 | add_definitions(-DSTATIC_GETOPT) 35 | endif() 36 | 37 | list(APPEND SOURCE ${proto_srcs}) 38 | add_library(pslite ${SOURCE}) 39 | 40 | target_link_libraries(pslite ${pslite_LINKER_LIBS}) -------------------------------------------------------------------------------- /repo/ps-lite/src/filter/delta_key.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "filter/filter.h" 3 | namespace ps { 4 | 5 | /// \brief Used delta encoding: `key[i] = keys[i+1]-key[i]` 6 | class DeltaKeyFilter : public IFilter { 7 | public: 8 | void Encode(Message* msg) { 9 | CHECK_NOTNULL(Find(Filter::DELTA_KEY, msg)); 10 | if (msg->key.empty()) return; 11 | if(msg->task.key_type() == DataType::UINT32) { 12 | msg->key = Encode(msg->key); 13 | } else if (msg->task.key_type() == DataType::UINT64) { 14 | msg->key = Encode(msg->key); 15 | } 16 | } 17 | 18 | void Decode(Message* msg) { 19 | CHECK_NOTNULL(Find(Filter::DELTA_KEY, msg)); 20 | if (msg->key.empty()) return; 21 | if(msg->task.key_type() == DataType::UINT32) { 22 | Decode(SArray(msg->key)); 23 | } else if (msg->task.key_type() == DataType::UINT64) { 24 | Decode(SArray(msg->key)); 25 | } 26 | } 27 | private: 28 | 29 | template 30 | SArray Encode(const SArray& orig) { 31 | SArray key; key.CopyFrom(SArray(orig)); 32 | for (size_t i = key.size(); i > 1; --i) { 33 | key[i-1] -= key[i-2]; 34 | } 35 | return key; 36 | } 37 | 38 | template 39 | void Decode(SArray key) { 40 | for (size_t i = 1; i < key.size(); ++i) { 41 | key[i] += key[i-1]; 42 | } 43 | } 44 | }; 45 | } // namespace ps 46 | -------------------------------------------------------------------------------- /repo/ps-lite/src/base/bloom_filter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "base/sketch.h" 3 | 
namespace ps { 4 | 5 | template 6 | class BloomIFilter : public Sketch { 7 | public: 8 | BloomIFilter() { } 9 | BloomIFilter(int m, int k) { resize(m, k); } 10 | ~BloomIFilter() { delete [] data_; } 11 | void resize(int m, int k) { 12 | delete [] data_; 13 | k_ = std::min(64, std::max(1, k)); 14 | m_ = m; 15 | data_size_ = (m / 8) + 1; 16 | data_ = new char[data_size_]; 17 | memset(data_, 0, data_size_ * sizeof(char)); 18 | } 19 | 20 | bool operator[] (K key) const { return query(key); } 21 | bool query(K key) const { 22 | uint32 h = hash(key); 23 | const uint32 delta = (h >> 17) | (h << 15); // Rotate right 17 bits 24 | for (int j = 0; j < k_; ++j) { 25 | uint32 bitpos = h % m_; 26 | if ((data_[bitpos/8] & (1 << (bitpos % 8))) == 0) return false; 27 | h += delta; 28 | } 29 | return true; 30 | } 31 | 32 | void insert(K key) { 33 | uint32 h = hash(key); 34 | const uint32 delta = (h >> 17) | (h << 15); // Rotate right 17 bits 35 | for (int j = 0; j < k_; ++j) { 36 | uint32 bitpos = h % m_; 37 | data_[bitpos/8] |= (1 << (bitpos % 8)); 38 | h += delta; 39 | } 40 | } 41 | 42 | private: 43 | char* data_ = NULL; 44 | int data_size_ = 0; 45 | uint32 m_ = 0; 46 | int k_ = 0; 47 | }; 48 | } // namespace ps 49 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/dmlc_test.mk: -------------------------------------------------------------------------------- 1 | TEST=test/filesys_test test/dataiter_test\ 2 | test/iostream_test test/recordio_test test/split_read_test\ 3 | test/stream_read_test test/split_test test/libsvm_parser_test\ 4 | test/split_repeat_read_test test/strtonum_test\ 5 | test/logging_test test/parameter_test test/registry_test\ 6 | test/csv_parser_test 7 | 8 | test/filesys_test: test/filesys_test.cc src/io/*.h libdmlc.a 9 | test/dataiter_test: test/dataiter_test.cc libdmlc.a 10 | test/iostream_test: test/iostream_test.cc libdmlc.a 11 | test/recordio_test: test/recordio_test.cc libdmlc.a 12 | 
test/split_read_test: test/split_read_test.cc libdmlc.a 13 | test/split_repeat_read_test: test/split_repeat_read_test.cc libdmlc.a 14 | test/stream_read_test: test/stream_read_test.cc libdmlc.a 15 | test/split_test: test/split_test.cc libdmlc.a 16 | test/libsvm_parser_test: test/libsvm_parser_test.cc src/data/libsvm_parser.h libdmlc.a 17 | test/csv_parser_test: test/csv_parser_test.cc src/data/csv_parser.h libdmlc.a 18 | test/strtonum_test: test/strtonum_test.cc src/data/strtonum.h 19 | test/logging_test: test/logging_test.cc 20 | test/parameter_test: test/parameter_test.cc 21 | test/registry_test: test/registry_test.cc 22 | 23 | $(TEST) : 24 | $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) 25 | 26 | include test/unittest/dmlc_unittest.mk 27 | 28 | ALL_TEST=$(TEST) $(UNITTEST) 29 | ALL_TEST_OBJ=$(UNITTEST_OBJ) 30 | -------------------------------------------------------------------------------- /repo/ps-lite/src/base/countmin.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "base/sketch.h" 3 | #include 4 | #include "base/shared_array_inl.h" 5 | namespace ps { 6 | 7 | template 8 | class CountMin : public Sketch { 9 | public: 10 | // TODO prefetch to accelerate the memory access 11 | bool empty() { return n_ == 0; } 12 | void clear() { data_.clear(); n_ = 0; } 13 | void resize(int n, int k, V v_max) { 14 | n_ = std::max(n, 64); 15 | data_.resize(n_); 16 | data_.SetZero(); 17 | k_ = std::min(30, std::max(1, k)); 18 | v_max_ = v_max; 19 | } 20 | 21 | void insert(const K& key, const V& count) { 22 | uint32 h = hash(key); 23 | const uint32 delta = (h >> 17) | (h << 15); // Rotate right 17 bits 24 | for (int j = 0; j < k_; ++j) { 25 | V v = data_[h % n_]; 26 | // to avoid overflow 27 | data_[h % n_] = count > v_max_ - v ? 
v_max_ : v + count; 28 | h += delta; 29 | } 30 | } 31 | 32 | V query(const K& key) const { 33 | V res = v_max_; 34 | uint32 h = hash(key); 35 | const uint32 delta = (h >> 17) | (h << 15); // Rotate right 17 bits 36 | for (int j = 0; j < k_; ++j) { 37 | res = std::min(res, data_[h % n_]); 38 | h += delta; 39 | } 40 | return res; 41 | } 42 | 43 | private: 44 | SArray data_; 45 | int n_ = 0; 46 | int k_ = 1; 47 | V v_max_ = 0; 48 | }; 49 | 50 | } // namespace ps 51 | -------------------------------------------------------------------------------- /repo/ps-lite/guide/network_perf.cc: -------------------------------------------------------------------------------- 1 | #include "ps.h" 2 | #include 3 | #include 4 | 5 | typedef float Val; 6 | 7 | DEFINE_int32(repeat, 1000, "repeat n times"); 8 | DEFINE_int32(kv_pair, 1000, "number of key-value pairs a worker send to server each time."); 9 | DEFINE_string(mode, "online", "online or batch. (TODO)"); 10 | 11 | int CreateServerNode(int argc, char *argv[]) { 12 | ps::OnlineServer server; 13 | return 0; 14 | } 15 | 16 | int WorkerNodeMain(int argc, char *argv[]) { 17 | using namespace ps; 18 | 19 | int n = FLAGS_kv_pair; 20 | auto key = std::make_shared>(); 21 | auto val = std::make_shared>(); 22 | 23 | std::random_device rd; 24 | std::mt19937 gen(rd()); 25 | 26 | std::uniform_int_distribution dis(0, kMaxKey); 27 | key->resize(n); 28 | for (int i = 0; i < n; ++i) (*key)[i] = dis(gen); 29 | std::sort(key->begin(), key->end()); 30 | 31 | std::uniform_real_distribution rdis(-1, 1); 32 | val->resize(n); 33 | for (int i = 0; i < n; ++i) (*val)[i] = rdis(gen); 34 | 35 | std::vector recv_val; 36 | 37 | KVWorker wk; 38 | for (int i = 0; i < FLAGS_repeat; ++i) { 39 | SyncOpts opts; 40 | // opts.AddFilter(Filter::KEY_CACHING); 41 | int ts = wk.ZPush(key, val, opts); 42 | wk.Wait(ts); 43 | 44 | ts = wk.ZPull(key, &recv_val, opts); 45 | wk.Wait(ts); 46 | } 47 | return 0; 48 | } 49 | 
-------------------------------------------------------------------------------- /repo/dmlc-core/test/stream_read_test.cc: -------------------------------------------------------------------------------- 1 | // test reading speed from a Stream 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char *argv[]) { 8 | if (argc < 3) { 9 | printf("Usage: uri buffersize [skip-proc]\n"); 10 | return 0; 11 | } 12 | int skip_proc = 0; 13 | if (argc > 3) { 14 | skip_proc = atoi(argv[3]); 15 | } 16 | size_t sz = atol(argv[2]); 17 | std::string buffer; buffer.resize(sz); 18 | using namespace dmlc; 19 | Stream *fi = Stream::Create(argv[1], "r", true); 20 | CHECK(fi != NULL) << "cannot open " << argv[1]; 21 | double tstart = GetTime(); 22 | size_t size; 23 | size_t bytes_read = 0; 24 | size_t bytes_expect = 10UL << 20UL; 25 | while ((size = fi->Read(BeginPtr(buffer), sz)) != 0) { 26 | int cnt = 0; 27 | if (skip_proc == 0) { 28 | //#pragma omp parallel for reduction(+:cnt) 29 | for (size_t i = 0; i < size; ++i) { 30 | if (buffer[i] == '\n' || buffer[i] == '\r') { 31 | buffer[i] = '\0'; ++ cnt; 32 | } 33 | } 34 | } 35 | bytes_read += size; 36 | double tdiff = GetTime() - tstart; 37 | if (bytes_read >= bytes_expect) { 38 | printf("%lu MB read, %g MB/sec, cnt=%d\n", 39 | bytes_read >> 20UL, 40 | (bytes_read >> 20UL) / tdiff, cnt); 41 | bytes_expect += 10UL << 20UL; 42 | } 43 | } 44 | delete fi; 45 | return 0; 46 | } 47 | 48 | 49 | -------------------------------------------------------------------------------- /repo/ps-lite/src/system/message.cc: -------------------------------------------------------------------------------- 1 | #include "system/message.h" 2 | namespace ps { 3 | 4 | // Message::Message(const NodeID& dest, int time, int wait_time) 5 | // : recver(dest) { 6 | // task.set_time(time); 7 | // if (wait_time != kInvalidTime) task.add_wait_time(wait_time); 8 | // } 9 | 10 | Filter* Message::add_filter(Filter::Type type) { 11 | auto ptr = 
task.add_filter(); 12 | ptr->set_type(type); 13 | return ptr; 14 | } 15 | 16 | size_t Message::mem_size() { 17 | size_t nbytes = task.SpaceUsed() + key.MemSize(); 18 | for (const auto& v : value) nbytes += v.MemSize(); 19 | return nbytes; 20 | } 21 | 22 | std::string Message::ShortDebugString() const { 23 | std::stringstream ss; 24 | if (key.size()) ss << "key [" << key.size() << "] "; 25 | if (value.size()) { 26 | ss << "value ["; 27 | for (size_t i = 0; i < value.size(); ++i) { 28 | ss << value[i].size(); 29 | if (i < value.size() - 1) ss << ","; 30 | } 31 | ss << "] "; 32 | } 33 | auto t = task; t.clear_msg(); ss << t.ShortDebugString(); 34 | return ss.str(); 35 | } 36 | 37 | std::string Message::DebugString() const { 38 | std::stringstream ss; 39 | ss << "[message]: " << sender << "=>" << recver 40 | << "[task]:" << task.ShortDebugString() 41 | << "\n[key]:" << key.size() 42 | << "\n[" << value.size() << " value]: "; 43 | for (const auto& x: value) 44 | ss << x.size() << " "; 45 | return ss.str(); 46 | } 47 | 48 | 49 | } // namespace ps 50 | -------------------------------------------------------------------------------- /repo/dmlc-core/include/dmlc/timer.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file timer.h 4 | * \brief cross platform timer for timing 5 | * \author Tianqi Chen 6 | */ 7 | #ifndef DMLC_TIMER_H_ 8 | #define DMLC_TIMER_H_ 9 | 10 | #include "base.h" 11 | 12 | #if DMLC_USE_CXX11 13 | #include 14 | #endif 15 | 16 | #include 17 | #ifdef __MACH__ 18 | #include 19 | #include 20 | #endif 21 | #include "./logging.h" 22 | 23 | namespace dmlc { 24 | /*! 
25 | * \brief return time in seconds 26 | */ 27 | inline double GetTime(void) { 28 | #if DMLC_USE_CXX11 29 | return std::chrono::duration( 30 | std::chrono::high_resolution_clock::now().time_since_epoch()).count(); 31 | #elif defined __MACH__ 32 | clock_serv_t cclock; 33 | mach_timespec_t mts; 34 | host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); 35 | CHECK(clock_get_time(cclock, &mts) == 0) << "failed to get time"; 36 | mach_port_deallocate(mach_task_self(), cclock); 37 | return static_cast(mts.tv_sec) + static_cast(mts.tv_nsec) * 1e-9; 38 | #else 39 | #if defined(__unix__) || defined(__linux__) 40 | timespec ts; 41 | CHECK(clock_gettime(CLOCK_REALTIME, &ts) == 0) << "failed to get time"; 42 | return static_cast(ts.tv_sec) + static_cast(ts.tv_nsec) * 1e-9; 43 | #else 44 | return static_cast(time(NULL)); 45 | #endif 46 | #endif 47 | } 48 | } // namespace dmlc 49 | #endif // DMLC_TIMER_H_ 50 | -------------------------------------------------------------------------------- /repo/ps-lite/src/base/assign_op.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "proto/assign_op.pb.h" 3 | #include "glog/logging.h" 4 | namespace ps { 5 | // The cost of the switch is minimal. Once "op" is a constant, the compiler will 6 | // do optimization. see test/assign_op_test.cc 7 | 8 | // Returns right op= left. bascial version, works for both floast and intergers 9 | template 10 | T& AssignOp(T& right, const T& left, const AsOp& op) { 11 | switch (op) { 12 | case AsOp::ASSIGN: 13 | right = left; break; 14 | case AsOp::PLUS: 15 | right += left; break; 16 | case AsOp::MINUS: 17 | right -= left; break; 18 | case AsOp::TIMES: 19 | right *= left; break; 20 | case AsOp::DIVIDE: 21 | right /= left; break; 22 | default: 23 | LOG(FATAL) << "use AssignOpI.." ; 24 | } 25 | return right; 26 | } 27 | 28 | // Returns right op= left. 
for integers 29 | template 30 | T& AssignOpI(T& right, const T& left, const AsOp& op) { 31 | switch (op) { 32 | case AsOp::ASSIGN: 33 | right = left; break; 34 | case AsOp::PLUS: 35 | right += left; break; 36 | case AsOp::MINUS: 37 | right -= left; break; 38 | case AsOp::TIMES: 39 | right *= left; break; 40 | case AsOp::DIVIDE: 41 | right /= left; break; 42 | case AsOp::AND: 43 | right &= left; break; 44 | case AsOp::OR: 45 | right |= left; break; 46 | case AsOp::XOR: 47 | right ^= left; break; 48 | } 49 | return right; 50 | } 51 | 52 | } // namespace ps 53 | -------------------------------------------------------------------------------- /repo/dmlc-core/src/io/azure_filesys.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file azure_filesys.h 4 | * \brief Azure access module 5 | * \author Mu Li 6 | */ 7 | #ifndef DMLC_IO_AZURE_FILESYS_H_ 8 | #define DMLC_IO_AZURE_FILESYS_H_ 9 | 10 | #include 11 | #include 12 | #include "./filesys.h" 13 | 14 | namespace dmlc { 15 | namespace io { 16 | 17 | /*! \brief Microsoft Azure Blob filesystem */ 18 | class AzureFileSystem : public FileSystem { 19 | public: 20 | virtual ~AzureFileSystem() {} 21 | 22 | virtual FileInfo GetPathInfo(const URI &path) { return FileInfo(); } 23 | 24 | virtual void ListDirectory(const URI &path, std::vector *out_list); 25 | 26 | virtual Stream *Open(const URI &path, const char* const flag, bool allow_null) { 27 | return NULL; 28 | } 29 | 30 | virtual SeekStream *OpenForRead(const URI &path, bool allow_null) { 31 | return NULL; 32 | } 33 | 34 | /*! 35 | * \brief get a singleton of AzureFileSystem when needed 36 | * \return a singleton instance 37 | */ 38 | inline static AzureFileSystem *GetInstance(void) { 39 | static AzureFileSystem instance; 40 | return &instance; 41 | } 42 | 43 | private: 44 | /*! \brief constructor */ 45 | AzureFileSystem(); 46 | 47 | /*! 
\brief Azure storage account name */ 48 | std::string azure_account_; 49 | 50 | /*! \brief Azure storage account key */ 51 | std::string azure_key_; 52 | }; 53 | 54 | } // namespace io 55 | } // namespace dmlc 56 | 57 | #endif // DMLC_IO_AZURE_FILESYS_H_ 58 | -------------------------------------------------------------------------------- /repo/ps-lite/src/proto/data.proto: -------------------------------------------------------------------------------- 1 | package ps; 2 | import "proto/range.proto"; 3 | 4 | message DataConfig { 5 | enum DataFormat { 6 | BIN = 1; 7 | PROTO = 2; 8 | TEXT = 3; 9 | } 10 | required DataFormat format = 1; 11 | 12 | // see https://github.com/mli/parameter_server/wiki/Data 13 | enum TextFormat { 14 | DENSE = 1; 15 | SPARSE = 2; 16 | SPARSE_BINARY = 3; 17 | ADFEA = 4; 18 | LIBSVM = 5; 19 | TERAFEA = 6; 20 | VW = 7; 21 | } 22 | optional TextFormat text = 2; 23 | 24 | // filenames, supports regular expressions 25 | repeated string file = 3; 26 | // files stored in hdfs 27 | optional HDFSConfig hdfs = 5; 28 | // ignore the feature group information 29 | optional bool ignore_feature_group = 6; 30 | // the maximal number of files will be assigned to a worker, -1 means no limit 31 | optional int32 max_num_files_per_worker = 7 [default = -1]; 32 | 33 | // the maximal number of lines will be read from a file, -1 means no limit 34 | optional int32 max_num_lines_per_file = 8 [default = -1]; 35 | 36 | // randomly shuffle the file order 37 | optional bool shuffle = 9 [default = false]; 38 | 39 | // only valid for the binary format 40 | optional PbRange range = 4; 41 | // duplicate the file several times 42 | optional int32 replica = 10 [default = 1]; 43 | } 44 | 45 | message HDFSConfig { 46 | optional string home = 1; // HADOOP_HOME 47 | optional string ugi = 2; // hadoop.job.ugi, format: user,passwd 48 | optional string namenode = 4; // fs.default.name 49 | } 50 | -------------------------------------------------------------------------------- 
/repo/dmlc-core/cmake/Modules/FindCrypto.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find the Crypto libcrypto library 2 | # Once done this will define 3 | # 4 | # CRYPTO_FOUND - system has the Crypto libcrypto library 5 | # CRYPTO_INCLUDE_DIR - the Crypto libcrypto include directory 6 | # CRYPTO_LIBRARIES - The libraries needed to use Crypto libcrypto 7 | 8 | # Copyright (c) 2009, Matteo Panella, 9 | # Copyright (c) 2006, Alexander Neundorf, 10 | # 11 | # Redistribution and use is allowed according to the terms of the BSD license. 12 | # For details see the accompanying COPYING-CMAKE-SCRIPTS file. 13 | 14 | 15 | IF(CRYPTO_LIBRARIES) 16 | SET(Crypto_FIND_QUIETLY TRUE) 17 | ENDIF(CRYPTO_LIBRARIES) 18 | 19 | IF(SSL_EAY_DEBUG AND SSL_EAY_RELEASE) 20 | SET(LIB_FOUND 1) 21 | ENDIF(SSL_EAY_DEBUG AND SSL_EAY_RELEASE) 22 | 23 | FIND_PATH(CRYPTO_INCLUDE_DIR openssl/crypto.h ) 24 | FIND_LIBRARY(CRYPTO_LIBRARIES NAMES crypto ) 25 | 26 | IF(CRYPTO_INCLUDE_DIR AND CRYPTO_LIBRARIES) 27 | SET(CRYPTO_FOUND TRUE) 28 | ELSE(CRYPTO_INCLUDE_DIR AND CRYPTO_LIBRARIES) 29 | SET(CRYPTO_FOUND FALSE) 30 | ENDIF (CRYPTO_INCLUDE_DIR AND CRYPTO_LIBRARIES) 31 | 32 | IF (CRYPTO_FOUND) 33 | IF (NOT Crypto_FIND_QUIETLY) 34 | MESSAGE(STATUS "Found libcrypto: ${CRYPTO_LIBRARIES}") 35 | ENDIF (NOT Crypto_FIND_QUIETLY) 36 | ELSE (CRYPTO_FOUND) 37 | IF (Crypto_FIND_REQUIRED) 38 | MESSAGE(FATAL_ERROR "Could NOT find libcrypto") 39 | ENDIF (Crypto_FIND_REQUIRED) 40 | ENDIF (CRYPTO_FOUND) 41 | 42 | MARK_AS_ADVANCED(CRYPTO_INCLUDE_DIR CRYPTO_LIBRARIES) -------------------------------------------------------------------------------- /repo/ps-lite/src/windows/unistd.h: -------------------------------------------------------------------------------- 1 | #ifndef _UNISTD_H 2 | #define _UNISTD_H 1 3 | 4 | /* This file intended to serve as a drop-in replacement for 5 | * unistd.h on Windows 6 | * Please add functionality as neeeded 7 | */ 8 | 9 | #include 
10 | #include 11 | //#include "getopt.h" /* getopt at: https://gist.github.com/ashelly/7776712 */ 12 | #include /* for getpid() and the exec..() family */ 13 | #include /* for _getcwd() and _chdir() */ 14 | 15 | #define srandom srand 16 | #define random rand 17 | 18 | /* Values for the second argument to access. 19 | These may be OR'd together. */ 20 | #define R_OK 4 /* Test for read permission. */ 21 | #define W_OK 2 /* Test for write permission. */ 22 | //#define X_OK 1 /* execute permission - unsupported in windows*/ 23 | #define F_OK 0 /* Test for existence. */ 24 | 25 | #define access _access 26 | #define dup2 _dup2 27 | #define execve _execve 28 | #define ftruncate _chsize 29 | #define unlink _unlink 30 | #define fileno _fileno 31 | #define getcwd _getcwd 32 | #define chdir _chdir 33 | #define isatty _isatty 34 | #define lseek _lseek 35 | /* read, write, and close are NOT being #defined here, because while there are file handle specific versions for Windows, they probably don't work for sockets. You need to look at your app and consider whether to call e.g. closesocket(). 
*/ 36 | 37 | #define ssize_t int 38 | 39 | #define STDIN_FILENO 0 40 | #define STDOUT_FILENO 1 41 | #define STDERR_FILENO 2 42 | /* should be in some equivalent to */ 43 | 44 | 45 | #endif /* unistd.h */ -------------------------------------------------------------------------------- /repo/ps-lite/src/system/van.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "base/common.h" 3 | #include "proto/node.pb.h" 4 | #include "system/message.h" 5 | namespace ps { 6 | 7 | /** 8 | * @brief Van sends (receives) packages to (from) a node The current 9 | * implementation uses ZeroMQ 10 | */ 11 | class Van { 12 | public: 13 | Van() { } 14 | ~Van(); 15 | 16 | void Init(); 17 | 18 | void Disconnect(const Node& node); 19 | bool Connect(const Node& node); 20 | 21 | bool Send(Message* msg, size_t* send_bytes); 22 | bool Recv(Message* msg, size_t* recv_bytes); 23 | 24 | static Node ParseNode(const string& node_str); 25 | 26 | Node& my_node() { return my_node_; } 27 | Node& scheduler() { return scheduler_; }; 28 | 29 | private: 30 | // bind to my port 31 | void Bind(); 32 | 33 | static void FreeData(void *data, void *hint) { 34 | if (hint == NULL) { 35 | delete [] (char*)data; 36 | } else { 37 | delete (SArray*)hint; 38 | } 39 | } 40 | 41 | bool IsScheduler() { return my_node_.role() == Node::SCHEDULER; } 42 | 43 | // for scheduler: monitor the liveness of all other nodes 44 | // for other nodes: monitor the liveness of the scheduler 45 | void Monitor(); 46 | 47 | void *context_ = nullptr; 48 | void *receiver_ = nullptr; 49 | Node my_node_; 50 | Node scheduler_; 51 | std::unordered_map senders_; 52 | 53 | // for connection monitor 54 | std::unordered_map fd_to_nodeid_; 55 | std::mutex fd_to_nodeid_mu_; 56 | std::thread* monitor_thread_; 57 | 58 | DISALLOW_COPY_AND_ASSIGN(Van); 59 | }; 60 | 61 | } // namespace ps 62 | -------------------------------------------------------------------------------- 
/repo/dmlc-core/test/registry_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace tree { 6 | struct Tree { 7 | virtual void Print() = 0; 8 | virtual ~Tree() {} 9 | }; 10 | 11 | struct BinaryTree : public Tree { 12 | virtual void Print() { 13 | printf("I am binary tree\n"); 14 | } 15 | }; 16 | 17 | struct AVLTree : public Tree { 18 | virtual void Print() { 19 | printf("I am AVL tree\n"); 20 | } 21 | }; 22 | // registry to get the trees 23 | struct TreeFactory 24 | : public dmlc::FunctionRegEntryBase > { 25 | }; 26 | 27 | #define REGISTER_TREE(Name) \ 28 | DMLC_REGISTRY_REGISTER(::tree::TreeFactory, TreeFactory, Name) \ 29 | .set_body([]() { return new Name(); } ) 30 | 31 | DMLC_REGISTRY_FILE_TAG(my_tree); 32 | 33 | } // namespace tree 34 | 35 | 36 | // usually this sits on a seperate file 37 | namespace dmlc { 38 | DMLC_REGISTRY_ENABLE(tree::TreeFactory); 39 | } 40 | 41 | namespace tree { 42 | // Register the trees, can be in seperate files 43 | REGISTER_TREE(BinaryTree) 44 | .describe("This is a binary tree."); 45 | 46 | REGISTER_TREE(AVLTree); 47 | 48 | DMLC_REGISTRY_LINK_TAG(my_tree); 49 | } 50 | 51 | int main(int argc, char *argv[]) { 52 | // construct a binary tree 53 | tree::Tree *binary = dmlc::Registry::Find("BinaryTree")->body(); 54 | binary->Print(); 55 | // construct a binary tree 56 | tree::Tree *avl = dmlc::Registry::Find("AVLTree")->body(); 57 | avl->Print(); 58 | delete binary; delete avl; 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /repo/dmlc-core/tracker/dmlc_tracker/submit.py: -------------------------------------------------------------------------------- 1 | """Job submission script""" 2 | from __future__ import absolute_import 3 | 4 | import logging 5 | from . import opts 6 | from . import local 7 | from . import mpi 8 | from . import sge 9 | from . 
def config_logger(args):
    """Configure the root logger according to the parsed arguments.

    Parameters
    ----------
    args: argparser.Arguments
        The arguments passed in by the user.
    """
    fmt = '%(asctime)s %(levelname)s %(message)s'
    levels = {'INFO': logging.INFO, 'DEBUG': logging.DEBUG}
    if args.log_level not in levels:
        raise RuntimeError("Unknown logging level %s" % args.log_level)
    level = levels[args.log_level]

    if args.log_file is None:
        logging.basicConfig(format=fmt, level=level)
    else:
        # Log to the file, and mirror everything to the console as well.
        logging.basicConfig(format=fmt, level=level, filename=args.log_file)
        console = logging.StreamHandler()
        console.setFormatter(logging.Formatter(fmt))
        console.setLevel(level)
        logging.getLogger('').addHandler(console)

def main():
    """Main submission function."""
    args = opts.get_opts()
    config_logger(args)

    # Dispatch to the submitter matching the requested cluster type.
    submitters = {
        'local': local.submit,
        'sge': sge.submit,
        'yarn': yarn.submit,
        'mpi': mpi.submit,
    }
    submit = submitters.get(args.cluster)
    if submit is None:
        raise RuntimeError('Unknown submission cluster type %s' % args.cluster)
    submit(args)
# script to be sourced in travis yml
# setup all environment variables

# CACHE_PREFIX must be defined before it is used below: the original script
# appended ${CACHE_PREFIX}/bin to PATH one line before defining CACHE_PREFIX,
# producing a broken ":/bin" PATH entry.
export CACHE_PREFIX=${HOME}/.cache/usr

export PATH=${HOME}/.local/bin:${PATH}
export PATH=${PATH}:${CACHE_PREFIX}/bin
export CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH}:${CACHE_PREFIX}/include
export C_INCLUDE_PATH=${C_INCLUDE_PATH}:${CACHE_PREFIX}/include
export LIBRARY_PATH=${LIBRARY_PATH}:${CACHE_PREFIX}/lib
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CACHE_PREFIX}/lib
export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:${CACHE_PREFIX}/lib

alias make="make -j4"

# setup the cache prefix folders (mkdir -p replaces the original
# check-then-mkdir chain; it is a no-op for directories that already exist)
mkdir -p ${HOME}/.cache
mkdir -p ${CACHE_PREFIX} ${CACHE_PREFIX}/include ${CACHE_PREFIX}/lib ${CACHE_PREFIX}/bin

# setup CUDA path if NVCC_PREFIX exists
if [ ! -z "$NVCC_PREFIX" ]; then
    export PATH=${PATH}:${NVCC_PREFIX}/usr/local/cuda-7.5/bin
    export CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH}:${NVCC_PREFIX}/usr/local/cuda-7.5/include
    export C_INCLUDE_PATH=${C_INCLUDE_PATH}:${NVCC_PREFIX}/usr/local/cuda-7.5/include
    export LIBRARY_PATH=${LIBRARY_PATH}:${NVCC_PREFIX}/usr/local/cuda-7.5/lib64:${NVCC_PREFIX}/usr/lib/x86_64-linux-gnu
    export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${NVCC_PREFIX}/usr/local/cuda-7.5/lib64:${NVCC_PREFIX}/usr/lib/x86_64-linux-gnu
fi
#!/usr/bin/env python
"""
this script helps setup classpath env for HDFS, before running program
that links with libhdfs
"""
import glob
import sys
import os
import subprocess

if len(sys.argv) < 2:
    # Print usage AND stop: the original fell through and crashed later
    # trying to run an empty command list.
    print('Usage: the command you want to run')
    sys.exit(1)

hadoop_home = os.getenv('HADOOP_HOME')
hdfs_home = os.getenv('HADOOP_HDFS_HOME')
java_home = os.getenv('JAVA_HOME')
if hadoop_home is None:
    hadoop_home = os.getenv('HADOOP_PREFIX')
assert hadoop_home is not None, 'need to set HADOOP_HOME'
assert hdfs_home is not None, 'need to set HADOOP_HDFS_HOME'
assert java_home is not None, 'need to set JAVA_HOME'

# ask hadoop for its full classpath, then expand the wildcard entries
(classpath, err) = subprocess.Popen('%s/bin/hadoop classpath' % hadoop_home,
                                    stdout=subprocess.PIPE, shell=True,
                                    env=os.environ).communicate()
cpath = []
for f in classpath.split(':'):
    cpath += glob.glob(f)

# native library search path for libhdfs and the JVM
lpath = []
lpath.append('%s/lib/native' % hdfs_home)
lpath.append('%s/lib' % hdfs_home)
lpath.append('%s/jre/lib/amd64/server' % java_home)
lpath.append('./')

env = os.environ.copy()
env['CLASSPATH'] = '${CLASSPATH}:' + (':'.join(cpath))

# setup hdfs options
if 'DMLC_HDFS_OPTS' in env:
    env['LIBHDFS_OPTS'] = env['DMLC_HDFS_OPTS']
elif 'LIBHDFS_OPTS' not in env:
    # JVM max-heap flag: was '--Xmx128m', which is not a valid JVM option
    # (JVM -X options take a single leading dash).
    env['LIBHDFS_OPTS'] = '-Xmx128m'

env['LD_LIBRARY_PATH'] = '${LD_LIBRARY_PATH}:' + (':'.join(lpath))

ret = subprocess.call(args=sys.argv[1:], env=env)
sys.exit(ret)
#pragma once

// NOTE(review): bare "#include" lines and missing template arguments below
// (std::function, std::list, std::vector) are extraction artifacts; restore
// the angle-bracket contents from the original header.
#include
#include
#include
#include

#include
#include
#include

#include "ps/base.h"

namespace ps {

/// \brief A fixed-size pool of worker threads executing queued tasks.
class ThreadPool {
 public:
  explicit ThreadPool(int num_workers)
      : num_workers_(num_workers) {}

  /// \brief Guarantee all tasks have been finished if \ref StartWorkers has
  /// been called
  ~ThreadPool();

  /// \brief A task type
  typedef std::function Task;

  /**
   * \brief Start all worker threads. Tasks will not be executed before calling
   * this function
   */
  void StartWorkers();

  /**
   * \brief Add a task to this pool
   */
  void Add(const Task& task);


  /**
   * \brief Block the caller until all tasked added before have been
   * finished
   */
  void Wait();

 private:
  DISALLOW_COPY_AND_ASSIGN(ThreadPool);

  /// \brief Get next task, for internal use
  Task GetNextTask();

  /// \brief Finished one task, for internal use
  void FinishTask();

  /// \brief Worker main loop: pull and run tasks until shutdown.
  void RunWorker();

  // true once the queue is drained and no task is still executing;
  // callers must hold mu_.
  bool Done() { return tasks_.empty() && num_running_tasks_ == 0; }

  const int num_workers_;
  std::list tasks_;              // pending tasks (FIFO)
  std::mutex mu_;                // guards all mutable state below
  std::condition_variable worker_cond_, fin_cond_;

  std::vector all_workers_;      // worker thread handles

  bool waiting_to_finish_ = false;  // set by destructor to drain workers
  bool started_ = false;            // set by StartWorkers()
  int num_running_tasks_ = 0;       // tasks currently executing
};

}  // PS
#pragma once
#include "base/threadsafe_limited_queue.h"
#include "base/common.h"
namespace ps {

// Single-producer / single-consumer pipeline over a size-bounded queue.
// NOTE(review): the template parameter list was lost in text extraction
// (likely template <typename V>); restore before building.
template
class ProducerConsumer {
 public:
  ProducerConsumer() { setCapacity(1000); }
  ProducerConsumer(int capacity_in_mb) { setCapacity(capacity_in_mb); }
  // Bound the queue by total payload size, expressed in megabytes.
  void setCapacity(int mb) { queue_.setMaxCapacity(mb*1000000); }

  // *func* returns false if finished, true otherwise
  void startProducer(const std::function& func) {
    producer_thr_ = std::thread([this, func](){
      V entry;
      bool done = false;
      while (!done) {
        size_t size = 0;
        done = !func(&entry, &size);
        // the final (done) push marks the queue finished for the consumer
        queue_.push(entry, size, done);
      }
    });
    producer_thr_.detach();
  }

  // Runs *func* on every popped entry until the queue reports finished.
  void startConsumer(const std::function& func) {
    consumer_thr_ = std::thread([this, func](){
      V entry;
      while (pop(&entry)) {
        func(entry);
      }
    });
    // consumer_thr_.detach();
  }
  // Block until the consumer thread exits.
  void waitConsumer() { consumer_thr_.join(); }

  // Returns false once the producer has signalled completion.
  bool pop(V* data) {
    return queue_.pop(*data);
  }
  void push(const V& entry, size_t size = 1, bool finished = false) {
    queue_.push(entry, size, finished);
  }
  // Push a sentinel that marks the stream finished without a payload.
  void setFinished() {
    V empty;
    queue_.push(empty, 0, true);
  }
 private:
  DISALLOW_COPY_AND_ASSIGN(ProducerConsumer);
  ThreadsafeLimitedQueue queue_;
  std::thread producer_thr_;
  std::thread consumer_thr_;
};
}  // namespace ps
#include "system/remote_node.h"
#include "ps/shared_array.h"
#include "ps/app.h"
namespace ps {

// Returns the filter instance for conf.type(), constructing and caching it
// on first use.
IFilter* RemoteNode::FindFilterOrCreate(const Filter& conf) {
  int id = conf.type();
  auto it = filters.find(id);
  if (it == filters.end()) {
    filters[id] = IFilter::create(conf);
    it = filters.find(id);
  }
  return it->second;
}

// Apply each filter configured in the task, in declaration order.
void RemoteNode::EncodeMessage(Message* msg) {
  const auto& tk = msg->task;
  for (int i = 0; i < tk.filter_size(); ++i) {
    FindFilterOrCreate(tk.filter(i))->Encode(msg);
  }
}
void RemoteNode::DecodeMessage(Message* msg) {
  const auto& tk = msg->task;
  // a reverse order comparing to encode
  for (int i = tk.filter_size()-1; i >= 0; --i) {
    FindFilterOrCreate(tk.filter(i))->Decode(msg);
  }
}

// Insert rnode into this group, keeping `group` (and the parallel `keys`
// vector) ordered by key range.
void RemoteNode::AddGroupNode(RemoteNode* rnode) {
  CHECK_NOTNULL(rnode);
  // insert s into sub_nodes such as sub_nodes is still ordered
  size_t pos = 0;
  Range kr(rnode->node.key());
  while (pos < group.size()) {
    if (kr.InLeft(Range(group[pos]->node.key()))) {
      break;
    }
    ++ pos;
  }
  group.insert(group.begin() + pos, rnode);
  keys.insert(keys.begin() + pos, kr);
}

// Remove rnode (by pointer identity) and its key range; no-op if absent.
void RemoteNode::RemoveGroupNode(RemoteNode* rnode) {
  size_t n = group.size();
  CHECK_EQ(n, keys.size());
  for (size_t i = 0; i < n; ++i) {
    if (group[i] == rnode) {
      group.erase(group.begin() + i);
      keys.erase(keys.begin() + i);
      return;
    }
  }
}

}  // namespace ps
#!/bin/bash
# Install nvcc and setup environment variable
set -e
if [ $# -lt 1 ]; then
    # was missing "exit": the script continued with an empty prefix and
    # extracted the packages into the wrong place
    echo "Usage: <prefix>"
    exit 1
fi

prefix=$1

# list of debs to download from nvidia

files=( \
    "http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-core-7-5_7.5-18_amd64.deb" \
    "http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-cublas-7-5_7.5-18_amd64.deb" \
    "http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-cublas-dev-7-5_7.5-18_amd64.deb" \
    "http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-cudart-7-5_7.5-18_amd64.deb" \
    "http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-cudart-dev-7-5_7.5-18_amd64.deb" \
    "http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-curand-7-5_7.5-18_amd64.deb" \
    "http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-curand-dev-7-5_7.5-18_amd64.deb" \
    "http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-nvrtc-7-5_7.5-18_amd64.deb" \
    "http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-nvrtc-dev-7-5_7.5-18_amd64.deb" \
    "http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-misc-headers-7-5_7.5-18_amd64.deb" \
    "http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/libcuda1-352_352.93-0ubuntu1_amd64.deb" \
)

# download each package and unpack it under the prefix (no root needed)
for item in ${files[*]}
do
    wget ${item}
    name=$(echo ${item} | tr "/" "\n" | tail -1)
    dpkg -x ${name} ${prefix}
done
namespace io 58 | } // namespace dmlc 59 | -------------------------------------------------------------------------------- /repo/ps-lite/src/base/thread_pool.cc: -------------------------------------------------------------------------------- 1 | #include "base/thread_pool.h" 2 | 3 | namespace ps { 4 | 5 | ThreadPool::~ThreadPool() { 6 | if (!started_) return; 7 | 8 | mu_.lock(); 9 | waiting_to_finish_ = true; 10 | mu_.unlock(); 11 | 12 | worker_cond_.notify_all(); 13 | Wait(); 14 | 15 | for (int i = 0; i < num_workers_; ++i) { 16 | all_workers_[i].join(); 17 | } 18 | } 19 | 20 | void ThreadPool::Add(const Task& task) { 21 | std::lock_guard l(mu_); 22 | tasks_.push_back(std::move(task)); 23 | if (started_) worker_cond_.notify_one(); 24 | } 25 | 26 | void ThreadPool::Wait() { 27 | std::unique_lock l(mu_); 28 | fin_cond_.wait(l, [this]{ return Done(); }); 29 | } 30 | 31 | typename ThreadPool::Task ThreadPool::GetNextTask() { 32 | std::unique_lock l(mu_); 33 | for (;;) { 34 | if (!tasks_.empty()) { 35 | auto task = std::move(tasks_.front()); 36 | tasks_.pop_front(); 37 | ++ num_running_tasks_; 38 | return task; 39 | } 40 | if (waiting_to_finish_) { 41 | break; 42 | } else { 43 | worker_cond_.wait(l); 44 | } 45 | } 46 | return Task(); 47 | } 48 | 49 | void ThreadPool::FinishTask() { 50 | std::lock_guard l(mu_); 51 | -- num_running_tasks_; 52 | CHECK_GE(num_running_tasks_, 0); 53 | if (Done()) fin_cond_.notify_all(); 54 | } 55 | 56 | void ThreadPool::RunWorker() { 57 | auto task = GetNextTask(); 58 | while (task) { 59 | task(); 60 | FinishTask(); 61 | task = GetNextTask(); 62 | } 63 | } 64 | 65 | void ThreadPool::StartWorkers() { 66 | started_ = true; 67 | for (int i = 0; i < num_workers_; ++i) { 68 | all_workers_.push_back(std::move(std::thread(&ThreadPool::RunWorker, this))); 69 | } 70 | } 71 | 72 | } // namespace ps 73 | -------------------------------------------------------------------------------- /repo/dmlc-core/test/split_repeat_read_test.cc: 
// Test that InputSplit::BeforeFirst() rewinds correctly: read part of the
// input, rewind, and verify the re-read records match byte-for-byte.
// NOTE(review): bare "#include" lines and the static_cast template argument
// were lost in text extraction; restore before building.
#include
#include
#include
#include
#include
#include

int main(int argc, char *argv[]) {
  if (argc < 5) {
    printf("Usage: partid npart nmax\n");
    return 0;
  }
  using namespace dmlc;
  dmlc::InputSplit *in = dmlc::InputSplit::
      Create(argv[1],
             atoi(argv[2]),
             atoi(argv[3]),
             "text");
  // stop the first pass after nmax records to exercise a mid-stream rewind
  size_t nmax = static_cast(atol(argv[4]));
  size_t lcnt = 0;
  InputSplit::Blob rec;
  std::vector data;
  while (in->NextRecord(&rec)) {
    data.push_back(std::string((char*)rec.dptr, rec.size));
    ++lcnt;
    if (lcnt == nmax) {
      LOG(INFO) << "finish loading " << lcnt << " lines";
      break;
    }
  }
  LOG(INFO) << "Call BeforeFirst when lcnt="
            << lcnt << " nmax=" << nmax;
  in->BeforeFirst();
  lcnt = 0;
  // second pass: records seen before must match; new records are recorded
  while (in->NextRecord(&rec)) {
    std::string dat = std::string((char*)rec.dptr, rec.size);
    if (lcnt < nmax) {
      CHECK(rec.size == data[lcnt].length());
      CHECK(!memcmp(rec.dptr, BeginPtr(data[lcnt]), rec.size));
    } else {
      data.push_back(dat);
    }
    ++lcnt;
  }
  LOG(INFO) << "Call BeforeFirst again";
  in->BeforeFirst();
  lcnt = 0;
  // third pass: every record must now match the recorded data exactly
  while (in->NextRecord(&rec)) {
    std::string dat = std::string((char*)rec.dptr, rec.size);
    CHECK(lcnt < data.size());
    CHECK(rec.size == data[lcnt].length());
    CHECK(!memcmp(rec.dptr, BeginPtr(data[lcnt]), rec.size));
    ++lcnt;
  }
  delete in;
  LOG(INFO) << "All tests passed";
  return 0;
}
Status](https://travis-ci.org/dmlc/dmlc-core.svg?branch=master)](https://travis-ci.org/dmlc/dmlc-core) 5 | [![Documentation Status](https://readthedocs.org/projects/dmlc-core/badge/?version=latest)](http://dmlc-core.readthedocs.org/en/latest/) 6 | [![GitHub license](http://dmlc.github.io/img/apache2.svg)](./LICENSE) 7 | 8 | 9 | DMLC-Core is the backbone library to support all DMLC projects, offers the bricks to build efficient and scalable distributed machine learning libraries. 10 | 11 | Developer Channel [![Join the chat at https://gitter.im/dmlc/dmlc-core](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/dmlc/dmlc-core?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 12 | 13 | 14 | What's New 15 | ---------- 16 | * [Note on Parameter Module for Machine Learning](http://dmlc-core.readthedocs.org/en/latest/parameter.html) 17 | 18 | 19 | Contents 20 | -------- 21 | * [Documentation and Tutorials](http://dmlc-core.readthedocs.org/en/latest/) 22 | * [Contributing](#contributing) 23 | 24 | 25 | Contributing 26 | ------------ 27 | 28 | Contributing to dmlc-core is welcomed! dmlc-core follows google's C style guide. If you are interested in contributing, take a look at [feature wishlist](https://github.com/dmlc/dmlc-core/labels/feature%20wishlist) and open a new issue if you like to add something. 29 | 30 | * Use of c++11 is allowed, given that the code is macro guarded with ```DMLC_USE_CXX11``` 31 | * Try to introduce minimum dependency when possible 32 | 33 | ### CheckList before submit code 34 | * Type ```make lint``` and fix all the style problems. 35 | * Type ```make doc``` and fix all the warnings. 
36 | 37 | NOTE 38 | ---- 39 | deps: 40 | 41 | libcurl4-openssl-dev 42 | -------------------------------------------------------------------------------- /repo/ps-lite/src/base/common.h: -------------------------------------------------------------------------------- 1 | // some utility functions 2 | #pragma once 3 | #include 4 | #include 5 | #ifdef _MSC_VER 6 | #include "../windows/unistd.h" 7 | #else 8 | #include 9 | #endif 10 | #include 11 | #ifdef _MSC_VER 12 | #include 13 | #include 14 | #else 15 | #include 16 | #include 17 | #endif 18 | 19 | 20 | // concurrency 21 | #include 22 | #include 23 | #include 24 | // smart pointers 25 | #include 26 | // stream 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | // containers 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | #include 43 | 44 | 45 | // google staff 46 | #include "gflags/gflags.h" 47 | #include "glog/logging.h" 48 | 49 | // util 50 | #include "base/resource_usage.h" 51 | #include "ps/base.h" 52 | 53 | // base 54 | #include 55 | #include "google/protobuf/text_format.h" 56 | 57 | //const int MAX_NUM_LEN = 1000; 58 | 59 | namespace ps { 60 | 61 | // uint64 is the default key size. We can change it into uint32 to reduce the 62 | // spaces for storing the keys. 
// Small command-line utility exercising dmlc::io::FileSystem: supports
// "ls <path>", "cat <file>" and "cp <src> <dst>" over any registered
// filesystem protocol (hdfs://, s3://, file://).
// NOTE(review): bare "#include" lines are extraction artifacts; restore the
// original headers before building.
#include
#include
#include
#include
#include "../src/io/filesys.h"

int main(int argc, char *argv[]) {
  if (argc < 3) {
    printf("Usage: command files\n");
    printf("Possible commands: all path can start with hdfs:// s3:// file:// or no protocol(file:// is used)\n");
    printf("\tcat file\n");
    printf("\tls path\n");
    printf("\tcp file1 file2\n");
    return 0;
  }
  using namespace dmlc;
  using namespace dmlc::io;
  if (!strcmp(argv[1], "ls")) {
    URI path(argv[2]);
    FileSystem *fs = FileSystem::GetInstance(path);
    std::vector info;
    fs->ListDirectory(path, &info);
    for (size_t i = 0; i < info.size(); ++i) {
      printf("%s\t%lu\tis_dir=%d\n", info[i].path.name.c_str(), info[i].size,
             info[i].type == kDirectory);
    }
    return 0;
  }
  if (!strcmp(argv[1], "cat")) {
    URI path(argv[2]);
    FileSystem *fs = FileSystem::GetInstance(path);
    dmlc::Stream *fp = fs->OpenForRead(path);
    // stream the file to stdout in small chunks
    char buf[32];
    while (true) {
      size_t nread = fp->Read(buf, 32);
      if (nread == 0) break;
      fprintf(stdout, "%s", std::string(buf, nread).c_str());
    }
    fflush(stdout);
    delete fp;
    return 0;
  }
  if (!strcmp(argv[1], "cp")) {
    CHECK(argc >= 4) << "cp requres source and dest";
    Stream *src = Stream::Create(argv[2], "r");
    Stream *dst = Stream::Create(argv[3], "w");
    char buf[32];
    size_t nread;
    while ((nread = src->Read(buf, 32)) != 0) {
      dst->Write(buf, nread);
    }
    delete src; delete dst;
    printf("copy %s to %s finished\n", argv[2], argv[3]);
    return 0;
  }
  LOG(FATAL) << "unknown command " << argv[1];
  return 0;
}
#pragma once
#include "base/common.h"
#include "proto/task.pb.h"
#include "system/van.h"
#include "system/postoffice.h"
#include "filter/filter.h"
namespace ps {

// The presentation of a remote node used by Executor. It's not thread
// safe, do not use them directly.
// NOTE(review): several template argument lists below were lost in text
// extraction (std::vector, std::unordered_map); restore from the original.

// Track a request by its timestamp.
class RequestTracker {
 public:
  RequestTracker() { }
  ~RequestTracker() { }

  // Returns true if timestamp "ts" is marked as finished.
  // Negative timestamps are always considered finished.
  bool IsFinished(int ts) {
    return ts < 0 || (((int)data_.size() > ts) && data_[ts]);
  }

  // Mark timestamp "ts" as finished.
  void Finish(int ts) {
    CHECK_GE(ts, 0);
    CHECK_LT(ts, 100000000);
    // grow geometrically so repeated Finish calls stay amortized O(1)
    if ((int)data_.size() <= ts) data_.resize(ts*2+5);
    data_[ts] = true;
  }
 private:
  std::vector data_;  // data_[ts] == finished flag for timestamp ts
};

// A remote node
struct RemoteNode {
 public:
  RemoteNode() { }
  ~RemoteNode() {
    // owns the cached filter instances
    for (auto f : filters) delete f.second;
  }

  // Apply/undo the task's configured filters on a message.
  void EncodeMessage(Message* msg);
  void DecodeMessage(Message* msg);

  Node node;          // the remote node
  bool alive = true;  // aliveness

  // timestamp tracker
  RequestTracker sent_req_tracker;
  RequestTracker recv_req_tracker;

  // node group info. if "node" is a node group, then "group" contains all node
  // pointer in this group. otherwise, group contains "this"
  void AddGroupNode(RemoteNode* rnode);
  void RemoveGroupNode(RemoteNode* rnode);
  std::vector group;

  // keys[i] is the key range of group[i]
  std::vector> keys;

 private:

  IFilter* FindFilterOrCreate(const Filter& conf);
  // key: filter_type
  std::unordered_map filters;

};


}  // namespace ps
// test reading speed from a InputSplit
// Reads a CSV input with the dmlc parser, reports MB/s every 10 MB, and can
// optionally dump the parsed rows back out as CSV for verification.
// NOTE(review): bare "#include" lines and template arguments (unique_ptr,
// Parser, RowBlock) were lost in text extraction; restore before building.
#include
#include
#include
#include
#include "../src/data/csv_parser.h"

int main(int argc, char *argv[]) {
  if (argc < 5) {
    printf("Usage: partid npart nthread [dump csv]\n");
    return 0;
  }
  // optional 5th argument: "stdout" or a file path to dump parsed rows to
  FILE *fo = NULL;
  if (argc > 5) {
    if (!strcmp(argv[5], "stdout")) {
      fo = stdout;
    } else {
      fo = fopen(argv[5], "w");
    }
  }
  using namespace dmlc;
  std::unique_ptr > parser(
      dmlc::Parser::Create(argv[1],
                           atoi(argv[2]),
                           atoi(argv[3]),
                           "csv"));
  double tstart = GetTime();
  size_t bytes_read = 0;
  size_t bytes_expect = 10UL << 20UL;  // report every 10 MB
  size_t num_ex = 0;
  // one warm-up pass, then rewind and measure
  while (parser->Next());
  parser->BeforeFirst();
  while (parser->Next()) {
    bytes_read = parser->BytesRead();
    num_ex += parser->Value().size;
    if (fo != NULL){
      const dmlc::RowBlock& batch = parser->Value();
      for (size_t i = 0; i < batch.size; ++i) {
        for (size_t j = 0; j < batch[i].length; ++j) {
          fprintf(fo, "%g", batch[i].value[j]);
          if (j + 1 == batch[i].length) {
            fprintf(fo, "\n");
          } else {
            fprintf(fo, ",");
          }
        }
      }
    }
    double tdiff = GetTime() - tstart;
    if (bytes_read >= bytes_expect) {
      printf("%lu examples, %lu MB read, %g MB/sec\n",
             num_ex, bytes_read >> 20UL,
             (bytes_read >> 20UL) / tdiff);
      bytes_expect += 10UL << 20UL;
    }
  }
  if (fo != NULL && fo != stdout) {
    fclose(fo);
  }
  return 0;
}
#ifndef LOAD_DATA_H_
#define LOAD_DATA_H_

#include <cstdio>
#include <fstream>
#include <iostream>
#include <set>
#include <string>
#include <vector>

namespace dmlc{
namespace linear{

// One (feature index, value) pair of a sparse example.
struct sparse_feature{
    long int idx;
    int val;
};

// Reads LibSVM-style lines ("label idx:val idx:val ...") from a file in
// mini-batches via load_data_minibatch().
class Load_Data {
    public:
    std::ifstream fin_;
    // rows of the current batch; fea_matrix[i] holds row i's sparse features
    std::vector<std::vector<sparse_feature> > fea_matrix;
    std::vector<sparse_feature> key_val;  // scratch buffer for one row
    sparse_feature sf;
    std::vector<int> label;               // labels of the current batch
    std::string line;
    int y, value, nchar;
    long int index;
    std::set<long int> feaIdx;            // distinct feature indices seen so far
    std::set<long int>::iterator setIter;

    // Opens the input file; exits the process if it cannot be opened.
    Load_Data(const char * file_name){
        fin_.open(file_name, std::ios::in);
        if(!fin_.is_open()) {
            std::cout << " open file error: " << file_name << std::endl;
            exit(1);
        }
    }

    ~Load_Data(){
        fin_.close();
    }

    // Load up to `num` lines into fea_matrix/label.
    // Fixes over the original:
    //  - key_val is cleared per line (rows used to accumulate every previous
    //    row's features),
    //  - label is cleared per batch (it used to grow across batches while
    //    fea_matrix was reset, misaligning rows and labels),
    //  - getline() is checked so EOF stops the loop instead of re-parsing the
    //    last line,
    //  - a row is only pushed when its label parsed, keeping fea_matrix and
    //    label the same length.
    void load_data_minibatch(const int num){
        fea_matrix.clear();
        label.clear();
        for(int i = 0; i < num; ++i){
            if(!getline(fin_, line)) break;  // end of file
            key_val.clear();                 // start a fresh row
            const char* pline = line.c_str();
            if(sscanf(pline, "%d%n", &y, &nchar) >= 1){
                pline += nchar;
                label.push_back(y);
                while(sscanf(pline, "%ld:%d%n", &index, &value, &nchar) >= 2){
                    pline += nchar;
                    sf.idx = index;
                    // record globally-seen feature indices
                    setIter = feaIdx.find(index);
                    if(setIter == feaIdx.end()) feaIdx.insert(index);
                    sf.val = value;
                    key_val.push_back(sf);
                }
                fea_matrix.push_back(key_val);
            }
        }
    }

};

}//end linear
}//end dmlc
#endif
#pragma once
#include <queue>
#include <mutex>
#include <utility>
#include <condition_variable>
namespace ps {

// TODO the code style is inconsistent with others
/*!
 * \brief A minimal mutex-protected FIFO queue.
 *
 * All operations take the internal lock, so the queue is safe for multiple
 * producers and consumers. wait_and_pop blocks until an element is available;
 * try_pop never blocks.
 */
template <typename T>
class ThreadsafeQueue {
 public:
  ThreadsafeQueue() {}

  /*! \brief Appends \p new_value and wakes all waiting consumers. */
  void push(T new_value) {
    std::lock_guard<std::mutex> lk(mut);
    data_queue.push(std::move(new_value));
    data_cond.notify_all();
  }

  /*! \brief Blocks until the queue is non-empty, then moves the front into \p value. */
  void wait_and_pop(T& value) {
    std::unique_lock<std::mutex> lk(mut);
    data_cond.wait(lk, [this] { return !data_queue.empty(); });
    value = std::move(data_queue.front());
    data_queue.pop();
  }

  /*!
   * \brief Non-blocking pop.
   * \return true and fills \p value if an element was available, false otherwise
   */
  bool try_pop(T& value) {
    std::lock_guard<std::mutex> lk(mut);
    if (data_queue.empty())
      return false;
    value = std::move(data_queue.front());
    data_queue.pop();
    return true;
  }

  /*! \brief Number of queued elements (snapshot; may be stale immediately). */
  size_t size() const {
    std::lock_guard<std::mutex> lk(mut);
    return data_queue.size();
  }

  /*! \brief Whether the queue is empty (snapshot; may be stale immediately). */
  bool empty() const {
    std::lock_guard<std::mutex> lk(mut);
    return data_queue.empty();
  }

 private:
  mutable std::mutex mut;               // guards data_queue; mutable so const size()/empty() can lock
  std::queue<T> data_queue;
  std::condition_variable data_cond;    // signaled on every push
};

}  // namespace ps
| namespace producer_test { 9 | inline void delay(int sleep) { 10 | if (sleep < 0) { 11 | int d = rand() % (-sleep); 12 | std::this_thread::sleep_for(std::chrono::milliseconds(d)); 13 | } else { 14 | std::this_thread::sleep_for(std::chrono::milliseconds(sleep)); 15 | } 16 | } 17 | 18 | // int was only used as example, in real life 19 | // use big data blob 20 | struct IntProducer : public ThreadedIter::Producer { 21 | int counter; 22 | int maxcap; 23 | int sleep; 24 | IntProducer(int maxcap, int sleep) 25 | : counter(0), maxcap(maxcap), sleep(sleep) {} 26 | virtual void BeforeFirst(void) { 27 | counter = 0; 28 | } 29 | virtual bool Next(int **inout_dptr) { 30 | if (counter == maxcap) return false; 31 | // allocate space if not exist 32 | if (*inout_dptr == NULL) { 33 | *inout_dptr = new int(); 34 | } 35 | delay(sleep); 36 | **inout_dptr = counter++; 37 | return true; 38 | } 39 | }; 40 | 41 | } 42 | 43 | TEST(ThreadedIter, basics) { 44 | using namespace producer_test; 45 | ThreadedIter iter; 46 | iter.set_max_capacity(1); 47 | IntProducer prod(10, 100); 48 | int d = 100; 49 | iter.Init(&prod); 50 | int counter = 0; 51 | while (iter.Next()) { 52 | CHECK(counter == iter.Value()); 53 | delay(d); 54 | LOG(INFO) << counter; 55 | ++counter; 56 | } 57 | CHECK(!iter.Next()); 58 | iter.BeforeFirst(); 59 | iter.BeforeFirst(); 60 | iter.BeforeFirst(); 61 | iter.Next(); 62 | iter.BeforeFirst(); 63 | iter.BeforeFirst(); 64 | counter = 0; 65 | int *value; 66 | while (iter.Next(&value)) { 67 | LOG(INFO) << *value; 68 | CHECK(counter == *value); 69 | ++counter; 70 | iter.Recycle(&value); 71 | delay(d); 72 | CHECK(value == NULL); 73 | } 74 | LOG(INFO) << "finish"; 75 | } 76 | -------------------------------------------------------------------------------- /repo/dmlc-core/tracker/dmlc_tracker/sge.py: -------------------------------------------------------------------------------- 1 | """Submit jobs to Sun Grid Engine.""" 2 | # pylint: disable=invalid-name 3 | from __future__ import 
def submit(args):
    """Job submission script for SGE.

    Writes a small wrapper script that exports the DMLC environment for each
    SGE task, then hands one qsub array-job submission function to
    tracker.submit, which launches the tracker and the worker/server jobs.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed dmlc-submit arguments (num_workers, num_servers, jobname,
        sge_log_dir, queue, vcores, command, ...).
    """
    if args.jobname is None:
        args.jobname = ('dmlc%d.' % args.num_workers) + args.command[0].split('/')[-1]
    if args.sge_log_dir is None:
        args.sge_log_dir = args.jobname + '.log'

    # make sure the log directory exists and is actually a directory
    if os.path.exists(args.sge_log_dir):
        if not os.path.isdir(args.sge_log_dir):
            raise RuntimeError('specified --sge-log-dir %s is not a dir' % args.sge_log_dir)
    else:
        os.mkdir(args.sge_log_dir)

    # BUG FIX: the runscript and qsub -e/-o paths previously used args.logdir,
    # but only args.sge_log_dir is created above; use it consistently.
    runscript = '%s/rundmlc.sh' % args.sge_log_dir
    with open(runscript, 'w') as fo:
        fo.write('source ~/.bashrc\n')
        # SGE exposes the array-task index as SGE_TASK_ID; dmlc expects DMLC_TASK_ID
        fo.write('export DMLC_TASK_ID=${SGE_TASK_ID}\n')
        fo.write('export DMLC_JOB_CLUSTER=sge\n')
        fo.write('\"$@\"\n')

    def sge_submit(nworker, nserver, pass_envs):
        """Internal submission function passed to the tracker."""
        env_arg = ','.join(['%s=\"%s\"' % (k, str(v)) for k, v in pass_envs.items()])
        cmd = 'qsub -cwd -t 1-%d -S /bin/bash' % (nworker + nserver)
        if args.queue != 'default':
            # BUG FIX: leading space was missing, producing "/bin/bash-q <queue>"
            cmd += ' -q %s' % args.queue
        cmd += ' -N %s ' % args.jobname
        cmd += ' -e %s -o %s' % (args.sge_log_dir, args.sge_log_dir)
        cmd += ' -pe orte %d' % (args.vcores)
        cmd += ' -v %s,PATH=${PATH}:.' % env_arg
        cmd += ' %s %s' % (runscript, ' '.join(args.command))
        print(cmd)
        subprocess.check_call(cmd, shell=True)
        print('Waiting for the jobs to get up...')

    # call submit, with nslave, the commands to run each job and submit function
    tracker.submit(args.num_workers, args.num_servers,
                   fun_submit=sge_submit,
                   pscmd=' '.join(args.command))
39 | CHECK(x.unique()); 40 | } 41 | 42 | DMLC_JSON_ENABLE_ANY(std::vector, IntVector); 43 | DMLC_JSON_ENABLE_ANY(int, Int); 44 | 45 | TEST(Any, json) { 46 | std::unordered_map x; 47 | x["vec"] = std::vector{1, 2, 3}; 48 | x["int"] = 300; 49 | 50 | std::ostringstream os; 51 | { 52 | std::unordered_map temp(x); 53 | dmlc::JSONWriter writer(&os); 54 | writer.Write(temp); 55 | temp.clear(); 56 | } 57 | std::string json = os.str(); 58 | LOG(INFO) << json; 59 | std::istringstream is(json); 60 | dmlc::JSONReader reader(&is); 61 | std::unordered_map copy_data; 62 | reader.Read(©_data); 63 | 64 | ASSERT_EQ(dmlc::get >(x["vec"]), 65 | dmlc::get >(copy_data["vec"])); 66 | ASSERT_EQ(dmlc::get(x["int"]), 67 | dmlc::get(copy_data["int"])); 68 | } 69 | -------------------------------------------------------------------------------- /repo/ps-lite/src/system/network_usage.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "proto/node.pb.h" 3 | #include 4 | #include "base/resource_usage.h" 5 | namespace ps { 6 | 7 | /** 8 | * @brief Monitor network usage 9 | */ 10 | class NetworkUsage { 11 | public: 12 | NetworkUsage() { start_ = tic(); } 13 | ~NetworkUsage() { 14 | Display(); 15 | } 16 | 17 | void AddMyNode(const Node& node) { 18 | my_id_ = node.id(); 19 | my_host_ = node.hostname(); 20 | } 21 | void AddNode(const Node& node) { 22 | auto host = node.hostname(); 23 | CHECK(!host.empty()); 24 | CHECK(!my_host_.empty()); 25 | if (host == my_host_) { 26 | // LL << host << " " << my_host_; 27 | local_machine_.insert(node.id()); 28 | } 29 | } 30 | 31 | inline void IncrSend(const NodeID& recver, size_t bytes) { 32 | if (local_machine_.find(recver) != local_machine_.end()) { 33 | sent_to_local_ += bytes; 34 | } 35 | sent_ += bytes; 36 | } 37 | inline void IncrRecv(const NodeID& sender, size_t bytes) { 38 | if (local_machine_.find(sender) != local_machine_.end()) { 39 | received_from_local_ += bytes; 40 | } 41 | received_ += 
/*!
 * Copyright (c) 2015 by Contributors
 * \file thread_local.h
 * \brief Portable thread local storage.
 */
#ifndef DMLC_THREAD_LOCAL_H_
#define DMLC_THREAD_LOCAL_H_

#include <mutex>
#include <memory>
#include <vector>

namespace dmlc {

// macro handling for threadlocal variables
#ifdef __GNUC__
  #define MX_TREAD_LOCAL __thread
#elif __STDC_VERSION__ >= 201112L
  #define MX_TREAD_LOCAL _Thread_local
#elif defined(_MSC_VER)
  #define MX_TREAD_LOCAL __declspec(thread)
#endif

#ifndef MX_TREAD_LOCAL
#message("Warning: Threadlocal is not enabled");
#endif

/*!
 * \brief A threadlocal store to store threadlocal variables.
 *  Will return a thread local singleton of type T
 * \tparam T the type we like to store
 */
template<typename T>
class ThreadLocalStore {
 public:
  /*! \return get a thread local singleton */
  static T* Get() {
    // one pointer per (thread, T) pair; lazily constructed on first access
    static MX_TREAD_LOCAL T* ptr = nullptr;
    if (ptr == nullptr) {
      ptr = new T();
      // register with the process-wide singleton so the object is
      // deleted when the store itself is destructed at exit
      Singleton()->RegisterDelete(ptr);
    }
    return ptr;
  }

 private:
  /*! \brief constructor */
  ThreadLocalStore() {}
  /*! \brief destructor, deletes every registered thread-local object */
  ~ThreadLocalStore() {
    for (size_t i = 0; i < data_.size(); ++i) {
      delete data_[i];
    }
  }
  /*! \return singleton of the store */
  static ThreadLocalStore<T> *Singleton() {
    static ThreadLocalStore<T> inst;
    return &inst;
  }
  /*!
   * \brief register str for internal deletion
   * \param str the pointer to be deleted at store destruction
   */
  void RegisterDelete(T *str) {
    std::unique_lock<std::mutex> lock(mutex_);
    data_.push_back(str);
    lock.unlock();
  }
  /*! \brief internal mutex guarding data_ */
  std::mutex mutex_;
  /*!\brief internal data */
  std::vector<T*> data_;
};

}  // namespace dmlc

#endif  // DMLC_THREAD_LOCAL_H_
options 30 | ifndef WITH_FPIC 31 | WITH_FPIC = 1 32 | endif 33 | 34 | ifeq ($(WITH_FPIC), 1) 35 | DMLC_CFLAGS += -fPIC 36 | endif 37 | 38 | # Using default hadoop_home 39 | ifndef HADOOP_HDFS_HOME 40 | HADOOP_HDFS_HOME=$(HADOOP_HOME) 41 | endif 42 | 43 | ifeq ($(USE_HDFS),1) 44 | ifndef HDFS_INC_PATH 45 | HDFS_INC_PATH=$(HADOOP_HDFS_HOME)/include 46 | endif 47 | ifndef HDFS_LIB_PATH 48 | HDFS_LIB_PATH=$(HADOOP_HDFS_HOME)/lib/native 49 | endif 50 | 51 | DMLC_CFLAGS+= -DDMLC_USE_HDFS=1 -I$(HDFS_INC_PATH) -I$(JAVA_HOME)/include 52 | 53 | ifneq ("$(wildcard $(HDFS_LIB_PATH)/libhdfs.so)","") 54 | DMLC_LDFLAGS+= -L$(HDFS_LIB_PATH) -lhdfs 55 | else 56 | DMLC_LDFLAGS+= $(HDFS_LIB_PATH)/libhdfs.a 57 | endif 58 | DMLC_LDFLAGS += -L$(LIBJVM) -ljvm -Wl,-rpath=$(LIBJVM) 59 | else 60 | DMLC_CFLAGS+= -DDMLC_USE_HDFS=0 61 | endif 62 | 63 | # setup S3 64 | ifeq ($(USE_S3),1) 65 | DMLC_CFLAGS+= -DDMLC_USE_S3=1 66 | DMLC_LDFLAGS+= -lcurl -lssl -lcrypto 67 | else 68 | DMLC_CFLAGS+= -DDMLC_USE_S3=0 69 | endif 70 | 71 | ifeq ($(USE_GLOG), 1) 72 | DMLC_CFLAGS += -DDMLC_USE_GLOG=1 73 | DMLC_LDFLAGS += -lglog 74 | endif 75 | 76 | ifeq ($(USE_AZURE),1) 77 | DMLC_CFLAGS+= -DDMLC_USE_AZURE=1 78 | DMLC_LDFLAGS+= -lazurestorage 79 | else 80 | DMLC_CFLAGS+= -DDMLC_USE_AZURE=0 81 | endif 82 | -------------------------------------------------------------------------------- /repo/dmlc-core/example/parameter.cc: -------------------------------------------------------------------------------- 1 | // This is an example program showing usage of parameter module 2 | // Build, on root folder, type 3 | // 4 | // make example 5 | // 6 | // Example usage: 7 | // 8 | // example/parameter num_hidden=100 name=aaa activation=relu 9 | // 10 | 11 | #include 12 | 13 | struct MyParam : public dmlc::Parameter { 14 | float learning_rate; 15 | int num_hidden; 16 | int activation; 17 | std::string name; 18 | // declare parameters in header file 19 | DMLC_DECLARE_PARAMETER(MyParam) { 20 | 
DMLC_DECLARE_FIELD(num_hidden).set_range(0, 1000) 21 | .describe("Number of hidden unit in the fully connected layer."); 22 | DMLC_DECLARE_FIELD(learning_rate).set_default(0.01f) 23 | .describe("Learning rate of SGD optimization."); 24 | DMLC_DECLARE_FIELD(activation).add_enum("relu", 1).add_enum("sigmoid", 2) 25 | .describe("Activation function type."); 26 | DMLC_DECLARE_FIELD(name).set_default("mnet") 27 | .describe("Name of the net."); 28 | 29 | // user can also set nhidden besides num_hidden 30 | DMLC_DECLARE_ALIAS(num_hidden, nhidden); 31 | DMLC_DECLARE_ALIAS(activation, act); 32 | } 33 | }; 34 | 35 | // register it in cc file 36 | DMLC_REGISTER_PARAMETER(MyParam); 37 | 38 | 39 | int main(int argc, char *argv[]) { 40 | if (argc == 1) { 41 | printf("Usage: [key=value] ...\n"); 42 | return 0; 43 | } 44 | 45 | MyParam param; 46 | std::map kwargs; 47 | for (int i = 0; i < argc; ++i) { 48 | char name[256], val[256]; 49 | if (sscanf(argv[i], "%[^=]=%[^\n]", name, val) == 2) { 50 | kwargs[name] = val; 51 | } 52 | } 53 | printf("Docstring\n---------\n%s", MyParam::__DOC__().c_str()); 54 | 55 | printf("start to set parameters ...\n"); 56 | param.Init(kwargs); 57 | printf("-----\n"); 58 | printf("param.num_hidden=%d\n", param.num_hidden); 59 | printf("param.learning_rate=%f\n", param.learning_rate); 60 | printf("param.name=%s\n", param.name.c_str()); 61 | printf("param.activation=%d\n", param.activation); 62 | return 0; 63 | } 64 | 65 | -------------------------------------------------------------------------------- /repo/ps-lite/make/deps.mk: -------------------------------------------------------------------------------- 1 | # Install dependencies 2 | 3 | URL=https://raw.githubusercontent.com/mli/deps/master/build 4 | 5 | # gflags 6 | 7 | ${DEPS_PATH}/include/google/gflags.h: 8 | $(eval FILE=gflags-2.0-no-svn-files.tar.gz) 9 | $(eval DIR=gflags-2.0) 10 | tar -zxf $(FILE) 11 | cd $(DIR) && ./configure -prefix=$(DEPS_PATH) && $(MAKE) && $(MAKE) install 12 | rm -rf 
$(FILE) $(DIR) 13 | 14 | gflags: | ${DEPS_PATH}/include/google/gflags.h 15 | 16 | # glog 17 | 18 | ${DEPS_PATH}/include/glog/logging.h: | ${DEPS_PATH}/include/google/gflags.h 19 | $(eval FILE=v0.3.4.tar.gz) 20 | $(eval DIR=glog-0.3.4) 21 | tar -zxf $(FILE) 22 | cd $(DIR) && ./configure -prefix=$(DEPS_PATH) --with-gflags=$(DEPS_PATH) && $(MAKE) && $(MAKE) install 23 | rm -rf $(FILE) $(DIR) 24 | 25 | glog: | ${DEPS_PATH}/include/glog/logging.h 26 | 27 | # protobuf 28 | 29 | ${DEPS_PATH}/include/google/protobuf/message.h: 30 | $(eval FILE=protobuf-2.5.0.tar.gz) 31 | $(eval DIR=protobuf-2.5.0) 32 | tar -zxf $(FILE) 33 | cd $(DIR) && ./configure -prefix=$(DEPS_PATH) && $(MAKE) && $(MAKE) install 34 | rm -rf $(FILE) $(DIR) 35 | 36 | protobuf: | ${DEPS_PATH}/include/google/protobuf/message.h 37 | 38 | # zmq 39 | 40 | ${DEPS_PATH}/include/zmq.h: 41 | $(eval FILE=zeromq-4.1.2.tar.gz) 42 | $(eval DIR=zeromq-4.1.2) 43 | tar -zxf $(FILE) 44 | cd $(DIR) && ./configure -prefix=$(DEPS_PATH) --with-libsodium=no --with-libgssapi_krb5=no && $(MAKE) && $(MAKE) install 45 | rm -rf $(FILE) $(DIR) 46 | 47 | zmq: | ${DEPS_PATH}/include/zmq.h 48 | 49 | # lz4 50 | 51 | ${DEPS_PATH}/include/lz4.h: 52 | $(eval FILE=lz4-r129.tar.gz) 53 | $(eval DIR=lz4-r129) 54 | tar -zxf $(FILE) 55 | cd $(DIR) && $(MAKE) && PREFIX=$(DEPS_PATH) $(MAKE) install 56 | rm -rf $(FILE) $(DIR) 57 | 58 | lz4: | ${DEPS_PATH}/include/lz4.h 59 | 60 | # cityhash 61 | 62 | ${DEPS_PATH}/include/city.h: 63 | $(eval FILE=cityhash-1.1.1.tar.gz) 64 | $(eval DIR=cityhash-1.1.1) 65 | tar -zxf $(FILE) 66 | cd $(DIR) && ./configure -prefix=$(DEPS_PATH) --enable-sse4.2 && $(MAKE) CXXFLAGS="-g -O3 -msse4.2" && $(MAKE) install 67 | rm -rf $(FILE) $(DIR) 68 | 69 | cityhash: | ${DEPS_PATH}/include/city.h 70 | -------------------------------------------------------------------------------- /repo/ps-lite/src/ps/node_info.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 
"ps/app.h" 3 | namespace ps { 4 | 5 | DECLARE_int32(num_workers); 6 | DECLARE_int32(num_servers); 7 | 8 | /// \brief Queries runtime info about the node 9 | class NodeInfo { 10 | public: 11 | 12 | /// \brief Returns the rank of this node in its group, which is in {0, ..., 13 | /// \ref RankSize - 1} . 14 | /// 15 | /// Each node has an unique rank in its group (e.g. worker group or server 16 | /// group), which is a continuous integer starting from 0. 17 | static inline int MyRank() { return MyNode().rank(); } 18 | 19 | /// \brief Returns the group size 20 | static inline int RankSize() { 21 | return IsWorker() ? NumWorkers() : (IsServer() ? NumServers() : 1); 22 | } 23 | 24 | /// \brief Returns the number of worker nodes 25 | static inline int NumWorkers() { return FLAGS_num_workers; } 26 | 27 | /// \brief Returns the number of server nodes 28 | static inline int NumServers() { return FLAGS_num_servers; } 29 | 30 | /// \brief Returns true if this node is a worker node 31 | static bool IsWorker() { return MyNode().role() == Node::WORKER; } 32 | 33 | /// \brief Returns true if this node is a server node. 34 | static inline int IsServer() { return MyNode().role() == Node::SERVER; } 35 | 36 | /// \brief Returns true if this node is a scheduler node. 
37 | static inline int IsScheduler() { return MyNode().role() == Node::SCHEDULER; } 38 | 39 | /// \brief Returns the key range this node maintains 40 | static inline Range KeyRange() { return Range(MyNode().key()); } 41 | 42 | /// \brief Returns my node id 43 | static inline std::string MyID() { 44 | return MyNode().id(); 45 | } 46 | 47 | /// \brief Returns my node info 48 | static inline Node MyNode() { 49 | return Postoffice::instance().manager().van().my_node(); 50 | } 51 | 52 | /// \brief Returns the scheduler ID 53 | static inline std::string SchedulerID() { 54 | return Postoffice::instance().manager().van().scheduler().id(); 55 | } 56 | 57 | /// \brief The app this node runs 58 | static inline App* MyApp() { 59 | return Postoffice::instance().manager().app(); 60 | } 61 | }; 62 | 63 | } // namespace ps 64 | -------------------------------------------------------------------------------- /repo/dmlc-core/Makefile: -------------------------------------------------------------------------------- 1 | ifndef config 2 | ifneq ("$(wildcard ./config.mk)","") 3 | config = config.mk 4 | else 5 | config = make/config.mk 6 | endif 7 | endif 8 | # use customized config file 9 | include $(config) 10 | include make/dmlc.mk 11 | 12 | # this is the common build script for dmlc lib 13 | export LDFLAGS= -pthread -lm 14 | export CFLAGS = -O3 -Wall -msse2 -Wno-unknown-pragmas -Iinclude -std=c++0x 15 | LDFLAGS+= $(DMLC_LDFLAGS) 16 | CFLAGS+= $(DMLC_CFLAGS) 17 | 18 | ifdef DEPS_PATH 19 | CFLAGS+= -I$(DEPS_PATH)/include 20 | LDFLAGS+= -L$(DEPS_PATH)/lib 21 | endif 22 | 23 | .PHONY: clean all test lint doc example pylint 24 | 25 | OBJ=line_split.o recordio_split.o input_split_base.o io.o local_filesys.o data.o recordio.o config.o 26 | 27 | ifeq ($(USE_HDFS), 1) 28 | OBJ += hdfs_filesys.o 29 | endif 30 | 31 | ifeq ($(USE_S3), 1) 32 | OBJ += s3_filesys.o 33 | endif 34 | 35 | ifeq ($(USE_AZURE), 1) 36 | OBJ += azure_filesys.o 37 | endif 38 | 39 | ifndef LINT_LANG 40 | LINT_LANG="all" 41 | 
endif 42 | 43 | 44 | ALIB=libdmlc.a 45 | all: $(ALIB) test 46 | 47 | include test/dmlc_test.mk 48 | include example/dmlc_example.mk 49 | 50 | ifeq ($(BUILD_TEST), 1) 51 | test: $(ALL_TEST) 52 | endif 53 | 54 | example: $(ALL_EXAMPLE) 55 | 56 | line_split.o: src/io/line_split.cc 57 | recordio_split.o: src/io/recordio_split.cc 58 | input_split_base.o: src/io/input_split_base.cc 59 | hdfs_filesys.o: src/io/hdfs_filesys.cc 60 | s3_filesys.o: src/io/s3_filesys.cc 61 | azure_filesys.o: src/io/azure_filesys.cc 62 | local_filesys.o: src/io/local_filesys.cc 63 | io.o: src/io.cc 64 | data.o: src/data.cc 65 | recordio.o: src/recordio.cc 66 | config.o: src/config.cc 67 | 68 | libdmlc.a: $(OBJ) 69 | 70 | 71 | $(BIN) : 72 | $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) 73 | 74 | $(OBJ) : 75 | $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) 76 | 77 | $(ALIB): 78 | ar cr $@ $+ 79 | 80 | lint: 81 | python scripts/lint.py dmlc ${LINT_LANG} include src scripts 82 | 83 | pylint: 84 | python scripts/lint.py dmlc ${LINT_LANG} tracker/dmlc_tracker 85 | 86 | doxygen: 87 | doxygen doc/Doxyfile 88 | 89 | clean: 90 | $(RM) $(OBJ) $(BIN) $(ALIB) $(ALL_TEST) $(ALL_TEST_OBJ) *~ src/*~ src/*/*~ include/dmlc/*~ test/*~ 91 | -------------------------------------------------------------------------------- /repo/ps-lite/src/base/block_bloom_filter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "base/sketch.h" 3 | namespace ps { 4 | 5 | // a blocked version, see 6 | // Cache-, Hash- and Space-Efficient Bloom IFilters, 7 | // http://algo2.iti.kit.edu/documents/cacheefficientbloomfilters-jea.pdf 8 | 9 | // 1.2x - 1.8x faster than BloomIFilter, but may give slightly large FPR 10 | template 11 | class BlockBloomIFilter : public Sketch { 12 | public: 13 | BlockBloomIFilter() { } 14 | BlockBloomIFilter(int m, int k) { resize(m, k); } 15 | ~BlockBloomIFilter() { delete [] data_; } 16 | void 
resize(int m, int k) { 17 | m = std::max(m, 1024); 18 | num_bin_ = (m / 8 / bin_size_) + 1; 19 | data_size_ = num_bin_ * bin_size_; 20 | if (m > m_) { 21 | delete [] data_; 22 | data_ = new char[data_size_]; 23 | // CHECK_EQ(posix_memalign((void**)&data_, bin_size_*8, data_size_), 0); 24 | } 25 | k_ = std::min(64, std::max(1, k)); 26 | m_ = m; 27 | reset(); 28 | } 29 | 30 | void reset() { 31 | memset(data_, 0, data_size_ * sizeof(char)); 32 | } 33 | 34 | // make the api be similar to std::set 35 | bool count(K key) const { return query(key); } 36 | bool operator[] (K key) const { return query(key); } 37 | bool query(K key) const { 38 | // auto h = crc32(key); 39 | auto h = hash(key); 40 | auto delta = (h >> 17) | (h << 15); // Rotate right 17 bits 41 | char* data = data_ + (h % num_bin_) * bin_size_; 42 | for (int j = 0; j < k_; ++j) { 43 | uint32 bitpos = h % (bin_size_ * 8); 44 | if ((data[bitpos/8] & (1 << (bitpos % 8))) == 0) return false; 45 | h += delta; 46 | } 47 | return true; 48 | } 49 | 50 | void insert(K key) { 51 | // auto h = crc32(key); 52 | auto h = hash(key); 53 | auto delta = (h >> 17) | (h << 15); // Rotate right 17 bits 54 | char* data = data_ + (h % num_bin_) * bin_size_; 55 | for (int j = 0; j < k_; ++j) { 56 | uint32 bitpos = h % (bin_size_ * 8); 57 | data[bitpos/8] |= (1 << (bitpos % 8)); 58 | h += delta; 59 | } 60 | } 61 | 62 | private: 63 | char* data_ = NULL; 64 | int data_size_ = 0; 65 | uint32 m_ = 0; 66 | int k_ = 0; 67 | const uint32 bin_size_ = 64; // cache line size 68 | uint32 num_bin_ = 0; 69 | }; 70 | 71 | } 72 | -------------------------------------------------------------------------------- /repo/ps-lite/src/kv/kv_store.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "ps/app.h" 3 | #include "proto/param.pb.h" 4 | #include "dmlc/io.h" 5 | namespace ps { 6 | 7 | class KVStore : public Customer { 8 | public: 9 | KVStore(int id) : Customer(id) { } 10 | virtual 
~KVStore() { } 11 | 12 | // load and save 13 | virtual void Load(dmlc::Stream *fi) = 0; 14 | virtual void Save(dmlc::Stream *fo) const = 0; 15 | virtual void Clear() = 0; 16 | 17 | // handle system call 18 | void ProcessRequest(Message* request) { 19 | const auto& call = request->task.param(); 20 | Message* response = nullptr; 21 | bool push = call.push(); 22 | if (!push) { 23 | // a pull request, need to reply with the value 24 | response = new Message(*request); 25 | } 26 | 27 | if (call.replica()) { 28 | // a replication request 29 | if (push) { 30 | SetReplica(request); 31 | } else { 32 | GetReplica(response); 33 | } 34 | } else { 35 | // a normal request 36 | if (push) { 37 | HandlePush(request); 38 | } else { 39 | HandlePull(response); 40 | } 41 | } 42 | 43 | if (response) Reply(request, response); 44 | } 45 | 46 | protected: 47 | /// User-defineded functions //// 48 | 49 | /// @brief Fill "msg" with the values it requests, e.g., 50 | /// msg->value(0)[0] = my_val_[msg->key[0]]; 51 | virtual void HandlePull(Message* msg) = 0; 52 | 53 | /// @brief Set the values in "msg" into into my data strcuture, e.g.. 54 | /// my_val_[msg->key[0]] = msg->value(0)[0]; 55 | virtual void HandlePush(const Message* msg) = 0; 56 | 57 | /// @brief the message contains the backup KV pairs sent by the master node of the key 58 | /// segment to its replica node. merge these pairs into my replica, say 59 | /// replica_[msg->sender] = ... 60 | virtual void SetReplica(const Message* msg) { } 61 | 62 | /// @brief retrieve the replica. 
a new server node replacing a dead server will first 63 | /// ask for the dead's replica node for the data 64 | virtual void GetReplica(Message* msg) { } 65 | 66 | /// @brief a new server node fill its own datastructure via the the replica data from 67 | /// the dead's replica node 68 | virtual void Recover(Message* msg) { } 69 | }; 70 | 71 | } // namespace ps 72 | -------------------------------------------------------------------------------- /repo/dmlc-core/src/io/local_filesys.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file local_filesys.h 4 | * \brief local access module 5 | * \author Tianqi Chen 6 | */ 7 | #ifndef DMLC_IO_LOCAL_FILESYS_H_ 8 | #define DMLC_IO_LOCAL_FILESYS_H_ 9 | 10 | #include 11 | #include "./filesys.h" 12 | 13 | namespace dmlc { 14 | namespace io { 15 | /*! \brief local file system */ 16 | class LocalFileSystem : public FileSystem { 17 | public: 18 | /*! \brief destructor */ 19 | virtual ~LocalFileSystem() {} 20 | /*! 21 | * \brief get information about a path 22 | * \param path the path to the file 23 | * \return the information about the file 24 | */ 25 | virtual FileInfo GetPathInfo(const URI &path); 26 | /*! 27 | * \brief list files in a directory 28 | * \param path to the file 29 | * \param out_list the output information about the files 30 | */ 31 | virtual void ListDirectory(const URI &path, std::vector *out_list); 32 | /*! 33 | * \brief open a stream, will report error and exit if bad thing happens 34 | * NOTE: the IStream can continue to work even when filesystem was destructed 35 | * \param path path to file 36 | * \param uri the uri of the input 37 | * \param allow_null whether NULL can be returned, or directly report error 38 | * \return the created stream, can be NULL when allow_null == true and file do not exist 39 | */ 40 | virtual SeekStream *Open(const URI &path, 41 | const char* const flag, 42 | bool allow_null); 43 | /*! 
44 | * \brief open a seekable stream for read 45 | * \param path the path to the file 46 | * \param allow_null whether NULL can be returned, or directly report error 47 | * \return the created stream, can be NULL when allow_null == true and file do not exist 48 | */ 49 | virtual SeekStream *OpenForRead(const URI &path, bool allow_null); 50 | /*! 51 | * \brief get a singleton of LocalFileSystem when needed 52 | * \return a singleton instance 53 | */ 54 | inline static LocalFileSystem *GetInstance(void) { 55 | static LocalFileSystem instance; 56 | return &instance; 57 | } 58 | 59 | private: 60 | LocalFileSystem() {} 61 | }; 62 | } // namespace io 63 | } // namespace dmlc 64 | #endif // DMLC_IO_LOCAL_FILESYS_H_ 65 | -------------------------------------------------------------------------------- /repo/ps-lite/cmake/External/gflags.cmake: -------------------------------------------------------------------------------- 1 | if (NOT __GFLAGS_INCLUDED) # guard against multiple includes 2 | set(__GFLAGS_INCLUDED TRUE) 3 | 4 | # use the system-wide gflags if present 5 | find_package(GFlags) 6 | if (GFLAGS_FOUND) 7 | set(GFLAGS_EXTERNAL FALSE) 8 | else() 9 | # gflags will use pthreads if it's available in the system, so we must link with it 10 | find_package(Threads) 11 | 12 | # build directory 13 | set(gflags_PREFIX ${CMAKE_BINARY_DIR}/external/gflags-prefix) 14 | # install directory 15 | set(gflags_INSTALL ${CMAKE_BINARY_DIR}/external/gflags-install) 16 | 17 | # we build gflags statically, but want to link it into the caffe shared library 18 | # this requires position-independent code 19 | if (UNIX) 20 | set(GFLAGS_EXTRA_COMPILER_FLAGS "-fPIC") 21 | endif() 22 | 23 | set(GFLAGS_CXX_FLAGS ${CMAKE_CXX_FLAGS} ${GFLAGS_EXTRA_COMPILER_FLAGS}) 24 | set(GFLAGS_C_FLAGS ${CMAKE_C_FLAGS} ${GFLAGS_EXTRA_COMPILER_FLAGS}) 25 | 26 | ExternalProject_Add(gflags 27 | PREFIX ${gflags_PREFIX} 28 | GIT_REPOSITORY "https://github.com/gflags/gflags.git" 29 | GIT_TAG "v2.1.2" 30 | UPDATE_COMMAND 
"" 31 | INSTALL_DIR ${gflags_INSTALL} 32 | CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} 33 | -DCMAKE_INSTALL_PREFIX=${gflags_INSTALL} 34 | -DBUILD_SHARED_LIBS=OFF 35 | -DBUILD_STATIC_LIBS=ON 36 | -DBUILD_PACKAGING=OFF 37 | -DBUILD_TESTING=OFF 38 | -DBUILD_NC_TESTS=OFF 39 | -BUILD_CONFIG_TESTS=OFF 40 | -DINSTALL_HEADERS=ON 41 | -DCMAKE_C_FLAGS=${GFLAGS_C_FLAGS} 42 | -DCMAKE_CXX_FLAGS=${GFLAGS_CXX_FLAGS} 43 | LOG_DOWNLOAD 1 44 | LOG_INSTALL 1 45 | ) 46 | 47 | set(GFLAGS_FOUND TRUE) 48 | set(GFLAGS_INCLUDE_DIRS ${gflags_INSTALL}/include) 49 | if(MSVC) 50 | set(GFLAGS_LIBRARIES ${gflags_INSTALL}/lib/gflags.lib ${CMAKE_THREAD_LIBS_INIT}) 51 | else() 52 | set(GFLAGS_LIBRARIES ${gflags_INSTALL}/lib/libgflags.a ${CMAKE_THREAD_LIBS_INIT}) 53 | endif() 54 | set(GFLAGS_LIBRARY_DIRS ${gflags_INSTALL}/lib) 55 | set(GFLAGS_EXTERNAL TRUE) 56 | 57 | list(APPEND external_project_dependencies gflags) 58 | endif() 59 | 60 | endif() 61 | -------------------------------------------------------------------------------- /repo/ps-lite/src/base/threadsafe_limited_queue.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "base/common.h" 7 | 8 | namespace ps { 9 | 10 | template 11 | class ThreadsafeLimitedQueue { 12 | public: 13 | ThreadsafeLimitedQueue() { } 14 | ThreadsafeLimitedQueue(size_t capacity) { setMaxCapacity(capacity); } 15 | void setMaxCapacity(size_t capacity) { max_capacity_ = capacity; } 16 | 17 | void push(const T& value, size_t capacity, bool finished = false) { 18 | CHECK(!done_) << "must not call push again if *finished* is set true"; 19 | if (capacity > max_capacity_) { 20 | LL << "push obj with size " << capacity 21 | << " into queue with capacity " << max_capacity_ 22 | << ". 
you will be blocked here forever..."; 23 | } 24 | // do not insert 25 | if (finished == false && capacity == 0) return; 26 | std::unique_lock l(mu_); 27 | full_cond_.wait(l, [this, capacity]{ 28 | return (capacity + cur_capacity_ <= max_capacity_); }); 29 | queue_.push(std::move(std::make_pair(value, capacity))); 30 | cur_capacity_ += capacity; 31 | done_ = finished; 32 | empty_cond_.notify_all(); 33 | } 34 | 35 | bool pop(T& value) { 36 | std::unique_lock l(mu_); 37 | // already finished 38 | if (done_ && queue_.empty()) return false; 39 | 40 | empty_cond_.wait(l, [this]{ return !queue_.empty(); }); 41 | std::pair e = std::move(queue_.front()); 42 | 43 | // an empty item, which is inserted only when finished 44 | if (e.second == 0) { 45 | CHECK(done_); 46 | return false; 47 | } 48 | 49 | // get a valid item 50 | value = std::move(e.first); 51 | cur_capacity_ -= e.second; 52 | queue_.pop(); 53 | full_cond_.notify_all(); 54 | return true; 55 | } 56 | 57 | size_t size() const { 58 | std::lock_guard l(mu_); 59 | return queue_.size(); 60 | } 61 | 62 | bool empty() const { 63 | return size() == 0; 64 | } 65 | 66 | private: 67 | mutable std::mutex mu_; 68 | bool done_ = false; 69 | size_t max_capacity_ = 0, cur_capacity_ = 0; 70 | std::queue > queue_; 71 | std::condition_variable empty_cond_, full_cond_; 72 | }; 73 | } // namespace ps 74 | -------------------------------------------------------------------------------- /repo/dmlc-core/cmake/Modules/FindHDFS.cmake: -------------------------------------------------------------------------------- 1 | # DerivedFrom: https://github.com/cloudera/Impala/blob/cdh5-trunk/cmake_modules/FindHDFS.cmake 2 | # - Find HDFS (hdfs.h and libhdfs.so) 3 | # This module defines 4 | # Hadoop_VERSION, version string of ant if found 5 | # HDFS_INCLUDE_DIR, directory containing hdfs.h 6 | # HDFS_LIBRARIES, location of libhdfs.so 7 | # HDFS_FOUND, whether HDFS is found. 8 | # hdfs_static, imported static hdfs library. 
9 | 10 | exec_program(hadoop ARGS version OUTPUT_VARIABLE Hadoop_VERSION 11 | RETURN_VALUE Hadoop_RETURN) 12 | 13 | # currently only looking in HADOOP_HOME 14 | find_path(HDFS_INCLUDE_DIR hdfs.h PATHS 15 | $ENV{HADOOP_HOME}/include/ 16 | # make sure we don't accidentally pick up a different version 17 | NO_DEFAULT_PATH 18 | ) 19 | 20 | if ("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8") 21 | set(arch_hint "x64") 22 | elseif ("$ENV{LIB}" MATCHES "(amd64|ia64)") 23 | set(arch_hint "x64") 24 | else () 25 | set(arch_hint "x86") 26 | endif() 27 | 28 | message(STATUS "Architecture: ${arch_hint}") 29 | 30 | if ("${arch_hint}" STREQUAL "x64") 31 | set(HDFS_LIB_PATHS $ENV{HADOOP_HOME}/lib/native) 32 | else () 33 | set(HDFS_LIB_PATHS $ENV{HADOOP_HOME}/lib/native) 34 | endif () 35 | 36 | message(STATUS "HDFS_LIB_PATHS: ${HDFS_LIB_PATHS}") 37 | 38 | find_library(HDFS_LIB NAMES hdfs PATHS 39 | ${HDFS_LIB_PATHS} 40 | # make sure we don't accidentally pick up a different version 41 | NO_DEFAULT_PATH 42 | ) 43 | 44 | if (HDFS_LIB) 45 | set(HDFS_FOUND TRUE) 46 | set(HDFS_LIBRARIES ${HDFS_LIB}) 47 | set(HDFS_STATIC_LIB ${HDFS_LIB_PATHS}/libhdfs.a) 48 | 49 | add_library(hdfs_static STATIC IMPORTED) 50 | set_target_properties(hdfs_static PROPERTIES IMPORTED_LOCATION ${HDFS_STATIC_LIB}) 51 | 52 | else () 53 | set(HDFS_FOUND FALSE) 54 | endif () 55 | 56 | if (HDFS_FOUND) 57 | if (NOT HDFS_FIND_QUIETLY) 58 | message(STATUS "${Hadoop_VERSION}") 59 | message(STATUS "HDFS_INCLUDE_DIR: ${HDFS_INCLUDE_DIR}") 60 | message(STATUS "HDFS_LIBRARIES: ${HDFS_LIBRARIES}") 61 | message(STATUS "hdfs_static: ${HDFS_STATIC_LIB}") 62 | endif () 63 | else () 64 | message(FATAL_ERROR "HDFS includes and libraries NOT found." 
65 | "(${HDFS_INCLUDE_DIR}, ${HDFS_LIB})") 66 | endif () 67 | 68 | mark_as_advanced( 69 | HDFS_LIBRARIES 70 | HDFS_INCLUDE_DIR 71 | hdfs_static 72 | ) 73 | -------------------------------------------------------------------------------- /repo/ps-lite/src/system/ps-inl.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file ps-inl.h 3 | * @brief Implementation of ps.h 4 | */ 5 | #pragma once 6 | namespace ps { 7 | 8 | inline Filter* SyncOpts::AddFilter(Filter::Type type) { 9 | filters.push_back(Filter()); 10 | filters.back().set_type(type); 11 | return &(filters.back()); 12 | } 13 | 14 | 15 | inline Task SyncOpts::GetTask() const { 16 | Task req; 17 | req.set_request(true); 18 | for (int l : deps) req.add_wait_time(l); 19 | for (const auto& f : filters) req.add_filter()->CopyFrom(f); 20 | if (cmd != 0) req.set_cmd(cmd); 21 | return req; 22 | } 23 | 24 | /// DEPRECATED 25 | 26 | inline int NextCustomerID() { 27 | return Postoffice::instance().manager().NextCustomerID(); 28 | } 29 | 30 | 31 | 32 | // The app this node runs 33 | inline App* MyApp() { return Postoffice::instance().manager().app(); } 34 | 35 | /*! \brief The global unique string ID of this node */ 36 | inline Node MyNode() { return Postoffice::instance().manager().van().my_node(); } 37 | // Each unique string id of my node 38 | inline std::string MyNodeID() { return MyNode().id(); } 39 | /*! \brief Return true if this node is a worker node. */ 40 | inline int IsWorkerNode() { return MyNode().role() == Node::WORKER; } 41 | /*! \brief Return true if this node is a server node. */ 42 | inline int IsServerNode() { return MyNode().role() == Node::SERVER; } 43 | /*! \brief Return true if this node is a scheduler node. 
*/ 44 | inline int IsSchedulerNode() { return MyNode().role() == Node::SCHEDULER; } 45 | 46 | inline std::string SchedulerID() { 47 | return Postoffice::instance().manager().van().scheduler().id(); 48 | } 49 | inline Range MyKeyRange() { return Range(MyNode().key()); } 50 | 51 | // The rank ID of this node in its group. Assume this a worker node in a worker 52 | // group with N workers. Then this node will be assigned an unique ID from 0, 53 | // ..., N. Similarly for server and scheduler. 54 | inline int MyRank() { return MyNode().rank(); } 55 | // Total nodes in this node group. 56 | inline int RankSize() { 57 | auto& mng = Postoffice::instance().manager(); 58 | return IsWorkerNode() ? mng.num_workers() : (IsServerNode() ? mng.num_servers() : 1); 59 | } 60 | 61 | inline int NumWorkers() { return FLAGS_num_workers; } 62 | inline int NumServers() { return FLAGS_num_servers; } 63 | 64 | } // namespace ps 65 | -------------------------------------------------------------------------------- /repo/ps-lite/cmake/External/glog.cmake: -------------------------------------------------------------------------------- 1 | # glog depends on gflags 2 | include("cmake/External/gflags.cmake") 3 | 4 | set(GFLAGS_ROOT_DIR ${gflags_INSTALL}) 5 | 6 | if (NOT __GLOG_INCLUDED) 7 | set(__GLOG_INCLUDED TRUE) 8 | 9 | # try the system-wide glog first 10 | find_package(Glog) 11 | if (GLOG_FOUND) 12 | set(GLOG_EXTERNAL FALSE) 13 | else() 14 | # fetch and build glog from github 15 | 16 | # build directory 17 | set(glog_PREFIX ${CMAKE_BINARY_DIR}/external/glog-prefix) 18 | # install directory 19 | set(glog_INSTALL ${CMAKE_BINARY_DIR}/external/glog-install) 20 | 21 | # we build glog statically, but want to link it into the caffe shared library 22 | # this requires position-independent code 23 | if (UNIX) 24 | set(GLOG_EXTRA_COMPILER_FLAGS "-fPIC") 25 | endif() 26 | 27 | set(GLOG_CXX_FLAGS ${CMAKE_CXX_FLAGS} ${GLOG_EXTRA_COMPILER_FLAGS}) 28 | set(GLOG_C_FLAGS ${CMAKE_C_FLAGS} 
${GLOG_EXTRA_COMPILER_FLAGS}) 29 | 30 | # depend on gflags if we're also building it 31 | if (GFLAGS_EXTERNAL) 32 | set(GLOG_DEPENDS gflags) 33 | endif() 34 | 35 | 36 | ExternalProject_Add(glog 37 | DEPENDS ${GLOG_DEPENDS} 38 | PREFIX ${glog_PREFIX} 39 | GIT_REPOSITORY "https://github.com/google/glog" 40 | UPDATE_COMMAND "" 41 | INSTALL_DIR ${glog_INSTALL} 42 | CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} 43 | -DCMAKE_INSTALL_PREFIX=${glog_INSTALL} 44 | -DCMAKE_PREFIX_PATH=${gflags_INSTALL} 45 | -Dgflags_FOUND=ON 46 | -Dgflags_INCLUDE_DIR=${gflags_INSTALL}/include 47 | -Dgflags_LIBRARY=${gflags_INSTALL}/lib/libgflags.a 48 | -DBUILD_SHARED_LIBS=OFF 49 | -DINSTALL_HEADERS=ON 50 | -DCMAKE_C_FLAGS=${GLOG_C_FLAGS} 51 | -DCMAKE_CXX_FLAGS=${GLOG_CXX_FLAGS} 52 | LOG_DOWNLOAD 1 53 | LOG_CONFIGURE 1 54 | LOG_INSTALL 1 55 | ) 56 | 57 | set(GLOG_FOUND TRUE) 58 | set(GLOG_INCLUDE_DIRS ${glog_INSTALL}/include) 59 | if(MSVC) 60 | set(GLOG_LIBRARIES ${GFLAGS_LIBRARIES} ${glog_INSTALL}/lib/glog.lib) 61 | else() 62 | set(GLOG_LIBRARIES ${GFLAGS_LIBRARIES} ${glog_INSTALL}/lib/libglog.a) 63 | endif() 64 | set(GLOG_LIBRARY_DIRS ${glog_INSTALL}/lib) 65 | set(GLOG_EXTERNAL TRUE) 66 | 67 | list(APPEND external_project_dependencies glog) 68 | endif() 69 | 70 | endif() 71 | 72 | -------------------------------------------------------------------------------- /repo/ps-lite/src/README.md: -------------------------------------------------------------------------------- 1 | # Introduction {#mainpage} 2 | 3 | The parameter server aims for high-performance distributed machine learning 4 | applications. In this framework, multiple nodes run over multiple machines to 5 | solve machine learning problems. The role of a node can be server, worker, or 6 | scheduler, which can be queried via \ref ps::NodeInfo.
7 | 8 | ![ps arch](https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/ps-arch.png) 9 | 10 | ### Worker node 11 | A worker node performs the main computations such as reading the data and 12 | computing the gradient. It communicates with the server nodes via `push` and 13 | `pull`. For example, it pushes the computed gradient to the servers, or pulls 14 | the recent model from them. The data communicated are presented as key-value 15 | pairs, where the key might be the `uint64_t` (defined by `ps::Key`) feature 16 | index and the value might be the according `float` gradient. 17 | 1. Basic synchronization functions: \ref ps::KVWorker::Push, \ref 18 | ps::KVWorker::Pull, and \ref ps::KVWorker::Wait 19 | 2. Dynamic length value push and pull: \ref ps::KVWorker::VPush and \ref 20 | ps::KVWorker::VPull 21 | 3. Zero-copy versions: \ref ps::KVWorker::ZPush, \ref 22 | ps::KVWorker::ZPull, \ref ps::KVWorker::ZVPush and \ref 23 | ps::KVWorker::ZVPull 24 | 25 | ### Server node 26 | 27 | A server node maintains and updates the model. Each node maintains only a part 28 | of the model, often server i handles the keys (feature indices) within the i-th 29 | segment of [0, uint64_max]. The server node allows user-defined handles to 30 | process the `push` and `pull` requests from the workers. 31 | 1. Online key-value store \ref ps::OnlineServer 32 | 2. Example user-defined value: \ref ps::IVal 33 | 3. Example user-defined handle: \ref ps::IOnlineHandle 34 | 35 | ### Scheduler node 36 | There is an optional scheduler node, which is often used to monitor and control the 37 | progress of the machine learning application. It also can be used to deal with node 38 | failures. See an example in [asynchronous SGD](https://github.com/dmlc/wormhole/blob/master/learn/solver/async_sgd.h#L27). 39 | 40 | ### More 41 | The source codes are available at 42 | [github.com/dmlc/ps-lite](https://github.com/dmlc/ps-lite), which are licensed 43 | under Apache 2.0. 
44 | -------------------------------------------------------------------------------- /repo/ps-lite/guide/example_e.cc: -------------------------------------------------------------------------------- 1 | #include "ps.h" 2 | using Val = float; 3 | using Key = ps::Key; 4 | 5 | struct MyVal { 6 | std::vector w; 7 | inline void Load(dmlc::Stream *fi) { fi->Read(&w); } 8 | inline void Save(dmlc::Stream *fo) const { fo->Write(w); } 9 | inline bool Empty() const { return w.empty(); } 10 | }; 11 | 12 | class MyHandle { 13 | public: 14 | void Start(bool push, int timestamp, int cmd, void* msg) { 15 | ps::Message *m = (ps::Message*) msg; 16 | std::cout << "-------\naccepts " << (push ? "push" : "pull") << " from " << m->sender 17 | << " with timestamp " << timestamp 18 | << " and command " << cmd 19 | << std::endl; 20 | ts_ = timestamp; 21 | } 22 | 23 | void Finish() { 24 | std::cout << "finished timestamp " << ts_ 25 | << "\n-------" << std::endl; 26 | } 27 | 28 | void Push(Key recv_key, ps::Blob recv_val, MyVal& my_val) { 29 | size_t n = recv_val.size; 30 | auto& w = my_val.w; 31 | if (w.empty()) w.resize(n); 32 | for (size_t i = 0; i < n; ++i) w[i] += recv_val[i]; 33 | 34 | std::cout << "handle push: key " << recv_key << ", val " << recv_val << std::endl; 35 | } 36 | 37 | void Pull(Key recv_key, MyVal& my_val, ps::Blob& send_val) { 38 | send_val.data = my_val.w.data(); 39 | send_val.size = my_val.w.size(); 40 | 41 | std::cout << "handle pull: key " << recv_key << std::endl; 42 | } 43 | 44 | inline void Load(dmlc::Stream *fi) { } 45 | inline void Save(dmlc::Stream *fo) const { } 46 | private: 47 | int ts_ = 0; 48 | }; 49 | 50 | int CreateServerNode(int argc, char *argv[]) { 51 | using Server = ps::OnlineServer; 52 | Server server; 53 | return 0; 54 | } 55 | 56 | int WorkerNodeMain(int argc, char *argv[]) { 57 | using namespace ps; 58 | KVWorker wk; 59 | std::vector key = {1, 3, 8 }; 60 | std::vector val = {1, 3, 4, 5, 9, 10}; 61 | std::vector siz = {1, 3, 2 }; 62 | 63 | 
std::vector recv_val; 64 | std::vector recv_siz; 65 | 66 | wk.Wait(wk.VPush(key, val, siz)); 67 | wk.Wait(wk.VPull(key, &recv_val, &recv_siz)); 68 | 69 | std::cout << "values pulled at " << MyNodeID() << ": " 70 | << Blob(recv_val) << "\n" 71 | << Blob(recv_siz) << std::endl; 72 | return 0; 73 | } 74 | -------------------------------------------------------------------------------- /repo/dmlc-core/tracker/dmlc_tracker/mpi.py: -------------------------------------------------------------------------------- 1 | """ 2 | DMLC submission script, MPI version 3 | """ 4 | # pylint: disable=invalid-name 5 | from __future__ import absolute_import 6 | 7 | import subprocess, logging 8 | from threading import Thread 9 | from . import tracker 10 | 11 | def get_mpi_env(envs): 12 | """get the mpirun command for setting the envornment 13 | support both openmpi and mpich2 14 | """ 15 | # decide MPI version. 16 | (_, err) = subprocess.Popen('mpirun', 17 | stdout=subprocess.PIPE, 18 | stderr=subprocess.PIPE).communicate() 19 | cmd = '' 20 | if 'Open MPI' in err: 21 | for k, v in envs.items(): 22 | cmd += ' -x %s=%s' % (k, str(v)) 23 | elif 'mpich' in err: 24 | for k, v in envs.items(): 25 | cmd += ' -env %s %s' % (k, str(v)) 26 | else: 27 | raise RuntimeError('Unknown MPI Version') 28 | return cmd 29 | 30 | 31 | def submit(args): 32 | """Submission script with MPI.""" 33 | def mpi_submit(nworker, nserver, pass_envs): 34 | """Internal closure for job submission.""" 35 | def run(prog): 36 | """run the program""" 37 | subprocess.check_call(prog, shell=True) 38 | 39 | cmd = '' 40 | if args.host_file is not None: 41 | cmd = '--hostfile %s ' % (args.host_file) 42 | cmd += ' ' + ' '.join(args.command) 43 | 44 | pass_envs['DMLC_JOB_CLUSTER'] = 'mpi' 45 | 46 | # start workers 47 | if nworker > 0: 48 | logging.info('Start %d workers by mpirun' % nworker) 49 | pass_envs['DMLC_ROLE'] = 'worker' 50 | prog = 'mpirun -n %d %s %s' % (nworker, get_mpi_env(pass_envs), cmd) 51 | thread = 
Thread(target=run, args=(prog,)) 52 | thread.setDaemon(True) 53 | thread.start() 54 | 55 | 56 | # start servers 57 | if nserver > 0: 58 | logging.info('Start %d servers by mpirun' % nserver) 59 | pass_envs['DMLC_ROLE'] = 'server' 60 | prog = 'mpirun -n %d %s %s' % (nserver, get_mpi_env(pass_envs), cmd) 61 | thread = Thread(target=run, args=(prog,)) 62 | thread.setDaemon(True) 63 | thread.start() 64 | 65 | 66 | tracker.submit(args.num_workers, args.num_servers, 67 | fun_submit=mpi_submit, 68 | pscmd=(' '.join(args.command))) 69 | -------------------------------------------------------------------------------- /src/dump.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "dmlc/io.h" 6 | #include "dmlc/logging.h" 7 | 8 | using namespace std; 9 | using namespace dmlc; 10 | typedef int64_t K; 11 | 12 | class Dump { 13 | 14 | public: 15 | 16 | Dump(string file_in, string file_out) : file_in_(file_in),file_out_(file_out) {} 17 | ~Dump() {data_.clear();} 18 | 19 | // value type stored on sever nodes, can be also other Entrys 20 | struct FTRLEntry { 21 | float w = 0; 22 | float z= 0; 23 | float sq_cum_grad = 0; 24 | inline void Load(Stream *fi) { 25 | CHECK_EQ(fi->Read(&w, sizeof(float)), sizeof(float)); 26 | CHECK_EQ(fi->Read(&z, sizeof(float)), sizeof(float)); 27 | CHECK_EQ(fi->Read(&sq_cum_grad, sizeof(float)), sizeof(float)); 28 | } 29 | 30 | inline bool Empty() const { return w == 0;} 31 | }; 32 | 33 | void LoadModel(const std::string filename) { 34 | Stream* fi = CHECK_NOTNULL(Stream::Create(filename.c_str(), "r")); 35 | K key; 36 | while (true) { 37 | if (fi->Read(&key, sizeof(K)) != sizeof(K)) break; 38 | data_[key].Load(fi); 39 | } 40 | cout << "loaded " << data_.size() << " kv pairs\n"; 41 | } 42 | 43 | // how to dump the info 44 | void DumpModel(const std::string filename) { 45 | Stream* fo = CHECK_NOTNULL(Stream::Create(filename.c_str(), "w")); 46 | dmlc::ostream 
os(fo); 47 | int dumped = 0; 48 | for (const auto& it : data_) { 49 | if (it.second.Empty()) continue; 50 | os << it.first << '\t' << it.second.w << '\t' << it.second.z << '\t' << it.second.sq_cum_grad <<'\n'; // check your entry 51 | dumped ++; 52 | } 53 | cout << "dumped " << dumped << " kv pairs\n"; 54 | } 55 | 56 | void run() { 57 | LoadModel(file_in_); 58 | DumpModel(file_out_); 59 | } 60 | 61 | private: 62 | unordered_map data_; 63 | string file_in_; 64 | string file_out_; 65 | }; 66 | 67 | int main(int argc, char *argv[]) { 68 | if (argc < 3) { 69 | cout << "Usage: \n"; 70 | return 0; 71 | } 72 | //google::InitGoogleLogging(argv[0]); 73 | string model_in, dump_out; 74 | for (int i = 1; i < argc; ++i) { 75 | char name[256], val[256]; 76 | if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) { 77 | if (!strcmp(name, "model_in")) model_in = val; 78 | if (!strcmp(name, "dump_out")) dump_out = val; 79 | } 80 | } 81 | Dump d(model_in, dump_out); 82 | d.run(); 83 | return 0; 84 | } 85 | 86 | 87 | -------------------------------------------------------------------------------- /repo/ps-lite/src/filter/frequency_filter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "base/countmin.h" 3 | #include "base/shared_array_inl.h" 4 | namespace ps { 5 | 6 | /** 7 | * @brief Remove infrequent keys via the countmin sketch 8 | * @tparam K key type 9 | * @tparam V counter type 10 | */ 11 | template 12 | class FreqencyFilter { 13 | public: 14 | /** 15 | * @brief Add keys with their key count 16 | * 17 | * @param key the list of keys 18 | * @param count the according frequency count 19 | */ 20 | void InsertKeys(const SArray& key, const SArray& count); 21 | 22 | /** 23 | * @brief IFilters infrequency keys 24 | * 25 | * @param key the list of keys 26 | * @param freq_thr the frequency threshold 27 | * 28 | * @return the keys whose frequency is greater than freq_thr 29 | */ 30 | SArray QueryKeys(const SArray& key, int 
freq_thr); 31 | 32 | bool Empty() { return count_.empty(); } 33 | 34 | /** 35 | * @brief resize the countmin sketch 36 | * 37 | */ 38 | void Resize(int n, int k) { count_.resize(n, k, 254); } 39 | 40 | void Clear() { count_.clear(); } 41 | 42 | private: 43 | CountMin count_; 44 | }; 45 | 46 | // countmin implementation 47 | template 48 | SArray FreqencyFilter::QueryKeys(const SArray& key, int freqency) { 49 | CHECK_LT(freqency, kuint8max) << "change to uint16 or uint32..."; 50 | SArray filtered_key; 51 | for (auto k : key) { 52 | if ((int)count_.query(k) > freqency) { 53 | filtered_key.push_back(k); 54 | } 55 | } 56 | return filtered_key; 57 | } 58 | 59 | template 60 | void FreqencyFilter::InsertKeys(const SArray& key, const SArray& count) { 61 | CHECK_EQ(key.size(), count.size()); 62 | for (size_t i = 0; i < key.size(); ++i) { 63 | count_.insert(key[i], count[i]); 64 | } 65 | } 66 | 67 | // DEPRECATED hash implementation 68 | // std::unordered_map map_; 69 | 70 | // template 71 | // SArray FreqencyIFilter::QueryKeys(const SArray& key, int freqency) { 72 | // SArray filtered_key; 73 | // for (K k : key) { 74 | // if (map_[k] > freqency) filtered_key.push_back(k); 75 | // } 76 | // return filtered_key; 77 | // } 78 | 79 | // template 80 | // void FreqencyIFilter::InsertKeys(const SArray& key, const SArray& count) { 81 | // CHECK_EQ(key.size(), count.size()); 82 | // for (size_t i = 0; i < key.size(); ++i) { 83 | // map_[key[i]] += count[i]; 84 | // } 85 | // } 86 | 87 | } 88 | -------------------------------------------------------------------------------- /src/server.h: -------------------------------------------------------------------------------- 1 | #include "iostream" 2 | #include "ps.h" 3 | 4 | namespace dmlc{ 5 | namespace linear{ 6 | 7 | struct ISGDHandle{ 8 | public: 9 | ISGDHandle(){ ns_ = ps::NodeInfo::NumServers();} 10 | float alpha = 0.1, beta = 1.0; 11 | inline void Start(bool push, int timestamp, int cmd, void* msg) { }//must has 12 | void 
Load(Stream* fi) { }//must has 13 | void Save(Stream *fo) const { }//must has 14 | inline void Finish(){ }//must has 15 | private: 16 | int ns_ = 0; 17 | static int64_t new_w; 18 | }; 19 | 20 | template 21 | inline void TSave(Stream* fo, T* const ptr){ 22 | fo->Write(&ptr->w, sizeof(float)); 23 | } 24 | struct FTRLEntry{ 25 | float w = 0; 26 | float z = 0; 27 | float sq_cum_grad = 0; 28 | inline void Load(Stream *fi) { }//must has 29 | inline void Save(Stream *fo) const { 30 | TSave(fo, this); 31 | }//must has 32 | inline bool Empty() const { return w == 0; }//must has 33 | }; 34 | 35 | struct FTRLHandle : public ISGDHandle{ 36 | public: 37 | inline void Push(ps::Key key, ps::Blob grad, FTRLEntry& val){ 38 | float g = grad[0]; 39 | float sqrt_n = val.sq_cum_grad; 40 | float sqrt_n_new = sqrt(sqrt_n * sqrt_n + g * g); 41 | val.z += g - (sqrt_n_new - sqrt_n); 42 | val.sq_cum_grad = sqrt_n_new; 43 | float z = val.z; 44 | if(fabs(z) <= lambda1){ 45 | val.w = 0.0; 46 | } 47 | else{ 48 | float tmpr= 0.0; 49 | if(z >= 0) tmpr = z - lambda1; 50 | else tmpr = z + lambda1; 51 | float tmpl = -1 * ( ( beta + val.sq_cum_grad - sqrt_n) / alpha + lambda2 ); 52 | val.w = tmpr / tmpl; 53 | } 54 | } 55 | 56 | inline void Pull(ps::Key key, const FTRLEntry& val, ps::Blob& send){ 57 | send[0] = val.w; 58 | } 59 | 60 | private: 61 | float lambda1 = 1.0; 62 | float lambda2 = 1.0; 63 | }; 64 | 65 | class Server : public ps::App{ 66 | public: 67 | Server(){ 68 | CreateServer(); 69 | } 70 | ~Server(){} 71 | 72 | template 73 | void CreateServer(){ 74 | Handle h; 75 | ps::OnlineServer s(h); 76 | } 77 | 78 | virtual void ProcessRequest(ps::Message* request) { } 79 | }; 80 | }//end linear 81 | }//end dmlc 82 | -------------------------------------------------------------------------------- /repo/dmlc-core/src/io/uri_spec.h: -------------------------------------------------------------------------------- 1 | /*!
2 | * Copyright (c) 2015 by Contributors 3 | * \file uri_spec.h 4 | * \brief common specification of sugars in URI 5 | * string passed to dmlc Create functions 6 | * such as local file cache 7 | * \author Tianqi Chen 8 | */ 9 | #ifndef DMLC_IO_URI_SPEC_H_ 10 | #define DMLC_IO_URI_SPEC_H_ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "./filesys.h" 19 | 20 | namespace dmlc { 21 | namespace io { 22 | /*! 23 | * \brief some super set of URI 24 | * that allows sugars to be passed around 25 | * Example: 26 | * 27 | * hdfs:///mylibsvm/?format=libsvm&clabel=0#mycache-file. 28 | */ 29 | class URISpec { 30 | public: 31 | /*! \brief the real URI */ 32 | std::string uri; 33 | /*! \brief arguments in the URL */ 34 | std::map args; 35 | /*! \brief the path to cache file */ 36 | std::string cache_file; 37 | /*! 38 | * \brief constructor. 39 | * \param uri The raw uri string. 40 | * \param part_index The parition index of the part. 41 | * \param num_parts total number of parts. 
42 | */ 43 | explicit URISpec(const std::string& uri, 44 | unsigned part_index, 45 | unsigned num_parts) { 46 | std::vector name_cache = Split(uri, '#'); 47 | 48 | if (name_cache.size() == 2) { 49 | std::ostringstream os; 50 | os << name_cache[1]; 51 | if (num_parts != 1) { 52 | os << ".split" << num_parts << ".part" << part_index; 53 | } 54 | this->cache_file = os.str(); 55 | } else { 56 | CHECK_EQ(name_cache.size(), 1) 57 | << "only one `#` is allowed in file path for cachefile specification"; 58 | } 59 | std::vector name_args = Split(name_cache[0], '?'); 60 | if (name_args.size() == 2) { 61 | std::vector arg_list = Split(name_args[1], '&'); 62 | for (size_t i = 0; i < arg_list.size(); ++i) { 63 | std::istringstream is(arg_list[i]); 64 | std::pair kv; 65 | CHECK(std::getline(is, kv.first, '=')) << "Invalid uri argument format"; 66 | CHECK(std::getline(is, kv.second)) << "Invalid uri argument format"; 67 | this->args.insert(kv); 68 | } 69 | } else { 70 | CHECK_EQ(name_args.size(), 1) 71 | << "only one `#` is allowed in file path for cachefile specification"; 72 | } 73 | this->uri = name_args[0]; 74 | } 75 | }; 76 | } // namespace io 77 | } // namespace dmlc 78 | #endif // DMLC_IO_URI_SPEC_H_ 79 | -------------------------------------------------------------------------------- /repo/ps-lite/src/filter/key_caching.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "filter/filter.h" 4 | // #include "base/crc32c.h" 5 | namespace ps { 6 | 7 | /// \brief Caches the key lists on both sender and receiver to avoid duplicated 8 | /// communication 9 | class KeyCachingFilter : public IFilter { 10 | public: 11 | // thread safe 12 | void Encode(Message* msg) { 13 | // if (!msg->task.has_key_range()) return; 14 | auto conf = Find(Filter::KEY_CACHING, msg); 15 | if (!conf) return; 16 | 17 | const auto& key = msg->key; 18 | if (key.size() < min_len_) { 19 | conf->clear_signature(); 20 | return; 21 | } 22 
| 23 | uint64 sig = FastHash(key); 24 | conf->set_signature(sig); 25 | 26 | Lock l(mu_); 27 | bool clear = conf->clear_cache() && IsDone(msg->task); 28 | auto it = cache_.find(sig); 29 | if (it != cache_.end() && it->second.size() == key.size()) { 30 | // hit cache 31 | msg->clear_key(); 32 | if (clear) cache_.erase(it); 33 | } else { 34 | // not hit 35 | if (!clear) cache_[sig] = key; 36 | } 37 | } 38 | 39 | void Decode(Message* msg) { 40 | // if (!msg->task.has_key_range()) return; 41 | auto conf = Find(Filter::KEY_CACHING, msg); 42 | if (!conf || !conf->has_signature()) return; 43 | auto sig = conf->signature(); 44 | // do a double check 45 | if (msg->has_key()) CHECK_EQ(FastHash(msg->key), sig); 46 | 47 | Lock l(mu_); 48 | bool clear = conf->clear_cache() && IsDone(msg->task); 49 | if (msg->has_key()) { 50 | if (!clear) cache_[sig] = msg->key; 51 | } else { 52 | // a lit bittle danger 53 | auto it = cache_.find(sig); 54 | CHECK(it != cache_.end()) << "invalid key cache"; 55 | msg->set_key(it->second); 56 | if (clear) cache_.erase(it); 57 | } 58 | } 59 | 60 | 61 | private: 62 | bool IsDone(const Task& task) { 63 | return (!task.request() || 64 | (task.has_param() 65 | && task.param().push())); 66 | } 67 | 68 | inline uint64 Hash64(const char* buf, size_t len) { 69 | return CityHash64(buf, len); 70 | } 71 | 72 | inline uint64 FastHash(const SArray& arr) { 73 | if (arr.size() < max_sig_len_) { 74 | return Hash64(arr.data(), arr.size()); 75 | } 76 | return (Hash64(arr.data(), max_sig_len_/2) ^ 77 | Hash64(arr.data()+arr.size()-max_sig_len_/2, max_sig_len_/2)); 78 | } 79 | 80 | std::unordered_map> cache_; 81 | 82 | const size_t min_len_ = 64; 83 | const size_t max_sig_len_ = 4096; 84 | std::mutex mu_; 85 | }; 86 | 87 | } // namespace 88 | -------------------------------------------------------------------------------- /repo/ps-lite/src/ps/blob.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | * \file blob.h 3 | * \brief Blob is a simple structure that contains a length and a pointer to an 4 | * external array, 5 | */ 6 | #pragma once 7 | #include 8 | #include 9 | #include "ps/base.h" 10 | #if USE_EIGEN 11 | #include "Eigen/src/Core/Map.h" 12 | #include "Eigen/src/Core/Array.h" 13 | #endif // DMLC_USE_EIGEN 14 | 15 | namespace ps { 16 | 17 | /** 18 | * \brief Binary Large OBject 19 | * 20 | * Blob is a simple structure 21 | * containing a pointer into some external storage and a size. The user of a 22 | * Blob must ensure that the blob is not used after the corresponding external 23 | * storage has been deallocated. 24 | * 25 | * \tparam T the date type 26 | */ 27 | template 28 | struct Blob { 29 | T* data; 30 | size_t size; 31 | 32 | /*! \brief Create an empty blob */ 33 | Blob() : data(NULL), size(0) { } 34 | 35 | /*! \brief Create a blob from a pointer */ 36 | Blob(T* d, size_t s) : data(d), size(s) { } 37 | 38 | /*! \brief Create a blob from std::vector */ 39 | Blob(const std::vector::type>& v) 40 | : data(v.data()), size(v.size()) { } 41 | Blob(std::vector* v) : data(v->data()), size(v->size()) { } 42 | 43 | inline T& operator[] (size_t n) const { 44 | CHECK_LT(n, size); 45 | return data[n]; 46 | } 47 | 48 | T* begin() { return data; } 49 | T* end() { return data+size; } 50 | 51 | /*! \brief Slics a segment [begin, end) */ 52 | Blob Slice(size_t begin, size_t end) const { 53 | CHECK_LE(begin, end); CHECK_LE(end, size); 54 | return Blob(data+begin, end-begin); 55 | } 56 | 57 | #if USE_EIGEN 58 | typedef Eigen::Map< 59 | Eigen::Array > EigenArrayMap; 60 | /*! \brief Return a size() by 1 Eigen3 Array */ 61 | EigenArrayMap EigenArray() const { 62 | return EigenArrayMap(data, size); 63 | } 64 | 65 | typedef Eigen::Map< 66 | const Eigen::Matrix > EigenMatrixMap; 67 | /*! 
\brief Return a size()/k by k Eigen3 Matrix */ 68 | EigenMatrixMap EigenMatrix(int k = 1) const { 69 | CHECK_EQ(size % k, 0); 70 | return EigenMatrixMap(data, size / k, k); 71 | } 72 | #endif // USE_EIGEN 73 | 74 | /** 75 | * \brief Returns the m head and m tail element as string 76 | */ 77 | std::string ShortDebugString(size_t m = 5) const { 78 | return DebugStr(data, size); 79 | } 80 | }; 81 | 82 | /// \brief for debug use 83 | template 84 | std::ostream& operator<<(std::ostream& os, const Blob& obj) { 85 | os << obj.ShortDebugString(); return os; 86 | } 87 | 88 | } // namespace ps 89 | -------------------------------------------------------------------------------- /repo/ps-lite/src/system/monitor.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file monitor.h 3 | * @brief A distributed monitor 4 | * 5 | */ 6 | #pragma once 7 | #include "ps/app.h" 8 | namespace ps { 9 | 10 | /** 11 | * @brief The master of the monitor, which collects reports from slavers and 12 | * display the progress 13 | * 14 | * @tparam Progress A proto buffer class 15 | */ 16 | template 17 | class MonitorMaster : public Customer { 18 | public: 19 | MonitorMaster(int id = NextCustomerID()) : Customer(id) {} 20 | 21 | typedef std::function*)> Printer; 23 | /** 24 | * @brief set the printer 25 | * 26 | * @param time_interval in sec 27 | * @param printer 28 | */ 29 | void set_printer(double time_interval, Printer printer) { 30 | timer_.start(); 31 | printer_ = printer; 32 | interval_ = time_interval; 33 | } 34 | 35 | typedef std::function Merger; 36 | /** 37 | * @brief set the merger 38 | * 39 | * @param merger merges two reports 40 | */ 41 | void set_merger(Merger merger) { 42 | merger_ = merger; 43 | } 44 | 45 | virtual void ProcessRequest(Message* request) { 46 | NodeID sender = request->sender; 47 | Progress prog; 48 | CHECK(prog.ParseFromString(request->task.msg())); 49 | if (merger_) { 50 | merger_(prog, &progress_[sender]); 51 | } else { 
52 | progress_[sender] = prog; 53 | } 54 | 55 | double time = timer_.stop(); 56 | if (time > interval_ && printer_) { 57 | total_time_ += time; 58 | printer_(total_time_, &progress_); 59 | timer_.restart(); 60 | } else { 61 | timer_.start(); 62 | } 63 | } 64 | private: 65 | std::unordered_map progress_; 66 | double interval_; 67 | Timer timer_; 68 | double total_time_ = 0; 69 | Merger merger_; 70 | Printer printer_; 71 | }; 72 | 73 | /** 74 | * @brief A slave monitor, which report to the master monitor 75 | * 76 | * @tparam Progress a proto class 77 | */ 78 | template 79 | class MonitorSlaver : public Customer { 80 | public: 81 | MonitorSlaver(const NodeID& master, int id = NextCustomerID()) 82 | : Customer(id), master_(master) { } 83 | virtual ~MonitorSlaver() { } 84 | 85 | /** 86 | * @brief Sends a report to the master 87 | * 88 | * @param prog 89 | */ 90 | void Report(const Progress& prog) { 91 | string str; CHECK(prog.SerializeToString(&str)); 92 | Task report; report.set_msg(str); 93 | Submit(report, master_); 94 | } 95 | protected: 96 | NodeID master_; 97 | }; 98 | 99 | } // namespace ps 100 | -------------------------------------------------------------------------------- /repo/ps-lite/src/base/bitmap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "base/common.h" 4 | 5 | namespace ps { 6 | class Bitmap; 7 | typedef std::shared_ptr BitmapPtr; 8 | 9 | #define BITCOUNT_(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255) 10 | #define BX_(x) ((x) - (((x)>>1)&0x77777777) \ 11 | - (((x)>>2)&0x33333333) \ 12 | - (((x)>>3)&0x11111111)) 13 | class Bitmap { 14 | public: 15 | Bitmap() { } 16 | Bitmap(uint32 size, bool value = false) { resize(size, value); } 17 | ~Bitmap() { clear(); } 18 | 19 | void resize(uint32 size, bool value = false) { 20 | CHECK_EQ(size_, 0) 21 | << "TODO didn't support resize non-empty bitmap... 
clear() first "; 22 | size_ = size; 23 | map_size_ = (size >> kBitmapShift) + 1; 24 | map_ = new uint16[map_size_]; 25 | fill(value); 26 | } 27 | 28 | void clear() { 29 | delete [] map_; 30 | map_ = nullptr; 31 | map_size_ = 0; 32 | size_ = 0; 33 | } 34 | 35 | void set(uint32 i) { 36 | map_[i>>kBitmapShift] |= (uint16) (1 << (i&kBitmapMask)); 37 | } 38 | void clear(uint32 i) { 39 | map_[i>>kBitmapShift] &= ~((uint16) (1 << (i&kBitmapMask))); 40 | } 41 | 42 | bool test(uint32 i) const { 43 | return static_cast((map_[i>>kBitmapShift] >> (i&kBitmapMask)) & 1); 44 | } 45 | bool operator[] (uint32 i) const { 46 | return test(i); 47 | } 48 | 49 | void fill(bool value) { 50 | if (value) 51 | memset(map_, 0xFF, map_size_*sizeof(uint16)); 52 | else 53 | memset(map_, 0, map_size_*sizeof(uint16)); 54 | } 55 | 56 | // TODO flip all bits 57 | void flip() { } 58 | 59 | uint32 size() const { return size_; } 60 | size_t memSize() const { return map_size_*sizeof(uint16); } 61 | 62 | // number of bit == true 63 | uint32 nnz() { 64 | if (!init_nnz_) { 65 | for(int i=0; i<65536; i++) 66 | LUT_[i] = (unsigned char)BITCOUNT_(i); 67 | init_nnz_ = true; 68 | } 69 | 70 | uint32 bn = size_ >> kBitmapShift; 71 | uint32 v = 0; 72 | for (uint32_t i = 0; i < bn; i++) 73 | v += LUT_[map_[i]]; 74 | return v + nnz(bn << kBitmapShift, size_); 75 | } 76 | 77 | private: 78 | uint32 nnz(uint32 start, uint32 end) { 79 | CHECK_LE(end, size_); 80 | uint32 v = 0; 81 | for (uint32 i = start; i < end; ++i) 82 | v += (*this)[i]; 83 | return v; 84 | } 85 | 86 | private: 87 | uint16* map_ = nullptr; 88 | uint32 map_size_ = 0; 89 | uint32 size_ = 0; 90 | 91 | static const uint32 kBitmapShift = 4; 92 | static const uint32 kBitmapMask = 0x0F; 93 | 94 | unsigned char LUT_[65536]; 95 | bool init_nnz_ = false; 96 | 97 | }; 98 | 99 | } // namespace ps 100 | -------------------------------------------------------------------------------- /repo/ps-lite/src/proto/task.proto: 
// Definition of Task, the basic unit exchanged between ps-lite nodes,
// together with the system control message and the supported data types.
package ps;
import "proto/range.proto";
import "proto/data.proto";
import "proto/node.proto";
import "proto/param.proto";
import "proto/filter.proto";
import "proto/assign_op.proto";

message Task {
  /// entries that can be used by user programs:

  // the place to store a small amount of user data
  optional bytes msg = 17;

  // stores a user-defined command code
  optional int32 cmd = 19 [default = 0];

  // a reduce operator
  optional AsOp op = 21;

  /// for system usage

  // true: system control task, typically *ctrl* should be set
  // false: a task for a customer, and *customer_id* should be set
  optional bool control = 1 [default = false];

  // true: a request task
  // false: the response task to the request task with the same *time*
  optional bool request = 2 [default = false];

  // the unique id of a customer
  optional int32 customer_id = 3;

  // the timestamp of this task
  optional int32 time = 5;

  // the depended tasks of this one. that is, this task is executed only if all
  // tasks from the same node with time contained in *wait_time* are finished.
  // only valid if *request*=true
  repeated int32 wait_time = 6;

  // the key range this task applies to
  optional PbRange key_range = 7;

  // namespace (channel) of the keys
  optional int32 key_channel = 8;

  // true: the message sent with this task will contain a list of keys
  optional bool has_key = 9 [default = false];

  // type of the keys
  optional DataType key_type = 13;

  // type of each attached value array
  repeated DataType value_type = 14 [packed=true];

  // filters applied to the attached data (compression, key caching, ...)
  repeated Filter filter = 12;

  // system control signals; only meaningful when *control*=true
  optional Control ctrl = 18;

  // for push & pull calls
  optional ParamCall param = 20;

  extensions 100 to 199;
}

message Control {
  enum Command {
    // a node => the scheduler
    REGISTER_NODE = 2;
    // REPORT_PERF = 3;
    READY_TO_EXIT = 4;
    READY_TO_RUN = 5;

    // the scheduler => a node
    ADD_NODE = 10;
    // UPDATE_NODE = 11;
    // REPLACE_NODE = 12;
    REMOVE_NODE = 13;
    EXIT = 14;
  }
  required Command cmd = 1;
  repeated Node node = 2;
}

// element types an attached data array may carry
enum DataType {
  OTHER = 0;
  INT8 = 1;
  INT16 = 2;
  INT32 = 3;
  INT64 = 4;
  UINT8 = 5;
  UINT16 = 6;
  UINT32 = 7;
  UINT64 = 8;
  FLOAT = 9;
  DOUBLE = 10;
  CHAR = 11;
}
CHECK(static_cast(&fs)->Read(©_data)); 24 | ASSERT_EQ(data, copy_data); 25 | } 26 | 27 | class MyClass { 28 | public: 29 | MyClass() {} 30 | MyClass(std::string data) : data_(data) {} 31 | inline void Save(dmlc::Stream *strm) const { 32 | strm->Write(this->data_); 33 | } 34 | inline bool Load(dmlc::Stream *strm) { 35 | return strm->Read(&data_); 36 | } 37 | inline bool operator==(const MyClass &other) const { 38 | return data_ == other.data_; 39 | } 40 | 41 | private: 42 | std::string data_; 43 | }; 44 | // need to declare the traits property of my class to dmlc 45 | namespace dmlc { DMLC_DECLARE_TRAITS(has_saveload, MyClass, true); } 46 | 47 | struct Param { 48 | int a; 49 | int b; 50 | Param() {} 51 | Param(int a, int b) : a(a), b(b) {} 52 | inline bool operator==(const Param &other) const { 53 | return a == other.a && b == other.b; 54 | } 55 | }; 56 | // need to declare the traits property of my class to dmlc 57 | namespace dmlc { DMLC_DECLARE_TRAITS(is_pod, Param, true); } 58 | 59 | // test serializer 60 | TEST(Serializer, basics) { 61 | int n = 10; 62 | std::vector a; 63 | for (int i = 0; i < n; ++i) { 64 | a.push_back(i); 65 | } 66 | TestSaveLoad(a); 67 | 68 | std::vector b; 69 | for (int i = 0; i < n; ++i) { 70 | std::string ss(i, 'a' + (i % 26)); 71 | b.push_back(ss); 72 | } 73 | TestSaveLoad(b); 74 | 75 | std::vector > temp {{1,2,3}, {1,2}, {1,2,3,4}}; 76 | TestSaveLoad(temp); 77 | TestSaveLoad( 78 | std::map {{1, "hellkow"}, {2, "world"}}); 79 | TestSaveLoad( 80 | std::unordered_map {{1, "hellkow"}, {2, "world"}}); 81 | TestSaveLoad( 82 | std::unordered_multimap {{1, "hellkow"}, {1, "world"}, {2, "111"}}); 83 | TestSaveLoad(std::set {"hjhjm", "asasa"}); 84 | TestSaveLoad(std::unordered_set {"hjhjm", "asasa"}); 85 | TestSaveLoad(std::list {"hjhjm", "asasa"}); 86 | TestSaveLoad(std::list(a.begin(), a.end())); 87 | TestSaveLoad(std::list {MyClass("abc"), MyClass("def")}); 88 | TestSaveLoad(std::list {Param(3, 4), Param(5, 6)}); 89 | 90 | } 91 | 
def mpi_submit(nworker, nserver, pass_envs):
    """Submit nworker + nserver processes as a single mpirun job.

    Customized submit script; note this can be a lambda function containing
    additional parameters in input.

    Parameters
    ----------
    nworker : int
        number of worker processes to start up
    nserver : int
        number of server nodes to start up
    pass_envs : dict
        environment variables to be added to the started programs
    """
    env = os.environ.copy()

    for k, v in pass_envs.items():
        env[k] = str(v)

    # (removed unused local `sargs`, a dead ' '.join(args.command))
    if args.hostfile is None:
        cmd = 'mpirun -n %d' % (nworker + nserver)
    else:
        cmd = 'mpirun -n %d --hostfile %s ' % (nworker + nserver, args.hostfile)

    for k, v in pass_envs.items():
        # for mpich2
        cmd += ' -env %s %s' % (k, v)
        # for openmpi
        # cmd += ' -x %s' % k
    cmd += ' '
    cmd += ' '.join(args.command)
    cmd += ' '
    cmd += ' '.join(unknown)

    # known issue: results do not show in emacs eshell
    def run():
        # run mpirun in a daemon thread so the tracker keeps serving
        subprocess.check_call(cmd, shell=True, env=env)
    thread = Thread(target=run, args=())
    thread.setDaemon(True)
    thread.start()
# Shell wrapper that keeps re-running the command (exporting the attempt
# count) until it exits with status 0; used on non-Windows platforms.
keepalive = """
nrep=0
rc=254
while [ $rc -ne 0 ];
do
    export DMLC_NUM_ATTEMPT=$nrep
    %s
    rc=$?;
    nrep=$((nrep+1));
done
"""

def exec_cmd(cmd, role, taskid, pass_env):
    """Execute the command line command."""
    # prefix a bare local executable with ./ so the shell finds it
    # (mutates the caller's list, as before)
    if cmd[0].find('/') == -1 and os.path.exists(cmd[0]) and os.name != 'nt':
        cmd[0] = './' + cmd[0]
    cmd = ' '.join(cmd)

    # build the child environment: inherited vars + tracker vars + role info
    env = os.environ.copy()
    for k, v in pass_env.items():
        env[k] = str(v)
    env['DMLC_TASK_ID'] = str(taskid)
    env['DMLC_ROLE'] = role
    env['DMLC_JOB_CLUSTER'] = 'local'

    num_retry = 0
    while True:
        if os.name == 'nt':
            # on Windows do the retry loop in Python
            env['DMLC_NUM_ATTEMPT'] = str(num_retry)
            ret = subprocess.call(cmd, shell=True, env=env)
            if ret != 0:
                num_retry += 1
                continue
        else:
            # elsewhere delegate retrying to the keepalive bash wrapper
            ret = subprocess.call(keepalive % (cmd), shell=True,
                                  executable='bash', env=env)
        if ret == 0:
            logging.debug('Thread %d exit with 0', taskid)
            return
        # non-zero exit after the retry machinery gave up
        if os.name == 'nt':
            sys.exit(-1)
        raise RuntimeError('Get nonzero return code=%d' % ret)


def submit(args):
    """Submit function of local jobs."""
    def mthread_submit(nworker, nserver, envs):
        """
        customized submit script, that submit nslave jobs, each must contain args as parameter
        note this can be a lambda function containing additional parameters in input

        Parameters
        ----------
        nworker: number of slave process to start up
        nserver: number of server nodes to start up
        envs: enviroment variables to be added to the starting programs
        """
        procs = {}
        for rank in range(nworker + nserver):
            role = 'worker' if rank < nworker else 'server'
            procs[rank] = Thread(target=exec_cmd,
                                 args=(args.command, role, rank, envs))
            procs[rank].setDaemon(True)
            procs[rank].start()

    # call submit, with nslave, the commands to run each job and submit function
    tracker.submit(args.num_workers, args.num_servers, fun_submit=mthread_submit,
                   pscmd=(' '.join(args.command)))
def mpi_submit(nworker, nserver, pass_envs):
    """Submit nworker + nserver processes as a single mpirun job.

    Customized submit script; note this can be a lambda function containing
    additional parameters in input.

    Parameters
    ----------
    nworker : int
        number of worker processes to start up
    nserver : int
        number of server nodes to start up
    pass_envs : dict
        environment variables to be added to the started programs
    """
    env = os.environ.copy()

    for k, v in pass_envs.items():
        env[k] = str(v)

    # (removed unused local `sargs`, a dead ' '.join(args.command))
    if args.hostfile is None:
        cmd = 'mpirun -n %d' % (nworker + nserver)
    else:
        cmd = 'mpirun -n %d --hostfile %s ' % (nworker + nserver, args.hostfile)

    for k, v in pass_envs.items():
        # for mpich2
        cmd += ' -env %s %s' % (k, v)
        # for openmpi
        # cmd += ' -x %s' % k
    cmd += ' '
    cmd += ' '.join(args.command)
    cmd += ' '
    cmd += ' '.join(unknown)

    # known issue: results do not show in emacs eshell
    def run():
        # run mpirun in a daemon thread so the tracker keeps serving
        subprocess.check_call(cmd, shell=True, env=env)
    thread = Thread(target=run, args=())
    thread.setDaemon(True)
    thread.start()
8 | class TruncateFloatFilter : public IFilter { 9 | public: 10 | void Encode(Message* msg) { 11 | Convert(msg, true); 12 | } 13 | 14 | void Decode(Message* msg) { 15 | Convert(msg, false); 16 | } 17 | private: 18 | // Decode / Encode a message 19 | void Convert(Message* msg, bool encode) { 20 | auto filter_conf = CHECK_NOTNULL(Find(Filter::TRUNCATE_FLOAT, msg)); 21 | int nbytes = filter_conf->num_bytes(); 22 | if (nbytes == 0) return; 23 | 24 | int n = msg->value.size(); 25 | CHECK_EQ(n, msg->task.value_type_size()); 26 | for (int i = 0; i < n; ++i) { 27 | if (msg->value[i].size() == 0) continue; 28 | auto type = msg->task.value_type(i); 29 | 30 | if (type == DataType::FLOAT) { 31 | msg->value[i] = Convert(msg->value[i], encode, nbytes); 32 | } else if (type == DataType::DOUBLE) { 33 | msg->value[i] = Convert(msg->value[i], encode, nbytes); 34 | } 35 | } 36 | } 37 | 38 | template 39 | SArray Convert(const SArray& array, bool encode, int nbytes) { 40 | if (nbytes == 1) { 41 | return Convert(array, encode); 42 | } else if (nbytes == 2) { 43 | return Convert(array, encode); 44 | } else if (nbytes == 4) { 45 | return Convert(array, encode); 46 | } else if (nbytes == -1) { 47 | return Convert(array, encode); 48 | } else if (nbytes == -2) { 49 | return Convert(array, encode); 50 | } else if (nbytes == -4) { 51 | return Convert(array, encode); 52 | } else { 53 | LOG(FATAL) << "unsupported num_bytes: " << nbytes; 54 | return SArray(); 55 | } 56 | } 57 | 58 | 59 | template 60 | SArray Convert(const SArray& array, bool encode) { 61 | if (encode) { 62 | SArray in(array); 63 | SArray out(in.size()); 64 | Real max_v = static_cast(std::numeric_limits::max()); 65 | Real min_v = static_cast(std::numeric_limits::min()); 66 | for (size_t i = 0; i < in.size(); ++i) { 67 | Real v = in[i]; 68 | Real proj = v > max_v ? max_v : v < min_v ? 
/*!
 * Copyright (c) 2015 by Contributors
 * \file hdfs_filesys.h
 * \brief HDFS access module
 * \author Tianqi Chen
 */
#ifndef DMLC_IO_HDFS_FILESYS_H_
#define DMLC_IO_HDFS_FILESYS_H_
extern "C" {
#include <hdfs.h>
}
#include <string>
#include <vector>
#include "./filesys.h"

namespace dmlc {
namespace io {
/*! \brief HDFS file system */
class HDFSFileSystem : public FileSystem {
 public:
  /*! \brief destructor */
  virtual ~HDFSFileSystem();
  /*!
   * \brief get information about a path
   * \param path the path to the file
   * \return the information about the file
   */
  virtual FileInfo GetPathInfo(const URI &path);
  /*!
   * \brief list files in a directory
   * \param path to the file
   * \param out_list the output information about the files
   */
  virtual void ListDirectory(const URI &path, std::vector *out_list);
  /*!
   * \brief open a stream, will report error and exit if bad thing happens
   * NOTE: the Stream can continue to work even when filesystem was destructed
   * \param path the uri of the input, can contain the hdfs prefix
   * \param flag can be "w", "r", "a"
   * \param allow_null whether NULL can be returned, or directly report error
   * \return the created stream, can be NULL when allow_null == true and file do not exist
   */
  virtual SeekStream *Open(const URI &path,
                           const char* const flag,
                           bool allow_null);
  /*!
   * \brief open a seekable stream for read
   * \param path the path to the file
   * \param allow_null whether NULL can be returned, or directly report error
   * \return the created stream, can be NULL when allow_null == true and file do not exist
   */
  virtual SeekStream *OpenForRead(const URI &path, bool allow_null);
  /*!
   * \brief get a singleton of HDFSFileSystem when needed
   * \param namenode address of the namenode to talk to; "default" keeps the
   *        cluster the singleton is currently connected to
   * \return a singleton instance
   *
   * NOTE(review): passing a different namenode mutates the shared singleton
   * via ResetNamenode; presumably callers never do this concurrently with
   * other users of the old cluster — confirm before relying on it from
   * multiple threads.
   */
  inline static HDFSFileSystem *GetInstance(const std::string &namenode = "default") {
    static HDFSFileSystem instance(namenode);
    // switch to another hdfs
    if (namenode != "default" && instance.namenode_ != namenode) {
      instance.ResetNamenode(namenode);
    }
    return &instance;
  }

 private:
  /*! \brief constructor */
  explicit HDFSFileSystem(const std::string &namenode);
  /*! \brief switch to another hdfs cluster */
  void ResetNamenode(const std::string &namenode);
  /*! \brief namenode address */
  std::string namenode_;
  /*! \brief hdfs handle */
  hdfsFS fs_;
  /*! \brief reference counter of fs */
  int *ref_counter_;
};
}  // namespace io
}  // namespace dmlc
#endif  // DMLC_IO_HDFS_FILESYS_H_