├── .gitmodules ├── LICENSE ├── README.md ├── SConscript ├── SConstruct ├── apps └── helloworld │ ├── Makefile │ ├── README.md │ └── helloworld.cpp ├── include ├── geeps-user-defined-types.hpp └── geeps.hpp ├── scripts ├── install-caffe-deps-ubuntu14.sh └── install-geeps-deps-ubuntu14.sh └── src ├── client ├── clientlib-bg-access.cpp ├── clientlib-cbk.cpp ├── clientlib-data.cpp ├── clientlib-viter.cpp ├── clientlib.cpp ├── clientlib.hpp ├── encoder-decoder.cpp ├── encoder-decoder.hpp ├── geeps.cpp └── stats-tracker.hpp ├── common ├── background-worker.cpp ├── background-worker.hpp ├── common-util.hpp ├── gpu-util │ ├── device_alternate.hpp │ ├── math_functions.cpp │ ├── math_functions.hpp │ ├── math_functions_cuda.cu │ └── mkl_alternate.hpp ├── internal-config.hpp ├── portable-bytes.hpp ├── router-handler.cpp ├── router-handler.hpp ├── row-op-util.cu ├── row-op-util.hpp ├── wire-protocol.hpp ├── work-puller.cpp ├── work-puller.hpp ├── work-pusher.cpp ├── work-pusher.hpp ├── zmq-portable-bytes.hpp └── zmq-util.hpp └── server ├── metadata-server.cpp ├── metadata-server.hpp ├── server-encoder-decoder.cpp ├── server-encoder-decoder.hpp ├── server-entry.cpp ├── server-entry.hpp ├── tablet-server.cpp └── tablet-server.hpp /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "apps/caffe"] 2 | path = apps/caffe 3 | url = git@github.com:cuihenggang/caffe.git 4 | branch = cui-geeps 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) <2016>, Carnegie Mellon University. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 1. 
Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 3. Neither the name of the University nor the names of its contributors 13 | may be used to endorse or promote products derived from this software 14 | without specific prior written permission. 15 | * 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 23 | OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 24 | AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 26 | WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GeePS 2 | 3 | [![License](https://img.shields.io/badge/license-BSD-blue.svg)](LICENSE) 4 | 5 | [GeePS](https://cuihenggang.github.io/archive/paper/[eurosys16]geeps.pdf) is a parameter server library that scales single-machine GPU machine learning applications (such as Caffe) to a cluster of machines. 
6 | 7 | 8 | ## Download and build GeePS and Caffe application 9 | 10 | Run the following command to download GeePS and (our slightly modified) Caffe: 11 | 12 | ``` 13 | git clone --recurse-submodules https://github.com/cuihenggang/geeps.git 14 | ``` 15 | 16 | If you use the Ubuntu 14.04 system, you can run the following commands (from the geeps root directory) to install the dependencies: 17 | 18 | ``` 19 | ./scripts/install-geeps-deps-ubuntu14.sh 20 | ./scripts/install-caffe-deps-ubuntu14.sh 21 | ``` 22 | 23 | Also, please make sure your CUDA library is installed in `/usr/local/cuda`. 24 | 25 | After installing the dependencies, you can build GeePS by simply running this command from the geeps root directory: 26 | 27 | ``` 28 | scons -j8 29 | ``` 30 | 31 | You can then build (our slightly modified) Caffe by first entering the `apps/caffe` directory and then running `make -j8`: 32 | 33 | ``` 34 | cd apps/caffe 35 | make -j8 36 | ``` 37 | 38 | 39 | ## Caffe's CIFAR-10 example on two machines 40 | 41 | You can run Caffe in a distributed fashion across a cluster of machines with GeePS. In this section, we will show you the steps to run Caffe's CIFAR-10 example on two machines. 42 | 43 | All commands in this section are executed from the `apps/caffe` directory: 44 | 45 | ``` 46 | cd apps/caffe 47 | ``` 48 | 49 | You will first need to prepare a machine file as `examples/cifar10/2parts/machinefile`, with each line being the host name of one machine. Since we use two machines in this example, this machine file should have two lines, such as: 50 | 51 | ``` 52 | host0 53 | host1 54 | ``` 55 | 56 | We will use `pdsh` to launch commands on those machines with the `ssh` protocol, so please make sure that you can `ssh` to those machines without a password. 
57 | 58 | When you have your machine file ready, you can run the following commands to download and prepare the CIFAR-10 dataset: 59 | 60 | ``` 61 | ./data/cifar10/get_cifar10.sh 62 | ./examples/cifar10/2parts/create_cifar10_pdsh.sh 63 | ``` 64 | 65 | Our script will partition the dataset into two parts, one for each machine. You can then train an Inception network on it with this command: 66 | 67 | ``` 68 | ./examples/cifar10/2parts/train_inception.sh 69 | ``` 70 | 71 | Please look at our [wiki](https://github.com/cuihenggang/geeps/wiki) for more details. Happy training! 72 | 73 | 74 | ## Automatic training hyperparameter tuning 75 | 76 | [MLtuner-GeePS](https://github.com/cuihenggang/mltuner-geeps) is an extended version of GeePS with automatic training hyperparameter tuning support. It includes a lightweight [MLtuner](https://cuihenggang.github.io/archive/paper/[arxiv]mltuner.pdf) module that automatically tunes the training hyperparameters for distributed ML training (including learning rate, momentum, batch size, data staleness, etc.). 77 | 78 | 79 | ## Reference Paper 80 | 81 | Henggang Cui, Hao Zhang, Gregory R. Ganger, Phillip B. Gibbons, and Eric P. Xing. 82 | [GeePS: Scalable Deep Learning on Distributed GPUs with a GPU-Specialized Parameter Server](https://cuihenggang.github.io/archive/paper/[eurosys16]geeps.pdf). 83 | In ACM European Conference on Computer Systems, 2016 (EuroSys'16). 84 | 85 | Henggang Cui, Gregory R. Ganger, and Phillip B. Gibbons. 86 | [MLtuner: System Support for Automatic Machine Learning Tuning](https://cuihenggang.github.io/archive/paper/[arxiv]mltuner.pdf). 87 | arXiv preprint 1803.07445. 88 | 89 | -------------------------------------------------------------------------------- /SConscript: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 by Carnegie Mellon University. 
2 | 3 | import os 4 | import re 5 | import fnmatch 6 | import filecmp 7 | 8 | #### Decisions are made here. The rest of the file are functions defintions. 9 | def main(): 10 | build_all('build') 11 | 12 | def build_all(variant): 13 | env = DefaultEnvironment().Clone() 14 | env['CXX'] = 'g++' 15 | # Scons has no idea that nvcc compiles the code using '-fPIC' flag, so we need this option to get rid of the link error 16 | env['STATIC_AND_SHARED_OBJECTS_ARE_THE_SAME'] = 1 17 | 18 | # set nvcc builder 19 | nvcc = Builder(action = "/usr/local/cuda/bin/nvcc --compiler-options '-fPIC' --shared -Iinclude -Isrc -c $SOURCE -o $TARGET -g -O3", 20 | suffix = '.o', 21 | src_suffix = '.cu') 22 | 23 | env.Append(CPPPATH = ['src']) 24 | env.Append(CPPPATH = ['include']) 25 | env.Append(LIBS = [ 26 | 'zmq', 'boost_system', 'boost_thread', 'tbb', 27 | 'boost_serialization', 'glog', 'gflags']) 28 | # The -fPIC flag is necessary to build a shared library 29 | #env.Append(CCFLAGS = '-Wall -Werror -g -fPIC') 30 | env.Append(CCFLAGS = '-Wall -Wno-sign-compare -g -fPIC') 31 | if (env['build_debug'] == '1'): 32 | env.Append(CCFLAGS = '-ggdb') 33 | else: 34 | env.Append(CCFLAGS = '-O3') 35 | 36 | env.Append(CPPPATH = ['/usr/local/cuda/include']) 37 | env.Append(LIBPATH = ['/usr/local/cuda/lib64']) 38 | env.Append(LIBS = ['cblas', 'cudart', 'cublas']) 39 | env.Append(BUILDERS = {'NvccBuilder' : nvcc}) 40 | 41 | # build GeePS library 42 | src_files = ['src/client/geeps.cpp'] 43 | src_files.append('src/common/background-worker.cpp') 44 | src_files.append('src/common/work-puller.cpp') 45 | src_files.append('src/common/work-pusher.cpp') 46 | src_files.append('src/common/router-handler.cpp') 47 | src_files.append('src/common/gpu-util/math_functions.cpp') 48 | src_files.append(env.NvccBuilder('src/common/gpu-util/math_functions_cuda.cu')) 49 | src_files.append(env.NvccBuilder('src/common/row-op-util.cu')) 50 | src_files.append('src/client/clientlib.cpp') 51 | 
src_files.append('src/client/clientlib-data.cpp') 52 | src_files.append('src/client/clientlib-cbk.cpp') 53 | src_files.append('src/client/clientlib-bg-access.cpp') 54 | src_files.append('src/client/clientlib-viter.cpp') 55 | src_files.append('src/client/encoder-decoder.cpp') 56 | src_files.append('src/server/tablet-server.cpp') 57 | src_files.append('src/server/metadata-server.cpp') 58 | src_files.append('src/server/server-encoder-decoder.cpp') 59 | src_files.append('src/server/server-entry.cpp') 60 | clientlib = env.Library('geeps', src_files) 61 | clientsharedlib = env.SharedLibrary('geeps', src_files) 62 | env.Install('lib', clientlib) 63 | 64 | # end of build_all() 65 | 66 | main() 67 | -------------------------------------------------------------------------------- /SConstruct: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 by Carnegie Mellon University. 2 | 3 | import os 4 | 5 | env = DefaultEnvironment() 6 | env['build_debug'] = ARGUMENTS.get('debug', '0') 7 | 8 | build_variants = COMMAND_LINE_TARGETS 9 | if len(build_variants) == 0: 10 | build_variants = ['build'] 11 | 12 | # Make a phony target to time stamp the last build success 13 | def PhonyTarget(target, source, action): 14 | env.Append(BUILDERS = { 'phony' : Builder(action = action) }) 15 | AlwaysBuild(env.phony(target = target, source = source)) 16 | 17 | def write_build_info(target, source, env): 18 | build_options = 'scons build=%s' % env['build_debug'] 19 | os.system('echo %s > build/last-build-info' % build_options) 20 | os.system('date >> build/last-build-info') 21 | 22 | if 'build' in build_variants: 23 | builds = SConscript('SConscript', variant_dir='build', duplicate=1) 24 | PhonyTarget('build/write-build-info', builds, write_build_info) 25 | -------------------------------------------------------------------------------- /apps/helloworld/Makefile: -------------------------------------------------------------------------------- 1 | 
CC = g++ 2 | CFLAGS = -g -O3 3 | INCLUDE_PATH = -I../../include 4 | LIBPATH = -L../../build 5 | LIBS = -lrt -Wl,-rpath,\$$ORIGIN/../../build -lgeeps 6 | 7 | all: Makefile helloworld.cpp 8 | $(CC) $(CFLAGS) helloworld.cpp $(INCLUDE_PATH) $(LIBPATH) $(LIBS) -o helloworld 9 | -------------------------------------------------------------------------------- /apps/helloworld/README.md: -------------------------------------------------------------------------------- 1 | # Compile and run 2 | 3 | ``` 4 | make 5 | ./helloworld 6 | ``` 7 | 8 | # Expected output 9 | 10 | ``` 11 | Finished "training", hello world! 12 | [some error messages indicating that CUDA functions are called while the driver is shut down] 13 | 14 | ``` 15 | 16 | # Known issues 17 | 18 | After the program finishes, there will be some error messages indicating that 19 | CUDA functions are called while the driver is shut down. 20 | This is because we are yet to implement the shutting-down procedures 21 | of the background working threads. 
22 | -------------------------------------------------------------------------------- /apps/helloworld/helloworld.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "geeps.hpp" 5 | 6 | using std::vector; 7 | using std::cout; 8 | using std::endl; 9 | 10 | int main() { 11 | /* Create a GeePs object with a GeePsConfig */ 12 | GeePsConfig geeps_config; 13 | geeps_config.host_list.push_back("localhost"); 14 | int machine_id = 0; 15 | geeps_config.gpu_memory_capacity = (size_t)1 << 32; 16 | /* Set GPU memory capacity to 4 GB */ 17 | GeePs *geeps = new GeePs(machine_id, geeps_config); 18 | 19 | /* Prepare the row keys */ 20 | size_t table_id = 0; 21 | int slack = 0; /* BSP mode */ 22 | vector row_ids; 23 | row_ids.push_back(0); 24 | /* Only one row, with ID 0 */ 25 | 26 | /* Perform virtual iteration */ 27 | int read_handle = geeps->VirtualRead(table_id, row_ids, slack); 28 | int preupdate_handle = geeps->VirtualPreUpdate(table_id, row_ids); 29 | int postread_handle = geeps->VirtualPostRead(read_handle); 30 | int update_handle = geeps->VirtualUpdate(preupdate_handle); 31 | geeps->VirtualClock(); 32 | geeps->FinishVirtualIteration(); 33 | 34 | /* After the FinishVirtualIteration() function is called, 35 | * you can make your reported access with the handles. 36 | * Before you calling the StartIterations() function, 37 | * you can do the reported accesses in whatever orders you want. 
*/ 38 | /* For example, here, we can set some initial values to our parameter data */ 39 | /* First request an update buffer (in GPU memory) */ 40 | RowData *update_buffer; 41 | geeps->PreUpdate(preupdate_handle, &update_buffer); 42 | /* Suppose you have filled the update into the update buffer */ 43 | /* Call the Update() function to release the buffer and finish this update */ 44 | geeps->Update(update_handle); 45 | /* Signal the completion of a clock */ 46 | geeps->Clock(); 47 | 48 | geeps->StartIterations(); 49 | /* After the StartIterations() function is called, 50 | * you can only issue the GeePS calls with the order you reported 51 | * at the virtual iteration. */ 52 | /* Start the iterations */ 53 | for (int clock = 0; clock < 10; clock++) { 54 | /* Read the data */ 55 | RowData *read_buffer; 56 | geeps->Read(read_handle, &read_buffer); 57 | /* Current data is read in read buffer */ 58 | /* Update the data */ 59 | RowData *update_buffer; 60 | geeps->PreUpdate(preupdate_handle, &update_buffer); 61 | /* Suppose you have filled the update into the update buffer */ 62 | /* Call the PostRead() function to release the buffer */ 63 | geeps->PostRead(postread_handle); 64 | geeps->Update(update_handle); 65 | /* Signal the completion of a clock */ 66 | geeps->Clock(); 67 | } 68 | 69 | cout << "Finished \"training\", hello world!\n\n"; 70 | delete geeps; 71 | } 72 | -------------------------------------------------------------------------------- /include/geeps-user-defined-types.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __geeps_user_defined_types_hpp__ 2 | #define __geeps_user_defined_types_hpp__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. 
Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 
32 | */ 33 | 34 | #include 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | typedef unsigned int uint; 42 | 43 | typedef uint8_t command_t; 44 | typedef size_t row_idx_t; 45 | typedef float val_t; 46 | typedef size_t table_id_t; 47 | typedef int iter_t; 48 | 49 | typedef std::pair TableRow; 50 | typedef struct { 51 | table_id_t table; 52 | row_idx_t row; 53 | } table_row_t; 54 | 55 | #define ROW_DATA_SIZE 128 56 | struct ArrayData { 57 | val_t data[ROW_DATA_SIZE]; 58 | void init() { 59 | for (size_t i = 0; i < ROW_DATA_SIZE; i++) { 60 | data[i] = 0; 61 | } 62 | } 63 | ArrayData() { 64 | init(); 65 | } 66 | template 67 | void serialize(Archive & ar, const unsigned int version) { 68 | ar & data; 69 | } 70 | }; 71 | 72 | typedef ArrayData RowData; 73 | typedef ArrayData RowOpVal; 74 | 75 | #endif // defined __geeps_user_defined_types_hpp 76 | -------------------------------------------------------------------------------- /include/geeps.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __geeps_hpp__ 2 | #define __geeps_hpp__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 
19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include 35 | #include 36 | 37 | #include "geeps-user-defined-types.hpp" 38 | 39 | using std::string; 40 | using std::vector; 41 | 42 | struct GeePsConfig { 43 | uint num_tables; 44 | std::vector host_list; 45 | std::vector port_list; 46 | uint tcp_base_port; 47 | uint num_comm_channels; 48 | std::string output_dir; 49 | iter_t log_interval; 50 | int pp_policy; 51 | int local_opt; 52 | size_t gpu_memory_capacity; 53 | int mm_warning_level; 54 | /* 0: no warning 55 | * 1: guarantee double buffering for thread cache 56 | * 2: make sure all local data in GPU memory 57 | * 3: make sure all parameter cache in GPU memory */ 58 | int pinned_cpu_memory; 59 | int read_my_writes; 60 | 61 | GeePsConfig() : 62 | num_tables(1), 63 | tcp_base_port(9090), 64 | num_comm_channels(1), 65 | output_dir(""), log_interval(0), 66 | pp_policy(0), local_opt(1), 67 | gpu_memory_capacity(std::numeric_limits::max()), 68 | mm_warning_level(1), 69 | pinned_cpu_memory(1), 70 | read_my_writes(0) {} 71 | }; 72 | 73 | class GeePs { 74 | public: 75 | GeePs(uint process_id, const GeePsConfig& config); 76 | void Shutdown(); 77 | std::string 
GetStats(); 78 | void StartIterations(); 79 | 80 | /* Interfaces for virtual iteration */ 81 | int VirtualRead(size_t table_id, const vector& row_ids, int slack); 82 | int VirtualPostRead(int prestep_handle); 83 | int VirtualPreUpdate(size_t table_id, const vector& row_ids); 84 | int VirtualUpdate(int prestep_handle); 85 | int VirtualLocalAccess(const vector& row_ids, bool fetch); 86 | int VirtualPostLocalAccess(int prestep_handle, bool keep); 87 | int VirtualClock(); 88 | void FinishVirtualIteration(); 89 | 90 | /* Interfaces for real access */ 91 | bool Read(int handle, RowData **buffer_ptr); 92 | void PostRead(int handle); 93 | void PreUpdate(int handle, RowOpVal **buffer_ptr); 94 | void Update(int handle); 95 | bool LocalAccess(int handle, RowData **buffer_ptr); 96 | void PostLocalAccess(int handle); 97 | void Clock(); 98 | }; 99 | 100 | #endif // defined __geeps_hpp__ 101 | -------------------------------------------------------------------------------- /scripts/install-caffe-deps-ubuntu14.sh: -------------------------------------------------------------------------------- 1 | sudo apt-get update 2 | sudo apt-get install -y --force-yes libatlas-base-dev 3 | sudo apt-get install -y --force-yes libopencv-dev 4 | sudo apt-get install -y --force-yes protobuf-compiler 5 | sudo apt-get install -y --force-yes libprotobuf-dev 6 | sudo apt-get install -y --force-yes libgoogle-glog-dev 7 | sudo apt-get install -y --force-yes libgflags-dev 8 | sudo apt-get install -y --force-yes libhdf5-dev 9 | sudo apt-get install -y --force-yes libleveldb-dev 10 | sudo apt-get install -y --force-yes libsnappy-dev 11 | sudo apt-get install -y --force-yes liblmdb-dev 12 | sudo apt-get install -y --force-yes libpython-dev 13 | sudo apt-get install -y --force-yes libboost-python-dev 14 | sudo apt-get install -y --force-yes libboost-filesystem-dev 15 | sudo apt-get install -y --force-yes cython 16 | sudo apt-get install -y --force-yes python-numpy 17 | sudo apt-get install -y --force-yes 
python-protobuf 18 | sudo apt-get install -y --force-yes python-skimage 19 | sudo apt-get install -y --force-yes python-h5py 20 | sudo apt-get install -y --force-yes python-matplotlib 21 | sudo apt-get install -y --force-yes openjdk-7-jre 22 | -------------------------------------------------------------------------------- /scripts/install-geeps-deps-ubuntu14.sh: -------------------------------------------------------------------------------- 1 | sudo apt-get update 2 | sudo apt-get install -y --force-yes libboost-thread-dev 3 | sudo apt-get install -y --force-yes libboost-program-options-dev 4 | sudo apt-get install -y --force-yes libgoogle-glog-dev 5 | sudo apt-get install -y --force-yes libgflags-dev 6 | sudo apt-get install -y --force-yes libatlas-base-dev 7 | sudo apt-get install -y --force-yes libtbb-dev 8 | sudo apt-get install -y --force-yes libzmq-dev 9 | sudo apt-get install -y --force-yes scons 10 | sudo apt-get install -y --force-yes pdsh 11 | -------------------------------------------------------------------------------- /src/client/clientlib-cbk.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Carnegie Mellon University. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 3. Neither the name of the University nor the names of its contributors 14 | * may be used to endorse or promote products derived from this software 15 | * without specific prior written permission. 
16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 24 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 25 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 27 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | * POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | #include 36 | 37 | #include 38 | 39 | #include 40 | #include 41 | #include 42 | #include 43 | 44 | #include "common/work-pusher.hpp" 45 | #include "common/background-worker.hpp" 46 | #include "common/common-util.hpp" 47 | #include "common/row-op-util.hpp" 48 | #include "encoder-decoder.hpp" 49 | #include "clientlib.hpp" 50 | 51 | void ClientLib::find_row_cbk( 52 | table_id_t table, row_idx_t row, uint32_t server_id) { 53 | CHECK(0); 54 | } 55 | 56 | void ClientLib::recv_row_batch_cbk( 57 | uint channel_id, iter_t data_age, iter_t self_clock, uint server_id, 58 | uint table_id, RowKey *row_keys, RowData *row_data, uint batch_size) { 59 | CommunicationChannel& comm_channel = comm_channels[channel_id]; 60 | BgthreadStats& bgthread_stats = comm_channel.bgthread_stats; 61 | bool timing = true; 62 | tbb::tick_count recv_row_start; 63 | tbb::tick_count apply_op_start; 64 | tbb::tick_count recv_row_end; 65 | if (timing) { 66 | recv_row_start = tbb::tick_count::now(); 67 | } 68 | 69 | recv_row_batch( 70 | channel_id, server_id, table_id, 71 | row_keys, row_data, batch_size, 72 
| data_age, self_clock, true); 73 | 74 | if (timing) { 75 | recv_row_end = tbb::tick_count::now(); 76 | bgthread_stats.tot_recv_row_time += 77 | (recv_row_end - recv_row_start).seconds(); 78 | } 79 | } 80 | 81 | void ClientLib::server_clock_cbk( 82 | uint channel_id, uint server_id, iter_t clock, uint table_id) { 83 | CHECK_GE(clock, 0); 84 | CommunicationChannel& comm_channel = comm_channels[channel_id]; 85 | CHECK_LT(table_id, comm_channel.cached_tables.size()); 86 | CachedTable& cached_table = comm_channel.cached_tables[table_id]; 87 | CHECK_LT(server_id, cached_table.server_clock.size()); 88 | CHECK_LE(cached_table.server_clock[server_id], clock); 89 | cached_table.server_clock[server_id] = clock; 90 | 91 | iter_t min_clock = clock_min(cached_table.server_clock); 92 | if (min_clock > cached_table.server_clock_min) { 93 | /* Remove oplog entries. 94 | * We don't grab any locks because we believe there won't be any threads 95 | * accessing it. */ 96 | iter_t start_clock = cached_table.server_clock_min + 1; 97 | iter_t end_clock = min_clock; 98 | reclaim_oplog( 99 | channel_id, start_clock, end_clock, table_id, true /* gpu */); 100 | reclaim_oplog( 101 | channel_id, start_clock, end_clock, table_id, false /* cpu */); 102 | cached_table.server_clock_min = min_clock; 103 | } 104 | } 105 | 106 | void ClientLib::get_stats_cbk(const string& server_stats) { 107 | unique_lock lock(global_mutex); 108 | proc_stats.server_stats = server_stats; 109 | proc_stats.server_stats_refreshed = true; 110 | global_cvar.notify_all(); 111 | } 112 | -------------------------------------------------------------------------------- /src/client/encoder-decoder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Carnegie Mellon University. 3 | * All rights reserved. 
4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 3. Neither the name of the University nor the names of its contributors 14 | * may be used to endorse or promote products derived from this software 15 | * without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 24 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 25 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 27 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | * POSSIBILITY OF SUCH DAMAGE. 
29 | */ 30 | 31 | // Encode and decode messages to/from the server 32 | 33 | #include 34 | #include 35 | 36 | #include "common/work-puller.hpp" 37 | #include "common/background-worker.hpp" 38 | #include "common/row-op-util.hpp" 39 | #include "encoder-decoder.hpp" 40 | 41 | using std::string; 42 | using std::vector; 43 | using std::cerr; 44 | using std::endl; 45 | using boost::shared_ptr; 46 | using boost::make_shared; 47 | 48 | void ClientServerEncode::find_row( 49 | table_id_t table, row_idx_t row, uint metadata_sever_id) { 50 | vector msgs; 51 | msgs.resize(1); 52 | 53 | msgs[0].init_size(sizeof(cs_find_row_msg_t)); 54 | cs_find_row_msg_t *cs_find_row_msg = 55 | reinterpret_cast(msgs[0].data()); 56 | cs_find_row_msg->cmd = FIND_ROW; 57 | cs_find_row_msg->client_id = client_id; 58 | cs_find_row_msg->table = table; 59 | cs_find_row_msg->row = row; 60 | 61 | /* Currently, the tablet servers are also metadata servers */ 62 | string metadata_sever_name = server_names[metadata_sever_id]; 63 | 64 | router_handler->send_to(metadata_sever_name, msgs); 65 | } 66 | 67 | void ClientServerEncode::read_row_batch( 68 | uint server_id, RowKeys& row_keys, iter_t data_age, 69 | bool prioritized) { 70 | vector msgs; 71 | msgs.resize(2); 72 | 73 | msgs[0].init_size(sizeof(cs_read_row_batch_msg_t)); 74 | cs_read_row_batch_msg_t *cs_read_row_batch_msg = 75 | reinterpret_cast(msgs[0].data()); 76 | cs_read_row_batch_msg->cmd = READ_ROW_BATCH; 77 | cs_read_row_batch_msg->client_id = client_id; 78 | cs_read_row_batch_msg->data_age = data_age; 79 | cs_read_row_batch_msg->prioritized = prioritized; 80 | 81 | msgs[1].pack_vector(row_keys); 82 | 83 | CHECK_LT(server_id, server_names.size()); 84 | string tablet_name = server_names[server_id]; 85 | router_handler->send_to(tablet_name, msgs); 86 | } 87 | 88 | void ClientServerEncode::clock_broadcast( 89 | iter_t clock, uint table_id) { 90 | vector msgs; 91 | msgs.resize(1); 92 | 93 | msgs[0].init_size(sizeof(cs_clock_msg_t)); 94 | cs_clock_msg_t 
*cs_clock_msg = 95 | reinterpret_cast(msgs[0].data()); 96 | cs_clock_msg->cmd = CLOCK; 97 | cs_clock_msg->client_id = client_id; 98 | cs_clock_msg->clock = clock; 99 | cs_clock_msg->table_id = table_id; 100 | 101 | /* Broadcast to all tablet servers */ 102 | router_handler->send_to(server_names, msgs); 103 | } 104 | 105 | void ClientServerEncode::clock_with_updates_batch( 106 | uint server_id, iter_t clock, uint table_id, 107 | const RowOpVal *updates, const RowKey *row_keys, uint batch_size) { 108 | vector msgs; 109 | msgs.resize(3); 110 | 111 | msgs[0].init_size(sizeof(cs_clock_with_updates_batch_msg_t)); 112 | cs_clock_with_updates_batch_msg_t *cs_clock_with_updates_batch_msg = 113 | reinterpret_cast(msgs[0].data()); 114 | cs_clock_with_updates_batch_msg->cmd = CLOCK_WITH_UPDATES_BATCH; 115 | cs_clock_with_updates_batch_msg->client_id = client_id; 116 | cs_clock_with_updates_batch_msg->clock = clock; 117 | cs_clock_with_updates_batch_msg->table_id = table_id; 118 | 119 | msgs[1].pack_memory(row_keys, batch_size * sizeof(RowKey)); 120 | msgs[2].pack_memory(updates, batch_size * sizeof(RowOpVal)); 121 | 122 | CHECK_LT(server_id, server_names.size()); 123 | string tablet_name = server_names[server_id]; 124 | router_handler->send_to(tablet_name, msgs); 125 | } 126 | 127 | void ClientServerEncode::clock_with_updates_batch( 128 | uint server_id, iter_t clock, uint table_id, 129 | const RowOpVal *updates0, const RowKey *row_keys0, uint batch_size0, 130 | const RowOpVal *updates1, const RowKey *row_keys1, uint batch_size1) { 131 | vector msgs; 132 | msgs.resize(3); 133 | 134 | msgs[0].init_size(sizeof(cs_clock_with_updates_batch_msg_t)); 135 | cs_clock_with_updates_batch_msg_t *cs_clock_with_updates_batch_msg = 136 | reinterpret_cast(msgs[0].data()); 137 | cs_clock_with_updates_batch_msg->cmd = CLOCK_WITH_UPDATES_BATCH; 138 | cs_clock_with_updates_batch_msg->client_id = client_id; 139 | cs_clock_with_updates_batch_msg->clock = clock; 140 | 
cs_clock_with_updates_batch_msg->table_id = table_id; 141 | 142 | msgs[1].pack_memory(row_keys0, batch_size0 * sizeof(RowKey), 143 | row_keys1, batch_size1 * sizeof(RowKey)); 144 | msgs[2].pack_memory(updates0, batch_size0 * sizeof(RowOpVal), 145 | updates1, batch_size1 * sizeof(RowOpVal)); 146 | 147 | CHECK_LT(server_id, server_names.size()); 148 | string tablet_name = server_names[server_id]; 149 | router_handler->send_to(tablet_name, msgs); 150 | } 151 | 152 | void ClientServerEncode::add_access_info( 153 | uint metadata_server_id, const std::vector& access_info) { 154 | /* Currently, the tablet servers are also metadata servers */ 155 | CHECK_LT(metadata_server_id, server_names.size()); 156 | string metadata_sever_name = server_names[metadata_server_id]; 157 | 158 | vector msgs; 159 | msgs.resize(2); 160 | 161 | msgs[0].init_size(sizeof(cs_add_access_info_msg_t)); 162 | cs_add_access_info_msg_t *cs_add_access_info_msg = 163 | reinterpret_cast(msgs[0].data()); 164 | cs_add_access_info_msg->cmd = ADD_ACCESS_INFO; 165 | cs_add_access_info_msg->client_id = client_id; 166 | 167 | msgs[1].pack_vector(access_info); 168 | 169 | router_handler->send_to(metadata_sever_name, msgs); 170 | } 171 | 172 | void ClientServerEncode::get_stats(uint server_id) { 173 | CHECK_LT(server_id, server_names.size()); 174 | string sever_name = server_names[server_id]; 175 | 176 | vector msgs; 177 | msgs.resize(1); 178 | 179 | msgs[0].init_size(sizeof(cs_get_stats_msg_t)); 180 | cs_get_stats_msg_t *cs_get_stats_msg = 181 | reinterpret_cast(msgs[0].data()); 182 | cs_get_stats_msg->cmd = GET_STATS; 183 | cs_get_stats_msg->client_id = client_id; 184 | 185 | router_handler->send_to(sever_name, msgs); 186 | } 187 | 188 | 189 | ServerClientDecode::ServerClientDecode( 190 | uint channel_id, shared_ptr ctx, 191 | ClientLib *client_lib, bool work_in_bg, 192 | const GeePsConfig& config) : 193 | channel_id(channel_id), zmq_ctx(ctx), 194 | client_lib(client_lib), work_in_background(work_in_bg), 195 | 
config(config) { 196 | if (work_in_background) { 197 | /* Start background worker thread */ 198 | string endpoint = "inproc://bg-recv-worker"; 199 | shared_ptr work_puller = 200 | make_shared(zmq_ctx, endpoint); 201 | BackgroundWorker::WorkerCallback worker_callback = 202 | bind(&ServerClientDecode::decode_msg, this, _1); 203 | BackgroundWorker bg_worker(work_puller); 204 | bg_worker.add_callback(DECODE_CMD, worker_callback); 205 | bg_decode_worker_thread = make_shared(bg_worker); 206 | 207 | /* Init work pusher */ 208 | decode_work_pusher = make_shared(zmq_ctx, endpoint); 209 | } 210 | } 211 | 212 | void ServerClientDecode::find_row(vector& args) { 213 | CHECK_EQ(args.size(), 1); 214 | CHECK_EQ(args[0].size(), sizeof(sc_find_row_msg_t)); 215 | 216 | sc_find_row_msg_t *sc_find_row_msg = 217 | reinterpret_cast(args[0].data()); 218 | table_id_t table = sc_find_row_msg->table; 219 | row_idx_t row = sc_find_row_msg->row; 220 | uint server_id = sc_find_row_msg->server_id; 221 | client_lib->find_row_cbk(table, row, server_id); 222 | 223 | for (uint i = 0; i < args.size(); i++) { 224 | args[i].close(); 225 | } 226 | } 227 | 228 | void ServerClientDecode::read_row_batch(vector& args) { 229 | CHECK_GE(args.size(), 3); 230 | CHECK_EQ(args[0].size(), sizeof(sc_read_row_batch_msg_t)); 231 | sc_read_row_batch_msg_t *sc_read_row_batch_msg = 232 | reinterpret_cast(args[0].data()); 233 | uint server_id = sc_read_row_batch_msg->server_id; 234 | iter_t data_age = sc_read_row_batch_msg->data_age; 235 | iter_t self_clock = sc_read_row_batch_msg->self_clock; 236 | uint table_id = sc_read_row_batch_msg->table_id; 237 | 238 | RowKey *row_keys = 239 | reinterpret_cast(args[1].data()); 240 | uint batch_size = args[1].size() / sizeof(RowKey); 241 | RowData *row_data = 242 | reinterpret_cast(args[2].data()); 243 | CHECK_EQ(batch_size, args[2].size() / sizeof(RowData)); 244 | client_lib->server_clock_cbk(channel_id, server_id, data_age, table_id); 245 | client_lib->recv_row_batch_cbk( 246 | 
channel_id, data_age, self_clock, server_id, table_id, 247 | row_keys, row_data, batch_size); 248 | for (uint i = 0; i < args.size(); i++) { 249 | args[i].close(); 250 | } 251 | } 252 | 253 | void ServerClientDecode::clock(vector& args) { 254 | CHECK_EQ(args.size(), 1); 255 | CHECK_EQ(args[0].size(), sizeof(sc_clock_msg_t)); 256 | 257 | sc_clock_msg_t *sc_clock_msg = 258 | reinterpret_cast(args[0].data()); 259 | uint server_id = sc_clock_msg->server_id; 260 | iter_t clock = sc_clock_msg->clock; 261 | uint table_id = sc_clock_msg->table_id; 262 | 263 | client_lib->server_clock_cbk(channel_id, server_id, clock, table_id); 264 | 265 | for (uint i = 0; i < args.size(); i++) { 266 | args[i].close(); 267 | } 268 | } 269 | 270 | void ServerClientDecode::get_stats(vector& args) { 271 | CHECK_EQ(args.size(), 2); 272 | 273 | string stats; 274 | args[1].unpack_string(stats); 275 | client_lib->get_stats_cbk(stats); 276 | 277 | for (uint i = 0; i < args.size(); i++) { 278 | args[i].close(); 279 | } 280 | } 281 | 282 | void ServerClientDecode::decode_msg(vector& msgs) { 283 | CHECK_GE(msgs.size(), 1); 284 | CHECK_GE(msgs[0].size(), sizeof(command_t)); 285 | command_t cmd; 286 | msgs[0].unpack(cmd); 287 | switch (cmd) { 288 | case FIND_ROW: 289 | find_row(msgs); 290 | break; 291 | case READ_ROW_BATCH: 292 | read_row_batch(msgs); 293 | break; 294 | case CLOCK: 295 | clock(msgs); 296 | break; 297 | case GET_STATS: 298 | get_stats(msgs); 299 | break; 300 | default: 301 | CHECK(0) << "Client received unknown command!"; 302 | } 303 | } 304 | 305 | void ServerClientDecode::router_callback( 306 | const string& src, vector& msgs) { 307 | /* The "src" field is not send to the background worker, because we don't 308 | * want to construct another string vector object. 
309 | */ 310 | if (work_in_background) { 311 | /* Push to the background thread */ 312 | decode_work_pusher->push_work(DECODE_CMD, msgs); 313 | } else { 314 | /* Do it myself if "work_in_background" is not turned on */ 315 | decode_msg(msgs); 316 | } 317 | } 318 | 319 | RouterHandler::RecvCallback ServerClientDecode::get_recv_callback() { 320 | return bind(&ServerClientDecode::router_callback, this, _1, _2); 321 | } 322 | 323 | void ServerClientDecode::stop_decoder() { 324 | if (work_in_background) { 325 | /* Shut down background worker thread */ 326 | vector args; /* Args is empty */ 327 | decode_work_pusher->push_work(BackgroundWorker::STOP_CMD, args); 328 | (*bg_decode_worker_thread).join(); 329 | 330 | /* Set "work_in_background" to false, so that we won't do that again. */ 331 | work_in_background = false; 332 | } 333 | } 334 | 335 | ServerClientDecode::~ServerClientDecode() { 336 | stop_decoder(); 337 | } 338 | -------------------------------------------------------------------------------- /src/client/encoder-decoder.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __encoder_decoder_hpp__ 2 | #define __encoder_decoder_hpp__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. 
Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 
32 | */ 33 | 34 | // Encode and decode messages to/from the server 35 | 36 | #include 37 | #include 38 | 39 | #include "common/wire-protocol.hpp" 40 | #include "common/router-handler.hpp" 41 | #include "common/portable-bytes.hpp" 42 | #include "clientlib.hpp" 43 | 44 | using std::string; 45 | using std::vector; 46 | using boost::shared_ptr; 47 | 48 | class ClientServerEncode { 49 | shared_ptr router_handler; 50 | uint num_processes; 51 | uint client_id; 52 | vector server_names; 53 | 54 | public: 55 | ClientServerEncode( 56 | shared_ptr router_handler, 57 | uint num_processes, uint client_id, const GeePsConfig& config) : 58 | router_handler(router_handler), 59 | num_processes(num_processes), client_id(client_id) { 60 | for (uint i = 0; i < num_processes; i++) { 61 | std::string server_name("local"); 62 | if (!config.local_opt || i != client_id) { 63 | server_name = (boost::format("tablet-%i") % i).str(); 64 | } 65 | server_names.push_back(server_name); 66 | } 67 | } 68 | void clock_broadcast(iter_t clock, uint table_id); 69 | void clock_with_updates_batch( 70 | uint server_id, iter_t clock, uint table_id, 71 | const RowOpVal *updates, const RowKey *row_keys, uint batch_size); 72 | void clock_with_updates_batch( 73 | uint server_id, iter_t clock, uint table_id, 74 | const RowOpVal *updates0, const RowKey *row_keys0, uint batch_size0, 75 | const RowOpVal *updates1, const RowKey *row_keys1, uint batch_size1); 76 | void find_row(table_id_t table, row_idx_t row, uint metadata_sever_id); 77 | void read_row_batch( 78 | uint server_id, RowKeys& row_keys, iter_t data_age, 79 | bool prioritized); 80 | void add_access_info(uint metadata_server_id, 81 | const std::vector& access_info); 82 | void get_stats(uint server_id); 83 | }; 84 | 85 | class ServerClientDecode { 86 | static const uint DECODE_CMD = 1; 87 | 88 | uint channel_id; 89 | boost::shared_ptr zmq_ctx; 90 | ClientLib *client_lib; 91 | bool work_in_background; 92 | 93 | shared_ptr bg_decode_worker_thread; 94 | 
shared_ptr decode_work_pusher; 95 | 96 | GeePsConfig config; 97 | 98 | public: 99 | ServerClientDecode( 100 | uint channel_id, 101 | shared_ptr ctx, 102 | ClientLib *client_lib, 103 | bool work_in_bg, const GeePsConfig& config); 104 | ~ServerClientDecode(); 105 | void find_row(vector& args); 106 | void read_row_batch(vector& args); 107 | void clock(vector& args); 108 | void get_stats(vector& args); 109 | void decode_msg(vector& args); 110 | void router_callback(const string& src, vector& msgs); 111 | RouterHandler::RecvCallback get_recv_callback(); 112 | void stop_decoder(); 113 | }; 114 | 115 | #endif // defined __encoder_decoder_hpp__ 116 | -------------------------------------------------------------------------------- /src/client/geeps.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Carnegie Mellon University. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 3. Neither the name of the University nor the names of its contributors 14 | * may be used to endorse or promote products derived from this software 15 | * without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 24 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 25 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 27 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | * POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | #include 32 | #include 33 | 34 | #include "geeps.hpp" 35 | #include "clientlib.hpp" 36 | 37 | using std::string; 38 | using std::vector; 39 | using boost::shared_ptr; 40 | 41 | GeePs::GeePs( 42 | uint process_id, const GeePsConfig& config) { 43 | ClientLib::CreateInstance(process_id, config); 44 | client_lib->thread_start(); 45 | } 46 | 47 | void GeePs::Shutdown() { 48 | client_lib->thread_stop(); 49 | client_lib->shutdown(); 50 | } 51 | 52 | string GeePs::GetStats() { 53 | return client_lib->json_stats(); 54 | } 55 | 56 | void GeePs::StartIterations() { 57 | client_lib->start_opseq(); 58 | } 59 | 60 | int GeePs::VirtualRead( 61 | size_t table_id, const vector& row_ids, iter_t slack) { 62 | size_t num_val_limit = row_ids.size() * ROW_DATA_SIZE; 63 | return client_lib->virtual_read_batch( 64 | table_id, row_ids, slack, num_val_limit); 65 | } 66 | 67 | int GeePs::VirtualPostRead(int prestep_handle) { 68 | return client_lib->virtual_postread_batch(prestep_handle); 69 | } 70 | 71 | int GeePs::VirtualPreUpdate(size_t table_id, const vector& row_ids) { 72 | size_t num_val_limit = row_ids.size() * ROW_DATA_SIZE; 73 | return client_lib->virtual_prewrite_batch( 74 | table_id, row_ids, num_val_limit); 75 | } 76 | 77 | int GeePs::VirtualUpdate(int prestep_handle) { 78 | return client_lib->virtual_write_batch(prestep_handle); 79 | } 80 | 81 | int GeePs::VirtualLocalAccess(const vector& row_ids, 
bool fetch) { 82 | /* table_id doesn't matter for local access */ 83 | size_t table_id = 0xdeadbeef; 84 | size_t num_val_limit = row_ids.size() * ROW_DATA_SIZE; 85 | return client_lib->virtual_localaccess_batch( 86 | table_id, row_ids, num_val_limit, fetch); 87 | } 88 | 89 | int GeePs::VirtualPostLocalAccess(int prestep_handle, bool keep) { 90 | return client_lib->virtual_postlocalaccess_batch(prestep_handle, keep); 91 | } 92 | 93 | int GeePs::VirtualClock() { 94 | return client_lib->virtual_clock(); 95 | } 96 | 97 | bool GeePs::Read(int handle, RowData **buffer_ptr) { 98 | bool stat = true; 99 | return client_lib->read_batch( 100 | buffer_ptr, handle, stat); 101 | } 102 | 103 | void GeePs::PostRead(int handle) { 104 | client_lib->postread_batch(handle); 105 | } 106 | 107 | void GeePs::PreUpdate(int handle, RowData **buffer_ptr) { 108 | bool stat = true; 109 | client_lib->preupdate_batch(buffer_ptr, handle, stat); 110 | } 111 | 112 | bool GeePs::LocalAccess(int handle, RowData **buffer_ptr) { 113 | bool stat = false; 114 | return client_lib->read_batch( 115 | buffer_ptr, handle, stat); 116 | } 117 | 118 | void GeePs::PostLocalAccess(int handle) { 119 | client_lib->postread_batch(handle); 120 | } 121 | 122 | void GeePs::Update(int handle) { 123 | client_lib->update_batch(handle); 124 | } 125 | 126 | void GeePs::Clock() { 127 | client_lib->iterate(); 128 | } 129 | 130 | void GeePs::FinishVirtualIteration() { 131 | client_lib->finish_virtual_iteration(); 132 | } 133 | -------------------------------------------------------------------------------- /src/client/stats-tracker.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __client_stats_hpp__ 2 | #define __client_stats_hpp__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 
7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 
32 | */ 33 | 34 | // Performance tracking and reporting 35 | 36 | #include 37 | 38 | #include 39 | 40 | #include "common/internal-config.hpp" 41 | 42 | using std::string; 43 | 44 | struct Stats { 45 | int64_t nr_threads; 46 | int64_t row_count; 47 | int64_t row_count_local; 48 | int64_t nr_thread_hit; 49 | int64_t nr_proc_hit; 50 | int64_t nr_miss; 51 | int64_t nr_prefetch_miss; 52 | int64_t nr_proc_hit2; 53 | int64_t nr_miss2; 54 | int64_t nr_prefetch_miss2; 55 | double tot_miss_latency; 56 | int64_t nr_refresh; 57 | int64_t nr_read_requests; 58 | int64_t nr_recv_row; 59 | int64_t nr_non_pending_respond; 60 | double tot_non_pending_respond_time; 61 | double ave_non_pending_respond_time; 62 | double max_non_pending_respond_time; 63 | double min_non_pending_respond_time; 64 | uint32_t prefetch; 65 | double psafe; 66 | uint32_t local_opt; 67 | uint32_t pp_policy; 68 | uint32_t affinity; 69 | 70 | double iterate_time; 71 | double iter_flush_log_time; 72 | double iter_var_time; 73 | 74 | double app_read_time; 75 | double app_postread_time; 76 | double app_prewrite_time; 77 | double app_write_time; 78 | 79 | double bg_read_time; 80 | double bg_read_read_time; 81 | double bg_read_move_time; 82 | double bg_postread_time; 83 | double bg_prewrite_time; 84 | double bg_write_time; 85 | double bg_write_move_time; 86 | double bg_write_update_time; 87 | double bg_clock_time; 88 | 89 | double vi_time; 90 | 91 | int64_t nr_cache_copy; 92 | int64_t tot_read; 93 | int64_t tot_staleness; 94 | int64_t tot_increment; 95 | int64_t tot_apply_thr_cache; 96 | int64_t tot_apply_proc_cache; 97 | int64_t nr_thread_cache; 98 | int64_t nr_thread_cache_max; 99 | int64_t max_tot_access; 100 | uint bucket_count; 101 | uint max_bucket_size; 102 | uint tot_bucket_size; 103 | 104 | string router_stats; 105 | string bgthread_stats; 106 | bool server_stats_refreshed; 107 | string server_stats; 108 | 109 | void reset() { 110 | nr_thread_hit = 0; 111 | nr_proc_hit = 0; 112 | nr_miss = 0; 113 | 
nr_prefetch_miss = 0; 114 | nr_proc_hit2 = 0; 115 | nr_miss2 = 0; 116 | nr_prefetch_miss2 = 0; 117 | tot_miss_latency = 0.0; 118 | nr_refresh = 0; 119 | nr_read_requests = 0; 120 | nr_recv_row = 0; 121 | nr_non_pending_respond = 0; 122 | tot_non_pending_respond_time = 0.0; 123 | ave_non_pending_respond_time = 0.0; 124 | max_non_pending_respond_time = 0.0; 125 | min_non_pending_respond_time = 1000; 126 | 127 | iterate_time = 0.0; 128 | iter_flush_log_time = 0.0; 129 | iter_var_time = 0.0; 130 | nr_cache_copy = 0; 131 | tot_read = 0; 132 | tot_staleness = 0; 133 | tot_increment = 0; 134 | tot_apply_thr_cache = 0; 135 | tot_apply_proc_cache = 0; 136 | 137 | app_read_time = 0; 138 | app_postread_time = 0; 139 | app_prewrite_time = 0; 140 | app_write_time = 0; 141 | 142 | bg_read_time = 0; 143 | bg_read_read_time = 0; 144 | bg_read_move_time = 0; 145 | bg_postread_time = 0; 146 | bg_prewrite_time = 0; 147 | bg_write_time = 0; 148 | bg_write_move_time = 0; 149 | bg_write_update_time = 0; 150 | bg_clock_time = 0; 151 | } 152 | 153 | Stats() { 154 | nr_threads = 0; 155 | nr_thread_cache = 0; 156 | nr_thread_cache_max = 0; 157 | bucket_count = 0; 158 | max_bucket_size = 0; 159 | tot_bucket_size = 0; 160 | vi_time = 0; 161 | reset(); 162 | } 163 | 164 | Stats& operator += (const Stats& rhs) { 165 | nr_thread_hit += rhs.nr_thread_hit; 166 | nr_proc_hit += rhs.nr_proc_hit; 167 | nr_miss += rhs.nr_miss; 168 | nr_prefetch_miss += rhs.nr_prefetch_miss; 169 | nr_proc_hit2 += rhs.nr_proc_hit2; 170 | nr_miss2 += rhs.nr_miss2; 171 | nr_prefetch_miss2 += rhs.nr_prefetch_miss2; 172 | tot_miss_latency += rhs.tot_miss_latency; 173 | nr_refresh += rhs.nr_refresh; 174 | nr_read_requests += rhs.nr_read_requests; 175 | nr_recv_row += rhs.nr_recv_row; 176 | 177 | iterate_time += rhs.iterate_time; 178 | iter_flush_log_time += rhs.iter_flush_log_time; 179 | 180 | vi_time += rhs.vi_time; 181 | 182 | nr_cache_copy += rhs.nr_cache_copy; 183 | tot_read += rhs.tot_read; 184 | tot_staleness += 
rhs.tot_staleness; 185 | tot_increment += rhs.tot_increment; 186 | tot_apply_thr_cache += rhs.tot_apply_thr_cache; 187 | tot_apply_proc_cache += rhs.tot_apply_proc_cache; 188 | nr_thread_cache += rhs.nr_thread_cache; 189 | nr_thread_cache_max = 190 | rhs.nr_thread_cache_max > nr_thread_cache_max ? 191 | rhs.nr_thread_cache_max : nr_thread_cache_max; 192 | bucket_count += rhs.bucket_count; 193 | max_bucket_size += rhs.max_bucket_size; 194 | tot_bucket_size += rhs.tot_bucket_size; 195 | return *this; 196 | } 197 | 198 | std::string to_json() { 199 | std::stringstream ss; 200 | ss << "{ " 201 | << "\"nr_threads\": " << nr_threads << ", " 202 | << "\"row_count\": " << row_count << ", " 203 | << "\"row_count_local\": " << row_count_local << ", " 204 | << "\"prefetch\": " << prefetch << ", " 205 | << "\"psafe\": " << psafe << ", " 206 | << "\"local_opt\": " << local_opt << ", " 207 | << "\"pp_policy\": " << pp_policy << ", " 208 | << "\"affinity\": " << affinity << ", " 209 | << "\"nr_thread_cache\": " << nr_thread_cache << ", " 210 | << "\"nr_thread_cache_max\": " << nr_thread_cache_max << ", " 211 | << "\"bucket_count\": " << bucket_count << ", " 212 | << "\"max_bucket_size\": " << max_bucket_size << ", " 213 | << "\"tot_bucket_size\": " << tot_bucket_size << ", " 214 | << "\"nr_thread_hit\": " << nr_thread_hit << ", " 215 | << "\"nr_proc_hit\": " << nr_proc_hit << ", " 216 | << "\"nr_miss\": " << nr_miss << ", " 217 | << "\"nr_prefetch_miss\": " << nr_prefetch_miss << ", " 218 | << "\"nr_proc_hit2\": " << nr_proc_hit2 << ", " 219 | << "\"nr_miss2\": " << nr_miss2 << ", " 220 | << "\"nr_prefetch_miss2\": " << nr_prefetch_miss2 << ", " 221 | << "\"nr_refresh\": " << nr_refresh << ", " 222 | << "\"nr_read_requests\": " << nr_read_requests << ", " 223 | << "\"nr_recv_row\": " << nr_recv_row << ", " 224 | << "\"nr_non_pending_respond\": " << nr_non_pending_respond << ", " 225 | << "\"ave_non_pending_respond_time\": " 226 | << ave_non_pending_respond_time << ", " 227 | << 
"\"min_non_pending_respond_time\": " 228 | << min_non_pending_respond_time << ", " 229 | << "\"max_non_pending_respond_time\": " 230 | << max_non_pending_respond_time << ", " 231 | << "\"READ_TIMING_FREQ\": " << READ_TIMING_FREQ << ", " 232 | 233 | << "\"iterate_time\": " << iterate_time / nr_threads << ", " 234 | << "\"iter_flush_log_time\": " 235 | << iter_flush_log_time / nr_threads << ", " 236 | << "\"iter_var_time\": " << iter_var_time << ", " 237 | 238 | << "\"app_read_time\": " << app_read_time << ", " 239 | << "\"app_postread_time\": " << app_postread_time << ", " 240 | << "\"app_prewrite_time\": " << app_prewrite_time << ", " 241 | << "\"app_write_time\": " << app_write_time << ", " 242 | 243 | << "\"bg_read_time\": " << bg_read_time << ", " 244 | << "\"bg_read_read_time\": " << bg_read_read_time << ", " 245 | << "\"bg_read_move_time\": " << bg_read_move_time << ", " 246 | << "\"bg_postread_time\": " << bg_postread_time << ", " 247 | << "\"bg_prewrite_time\": " << bg_prewrite_time << ", " 248 | << "\"bg_write_time\": " << bg_write_time << ", " 249 | << "\"bg_write_move_time\": " << bg_write_move_time << ", " 250 | << "\"bg_write_update_time\": " << bg_write_update_time << ", " 251 | << "\"bg_clock_time\": " << bg_clock_time << ", " 252 | << "\"bg_total_time\": " 253 | << bg_read_read_time + bg_read_move_time + bg_postread_time + 254 | bg_prewrite_time + bg_write_move_time + bg_write_update_time + 255 | bg_clock_time << ", " 256 | 257 | << "\"vi_time\": " << vi_time / nr_threads << ", " 258 | 259 | << "\"nr_cache_copy\": " << nr_cache_copy << ", " 260 | << "\"tot_read\": " << tot_read << ", " 261 | << "\"tot_staleness\": " << tot_staleness << ", " 262 | << "\"tot_increment\": " << tot_increment << ", " 263 | << "\"tot_apply_thr_cache\": " << tot_apply_thr_cache << ", " 264 | << "\"tot_apply_proc_cache\": " << tot_apply_proc_cache << ", " 265 | 266 | << "\"router_stats\": " << router_stats << ", " 267 | << "\"bgthread_stats\": " << bgthread_stats << ", " 268 
| << "\"server_stats\": " << server_stats << ", " 269 | 270 | << "\"last_entry\": 0" 271 | << "}"; 272 | return ss.str(); 273 | } 274 | }; // end of struct Stats 275 | 276 | struct BgthreadStats { 277 | int64_t tot_recv_row; 278 | int64_t recv_row_nr_apply_oplog; 279 | double tot_recv_row_time; 280 | double recv_row_get_lock_time; 281 | double recv_row_get_memory_time; 282 | double recv_row_erase_fetch_time; 283 | double recv_row_erase_oplog_time; 284 | double recv_row_copy_data_time; 285 | double recv_row_apply_oplog_time; 286 | 287 | double tot_push_updates_time; 288 | double push_updates_get_global_lock_time; 289 | double push_updates_find_row_time; 290 | double push_updates_get_lock_time; 291 | double push_updates_send_update_time; 292 | double push_updates_send_iterate_time; 293 | iter_t push_updates_iter; 294 | int64_t push_updates_count; 295 | 296 | void reset() { 297 | tot_recv_row = 0; 298 | recv_row_nr_apply_oplog = 0; 299 | tot_recv_row_time = 0.0; 300 | recv_row_get_lock_time = 0.0; 301 | recv_row_get_memory_time = 0.0; 302 | recv_row_erase_fetch_time = 0.0; 303 | recv_row_erase_oplog_time = 0.0; 304 | recv_row_copy_data_time = 0.0; 305 | recv_row_apply_oplog_time = 0.0; 306 | 307 | tot_push_updates_time = 0.0; 308 | push_updates_get_global_lock_time = 0.0; 309 | push_updates_find_row_time = 0.0; 310 | push_updates_get_lock_time = 0.0; 311 | push_updates_send_update_time = 0.0; 312 | push_updates_send_iterate_time = 0.0; 313 | push_updates_count = 0; 314 | } 315 | 316 | BgthreadStats() { 317 | reset(); 318 | } 319 | 320 | BgthreadStats& operator += (const BgthreadStats& rhs) { 321 | tot_recv_row += rhs.tot_recv_row; 322 | recv_row_nr_apply_oplog += rhs.recv_row_nr_apply_oplog; 323 | tot_recv_row_time += rhs.tot_recv_row_time; 324 | recv_row_get_lock_time += rhs.recv_row_get_lock_time; 325 | recv_row_get_memory_time += rhs.recv_row_get_memory_time; 326 | recv_row_erase_fetch_time += rhs.recv_row_erase_fetch_time; 327 | recv_row_erase_oplog_time += 
rhs.recv_row_erase_oplog_time; 328 | recv_row_copy_data_time += rhs.recv_row_copy_data_time; 329 | recv_row_apply_oplog_time += rhs.recv_row_apply_oplog_time; 330 | tot_push_updates_time += rhs.tot_push_updates_time; 331 | push_updates_get_global_lock_time += rhs.push_updates_get_global_lock_time; 332 | push_updates_find_row_time += rhs.push_updates_find_row_time; 333 | push_updates_get_lock_time += rhs.push_updates_get_lock_time; 334 | push_updates_send_update_time += rhs.push_updates_send_update_time; 335 | push_updates_send_iterate_time += rhs.push_updates_send_iterate_time; 336 | push_updates_count += rhs.push_updates_count; 337 | return *this; 338 | } 339 | 340 | BgthreadStats& operator /= (int n) { 341 | tot_recv_row /= n; 342 | recv_row_nr_apply_oplog /= n; 343 | tot_recv_row_time /= n; 344 | recv_row_get_lock_time /= n; 345 | recv_row_get_memory_time /= n; 346 | recv_row_erase_fetch_time /= n; 347 | recv_row_erase_oplog_time /= n; 348 | recv_row_copy_data_time /= n; 349 | recv_row_apply_oplog_time /= n; 350 | tot_push_updates_time /= n; 351 | push_updates_get_global_lock_time /= n; 352 | push_updates_find_row_time /= n; 353 | push_updates_get_lock_time /= n; 354 | push_updates_send_update_time /= n; 355 | push_updates_send_iterate_time /= n; 356 | push_updates_count /= n; 357 | return *this; 358 | } 359 | 360 | string to_json() { 361 | std::stringstream ss; 362 | ss << "{" 363 | << "\"SET_ROW_TIMING_FREQ\": " << SET_ROW_TIMING_FREQ << ", " 364 | << "\"tot_recv_row\": " << tot_recv_row << ", " 365 | << "\"recv_row_nr_apply_oplog\": " << recv_row_nr_apply_oplog << ", " 366 | << "\"tot_recv_row_time\": " 367 | << tot_recv_row_time * SET_ROW_TIMING_FREQ << ", " 368 | << "\"recv_row_get_lock_time\": " 369 | << recv_row_get_lock_time * SET_ROW_TIMING_FREQ << ", " 370 | << "\"recv_row_get_memory_time\": " 371 | << recv_row_get_memory_time * SET_ROW_TIMING_FREQ << ", " 372 | << "\"recv_row_erase_fetch_time\": " 373 | << recv_row_erase_fetch_time * 
SET_ROW_TIMING_FREQ << ", " 374 | << "\"recv_row_erase_oplog_time\": " 375 | << recv_row_erase_oplog_time * SET_ROW_TIMING_FREQ << ", " 376 | << "\"recv_row_copy_data_time\": " 377 | << recv_row_copy_data_time * SET_ROW_TIMING_FREQ << ", " 378 | << "\"recv_row_apply_oplog_time\": " 379 | << recv_row_apply_oplog_time * SET_ROW_TIMING_FREQ << ", " 380 | 381 | << "\"tot_push_updates_time\": " << tot_push_updates_time << ", " 382 | << "\"push_updates_get_global_lock_time\": " 383 | << push_updates_get_global_lock_time << ", " 384 | << "\"push_updates_get_lock_time\": " 385 | << push_updates_get_lock_time << ", " 386 | << "\"push_updates_find_row_time\": " 387 | << push_updates_find_row_time << ", " 388 | << "\"push_updates_send_update_time\": " 389 | << push_updates_send_update_time << ", " 390 | << "\"push_updates_send_iterate_time\": " 391 | << push_updates_send_iterate_time << ", " 392 | << "\"push_updates_iter\": " << push_updates_iter << ", " 393 | << "\"push_updates_count\": " << push_updates_count << ", " 394 | << "\"last_entry\": 0" 395 | << " } "; 396 | return ss.str(); 397 | } 398 | }; // end of struct BgthreadStats 399 | 400 | #endif // defined __client_stats_hpp__ 401 | -------------------------------------------------------------------------------- /src/common/background-worker.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Carnegie Mellon University. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. 
Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 3. Neither the name of the University nor the names of its contributors 14 | * may be used to endorse or promote products derived from this software 15 | * without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 24 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 25 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 27 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | * POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | #include 32 | #include 33 | 34 | #include "background-worker.hpp" 35 | 36 | int BackgroundWorker::add_callback(uint32_t cmd, WorkerCallback callback) { 37 | if (cmd == STOP_CMD) { 38 | return -1; 39 | } 40 | if (callback_map.count(cmd)) { 41 | return -1; 42 | } 43 | 44 | callback_map[cmd] = callback; 45 | return 0; 46 | } 47 | 48 | void BackgroundWorker::pull_work_loop() { 49 | while (1) { 50 | uint32_t cmd = 0; 51 | std::vector args; 52 | int ret = work_puller->pull_work(cmd, args); 53 | if (ret < 0 || cmd == 0) { 54 | break; 55 | } 56 | if (!callback_map.count(cmd)) { 57 | std::cerr << "Received unknown command!" 
<< std::endl; 58 | assert(0); 59 | } 60 | WorkerCallback& callback = callback_map[cmd]; 61 | callback(args); 62 | } 63 | } 64 | 65 | void BackgroundWorker::operator()() { 66 | pull_work_loop(); 67 | } 68 | -------------------------------------------------------------------------------- /src/common/background-worker.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __background_worker_hpp__ 2 | #define __background_worker_hpp__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | #include 40 | #include 41 | 42 | #include "work-puller.hpp" 43 | 44 | class BackgroundWorker { 45 | public: 46 | explicit BackgroundWorker(const boost::shared_ptr& work_puller) 47 | : work_puller(work_puller) {} 48 | typedef boost::function&)> 49 | WorkerCallback; 50 | static const uint32_t STOP_CMD = 0; 51 | int add_callback(uint32_t cmd, WorkerCallback callback); 52 | void pull_work_loop(); 53 | void operator()(); 54 | /* This function will be used as the entry point of a boost thread */ 55 | 56 | private: 57 | boost::shared_ptr work_puller; 58 | boost::unordered_map callback_map; 59 | }; 60 | 61 | #endif // defined __background_worker_hpp__ 62 | -------------------------------------------------------------------------------- /src/common/common-util.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __common_util_hpp__ 2 | #define __common_util_hpp__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. 
Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include 35 | #include 36 | #include 37 | 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | 44 | #include 45 | 46 | #include "geeps-user-defined-types.hpp" 47 | #include "common/wire-protocol.hpp" 48 | #include "common/gpu-util/math_functions.hpp" 49 | 50 | using std::vector; 51 | using std::string; 52 | using std::cout; 53 | using std::cerr; 54 | using std::endl; 55 | using boost::unordered_map; 56 | 57 | inline iter_t clock_min(vector clocks) { 58 | CHECK(clocks.size()); 59 | iter_t cmin = clocks[0]; 60 | for (uint i = 1; i < clocks.size(); i++) { 61 | cmin = clocks[i] < cmin ? 
clocks[i] : cmin; 62 | } 63 | return cmin; 64 | } 65 | 66 | inline iter_t clock_max(vector clocks) { 67 | CHECK(clocks.size()); 68 | iter_t cmax = clocks[0]; 69 | for (uint i = 1; i < clocks.size(); i++) { 70 | cmax = clocks[i] > cmax ? clocks[i] : cmax; 71 | } 72 | return cmax; 73 | } 74 | 75 | inline uint get_nearest_power2(uint n) { 76 | uint power2 = 1; 77 | while (power2 < n) { 78 | power2 <<= 1; 79 | } 80 | return power2; 81 | } 82 | 83 | inline void mallocHost(void **ptr, size_t size) { 84 | /* On CUDA 7.5 and CUDA 7.0, the cudaMallocHost() will sometimes fail 85 | * even though there is still available memory to allocate. 86 | * I don't know why it's happening, but as a workaround, 87 | * I added this while loop to retry cudaMallocHost(). */ 88 | while (cudaMallocHost(ptr, size) != cudaSuccess) { 89 | cout << "*** WARNING: cudaMallocHost failed, will retry" << endl; 90 | } 91 | } 92 | 93 | inline void mallocHost(RowData **ptr, size_t size) { 94 | mallocHost(reinterpret_cast(ptr), size); 95 | } 96 | 97 | /* Features: 98 | * - Single threaded 99 | * - The entries are freed in the same order as they are allocated. 
*/ 100 | struct SimpleCacheHelper { 101 | size_t size_; 102 | size_t free_start_; 103 | size_t free_end_; 104 | size_t free_count_; 105 | size_t unused_tail_; 106 | SimpleCacheHelper() { 107 | init(0); 108 | } 109 | ~SimpleCacheHelper() { 110 | clear(); 111 | } 112 | void init(size_t size) { 113 | size_ = size; 114 | free_count_ = size_; 115 | unused_tail_ = size_; 116 | if (size_) { 117 | free_start_ = 0; 118 | free_end_ = size_ - 1; 119 | } 120 | } 121 | void clear() { 122 | init(0); 123 | } 124 | size_t size() { 125 | return size_; 126 | } 127 | size_t get(size_t count, bool wait) { 128 | CHECK_GE(free_count_, count); 129 | CHECK_LT(free_start_, size_); 130 | CHECK_LT(free_end_, size_); 131 | CHECK_NE(free_start_, free_end_); 132 | if (free_start_ < free_end_) { 133 | /* All free space is after free_start_ */ 134 | if (free_start_ + count < free_end_) { 135 | /* There are enough contiguous free space after free_start_ */ 136 | size_t index = free_start_; 137 | free_start_ += count; 138 | free_count_ -= count; 139 | return index; 140 | } else { 141 | cerr << "Insufficient space\n"; 142 | assert(0); 143 | } 144 | } else { 145 | /* There are some free space at the beginning */ 146 | if (free_start_ + count < size_) { 147 | /* There are enough contiguous free space after free_start_ */ 148 | size_t index = free_start_; 149 | free_start_ += count; 150 | free_count_ -= count; 151 | return index; 152 | } else { 153 | /* There are NOT enough contiguous free space after free_start_. 154 | * We mark the space after free_start_ as unused_tail_, 155 | * and we go back to the front. 
*/ 156 | unused_tail_ = free_start_; 157 | free_start_ = 0; 158 | free_count_ -= (size_ - unused_tail_); 159 | CHECK_LT(free_start_, free_end_); 160 | if (free_start_ + count < free_end_) { 161 | size_t index = free_start_; 162 | free_start_ += count; 163 | free_count_ -= count; 164 | return index; 165 | } else { 166 | cerr << "Insufficient space\n"; 167 | assert(0); 168 | } 169 | } 170 | } 171 | } 172 | void put(size_t index, size_t count) { 173 | CHECK_LT(index, size_); 174 | if (index == (free_end_ + 1) % size_) { 175 | free_end_ = (free_end_ + count) % size_; 176 | } else { 177 | /* There should be an unused tail */ 178 | CHECK_EQ(index, 0); 179 | CHECK_EQ(unused_tail_, (free_end_ + 1) % size_); 180 | free_count_ += (size_ - unused_tail_); 181 | unused_tail_ = size_; 182 | CHECK_LE(count, size_); 183 | free_end_ = count - 1; 184 | } 185 | free_count_ += count; 186 | } 187 | }; 188 | 189 | struct MultithreadedCacheHelper { 190 | struct AllocMapEntry { 191 | size_t size; 192 | int tag; 193 | AllocMapEntry(size_t size = 0, int tag = 0) : size(size), tag(tag) {} 194 | }; 195 | typedef std::map AllocMap; 196 | typedef AllocMap::iterator AllocMapIter; 197 | AllocMap alloc_map_; 198 | size_t size_; 199 | size_t allocated_; 200 | size_t last_alloc_start_; 201 | boost::mutex mutex_; 202 | boost::condition_variable cvar_; 203 | MultithreadedCacheHelper() { 204 | init(0); 205 | } 206 | ~MultithreadedCacheHelper() { 207 | clear(); 208 | } 209 | void init(size_t size) { 210 | size_ = size; 211 | allocated_ = 0; 212 | alloc_map_.clear(); 213 | last_alloc_start_ = size_; 214 | /* Initialize last_alloc_start_ to size_, so that this statement is true: 215 | * alloc_map_.find(last_alloc_start_) == alloc_map_.end() */ 216 | } 217 | void clear() { 218 | init(0); 219 | } 220 | size_t size() { 221 | return size_; 222 | } 223 | size_t get(size_t count, bool wait, int tag = 0) { 224 | boost::unique_lock lock(mutex_); 225 | while (true) { 226 | size_t search_start = 0; 227 | 
AllocMapIter last_alloc_pos_ = alloc_map_.find(last_alloc_start_); 228 | if (last_alloc_pos_ != alloc_map_.end()) { 229 | size_t last_alloc_start = last_alloc_pos_->first; 230 | size_t last_alloc_count = last_alloc_pos_->second.size; 231 | /* Search after the last allocated position */ 232 | search_start = last_alloc_start + last_alloc_count; 233 | } 234 | size_t start; 235 | if (search_start < size_) { 236 | start = search_start; 237 | for (AllocMapIter map_it = alloc_map_.begin(); 238 | map_it != alloc_map_.end(); map_it++) { 239 | CHECK_LT(start, size_); 240 | size_t allocated_start = map_it->first; 241 | size_t allocated_count = map_it->second.size; 242 | if (allocated_start < search_start) { 243 | /* Only search after the last allocated position */ 244 | continue; 245 | } 246 | CHECK_LE(start, allocated_start); 247 | if (start + count <= allocated_start) { 248 | /* Allocated it before this entry */ 249 | alloc_map_[start] = AllocMapEntry(count, tag); 250 | last_alloc_start_ = start; 251 | allocated_ += count; 252 | return start; 253 | } else { 254 | start = allocated_start + allocated_count; 255 | } 256 | } 257 | /* Check the space after the last entry */ 258 | if (start + count <= size_) { 259 | /* Allocated it at the end */ 260 | alloc_map_[start] = AllocMapEntry(count, tag); 261 | last_alloc_start_ = start; 262 | allocated_ += count; 263 | return start; 264 | } 265 | } 266 | /* Search the space before the last allocated position */ 267 | start = 0; 268 | for (AllocMapIter map_it = alloc_map_.begin(); 269 | map_it != alloc_map_.end(); map_it++) { 270 | if (start >= search_start) { 271 | /* Only search before the last allocated position */ 272 | break; 273 | } 274 | CHECK_LT(start, size_); 275 | size_t allocated_start = map_it->first; 276 | size_t allocated_count = map_it->second.size; 277 | CHECK_LE(start, allocated_start); 278 | if (start + count <= allocated_start) { 279 | /* Allocated it before this entry */ 280 | alloc_map_[start] = AllocMapEntry(count, 
tag); 281 | last_alloc_start_ = start; 282 | allocated_ += count; 283 | return start; 284 | } else { 285 | start = allocated_start + allocated_count; 286 | } 287 | } 288 | /* If no wait, return size_, indicating there's no more space */ 289 | if (!wait) { 290 | cerr << "MultithreadedCacheHelper has no more space\n"; 291 | cout << "need " << count << endl; 292 | cout << "allocated " << allocated_ << endl; 293 | cout << "size " << size_ << endl; 294 | print_space(); 295 | return size_; 296 | } 297 | /* No more space, wait to be notified */ 298 | // cvar_.wait(lock); 299 | if (!cvar_.timed_wait(lock, 300 | boost::posix_time::milliseconds(12000))) { 301 | cerr << "MultithreadedCacheHelper waits for more space timed out\n"; 302 | cout << "need " << count << endl; 303 | cout << "allocated " << allocated_ << endl; 304 | cout << "size " << size_ << endl; 305 | print_space(); 306 | return size_; 307 | } 308 | } 309 | } 310 | void put(size_t start, size_t count) { 311 | boost::unique_lock lock(mutex_); 312 | alloc_map_.erase(start); 313 | allocated_ -= count; 314 | cvar_.notify_all(); 315 | } 316 | void print_space() { 317 | for (AllocMap::iterator map_it = alloc_map_.begin(); 318 | map_it != alloc_map_.end(); map_it++) { 319 | size_t allocated_start = map_it->first; 320 | size_t allocated_count = map_it->second.size; 321 | int tag = map_it->second.tag; 322 | cerr << "allocated_start = " << allocated_start << endl; 323 | cerr << "allocated_count = " << allocated_count << endl; 324 | cerr << "tag = " << tag << endl; 325 | } 326 | } 327 | }; 328 | 329 | template 330 | struct GpuCache { 331 | RowData *data_; 332 | size_t size_; 333 | size_t memsize_; 334 | CacheHelper helper_; 335 | GpuCache() : helper_() { 336 | init(0); 337 | } 338 | ~GpuCache() { 339 | clear(); 340 | } 341 | void init(size_t size) { 342 | size_ = size; 343 | memsize_ = size_ * sizeof(RowData); 344 | data_ = NULL; 345 | if (memsize_) { 346 | CUDA_CHECK(cudaMalloc(&data_, memsize_)); 347 | } 348 | 
helper_.init(size); 349 | } 350 | void clear() { 351 | if (data_) { 352 | CUDA_CHECK(cudaFree(data_)); 353 | } 354 | init(0); 355 | helper_.clear(); 356 | } 357 | size_t size() { 358 | return size_; 359 | } 360 | RowData *get(size_t count, bool wait, int tag = 0) { 361 | size_t index = helper_.get(count, wait, tag); 362 | if (index >= size_) { 363 | /* No more space */ 364 | return NULL; 365 | } 366 | return &data_[index]; 367 | } 368 | void put(RowData *buffer, size_t count) { 369 | size_t index = static_cast(buffer - data_); 370 | helper_.put(index, count); 371 | } 372 | void print_space() { 373 | helper_.print_space(); 374 | } 375 | }; 376 | 377 | /* TODO: remove row_keys from DataStorage */ 378 | struct DataStorage { 379 | enum MemoryType { 380 | UNINITIALIZED, 381 | GPU, 382 | CPU, 383 | PINNED_CPU 384 | } type_; 385 | size_t size_; 386 | size_t memsize_; 387 | std::vector row_keys; 388 | RowData *ptr_; 389 | void init(size_t size, MemoryType type) { 390 | CHECK_EQ(type_, UNINITIALIZED); 391 | CHECK(!size_); 392 | CHECK(!memsize_); 393 | CHECK(!ptr_); 394 | type_ = type; 395 | size_ = size; 396 | memsize_ = size_ * sizeof(RowData); 397 | row_keys.resize(size_); 398 | switch (type_) { 399 | case GPU: 400 | init_gpu(); 401 | break; 402 | case CPU: 403 | init_cpu(); 404 | break; 405 | case PINNED_CPU: 406 | init_pinned_cpu(); 407 | break; 408 | default: 409 | CHECK_EQ(type_, UNINITIALIZED); 410 | } 411 | } 412 | void init_gpu() { 413 | CHECK_EQ(type_, GPU); 414 | if (!memsize_) { 415 | return; 416 | } 417 | CHECK(!ptr_); 418 | CUDA_CHECK(cudaMalloc(&ptr_, memsize_)); 419 | } 420 | void init_cpu() { 421 | CHECK_EQ(type_, CPU); 422 | if (!memsize_) { 423 | return; 424 | } 425 | CHECK(!ptr_); 426 | ptr_ = reinterpret_cast(malloc(memsize_)); 427 | CHECK(ptr_); 428 | } 429 | void init_pinned_cpu() { 430 | CHECK_EQ(type_, PINNED_CPU); 431 | if (!memsize_) { 432 | return; 433 | } 434 | CHECK(!ptr_); 435 | mallocHost(&ptr_, memsize_); 436 | } 437 | void zerofy_data_cpu() 
{ 438 | CHECK_EQ(type_, CPU); 439 | if (!memsize_) { 440 | return; 441 | } 442 | CHECK(ptr_); 443 | memset(ptr_, 0, memsize_); 444 | } 445 | void zerofy_data_gpu(cudaStream_t cuda_stream) { 446 | CHECK_EQ(type_, GPU); 447 | if (!memsize_) { 448 | return; 449 | } 450 | CHECK(ptr_); 451 | CHECK(cuda_stream); 452 | /* We zerofy the data using cudaMemsetAsync() and 453 | * call cudaStreamSynchronize() after it. */ 454 | CUDA_CHECK(cudaMemsetAsync(ptr_, 0, memsize_, cuda_stream)); 455 | CUDA_CHECK(cudaStreamSynchronize(cuda_stream)); 456 | } 457 | size_t size() { 458 | return size_; 459 | } 460 | size_t memsize() { 461 | return memsize_; 462 | } 463 | MemoryType type() { 464 | return type_; 465 | } 466 | RowData *data() { 467 | if (!memsize_) { 468 | return NULL; 469 | } 470 | CHECK(ptr_); 471 | return ptr_; 472 | } 473 | void init_empty() { 474 | type_ = UNINITIALIZED; 475 | size_ = 0; 476 | memsize_ = 0, 477 | ptr_ = NULL; 478 | } 479 | void init_from(const DataStorage& other) { 480 | init(other.size_, other.type_); 481 | } 482 | void copy(const DataStorage& other) { 483 | clear(); 484 | type_ = other.type_; 485 | size_ = other.size_; 486 | memsize_ = other.memsize_; 487 | row_keys = other.row_keys; 488 | } 489 | void copy_data_gpu(const DataStorage& other, cudaStream_t cuda_stream) { 490 | CHECK_EQ(type_, GPU); 491 | CHECK_EQ(other.type_, GPU); 492 | CHECK_EQ(memsize_, other.memsize_); 493 | CHECK(cuda_stream); 494 | CHECK(ptr_); 495 | CHECK(other.ptr_); 496 | CUDA_CHECK(cudaMemcpyAsync( 497 | ptr_, other.ptr_, memsize_, cudaMemcpyDefault, cuda_stream)); 498 | CUDA_CHECK(cudaStreamSynchronize(cuda_stream)); 499 | } 500 | void copy_data_cpu(const DataStorage& other) { 501 | CHECK_EQ(type_, CPU); 502 | CHECK_EQ(other.type_, CPU); 503 | CHECK_EQ(memsize_, other.memsize_); 504 | CHECK(ptr_); 505 | CHECK(other.ptr_); 506 | memcpy(ptr_, other.ptr_, memsize_); 507 | } 508 | void clear() { 509 | switch (type_) { 510 | case GPU: 511 | clear_gpu(); 512 | break; 513 | case CPU: 
514 | clear_cpu(); 515 | break; 516 | case PINNED_CPU: 517 | clear_pinned_cpu(); 518 | break; 519 | default: 520 | CHECK_EQ(type_, UNINITIALIZED); 521 | } 522 | size_ = 0; 523 | memsize_ = 0; 524 | ptr_ = NULL; 525 | type_ = UNINITIALIZED; 526 | } 527 | void clear_gpu() { 528 | CHECK_EQ(type_, GPU); 529 | if (ptr_) { 530 | CUDA_CHECK(cudaFree(ptr_)); 531 | } 532 | } 533 | void clear_cpu() { 534 | CHECK_EQ(type_, CPU); 535 | if (ptr_) { 536 | free(ptr_); 537 | } 538 | } 539 | void clear_pinned_cpu() { 540 | CHECK_EQ(type_, PINNED_CPU); 541 | if (ptr_) { 542 | CUDA_CHECK(cudaFreeHost(ptr_)); 543 | } 544 | } 545 | DataStorage() { 546 | init_empty(); 547 | } 548 | DataStorage(const DataStorage& other) { 549 | init_empty(); 550 | } 551 | ~DataStorage() { 552 | clear(); 553 | } 554 | DataStorage& operator=(const DataStorage& other) { 555 | init_empty(); 556 | return *this; 557 | } 558 | }; 559 | 560 | #endif // defined __common_util_hpp__ 561 | -------------------------------------------------------------------------------- /src/common/gpu-util/device_alternate.hpp: -------------------------------------------------------------------------------- 1 | #ifndef GPU_UTIL_DEVICE_ALTERNATE_H_ 2 | #define GPU_UTIL_DEVICE_ALTERNATE_H_ 3 | 4 | #define NO_GPU LOG(FATAL) << "CPU-only Mode: cannot make GPU call." 5 | 6 | #include 7 | #include 8 | #include 9 | #include // cuda driver types 10 | 11 | // 12 | // CUDA macros 13 | // 14 | 15 | // CUDA: various checks for different function calls. 
16 | #define CUDA_CHECK(condition) \ 17 | /* Code block avoids redefinition of cudaError_t error */ \ 18 | do { \ 19 | cudaError_t error = condition; \ 20 | CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \ 21 | } while (0) 22 | 23 | #define CUBLAS_CHECK(condition) \ 24 | do { \ 25 | cublasStatus_t status = condition; \ 26 | CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " " \ 27 | << gpu_util::cublasGetErrorString(status); \ 28 | } while (0) 29 | 30 | // CUDA: grid stride looping 31 | #define CUDA_KERNEL_LOOP(i, n) \ 32 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ 33 | i < (n); \ 34 | i += blockDim.x * gridDim.x) 35 | 36 | namespace gpu_util { 37 | 38 | // CUDA: thread number configuration. 39 | // Use 1024 threads per block, which requires cuda sm_2x or above, 40 | // or fall back to attempt compatibility (best of luck to you). 41 | #if __CUDA_ARCH__ >= 200 42 | const int CUDA_NUM_THREADS = 1024; 43 | #else 44 | const int CUDA_NUM_THREADS = 512; 45 | #endif 46 | 47 | // CUDA: number of blocks for threads. 
48 | inline int GET_BLOCKS(const int N) { 49 | int num_blocks = (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS; 50 | if (num_blocks > 65535) { 51 | num_blocks = 65535; 52 | } 53 | return num_blocks; 54 | } 55 | 56 | inline const char* cublasGetErrorString(cublasStatus_t error) { 57 | switch (error) { 58 | case CUBLAS_STATUS_SUCCESS: 59 | return "CUBLAS_STATUS_SUCCESS"; 60 | case CUBLAS_STATUS_NOT_INITIALIZED: 61 | return "CUBLAS_STATUS_NOT_INITIALIZED"; 62 | case CUBLAS_STATUS_ALLOC_FAILED: 63 | return "CUBLAS_STATUS_ALLOC_FAILED"; 64 | case CUBLAS_STATUS_INVALID_VALUE: 65 | return "CUBLAS_STATUS_INVALID_VALUE"; 66 | case CUBLAS_STATUS_ARCH_MISMATCH: 67 | return "CUBLAS_STATUS_ARCH_MISMATCH"; 68 | case CUBLAS_STATUS_MAPPING_ERROR: 69 | return "CUBLAS_STATUS_MAPPING_ERROR"; 70 | case CUBLAS_STATUS_EXECUTION_FAILED: 71 | return "CUBLAS_STATUS_EXECUTION_FAILED"; 72 | case CUBLAS_STATUS_INTERNAL_ERROR: 73 | return "CUBLAS_STATUS_INTERNAL_ERROR"; 74 | #if CUDA_VERSION >= 6000 75 | case CUBLAS_STATUS_NOT_SUPPORTED: 76 | return "CUBLAS_STATUS_NOT_SUPPORTED"; 77 | #endif 78 | #if CUDA_VERSION >= 6050 79 | case CUBLAS_STATUS_LICENSE_ERROR: 80 | return "CUBLAS_STATUS_LICENSE_ERROR"; 81 | #endif 82 | } 83 | return "Unknown cublas status"; 84 | } 85 | 86 | } // namespace gpu_util 87 | 88 | #endif // GPU_UTIL_DEVICE_ALTERNATE_H_ 89 | -------------------------------------------------------------------------------- /src/common/gpu-util/math_functions.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include "math_functions.hpp" 7 | 8 | template<> 9 | void cpu_gemm(const CBLAS_TRANSPOSE TransA, 10 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 11 | const float alpha, const float* A, const float* B, const float beta, 12 | float* C) { 13 | int lda = (TransA == CblasNoTrans) ? K : M; 14 | int ldb = (TransB == CblasNoTrans) ? 
N : K; 15 | cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, 16 | ldb, beta, C, N); 17 | } 18 | 19 | template<> 20 | void cpu_gemm(const CBLAS_TRANSPOSE TransA, 21 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 22 | const double alpha, const double* A, const double* B, const double beta, 23 | double* C) { 24 | int lda = (TransA == CblasNoTrans) ? K : M; 25 | int ldb = (TransB == CblasNoTrans) ? N : K; 26 | cblas_dgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, 27 | ldb, beta, C, N); 28 | } 29 | 30 | template<> 31 | void cpu_gemm(const CBLAS_TRANSPOSE TransA, 32 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 33 | const float alpha, const float* A, const int lda, 34 | const float* B, const int ldb, 35 | const float beta, float* C, const int ldc) { 36 | cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, 37 | ldb, beta, C, ldc); 38 | } 39 | 40 | template<> 41 | void cpu_gemm(const CBLAS_TRANSPOSE TransA, 42 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 43 | const double alpha, const double* A, const int lda, 44 | const double* B, const int ldb, 45 | const double beta, double* C, const int ldc) { 46 | cblas_dgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, 47 | ldb, beta, C, ldc); 48 | } 49 | 50 | template <> 51 | void cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, 52 | const int N, const float alpha, const float* A, const float* x, 53 | const float beta, float* y) { 54 | cblas_sgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); 55 | } 56 | 57 | template <> 58 | void cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, 59 | const int N, const double alpha, const double* A, const double* x, 60 | const double beta, double* y) { 61 | cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); 62 | } 63 | 64 | template <> 65 | void cpu_axpy(const int N, const float alpha, const float* X, 66 | float* Y) 
{ cblas_saxpy(N, alpha, X, 1, Y, 1); } 67 | 68 | template <> 69 | void cpu_axpy(const int N, const double alpha, const double* X, 70 | double* Y) { cblas_daxpy(N, alpha, X, 1, Y, 1); } 71 | 72 | template <> 73 | void cpu_axpby(const int N, const float alpha, const float* X, 74 | const float beta, float* Y) { 75 | cblas_saxpby(N, alpha, X, 1, beta, Y, 1); 76 | } 77 | 78 | template <> 79 | void cpu_axpby(const int N, const double alpha, const double* X, 80 | const double beta, double* Y) { 81 | cblas_daxpby(N, alpha, X, 1, beta, Y, 1); 82 | } 83 | 84 | template <> 85 | float cpu_strided_dot(const int n, const float* x, const int incx, 86 | const float* y, const int incy) { 87 | return cblas_sdot(n, x, incx, y, incy); 88 | } 89 | 90 | template <> 91 | double cpu_strided_dot(const int n, const double* x, 92 | const int incx, const double* y, const int incy) { 93 | return cblas_ddot(n, x, incx, y, incy); 94 | } 95 | 96 | template 97 | Dtype cpu_dot(const int n, const Dtype* x, const Dtype* y) { 98 | return cpu_strided_dot(n, x, 1, y, 1); 99 | } 100 | 101 | template 102 | float cpu_dot(const int n, const float* x, const float* y); 103 | 104 | template 105 | double cpu_dot(const int n, const double* x, const double* y); 106 | 107 | template <> 108 | float cpu_asum(const int n, const float* x) { 109 | return cblas_sasum(n, x, 1); 110 | } 111 | 112 | template <> 113 | double cpu_asum(const int n, const double* x) { 114 | return cblas_dasum(n, x, 1); 115 | } 116 | 117 | template 118 | void cpu_set(const int N, const Dtype alpha, Dtype* Y) { 119 | if (alpha == 0) { 120 | memset(Y, 0, sizeof(Dtype) * N); // NOLINT(caffe/alt_fn) 121 | return; 122 | } 123 | for (int i = 0; i < N; ++i) { 124 | Y[i] = alpha; 125 | } 126 | } 127 | 128 | template void cpu_set(const int N, const int alpha, int* Y); 129 | template void cpu_set(const int N, const float alpha, float* Y); 130 | template void cpu_set(const int N, const double alpha, double* Y); 131 | 132 | template <> 133 | void 
cpu_add(const int n, const float* a, const float* b, 134 | float* y) { 135 | vsAdd(n, a, b, y); 136 | } 137 | 138 | template <> 139 | void cpu_add(const int n, const double* a, const double* b, 140 | double* y) { 141 | vdAdd(n, a, b, y); 142 | } 143 | 144 | template <> 145 | void cpu_sub(const int n, const float* a, const float* b, 146 | float* y) { 147 | vsSub(n, a, b, y); 148 | } 149 | 150 | template <> 151 | void cpu_sub(const int n, const double* a, const double* b, 152 | double* y) { 153 | vdSub(n, a, b, y); 154 | } 155 | 156 | template <> 157 | void cpu_mul(const int n, const float* a, const float* b, 158 | float* y) { 159 | vsMul(n, a, b, y); 160 | } 161 | 162 | template <> 163 | void cpu_mul(const int n, const double* a, const double* b, 164 | double* y) { 165 | vdMul(n, a, b, y); 166 | } 167 | 168 | template <> 169 | void cpu_div(const int n, const float* a, const float* b, 170 | float* y) { 171 | vsDiv(n, a, b, y); 172 | } 173 | 174 | template <> 175 | void cpu_div(const int n, const double* a, const double* b, 176 | double* y) { 177 | vdDiv(n, a, b, y); 178 | } 179 | 180 | template <> 181 | void cpu_powx(const int n, const float* a, const float b, 182 | float* y) { 183 | vsPowx(n, a, b, y); 184 | } 185 | 186 | template <> 187 | void cpu_powx(const int n, const double* a, const double b, 188 | double* y) { 189 | vdPowx(n, a, b, y); 190 | } 191 | 192 | template <> 193 | void cpu_sqr(const int n, const float* a, float* y) { 194 | vsSqr(n, a, y); 195 | } 196 | 197 | template <> 198 | void cpu_sqr(const int n, const double* a, double* y) { 199 | vdSqr(n, a, y); 200 | } 201 | 202 | template <> 203 | void cpu_exp(const int n, const float* a, float* y) { 204 | vsExp(n, a, y); 205 | } 206 | 207 | template <> 208 | void cpu_exp(const int n, const double* a, double* y) { 209 | vdExp(n, a, y); 210 | } 211 | 212 | template <> 213 | void cpu_abs(const int n, const float* a, float* y) { 214 | vsAbs(n, a, y); 215 | } 216 | 217 | template <> 218 | void cpu_abs(const int 
n, const double* a, double* y) { 219 | vdAbs(n, a, y); 220 | } 221 | 222 | template <> 223 | void cpu_add_scalar(const int N, const float alpha, float* Y) { 224 | for (int i = 0; i < N; ++i) { 225 | Y[i] += alpha; 226 | } 227 | } 228 | 229 | template <> 230 | void cpu_add_scalar(const int N, const double alpha, double* Y) { 231 | for (int i = 0; i < N; ++i) { 232 | Y[i] += alpha; 233 | } 234 | } 235 | 236 | template 237 | void cpu_copy(const int N, const Dtype* X, Dtype* Y) { 238 | if (X != Y) { 239 | memcpy(Y, X, sizeof(Dtype) * N); // NOLINT(caffe/alt_fn) 240 | } 241 | } 242 | 243 | template void cpu_copy(const int N, const int* X, int* Y); 244 | template void cpu_copy(const int N, const unsigned int* X, 245 | unsigned int* Y); 246 | template void cpu_copy(const int N, const float* X, float* Y); 247 | template void cpu_copy(const int N, const double* X, double* Y); 248 | -------------------------------------------------------------------------------- /src/common/gpu-util/math_functions.hpp: -------------------------------------------------------------------------------- 1 | #ifndef GPU_UTIL_MATH_FUNCTIONS_H_ 2 | #define GPU_UTIL_MATH_FUNCTIONS_H_ 3 | 4 | #include 5 | #include // for std::fabs and std::signbit 6 | 7 | #include 8 | 9 | #include "glog/logging.h" 10 | 11 | extern "C" { 12 | #include 13 | } 14 | 15 | #include "device_alternate.hpp" 16 | #include "mkl_alternate.hpp" 17 | 18 | // Caffe gemm provides a simpler interface to the gemm functions, with the 19 | // limitation that the data has to be contiguous in memory. 
20 | template 21 | void cpu_gemm(const CBLAS_TRANSPOSE TransA, 22 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 23 | const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, 24 | Dtype* C); 25 | 26 | template 27 | void cpu_gemm(const CBLAS_TRANSPOSE TransA, 28 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 29 | const Dtype alpha, const Dtype* A, const int lda, 30 | const Dtype* B, const int ldb, 31 | const Dtype beta, Dtype* C, const int ldc); 32 | 33 | template 34 | void cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, 35 | const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta, 36 | Dtype* y); 37 | 38 | template 39 | void cpu_axpy(const int N, const Dtype alpha, const Dtype* X, 40 | Dtype* Y); 41 | 42 | template 43 | void cpu_axpby(const int N, const Dtype alpha, const Dtype* X, 44 | const Dtype beta, Dtype* Y); 45 | 46 | template 47 | Dtype cpu_dot(const int n, const Dtype* x, const Dtype* y); 48 | 49 | template 50 | Dtype cpu_strided_dot(const int n, const Dtype* x, const int incx, 51 | const Dtype* y, const int incy); 52 | 53 | // Returns the sum of the absolute values of the elements of vector x 54 | template 55 | Dtype cpu_asum(const int n, const Dtype* x); 56 | 57 | template 58 | void cpu_set(const int N, const Dtype alpha, Dtype *X); 59 | 60 | template 61 | void cpu_add(const int N, const Dtype* a, const Dtype* b, Dtype* y); 62 | 63 | template 64 | void cpu_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y); 65 | 66 | template 67 | void cpu_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y); 68 | 69 | template 70 | void cpu_div(const int N, const Dtype* a, const Dtype* b, Dtype* y); 71 | 72 | template 73 | void cpu_powx(const int n, const Dtype* a, const Dtype b, Dtype* y); 74 | 75 | template 76 | void cpu_sqr(const int N, const Dtype* a, Dtype* y); 77 | 78 | template 79 | void cpu_exp(const int n, const Dtype* a, Dtype* y); 80 | 81 | template 82 | 
void cpu_abs(const int n, const Dtype* a, Dtype* y); 83 | 84 | template 85 | void cpu_add_scalar(const int N, const Dtype alpha, Dtype* Y); 86 | 87 | template 88 | void cpu_copy(const int N, const Dtype* X, Dtype* Y); 89 | 90 | // Decaf gpu gemm provides an interface that is almost the same as the cpu 91 | // gemm function - following the c convention and calling the fortran-order 92 | // gpu code under the hood. 93 | template 94 | void gpu_gemm(cublasHandle_t cublas_handle, const CBLAS_TRANSPOSE TransA, 95 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 96 | const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, 97 | Dtype* C); 98 | 99 | template 100 | void gpu_gemm(cublasHandle_t cublas_handle, const CBLAS_TRANSPOSE TransA, 101 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 102 | const Dtype alpha, const Dtype* A, const int lda, 103 | const Dtype* B, const int ldb, 104 | const Dtype beta, Dtype* C, const int ldc); 105 | 106 | template 107 | void gpu_gemv(cublasHandle_t cublas_handle, 108 | const CBLAS_TRANSPOSE TransA, const int M, const int N, 109 | const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta, 110 | Dtype* y); 111 | 112 | template 113 | void gpu_axpy(cublasHandle_t cublas_handle, const int N, 114 | const Dtype alpha, const Dtype* X, Dtype* Y); 115 | 116 | template 117 | Dtype gpu_dot( 118 | cublasHandle_t cublas_handle, const int n, const Dtype* x, const Dtype* y); 119 | 120 | template 121 | Dtype gpu_asum(cublasHandle_t cublas_handle, const int n, const Dtype* x); 122 | 123 | #endif // GPU_UTIL_MATH_FUNCTIONS_H_ 124 | -------------------------------------------------------------------------------- /src/common/gpu-util/math_functions_cuda.cu: -------------------------------------------------------------------------------- 1 | #ifndef CPU_ONLY 2 | 3 | #include // CUDA's, not caffe's, for fabs, signbit 4 | #include 5 | #include // thrust::plus 6 | #include 7 | 8 | #include 9 | 
#include 10 | #include 11 | 12 | #include "math_functions.hpp" 13 | 14 | template <> 15 | void gpu_gemm( 16 | cublasHandle_t cublas_handle, const CBLAS_TRANSPOSE TransA, 17 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 18 | const float alpha, const float* A, const float* B, const float beta, 19 | float* C) { 20 | // Note that cublas follows fortran order. 21 | int lda = (TransA == CblasNoTrans) ? K : M; 22 | int ldb = (TransB == CblasNoTrans) ? N : K; 23 | cublasOperation_t cuTransA = 24 | (TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; 25 | cublasOperation_t cuTransB = 26 | (TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; 27 | CUBLAS_CHECK(cublasSgemm(cublas_handle, cuTransB, cuTransA, 28 | N, M, K, &alpha, B, ldb, A, lda, &beta, C, N)); 29 | } 30 | 31 | template <> 32 | void gpu_gemm( 33 | cublasHandle_t cublas_handle, const CBLAS_TRANSPOSE TransA, 34 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 35 | const double alpha, const double* A, const double* B, const double beta, 36 | double* C) { 37 | // Note that cublas follows fortran order. 38 | int lda = (TransA == CblasNoTrans) ? K : M; 39 | int ldb = (TransB == CblasNoTrans) ? N : K; 40 | cublasOperation_t cuTransA = 41 | (TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; 42 | cublasOperation_t cuTransB = 43 | (TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; 44 | CUBLAS_CHECK(cublasDgemm(cublas_handle, cuTransB, cuTransA, 45 | N, M, K, &alpha, B, ldb, A, lda, &beta, C, N)); 46 | } 47 | 48 | template <> 49 | void gpu_gemm( 50 | cublasHandle_t cublas_handle, const CBLAS_TRANSPOSE TransA, 51 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 52 | const float alpha, const float* A, const int lda, 53 | const float* B, const int ldb, 54 | const float beta, float* C, const int ldc) { 55 | cublasOperation_t cuTransA = 56 | (TransA == CblasNoTrans) ? 
CUBLAS_OP_N : CUBLAS_OP_T; 57 | cublasOperation_t cuTransB = 58 | (TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; 59 | CUBLAS_CHECK(cublasSgemm(cublas_handle, cuTransB, cuTransA, 60 | N, M, K, &alpha, B, ldb, A, lda, &beta, C, ldc)); 61 | } 62 | 63 | template <> 64 | void gpu_gemm( 65 | cublasHandle_t cublas_handle, const CBLAS_TRANSPOSE TransA, 66 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 67 | const double alpha, const double* A, const int lda, 68 | const double* B, const int ldb, 69 | const double beta, double* C, const int ldc) { 70 | cublasOperation_t cuTransA = 71 | (TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; 72 | cublasOperation_t cuTransB = 73 | (TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; 74 | CUBLAS_CHECK(cublasDgemm(cublas_handle, cuTransB, cuTransA, 75 | N, M, K, &alpha, B, ldb, A, lda, &beta, C, ldc)); 76 | } 77 | 78 | template <> 79 | void gpu_gemv( 80 | cublasHandle_t cublas_handle, const CBLAS_TRANSPOSE TransA, const int M, 81 | const int N, const float alpha, const float* A, const float* x, 82 | const float beta, float* y) { 83 | cublasOperation_t cuTransA = 84 | (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N; 85 | CUBLAS_CHECK(cublasSgemv(cublas_handle, cuTransA, N, M, &alpha, 86 | A, N, x, 1, &beta, y, 1)); 87 | } 88 | 89 | template <> 90 | void gpu_gemv( 91 | cublasHandle_t cublas_handle, const CBLAS_TRANSPOSE TransA, const int M, 92 | const int N, const double alpha, const double* A, const double* x, 93 | const double beta, double* y) { 94 | cublasOperation_t cuTransA = 95 | (TransA == CblasNoTrans) ? 
CUBLAS_OP_T : CUBLAS_OP_N; 96 | CUBLAS_CHECK(cublasDgemv(cublas_handle, cuTransA, N, M, &alpha, 97 | A, N, x, 1, &beta, y, 1)); 98 | } 99 | 100 | template <> 101 | void gpu_axpy( 102 | cublasHandle_t cublas_handle, const int N, 103 | const float alpha, const float* X, float* Y) { 104 | CUBLAS_CHECK(cublasSaxpy(cublas_handle, N, &alpha, X, 1, Y, 1)); 105 | } 106 | 107 | template <> 108 | void gpu_axpy( 109 | cublasHandle_t cublas_handle, const int N, 110 | const double alpha, const double* X, double* Y) { 111 | CUBLAS_CHECK(cublasDaxpy(cublas_handle, N, &alpha, X, 1, Y, 1)); 112 | } 113 | 114 | template <> 115 | float gpu_dot(cublasHandle_t cublas_handle, 116 | const int n, const float* x, const float* y) { 117 | float out; 118 | CUBLAS_CHECK(cublasSdot(cublas_handle, n, x, 1, y, 1, &out)); 119 | return out; 120 | } 121 | 122 | template <> 123 | double gpu_dot(cublasHandle_t cublas_handle, 124 | const int n, const double* x, const double* y) { 125 | double out; 126 | CUBLAS_CHECK(cublasDdot(cublas_handle, n, x, 1, y, 1, &out)); 127 | return out; 128 | } 129 | 130 | template <> 131 | float gpu_asum( 132 | cublasHandle_t cublas_handle, const int n, const float* x) { 133 | float y; 134 | CUBLAS_CHECK(cublasSasum(cublas_handle, n, x, 1, &y)); 135 | return y; 136 | } 137 | 138 | template <> 139 | double gpu_asum( 140 | cublasHandle_t cublas_handle, const int n, const double* x) { 141 | double y; 142 | CUBLAS_CHECK(cublasDasum(cublas_handle, n, x, 1, &y)); 143 | return y; 144 | } 145 | 146 | #endif -------------------------------------------------------------------------------- /src/common/gpu-util/mkl_alternate.hpp: -------------------------------------------------------------------------------- 1 | #ifndef GPU_UTIL_MKL_ALTERNATE_H_ 2 | #define GPU_UTIL_MKL_ALTERNATE_H_ 3 | 4 | #ifdef USE_MKL 5 | 6 | #include 7 | 8 | #else // If use MKL, simply include the MKL header 9 | 10 | extern "C" { 11 | #include 12 | } 13 | #include 14 | 15 | // Functions that caffe uses but are 
not present if MKL is not linked. 16 | 17 | // A simple way to define the vsl unary functions. The operation should 18 | // be in the form e.g. y[i] = sqrt(a[i]) 19 | #define DEFINE_VSL_UNARY_FUNC(name, operation) \ 20 | template \ 21 | void v##name(const int n, const Dtype* a, Dtype* y) { \ 22 | CHECK_GT(n, 0); CHECK(a); CHECK(y); \ 23 | for (int i = 0; i < n; ++i) { operation; } \ 24 | } \ 25 | inline void vs##name( \ 26 | const int n, const float* a, float* y) { \ 27 | v##name(n, a, y); \ 28 | } \ 29 | inline void vd##name( \ 30 | const int n, const double* a, double* y) { \ 31 | v##name(n, a, y); \ 32 | } 33 | 34 | DEFINE_VSL_UNARY_FUNC(Sqr, y[i] = a[i] * a[i]); 35 | DEFINE_VSL_UNARY_FUNC(Exp, y[i] = exp(a[i])); 36 | DEFINE_VSL_UNARY_FUNC(Abs, y[i] = fabs(a[i])); 37 | 38 | // A simple way to define the vsl unary functions with singular parameter b. 39 | // The operation should be in the form e.g. y[i] = pow(a[i], b) 40 | #define DEFINE_VSL_UNARY_FUNC_WITH_PARAM(name, operation) \ 41 | template \ 42 | void v##name(const int n, const Dtype* a, const Dtype b, Dtype* y) { \ 43 | CHECK_GT(n, 0); CHECK(a); CHECK(y); \ 44 | for (int i = 0; i < n; ++i) { operation; } \ 45 | } \ 46 | inline void vs##name( \ 47 | const int n, const float* a, const float b, float* y) { \ 48 | v##name(n, a, b, y); \ 49 | } \ 50 | inline void vd##name( \ 51 | const int n, const double* a, const float b, double* y) { \ 52 | v##name(n, a, b, y); \ 53 | } 54 | 55 | DEFINE_VSL_UNARY_FUNC_WITH_PARAM(Powx, y[i] = pow(a[i], b)); 56 | 57 | // A simple way to define the vsl binary functions. The operation should 58 | // be in the form e.g. 
y[i] = a[i] + b[i] 59 | #define DEFINE_VSL_BINARY_FUNC(name, operation) \ 60 | template \ 61 | void v##name(const int n, const Dtype* a, const Dtype* b, Dtype* y) { \ 62 | CHECK_GT(n, 0); CHECK(a); CHECK(b); CHECK(y); \ 63 | for (int i = 0; i < n; ++i) { operation; } \ 64 | } \ 65 | inline void vs##name( \ 66 | const int n, const float* a, const float* b, float* y) { \ 67 | v##name(n, a, b, y); \ 68 | } \ 69 | inline void vd##name( \ 70 | const int n, const double* a, const double* b, double* y) { \ 71 | v##name(n, a, b, y); \ 72 | } 73 | 74 | DEFINE_VSL_BINARY_FUNC(Add, y[i] = a[i] + b[i]); 75 | DEFINE_VSL_BINARY_FUNC(Sub, y[i] = a[i] - b[i]); 76 | DEFINE_VSL_BINARY_FUNC(Mul, y[i] = a[i] * b[i]); 77 | DEFINE_VSL_BINARY_FUNC(Div, y[i] = a[i] / b[i]); 78 | 79 | // In addition, MKL comes with an additional function axpby that is not present 80 | // in standard blas. We will simply use a two-step (inefficient, of course) way 81 | // to mimic that. 82 | inline void cblas_saxpby(const int N, const float alpha, const float* X, 83 | const int incX, const float beta, float* Y, 84 | const int incY) { 85 | cblas_sscal(N, beta, Y, incY); 86 | cblas_saxpy(N, alpha, X, incX, Y, incY); 87 | } 88 | inline void cblas_daxpby(const int N, const double alpha, const double* X, 89 | const int incX, const double beta, double* Y, 90 | const int incY) { 91 | cblas_dscal(N, beta, Y, incY); 92 | cblas_daxpy(N, alpha, X, incX, Y, incY); 93 | } 94 | 95 | #endif // USE_MKL 96 | #endif // GPU_UTIL_MKL_ALTERNATE_H_ 97 | -------------------------------------------------------------------------------- /src/common/internal-config.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __internal_config_hpp__ 2 | #define __internal_config_hpp__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 
7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #define BIG_ITER 10000000 35 | #define MAX_CLOCK BIG_ITER 36 | #define INITIAL_DATA_AGE -BIG_ITER 37 | #define INITIAL_CLOCK 0 38 | #define BIG_STALENESS 2 * BIG_ITER 39 | /* The staleness to guarantee a local read. 
40 | * We assume we won't have more than BIG_ITER iterations, and 41 | * the initial value for cache age is -BIG_ITER, so if we use 42 | * 2*BIG_ITER as the staleness, we can guarantee that we don't 43 | * need to fetch data from the server. 44 | */ 45 | 46 | #define INCREMENT_TIMING_FREQ 1 47 | #define READ_TIMING_FREQ 1 48 | #define SET_ROW_TIMING_FREQ 1 49 | // #define INCREMENT_TIMING_FREQ 1000 50 | // #define READ_TIMING_FREQ 1000 51 | // #define SET_ROW_TIMING_FREQ 1000 52 | 53 | #define ITERATE_CMD 1 54 | #define OPSEQ_CMD 1 55 | 56 | #define OP_BUFFER_SIZE 10 57 | 58 | #endif // defined __internal_config_hpp__ 59 | -------------------------------------------------------------------------------- /src/common/portable-bytes.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __PORTABLE_BYTES_HPP__ 2 | #define __PORTABLE_BYTES_HPP__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 
19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include 35 | #include 36 | #include 37 | 38 | #include 39 | #include 40 | #include 41 | 42 | #include "common/gpu-util/math_functions.hpp" 43 | 44 | using std::cerr; 45 | using std::endl; 46 |

/* Signature of the release callback handed to init_data(). */
typedef void(free_func_t)(void *data, void *hint);

/* Default release callback: does nothing. */
static void empty_free_func(void *data, void *hint) {
}

/*
 * Abstract byte buffer with pack/unpack helpers.
 *
 * Subclasses provide the storage through init(), init_size(), init_data(),
 * data() and size(); every pack_* helper (re)initializes the buffer with
 * init_size() and copies the payload in, every unpack_* helper copies the
 * payload out of data().
 */
class PortableBytes {
 public:
  /* Instances are used through this base class, so destruction has to be
   * virtual for subclass storage to be released correctly. */
  virtual ~PortableBytes() {}

  virtual int init() = 0;
  virtual int init_size(size_t size_) = 0;
  virtual int init_data(
      void *data_, size_t size_,
      free_func_t *ffn_ = empty_free_func, void *hint_ = NULL) = 0;
  virtual void *data() = 0;
  virtual size_t size() = 0;

  /* Stores a bitwise copy of t as the whole payload. */
  template
  void pack(const T& t) {
    size_t data_size = sizeof(T);
    init_size(data_size);
    *(reinterpret_cast(data())) = t;
  }

  /* Reads a T back from the start of the payload. */
  template
  void unpack(T& t) {
    assert(size() >= sizeof(T));
    t = *(reinterpret_cast(data()));
  }

  /* Stores the elements of vec back to back as the payload. */
  template
  void pack_vector(const std::vector& vec) {
    size_t data_size = vec.size() * sizeof(T);
    init_size(data_size);
    if (data_size) {
      memcpy(data(), vec.data(), data_size);
    }
  }

  /* Rebuilds a vector from the payload. Only whole elements are copied:
   * the vector holds size() / sizeof(T) elements, so copying size() bytes
   * would write past its storage whenever the payload length is not a
   * multiple of sizeof(T). */
  template
  void unpack_vector(std::vector& vec) {
    size_t vec_size = size() / sizeof(T);
    vec.resize(vec_size);
    size_t copy_size = vec_size * sizeof(T);
    if (copy_size) {
      memcpy(vec.data(), data(), copy_size);
    }
  }

  /* Stores the characters of str (without a terminator) as the payload. */
  void pack_string(const std::string& str) {
    init_size(str.size());
    if (str.size()) {
      memcpy(data(), str.data(), str.size());
    }
  }

  /* Replaces str with the full payload. */
  void unpack_string(std::string& str) {
    str.assign(reinterpret_cast(data()), size());
  }

  /* Stores size bytes starting at buf as the payload. */
  void pack_memory(const void *buf, size_t size) {
    init_size(size);
    if (size) {
      memcpy(data(), buf, size);
    }
  }

  /* Stores buf0 followed immediately by buf1 as one payload. Either part
   * may be empty, in which case its pointer is not touched. */
  void pack_memory(
      const void *buf0, size_t size0, const void *buf1, size_t size1) {
    init_size(size0 + size1);
    void *dst0 = data();
    if (size0) {
      CHECK(buf0);
      memcpy(dst0, buf0, size0);
    }
    if (size1) {
      CHECK(buf1);
      /* The second destination is computed through an integer, which is
       * only valid when a pointer fits in an unsigned long. */
      CHECK_EQ(sizeof(void *), sizeof(unsigned long));
      void *dst1 = reinterpret_cast(
          reinterpret_cast(dst0) + size0);
      memcpy(dst1, buf1, size1);
    }
  }

  /* Copies the first size bytes of the payload into buf. The caller is
   * responsible for size not exceeding the payload length. */
  void unpack_memory(void *buf, size_t size) {
    if (size) {
      memcpy(buf, data(), size);
    }
  }

  /* Stores size bytes read from buf via cudaMemcpyAsync on cuda_stream and
   * waits for the stream before returning. A failed copy or synchronize
   * trips the CHECK instead of leaving an unfilled payload behind. */
  void pack_gpu_memory(const void *buf, size_t size, cudaStream_t cuda_stream) {
    init_size(size);
    CHECK_EQ(cudaMemcpyAsync(data(), buf, size,
                             cudaMemcpyDefault, cuda_stream),
             cudaSuccess);
    CHECK_EQ(cudaStreamSynchronize(cuda_stream), cudaSuccess);
  }
};

#endif // __PORTABLE_BYTES_HPP__
-------------------------------------------------------------------------------- /src/common/router-handler.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Carnegie Mellon University. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2.
Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 3. Neither the name of the University nor the names of its contributors 14 | * may be used to endorse or promote products derived from this software 15 | * without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 24 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 25 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 27 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | * POSSIBILITY OF SUCH DAMAGE. 
29 | */ 30 | 31 | // socket layer that handles ZMQ sockets, both in-process and over the network 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #include 39 | #include 40 | 41 | #include "router-handler.hpp" 42 | 43 | using std::string; 44 | using std::vector; 45 | using std::cerr; 46 | using std::endl; 47 | using boost::shared_ptr; 48 | using boost::make_shared; 49 | using boost::thread; 50 | using boost::bind; 51 | 52 | int64_t msg_size(const vector& msgs) { 53 | int64_t size = 0; 54 | for (uint i = 0; i < msgs.size(); i++) { 55 | size += msgs[i].size(); 56 | } 57 | 58 | return size; 59 | } 60 | 61 | int64_t msg_size(vector& msgs) { 62 | int64_t size = 0; 63 | for (uint i = 0; i < msgs.size(); i++) { 64 | size += msgs[i].size(); 65 | } 66 | 67 | return size; 68 | } 69 | 70 | RouterHandler::RouterHandler(uint channel_id, 71 | shared_ptr ctx, 72 | const vector& connect_list, const vector& bind_list, 73 | const string& identity, const GeePsConfig& config) : 74 | channel_id(channel_id), zmq_ctx(ctx), 75 | connect_to(connect_list), bind_to(bind_list), identity(identity), 76 | router_socket(*zmq_ctx, ZMQ_ROUTER), shutdown_socket(*zmq_ctx, ZMQ_PULL), 77 | pull_socket(*zmq_ctx, ZMQ_PULL), local_recv_socket(*zmq_ctx, ZMQ_PULL), 78 | config(config) { 79 | if (identity != "") { 80 | router_socket.setsockopt(ZMQ_IDENTITY, identity.c_str(), 81 | identity.size()); 82 | } 83 | 84 | BOOST_FOREACH(string s, connect_to) { 85 | try { 86 | router_socket.connect(s.c_str()); 87 | } catch (...) { 88 | cerr << identity << " connect to " << s << " failed\n"; 89 | CHECK(0); 90 | } 91 | } 92 | BOOST_FOREACH(string s, bind_to) { 93 | try { 94 | router_socket.bind(s.c_str()); 95 | } catch (...) 
{ 96 | cerr << identity << " bind to " << s << " failed\n"; 97 | CHECK(0); 98 | } 99 | } 100 | 101 | if (connect_to.size()) { 102 | /* CLient */ 103 | client = true; 104 | try { 105 | shutdown_socket.bind("inproc://client-rh-shutdown"); 106 | pull_socket.bind("inproc://inproc-client-msg"); 107 | local_recv_socket.bind("inproc://inproc-local-client-recv"); 108 | } catch (...) { 109 | cerr << identity << " internal bind failed\n"; 110 | CHECK(0); 111 | } 112 | } else { 113 | /* Server */ 114 | client = false; 115 | try { 116 | shutdown_socket.bind("inproc://server-rh-shutdown"); 117 | pull_socket.bind("inproc://inproc-server-msg"); 118 | local_recv_socket.bind("inproc://inproc-local-server-recv"); 119 | } catch (...) { 120 | cerr << identity << " internal bind failed\n"; 121 | CHECK(0); 122 | } 123 | } 124 | } 125 | 126 | string RouterHandler::get_stats() { 127 | return stats.to_json(); 128 | } 129 | 130 | void RouterHandler::send_to( 131 | const string& dest, vector& msgs) { 132 | stats.total_send += msg_size(msgs); 133 | if (dest.compare("local") != 0) { 134 | if (snd_msg_socket.get() == NULL) { 135 | snd_msg_socket.reset(new zmq::socket_t(*zmq_ctx, ZMQ_PUSH)); 136 | if (client) { 137 | snd_msg_socket->connect("inproc://inproc-client-msg"); 138 | } else { 139 | snd_msg_socket->connect("inproc://inproc-server-msg"); 140 | } 141 | } 142 | send_msg(*snd_msg_socket, dest, true); 143 | send_msgs(*snd_msg_socket, msgs); 144 | } else { 145 | stats.total_local_send += msg_size(msgs); 146 | if (local_snd_msg_socket.get() == NULL) { 147 | local_snd_msg_socket.reset(new zmq::socket_t(*zmq_ctx, ZMQ_PUSH)); 148 | if (client) { 149 | local_snd_msg_socket->connect( 150 | "inproc://inproc-local-server-recv"); 151 | } else { 152 | local_snd_msg_socket->connect( 153 | "inproc://inproc-local-client-recv"); 154 | } 155 | } 156 | send_msgs(*local_snd_msg_socket, msgs); 157 | } 158 | for (uint j = 0; j < msgs.size(); j ++) { 159 | assert(!msgs[j].size()); 160 | } 161 | } 162 | 163 | 
void RouterHandler::direct_send_to( 164 | const string& dest, vector& msgs) { 165 | stats.total_send += msg_size(msgs); 166 | if (dest.compare("local") != 0) { 167 | send_msg(router_socket, dest, true); 168 | send_msgs(router_socket, msgs); 169 | } else { 170 | stats.total_local_send += msg_size(msgs); 171 | if (local_snd_msg_socket.get() == NULL) { 172 | local_snd_msg_socket.reset(new zmq::socket_t(*zmq_ctx, ZMQ_PUSH)); 173 | if (client) { 174 | local_snd_msg_socket->connect( 175 | "inproc://inproc-local-server-recv"); 176 | } else { 177 | local_snd_msg_socket->connect( 178 | "inproc://inproc-local-client-recv"); 179 | } 180 | } 181 | send_msgs(*local_snd_msg_socket, msgs); 182 | } 183 | } 184 | 185 | void RouterHandler::send_to( 186 | const vector& dests, vector& msgs) { 187 | for (uint i = 0; i < dests.size() - 1; i++) { 188 | vector msgs_copy(msgs.size()); 189 | for (uint j = 0; j < msgs.size(); j ++) { 190 | msgs_copy[j].copy(msgs[j]); 191 | } 192 | send_to(dests[i], msgs_copy); 193 | } 194 | /* Send the original message for the last one */ 195 | send_to(dests[dests.size() - 1], msgs); 196 | } 197 | 198 | void RouterHandler::direct_send_to( 199 | const vector& dests, vector& msgs) { 200 | for (uint i = 0; i < dests.size() - 1; i++) { 201 | vector msgs_copy(msgs.size()); 202 | for (uint j = 0; j < msgs_copy.size(); j ++) { 203 | msgs_copy[j].copy(msgs[j]); 204 | } 205 | direct_send_to(dests[i], msgs_copy); 206 | } 207 | /* Send the original message for the last one */ 208 | direct_send_to(dests[dests.size() - 1], msgs); 209 | } 210 | 211 | void RouterHandler::do_handler(RecvCallback recv_callback) { 212 | zmq::pollitem_t pollitems[4]; 213 | pollitems[0].socket = shutdown_socket; 214 | pollitems[0].events = ZMQ_POLLIN; 215 | pollitems[1].socket = pull_socket; 216 | pollitems[1].events = ZMQ_POLLIN; 217 | pollitems[2].socket = router_socket; 218 | pollitems[2].events = ZMQ_POLLIN; 219 | pollitems[3].socket = local_recv_socket; 220 | pollitems[3].events = 
ZMQ_POLLIN; 221 | 222 | while (true) { 223 | try { 224 | zmq::poll(pollitems, 4); 225 | } catch(...) { 226 | cerr << "exception in router handler" << endl; 227 | break; 228 | } 229 | 230 | if (pollitems[0].revents) { 231 | /* Shut down router */ 232 | string msg; 233 | recv_msg(shutdown_socket, msg); 234 | 235 | /* Break out the loop */ 236 | break; 237 | } 238 | if (pollitems[1].revents) { 239 | /* Pass PULL to ROUTER */ 240 | // tbb::tick_count timing_start = tbb::tick_count::now(); 241 | forward_msgs(pull_socket, router_socket); 242 | // stats.pull_to_router_time += 243 | // (tbb::tick_count::now() - timing_start).seconds(); 244 | } 245 | if (pollitems[2].revents) { 246 | /* router_socket */ 247 | // tbb::tick_count timing_start = tbb::tick_count::now(); 248 | string src; 249 | bool more = recv_msg(router_socket, src); 250 | assert(more); 251 | vector msgs; 252 | recv_msgs(router_socket, msgs); 253 | stats.total_receive += msg_size(msgs); 254 | recv_callback(src, msgs); 255 | // stats.router_recv_time += 256 | // (tbb::tick_count::now() - timing_start).seconds(); 257 | } 258 | if (pollitems[3].revents) { 259 | /* local_recv_socket */ 260 | // tbb::tick_count timing_start = tbb::tick_count::now(); 261 | string src("local"); 262 | vector msgs; 263 | recv_msgs(local_recv_socket, msgs); 264 | stats.total_local_receive += msg_size(msgs); 265 | stats.total_receive += msg_size(msgs); 266 | recv_callback(src, msgs); 267 | // stats.router_local_recv_time += 268 | // (tbb::tick_count::now() - timing_start).seconds(); 269 | } 270 | } 271 | } 272 | 273 | void RouterHandler::start_handler_thread(RecvCallback recv_callback) { 274 | handler_thread = make_shared ( 275 | bind(&RouterHandler::do_handler, this, recv_callback)); 276 | sleep(1); 277 | /* ZMQ 2.2 does not support connect before bind, so we have to wait 278 | * long enough for the handler thread to start working. 
279 | */ 280 | } 281 | 282 | void RouterHandler::stop_handler_thread() { 283 | zmq::socket_t s(*zmq_ctx, ZMQ_PUSH); 284 | if (client) { 285 | s.connect("inproc://client-rh-shutdown"); 286 | } else { 287 | s.connect("inproc://server-rh-shutdown"); 288 | } 289 | send_msg(s, "stop"); 290 | (*handler_thread).join(); 291 | } 292 | 293 | RouterHandler::~RouterHandler() { 294 | } 295 | -------------------------------------------------------------------------------- /src/common/router-handler.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __router_handler_hpp__ 2 | #define __router_handler_hpp__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | // socket layer that handles zmq sockets, both in-process and over the network 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | #include 42 | #include 43 | #include 44 | 45 | #include "geeps.hpp" 46 | #include "portable-bytes.hpp" 47 | #include "zmq-util.hpp" 48 | #include "wire-protocol.hpp" 49 | 50 | using std::string; 51 | using std::vector; 52 | 53 | class RouterHandler { 54 | public: 55 | typedef boost::function&)> RecvCallback; 57 | 58 | struct RouterStats { 59 | int64_t total_send; 60 | int64_t total_local_send; 61 | int64_t total_receive; 62 | int64_t total_local_receive; 63 | double pull_to_router_time; 64 | double router_recv_time; 65 | double router_local_recv_time; 66 | RouterStats() { 67 | total_send = 0; 68 | total_local_send = 0; 69 | total_receive = 0; 70 | total_local_receive = 0; 71 | pull_to_router_time = 0.0; 72 | router_recv_time = 0.0; 73 | router_local_recv_time = 0.0; 74 | } 75 | RouterStats& operator += (const RouterStats& rhs) { 76 | return *this; 77 | } 78 | string to_json() { 79 | std::stringstream ss; 80 | ss << "{" 81 | << "\"total_send\": " << total_send << ", " 82 | << "\"total_local_send\": " << total_local_send << ", " 83 | << "\"total_receive\": " << total_receive << ", " 84 | << "\"total_local_receive\": " << total_local_receive << ", " 85 | << "\"pull_to_router_time\": " << pull_to_router_time << ", " 86 | << 
"\"router_recv_time\": " << router_recv_time << ", " 87 | << "\"router_local_recv_time\": " << router_local_recv_time << ", " 88 | << "\"last_entry\": 0" 89 | << " } "; 90 | return ss.str(); 91 | } 92 | }; 93 | 94 | uint channel_id; 95 | boost::shared_ptr zmq_ctx; 96 | vector connect_to; 97 | vector bind_to; 98 | string identity; 99 | bool client; 100 | 101 | zmq::socket_t router_socket; 102 | zmq::socket_t shutdown_socket; 103 | zmq::socket_t pull_socket; 104 | zmq::socket_t local_recv_socket; 105 | 106 | boost::thread_specific_ptr snd_msg_socket; 107 | boost::thread_specific_ptr local_snd_msg_socket; 108 | 109 | boost::shared_ptr handler_thread; 110 | 111 | GeePsConfig config; 112 | RouterStats stats; 113 | 114 | public: 115 | RouterHandler( 116 | uint channel_id, 117 | boost::shared_ptr ctx, 118 | const vector& connect_list, 119 | const vector& bind_list, 120 | const string& identity, 121 | const GeePsConfig& config); 122 | ~RouterHandler(); 123 | void start_handler_thread(RecvCallback recv_callback); 124 | void do_handler(RecvCallback recv_callback); 125 | void stop_handler_thread(); 126 | 127 | void send_to(const string& dest, vector& msgs); 128 | void send_to(const vector& dests, vector& msgs); 129 | 130 | string get_stats(); 131 | 132 | /* WARNING: Only one thread should be allowed to use these methods */ 133 | void direct_send_to(const string& dest, vector& msgs); 134 | void direct_send_to( 135 | const vector& dests, vector& msgs); 136 | }; 137 | 138 | #endif // defined __router_handler_hpp__ 139 | -------------------------------------------------------------------------------- /src/common/row-op-util.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Carnegie Mellon University. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. 
Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 3. Neither the name of the University nor the names of its contributors 14 | * may be used to endorse or promote products derived from this software 15 | * without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 24 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 25 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 27 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | * POSSIBILITY OF SUCH DAMAGE. 
29 |  */
30 |
     /* NOTE(review): the header name of the include on row 31 was lost when
      * this listing was extracted (angle-bracket contents stripped). */
31 | #include
32 |
33 | #include "common/row-op-util.hpp"
34 | #include "common/gpu-util/math_functions.hpp"
35 |
36 | using std::cout;
37 | using std::endl;
38 |
     /*
      * Kernel: for each entry k of "index", copies one row of "row_size"
      * values from x (row index[k].id1 + index_offset.id1) into y
      * (row index[k].id0 + index_offset.id0).
      * A value is written only when its destination offset y_idx is below
      * num_vals_limit; the source offset x_idx is not checked.
      * NOTE(review): CUDA_KERNEL_LOOP is a macro defined outside this file;
      * presumably it walks i over [0, num_rows * row_size) -- confirm.
      */
39 | __global__ void assign_rows_to_double_index_kernel(
40 |     val_t *y, const val_t *x, const DoubleIndex *index,
41 |     size_t num_rows, DoubleIndex index_offset, size_t row_size,
42 |     size_t num_vals_limit) {
43 |   CUDA_KERNEL_LOOP(i, num_rows * row_size) {
44 |     size_t row_index_id = i / row_size;
45 |     size_t val_id = i % row_size;
46 |     /* Assign rows from "id1" to "id0" */
47 |     size_t row_from = index[row_index_id].id1 + index_offset.id1;
48 |     size_t row_to = index[row_index_id].id0 + index_offset.id0;
49 |     size_t x_idx = row_from * row_size + val_id;
50 |     size_t y_idx = row_to * row_size + val_id;
51 |     if (y_idx < num_vals_limit) {
52 |       y[y_idx] = x[x_idx];
53 |     }
54 |   }
55 | }
56 |
     /*
      * Host wrapper for assign_rows_to_double_index_kernel.
      * Returns immediately when num_rows is 0; otherwise launches the kernel
      * and then waits on cuda_stream with cudaStreamSynchronize().
      * NOTE(review): the reinterpret_cast target types and the launch
      * configuration between "<<<" and ">>>" (including listing row 69) were
      * lost in extraction; restore them from the upstream file.
      */
57 | void assign_rows_to_double_index_gpu(
58 |     ArrayData *rows_y, const ArrayData *rows_x, const DoubleIndex *index,
59 |     size_t num_rows, DoubleIndex index_offset, size_t row_size,
60 |     size_t num_vals_limit,
61 |     cudaStream_t cuda_stream) {
62 |   if (num_rows == 0) {
63 |     return;
64 |   }
65 |   val_t *y = reinterpret_cast(rows_y);
66 |   const val_t *x = reinterpret_cast(rows_x);
67 |   assign_rows_to_double_index_kernel
68 |       <<>>
70 |       (y, x, index, num_rows, index_offset, row_size, num_vals_limit);
71 |   CUDA_CHECK(cudaStreamSynchronize(cuda_stream));
72 | }
73 |
     /*
      * Kernel: the opposite direction of the kernel above. Copies the row
      * index[k].id0 + index_offset.id0 of x into the row
      * index[k].id1 + index_offset.id1 of y.
      * Here the guard is on the source offset x_idx; the destination offset
      * y_idx is not checked.
      */
74 | __global__ void assign_rows_from_double_index_kernel(
75 |     val_t *y, const val_t *x, const DoubleIndex *index,
76 |     size_t num_rows, DoubleIndex index_offset, size_t row_size,
77 |     size_t num_vals_limit) {
78 |   CUDA_KERNEL_LOOP(i, num_rows * row_size) {
79 |     size_t row_index_id = i / row_size;
80 |     size_t val_id = i % row_size;
81 |     /* Assign rows from "id0" to "id1" */
82 |     size_t row_from = index[row_index_id].id0 + index_offset.id0;
83 |     size_t row_to = index[row_index_id].id1 + index_offset.id1;
84 |     size_t x_idx = row_from * row_size + val_id;
85 |     size_t y_idx = row_to * row_size + val_id;
86 |     if (x_idx < num_vals_limit) {
87 |       y[y_idx] = x[x_idx];
88 |     }
89 |   }
90 | }
91 |
     /*
      * Host wrapper for assign_rows_from_double_index_kernel; same early
      * return and stream synchronization as the wrapper above.
      * NOTE(review): cast targets and launch configuration (listing row 104)
      * were lost in extraction.
      */
92 | void assign_rows_from_double_index_gpu(
93 |     ArrayData *rows_y, const ArrayData *rows_x, const DoubleIndex *index,
94 |     size_t num_rows, DoubleIndex index_offset, size_t row_size,
95 |     size_t num_vals_limit,
96 |     cudaStream_t cuda_stream) {
97 |   if (num_rows == 0) {
98 |     return;
99 |   }
100 |   val_t *y = reinterpret_cast(rows_y);
101 |   const val_t *x = reinterpret_cast(rows_x);
102 |   assign_rows_from_double_index_kernel
103 |       <<>>
105 |       (y, x, index, num_rows, index_offset, row_size, num_vals_limit);
106 |   CUDA_CHECK(cudaStreamSynchronize(cuda_stream));
107 | }
108 |
     /*
      * Kernel: like assign_rows_from_double_index_kernel, but accumulates
      * (y += x) instead of overwriting. Guarded on the source offset x_idx.
      * NOTE(review): the "+=" is a plain read-modify-write; if two index
      * entries can name the same id1 row the updates may race -- confirm
      * that callers guarantee distinct destination rows.
      */
109 | __global__ void add_rows_from_double_index_kernel(
110 |     val_t *y, const val_t *x, const DoubleIndex *index,
111 |     size_t num_rows, DoubleIndex index_offset, size_t row_size,
112 |     size_t num_vals_limit) {
113 |   CUDA_KERNEL_LOOP(i, num_rows * row_size) {
114 |     size_t row_index_id = i / row_size;
115 |     size_t val_id = i % row_size;
116 |     /* Add rows from "id0" to "id1" */
117 |     size_t row_from = index[row_index_id].id0 + index_offset.id0;
118 |     size_t row_to = index[row_index_id].id1 + index_offset.id1;
119 |     size_t x_idx = row_from * row_size + val_id;
120 |     size_t y_idx = row_to * row_size + val_id;
121 |     if (x_idx < num_vals_limit) {
122 |       y[y_idx] += x[x_idx];
123 |     }
124 |   }
125 | }
126 |
     /*
      * Host wrapper for add_rows_from_double_index_kernel; same early return
      * and stream synchronization as the wrappers above.
      * NOTE(review): cast targets and launch configuration (listing row 139)
      * were lost in extraction.
      */
127 | void add_rows_from_double_index_gpu(
128 |     ArrayData *rows_y, const ArrayData *rows_x, const DoubleIndex *index,
129 |     size_t num_rows, DoubleIndex index_offset, size_t row_size,
130 |     size_t num_vals_limit,
131 |     cudaStream_t cuda_stream) {
132 |   if (num_rows == 0) {
133 |     return;
134 |   }
135 |   val_t *y = reinterpret_cast(rows_y);
136 |   const val_t *x = reinterpret_cast(rows_x);
137 |   add_rows_from_double_index_kernel
138 |       <<>>
140 |       (y, x, index, num_rows, index_offset, row_size, num_vals_limit);
141 |   CUDA_CHECK(cudaStreamSynchronize(cuda_stream));
142 | }
143 |
-------------------------------------------------------------------------------- /src/common/row-op-util.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __row_op_util_hpp__ 2 | #define __row_op_util_hpp__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 
32 |  */
33 |
     /* NOTE(review): the header name of the include on row 34 was lost when
      * this listing was extracted (angle-bracket contents stripped). */
34 | #include
35 |
36 | #include "geeps-user-defined-types.hpp"
37 | #include "common/portable-bytes.hpp"
38 | #include "common/gpu-util/math_functions.hpp"
39 |
     /* A pair of row indices; the row operations below copy or add rows
      * between position id0 in one array and position id1 in another. */
40 | struct DoubleIndex {
41 |   size_t id0;
42 |   size_t id1;
43 |   DoubleIndex(size_t id0_i = 0, size_t id1_i = 0) : id0(id0_i), id1(id1_i) {}
44 | };
45 |
     /* Resets a row by calling its init() method. */
46 | inline void set_zero(ArrayData& data) {
47 |   data.init();
48 | }
49 |
     /* Serializes one row into "bytes". */
50 | inline void pack_data(PortableBytes& bytes, const ArrayData& data) {
51 |   bytes.pack(data);
52 | }
53 |
     /* Deserializes one row out of "bytes". */
54 | inline void unpack_data(ArrayData& data, PortableBytes& bytes) {
55 |   bytes.unpack(data);
56 | }
57 |
     /* Element-wise addition of two rows over ROW_DATA_SIZE values. */
58 | inline void operator += (ArrayData& left, const ArrayData& right) {
59 |   for (uint i = 0; i < ROW_DATA_SIZE; i++) {
60 |     left.data[i] += right.data[i];
61 |   }
62 | }
63 |
     /*
      * Treats "batch_size" consecutive rows as one flat array of
      * batch_size * ROW_DATA_SIZE values and passes it to cpu_axpy with a
      * scale factor of 1.
      * NOTE(review): presumably cpu_axpy computes y += 1 * x; confirm in
      * math_functions. The reinterpret_cast target types here and in every
      * function below were lost in extraction.
      */
64 | inline void add_row_batch(
65 |     ArrayData *rows_y, const ArrayData *rows_x, size_t batch_size) {
66 |   val_t *y = reinterpret_cast(rows_y);
67 |   const val_t *x = reinterpret_cast(rows_x);
68 |   size_t n = batch_size * ROW_DATA_SIZE;
69 |   cpu_axpy(n, 1, x, y);
70 | }
71 |
     /* Same as add_row_batch, but hands the flat arrays to gpu_axpy together
      * with the given cuBLAS handle. */
72 | inline void add_row_batch_gpu(
73 |     cublasHandle_t cublas_handle,
74 |     ArrayData *rows_y, const ArrayData *rows_x, size_t batch_size) {
75 |   val_t *y = reinterpret_cast(rows_y);
76 |   const val_t *x = reinterpret_cast(rows_x);
77 |   size_t n = batch_size * ROW_DATA_SIZE;
78 |   gpu_axpy(cublas_handle, n, 1, x, y);
79 | }
80 |
     /*
      * For each entry k of "index", copies the row
      * index[k].id1 + index_offset.id1 of rows_x into the row
      * index[k].id0 + index_offset.id0 of rows_y, "row_size" values per row.
      * A value is written only when its destination offset is below
      * num_vals_limit; the source offset is not checked.
      */
81 | inline void assign_rows_to_double_index_cpu(
82 |     ArrayData *rows_y, const ArrayData *rows_x, const DoubleIndex *index,
83 |     size_t num_rows, DoubleIndex index_offset, size_t row_size,
84 |     size_t num_vals_limit) {
85 |   val_t *y = reinterpret_cast(rows_y);
86 |   const val_t *x = reinterpret_cast(rows_x);
87 |   for (size_t row_index_id = 0; row_index_id < num_rows; row_index_id++) {
88 |     /* Assign rows from "id1" to "id0" */
89 |     size_t row_from = index[row_index_id].id1 + index_offset.id1;
90 |     size_t row_to = index[row_index_id].id0 + index_offset.id0;
91 |     for (size_t val_id = 0; val_id < row_size; val_id++) {
92 |       size_t x_idx = row_from * row_size + val_id;
93 |       size_t y_idx = row_to * row_size + val_id;
94 |       if (y_idx < num_vals_limit) {
95 |         y[y_idx] = x[x_idx];
96 |       }
97 |     }
98 |   }
99 | }
100 |
     /*
      * Opposite direction of assign_rows_to_double_index_cpu: copies the row
      * index[k].id0 + index_offset.id0 of rows_x into the row
      * index[k].id1 + index_offset.id1 of rows_y.
      * Here the guard is on the source offset; the destination offset is
      * not checked.
      */
101 | inline void assign_rows_from_double_index_cpu(
102 |     ArrayData *rows_y, const ArrayData *rows_x, const DoubleIndex *index,
103 |     size_t num_rows, DoubleIndex index_offset, size_t row_size,
104 |     size_t num_vals_limit) {
105 |   val_t *y = reinterpret_cast(rows_y);
106 |   const val_t *x = reinterpret_cast(rows_x);
107 |   for (size_t row_index_id = 0; row_index_id < num_rows; row_index_id++) {
108 |     /* Assign rows from "id0" to "id1" */
109 |     size_t row_from = index[row_index_id].id0 + index_offset.id0;
110 |     size_t row_to = index[row_index_id].id1 + index_offset.id1;
111 |     for (size_t val_id = 0; val_id < row_size; val_id++) {
112 |       size_t x_idx = row_from * row_size + val_id;
113 |       size_t y_idx = row_to * row_size + val_id;
114 |       if (x_idx < num_vals_limit) {
115 |         y[y_idx] = x[x_idx];
116 |       }
117 |     }
118 |   }
119 | }
120 |
     /*
      * Like assign_rows_from_double_index_cpu, but accumulates (y += x)
      * instead of overwriting. Guarded on the source offset.
      */
121 | inline void add_rows_from_double_index_cpu(
122 |     ArrayData *rows_y, const ArrayData *rows_x, const DoubleIndex *index,
123 |     size_t num_rows, DoubleIndex index_offset, size_t row_size,
124 |     size_t num_vals_limit) {
125 |   val_t *y = reinterpret_cast(rows_y);
126 |   const val_t *x = reinterpret_cast(rows_x);
127 |   for (size_t row_index_id = 0; row_index_id < num_rows; row_index_id++) {
128 |     /* Add rows from "id0" to "id1" */
129 |     size_t row_from = index[row_index_id].id0 + index_offset.id0;
130 |     size_t row_to = index[row_index_id].id1 + index_offset.id1;
131 |     for (size_t val_id = 0; val_id < row_size; val_id++) {
132 |       size_t x_idx = row_from * row_size + val_id;
133 |       size_t y_idx = row_to * row_size + val_id;
134 |       if (x_idx < num_vals_limit) {
135 |         y[y_idx] += x[x_idx];
136 |       }
137 |     }
138 |   }
139 | }
140 |
     /* GPU counterparts of the three CPU row operations above; they take an
      * additional CUDA stream argument and are only declared here. */
141 | void assign_rows_to_double_index_gpu(
142 |     ArrayData *rows_y, const ArrayData *rows_x, const DoubleIndex *index,
143 |     size_t num_rows, DoubleIndex index_offset, size_t row_size,
144 |     size_t num_vals_limit,
145 |     cudaStream_t cuda_stream);
146 | void assign_rows_from_double_index_gpu(
147 |     ArrayData *rows_y, const ArrayData *rows_x, const DoubleIndex *index,
148 |     size_t num_rows, DoubleIndex index_offset, size_t row_size,
149 |     size_t num_vals_limit,
150 |     cudaStream_t cuda_stream);
151 | void add_rows_from_double_index_gpu(
152 |     ArrayData *rows_y, const ArrayData *rows_x, const DoubleIndex *index,
153 |     size_t num_rows, DoubleIndex index_offset, size_t row_size,
154 |     size_t num_vals_limit,
155 |     cudaStream_t cuda_stream);
156 |
157 | #endif // defined __row_op_util_hpp__
158 |
--------------------------------------------------------------------------------
/src/common/wire-protocol.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __wire_protocol_hpp__
2 | #define __wire_protocol_hpp__
3 |
4 | /*
5 |  * Copyright (c) 2016, Carnegie Mellon University.
6 |  * All rights reserved.
7 |  *
8 |  * Redistribution and use in source and binary forms, with or without
9 |  * modification, are permitted provided that the following conditions
10 |  * are met:
11 |  * 1. Redistributions of source code must retain the above copyright
12 |  * notice, this list of conditions and the following disclaimer.
13 |  * 2. Redistributions in binary form must reproduce the above copyright
14 |  * notice, this list of conditions and the following disclaimer in the
15 |  * documentation and/or other materials provided with the distribution.
16 |  * 3. Neither the name of the University nor the names of its contributors
17 |  * may be used to endorse or promote products derived from this software
18 |  * without specific prior written permission.
19 |  *
20 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 |  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 |  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
27 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
30 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 |  * POSSIBILITY OF SUCH DAMAGE.
32 |  */
33 |
34 | // Common data types shared by the client and the server
35 |
     /* NOTE(review): the header names of the includes on rows 36 and 38 were
      * lost when this listing was extracted. */
36 | #include
37 |
38 | #include
39 |
40 | #include "geeps-user-defined-types.hpp"
41 |
     /* Command codes carried in the "cmd" field of the message headers
      * below. The enumerators take their default values 0..6 in this order. */
42 | enum Command {
43 |   FIND_ROW,
44 |   READ_ROW_BATCH,
45 |   CLOCK,
46 |   CLOCK_WITH_UPDATES_BATCH,
47 |   ADD_ACCESS_INFO,
48 |   GET_STATS,
49 |   SHUTDOWN
50 | };
     /* Storage type of the "cmd" field in every message header. */
51 | typedef uint8_t command_t;
52 |
     /* Read and write frequency recorded for one (table, row) pair. */
53 | struct RowAccessInfo {
54 |   table_id_t tid; /* Table ID */
55 |   row_idx_t rid; /* Row ID */
56 |   uint32_t nr_read; /* Read frequency */
57 |   uint32_t nr_write; /* Write frequency */
58 | };
59 |
     /* Identifies a row by table ID and row index. */
60 | struct RowKey {
61 |   table_id_t table;
62 |   row_idx_t row;
63 |   RowKey(table_id_t table_i = 0, row_idx_t row_i = 0) :
64 |     table(table_i), row(row_i) {}
65 | };
     /* NOTE(review): the element type of this vector was lost in extraction;
      * presumably it is RowKey -- confirm against upstream. */
66 | typedef std::vector RowKeys;
67 |
     /*
      * Message headers. NOTE(review): judging by the client_id / server_id
      * fields, the "cs_" structs presumably travel client -> server and the
      * "sc_" structs server -> client -- confirm against the encoder/decoder
      * sources. The structs carry no packing attributes here, so their
      * in-memory layout includes whatever padding the compiler inserts.
      */
68 | struct cs_find_row_msg_t {
69 |   command_t cmd;
70 |   uint32_t client_id;
71 |   table_id_t table;
72 |   row_idx_t row;
73 | };
74 |
75 | struct cs_read_row_batch_msg_t {
76 |   command_t cmd;
77 |   uint32_t client_id;
78 |   iter_t data_age;
79 |   bool prioritized;
80 | };
81 |
82 | struct cs_clock_msg_t {
83 |   command_t cmd;
84 |   uint32_t client_id;
85 |   iter_t clock;
86 |   uint32_t table_id;
87 |   int read_branch_id;
88 | };
89 |
90 | struct cs_clock_with_updates_batch_msg_t {
91 |   command_t cmd;
92 |   uint32_t client_id;
93 |   iter_t clock;
94 |   uint32_t table_id;
95 |   int update_branch_id;
96 |   int read_branch_id;
97 | };
98 |
99 | struct cs_add_access_info_msg_t {
100 |   command_t cmd;
101 |   uint32_t client_id;
102 | };
103 |
104 | struct cs_get_stats_msg_t {
105 |   command_t cmd;
106 |   uint32_t client_id;
107 | };
108 |
109 |
110 | struct sc_clock_msg_t {
111 |   command_t cmd;
112 |   uint32_t server_id;
113 |   iter_t clock;
114 |   uint32_t table_id;
115 | };
116 |
117 | struct sc_find_row_msg_t {
118 |   command_t cmd;
119 |   table_id_t table;
120 |   row_idx_t row;
121 |   uint32_t server_id;
122 | };
123 |
124 | struct sc_read_row_batch_msg_t {
125 |   command_t cmd;
126 |   uint32_t server_id;
127 |   iter_t data_age;
128 |   iter_t self_clock;
129 |   uint32_t table_id;
130 |   int branch_id;
131 | };
132 |
133 | struct sc_get_stats_msg_t {
134 |   command_t cmd;
135 | };
136 |
137 | #endif // defined __wire_protocol_hpp__
138 |
--------------------------------------------------------------------------------
/src/common/work-puller.cpp:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2016, Carnegie Mellon University.
3 |  * All rights reserved.
4 |  *
5 |  * Redistribution and use in source and binary forms, with or without
6 |  * modification, are permitted provided that the following conditions
7 |  * are met:
8 |  * 1. Redistributions of source code must retain the above copyright
9 |  * notice, this list of conditions and the following disclaimer.
10 |  * 2. Redistributions in binary form must reproduce the above copyright
11 |  * notice, this list of conditions and the following disclaimer in the
12 |  * documentation and/or other materials provided with the distribution.
13 |  * 3.
Neither the name of the University nor the names of its contributors
14 |  * may be used to endorse or promote products derived from this software
15 |  * without specific prior written permission.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 |  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 |  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
24 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
25 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
27 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 |  * POSSIBILITY OF SUCH DAMAGE.
29 |  */
30 |
     /* NOTE(review): the header names of the includes on rows 31-32 were lost
      * when this listing was extracted. */
31 | #include
32 | #include
33 |
34 | #include "work-puller.hpp"
35 |
     /*
      * Creates a ZMQ_PULL socket on the given context and binds it to
      * connection_endpoint.
      * NOTE(review): the template argument of boost::shared_ptr was lost in
      * extraction; the pointee is dereferenced to construct a zmq socket.
      */
36 | WorkPuller::WorkPuller(
37 |     boost::shared_ptr ctx, std::string connection_endpoint) :
38 |     zmq_ctx(ctx), socket(*zmq_ctx, ZMQ_PULL) {
39 |   socket.bind(connection_endpoint.c_str());
40 | }
41 |
     /*
      * Receives one work item: the first frame is unpacked into cmd_ret and
      * any remaining frames of the same message are appended to args.
      * Returns 0 on success and -1 when receiving the first frame throws.
      * NOTE(review): the element type of the "args" vector was lost in
      * extraction.
      */
42 | int WorkPuller::pull_work(uint& cmd_ret, std::vector& args) {
43 |   ZmqPortableBytes cmd_str;
44 |   bool more;
45 |   try {
46 |     more = recv_msg(socket, cmd_str);
47 |   } catch(...) {
     /* Any exception from the receive is reported on stderr and turned into
      * the -1 return value; the exception itself is not propagated. */
48 |     std::cerr << "exception in WorkPuller" << std::endl;
49 |     return -1;
50 |   }
51 |   cmd_str.unpack(cmd_ret);
     /* Release the frame explicitly before cmd_str goes out of scope. */
52 |   cmd_str.close();
53 |   if (more) {
     /* Not covered by the try block above: an exception thrown while
      * reading the argument frames propagates to the caller. */
54 |     recv_msgs(socket, args);
55 |   }
56 |   return 0;
57 | }
58 |
--------------------------------------------------------------------------------
/src/common/work-puller.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __work_puller_hpp__
2 | #define __work_puller_hpp__
3 |
4 | /*
5 |  * Copyright (c) 2016, Carnegie Mellon University.
6 |  * All rights reserved.
7 |  *
8 |  * Redistribution and use in source and binary forms, with or without
9 |  * modification, are permitted provided that the following conditions
10 |  * are met:
11 |  * 1. Redistributions of source code must retain the above copyright
12 |  * notice, this list of conditions and the following disclaimer.
13 |  * 2. Redistributions in binary form must reproduce the above copyright
14 |  * notice, this list of conditions and the following disclaimer in the
15 |  * documentation and/or other materials provided with the distribution.
16 |  * 3. Neither the name of the University nor the names of its contributors
17 |  * may be used to endorse or promote products derived from this software
18 |  * without specific prior written permission.
19 |  *
20 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 |  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 |  * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
24 |  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
27 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
30 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 |  * POSSIBILITY OF SUCH DAMAGE.
32 |  */
33 |
     /* NOTE(review): the header names of the includes on rows 34-36 were lost
      * when this listing was extracted. */
34 | #include
35 | #include
36 | #include
37 |
38 | #include "common/zmq-util.hpp"
39 |
     /*
      * Receiving end of a work queue: owns one zmq socket and hands out the
      * command code and argument frames of each incoming message through
      * pull_work().
      * NOTE(review): the template arguments of boost::shared_ptr and
      * std::vector below were lost in extraction.
      */
40 | class WorkPuller {
     /* Shared handle on the zmq context the socket was created from. */
41 |   boost::shared_ptr zmq_ctx;
42 |   zmq::socket_t socket;
43 |
44 |  public:
45 |   WorkPuller(
46 |       boost::shared_ptr ctx, std::string connection_endpoint);
     /* Writes the command into cmd_ret and the remaining frames into args;
      * see the definition for the return codes. */
47 |   int pull_work(uint& cmd_ret, std::vector& args);
48 | };
49 |
50 | #endif // defined __work_puller_hpp__
51 |
--------------------------------------------------------------------------------
/src/common/work-pusher.cpp:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2016, Carnegie Mellon University.
3 |  * All rights reserved.
4 |  *
5 |  * Redistribution and use in source and binary forms, with or without
6 |  * modification, are permitted provided that the following conditions
7 |  * are met:
8 |  * 1. Redistributions of source code must retain the above copyright
9 |  * notice, this list of conditions and the following disclaimer.
10 |  * 2. Redistributions in binary form must reproduce the above copyright
11 |  * notice, this list of conditions and the following disclaimer in the
12 |  * documentation and/or other materials provided with the distribution.
13 |  * 3. Neither the name of the University nor the names of its contributors
14 |  * may be used to endorse or promote products derived from this software
15 |  * without specific prior written permission.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 |  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 |  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
24 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
25 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
27 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 |  * POSSIBILITY OF SUCH DAMAGE.
29 |  */
30 |
     /* NOTE(review): the header names of the includes on rows 31-32 were lost
      * when this listing was extracted. */
31 | #include
32 | #include
33 |
34 | #include "work-pusher.hpp"
35 |
     /*
      * Sends one work item: a first frame holding "cmd", followed by the
      * frames in "args" when there are any.
      * NOTE(review): the element type of the "args" vector was lost in
      * extraction.
      */
36 | void WorkPusher::push_work(uint cmd, std::vector& args) {
     /* The ZMQ_PUSH socket is created and connected on first use. */
37 |   if (socket.get() == NULL) {
38 |     socket.reset(new zmq::socket_t(*zmq_ctx, ZMQ_PUSH));
39 |     socket->connect(endpoint.c_str());
40 |   }
     /* The command frame is flagged "more" only when argument frames follow,
      * so command and arguments travel as one multipart message. */
41 |   bool more = args.size() > 0;
42 |   ZmqPortableBytes msg;
43 |   msg.pack(cmd);
44 |   send_msg(*socket, msg, more);
45 |   if (more) {
46 |     send_msgs(*socket, args);
47 |   }
48 | }
49 |
     /* Convenience overload: sends a command that has no argument frames. */
50 | void WorkPusher::push_work(uint cmd) {
51 |   std::vector args(0); /* Empty args */
52 |   push_work(cmd, args);
53 | }
54 |
55 |
--------------------------------------------------------------------------------
/src/common/work-pusher.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __work_pusher_hpp__
2 | #define __work_pusher_hpp__
3 |
4 | /*
5 |  * Copyright (c) 2016, Carnegie Mellon University.
6 |  * All rights reserved.
7 |  *
8 |  * Redistribution and use in source and binary forms, with or without
9 |  * modification, are permitted provided that the following conditions
10 |  * are met:
11 |  * 1.
Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 
32 |  */
33 |
     /* NOTE(review): the header names of the includes on rows 34, 36 and 37
      * were lost when this listing was extracted. */
34 | #include
35 |
36 | #include
37 | #include
38 |
39 | #include "common/zmq-util.hpp"
40 |
     /*
      * Sending end of a work queue: remembers a zmq context and an endpoint,
      * and pushes command codes (optionally with argument frames) to that
      * endpoint through push_work().
      * NOTE(review): the template arguments of boost::shared_ptr,
      * boost::thread_specific_ptr and std::vector below were lost in
      * extraction.
      */
41 | class WorkPusher {
42 |   boost::shared_ptr zmq_ctx;
     /* Thread-specific socket slot; push_work() fills it on first use. */
43 |   boost::thread_specific_ptr socket;
44 |   std::string endpoint;
45 |
46 |  public:
     /* Only stores the context and endpoint; no socket is opened here. */
47 |   WorkPusher(boost::shared_ptr ctx_i,
48 |       std::string connection_endpoint)
49 |     : zmq_ctx(ctx_i), endpoint(connection_endpoint) {}
50 |   void push_work(uint cmd, std::vector& args);
51 |   void push_work(uint cmd);
52 | };
53 |
54 | #endif // defined __work_pusher_hpp__
55 |
--------------------------------------------------------------------------------
/src/common/zmq-portable-bytes.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __ZMQ_PORTABLE_BYTES_HPP__
2 | #define __ZMQ_PORTABLE_BYTES_HPP__
3 |
4 | /*
5 |  * Copyright (c) 2016, Carnegie Mellon University.
6 |  * All rights reserved.
7 |  *
8 |  * Redistribution and use in source and binary forms, with or without
9 |  * modification, are permitted provided that the following conditions
10 |  * are met:
11 |  * 1. Redistributions of source code must retain the above copyright
12 |  * notice, this list of conditions and the following disclaimer.
13 |  * 2. Redistributions in binary form must reproduce the above copyright
14 |  * notice, this list of conditions and the following disclaimer in the
15 |  * documentation and/or other materials provided with the distribution.
16 |  * 3. Neither the name of the University nor the names of its contributors
17 |  * may be used to endorse or promote products derived from this software
18 |  * without specific prior written permission.
19 |  *
20 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 |  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 |  * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
24 |  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
27 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
30 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 |  * POSSIBILITY OF SUCH DAMAGE.
32 |  */
33 |
     /* NOTE(review): the header names of the includes on rows 34 and 36 were
      * lost when this listing was extracted. */
34 | #include
35 |
36 | #include
37 |
38 | #include "portable-bytes.hpp"
39 |
     /*
      * PortableBytes implementation backed by a zmq_msg_t. Each method is a
      * thin wrapper around the zmq_msg_* call of the same name and, except
      * for close(), returns that call's result unchanged.
      */
40 | class ZmqPortableBytes : public PortableBytes {
41 |  public:
42 |   zmq_msg_t msg;
43 |
     /* Starts out as an empty message. */
44 |   ZmqPortableBytes() {
45 |     init();
46 |   }
47 |
48 |   int init() {
49 |     return zmq_msg_init(&msg);
50 |   }
51 |
52 |   int init_size(size_t size_) {
53 |     return zmq_msg_init_size(&msg, size_);
54 |   }
55 |
     /* Wraps a caller-provided buffer. ffn_ defaults to empty_free_func,
      * which is defined outside this file. */
56 |   int init_data(
57 |       void *data_, size_t size_,
58 |       free_func_t *ffn_ = empty_free_func, void *hint_ = NULL) {
59 |     return zmq_msg_init_data(&msg, data_, size_, ffn_, hint_);
60 |   }
61 |
62 |   int copy(ZmqPortableBytes& pb_) {
63 |     return zmq_msg_copy(&msg, pb_.get_msg_ptr());
64 |   }
65 |
66 |   int move(ZmqPortableBytes& pb_) {
67 |     return zmq_msg_move(&msg, pb_.get_msg_ptr());
68 |   }
69 |
     /* Closes the message and re-initializes it as an empty one, so the
      * object stays usable. Always returns 0; the results of the two zmq
      * calls are not inspected. */
70 |   int close() {
71 |     zmq_msg_close(&msg);
72 |     zmq_msg_init(&msg);
73 |     return 0;
74 |   }
75 |
76 |   void *data() {
77 |     return zmq_msg_data(&msg);
78 |   }
79 |
80 |   size_t size() {
81 |     return zmq_msg_size(&msg);
82 |   }
83 |
84 |   zmq_msg_t *get_msg_ptr() {
85 |     return &msg;
86 |   }
87 |
     /* The destructor does not call zmq_msg_close(); it only asserts that
      * the message is empty, so owners must close() or move the payload out
      * before destruction. */
88 |   ~ZmqPortableBytes() {
89 |     // TODO(hengganc): remove this assertion
90 |     assert(!size());
91 |   }
92 |
     /* The declarations that would disable copying are commented out below,
      * so the compiler-generated copy operations remain available. */
93 |   // /* Disable implicit message copying, so that we won't use shared
94 |   // * messages (less efficient) without being aware of the fact.
95 |   // */
96 |   // ZmqPortableBytes(const ZmqPortableBytes&);
97 |   // void operator = (const ZmqPortableBytes&);
98 | };
99 |
100 | #endif // __ZMQ_PORTABLE_BYTES_HPP__
101 |
--------------------------------------------------------------------------------
/src/common/zmq-util.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __ZMQ_UTIL_HPP__
2 | #define __ZMQ_UTIL_HPP__
3 |
4 | /*
5 |  * Copyright (c) 2016, Carnegie Mellon University.
6 |  * All rights reserved.
7 |  *
8 |  * Redistribution and use in source and binary forms, with or without
9 |  * modification, are permitted provided that the following conditions
10 |  * are met:
11 |  * 1. Redistributions of source code must retain the above copyright
12 |  * notice, this list of conditions and the following disclaimer.
13 |  * 2. Redistributions in binary form must reproduce the above copyright
14 |  * notice, this list of conditions and the following disclaimer in the
15 |  * documentation and/or other materials provided with the distribution.
16 |  * 3. Neither the name of the University nor the names of its contributors
17 |  * may be used to endorse or promote products derived from this software
18 |  * without specific prior written permission.
19 |  *
20 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 |  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 |  * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | // Helper functions over ZMQ 35 | 36 | #include 37 | #include 38 | 39 | #include 40 | #include 41 | 42 | #include "zmq-portable-bytes.hpp" 43 | 44 | using std::vector; 45 | using std::string; 46 | 47 | /* The zmq::message_t object is the superclass of zmq_msg_t */ 48 | inline void move_pb_to_zmq(zmq::message_t& zmq_msg, ZmqPortableBytes& pb) { 49 | zmq_msg_t *zmq_msg_ptr = reinterpret_cast(&zmq_msg); 50 | zmq_msg_t *pb_msg_ptr = pb.get_msg_ptr(); 51 | zmq_msg_move(zmq_msg_ptr, pb_msg_ptr); 52 | } 53 | 54 | inline void move_zmq_to_pb(ZmqPortableBytes& pb, zmq::message_t& zmq_msg) { 55 | zmq_msg_t *zmq_msg_ptr = reinterpret_cast(&zmq_msg); 56 | zmq_msg_t *pb_msg_ptr = pb.get_msg_ptr(); 57 | zmq_msg_move(pb_msg_ptr, zmq_msg_ptr); 58 | } 59 | 60 | inline void move_string_to_zmq( 61 | zmq::message_t& zmq_msg, const std::string& str) { 62 | zmq_msg_t *zmq_msg_ptr = reinterpret_cast(&zmq_msg); 63 | zmq_msg_init_size(zmq_msg_ptr, str.size()); 64 | memcpy(zmq_msg_data(zmq_msg_ptr), str.data(), str.size()); 65 | } 66 | 67 | inline void move_zmq_to_string(std::string& str, zmq::message_t& zmq_msg) { 68 | zmq_msg_t *zmq_msg_ptr = reinterpret_cast(&zmq_msg); 69 | str.assign(reinterpret_cast( 70 | zmq_msg_data(zmq_msg_ptr)), zmq_msg_size(zmq_msg_ptr)); 71 | } 72 | 73 | inline bool recv_msg(zmq::socket_t& sock, std::string& msg) { 74 | zmq::message_t zmq_msg; 75 | sock.recv(&zmq_msg); 76 | 
move_zmq_to_string(msg, zmq_msg); 77 | int64_t more; 78 | size_t morelen = sizeof(more); 79 | sock.getsockopt(ZMQ_RCVMORE, &more, &morelen); 80 | return (more != 0); 81 | } 82 | 83 | inline bool recv_msg(zmq::socket_t& sock, ZmqPortableBytes& msg) { 84 | zmq::message_t zmq_msg; 85 | sock.recv(&zmq_msg); 86 | move_zmq_to_pb(msg, zmq_msg); 87 | int64_t more; 88 | size_t morelen = sizeof(more); 89 | sock.getsockopt(ZMQ_RCVMORE, &more, &morelen); 90 | return (more != 0); 91 | } 92 | 93 | inline bool recv_msgs(zmq::socket_t& sock, vector& msgs) { 94 | int64_t more = 1; 95 | while (more) { 96 | zmq::message_t zmq_msg; 97 | sock.recv(&zmq_msg); 98 | if (msgs.size() == msgs.capacity()) { 99 | /* Enlarge capacity */ 100 | vector tmp_msgs(msgs.size()); 101 | for (uint i = 0; i < msgs.size(); i++) { 102 | tmp_msgs[i].move(msgs[i]); 103 | } 104 | msgs.reserve(tmp_msgs.size() * 2); 105 | msgs.resize(tmp_msgs.size()); 106 | for (uint i = 0; i < tmp_msgs.size(); i++) { 107 | msgs[i].move(tmp_msgs[i]); 108 | } 109 | } 110 | msgs.push_back(ZmqPortableBytes()); 111 | move_zmq_to_pb(msgs[msgs.size() - 1], zmq_msg); 112 | size_t morelen = sizeof(more); 113 | sock.getsockopt(ZMQ_RCVMORE, &more, &morelen); 114 | } 115 | return false; /* no more messages */ 116 | } 117 | 118 | inline int send_msg( 119 | zmq::socket_t& sock, const std::string& data, bool more = false) { 120 | zmq::message_t zmq_msg; 121 | move_string_to_zmq(zmq_msg, data); 122 | if (more) { 123 | return sock.send(zmq_msg, ZMQ_SNDMORE); 124 | } else { 125 | return sock.send(zmq_msg); 126 | } 127 | } 128 | 129 | inline int send_msg( 130 | zmq::socket_t& sock, ZmqPortableBytes& data, bool more = false) { 131 | zmq::message_t zmq_msg; 132 | move_pb_to_zmq(zmq_msg, data); 133 | if (more) { 134 | return sock.send(zmq_msg, ZMQ_SNDMORE); 135 | } else { 136 | return sock.send(zmq_msg); 137 | } 138 | } 139 | 140 | inline int send_msgs(zmq::socket_t& sock, 141 | std::vector& parts, 142 | bool more = false) { 143 | if (parts.size() 
== 0) { 144 | return 0; 145 | } 146 | 147 | int end = parts.size() -1; 148 | int ret = 0; 149 | if (more) { 150 | end = parts.size(); 151 | } 152 | for (int i = 0; i < end; i++) { 153 | ret |= send_msg(sock, parts[i], true); 154 | } 155 | if (!more) { 156 | ret |= send_msg(sock, parts[parts.size()-1]); 157 | } 158 | return ret; 159 | } 160 | 161 | inline void forward_msgs(zmq::socket_t& src, zmq::socket_t& dst) { 162 | int64_t more = 1; 163 | size_t morelen = sizeof(more); 164 | while (more) { 165 | zmq::message_t zmq_msg; 166 | /* Process all parts of the message */ 167 | src.recv(&zmq_msg); 168 | src.getsockopt(ZMQ_RCVMORE, &more, &morelen); 169 | if (more) { 170 | dst.send(zmq_msg, ZMQ_SNDMORE); 171 | } else { 172 | dst.send(zmq_msg); 173 | } 174 | } 175 | } 176 | 177 | #endif // __ZMQ_UTIL_HPP__ 178 | -------------------------------------------------------------------------------- /src/server/metadata-server.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Carnegie Mellon University. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 3. Neither the name of the University nor the names of its contributors 14 | * may be used to endorse or promote products derived from this software 15 | * without specific prior written permission. 
16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 24 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 25 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 27 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | * POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | #include "metadata-server.hpp" 36 | 37 | using std::string; 38 | using std::cerr; 39 | using std::cout; 40 | using std::endl; 41 | using std::vector; 42 | 43 | 44 | void MetadataServer::add_access_info( 45 | const string& client, uint client_id, const AccessInfo& access_info) { 46 | if (policy == 3) { 47 | access_info_received = true; 48 | for (uint i = 0; i < access_info.size(); i++) { 49 | const RowAccessInfo& row_access_info = access_info[i]; 50 | TableRow key(row_access_info.tid, row_access_info.rid); 51 | TargetServer server( 52 | client_id, row_access_info.nr_read, row_access_info.nr_write); 53 | if (!tmp_row_tablet_map.count(key)) { 54 | std::vector servers; 55 | servers.push_back(server); 56 | tmp_row_tablet_map[key] = servers; 57 | } else { 58 | std::vector &cur_servers = tmp_row_tablet_map[key]; 59 | cur_servers.push_back(server); 60 | } 61 | } 62 | 63 | nr_access_info_received++; 64 | if (nr_access_info_received == num_processes) { 65 | /* Received access info from all clients, now the row-to-tablet mapping 66 | * should be stable, and we can service 
FIND_ROW requests 67 | */ 68 | decide_data_assignment(); 69 | ready_to_serve = true; 70 | serve_pending_requests(); 71 | } 72 | } 73 | 74 | /* We also view that as an automatic FIND_ROW request */ 75 | for (uint i = 0; i < access_info.size(); i++) { 76 | const RowAccessInfo& row_access_info = access_info[i]; 77 | find_row(client, client_id, row_access_info.tid, row_access_info.rid); 78 | } 79 | } 80 | 81 | void MetadataServer::decide_data_assignment() { 82 | for (boost::unordered_map >::iterator 83 | it = tmp_row_tablet_map.begin(); 84 | it != tmp_row_tablet_map.end(); it++) { 85 | std::vector candidates = it->second; 86 | uint m_ind = 0; 87 | uint m_freq = 0; 88 | for (std::vector::iterator ts_it = candidates.begin(); 89 | ts_it != candidates.end(); ts_it++) { 90 | uint cur_freq = ts_it->nr_read + ts_it->nr_write; 91 | if (cur_freq > m_freq || 92 | (cur_freq == m_freq && 93 | tablet_load[ts_it->tid] < tablet_load[candidates[m_ind].tid])) { 94 | m_freq = cur_freq; 95 | m_ind = ts_it - candidates.begin(); 96 | } 97 | } 98 | TargetServer chosen_candidate = candidates[m_ind]; 99 | row_tablet_map[it->first] = chosen_candidate; 100 | tablet_load[chosen_candidate.tid] += 101 | chosen_candidate.nr_read + chosen_candidate.nr_write; 102 | } 103 | } 104 | 105 | void MetadataServer::serve_pending_requests() { 106 | for (uint i = 0; i < pending_requests.size(); i ++) { 107 | FindRowRequest& request = pending_requests[i]; 108 | TableRow key(request.table, request.row); 109 | uint server_id = row_tablet_map[key].tid; 110 | communicator->find_row( 111 | request.client, request.table, request.row, server_id); 112 | } 113 | } 114 | 115 | uint64_t MetadataServer::get_hash(table_id_t table, row_idx_t row) { 116 | return row; 117 | } 118 | 119 | void MetadataServer::find_row( 120 | const string& client, uint client_id, table_id_t table, row_idx_t row) { 121 | server_stats.nr_request++; 122 | 123 | CHECK(0); 124 | 125 | uint server_id = 0; 126 | TableRow key(table, row); 127 | 128 | 
switch (policy) { 129 | case 1: 130 | /* tablet server <-- row_id % num_processes */ 131 | server_id = 132 | get_hash(table, row) % (num_processes * num_channels) / num_channels; 133 | communicator->find_row(client_id, table, row, server_id); 134 | break; 135 | case 2: 136 | /* tablet server <-- first accessing client */ 137 | if (row_tablet_map.find(key) == row_tablet_map.end()) { 138 | row_tablet_map[key].tid = client_id; 139 | } 140 | server_id = row_tablet_map[key].tid; 141 | communicator->find_row(client_id, table, row, server_id); 142 | break; 143 | case 3: 144 | /* max(local access) + load balancing */ 145 | if (ready_to_serve) { 146 | RowTableMap::iterator row_tablet_map_it = row_tablet_map.find(key); 147 | if (row_tablet_map_it != row_tablet_map.end()) { 148 | server_id = row_tablet_map_it->second.tid; 149 | } else { 150 | server_id = 151 | get_hash(table, row) % (num_processes * num_channels) 152 | / num_channels; 153 | } 154 | communicator->find_row(client_id, table, row, server_id); 155 | } else { 156 | if (access_info_received) { 157 | /* row-to-tablet mapping is not ready, save to pending requests */ 158 | pending_requests.push_back(FindRowRequest(client_id, table, row)); 159 | } else { 160 | server_id = 161 | get_hash(table, row) % (num_processes * num_channels) 162 | / num_channels; 163 | communicator->find_row(client_id, table, row, server_id); 164 | } 165 | } 166 | break; 167 | default: 168 | CHECK(0) << "Unknown parameter placement policy: " << policy; 169 | } 170 | } 171 | 172 | string MetadataServer::get_stats() { 173 | return server_stats.to_json(); 174 | } 175 | -------------------------------------------------------------------------------- /src/server/metadata-server.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __metadata_server_hpp__ 2 | #define __metadata_server_hpp__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 
7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 
32 | */ 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | #include 40 | 41 | #include "geeps-user-defined-types.hpp" 42 | #include "common/common-util.hpp" 43 | #include "server-encoder-decoder.hpp" 44 | 45 | using boost::shared_ptr; 46 | 47 | class MetadataServer { 48 | struct Stats { 49 | uint policy; 50 | int64_t nr_request; 51 | 52 | Stats() { 53 | nr_request = 0; 54 | } 55 | 56 | Stats& operator += (const Stats& rhs) { 57 | nr_request += rhs.nr_request; 58 | return *this; 59 | } 60 | std::string to_json() { 61 | std::stringstream ss; 62 | ss << "{" 63 | << "\"policy\": " << policy << ", " 64 | << "\"nr_request\": " << nr_request 65 | << " } "; 66 | return ss.str(); 67 | } 68 | }; 69 | 70 | typedef std::vector AccessInfo; 71 | 72 | struct TargetServer { 73 | table_id_t tid; /* Table ID */ 74 | uint32_t nr_read; /* Read frequency */ 75 | uint32_t nr_write; /* Write frequency */ 76 | 77 | TargetServer(table_id_t t = 0, uint32_t r = 0, uint32_t w = 0) { 78 | tid = t; 79 | nr_read = r; 80 | nr_write = w; 81 | } 82 | }; 83 | 84 | struct FindRowRequest { 85 | uint client; 86 | table_id_t table; /* Table ID */ 87 | row_idx_t row; /* Row ID */ 88 | 89 | FindRowRequest( 90 | uint client_i = 0, table_id_t table_i = 0, row_idx_t row_i = 0) : 91 | client(client_i), table(table_i), row(row_i) {} 92 | }; 93 | 94 | uint channel_id; 95 | uint num_channels; 96 | uint process_id; 97 | uint num_processes; 98 | 99 | shared_ptr communicator; 100 | Stats server_stats; 101 | std::string log_output_dir; 102 | 103 | bool access_info_received; 104 | bool ready_to_serve; 105 | // 1 - tablet server <-- row_id % nr_tablets 106 | // 2 - tablet server <-- first accessing client 107 | // 3 - max(local access) + load balancing 108 | uint policy; 109 | uint nr_access_info_received; 110 | std::vector pending_requests; 111 | std::vector tablet_load; 112 | boost::unordered_map > tmp_row_tablet_map; 113 | typedef boost::unordered_map RowTableMap; 114 | RowTableMap 
row_tablet_map; 115 | 116 | private: 117 | void decide_data_assignment(); 118 | void serve_pending_requests(); 119 | uint64_t get_hash(table_id_t table, row_idx_t row); 120 | 121 | public: 122 | MetadataServer( 123 | uint channel_id, uint num_channels, 124 | uint process_id, uint num_processes, 125 | shared_ptr communicator, 126 | const GeePsConfig& config) : 127 | channel_id(channel_id), num_channels(num_channels), 128 | process_id(process_id), num_processes(num_processes), 129 | communicator(communicator), 130 | log_output_dir(config.output_dir), policy(config.pp_policy), 131 | tablet_load(num_processes) { 132 | nr_access_info_received = 0; 133 | access_info_received = false; 134 | ready_to_serve = false; 135 | server_stats.policy = config.pp_policy; 136 | for (uint i = 0; i < num_processes; i++) { 137 | tablet_load[i] = 0; 138 | } 139 | } 140 | void add_access_info( 141 | const std::string& client, uint client_id, 142 | const AccessInfo& access_info); 143 | void find_row( 144 | const std::string& client, uint client_id, 145 | table_id_t table, row_idx_t row); 146 | string get_stats(); 147 | }; 148 | 149 | #endif // defined __metadata_server_hpp__ 150 | -------------------------------------------------------------------------------- /src/server/server-encoder-decoder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Carnegie Mellon University. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 
13 | * 3. Neither the name of the University nor the names of its contributors 14 | * may be used to endorse or promote products derived from this software 15 | * without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 24 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 25 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 27 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | * POSSIBILITY OF SUCH DAMAGE. 
29 | */ 30 | 31 | #include 32 | #include 33 | 34 | #include "common/portable-bytes.hpp" 35 | #include "server-encoder-decoder.hpp" 36 | #include "tablet-server.hpp" 37 | #include "metadata-server.hpp" 38 | 39 | using std::string; 40 | using std::cerr; 41 | using std::cout; 42 | using std::endl; 43 | using std::vector; 44 | 45 | 46 | ClientServerDecode::ClientServerDecode( 47 | shared_ptr storage, 48 | shared_ptr metadata_server) : 49 | storage(storage), metadata_server(metadata_server) { 50 | } 51 | 52 | void ClientServerDecode::find_row( 53 | const string& src, vector& args) { 54 | CHECK_EQ(args.size(), 1); 55 | CHECK_EQ(args[0].size(), sizeof(cs_find_row_msg_t)); 56 | cs_find_row_msg_t *cs_find_row_msg = 57 | reinterpret_cast(args[0].data()); 58 | uint client_id = cs_find_row_msg->client_id; 59 | table_id_t table = cs_find_row_msg->table; 60 | row_idx_t row = cs_find_row_msg->row; 61 | 62 | metadata_server->find_row(src, client_id, table, row); 63 | 64 | for (uint i = 0; i < args.size(); i++) { 65 | args[i].close(); 66 | } 67 | } 68 | 69 | void ClientServerDecode::clock( 70 | const string& src, vector& args) { 71 | CHECK_EQ(args.size(), 1); 72 | CHECK_EQ(args[0].size(), sizeof(cs_clock_msg_t)); 73 | cs_clock_msg_t *cs_clock_msg = 74 | reinterpret_cast(args[0].data()); 75 | uint client_id = cs_clock_msg->client_id; 76 | iter_t clock = cs_clock_msg->clock; 77 | uint table_id = cs_clock_msg->table_id; 78 | 79 | storage->clock(client_id, clock, table_id); 80 | 81 | for (uint i = 0; i < args.size(); i++) { 82 | args[i].close(); 83 | } 84 | } 85 | 86 | void ClientServerDecode::clock_with_updates_batch( 87 | const string& src, vector& args) { 88 | CHECK_GE(args.size(), 3); 89 | CHECK_EQ(args[0].size(), sizeof(cs_clock_with_updates_batch_msg_t)); 90 | cs_clock_with_updates_batch_msg_t *cs_clock_with_updates_batch_msg = 91 | reinterpret_cast(args[0].data()); 92 | uint client_id = cs_clock_with_updates_batch_msg->client_id; 93 | iter_t clock = 
cs_clock_with_updates_batch_msg->clock; 94 | uint table_id = cs_clock_with_updates_batch_msg->table_id; 95 | 96 | RowKey *row_keys = 97 | reinterpret_cast(args[1].data()); 98 | uint batch_size = args[1].size() / sizeof(RowKey); 99 | RowOpVal *updates = 100 | reinterpret_cast(args[2].data()); 101 | CHECK_EQ(batch_size, args[2].size() / sizeof(RowOpVal)); 102 | 103 | tbb::tick_count inc_start = tbb::tick_count::now(); 104 | storage->update_row_batch( 105 | client_id, clock, table_id, row_keys, updates, batch_size); 106 | storage->server_stats.inc_time += 107 | (tbb::tick_count::now() - inc_start).seconds(); 108 | storage->clock(client_id, clock, table_id); 109 | 110 | for (uint i = 0; i < args.size(); i++) { 111 | args[i].close(); 112 | } 113 | } 114 | 115 | void ClientServerDecode::read_row_batch( 116 | const string& src, vector& args) { 117 | CHECK(0); 118 | } 119 | 120 | void ClientServerDecode::add_access_info( 121 | const std::string& src, vector& args) { 122 | CHECK_EQ(args.size(), 2); 123 | CHECK_EQ(args[0].size(), sizeof(cs_add_access_info_msg_t)); 124 | cs_add_access_info_msg_t *cs_add_access_info_msg = 125 | reinterpret_cast(args[0].data()); 126 | uint client_id = cs_add_access_info_msg->client_id; 127 | 128 | vector access_info; 129 | args[1].unpack_vector(access_info); 130 | 131 | metadata_server->add_access_info(src, client_id, access_info); 132 | 133 | for (uint i = 0; i < args.size(); i++) { 134 | args[i].close(); 135 | } 136 | } 137 | 138 | void ClientServerDecode::get_stats( 139 | const string& src, vector& args) { 140 | CHECK_EQ(args.size(), 1); 141 | CHECK_EQ(args[0].size(), sizeof(cs_get_stats_msg_t)); 142 | cs_get_stats_msg_t *cs_get_stats_msg = 143 | reinterpret_cast(args[0].data()); 144 | uint client_id = cs_get_stats_msg->client_id; 145 | 146 | storage->get_stats(client_id, metadata_server); 147 | 148 | for (uint i = 0; i < args.size(); i++) { 149 | args[i].close(); 150 | } 151 | } 152 | 153 | void ClientServerDecode::decode_msg( 154 | const 
string& src, vector& msgs) { 155 | CHECK_GE(msgs.size(), 1); 156 | CHECK_GE(msgs[0].size(), sizeof(command_t)); 157 | command_t cmd; 158 | msgs[0].unpack(cmd); 159 | switch (cmd) { 160 | case FIND_ROW: 161 | find_row(src, msgs); 162 | break; 163 | case CLOCK_WITH_UPDATES_BATCH: 164 | clock_with_updates_batch(src, msgs); 165 | break; 166 | case READ_ROW_BATCH: 167 | read_row_batch(src, msgs); 168 | break; 169 | case CLOCK: 170 | clock(src, msgs); 171 | break; 172 | case ADD_ACCESS_INFO: 173 | add_access_info(src, msgs); 174 | break; 175 | case GET_STATS: 176 | get_stats(src, msgs); 177 | break; 178 | default: 179 | CHECK(0) 180 | << "Server received unknown command: " << static_cast(cmd) 181 | << " size: " << msgs[0].size(); 182 | } 183 | } 184 | 185 | void ClientServerDecode::router_callback(const string& src, 186 | vector& msgs) { 187 | decode_msg(src, msgs); 188 | } 189 | 190 | RouterHandler::RecvCallback ClientServerDecode::get_recv_callback() { 191 | return bind(&ClientServerDecode::router_callback, this, _1, _2); 192 | } 193 | 194 | 195 | void ServerClientEncode::clock(uint server_id, iter_t clock) { 196 | vector msgs; 197 | msgs.resize(1); 198 | 199 | msgs[0].init_size(sizeof(sc_clock_msg_t)); 200 | sc_clock_msg_t *sc_clock_msg = 201 | reinterpret_cast(msgs[0].data()); 202 | sc_clock_msg->cmd = CLOCK; 203 | sc_clock_msg->server_id = server_id; 204 | sc_clock_msg->clock = clock; 205 | 206 | /* Broadcast to all clients */ 207 | router_handler->direct_send_to(client_names, msgs); 208 | } 209 | 210 | void ServerClientEncode::find_row( 211 | uint client_id, table_id_t table, row_idx_t row, uint32_t server_id) { 212 | vector msgs; 213 | msgs.resize(1); 214 | 215 | msgs[0].init_size(sizeof(sc_find_row_msg_t)); 216 | sc_find_row_msg_t *sc_find_row_msg = 217 | reinterpret_cast(msgs[0].data()); 218 | sc_find_row_msg->cmd = FIND_ROW; 219 | sc_find_row_msg->table = table; 220 | sc_find_row_msg->row = row; 221 | sc_find_row_msg->server_id = server_id; 222 | 223 | 
CHECK_LT(client_id, client_names.size()); 224 | string client_name = client_names[client_id]; 225 | router_handler->direct_send_to(client_name, msgs); 226 | } 227 | 228 | void ServerClientEncode::read_row_batch_reply( 229 | uint client_id, uint server_id, iter_t data_age, iter_t self_clock, 230 | uint table_id, RowKey *row_keys, RowData *row_data, uint batch_size) { 231 | vector msgs; 232 | msgs.resize(3); 233 | 234 | msgs[0].init_size(sizeof(sc_read_row_batch_msg_t)); 235 | sc_read_row_batch_msg_t *sc_read_row_batch_msg = 236 | reinterpret_cast(msgs[0].data()); 237 | sc_read_row_batch_msg->cmd = READ_ROW_BATCH; 238 | sc_read_row_batch_msg->server_id = server_id; 239 | sc_read_row_batch_msg->data_age = data_age; 240 | sc_read_row_batch_msg->self_clock = self_clock; 241 | sc_read_row_batch_msg->table_id = table_id; 242 | 243 | msgs[1].pack_memory(row_keys, sizeof(RowKey) * batch_size); 244 | // msgs[2].pack_gpu_memory(row_data, sizeof(RowData) * batch_size, cuda_stream); 245 | msgs[2].pack_memory(row_data, sizeof(RowData) * batch_size); 246 | 247 | CHECK_LT(client_id, client_names.size()); 248 | string client_name = client_names[client_id]; 249 | router_handler->direct_send_to(client_name, msgs); 250 | } 251 | 252 | void ServerClientEncode::get_stats(uint client_id, const string& stats) { 253 | vector msgs; 254 | msgs.resize(2); 255 | 256 | msgs[0].init_size(sizeof(sc_get_stats_msg_t)); 257 | sc_get_stats_msg_t *sc_get_stats_msg = 258 | reinterpret_cast(msgs[0].data()); 259 | sc_get_stats_msg->cmd = GET_STATS; 260 | 261 | msgs[1].pack_string(stats); 262 | 263 | CHECK_LT(client_id, client_names.size()); 264 | string client_name = client_names[client_id]; 265 | router_handler->direct_send_to(client_name, msgs); 266 | } 267 | 268 | string ServerClientEncode::get_router_stats() { 269 | return router_handler->get_stats(); 270 | } 271 | -------------------------------------------------------------------------------- /src/server/server-encoder-decoder.hpp: 
-------------------------------------------------------------------------------- 1 | #ifndef __server_encoder_decoder_hpp__ 2 | #define __server_encoder_decoder_hpp__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 
32 | */ 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | #include 42 | 43 | #include "common/wire-protocol.hpp" 44 | #include "common/router-handler.hpp" 45 | 46 | using boost::shared_ptr; 47 | 48 | class TabletStorage; 49 | class MetadataServer; 50 | 51 | /* Encodes messages to client */ 52 | class ServerClientEncode { 53 | shared_ptr router_handler; 54 | vector client_names; 55 | cudaStream_t cuda_stream; 56 | cublasHandle_t cublas_handle; 57 | 58 | public: 59 | explicit ServerClientEncode( 60 | shared_ptr router_handler, 61 | cudaStream_t cuda_stream, cublasHandle_t cublas_handle, 62 | uint num_machines, uint server_id, const GeePsConfig& config) 63 | : router_handler(router_handler), 64 | cuda_stream(cuda_stream), cublas_handle(cublas_handle) { 65 | for (uint i = 0; i < num_machines; i++) { 66 | std::string cname("local"); 67 | if (!config.local_opt || i != server_id) { 68 | cname = (boost::format("client-%i") % i).str(); 69 | } 70 | client_names.push_back(cname); 71 | } 72 | } 73 | void find_row( 74 | uint client_id, table_id_t table, row_idx_t row, uint server_id); 75 | void read_row_batch_reply( 76 | uint client_id, uint server_id, iter_t data_age, iter_t self_clock, 77 | uint table_id, RowKey *row_keys, RowData *row_data, uint batch_size); 78 | void clock(uint server_id, iter_t clock); 79 | void get_stats(uint client_id, const string& stats); 80 | string get_router_stats(); 81 | }; 82 | 83 | /* Decodes messages from client */ 84 | class ClientServerDecode { 85 | shared_ptr storage; 86 | shared_ptr metadata_server; 87 | 88 | public: 89 | explicit ClientServerDecode( 90 | shared_ptr storage, 91 | shared_ptr metadata_server); 92 | void find_row(const string& src, vector& msgs); 93 | void clock(const string& src, vector& msgs); 94 | void clock_with_updates_batch( 95 | const string& src, vector& msgs); 96 | void read_row_batch(const string& src, vector& msgs); 97 | void add_access_info(const string& src, vector& 
msgs); 98 | void get_stats(const string& src, vector& msgs); 99 | void report_progress(const string& src, vector& msgs); 100 | void decode_msg(const string& src, vector& msgs); 101 | void router_callback(const string& src, vector& msgs); 102 | 103 | RouterHandler::RecvCallback get_recv_callback(); 104 | }; 105 | 106 | #endif // defined __server_encoder_decoder_hpp__ 107 | -------------------------------------------------------------------------------- /src/server/server-entry.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Carnegie Mellon University. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 3. Neither the name of the University nor the names of its contributors 14 | * may be used to endorse or promote products derived from this software 15 | * without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 24 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 25 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 27 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | * POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | #include 32 | #include 33 | 34 | #include 35 | #include 36 | 37 | #include "server-entry.hpp" 38 | #include "server-encoder-decoder.hpp" 39 | #include "tablet-server.hpp" 40 | #include "metadata-server.hpp" 41 | 42 | using std::string; 43 | using std::vector; 44 | using std::cerr; 45 | using std::cout; 46 | using std::endl; 47 | using boost::format; 48 | using boost::shared_ptr; 49 | using boost::make_shared; 50 | 51 | void ServerThreadEntry::server_entry( 52 | uint channel_id, uint num_channels, 53 | uint process_id, uint num_processes, 54 | shared_ptr zmq_ctx, 55 | const GeePsConfig& config) { 56 | uint port = config.tcp_base_port + channel_id; 57 | string request_url = "tcp://*:" + boost::lexical_cast(port); 58 | 59 | /* Init cuda stream and cublas handle */ 60 | cudaStream_t cuda_stream; 61 | cublasHandle_t cublas_handle; 62 | CUDA_CHECK(cudaStreamCreate(&cuda_stream)); 63 | CUBLAS_CHECK(cublasCreate(&cublas_handle)); 64 | CUBLAS_CHECK(cublasSetStream(cublas_handle, cuda_stream)); 65 | 66 | /* Init communication */ 67 | vector connect_list; /* Empty connect to */ 68 | vector bind_list; 69 | bind_list.push_back(request_url); 70 | string tablet_name = (format("tablet-%i") % process_id).str(); 71 | shared_ptr router_handler = make_shared( 72 | channel_id, zmq_ctx, connect_list, bind_list, tablet_name, 73 | config); 74 | 75 | shared_ptr encoder = make_shared( 76 | router_handler, 
cuda_stream, cublas_handle, 77 | num_processes, process_id, config); 78 | 79 | shared_ptr storage = make_shared( 80 | channel_id, num_channels, process_id, num_processes, 81 | encoder, cuda_stream, cublas_handle, config); 82 | shared_ptr metadata_server = make_shared( 83 | channel_id, num_channels, process_id, num_processes, 84 | encoder, config); 85 | ClientServerDecode decoder(storage, metadata_server); 86 | 87 | router_handler->do_handler(decoder.get_recv_callback()); 88 | } 89 | -------------------------------------------------------------------------------- /src/server/server-entry.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __server_entry_hpp__ 2 | #define __server_entry_hpp__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3. Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include 35 | 36 | #include "common/router-handler.hpp" 37 | 38 | using std::string; 39 | using std::vector; 40 | using std::cerr; 41 | using std::cout; 42 | using std::endl; 43 | using boost::shared_ptr; 44 | 45 | class ServerThreadEntry { 46 | uint channel_id; 47 | uint num_channels; 48 | uint process_id; 49 | uint num_processes; 50 | boost::shared_ptr zmq_ctx; 51 | GeePsConfig config; 52 | 53 | public: 54 | ServerThreadEntry( 55 | uint channel_id, uint num_channels, 56 | uint process_id, uint num_processes, 57 | boost::shared_ptr zmq_ctx, 58 | const GeePsConfig& config) : 59 | channel_id(channel_id), num_channels(num_channels), 60 | process_id(process_id), num_processes(num_processes), 61 | zmq_ctx(zmq_ctx), 62 | config(config) { 63 | } 64 | 65 | void operator()() { 66 | server_entry( 67 | channel_id, num_channels, process_id, num_processes, zmq_ctx, config); 68 | } 69 | 70 | private: 71 | void server_entry( 72 | uint channel_id, uint num_channels, 73 | uint process_id, uint num_processes, 74 | shared_ptr zmq_ctx, 75 | const GeePsConfig& config); 76 | }; 77 | 78 | #endif // defined __server_entry_hpp__ 79 | -------------------------------------------------------------------------------- /src/server/tablet-server.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016, Carnegie Mellon University. 3 | * All rights reserved. 
4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 3. Neither the name of the University nor the names of its contributors 14 | * may be used to endorse or promote products derived from this software 15 | * without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 23 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 24 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 25 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 27 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | * POSSIBILITY OF SUCH DAMAGE. 
29 | */ 30 | 31 | /* GeePS tablet server */ 32 | 33 | #include 34 | #include 35 | #include 36 | 37 | #include 38 | #include 39 | #include 40 | 41 | #include "common/internal-config.hpp" 42 | #include "server-encoder-decoder.hpp" 43 | #include "tablet-server.hpp" 44 | 45 | using std::string; 46 | using std::cerr; 47 | using std::cout; 48 | using std::endl; 49 | using std::vector; 50 | using std::pair; 51 | using std::make_pair; 52 | using boost::format; 53 | using boost::lexical_cast; 54 | using boost::shared_ptr; 55 | using boost::make_shared; 56 |

/*
 * Create the storage of one channel of this server process.
 *
 * Allocates config.num_tables data tables.  Every table starts with one
 * vector-clock entry per process, all entries and the global clock set to
 * INITIAL_DATA_AGE, and a row count of zero.  The number of clients is taken
 * to be the number of processes.
 */
TabletStorage::TabletStorage(
    uint channel_id, uint num_channels, uint process_id, uint num_processes,
    shared_ptr<ServerClientEncode> communicator,
    cudaStream_t cuda_stream, cublasHandle_t cublas_handle,
    const GeePsConfig& config) :
      channel_id(channel_id), num_channels(num_channels),
      process_id(process_id), num_processes(num_processes),
      num_clients(num_processes),
      communicator(communicator),
      cuda_stream(cuda_stream), cublas_handle(cublas_handle),
      config(config) {
  /* Initialize data tables */
  data_tables.resize(config.num_tables);
  for (uint table_id = 0; table_id < data_tables.size(); table_id++) {
    DataTable& data_table = data_tables[table_id];
    data_table.vec_clock.resize(num_processes);
    for (uint client_id = 0; client_id < num_processes; client_id++) {
      data_table.vec_clock[client_id] = INITIAL_DATA_AGE;
    }
    data_table.global_clock = INITIAL_DATA_AGE;
    data_table.row_count = 0;
  }
}

/*
 * Add a batch of updates sent by one client to a table.
 *
 *   client_id   sender of the batch
 *   clock       clock value the updates belong to
 *   table_id    table to update
 *   row_keys    batch_size row keys
 *   updates     batch_size rows of update values
 *   batch_size  number of rows in the batch
 *
 * The clock must be exactly one ahead of the client's recorded clock, unless
 * the client has not clocked yet; otherwise a warning is printed and the
 * process is aborted through CHECK(0).
 *
 * The first non-empty batch for a table sizes its store: the store is
 * created in CPU memory with batch_size zeroed rows and the row keys are
 * copied in.  Every later batch must have the same size, and its values are
 * added to the store position by position (the row keys of later batches are
 * not looked at).
 */
void TabletStorage::update_row_batch(
    uint client_id, iter_t clock, uint table_id,
    RowKey *row_keys, RowOpVal *updates, uint batch_size) {
  server_stats.nr_update += batch_size;
  if (client_id == process_id) {
    /* The update comes from the client of our own process */
    server_stats.nr_local_update += batch_size;
  }

  CHECK_LT(table_id, data_tables.size());
  DataTable& data_table = data_tables[table_id];
  DataStorage& data_store = data_table.store;

  CHECK_LT(client_id, data_table.vec_clock.size());
  iter_t cur_clock = data_table.vec_clock[client_id];
  if (cur_clock != INITIAL_DATA_AGE && clock != cur_clock + 1) {
    cerr << "WARNING CS clocks out of sync,"
         << " client = " << client_id
         << " clock = " << clock
         << " cur_clock = " << cur_clock
         << endl;
    CHECK(0);
  }

  if (batch_size == 0) {
    return;
  }

  if (data_store.size() == 0) {
    /* First batch for this table: it defines the set of rows */
    data_store.init(batch_size, DataStorage::CPU);
    data_store.zerofy_data_cpu();
    data_table.row_count = batch_size;
    memcpy(data_store.row_keys.data(), row_keys,
        batch_size * sizeof(RowKey));
  }
  CHECK_EQ(data_store.size(), batch_size);
  apply_updates(table_id, updates, batch_size);
}

/*
 * Element-wise add batch_size rows of updates to the master copy of a table.
 * The batch must cover the whole store (checked with CHECK_EQ); both buffers
 * are treated as flat arrays of batch_size * ROW_DATA_SIZE values.
 */
void TabletStorage::apply_updates(
    uint table_id, RowOpVal *update_rows, size_t batch_size) {
  CHECK_LT(table_id, data_tables.size());
  DataTable& data_table = data_tables[table_id];
  DataStorage& data_store = data_table.store;

  val_t *update = reinterpret_cast<val_t *>(update_rows);
  CHECK_EQ(data_store.size(), batch_size);
  val_t *master_data = reinterpret_cast<val_t *>(data_store.data());
  size_t num_vals = batch_size * ROW_DATA_SIZE;

  /* master_data = master_data + update */
  cpu_add(num_vals,
      master_data,
      update,
      master_data);
}

/*
 * Send the data of a table to every client, starting with a different
 * client on every clock so that no client is always served first.
 */
void TabletStorage::process_multiclient_pending_reads(
    iter_t clock, uint table_id) {
  /* Rotate the starting client */
  uint client_to_start = clock % num_clients;
  /* Process pending reads */
  for (uint i = 0; i < num_clients; i++) {
    uint client_id = (client_to_start + i) % num_clients;
    process_pending_reads(client_id, clock, table_id);
  }
}

/*
 * Send all rows of a table to one client through the communicator.
 * The table's global clock must equal `clock`; it is sent as the data age,
 * together with the client's own entry of the vector clock.
 */
void TabletStorage::process_pending_reads(
    uint client_id, iter_t clock, uint table_id) {
  /* NOTE: we assume each client uses all the rows */
  CHECK_LT(table_id, data_tables.size());
  DataTable& data_table = data_tables[table_id];
  DataStorage& data_store = data_table.store;

  RowKeys& row_keys = data_store.row_keys;
  CHECK_EQ(data_store.size(), row_keys.size());
  RowData *row_data = data_store.data();
  CHECK_EQ(data_table.global_clock, clock);
  iter_t data_age = data_table.global_clock;
  /* Same bounds check as in update_row_batch() and clock() */
  CHECK_LT(client_id, data_table.vec_clock.size());
  iter_t self_clock = data_table.vec_clock[client_id];
  communicator->read_row_batch_reply(
      client_id, process_id, data_age, self_clock, table_id,
      row_keys.data(), row_data, row_keys.size());
}

/* Reset the performance counters of this storage */
void TabletStorage::reset_perf_counters() {
  server_stats.reset();
}

/*
 * Record that a client finished a clock on a table.
 *
 * The client's vector-clock entry must advance by exactly one (unless it
 * still holds INITIAL_DATA_AGE).  When the minimum over all clients changes,
 * the table's global clock advances, again by exactly one unless it was
 * INITIAL_DATA_AGE, and the table data is sent to all clients.
 *
 * Timing: clock_ad_time_tot accumulates the duration of the whole call and
 * clock_ad_send_pending_time the part that starts after the vector clock
 * has been updated.
 */
void TabletStorage::clock(uint client_id, iter_t clock, uint table_id) {
  const bool timing = true;
  tbb::tick_count clock_ad_start;
  tbb::tick_count clock_ad_apply_op_end;
  tbb::tick_count clock_ad_end;

  if (timing) {
    clock_ad_start = tbb::tick_count::now();
  }

  CHECK_LT(table_id, data_tables.size());
  DataTable& data_table = data_tables[table_id];

  CHECK_LT(client_id, data_table.vec_clock.size());
  if (data_table.vec_clock[client_id] != INITIAL_DATA_AGE) {
    CHECK_EQ(clock, data_table.vec_clock[client_id] + 1);
  }
  data_table.vec_clock[client_id] = clock;

  if (timing) {
    /* Start of the send-pending phase.  This time stamp used to be left
     * unassigned, which made the send-pending interval below meaningless. */
    clock_ad_apply_op_end = tbb::tick_count::now();
  }

  iter_t new_global_clock = clock_min(data_table.vec_clock);
  if (new_global_clock != data_table.global_clock) {
    if (data_table.global_clock != INITIAL_DATA_AGE) {
      CHECK_EQ(new_global_clock, data_table.global_clock + 1);
    }
    data_table.global_clock = new_global_clock;

    /* Send pending read requests */
    process_multiclient_pending_reads(
        data_table.global_clock, table_id);

    /* Notify clients of new iteration */
    /* We don't need to do that now, because the client will use
     * the reception of row data as the notification. */
    // communicator->clock(process_id, global_clock);
  }

  if (timing) {
    clock_ad_end = tbb::tick_count::now();
    server_stats.clock_ad_send_pending_time +=
      (clock_ad_end - clock_ad_apply_op_end).seconds();
    server_stats.clock_ad_time_tot +=
      (clock_ad_end - clock_ad_start).seconds();
  }
}

/*
 * Send the statistics of this server to a client as one JSON object with
 * the members "storage", "metadata" and "router".  The total row count is
 * recomputed from the tables before the storage statistics are serialized.
 */
void TabletStorage::get_stats(
      uint client_id, shared_ptr<MetadataServer> metadata_server) {
  server_stats.nr_rows = 0;
  for (uint table_id = 0; table_id < data_tables.size(); table_id++) {
    server_stats.nr_rows += data_tables[table_id].row_count;
  }

  std::stringstream combined_server_stats;
  combined_server_stats << "{"
         << "\"storage\": " << server_stats.to_json() << ", "
         << "\"metadata\": " << metadata_server->get_stats() << ", "
         << "\"router\": " << communicator->get_router_stats()
         << " } ";
  communicator->get_stats(client_id, combined_server_stats.str());
}
229 | -------------------------------------------------------------------------------- /src/server/tablet-server.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __tablet_server_hpp__ 2 | #define __tablet_server_hpp__ 3 | 4 | /* 5 | * Copyright (c) 2016, Carnegie Mellon University. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 3.
Neither the name of the University nor the names of its contributors 17 | * may be used to endorse or promote products derived from this software 18 | * without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 27 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 30 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | * POSSIBILITY OF SUCH DAMAGE. 
32 | */ 33 | 34 | #include 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | 50 | #include "geeps-user-defined-types.hpp" 51 | #include "common/common-util.hpp" 52 | #include "common/row-op-util.hpp" 53 | #include "server-encoder-decoder.hpp" 54 | #include "metadata-server.hpp" 55 | 56 | using std::string; 57 | using std::vector; 58 | using boost::shared_ptr; 59 |

class ServerClientEncode;
class MetadataServer;   /* Used in get_stats() */

/*
 * Server-side storage of the data tables of one channel.
 *
 * For every table the storage keeps the row data, one clock per client and
 * the global clock.  Clients add updates with update_row_batch() and signal
 * the end of a clock with clock(); replies are sent through the
 * ServerClientEncode communicator.
 */
class TabletStorage {
 public:
  /* Performance counters of the storage, reported as JSON by to_json() */
  struct Stats {
    int64_t nr_request;
    int64_t nr_request_prior;
    int64_t nr_local_request;
    int64_t nr_send;
    int64_t nr_update;
    int64_t nr_local_update;
    int64_t nr_rows;
    double send_data_time;
    double clock_ad_apply_op_time;
    double clock_ad_send_pending_time;
    double clock_ad_time_tot;
    double iter_var_time;
    double inc_time;

    /* Zero all counters and timers.  nr_rows is left alone: it is not a
     * counter but is recomputed by TabletStorage::get_stats(). */
    void reset() {
      nr_request = 0;
      nr_request_prior = 0;
      nr_local_request = 0;
      nr_send = 0;
      nr_update = 0;
      nr_local_update = 0;
      send_data_time = 0.0;
      clock_ad_time_tot = 0.0;
      clock_ad_apply_op_time = 0.0;
      clock_ad_send_pending_time = 0.0;
      inc_time = 0.0;
      iter_var_time = 0.0;
    }

    /* nr_rows is initialized here because reset() does not touch it */
    Stats() : nr_rows(0) {
      reset();
    }

    /* Add every field of rhs to the corresponding field of this object */
    Stats& operator += (const Stats& rhs) {
      nr_request += rhs.nr_request;
      nr_request_prior += rhs.nr_request_prior;
      nr_local_request += rhs.nr_local_request;
      nr_send += rhs.nr_send;
      nr_update += rhs.nr_update;
      nr_local_update += rhs.nr_local_update;
      nr_rows += rhs.nr_rows;
      send_data_time += rhs.send_data_time;
      clock_ad_apply_op_time += rhs.clock_ad_apply_op_time;
      clock_ad_send_pending_time += rhs.clock_ad_send_pending_time;
      clock_ad_time_tot += rhs.clock_ad_time_tot;
      iter_var_time += rhs.iter_var_time;
      inc_time += rhs.inc_time;
      return *this;
    }

    /* Serialize all fields as one JSON object */
    std::string to_json() const {
      std::stringstream ss;
      ss << "{"
         << "\"nr_rows\": " << nr_rows << ", "
         << "\"nr_request\": " << nr_request << ", "
         << "\"nr_request_prior\": " << nr_request_prior << ", "
         << "\"nr_local_request\": " << nr_local_request << ", "
         << "\"nr_send\": " << nr_send << ", "
         << "\"nr_update\": " << nr_update << ", "
         << "\"nr_local_update\": " << nr_local_update << ", "
         << "\"send_data_time\": " << send_data_time << ", "
         << "\"clock_ad_apply_op_time\": " << clock_ad_apply_op_time << ", "
         << "\"clock_ad_send_pending_time\": "
         << clock_ad_send_pending_time << ", "
         << "\"clock_ad_time_tot\": " << clock_ad_time_tot << ", "
         << "\"iter_var_time\": " << iter_var_time << ", "
         << "\"inc_time\": " << inc_time
         << " } ";
      return ss.str();
    }
  };
  Stats server_stats;

  /* NOTE(review): the template arguments of the two unordered_map typedefs
   * below are missing in this copy of the file and cannot be derived from
   * the code that uses them; restore them from the repository. */
  typedef boost::unordered_map Row2Index;

  typedef boost::unordered_map PendingReadsLog;
  typedef std::vector<PendingReadsLog> MulticlientPendingReadsLog;
  /* One clock per client, indexed by client id */
  typedef std::vector<iter_t> VectorClock;

  /* State of one table */
  struct DataTable {
    VectorClock vec_clock;   /* last clock received from every client */
    iter_t global_clock;     /* minimum of vec_clock, see clock() */
    Row2Index row2idx;
    size_t row_count;        /* number of rows, set by the first batch */
    DataStorage store;       /* row keys and row data */
  };
  typedef std::vector<DataTable> DataTables;

 private:
  uint channel_id;
  uint num_channels;
  uint process_id;
  uint num_processes;
  uint num_clients;   /* set to num_processes by the constructor */

  /* NOTE(review): template arguments missing in this copy of the file;
   * restore them from the repository. */
  boost::unordered_map table_directory;

  /* Outgoing path to the clients */
  shared_ptr<ServerClientEncode> communicator;

  cudaStream_t cuda_stream;
  cublasHandle_t cublas_handle;

  DataTables data_tables;   /* indexed by table id */

  GeePsConfig config;

  tbb::tick_count start_time;
  tbb::tick_count first_come_time;

 private:
  /* Grow `storage` so that it holds at least `size` elements; capacity is
   * reserved up to the value returned by get_nearest_power2(size).  The
   * vector is never shrunk. */
  template <typename T>
  void resize_storage(vector<T>& storage, uint size) {
    if (storage.capacity() <= size) {
      uint capacity = get_nearest_power2(size);
      storage.reserve(capacity);
      // cerr << "capacity is " << capacity << endl;
    }
    if (storage.size() <= size) {
      storage.resize(size);
      // cerr << "size is " << size << endl;
    }
  }
  /* Send the data of a table to all clients */
  void process_multiclient_pending_reads(
      iter_t clock, uint table_id);
  /* Send the data of a table to one client */
  void process_pending_reads(
      uint client_id, iter_t clock, uint table_id);
  /* Add a batch of update rows to the master copy of a table */
  void apply_updates(
      uint table_id, RowOpVal *update_rows, size_t batch_size);

 public:
  TabletStorage(
      uint channel_id, uint num_channels, uint process_id, uint num_processes,
      shared_ptr<ServerClientEncode> communicator,
      cudaStream_t cuda_stream, cublasHandle_t cublas_handle,
      const GeePsConfig& config);
  /* Add a batch of updates from one client to a table */
  void update_row_batch(
      uint client_id, iter_t clock, uint table_id,
      RowKey *row_keys, RowOpVal *updates, uint batch_size);
  /* Record that a client finished a clock on a table */
  void clock(uint client_id, iter_t clock, uint table_id);
  void get_stats(
      uint client_id, shared_ptr<MetadataServer> metadata_server);
    /* Now it also needs the stats from the MetadataServer.
     * This is just a work-around, and we need to fix it in the future.
     */
  void reset_perf_counters();
};


/* NOTE(review): the element type of zmq_ctx is assumed to be zmq::context_t;
 * confirm against common/router-handler.hpp. */
void server(uint channel_id, uint nr_channels, uint server_id, uint nr_servers,
            boost::shared_ptr<zmq::context_t> zmq_ctx,
            const GeePsConfig& config);

203 | 204 | #endif // defined __tablet_server_hpp__ 205 | --------------------------------------------------------------------------------